{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 200.0, "global_step": 30500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003278688524590164, "grad_norm": 25.710773468017578, "learning_rate": 2.1857923497267763e-08, "loss": 6.2227, "step": 1 }, { "epoch": 0.006557377049180328, "grad_norm": 28.64084815979004, "learning_rate": 4.3715846994535526e-08, "loss": 6.4043, "step": 2 }, { "epoch": 0.009836065573770493, "grad_norm": 27.950037002563477, "learning_rate": 6.557377049180328e-08, "loss": 6.1973, "step": 3 }, { "epoch": 0.013114754098360656, "grad_norm": 36.76369094848633, "learning_rate": 8.743169398907105e-08, "loss": 6.1543, "step": 4 }, { "epoch": 0.01639344262295082, "grad_norm": 33.72147750854492, "learning_rate": 1.0928961748633881e-07, "loss": 6.5801, "step": 5 }, { "epoch": 0.019672131147540985, "grad_norm": 34.656105041503906, "learning_rate": 1.3114754098360656e-07, "loss": 6.5215, "step": 6 }, { "epoch": 0.022950819672131147, "grad_norm": 32.06599807739258, "learning_rate": 1.5300546448087432e-07, "loss": 6.4922, "step": 7 }, { "epoch": 0.02622950819672131, "grad_norm": 33.78942108154297, "learning_rate": 1.748633879781421e-07, "loss": 6.459, "step": 8 }, { "epoch": 0.029508196721311476, "grad_norm": 26.56684684753418, "learning_rate": 1.9672131147540986e-07, "loss": 6.3145, "step": 9 }, { "epoch": 0.03278688524590164, "grad_norm": 42.33388137817383, "learning_rate": 2.1857923497267762e-07, "loss": 6.4492, "step": 10 }, { "epoch": 0.036065573770491806, "grad_norm": 31.881439208984375, "learning_rate": 2.404371584699454e-07, "loss": 6.418, "step": 11 }, { "epoch": 0.03934426229508197, "grad_norm": 31.14425277709961, "learning_rate": 2.622950819672131e-07, "loss": 6.1484, "step": 12 }, { "epoch": 0.04262295081967213, "grad_norm": 34.47117233276367, "learning_rate": 2.841530054644809e-07, "loss": 6.4531, "step": 13 }, { "epoch": 0.04590163934426229, "grad_norm": 37.10658264160156, "learning_rate": 3.0601092896174863e-07, "loss": 6.5391, "step": 14 }, { "epoch": 0.04918032786885246, "grad_norm": 47.11949920654297, "learning_rate": 3.278688524590164e-07, "loss": 6.4902, "step": 15 }, { "epoch": 0.05245901639344262, "grad_norm": 57.98966598510742, "learning_rate": 3.497267759562842e-07, "loss": 6.4004, "step": 16 }, { "epoch": 0.05573770491803279, "grad_norm": 51.14267349243164, "learning_rate": 3.7158469945355194e-07, "loss": 6.5918, "step": 17 }, { "epoch": 0.05901639344262295, "grad_norm": 28.430864334106445, "learning_rate": 3.934426229508197e-07, "loss": 6.4219, "step": 18 }, { "epoch": 0.06229508196721312, "grad_norm": 29.535619735717773, "learning_rate": 4.1530054644808746e-07, "loss": 6.1855, "step": 19 }, { "epoch": 0.06557377049180328, "grad_norm": 34.70895767211914, "learning_rate": 4.3715846994535524e-07, "loss": 6.4219, "step": 20 }, { "epoch": 0.06885245901639345, "grad_norm": 28.821842193603516, "learning_rate": 4.59016393442623e-07, "loss": 6.4492, "step": 21 }, { "epoch": 0.07213114754098361, "grad_norm": 42.65898132324219, "learning_rate": 4.808743169398908e-07, "loss": 6.4062, "step": 22 }, { "epoch": 0.07540983606557378, "grad_norm": 29.36783790588379, "learning_rate": 5.027322404371585e-07, "loss": 6.3965, "step": 23 }, { "epoch": 0.07868852459016394, "grad_norm": 30.249521255493164, "learning_rate": 5.245901639344262e-07, "loss": 6.3672, "step": 24 }, { "epoch": 0.08196721311475409, "grad_norm": 29.723167419433594, "learning_rate": 5.46448087431694e-07, "loss": 6.4688, "step": 25 }, { "epoch": 0.08524590163934426, "grad_norm": 26.779592514038086, "learning_rate": 5.683060109289618e-07, "loss": 6.2383, "step": 26 }, { "epoch": 0.08852459016393442, "grad_norm": 30.06792449951172, "learning_rate": 5.901639344262295e-07, "loss": 6.2129, "step": 27 }, { "epoch": 0.09180327868852459, "grad_norm": 33.85888671875, "learning_rate": 6.120218579234973e-07, "loss": 6.166, "step": 28 }, { "epoch": 0.09508196721311475, "grad_norm": 27.253803253173828, "learning_rate": 6.338797814207651e-07, "loss": 6.125, "step": 29 }, { "epoch": 0.09836065573770492, "grad_norm": 37.074119567871094, "learning_rate": 6.557377049180328e-07, "loss": 6.2344, "step": 30 }, { "epoch": 0.10163934426229508, "grad_norm": 31.048490524291992, "learning_rate": 6.775956284153006e-07, "loss": 6.1523, "step": 31 }, { "epoch": 0.10491803278688525, "grad_norm": 277.86456298828125, "learning_rate": 6.994535519125684e-07, "loss": 6.0273, "step": 32 }, { "epoch": 0.10819672131147541, "grad_norm": 28.42243766784668, "learning_rate": 7.213114754098361e-07, "loss": 6.0703, "step": 33 }, { "epoch": 0.11147540983606558, "grad_norm": 25.075790405273438, "learning_rate": 7.431693989071039e-07, "loss": 6.0996, "step": 34 }, { "epoch": 0.11475409836065574, "grad_norm": 39.9362678527832, "learning_rate": 7.650273224043716e-07, "loss": 5.8223, "step": 35 }, { "epoch": 0.1180327868852459, "grad_norm": 25.539011001586914, "learning_rate": 7.868852459016395e-07, "loss": 6.084, "step": 36 }, { "epoch": 0.12131147540983607, "grad_norm": 54.11709976196289, "learning_rate": 8.087431693989072e-07, "loss": 6.041, "step": 37 }, { "epoch": 0.12459016393442623, "grad_norm": 32.912681579589844, "learning_rate": 8.306010928961749e-07, "loss": 6.0547, "step": 38 }, { "epoch": 0.12786885245901639, "grad_norm": 39.48442840576172, "learning_rate": 8.524590163934427e-07, "loss": 5.918, "step": 39 }, { "epoch": 0.13114754098360656, "grad_norm": 30.89902687072754, "learning_rate": 8.743169398907105e-07, "loss": 6.0664, "step": 40 }, { "epoch": 0.13442622950819672, "grad_norm": 21.370811462402344, "learning_rate": 8.961748633879782e-07, "loss": 5.8066, "step": 41 }, { "epoch": 0.1377049180327869, "grad_norm": 24.136213302612305, "learning_rate": 9.18032786885246e-07, "loss": 5.5371, "step": 42 }, { "epoch": 0.14098360655737704, "grad_norm": 32.86609649658203, "learning_rate": 9.398907103825138e-07, "loss": 5.6016, "step": 43 }, { "epoch": 0.14426229508196722, "grad_norm": 30.664430618286133, "learning_rate": 9.617486338797815e-07, "loss": 5.4736, "step": 44 }, { "epoch": 0.14754098360655737, "grad_norm": 21.05451011657715, "learning_rate": 9.836065573770493e-07, "loss": 5.459, "step": 45 }, { "epoch": 0.15081967213114755, "grad_norm": 63.610923767089844, "learning_rate": 1.005464480874317e-06, "loss": 5.5488, "step": 46 }, { "epoch": 0.1540983606557377, "grad_norm": 23.905902862548828, "learning_rate": 1.0273224043715847e-06, "loss": 5.5684, "step": 47 }, { "epoch": 0.15737704918032788, "grad_norm": 24.645139694213867, "learning_rate": 1.0491803278688525e-06, "loss": 5.8184, "step": 48 }, { "epoch": 0.16065573770491803, "grad_norm": 21.347951889038086, "learning_rate": 1.0710382513661204e-06, "loss": 5.6641, "step": 49 }, { "epoch": 0.16393442622950818, "grad_norm": 24.697437286376953, "learning_rate": 1.092896174863388e-06, "loss": 5.8223, "step": 50 }, { "epoch": 0.16721311475409836, "grad_norm": 30.537342071533203, "learning_rate": 1.1147540983606559e-06, "loss": 5.752, "step": 51 }, { "epoch": 0.17049180327868851, "grad_norm": 21.438995361328125, "learning_rate": 1.1366120218579236e-06, "loss": 5.5508, "step": 52 }, { "epoch": 0.1737704918032787, "grad_norm": 20.227550506591797, "learning_rate": 1.1584699453551913e-06, "loss": 5.7168, "step": 53 }, { "epoch": 0.17704918032786884, "grad_norm": 20.099853515625, "learning_rate": 1.180327868852459e-06, "loss": 5.4668, "step": 54 }, { "epoch": 0.18032786885245902, "grad_norm": 18.78045082092285, "learning_rate": 1.2021857923497268e-06, "loss": 5.5, "step": 55 }, { "epoch": 0.18360655737704917, "grad_norm": 22.590129852294922, "learning_rate": 1.2240437158469945e-06, "loss": 5.3652, "step": 56 }, { "epoch": 0.18688524590163935, "grad_norm": 31.33005142211914, "learning_rate": 1.2459016393442625e-06, "loss": 5.2422, "step": 57 }, { "epoch": 0.1901639344262295, "grad_norm": 15.850641250610352, "learning_rate": 1.2677595628415302e-06, "loss": 5.25, "step": 58 }, { "epoch": 0.19344262295081968, "grad_norm": 29.369230270385742, "learning_rate": 1.2896174863387977e-06, "loss": 5.1777, "step": 59 }, { "epoch": 0.19672131147540983, "grad_norm": 22.63234519958496, "learning_rate": 1.3114754098360657e-06, "loss": 5.0488, "step": 60 }, { "epoch": 0.2, "grad_norm": 27.64787483215332, "learning_rate": 1.3333333333333334e-06, "loss": 5.3066, "step": 61 }, { "epoch": 0.20327868852459016, "grad_norm": 30.185606002807617, "learning_rate": 1.3551912568306011e-06, "loss": 5.1094, "step": 62 }, { "epoch": 0.20655737704918034, "grad_norm": 20.33966636657715, "learning_rate": 1.377049180327869e-06, "loss": 4.7383, "step": 63 }, { "epoch": 0.2098360655737705, "grad_norm": 29.74634552001953, "learning_rate": 1.3989071038251368e-06, "loss": 4.8057, "step": 64 }, { "epoch": 0.21311475409836064, "grad_norm": 18.375520706176758, "learning_rate": 1.4207650273224043e-06, "loss": 4.7949, "step": 65 }, { "epoch": 0.21639344262295082, "grad_norm": 28.539918899536133, "learning_rate": 1.4426229508196723e-06, "loss": 4.7051, "step": 66 }, { "epoch": 0.21967213114754097, "grad_norm": 20.133228302001953, "learning_rate": 1.46448087431694e-06, "loss": 4.8467, "step": 67 }, { "epoch": 0.22295081967213115, "grad_norm": 30.01630210876465, "learning_rate": 1.4863387978142078e-06, "loss": 4.8809, "step": 68 }, { "epoch": 0.2262295081967213, "grad_norm": 31.763431549072266, "learning_rate": 1.5081967213114757e-06, "loss": 4.9053, "step": 69 }, { "epoch": 0.22950819672131148, "grad_norm": 14.949121475219727, "learning_rate": 1.5300546448087432e-06, "loss": 5.0371, "step": 70 }, { "epoch": 0.23278688524590163, "grad_norm": 13.54267406463623, "learning_rate": 1.551912568306011e-06, "loss": 4.627, "step": 71 }, { "epoch": 0.2360655737704918, "grad_norm": 16.481355667114258, "learning_rate": 1.573770491803279e-06, "loss": 4.7656, "step": 72 }, { "epoch": 0.23934426229508196, "grad_norm": 18.558795928955078, "learning_rate": 1.5956284153005466e-06, "loss": 4.8086, "step": 73 }, { "epoch": 0.24262295081967214, "grad_norm": 21.792646408081055, "learning_rate": 1.6174863387978144e-06, "loss": 4.9688, "step": 74 }, { "epoch": 0.2459016393442623, "grad_norm": 19.933223724365234, "learning_rate": 1.6393442622950819e-06, "loss": 4.6865, "step": 75 }, { "epoch": 0.24918032786885247, "grad_norm": 15.499825477600098, "learning_rate": 1.6612021857923498e-06, "loss": 4.5283, "step": 76 }, { "epoch": 0.25245901639344265, "grad_norm": 13.732904434204102, "learning_rate": 1.6830601092896176e-06, "loss": 4.1035, "step": 77 }, { "epoch": 0.25573770491803277, "grad_norm": 13.144404411315918, "learning_rate": 1.7049180327868853e-06, "loss": 4.7529, "step": 78 }, { "epoch": 0.25901639344262295, "grad_norm": 16.45539093017578, "learning_rate": 1.7267759562841532e-06, "loss": 4.627, "step": 79 }, { "epoch": 0.26229508196721313, "grad_norm": 11.588747024536133, "learning_rate": 1.748633879781421e-06, "loss": 4.4385, "step": 80 }, { "epoch": 0.26557377049180325, "grad_norm": 14.9818696975708, "learning_rate": 1.7704918032786885e-06, "loss": 4.3242, "step": 81 }, { "epoch": 0.26885245901639343, "grad_norm": 12.847567558288574, "learning_rate": 1.7923497267759564e-06, "loss": 4.1016, "step": 82 }, { "epoch": 0.2721311475409836, "grad_norm": 17.035430908203125, "learning_rate": 1.8142076502732242e-06, "loss": 4.1094, "step": 83 }, { "epoch": 0.2754098360655738, "grad_norm": 18.941162109375, "learning_rate": 1.836065573770492e-06, "loss": 4.4346, "step": 84 }, { "epoch": 0.2786885245901639, "grad_norm": 19.253890991210938, "learning_rate": 1.8579234972677599e-06, "loss": 4.541, "step": 85 }, { "epoch": 0.2819672131147541, "grad_norm": 10.979122161865234, "learning_rate": 1.8797814207650276e-06, "loss": 4.2607, "step": 86 }, { "epoch": 0.28524590163934427, "grad_norm": 12.736722946166992, "learning_rate": 1.9016393442622951e-06, "loss": 4.249, "step": 87 }, { "epoch": 0.28852459016393445, "grad_norm": 16.142955780029297, "learning_rate": 1.923497267759563e-06, "loss": 4.3613, "step": 88 }, { "epoch": 0.29180327868852457, "grad_norm": 11.06995677947998, "learning_rate": 1.945355191256831e-06, "loss": 4.2305, "step": 89 }, { "epoch": 0.29508196721311475, "grad_norm": 19.434099197387695, "learning_rate": 1.9672131147540985e-06, "loss": 4.5996, "step": 90 }, { "epoch": 0.2983606557377049, "grad_norm": 13.221315383911133, "learning_rate": 1.9890710382513663e-06, "loss": 3.9365, "step": 91 }, { "epoch": 0.3016393442622951, "grad_norm": 16.304529190063477, "learning_rate": 2.010928961748634e-06, "loss": 4.0137, "step": 92 }, { "epoch": 0.30491803278688523, "grad_norm": 12.670670509338379, "learning_rate": 2.0327868852459017e-06, "loss": 4.0889, "step": 93 }, { "epoch": 0.3081967213114754, "grad_norm": 14.945727348327637, "learning_rate": 2.0546448087431695e-06, "loss": 4.1279, "step": 94 }, { "epoch": 0.3114754098360656, "grad_norm": 12.863651275634766, "learning_rate": 2.0765027322404376e-06, "loss": 4.127, "step": 95 }, { "epoch": 0.31475409836065577, "grad_norm": 12.514780044555664, "learning_rate": 2.098360655737705e-06, "loss": 4.1133, "step": 96 }, { "epoch": 0.3180327868852459, "grad_norm": 10.037373542785645, "learning_rate": 2.1202185792349727e-06, "loss": 3.8359, "step": 97 }, { "epoch": 0.32131147540983607, "grad_norm": 15.63622760772705, "learning_rate": 2.142076502732241e-06, "loss": 3.8105, "step": 98 }, { "epoch": 0.32459016393442625, "grad_norm": 10.965630531311035, "learning_rate": 2.1639344262295085e-06, "loss": 3.9463, "step": 99 }, { "epoch": 0.32786885245901637, "grad_norm": 12.48837661743164, "learning_rate": 2.185792349726776e-06, "loss": 3.9297, "step": 100 }, { "epoch": 0.33114754098360655, "grad_norm": 9.419021606445312, "learning_rate": 2.207650273224044e-06, "loss": 3.7441, "step": 101 }, { "epoch": 0.3344262295081967, "grad_norm": 9.243003845214844, "learning_rate": 2.2295081967213117e-06, "loss": 3.9287, "step": 102 }, { "epoch": 0.3377049180327869, "grad_norm": 12.192901611328125, "learning_rate": 2.2513661202185795e-06, "loss": 3.8594, "step": 103 }, { "epoch": 0.34098360655737703, "grad_norm": 10.612401962280273, "learning_rate": 2.273224043715847e-06, "loss": 3.874, "step": 104 }, { "epoch": 0.3442622950819672, "grad_norm": 10.438440322875977, "learning_rate": 2.295081967213115e-06, "loss": 3.9619, "step": 105 }, { "epoch": 0.3475409836065574, "grad_norm": 13.256019592285156, "learning_rate": 2.3169398907103827e-06, "loss": 3.874, "step": 106 }, { "epoch": 0.35081967213114756, "grad_norm": 14.23326587677002, "learning_rate": 2.3387978142076504e-06, "loss": 4.0635, "step": 107 }, { "epoch": 0.3540983606557377, "grad_norm": 11.524713516235352, "learning_rate": 2.360655737704918e-06, "loss": 4.2422, "step": 108 }, { "epoch": 0.35737704918032787, "grad_norm": 27.54733657836914, "learning_rate": 2.382513661202186e-06, "loss": 3.8682, "step": 109 }, { "epoch": 0.36065573770491804, "grad_norm": 14.591822624206543, "learning_rate": 2.4043715846994536e-06, "loss": 3.6602, "step": 110 }, { "epoch": 0.3639344262295082, "grad_norm": 18.851198196411133, "learning_rate": 2.4262295081967218e-06, "loss": 3.8877, "step": 111 }, { "epoch": 0.36721311475409835, "grad_norm": 12.888423919677734, "learning_rate": 2.448087431693989e-06, "loss": 3.5742, "step": 112 }, { "epoch": 0.3704918032786885, "grad_norm": 13.570504188537598, "learning_rate": 2.469945355191257e-06, "loss": 3.7021, "step": 113 }, { "epoch": 0.3737704918032787, "grad_norm": 10.947571754455566, "learning_rate": 2.491803278688525e-06, "loss": 3.9824, "step": 114 }, { "epoch": 0.3770491803278688, "grad_norm": 10.173373222351074, "learning_rate": 2.5136612021857927e-06, "loss": 3.7354, "step": 115 }, { "epoch": 0.380327868852459, "grad_norm": 9.553812980651855, "learning_rate": 2.5355191256830604e-06, "loss": 3.5391, "step": 116 }, { "epoch": 0.3836065573770492, "grad_norm": 11.169816017150879, "learning_rate": 2.5573770491803277e-06, "loss": 3.9834, "step": 117 }, { "epoch": 0.38688524590163936, "grad_norm": 18.689970016479492, "learning_rate": 2.5792349726775955e-06, "loss": 3.9131, "step": 118 }, { "epoch": 0.3901639344262295, "grad_norm": 8.905458450317383, "learning_rate": 2.6010928961748636e-06, "loss": 4.0889, "step": 119 }, { "epoch": 0.39344262295081966, "grad_norm": 7.6630859375, "learning_rate": 2.6229508196721314e-06, "loss": 3.458, "step": 120 }, { "epoch": 0.39672131147540984, "grad_norm": 11.597861289978027, "learning_rate": 2.644808743169399e-06, "loss": 3.8672, "step": 121 }, { "epoch": 0.4, "grad_norm": 13.273063659667969, "learning_rate": 2.666666666666667e-06, "loss": 3.7725, "step": 122 }, { "epoch": 0.40327868852459015, "grad_norm": 15.899773597717285, "learning_rate": 2.6885245901639346e-06, "loss": 3.8418, "step": 123 }, { "epoch": 0.4065573770491803, "grad_norm": 10.372955322265625, "learning_rate": 2.7103825136612023e-06, "loss": 3.5703, "step": 124 }, { "epoch": 0.4098360655737705, "grad_norm": 8.74384593963623, "learning_rate": 2.7322404371584705e-06, "loss": 3.8711, "step": 125 }, { "epoch": 0.4131147540983607, "grad_norm": 26.880582809448242, "learning_rate": 2.754098360655738e-06, "loss": 3.6045, "step": 126 }, { "epoch": 0.4163934426229508, "grad_norm": 8.32030200958252, "learning_rate": 2.775956284153006e-06, "loss": 3.6426, "step": 127 }, { "epoch": 0.419672131147541, "grad_norm": 10.558614730834961, "learning_rate": 2.7978142076502737e-06, "loss": 3.5977, "step": 128 }, { "epoch": 0.42295081967213116, "grad_norm": 9.257072448730469, "learning_rate": 2.819672131147541e-06, "loss": 3.6016, "step": 129 }, { "epoch": 0.4262295081967213, "grad_norm": 9.236532211303711, "learning_rate": 2.8415300546448087e-06, "loss": 3.6201, "step": 130 }, { "epoch": 0.42950819672131146, "grad_norm": 17.032794952392578, "learning_rate": 2.8633879781420764e-06, "loss": 3.8379, "step": 131 }, { "epoch": 0.43278688524590164, "grad_norm": 19.245325088500977, "learning_rate": 2.8852459016393446e-06, "loss": 3.7002, "step": 132 }, { "epoch": 0.4360655737704918, "grad_norm": 7.250633716583252, "learning_rate": 2.9071038251366123e-06, "loss": 3.6768, "step": 133 }, { "epoch": 0.43934426229508194, "grad_norm": 8.288655281066895, "learning_rate": 2.92896174863388e-06, "loss": 3.6123, "step": 134 }, { "epoch": 0.4426229508196721, "grad_norm": 8.947036743164062, "learning_rate": 2.9508196721311478e-06, "loss": 3.5137, "step": 135 }, { "epoch": 0.4459016393442623, "grad_norm": 7.143942356109619, "learning_rate": 2.9726775956284155e-06, "loss": 3.626, "step": 136 }, { "epoch": 0.4491803278688525, "grad_norm": 14.759167671203613, "learning_rate": 2.9945355191256832e-06, "loss": 3.2617, "step": 137 }, { "epoch": 0.4524590163934426, "grad_norm": 8.606980323791504, "learning_rate": 3.0163934426229514e-06, "loss": 3.6338, "step": 138 }, { "epoch": 0.4557377049180328, "grad_norm": 11.359353065490723, "learning_rate": 3.038251366120219e-06, "loss": 3.4189, "step": 139 }, { "epoch": 0.45901639344262296, "grad_norm": 9.634540557861328, "learning_rate": 3.0601092896174864e-06, "loss": 3.7969, "step": 140 }, { "epoch": 0.46229508196721314, "grad_norm": 7.860725402832031, "learning_rate": 3.081967213114754e-06, "loss": 3.3838, "step": 141 }, { "epoch": 0.46557377049180326, "grad_norm": 8.221887588500977, "learning_rate": 3.103825136612022e-06, "loss": 3.7422, "step": 142 }, { "epoch": 0.46885245901639344, "grad_norm": 10.046731948852539, "learning_rate": 3.1256830601092896e-06, "loss": 3.7061, "step": 143 }, { "epoch": 0.4721311475409836, "grad_norm": 8.396933555603027, "learning_rate": 3.147540983606558e-06, "loss": 3.8027, "step": 144 }, { "epoch": 0.47540983606557374, "grad_norm": 8.357086181640625, "learning_rate": 3.1693989071038255e-06, "loss": 3.6191, "step": 145 }, { "epoch": 0.4786885245901639, "grad_norm": 10.888108253479004, "learning_rate": 3.1912568306010933e-06, "loss": 3.5137, "step": 146 }, { "epoch": 0.4819672131147541, "grad_norm": 10.46477222442627, "learning_rate": 3.213114754098361e-06, "loss": 3.6758, "step": 147 }, { "epoch": 0.4852459016393443, "grad_norm": 7.786928176879883, "learning_rate": 3.2349726775956287e-06, "loss": 3.8779, "step": 148 }, { "epoch": 0.4885245901639344, "grad_norm": 8.274513244628906, "learning_rate": 3.2568306010928965e-06, "loss": 3.749, "step": 149 }, { "epoch": 0.4918032786885246, "grad_norm": 10.761190414428711, "learning_rate": 3.2786885245901638e-06, "loss": 3.5898, "step": 150 }, { "epoch": 0.49508196721311476, "grad_norm": 8.388467788696289, "learning_rate": 3.3005464480874324e-06, "loss": 3.5264, "step": 151 }, { "epoch": 0.49836065573770494, "grad_norm": 7.703427314758301, "learning_rate": 3.3224043715846997e-06, "loss": 3.4814, "step": 152 }, { "epoch": 0.5016393442622951, "grad_norm": 11.889690399169922, "learning_rate": 3.3442622950819674e-06, "loss": 3.4082, "step": 153 }, { "epoch": 0.5049180327868853, "grad_norm": 7.040947914123535, "learning_rate": 3.366120218579235e-06, "loss": 3.5186, "step": 154 }, { "epoch": 0.5081967213114754, "grad_norm": 10.378883361816406, "learning_rate": 3.387978142076503e-06, "loss": 3.5225, "step": 155 }, { "epoch": 0.5114754098360655, "grad_norm": 8.242631912231445, "learning_rate": 3.4098360655737706e-06, "loss": 3.5254, "step": 156 }, { "epoch": 0.5147540983606558, "grad_norm": 6.667419910430908, "learning_rate": 3.4316939890710388e-06, "loss": 3.5283, "step": 157 }, { "epoch": 0.5180327868852459, "grad_norm": 8.195940971374512, "learning_rate": 3.4535519125683065e-06, "loss": 3.5693, "step": 158 }, { "epoch": 0.521311475409836, "grad_norm": 8.00723648071289, "learning_rate": 3.4754098360655742e-06, "loss": 3.6777, "step": 159 }, { "epoch": 0.5245901639344263, "grad_norm": 6.8965582847595215, "learning_rate": 3.497267759562842e-06, "loss": 3.6045, "step": 160 }, { "epoch": 0.5278688524590164, "grad_norm": 8.07506275177002, "learning_rate": 3.5191256830601097e-06, "loss": 3.6104, "step": 161 }, { "epoch": 0.5311475409836065, "grad_norm": 8.429009437561035, "learning_rate": 3.540983606557377e-06, "loss": 3.3799, "step": 162 }, { "epoch": 0.5344262295081967, "grad_norm": 7.047390460968018, "learning_rate": 3.5628415300546447e-06, "loss": 3.6758, "step": 163 }, { "epoch": 0.5377049180327869, "grad_norm": 18.357637405395508, "learning_rate": 3.584699453551913e-06, "loss": 3.4287, "step": 164 }, { "epoch": 0.5409836065573771, "grad_norm": 9.751418113708496, "learning_rate": 3.6065573770491806e-06, "loss": 3.5439, "step": 165 }, { "epoch": 0.5442622950819672, "grad_norm": 8.382208824157715, "learning_rate": 3.6284153005464484e-06, "loss": 3.6348, "step": 166 }, { "epoch": 0.5475409836065573, "grad_norm": 7.606760025024414, "learning_rate": 3.650273224043716e-06, "loss": 3.2979, "step": 167 }, { "epoch": 0.5508196721311476, "grad_norm": 8.384288787841797, "learning_rate": 3.672131147540984e-06, "loss": 3.3413, "step": 168 }, { "epoch": 0.5540983606557377, "grad_norm": 7.686258316040039, "learning_rate": 3.6939890710382516e-06, "loss": 3.6045, "step": 169 }, { "epoch": 0.5573770491803278, "grad_norm": 10.256412506103516, "learning_rate": 3.7158469945355197e-06, "loss": 3.6699, "step": 170 }, { "epoch": 0.5606557377049181, "grad_norm": 11.0951566696167, "learning_rate": 3.7377049180327874e-06, "loss": 3.751, "step": 171 }, { "epoch": 0.5639344262295082, "grad_norm": 7.787613868713379, "learning_rate": 3.759562841530055e-06, "loss": 3.3125, "step": 172 }, { "epoch": 0.5672131147540984, "grad_norm": 8.864995956420898, "learning_rate": 3.7814207650273225e-06, "loss": 3.3545, "step": 173 }, { "epoch": 0.5704918032786885, "grad_norm": 6.565748691558838, "learning_rate": 3.8032786885245902e-06, "loss": 3.4951, "step": 174 }, { "epoch": 0.5737704918032787, "grad_norm": 7.662400722503662, "learning_rate": 3.825136612021858e-06, "loss": 3.3643, "step": 175 }, { "epoch": 0.5770491803278689, "grad_norm": 7.499402046203613, "learning_rate": 3.846994535519126e-06, "loss": 3.4883, "step": 176 }, { "epoch": 0.580327868852459, "grad_norm": 8.051335334777832, "learning_rate": 3.868852459016394e-06, "loss": 3.418, "step": 177 }, { "epoch": 0.5836065573770491, "grad_norm": 9.935887336730957, "learning_rate": 3.890710382513662e-06, "loss": 3.625, "step": 178 }, { "epoch": 0.5868852459016394, "grad_norm": 7.551872253417969, "learning_rate": 3.912568306010929e-06, "loss": 3.355, "step": 179 }, { "epoch": 0.5901639344262295, "grad_norm": 6.884659290313721, "learning_rate": 3.934426229508197e-06, "loss": 3.3643, "step": 180 }, { "epoch": 0.5934426229508196, "grad_norm": 7.74313497543335, "learning_rate": 3.956284153005464e-06, "loss": 3.2764, "step": 181 }, { "epoch": 0.5967213114754099, "grad_norm": 6.802099227905273, "learning_rate": 3.9781420765027325e-06, "loss": 3.3018, "step": 182 }, { "epoch": 0.6, "grad_norm": 11.664026260375977, "learning_rate": 4.000000000000001e-06, "loss": 3.5684, "step": 183 }, { "epoch": 0.6032786885245902, "grad_norm": 6.97332763671875, "learning_rate": 4.021857923497268e-06, "loss": 3.291, "step": 184 }, { "epoch": 0.6065573770491803, "grad_norm": 6.561279773712158, "learning_rate": 4.043715846994536e-06, "loss": 3.5293, "step": 185 }, { "epoch": 0.6098360655737705, "grad_norm": 9.276331901550293, "learning_rate": 4.0655737704918034e-06, "loss": 3.4834, "step": 186 }, { "epoch": 0.6131147540983607, "grad_norm": 8.892594337463379, "learning_rate": 4.087431693989072e-06, "loss": 3.5391, "step": 187 }, { "epoch": 0.6163934426229508, "grad_norm": 7.090384006500244, "learning_rate": 4.109289617486339e-06, "loss": 3.4316, "step": 188 }, { "epoch": 0.6196721311475409, "grad_norm": 6.770163059234619, "learning_rate": 4.131147540983607e-06, "loss": 3.6797, "step": 189 }, { "epoch": 0.6229508196721312, "grad_norm": 12.2550687789917, "learning_rate": 4.153005464480875e-06, "loss": 3.4492, "step": 190 }, { "epoch": 0.6262295081967213, "grad_norm": 7.5278167724609375, "learning_rate": 4.1748633879781425e-06, "loss": 3.3975, "step": 191 }, { "epoch": 0.6295081967213115, "grad_norm": 8.212299346923828, "learning_rate": 4.19672131147541e-06, "loss": 3.4336, "step": 192 }, { "epoch": 0.6327868852459017, "grad_norm": 8.192469596862793, "learning_rate": 4.218579234972678e-06, "loss": 3.3379, "step": 193 }, { "epoch": 0.6360655737704918, "grad_norm": 10.262571334838867, "learning_rate": 4.240437158469945e-06, "loss": 3.4297, "step": 194 }, { "epoch": 0.639344262295082, "grad_norm": 8.15339183807373, "learning_rate": 4.2622950819672135e-06, "loss": 3.7744, "step": 195 }, { "epoch": 0.6426229508196721, "grad_norm": 12.887022018432617, "learning_rate": 4.284153005464482e-06, "loss": 3.5986, "step": 196 }, { "epoch": 0.6459016393442623, "grad_norm": 7.0175862312316895, "learning_rate": 4.306010928961749e-06, "loss": 3.3555, "step": 197 }, { "epoch": 0.6491803278688525, "grad_norm": 6.068882465362549, "learning_rate": 4.327868852459017e-06, "loss": 3.5361, "step": 198 }, { "epoch": 0.6524590163934426, "grad_norm": 8.547717094421387, "learning_rate": 4.349726775956284e-06, "loss": 3.2627, "step": 199 }, { "epoch": 0.6557377049180327, "grad_norm": 9.295934677124023, "learning_rate": 4.371584699453552e-06, "loss": 3.1963, "step": 200 }, { "epoch": 0.659016393442623, "grad_norm": 5.54888916015625, "learning_rate": 4.39344262295082e-06, "loss": 3.4521, "step": 201 }, { "epoch": 0.6622950819672131, "grad_norm": 8.389147758483887, "learning_rate": 4.415300546448088e-06, "loss": 3.4521, "step": 202 }, { "epoch": 0.6655737704918033, "grad_norm": 6.4933061599731445, "learning_rate": 4.437158469945355e-06, "loss": 3.4155, "step": 203 }, { "epoch": 0.6688524590163935, "grad_norm": 8.244728088378906, "learning_rate": 4.4590163934426235e-06, "loss": 3.542, "step": 204 }, { "epoch": 0.6721311475409836, "grad_norm": 10.105704307556152, "learning_rate": 4.480874316939891e-06, "loss": 3.4521, "step": 205 }, { "epoch": 0.6754098360655738, "grad_norm": 7.666233062744141, "learning_rate": 4.502732240437159e-06, "loss": 3.248, "step": 206 }, { "epoch": 0.6786885245901639, "grad_norm": 6.876298904418945, "learning_rate": 4.524590163934426e-06, "loss": 3.7236, "step": 207 }, { "epoch": 0.6819672131147541, "grad_norm": 12.005895614624023, "learning_rate": 4.546448087431694e-06, "loss": 3.7188, "step": 208 }, { "epoch": 0.6852459016393443, "grad_norm": 8.14087200164795, "learning_rate": 4.5683060109289626e-06, "loss": 3.5078, "step": 209 }, { "epoch": 0.6885245901639344, "grad_norm": 10.200325965881348, "learning_rate": 4.59016393442623e-06, "loss": 3.46, "step": 210 }, { "epoch": 0.6918032786885245, "grad_norm": 7.9353532791137695, "learning_rate": 4.612021857923498e-06, "loss": 3.5557, "step": 211 }, { "epoch": 0.6950819672131148, "grad_norm": 10.279560089111328, "learning_rate": 4.633879781420765e-06, "loss": 3.3867, "step": 212 }, { "epoch": 0.6983606557377049, "grad_norm": 5.81951379776001, "learning_rate": 4.655737704918033e-06, "loss": 3.3271, "step": 213 }, { "epoch": 0.7016393442622951, "grad_norm": 7.533806800842285, "learning_rate": 4.677595628415301e-06, "loss": 3.6748, "step": 214 }, { "epoch": 0.7049180327868853, "grad_norm": 10.030545234680176, "learning_rate": 4.699453551912569e-06, "loss": 3.501, "step": 215 }, { "epoch": 0.7081967213114754, "grad_norm": 9.214943885803223, "learning_rate": 4.721311475409836e-06, "loss": 3.2871, "step": 216 }, { "epoch": 0.7114754098360656, "grad_norm": 9.306751251220703, "learning_rate": 4.7431693989071044e-06, "loss": 3.5693, "step": 217 }, { "epoch": 0.7147540983606557, "grad_norm": 9.144209861755371, "learning_rate": 4.765027322404372e-06, "loss": 3.2939, "step": 218 }, { "epoch": 0.7180327868852459, "grad_norm": 6.929702281951904, "learning_rate": 4.78688524590164e-06, "loss": 3.5498, "step": 219 }, { "epoch": 0.7213114754098361, "grad_norm": 10.659069061279297, "learning_rate": 4.808743169398907e-06, "loss": 3.458, "step": 220 }, { "epoch": 0.7245901639344262, "grad_norm": 9.598954200744629, "learning_rate": 4.830601092896175e-06, "loss": 3.4482, "step": 221 }, { "epoch": 0.7278688524590164, "grad_norm": 7.741015434265137, "learning_rate": 4.8524590163934435e-06, "loss": 3.3428, "step": 222 }, { "epoch": 0.7311475409836066, "grad_norm": 10.40294361114502, "learning_rate": 4.874316939890711e-06, "loss": 3.5283, "step": 223 }, { "epoch": 0.7344262295081967, "grad_norm": 24.485414505004883, "learning_rate": 4.896174863387978e-06, "loss": 3.5645, "step": 224 }, { "epoch": 0.7377049180327869, "grad_norm": 8.819168090820312, "learning_rate": 4.918032786885246e-06, "loss": 3.5498, "step": 225 }, { "epoch": 0.740983606557377, "grad_norm": 14.433055877685547, "learning_rate": 4.939890710382514e-06, "loss": 3.5625, "step": 226 }, { "epoch": 0.7442622950819672, "grad_norm": 10.948346138000488, "learning_rate": 4.961748633879782e-06, "loss": 3.3359, "step": 227 }, { "epoch": 0.7475409836065574, "grad_norm": 7.965230464935303, "learning_rate": 4.98360655737705e-06, "loss": 3.3975, "step": 228 }, { "epoch": 0.7508196721311475, "grad_norm": 7.791070461273193, "learning_rate": 5.005464480874317e-06, "loss": 3.5781, "step": 229 }, { "epoch": 0.7540983606557377, "grad_norm": 6.69915771484375, "learning_rate": 5.027322404371585e-06, "loss": 3.1768, "step": 230 }, { "epoch": 0.7573770491803279, "grad_norm": 10.093745231628418, "learning_rate": 5.0491803278688535e-06, "loss": 3.3506, "step": 231 }, { "epoch": 0.760655737704918, "grad_norm": 7.001753330230713, "learning_rate": 5.071038251366121e-06, "loss": 3.4736, "step": 232 }, { "epoch": 0.7639344262295082, "grad_norm": 6.036037921905518, "learning_rate": 5.092896174863389e-06, "loss": 3.3594, "step": 233 }, { "epoch": 0.7672131147540984, "grad_norm": 11.483301162719727, "learning_rate": 5.1147540983606555e-06, "loss": 3.4316, "step": 234 }, { "epoch": 0.7704918032786885, "grad_norm": 6.565312385559082, "learning_rate": 5.1366120218579245e-06, "loss": 3.1816, "step": 235 }, { "epoch": 0.7737704918032787, "grad_norm": 7.521879196166992, "learning_rate": 5.158469945355191e-06, "loss": 3.498, "step": 236 }, { "epoch": 0.7770491803278688, "grad_norm": 9.474894523620605, "learning_rate": 5.180327868852459e-06, "loss": 3.6445, "step": 237 }, { "epoch": 0.780327868852459, "grad_norm": 6.335714340209961, "learning_rate": 5.202185792349727e-06, "loss": 3.3389, "step": 238 }, { "epoch": 0.7836065573770492, "grad_norm": 8.930586814880371, "learning_rate": 5.2240437158469946e-06, "loss": 3.1377, "step": 239 }, { "epoch": 0.7868852459016393, "grad_norm": 11.783924102783203, "learning_rate": 5.245901639344263e-06, "loss": 3.3125, "step": 240 }, { "epoch": 0.7901639344262295, "grad_norm": 7.7034807205200195, "learning_rate": 5.26775956284153e-06, "loss": 3.2139, "step": 241 }, { "epoch": 0.7934426229508197, "grad_norm": 11.978560447692871, "learning_rate": 5.289617486338798e-06, "loss": 3.2949, "step": 242 }, { "epoch": 0.7967213114754098, "grad_norm": 24.181058883666992, "learning_rate": 5.3114754098360655e-06, "loss": 3.4648, "step": 243 }, { "epoch": 0.8, "grad_norm": 8.870766639709473, "learning_rate": 5.333333333333334e-06, "loss": 3.1289, "step": 244 }, { "epoch": 0.8032786885245902, "grad_norm": 7.487761497497559, "learning_rate": 5.355191256830602e-06, "loss": 3.4922, "step": 245 }, { "epoch": 0.8065573770491803, "grad_norm": 7.746020793914795, "learning_rate": 5.377049180327869e-06, "loss": 3.3721, "step": 246 }, { "epoch": 0.8098360655737705, "grad_norm": 8.975412368774414, "learning_rate": 5.398907103825137e-06, "loss": 3.4512, "step": 247 }, { "epoch": 0.8131147540983606, "grad_norm": 11.032635688781738, "learning_rate": 5.420765027322405e-06, "loss": 3.1245, "step": 248 }, { "epoch": 0.8163934426229508, "grad_norm": 12.165133476257324, "learning_rate": 5.442622950819673e-06, "loss": 3.207, "step": 249 }, { "epoch": 0.819672131147541, "grad_norm": 7.29417610168457, "learning_rate": 5.464480874316941e-06, "loss": 3.292, "step": 250 }, { "epoch": 0.8229508196721311, "grad_norm": 35.78287124633789, "learning_rate": 5.486338797814208e-06, "loss": 3.2295, "step": 251 }, { "epoch": 0.8262295081967214, "grad_norm": 9.265624046325684, "learning_rate": 5.508196721311476e-06, "loss": 3.1992, "step": 252 }, { "epoch": 0.8295081967213115, "grad_norm": 6.449073791503906, "learning_rate": 5.530054644808744e-06, "loss": 3.4014, "step": 253 }, { "epoch": 0.8327868852459016, "grad_norm": 8.145757675170898, "learning_rate": 5.551912568306012e-06, "loss": 3.25, "step": 254 }, { "epoch": 0.8360655737704918, "grad_norm": 8.555072784423828, "learning_rate": 5.573770491803278e-06, "loss": 3.8096, "step": 255 }, { "epoch": 0.839344262295082, "grad_norm": 9.657212257385254, "learning_rate": 5.595628415300547e-06, "loss": 3.3896, "step": 256 }, { "epoch": 0.8426229508196721, "grad_norm": 7.222768783569336, "learning_rate": 5.6174863387978155e-06, "loss": 3.4033, "step": 257 }, { "epoch": 0.8459016393442623, "grad_norm": 5.727254390716553, "learning_rate": 5.639344262295082e-06, "loss": 3.4346, "step": 258 }, { "epoch": 0.8491803278688524, "grad_norm": 6.653439044952393, "learning_rate": 5.66120218579235e-06, "loss": 3.1211, "step": 259 }, { "epoch": 0.8524590163934426, "grad_norm": 10.25393009185791, "learning_rate": 5.683060109289617e-06, "loss": 3.2627, "step": 260 }, { "epoch": 0.8557377049180328, "grad_norm": 9.719036102294922, "learning_rate": 5.7049180327868855e-06, "loss": 3.4023, "step": 261 }, { "epoch": 0.8590163934426229, "grad_norm": 10.332015037536621, "learning_rate": 5.726775956284153e-06, "loss": 3.5049, "step": 262 }, { "epoch": 0.8622950819672132, "grad_norm": 12.109919548034668, "learning_rate": 5.748633879781421e-06, "loss": 3.4756, "step": 263 }, { "epoch": 0.8655737704918033, "grad_norm": 11.7739839553833, "learning_rate": 5.770491803278689e-06, "loss": 3.1348, "step": 264 }, { "epoch": 0.8688524590163934, "grad_norm": 7.098759651184082, "learning_rate": 5.7923497267759565e-06, "loss": 3.3403, "step": 265 }, { "epoch": 0.8721311475409836, "grad_norm": 6.860527038574219, "learning_rate": 5.814207650273225e-06, "loss": 3.125, "step": 266 }, { "epoch": 0.8754098360655738, "grad_norm": 6.676937580108643, "learning_rate": 5.836065573770492e-06, "loss": 3.5234, "step": 267 }, { "epoch": 0.8786885245901639, "grad_norm": 7.9877424240112305, "learning_rate": 5.85792349726776e-06, "loss": 3.2324, "step": 268 }, { "epoch": 0.8819672131147541, "grad_norm": 13.659160614013672, "learning_rate": 5.879781420765028e-06, "loss": 3.3105, "step": 269 }, { "epoch": 0.8852459016393442, "grad_norm": 6.34415864944458, "learning_rate": 5.9016393442622956e-06, "loss": 3.166, "step": 270 }, { "epoch": 0.8885245901639345, "grad_norm": 16.478591918945312, "learning_rate": 5.923497267759564e-06, "loss": 3.2656, "step": 271 }, { "epoch": 0.8918032786885246, "grad_norm": 8.617118835449219, "learning_rate": 5.945355191256831e-06, "loss": 3.2246, "step": 272 }, { "epoch": 0.8950819672131147, "grad_norm": 11.704219818115234, "learning_rate": 5.967213114754099e-06, "loss": 3.6201, "step": 273 }, { "epoch": 0.898360655737705, "grad_norm": 8.965121269226074, "learning_rate": 5.9890710382513665e-06, "loss": 3.1406, "step": 274 }, { "epoch": 0.9016393442622951, "grad_norm": 8.009188652038574, "learning_rate": 6.010928961748635e-06, "loss": 3.3135, "step": 275 }, { "epoch": 0.9049180327868852, "grad_norm": 5.9181227684021, "learning_rate": 6.032786885245903e-06, "loss": 3.2881, "step": 276 }, { "epoch": 0.9081967213114754, "grad_norm": 11.03315258026123, "learning_rate": 6.05464480874317e-06, "loss": 3.4443, "step": 277 }, { "epoch": 0.9114754098360656, "grad_norm": 11.626940727233887, "learning_rate": 6.076502732240438e-06, "loss": 3.1846, "step": 278 }, { "epoch": 0.9147540983606557, "grad_norm": 6.574389934539795, "learning_rate": 6.098360655737705e-06, "loss": 3.4688, "step": 279 }, { "epoch": 0.9180327868852459, "grad_norm": 9.197421073913574, "learning_rate": 6.120218579234973e-06, "loss": 3.4287, "step": 280 }, { "epoch": 0.921311475409836, "grad_norm": 9.353991508483887, "learning_rate": 6.14207650273224e-06, "loss": 3.1489, "step": 281 }, { "epoch": 0.9245901639344263, "grad_norm": 7.688765048980713, "learning_rate": 6.163934426229508e-06, "loss": 3.2754, "step": 282 }, { "epoch": 0.9278688524590164, "grad_norm": 8.804039001464844, "learning_rate": 6.1857923497267765e-06, "loss": 3.3071, "step": 283 }, { "epoch": 0.9311475409836065, "grad_norm": 8.340035438537598, "learning_rate": 6.207650273224044e-06, "loss": 3.4697, "step": 284 }, { "epoch": 0.9344262295081968, "grad_norm": 7.45577335357666, "learning_rate": 6.229508196721312e-06, "loss": 3.2861, "step": 285 }, { "epoch": 0.9377049180327869, "grad_norm": 8.196723937988281, "learning_rate": 6.251366120218579e-06, "loss": 3.3301, "step": 286 }, { "epoch": 0.940983606557377, "grad_norm": 11.025135040283203, "learning_rate": 6.2732240437158475e-06, "loss": 3.25, "step": 287 }, { "epoch": 0.9442622950819672, "grad_norm": 7.389035224914551, "learning_rate": 6.295081967213116e-06, "loss": 3.2598, "step": 288 }, { "epoch": 0.9475409836065574, "grad_norm": 7.503753662109375, "learning_rate": 6.316939890710383e-06, "loss": 3.165, "step": 289 }, { "epoch": 0.9508196721311475, "grad_norm": 7.365726470947266, "learning_rate": 6.338797814207651e-06, "loss": 2.9854, "step": 290 }, { "epoch": 0.9540983606557377, "grad_norm": 10.078577995300293, "learning_rate": 6.360655737704918e-06, "loss": 3.4434, "step": 291 }, { "epoch": 0.9573770491803278, "grad_norm": 5.570989608764648, "learning_rate": 6.3825136612021865e-06, "loss": 2.8735, "step": 292 }, { "epoch": 0.9606557377049181, "grad_norm": 6.477457046508789, "learning_rate": 6.404371584699454e-06, "loss": 3.123, "step": 293 }, { "epoch": 0.9639344262295082, "grad_norm": 7.717476844787598, "learning_rate": 6.426229508196722e-06, "loss": 3.2705, "step": 294 }, { "epoch": 0.9672131147540983, "grad_norm": 10.636682510375977, "learning_rate": 6.44808743169399e-06, "loss": 3.5029, "step": 295 }, { "epoch": 0.9704918032786886, "grad_norm": 24.9122257232666, "learning_rate": 6.4699453551912575e-06, "loss": 3.3799, "step": 296 }, { "epoch": 0.9737704918032787, "grad_norm": 9.225789070129395, "learning_rate": 6.491803278688526e-06, "loss": 3.0566, "step": 297 }, { "epoch": 0.9770491803278688, "grad_norm": 5.821405410766602, "learning_rate": 6.513661202185793e-06, "loss": 3.2119, "step": 298 }, { "epoch": 0.980327868852459, "grad_norm": 9.270519256591797, "learning_rate": 6.535519125683061e-06, "loss": 3.2188, "step": 299 }, { "epoch": 0.9836065573770492, "grad_norm": 7.95923376083374, "learning_rate": 6.5573770491803276e-06, "loss": 3.3916, "step": 300 }, { "epoch": 0.9868852459016394, "grad_norm": 11.024293899536133, "learning_rate": 6.5792349726775966e-06, "loss": 3.3379, "step": 301 }, { "epoch": 0.9901639344262295, "grad_norm": 9.279967308044434, "learning_rate": 6.601092896174865e-06, "loss": 3.4443, "step": 302 }, { "epoch": 0.9934426229508196, "grad_norm": 8.785676956176758, "learning_rate": 6.622950819672131e-06, "loss": 3.1299, "step": 303 }, { "epoch": 0.9967213114754099, "grad_norm": 8.657179832458496, "learning_rate": 6.644808743169399e-06, "loss": 3.1133, "step": 304 }, { "epoch": 1.0, "grad_norm": 11.182159423828125, "learning_rate": 6.666666666666667e-06, "loss": 3.2266, "step": 305 }, { "epoch": 1.0032786885245901, "grad_norm": 9.394021034240723, "learning_rate": 6.688524590163935e-06, "loss": 3.3086, "step": 306 }, { "epoch": 1.0065573770491802, "grad_norm": 6.639245510101318, "learning_rate": 6.710382513661202e-06, "loss": 3.1182, "step": 307 }, { "epoch": 1.0098360655737706, "grad_norm": 6.839327335357666, "learning_rate": 6.73224043715847e-06, "loss": 3.1836, "step": 308 }, { "epoch": 1.0131147540983607, "grad_norm": 11.921886444091797, "learning_rate": 6.7540983606557384e-06, "loss": 3.1108, "step": 309 }, { "epoch": 1.0163934426229508, "grad_norm": 8.358570098876953, "learning_rate": 6.775956284153006e-06, "loss": 3.2695, "step": 310 }, { "epoch": 1.019672131147541, "grad_norm": 11.555234909057617, "learning_rate": 6.797814207650274e-06, "loss": 2.8735, "step": 311 }, { "epoch": 1.022950819672131, "grad_norm": 15.685929298400879, "learning_rate": 6.819672131147541e-06, "loss": 3.2891, "step": 312 }, { "epoch": 1.0262295081967212, "grad_norm": 12.459662437438965, "learning_rate": 6.841530054644809e-06, "loss": 3.2402, "step": 313 }, { "epoch": 1.0295081967213116, "grad_norm": 8.12995433807373, "learning_rate": 6.8633879781420775e-06, "loss": 3.4824, "step": 314 }, { "epoch": 1.0327868852459017, "grad_norm": 10.586140632629395, "learning_rate": 6.885245901639345e-06, "loss": 3.3428, "step": 315 }, { "epoch": 1.0360655737704918, "grad_norm": 6.352329254150391, "learning_rate": 6.907103825136613e-06, "loss": 3.2871, "step": 316 }, { "epoch": 1.039344262295082, "grad_norm": 8.949673652648926, "learning_rate": 6.92896174863388e-06, "loss": 3.3848, "step": 317 }, { "epoch": 1.042622950819672, "grad_norm": 6.245799541473389, "learning_rate": 6.9508196721311484e-06, "loss": 3.4844, "step": 318 }, { "epoch": 1.0459016393442624, "grad_norm": 17.472169876098633, "learning_rate": 6.972677595628416e-06, "loss": 2.9766, "step": 319 }, { "epoch": 1.0491803278688525, "grad_norm": 7.396842956542969, "learning_rate": 6.994535519125684e-06, "loss": 3.3379, "step": 320 }, { "epoch": 1.0524590163934426, "grad_norm": 6.515445232391357, "learning_rate": 7.016393442622952e-06, "loss": 3.2842, "step": 321 }, { "epoch": 1.0557377049180328, "grad_norm": 6.610074043273926, "learning_rate": 7.038251366120219e-06, "loss": 3.2354, "step": 322 }, { "epoch": 1.0590163934426229, "grad_norm": 8.78217601776123, "learning_rate": 7.0601092896174875e-06, "loss": 3.4297, "step": 323 }, { "epoch": 1.0622950819672132, "grad_norm": 7.796133518218994, "learning_rate": 7.081967213114754e-06, "loss": 3.2651, "step": 324 }, { "epoch": 1.0655737704918034, "grad_norm": 9.725189208984375, "learning_rate": 7.103825136612022e-06, "loss": 3.0488, "step": 325 }, { "epoch": 1.0688524590163935, "grad_norm": 10.851350784301758, "learning_rate": 7.1256830601092895e-06, "loss": 3.0117, "step": 326 }, { "epoch": 1.0721311475409836, "grad_norm": 5.985225200653076, "learning_rate": 7.147540983606558e-06, "loss": 2.9316, "step": 327 }, { "epoch": 1.0754098360655737, "grad_norm": 7.061980724334717, "learning_rate": 7.169398907103826e-06, "loss": 3.167, "step": 328 }, { "epoch": 1.0786885245901638, "grad_norm": 6.416960716247559, "learning_rate": 7.191256830601093e-06, "loss": 3.0181, "step": 329 }, { "epoch": 1.0819672131147542, "grad_norm": 10.00206470489502, "learning_rate": 7.213114754098361e-06, "loss": 3.1763, "step": 330 }, { "epoch": 1.0852459016393443, "grad_norm": 11.82321834564209, "learning_rate": 7.2349726775956286e-06, "loss": 3.0674, "step": 331 }, { "epoch": 1.0885245901639344, "grad_norm": 5.795101165771484, "learning_rate": 7.256830601092897e-06, "loss": 3.1562, "step": 332 }, { "epoch": 1.0918032786885246, "grad_norm": 10.577892303466797, "learning_rate": 7.278688524590165e-06, "loss": 3.1675, "step": 333 }, { "epoch": 1.0950819672131147, "grad_norm": 8.972949028015137, "learning_rate": 7.300546448087432e-06, "loss": 3.166, "step": 334 }, { "epoch": 1.098360655737705, "grad_norm": 6.5378522872924805, "learning_rate": 7.3224043715847e-06, "loss": 3.1924, "step": 335 }, { "epoch": 1.1016393442622952, "grad_norm": 6.911514759063721, "learning_rate": 7.344262295081968e-06, "loss": 3.2998, "step": 336 }, { "epoch": 1.1049180327868853, "grad_norm": 6.814826488494873, "learning_rate": 7.366120218579236e-06, "loss": 3.0957, "step": 337 }, { "epoch": 1.1081967213114754, "grad_norm": 11.536693572998047, "learning_rate": 7.387978142076503e-06, "loss": 3.0269, "step": 338 }, { "epoch": 1.1114754098360655, "grad_norm": 5.357202053070068, "learning_rate": 7.409836065573771e-06, "loss": 3.0913, "step": 339 }, { "epoch": 1.1147540983606556, "grad_norm": 9.54206371307373, "learning_rate": 7.4316939890710394e-06, "loss": 3.3174, "step": 340 }, { "epoch": 1.118032786885246, "grad_norm": 11.30484390258789, "learning_rate": 7.453551912568307e-06, "loss": 3.2676, "step": 341 }, { "epoch": 1.1213114754098361, "grad_norm": 7.228575706481934, "learning_rate": 7.475409836065575e-06, "loss": 3.0684, "step": 342 }, { "epoch": 1.1245901639344262, "grad_norm": 5.543628692626953, "learning_rate": 7.497267759562842e-06, "loss": 3.0215, "step": 343 }, { "epoch": 1.1278688524590164, "grad_norm": 7.670444488525391, "learning_rate": 7.51912568306011e-06, "loss": 3.4131, "step": 344 }, { "epoch": 1.1311475409836065, "grad_norm": 7.433183670043945, "learning_rate": 7.540983606557377e-06, "loss": 3.1934, "step": 345 }, { "epoch": 1.1344262295081968, "grad_norm": 13.581217765808105, "learning_rate": 7.562841530054645e-06, "loss": 3.1982, "step": 346 }, { "epoch": 1.137704918032787, "grad_norm": 9.924436569213867, "learning_rate": 7.584699453551914e-06, "loss": 3.2119, "step": 347 }, { "epoch": 1.140983606557377, "grad_norm": 4.624984264373779, "learning_rate": 7.6065573770491804e-06, "loss": 3.2285, "step": 348 }, { "epoch": 1.1442622950819672, "grad_norm": 8.959603309631348, "learning_rate": 7.628415300546449e-06, "loss": 3.2578, "step": 349 }, { "epoch": 1.1475409836065573, "grad_norm": 5.598023891448975, "learning_rate": 7.650273224043716e-06, "loss": 3.2334, "step": 350 }, { "epoch": 1.1508196721311474, "grad_norm": 7.592049598693848, "learning_rate": 7.672131147540985e-06, "loss": 3.0625, "step": 351 }, { "epoch": 1.1540983606557378, "grad_norm": 11.596220016479492, "learning_rate": 7.693989071038252e-06, "loss": 2.9795, "step": 352 }, { "epoch": 1.157377049180328, "grad_norm": 7.686442852020264, "learning_rate": 7.71584699453552e-06, "loss": 3.0723, "step": 353 }, { "epoch": 1.160655737704918, "grad_norm": 6.8060150146484375, "learning_rate": 7.737704918032789e-06, "loss": 2.9619, "step": 354 }, { "epoch": 1.1639344262295082, "grad_norm": 12.29932689666748, "learning_rate": 7.759562841530056e-06, "loss": 3.3545, "step": 355 }, { "epoch": 1.1672131147540983, "grad_norm": 14.609642028808594, "learning_rate": 7.781420765027323e-06, "loss": 3.1035, "step": 356 }, { "epoch": 1.1704918032786886, "grad_norm": 11.507431983947754, "learning_rate": 7.80327868852459e-06, "loss": 3.0879, "step": 357 }, { "epoch": 1.1737704918032787, "grad_norm": 9.29450511932373, "learning_rate": 7.825136612021858e-06, "loss": 3.1699, "step": 358 }, { "epoch": 1.1770491803278689, "grad_norm": 8.678855895996094, "learning_rate": 7.846994535519127e-06, "loss": 3.1182, "step": 359 }, { "epoch": 1.180327868852459, "grad_norm": 7.692296981811523, "learning_rate": 7.868852459016394e-06, "loss": 3.166, "step": 360 }, { "epoch": 1.1836065573770491, "grad_norm": 6.612729549407959, "learning_rate": 7.890710382513661e-06, "loss": 3.0278, "step": 361 }, { "epoch": 1.1868852459016392, "grad_norm": 8.512601852416992, "learning_rate": 7.912568306010929e-06, "loss": 3.1328, "step": 362 }, { "epoch": 1.1901639344262296, "grad_norm": 7.257461071014404, "learning_rate": 7.934426229508198e-06, "loss": 2.9717, "step": 363 }, { "epoch": 1.1934426229508197, "grad_norm": 9.009414672851562, "learning_rate": 7.956284153005465e-06, "loss": 3.0, "step": 364 }, { "epoch": 1.1967213114754098, "grad_norm": 5.401551723480225, "learning_rate": 7.978142076502732e-06, "loss": 3.2236, "step": 365 }, { "epoch": 1.2, "grad_norm": 8.119751930236816, "learning_rate": 8.000000000000001e-06, "loss": 2.9023, "step": 366 }, { "epoch": 1.20327868852459, "grad_norm": 8.28824234008789, "learning_rate": 8.021857923497269e-06, "loss": 2.9844, "step": 367 }, { "epoch": 1.2065573770491804, "grad_norm": 6.021730422973633, "learning_rate": 8.043715846994536e-06, "loss": 3.1621, "step": 368 }, { "epoch": 1.2098360655737705, "grad_norm": 8.922839164733887, "learning_rate": 8.065573770491803e-06, "loss": 3.0986, "step": 369 }, { "epoch": 1.2131147540983607, "grad_norm": 7.427271842956543, "learning_rate": 8.087431693989072e-06, "loss": 3.1064, "step": 370 }, { "epoch": 1.2163934426229508, "grad_norm": 22.57861328125, "learning_rate": 8.10928961748634e-06, "loss": 3.1211, "step": 371 }, { "epoch": 1.219672131147541, "grad_norm": 7.37894868850708, "learning_rate": 8.131147540983607e-06, "loss": 2.9756, "step": 372 }, { "epoch": 1.222950819672131, "grad_norm": 5.945425510406494, "learning_rate": 8.153005464480876e-06, "loss": 3.3174, "step": 373 }, { "epoch": 1.2262295081967214, "grad_norm": 6.394283771514893, "learning_rate": 8.174863387978143e-06, "loss": 2.9531, "step": 374 }, { "epoch": 1.2295081967213115, "grad_norm": 6.735688209533691, "learning_rate": 8.19672131147541e-06, "loss": 3.0244, "step": 375 }, { "epoch": 1.2327868852459016, "grad_norm": 8.274998664855957, "learning_rate": 8.218579234972678e-06, "loss": 3.1201, "step": 376 }, { "epoch": 1.2360655737704918, "grad_norm": 9.880097389221191, "learning_rate": 8.240437158469947e-06, "loss": 3.001, "step": 377 }, { "epoch": 1.2393442622950819, "grad_norm": 8.401915550231934, "learning_rate": 8.262295081967214e-06, "loss": 3.2012, "step": 378 }, { "epoch": 1.2426229508196722, "grad_norm": 9.479646682739258, "learning_rate": 8.284153005464481e-06, "loss": 3.084, "step": 379 }, { "epoch": 1.2459016393442623, "grad_norm": 11.817873001098633, "learning_rate": 8.30601092896175e-06, "loss": 2.8477, "step": 380 }, { "epoch": 1.2491803278688525, "grad_norm": 9.87424373626709, "learning_rate": 8.327868852459016e-06, "loss": 2.7725, "step": 381 }, { "epoch": 1.2524590163934426, "grad_norm": 6.135134220123291, "learning_rate": 8.349726775956285e-06, "loss": 3.02, "step": 382 }, { "epoch": 1.2557377049180327, "grad_norm": 7.987420082092285, "learning_rate": 8.371584699453552e-06, "loss": 2.7026, "step": 383 }, { "epoch": 1.2590163934426228, "grad_norm": 10.162532806396484, "learning_rate": 8.39344262295082e-06, "loss": 3.0566, "step": 384 }, { "epoch": 1.2622950819672132, "grad_norm": 9.407600402832031, "learning_rate": 8.415300546448089e-06, "loss": 3.0591, "step": 385 }, { "epoch": 1.2655737704918033, "grad_norm": 6.939650535583496, "learning_rate": 8.437158469945356e-06, "loss": 3.1309, "step": 386 }, { "epoch": 1.2688524590163934, "grad_norm": 10.403114318847656, "learning_rate": 8.459016393442623e-06, "loss": 2.8711, "step": 387 }, { "epoch": 1.2721311475409836, "grad_norm": 10.360599517822266, "learning_rate": 8.48087431693989e-06, "loss": 3.4092, "step": 388 }, { "epoch": 1.275409836065574, "grad_norm": 12.41563606262207, "learning_rate": 8.50273224043716e-06, "loss": 3.2217, "step": 389 }, { "epoch": 1.278688524590164, "grad_norm": 8.948834419250488, "learning_rate": 8.524590163934427e-06, "loss": 3.2412, "step": 390 }, { "epoch": 1.2819672131147541, "grad_norm": 10.452371597290039, "learning_rate": 8.546448087431694e-06, "loss": 3.0117, "step": 391 }, { "epoch": 1.2852459016393443, "grad_norm": 7.677484035491943, "learning_rate": 8.568306010928963e-06, "loss": 3.3047, "step": 392 }, { "epoch": 1.2885245901639344, "grad_norm": 6.682272911071777, "learning_rate": 8.59016393442623e-06, "loss": 3.0508, "step": 393 }, { "epoch": 1.2918032786885245, "grad_norm": 9.547876358032227, "learning_rate": 8.612021857923498e-06, "loss": 3.2295, "step": 394 }, { "epoch": 1.2950819672131146, "grad_norm": 6.019890785217285, "learning_rate": 8.633879781420765e-06, "loss": 3.1709, "step": 395 }, { "epoch": 1.298360655737705, "grad_norm": 6.680045127868652, "learning_rate": 8.655737704918034e-06, "loss": 3.1201, "step": 396 }, { "epoch": 1.301639344262295, "grad_norm": 8.571054458618164, "learning_rate": 8.677595628415301e-06, "loss": 2.9189, "step": 397 }, { "epoch": 1.3049180327868852, "grad_norm": 29.20925521850586, "learning_rate": 8.699453551912569e-06, "loss": 3.0732, "step": 398 }, { "epoch": 1.3081967213114754, "grad_norm": 8.341346740722656, "learning_rate": 8.721311475409838e-06, "loss": 2.8042, "step": 399 }, { "epoch": 1.3114754098360657, "grad_norm": 9.555011749267578, "learning_rate": 8.743169398907103e-06, "loss": 3.0166, "step": 400 }, { "epoch": 1.3147540983606558, "grad_norm": 9.26165771484375, "learning_rate": 8.765027322404372e-06, "loss": 3.084, "step": 401 }, { "epoch": 1.318032786885246, "grad_norm": 9.475183486938477, "learning_rate": 8.78688524590164e-06, "loss": 3.0928, "step": 402 }, { "epoch": 1.321311475409836, "grad_norm": 7.897613525390625, "learning_rate": 8.808743169398907e-06, "loss": 3.0713, "step": 403 }, { "epoch": 1.3245901639344262, "grad_norm": 7.664556980133057, "learning_rate": 8.830601092896176e-06, "loss": 2.835, "step": 404 }, { "epoch": 1.3278688524590163, "grad_norm": 7.210049152374268, "learning_rate": 8.852459016393443e-06, "loss": 2.8188, "step": 405 }, { "epoch": 1.3311475409836064, "grad_norm": 18.05609893798828, "learning_rate": 8.87431693989071e-06, "loss": 3.123, "step": 406 }, { "epoch": 1.3344262295081968, "grad_norm": 5.998955726623535, "learning_rate": 8.896174863387978e-06, "loss": 2.9268, "step": 407 }, { "epoch": 1.337704918032787, "grad_norm": 7.503890514373779, "learning_rate": 8.918032786885247e-06, "loss": 2.8594, "step": 408 }, { "epoch": 1.340983606557377, "grad_norm": 8.424395561218262, "learning_rate": 8.939890710382514e-06, "loss": 3.0791, "step": 409 }, { "epoch": 1.3442622950819672, "grad_norm": 9.430055618286133, "learning_rate": 8.961748633879782e-06, "loss": 3.0381, "step": 410 }, { "epoch": 1.3475409836065575, "grad_norm": 7.939853191375732, "learning_rate": 8.98360655737705e-06, "loss": 3.1455, "step": 411 }, { "epoch": 1.3508196721311476, "grad_norm": 7.6745100021362305, "learning_rate": 9.005464480874318e-06, "loss": 3.4521, "step": 412 }, { "epoch": 1.3540983606557377, "grad_norm": 9.782946586608887, "learning_rate": 9.027322404371585e-06, "loss": 2.9307, "step": 413 }, { "epoch": 1.3573770491803279, "grad_norm": 9.667211532592773, "learning_rate": 9.049180327868853e-06, "loss": 3.0957, "step": 414 }, { "epoch": 1.360655737704918, "grad_norm": 8.198561668395996, "learning_rate": 9.071038251366122e-06, "loss": 3.084, "step": 415 }, { "epoch": 1.3639344262295081, "grad_norm": 7.897920608520508, "learning_rate": 9.092896174863389e-06, "loss": 3.1162, "step": 416 }, { "epoch": 1.3672131147540982, "grad_norm": 7.521897792816162, "learning_rate": 9.114754098360656e-06, "loss": 3.2783, "step": 417 }, { "epoch": 1.3704918032786886, "grad_norm": 6.282433986663818, "learning_rate": 9.136612021857925e-06, "loss": 2.8662, "step": 418 }, { "epoch": 1.3737704918032787, "grad_norm": 10.895795822143555, "learning_rate": 9.158469945355192e-06, "loss": 3.1406, "step": 419 }, { "epoch": 1.3770491803278688, "grad_norm": 8.286861419677734, "learning_rate": 9.18032786885246e-06, "loss": 3.0498, "step": 420 }, { "epoch": 1.380327868852459, "grad_norm": 20.651430130004883, "learning_rate": 9.202185792349727e-06, "loss": 2.7568, "step": 421 }, { "epoch": 1.3836065573770493, "grad_norm": 7.5781354904174805, "learning_rate": 9.224043715846996e-06, "loss": 3.0059, "step": 422 }, { "epoch": 1.3868852459016394, "grad_norm": 9.9003267288208, "learning_rate": 9.245901639344263e-06, "loss": 2.8643, "step": 423 }, { "epoch": 1.3901639344262295, "grad_norm": 8.531061172485352, "learning_rate": 9.26775956284153e-06, "loss": 2.9521, "step": 424 }, { "epoch": 1.3934426229508197, "grad_norm": 6.726922035217285, "learning_rate": 9.2896174863388e-06, "loss": 2.8057, "step": 425 }, { "epoch": 1.3967213114754098, "grad_norm": 8.559724807739258, "learning_rate": 9.311475409836065e-06, "loss": 2.9639, "step": 426 }, { "epoch": 1.4, "grad_norm": 10.791718482971191, "learning_rate": 9.333333333333334e-06, "loss": 3.0557, "step": 427 }, { "epoch": 1.40327868852459, "grad_norm": 16.835142135620117, "learning_rate": 9.355191256830602e-06, "loss": 2.873, "step": 428 }, { "epoch": 1.4065573770491804, "grad_norm": 8.243974685668945, "learning_rate": 9.377049180327869e-06, "loss": 3.1074, "step": 429 }, { "epoch": 1.4098360655737705, "grad_norm": 7.370208740234375, "learning_rate": 9.398907103825138e-06, "loss": 2.9355, "step": 430 }, { "epoch": 1.4131147540983606, "grad_norm": 7.249535083770752, "learning_rate": 9.420765027322405e-06, "loss": 3.0771, "step": 431 }, { "epoch": 1.4163934426229507, "grad_norm": 6.287960529327393, "learning_rate": 9.442622950819673e-06, "loss": 3.1211, "step": 432 }, { "epoch": 1.419672131147541, "grad_norm": 11.157362937927246, "learning_rate": 9.46448087431694e-06, "loss": 2.8809, "step": 433 }, { "epoch": 1.4229508196721312, "grad_norm": 8.140580177307129, "learning_rate": 9.486338797814209e-06, "loss": 2.9658, "step": 434 }, { "epoch": 1.4262295081967213, "grad_norm": 9.370312690734863, "learning_rate": 9.508196721311476e-06, "loss": 2.8418, "step": 435 }, { "epoch": 1.4295081967213115, "grad_norm": 11.307353973388672, "learning_rate": 9.530054644808743e-06, "loss": 2.9375, "step": 436 }, { "epoch": 1.4327868852459016, "grad_norm": 7.0421624183654785, "learning_rate": 9.551912568306013e-06, "loss": 3.2549, "step": 437 }, { "epoch": 1.4360655737704917, "grad_norm": 12.70803165435791, "learning_rate": 9.57377049180328e-06, "loss": 2.7734, "step": 438 }, { "epoch": 1.4393442622950818, "grad_norm": 6.85513973236084, "learning_rate": 9.595628415300547e-06, "loss": 2.7432, "step": 439 }, { "epoch": 1.4426229508196722, "grad_norm": 7.544546127319336, "learning_rate": 9.617486338797814e-06, "loss": 2.959, "step": 440 }, { "epoch": 1.4459016393442623, "grad_norm": 13.369409561157227, "learning_rate": 9.639344262295083e-06, "loss": 2.9653, "step": 441 }, { "epoch": 1.4491803278688524, "grad_norm": 7.817831516265869, "learning_rate": 9.66120218579235e-06, "loss": 2.918, "step": 442 }, { "epoch": 1.4524590163934425, "grad_norm": 18.714441299438477, "learning_rate": 9.683060109289618e-06, "loss": 3.1748, "step": 443 }, { "epoch": 1.455737704918033, "grad_norm": 32.041507720947266, "learning_rate": 9.704918032786887e-06, "loss": 2.9209, "step": 444 }, { "epoch": 1.459016393442623, "grad_norm": 9.466680526733398, "learning_rate": 9.726775956284153e-06, "loss": 2.9902, "step": 445 }, { "epoch": 1.4622950819672131, "grad_norm": 7.889480113983154, "learning_rate": 9.748633879781422e-06, "loss": 2.6914, "step": 446 }, { "epoch": 1.4655737704918033, "grad_norm": 10.376809120178223, "learning_rate": 9.770491803278689e-06, "loss": 3.0156, "step": 447 }, { "epoch": 1.4688524590163934, "grad_norm": 10.119595527648926, "learning_rate": 9.792349726775956e-06, "loss": 3.3223, "step": 448 }, { "epoch": 1.4721311475409835, "grad_norm": 7.655210018157959, "learning_rate": 9.814207650273225e-06, "loss": 3.1523, "step": 449 }, { "epoch": 1.4754098360655736, "grad_norm": 8.984585762023926, "learning_rate": 9.836065573770493e-06, "loss": 3.0469, "step": 450 }, { "epoch": 1.478688524590164, "grad_norm": 8.629581451416016, "learning_rate": 9.85792349726776e-06, "loss": 2.9053, "step": 451 }, { "epoch": 1.481967213114754, "grad_norm": 7.91368293762207, "learning_rate": 9.879781420765027e-06, "loss": 2.9385, "step": 452 }, { "epoch": 1.4852459016393442, "grad_norm": 8.454898834228516, "learning_rate": 9.901639344262296e-06, "loss": 2.8926, "step": 453 }, { "epoch": 1.4885245901639343, "grad_norm": 8.395441055297852, "learning_rate": 9.923497267759564e-06, "loss": 2.9287, "step": 454 }, { "epoch": 1.4918032786885247, "grad_norm": 8.188992500305176, "learning_rate": 9.945355191256831e-06, "loss": 2.7783, "step": 455 }, { "epoch": 1.4950819672131148, "grad_norm": 9.640780448913574, "learning_rate": 9.9672131147541e-06, "loss": 2.7461, "step": 456 }, { "epoch": 1.498360655737705, "grad_norm": 8.533658027648926, "learning_rate": 9.989071038251367e-06, "loss": 3.0, "step": 457 }, { "epoch": 1.501639344262295, "grad_norm": 5.560873031616211, "learning_rate": 1.0010928961748634e-05, "loss": 3.0229, "step": 458 }, { "epoch": 1.5049180327868852, "grad_norm": 10.65381908416748, "learning_rate": 1.0032786885245902e-05, "loss": 3.0479, "step": 459 }, { "epoch": 1.5081967213114753, "grad_norm": 12.98940372467041, "learning_rate": 1.005464480874317e-05, "loss": 2.6895, "step": 460 }, { "epoch": 1.5114754098360654, "grad_norm": 9.351346015930176, "learning_rate": 1.0076502732240438e-05, "loss": 3.1074, "step": 461 }, { "epoch": 1.5147540983606558, "grad_norm": 7.885035514831543, "learning_rate": 1.0098360655737707e-05, "loss": 2.8486, "step": 462 }, { "epoch": 1.518032786885246, "grad_norm": 8.010515213012695, "learning_rate": 1.0120218579234973e-05, "loss": 2.9941, "step": 463 }, { "epoch": 1.521311475409836, "grad_norm": 9.85828971862793, "learning_rate": 1.0142076502732242e-05, "loss": 2.8555, "step": 464 }, { "epoch": 1.5245901639344264, "grad_norm": 9.761761665344238, "learning_rate": 1.0163934426229509e-05, "loss": 2.8945, "step": 465 }, { "epoch": 1.5278688524590165, "grad_norm": 8.390084266662598, "learning_rate": 1.0185792349726778e-05, "loss": 3.0166, "step": 466 }, { "epoch": 1.5311475409836066, "grad_norm": 8.48552131652832, "learning_rate": 1.0207650273224044e-05, "loss": 3.166, "step": 467 }, { "epoch": 1.5344262295081967, "grad_norm": 10.5907621383667, "learning_rate": 1.0229508196721311e-05, "loss": 3.0449, "step": 468 }, { "epoch": 1.5377049180327869, "grad_norm": 8.237924575805664, "learning_rate": 1.025136612021858e-05, "loss": 3.0264, "step": 469 }, { "epoch": 1.540983606557377, "grad_norm": 6.831518650054932, "learning_rate": 1.0273224043715849e-05, "loss": 2.9619, "step": 470 }, { "epoch": 1.544262295081967, "grad_norm": 8.218546867370605, "learning_rate": 1.0295081967213116e-05, "loss": 2.8755, "step": 471 }, { "epoch": 1.5475409836065572, "grad_norm": 10.046923637390137, "learning_rate": 1.0316939890710382e-05, "loss": 2.9268, "step": 472 }, { "epoch": 1.5508196721311476, "grad_norm": 7.299755096435547, "learning_rate": 1.0338797814207651e-05, "loss": 2.6494, "step": 473 }, { "epoch": 1.5540983606557377, "grad_norm": 5.812628746032715, "learning_rate": 1.0360655737704918e-05, "loss": 2.7217, "step": 474 }, { "epoch": 1.5573770491803278, "grad_norm": 8.498147964477539, "learning_rate": 1.0382513661202187e-05, "loss": 2.7734, "step": 475 }, { "epoch": 1.5606557377049182, "grad_norm": 8.108220100402832, "learning_rate": 1.0404371584699455e-05, "loss": 2.8193, "step": 476 }, { "epoch": 1.5639344262295083, "grad_norm": 8.027198791503906, "learning_rate": 1.0426229508196722e-05, "loss": 3.1362, "step": 477 }, { "epoch": 1.5672131147540984, "grad_norm": 6.801766395568848, "learning_rate": 1.0448087431693989e-05, "loss": 2.9219, "step": 478 }, { "epoch": 1.5704918032786885, "grad_norm": 8.344350814819336, "learning_rate": 1.0469945355191258e-05, "loss": 2.8799, "step": 479 }, { "epoch": 1.5737704918032787, "grad_norm": 7.581808090209961, "learning_rate": 1.0491803278688525e-05, "loss": 3.0342, "step": 480 }, { "epoch": 1.5770491803278688, "grad_norm": 7.521820545196533, "learning_rate": 1.0513661202185794e-05, "loss": 2.7085, "step": 481 }, { "epoch": 1.580327868852459, "grad_norm": 14.095394134521484, "learning_rate": 1.053551912568306e-05, "loss": 2.7539, "step": 482 }, { "epoch": 1.583606557377049, "grad_norm": 6.466699600219727, "learning_rate": 1.0557377049180329e-05, "loss": 2.8228, "step": 483 }, { "epoch": 1.5868852459016394, "grad_norm": 9.444782257080078, "learning_rate": 1.0579234972677596e-05, "loss": 2.8779, "step": 484 }, { "epoch": 1.5901639344262295, "grad_norm": 9.655708312988281, "learning_rate": 1.0601092896174865e-05, "loss": 2.813, "step": 485 }, { "epoch": 1.5934426229508196, "grad_norm": 6.9721574783325195, "learning_rate": 1.0622950819672131e-05, "loss": 2.9844, "step": 486 }, { "epoch": 1.59672131147541, "grad_norm": 8.981244087219238, "learning_rate": 1.06448087431694e-05, "loss": 2.959, "step": 487 }, { "epoch": 1.6, "grad_norm": 11.947728157043457, "learning_rate": 1.0666666666666667e-05, "loss": 2.8828, "step": 488 }, { "epoch": 1.6032786885245902, "grad_norm": 6.39213752746582, "learning_rate": 1.0688524590163936e-05, "loss": 2.7207, "step": 489 }, { "epoch": 1.6065573770491803, "grad_norm": 8.148490905761719, "learning_rate": 1.0710382513661204e-05, "loss": 2.7559, "step": 490 }, { "epoch": 1.6098360655737705, "grad_norm": 8.074980735778809, "learning_rate": 1.073224043715847e-05, "loss": 3.0469, "step": 491 }, { "epoch": 1.6131147540983606, "grad_norm": 8.037668228149414, "learning_rate": 1.0754098360655738e-05, "loss": 3.0938, "step": 492 }, { "epoch": 1.6163934426229507, "grad_norm": 10.084352493286133, "learning_rate": 1.0775956284153006e-05, "loss": 2.7812, "step": 493 }, { "epoch": 1.6196721311475408, "grad_norm": 6.650521278381348, "learning_rate": 1.0797814207650275e-05, "loss": 2.7017, "step": 494 }, { "epoch": 1.6229508196721312, "grad_norm": 8.22300910949707, "learning_rate": 1.0819672131147544e-05, "loss": 2.9434, "step": 495 }, { "epoch": 1.6262295081967213, "grad_norm": 13.958842277526855, "learning_rate": 1.084153005464481e-05, "loss": 3.0068, "step": 496 }, { "epoch": 1.6295081967213116, "grad_norm": 7.517796516418457, "learning_rate": 1.0863387978142076e-05, "loss": 2.7695, "step": 497 }, { "epoch": 1.6327868852459018, "grad_norm": 7.920039653778076, "learning_rate": 1.0885245901639345e-05, "loss": 2.9668, "step": 498 }, { "epoch": 1.6360655737704919, "grad_norm": 6.5224528312683105, "learning_rate": 1.0907103825136613e-05, "loss": 2.9438, "step": 499 }, { "epoch": 1.639344262295082, "grad_norm": 9.324108123779297, "learning_rate": 1.0928961748633882e-05, "loss": 2.8784, "step": 500 }, { "epoch": 1.6426229508196721, "grad_norm": 9.717549324035645, "learning_rate": 1.0950819672131147e-05, "loss": 2.5918, "step": 501 }, { "epoch": 1.6459016393442623, "grad_norm": 8.156440734863281, "learning_rate": 1.0972677595628416e-05, "loss": 2.6982, "step": 502 }, { "epoch": 1.6491803278688524, "grad_norm": 7.821223735809326, "learning_rate": 1.0994535519125684e-05, "loss": 2.8428, "step": 503 }, { "epoch": 1.6524590163934425, "grad_norm": 8.255231857299805, "learning_rate": 1.1016393442622953e-05, "loss": 2.8887, "step": 504 }, { "epoch": 1.6557377049180326, "grad_norm": 10.461238861083984, "learning_rate": 1.1038251366120218e-05, "loss": 2.8281, "step": 505 }, { "epoch": 1.659016393442623, "grad_norm": 8.591665267944336, "learning_rate": 1.1060109289617487e-05, "loss": 2.7173, "step": 506 }, { "epoch": 1.662295081967213, "grad_norm": 10.010729789733887, "learning_rate": 1.1081967213114755e-05, "loss": 3.5537, "step": 507 }, { "epoch": 1.6655737704918034, "grad_norm": 5.970381259918213, "learning_rate": 1.1103825136612024e-05, "loss": 2.7798, "step": 508 }, { "epoch": 1.6688524590163936, "grad_norm": 10.188258171081543, "learning_rate": 1.1125683060109291e-05, "loss": 2.7188, "step": 509 }, { "epoch": 1.6721311475409837, "grad_norm": 10.691878318786621, "learning_rate": 1.1147540983606557e-05, "loss": 3.0176, "step": 510 }, { "epoch": 1.6754098360655738, "grad_norm": 12.115982055664062, "learning_rate": 1.1169398907103826e-05, "loss": 2.6943, "step": 511 }, { "epoch": 1.678688524590164, "grad_norm": 11.664708137512207, "learning_rate": 1.1191256830601095e-05, "loss": 3.019, "step": 512 }, { "epoch": 1.681967213114754, "grad_norm": 9.05016040802002, "learning_rate": 1.1213114754098362e-05, "loss": 2.9712, "step": 513 }, { "epoch": 1.6852459016393442, "grad_norm": 10.574763298034668, "learning_rate": 1.1234972677595631e-05, "loss": 2.7744, "step": 514 }, { "epoch": 1.6885245901639343, "grad_norm": 8.202086448669434, "learning_rate": 1.1256830601092897e-05, "loss": 3.0479, "step": 515 }, { "epoch": 1.6918032786885244, "grad_norm": 7.302971363067627, "learning_rate": 1.1278688524590164e-05, "loss": 2.9526, "step": 516 }, { "epoch": 1.6950819672131148, "grad_norm": 9.390225410461426, "learning_rate": 1.1300546448087433e-05, "loss": 2.6777, "step": 517 }, { "epoch": 1.698360655737705, "grad_norm": 11.824459075927734, "learning_rate": 1.13224043715847e-05, "loss": 2.8442, "step": 518 }, { "epoch": 1.7016393442622952, "grad_norm": 9.544570922851562, "learning_rate": 1.134426229508197e-05, "loss": 3.0059, "step": 519 }, { "epoch": 1.7049180327868854, "grad_norm": 8.564409255981445, "learning_rate": 1.1366120218579235e-05, "loss": 3.0312, "step": 520 }, { "epoch": 1.7081967213114755, "grad_norm": 8.903915405273438, "learning_rate": 1.1387978142076504e-05, "loss": 2.8818, "step": 521 }, { "epoch": 1.7114754098360656, "grad_norm": 8.298206329345703, "learning_rate": 1.1409836065573771e-05, "loss": 3.1748, "step": 522 }, { "epoch": 1.7147540983606557, "grad_norm": 7.06096076965332, "learning_rate": 1.143169398907104e-05, "loss": 2.9644, "step": 523 }, { "epoch": 1.7180327868852459, "grad_norm": 8.53333568572998, "learning_rate": 1.1453551912568306e-05, "loss": 2.7344, "step": 524 }, { "epoch": 1.721311475409836, "grad_norm": 10.445050239562988, "learning_rate": 1.1475409836065575e-05, "loss": 2.8379, "step": 525 }, { "epoch": 1.724590163934426, "grad_norm": 12.157598495483398, "learning_rate": 1.1497267759562842e-05, "loss": 2.6943, "step": 526 }, { "epoch": 1.7278688524590164, "grad_norm": 10.18016529083252, "learning_rate": 1.1519125683060111e-05, "loss": 2.7622, "step": 527 }, { "epoch": 1.7311475409836066, "grad_norm": 10.188250541687012, "learning_rate": 1.1540983606557378e-05, "loss": 2.8975, "step": 528 }, { "epoch": 1.7344262295081967, "grad_norm": 11.326924324035645, "learning_rate": 1.1562841530054646e-05, "loss": 2.8076, "step": 529 }, { "epoch": 1.737704918032787, "grad_norm": 11.008745193481445, "learning_rate": 1.1584699453551913e-05, "loss": 2.9629, "step": 530 }, { "epoch": 1.7409836065573772, "grad_norm": 11.371987342834473, "learning_rate": 1.1606557377049182e-05, "loss": 2.8223, "step": 531 }, { "epoch": 1.7442622950819673, "grad_norm": 11.004063606262207, "learning_rate": 1.162841530054645e-05, "loss": 2.9121, "step": 532 }, { "epoch": 1.7475409836065574, "grad_norm": 15.878734588623047, "learning_rate": 1.1650273224043718e-05, "loss": 2.8838, "step": 533 }, { "epoch": 1.7508196721311475, "grad_norm": 9.083786964416504, "learning_rate": 1.1672131147540984e-05, "loss": 2.8311, "step": 534 }, { "epoch": 1.7540983606557377, "grad_norm": 8.521618843078613, "learning_rate": 1.1693989071038251e-05, "loss": 2.7622, "step": 535 }, { "epoch": 1.7573770491803278, "grad_norm": 12.743377685546875, "learning_rate": 1.171584699453552e-05, "loss": 3.0615, "step": 536 }, { "epoch": 1.760655737704918, "grad_norm": 6.5421366691589355, "learning_rate": 1.173770491803279e-05, "loss": 2.7661, "step": 537 }, { "epoch": 1.7639344262295082, "grad_norm": 8.817001342773438, "learning_rate": 1.1759562841530057e-05, "loss": 2.791, "step": 538 }, { "epoch": 1.7672131147540984, "grad_norm": 7.229492664337158, "learning_rate": 1.1781420765027322e-05, "loss": 2.9443, "step": 539 }, { "epoch": 1.7704918032786885, "grad_norm": 7.916929721832275, "learning_rate": 1.1803278688524591e-05, "loss": 3.106, "step": 540 }, { "epoch": 1.7737704918032788, "grad_norm": 7.828059673309326, "learning_rate": 1.1825136612021858e-05, "loss": 2.9565, "step": 541 }, { "epoch": 1.777049180327869, "grad_norm": 8.20531177520752, "learning_rate": 1.1846994535519127e-05, "loss": 3.1348, "step": 542 }, { "epoch": 1.780327868852459, "grad_norm": 7.875805854797363, "learning_rate": 1.1868852459016393e-05, "loss": 2.7471, "step": 543 }, { "epoch": 1.7836065573770492, "grad_norm": 7.438549041748047, "learning_rate": 1.1890710382513662e-05, "loss": 2.7939, "step": 544 }, { "epoch": 1.7868852459016393, "grad_norm": 10.985294342041016, "learning_rate": 1.191256830601093e-05, "loss": 2.7686, "step": 545 }, { "epoch": 1.7901639344262295, "grad_norm": 8.315796852111816, "learning_rate": 1.1934426229508198e-05, "loss": 2.8711, "step": 546 }, { "epoch": 1.7934426229508196, "grad_norm": 10.269339561462402, "learning_rate": 1.1956284153005466e-05, "loss": 2.9473, "step": 547 }, { "epoch": 1.7967213114754097, "grad_norm": 8.699333190917969, "learning_rate": 1.1978142076502733e-05, "loss": 2.6572, "step": 548 }, { "epoch": 1.8, "grad_norm": 8.382908821105957, "learning_rate": 1.2e-05, "loss": 2.7935, "step": 549 }, { "epoch": 1.8032786885245902, "grad_norm": 8.706124305725098, "learning_rate": 1.202185792349727e-05, "loss": 2.7549, "step": 550 }, { "epoch": 1.8065573770491803, "grad_norm": 10.072162628173828, "learning_rate": 1.2043715846994537e-05, "loss": 2.4878, "step": 551 }, { "epoch": 1.8098360655737706, "grad_norm": 6.457977771759033, "learning_rate": 1.2065573770491806e-05, "loss": 2.7725, "step": 552 }, { "epoch": 1.8131147540983608, "grad_norm": 7.72472620010376, "learning_rate": 1.2087431693989071e-05, "loss": 2.9805, "step": 553 }, { "epoch": 1.8163934426229509, "grad_norm": 7.997943878173828, "learning_rate": 1.210928961748634e-05, "loss": 2.7095, "step": 554 }, { "epoch": 1.819672131147541, "grad_norm": 7.692997455596924, "learning_rate": 1.2131147540983608e-05, "loss": 2.5229, "step": 555 }, { "epoch": 1.8229508196721311, "grad_norm": 8.360854148864746, "learning_rate": 1.2153005464480877e-05, "loss": 2.917, "step": 556 }, { "epoch": 1.8262295081967213, "grad_norm": 12.499968528747559, "learning_rate": 1.2174863387978144e-05, "loss": 2.7803, "step": 557 }, { "epoch": 1.8295081967213114, "grad_norm": 8.571412086486816, "learning_rate": 1.219672131147541e-05, "loss": 2.9741, "step": 558 }, { "epoch": 1.8327868852459015, "grad_norm": 10.42758560180664, "learning_rate": 1.2218579234972678e-05, "loss": 2.9375, "step": 559 }, { "epoch": 1.8360655737704918, "grad_norm": 10.055444717407227, "learning_rate": 1.2240437158469946e-05, "loss": 2.875, "step": 560 }, { "epoch": 1.839344262295082, "grad_norm": 11.950071334838867, "learning_rate": 1.2262295081967215e-05, "loss": 2.6885, "step": 561 }, { "epoch": 1.842622950819672, "grad_norm": 8.712224006652832, "learning_rate": 1.228415300546448e-05, "loss": 2.7568, "step": 562 }, { "epoch": 1.8459016393442624, "grad_norm": 8.715909004211426, "learning_rate": 1.230601092896175e-05, "loss": 2.6479, "step": 563 }, { "epoch": 1.8491803278688526, "grad_norm": 8.231682777404785, "learning_rate": 1.2327868852459017e-05, "loss": 2.8242, "step": 564 }, { "epoch": 1.8524590163934427, "grad_norm": 8.090936660766602, "learning_rate": 1.2349726775956286e-05, "loss": 2.6997, "step": 565 }, { "epoch": 1.8557377049180328, "grad_norm": 8.086977005004883, "learning_rate": 1.2371584699453553e-05, "loss": 2.9214, "step": 566 }, { "epoch": 1.859016393442623, "grad_norm": 9.864916801452637, "learning_rate": 1.239344262295082e-05, "loss": 2.623, "step": 567 }, { "epoch": 1.862295081967213, "grad_norm": 9.400548934936523, "learning_rate": 1.2415300546448088e-05, "loss": 2.6904, "step": 568 }, { "epoch": 1.8655737704918032, "grad_norm": 9.747089385986328, "learning_rate": 1.2437158469945357e-05, "loss": 2.8076, "step": 569 }, { "epoch": 1.8688524590163933, "grad_norm": 17.27737045288086, "learning_rate": 1.2459016393442624e-05, "loss": 2.7847, "step": 570 }, { "epoch": 1.8721311475409836, "grad_norm": 7.786876201629639, "learning_rate": 1.2480874316939893e-05, "loss": 2.8574, "step": 571 }, { "epoch": 1.8754098360655738, "grad_norm": 14.358674049377441, "learning_rate": 1.2502732240437159e-05, "loss": 2.7295, "step": 572 }, { "epoch": 1.8786885245901639, "grad_norm": 10.902332305908203, "learning_rate": 1.2524590163934428e-05, "loss": 2.8662, "step": 573 }, { "epoch": 1.8819672131147542, "grad_norm": 8.982264518737793, "learning_rate": 1.2546448087431695e-05, "loss": 2.9766, "step": 574 }, { "epoch": 1.8852459016393444, "grad_norm": 6.925209999084473, "learning_rate": 1.2568306010928964e-05, "loss": 2.6533, "step": 575 }, { "epoch": 1.8885245901639345, "grad_norm": 10.647449493408203, "learning_rate": 1.2590163934426231e-05, "loss": 3.2729, "step": 576 }, { "epoch": 1.8918032786885246, "grad_norm": 9.641692161560059, "learning_rate": 1.2612021857923497e-05, "loss": 2.8721, "step": 577 }, { "epoch": 1.8950819672131147, "grad_norm": 9.380814552307129, "learning_rate": 1.2633879781420766e-05, "loss": 2.6279, "step": 578 }, { "epoch": 1.8983606557377048, "grad_norm": 7.520554065704346, "learning_rate": 1.2655737704918035e-05, "loss": 2.5337, "step": 579 }, { "epoch": 1.901639344262295, "grad_norm": 9.267802238464355, "learning_rate": 1.2677595628415302e-05, "loss": 2.8604, "step": 580 }, { "epoch": 1.904918032786885, "grad_norm": 8.129403114318848, "learning_rate": 1.2699453551912568e-05, "loss": 2.8169, "step": 581 }, { "epoch": 1.9081967213114754, "grad_norm": 8.900020599365234, "learning_rate": 1.2721311475409837e-05, "loss": 2.6411, "step": 582 }, { "epoch": 1.9114754098360656, "grad_norm": 15.301031112670898, "learning_rate": 1.2743169398907104e-05, "loss": 2.6797, "step": 583 }, { "epoch": 1.9147540983606557, "grad_norm": 10.337018013000488, "learning_rate": 1.2765027322404373e-05, "loss": 2.8711, "step": 584 }, { "epoch": 1.918032786885246, "grad_norm": 11.795730590820312, "learning_rate": 1.2786885245901642e-05, "loss": 2.7725, "step": 585 }, { "epoch": 1.9213114754098362, "grad_norm": 7.791945457458496, "learning_rate": 1.2808743169398908e-05, "loss": 2.7217, "step": 586 }, { "epoch": 1.9245901639344263, "grad_norm": 11.23378849029541, "learning_rate": 1.2830601092896175e-05, "loss": 2.8916, "step": 587 }, { "epoch": 1.9278688524590164, "grad_norm": 9.467313766479492, "learning_rate": 1.2852459016393444e-05, "loss": 2.8408, "step": 588 }, { "epoch": 1.9311475409836065, "grad_norm": 11.188268661499023, "learning_rate": 1.2874316939890711e-05, "loss": 2.8525, "step": 589 }, { "epoch": 1.9344262295081966, "grad_norm": 9.412188529968262, "learning_rate": 1.289617486338798e-05, "loss": 2.7153, "step": 590 }, { "epoch": 1.9377049180327868, "grad_norm": 8.806011199951172, "learning_rate": 1.2918032786885246e-05, "loss": 2.9614, "step": 591 }, { "epoch": 1.940983606557377, "grad_norm": 8.440743446350098, "learning_rate": 1.2939890710382515e-05, "loss": 2.7227, "step": 592 }, { "epoch": 1.9442622950819672, "grad_norm": 9.756824493408203, "learning_rate": 1.2961748633879782e-05, "loss": 2.7061, "step": 593 }, { "epoch": 1.9475409836065574, "grad_norm": 12.840327262878418, "learning_rate": 1.2983606557377051e-05, "loss": 2.6836, "step": 594 }, { "epoch": 1.9508196721311475, "grad_norm": 8.52310562133789, "learning_rate": 1.3005464480874317e-05, "loss": 2.7676, "step": 595 }, { "epoch": 1.9540983606557378, "grad_norm": 9.084774017333984, "learning_rate": 1.3027322404371586e-05, "loss": 2.6504, "step": 596 }, { "epoch": 1.957377049180328, "grad_norm": 9.408848762512207, "learning_rate": 1.3049180327868853e-05, "loss": 2.6479, "step": 597 }, { "epoch": 1.960655737704918, "grad_norm": 6.8153767585754395, "learning_rate": 1.3071038251366122e-05, "loss": 2.792, "step": 598 }, { "epoch": 1.9639344262295082, "grad_norm": 22.846561431884766, "learning_rate": 1.309289617486339e-05, "loss": 3.2676, "step": 599 }, { "epoch": 1.9672131147540983, "grad_norm": 6.911410331726074, "learning_rate": 1.3114754098360655e-05, "loss": 2.79, "step": 600 }, { "epoch": 1.9704918032786884, "grad_norm": 8.509675979614258, "learning_rate": 1.3136612021857924e-05, "loss": 2.8823, "step": 601 }, { "epoch": 1.9737704918032786, "grad_norm": 9.140427589416504, "learning_rate": 1.3158469945355193e-05, "loss": 2.6338, "step": 602 }, { "epoch": 1.9770491803278687, "grad_norm": 6.658791542053223, "learning_rate": 1.318032786885246e-05, "loss": 2.7188, "step": 603 }, { "epoch": 1.980327868852459, "grad_norm": 9.199284553527832, "learning_rate": 1.320218579234973e-05, "loss": 2.7349, "step": 604 }, { "epoch": 1.9836065573770492, "grad_norm": 9.496105194091797, "learning_rate": 1.3224043715846995e-05, "loss": 2.96, "step": 605 }, { "epoch": 1.9868852459016395, "grad_norm": 11.325531959533691, "learning_rate": 1.3245901639344262e-05, "loss": 2.8818, "step": 606 }, { "epoch": 1.9901639344262296, "grad_norm": 15.471463203430176, "learning_rate": 1.3267759562841531e-05, "loss": 2.6758, "step": 607 }, { "epoch": 1.9934426229508198, "grad_norm": 10.108735084533691, "learning_rate": 1.3289617486338799e-05, "loss": 2.6875, "step": 608 }, { "epoch": 1.9967213114754099, "grad_norm": 6.884893417358398, "learning_rate": 1.3311475409836068e-05, "loss": 2.9004, "step": 609 }, { "epoch": 2.0, "grad_norm": 20.803585052490234, "learning_rate": 1.3333333333333333e-05, "loss": 2.8418, "step": 610 }, { "epoch": 2.00327868852459, "grad_norm": 11.81991195678711, "learning_rate": 1.3355191256830602e-05, "loss": 2.792, "step": 611 }, { "epoch": 2.0065573770491802, "grad_norm": 8.304170608520508, "learning_rate": 1.337704918032787e-05, "loss": 2.543, "step": 612 }, { "epoch": 2.0098360655737704, "grad_norm": 7.867931842803955, "learning_rate": 1.3398907103825139e-05, "loss": 2.5928, "step": 613 }, { "epoch": 2.0131147540983605, "grad_norm": 7.428008079528809, "learning_rate": 1.3420765027322404e-05, "loss": 2.8633, "step": 614 }, { "epoch": 2.0163934426229506, "grad_norm": 9.725910186767578, "learning_rate": 1.3442622950819673e-05, "loss": 2.5903, "step": 615 }, { "epoch": 2.019672131147541, "grad_norm": 23.23394775390625, "learning_rate": 1.346448087431694e-05, "loss": 2.6816, "step": 616 }, { "epoch": 2.0229508196721313, "grad_norm": 14.170706748962402, "learning_rate": 1.348633879781421e-05, "loss": 2.6411, "step": 617 }, { "epoch": 2.0262295081967214, "grad_norm": 11.067046165466309, "learning_rate": 1.3508196721311477e-05, "loss": 2.6025, "step": 618 }, { "epoch": 2.0295081967213116, "grad_norm": 8.468632698059082, "learning_rate": 1.3530054644808742e-05, "loss": 2.6235, "step": 619 }, { "epoch": 2.0327868852459017, "grad_norm": 8.739800453186035, "learning_rate": 1.3551912568306011e-05, "loss": 2.7451, "step": 620 }, { "epoch": 2.036065573770492, "grad_norm": 6.242934703826904, "learning_rate": 1.357377049180328e-05, "loss": 2.8223, "step": 621 }, { "epoch": 2.039344262295082, "grad_norm": 9.029082298278809, "learning_rate": 1.3595628415300548e-05, "loss": 2.9316, "step": 622 }, { "epoch": 2.042622950819672, "grad_norm": 8.595215797424316, "learning_rate": 1.3617486338797817e-05, "loss": 2.7783, "step": 623 }, { "epoch": 2.045901639344262, "grad_norm": 8.480000495910645, "learning_rate": 1.3639344262295082e-05, "loss": 2.6548, "step": 624 }, { "epoch": 2.0491803278688523, "grad_norm": 6.512568473815918, "learning_rate": 1.366120218579235e-05, "loss": 2.6572, "step": 625 }, { "epoch": 2.0524590163934424, "grad_norm": 9.284653663635254, "learning_rate": 1.3683060109289619e-05, "loss": 2.7334, "step": 626 }, { "epoch": 2.055737704918033, "grad_norm": 10.556669235229492, "learning_rate": 1.3704918032786888e-05, "loss": 2.8921, "step": 627 }, { "epoch": 2.059016393442623, "grad_norm": 9.573077201843262, "learning_rate": 1.3726775956284155e-05, "loss": 2.9565, "step": 628 }, { "epoch": 2.0622950819672132, "grad_norm": 7.933951377868652, "learning_rate": 1.374863387978142e-05, "loss": 2.7373, "step": 629 }, { "epoch": 2.0655737704918034, "grad_norm": 9.693153381347656, "learning_rate": 1.377049180327869e-05, "loss": 2.7017, "step": 630 }, { "epoch": 2.0688524590163935, "grad_norm": 7.428844928741455, "learning_rate": 1.3792349726775957e-05, "loss": 2.7178, "step": 631 }, { "epoch": 2.0721311475409836, "grad_norm": 16.135223388671875, "learning_rate": 1.3814207650273226e-05, "loss": 2.8511, "step": 632 }, { "epoch": 2.0754098360655737, "grad_norm": 15.312707901000977, "learning_rate": 1.3836065573770492e-05, "loss": 2.6841, "step": 633 }, { "epoch": 2.078688524590164, "grad_norm": 13.76740837097168, "learning_rate": 1.385792349726776e-05, "loss": 2.5898, "step": 634 }, { "epoch": 2.081967213114754, "grad_norm": 14.069330215454102, "learning_rate": 1.3879781420765028e-05, "loss": 2.6816, "step": 635 }, { "epoch": 2.085245901639344, "grad_norm": 8.130561828613281, "learning_rate": 1.3901639344262297e-05, "loss": 2.9331, "step": 636 }, { "epoch": 2.088524590163934, "grad_norm": 11.01263427734375, "learning_rate": 1.3923497267759564e-05, "loss": 2.7998, "step": 637 }, { "epoch": 2.091803278688525, "grad_norm": 6.518977642059326, "learning_rate": 1.3945355191256832e-05, "loss": 2.7549, "step": 638 }, { "epoch": 2.095081967213115, "grad_norm": 9.136117935180664, "learning_rate": 1.3967213114754099e-05, "loss": 2.7056, "step": 639 }, { "epoch": 2.098360655737705, "grad_norm": 13.343233108520508, "learning_rate": 1.3989071038251368e-05, "loss": 2.9033, "step": 640 }, { "epoch": 2.101639344262295, "grad_norm": 8.994856834411621, "learning_rate": 1.4010928961748635e-05, "loss": 2.6826, "step": 641 }, { "epoch": 2.1049180327868853, "grad_norm": 8.94896125793457, "learning_rate": 1.4032786885245904e-05, "loss": 2.502, "step": 642 }, { "epoch": 2.1081967213114754, "grad_norm": 15.25666618347168, "learning_rate": 1.405464480874317e-05, "loss": 2.4775, "step": 643 }, { "epoch": 2.1114754098360655, "grad_norm": 12.643622398376465, "learning_rate": 1.4076502732240439e-05, "loss": 2.791, "step": 644 }, { "epoch": 2.1147540983606556, "grad_norm": 7.5574631690979, "learning_rate": 1.4098360655737706e-05, "loss": 2.6484, "step": 645 }, { "epoch": 2.1180327868852458, "grad_norm": 22.775575637817383, "learning_rate": 1.4120218579234975e-05, "loss": 2.752, "step": 646 }, { "epoch": 2.121311475409836, "grad_norm": 7.639135360717773, "learning_rate": 1.4142076502732242e-05, "loss": 2.6411, "step": 647 }, { "epoch": 2.1245901639344265, "grad_norm": 6.885555267333984, "learning_rate": 1.4163934426229508e-05, "loss": 2.6787, "step": 648 }, { "epoch": 2.1278688524590166, "grad_norm": 9.083076477050781, "learning_rate": 1.4185792349726777e-05, "loss": 2.6543, "step": 649 }, { "epoch": 2.1311475409836067, "grad_norm": 8.990350723266602, "learning_rate": 1.4207650273224044e-05, "loss": 2.437, "step": 650 }, { "epoch": 2.134426229508197, "grad_norm": 10.17431640625, "learning_rate": 1.4229508196721313e-05, "loss": 2.8037, "step": 651 }, { "epoch": 2.137704918032787, "grad_norm": 15.800955772399902, "learning_rate": 1.4251366120218579e-05, "loss": 2.8086, "step": 652 }, { "epoch": 2.140983606557377, "grad_norm": 9.701533317565918, "learning_rate": 1.4273224043715848e-05, "loss": 2.7773, "step": 653 }, { "epoch": 2.144262295081967, "grad_norm": 19.534475326538086, "learning_rate": 1.4295081967213115e-05, "loss": 2.3926, "step": 654 }, { "epoch": 2.1475409836065573, "grad_norm": 7.747833251953125, "learning_rate": 1.4316939890710384e-05, "loss": 2.5356, "step": 655 }, { "epoch": 2.1508196721311474, "grad_norm": 8.922057151794434, "learning_rate": 1.4338797814207652e-05, "loss": 2.6724, "step": 656 }, { "epoch": 2.1540983606557376, "grad_norm": 6.7422776222229, "learning_rate": 1.4360655737704919e-05, "loss": 2.3833, "step": 657 }, { "epoch": 2.1573770491803277, "grad_norm": 5.877311706542969, "learning_rate": 1.4382513661202186e-05, "loss": 2.8286, "step": 658 }, { "epoch": 2.160655737704918, "grad_norm": 8.909833908081055, "learning_rate": 1.4404371584699455e-05, "loss": 2.7402, "step": 659 }, { "epoch": 2.1639344262295084, "grad_norm": 19.35559844970703, "learning_rate": 1.4426229508196722e-05, "loss": 2.7217, "step": 660 }, { "epoch": 2.1672131147540985, "grad_norm": 10.786640167236328, "learning_rate": 1.4448087431693991e-05, "loss": 2.6631, "step": 661 }, { "epoch": 2.1704918032786886, "grad_norm": 24.195798873901367, "learning_rate": 1.4469945355191257e-05, "loss": 2.707, "step": 662 }, { "epoch": 2.1737704918032787, "grad_norm": 11.094082832336426, "learning_rate": 1.4491803278688526e-05, "loss": 2.7168, "step": 663 }, { "epoch": 2.177049180327869, "grad_norm": 8.382084846496582, "learning_rate": 1.4513661202185793e-05, "loss": 2.8174, "step": 664 }, { "epoch": 2.180327868852459, "grad_norm": 7.923414707183838, "learning_rate": 1.4535519125683062e-05, "loss": 2.5171, "step": 665 }, { "epoch": 2.183606557377049, "grad_norm": 8.059049606323242, "learning_rate": 1.455737704918033e-05, "loss": 2.6875, "step": 666 }, { "epoch": 2.1868852459016392, "grad_norm": 6.50253963470459, "learning_rate": 1.4579234972677595e-05, "loss": 2.5405, "step": 667 }, { "epoch": 2.1901639344262294, "grad_norm": 10.434953689575195, "learning_rate": 1.4601092896174864e-05, "loss": 2.7354, "step": 668 }, { "epoch": 2.1934426229508195, "grad_norm": 11.025424003601074, "learning_rate": 1.4622950819672133e-05, "loss": 2.6455, "step": 669 }, { "epoch": 2.19672131147541, "grad_norm": 10.74526596069336, "learning_rate": 1.46448087431694e-05, "loss": 2.6929, "step": 670 }, { "epoch": 2.2, "grad_norm": 13.615324020385742, "learning_rate": 1.4666666666666666e-05, "loss": 2.4341, "step": 671 }, { "epoch": 2.2032786885245903, "grad_norm": 9.349550247192383, "learning_rate": 1.4688524590163935e-05, "loss": 2.6934, "step": 672 }, { "epoch": 2.2065573770491804, "grad_norm": 8.423543930053711, "learning_rate": 1.4710382513661203e-05, "loss": 2.5142, "step": 673 }, { "epoch": 2.2098360655737705, "grad_norm": 10.077756881713867, "learning_rate": 1.4732240437158472e-05, "loss": 2.5488, "step": 674 }, { "epoch": 2.2131147540983607, "grad_norm": 10.956117630004883, "learning_rate": 1.4754098360655739e-05, "loss": 2.6582, "step": 675 }, { "epoch": 2.216393442622951, "grad_norm": 7.15712833404541, "learning_rate": 1.4775956284153006e-05, "loss": 2.6226, "step": 676 }, { "epoch": 2.219672131147541, "grad_norm": 6.817385196685791, "learning_rate": 1.4797814207650274e-05, "loss": 2.625, "step": 677 }, { "epoch": 2.222950819672131, "grad_norm": 8.75587272644043, "learning_rate": 1.4819672131147543e-05, "loss": 2.7607, "step": 678 }, { "epoch": 2.226229508196721, "grad_norm": 12.707048416137695, "learning_rate": 1.484153005464481e-05, "loss": 2.6938, "step": 679 }, { "epoch": 2.2295081967213113, "grad_norm": 5.990927696228027, "learning_rate": 1.4863387978142079e-05, "loss": 2.5723, "step": 680 }, { "epoch": 2.2327868852459014, "grad_norm": 7.29932165145874, "learning_rate": 1.4885245901639344e-05, "loss": 2.6426, "step": 681 }, { "epoch": 2.236065573770492, "grad_norm": 10.12005615234375, "learning_rate": 1.4907103825136613e-05, "loss": 2.5459, "step": 682 }, { "epoch": 2.239344262295082, "grad_norm": 10.188096046447754, "learning_rate": 1.492896174863388e-05, "loss": 2.752, "step": 683 }, { "epoch": 2.2426229508196722, "grad_norm": 7.517059803009033, "learning_rate": 1.495081967213115e-05, "loss": 2.4004, "step": 684 }, { "epoch": 2.2459016393442623, "grad_norm": 9.88967514038086, "learning_rate": 1.4972677595628417e-05, "loss": 2.73, "step": 685 }, { "epoch": 2.2491803278688525, "grad_norm": 9.15849781036377, "learning_rate": 1.4994535519125684e-05, "loss": 2.6963, "step": 686 }, { "epoch": 2.2524590163934426, "grad_norm": 7.17484188079834, "learning_rate": 1.5016393442622952e-05, "loss": 2.71, "step": 687 }, { "epoch": 2.2557377049180327, "grad_norm": 11.474014282226562, "learning_rate": 1.503825136612022e-05, "loss": 2.7119, "step": 688 }, { "epoch": 2.259016393442623, "grad_norm": 6.805150985717773, "learning_rate": 1.5060109289617488e-05, "loss": 2.8262, "step": 689 }, { "epoch": 2.262295081967213, "grad_norm": 8.281304359436035, "learning_rate": 1.5081967213114754e-05, "loss": 2.4995, "step": 690 }, { "epoch": 2.265573770491803, "grad_norm": 14.25182056427002, "learning_rate": 1.5103825136612023e-05, "loss": 2.6045, "step": 691 }, { "epoch": 2.2688524590163937, "grad_norm": 10.302835464477539, "learning_rate": 1.512568306010929e-05, "loss": 2.4722, "step": 692 }, { "epoch": 2.2721311475409838, "grad_norm": 9.397382736206055, "learning_rate": 1.5147540983606559e-05, "loss": 2.6748, "step": 693 }, { "epoch": 2.275409836065574, "grad_norm": 8.671537399291992, "learning_rate": 1.5169398907103828e-05, "loss": 2.6855, "step": 694 }, { "epoch": 2.278688524590164, "grad_norm": 9.917725563049316, "learning_rate": 1.5191256830601094e-05, "loss": 2.4258, "step": 695 }, { "epoch": 2.281967213114754, "grad_norm": 8.110369682312012, "learning_rate": 1.5213114754098361e-05, "loss": 2.7178, "step": 696 }, { "epoch": 2.2852459016393443, "grad_norm": 7.8975653648376465, "learning_rate": 1.523497267759563e-05, "loss": 2.4346, "step": 697 }, { "epoch": 2.2885245901639344, "grad_norm": 9.975326538085938, "learning_rate": 1.5256830601092897e-05, "loss": 2.7412, "step": 698 }, { "epoch": 2.2918032786885245, "grad_norm": 7.524602890014648, "learning_rate": 1.5278688524590165e-05, "loss": 2.5488, "step": 699 }, { "epoch": 2.2950819672131146, "grad_norm": 10.732077598571777, "learning_rate": 1.5300546448087432e-05, "loss": 2.9404, "step": 700 }, { "epoch": 2.2983606557377048, "grad_norm": 8.509692192077637, "learning_rate": 1.53224043715847e-05, "loss": 2.6035, "step": 701 }, { "epoch": 2.301639344262295, "grad_norm": 9.91398811340332, "learning_rate": 1.534426229508197e-05, "loss": 2.5938, "step": 702 }, { "epoch": 2.304918032786885, "grad_norm": 8.404139518737793, "learning_rate": 1.5366120218579237e-05, "loss": 2.7349, "step": 703 }, { "epoch": 2.3081967213114756, "grad_norm": 10.89338493347168, "learning_rate": 1.5387978142076504e-05, "loss": 2.5039, "step": 704 }, { "epoch": 2.3114754098360657, "grad_norm": 13.250032424926758, "learning_rate": 1.5409836065573772e-05, "loss": 2.3853, "step": 705 }, { "epoch": 2.314754098360656, "grad_norm": 9.260924339294434, "learning_rate": 1.543169398907104e-05, "loss": 2.8496, "step": 706 }, { "epoch": 2.318032786885246, "grad_norm": 7.5222859382629395, "learning_rate": 1.5453551912568306e-05, "loss": 2.5083, "step": 707 }, { "epoch": 2.321311475409836, "grad_norm": 12.781291007995605, "learning_rate": 1.5475409836065577e-05, "loss": 2.77, "step": 708 }, { "epoch": 2.324590163934426, "grad_norm": 9.691520690917969, "learning_rate": 1.549726775956284e-05, "loss": 2.8569, "step": 709 }, { "epoch": 2.3278688524590163, "grad_norm": 8.334084510803223, "learning_rate": 1.551912568306011e-05, "loss": 2.7021, "step": 710 }, { "epoch": 2.3311475409836064, "grad_norm": 17.31492042541504, "learning_rate": 1.554098360655738e-05, "loss": 2.5972, "step": 711 }, { "epoch": 2.3344262295081966, "grad_norm": 9.173656463623047, "learning_rate": 1.5562841530054646e-05, "loss": 2.4482, "step": 712 }, { "epoch": 2.337704918032787, "grad_norm": 12.352076530456543, "learning_rate": 1.5584699453551914e-05, "loss": 2.4453, "step": 713 }, { "epoch": 2.3409836065573773, "grad_norm": 68.87675476074219, "learning_rate": 1.560655737704918e-05, "loss": 2.502, "step": 714 }, { "epoch": 2.3442622950819674, "grad_norm": 8.230330467224121, "learning_rate": 1.5628415300546448e-05, "loss": 2.7319, "step": 715 }, { "epoch": 2.3475409836065575, "grad_norm": 11.475406646728516, "learning_rate": 1.5650273224043716e-05, "loss": 2.6445, "step": 716 }, { "epoch": 2.3508196721311476, "grad_norm": 10.11894416809082, "learning_rate": 1.5672131147540986e-05, "loss": 2.9033, "step": 717 }, { "epoch": 2.3540983606557377, "grad_norm": 8.995369911193848, "learning_rate": 1.5693989071038254e-05, "loss": 2.7603, "step": 718 }, { "epoch": 2.357377049180328, "grad_norm": 7.8913397789001465, "learning_rate": 1.571584699453552e-05, "loss": 2.4932, "step": 719 }, { "epoch": 2.360655737704918, "grad_norm": 9.765506744384766, "learning_rate": 1.5737704918032788e-05, "loss": 2.5137, "step": 720 }, { "epoch": 2.363934426229508, "grad_norm": 15.317418098449707, "learning_rate": 1.5759562841530055e-05, "loss": 2.6655, "step": 721 }, { "epoch": 2.3672131147540982, "grad_norm": 10.332319259643555, "learning_rate": 1.5781420765027323e-05, "loss": 2.6265, "step": 722 }, { "epoch": 2.3704918032786884, "grad_norm": 17.022483825683594, "learning_rate": 1.580327868852459e-05, "loss": 2.7783, "step": 723 }, { "epoch": 2.3737704918032785, "grad_norm": 11.24491024017334, "learning_rate": 1.5825136612021857e-05, "loss": 2.6909, "step": 724 }, { "epoch": 2.3770491803278686, "grad_norm": 11.381476402282715, "learning_rate": 1.5846994535519128e-05, "loss": 2.562, "step": 725 }, { "epoch": 2.380327868852459, "grad_norm": 8.831503868103027, "learning_rate": 1.5868852459016395e-05, "loss": 2.7627, "step": 726 }, { "epoch": 2.3836065573770493, "grad_norm": 13.329334259033203, "learning_rate": 1.5890710382513663e-05, "loss": 2.5591, "step": 727 }, { "epoch": 2.3868852459016394, "grad_norm": 10.032230377197266, "learning_rate": 1.591256830601093e-05, "loss": 2.5386, "step": 728 }, { "epoch": 2.3901639344262295, "grad_norm": 21.867149353027344, "learning_rate": 1.5934426229508197e-05, "loss": 2.6489, "step": 729 }, { "epoch": 2.3934426229508197, "grad_norm": 10.444823265075684, "learning_rate": 1.5956284153005465e-05, "loss": 2.6284, "step": 730 }, { "epoch": 2.39672131147541, "grad_norm": 8.89278507232666, "learning_rate": 1.5978142076502735e-05, "loss": 2.4985, "step": 731 }, { "epoch": 2.4, "grad_norm": 16.796449661254883, "learning_rate": 1.6000000000000003e-05, "loss": 2.7627, "step": 732 }, { "epoch": 2.40327868852459, "grad_norm": 11.193546295166016, "learning_rate": 1.6021857923497267e-05, "loss": 2.5566, "step": 733 }, { "epoch": 2.40655737704918, "grad_norm": 9.657023429870605, "learning_rate": 1.6043715846994537e-05, "loss": 2.5952, "step": 734 }, { "epoch": 2.4098360655737707, "grad_norm": 7.866175651550293, "learning_rate": 1.6065573770491805e-05, "loss": 2.8188, "step": 735 }, { "epoch": 2.413114754098361, "grad_norm": 32.59635543823242, "learning_rate": 1.6087431693989072e-05, "loss": 2.7158, "step": 736 }, { "epoch": 2.416393442622951, "grad_norm": 8.502220153808594, "learning_rate": 1.6109289617486343e-05, "loss": 2.6943, "step": 737 }, { "epoch": 2.419672131147541, "grad_norm": 11.327669143676758, "learning_rate": 1.6131147540983607e-05, "loss": 2.5747, "step": 738 }, { "epoch": 2.422950819672131, "grad_norm": 10.787079811096191, "learning_rate": 1.6153005464480874e-05, "loss": 2.9766, "step": 739 }, { "epoch": 2.4262295081967213, "grad_norm": 7.25958251953125, "learning_rate": 1.6174863387978145e-05, "loss": 2.3677, "step": 740 }, { "epoch": 2.4295081967213115, "grad_norm": 8.594948768615723, "learning_rate": 1.6196721311475412e-05, "loss": 2.5439, "step": 741 }, { "epoch": 2.4327868852459016, "grad_norm": 11.990208625793457, "learning_rate": 1.621857923497268e-05, "loss": 2.6357, "step": 742 }, { "epoch": 2.4360655737704917, "grad_norm": 14.197102546691895, "learning_rate": 1.6240437158469946e-05, "loss": 2.7544, "step": 743 }, { "epoch": 2.439344262295082, "grad_norm": 10.464057922363281, "learning_rate": 1.6262295081967214e-05, "loss": 2.4849, "step": 744 }, { "epoch": 2.442622950819672, "grad_norm": 9.833406448364258, "learning_rate": 1.628415300546448e-05, "loss": 2.7832, "step": 745 }, { "epoch": 2.445901639344262, "grad_norm": 9.394417762756348, "learning_rate": 1.6306010928961752e-05, "loss": 2.623, "step": 746 }, { "epoch": 2.4491803278688526, "grad_norm": 8.504725456237793, "learning_rate": 1.6327868852459016e-05, "loss": 2.6816, "step": 747 }, { "epoch": 2.4524590163934428, "grad_norm": 11.043588638305664, "learning_rate": 1.6349726775956286e-05, "loss": 2.751, "step": 748 }, { "epoch": 2.455737704918033, "grad_norm": 10.956014633178711, "learning_rate": 1.6371584699453554e-05, "loss": 2.5869, "step": 749 }, { "epoch": 2.459016393442623, "grad_norm": 8.804317474365234, "learning_rate": 1.639344262295082e-05, "loss": 2.7256, "step": 750 }, { "epoch": 2.462295081967213, "grad_norm": 11.192793846130371, "learning_rate": 1.641530054644809e-05, "loss": 2.6025, "step": 751 }, { "epoch": 2.4655737704918033, "grad_norm": 10.963794708251953, "learning_rate": 1.6437158469945356e-05, "loss": 2.624, "step": 752 }, { "epoch": 2.4688524590163934, "grad_norm": 13.053224563598633, "learning_rate": 1.6459016393442623e-05, "loss": 2.6025, "step": 753 }, { "epoch": 2.4721311475409835, "grad_norm": 6.055501461029053, "learning_rate": 1.6480874316939894e-05, "loss": 2.4316, "step": 754 }, { "epoch": 2.4754098360655736, "grad_norm": 10.196263313293457, "learning_rate": 1.650273224043716e-05, "loss": 2.8877, "step": 755 }, { "epoch": 2.4786885245901638, "grad_norm": 13.623723030090332, "learning_rate": 1.6524590163934428e-05, "loss": 2.5703, "step": 756 }, { "epoch": 2.4819672131147543, "grad_norm": 6.990116596221924, "learning_rate": 1.6546448087431696e-05, "loss": 2.6748, "step": 757 }, { "epoch": 2.4852459016393444, "grad_norm": 12.306563377380371, "learning_rate": 1.6568306010928963e-05, "loss": 2.4751, "step": 758 }, { "epoch": 2.4885245901639346, "grad_norm": 10.591422080993652, "learning_rate": 1.659016393442623e-05, "loss": 2.7246, "step": 759 }, { "epoch": 2.4918032786885247, "grad_norm": 8.647806167602539, "learning_rate": 1.66120218579235e-05, "loss": 2.6353, "step": 760 }, { "epoch": 2.495081967213115, "grad_norm": 27.94918441772461, "learning_rate": 1.6633879781420765e-05, "loss": 2.4644, "step": 761 }, { "epoch": 2.498360655737705, "grad_norm": 9.523733139038086, "learning_rate": 1.6655737704918032e-05, "loss": 2.5332, "step": 762 }, { "epoch": 2.501639344262295, "grad_norm": 7.612496852874756, "learning_rate": 1.6677595628415303e-05, "loss": 2.647, "step": 763 }, { "epoch": 2.504918032786885, "grad_norm": 10.050192832946777, "learning_rate": 1.669945355191257e-05, "loss": 2.8047, "step": 764 }, { "epoch": 2.5081967213114753, "grad_norm": 8.98185920715332, "learning_rate": 1.6721311475409837e-05, "loss": 2.6006, "step": 765 }, { "epoch": 2.5114754098360654, "grad_norm": 6.538755893707275, "learning_rate": 1.6743169398907105e-05, "loss": 2.5605, "step": 766 }, { "epoch": 2.5147540983606556, "grad_norm": 11.092233657836914, "learning_rate": 1.6765027322404372e-05, "loss": 2.5776, "step": 767 }, { "epoch": 2.5180327868852457, "grad_norm": 8.355868339538574, "learning_rate": 1.678688524590164e-05, "loss": 2.4287, "step": 768 }, { "epoch": 2.521311475409836, "grad_norm": 9.968733787536621, "learning_rate": 1.680874316939891e-05, "loss": 2.6396, "step": 769 }, { "epoch": 2.5245901639344264, "grad_norm": 7.022056579589844, "learning_rate": 1.6830601092896177e-05, "loss": 2.4619, "step": 770 }, { "epoch": 2.5278688524590165, "grad_norm": 8.061748504638672, "learning_rate": 1.6852459016393445e-05, "loss": 2.4766, "step": 771 }, { "epoch": 2.5311475409836066, "grad_norm": 11.783705711364746, "learning_rate": 1.6874316939890712e-05, "loss": 2.6748, "step": 772 }, { "epoch": 2.5344262295081967, "grad_norm": 8.035846710205078, "learning_rate": 1.689617486338798e-05, "loss": 2.5947, "step": 773 }, { "epoch": 2.537704918032787, "grad_norm": 23.291046142578125, "learning_rate": 1.6918032786885247e-05, "loss": 2.4277, "step": 774 }, { "epoch": 2.540983606557377, "grad_norm": 8.058792114257812, "learning_rate": 1.6939890710382517e-05, "loss": 2.7129, "step": 775 }, { "epoch": 2.544262295081967, "grad_norm": 10.187752723693848, "learning_rate": 1.696174863387978e-05, "loss": 2.2358, "step": 776 }, { "epoch": 2.5475409836065572, "grad_norm": 13.40510082244873, "learning_rate": 1.6983606557377052e-05, "loss": 2.5688, "step": 777 }, { "epoch": 2.550819672131148, "grad_norm": 8.829716682434082, "learning_rate": 1.700546448087432e-05, "loss": 2.7231, "step": 778 }, { "epoch": 2.554098360655738, "grad_norm": 9.269083023071289, "learning_rate": 1.7027322404371587e-05, "loss": 2.5986, "step": 779 }, { "epoch": 2.557377049180328, "grad_norm": 7.4231977462768555, "learning_rate": 1.7049180327868854e-05, "loss": 2.6602, "step": 780 }, { "epoch": 2.560655737704918, "grad_norm": 14.616063117980957, "learning_rate": 1.707103825136612e-05, "loss": 2.71, "step": 781 }, { "epoch": 2.5639344262295083, "grad_norm": 9.466253280639648, "learning_rate": 1.709289617486339e-05, "loss": 2.5195, "step": 782 }, { "epoch": 2.5672131147540984, "grad_norm": 6.600862503051758, "learning_rate": 1.711475409836066e-05, "loss": 2.7783, "step": 783 }, { "epoch": 2.5704918032786885, "grad_norm": 6.655094146728516, "learning_rate": 1.7136612021857926e-05, "loss": 2.6162, "step": 784 }, { "epoch": 2.5737704918032787, "grad_norm": 8.769511222839355, "learning_rate": 1.715846994535519e-05, "loss": 2.4326, "step": 785 }, { "epoch": 2.577049180327869, "grad_norm": 10.978974342346191, "learning_rate": 1.718032786885246e-05, "loss": 2.833, "step": 786 }, { "epoch": 2.580327868852459, "grad_norm": 8.410907745361328, "learning_rate": 1.720218579234973e-05, "loss": 2.4712, "step": 787 }, { "epoch": 2.583606557377049, "grad_norm": 12.654293060302734, "learning_rate": 1.7224043715846996e-05, "loss": 2.7534, "step": 788 }, { "epoch": 2.586885245901639, "grad_norm": 10.202850341796875, "learning_rate": 1.7245901639344263e-05, "loss": 2.6567, "step": 789 }, { "epoch": 2.5901639344262293, "grad_norm": 14.934903144836426, "learning_rate": 1.726775956284153e-05, "loss": 2.8242, "step": 790 }, { "epoch": 2.5934426229508194, "grad_norm": 9.394798278808594, "learning_rate": 1.7289617486338798e-05, "loss": 2.7686, "step": 791 }, { "epoch": 2.59672131147541, "grad_norm": 8.1449613571167, "learning_rate": 1.731147540983607e-05, "loss": 2.7324, "step": 792 }, { "epoch": 2.6, "grad_norm": 8.094403266906738, "learning_rate": 1.7333333333333336e-05, "loss": 2.3081, "step": 793 }, { "epoch": 2.60327868852459, "grad_norm": 6.594949245452881, "learning_rate": 1.7355191256830603e-05, "loss": 2.541, "step": 794 }, { "epoch": 2.6065573770491803, "grad_norm": 8.223007202148438, "learning_rate": 1.737704918032787e-05, "loss": 2.6831, "step": 795 }, { "epoch": 2.6098360655737705, "grad_norm": 9.91349983215332, "learning_rate": 1.7398907103825138e-05, "loss": 2.812, "step": 796 }, { "epoch": 2.6131147540983606, "grad_norm": 9.936088562011719, "learning_rate": 1.7420765027322405e-05, "loss": 2.623, "step": 797 }, { "epoch": 2.6163934426229507, "grad_norm": 15.295231819152832, "learning_rate": 1.7442622950819676e-05, "loss": 2.5576, "step": 798 }, { "epoch": 2.619672131147541, "grad_norm": 9.852563858032227, "learning_rate": 1.746448087431694e-05, "loss": 2.6797, "step": 799 }, { "epoch": 2.6229508196721314, "grad_norm": 8.03742790222168, "learning_rate": 1.7486338797814207e-05, "loss": 2.626, "step": 800 }, { "epoch": 2.6262295081967215, "grad_norm": 7.563012599945068, "learning_rate": 1.7508196721311478e-05, "loss": 2.6206, "step": 801 }, { "epoch": 2.6295081967213116, "grad_norm": 7.057340145111084, "learning_rate": 1.7530054644808745e-05, "loss": 2.5249, "step": 802 }, { "epoch": 2.6327868852459018, "grad_norm": 6.62748908996582, "learning_rate": 1.7551912568306012e-05, "loss": 2.8643, "step": 803 }, { "epoch": 2.636065573770492, "grad_norm": 8.550007820129395, "learning_rate": 1.757377049180328e-05, "loss": 2.6846, "step": 804 }, { "epoch": 2.639344262295082, "grad_norm": 14.702227592468262, "learning_rate": 1.7595628415300547e-05, "loss": 2.5874, "step": 805 }, { "epoch": 2.642622950819672, "grad_norm": 8.70467472076416, "learning_rate": 1.7617486338797814e-05, "loss": 2.4678, "step": 806 }, { "epoch": 2.6459016393442623, "grad_norm": 16.98849868774414, "learning_rate": 1.7639344262295085e-05, "loss": 2.3965, "step": 807 }, { "epoch": 2.6491803278688524, "grad_norm": 7.545299053192139, "learning_rate": 1.7661202185792352e-05, "loss": 2.541, "step": 808 }, { "epoch": 2.6524590163934425, "grad_norm": 6.220411777496338, "learning_rate": 1.768306010928962e-05, "loss": 2.4717, "step": 809 }, { "epoch": 2.6557377049180326, "grad_norm": 9.105379104614258, "learning_rate": 1.7704918032786887e-05, "loss": 2.3286, "step": 810 }, { "epoch": 2.6590163934426227, "grad_norm": 7.917072772979736, "learning_rate": 1.7726775956284154e-05, "loss": 2.4229, "step": 811 }, { "epoch": 2.662295081967213, "grad_norm": 7.308115005493164, "learning_rate": 1.774863387978142e-05, "loss": 2.5298, "step": 812 }, { "epoch": 2.6655737704918034, "grad_norm": 12.134856224060059, "learning_rate": 1.7770491803278692e-05, "loss": 2.7549, "step": 813 }, { "epoch": 2.6688524590163936, "grad_norm": 8.664544105529785, "learning_rate": 1.7792349726775956e-05, "loss": 2.625, "step": 814 }, { "epoch": 2.6721311475409837, "grad_norm": 6.543949127197266, "learning_rate": 1.7814207650273227e-05, "loss": 2.6519, "step": 815 }, { "epoch": 2.675409836065574, "grad_norm": 8.27515697479248, "learning_rate": 1.7836065573770494e-05, "loss": 2.6216, "step": 816 }, { "epoch": 2.678688524590164, "grad_norm": 7.4623589515686035, "learning_rate": 1.785792349726776e-05, "loss": 2.6479, "step": 817 }, { "epoch": 2.681967213114754, "grad_norm": 8.524256706237793, "learning_rate": 1.787978142076503e-05, "loss": 2.5137, "step": 818 }, { "epoch": 2.685245901639344, "grad_norm": 13.027067184448242, "learning_rate": 1.7901639344262296e-05, "loss": 2.6143, "step": 819 }, { "epoch": 2.6885245901639343, "grad_norm": 6.535643577575684, "learning_rate": 1.7923497267759563e-05, "loss": 2.5898, "step": 820 }, { "epoch": 2.6918032786885244, "grad_norm": 7.014885425567627, "learning_rate": 1.7945355191256834e-05, "loss": 2.7061, "step": 821 }, { "epoch": 2.695081967213115, "grad_norm": 7.583407878875732, "learning_rate": 1.79672131147541e-05, "loss": 2.5405, "step": 822 }, { "epoch": 2.698360655737705, "grad_norm": 15.339051246643066, "learning_rate": 1.7989071038251365e-05, "loss": 2.5479, "step": 823 }, { "epoch": 2.7016393442622952, "grad_norm": 6.64138650894165, "learning_rate": 1.8010928961748636e-05, "loss": 2.4536, "step": 824 }, { "epoch": 2.7049180327868854, "grad_norm": 6.4714131355285645, "learning_rate": 1.8032786885245903e-05, "loss": 2.8223, "step": 825 }, { "epoch": 2.7081967213114755, "grad_norm": 6.490016460418701, "learning_rate": 1.805464480874317e-05, "loss": 2.5654, "step": 826 }, { "epoch": 2.7114754098360656, "grad_norm": 7.476641654968262, "learning_rate": 1.807650273224044e-05, "loss": 2.8008, "step": 827 }, { "epoch": 2.7147540983606557, "grad_norm": 7.694881916046143, "learning_rate": 1.8098360655737705e-05, "loss": 2.8428, "step": 828 }, { "epoch": 2.718032786885246, "grad_norm": 9.059954643249512, "learning_rate": 1.8120218579234972e-05, "loss": 2.4775, "step": 829 }, { "epoch": 2.721311475409836, "grad_norm": 8.960643768310547, "learning_rate": 1.8142076502732243e-05, "loss": 2.457, "step": 830 }, { "epoch": 2.724590163934426, "grad_norm": 7.287745952606201, "learning_rate": 1.816393442622951e-05, "loss": 2.5596, "step": 831 }, { "epoch": 2.7278688524590162, "grad_norm": 10.014598846435547, "learning_rate": 1.8185792349726778e-05, "loss": 2.4912, "step": 832 }, { "epoch": 2.7311475409836063, "grad_norm": 8.327805519104004, "learning_rate": 1.8207650273224045e-05, "loss": 2.5977, "step": 833 }, { "epoch": 2.7344262295081965, "grad_norm": 6.714339733123779, "learning_rate": 1.8229508196721312e-05, "loss": 2.3398, "step": 834 }, { "epoch": 2.737704918032787, "grad_norm": 8.3279390335083, "learning_rate": 1.825136612021858e-05, "loss": 2.7627, "step": 835 }, { "epoch": 2.740983606557377, "grad_norm": 6.438652038574219, "learning_rate": 1.827322404371585e-05, "loss": 2.5874, "step": 836 }, { "epoch": 2.7442622950819673, "grad_norm": 9.19836139678955, "learning_rate": 1.8295081967213114e-05, "loss": 2.6274, "step": 837 }, { "epoch": 2.7475409836065574, "grad_norm": 8.422933578491211, "learning_rate": 1.8316939890710385e-05, "loss": 2.6348, "step": 838 }, { "epoch": 2.7508196721311475, "grad_norm": 14.693835258483887, "learning_rate": 1.8338797814207652e-05, "loss": 2.7373, "step": 839 }, { "epoch": 2.7540983606557377, "grad_norm": 8.476128578186035, "learning_rate": 1.836065573770492e-05, "loss": 2.604, "step": 840 }, { "epoch": 2.7573770491803278, "grad_norm": 13.670044898986816, "learning_rate": 1.8382513661202187e-05, "loss": 2.6875, "step": 841 }, { "epoch": 2.760655737704918, "grad_norm": 5.800052165985107, "learning_rate": 1.8404371584699454e-05, "loss": 2.4546, "step": 842 }, { "epoch": 2.7639344262295085, "grad_norm": 7.990893363952637, "learning_rate": 1.842622950819672e-05, "loss": 2.46, "step": 843 }, { "epoch": 2.7672131147540986, "grad_norm": 9.300350189208984, "learning_rate": 1.8448087431693992e-05, "loss": 2.6777, "step": 844 }, { "epoch": 2.7704918032786887, "grad_norm": 6.938973426818848, "learning_rate": 1.846994535519126e-05, "loss": 2.4878, "step": 845 }, { "epoch": 2.773770491803279, "grad_norm": 7.850001335144043, "learning_rate": 1.8491803278688527e-05, "loss": 2.604, "step": 846 }, { "epoch": 2.777049180327869, "grad_norm": 37.376895904541016, "learning_rate": 1.8513661202185794e-05, "loss": 2.6348, "step": 847 }, { "epoch": 2.780327868852459, "grad_norm": 9.558303833007812, "learning_rate": 1.853551912568306e-05, "loss": 2.5513, "step": 848 }, { "epoch": 2.783606557377049, "grad_norm": 10.31317138671875, "learning_rate": 1.855737704918033e-05, "loss": 2.4385, "step": 849 }, { "epoch": 2.7868852459016393, "grad_norm": 8.869303703308105, "learning_rate": 1.85792349726776e-05, "loss": 2.9072, "step": 850 }, { "epoch": 2.7901639344262295, "grad_norm": 9.04704475402832, "learning_rate": 1.8601092896174863e-05, "loss": 2.6201, "step": 851 }, { "epoch": 2.7934426229508196, "grad_norm": 6.850374221801758, "learning_rate": 1.862295081967213e-05, "loss": 2.6865, "step": 852 }, { "epoch": 2.7967213114754097, "grad_norm": 12.442651748657227, "learning_rate": 1.86448087431694e-05, "loss": 2.5771, "step": 853 }, { "epoch": 2.8, "grad_norm": 9.197686195373535, "learning_rate": 1.866666666666667e-05, "loss": 2.6968, "step": 854 }, { "epoch": 2.80327868852459, "grad_norm": 7.0720367431640625, "learning_rate": 1.8688524590163936e-05, "loss": 2.394, "step": 855 }, { "epoch": 2.80655737704918, "grad_norm": 7.230443477630615, "learning_rate": 1.8710382513661203e-05, "loss": 2.6396, "step": 856 }, { "epoch": 2.8098360655737706, "grad_norm": 11.930196762084961, "learning_rate": 1.873224043715847e-05, "loss": 2.501, "step": 857 }, { "epoch": 2.8131147540983608, "grad_norm": 8.500990867614746, "learning_rate": 1.8754098360655738e-05, "loss": 2.5039, "step": 858 }, { "epoch": 2.816393442622951, "grad_norm": 7.677558898925781, "learning_rate": 1.877595628415301e-05, "loss": 2.6641, "step": 859 }, { "epoch": 2.819672131147541, "grad_norm": 8.586888313293457, "learning_rate": 1.8797814207650276e-05, "loss": 2.4502, "step": 860 }, { "epoch": 2.822950819672131, "grad_norm": 7.213002681732178, "learning_rate": 1.8819672131147543e-05, "loss": 2.4927, "step": 861 }, { "epoch": 2.8262295081967213, "grad_norm": 15.055634498596191, "learning_rate": 1.884153005464481e-05, "loss": 2.6831, "step": 862 }, { "epoch": 2.8295081967213114, "grad_norm": 7.917607307434082, "learning_rate": 1.8863387978142078e-05, "loss": 2.644, "step": 863 }, { "epoch": 2.8327868852459015, "grad_norm": 8.047310829162598, "learning_rate": 1.8885245901639345e-05, "loss": 2.4766, "step": 864 }, { "epoch": 2.836065573770492, "grad_norm": 8.796905517578125, "learning_rate": 1.8907103825136616e-05, "loss": 2.5181, "step": 865 }, { "epoch": 2.839344262295082, "grad_norm": 7.827326774597168, "learning_rate": 1.892896174863388e-05, "loss": 2.583, "step": 866 }, { "epoch": 2.8426229508196723, "grad_norm": 6.646262168884277, "learning_rate": 1.895081967213115e-05, "loss": 2.519, "step": 867 }, { "epoch": 2.8459016393442624, "grad_norm": 9.244793891906738, "learning_rate": 1.8972677595628418e-05, "loss": 2.5459, "step": 868 }, { "epoch": 2.8491803278688526, "grad_norm": 7.838392734527588, "learning_rate": 1.8994535519125685e-05, "loss": 2.5869, "step": 869 }, { "epoch": 2.8524590163934427, "grad_norm": 8.273508071899414, "learning_rate": 1.9016393442622952e-05, "loss": 2.8418, "step": 870 }, { "epoch": 2.855737704918033, "grad_norm": 7.1319804191589355, "learning_rate": 1.903825136612022e-05, "loss": 2.5742, "step": 871 }, { "epoch": 2.859016393442623, "grad_norm": 8.176284790039062, "learning_rate": 1.9060109289617487e-05, "loss": 2.6567, "step": 872 }, { "epoch": 2.862295081967213, "grad_norm": 7.119852066040039, "learning_rate": 1.9081967213114754e-05, "loss": 2.4521, "step": 873 }, { "epoch": 2.865573770491803, "grad_norm": 6.4817891120910645, "learning_rate": 1.9103825136612025e-05, "loss": 2.5166, "step": 874 }, { "epoch": 2.8688524590163933, "grad_norm": 12.064628601074219, "learning_rate": 1.912568306010929e-05, "loss": 2.5103, "step": 875 }, { "epoch": 2.8721311475409834, "grad_norm": 6.969466209411621, "learning_rate": 1.914754098360656e-05, "loss": 2.4873, "step": 876 }, { "epoch": 2.8754098360655735, "grad_norm": 7.979155540466309, "learning_rate": 1.9169398907103827e-05, "loss": 2.5527, "step": 877 }, { "epoch": 2.8786885245901637, "grad_norm": 8.268303871154785, "learning_rate": 1.9191256830601094e-05, "loss": 2.5747, "step": 878 }, { "epoch": 2.8819672131147542, "grad_norm": 6.866151809692383, "learning_rate": 1.921311475409836e-05, "loss": 2.3804, "step": 879 }, { "epoch": 2.8852459016393444, "grad_norm": 8.128806114196777, "learning_rate": 1.923497267759563e-05, "loss": 2.6196, "step": 880 }, { "epoch": 2.8885245901639345, "grad_norm": 7.145298480987549, "learning_rate": 1.9256830601092896e-05, "loss": 2.4409, "step": 881 }, { "epoch": 2.8918032786885246, "grad_norm": 6.635857582092285, "learning_rate": 1.9278688524590167e-05, "loss": 2.5522, "step": 882 }, { "epoch": 2.8950819672131147, "grad_norm": 8.214898109436035, "learning_rate": 1.9300546448087434e-05, "loss": 2.541, "step": 883 }, { "epoch": 2.898360655737705, "grad_norm": 6.4922919273376465, "learning_rate": 1.93224043715847e-05, "loss": 2.3096, "step": 884 }, { "epoch": 2.901639344262295, "grad_norm": 7.657047271728516, "learning_rate": 1.934426229508197e-05, "loss": 2.5044, "step": 885 }, { "epoch": 2.904918032786885, "grad_norm": 9.156484603881836, "learning_rate": 1.9366120218579236e-05, "loss": 2.5117, "step": 886 }, { "epoch": 2.9081967213114757, "grad_norm": 7.959072113037109, "learning_rate": 1.9387978142076503e-05, "loss": 2.4932, "step": 887 }, { "epoch": 2.911475409836066, "grad_norm": 7.443611145019531, "learning_rate": 1.9409836065573774e-05, "loss": 2.499, "step": 888 }, { "epoch": 2.914754098360656, "grad_norm": 6.8415117263793945, "learning_rate": 1.9431693989071038e-05, "loss": 2.4668, "step": 889 }, { "epoch": 2.918032786885246, "grad_norm": 7.568009376525879, "learning_rate": 1.9453551912568305e-05, "loss": 2.4863, "step": 890 }, { "epoch": 2.921311475409836, "grad_norm": 6.665019989013672, "learning_rate": 1.9475409836065576e-05, "loss": 2.6489, "step": 891 }, { "epoch": 2.9245901639344263, "grad_norm": 12.251970291137695, "learning_rate": 1.9497267759562843e-05, "loss": 2.4395, "step": 892 }, { "epoch": 2.9278688524590164, "grad_norm": 8.317174911499023, "learning_rate": 1.951912568306011e-05, "loss": 2.4277, "step": 893 }, { "epoch": 2.9311475409836065, "grad_norm": 6.914112567901611, "learning_rate": 1.9540983606557378e-05, "loss": 2.3286, "step": 894 }, { "epoch": 2.9344262295081966, "grad_norm": 7.907754898071289, "learning_rate": 1.9562841530054645e-05, "loss": 2.6577, "step": 895 }, { "epoch": 2.9377049180327868, "grad_norm": 9.579557418823242, "learning_rate": 1.9584699453551913e-05, "loss": 2.2798, "step": 896 }, { "epoch": 2.940983606557377, "grad_norm": 9.077560424804688, "learning_rate": 1.9606557377049183e-05, "loss": 2.5342, "step": 897 }, { "epoch": 2.944262295081967, "grad_norm": 11.75727367401123, "learning_rate": 1.962841530054645e-05, "loss": 2.666, "step": 898 }, { "epoch": 2.947540983606557, "grad_norm": 21.645462036132812, "learning_rate": 1.9650273224043718e-05, "loss": 2.7842, "step": 899 }, { "epoch": 2.9508196721311473, "grad_norm": 7.5250678062438965, "learning_rate": 1.9672131147540985e-05, "loss": 2.7212, "step": 900 }, { "epoch": 2.954098360655738, "grad_norm": 16.497955322265625, "learning_rate": 1.9693989071038253e-05, "loss": 2.5391, "step": 901 }, { "epoch": 2.957377049180328, "grad_norm": 6.484947204589844, "learning_rate": 1.971584699453552e-05, "loss": 2.6221, "step": 902 }, { "epoch": 2.960655737704918, "grad_norm": 7.641224384307861, "learning_rate": 1.973770491803279e-05, "loss": 2.6489, "step": 903 }, { "epoch": 2.963934426229508, "grad_norm": 9.279112815856934, "learning_rate": 1.9759562841530054e-05, "loss": 2.583, "step": 904 }, { "epoch": 2.9672131147540983, "grad_norm": 6.130147457122803, "learning_rate": 1.9781420765027325e-05, "loss": 2.5859, "step": 905 }, { "epoch": 2.9704918032786884, "grad_norm": 6.337494373321533, "learning_rate": 1.9803278688524592e-05, "loss": 2.5186, "step": 906 }, { "epoch": 2.9737704918032786, "grad_norm": 6.143677711486816, "learning_rate": 1.982513661202186e-05, "loss": 2.3979, "step": 907 }, { "epoch": 2.9770491803278687, "grad_norm": 23.765522003173828, "learning_rate": 1.9846994535519127e-05, "loss": 2.5098, "step": 908 }, { "epoch": 2.9803278688524593, "grad_norm": 9.835738182067871, "learning_rate": 1.9868852459016394e-05, "loss": 2.3691, "step": 909 }, { "epoch": 2.9836065573770494, "grad_norm": 9.174903869628906, "learning_rate": 1.9890710382513662e-05, "loss": 2.5332, "step": 910 }, { "epoch": 2.9868852459016395, "grad_norm": 7.767894268035889, "learning_rate": 1.9912568306010932e-05, "loss": 2.5986, "step": 911 }, { "epoch": 2.9901639344262296, "grad_norm": 8.067703247070312, "learning_rate": 1.99344262295082e-05, "loss": 2.4888, "step": 912 }, { "epoch": 2.9934426229508198, "grad_norm": 8.40115737915039, "learning_rate": 1.9956284153005464e-05, "loss": 2.731, "step": 913 }, { "epoch": 2.99672131147541, "grad_norm": 21.31868553161621, "learning_rate": 1.9978142076502734e-05, "loss": 2.5161, "step": 914 }, { "epoch": 3.0, "grad_norm": 10.031523704528809, "learning_rate": 2e-05, "loss": 2.6074, "step": 915 }, { "epoch": 3.00327868852459, "grad_norm": 9.995223999023438, "learning_rate": 1.9999999943619805e-05, "loss": 2.5352, "step": 916 }, { "epoch": 3.0065573770491802, "grad_norm": 8.516386032104492, "learning_rate": 1.9999999774479207e-05, "loss": 2.6079, "step": 917 }, { "epoch": 3.0098360655737704, "grad_norm": 6.965017318725586, "learning_rate": 1.9999999492578216e-05, "loss": 2.7192, "step": 918 }, { "epoch": 3.0131147540983605, "grad_norm": 9.606278419494629, "learning_rate": 1.9999999097916835e-05, "loss": 2.5664, "step": 919 }, { "epoch": 3.0163934426229506, "grad_norm": 8.07213020324707, "learning_rate": 1.9999998590495066e-05, "loss": 2.5283, "step": 920 }, { "epoch": 3.019672131147541, "grad_norm": 7.7616353034973145, "learning_rate": 1.9999997970312918e-05, "loss": 2.4185, "step": 921 }, { "epoch": 3.0229508196721313, "grad_norm": 8.083903312683105, "learning_rate": 1.999999723737039e-05, "loss": 2.5254, "step": 922 }, { "epoch": 3.0262295081967214, "grad_norm": 9.746734619140625, "learning_rate": 1.9999996391667497e-05, "loss": 2.499, "step": 923 }, { "epoch": 3.0295081967213116, "grad_norm": 9.940817832946777, "learning_rate": 1.999999543320425e-05, "loss": 2.3179, "step": 924 }, { "epoch": 3.0327868852459017, "grad_norm": 6.060477256774902, "learning_rate": 1.9999994361980657e-05, "loss": 2.3628, "step": 925 }, { "epoch": 3.036065573770492, "grad_norm": 8.716836929321289, "learning_rate": 1.999999317799673e-05, "loss": 2.6406, "step": 926 }, { "epoch": 3.039344262295082, "grad_norm": 6.547914981842041, "learning_rate": 1.9999991881252482e-05, "loss": 2.4336, "step": 927 }, { "epoch": 3.042622950819672, "grad_norm": 9.41111946105957, "learning_rate": 1.9999990471747926e-05, "loss": 2.4995, "step": 928 }, { "epoch": 3.045901639344262, "grad_norm": 7.561422824859619, "learning_rate": 1.9999988949483082e-05, "loss": 2.2275, "step": 929 }, { "epoch": 3.0491803278688523, "grad_norm": 10.108428001403809, "learning_rate": 1.9999987314457966e-05, "loss": 2.4365, "step": 930 }, { "epoch": 3.0524590163934424, "grad_norm": 6.377636909484863, "learning_rate": 1.9999985566672594e-05, "loss": 2.5239, "step": 931 }, { "epoch": 3.055737704918033, "grad_norm": 6.621293067932129, "learning_rate": 1.9999983706126985e-05, "loss": 2.5215, "step": 932 }, { "epoch": 3.059016393442623, "grad_norm": 6.998989582061768, "learning_rate": 1.999998173282117e-05, "loss": 2.4868, "step": 933 }, { "epoch": 3.0622950819672132, "grad_norm": 5.268550872802734, "learning_rate": 1.9999979646755155e-05, "loss": 2.4409, "step": 934 }, { "epoch": 3.0655737704918034, "grad_norm": 36.84987258911133, "learning_rate": 1.9999977447928978e-05, "loss": 2.4966, "step": 935 }, { "epoch": 3.0688524590163935, "grad_norm": 10.627116203308105, "learning_rate": 1.9999975136342655e-05, "loss": 2.4155, "step": 936 }, { "epoch": 3.0721311475409836, "grad_norm": 9.428391456604004, "learning_rate": 1.9999972711996216e-05, "loss": 2.4375, "step": 937 }, { "epoch": 3.0754098360655737, "grad_norm": 6.660289287567139, "learning_rate": 1.999997017488969e-05, "loss": 2.4204, "step": 938 }, { "epoch": 3.078688524590164, "grad_norm": 7.827796459197998, "learning_rate": 1.9999967525023098e-05, "loss": 2.2866, "step": 939 }, { "epoch": 3.081967213114754, "grad_norm": 7.629360198974609, "learning_rate": 1.9999964762396476e-05, "loss": 2.3779, "step": 940 }, { "epoch": 3.085245901639344, "grad_norm": 6.219681262969971, "learning_rate": 1.9999961887009855e-05, "loss": 2.3442, "step": 941 }, { "epoch": 3.088524590163934, "grad_norm": 8.427523612976074, "learning_rate": 1.9999958898863266e-05, "loss": 2.3984, "step": 942 }, { "epoch": 3.091803278688525, "grad_norm": 10.383281707763672, "learning_rate": 1.9999955797956744e-05, "loss": 2.4395, "step": 943 }, { "epoch": 3.095081967213115, "grad_norm": 17.950664520263672, "learning_rate": 1.9999952584290324e-05, "loss": 2.5776, "step": 944 }, { "epoch": 3.098360655737705, "grad_norm": 11.309504508972168, "learning_rate": 1.999994925786404e-05, "loss": 2.3506, "step": 945 }, { "epoch": 3.101639344262295, "grad_norm": 6.472440719604492, "learning_rate": 1.999994581867793e-05, "loss": 2.4785, "step": 946 }, { "epoch": 3.1049180327868853, "grad_norm": 8.345820426940918, "learning_rate": 1.9999942266732037e-05, "loss": 2.6416, "step": 947 }, { "epoch": 3.1081967213114754, "grad_norm": 8.925543785095215, "learning_rate": 1.9999938602026392e-05, "loss": 2.4619, "step": 948 }, { "epoch": 3.1114754098360655, "grad_norm": 7.611775875091553, "learning_rate": 1.9999934824561046e-05, "loss": 2.4277, "step": 949 }, { "epoch": 3.1147540983606556, "grad_norm": 8.05739974975586, "learning_rate": 1.9999930934336037e-05, "loss": 2.4272, "step": 950 }, { "epoch": 3.1180327868852458, "grad_norm": 9.965254783630371, "learning_rate": 1.9999926931351407e-05, "loss": 2.2769, "step": 951 }, { "epoch": 3.121311475409836, "grad_norm": 8.657100677490234, "learning_rate": 1.9999922815607203e-05, "loss": 2.4971, "step": 952 }, { "epoch": 3.1245901639344265, "grad_norm": 18.538455963134766, "learning_rate": 1.9999918587103476e-05, "loss": 2.2993, "step": 953 }, { "epoch": 3.1278688524590166, "grad_norm": 7.334896564483643, "learning_rate": 1.9999914245840267e-05, "loss": 2.751, "step": 954 }, { "epoch": 3.1311475409836067, "grad_norm": 6.731992244720459, "learning_rate": 1.999990979181763e-05, "loss": 2.1953, "step": 955 }, { "epoch": 3.134426229508197, "grad_norm": 8.333086013793945, "learning_rate": 1.999990522503561e-05, "loss": 2.5176, "step": 956 }, { "epoch": 3.137704918032787, "grad_norm": 7.162135601043701, "learning_rate": 1.999990054549426e-05, "loss": 2.5571, "step": 957 }, { "epoch": 3.140983606557377, "grad_norm": 6.375207424163818, "learning_rate": 1.9999895753193638e-05, "loss": 2.5522, "step": 958 }, { "epoch": 3.144262295081967, "grad_norm": 9.171944618225098, "learning_rate": 1.9999890848133793e-05, "loss": 2.3853, "step": 959 }, { "epoch": 3.1475409836065573, "grad_norm": 8.941956520080566, "learning_rate": 1.999988583031478e-05, "loss": 2.7109, "step": 960 }, { "epoch": 3.1508196721311474, "grad_norm": 6.7613420486450195, "learning_rate": 1.999988069973666e-05, "loss": 2.4316, "step": 961 }, { "epoch": 3.1540983606557376, "grad_norm": 6.699718475341797, "learning_rate": 1.9999875456399485e-05, "loss": 2.5791, "step": 962 }, { "epoch": 3.1573770491803277, "grad_norm": 7.5721354484558105, "learning_rate": 1.9999870100303317e-05, "loss": 2.3169, "step": 963 }, { "epoch": 3.160655737704918, "grad_norm": 6.877976417541504, "learning_rate": 1.9999864631448215e-05, "loss": 2.4297, "step": 964 }, { "epoch": 3.1639344262295084, "grad_norm": 6.096713542938232, "learning_rate": 1.9999859049834244e-05, "loss": 2.5244, "step": 965 }, { "epoch": 3.1672131147540985, "grad_norm": 8.100872039794922, "learning_rate": 1.999985335546147e-05, "loss": 2.3008, "step": 966 }, { "epoch": 3.1704918032786886, "grad_norm": 11.254450798034668, "learning_rate": 1.9999847548329943e-05, "loss": 2.3032, "step": 967 }, { "epoch": 3.1737704918032787, "grad_norm": 11.253681182861328, "learning_rate": 1.999984162843974e-05, "loss": 2.5161, "step": 968 }, { "epoch": 3.177049180327869, "grad_norm": 9.147894859313965, "learning_rate": 1.9999835595790927e-05, "loss": 2.4053, "step": 969 }, { "epoch": 3.180327868852459, "grad_norm": 9.08027172088623, "learning_rate": 1.9999829450383573e-05, "loss": 2.3579, "step": 970 }, { "epoch": 3.183606557377049, "grad_norm": 9.60564136505127, "learning_rate": 1.9999823192217743e-05, "loss": 2.4746, "step": 971 }, { "epoch": 3.1868852459016392, "grad_norm": 7.336839199066162, "learning_rate": 1.9999816821293508e-05, "loss": 2.3403, "step": 972 }, { "epoch": 3.1901639344262294, "grad_norm": 8.972092628479004, "learning_rate": 1.999981033761094e-05, "loss": 2.5938, "step": 973 }, { "epoch": 3.1934426229508195, "grad_norm": 9.308574676513672, "learning_rate": 1.9999803741170115e-05, "loss": 2.5312, "step": 974 }, { "epoch": 3.19672131147541, "grad_norm": 10.380254745483398, "learning_rate": 1.9999797031971106e-05, "loss": 2.7192, "step": 975 }, { "epoch": 3.2, "grad_norm": 10.565759658813477, "learning_rate": 1.999979021001399e-05, "loss": 2.4209, "step": 976 }, { "epoch": 3.2032786885245903, "grad_norm": 10.476936340332031, "learning_rate": 1.9999783275298838e-05, "loss": 2.4385, "step": 977 }, { "epoch": 3.2065573770491804, "grad_norm": 6.81144380569458, "learning_rate": 1.9999776227825736e-05, "loss": 2.4746, "step": 978 }, { "epoch": 3.2098360655737705, "grad_norm": 9.449687004089355, "learning_rate": 1.999976906759476e-05, "loss": 2.5005, "step": 979 }, { "epoch": 3.2131147540983607, "grad_norm": 6.627106189727783, "learning_rate": 1.9999761794605986e-05, "loss": 2.6304, "step": 980 }, { "epoch": 3.216393442622951, "grad_norm": 7.750405788421631, "learning_rate": 1.9999754408859506e-05, "loss": 2.5181, "step": 981 }, { "epoch": 3.219672131147541, "grad_norm": 7.191287040710449, "learning_rate": 1.9999746910355396e-05, "loss": 2.5439, "step": 982 }, { "epoch": 3.222950819672131, "grad_norm": 8.557252883911133, "learning_rate": 1.999973929909374e-05, "loss": 2.3633, "step": 983 }, { "epoch": 3.226229508196721, "grad_norm": 29.899898529052734, "learning_rate": 1.999973157507463e-05, "loss": 2.3345, "step": 984 }, { "epoch": 3.2295081967213113, "grad_norm": 7.726943016052246, "learning_rate": 1.9999723738298146e-05, "loss": 2.5679, "step": 985 }, { "epoch": 3.2327868852459014, "grad_norm": 7.306193828582764, "learning_rate": 1.9999715788764384e-05, "loss": 2.3589, "step": 986 }, { "epoch": 3.236065573770492, "grad_norm": 7.6648173332214355, "learning_rate": 1.9999707726473427e-05, "loss": 2.4873, "step": 987 }, { "epoch": 3.239344262295082, "grad_norm": 12.803098678588867, "learning_rate": 1.9999699551425365e-05, "loss": 2.5454, "step": 988 }, { "epoch": 3.2426229508196722, "grad_norm": 7.80911111831665, "learning_rate": 1.99996912636203e-05, "loss": 2.5361, "step": 989 }, { "epoch": 3.2459016393442623, "grad_norm": 7.41262149810791, "learning_rate": 1.9999682863058314e-05, "loss": 2.4102, "step": 990 }, { "epoch": 3.2491803278688525, "grad_norm": 13.493359565734863, "learning_rate": 1.9999674349739507e-05, "loss": 2.2341, "step": 991 }, { "epoch": 3.2524590163934426, "grad_norm": 6.679170608520508, "learning_rate": 1.9999665723663976e-05, "loss": 2.5811, "step": 992 }, { "epoch": 3.2557377049180327, "grad_norm": 7.507174015045166, "learning_rate": 1.9999656984831815e-05, "loss": 2.4414, "step": 993 }, { "epoch": 3.259016393442623, "grad_norm": 24.6840763092041, "learning_rate": 1.999964813324313e-05, "loss": 2.3608, "step": 994 }, { "epoch": 3.262295081967213, "grad_norm": 8.428903579711914, "learning_rate": 1.999963916889801e-05, "loss": 2.3027, "step": 995 }, { "epoch": 3.265573770491803, "grad_norm": 9.520085334777832, "learning_rate": 1.9999630091796565e-05, "loss": 2.3564, "step": 996 }, { "epoch": 3.2688524590163937, "grad_norm": 8.907970428466797, "learning_rate": 1.999962090193889e-05, "loss": 2.6221, "step": 997 }, { "epoch": 3.2721311475409838, "grad_norm": 9.700804710388184, "learning_rate": 1.9999611599325095e-05, "loss": 2.5288, "step": 998 }, { "epoch": 3.275409836065574, "grad_norm": 7.275920391082764, "learning_rate": 1.999960218395528e-05, "loss": 2.5674, "step": 999 }, { "epoch": 3.278688524590164, "grad_norm": 13.916285514831543, "learning_rate": 1.9999592655829553e-05, "loss": 2.3843, "step": 1000 }, { "epoch": 3.281967213114754, "grad_norm": 7.463507175445557, "learning_rate": 1.9999583014948025e-05, "loss": 2.5908, "step": 1001 }, { "epoch": 3.2852459016393443, "grad_norm": 10.14920425415039, "learning_rate": 1.99995732613108e-05, "loss": 2.1753, "step": 1002 }, { "epoch": 3.2885245901639344, "grad_norm": 8.16128158569336, "learning_rate": 1.9999563394917988e-05, "loss": 2.6094, "step": 1003 }, { "epoch": 3.2918032786885245, "grad_norm": 6.455923080444336, "learning_rate": 1.9999553415769702e-05, "loss": 2.3306, "step": 1004 }, { "epoch": 3.2950819672131146, "grad_norm": 7.811330318450928, "learning_rate": 1.9999543323866058e-05, "loss": 2.3716, "step": 1005 }, { "epoch": 3.2983606557377048, "grad_norm": 8.622511863708496, "learning_rate": 1.9999533119207162e-05, "loss": 2.3511, "step": 1006 }, { "epoch": 3.301639344262295, "grad_norm": 8.587008476257324, "learning_rate": 1.9999522801793135e-05, "loss": 2.5488, "step": 1007 }, { "epoch": 3.304918032786885, "grad_norm": 8.162694931030273, "learning_rate": 1.999951237162409e-05, "loss": 2.3521, "step": 1008 }, { "epoch": 3.3081967213114756, "grad_norm": 8.920218467712402, "learning_rate": 1.999950182870015e-05, "loss": 2.353, "step": 1009 }, { "epoch": 3.3114754098360657, "grad_norm": 10.420265197753906, "learning_rate": 1.9999491173021427e-05, "loss": 2.4561, "step": 1010 }, { "epoch": 3.314754098360656, "grad_norm": 8.271897315979004, "learning_rate": 1.9999480404588044e-05, "loss": 2.2388, "step": 1011 }, { "epoch": 3.318032786885246, "grad_norm": 7.366892337799072, "learning_rate": 1.9999469523400122e-05, "loss": 2.3755, "step": 1012 }, { "epoch": 3.321311475409836, "grad_norm": 7.309403896331787, "learning_rate": 1.9999458529457787e-05, "loss": 2.4126, "step": 1013 }, { "epoch": 3.324590163934426, "grad_norm": 11.979930877685547, "learning_rate": 1.999944742276116e-05, "loss": 2.269, "step": 1014 }, { "epoch": 3.3278688524590163, "grad_norm": 7.266773700714111, "learning_rate": 1.9999436203310366e-05, "loss": 2.2324, "step": 1015 }, { "epoch": 3.3311475409836064, "grad_norm": 8.309161186218262, "learning_rate": 1.9999424871105528e-05, "loss": 2.5552, "step": 1016 }, { "epoch": 3.3344262295081966, "grad_norm": 7.265388011932373, "learning_rate": 1.9999413426146785e-05, "loss": 2.3931, "step": 1017 }, { "epoch": 3.337704918032787, "grad_norm": 9.180120468139648, "learning_rate": 1.9999401868434254e-05, "loss": 2.4814, "step": 1018 }, { "epoch": 3.3409836065573773, "grad_norm": 8.503789901733398, "learning_rate": 1.999939019796807e-05, "loss": 2.2949, "step": 1019 }, { "epoch": 3.3442622950819674, "grad_norm": 11.841552734375, "learning_rate": 1.9999378414748365e-05, "loss": 2.7305, "step": 1020 }, { "epoch": 3.3475409836065575, "grad_norm": 11.320070266723633, "learning_rate": 1.9999366518775273e-05, "loss": 2.4741, "step": 1021 }, { "epoch": 3.3508196721311476, "grad_norm": 9.030987739562988, "learning_rate": 1.9999354510048924e-05, "loss": 2.3672, "step": 1022 }, { "epoch": 3.3540983606557377, "grad_norm": 7.552350997924805, "learning_rate": 1.999934238856946e-05, "loss": 2.5444, "step": 1023 }, { "epoch": 3.357377049180328, "grad_norm": 10.241394996643066, "learning_rate": 1.999933015433701e-05, "loss": 2.4673, "step": 1024 }, { "epoch": 3.360655737704918, "grad_norm": 8.66174030303955, "learning_rate": 1.999931780735172e-05, "loss": 2.353, "step": 1025 }, { "epoch": 3.363934426229508, "grad_norm": 5.579679012298584, "learning_rate": 1.9999305347613723e-05, "loss": 2.4067, "step": 1026 }, { "epoch": 3.3672131147540982, "grad_norm": 7.361496448516846, "learning_rate": 1.9999292775123162e-05, "loss": 2.5127, "step": 1027 }, { "epoch": 3.3704918032786884, "grad_norm": 5.842492580413818, "learning_rate": 1.999928008988018e-05, "loss": 2.7388, "step": 1028 }, { "epoch": 3.3737704918032785, "grad_norm": 8.191801071166992, "learning_rate": 1.9999267291884914e-05, "loss": 2.4316, "step": 1029 }, { "epoch": 3.3770491803278686, "grad_norm": 8.335641860961914, "learning_rate": 1.9999254381137515e-05, "loss": 2.4609, "step": 1030 }, { "epoch": 3.380327868852459, "grad_norm": 10.558150291442871, "learning_rate": 1.9999241357638126e-05, "loss": 2.7617, "step": 1031 }, { "epoch": 3.3836065573770493, "grad_norm": 9.2302827835083, "learning_rate": 1.9999228221386894e-05, "loss": 2.6421, "step": 1032 }, { "epoch": 3.3868852459016394, "grad_norm": 7.171724796295166, "learning_rate": 1.999921497238397e-05, "loss": 2.3042, "step": 1033 }, { "epoch": 3.3901639344262295, "grad_norm": 6.84669303894043, "learning_rate": 1.9999201610629497e-05, "loss": 2.4541, "step": 1034 }, { "epoch": 3.3934426229508197, "grad_norm": 8.041687965393066, "learning_rate": 1.999918813612363e-05, "loss": 2.3569, "step": 1035 }, { "epoch": 3.39672131147541, "grad_norm": 13.27484130859375, "learning_rate": 1.999917454886652e-05, "loss": 2.4722, "step": 1036 }, { "epoch": 3.4, "grad_norm": 8.716421127319336, "learning_rate": 1.999916084885832e-05, "loss": 2.3086, "step": 1037 }, { "epoch": 3.40327868852459, "grad_norm": 9.504079818725586, "learning_rate": 1.9999147036099184e-05, "loss": 2.4209, "step": 1038 }, { "epoch": 3.40655737704918, "grad_norm": 9.414859771728516, "learning_rate": 1.9999133110589272e-05, "loss": 2.3813, "step": 1039 }, { "epoch": 3.4098360655737707, "grad_norm": 12.846217155456543, "learning_rate": 1.9999119072328738e-05, "loss": 2.5347, "step": 1040 }, { "epoch": 3.413114754098361, "grad_norm": 7.160704612731934, "learning_rate": 1.9999104921317737e-05, "loss": 2.6172, "step": 1041 }, { "epoch": 3.416393442622951, "grad_norm": 6.143978595733643, "learning_rate": 1.9999090657556433e-05, "loss": 2.4043, "step": 1042 }, { "epoch": 3.419672131147541, "grad_norm": 7.189377307891846, "learning_rate": 1.9999076281044984e-05, "loss": 2.5664, "step": 1043 }, { "epoch": 3.422950819672131, "grad_norm": 6.621867656707764, "learning_rate": 1.9999061791783556e-05, "loss": 2.5376, "step": 1044 }, { "epoch": 3.4262295081967213, "grad_norm": 10.938919067382812, "learning_rate": 1.9999047189772305e-05, "loss": 2.4785, "step": 1045 }, { "epoch": 3.4295081967213115, "grad_norm": 6.16029691696167, "learning_rate": 1.9999032475011408e-05, "loss": 2.4458, "step": 1046 }, { "epoch": 3.4327868852459016, "grad_norm": 7.892474174499512, "learning_rate": 1.9999017647501017e-05, "loss": 2.5166, "step": 1047 }, { "epoch": 3.4360655737704917, "grad_norm": 14.777018547058105, "learning_rate": 1.999900270724131e-05, "loss": 2.312, "step": 1048 }, { "epoch": 3.439344262295082, "grad_norm": 8.833479881286621, "learning_rate": 1.999898765423245e-05, "loss": 2.3145, "step": 1049 }, { "epoch": 3.442622950819672, "grad_norm": 7.869678020477295, "learning_rate": 1.9998972488474607e-05, "loss": 2.4365, "step": 1050 }, { "epoch": 3.445901639344262, "grad_norm": 6.0511651039123535, "learning_rate": 1.9998957209967953e-05, "loss": 2.4531, "step": 1051 }, { "epoch": 3.4491803278688526, "grad_norm": 5.253143310546875, "learning_rate": 1.999894181871266e-05, "loss": 2.3525, "step": 1052 }, { "epoch": 3.4524590163934428, "grad_norm": 7.399739742279053, "learning_rate": 1.99989263147089e-05, "loss": 2.4292, "step": 1053 }, { "epoch": 3.455737704918033, "grad_norm": 9.376023292541504, "learning_rate": 1.9998910697956853e-05, "loss": 2.4224, "step": 1054 }, { "epoch": 3.459016393442623, "grad_norm": 6.365492820739746, "learning_rate": 1.999889496845669e-05, "loss": 2.4297, "step": 1055 }, { "epoch": 3.462295081967213, "grad_norm": 6.380743026733398, "learning_rate": 1.999887912620859e-05, "loss": 2.5034, "step": 1056 }, { "epoch": 3.4655737704918033, "grad_norm": 7.783343315124512, "learning_rate": 1.999886317121273e-05, "loss": 2.3813, "step": 1057 }, { "epoch": 3.4688524590163934, "grad_norm": 15.896947860717773, "learning_rate": 1.9998847103469294e-05, "loss": 2.4541, "step": 1058 }, { "epoch": 3.4721311475409835, "grad_norm": 6.497613430023193, "learning_rate": 1.999883092297846e-05, "loss": 2.2061, "step": 1059 }, { "epoch": 3.4754098360655736, "grad_norm": 8.592870712280273, "learning_rate": 1.999881462974041e-05, "loss": 2.3047, "step": 1060 }, { "epoch": 3.4786885245901638, "grad_norm": 6.385178565979004, "learning_rate": 1.999879822375533e-05, "loss": 2.2905, "step": 1061 }, { "epoch": 3.4819672131147543, "grad_norm": 7.11569881439209, "learning_rate": 1.9998781705023405e-05, "loss": 2.3091, "step": 1062 }, { "epoch": 3.4852459016393444, "grad_norm": 7.5933942794799805, "learning_rate": 1.9998765073544818e-05, "loss": 2.6611, "step": 1063 }, { "epoch": 3.4885245901639346, "grad_norm": 6.599364757537842, "learning_rate": 1.999874832931976e-05, "loss": 2.4048, "step": 1064 }, { "epoch": 3.4918032786885247, "grad_norm": 8.367451667785645, "learning_rate": 1.9998731472348418e-05, "loss": 2.3823, "step": 1065 }, { "epoch": 3.495081967213115, "grad_norm": 9.28726863861084, "learning_rate": 1.999871450263098e-05, "loss": 2.5542, "step": 1066 }, { "epoch": 3.498360655737705, "grad_norm": 10.20995044708252, "learning_rate": 1.9998697420167645e-05, "loss": 2.4111, "step": 1067 }, { "epoch": 3.501639344262295, "grad_norm": 5.915597915649414, "learning_rate": 1.99986802249586e-05, "loss": 2.4375, "step": 1068 }, { "epoch": 3.504918032786885, "grad_norm": 5.896823883056641, "learning_rate": 1.9998662917004033e-05, "loss": 2.4922, "step": 1069 }, { "epoch": 3.5081967213114753, "grad_norm": 10.440691947937012, "learning_rate": 1.999864549630415e-05, "loss": 2.4263, "step": 1070 }, { "epoch": 3.5114754098360654, "grad_norm": 8.368537902832031, "learning_rate": 1.9998627962859145e-05, "loss": 2.2935, "step": 1071 }, { "epoch": 3.5147540983606556, "grad_norm": 5.629356861114502, "learning_rate": 1.9998610316669213e-05, "loss": 2.3022, "step": 1072 }, { "epoch": 3.5180327868852457, "grad_norm": 7.426488876342773, "learning_rate": 1.9998592557734553e-05, "loss": 2.312, "step": 1073 }, { "epoch": 3.521311475409836, "grad_norm": 8.355572700500488, "learning_rate": 1.9998574686055366e-05, "loss": 2.4678, "step": 1074 }, { "epoch": 3.5245901639344264, "grad_norm": 8.440521240234375, "learning_rate": 1.9998556701631852e-05, "loss": 2.5312, "step": 1075 }, { "epoch": 3.5278688524590165, "grad_norm": 9.363455772399902, "learning_rate": 1.9998538604464218e-05, "loss": 2.4858, "step": 1076 }, { "epoch": 3.5311475409836066, "grad_norm": 7.234804630279541, "learning_rate": 1.9998520394552663e-05, "loss": 2.2422, "step": 1077 }, { "epoch": 3.5344262295081967, "grad_norm": 10.777366638183594, "learning_rate": 1.9998502071897397e-05, "loss": 2.6904, "step": 1078 }, { "epoch": 3.537704918032787, "grad_norm": 32.75130081176758, "learning_rate": 1.999848363649862e-05, "loss": 2.3452, "step": 1079 }, { "epoch": 3.540983606557377, "grad_norm": 6.006423473358154, "learning_rate": 1.999846508835655e-05, "loss": 2.3569, "step": 1080 }, { "epoch": 3.544262295081967, "grad_norm": 7.3800506591796875, "learning_rate": 1.9998446427471386e-05, "loss": 2.3628, "step": 1081 }, { "epoch": 3.5475409836065572, "grad_norm": 7.955408096313477, "learning_rate": 1.9998427653843345e-05, "loss": 2.354, "step": 1082 }, { "epoch": 3.550819672131148, "grad_norm": 8.108278274536133, "learning_rate": 1.9998408767472633e-05, "loss": 2.5503, "step": 1083 }, { "epoch": 3.554098360655738, "grad_norm": 9.310647010803223, "learning_rate": 1.9998389768359468e-05, "loss": 2.332, "step": 1084 }, { "epoch": 3.557377049180328, "grad_norm": 16.077112197875977, "learning_rate": 1.9998370656504066e-05, "loss": 2.3843, "step": 1085 }, { "epoch": 3.560655737704918, "grad_norm": 7.632185935974121, "learning_rate": 1.9998351431906637e-05, "loss": 2.3975, "step": 1086 }, { "epoch": 3.5639344262295083, "grad_norm": 8.281712532043457, "learning_rate": 1.99983320945674e-05, "loss": 2.6553, "step": 1087 }, { "epoch": 3.5672131147540984, "grad_norm": 5.534873962402344, "learning_rate": 1.9998312644486574e-05, "loss": 2.4048, "step": 1088 }, { "epoch": 3.5704918032786885, "grad_norm": 6.574867248535156, "learning_rate": 1.9998293081664376e-05, "loss": 2.4248, "step": 1089 }, { "epoch": 3.5737704918032787, "grad_norm": 8.836042404174805, "learning_rate": 1.9998273406101026e-05, "loss": 2.5898, "step": 1090 }, { "epoch": 3.577049180327869, "grad_norm": 8.6514310836792, "learning_rate": 1.999825361779675e-05, "loss": 2.2856, "step": 1091 }, { "epoch": 3.580327868852459, "grad_norm": 7.838352680206299, "learning_rate": 1.9998233716751766e-05, "loss": 2.5039, "step": 1092 }, { "epoch": 3.583606557377049, "grad_norm": 5.683335781097412, "learning_rate": 1.9998213702966307e-05, "loss": 2.4453, "step": 1093 }, { "epoch": 3.586885245901639, "grad_norm": 9.197203636169434, "learning_rate": 1.999819357644059e-05, "loss": 2.4399, "step": 1094 }, { "epoch": 3.5901639344262293, "grad_norm": 7.916896820068359, "learning_rate": 1.999817333717484e-05, "loss": 2.3652, "step": 1095 }, { "epoch": 3.5934426229508194, "grad_norm": 6.169180393218994, "learning_rate": 1.99981529851693e-05, "loss": 2.5835, "step": 1096 }, { "epoch": 3.59672131147541, "grad_norm": 6.625876426696777, "learning_rate": 1.9998132520424183e-05, "loss": 2.2397, "step": 1097 }, { "epoch": 3.6, "grad_norm": 7.764813423156738, "learning_rate": 1.9998111942939727e-05, "loss": 2.3267, "step": 1098 }, { "epoch": 3.60327868852459, "grad_norm": 6.053173542022705, "learning_rate": 1.9998091252716166e-05, "loss": 2.2905, "step": 1099 }, { "epoch": 3.6065573770491803, "grad_norm": 7.0031843185424805, "learning_rate": 1.9998070449753728e-05, "loss": 2.4507, "step": 1100 }, { "epoch": 3.6098360655737705, "grad_norm": 12.14484691619873, "learning_rate": 1.999804953405265e-05, "loss": 2.5063, "step": 1101 }, { "epoch": 3.6131147540983606, "grad_norm": 6.646005630493164, "learning_rate": 1.999802850561317e-05, "loss": 2.4956, "step": 1102 }, { "epoch": 3.6163934426229507, "grad_norm": 7.419061660766602, "learning_rate": 1.9998007364435522e-05, "loss": 2.3794, "step": 1103 }, { "epoch": 3.619672131147541, "grad_norm": 10.460023880004883, "learning_rate": 1.9997986110519947e-05, "loss": 2.3579, "step": 1104 }, { "epoch": 3.6229508196721314, "grad_norm": 12.42957592010498, "learning_rate": 1.999796474386668e-05, "loss": 2.5615, "step": 1105 }, { "epoch": 3.6262295081967215, "grad_norm": 5.984172821044922, "learning_rate": 1.9997943264475973e-05, "loss": 2.5942, "step": 1106 }, { "epoch": 3.6295081967213116, "grad_norm": 6.446585655212402, "learning_rate": 1.9997921672348053e-05, "loss": 2.3525, "step": 1107 }, { "epoch": 3.6327868852459018, "grad_norm": 8.207179069519043, "learning_rate": 1.999789996748317e-05, "loss": 2.2544, "step": 1108 }, { "epoch": 3.636065573770492, "grad_norm": 5.058564186096191, "learning_rate": 1.9997878149881576e-05, "loss": 2.5151, "step": 1109 }, { "epoch": 3.639344262295082, "grad_norm": 9.863587379455566, "learning_rate": 1.9997856219543506e-05, "loss": 2.7139, "step": 1110 }, { "epoch": 3.642622950819672, "grad_norm": 6.132303237915039, "learning_rate": 1.9997834176469214e-05, "loss": 2.415, "step": 1111 }, { "epoch": 3.6459016393442623, "grad_norm": 6.369599342346191, "learning_rate": 1.9997812020658947e-05, "loss": 2.4473, "step": 1112 }, { "epoch": 3.6491803278688524, "grad_norm": 8.094077110290527, "learning_rate": 1.999778975211295e-05, "loss": 2.2788, "step": 1113 }, { "epoch": 3.6524590163934425, "grad_norm": 5.473631381988525, "learning_rate": 1.9997767370831485e-05, "loss": 2.3228, "step": 1114 }, { "epoch": 3.6557377049180326, "grad_norm": 7.550333499908447, "learning_rate": 1.9997744876814792e-05, "loss": 2.4038, "step": 1115 }, { "epoch": 3.6590163934426227, "grad_norm": 5.632371425628662, "learning_rate": 1.9997722270063137e-05, "loss": 2.2681, "step": 1116 }, { "epoch": 3.662295081967213, "grad_norm": 13.69797134399414, "learning_rate": 1.9997699550576763e-05, "loss": 2.7207, "step": 1117 }, { "epoch": 3.6655737704918034, "grad_norm": 5.535505771636963, "learning_rate": 1.9997676718355935e-05, "loss": 2.5088, "step": 1118 }, { "epoch": 3.6688524590163936, "grad_norm": 7.623466968536377, "learning_rate": 1.9997653773400903e-05, "loss": 2.2778, "step": 1119 }, { "epoch": 3.6721311475409837, "grad_norm": 6.061447620391846, "learning_rate": 1.9997630715711932e-05, "loss": 2.4194, "step": 1120 }, { "epoch": 3.675409836065574, "grad_norm": 6.463170528411865, "learning_rate": 1.999760754528928e-05, "loss": 2.3228, "step": 1121 }, { "epoch": 3.678688524590164, "grad_norm": 8.384654998779297, "learning_rate": 1.9997584262133207e-05, "loss": 2.4824, "step": 1122 }, { "epoch": 3.681967213114754, "grad_norm": 7.249739170074463, "learning_rate": 1.9997560866243977e-05, "loss": 2.2534, "step": 1123 }, { "epoch": 3.685245901639344, "grad_norm": 7.049050331115723, "learning_rate": 1.999753735762185e-05, "loss": 2.4121, "step": 1124 }, { "epoch": 3.6885245901639343, "grad_norm": 8.85206413269043, "learning_rate": 1.99975137362671e-05, "loss": 2.4326, "step": 1125 }, { "epoch": 3.6918032786885244, "grad_norm": 10.46074104309082, "learning_rate": 1.9997490002179987e-05, "loss": 2.2837, "step": 1126 }, { "epoch": 3.695081967213115, "grad_norm": 5.4135308265686035, "learning_rate": 1.9997466155360777e-05, "loss": 2.6113, "step": 1127 }, { "epoch": 3.698360655737705, "grad_norm": 9.586560249328613, "learning_rate": 1.9997442195809742e-05, "loss": 2.5488, "step": 1128 }, { "epoch": 3.7016393442622952, "grad_norm": 6.485198497772217, "learning_rate": 1.9997418123527153e-05, "loss": 2.2012, "step": 1129 }, { "epoch": 3.7049180327868854, "grad_norm": 7.611371994018555, "learning_rate": 1.999739393851328e-05, "loss": 2.5142, "step": 1130 }, { "epoch": 3.7081967213114755, "grad_norm": 5.920197010040283, "learning_rate": 1.9997369640768395e-05, "loss": 2.394, "step": 1131 }, { "epoch": 3.7114754098360656, "grad_norm": 22.408918380737305, "learning_rate": 1.999734523029277e-05, "loss": 2.3623, "step": 1132 }, { "epoch": 3.7147540983606557, "grad_norm": 6.373462200164795, "learning_rate": 1.9997320707086686e-05, "loss": 2.4023, "step": 1133 }, { "epoch": 3.718032786885246, "grad_norm": 6.814894676208496, "learning_rate": 1.9997296071150417e-05, "loss": 2.394, "step": 1134 }, { "epoch": 3.721311475409836, "grad_norm": 5.0920281410217285, "learning_rate": 1.9997271322484237e-05, "loss": 2.3945, "step": 1135 }, { "epoch": 3.724590163934426, "grad_norm": 5.920422554016113, "learning_rate": 1.999724646108843e-05, "loss": 2.3008, "step": 1136 }, { "epoch": 3.7278688524590162, "grad_norm": 8.485121726989746, "learning_rate": 1.9997221486963276e-05, "loss": 2.4307, "step": 1137 }, { "epoch": 3.7311475409836063, "grad_norm": 11.153953552246094, "learning_rate": 1.9997196400109055e-05, "loss": 2.2783, "step": 1138 }, { "epoch": 3.7344262295081965, "grad_norm": 10.028952598571777, "learning_rate": 1.9997171200526048e-05, "loss": 2.2559, "step": 1139 }, { "epoch": 3.737704918032787, "grad_norm": 10.225560188293457, "learning_rate": 1.9997145888214542e-05, "loss": 2.4487, "step": 1140 }, { "epoch": 3.740983606557377, "grad_norm": 10.337909698486328, "learning_rate": 1.999712046317482e-05, "loss": 2.3433, "step": 1141 }, { "epoch": 3.7442622950819673, "grad_norm": 6.518843173980713, "learning_rate": 1.9997094925407173e-05, "loss": 2.4478, "step": 1142 }, { "epoch": 3.7475409836065574, "grad_norm": 6.479879856109619, "learning_rate": 1.9997069274911886e-05, "loss": 2.4136, "step": 1143 }, { "epoch": 3.7508196721311475, "grad_norm": 5.834664821624756, "learning_rate": 1.999704351168925e-05, "loss": 2.5532, "step": 1144 }, { "epoch": 3.7540983606557377, "grad_norm": 7.9476752281188965, "learning_rate": 1.9997017635739554e-05, "loss": 2.3813, "step": 1145 }, { "epoch": 3.7573770491803278, "grad_norm": 8.969605445861816, "learning_rate": 1.9996991647063085e-05, "loss": 2.7017, "step": 1146 }, { "epoch": 3.760655737704918, "grad_norm": 8.512678146362305, "learning_rate": 1.9996965545660145e-05, "loss": 2.3079, "step": 1147 }, { "epoch": 3.7639344262295085, "grad_norm": 9.875066757202148, "learning_rate": 1.999693933153102e-05, "loss": 2.3438, "step": 1148 }, { "epoch": 3.7672131147540986, "grad_norm": 6.277003765106201, "learning_rate": 1.9996913004676015e-05, "loss": 2.2886, "step": 1149 }, { "epoch": 3.7704918032786887, "grad_norm": 7.1254401206970215, "learning_rate": 1.9996886565095422e-05, "loss": 2.4224, "step": 1150 }, { "epoch": 3.773770491803279, "grad_norm": 6.433928966522217, "learning_rate": 1.9996860012789536e-05, "loss": 2.5664, "step": 1151 }, { "epoch": 3.777049180327869, "grad_norm": 4.682600498199463, "learning_rate": 1.9996833347758658e-05, "loss": 2.4263, "step": 1152 }, { "epoch": 3.780327868852459, "grad_norm": 6.9384260177612305, "learning_rate": 1.9996806570003095e-05, "loss": 2.4072, "step": 1153 }, { "epoch": 3.783606557377049, "grad_norm": 18.764249801635742, "learning_rate": 1.9996779679523143e-05, "loss": 2.4209, "step": 1154 }, { "epoch": 3.7868852459016393, "grad_norm": 6.79553747177124, "learning_rate": 1.99967526763191e-05, "loss": 2.3916, "step": 1155 }, { "epoch": 3.7901639344262295, "grad_norm": 13.520297050476074, "learning_rate": 1.999672556039128e-05, "loss": 2.2773, "step": 1156 }, { "epoch": 3.7934426229508196, "grad_norm": 6.87178897857666, "learning_rate": 1.999669833173999e-05, "loss": 2.2632, "step": 1157 }, { "epoch": 3.7967213114754097, "grad_norm": 8.055981636047363, "learning_rate": 1.9996670990365524e-05, "loss": 2.4761, "step": 1158 }, { "epoch": 3.8, "grad_norm": 6.30996036529541, "learning_rate": 1.9996643536268202e-05, "loss": 2.3125, "step": 1159 }, { "epoch": 3.80327868852459, "grad_norm": 9.853095054626465, "learning_rate": 1.9996615969448333e-05, "loss": 2.313, "step": 1160 }, { "epoch": 3.80655737704918, "grad_norm": 5.850662708282471, "learning_rate": 1.9996588289906223e-05, "loss": 2.2905, "step": 1161 }, { "epoch": 3.8098360655737706, "grad_norm": 5.985240459442139, "learning_rate": 1.9996560497642185e-05, "loss": 2.2363, "step": 1162 }, { "epoch": 3.8131147540983608, "grad_norm": 6.9253926277160645, "learning_rate": 1.9996532592656534e-05, "loss": 2.3496, "step": 1163 }, { "epoch": 3.816393442622951, "grad_norm": 6.70944881439209, "learning_rate": 1.9996504574949588e-05, "loss": 2.437, "step": 1164 }, { "epoch": 3.819672131147541, "grad_norm": 6.796156883239746, "learning_rate": 1.9996476444521656e-05, "loss": 2.3052, "step": 1165 }, { "epoch": 3.822950819672131, "grad_norm": 5.451990604400635, "learning_rate": 1.999644820137306e-05, "loss": 2.2573, "step": 1166 }, { "epoch": 3.8262295081967213, "grad_norm": 6.017702102661133, "learning_rate": 1.9996419845504113e-05, "loss": 2.2256, "step": 1167 }, { "epoch": 3.8295081967213114, "grad_norm": 5.4986653327941895, "learning_rate": 1.999639137691514e-05, "loss": 2.3721, "step": 1168 }, { "epoch": 3.8327868852459015, "grad_norm": 6.7978620529174805, "learning_rate": 1.999636279560646e-05, "loss": 2.29, "step": 1169 }, { "epoch": 3.836065573770492, "grad_norm": 7.975494861602783, "learning_rate": 1.99963341015784e-05, "loss": 2.4141, "step": 1170 }, { "epoch": 3.839344262295082, "grad_norm": 5.978869915008545, "learning_rate": 1.9996305294831275e-05, "loss": 2.3115, "step": 1171 }, { "epoch": 3.8426229508196723, "grad_norm": 6.416851043701172, "learning_rate": 1.9996276375365417e-05, "loss": 2.4834, "step": 1172 }, { "epoch": 3.8459016393442624, "grad_norm": 5.112527847290039, "learning_rate": 1.9996247343181147e-05, "loss": 2.4077, "step": 1173 }, { "epoch": 3.8491803278688526, "grad_norm": 6.061201572418213, "learning_rate": 1.9996218198278798e-05, "loss": 2.3262, "step": 1174 }, { "epoch": 3.8524590163934427, "grad_norm": 4.999220848083496, "learning_rate": 1.999618894065869e-05, "loss": 2.5596, "step": 1175 }, { "epoch": 3.855737704918033, "grad_norm": 18.825403213500977, "learning_rate": 1.9996159570321162e-05, "loss": 2.3237, "step": 1176 }, { "epoch": 3.859016393442623, "grad_norm": 5.590780735015869, "learning_rate": 1.9996130087266544e-05, "loss": 2.4072, "step": 1177 }, { "epoch": 3.862295081967213, "grad_norm": 7.2192230224609375, "learning_rate": 1.9996100491495164e-05, "loss": 2.3091, "step": 1178 }, { "epoch": 3.865573770491803, "grad_norm": 5.228376388549805, "learning_rate": 1.9996070783007354e-05, "loss": 2.3677, "step": 1179 }, { "epoch": 3.8688524590163933, "grad_norm": 8.350008010864258, "learning_rate": 1.9996040961803454e-05, "loss": 2.3428, "step": 1180 }, { "epoch": 3.8721311475409834, "grad_norm": 9.754035949707031, "learning_rate": 1.9996011027883803e-05, "loss": 2.3105, "step": 1181 }, { "epoch": 3.8754098360655735, "grad_norm": 5.624241828918457, "learning_rate": 1.999598098124873e-05, "loss": 2.3408, "step": 1182 }, { "epoch": 3.8786885245901637, "grad_norm": 11.661718368530273, "learning_rate": 1.999595082189858e-05, "loss": 2.4937, "step": 1183 }, { "epoch": 3.8819672131147542, "grad_norm": 6.570637226104736, "learning_rate": 1.999592054983369e-05, "loss": 2.5195, "step": 1184 }, { "epoch": 3.8852459016393444, "grad_norm": 8.556144714355469, "learning_rate": 1.9995890165054404e-05, "loss": 2.3574, "step": 1185 }, { "epoch": 3.8885245901639345, "grad_norm": 9.319220542907715, "learning_rate": 1.9995859667561063e-05, "loss": 2.4614, "step": 1186 }, { "epoch": 3.8918032786885246, "grad_norm": 4.269582271575928, "learning_rate": 1.9995829057354012e-05, "loss": 2.3701, "step": 1187 }, { "epoch": 3.8950819672131147, "grad_norm": 6.877078533172607, "learning_rate": 1.9995798334433595e-05, "loss": 2.2397, "step": 1188 }, { "epoch": 3.898360655737705, "grad_norm": 4.975828170776367, "learning_rate": 1.9995767498800158e-05, "loss": 2.5029, "step": 1189 }, { "epoch": 3.901639344262295, "grad_norm": 5.787050247192383, "learning_rate": 1.999573655045405e-05, "loss": 2.1938, "step": 1190 }, { "epoch": 3.904918032786885, "grad_norm": 11.26229476928711, "learning_rate": 1.999570548939562e-05, "loss": 2.1841, "step": 1191 }, { "epoch": 3.9081967213114757, "grad_norm": 5.866332530975342, "learning_rate": 1.9995674315625216e-05, "loss": 2.2495, "step": 1192 }, { "epoch": 3.911475409836066, "grad_norm": 5.527360916137695, "learning_rate": 1.999564302914319e-05, "loss": 2.3955, "step": 1193 }, { "epoch": 3.914754098360656, "grad_norm": 8.530866622924805, "learning_rate": 1.99956116299499e-05, "loss": 2.4116, "step": 1194 }, { "epoch": 3.918032786885246, "grad_norm": 7.49611234664917, "learning_rate": 1.9995580118045694e-05, "loss": 2.1685, "step": 1195 }, { "epoch": 3.921311475409836, "grad_norm": 6.936310291290283, "learning_rate": 1.999554849343093e-05, "loss": 2.4565, "step": 1196 }, { "epoch": 3.9245901639344263, "grad_norm": 8.895477294921875, "learning_rate": 1.9995516756105965e-05, "loss": 2.4761, "step": 1197 }, { "epoch": 3.9278688524590164, "grad_norm": 6.280650615692139, "learning_rate": 1.9995484906071152e-05, "loss": 2.198, "step": 1198 }, { "epoch": 3.9311475409836065, "grad_norm": 8.959507942199707, "learning_rate": 1.9995452943326855e-05, "loss": 2.5117, "step": 1199 }, { "epoch": 3.9344262295081966, "grad_norm": 7.092957973480225, "learning_rate": 1.9995420867873437e-05, "loss": 2.3838, "step": 1200 }, { "epoch": 3.9377049180327868, "grad_norm": 7.249965667724609, "learning_rate": 1.999538867971125e-05, "loss": 2.457, "step": 1201 }, { "epoch": 3.940983606557377, "grad_norm": 5.509912014007568, "learning_rate": 1.9995356378840667e-05, "loss": 2.4282, "step": 1202 }, { "epoch": 3.944262295081967, "grad_norm": 6.583002090454102, "learning_rate": 1.999532396526205e-05, "loss": 2.1895, "step": 1203 }, { "epoch": 3.947540983606557, "grad_norm": 7.3312530517578125, "learning_rate": 1.999529143897576e-05, "loss": 2.2832, "step": 1204 }, { "epoch": 3.9508196721311473, "grad_norm": 8.828215599060059, "learning_rate": 1.9995258799982168e-05, "loss": 2.4897, "step": 1205 }, { "epoch": 3.954098360655738, "grad_norm": 7.91680383682251, "learning_rate": 1.999522604828164e-05, "loss": 2.4995, "step": 1206 }, { "epoch": 3.957377049180328, "grad_norm": 9.093006134033203, "learning_rate": 1.9995193183874545e-05, "loss": 2.4507, "step": 1207 }, { "epoch": 3.960655737704918, "grad_norm": 5.371570110321045, "learning_rate": 1.9995160206761256e-05, "loss": 2.3945, "step": 1208 }, { "epoch": 3.963934426229508, "grad_norm": 7.253530979156494, "learning_rate": 1.9995127116942143e-05, "loss": 2.3125, "step": 1209 }, { "epoch": 3.9672131147540983, "grad_norm": 5.956315040588379, "learning_rate": 1.9995093914417574e-05, "loss": 2.3403, "step": 1210 }, { "epoch": 3.9704918032786884, "grad_norm": 8.405375480651855, "learning_rate": 1.9995060599187937e-05, "loss": 2.2993, "step": 1211 }, { "epoch": 3.9737704918032786, "grad_norm": 6.516397953033447, "learning_rate": 1.9995027171253597e-05, "loss": 2.2788, "step": 1212 }, { "epoch": 3.9770491803278687, "grad_norm": 6.818605899810791, "learning_rate": 1.9994993630614933e-05, "loss": 2.4009, "step": 1213 }, { "epoch": 3.9803278688524593, "grad_norm": 10.420808792114258, "learning_rate": 1.9994959977272322e-05, "loss": 2.5742, "step": 1214 }, { "epoch": 3.9836065573770494, "grad_norm": 7.887224197387695, "learning_rate": 1.9994926211226146e-05, "loss": 2.2842, "step": 1215 }, { "epoch": 3.9868852459016395, "grad_norm": 6.613847732543945, "learning_rate": 1.999489233247679e-05, "loss": 2.4551, "step": 1216 }, { "epoch": 3.9901639344262296, "grad_norm": 6.256563186645508, "learning_rate": 1.9994858341024622e-05, "loss": 2.3955, "step": 1217 }, { "epoch": 3.9934426229508198, "grad_norm": 5.484189510345459, "learning_rate": 1.999482423687004e-05, "loss": 2.4727, "step": 1218 }, { "epoch": 3.99672131147541, "grad_norm": 6.482273101806641, "learning_rate": 1.999479002001342e-05, "loss": 2.186, "step": 1219 }, { "epoch": 4.0, "grad_norm": 6.363482475280762, "learning_rate": 1.9994755690455154e-05, "loss": 2.3086, "step": 1220 }, { "epoch": 4.00327868852459, "grad_norm": 4.8712687492370605, "learning_rate": 1.999472124819562e-05, "loss": 2.2539, "step": 1221 }, { "epoch": 4.00655737704918, "grad_norm": 5.887904167175293, "learning_rate": 1.9994686693235215e-05, "loss": 2.3394, "step": 1222 }, { "epoch": 4.00983606557377, "grad_norm": 8.73054027557373, "learning_rate": 1.9994652025574326e-05, "loss": 2.2773, "step": 1223 }, { "epoch": 4.0131147540983605, "grad_norm": 6.88695764541626, "learning_rate": 1.9994617245213344e-05, "loss": 2.4546, "step": 1224 }, { "epoch": 4.016393442622951, "grad_norm": 6.317714691162109, "learning_rate": 1.9994582352152658e-05, "loss": 2.1562, "step": 1225 }, { "epoch": 4.019672131147541, "grad_norm": 6.9156646728515625, "learning_rate": 1.999454734639267e-05, "loss": 2.2954, "step": 1226 }, { "epoch": 4.022950819672131, "grad_norm": 6.6392083168029785, "learning_rate": 1.9994512227933763e-05, "loss": 2.4707, "step": 1227 }, { "epoch": 4.026229508196721, "grad_norm": 6.432285785675049, "learning_rate": 1.9994476996776342e-05, "loss": 2.2642, "step": 1228 }, { "epoch": 4.029508196721311, "grad_norm": 9.577125549316406, "learning_rate": 1.99944416529208e-05, "loss": 2.2021, "step": 1229 }, { "epoch": 4.032786885245901, "grad_norm": 6.316585540771484, "learning_rate": 1.999440619636754e-05, "loss": 2.3369, "step": 1230 }, { "epoch": 4.036065573770492, "grad_norm": 6.5917205810546875, "learning_rate": 1.9994370627116954e-05, "loss": 2.269, "step": 1231 }, { "epoch": 4.039344262295082, "grad_norm": 8.004570007324219, "learning_rate": 1.9994334945169448e-05, "loss": 2.2178, "step": 1232 }, { "epoch": 4.0426229508196725, "grad_norm": 7.194581031799316, "learning_rate": 1.9994299150525425e-05, "loss": 2.2769, "step": 1233 }, { "epoch": 4.045901639344263, "grad_norm": 7.2074055671691895, "learning_rate": 1.999426324318529e-05, "loss": 2.2988, "step": 1234 }, { "epoch": 4.049180327868853, "grad_norm": 18.957128524780273, "learning_rate": 1.9994227223149444e-05, "loss": 2.5547, "step": 1235 }, { "epoch": 4.052459016393443, "grad_norm": 9.006526947021484, "learning_rate": 1.999419109041829e-05, "loss": 2.2397, "step": 1236 }, { "epoch": 4.055737704918033, "grad_norm": 6.845522880554199, "learning_rate": 1.9994154844992248e-05, "loss": 2.3521, "step": 1237 }, { "epoch": 4.059016393442623, "grad_norm": 5.766297817230225, "learning_rate": 1.9994118486871714e-05, "loss": 2.2349, "step": 1238 }, { "epoch": 4.062295081967213, "grad_norm": 6.860787391662598, "learning_rate": 1.9994082016057105e-05, "loss": 2.5337, "step": 1239 }, { "epoch": 4.065573770491803, "grad_norm": 6.814500331878662, "learning_rate": 1.9994045432548828e-05, "loss": 2.2661, "step": 1240 }, { "epoch": 4.0688524590163935, "grad_norm": 7.459895610809326, "learning_rate": 1.99940087363473e-05, "loss": 2.2681, "step": 1241 }, { "epoch": 4.072131147540984, "grad_norm": 7.795438289642334, "learning_rate": 1.9993971927452928e-05, "loss": 2.377, "step": 1242 }, { "epoch": 4.075409836065574, "grad_norm": 5.03941011428833, "learning_rate": 1.9993935005866138e-05, "loss": 2.2944, "step": 1243 }, { "epoch": 4.078688524590164, "grad_norm": 7.720576286315918, "learning_rate": 1.9993897971587333e-05, "loss": 2.2686, "step": 1244 }, { "epoch": 4.081967213114754, "grad_norm": 6.511646270751953, "learning_rate": 1.999386082461694e-05, "loss": 2.168, "step": 1245 }, { "epoch": 4.085245901639344, "grad_norm": 5.348281383514404, "learning_rate": 1.9993823564955375e-05, "loss": 2.207, "step": 1246 }, { "epoch": 4.088524590163934, "grad_norm": 7.120620250701904, "learning_rate": 1.999378619260306e-05, "loss": 2.2236, "step": 1247 }, { "epoch": 4.091803278688524, "grad_norm": 6.539315223693848, "learning_rate": 1.9993748707560413e-05, "loss": 2.145, "step": 1248 }, { "epoch": 4.0950819672131145, "grad_norm": 6.817874431610107, "learning_rate": 1.999371110982786e-05, "loss": 2.2129, "step": 1249 }, { "epoch": 4.098360655737705, "grad_norm": 7.091748237609863, "learning_rate": 1.999367339940582e-05, "loss": 2.187, "step": 1250 }, { "epoch": 4.101639344262295, "grad_norm": 8.174005508422852, "learning_rate": 1.9993635576294726e-05, "loss": 2.2847, "step": 1251 }, { "epoch": 4.104918032786885, "grad_norm": 9.482701301574707, "learning_rate": 1.9993597640494998e-05, "loss": 2.0562, "step": 1252 }, { "epoch": 4.108196721311476, "grad_norm": 8.612915992736816, "learning_rate": 1.9993559592007067e-05, "loss": 2.2788, "step": 1253 }, { "epoch": 4.111475409836066, "grad_norm": 8.41639232635498, "learning_rate": 1.9993521430831357e-05, "loss": 2.3682, "step": 1254 }, { "epoch": 4.114754098360656, "grad_norm": 9.303656578063965, "learning_rate": 1.9993483156968305e-05, "loss": 2.312, "step": 1255 }, { "epoch": 4.118032786885246, "grad_norm": 6.466533184051514, "learning_rate": 1.999344477041834e-05, "loss": 2.417, "step": 1256 }, { "epoch": 4.121311475409836, "grad_norm": 7.055037021636963, "learning_rate": 1.9993406271181898e-05, "loss": 2.1416, "step": 1257 }, { "epoch": 4.1245901639344265, "grad_norm": 7.290410995483398, "learning_rate": 1.9993367659259404e-05, "loss": 2.3237, "step": 1258 }, { "epoch": 4.127868852459017, "grad_norm": 7.719918251037598, "learning_rate": 1.9993328934651303e-05, "loss": 2.1587, "step": 1259 }, { "epoch": 4.131147540983607, "grad_norm": 7.117659091949463, "learning_rate": 1.9993290097358024e-05, "loss": 2.2847, "step": 1260 }, { "epoch": 4.134426229508197, "grad_norm": 4.432666301727295, "learning_rate": 1.9993251147380012e-05, "loss": 2.3223, "step": 1261 }, { "epoch": 4.137704918032787, "grad_norm": 6.643818378448486, "learning_rate": 1.99932120847177e-05, "loss": 2.3438, "step": 1262 }, { "epoch": 4.140983606557377, "grad_norm": 12.905832290649414, "learning_rate": 1.9993172909371533e-05, "loss": 2.2798, "step": 1263 }, { "epoch": 4.144262295081967, "grad_norm": 7.30301570892334, "learning_rate": 1.999313362134195e-05, "loss": 2.2993, "step": 1264 }, { "epoch": 4.147540983606557, "grad_norm": 7.138467311859131, "learning_rate": 1.99930942206294e-05, "loss": 2.2192, "step": 1265 }, { "epoch": 4.150819672131147, "grad_norm": 5.606064319610596, "learning_rate": 1.9993054707234317e-05, "loss": 2.1821, "step": 1266 }, { "epoch": 4.154098360655738, "grad_norm": 7.534795761108398, "learning_rate": 1.9993015081157155e-05, "loss": 2.3945, "step": 1267 }, { "epoch": 4.157377049180328, "grad_norm": 12.26885986328125, "learning_rate": 1.999297534239836e-05, "loss": 2.1216, "step": 1268 }, { "epoch": 4.160655737704918, "grad_norm": 7.777595520019531, "learning_rate": 1.999293549095837e-05, "loss": 2.1375, "step": 1269 }, { "epoch": 4.163934426229508, "grad_norm": 6.005998611450195, "learning_rate": 1.9992895526837647e-05, "loss": 2.1177, "step": 1270 }, { "epoch": 4.167213114754098, "grad_norm": 7.160705089569092, "learning_rate": 1.9992855450036638e-05, "loss": 2.1177, "step": 1271 }, { "epoch": 4.170491803278688, "grad_norm": 5.486116886138916, "learning_rate": 1.9992815260555792e-05, "loss": 2.3809, "step": 1272 }, { "epoch": 4.173770491803278, "grad_norm": 7.763748645782471, "learning_rate": 1.9992774958395565e-05, "loss": 2.5322, "step": 1273 }, { "epoch": 4.177049180327868, "grad_norm": 5.361795902252197, "learning_rate": 1.9992734543556413e-05, "loss": 2.2764, "step": 1274 }, { "epoch": 4.180327868852459, "grad_norm": 6.403895854949951, "learning_rate": 1.9992694016038785e-05, "loss": 2.3408, "step": 1275 }, { "epoch": 4.18360655737705, "grad_norm": 5.79236364364624, "learning_rate": 1.9992653375843143e-05, "loss": 1.9146, "step": 1276 }, { "epoch": 4.18688524590164, "grad_norm": 6.6005730628967285, "learning_rate": 1.9992612622969946e-05, "loss": 2.4995, "step": 1277 }, { "epoch": 4.19016393442623, "grad_norm": 6.754226207733154, "learning_rate": 1.9992571757419653e-05, "loss": 2.2549, "step": 1278 }, { "epoch": 4.19344262295082, "grad_norm": 5.676904678344727, "learning_rate": 1.999253077919272e-05, "loss": 2.209, "step": 1279 }, { "epoch": 4.19672131147541, "grad_norm": 8.715211868286133, "learning_rate": 1.9992489688289614e-05, "loss": 2.2236, "step": 1280 }, { "epoch": 4.2, "grad_norm": 8.452347755432129, "learning_rate": 1.99924484847108e-05, "loss": 2.3345, "step": 1281 }, { "epoch": 4.20327868852459, "grad_norm": 5.926138401031494, "learning_rate": 1.9992407168456735e-05, "loss": 2.2925, "step": 1282 }, { "epoch": 4.20655737704918, "grad_norm": 6.258457660675049, "learning_rate": 1.999236573952789e-05, "loss": 2.3076, "step": 1283 }, { "epoch": 4.2098360655737705, "grad_norm": 7.089434623718262, "learning_rate": 1.9992324197924736e-05, "loss": 2.2529, "step": 1284 }, { "epoch": 4.213114754098361, "grad_norm": 6.511361122131348, "learning_rate": 1.9992282543647737e-05, "loss": 2.1519, "step": 1285 }, { "epoch": 4.216393442622951, "grad_norm": 10.412921905517578, "learning_rate": 1.999224077669736e-05, "loss": 2.0732, "step": 1286 }, { "epoch": 4.219672131147541, "grad_norm": 5.677690505981445, "learning_rate": 1.999219889707408e-05, "loss": 2.1187, "step": 1287 }, { "epoch": 4.222950819672131, "grad_norm": 6.410314083099365, "learning_rate": 1.999215690477837e-05, "loss": 2.3564, "step": 1288 }, { "epoch": 4.226229508196721, "grad_norm": 5.827301979064941, "learning_rate": 1.99921147998107e-05, "loss": 2.2627, "step": 1289 }, { "epoch": 4.229508196721311, "grad_norm": 4.760543346405029, "learning_rate": 1.9992072582171546e-05, "loss": 2.3457, "step": 1290 }, { "epoch": 4.232786885245901, "grad_norm": 6.858874797821045, "learning_rate": 1.9992030251861384e-05, "loss": 2.2046, "step": 1291 }, { "epoch": 4.2360655737704915, "grad_norm": 5.721587657928467, "learning_rate": 1.999198780888069e-05, "loss": 2.3086, "step": 1292 }, { "epoch": 4.239344262295082, "grad_norm": 7.412247180938721, "learning_rate": 1.9991945253229953e-05, "loss": 2.3516, "step": 1293 }, { "epoch": 4.242622950819672, "grad_norm": 6.597192764282227, "learning_rate": 1.9991902584909636e-05, "loss": 2.0405, "step": 1294 }, { "epoch": 4.245901639344262, "grad_norm": 6.321567058563232, "learning_rate": 1.999185980392023e-05, "loss": 2.1997, "step": 1295 }, { "epoch": 4.249180327868853, "grad_norm": 5.775531768798828, "learning_rate": 1.999181691026222e-05, "loss": 2.4883, "step": 1296 }, { "epoch": 4.252459016393443, "grad_norm": 5.708487510681152, "learning_rate": 1.999177390393608e-05, "loss": 2.4482, "step": 1297 }, { "epoch": 4.255737704918033, "grad_norm": 5.051143646240234, "learning_rate": 1.9991730784942304e-05, "loss": 2.062, "step": 1298 }, { "epoch": 4.259016393442623, "grad_norm": 11.589128494262695, "learning_rate": 1.999168755328137e-05, "loss": 2.1885, "step": 1299 }, { "epoch": 4.262295081967213, "grad_norm": 6.143886089324951, "learning_rate": 1.9991644208953776e-05, "loss": 2.2275, "step": 1300 }, { "epoch": 4.2655737704918035, "grad_norm": 5.421433925628662, "learning_rate": 1.999160075196e-05, "loss": 2.1182, "step": 1301 }, { "epoch": 4.268852459016394, "grad_norm": 7.353605270385742, "learning_rate": 1.9991557182300538e-05, "loss": 2.1572, "step": 1302 }, { "epoch": 4.272131147540984, "grad_norm": 7.226676940917969, "learning_rate": 1.9991513499975883e-05, "loss": 2.3062, "step": 1303 }, { "epoch": 4.275409836065574, "grad_norm": 22.26732635498047, "learning_rate": 1.9991469704986523e-05, "loss": 2.3066, "step": 1304 }, { "epoch": 4.278688524590164, "grad_norm": 8.091521263122559, "learning_rate": 1.9991425797332952e-05, "loss": 1.9766, "step": 1305 }, { "epoch": 4.281967213114754, "grad_norm": 6.584952354431152, "learning_rate": 1.9991381777015667e-05, "loss": 2.3706, "step": 1306 }, { "epoch": 4.285245901639344, "grad_norm": 9.178985595703125, "learning_rate": 1.9991337644035166e-05, "loss": 2.3389, "step": 1307 }, { "epoch": 4.288524590163934, "grad_norm": 6.376687526702881, "learning_rate": 1.9991293398391945e-05, "loss": 2.3496, "step": 1308 }, { "epoch": 4.2918032786885245, "grad_norm": 7.158993244171143, "learning_rate": 1.99912490400865e-05, "loss": 2.1528, "step": 1309 }, { "epoch": 4.295081967213115, "grad_norm": 8.03917407989502, "learning_rate": 1.9991204569119337e-05, "loss": 2.4253, "step": 1310 }, { "epoch": 4.298360655737705, "grad_norm": 6.81865930557251, "learning_rate": 1.9991159985490952e-05, "loss": 2.3174, "step": 1311 }, { "epoch": 4.301639344262295, "grad_norm": 7.710112571716309, "learning_rate": 1.999111528920185e-05, "loss": 2.1919, "step": 1312 }, { "epoch": 4.304918032786885, "grad_norm": 6.5251145362854, "learning_rate": 1.9991070480252533e-05, "loss": 2.1274, "step": 1313 }, { "epoch": 4.308196721311475, "grad_norm": 5.840324878692627, "learning_rate": 1.999102555864351e-05, "loss": 2.272, "step": 1314 }, { "epoch": 4.311475409836065, "grad_norm": 4.930078506469727, "learning_rate": 1.9990980524375286e-05, "loss": 2.3237, "step": 1315 }, { "epoch": 4.314754098360655, "grad_norm": 5.302310943603516, "learning_rate": 1.9990935377448372e-05, "loss": 2.2422, "step": 1316 }, { "epoch": 4.3180327868852455, "grad_norm": 5.759896278381348, "learning_rate": 1.9990890117863267e-05, "loss": 2.3672, "step": 1317 }, { "epoch": 4.321311475409836, "grad_norm": 7.358537673950195, "learning_rate": 1.9990844745620493e-05, "loss": 2.4058, "step": 1318 }, { "epoch": 4.324590163934427, "grad_norm": 8.865067481994629, "learning_rate": 1.9990799260720555e-05, "loss": 2.2314, "step": 1319 }, { "epoch": 4.327868852459017, "grad_norm": 12.234969139099121, "learning_rate": 1.9990753663163968e-05, "loss": 2.3188, "step": 1320 }, { "epoch": 4.331147540983607, "grad_norm": 5.778533935546875, "learning_rate": 1.9990707952951243e-05, "loss": 2.1455, "step": 1321 }, { "epoch": 4.334426229508197, "grad_norm": 6.089225769042969, "learning_rate": 1.9990662130082903e-05, "loss": 2.3374, "step": 1322 }, { "epoch": 4.337704918032787, "grad_norm": 6.77669620513916, "learning_rate": 1.9990616194559455e-05, "loss": 2.1162, "step": 1323 }, { "epoch": 4.340983606557377, "grad_norm": 6.908276557922363, "learning_rate": 1.9990570146381424e-05, "loss": 2.3188, "step": 1324 }, { "epoch": 4.344262295081967, "grad_norm": 6.4747443199157715, "learning_rate": 1.9990523985549327e-05, "loss": 2.0913, "step": 1325 }, { "epoch": 4.3475409836065575, "grad_norm": 7.538565635681152, "learning_rate": 1.9990477712063687e-05, "loss": 2.5005, "step": 1326 }, { "epoch": 4.350819672131148, "grad_norm": 8.371650695800781, "learning_rate": 1.999043132592502e-05, "loss": 2.2217, "step": 1327 }, { "epoch": 4.354098360655738, "grad_norm": 5.686996936798096, "learning_rate": 1.999038482713385e-05, "loss": 2.2148, "step": 1328 }, { "epoch": 4.357377049180328, "grad_norm": 11.464170455932617, "learning_rate": 1.999033821569071e-05, "loss": 2.3491, "step": 1329 }, { "epoch": 4.360655737704918, "grad_norm": 6.6451849937438965, "learning_rate": 1.9990291491596116e-05, "loss": 2.4355, "step": 1330 }, { "epoch": 4.363934426229508, "grad_norm": 6.113999366760254, "learning_rate": 1.9990244654850598e-05, "loss": 2.498, "step": 1331 }, { "epoch": 4.367213114754098, "grad_norm": 5.697882175445557, "learning_rate": 1.9990197705454682e-05, "loss": 2.2681, "step": 1332 }, { "epoch": 4.370491803278688, "grad_norm": 10.173162460327148, "learning_rate": 1.9990150643408904e-05, "loss": 2.0522, "step": 1333 }, { "epoch": 4.3737704918032785, "grad_norm": 8.250874519348145, "learning_rate": 1.9990103468713788e-05, "loss": 2.1118, "step": 1334 }, { "epoch": 4.377049180327869, "grad_norm": 7.5282206535339355, "learning_rate": 1.999005618136987e-05, "loss": 2.2686, "step": 1335 }, { "epoch": 4.380327868852459, "grad_norm": 6.45988655090332, "learning_rate": 1.9990008781377677e-05, "loss": 2.4302, "step": 1336 }, { "epoch": 4.383606557377049, "grad_norm": 6.8488874435424805, "learning_rate": 1.9989961268737754e-05, "loss": 2.3931, "step": 1337 }, { "epoch": 4.386885245901639, "grad_norm": 7.970024108886719, "learning_rate": 1.9989913643450627e-05, "loss": 2.2026, "step": 1338 }, { "epoch": 4.390163934426229, "grad_norm": 9.393023490905762, "learning_rate": 1.9989865905516836e-05, "loss": 2.1685, "step": 1339 }, { "epoch": 4.39344262295082, "grad_norm": 10.347450256347656, "learning_rate": 1.9989818054936923e-05, "loss": 2.0098, "step": 1340 }, { "epoch": 4.39672131147541, "grad_norm": 5.291154861450195, "learning_rate": 1.9989770091711423e-05, "loss": 2.1626, "step": 1341 }, { "epoch": 4.4, "grad_norm": 10.715744018554688, "learning_rate": 1.998972201584088e-05, "loss": 2.1885, "step": 1342 }, { "epoch": 4.4032786885245905, "grad_norm": 6.814695358276367, "learning_rate": 1.9989673827325834e-05, "loss": 2.1226, "step": 1343 }, { "epoch": 4.406557377049181, "grad_norm": 8.650118827819824, "learning_rate": 1.998962552616683e-05, "loss": 2.2803, "step": 1344 }, { "epoch": 4.409836065573771, "grad_norm": 4.439847469329834, "learning_rate": 1.9989577112364405e-05, "loss": 2.105, "step": 1345 }, { "epoch": 4.413114754098361, "grad_norm": 6.507091045379639, "learning_rate": 1.998952858591912e-05, "loss": 2.2344, "step": 1346 }, { "epoch": 4.416393442622951, "grad_norm": 7.662691593170166, "learning_rate": 1.998947994683151e-05, "loss": 2.1392, "step": 1347 }, { "epoch": 4.419672131147541, "grad_norm": 7.768380641937256, "learning_rate": 1.9989431195102127e-05, "loss": 2.1045, "step": 1348 }, { "epoch": 4.422950819672131, "grad_norm": 6.397309303283691, "learning_rate": 1.998938233073152e-05, "loss": 2.2871, "step": 1349 }, { "epoch": 4.426229508196721, "grad_norm": 6.258263111114502, "learning_rate": 1.9989333353720243e-05, "loss": 2.2056, "step": 1350 }, { "epoch": 4.4295081967213115, "grad_norm": 6.917745590209961, "learning_rate": 1.9989284264068845e-05, "loss": 2.334, "step": 1351 }, { "epoch": 4.432786885245902, "grad_norm": 6.381770610809326, "learning_rate": 1.9989235061777878e-05, "loss": 2.1938, "step": 1352 }, { "epoch": 4.436065573770492, "grad_norm": 7.24508810043335, "learning_rate": 1.9989185746847903e-05, "loss": 2.1328, "step": 1353 }, { "epoch": 4.439344262295082, "grad_norm": 6.66859769821167, "learning_rate": 1.9989136319279474e-05, "loss": 2.21, "step": 1354 }, { "epoch": 4.442622950819672, "grad_norm": 6.957003116607666, "learning_rate": 1.9989086779073142e-05, "loss": 2.1284, "step": 1355 }, { "epoch": 4.445901639344262, "grad_norm": 10.258379936218262, "learning_rate": 1.9989037126229474e-05, "loss": 2.2744, "step": 1356 }, { "epoch": 4.449180327868852, "grad_norm": 6.52393913269043, "learning_rate": 1.9988987360749027e-05, "loss": 2.229, "step": 1357 }, { "epoch": 4.452459016393442, "grad_norm": 6.254486560821533, "learning_rate": 1.998893748263236e-05, "loss": 2.334, "step": 1358 }, { "epoch": 4.4557377049180324, "grad_norm": 5.220952987670898, "learning_rate": 1.9988887491880037e-05, "loss": 2.1597, "step": 1359 }, { "epoch": 4.459016393442623, "grad_norm": 12.169927597045898, "learning_rate": 1.9988837388492622e-05, "loss": 2.2764, "step": 1360 }, { "epoch": 4.462295081967213, "grad_norm": 6.528616428375244, "learning_rate": 1.9988787172470682e-05, "loss": 2.2202, "step": 1361 }, { "epoch": 4.465573770491803, "grad_norm": 5.001804828643799, "learning_rate": 1.9988736843814777e-05, "loss": 2.1431, "step": 1362 }, { "epoch": 4.468852459016394, "grad_norm": 12.169919967651367, "learning_rate": 1.9988686402525478e-05, "loss": 2.1338, "step": 1363 }, { "epoch": 4.472131147540984, "grad_norm": 7.42421817779541, "learning_rate": 1.9988635848603356e-05, "loss": 2.1826, "step": 1364 }, { "epoch": 4.475409836065574, "grad_norm": 6.017554759979248, "learning_rate": 1.998858518204898e-05, "loss": 2.2061, "step": 1365 }, { "epoch": 4.478688524590164, "grad_norm": 6.570552825927734, "learning_rate": 1.998853440286292e-05, "loss": 2.1392, "step": 1366 }, { "epoch": 4.481967213114754, "grad_norm": 6.635951995849609, "learning_rate": 1.998848351104575e-05, "loss": 2.4082, "step": 1367 }, { "epoch": 4.4852459016393444, "grad_norm": 5.655879020690918, "learning_rate": 1.998843250659804e-05, "loss": 2.2319, "step": 1368 }, { "epoch": 4.488524590163935, "grad_norm": 23.192476272583008, "learning_rate": 1.998838138952037e-05, "loss": 1.915, "step": 1369 }, { "epoch": 4.491803278688525, "grad_norm": 5.596128463745117, "learning_rate": 1.9988330159813313e-05, "loss": 2.3701, "step": 1370 }, { "epoch": 4.495081967213115, "grad_norm": 6.020814418792725, "learning_rate": 1.998827881747745e-05, "loss": 2.335, "step": 1371 }, { "epoch": 4.498360655737705, "grad_norm": 5.518019676208496, "learning_rate": 1.998822736251336e-05, "loss": 2.1626, "step": 1372 }, { "epoch": 4.501639344262295, "grad_norm": 5.471186637878418, "learning_rate": 1.9988175794921618e-05, "loss": 2.4761, "step": 1373 }, { "epoch": 4.504918032786885, "grad_norm": 5.990815162658691, "learning_rate": 1.998812411470281e-05, "loss": 2.2017, "step": 1374 }, { "epoch": 4.508196721311475, "grad_norm": 6.6852617263793945, "learning_rate": 1.998807232185752e-05, "loss": 2.1626, "step": 1375 }, { "epoch": 4.511475409836065, "grad_norm": 5.934200763702393, "learning_rate": 1.9988020416386327e-05, "loss": 2.2935, "step": 1376 }, { "epoch": 4.5147540983606556, "grad_norm": 8.798763275146484, "learning_rate": 1.9987968398289818e-05, "loss": 2.1323, "step": 1377 }, { "epoch": 4.518032786885246, "grad_norm": 4.94208288192749, "learning_rate": 1.998791626756858e-05, "loss": 2.3823, "step": 1378 }, { "epoch": 4.521311475409836, "grad_norm": 5.90256404876709, "learning_rate": 1.9987864024223205e-05, "loss": 2.2515, "step": 1379 }, { "epoch": 4.524590163934426, "grad_norm": 9.143851280212402, "learning_rate": 1.9987811668254276e-05, "loss": 2.0928, "step": 1380 }, { "epoch": 4.527868852459016, "grad_norm": 6.267242908477783, "learning_rate": 1.9987759199662386e-05, "loss": 2.3042, "step": 1381 }, { "epoch": 4.531147540983606, "grad_norm": 6.971308708190918, "learning_rate": 1.9987706618448125e-05, "loss": 2.1626, "step": 1382 }, { "epoch": 4.534426229508197, "grad_norm": 5.540724754333496, "learning_rate": 1.9987653924612088e-05, "loss": 2.1226, "step": 1383 }, { "epoch": 4.537704918032787, "grad_norm": 7.2119035720825195, "learning_rate": 1.998760111815487e-05, "loss": 2.4312, "step": 1384 }, { "epoch": 4.540983606557377, "grad_norm": 6.771824359893799, "learning_rate": 1.9987548199077062e-05, "loss": 2.2764, "step": 1385 }, { "epoch": 4.5442622950819676, "grad_norm": 7.065238952636719, "learning_rate": 1.9987495167379265e-05, "loss": 2.2007, "step": 1386 }, { "epoch": 4.547540983606558, "grad_norm": 5.891613960266113, "learning_rate": 1.9987442023062077e-05, "loss": 1.959, "step": 1387 }, { "epoch": 4.550819672131148, "grad_norm": 9.217880249023438, "learning_rate": 1.9987388766126096e-05, "loss": 2.29, "step": 1388 }, { "epoch": 4.554098360655738, "grad_norm": 6.098889350891113, "learning_rate": 1.9987335396571922e-05, "loss": 2.4868, "step": 1389 }, { "epoch": 4.557377049180328, "grad_norm": 8.604470252990723, "learning_rate": 1.9987281914400153e-05, "loss": 2.3018, "step": 1390 }, { "epoch": 4.560655737704918, "grad_norm": 7.829953670501709, "learning_rate": 1.99872283196114e-05, "loss": 2.2832, "step": 1391 }, { "epoch": 4.563934426229508, "grad_norm": 5.547098159790039, "learning_rate": 1.9987174612206262e-05, "loss": 2.3071, "step": 1392 }, { "epoch": 4.567213114754098, "grad_norm": 7.195805072784424, "learning_rate": 1.998712079218535e-05, "loss": 2.1953, "step": 1393 }, { "epoch": 4.5704918032786885, "grad_norm": 6.033261775970459, "learning_rate": 1.9987066859549266e-05, "loss": 2.2217, "step": 1394 }, { "epoch": 4.573770491803279, "grad_norm": 6.847478866577148, "learning_rate": 1.9987012814298617e-05, "loss": 2.2832, "step": 1395 }, { "epoch": 4.577049180327869, "grad_norm": 9.053121566772461, "learning_rate": 1.9986958656434016e-05, "loss": 2.084, "step": 1396 }, { "epoch": 4.580327868852459, "grad_norm": 7.2537641525268555, "learning_rate": 1.998690438595607e-05, "loss": 2.1655, "step": 1397 }, { "epoch": 4.583606557377049, "grad_norm": 5.693087577819824, "learning_rate": 1.9986850002865394e-05, "loss": 2.2266, "step": 1398 }, { "epoch": 4.586885245901639, "grad_norm": 6.3501505851745605, "learning_rate": 1.9986795507162603e-05, "loss": 2.1479, "step": 1399 }, { "epoch": 4.590163934426229, "grad_norm": 6.442376613616943, "learning_rate": 1.9986740898848306e-05, "loss": 2.1147, "step": 1400 }, { "epoch": 4.593442622950819, "grad_norm": 6.58513069152832, "learning_rate": 1.9986686177923124e-05, "loss": 2.4058, "step": 1401 }, { "epoch": 4.5967213114754095, "grad_norm": 11.1644926071167, "learning_rate": 1.998663134438767e-05, "loss": 2.2627, "step": 1402 }, { "epoch": 4.6, "grad_norm": 7.211146354675293, "learning_rate": 1.9986576398242566e-05, "loss": 2.1831, "step": 1403 }, { "epoch": 4.60327868852459, "grad_norm": 8.36959171295166, "learning_rate": 1.9986521339488427e-05, "loss": 2.2764, "step": 1404 }, { "epoch": 4.60655737704918, "grad_norm": 6.272721290588379, "learning_rate": 1.998646616812588e-05, "loss": 2.3428, "step": 1405 }, { "epoch": 4.60983606557377, "grad_norm": 8.566177368164062, "learning_rate": 1.998641088415554e-05, "loss": 2.0732, "step": 1406 }, { "epoch": 4.613114754098361, "grad_norm": 6.593827724456787, "learning_rate": 1.998635548757804e-05, "loss": 2.1382, "step": 1407 }, { "epoch": 4.616393442622951, "grad_norm": 7.000254154205322, "learning_rate": 1.998629997839399e-05, "loss": 2.0713, "step": 1408 }, { "epoch": 4.619672131147541, "grad_norm": 7.640887260437012, "learning_rate": 1.998624435660403e-05, "loss": 2.2041, "step": 1409 }, { "epoch": 4.622950819672131, "grad_norm": 9.137694358825684, "learning_rate": 1.9986188622208782e-05, "loss": 2.1528, "step": 1410 }, { "epoch": 4.6262295081967215, "grad_norm": 6.56645393371582, "learning_rate": 1.9986132775208872e-05, "loss": 2.0571, "step": 1411 }, { "epoch": 4.629508196721312, "grad_norm": 6.361895561218262, "learning_rate": 1.9986076815604934e-05, "loss": 2.3193, "step": 1412 }, { "epoch": 4.632786885245902, "grad_norm": 5.5308332443237305, "learning_rate": 1.9986020743397595e-05, "loss": 2.144, "step": 1413 }, { "epoch": 4.636065573770492, "grad_norm": 5.662609100341797, "learning_rate": 1.998596455858749e-05, "loss": 2.1079, "step": 1414 }, { "epoch": 4.639344262295082, "grad_norm": 5.3361077308654785, "learning_rate": 1.9985908261175253e-05, "loss": 2.355, "step": 1415 }, { "epoch": 4.642622950819672, "grad_norm": 6.5820207595825195, "learning_rate": 1.998585185116152e-05, "loss": 2.1309, "step": 1416 }, { "epoch": 4.645901639344262, "grad_norm": 17.950576782226562, "learning_rate": 1.998579532854692e-05, "loss": 2.3486, "step": 1417 }, { "epoch": 4.649180327868852, "grad_norm": 7.057338714599609, "learning_rate": 1.9985738693332095e-05, "loss": 2.4624, "step": 1418 }, { "epoch": 4.6524590163934425, "grad_norm": 7.591104984283447, "learning_rate": 1.9985681945517687e-05, "loss": 2.2476, "step": 1419 }, { "epoch": 4.655737704918033, "grad_norm": 8.249428749084473, "learning_rate": 1.998562508510433e-05, "loss": 2.3735, "step": 1420 }, { "epoch": 4.659016393442623, "grad_norm": 6.197564601898193, "learning_rate": 1.9985568112092667e-05, "loss": 2.2002, "step": 1421 }, { "epoch": 4.662295081967213, "grad_norm": 8.224055290222168, "learning_rate": 1.9985511026483343e-05, "loss": 2.3057, "step": 1422 }, { "epoch": 4.665573770491803, "grad_norm": 6.2963972091674805, "learning_rate": 1.9985453828277e-05, "loss": 2.4155, "step": 1423 }, { "epoch": 4.668852459016393, "grad_norm": 6.671773910522461, "learning_rate": 1.9985396517474283e-05, "loss": 2.1362, "step": 1424 }, { "epoch": 4.672131147540983, "grad_norm": 7.581842422485352, "learning_rate": 1.9985339094075836e-05, "loss": 2.0408, "step": 1425 }, { "epoch": 4.675409836065574, "grad_norm": 7.98220157623291, "learning_rate": 1.9985281558082312e-05, "loss": 2.1416, "step": 1426 }, { "epoch": 4.678688524590164, "grad_norm": 7.388979911804199, "learning_rate": 1.9985223909494352e-05, "loss": 2.2905, "step": 1427 }, { "epoch": 4.6819672131147545, "grad_norm": 6.337071895599365, "learning_rate": 1.9985166148312616e-05, "loss": 2.4243, "step": 1428 }, { "epoch": 4.685245901639345, "grad_norm": 5.930590629577637, "learning_rate": 1.9985108274537745e-05, "loss": 2.2412, "step": 1429 }, { "epoch": 4.688524590163935, "grad_norm": 10.190104484558105, "learning_rate": 1.9985050288170396e-05, "loss": 2.2134, "step": 1430 }, { "epoch": 4.691803278688525, "grad_norm": 9.433650970458984, "learning_rate": 1.9984992189211227e-05, "loss": 2.207, "step": 1431 }, { "epoch": 4.695081967213115, "grad_norm": 8.040424346923828, "learning_rate": 1.9984933977660884e-05, "loss": 2.3071, "step": 1432 }, { "epoch": 4.698360655737705, "grad_norm": 8.590861320495605, "learning_rate": 1.9984875653520035e-05, "loss": 2.2607, "step": 1433 }, { "epoch": 4.701639344262295, "grad_norm": 5.512430191040039, "learning_rate": 1.9984817216789327e-05, "loss": 2.3433, "step": 1434 }, { "epoch": 4.704918032786885, "grad_norm": 11.83382797241211, "learning_rate": 1.9984758667469424e-05, "loss": 2.2905, "step": 1435 }, { "epoch": 4.7081967213114755, "grad_norm": 9.992788314819336, "learning_rate": 1.9984700005560987e-05, "loss": 2.3828, "step": 1436 }, { "epoch": 4.711475409836066, "grad_norm": 8.159333229064941, "learning_rate": 1.9984641231064673e-05, "loss": 2.1519, "step": 1437 }, { "epoch": 4.714754098360656, "grad_norm": 6.954378604888916, "learning_rate": 1.9984582343981153e-05, "loss": 2.2026, "step": 1438 }, { "epoch": 4.718032786885246, "grad_norm": 21.300579071044922, "learning_rate": 1.998452334431108e-05, "loss": 2.3569, "step": 1439 }, { "epoch": 4.721311475409836, "grad_norm": 7.663823127746582, "learning_rate": 1.9984464232055128e-05, "loss": 2.293, "step": 1440 }, { "epoch": 4.724590163934426, "grad_norm": 6.833023548126221, "learning_rate": 1.998440500721396e-05, "loss": 2.1411, "step": 1441 }, { "epoch": 4.727868852459016, "grad_norm": 6.142462730407715, "learning_rate": 1.9984345669788244e-05, "loss": 2.228, "step": 1442 }, { "epoch": 4.731147540983606, "grad_norm": 6.371670246124268, "learning_rate": 1.998428621977865e-05, "loss": 2.021, "step": 1443 }, { "epoch": 4.7344262295081965, "grad_norm": 7.711598873138428, "learning_rate": 1.9984226657185845e-05, "loss": 2.2764, "step": 1444 }, { "epoch": 4.737704918032787, "grad_norm": 8.446901321411133, "learning_rate": 1.9984166982010508e-05, "loss": 2.3438, "step": 1445 }, { "epoch": 4.740983606557377, "grad_norm": 7.357739448547363, "learning_rate": 1.9984107194253305e-05, "loss": 2.292, "step": 1446 }, { "epoch": 4.744262295081967, "grad_norm": 5.521874904632568, "learning_rate": 1.9984047293914912e-05, "loss": 2.1743, "step": 1447 }, { "epoch": 4.747540983606557, "grad_norm": 8.133734703063965, "learning_rate": 1.9983987280996006e-05, "loss": 2.2661, "step": 1448 }, { "epoch": 4.750819672131147, "grad_norm": 7.329035758972168, "learning_rate": 1.9983927155497262e-05, "loss": 2.1326, "step": 1449 }, { "epoch": 4.754098360655737, "grad_norm": 6.760156631469727, "learning_rate": 1.998386691741936e-05, "loss": 2.2324, "step": 1450 }, { "epoch": 4.757377049180328, "grad_norm": 11.178727149963379, "learning_rate": 1.9983806566762975e-05, "loss": 2.1675, "step": 1451 }, { "epoch": 4.760655737704918, "grad_norm": 9.398758888244629, "learning_rate": 1.9983746103528794e-05, "loss": 2.2217, "step": 1452 }, { "epoch": 4.7639344262295085, "grad_norm": 5.239104270935059, "learning_rate": 1.9983685527717493e-05, "loss": 2.2207, "step": 1453 }, { "epoch": 4.767213114754099, "grad_norm": 6.320292949676514, "learning_rate": 1.9983624839329757e-05, "loss": 2.5942, "step": 1454 }, { "epoch": 4.770491803278689, "grad_norm": 8.1940279006958, "learning_rate": 1.9983564038366274e-05, "loss": 2.1997, "step": 1455 }, { "epoch": 4.773770491803279, "grad_norm": 8.403454780578613, "learning_rate": 1.998350312482772e-05, "loss": 2.29, "step": 1456 }, { "epoch": 4.777049180327869, "grad_norm": 26.36774253845215, "learning_rate": 1.9983442098714792e-05, "loss": 2.0679, "step": 1457 }, { "epoch": 4.780327868852459, "grad_norm": 5.951857566833496, "learning_rate": 1.9983380960028174e-05, "loss": 2.4409, "step": 1458 }, { "epoch": 4.783606557377049, "grad_norm": 9.122281074523926, "learning_rate": 1.9983319708768555e-05, "loss": 2.292, "step": 1459 }, { "epoch": 4.786885245901639, "grad_norm": 7.442955493927002, "learning_rate": 1.9983258344936628e-05, "loss": 2.1792, "step": 1460 }, { "epoch": 4.7901639344262295, "grad_norm": 5.811371326446533, "learning_rate": 1.998319686853308e-05, "loss": 2.1499, "step": 1461 }, { "epoch": 4.79344262295082, "grad_norm": 6.453220844268799, "learning_rate": 1.998313527955861e-05, "loss": 2.3154, "step": 1462 }, { "epoch": 4.79672131147541, "grad_norm": 9.373294830322266, "learning_rate": 1.998307357801391e-05, "loss": 2.1787, "step": 1463 }, { "epoch": 4.8, "grad_norm": 6.97261381149292, "learning_rate": 1.9983011763899674e-05, "loss": 2.4019, "step": 1464 }, { "epoch": 4.80327868852459, "grad_norm": 17.498647689819336, "learning_rate": 1.99829498372166e-05, "loss": 2.2021, "step": 1465 }, { "epoch": 4.80655737704918, "grad_norm": 5.704181671142578, "learning_rate": 1.9982887797965388e-05, "loss": 2.3555, "step": 1466 }, { "epoch": 4.80983606557377, "grad_norm": 9.923523902893066, "learning_rate": 1.9982825646146734e-05, "loss": 2.1646, "step": 1467 }, { "epoch": 4.81311475409836, "grad_norm": 5.987518787384033, "learning_rate": 1.9982763381761344e-05, "loss": 2.2412, "step": 1468 }, { "epoch": 4.81639344262295, "grad_norm": 8.220510482788086, "learning_rate": 1.9982701004809918e-05, "loss": 1.9165, "step": 1469 }, { "epoch": 4.8196721311475414, "grad_norm": 7.301466941833496, "learning_rate": 1.998263851529316e-05, "loss": 2.1948, "step": 1470 }, { "epoch": 4.822950819672132, "grad_norm": 8.226073265075684, "learning_rate": 1.9982575913211773e-05, "loss": 2.2534, "step": 1471 }, { "epoch": 4.826229508196722, "grad_norm": 8.224586486816406, "learning_rate": 1.998251319856646e-05, "loss": 2.1406, "step": 1472 }, { "epoch": 4.829508196721312, "grad_norm": 7.8241400718688965, "learning_rate": 1.998245037135793e-05, "loss": 2.1421, "step": 1473 }, { "epoch": 4.832786885245902, "grad_norm": 9.193680763244629, "learning_rate": 1.9982387431586897e-05, "loss": 2.1323, "step": 1474 }, { "epoch": 4.836065573770492, "grad_norm": 8.85851001739502, "learning_rate": 1.9982324379254068e-05, "loss": 2.2915, "step": 1475 }, { "epoch": 4.839344262295082, "grad_norm": 7.173589706420898, "learning_rate": 1.998226121436015e-05, "loss": 2.231, "step": 1476 }, { "epoch": 4.842622950819672, "grad_norm": 10.591852188110352, "learning_rate": 1.998219793690586e-05, "loss": 2.4849, "step": 1477 }, { "epoch": 4.845901639344262, "grad_norm": 8.097725868225098, "learning_rate": 1.9982134546891904e-05, "loss": 2.2065, "step": 1478 }, { "epoch": 4.849180327868853, "grad_norm": 10.569482803344727, "learning_rate": 1.9982071044319007e-05, "loss": 2.335, "step": 1479 }, { "epoch": 4.852459016393443, "grad_norm": 11.611197471618652, "learning_rate": 1.9982007429187876e-05, "loss": 2.2822, "step": 1480 }, { "epoch": 4.855737704918033, "grad_norm": 8.364215850830078, "learning_rate": 1.9981943701499236e-05, "loss": 2.1816, "step": 1481 }, { "epoch": 4.859016393442623, "grad_norm": 7.908661842346191, "learning_rate": 1.9981879861253802e-05, "loss": 2.1387, "step": 1482 }, { "epoch": 4.862295081967213, "grad_norm": 6.804011344909668, "learning_rate": 1.9981815908452296e-05, "loss": 2.0659, "step": 1483 }, { "epoch": 4.865573770491803, "grad_norm": 7.531585216522217, "learning_rate": 1.9981751843095435e-05, "loss": 2.0347, "step": 1484 }, { "epoch": 4.868852459016393, "grad_norm": 8.766802787780762, "learning_rate": 1.9981687665183945e-05, "loss": 2.3159, "step": 1485 }, { "epoch": 4.872131147540983, "grad_norm": 7.28433895111084, "learning_rate": 1.9981623374718545e-05, "loss": 2.3667, "step": 1486 }, { "epoch": 4.8754098360655735, "grad_norm": 8.939484596252441, "learning_rate": 1.9981558971699965e-05, "loss": 2.334, "step": 1487 }, { "epoch": 4.878688524590164, "grad_norm": 12.071334838867188, "learning_rate": 1.9981494456128928e-05, "loss": 2.3862, "step": 1488 }, { "epoch": 4.881967213114754, "grad_norm": 6.632842063903809, "learning_rate": 1.9981429828006162e-05, "loss": 2.1904, "step": 1489 }, { "epoch": 4.885245901639344, "grad_norm": 9.255080223083496, "learning_rate": 1.99813650873324e-05, "loss": 2.3154, "step": 1490 }, { "epoch": 4.888524590163934, "grad_norm": 6.341553211212158, "learning_rate": 1.9981300234108368e-05, "loss": 2.1953, "step": 1491 }, { "epoch": 4.891803278688524, "grad_norm": 6.087660312652588, "learning_rate": 1.9981235268334796e-05, "loss": 2.0737, "step": 1492 }, { "epoch": 4.895081967213114, "grad_norm": 8.58743953704834, "learning_rate": 1.998117019001242e-05, "loss": 2.1143, "step": 1493 }, { "epoch": 4.898360655737705, "grad_norm": 5.786322593688965, "learning_rate": 1.998110499914197e-05, "loss": 2.0952, "step": 1494 }, { "epoch": 4.901639344262295, "grad_norm": 6.627779483795166, "learning_rate": 1.9981039695724186e-05, "loss": 2.1636, "step": 1495 }, { "epoch": 4.9049180327868855, "grad_norm": 8.098111152648926, "learning_rate": 1.9980974279759803e-05, "loss": 2.1309, "step": 1496 }, { "epoch": 4.908196721311476, "grad_norm": 8.973103523254395, "learning_rate": 1.9980908751249556e-05, "loss": 2.3896, "step": 1497 }, { "epoch": 4.911475409836066, "grad_norm": 9.941335678100586, "learning_rate": 1.9980843110194183e-05, "loss": 2.2754, "step": 1498 }, { "epoch": 4.914754098360656, "grad_norm": 8.165277481079102, "learning_rate": 1.9980777356594427e-05, "loss": 1.9807, "step": 1499 }, { "epoch": 4.918032786885246, "grad_norm": 11.77686595916748, "learning_rate": 1.998071149045103e-05, "loss": 2.3657, "step": 1500 }, { "epoch": 4.921311475409836, "grad_norm": 7.103067874908447, "learning_rate": 1.9980645511764733e-05, "loss": 2.209, "step": 1501 }, { "epoch": 4.924590163934426, "grad_norm": 6.238903045654297, "learning_rate": 1.9980579420536282e-05, "loss": 2.2671, "step": 1502 }, { "epoch": 4.927868852459016, "grad_norm": 7.563755512237549, "learning_rate": 1.9980513216766423e-05, "loss": 2.2944, "step": 1503 }, { "epoch": 4.9311475409836065, "grad_norm": 9.377640724182129, "learning_rate": 1.9980446900455893e-05, "loss": 2.0977, "step": 1504 }, { "epoch": 4.934426229508197, "grad_norm": 7.002419948577881, "learning_rate": 1.9980380471605453e-05, "loss": 2.0996, "step": 1505 }, { "epoch": 4.937704918032787, "grad_norm": 5.882610321044922, "learning_rate": 1.9980313930215843e-05, "loss": 2.2944, "step": 1506 }, { "epoch": 4.940983606557377, "grad_norm": 7.829829692840576, "learning_rate": 1.998024727628782e-05, "loss": 2.2119, "step": 1507 }, { "epoch": 4.944262295081967, "grad_norm": 6.730907917022705, "learning_rate": 1.9980180509822132e-05, "loss": 2.0552, "step": 1508 }, { "epoch": 4.947540983606557, "grad_norm": 6.02219820022583, "learning_rate": 1.998011363081953e-05, "loss": 2.1177, "step": 1509 }, { "epoch": 4.950819672131147, "grad_norm": 7.043502330780029, "learning_rate": 1.998004663928077e-05, "loss": 2.3472, "step": 1510 }, { "epoch": 4.954098360655737, "grad_norm": 8.748068809509277, "learning_rate": 1.9979979535206605e-05, "loss": 2.3198, "step": 1511 }, { "epoch": 4.9573770491803275, "grad_norm": 11.634552001953125, "learning_rate": 1.9979912318597797e-05, "loss": 2.3882, "step": 1512 }, { "epoch": 4.9606557377049185, "grad_norm": 6.803202152252197, "learning_rate": 1.9979844989455105e-05, "loss": 2.2207, "step": 1513 }, { "epoch": 4.963934426229509, "grad_norm": 8.508455276489258, "learning_rate": 1.997977754777928e-05, "loss": 2.0601, "step": 1514 }, { "epoch": 4.967213114754099, "grad_norm": 7.415704727172852, "learning_rate": 1.9979709993571086e-05, "loss": 2.0366, "step": 1515 }, { "epoch": 4.970491803278689, "grad_norm": 7.393529415130615, "learning_rate": 1.9979642326831287e-05, "loss": 2.1929, "step": 1516 }, { "epoch": 4.973770491803279, "grad_norm": 10.494998931884766, "learning_rate": 1.9979574547560644e-05, "loss": 1.9795, "step": 1517 }, { "epoch": 4.977049180327869, "grad_norm": 7.580985069274902, "learning_rate": 1.997950665575992e-05, "loss": 2.2778, "step": 1518 }, { "epoch": 4.980327868852459, "grad_norm": 10.378073692321777, "learning_rate": 1.9979438651429886e-05, "loss": 2.2339, "step": 1519 }, { "epoch": 4.983606557377049, "grad_norm": 8.468212127685547, "learning_rate": 1.9979370534571304e-05, "loss": 2.2144, "step": 1520 }, { "epoch": 4.9868852459016395, "grad_norm": 7.934915065765381, "learning_rate": 1.9979302305184943e-05, "loss": 2.0815, "step": 1521 }, { "epoch": 4.99016393442623, "grad_norm": 6.991296768188477, "learning_rate": 1.997923396327157e-05, "loss": 2.2109, "step": 1522 }, { "epoch": 4.99344262295082, "grad_norm": 10.399545669555664, "learning_rate": 1.9979165508831964e-05, "loss": 2.123, "step": 1523 }, { "epoch": 4.99672131147541, "grad_norm": 10.524517059326172, "learning_rate": 1.9979096941866887e-05, "loss": 2.2485, "step": 1524 }, { "epoch": 5.0, "grad_norm": 7.140697002410889, "learning_rate": 1.997902826237712e-05, "loss": 2.187, "step": 1525 }, { "epoch": 5.00327868852459, "grad_norm": 8.169147491455078, "learning_rate": 1.997895947036343e-05, "loss": 2.1108, "step": 1526 }, { "epoch": 5.00655737704918, "grad_norm": 8.516389846801758, "learning_rate": 1.9978890565826596e-05, "loss": 1.9973, "step": 1527 }, { "epoch": 5.00983606557377, "grad_norm": 8.228826522827148, "learning_rate": 1.9978821548767398e-05, "loss": 2.1606, "step": 1528 }, { "epoch": 5.0131147540983605, "grad_norm": 11.464664459228516, "learning_rate": 1.997875241918661e-05, "loss": 2.1118, "step": 1529 }, { "epoch": 5.016393442622951, "grad_norm": 13.243736267089844, "learning_rate": 1.9978683177085013e-05, "loss": 1.9395, "step": 1530 }, { "epoch": 5.019672131147541, "grad_norm": 8.640641212463379, "learning_rate": 1.9978613822463392e-05, "loss": 2.1738, "step": 1531 }, { "epoch": 5.022950819672131, "grad_norm": 9.802693367004395, "learning_rate": 1.997854435532252e-05, "loss": 2.1245, "step": 1532 }, { "epoch": 5.026229508196721, "grad_norm": 10.737569808959961, "learning_rate": 1.997847477566319e-05, "loss": 1.7605, "step": 1533 }, { "epoch": 5.029508196721311, "grad_norm": 8.67050552368164, "learning_rate": 1.9978405083486175e-05, "loss": 2.2017, "step": 1534 }, { "epoch": 5.032786885245901, "grad_norm": 8.70710563659668, "learning_rate": 1.9978335278792272e-05, "loss": 2.0735, "step": 1535 }, { "epoch": 5.036065573770492, "grad_norm": 6.711272239685059, "learning_rate": 1.9978265361582264e-05, "loss": 2.1763, "step": 1536 }, { "epoch": 5.039344262295082, "grad_norm": 25.72671127319336, "learning_rate": 1.997819533185694e-05, "loss": 2.23, "step": 1537 }, { "epoch": 5.0426229508196725, "grad_norm": 8.592236518859863, "learning_rate": 1.9978125189617086e-05, "loss": 2.0771, "step": 1538 }, { "epoch": 5.045901639344263, "grad_norm": 8.001669883728027, "learning_rate": 1.9978054934863496e-05, "loss": 2.2251, "step": 1539 }, { "epoch": 5.049180327868853, "grad_norm": 8.462847709655762, "learning_rate": 1.9977984567596965e-05, "loss": 2.0093, "step": 1540 }, { "epoch": 5.052459016393443, "grad_norm": 6.095378875732422, "learning_rate": 1.9977914087818284e-05, "loss": 2.2354, "step": 1541 }, { "epoch": 5.055737704918033, "grad_norm": 7.159593105316162, "learning_rate": 1.9977843495528245e-05, "loss": 2.2114, "step": 1542 }, { "epoch": 5.059016393442623, "grad_norm": 9.8024263381958, "learning_rate": 1.9977772790727646e-05, "loss": 2.1504, "step": 1543 }, { "epoch": 5.062295081967213, "grad_norm": 8.518183708190918, "learning_rate": 1.9977701973417286e-05, "loss": 2.02, "step": 1544 }, { "epoch": 5.065573770491803, "grad_norm": 9.790088653564453, "learning_rate": 1.997763104359796e-05, "loss": 2.0073, "step": 1545 }, { "epoch": 5.0688524590163935, "grad_norm": 10.21187686920166, "learning_rate": 1.9977560001270472e-05, "loss": 2.0898, "step": 1546 }, { "epoch": 5.072131147540984, "grad_norm": 9.070298194885254, "learning_rate": 1.9977488846435616e-05, "loss": 1.978, "step": 1547 }, { "epoch": 5.075409836065574, "grad_norm": 8.795758247375488, "learning_rate": 1.9977417579094206e-05, "loss": 2.0088, "step": 1548 }, { "epoch": 5.078688524590164, "grad_norm": 9.655962944030762, "learning_rate": 1.9977346199247037e-05, "loss": 2.1509, "step": 1549 }, { "epoch": 5.081967213114754, "grad_norm": 6.732538223266602, "learning_rate": 1.9977274706894912e-05, "loss": 2.1411, "step": 1550 }, { "epoch": 5.085245901639344, "grad_norm": 6.911019325256348, "learning_rate": 1.9977203102038645e-05, "loss": 2.0381, "step": 1551 }, { "epoch": 5.088524590163934, "grad_norm": 5.987919330596924, "learning_rate": 1.9977131384679038e-05, "loss": 1.9971, "step": 1552 }, { "epoch": 5.091803278688524, "grad_norm": 11.64391040802002, "learning_rate": 1.99770595548169e-05, "loss": 2.3379, "step": 1553 }, { "epoch": 5.0950819672131145, "grad_norm": 7.635878562927246, "learning_rate": 1.9976987612453044e-05, "loss": 2.1973, "step": 1554 }, { "epoch": 5.098360655737705, "grad_norm": 9.873507499694824, "learning_rate": 1.9976915557588277e-05, "loss": 2.1602, "step": 1555 }, { "epoch": 5.101639344262295, "grad_norm": 9.08297061920166, "learning_rate": 1.9976843390223416e-05, "loss": 2.1504, "step": 1556 }, { "epoch": 5.104918032786885, "grad_norm": 18.10301399230957, "learning_rate": 1.997677111035927e-05, "loss": 2.0391, "step": 1557 }, { "epoch": 5.108196721311476, "grad_norm": 7.81624174118042, "learning_rate": 1.9976698717996662e-05, "loss": 2.0776, "step": 1558 }, { "epoch": 5.111475409836066, "grad_norm": 6.890059947967529, "learning_rate": 1.9976626213136396e-05, "loss": 2.1147, "step": 1559 }, { "epoch": 5.114754098360656, "grad_norm": 7.69344425201416, "learning_rate": 1.9976553595779298e-05, "loss": 2.0552, "step": 1560 }, { "epoch": 5.118032786885246, "grad_norm": 11.44412899017334, "learning_rate": 1.997648086592619e-05, "loss": 1.9536, "step": 1561 }, { "epoch": 5.121311475409836, "grad_norm": 6.6571431159973145, "learning_rate": 1.997640802357788e-05, "loss": 2.2432, "step": 1562 }, { "epoch": 5.1245901639344265, "grad_norm": 12.879629135131836, "learning_rate": 1.9976335068735202e-05, "loss": 2.0544, "step": 1563 }, { "epoch": 5.127868852459017, "grad_norm": 9.828841209411621, "learning_rate": 1.9976262001398973e-05, "loss": 2.1509, "step": 1564 }, { "epoch": 5.131147540983607, "grad_norm": 9.415770530700684, "learning_rate": 1.9976188821570013e-05, "loss": 2.0737, "step": 1565 }, { "epoch": 5.134426229508197, "grad_norm": 8.337950706481934, "learning_rate": 1.9976115529249155e-05, "loss": 1.9692, "step": 1566 }, { "epoch": 5.137704918032787, "grad_norm": 7.890899658203125, "learning_rate": 1.9976042124437222e-05, "loss": 2.1221, "step": 1567 }, { "epoch": 5.140983606557377, "grad_norm": 11.9644775390625, "learning_rate": 1.9975968607135038e-05, "loss": 2.0874, "step": 1568 }, { "epoch": 5.144262295081967, "grad_norm": 4.8267364501953125, "learning_rate": 1.9975894977343437e-05, "loss": 2.2925, "step": 1569 }, { "epoch": 5.147540983606557, "grad_norm": 9.445503234863281, "learning_rate": 1.997582123506325e-05, "loss": 2.1729, "step": 1570 }, { "epoch": 5.150819672131147, "grad_norm": 7.435296535491943, "learning_rate": 1.9975747380295307e-05, "loss": 2.0776, "step": 1571 }, { "epoch": 5.154098360655738, "grad_norm": 7.472503185272217, "learning_rate": 1.9975673413040437e-05, "loss": 2.1753, "step": 1572 }, { "epoch": 5.157377049180328, "grad_norm": 9.920249938964844, "learning_rate": 1.997559933329948e-05, "loss": 2.3408, "step": 1573 }, { "epoch": 5.160655737704918, "grad_norm": 7.888008117675781, "learning_rate": 1.9975525141073263e-05, "loss": 1.9404, "step": 1574 }, { "epoch": 5.163934426229508, "grad_norm": 6.402342319488525, "learning_rate": 1.9975450836362635e-05, "loss": 2.1289, "step": 1575 }, { "epoch": 5.167213114754098, "grad_norm": 7.906586647033691, "learning_rate": 1.9975376419168423e-05, "loss": 2.0938, "step": 1576 }, { "epoch": 5.170491803278688, "grad_norm": 7.454020023345947, "learning_rate": 1.997530188949147e-05, "loss": 1.8862, "step": 1577 }, { "epoch": 5.173770491803278, "grad_norm": 6.743445873260498, "learning_rate": 1.997522724733262e-05, "loss": 2.1201, "step": 1578 }, { "epoch": 5.177049180327868, "grad_norm": 7.975820541381836, "learning_rate": 1.9975152492692706e-05, "loss": 2.0737, "step": 1579 }, { "epoch": 5.180327868852459, "grad_norm": 6.961062908172607, "learning_rate": 1.997507762557258e-05, "loss": 2.0698, "step": 1580 }, { "epoch": 5.18360655737705, "grad_norm": 6.637353420257568, "learning_rate": 1.9975002645973082e-05, "loss": 2.187, "step": 1581 }, { "epoch": 5.18688524590164, "grad_norm": 5.540402889251709, "learning_rate": 1.9974927553895056e-05, "loss": 2.1494, "step": 1582 }, { "epoch": 5.19016393442623, "grad_norm": 12.659754753112793, "learning_rate": 1.997485234933935e-05, "loss": 1.9165, "step": 1583 }, { "epoch": 5.19344262295082, "grad_norm": 7.882145404815674, "learning_rate": 1.9974777032306817e-05, "loss": 2.0127, "step": 1584 }, { "epoch": 5.19672131147541, "grad_norm": 7.560960292816162, "learning_rate": 1.9974701602798298e-05, "loss": 2.29, "step": 1585 }, { "epoch": 5.2, "grad_norm": 9.911782264709473, "learning_rate": 1.997462606081465e-05, "loss": 2.1455, "step": 1586 }, { "epoch": 5.20327868852459, "grad_norm": 8.18790340423584, "learning_rate": 1.9974550406356718e-05, "loss": 2.0942, "step": 1587 }, { "epoch": 5.20655737704918, "grad_norm": 7.457713603973389, "learning_rate": 1.997447463942536e-05, "loss": 1.9116, "step": 1588 }, { "epoch": 5.2098360655737705, "grad_norm": 9.996002197265625, "learning_rate": 1.997439876002143e-05, "loss": 1.9565, "step": 1589 }, { "epoch": 5.213114754098361, "grad_norm": 9.834907531738281, "learning_rate": 1.9974322768145787e-05, "loss": 1.9829, "step": 1590 }, { "epoch": 5.216393442622951, "grad_norm": 9.83687686920166, "learning_rate": 1.997424666379928e-05, "loss": 2.2842, "step": 1591 }, { "epoch": 5.219672131147541, "grad_norm": 8.268999099731445, "learning_rate": 1.9974170446982773e-05, "loss": 2.0845, "step": 1592 }, { "epoch": 5.222950819672131, "grad_norm": 11.629720687866211, "learning_rate": 1.9974094117697125e-05, "loss": 1.8154, "step": 1593 }, { "epoch": 5.226229508196721, "grad_norm": 8.343816757202148, "learning_rate": 1.997401767594319e-05, "loss": 1.9014, "step": 1594 }, { "epoch": 5.229508196721311, "grad_norm": 6.051238536834717, "learning_rate": 1.997394112172184e-05, "loss": 2.0776, "step": 1595 }, { "epoch": 5.232786885245901, "grad_norm": 10.33871841430664, "learning_rate": 1.9973864455033933e-05, "loss": 1.9712, "step": 1596 }, { "epoch": 5.2360655737704915, "grad_norm": 6.854450225830078, "learning_rate": 1.9973787675880334e-05, "loss": 2.4346, "step": 1597 }, { "epoch": 5.239344262295082, "grad_norm": 10.249946594238281, "learning_rate": 1.997371078426191e-05, "loss": 2.1284, "step": 1598 }, { "epoch": 5.242622950819672, "grad_norm": 7.096327304840088, "learning_rate": 1.997363378017952e-05, "loss": 2.2769, "step": 1599 }, { "epoch": 5.245901639344262, "grad_norm": 8.281886100769043, "learning_rate": 1.997355666363405e-05, "loss": 2.1162, "step": 1600 }, { "epoch": 5.249180327868853, "grad_norm": 8.540175437927246, "learning_rate": 1.997347943462635e-05, "loss": 1.8965, "step": 1601 }, { "epoch": 5.252459016393443, "grad_norm": 7.194281101226807, "learning_rate": 1.9973402093157303e-05, "loss": 2.0708, "step": 1602 }, { "epoch": 5.255737704918033, "grad_norm": 8.408552169799805, "learning_rate": 1.997332463922778e-05, "loss": 1.8906, "step": 1603 }, { "epoch": 5.259016393442623, "grad_norm": 8.005224227905273, "learning_rate": 1.9973247072838646e-05, "loss": 2.0259, "step": 1604 }, { "epoch": 5.262295081967213, "grad_norm": 7.879806995391846, "learning_rate": 1.9973169393990784e-05, "loss": 2.0474, "step": 1605 }, { "epoch": 5.2655737704918035, "grad_norm": 5.85604190826416, "learning_rate": 1.997309160268507e-05, "loss": 2.1162, "step": 1606 }, { "epoch": 5.268852459016394, "grad_norm": 11.283258438110352, "learning_rate": 1.9973013698922378e-05, "loss": 1.9502, "step": 1607 }, { "epoch": 5.272131147540984, "grad_norm": 7.109856605529785, "learning_rate": 1.9972935682703584e-05, "loss": 2.2446, "step": 1608 }, { "epoch": 5.275409836065574, "grad_norm": 8.431119918823242, "learning_rate": 1.9972857554029576e-05, "loss": 2.0669, "step": 1609 }, { "epoch": 5.278688524590164, "grad_norm": 7.283205986022949, "learning_rate": 1.9972779312901227e-05, "loss": 2.1592, "step": 1610 }, { "epoch": 5.281967213114754, "grad_norm": 8.11882209777832, "learning_rate": 1.9972700959319422e-05, "loss": 1.9775, "step": 1611 }, { "epoch": 5.285245901639344, "grad_norm": 8.54434871673584, "learning_rate": 1.9972622493285045e-05, "loss": 2.0171, "step": 1612 }, { "epoch": 5.288524590163934, "grad_norm": 7.757009506225586, "learning_rate": 1.997254391479898e-05, "loss": 2.1128, "step": 1613 }, { "epoch": 5.2918032786885245, "grad_norm": 9.159770965576172, "learning_rate": 1.9972465223862114e-05, "loss": 2.0981, "step": 1614 }, { "epoch": 5.295081967213115, "grad_norm": 7.892049312591553, "learning_rate": 1.9972386420475334e-05, "loss": 1.8174, "step": 1615 }, { "epoch": 5.298360655737705, "grad_norm": 10.985105514526367, "learning_rate": 1.997230750463953e-05, "loss": 2.1362, "step": 1616 }, { "epoch": 5.301639344262295, "grad_norm": 8.998886108398438, "learning_rate": 1.997222847635559e-05, "loss": 2.1538, "step": 1617 }, { "epoch": 5.304918032786885, "grad_norm": 10.132243156433105, "learning_rate": 1.9972149335624404e-05, "loss": 2.0493, "step": 1618 }, { "epoch": 5.308196721311475, "grad_norm": 14.825630187988281, "learning_rate": 1.9972070082446863e-05, "loss": 2.0649, "step": 1619 }, { "epoch": 5.311475409836065, "grad_norm": 11.789731979370117, "learning_rate": 1.997199071682387e-05, "loss": 2.1548, "step": 1620 }, { "epoch": 5.314754098360655, "grad_norm": 8.266108512878418, "learning_rate": 1.997191123875631e-05, "loss": 2.1035, "step": 1621 }, { "epoch": 5.3180327868852455, "grad_norm": 7.439622402191162, "learning_rate": 1.997183164824508e-05, "loss": 2.0879, "step": 1622 }, { "epoch": 5.321311475409836, "grad_norm": 7.902544021606445, "learning_rate": 1.9971751945291083e-05, "loss": 2.2437, "step": 1623 }, { "epoch": 5.324590163934427, "grad_norm": 7.210568428039551, "learning_rate": 1.9971672129895214e-05, "loss": 2.0317, "step": 1624 }, { "epoch": 5.327868852459017, "grad_norm": 8.070721626281738, "learning_rate": 1.9971592202058373e-05, "loss": 2.0898, "step": 1625 }, { "epoch": 5.331147540983607, "grad_norm": 8.594472885131836, "learning_rate": 1.9971512161781463e-05, "loss": 2.1631, "step": 1626 }, { "epoch": 5.334426229508197, "grad_norm": 9.513978004455566, "learning_rate": 1.9971432009065384e-05, "loss": 2.0605, "step": 1627 }, { "epoch": 5.337704918032787, "grad_norm": 9.80453109741211, "learning_rate": 1.9971351743911046e-05, "loss": 2.2871, "step": 1628 }, { "epoch": 5.340983606557377, "grad_norm": 8.952208518981934, "learning_rate": 1.9971271366319348e-05, "loss": 2.2183, "step": 1629 }, { "epoch": 5.344262295081967, "grad_norm": 7.801649570465088, "learning_rate": 1.9971190876291195e-05, "loss": 2.3604, "step": 1630 }, { "epoch": 5.3475409836065575, "grad_norm": 8.34658145904541, "learning_rate": 1.99711102738275e-05, "loss": 2.2446, "step": 1631 }, { "epoch": 5.350819672131148, "grad_norm": 7.82101583480835, "learning_rate": 1.9971029558929166e-05, "loss": 2.0542, "step": 1632 }, { "epoch": 5.354098360655738, "grad_norm": 8.58828353881836, "learning_rate": 1.9970948731597112e-05, "loss": 2.1567, "step": 1633 }, { "epoch": 5.357377049180328, "grad_norm": 6.323055267333984, "learning_rate": 1.997086779183224e-05, "loss": 1.9126, "step": 1634 }, { "epoch": 5.360655737704918, "grad_norm": 10.872703552246094, "learning_rate": 1.9970786739635465e-05, "loss": 2.2671, "step": 1635 }, { "epoch": 5.363934426229508, "grad_norm": 8.531351089477539, "learning_rate": 1.9970705575007705e-05, "loss": 2.2495, "step": 1636 }, { "epoch": 5.367213114754098, "grad_norm": 9.158732414245605, "learning_rate": 1.997062429794987e-05, "loss": 2.0623, "step": 1637 }, { "epoch": 5.370491803278688, "grad_norm": 8.535676956176758, "learning_rate": 1.9970542908462883e-05, "loss": 1.9297, "step": 1638 }, { "epoch": 5.3737704918032785, "grad_norm": 6.640349864959717, "learning_rate": 1.9970461406547657e-05, "loss": 2.0259, "step": 1639 }, { "epoch": 5.377049180327869, "grad_norm": 7.525559902191162, "learning_rate": 1.997037979220511e-05, "loss": 2.3735, "step": 1640 }, { "epoch": 5.380327868852459, "grad_norm": 7.6560845375061035, "learning_rate": 1.9970298065436167e-05, "loss": 2.0215, "step": 1641 }, { "epoch": 5.383606557377049, "grad_norm": 7.43419885635376, "learning_rate": 1.9970216226241742e-05, "loss": 2.3159, "step": 1642 }, { "epoch": 5.386885245901639, "grad_norm": 7.516702651977539, "learning_rate": 1.997013427462277e-05, "loss": 2.1582, "step": 1643 }, { "epoch": 5.390163934426229, "grad_norm": 10.913426399230957, "learning_rate": 1.9970052210580162e-05, "loss": 1.9482, "step": 1644 }, { "epoch": 5.39344262295082, "grad_norm": 8.922063827514648, "learning_rate": 1.9969970034114853e-05, "loss": 1.9937, "step": 1645 }, { "epoch": 5.39672131147541, "grad_norm": 11.452108383178711, "learning_rate": 1.9969887745227764e-05, "loss": 2.291, "step": 1646 }, { "epoch": 5.4, "grad_norm": 8.902762413024902, "learning_rate": 1.9969805343919822e-05, "loss": 2.1025, "step": 1647 }, { "epoch": 5.4032786885245905, "grad_norm": 8.197122573852539, "learning_rate": 1.9969722830191964e-05, "loss": 2.1567, "step": 1648 }, { "epoch": 5.406557377049181, "grad_norm": 15.89451789855957, "learning_rate": 1.996964020404511e-05, "loss": 2.0864, "step": 1649 }, { "epoch": 5.409836065573771, "grad_norm": 6.867155075073242, "learning_rate": 1.99695574654802e-05, "loss": 1.9355, "step": 1650 }, { "epoch": 5.413114754098361, "grad_norm": 8.084172248840332, "learning_rate": 1.996947461449816e-05, "loss": 2.0825, "step": 1651 }, { "epoch": 5.416393442622951, "grad_norm": 18.816020965576172, "learning_rate": 1.9969391651099933e-05, "loss": 2.3369, "step": 1652 }, { "epoch": 5.419672131147541, "grad_norm": 9.511040687561035, "learning_rate": 1.9969308575286445e-05, "loss": 2.2812, "step": 1653 }, { "epoch": 5.422950819672131, "grad_norm": 8.585094451904297, "learning_rate": 1.996922538705864e-05, "loss": 2.1646, "step": 1654 }, { "epoch": 5.426229508196721, "grad_norm": 7.608633995056152, "learning_rate": 1.9969142086417452e-05, "loss": 2.0044, "step": 1655 }, { "epoch": 5.4295081967213115, "grad_norm": 7.822731018066406, "learning_rate": 1.9969058673363824e-05, "loss": 2.2378, "step": 1656 }, { "epoch": 5.432786885245902, "grad_norm": 9.16964054107666, "learning_rate": 1.996897514789869e-05, "loss": 2.0122, "step": 1657 }, { "epoch": 5.436065573770492, "grad_norm": 7.250139236450195, "learning_rate": 1.9968891510023e-05, "loss": 1.8979, "step": 1658 }, { "epoch": 5.439344262295082, "grad_norm": 7.926200866699219, "learning_rate": 1.9968807759737695e-05, "loss": 2.1899, "step": 1659 }, { "epoch": 5.442622950819672, "grad_norm": 6.922533988952637, "learning_rate": 1.9968723897043714e-05, "loss": 2.0811, "step": 1660 }, { "epoch": 5.445901639344262, "grad_norm": 6.226615905761719, "learning_rate": 1.9968639921942007e-05, "loss": 2.1973, "step": 1661 }, { "epoch": 5.449180327868852, "grad_norm": 6.863236904144287, "learning_rate": 1.996855583443352e-05, "loss": 2.0532, "step": 1662 }, { "epoch": 5.452459016393442, "grad_norm": 10.970602989196777, "learning_rate": 1.99684716345192e-05, "loss": 2.0405, "step": 1663 }, { "epoch": 5.4557377049180324, "grad_norm": 9.645875930786133, "learning_rate": 1.9968387322199998e-05, "loss": 1.8467, "step": 1664 }, { "epoch": 5.459016393442623, "grad_norm": 7.8525309562683105, "learning_rate": 1.9968302897476865e-05, "loss": 2.166, "step": 1665 }, { "epoch": 5.462295081967213, "grad_norm": 8.811202049255371, "learning_rate": 1.9968218360350752e-05, "loss": 1.9419, "step": 1666 }, { "epoch": 5.465573770491803, "grad_norm": 8.981207847595215, "learning_rate": 1.9968133710822616e-05, "loss": 2.0283, "step": 1667 }, { "epoch": 5.468852459016394, "grad_norm": 8.536513328552246, "learning_rate": 1.9968048948893406e-05, "loss": 2.0098, "step": 1668 }, { "epoch": 5.472131147540984, "grad_norm": 9.695384979248047, "learning_rate": 1.9967964074564077e-05, "loss": 2.0493, "step": 1669 }, { "epoch": 5.475409836065574, "grad_norm": 7.142619609832764, "learning_rate": 1.9967879087835594e-05, "loss": 2.1807, "step": 1670 }, { "epoch": 5.478688524590164, "grad_norm": 7.551440238952637, "learning_rate": 1.9967793988708908e-05, "loss": 2.1785, "step": 1671 }, { "epoch": 5.481967213114754, "grad_norm": 8.29560375213623, "learning_rate": 1.996770877718498e-05, "loss": 2.0474, "step": 1672 }, { "epoch": 5.4852459016393444, "grad_norm": 6.022891998291016, "learning_rate": 1.9967623453264773e-05, "loss": 2.0244, "step": 1673 }, { "epoch": 5.488524590163935, "grad_norm": 8.456840515136719, "learning_rate": 1.9967538016949247e-05, "loss": 1.936, "step": 1674 }, { "epoch": 5.491803278688525, "grad_norm": 12.52991771697998, "learning_rate": 1.9967452468239366e-05, "loss": 2.0488, "step": 1675 }, { "epoch": 5.495081967213115, "grad_norm": 7.76677131652832, "learning_rate": 1.9967366807136094e-05, "loss": 2.0327, "step": 1676 }, { "epoch": 5.498360655737705, "grad_norm": 7.79191255569458, "learning_rate": 1.99672810336404e-05, "loss": 2.0635, "step": 1677 }, { "epoch": 5.501639344262295, "grad_norm": 11.475760459899902, "learning_rate": 1.996719514775325e-05, "loss": 2.3271, "step": 1678 }, { "epoch": 5.504918032786885, "grad_norm": 19.77271842956543, "learning_rate": 1.9967109149475608e-05, "loss": 2.0728, "step": 1679 }, { "epoch": 5.508196721311475, "grad_norm": 6.821928977966309, "learning_rate": 1.9967023038808448e-05, "loss": 2.0361, "step": 1680 }, { "epoch": 5.511475409836065, "grad_norm": 14.942279815673828, "learning_rate": 1.996693681575274e-05, "loss": 2.1392, "step": 1681 }, { "epoch": 5.5147540983606556, "grad_norm": 7.662624359130859, "learning_rate": 1.996685048030946e-05, "loss": 1.9971, "step": 1682 }, { "epoch": 5.518032786885246, "grad_norm": 8.633316993713379, "learning_rate": 1.996676403247957e-05, "loss": 2.0972, "step": 1683 }, { "epoch": 5.521311475409836, "grad_norm": 7.9206366539001465, "learning_rate": 1.9966677472264064e-05, "loss": 1.9932, "step": 1684 }, { "epoch": 5.524590163934426, "grad_norm": 5.791770935058594, "learning_rate": 1.99665907996639e-05, "loss": 2.0205, "step": 1685 }, { "epoch": 5.527868852459016, "grad_norm": 10.845852851867676, "learning_rate": 1.996650401468006e-05, "loss": 2.0869, "step": 1686 }, { "epoch": 5.531147540983606, "grad_norm": 54.85243606567383, "learning_rate": 1.9966417117313527e-05, "loss": 2.0107, "step": 1687 }, { "epoch": 5.534426229508197, "grad_norm": 7.753519058227539, "learning_rate": 1.996633010756528e-05, "loss": 1.8486, "step": 1688 }, { "epoch": 5.537704918032787, "grad_norm": 7.704925537109375, "learning_rate": 1.9966242985436298e-05, "loss": 2.2676, "step": 1689 }, { "epoch": 5.540983606557377, "grad_norm": 6.987824440002441, "learning_rate": 1.996615575092756e-05, "loss": 2.0698, "step": 1690 }, { "epoch": 5.5442622950819676, "grad_norm": 8.409460067749023, "learning_rate": 1.996606840404006e-05, "loss": 2.3857, "step": 1691 }, { "epoch": 5.547540983606558, "grad_norm": 8.987788200378418, "learning_rate": 1.9965980944774773e-05, "loss": 2.1069, "step": 1692 }, { "epoch": 5.550819672131148, "grad_norm": 7.037967681884766, "learning_rate": 1.9965893373132686e-05, "loss": 2.0752, "step": 1693 }, { "epoch": 5.554098360655738, "grad_norm": 11.354703903198242, "learning_rate": 1.9965805689114796e-05, "loss": 1.957, "step": 1694 }, { "epoch": 5.557377049180328, "grad_norm": 18.504173278808594, "learning_rate": 1.996571789272208e-05, "loss": 1.8984, "step": 1695 }, { "epoch": 5.560655737704918, "grad_norm": 11.50061321258545, "learning_rate": 1.9965629983955535e-05, "loss": 2.1846, "step": 1696 }, { "epoch": 5.563934426229508, "grad_norm": 11.31307315826416, "learning_rate": 1.9965541962816154e-05, "loss": 1.9004, "step": 1697 }, { "epoch": 5.567213114754098, "grad_norm": 8.803403854370117, "learning_rate": 1.996545382930492e-05, "loss": 2.0117, "step": 1698 }, { "epoch": 5.5704918032786885, "grad_norm": 10.229866981506348, "learning_rate": 1.9965365583422834e-05, "loss": 2.2051, "step": 1699 }, { "epoch": 5.573770491803279, "grad_norm": 8.565326690673828, "learning_rate": 1.996527722517089e-05, "loss": 2.1479, "step": 1700 }, { "epoch": 5.577049180327869, "grad_norm": 9.149974822998047, "learning_rate": 1.9965188754550086e-05, "loss": 2.2163, "step": 1701 }, { "epoch": 5.580327868852459, "grad_norm": 10.093875885009766, "learning_rate": 1.9965100171561414e-05, "loss": 2.0347, "step": 1702 }, { "epoch": 5.583606557377049, "grad_norm": 8.070230484008789, "learning_rate": 1.9965011476205876e-05, "loss": 2.0776, "step": 1703 }, { "epoch": 5.586885245901639, "grad_norm": 30.06971549987793, "learning_rate": 1.996492266848448e-05, "loss": 2.0762, "step": 1704 }, { "epoch": 5.590163934426229, "grad_norm": 7.6577534675598145, "learning_rate": 1.9964833748398213e-05, "loss": 2.165, "step": 1705 }, { "epoch": 5.593442622950819, "grad_norm": 6.691161155700684, "learning_rate": 1.996474471594809e-05, "loss": 1.9087, "step": 1706 }, { "epoch": 5.5967213114754095, "grad_norm": 9.5197114944458, "learning_rate": 1.9964655571135105e-05, "loss": 1.9446, "step": 1707 }, { "epoch": 5.6, "grad_norm": 11.129594802856445, "learning_rate": 1.9964566313960265e-05, "loss": 2.2769, "step": 1708 }, { "epoch": 5.60327868852459, "grad_norm": 9.40282154083252, "learning_rate": 1.9964476944424585e-05, "loss": 2.0, "step": 1709 }, { "epoch": 5.60655737704918, "grad_norm": 7.972977638244629, "learning_rate": 1.9964387462529066e-05, "loss": 2.1177, "step": 1710 }, { "epoch": 5.60983606557377, "grad_norm": 8.024630546569824, "learning_rate": 1.9964297868274717e-05, "loss": 1.9883, "step": 1711 }, { "epoch": 5.613114754098361, "grad_norm": 19.011615753173828, "learning_rate": 1.996420816166255e-05, "loss": 1.9688, "step": 1712 }, { "epoch": 5.616393442622951, "grad_norm": 8.935709953308105, "learning_rate": 1.9964118342693576e-05, "loss": 2.0356, "step": 1713 }, { "epoch": 5.619672131147541, "grad_norm": 11.333087921142578, "learning_rate": 1.9964028411368805e-05, "loss": 2.022, "step": 1714 }, { "epoch": 5.622950819672131, "grad_norm": 35.70766067504883, "learning_rate": 1.9963938367689255e-05, "loss": 2.1675, "step": 1715 }, { "epoch": 5.6262295081967215, "grad_norm": 9.351444244384766, "learning_rate": 1.996384821165594e-05, "loss": 2.1235, "step": 1716 }, { "epoch": 5.629508196721312, "grad_norm": 9.697672843933105, "learning_rate": 1.996375794326988e-05, "loss": 2.0559, "step": 1717 }, { "epoch": 5.632786885245902, "grad_norm": 13.820737838745117, "learning_rate": 1.9963667562532083e-05, "loss": 1.9727, "step": 1718 }, { "epoch": 5.636065573770492, "grad_norm": 18.06081199645996, "learning_rate": 1.996357706944358e-05, "loss": 2.0386, "step": 1719 }, { "epoch": 5.639344262295082, "grad_norm": 9.709168434143066, "learning_rate": 1.9963486464005385e-05, "loss": 2.0117, "step": 1720 }, { "epoch": 5.642622950819672, "grad_norm": 10.194173812866211, "learning_rate": 1.996339574621852e-05, "loss": 2.134, "step": 1721 }, { "epoch": 5.645901639344262, "grad_norm": 13.35315990447998, "learning_rate": 1.9963304916084008e-05, "loss": 2.0859, "step": 1722 }, { "epoch": 5.649180327868852, "grad_norm": 13.715578079223633, "learning_rate": 1.9963213973602876e-05, "loss": 1.978, "step": 1723 }, { "epoch": 5.6524590163934425, "grad_norm": 8.872730255126953, "learning_rate": 1.9963122918776142e-05, "loss": 2.1567, "step": 1724 }, { "epoch": 5.655737704918033, "grad_norm": 9.130016326904297, "learning_rate": 1.9963031751604843e-05, "loss": 2.1377, "step": 1725 }, { "epoch": 5.659016393442623, "grad_norm": 8.28971004486084, "learning_rate": 1.9962940472090003e-05, "loss": 2.2627, "step": 1726 }, { "epoch": 5.662295081967213, "grad_norm": 8.247785568237305, "learning_rate": 1.9962849080232643e-05, "loss": 2.2495, "step": 1727 }, { "epoch": 5.665573770491803, "grad_norm": 7.018664360046387, "learning_rate": 1.996275757603381e-05, "loss": 2.3506, "step": 1728 }, { "epoch": 5.668852459016393, "grad_norm": 8.636150360107422, "learning_rate": 1.996266595949452e-05, "loss": 2.2549, "step": 1729 }, { "epoch": 5.672131147540983, "grad_norm": 9.781607627868652, "learning_rate": 1.9962574230615816e-05, "loss": 2.2524, "step": 1730 }, { "epoch": 5.675409836065574, "grad_norm": 7.020482540130615, "learning_rate": 1.996248238939873e-05, "loss": 1.916, "step": 1731 }, { "epoch": 5.678688524590164, "grad_norm": 16.710681915283203, "learning_rate": 1.9962390435844296e-05, "loss": 2.2446, "step": 1732 }, { "epoch": 5.6819672131147545, "grad_norm": 9.091238975524902, "learning_rate": 1.996229836995355e-05, "loss": 1.9404, "step": 1733 }, { "epoch": 5.685245901639345, "grad_norm": 7.287168979644775, "learning_rate": 1.996220619172753e-05, "loss": 2.0493, "step": 1734 }, { "epoch": 5.688524590163935, "grad_norm": 10.95102310180664, "learning_rate": 1.9962113901167282e-05, "loss": 2.0464, "step": 1735 }, { "epoch": 5.691803278688525, "grad_norm": 10.545698165893555, "learning_rate": 1.9962021498273837e-05, "loss": 2.0254, "step": 1736 }, { "epoch": 5.695081967213115, "grad_norm": 20.686634063720703, "learning_rate": 1.9961928983048244e-05, "loss": 2.021, "step": 1737 }, { "epoch": 5.698360655737705, "grad_norm": 9.423811912536621, "learning_rate": 1.9961836355491543e-05, "loss": 2.2588, "step": 1738 }, { "epoch": 5.701639344262295, "grad_norm": 9.398543357849121, "learning_rate": 1.996174361560478e-05, "loss": 1.9058, "step": 1739 }, { "epoch": 5.704918032786885, "grad_norm": 10.329008102416992, "learning_rate": 1.9961650763389e-05, "loss": 2.0322, "step": 1740 }, { "epoch": 5.7081967213114755, "grad_norm": 8.689282417297363, "learning_rate": 1.996155779884525e-05, "loss": 1.8889, "step": 1741 }, { "epoch": 5.711475409836066, "grad_norm": 8.173868179321289, "learning_rate": 1.996146472197458e-05, "loss": 2.0264, "step": 1742 }, { "epoch": 5.714754098360656, "grad_norm": 10.838069915771484, "learning_rate": 1.9961371532778038e-05, "loss": 2.25, "step": 1743 }, { "epoch": 5.718032786885246, "grad_norm": 15.368976593017578, "learning_rate": 1.9961278231256672e-05, "loss": 1.9722, "step": 1744 }, { "epoch": 5.721311475409836, "grad_norm": 10.625487327575684, "learning_rate": 1.996118481741154e-05, "loss": 2.0283, "step": 1745 }, { "epoch": 5.724590163934426, "grad_norm": 11.29957103729248, "learning_rate": 1.996109129124369e-05, "loss": 1.9854, "step": 1746 }, { "epoch": 5.727868852459016, "grad_norm": 11.204331398010254, "learning_rate": 1.996099765275418e-05, "loss": 2.187, "step": 1747 }, { "epoch": 5.731147540983606, "grad_norm": 11.441615104675293, "learning_rate": 1.9960903901944066e-05, "loss": 1.8423, "step": 1748 }, { "epoch": 5.7344262295081965, "grad_norm": 10.652326583862305, "learning_rate": 1.99608100388144e-05, "loss": 2.0908, "step": 1749 }, { "epoch": 5.737704918032787, "grad_norm": 9.90213680267334, "learning_rate": 1.9960716063366244e-05, "loss": 2.3032, "step": 1750 }, { "epoch": 5.740983606557377, "grad_norm": 11.969792366027832, "learning_rate": 1.996062197560066e-05, "loss": 2.1123, "step": 1751 }, { "epoch": 5.744262295081967, "grad_norm": 10.19028091430664, "learning_rate": 1.9960527775518708e-05, "loss": 2.0398, "step": 1752 }, { "epoch": 5.747540983606557, "grad_norm": 15.985247611999512, "learning_rate": 1.9960433463121447e-05, "loss": 2.1562, "step": 1753 }, { "epoch": 5.750819672131147, "grad_norm": 11.12162971496582, "learning_rate": 1.996033903840994e-05, "loss": 1.8872, "step": 1754 }, { "epoch": 5.754098360655737, "grad_norm": 6.842006206512451, "learning_rate": 1.996024450138526e-05, "loss": 2.1045, "step": 1755 }, { "epoch": 5.757377049180328, "grad_norm": 7.024391174316406, "learning_rate": 1.9960149852048463e-05, "loss": 2.3789, "step": 1756 }, { "epoch": 5.760655737704918, "grad_norm": 11.475325584411621, "learning_rate": 1.996005509040062e-05, "loss": 2.0513, "step": 1757 }, { "epoch": 5.7639344262295085, "grad_norm": 6.106758117675781, "learning_rate": 1.9959960216442803e-05, "loss": 2.0093, "step": 1758 }, { "epoch": 5.767213114754099, "grad_norm": 6.681177139282227, "learning_rate": 1.9959865230176077e-05, "loss": 2.2432, "step": 1759 }, { "epoch": 5.770491803278689, "grad_norm": 7.654348850250244, "learning_rate": 1.9959770131601516e-05, "loss": 1.8955, "step": 1760 }, { "epoch": 5.773770491803279, "grad_norm": 7.055047512054443, "learning_rate": 1.9959674920720192e-05, "loss": 2.1367, "step": 1761 }, { "epoch": 5.777049180327869, "grad_norm": 10.142072677612305, "learning_rate": 1.9959579597533176e-05, "loss": 1.9746, "step": 1762 }, { "epoch": 5.780327868852459, "grad_norm": 8.486379623413086, "learning_rate": 1.9959484162041544e-05, "loss": 1.8354, "step": 1763 }, { "epoch": 5.783606557377049, "grad_norm": 7.863948345184326, "learning_rate": 1.9959388614246373e-05, "loss": 2.083, "step": 1764 }, { "epoch": 5.786885245901639, "grad_norm": 10.997029304504395, "learning_rate": 1.995929295414874e-05, "loss": 1.8833, "step": 1765 }, { "epoch": 5.7901639344262295, "grad_norm": 14.384309768676758, "learning_rate": 1.9959197181749725e-05, "loss": 2.1943, "step": 1766 }, { "epoch": 5.79344262295082, "grad_norm": 7.2538557052612305, "learning_rate": 1.995910129705041e-05, "loss": 2.124, "step": 1767 }, { "epoch": 5.79672131147541, "grad_norm": 13.225057601928711, "learning_rate": 1.9959005300051867e-05, "loss": 2.1318, "step": 1768 }, { "epoch": 5.8, "grad_norm": 10.816082954406738, "learning_rate": 1.995890919075519e-05, "loss": 2.1182, "step": 1769 }, { "epoch": 5.80327868852459, "grad_norm": 11.091176986694336, "learning_rate": 1.995881296916145e-05, "loss": 2.0195, "step": 1770 }, { "epoch": 5.80655737704918, "grad_norm": 6.612680435180664, "learning_rate": 1.9958716635271745e-05, "loss": 2.1968, "step": 1771 }, { "epoch": 5.80983606557377, "grad_norm": 8.545839309692383, "learning_rate": 1.9958620189087153e-05, "loss": 2.0327, "step": 1772 }, { "epoch": 5.81311475409836, "grad_norm": 7.151360034942627, "learning_rate": 1.9958523630608767e-05, "loss": 1.9624, "step": 1773 }, { "epoch": 5.81639344262295, "grad_norm": 10.903175354003906, "learning_rate": 1.9958426959837668e-05, "loss": 2.0889, "step": 1774 }, { "epoch": 5.8196721311475414, "grad_norm": 7.12551212310791, "learning_rate": 1.9958330176774953e-05, "loss": 2.0557, "step": 1775 }, { "epoch": 5.822950819672132, "grad_norm": 24.511302947998047, "learning_rate": 1.9958233281421715e-05, "loss": 2.1157, "step": 1776 }, { "epoch": 5.826229508196722, "grad_norm": 7.994050025939941, "learning_rate": 1.995813627377904e-05, "loss": 2.0645, "step": 1777 }, { "epoch": 5.829508196721312, "grad_norm": 18.364946365356445, "learning_rate": 1.9958039153848025e-05, "loss": 2.043, "step": 1778 }, { "epoch": 5.832786885245902, "grad_norm": 9.967768669128418, "learning_rate": 1.9957941921629763e-05, "loss": 2.0186, "step": 1779 }, { "epoch": 5.836065573770492, "grad_norm": 11.838359832763672, "learning_rate": 1.995784457712535e-05, "loss": 1.9507, "step": 1780 }, { "epoch": 5.839344262295082, "grad_norm": 7.100188732147217, "learning_rate": 1.995774712033589e-05, "loss": 2.1377, "step": 1781 }, { "epoch": 5.842622950819672, "grad_norm": 8.052971839904785, "learning_rate": 1.995764955126248e-05, "loss": 2.291, "step": 1782 }, { "epoch": 5.845901639344262, "grad_norm": 25.499858856201172, "learning_rate": 1.9957551869906214e-05, "loss": 2.1509, "step": 1783 }, { "epoch": 5.849180327868853, "grad_norm": 6.5821614265441895, "learning_rate": 1.9957454076268196e-05, "loss": 2.0674, "step": 1784 }, { "epoch": 5.852459016393443, "grad_norm": 6.789089679718018, "learning_rate": 1.9957356170349533e-05, "loss": 2.29, "step": 1785 }, { "epoch": 5.855737704918033, "grad_norm": 12.822843551635742, "learning_rate": 1.9957258152151327e-05, "loss": 1.8921, "step": 1786 }, { "epoch": 5.859016393442623, "grad_norm": 6.982877731323242, "learning_rate": 1.995716002167468e-05, "loss": 2.2397, "step": 1787 }, { "epoch": 5.862295081967213, "grad_norm": 7.3521342277526855, "learning_rate": 1.9957061778920703e-05, "loss": 2.0356, "step": 1788 }, { "epoch": 5.865573770491803, "grad_norm": 7.640570163726807, "learning_rate": 1.9956963423890497e-05, "loss": 2.0054, "step": 1789 }, { "epoch": 5.868852459016393, "grad_norm": 8.796984672546387, "learning_rate": 1.995686495658518e-05, "loss": 2.2236, "step": 1790 }, { "epoch": 5.872131147540983, "grad_norm": 8.035408020019531, "learning_rate": 1.995676637700586e-05, "loss": 1.9033, "step": 1791 }, { "epoch": 5.8754098360655735, "grad_norm": 18.330821990966797, "learning_rate": 1.9956667685153643e-05, "loss": 1.9751, "step": 1792 }, { "epoch": 5.878688524590164, "grad_norm": 7.865031719207764, "learning_rate": 1.9956568881029645e-05, "loss": 2.1777, "step": 1793 }, { "epoch": 5.881967213114754, "grad_norm": 8.486599922180176, "learning_rate": 1.995646996463498e-05, "loss": 2.1655, "step": 1794 }, { "epoch": 5.885245901639344, "grad_norm": 6.095679759979248, "learning_rate": 1.995637093597077e-05, "loss": 2.1196, "step": 1795 }, { "epoch": 5.888524590163934, "grad_norm": 5.816275119781494, "learning_rate": 1.995627179503812e-05, "loss": 2.0747, "step": 1796 }, { "epoch": 5.891803278688524, "grad_norm": 7.485843658447266, "learning_rate": 1.9956172541838154e-05, "loss": 2.0894, "step": 1797 }, { "epoch": 5.895081967213114, "grad_norm": 9.939231872558594, "learning_rate": 1.9956073176371992e-05, "loss": 2.2505, "step": 1798 }, { "epoch": 5.898360655737705, "grad_norm": 16.516613006591797, "learning_rate": 1.9955973698640753e-05, "loss": 2.1426, "step": 1799 }, { "epoch": 5.901639344262295, "grad_norm": 9.901778221130371, "learning_rate": 1.9955874108645557e-05, "loss": 2.209, "step": 1800 }, { "epoch": 5.9049180327868855, "grad_norm": 7.864047527313232, "learning_rate": 1.9955774406387535e-05, "loss": 1.8708, "step": 1801 }, { "epoch": 5.908196721311476, "grad_norm": 8.607817649841309, "learning_rate": 1.99556745918678e-05, "loss": 2.1489, "step": 1802 }, { "epoch": 5.911475409836066, "grad_norm": 6.695093631744385, "learning_rate": 1.9955574665087487e-05, "loss": 2.1079, "step": 1803 }, { "epoch": 5.914754098360656, "grad_norm": 7.06363582611084, "learning_rate": 1.9955474626047714e-05, "loss": 1.8877, "step": 1804 }, { "epoch": 5.918032786885246, "grad_norm": 8.797085762023926, "learning_rate": 1.9955374474749615e-05, "loss": 2.1714, "step": 1805 }, { "epoch": 5.921311475409836, "grad_norm": 8.544628143310547, "learning_rate": 1.9955274211194317e-05, "loss": 2.2021, "step": 1806 }, { "epoch": 5.924590163934426, "grad_norm": 6.147254467010498, "learning_rate": 1.9955173835382952e-05, "loss": 1.937, "step": 1807 }, { "epoch": 5.927868852459016, "grad_norm": 6.395956039428711, "learning_rate": 1.9955073347316652e-05, "loss": 2.0225, "step": 1808 }, { "epoch": 5.9311475409836065, "grad_norm": 19.770614624023438, "learning_rate": 1.9954972746996545e-05, "loss": 2.1855, "step": 1809 }, { "epoch": 5.934426229508197, "grad_norm": 9.241229057312012, "learning_rate": 1.9954872034423776e-05, "loss": 2.0693, "step": 1810 }, { "epoch": 5.937704918032787, "grad_norm": 13.29813289642334, "learning_rate": 1.9954771209599473e-05, "loss": 2.1733, "step": 1811 }, { "epoch": 5.940983606557377, "grad_norm": 5.975359916687012, "learning_rate": 1.995467027252477e-05, "loss": 2.0864, "step": 1812 }, { "epoch": 5.944262295081967, "grad_norm": 6.646712303161621, "learning_rate": 1.9954569223200814e-05, "loss": 2.2515, "step": 1813 }, { "epoch": 5.947540983606557, "grad_norm": 6.435547351837158, "learning_rate": 1.9954468061628738e-05, "loss": 1.9375, "step": 1814 }, { "epoch": 5.950819672131147, "grad_norm": 13.351332664489746, "learning_rate": 1.9954366787809685e-05, "loss": 2.1001, "step": 1815 }, { "epoch": 5.954098360655737, "grad_norm": 10.894767761230469, "learning_rate": 1.9954265401744797e-05, "loss": 2.0557, "step": 1816 }, { "epoch": 5.9573770491803275, "grad_norm": 5.856509208679199, "learning_rate": 1.9954163903435213e-05, "loss": 2.0928, "step": 1817 }, { "epoch": 5.9606557377049185, "grad_norm": 6.566301345825195, "learning_rate": 1.9954062292882088e-05, "loss": 1.9609, "step": 1818 }, { "epoch": 5.963934426229509, "grad_norm": 5.653203964233398, "learning_rate": 1.9953960570086553e-05, "loss": 2.189, "step": 1819 }, { "epoch": 5.967213114754099, "grad_norm": 5.961733818054199, "learning_rate": 1.9953858735049768e-05, "loss": 2.0361, "step": 1820 }, { "epoch": 5.970491803278689, "grad_norm": 6.40803861618042, "learning_rate": 1.9953756787772874e-05, "loss": 2.0908, "step": 1821 }, { "epoch": 5.973770491803279, "grad_norm": 10.233307838439941, "learning_rate": 1.995365472825703e-05, "loss": 2.2451, "step": 1822 }, { "epoch": 5.977049180327869, "grad_norm": 7.754739761352539, "learning_rate": 1.995355255650337e-05, "loss": 2.1167, "step": 1823 }, { "epoch": 5.980327868852459, "grad_norm": 9.831216812133789, "learning_rate": 1.9953450272513062e-05, "loss": 2.1187, "step": 1824 }, { "epoch": 5.983606557377049, "grad_norm": 7.565732955932617, "learning_rate": 1.9953347876287252e-05, "loss": 2.1138, "step": 1825 }, { "epoch": 5.9868852459016395, "grad_norm": 13.83617115020752, "learning_rate": 1.9953245367827094e-05, "loss": 1.8787, "step": 1826 }, { "epoch": 5.99016393442623, "grad_norm": 6.343388557434082, "learning_rate": 1.9953142747133748e-05, "loss": 2.2842, "step": 1827 }, { "epoch": 5.99344262295082, "grad_norm": 6.305782794952393, "learning_rate": 1.9953040014208367e-05, "loss": 2.041, "step": 1828 }, { "epoch": 5.99672131147541, "grad_norm": 7.097678184509277, "learning_rate": 1.995293716905211e-05, "loss": 1.9473, "step": 1829 }, { "epoch": 6.0, "grad_norm": 12.318808555603027, "learning_rate": 1.995283421166614e-05, "loss": 2.0342, "step": 1830 }, { "epoch": 6.00327868852459, "grad_norm": 6.558553218841553, "learning_rate": 1.9952731142051617e-05, "loss": 2.0327, "step": 1831 }, { "epoch": 6.00655737704918, "grad_norm": 7.348428726196289, "learning_rate": 1.99526279602097e-05, "loss": 1.9175, "step": 1832 }, { "epoch": 6.00983606557377, "grad_norm": 8.386190414428711, "learning_rate": 1.9952524666141558e-05, "loss": 1.9272, "step": 1833 }, { "epoch": 6.0131147540983605, "grad_norm": 7.8465094566345215, "learning_rate": 1.995242125984835e-05, "loss": 1.7578, "step": 1834 }, { "epoch": 6.016393442622951, "grad_norm": 6.294727802276611, "learning_rate": 1.9952317741331244e-05, "loss": 1.9663, "step": 1835 }, { "epoch": 6.019672131147541, "grad_norm": 7.908141613006592, "learning_rate": 1.9952214110591407e-05, "loss": 2.354, "step": 1836 }, { "epoch": 6.022950819672131, "grad_norm": 9.3822660446167, "learning_rate": 1.9952110367630008e-05, "loss": 1.9927, "step": 1837 }, { "epoch": 6.026229508196721, "grad_norm": 8.508171081542969, "learning_rate": 1.995200651244822e-05, "loss": 1.896, "step": 1838 }, { "epoch": 6.029508196721311, "grad_norm": 10.43674373626709, "learning_rate": 1.995190254504721e-05, "loss": 1.8706, "step": 1839 }, { "epoch": 6.032786885245901, "grad_norm": 8.23437213897705, "learning_rate": 1.995179846542815e-05, "loss": 2.0632, "step": 1840 }, { "epoch": 6.036065573770492, "grad_norm": 6.555308818817139, "learning_rate": 1.9951694273592216e-05, "loss": 1.897, "step": 1841 }, { "epoch": 6.039344262295082, "grad_norm": 7.118055820465088, "learning_rate": 1.995158996954058e-05, "loss": 1.9736, "step": 1842 }, { "epoch": 6.0426229508196725, "grad_norm": 6.722135066986084, "learning_rate": 1.9951485553274422e-05, "loss": 1.8457, "step": 1843 }, { "epoch": 6.045901639344263, "grad_norm": 5.587881088256836, "learning_rate": 1.9951381024794916e-05, "loss": 2.1167, "step": 1844 }, { "epoch": 6.049180327868853, "grad_norm": 7.702302932739258, "learning_rate": 1.9951276384103245e-05, "loss": 1.8853, "step": 1845 }, { "epoch": 6.052459016393443, "grad_norm": 9.983460426330566, "learning_rate": 1.9951171631200584e-05, "loss": 1.9785, "step": 1846 }, { "epoch": 6.055737704918033, "grad_norm": 5.557128429412842, "learning_rate": 1.9951066766088117e-05, "loss": 1.8191, "step": 1847 }, { "epoch": 6.059016393442623, "grad_norm": 6.651610851287842, "learning_rate": 1.9950961788767024e-05, "loss": 1.8081, "step": 1848 }, { "epoch": 6.062295081967213, "grad_norm": 11.051244735717773, "learning_rate": 1.995085669923849e-05, "loss": 1.9722, "step": 1849 }, { "epoch": 6.065573770491803, "grad_norm": 5.974552631378174, "learning_rate": 1.9950751497503705e-05, "loss": 2.02, "step": 1850 }, { "epoch": 6.0688524590163935, "grad_norm": 16.24712562561035, "learning_rate": 1.9950646183563848e-05, "loss": 1.8547, "step": 1851 }, { "epoch": 6.072131147540984, "grad_norm": 10.045361518859863, "learning_rate": 1.9950540757420108e-05, "loss": 2.2422, "step": 1852 }, { "epoch": 6.075409836065574, "grad_norm": 6.0629191398620605, "learning_rate": 1.9950435219073674e-05, "loss": 1.9465, "step": 1853 }, { "epoch": 6.078688524590164, "grad_norm": 5.974918365478516, "learning_rate": 1.995032956852574e-05, "loss": 2.23, "step": 1854 }, { "epoch": 6.081967213114754, "grad_norm": 7.835793972015381, "learning_rate": 1.995022380577749e-05, "loss": 1.9629, "step": 1855 }, { "epoch": 6.085245901639344, "grad_norm": 8.867568969726562, "learning_rate": 1.9950117930830127e-05, "loss": 1.979, "step": 1856 }, { "epoch": 6.088524590163934, "grad_norm": 7.512955665588379, "learning_rate": 1.9950011943684835e-05, "loss": 2.1191, "step": 1857 }, { "epoch": 6.091803278688524, "grad_norm": 8.005187034606934, "learning_rate": 1.9949905844342815e-05, "loss": 1.9272, "step": 1858 }, { "epoch": 6.0950819672131145, "grad_norm": 6.3680195808410645, "learning_rate": 1.994979963280526e-05, "loss": 2.1396, "step": 1859 }, { "epoch": 6.098360655737705, "grad_norm": 9.958282470703125, "learning_rate": 1.9949693309073368e-05, "loss": 1.7468, "step": 1860 }, { "epoch": 6.101639344262295, "grad_norm": 7.978207588195801, "learning_rate": 1.994958687314834e-05, "loss": 1.9785, "step": 1861 }, { "epoch": 6.104918032786885, "grad_norm": 8.304567337036133, "learning_rate": 1.9949480325031375e-05, "loss": 1.7773, "step": 1862 }, { "epoch": 6.108196721311476, "grad_norm": 5.332719802856445, "learning_rate": 1.9949373664723676e-05, "loss": 1.8291, "step": 1863 }, { "epoch": 6.111475409836066, "grad_norm": 6.984309196472168, "learning_rate": 1.994926689222644e-05, "loss": 1.8362, "step": 1864 }, { "epoch": 6.114754098360656, "grad_norm": 8.096719741821289, "learning_rate": 1.994916000754088e-05, "loss": 2.0474, "step": 1865 }, { "epoch": 6.118032786885246, "grad_norm": 8.322274208068848, "learning_rate": 1.9949053010668194e-05, "loss": 2.0708, "step": 1866 }, { "epoch": 6.121311475409836, "grad_norm": 7.738406181335449, "learning_rate": 1.994894590160959e-05, "loss": 1.813, "step": 1867 }, { "epoch": 6.1245901639344265, "grad_norm": 8.461481094360352, "learning_rate": 1.9948838680366277e-05, "loss": 2.0598, "step": 1868 }, { "epoch": 6.127868852459017, "grad_norm": 7.824894905090332, "learning_rate": 1.9948731346939466e-05, "loss": 1.8838, "step": 1869 }, { "epoch": 6.131147540983607, "grad_norm": 7.37776517868042, "learning_rate": 1.994862390133036e-05, "loss": 1.9985, "step": 1870 }, { "epoch": 6.134426229508197, "grad_norm": 8.200847625732422, "learning_rate": 1.994851634354018e-05, "loss": 1.9937, "step": 1871 }, { "epoch": 6.137704918032787, "grad_norm": 5.951084613800049, "learning_rate": 1.9948408673570133e-05, "loss": 1.9429, "step": 1872 }, { "epoch": 6.140983606557377, "grad_norm": 7.06359338760376, "learning_rate": 1.9948300891421438e-05, "loss": 2.0698, "step": 1873 }, { "epoch": 6.144262295081967, "grad_norm": 5.338343143463135, "learning_rate": 1.9948192997095305e-05, "loss": 2.0005, "step": 1874 }, { "epoch": 6.147540983606557, "grad_norm": 5.013659477233887, "learning_rate": 1.994808499059295e-05, "loss": 1.9219, "step": 1875 }, { "epoch": 6.150819672131147, "grad_norm": 10.810327529907227, "learning_rate": 1.994797687191559e-05, "loss": 1.9941, "step": 1876 }, { "epoch": 6.154098360655738, "grad_norm": 8.785638809204102, "learning_rate": 1.9947868641064453e-05, "loss": 2.0674, "step": 1877 }, { "epoch": 6.157377049180328, "grad_norm": 9.364213943481445, "learning_rate": 1.9947760298040753e-05, "loss": 1.8521, "step": 1878 }, { "epoch": 6.160655737704918, "grad_norm": 7.174406051635742, "learning_rate": 1.9947651842845716e-05, "loss": 2.1182, "step": 1879 }, { "epoch": 6.163934426229508, "grad_norm": 5.969959259033203, "learning_rate": 1.9947543275480555e-05, "loss": 1.8086, "step": 1880 }, { "epoch": 6.167213114754098, "grad_norm": 8.826691627502441, "learning_rate": 1.99474345959465e-05, "loss": 1.7378, "step": 1881 }, { "epoch": 6.170491803278688, "grad_norm": 9.332420349121094, "learning_rate": 1.9947325804244783e-05, "loss": 2.2363, "step": 1882 }, { "epoch": 6.173770491803278, "grad_norm": 6.152048110961914, "learning_rate": 1.9947216900376624e-05, "loss": 1.9453, "step": 1883 }, { "epoch": 6.177049180327868, "grad_norm": 10.125072479248047, "learning_rate": 1.994710788434325e-05, "loss": 2.0986, "step": 1884 }, { "epoch": 6.180327868852459, "grad_norm": 6.432436943054199, "learning_rate": 1.9946998756145894e-05, "loss": 2.1436, "step": 1885 }, { "epoch": 6.18360655737705, "grad_norm": 9.166114807128906, "learning_rate": 1.9946889515785782e-05, "loss": 2.2334, "step": 1886 }, { "epoch": 6.18688524590164, "grad_norm": 7.708532810211182, "learning_rate": 1.994678016326415e-05, "loss": 1.9607, "step": 1887 }, { "epoch": 6.19016393442623, "grad_norm": 7.527921199798584, "learning_rate": 1.994667069858223e-05, "loss": 1.9778, "step": 1888 }, { "epoch": 6.19344262295082, "grad_norm": 8.370064735412598, "learning_rate": 1.9946561121741253e-05, "loss": 1.9966, "step": 1889 }, { "epoch": 6.19672131147541, "grad_norm": 9.99727725982666, "learning_rate": 1.994645143274246e-05, "loss": 2.1016, "step": 1890 }, { "epoch": 6.2, "grad_norm": 6.701849937438965, "learning_rate": 1.9946341631587086e-05, "loss": 1.9849, "step": 1891 }, { "epoch": 6.20327868852459, "grad_norm": 8.742341995239258, "learning_rate": 1.994623171827637e-05, "loss": 2.022, "step": 1892 }, { "epoch": 6.20655737704918, "grad_norm": 7.427040100097656, "learning_rate": 1.9946121692811547e-05, "loss": 1.9482, "step": 1893 }, { "epoch": 6.2098360655737705, "grad_norm": 8.141646385192871, "learning_rate": 1.994601155519386e-05, "loss": 2.0369, "step": 1894 }, { "epoch": 6.213114754098361, "grad_norm": 8.38252067565918, "learning_rate": 1.9945901305424554e-05, "loss": 2.1035, "step": 1895 }, { "epoch": 6.216393442622951, "grad_norm": 7.383883476257324, "learning_rate": 1.9945790943504868e-05, "loss": 2.0908, "step": 1896 }, { "epoch": 6.219672131147541, "grad_norm": 6.441706657409668, "learning_rate": 1.9945680469436047e-05, "loss": 2.1724, "step": 1897 }, { "epoch": 6.222950819672131, "grad_norm": 9.686681747436523, "learning_rate": 1.994556988321934e-05, "loss": 1.9106, "step": 1898 }, { "epoch": 6.226229508196721, "grad_norm": 19.529504776000977, "learning_rate": 1.9945459184855995e-05, "loss": 2.1201, "step": 1899 }, { "epoch": 6.229508196721311, "grad_norm": 6.604842185974121, "learning_rate": 1.994534837434725e-05, "loss": 1.9663, "step": 1900 }, { "epoch": 6.232786885245901, "grad_norm": 6.959726333618164, "learning_rate": 1.9945237451694364e-05, "loss": 1.9438, "step": 1901 }, { "epoch": 6.2360655737704915, "grad_norm": 9.560462951660156, "learning_rate": 1.9945126416898586e-05, "loss": 1.8601, "step": 1902 }, { "epoch": 6.239344262295082, "grad_norm": 10.875850677490234, "learning_rate": 1.9945015269961168e-05, "loss": 1.7783, "step": 1903 }, { "epoch": 6.242622950819672, "grad_norm": 8.428069114685059, "learning_rate": 1.994490401088336e-05, "loss": 1.8877, "step": 1904 }, { "epoch": 6.245901639344262, "grad_norm": 6.76967716217041, "learning_rate": 1.994479263966642e-05, "loss": 2.0889, "step": 1905 }, { "epoch": 6.249180327868853, "grad_norm": 6.257287502288818, "learning_rate": 1.9944681156311606e-05, "loss": 1.98, "step": 1906 }, { "epoch": 6.252459016393443, "grad_norm": 6.654535293579102, "learning_rate": 1.994456956082017e-05, "loss": 1.9377, "step": 1907 }, { "epoch": 6.255737704918033, "grad_norm": 7.113922595977783, "learning_rate": 1.994445785319337e-05, "loss": 1.9824, "step": 1908 }, { "epoch": 6.259016393442623, "grad_norm": 6.2040510177612305, "learning_rate": 1.9944346033432472e-05, "loss": 2.02, "step": 1909 }, { "epoch": 6.262295081967213, "grad_norm": 7.061152935028076, "learning_rate": 1.994423410153873e-05, "loss": 2.1924, "step": 1910 }, { "epoch": 6.2655737704918035, "grad_norm": 7.219064712524414, "learning_rate": 1.9944122057513413e-05, "loss": 2.0579, "step": 1911 }, { "epoch": 6.268852459016394, "grad_norm": 9.841728210449219, "learning_rate": 1.9944009901357777e-05, "loss": 2.0027, "step": 1912 }, { "epoch": 6.272131147540984, "grad_norm": 5.834028720855713, "learning_rate": 1.994389763307309e-05, "loss": 2.0669, "step": 1913 }, { "epoch": 6.275409836065574, "grad_norm": 5.805506706237793, "learning_rate": 1.994378525266062e-05, "loss": 1.9546, "step": 1914 }, { "epoch": 6.278688524590164, "grad_norm": 10.84541130065918, "learning_rate": 1.9943672760121634e-05, "loss": 1.9888, "step": 1915 }, { "epoch": 6.281967213114754, "grad_norm": 5.7950286865234375, "learning_rate": 1.9943560155457394e-05, "loss": 1.7771, "step": 1916 }, { "epoch": 6.285245901639344, "grad_norm": 6.3489203453063965, "learning_rate": 1.994344743866918e-05, "loss": 1.9033, "step": 1917 }, { "epoch": 6.288524590163934, "grad_norm": 20.929588317871094, "learning_rate": 1.9943334609758255e-05, "loss": 1.937, "step": 1918 }, { "epoch": 6.2918032786885245, "grad_norm": 8.509903907775879, "learning_rate": 1.994322166872589e-05, "loss": 2.0806, "step": 1919 }, { "epoch": 6.295081967213115, "grad_norm": 6.061761379241943, "learning_rate": 1.994310861557337e-05, "loss": 2.0444, "step": 1920 }, { "epoch": 6.298360655737705, "grad_norm": 13.057917594909668, "learning_rate": 1.9942995450301958e-05, "loss": 2.293, "step": 1921 }, { "epoch": 6.301639344262295, "grad_norm": 8.824196815490723, "learning_rate": 1.9942882172912932e-05, "loss": 1.9597, "step": 1922 }, { "epoch": 6.304918032786885, "grad_norm": 7.181890487670898, "learning_rate": 1.9942768783407573e-05, "loss": 1.9802, "step": 1923 }, { "epoch": 6.308196721311475, "grad_norm": 7.833621978759766, "learning_rate": 1.9942655281787158e-05, "loss": 1.9736, "step": 1924 }, { "epoch": 6.311475409836065, "grad_norm": 7.122422218322754, "learning_rate": 1.9942541668052968e-05, "loss": 1.9561, "step": 1925 }, { "epoch": 6.314754098360655, "grad_norm": 8.249268531799316, "learning_rate": 1.9942427942206282e-05, "loss": 1.9849, "step": 1926 }, { "epoch": 6.3180327868852455, "grad_norm": 8.303862571716309, "learning_rate": 1.9942314104248382e-05, "loss": 2.1323, "step": 1927 }, { "epoch": 6.321311475409836, "grad_norm": 10.061029434204102, "learning_rate": 1.994220015418056e-05, "loss": 2.0127, "step": 1928 }, { "epoch": 6.324590163934427, "grad_norm": 7.068620204925537, "learning_rate": 1.9942086092004084e-05, "loss": 2.2612, "step": 1929 }, { "epoch": 6.327868852459017, "grad_norm": 11.930177688598633, "learning_rate": 1.9941971917720256e-05, "loss": 2.0737, "step": 1930 }, { "epoch": 6.331147540983607, "grad_norm": 7.956937789916992, "learning_rate": 1.9941857631330358e-05, "loss": 1.8765, "step": 1931 }, { "epoch": 6.334426229508197, "grad_norm": 7.397496223449707, "learning_rate": 1.9941743232835676e-05, "loss": 1.9897, "step": 1932 }, { "epoch": 6.337704918032787, "grad_norm": 13.898258209228516, "learning_rate": 1.9941628722237505e-05, "loss": 2.0854, "step": 1933 }, { "epoch": 6.340983606557377, "grad_norm": 7.674834251403809, "learning_rate": 1.994151409953713e-05, "loss": 1.8213, "step": 1934 }, { "epoch": 6.344262295081967, "grad_norm": 8.970247268676758, "learning_rate": 1.994139936473585e-05, "loss": 2.1621, "step": 1935 }, { "epoch": 6.3475409836065575, "grad_norm": 12.470614433288574, "learning_rate": 1.9941284517834952e-05, "loss": 2.0044, "step": 1936 }, { "epoch": 6.350819672131148, "grad_norm": 8.784823417663574, "learning_rate": 1.9941169558835737e-05, "loss": 1.9839, "step": 1937 }, { "epoch": 6.354098360655738, "grad_norm": 5.662154674530029, "learning_rate": 1.99410544877395e-05, "loss": 1.8921, "step": 1938 }, { "epoch": 6.357377049180328, "grad_norm": 6.332821846008301, "learning_rate": 1.9940939304547536e-05, "loss": 2.002, "step": 1939 }, { "epoch": 6.360655737704918, "grad_norm": 6.396687030792236, "learning_rate": 1.994082400926115e-05, "loss": 1.9746, "step": 1940 }, { "epoch": 6.363934426229508, "grad_norm": 7.268651485443115, "learning_rate": 1.9940708601881628e-05, "loss": 1.9448, "step": 1941 }, { "epoch": 6.367213114754098, "grad_norm": 9.650667190551758, "learning_rate": 1.994059308241029e-05, "loss": 1.9014, "step": 1942 }, { "epoch": 6.370491803278688, "grad_norm": 7.197697162628174, "learning_rate": 1.9940477450848425e-05, "loss": 1.9336, "step": 1943 }, { "epoch": 6.3737704918032785, "grad_norm": 9.312577247619629, "learning_rate": 1.994036170719734e-05, "loss": 1.9385, "step": 1944 }, { "epoch": 6.377049180327869, "grad_norm": 7.2929816246032715, "learning_rate": 1.994024585145834e-05, "loss": 1.9966, "step": 1945 }, { "epoch": 6.380327868852459, "grad_norm": 8.519113540649414, "learning_rate": 1.9940129883632738e-05, "loss": 2.0317, "step": 1946 }, { "epoch": 6.383606557377049, "grad_norm": 21.57369613647461, "learning_rate": 1.9940013803721836e-05, "loss": 1.9419, "step": 1947 }, { "epoch": 6.386885245901639, "grad_norm": 6.135921955108643, "learning_rate": 1.993989761172694e-05, "loss": 1.9199, "step": 1948 }, { "epoch": 6.390163934426229, "grad_norm": 7.973226547241211, "learning_rate": 1.9939781307649366e-05, "loss": 1.9185, "step": 1949 }, { "epoch": 6.39344262295082, "grad_norm": 13.320082664489746, "learning_rate": 1.9939664891490423e-05, "loss": 2.1372, "step": 1950 }, { "epoch": 6.39672131147541, "grad_norm": 8.274187088012695, "learning_rate": 1.9939548363251424e-05, "loss": 1.8252, "step": 1951 }, { "epoch": 6.4, "grad_norm": 12.392560958862305, "learning_rate": 1.9939431722933678e-05, "loss": 1.9868, "step": 1952 }, { "epoch": 6.4032786885245905, "grad_norm": 9.695039749145508, "learning_rate": 1.9939314970538512e-05, "loss": 1.9717, "step": 1953 }, { "epoch": 6.406557377049181, "grad_norm": 11.198650360107422, "learning_rate": 1.993919810606723e-05, "loss": 1.9536, "step": 1954 }, { "epoch": 6.409836065573771, "grad_norm": 10.768159866333008, "learning_rate": 1.9939081129521155e-05, "loss": 1.7021, "step": 1955 }, { "epoch": 6.413114754098361, "grad_norm": 7.814785480499268, "learning_rate": 1.993896404090161e-05, "loss": 1.8071, "step": 1956 }, { "epoch": 6.416393442622951, "grad_norm": 13.315463066101074, "learning_rate": 1.993884684020991e-05, "loss": 2.188, "step": 1957 }, { "epoch": 6.419672131147541, "grad_norm": 8.539875984191895, "learning_rate": 1.9938729527447374e-05, "loss": 1.9321, "step": 1958 }, { "epoch": 6.422950819672131, "grad_norm": 7.224980354309082, "learning_rate": 1.9938612102615336e-05, "loss": 2.1328, "step": 1959 }, { "epoch": 6.426229508196721, "grad_norm": 7.661697864532471, "learning_rate": 1.993849456571511e-05, "loss": 1.9207, "step": 1960 }, { "epoch": 6.4295081967213115, "grad_norm": 12.735052108764648, "learning_rate": 1.9938376916748024e-05, "loss": 1.873, "step": 1961 }, { "epoch": 6.432786885245902, "grad_norm": 7.927979946136475, "learning_rate": 1.9938259155715403e-05, "loss": 1.9961, "step": 1962 }, { "epoch": 6.436065573770492, "grad_norm": 13.142711639404297, "learning_rate": 1.993814128261858e-05, "loss": 2.041, "step": 1963 }, { "epoch": 6.439344262295082, "grad_norm": 6.772900581359863, "learning_rate": 1.993802329745888e-05, "loss": 1.8569, "step": 1964 }, { "epoch": 6.442622950819672, "grad_norm": 9.893248558044434, "learning_rate": 1.993790520023763e-05, "loss": 2.1226, "step": 1965 }, { "epoch": 6.445901639344262, "grad_norm": 5.401491165161133, "learning_rate": 1.9937786990956174e-05, "loss": 1.8774, "step": 1966 }, { "epoch": 6.449180327868852, "grad_norm": 6.3439741134643555, "learning_rate": 1.993766866961583e-05, "loss": 1.8877, "step": 1967 }, { "epoch": 6.452459016393442, "grad_norm": 6.2329301834106445, "learning_rate": 1.9937550236217943e-05, "loss": 1.9536, "step": 1968 }, { "epoch": 6.4557377049180324, "grad_norm": 8.495580673217773, "learning_rate": 1.9937431690763844e-05, "loss": 2.0527, "step": 1969 }, { "epoch": 6.459016393442623, "grad_norm": 8.048564910888672, "learning_rate": 1.993731303325487e-05, "loss": 1.7815, "step": 1970 }, { "epoch": 6.462295081967213, "grad_norm": 9.360533714294434, "learning_rate": 1.993719426369236e-05, "loss": 1.835, "step": 1971 }, { "epoch": 6.465573770491803, "grad_norm": 7.921350479125977, "learning_rate": 1.9937075382077654e-05, "loss": 2.0688, "step": 1972 }, { "epoch": 6.468852459016394, "grad_norm": 6.609210968017578, "learning_rate": 1.9936956388412086e-05, "loss": 1.98, "step": 1973 }, { "epoch": 6.472131147540984, "grad_norm": 8.230547904968262, "learning_rate": 1.993683728269701e-05, "loss": 1.9985, "step": 1974 }, { "epoch": 6.475409836065574, "grad_norm": 7.618993282318115, "learning_rate": 1.9936718064933757e-05, "loss": 1.8853, "step": 1975 }, { "epoch": 6.478688524590164, "grad_norm": 6.12484884262085, "learning_rate": 1.9936598735123675e-05, "loss": 1.9214, "step": 1976 }, { "epoch": 6.481967213114754, "grad_norm": 12.90139389038086, "learning_rate": 1.9936479293268113e-05, "loss": 1.8899, "step": 1977 }, { "epoch": 6.4852459016393444, "grad_norm": 7.7440505027771, "learning_rate": 1.9936359739368418e-05, "loss": 1.9087, "step": 1978 }, { "epoch": 6.488524590163935, "grad_norm": 6.262972831726074, "learning_rate": 1.9936240073425932e-05, "loss": 1.7786, "step": 1979 }, { "epoch": 6.491803278688525, "grad_norm": 18.56094741821289, "learning_rate": 1.993612029544201e-05, "loss": 1.998, "step": 1980 }, { "epoch": 6.495081967213115, "grad_norm": 9.762199401855469, "learning_rate": 1.9936000405418e-05, "loss": 1.9316, "step": 1981 }, { "epoch": 6.498360655737705, "grad_norm": 10.070526123046875, "learning_rate": 1.9935880403355255e-05, "loss": 1.9644, "step": 1982 }, { "epoch": 6.501639344262295, "grad_norm": 6.772207736968994, "learning_rate": 1.9935760289255125e-05, "loss": 2.0381, "step": 1983 }, { "epoch": 6.504918032786885, "grad_norm": 6.401250839233398, "learning_rate": 1.993564006311897e-05, "loss": 2.1426, "step": 1984 }, { "epoch": 6.508196721311475, "grad_norm": 10.064623832702637, "learning_rate": 1.993551972494814e-05, "loss": 2.1182, "step": 1985 }, { "epoch": 6.511475409836065, "grad_norm": 8.563838958740234, "learning_rate": 1.9935399274744e-05, "loss": 2.1606, "step": 1986 }, { "epoch": 6.5147540983606556, "grad_norm": 6.99924373626709, "learning_rate": 1.99352787125079e-05, "loss": 1.9775, "step": 1987 }, { "epoch": 6.518032786885246, "grad_norm": 7.512824058532715, "learning_rate": 1.9935158038241203e-05, "loss": 2.1045, "step": 1988 }, { "epoch": 6.521311475409836, "grad_norm": 11.189863204956055, "learning_rate": 1.9935037251945267e-05, "loss": 1.8979, "step": 1989 }, { "epoch": 6.524590163934426, "grad_norm": 7.238169193267822, "learning_rate": 1.9934916353621458e-05, "loss": 2.0742, "step": 1990 }, { "epoch": 6.527868852459016, "grad_norm": 6.609234809875488, "learning_rate": 1.9934795343271138e-05, "loss": 1.8875, "step": 1991 }, { "epoch": 6.531147540983606, "grad_norm": 6.577526569366455, "learning_rate": 1.993467422089567e-05, "loss": 2.0557, "step": 1992 }, { "epoch": 6.534426229508197, "grad_norm": 8.231047630310059, "learning_rate": 1.993455298649642e-05, "loss": 1.7109, "step": 1993 }, { "epoch": 6.537704918032787, "grad_norm": 7.0251288414001465, "learning_rate": 1.993443164007476e-05, "loss": 2.0933, "step": 1994 }, { "epoch": 6.540983606557377, "grad_norm": 6.30311918258667, "learning_rate": 1.9934310181632047e-05, "loss": 2.1221, "step": 1995 }, { "epoch": 6.5442622950819676, "grad_norm": 8.91756534576416, "learning_rate": 1.9934188611169664e-05, "loss": 1.9021, "step": 1996 }, { "epoch": 6.547540983606558, "grad_norm": 9.103594779968262, "learning_rate": 1.993406692868897e-05, "loss": 1.8923, "step": 1997 }, { "epoch": 6.550819672131148, "grad_norm": 5.986033916473389, "learning_rate": 1.9933945134191346e-05, "loss": 2.0566, "step": 1998 }, { "epoch": 6.554098360655738, "grad_norm": 7.205105781555176, "learning_rate": 1.9933823227678162e-05, "loss": 1.9155, "step": 1999 }, { "epoch": 6.557377049180328, "grad_norm": 9.728691101074219, "learning_rate": 1.993370120915079e-05, "loss": 2.1479, "step": 2000 }, { "epoch": 6.560655737704918, "grad_norm": 7.874690532684326, "learning_rate": 1.993357907861061e-05, "loss": 1.9878, "step": 2001 }, { "epoch": 6.563934426229508, "grad_norm": 8.549609184265137, "learning_rate": 1.9933456836058996e-05, "loss": 2.1055, "step": 2002 }, { "epoch": 6.567213114754098, "grad_norm": 8.414835929870605, "learning_rate": 1.993333448149733e-05, "loss": 1.9644, "step": 2003 }, { "epoch": 6.5704918032786885, "grad_norm": 10.004170417785645, "learning_rate": 1.993321201492699e-05, "loss": 1.958, "step": 2004 }, { "epoch": 6.573770491803279, "grad_norm": 7.630732536315918, "learning_rate": 1.9933089436349355e-05, "loss": 2.0525, "step": 2005 }, { "epoch": 6.577049180327869, "grad_norm": 8.982572555541992, "learning_rate": 1.993296674576581e-05, "loss": 1.9338, "step": 2006 }, { "epoch": 6.580327868852459, "grad_norm": 7.489508628845215, "learning_rate": 1.9932843943177737e-05, "loss": 2.0659, "step": 2007 }, { "epoch": 6.583606557377049, "grad_norm": 7.275721549987793, "learning_rate": 1.9932721028586522e-05, "loss": 1.9229, "step": 2008 }, { "epoch": 6.586885245901639, "grad_norm": 6.271424293518066, "learning_rate": 1.9932598001993547e-05, "loss": 1.8662, "step": 2009 }, { "epoch": 6.590163934426229, "grad_norm": 7.392149925231934, "learning_rate": 1.9932474863400204e-05, "loss": 1.7417, "step": 2010 }, { "epoch": 6.593442622950819, "grad_norm": 7.207428455352783, "learning_rate": 1.9932351612807878e-05, "loss": 1.874, "step": 2011 }, { "epoch": 6.5967213114754095, "grad_norm": 8.98928451538086, "learning_rate": 1.9932228250217963e-05, "loss": 2.0845, "step": 2012 }, { "epoch": 6.6, "grad_norm": 7.459725379943848, "learning_rate": 1.9932104775631847e-05, "loss": 1.9062, "step": 2013 }, { "epoch": 6.60327868852459, "grad_norm": 5.644625663757324, "learning_rate": 1.9931981189050923e-05, "loss": 1.8633, "step": 2014 }, { "epoch": 6.60655737704918, "grad_norm": 7.584081649780273, "learning_rate": 1.9931857490476583e-05, "loss": 1.9404, "step": 2015 }, { "epoch": 6.60983606557377, "grad_norm": 11.334716796875, "learning_rate": 1.9931733679910224e-05, "loss": 1.7485, "step": 2016 }, { "epoch": 6.613114754098361, "grad_norm": 5.598092079162598, "learning_rate": 1.9931609757353245e-05, "loss": 1.8608, "step": 2017 }, { "epoch": 6.616393442622951, "grad_norm": 7.780254364013672, "learning_rate": 1.9931485722807034e-05, "loss": 1.8989, "step": 2018 }, { "epoch": 6.619672131147541, "grad_norm": 10.134251594543457, "learning_rate": 1.9931361576272998e-05, "loss": 1.7983, "step": 2019 }, { "epoch": 6.622950819672131, "grad_norm": 7.539371967315674, "learning_rate": 1.9931237317752532e-05, "loss": 1.748, "step": 2020 }, { "epoch": 6.6262295081967215, "grad_norm": 12.710543632507324, "learning_rate": 1.993111294724704e-05, "loss": 2.011, "step": 2021 }, { "epoch": 6.629508196721312, "grad_norm": 9.324592590332031, "learning_rate": 1.9930988464757925e-05, "loss": 1.8838, "step": 2022 }, { "epoch": 6.632786885245902, "grad_norm": 7.056035041809082, "learning_rate": 1.9930863870286588e-05, "loss": 1.801, "step": 2023 }, { "epoch": 6.636065573770492, "grad_norm": 9.089774131774902, "learning_rate": 1.993073916383444e-05, "loss": 1.8281, "step": 2024 }, { "epoch": 6.639344262295082, "grad_norm": 5.393138408660889, "learning_rate": 1.993061434540288e-05, "loss": 1.8894, "step": 2025 }, { "epoch": 6.642622950819672, "grad_norm": 5.523228168487549, "learning_rate": 1.9930489414993315e-05, "loss": 1.9727, "step": 2026 }, { "epoch": 6.645901639344262, "grad_norm": 6.202809810638428, "learning_rate": 1.9930364372607157e-05, "loss": 1.9399, "step": 2027 }, { "epoch": 6.649180327868852, "grad_norm": 20.36492347717285, "learning_rate": 1.993023921824582e-05, "loss": 1.8867, "step": 2028 }, { "epoch": 6.6524590163934425, "grad_norm": 5.0898261070251465, "learning_rate": 1.9930113951910706e-05, "loss": 1.9221, "step": 2029 }, { "epoch": 6.655737704918033, "grad_norm": 7.468288898468018, "learning_rate": 1.992998857360324e-05, "loss": 1.8252, "step": 2030 }, { "epoch": 6.659016393442623, "grad_norm": 11.104262351989746, "learning_rate": 1.9929863083324822e-05, "loss": 2.343, "step": 2031 }, { "epoch": 6.662295081967213, "grad_norm": 9.610032081604004, "learning_rate": 1.9929737481076873e-05, "loss": 2.1997, "step": 2032 }, { "epoch": 6.665573770491803, "grad_norm": 8.397183418273926, "learning_rate": 1.992961176686081e-05, "loss": 1.998, "step": 2033 }, { "epoch": 6.668852459016393, "grad_norm": 7.203337669372559, "learning_rate": 1.9929485940678052e-05, "loss": 1.8901, "step": 2034 }, { "epoch": 6.672131147540983, "grad_norm": 6.840331077575684, "learning_rate": 1.9929360002530016e-05, "loss": 1.9849, "step": 2035 }, { "epoch": 6.675409836065574, "grad_norm": 5.829222679138184, "learning_rate": 1.9929233952418123e-05, "loss": 1.78, "step": 2036 }, { "epoch": 6.678688524590164, "grad_norm": 7.366125583648682, "learning_rate": 1.9929107790343788e-05, "loss": 1.8027, "step": 2037 }, { "epoch": 6.6819672131147545, "grad_norm": 6.737093925476074, "learning_rate": 1.9928981516308445e-05, "loss": 1.8821, "step": 2038 }, { "epoch": 6.685245901639345, "grad_norm": 5.544009685516357, "learning_rate": 1.9928855130313507e-05, "loss": 2.2324, "step": 2039 }, { "epoch": 6.688524590163935, "grad_norm": 8.71578598022461, "learning_rate": 1.9928728632360407e-05, "loss": 2.2471, "step": 2040 }, { "epoch": 6.691803278688525, "grad_norm": 7.014872074127197, "learning_rate": 1.9928602022450567e-05, "loss": 1.8013, "step": 2041 }, { "epoch": 6.695081967213115, "grad_norm": 6.229204177856445, "learning_rate": 1.9928475300585415e-05, "loss": 1.9392, "step": 2042 }, { "epoch": 6.698360655737705, "grad_norm": 8.708298683166504, "learning_rate": 1.9928348466766384e-05, "loss": 2.0938, "step": 2043 }, { "epoch": 6.701639344262295, "grad_norm": 5.64739990234375, "learning_rate": 1.9928221520994903e-05, "loss": 1.9055, "step": 2044 }, { "epoch": 6.704918032786885, "grad_norm": 5.832401275634766, "learning_rate": 1.9928094463272395e-05, "loss": 1.8076, "step": 2045 }, { "epoch": 6.7081967213114755, "grad_norm": 6.921487331390381, "learning_rate": 1.9927967293600304e-05, "loss": 1.9121, "step": 2046 }, { "epoch": 6.711475409836066, "grad_norm": 6.874172687530518, "learning_rate": 1.9927840011980057e-05, "loss": 2.0464, "step": 2047 }, { "epoch": 6.714754098360656, "grad_norm": 16.691450119018555, "learning_rate": 1.992771261841309e-05, "loss": 2.0244, "step": 2048 }, { "epoch": 6.718032786885246, "grad_norm": 7.107603549957275, "learning_rate": 1.9927585112900846e-05, "loss": 1.8994, "step": 2049 }, { "epoch": 6.721311475409836, "grad_norm": 7.292140007019043, "learning_rate": 1.9927457495444757e-05, "loss": 2.1138, "step": 2050 }, { "epoch": 6.724590163934426, "grad_norm": 6.734272480010986, "learning_rate": 1.992732976604626e-05, "loss": 2.0752, "step": 2051 }, { "epoch": 6.727868852459016, "grad_norm": 20.784631729125977, "learning_rate": 1.9927201924706798e-05, "loss": 1.874, "step": 2052 }, { "epoch": 6.731147540983606, "grad_norm": 6.974928855895996, "learning_rate": 1.9927073971427813e-05, "loss": 1.8301, "step": 2053 }, { "epoch": 6.7344262295081965, "grad_norm": 10.731244087219238, "learning_rate": 1.9926945906210752e-05, "loss": 1.8779, "step": 2054 }, { "epoch": 6.737704918032787, "grad_norm": 7.066757678985596, "learning_rate": 1.992681772905705e-05, "loss": 2.1118, "step": 2055 }, { "epoch": 6.740983606557377, "grad_norm": 7.2107744216918945, "learning_rate": 1.9926689439968157e-05, "loss": 1.9539, "step": 2056 }, { "epoch": 6.744262295081967, "grad_norm": 8.116506576538086, "learning_rate": 1.992656103894552e-05, "loss": 2.0112, "step": 2057 }, { "epoch": 6.747540983606557, "grad_norm": 5.600281238555908, "learning_rate": 1.9926432525990584e-05, "loss": 1.9966, "step": 2058 }, { "epoch": 6.750819672131147, "grad_norm": 7.232183933258057, "learning_rate": 1.99263039011048e-05, "loss": 1.7603, "step": 2059 }, { "epoch": 6.754098360655737, "grad_norm": 9.727633476257324, "learning_rate": 1.9926175164289623e-05, "loss": 1.9302, "step": 2060 }, { "epoch": 6.757377049180328, "grad_norm": 5.748019695281982, "learning_rate": 1.99260463155465e-05, "loss": 1.7019, "step": 2061 }, { "epoch": 6.760655737704918, "grad_norm": 7.528354644775391, "learning_rate": 1.9925917354876883e-05, "loss": 1.7822, "step": 2062 }, { "epoch": 6.7639344262295085, "grad_norm": 9.19643497467041, "learning_rate": 1.9925788282282226e-05, "loss": 1.9133, "step": 2063 }, { "epoch": 6.767213114754099, "grad_norm": 7.788099765777588, "learning_rate": 1.9925659097763986e-05, "loss": 2.1797, "step": 2064 }, { "epoch": 6.770491803278689, "grad_norm": 7.004133224487305, "learning_rate": 1.992552980132362e-05, "loss": 2.0225, "step": 2065 }, { "epoch": 6.773770491803279, "grad_norm": 7.583788871765137, "learning_rate": 1.992540039296259e-05, "loss": 1.9844, "step": 2066 }, { "epoch": 6.777049180327869, "grad_norm": 7.042766571044922, "learning_rate": 1.9925270872682347e-05, "loss": 2.0044, "step": 2067 }, { "epoch": 6.780327868852459, "grad_norm": 6.663905143737793, "learning_rate": 1.9925141240484355e-05, "loss": 1.9482, "step": 2068 }, { "epoch": 6.783606557377049, "grad_norm": 5.960007667541504, "learning_rate": 1.9925011496370075e-05, "loss": 1.9121, "step": 2069 }, { "epoch": 6.786885245901639, "grad_norm": 5.0601396560668945, "learning_rate": 1.992488164034097e-05, "loss": 1.8831, "step": 2070 }, { "epoch": 6.7901639344262295, "grad_norm": 6.457515239715576, "learning_rate": 1.9924751672398506e-05, "loss": 2.0195, "step": 2071 }, { "epoch": 6.79344262295082, "grad_norm": 7.143552780151367, "learning_rate": 1.9924621592544154e-05, "loss": 1.9658, "step": 2072 }, { "epoch": 6.79672131147541, "grad_norm": 6.77696418762207, "learning_rate": 1.9924491400779366e-05, "loss": 2.0098, "step": 2073 }, { "epoch": 6.8, "grad_norm": 10.258151054382324, "learning_rate": 1.9924361097105624e-05, "loss": 1.7783, "step": 2074 }, { "epoch": 6.80327868852459, "grad_norm": 6.840819835662842, "learning_rate": 1.992423068152439e-05, "loss": 2.0474, "step": 2075 }, { "epoch": 6.80655737704918, "grad_norm": 10.810078620910645, "learning_rate": 1.9924100154037137e-05, "loss": 2.0801, "step": 2076 }, { "epoch": 6.80983606557377, "grad_norm": 7.838278293609619, "learning_rate": 1.9923969514645333e-05, "loss": 1.8259, "step": 2077 }, { "epoch": 6.81311475409836, "grad_norm": 7.957963466644287, "learning_rate": 1.9923838763350457e-05, "loss": 2.0957, "step": 2078 }, { "epoch": 6.81639344262295, "grad_norm": 8.092057228088379, "learning_rate": 1.9923707900153984e-05, "loss": 2.0156, "step": 2079 }, { "epoch": 6.8196721311475414, "grad_norm": 6.436166286468506, "learning_rate": 1.9923576925057385e-05, "loss": 2.0425, "step": 2080 }, { "epoch": 6.822950819672132, "grad_norm": 13.04790210723877, "learning_rate": 1.9923445838062136e-05, "loss": 1.937, "step": 2081 }, { "epoch": 6.826229508196722, "grad_norm": 8.301151275634766, "learning_rate": 1.9923314639169718e-05, "loss": 1.8105, "step": 2082 }, { "epoch": 6.829508196721312, "grad_norm": 7.411782264709473, "learning_rate": 1.992318332838161e-05, "loss": 1.874, "step": 2083 }, { "epoch": 6.832786885245902, "grad_norm": 7.004777431488037, "learning_rate": 1.9923051905699288e-05, "loss": 1.8872, "step": 2084 }, { "epoch": 6.836065573770492, "grad_norm": 11.465468406677246, "learning_rate": 1.9922920371124243e-05, "loss": 1.9243, "step": 2085 }, { "epoch": 6.839344262295082, "grad_norm": 6.502686500549316, "learning_rate": 1.9922788724657956e-05, "loss": 2.0293, "step": 2086 }, { "epoch": 6.842622950819672, "grad_norm": 9.30858325958252, "learning_rate": 1.9922656966301903e-05, "loss": 1.6311, "step": 2087 }, { "epoch": 6.845901639344262, "grad_norm": 7.177227973937988, "learning_rate": 1.9922525096057578e-05, "loss": 1.8516, "step": 2088 }, { "epoch": 6.849180327868853, "grad_norm": 6.105560302734375, "learning_rate": 1.992239311392647e-05, "loss": 2.188, "step": 2089 }, { "epoch": 6.852459016393443, "grad_norm": 7.2532958984375, "learning_rate": 1.9922261019910056e-05, "loss": 1.9561, "step": 2090 }, { "epoch": 6.855737704918033, "grad_norm": 10.487678527832031, "learning_rate": 1.9922128814009837e-05, "loss": 1.8818, "step": 2091 }, { "epoch": 6.859016393442623, "grad_norm": 7.567094326019287, "learning_rate": 1.99219964962273e-05, "loss": 1.8765, "step": 2092 }, { "epoch": 6.862295081967213, "grad_norm": 5.864492893218994, "learning_rate": 1.9921864066563933e-05, "loss": 1.9751, "step": 2093 }, { "epoch": 6.865573770491803, "grad_norm": 6.1983962059021, "learning_rate": 1.9921731525021234e-05, "loss": 1.9141, "step": 2094 }, { "epoch": 6.868852459016393, "grad_norm": 19.395023345947266, "learning_rate": 1.9921598871600694e-05, "loss": 2.0747, "step": 2095 }, { "epoch": 6.872131147540983, "grad_norm": 7.158670425415039, "learning_rate": 1.9921466106303817e-05, "loss": 1.8984, "step": 2096 }, { "epoch": 6.8754098360655735, "grad_norm": 16.10408592224121, "learning_rate": 1.9921333229132087e-05, "loss": 1.9724, "step": 2097 }, { "epoch": 6.878688524590164, "grad_norm": 9.190773963928223, "learning_rate": 1.992120024008701e-05, "loss": 1.7998, "step": 2098 }, { "epoch": 6.881967213114754, "grad_norm": 8.309059143066406, "learning_rate": 1.992106713917009e-05, "loss": 1.8096, "step": 2099 }, { "epoch": 6.885245901639344, "grad_norm": 6.697022914886475, "learning_rate": 1.992093392638282e-05, "loss": 2.1719, "step": 2100 }, { "epoch": 6.888524590163934, "grad_norm": 6.774765968322754, "learning_rate": 1.99208006017267e-05, "loss": 1.9233, "step": 2101 }, { "epoch": 6.891803278688524, "grad_norm": 7.121787071228027, "learning_rate": 1.9920667165203243e-05, "loss": 2.001, "step": 2102 }, { "epoch": 6.895081967213114, "grad_norm": 13.232556343078613, "learning_rate": 1.992053361681395e-05, "loss": 1.9805, "step": 2103 }, { "epoch": 6.898360655737705, "grad_norm": 6.7263503074646, "learning_rate": 1.9920399956560322e-05, "loss": 1.9761, "step": 2104 }, { "epoch": 6.901639344262295, "grad_norm": 9.292461395263672, "learning_rate": 1.9920266184443874e-05, "loss": 1.9902, "step": 2105 }, { "epoch": 6.9049180327868855, "grad_norm": 8.150347709655762, "learning_rate": 1.9920132300466107e-05, "loss": 2.0122, "step": 2106 }, { "epoch": 6.908196721311476, "grad_norm": 7.044556617736816, "learning_rate": 1.9919998304628535e-05, "loss": 1.916, "step": 2107 }, { "epoch": 6.911475409836066, "grad_norm": 7.087650775909424, "learning_rate": 1.9919864196932667e-05, "loss": 1.9229, "step": 2108 }, { "epoch": 6.914754098360656, "grad_norm": 11.260643005371094, "learning_rate": 1.991972997738002e-05, "loss": 1.9126, "step": 2109 }, { "epoch": 6.918032786885246, "grad_norm": 10.129694938659668, "learning_rate": 1.9919595645972097e-05, "loss": 1.9819, "step": 2110 }, { "epoch": 6.921311475409836, "grad_norm": 12.07696533203125, "learning_rate": 1.9919461202710422e-05, "loss": 1.918, "step": 2111 }, { "epoch": 6.924590163934426, "grad_norm": 8.11023235321045, "learning_rate": 1.9919326647596508e-05, "loss": 1.6924, "step": 2112 }, { "epoch": 6.927868852459016, "grad_norm": 9.331859588623047, "learning_rate": 1.9919191980631875e-05, "loss": 2.0332, "step": 2113 }, { "epoch": 6.9311475409836065, "grad_norm": 6.406831741333008, "learning_rate": 1.9919057201818037e-05, "loss": 1.8525, "step": 2114 }, { "epoch": 6.934426229508197, "grad_norm": 7.79849100112915, "learning_rate": 1.9918922311156517e-05, "loss": 1.9727, "step": 2115 }, { "epoch": 6.937704918032787, "grad_norm": 9.765876770019531, "learning_rate": 1.9918787308648836e-05, "loss": 1.8643, "step": 2116 }, { "epoch": 6.940983606557377, "grad_norm": 8.038521766662598, "learning_rate": 1.9918652194296512e-05, "loss": 2.0142, "step": 2117 }, { "epoch": 6.944262295081967, "grad_norm": 6.123176574707031, "learning_rate": 1.9918516968101074e-05, "loss": 1.8813, "step": 2118 }, { "epoch": 6.947540983606557, "grad_norm": 9.511046409606934, "learning_rate": 1.9918381630064042e-05, "loss": 2.021, "step": 2119 }, { "epoch": 6.950819672131147, "grad_norm": 6.603940486907959, "learning_rate": 1.9918246180186948e-05, "loss": 2.208, "step": 2120 }, { "epoch": 6.954098360655737, "grad_norm": 7.294633388519287, "learning_rate": 1.9918110618471314e-05, "loss": 1.9761, "step": 2121 }, { "epoch": 6.9573770491803275, "grad_norm": 12.212682723999023, "learning_rate": 1.991797494491867e-05, "loss": 2.1338, "step": 2122 }, { "epoch": 6.9606557377049185, "grad_norm": 7.869489669799805, "learning_rate": 1.991783915953055e-05, "loss": 1.9751, "step": 2123 }, { "epoch": 6.963934426229509, "grad_norm": 7.187969207763672, "learning_rate": 1.9917703262308478e-05, "loss": 1.9487, "step": 2124 }, { "epoch": 6.967213114754099, "grad_norm": 7.331493854522705, "learning_rate": 1.9917567253253988e-05, "loss": 1.8672, "step": 2125 }, { "epoch": 6.970491803278689, "grad_norm": 7.12455940246582, "learning_rate": 1.991743113236862e-05, "loss": 1.8118, "step": 2126 }, { "epoch": 6.973770491803279, "grad_norm": 7.29185152053833, "learning_rate": 1.99172948996539e-05, "loss": 2.147, "step": 2127 }, { "epoch": 6.977049180327869, "grad_norm": 9.338237762451172, "learning_rate": 1.9917158555111375e-05, "loss": 1.927, "step": 2128 }, { "epoch": 6.980327868852459, "grad_norm": 8.814062118530273, "learning_rate": 1.991702209874257e-05, "loss": 1.8462, "step": 2129 }, { "epoch": 6.983606557377049, "grad_norm": 7.1604814529418945, "learning_rate": 1.9916885530549032e-05, "loss": 1.8501, "step": 2130 }, { "epoch": 6.9868852459016395, "grad_norm": 8.249971389770508, "learning_rate": 1.9916748850532298e-05, "loss": 1.8928, "step": 2131 }, { "epoch": 6.99016393442623, "grad_norm": 7.229896545410156, "learning_rate": 1.991661205869391e-05, "loss": 1.9575, "step": 2132 }, { "epoch": 6.99344262295082, "grad_norm": 10.782246589660645, "learning_rate": 1.991647515503541e-05, "loss": 2.0195, "step": 2133 }, { "epoch": 6.99672131147541, "grad_norm": 7.890417098999023, "learning_rate": 1.9916338139558343e-05, "loss": 1.9575, "step": 2134 }, { "epoch": 7.0, "grad_norm": 7.232193946838379, "learning_rate": 1.9916201012264255e-05, "loss": 1.9429, "step": 2135 }, { "epoch": 7.00327868852459, "grad_norm": 6.727558612823486, "learning_rate": 1.9916063773154686e-05, "loss": 1.8491, "step": 2136 }, { "epoch": 7.00655737704918, "grad_norm": 14.467048645019531, "learning_rate": 1.991592642223119e-05, "loss": 1.9795, "step": 2137 }, { "epoch": 7.00983606557377, "grad_norm": 8.447813987731934, "learning_rate": 1.991578895949531e-05, "loss": 2.0312, "step": 2138 }, { "epoch": 7.0131147540983605, "grad_norm": 8.42054557800293, "learning_rate": 1.9915651384948606e-05, "loss": 1.8896, "step": 2139 }, { "epoch": 7.016393442622951, "grad_norm": 8.317573547363281, "learning_rate": 1.991551369859262e-05, "loss": 1.752, "step": 2140 }, { "epoch": 7.019672131147541, "grad_norm": 7.432085990905762, "learning_rate": 1.991537590042891e-05, "loss": 1.751, "step": 2141 }, { "epoch": 7.022950819672131, "grad_norm": 8.438892364501953, "learning_rate": 1.9915237990459024e-05, "loss": 2.0449, "step": 2142 }, { "epoch": 7.026229508196721, "grad_norm": 7.245691776275635, "learning_rate": 1.9915099968684523e-05, "loss": 1.853, "step": 2143 }, { "epoch": 7.029508196721311, "grad_norm": 10.273487091064453, "learning_rate": 1.991496183510696e-05, "loss": 1.7764, "step": 2144 }, { "epoch": 7.032786885245901, "grad_norm": 7.061801910400391, "learning_rate": 1.991482358972789e-05, "loss": 1.6558, "step": 2145 }, { "epoch": 7.036065573770492, "grad_norm": 6.469074726104736, "learning_rate": 1.9914685232548877e-05, "loss": 1.925, "step": 2146 }, { "epoch": 7.039344262295082, "grad_norm": 9.432757377624512, "learning_rate": 1.991454676357148e-05, "loss": 1.7039, "step": 2147 }, { "epoch": 7.0426229508196725, "grad_norm": 9.81153678894043, "learning_rate": 1.991440818279726e-05, "loss": 1.6592, "step": 2148 }, { "epoch": 7.045901639344263, "grad_norm": 13.57650375366211, "learning_rate": 1.991426949022778e-05, "loss": 1.9097, "step": 2149 }, { "epoch": 7.049180327868853, "grad_norm": 6.414165496826172, "learning_rate": 1.9914130685864602e-05, "loss": 1.8379, "step": 2150 }, { "epoch": 7.052459016393443, "grad_norm": 19.317298889160156, "learning_rate": 1.991399176970929e-05, "loss": 1.7686, "step": 2151 }, { "epoch": 7.055737704918033, "grad_norm": 29.132349014282227, "learning_rate": 1.9913852741763416e-05, "loss": 1.9214, "step": 2152 }, { "epoch": 7.059016393442623, "grad_norm": 7.1894025802612305, "learning_rate": 1.9913713602028546e-05, "loss": 1.8198, "step": 2153 }, { "epoch": 7.062295081967213, "grad_norm": 6.146124839782715, "learning_rate": 1.9913574350506243e-05, "loss": 1.9209, "step": 2154 }, { "epoch": 7.065573770491803, "grad_norm": 8.173232078552246, "learning_rate": 1.9913434987198087e-05, "loss": 1.8489, "step": 2155 }, { "epoch": 7.0688524590163935, "grad_norm": 9.274133682250977, "learning_rate": 1.9913295512105638e-05, "loss": 1.8743, "step": 2156 }, { "epoch": 7.072131147540984, "grad_norm": 9.762054443359375, "learning_rate": 1.991315592523048e-05, "loss": 1.8784, "step": 2157 }, { "epoch": 7.075409836065574, "grad_norm": 9.723298072814941, "learning_rate": 1.9913016226574182e-05, "loss": 1.8179, "step": 2158 }, { "epoch": 7.078688524590164, "grad_norm": 6.779567718505859, "learning_rate": 1.9912876416138317e-05, "loss": 1.8379, "step": 2159 }, { "epoch": 7.081967213114754, "grad_norm": 7.542664051055908, "learning_rate": 1.9912736493924463e-05, "loss": 1.7461, "step": 2160 }, { "epoch": 7.085245901639344, "grad_norm": 6.831579208374023, "learning_rate": 1.9912596459934197e-05, "loss": 1.8281, "step": 2161 }, { "epoch": 7.088524590163934, "grad_norm": 9.72623348236084, "learning_rate": 1.9912456314169103e-05, "loss": 1.8708, "step": 2162 }, { "epoch": 7.091803278688524, "grad_norm": 7.179681301116943, "learning_rate": 1.9912316056630756e-05, "loss": 1.9375, "step": 2163 }, { "epoch": 7.0950819672131145, "grad_norm": 6.804352283477783, "learning_rate": 1.991217568732074e-05, "loss": 1.8452, "step": 2164 }, { "epoch": 7.098360655737705, "grad_norm": 8.931135177612305, "learning_rate": 1.9912035206240638e-05, "loss": 1.6956, "step": 2165 }, { "epoch": 7.101639344262295, "grad_norm": 9.265837669372559, "learning_rate": 1.991189461339203e-05, "loss": 1.897, "step": 2166 }, { "epoch": 7.104918032786885, "grad_norm": 9.534493446350098, "learning_rate": 1.9911753908776505e-05, "loss": 1.9023, "step": 2167 }, { "epoch": 7.108196721311476, "grad_norm": 10.942056655883789, "learning_rate": 1.991161309239565e-05, "loss": 1.7241, "step": 2168 }, { "epoch": 7.111475409836066, "grad_norm": 10.713653564453125, "learning_rate": 1.9911472164251053e-05, "loss": 1.9419, "step": 2169 }, { "epoch": 7.114754098360656, "grad_norm": 9.256211280822754, "learning_rate": 1.99113311243443e-05, "loss": 2.0391, "step": 2170 }, { "epoch": 7.118032786885246, "grad_norm": 12.635429382324219, "learning_rate": 1.9911189972676987e-05, "loss": 1.7722, "step": 2171 }, { "epoch": 7.121311475409836, "grad_norm": 7.202627658843994, "learning_rate": 1.9911048709250696e-05, "loss": 1.9561, "step": 2172 }, { "epoch": 7.1245901639344265, "grad_norm": 9.038519859313965, "learning_rate": 1.9910907334067028e-05, "loss": 2.0444, "step": 2173 }, { "epoch": 7.127868852459017, "grad_norm": 5.624301433563232, "learning_rate": 1.9910765847127578e-05, "loss": 1.7856, "step": 2174 }, { "epoch": 7.131147540983607, "grad_norm": 7.962967872619629, "learning_rate": 1.9910624248433938e-05, "loss": 1.771, "step": 2175 }, { "epoch": 7.134426229508197, "grad_norm": 9.541071891784668, "learning_rate": 1.9910482537987704e-05, "loss": 1.8313, "step": 2176 }, { "epoch": 7.137704918032787, "grad_norm": 7.574064254760742, "learning_rate": 1.991034071579047e-05, "loss": 1.8857, "step": 2177 }, { "epoch": 7.140983606557377, "grad_norm": 9.519067764282227, "learning_rate": 1.9910198781843847e-05, "loss": 1.9434, "step": 2178 }, { "epoch": 7.144262295081967, "grad_norm": 9.584793090820312, "learning_rate": 1.9910056736149427e-05, "loss": 1.9189, "step": 2179 }, { "epoch": 7.147540983606557, "grad_norm": 7.145684719085693, "learning_rate": 1.9909914578708816e-05, "loss": 1.8677, "step": 2180 }, { "epoch": 7.150819672131147, "grad_norm": 7.844091892242432, "learning_rate": 1.990977230952361e-05, "loss": 1.7937, "step": 2181 }, { "epoch": 7.154098360655738, "grad_norm": 8.205596923828125, "learning_rate": 1.990962992859542e-05, "loss": 1.9553, "step": 2182 }, { "epoch": 7.157377049180328, "grad_norm": 7.887129783630371, "learning_rate": 1.990948743592585e-05, "loss": 1.7529, "step": 2183 }, { "epoch": 7.160655737704918, "grad_norm": 8.10596752166748, "learning_rate": 1.9909344831516503e-05, "loss": 1.8376, "step": 2184 }, { "epoch": 7.163934426229508, "grad_norm": 8.584563255310059, "learning_rate": 1.9909202115368992e-05, "loss": 1.8374, "step": 2185 }, { "epoch": 7.167213114754098, "grad_norm": 6.475170612335205, "learning_rate": 1.990905928748492e-05, "loss": 1.8159, "step": 2186 }, { "epoch": 7.170491803278688, "grad_norm": 8.292354583740234, "learning_rate": 1.9908916347865907e-05, "loss": 1.8315, "step": 2187 }, { "epoch": 7.173770491803278, "grad_norm": 10.806431770324707, "learning_rate": 1.9908773296513557e-05, "loss": 1.6597, "step": 2188 }, { "epoch": 7.177049180327868, "grad_norm": 7.071215629577637, "learning_rate": 1.9908630133429488e-05, "loss": 1.9341, "step": 2189 }, { "epoch": 7.180327868852459, "grad_norm": 12.943554878234863, "learning_rate": 1.990848685861531e-05, "loss": 1.8262, "step": 2190 }, { "epoch": 7.18360655737705, "grad_norm": 8.93262767791748, "learning_rate": 1.990834347207264e-05, "loss": 1.801, "step": 2191 }, { "epoch": 7.18688524590164, "grad_norm": 6.409380912780762, "learning_rate": 1.9908199973803094e-05, "loss": 1.9023, "step": 2192 }, { "epoch": 7.19016393442623, "grad_norm": 9.898117065429688, "learning_rate": 1.9908056363808294e-05, "loss": 1.7466, "step": 2193 }, { "epoch": 7.19344262295082, "grad_norm": 6.601751804351807, "learning_rate": 1.9907912642089855e-05, "loss": 1.8701, "step": 2194 }, { "epoch": 7.19672131147541, "grad_norm": 7.303606986999512, "learning_rate": 1.99077688086494e-05, "loss": 1.8442, "step": 2195 }, { "epoch": 7.2, "grad_norm": 11.174051284790039, "learning_rate": 1.990762486348855e-05, "loss": 1.8896, "step": 2196 }, { "epoch": 7.20327868852459, "grad_norm": 9.473371505737305, "learning_rate": 1.9907480806608927e-05, "loss": 1.9458, "step": 2197 }, { "epoch": 7.20655737704918, "grad_norm": 7.136452674865723, "learning_rate": 1.9907336638012162e-05, "loss": 1.8374, "step": 2198 }, { "epoch": 7.2098360655737705, "grad_norm": 8.42847728729248, "learning_rate": 1.990719235769987e-05, "loss": 1.916, "step": 2199 }, { "epoch": 7.213114754098361, "grad_norm": 8.177501678466797, "learning_rate": 1.9907047965673684e-05, "loss": 1.9028, "step": 2200 }, { "epoch": 7.216393442622951, "grad_norm": 6.576298236846924, "learning_rate": 1.990690346193523e-05, "loss": 1.7581, "step": 2201 }, { "epoch": 7.219672131147541, "grad_norm": 8.585416793823242, "learning_rate": 1.9906758846486146e-05, "loss": 1.8604, "step": 2202 }, { "epoch": 7.222950819672131, "grad_norm": 8.066176414489746, "learning_rate": 1.990661411932805e-05, "loss": 1.8384, "step": 2203 }, { "epoch": 7.226229508196721, "grad_norm": 8.516929626464844, "learning_rate": 1.990646928046258e-05, "loss": 1.9792, "step": 2204 }, { "epoch": 7.229508196721311, "grad_norm": 11.312797546386719, "learning_rate": 1.9906324329891366e-05, "loss": 1.9043, "step": 2205 }, { "epoch": 7.232786885245901, "grad_norm": 8.043866157531738, "learning_rate": 1.9906179267616047e-05, "loss": 1.7808, "step": 2206 }, { "epoch": 7.2360655737704915, "grad_norm": 7.499270915985107, "learning_rate": 1.990603409363826e-05, "loss": 1.925, "step": 2207 }, { "epoch": 7.239344262295082, "grad_norm": 10.13015365600586, "learning_rate": 1.990588880795964e-05, "loss": 1.8721, "step": 2208 }, { "epoch": 7.242622950819672, "grad_norm": 6.329481601715088, "learning_rate": 1.990574341058182e-05, "loss": 2.0488, "step": 2209 }, { "epoch": 7.245901639344262, "grad_norm": 8.311662673950195, "learning_rate": 1.9905597901506442e-05, "loss": 2.0796, "step": 2210 }, { "epoch": 7.249180327868853, "grad_norm": 5.63818883895874, "learning_rate": 1.9905452280735155e-05, "loss": 1.8906, "step": 2211 }, { "epoch": 7.252459016393443, "grad_norm": 11.703761100769043, "learning_rate": 1.9905306548269587e-05, "loss": 1.8928, "step": 2212 }, { "epoch": 7.255737704918033, "grad_norm": 8.47628402709961, "learning_rate": 1.9905160704111392e-05, "loss": 1.928, "step": 2213 }, { "epoch": 7.259016393442623, "grad_norm": 12.616963386535645, "learning_rate": 1.9905014748262212e-05, "loss": 2.0361, "step": 2214 }, { "epoch": 7.262295081967213, "grad_norm": 7.701417922973633, "learning_rate": 1.9904868680723692e-05, "loss": 1.936, "step": 2215 }, { "epoch": 7.2655737704918035, "grad_norm": 9.655338287353516, "learning_rate": 1.9904722501497477e-05, "loss": 2.0483, "step": 2216 }, { "epoch": 7.268852459016394, "grad_norm": 9.12037181854248, "learning_rate": 1.9904576210585222e-05, "loss": 1.9741, "step": 2217 }, { "epoch": 7.272131147540984, "grad_norm": 11.393627166748047, "learning_rate": 1.990442980798857e-05, "loss": 1.9458, "step": 2218 }, { "epoch": 7.275409836065574, "grad_norm": 6.946646213531494, "learning_rate": 1.990428329370917e-05, "loss": 1.7466, "step": 2219 }, { "epoch": 7.278688524590164, "grad_norm": 7.625060081481934, "learning_rate": 1.9904136667748683e-05, "loss": 1.7314, "step": 2220 }, { "epoch": 7.281967213114754, "grad_norm": 9.77634048461914, "learning_rate": 1.9903989930108757e-05, "loss": 1.6396, "step": 2221 }, { "epoch": 7.285245901639344, "grad_norm": 8.977691650390625, "learning_rate": 1.9903843080791044e-05, "loss": 2.1187, "step": 2222 }, { "epoch": 7.288524590163934, "grad_norm": 8.114684104919434, "learning_rate": 1.9903696119797204e-05, "loss": 1.9746, "step": 2223 }, { "epoch": 7.2918032786885245, "grad_norm": 8.912519454956055, "learning_rate": 1.9903549047128894e-05, "loss": 1.9102, "step": 2224 }, { "epoch": 7.295081967213115, "grad_norm": 12.763248443603516, "learning_rate": 1.9903401862787773e-05, "loss": 1.874, "step": 2225 }, { "epoch": 7.298360655737705, "grad_norm": 9.697650909423828, "learning_rate": 1.9903254566775495e-05, "loss": 1.8467, "step": 2226 }, { "epoch": 7.301639344262295, "grad_norm": 12.042085647583008, "learning_rate": 1.9903107159093728e-05, "loss": 1.8989, "step": 2227 }, { "epoch": 7.304918032786885, "grad_norm": 9.081647872924805, "learning_rate": 1.9902959639744127e-05, "loss": 1.9014, "step": 2228 }, { "epoch": 7.308196721311475, "grad_norm": 9.002628326416016, "learning_rate": 1.9902812008728364e-05, "loss": 1.8418, "step": 2229 }, { "epoch": 7.311475409836065, "grad_norm": 12.611787796020508, "learning_rate": 1.99026642660481e-05, "loss": 1.9604, "step": 2230 }, { "epoch": 7.314754098360655, "grad_norm": 10.069226264953613, "learning_rate": 1.9902516411704994e-05, "loss": 1.7534, "step": 2231 }, { "epoch": 7.3180327868852455, "grad_norm": 9.631099700927734, "learning_rate": 1.9902368445700727e-05, "loss": 2.0195, "step": 2232 }, { "epoch": 7.321311475409836, "grad_norm": 10.745498657226562, "learning_rate": 1.9902220368036956e-05, "loss": 1.9595, "step": 2233 }, { "epoch": 7.324590163934427, "grad_norm": 7.6035919189453125, "learning_rate": 1.9902072178715353e-05, "loss": 1.8499, "step": 2234 }, { "epoch": 7.327868852459017, "grad_norm": 10.406179428100586, "learning_rate": 1.9901923877737593e-05, "loss": 1.8804, "step": 2235 }, { "epoch": 7.331147540983607, "grad_norm": 9.301201820373535, "learning_rate": 1.9901775465105346e-05, "loss": 1.7842, "step": 2236 }, { "epoch": 7.334426229508197, "grad_norm": 15.536080360412598, "learning_rate": 1.990162694082028e-05, "loss": 2.0317, "step": 2237 }, { "epoch": 7.337704918032787, "grad_norm": 7.655529022216797, "learning_rate": 1.9901478304884084e-05, "loss": 1.7874, "step": 2238 }, { "epoch": 7.340983606557377, "grad_norm": 9.394801139831543, "learning_rate": 1.990132955729842e-05, "loss": 1.6753, "step": 2239 }, { "epoch": 7.344262295081967, "grad_norm": 10.21945858001709, "learning_rate": 1.9901180698064972e-05, "loss": 1.8789, "step": 2240 }, { "epoch": 7.3475409836065575, "grad_norm": 21.75950050354004, "learning_rate": 1.9901031727185415e-05, "loss": 1.8628, "step": 2241 }, { "epoch": 7.350819672131148, "grad_norm": 5.473992824554443, "learning_rate": 1.9900882644661433e-05, "loss": 1.7144, "step": 2242 }, { "epoch": 7.354098360655738, "grad_norm": 23.463119506835938, "learning_rate": 1.99007334504947e-05, "loss": 1.9402, "step": 2243 }, { "epoch": 7.357377049180328, "grad_norm": 6.803433895111084, "learning_rate": 1.990058414468691e-05, "loss": 1.937, "step": 2244 }, { "epoch": 7.360655737704918, "grad_norm": 12.78880786895752, "learning_rate": 1.990043472723974e-05, "loss": 1.8923, "step": 2245 }, { "epoch": 7.363934426229508, "grad_norm": 8.990974426269531, "learning_rate": 1.990028519815487e-05, "loss": 1.9805, "step": 2246 }, { "epoch": 7.367213114754098, "grad_norm": 8.463171005249023, "learning_rate": 1.9900135557433994e-05, "loss": 1.6973, "step": 2247 }, { "epoch": 7.370491803278688, "grad_norm": 9.621443748474121, "learning_rate": 1.989998580507879e-05, "loss": 1.7966, "step": 2248 }, { "epoch": 7.3737704918032785, "grad_norm": 7.663715362548828, "learning_rate": 1.989983594109096e-05, "loss": 1.8979, "step": 2249 }, { "epoch": 7.377049180327869, "grad_norm": 6.198119163513184, "learning_rate": 1.9899685965472183e-05, "loss": 2.0352, "step": 2250 }, { "epoch": 7.380327868852459, "grad_norm": 6.569281578063965, "learning_rate": 1.9899535878224153e-05, "loss": 1.7554, "step": 2251 }, { "epoch": 7.383606557377049, "grad_norm": 8.830731391906738, "learning_rate": 1.9899385679348562e-05, "loss": 1.8418, "step": 2252 }, { "epoch": 7.386885245901639, "grad_norm": 7.015636920928955, "learning_rate": 1.9899235368847107e-05, "loss": 1.7751, "step": 2253 }, { "epoch": 7.390163934426229, "grad_norm": 9.1712646484375, "learning_rate": 1.989908494672148e-05, "loss": 1.8945, "step": 2254 }, { "epoch": 7.39344262295082, "grad_norm": 9.938053131103516, "learning_rate": 1.989893441297338e-05, "loss": 1.9092, "step": 2255 }, { "epoch": 7.39672131147541, "grad_norm": 8.091404914855957, "learning_rate": 1.9898783767604503e-05, "loss": 1.7336, "step": 2256 }, { "epoch": 7.4, "grad_norm": 5.943991184234619, "learning_rate": 1.989863301061654e-05, "loss": 2.04, "step": 2257 }, { "epoch": 7.4032786885245905, "grad_norm": 7.787069320678711, "learning_rate": 1.9898482142011203e-05, "loss": 1.6963, "step": 2258 }, { "epoch": 7.406557377049181, "grad_norm": 7.628276824951172, "learning_rate": 1.9898331161790188e-05, "loss": 1.8523, "step": 2259 }, { "epoch": 7.409836065573771, "grad_norm": 10.513256072998047, "learning_rate": 1.9898180069955195e-05, "loss": 1.7949, "step": 2260 }, { "epoch": 7.413114754098361, "grad_norm": 5.10508394241333, "learning_rate": 1.9898028866507934e-05, "loss": 1.9233, "step": 2261 }, { "epoch": 7.416393442622951, "grad_norm": 6.969552993774414, "learning_rate": 1.9897877551450102e-05, "loss": 2.0283, "step": 2262 }, { "epoch": 7.419672131147541, "grad_norm": 19.640239715576172, "learning_rate": 1.9897726124783412e-05, "loss": 1.7537, "step": 2263 }, { "epoch": 7.422950819672131, "grad_norm": 7.156152248382568, "learning_rate": 1.989757458650957e-05, "loss": 1.9355, "step": 2264 }, { "epoch": 7.426229508196721, "grad_norm": 10.785743713378906, "learning_rate": 1.989742293663028e-05, "loss": 1.7646, "step": 2265 }, { "epoch": 7.4295081967213115, "grad_norm": 8.103100776672363, "learning_rate": 1.9897271175147258e-05, "loss": 1.9014, "step": 2266 }, { "epoch": 7.432786885245902, "grad_norm": 8.445577621459961, "learning_rate": 1.989711930206221e-05, "loss": 1.6204, "step": 2267 }, { "epoch": 7.436065573770492, "grad_norm": 6.81315279006958, "learning_rate": 1.9896967317376858e-05, "loss": 1.9023, "step": 2268 }, { "epoch": 7.439344262295082, "grad_norm": 9.476963996887207, "learning_rate": 1.9896815221092902e-05, "loss": 2.0234, "step": 2269 }, { "epoch": 7.442622950819672, "grad_norm": 7.198822498321533, "learning_rate": 1.9896663013212065e-05, "loss": 1.8623, "step": 2270 }, { "epoch": 7.445901639344262, "grad_norm": 13.15672492980957, "learning_rate": 1.9896510693736066e-05, "loss": 1.7852, "step": 2271 }, { "epoch": 7.449180327868852, "grad_norm": 7.505738735198975, "learning_rate": 1.9896358262666618e-05, "loss": 1.8369, "step": 2272 }, { "epoch": 7.452459016393442, "grad_norm": 6.1783599853515625, "learning_rate": 1.989620572000544e-05, "loss": 1.7937, "step": 2273 }, { "epoch": 7.4557377049180324, "grad_norm": 7.321208953857422, "learning_rate": 1.9896053065754255e-05, "loss": 1.8032, "step": 2274 }, { "epoch": 7.459016393442623, "grad_norm": 7.249887466430664, "learning_rate": 1.989590029991478e-05, "loss": 1.811, "step": 2275 }, { "epoch": 7.462295081967213, "grad_norm": 7.549666881561279, "learning_rate": 1.9895747422488743e-05, "loss": 1.812, "step": 2276 }, { "epoch": 7.465573770491803, "grad_norm": 5.522728443145752, "learning_rate": 1.9895594433477862e-05, "loss": 1.8655, "step": 2277 }, { "epoch": 7.468852459016394, "grad_norm": 15.04108715057373, "learning_rate": 1.989544133288387e-05, "loss": 1.834, "step": 2278 }, { "epoch": 7.472131147540984, "grad_norm": 6.044948577880859, "learning_rate": 1.989528812070848e-05, "loss": 2.001, "step": 2279 }, { "epoch": 7.475409836065574, "grad_norm": 12.904102325439453, "learning_rate": 1.9895134796953434e-05, "loss": 1.6685, "step": 2280 }, { "epoch": 7.478688524590164, "grad_norm": 10.428121566772461, "learning_rate": 1.9894981361620452e-05, "loss": 1.6765, "step": 2281 }, { "epoch": 7.481967213114754, "grad_norm": 7.216013431549072, "learning_rate": 1.989482781471127e-05, "loss": 2.0034, "step": 2282 }, { "epoch": 7.4852459016393444, "grad_norm": 7.642518997192383, "learning_rate": 1.989467415622761e-05, "loss": 1.7432, "step": 2283 }, { "epoch": 7.488524590163935, "grad_norm": 7.292372226715088, "learning_rate": 1.9894520386171217e-05, "loss": 1.9292, "step": 2284 }, { "epoch": 7.491803278688525, "grad_norm": 7.983748435974121, "learning_rate": 1.989436650454382e-05, "loss": 1.7627, "step": 2285 }, { "epoch": 7.495081967213115, "grad_norm": 7.773024082183838, "learning_rate": 1.989421251134715e-05, "loss": 1.9106, "step": 2286 }, { "epoch": 7.498360655737705, "grad_norm": 9.932233810424805, "learning_rate": 1.9894058406582945e-05, "loss": 1.7764, "step": 2287 }, { "epoch": 7.501639344262295, "grad_norm": 12.381553649902344, "learning_rate": 1.9893904190252945e-05, "loss": 1.698, "step": 2288 }, { "epoch": 7.504918032786885, "grad_norm": 6.982187271118164, "learning_rate": 1.989374986235889e-05, "loss": 2.1211, "step": 2289 }, { "epoch": 7.508196721311475, "grad_norm": 13.58335018157959, "learning_rate": 1.989359542290252e-05, "loss": 1.9473, "step": 2290 }, { "epoch": 7.511475409836065, "grad_norm": 8.32082748413086, "learning_rate": 1.989344087188557e-05, "loss": 1.9231, "step": 2291 }, { "epoch": 7.5147540983606556, "grad_norm": 7.915403366088867, "learning_rate": 1.9893286209309793e-05, "loss": 1.915, "step": 2292 }, { "epoch": 7.518032786885246, "grad_norm": 10.700386047363281, "learning_rate": 1.989313143517692e-05, "loss": 1.8789, "step": 2293 }, { "epoch": 7.521311475409836, "grad_norm": 18.82547378540039, "learning_rate": 1.989297654948871e-05, "loss": 1.7996, "step": 2294 }, { "epoch": 7.524590163934426, "grad_norm": 5.858811378479004, "learning_rate": 1.9892821552246902e-05, "loss": 1.7207, "step": 2295 }, { "epoch": 7.527868852459016, "grad_norm": 6.651049613952637, "learning_rate": 1.9892666443453244e-05, "loss": 1.8755, "step": 2296 }, { "epoch": 7.531147540983606, "grad_norm": 6.338826656341553, "learning_rate": 1.989251122310949e-05, "loss": 1.9199, "step": 2297 }, { "epoch": 7.534426229508197, "grad_norm": 6.482193470001221, "learning_rate": 1.989235589121738e-05, "loss": 1.9277, "step": 2298 }, { "epoch": 7.537704918032787, "grad_norm": 6.637409687042236, "learning_rate": 1.9892200447778674e-05, "loss": 2.0015, "step": 2299 }, { "epoch": 7.540983606557377, "grad_norm": 6.977858066558838, "learning_rate": 1.9892044892795124e-05, "loss": 1.7324, "step": 2300 }, { "epoch": 7.5442622950819676, "grad_norm": 6.694797039031982, "learning_rate": 1.9891889226268482e-05, "loss": 1.7085, "step": 2301 }, { "epoch": 7.547540983606558, "grad_norm": 8.939641952514648, "learning_rate": 1.9891733448200506e-05, "loss": 1.9458, "step": 2302 }, { "epoch": 7.550819672131148, "grad_norm": 7.298349380493164, "learning_rate": 1.9891577558592948e-05, "loss": 1.835, "step": 2303 }, { "epoch": 7.554098360655738, "grad_norm": 6.216446399688721, "learning_rate": 1.989142155744757e-05, "loss": 1.8481, "step": 2304 }, { "epoch": 7.557377049180328, "grad_norm": 10.368154525756836, "learning_rate": 1.989126544476613e-05, "loss": 1.8306, "step": 2305 }, { "epoch": 7.560655737704918, "grad_norm": 8.0737943649292, "learning_rate": 1.9891109220550383e-05, "loss": 1.9111, "step": 2306 }, { "epoch": 7.563934426229508, "grad_norm": 7.358553886413574, "learning_rate": 1.98909528848021e-05, "loss": 1.7605, "step": 2307 }, { "epoch": 7.567213114754098, "grad_norm": 12.20753002166748, "learning_rate": 1.989079643752304e-05, "loss": 1.9121, "step": 2308 }, { "epoch": 7.5704918032786885, "grad_norm": 8.057729721069336, "learning_rate": 1.9890639878714963e-05, "loss": 1.916, "step": 2309 }, { "epoch": 7.573770491803279, "grad_norm": 10.363543510437012, "learning_rate": 1.9890483208379638e-05, "loss": 2.0, "step": 2310 }, { "epoch": 7.577049180327869, "grad_norm": 8.243356704711914, "learning_rate": 1.9890326426518832e-05, "loss": 1.7048, "step": 2311 }, { "epoch": 7.580327868852459, "grad_norm": 7.695550918579102, "learning_rate": 1.9890169533134314e-05, "loss": 1.9058, "step": 2312 }, { "epoch": 7.583606557377049, "grad_norm": 7.672791481018066, "learning_rate": 1.9890012528227853e-05, "loss": 1.8662, "step": 2313 }, { "epoch": 7.586885245901639, "grad_norm": 13.107189178466797, "learning_rate": 1.988985541180121e-05, "loss": 1.8796, "step": 2314 }, { "epoch": 7.590163934426229, "grad_norm": 6.733747482299805, "learning_rate": 1.988969818385617e-05, "loss": 1.856, "step": 2315 }, { "epoch": 7.593442622950819, "grad_norm": 10.178698539733887, "learning_rate": 1.98895408443945e-05, "loss": 1.8105, "step": 2316 }, { "epoch": 7.5967213114754095, "grad_norm": 8.564253807067871, "learning_rate": 1.988938339341797e-05, "loss": 1.7021, "step": 2317 }, { "epoch": 7.6, "grad_norm": 12.212461471557617, "learning_rate": 1.9889225830928365e-05, "loss": 2.0366, "step": 2318 }, { "epoch": 7.60327868852459, "grad_norm": 6.810788631439209, "learning_rate": 1.9889068156927454e-05, "loss": 1.7397, "step": 2319 }, { "epoch": 7.60655737704918, "grad_norm": 12.151886940002441, "learning_rate": 1.988891037141702e-05, "loss": 2.0791, "step": 2320 }, { "epoch": 7.60983606557377, "grad_norm": 9.309700965881348, "learning_rate": 1.988875247439884e-05, "loss": 1.7217, "step": 2321 }, { "epoch": 7.613114754098361, "grad_norm": 6.427943229675293, "learning_rate": 1.9888594465874692e-05, "loss": 1.8984, "step": 2322 }, { "epoch": 7.616393442622951, "grad_norm": 24.144363403320312, "learning_rate": 1.9888436345846357e-05, "loss": 1.9893, "step": 2323 }, { "epoch": 7.619672131147541, "grad_norm": 17.246824264526367, "learning_rate": 1.9888278114315628e-05, "loss": 1.96, "step": 2324 }, { "epoch": 7.622950819672131, "grad_norm": 5.790730953216553, "learning_rate": 1.9888119771284277e-05, "loss": 1.8687, "step": 2325 }, { "epoch": 7.6262295081967215, "grad_norm": 6.6334452629089355, "learning_rate": 1.9887961316754093e-05, "loss": 1.8804, "step": 2326 }, { "epoch": 7.629508196721312, "grad_norm": 7.89553165435791, "learning_rate": 1.9887802750726868e-05, "loss": 1.8235, "step": 2327 }, { "epoch": 7.632786885245902, "grad_norm": 5.619075775146484, "learning_rate": 1.9887644073204385e-05, "loss": 1.7131, "step": 2328 }, { "epoch": 7.636065573770492, "grad_norm": 7.01137113571167, "learning_rate": 1.9887485284188432e-05, "loss": 1.8955, "step": 2329 }, { "epoch": 7.639344262295082, "grad_norm": 6.406948566436768, "learning_rate": 1.9887326383680805e-05, "loss": 1.9077, "step": 2330 }, { "epoch": 7.642622950819672, "grad_norm": 6.84123420715332, "learning_rate": 1.9887167371683293e-05, "loss": 1.5957, "step": 2331 }, { "epoch": 7.645901639344262, "grad_norm": 7.737239360809326, "learning_rate": 1.988700824819769e-05, "loss": 1.7275, "step": 2332 }, { "epoch": 7.649180327868852, "grad_norm": 34.29153823852539, "learning_rate": 1.9886849013225787e-05, "loss": 1.8721, "step": 2333 }, { "epoch": 7.6524590163934425, "grad_norm": 6.9216108322143555, "learning_rate": 1.988668966676938e-05, "loss": 1.8438, "step": 2334 }, { "epoch": 7.655737704918033, "grad_norm": 7.9977617263793945, "learning_rate": 1.988653020883027e-05, "loss": 2.0112, "step": 2335 }, { "epoch": 7.659016393442623, "grad_norm": 6.510869026184082, "learning_rate": 1.9886370639410252e-05, "loss": 1.9258, "step": 2336 }, { "epoch": 7.662295081967213, "grad_norm": 6.515315532684326, "learning_rate": 1.9886210958511126e-05, "loss": 1.7061, "step": 2337 }, { "epoch": 7.665573770491803, "grad_norm": 6.601261615753174, "learning_rate": 1.988605116613469e-05, "loss": 1.7996, "step": 2338 }, { "epoch": 7.668852459016393, "grad_norm": 7.010468006134033, "learning_rate": 1.988589126228275e-05, "loss": 1.9048, "step": 2339 }, { "epoch": 7.672131147540983, "grad_norm": 15.786431312561035, "learning_rate": 1.9885731246957108e-05, "loss": 1.8794, "step": 2340 }, { "epoch": 7.675409836065574, "grad_norm": 6.537143707275391, "learning_rate": 1.9885571120159568e-05, "loss": 1.9297, "step": 2341 }, { "epoch": 7.678688524590164, "grad_norm": 6.375389099121094, "learning_rate": 1.9885410881891933e-05, "loss": 1.8594, "step": 2342 }, { "epoch": 7.6819672131147545, "grad_norm": 6.433962821960449, "learning_rate": 1.9885250532156012e-05, "loss": 1.917, "step": 2343 }, { "epoch": 7.685245901639345, "grad_norm": 7.573868751525879, "learning_rate": 1.9885090070953615e-05, "loss": 1.8823, "step": 2344 }, { "epoch": 7.688524590163935, "grad_norm": 8.150744438171387, "learning_rate": 1.9884929498286548e-05, "loss": 1.5447, "step": 2345 }, { "epoch": 7.691803278688525, "grad_norm": 7.464682579040527, "learning_rate": 1.9884768814156626e-05, "loss": 1.8569, "step": 2346 }, { "epoch": 7.695081967213115, "grad_norm": 6.725429534912109, "learning_rate": 1.9884608018565656e-05, "loss": 1.9966, "step": 2347 }, { "epoch": 7.698360655737705, "grad_norm": 6.584633827209473, "learning_rate": 1.9884447111515453e-05, "loss": 1.8706, "step": 2348 }, { "epoch": 7.701639344262295, "grad_norm": 5.78317403793335, "learning_rate": 1.9884286093007833e-05, "loss": 1.8574, "step": 2349 }, { "epoch": 7.704918032786885, "grad_norm": 6.628304481506348, "learning_rate": 1.9884124963044606e-05, "loss": 1.8613, "step": 2350 }, { "epoch": 7.7081967213114755, "grad_norm": 19.628808975219727, "learning_rate": 1.98839637216276e-05, "loss": 1.9272, "step": 2351 }, { "epoch": 7.711475409836066, "grad_norm": 5.604895114898682, "learning_rate": 1.988380236875862e-05, "loss": 1.6582, "step": 2352 }, { "epoch": 7.714754098360656, "grad_norm": 7.405445575714111, "learning_rate": 1.988364090443949e-05, "loss": 1.728, "step": 2353 }, { "epoch": 7.718032786885246, "grad_norm": 6.121435165405273, "learning_rate": 1.988347932867204e-05, "loss": 1.8145, "step": 2354 }, { "epoch": 7.721311475409836, "grad_norm": 6.503339767456055, "learning_rate": 1.988331764145808e-05, "loss": 1.9946, "step": 2355 }, { "epoch": 7.724590163934426, "grad_norm": 8.597105979919434, "learning_rate": 1.988315584279944e-05, "loss": 1.8674, "step": 2356 }, { "epoch": 7.727868852459016, "grad_norm": 7.684704303741455, "learning_rate": 1.988299393269794e-05, "loss": 1.979, "step": 2357 }, { "epoch": 7.731147540983606, "grad_norm": 8.414083480834961, "learning_rate": 1.988283191115541e-05, "loss": 1.812, "step": 2358 }, { "epoch": 7.7344262295081965, "grad_norm": 5.140405178070068, "learning_rate": 1.9882669778173672e-05, "loss": 1.8672, "step": 2359 }, { "epoch": 7.737704918032787, "grad_norm": 7.087660789489746, "learning_rate": 1.9882507533754553e-05, "loss": 1.7131, "step": 2360 }, { "epoch": 7.740983606557377, "grad_norm": 6.835423946380615, "learning_rate": 1.9882345177899895e-05, "loss": 1.792, "step": 2361 }, { "epoch": 7.744262295081967, "grad_norm": 7.925385475158691, "learning_rate": 1.9882182710611513e-05, "loss": 2.0107, "step": 2362 }, { "epoch": 7.747540983606557, "grad_norm": 9.531647682189941, "learning_rate": 1.9882020131891248e-05, "loss": 1.812, "step": 2363 }, { "epoch": 7.750819672131147, "grad_norm": 7.420258045196533, "learning_rate": 1.9881857441740932e-05, "loss": 2.1472, "step": 2364 }, { "epoch": 7.754098360655737, "grad_norm": 7.272245407104492, "learning_rate": 1.9881694640162402e-05, "loss": 1.6155, "step": 2365 }, { "epoch": 7.757377049180328, "grad_norm": 7.257333755493164, "learning_rate": 1.9881531727157484e-05, "loss": 1.8276, "step": 2366 }, { "epoch": 7.760655737704918, "grad_norm": 8.024889945983887, "learning_rate": 1.988136870272803e-05, "loss": 1.76, "step": 2367 }, { "epoch": 7.7639344262295085, "grad_norm": 5.787670612335205, "learning_rate": 1.9881205566875864e-05, "loss": 1.8108, "step": 2368 }, { "epoch": 7.767213114754099, "grad_norm": 6.8705010414123535, "learning_rate": 1.988104231960283e-05, "loss": 1.731, "step": 2369 }, { "epoch": 7.770491803278689, "grad_norm": 8.136350631713867, "learning_rate": 1.9880878960910772e-05, "loss": 2.0073, "step": 2370 }, { "epoch": 7.773770491803279, "grad_norm": 6.274984359741211, "learning_rate": 1.988071549080153e-05, "loss": 1.7703, "step": 2371 }, { "epoch": 7.777049180327869, "grad_norm": 7.1206135749816895, "learning_rate": 1.988055190927695e-05, "loss": 1.8999, "step": 2372 }, { "epoch": 7.780327868852459, "grad_norm": 6.6850266456604, "learning_rate": 1.9880388216338873e-05, "loss": 1.9692, "step": 2373 }, { "epoch": 7.783606557377049, "grad_norm": 7.081137657165527, "learning_rate": 1.9880224411989143e-05, "loss": 2.1108, "step": 2374 }, { "epoch": 7.786885245901639, "grad_norm": 8.144771575927734, "learning_rate": 1.9880060496229614e-05, "loss": 2.0469, "step": 2375 }, { "epoch": 7.7901639344262295, "grad_norm": 8.308528900146484, "learning_rate": 1.9879896469062125e-05, "loss": 1.8191, "step": 2376 }, { "epoch": 7.79344262295082, "grad_norm": 9.3018217086792, "learning_rate": 1.9879732330488535e-05, "loss": 1.9492, "step": 2377 }, { "epoch": 7.79672131147541, "grad_norm": 8.063754081726074, "learning_rate": 1.987956808051069e-05, "loss": 1.7546, "step": 2378 }, { "epoch": 7.8, "grad_norm": 9.526555061340332, "learning_rate": 1.987940371913044e-05, "loss": 1.7764, "step": 2379 }, { "epoch": 7.80327868852459, "grad_norm": 8.107738494873047, "learning_rate": 1.9879239246349647e-05, "loss": 1.9011, "step": 2380 }, { "epoch": 7.80655737704918, "grad_norm": 10.463282585144043, "learning_rate": 1.987907466217015e-05, "loss": 2.1494, "step": 2381 }, { "epoch": 7.80983606557377, "grad_norm": 6.525078296661377, "learning_rate": 1.9878909966593825e-05, "loss": 1.8438, "step": 2382 }, { "epoch": 7.81311475409836, "grad_norm": 6.17235803604126, "learning_rate": 1.9878745159622515e-05, "loss": 1.9673, "step": 2383 }, { "epoch": 7.81639344262295, "grad_norm": 6.836983680725098, "learning_rate": 1.987858024125808e-05, "loss": 2.105, "step": 2384 }, { "epoch": 7.8196721311475414, "grad_norm": 6.252928256988525, "learning_rate": 1.9878415211502382e-05, "loss": 1.8633, "step": 2385 }, { "epoch": 7.822950819672132, "grad_norm": 6.327576637268066, "learning_rate": 1.987825007035728e-05, "loss": 1.9326, "step": 2386 }, { "epoch": 7.826229508196722, "grad_norm": 5.635679721832275, "learning_rate": 1.987808481782464e-05, "loss": 2.0273, "step": 2387 }, { "epoch": 7.829508196721312, "grad_norm": 7.280615329742432, "learning_rate": 1.9877919453906325e-05, "loss": 1.9189, "step": 2388 }, { "epoch": 7.832786885245902, "grad_norm": 13.520129203796387, "learning_rate": 1.9877753978604194e-05, "loss": 1.7891, "step": 2389 }, { "epoch": 7.836065573770492, "grad_norm": 10.2086763381958, "learning_rate": 1.987758839192012e-05, "loss": 1.9111, "step": 2390 }, { "epoch": 7.839344262295082, "grad_norm": 10.007957458496094, "learning_rate": 1.9877422693855967e-05, "loss": 1.7021, "step": 2391 }, { "epoch": 7.842622950819672, "grad_norm": 9.340158462524414, "learning_rate": 1.98772568844136e-05, "loss": 1.8828, "step": 2392 }, { "epoch": 7.845901639344262, "grad_norm": 10.542725563049316, "learning_rate": 1.9877090963594892e-05, "loss": 1.9312, "step": 2393 }, { "epoch": 7.849180327868853, "grad_norm": 9.592131614685059, "learning_rate": 1.9876924931401717e-05, "loss": 1.7295, "step": 2394 }, { "epoch": 7.852459016393443, "grad_norm": 7.40236759185791, "learning_rate": 1.987675878783594e-05, "loss": 1.8245, "step": 2395 }, { "epoch": 7.855737704918033, "grad_norm": 7.669966220855713, "learning_rate": 1.9876592532899442e-05, "loss": 1.9053, "step": 2396 }, { "epoch": 7.859016393442623, "grad_norm": 6.693460941314697, "learning_rate": 1.987642616659409e-05, "loss": 1.7578, "step": 2397 }, { "epoch": 7.862295081967213, "grad_norm": 7.270810127258301, "learning_rate": 1.9876259688921765e-05, "loss": 1.8052, "step": 2398 }, { "epoch": 7.865573770491803, "grad_norm": 8.241692543029785, "learning_rate": 1.9876093099884346e-05, "loss": 1.7598, "step": 2399 }, { "epoch": 7.868852459016393, "grad_norm": 10.040868759155273, "learning_rate": 1.9875926399483708e-05, "loss": 1.9136, "step": 2400 }, { "epoch": 7.872131147540983, "grad_norm": 9.207387924194336, "learning_rate": 1.987575958772173e-05, "loss": 1.9717, "step": 2401 }, { "epoch": 7.8754098360655735, "grad_norm": 9.761691093444824, "learning_rate": 1.9875592664600294e-05, "loss": 1.8691, "step": 2402 }, { "epoch": 7.878688524590164, "grad_norm": 8.505791664123535, "learning_rate": 1.9875425630121285e-05, "loss": 1.855, "step": 2403 }, { "epoch": 7.881967213114754, "grad_norm": 8.124412536621094, "learning_rate": 1.9875258484286582e-05, "loss": 2.1372, "step": 2404 }, { "epoch": 7.885245901639344, "grad_norm": 7.311492443084717, "learning_rate": 1.9875091227098076e-05, "loss": 1.761, "step": 2405 }, { "epoch": 7.888524590163934, "grad_norm": 8.39275074005127, "learning_rate": 1.9874923858557645e-05, "loss": 1.658, "step": 2406 }, { "epoch": 7.891803278688524, "grad_norm": 19.252994537353516, "learning_rate": 1.987475637866718e-05, "loss": 1.8188, "step": 2407 }, { "epoch": 7.895081967213114, "grad_norm": 10.309096336364746, "learning_rate": 1.9874588787428572e-05, "loss": 1.6963, "step": 2408 }, { "epoch": 7.898360655737705, "grad_norm": 7.451925277709961, "learning_rate": 1.9874421084843707e-05, "loss": 1.6841, "step": 2409 }, { "epoch": 7.901639344262295, "grad_norm": 5.5167622566223145, "learning_rate": 1.9874253270914478e-05, "loss": 1.7373, "step": 2410 }, { "epoch": 7.9049180327868855, "grad_norm": 9.246310234069824, "learning_rate": 1.9874085345642774e-05, "loss": 1.8298, "step": 2411 }, { "epoch": 7.908196721311476, "grad_norm": 9.923145294189453, "learning_rate": 1.9873917309030494e-05, "loss": 2.0127, "step": 2412 }, { "epoch": 7.911475409836066, "grad_norm": 11.49515151977539, "learning_rate": 1.987374916107953e-05, "loss": 1.9253, "step": 2413 }, { "epoch": 7.914754098360656, "grad_norm": 5.949406623840332, "learning_rate": 1.9873580901791775e-05, "loss": 1.6672, "step": 2414 }, { "epoch": 7.918032786885246, "grad_norm": 6.284147262573242, "learning_rate": 1.9873412531169135e-05, "loss": 1.7104, "step": 2415 }, { "epoch": 7.921311475409836, "grad_norm": 8.47612476348877, "learning_rate": 1.98732440492135e-05, "loss": 1.8423, "step": 2416 }, { "epoch": 7.924590163934426, "grad_norm": 9.435382843017578, "learning_rate": 1.9873075455926773e-05, "loss": 1.9717, "step": 2417 }, { "epoch": 7.927868852459016, "grad_norm": 5.928816318511963, "learning_rate": 1.9872906751310852e-05, "loss": 1.8027, "step": 2418 }, { "epoch": 7.9311475409836065, "grad_norm": 7.152009010314941, "learning_rate": 1.9872737935367647e-05, "loss": 1.9365, "step": 2419 }, { "epoch": 7.934426229508197, "grad_norm": 6.480587005615234, "learning_rate": 1.9872569008099053e-05, "loss": 1.8672, "step": 2420 }, { "epoch": 7.937704918032787, "grad_norm": 5.5045294761657715, "learning_rate": 1.987239996950698e-05, "loss": 2.0154, "step": 2421 }, { "epoch": 7.940983606557377, "grad_norm": 58.78697967529297, "learning_rate": 1.9872230819593333e-05, "loss": 1.936, "step": 2422 }, { "epoch": 7.944262295081967, "grad_norm": 12.673243522644043, "learning_rate": 1.9872061558360015e-05, "loss": 1.8403, "step": 2423 }, { "epoch": 7.947540983606557, "grad_norm": 7.989517688751221, "learning_rate": 1.9871892185808945e-05, "loss": 1.8369, "step": 2424 }, { "epoch": 7.950819672131147, "grad_norm": 8.703315734863281, "learning_rate": 1.9871722701942026e-05, "loss": 1.9639, "step": 2425 }, { "epoch": 7.954098360655737, "grad_norm": 8.341850280761719, "learning_rate": 1.9871553106761167e-05, "loss": 1.9214, "step": 2426 }, { "epoch": 7.9573770491803275, "grad_norm": 13.467063903808594, "learning_rate": 1.987138340026828e-05, "loss": 1.7461, "step": 2427 }, { "epoch": 7.9606557377049185, "grad_norm": 8.264641761779785, "learning_rate": 1.9871213582465282e-05, "loss": 1.9956, "step": 2428 }, { "epoch": 7.963934426229509, "grad_norm": 8.529792785644531, "learning_rate": 1.987104365335409e-05, "loss": 1.9492, "step": 2429 }, { "epoch": 7.967213114754099, "grad_norm": 9.211324691772461, "learning_rate": 1.9870873612936618e-05, "loss": 1.9927, "step": 2430 }, { "epoch": 7.970491803278689, "grad_norm": 6.807124614715576, "learning_rate": 1.9870703461214784e-05, "loss": 1.9541, "step": 2431 }, { "epoch": 7.973770491803279, "grad_norm": 8.638496398925781, "learning_rate": 1.9870533198190503e-05, "loss": 1.8677, "step": 2432 }, { "epoch": 7.977049180327869, "grad_norm": 7.851373672485352, "learning_rate": 1.9870362823865696e-05, "loss": 1.9963, "step": 2433 }, { "epoch": 7.980327868852459, "grad_norm": 7.279760837554932, "learning_rate": 1.987019233824229e-05, "loss": 1.9746, "step": 2434 }, { "epoch": 7.983606557377049, "grad_norm": 9.469832420349121, "learning_rate": 1.9870021741322197e-05, "loss": 1.9189, "step": 2435 }, { "epoch": 7.9868852459016395, "grad_norm": 7.890145301818848, "learning_rate": 1.9869851033107354e-05, "loss": 1.8667, "step": 2436 }, { "epoch": 7.99016393442623, "grad_norm": 8.737045288085938, "learning_rate": 1.9869680213599672e-05, "loss": 1.811, "step": 2437 }, { "epoch": 7.99344262295082, "grad_norm": 7.74005651473999, "learning_rate": 1.9869509282801087e-05, "loss": 1.8242, "step": 2438 }, { "epoch": 7.99672131147541, "grad_norm": 7.658231735229492, "learning_rate": 1.9869338240713523e-05, "loss": 1.7441, "step": 2439 }, { "epoch": 8.0, "grad_norm": 8.937440872192383, "learning_rate": 1.9869167087338908e-05, "loss": 1.71, "step": 2440 }, { "epoch": 8.00327868852459, "grad_norm": 8.523120880126953, "learning_rate": 1.9868995822679173e-05, "loss": 1.6396, "step": 2441 }, { "epoch": 8.00655737704918, "grad_norm": 20.03299331665039, "learning_rate": 1.9868824446736246e-05, "loss": 1.8447, "step": 2442 }, { "epoch": 8.00983606557377, "grad_norm": 7.320449352264404, "learning_rate": 1.986865295951207e-05, "loss": 1.7771, "step": 2443 }, { "epoch": 8.01311475409836, "grad_norm": 6.908129692077637, "learning_rate": 1.9868481361008565e-05, "loss": 1.8193, "step": 2444 }, { "epoch": 8.01639344262295, "grad_norm": 8.427915573120117, "learning_rate": 1.9868309651227674e-05, "loss": 1.7405, "step": 2445 }, { "epoch": 8.01967213114754, "grad_norm": 6.616105556488037, "learning_rate": 1.986813783017133e-05, "loss": 1.7419, "step": 2446 }, { "epoch": 8.02295081967213, "grad_norm": 11.38572883605957, "learning_rate": 1.986796589784147e-05, "loss": 1.7441, "step": 2447 }, { "epoch": 8.026229508196721, "grad_norm": 7.721878528594971, "learning_rate": 1.986779385424004e-05, "loss": 1.6753, "step": 2448 }, { "epoch": 8.029508196721311, "grad_norm": 8.505413055419922, "learning_rate": 1.986762169936897e-05, "loss": 1.6899, "step": 2449 }, { "epoch": 8.032786885245901, "grad_norm": 6.728315353393555, "learning_rate": 1.9867449433230206e-05, "loss": 1.6577, "step": 2450 }, { "epoch": 8.036065573770491, "grad_norm": 7.452001094818115, "learning_rate": 1.986727705582569e-05, "loss": 1.7644, "step": 2451 }, { "epoch": 8.039344262295081, "grad_norm": 11.246855735778809, "learning_rate": 1.9867104567157367e-05, "loss": 2.0537, "step": 2452 }, { "epoch": 8.042622950819672, "grad_norm": 10.54313850402832, "learning_rate": 1.9866931967227183e-05, "loss": 1.729, "step": 2453 }, { "epoch": 8.045901639344262, "grad_norm": 7.421257495880127, "learning_rate": 1.9866759256037076e-05, "loss": 2.0181, "step": 2454 }, { "epoch": 8.049180327868852, "grad_norm": 6.194586753845215, "learning_rate": 1.9866586433589002e-05, "loss": 1.6682, "step": 2455 }, { "epoch": 8.052459016393442, "grad_norm": 6.373105049133301, "learning_rate": 1.986641349988491e-05, "loss": 1.8359, "step": 2456 }, { "epoch": 8.055737704918032, "grad_norm": 7.795002460479736, "learning_rate": 1.9866240454926745e-05, "loss": 1.6223, "step": 2457 }, { "epoch": 8.059016393442622, "grad_norm": 13.050293922424316, "learning_rate": 1.986606729871646e-05, "loss": 1.7466, "step": 2458 }, { "epoch": 8.062295081967212, "grad_norm": 9.502119064331055, "learning_rate": 1.986589403125601e-05, "loss": 1.7148, "step": 2459 }, { "epoch": 8.065573770491802, "grad_norm": 5.691775321960449, "learning_rate": 1.9865720652547345e-05, "loss": 1.6323, "step": 2460 }, { "epoch": 8.068852459016393, "grad_norm": 7.947871685028076, "learning_rate": 1.9865547162592423e-05, "loss": 1.7725, "step": 2461 }, { "epoch": 8.072131147540984, "grad_norm": 7.762508869171143, "learning_rate": 1.9865373561393197e-05, "loss": 1.7593, "step": 2462 }, { "epoch": 8.075409836065575, "grad_norm": 7.331798076629639, "learning_rate": 1.986519984895163e-05, "loss": 2.0322, "step": 2463 }, { "epoch": 8.078688524590165, "grad_norm": 6.174793243408203, "learning_rate": 1.9865026025269674e-05, "loss": 1.5803, "step": 2464 }, { "epoch": 8.081967213114755, "grad_norm": 6.687361240386963, "learning_rate": 1.9864852090349297e-05, "loss": 1.458, "step": 2465 }, { "epoch": 8.085245901639345, "grad_norm": 47.987266540527344, "learning_rate": 1.9864678044192453e-05, "loss": 1.8489, "step": 2466 }, { "epoch": 8.088524590163935, "grad_norm": 7.442786693572998, "learning_rate": 1.9864503886801108e-05, "loss": 1.8452, "step": 2467 }, { "epoch": 8.091803278688525, "grad_norm": 6.3576340675354, "learning_rate": 1.9864329618177223e-05, "loss": 1.8157, "step": 2468 }, { "epoch": 8.095081967213115, "grad_norm": 9.69864559173584, "learning_rate": 1.9864155238322768e-05, "loss": 1.7407, "step": 2469 }, { "epoch": 8.098360655737705, "grad_norm": 11.886452674865723, "learning_rate": 1.9863980747239707e-05, "loss": 1.8232, "step": 2470 }, { "epoch": 8.101639344262296, "grad_norm": 8.938434600830078, "learning_rate": 1.9863806144930005e-05, "loss": 1.7539, "step": 2471 }, { "epoch": 8.104918032786886, "grad_norm": 9.521955490112305, "learning_rate": 1.9863631431395634e-05, "loss": 1.708, "step": 2472 }, { "epoch": 8.108196721311476, "grad_norm": 8.373462677001953, "learning_rate": 1.9863456606638563e-05, "loss": 1.8911, "step": 2473 }, { "epoch": 8.111475409836066, "grad_norm": 6.5807204246521, "learning_rate": 1.986328167066076e-05, "loss": 1.9023, "step": 2474 }, { "epoch": 8.114754098360656, "grad_norm": 11.661036491394043, "learning_rate": 1.9863106623464204e-05, "loss": 1.813, "step": 2475 }, { "epoch": 8.118032786885246, "grad_norm": 8.012543678283691, "learning_rate": 1.9862931465050867e-05, "loss": 1.8145, "step": 2476 }, { "epoch": 8.121311475409836, "grad_norm": 7.5641608238220215, "learning_rate": 1.986275619542272e-05, "loss": 1.7141, "step": 2477 }, { "epoch": 8.124590163934426, "grad_norm": 7.409336090087891, "learning_rate": 1.9862580814581743e-05, "loss": 1.6553, "step": 2478 }, { "epoch": 8.127868852459017, "grad_norm": 5.583575248718262, "learning_rate": 1.9862405322529918e-05, "loss": 1.8208, "step": 2479 }, { "epoch": 8.131147540983607, "grad_norm": 7.66867733001709, "learning_rate": 1.9862229719269212e-05, "loss": 1.832, "step": 2480 }, { "epoch": 8.134426229508197, "grad_norm": 8.63215160369873, "learning_rate": 1.986205400480161e-05, "loss": 1.8462, "step": 2481 }, { "epoch": 8.137704918032787, "grad_norm": 6.7850799560546875, "learning_rate": 1.98618781791291e-05, "loss": 1.9868, "step": 2482 }, { "epoch": 8.140983606557377, "grad_norm": 6.236367702484131, "learning_rate": 1.986170224225366e-05, "loss": 1.6838, "step": 2483 }, { "epoch": 8.144262295081967, "grad_norm": 10.72835636138916, "learning_rate": 1.9861526194177276e-05, "loss": 1.7397, "step": 2484 }, { "epoch": 8.147540983606557, "grad_norm": 6.075800895690918, "learning_rate": 1.9861350034901924e-05, "loss": 1.6763, "step": 2485 }, { "epoch": 8.150819672131147, "grad_norm": 5.808544158935547, "learning_rate": 1.98611737644296e-05, "loss": 2.0039, "step": 2486 }, { "epoch": 8.154098360655738, "grad_norm": 7.7209153175354, "learning_rate": 1.986099738276229e-05, "loss": 1.812, "step": 2487 }, { "epoch": 8.157377049180328, "grad_norm": 7.874463081359863, "learning_rate": 1.9860820889901982e-05, "loss": 1.5762, "step": 2488 }, { "epoch": 8.160655737704918, "grad_norm": 8.941006660461426, "learning_rate": 1.9860644285850663e-05, "loss": 1.8774, "step": 2489 }, { "epoch": 8.163934426229508, "grad_norm": 7.006058216094971, "learning_rate": 1.986046757061033e-05, "loss": 1.6899, "step": 2490 }, { "epoch": 8.167213114754098, "grad_norm": 6.317963123321533, "learning_rate": 1.986029074418297e-05, "loss": 2.0327, "step": 2491 }, { "epoch": 8.170491803278688, "grad_norm": 8.297964096069336, "learning_rate": 1.986011380657058e-05, "loss": 1.7654, "step": 2492 }, { "epoch": 8.173770491803278, "grad_norm": 7.844199180603027, "learning_rate": 1.9859936757775158e-05, "loss": 1.918, "step": 2493 }, { "epoch": 8.177049180327868, "grad_norm": 8.839593887329102, "learning_rate": 1.9859759597798693e-05, "loss": 1.5972, "step": 2494 }, { "epoch": 8.180327868852459, "grad_norm": 8.875530242919922, "learning_rate": 1.9859582326643192e-05, "loss": 1.7441, "step": 2495 }, { "epoch": 8.183606557377049, "grad_norm": 5.296167373657227, "learning_rate": 1.9859404944310645e-05, "loss": 1.8389, "step": 2496 }, { "epoch": 8.186885245901639, "grad_norm": 6.241219520568848, "learning_rate": 1.9859227450803056e-05, "loss": 1.6572, "step": 2497 }, { "epoch": 8.190163934426229, "grad_norm": 7.149890422821045, "learning_rate": 1.985904984612243e-05, "loss": 2.0396, "step": 2498 }, { "epoch": 8.193442622950819, "grad_norm": 5.330687522888184, "learning_rate": 1.9858872130270764e-05, "loss": 1.7786, "step": 2499 }, { "epoch": 8.19672131147541, "grad_norm": 6.894339084625244, "learning_rate": 1.985869430325006e-05, "loss": 1.7798, "step": 2500 }, { "epoch": 8.2, "grad_norm": 6.037082672119141, "learning_rate": 1.9858516365062334e-05, "loss": 1.6814, "step": 2501 }, { "epoch": 8.20327868852459, "grad_norm": 6.183374404907227, "learning_rate": 1.9858338315709586e-05, "loss": 1.7827, "step": 2502 }, { "epoch": 8.20655737704918, "grad_norm": 8.295660018920898, "learning_rate": 1.9858160155193817e-05, "loss": 1.9326, "step": 2503 }, { "epoch": 8.20983606557377, "grad_norm": 7.290331840515137, "learning_rate": 1.9857981883517045e-05, "loss": 1.667, "step": 2504 }, { "epoch": 8.21311475409836, "grad_norm": 6.7907257080078125, "learning_rate": 1.985780350068128e-05, "loss": 1.8364, "step": 2505 }, { "epoch": 8.216393442622952, "grad_norm": 6.839517593383789, "learning_rate": 1.9857625006688527e-05, "loss": 1.7463, "step": 2506 }, { "epoch": 8.219672131147542, "grad_norm": 6.560611248016357, "learning_rate": 1.9857446401540807e-05, "loss": 1.9839, "step": 2507 }, { "epoch": 8.222950819672132, "grad_norm": 8.210618019104004, "learning_rate": 1.9857267685240127e-05, "loss": 1.9136, "step": 2508 }, { "epoch": 8.226229508196722, "grad_norm": 9.140288352966309, "learning_rate": 1.9857088857788504e-05, "loss": 1.7529, "step": 2509 }, { "epoch": 8.229508196721312, "grad_norm": 5.983809471130371, "learning_rate": 1.9856909919187958e-05, "loss": 1.71, "step": 2510 }, { "epoch": 8.232786885245902, "grad_norm": 6.8658528327941895, "learning_rate": 1.98567308694405e-05, "loss": 1.9011, "step": 2511 }, { "epoch": 8.236065573770492, "grad_norm": 7.452569484710693, "learning_rate": 1.9856551708548158e-05, "loss": 1.645, "step": 2512 }, { "epoch": 8.239344262295083, "grad_norm": 6.4884819984436035, "learning_rate": 1.9856372436512946e-05, "loss": 1.8523, "step": 2513 }, { "epoch": 8.242622950819673, "grad_norm": 7.600076675415039, "learning_rate": 1.9856193053336884e-05, "loss": 1.9888, "step": 2514 }, { "epoch": 8.245901639344263, "grad_norm": 7.7230024337768555, "learning_rate": 1.9856013559022e-05, "loss": 1.9321, "step": 2515 }, { "epoch": 8.249180327868853, "grad_norm": 7.968028545379639, "learning_rate": 1.9855833953570313e-05, "loss": 1.7656, "step": 2516 }, { "epoch": 8.252459016393443, "grad_norm": 6.6255316734313965, "learning_rate": 1.985565423698385e-05, "loss": 1.6611, "step": 2517 }, { "epoch": 8.255737704918033, "grad_norm": 6.463861465454102, "learning_rate": 1.9855474409264645e-05, "loss": 1.8936, "step": 2518 }, { "epoch": 8.259016393442623, "grad_norm": 5.907070636749268, "learning_rate": 1.9855294470414712e-05, "loss": 1.8264, "step": 2519 }, { "epoch": 8.262295081967213, "grad_norm": 5.4253058433532715, "learning_rate": 1.9855114420436087e-05, "loss": 1.9136, "step": 2520 }, { "epoch": 8.265573770491804, "grad_norm": 11.949764251708984, "learning_rate": 1.9854934259330804e-05, "loss": 1.6658, "step": 2521 }, { "epoch": 8.268852459016394, "grad_norm": 6.9309258460998535, "learning_rate": 1.985475398710089e-05, "loss": 1.6982, "step": 2522 }, { "epoch": 8.272131147540984, "grad_norm": 8.979316711425781, "learning_rate": 1.985457360374838e-05, "loss": 1.7524, "step": 2523 }, { "epoch": 8.275409836065574, "grad_norm": 6.592394828796387, "learning_rate": 1.9854393109275302e-05, "loss": 1.666, "step": 2524 }, { "epoch": 8.278688524590164, "grad_norm": 10.765761375427246, "learning_rate": 1.9854212503683697e-05, "loss": 1.8257, "step": 2525 }, { "epoch": 8.281967213114754, "grad_norm": 6.784571647644043, "learning_rate": 1.98540317869756e-05, "loss": 1.9033, "step": 2526 }, { "epoch": 8.285245901639344, "grad_norm": 6.531338214874268, "learning_rate": 1.985385095915305e-05, "loss": 1.6741, "step": 2527 }, { "epoch": 8.288524590163934, "grad_norm": 6.402412414550781, "learning_rate": 1.9853670020218084e-05, "loss": 1.7324, "step": 2528 }, { "epoch": 8.291803278688525, "grad_norm": 7.8217291831970215, "learning_rate": 1.9853488970172747e-05, "loss": 1.6672, "step": 2529 }, { "epoch": 8.295081967213115, "grad_norm": 6.611448287963867, "learning_rate": 1.9853307809019072e-05, "loss": 1.8149, "step": 2530 }, { "epoch": 8.298360655737705, "grad_norm": 7.5444183349609375, "learning_rate": 1.985312653675911e-05, "loss": 1.8552, "step": 2531 }, { "epoch": 8.301639344262295, "grad_norm": 8.090278625488281, "learning_rate": 1.98529451533949e-05, "loss": 1.8884, "step": 2532 }, { "epoch": 8.304918032786885, "grad_norm": 4.981700897216797, "learning_rate": 1.9852763658928488e-05, "loss": 1.8101, "step": 2533 }, { "epoch": 8.308196721311475, "grad_norm": 6.901241779327393, "learning_rate": 1.985258205336192e-05, "loss": 1.5371, "step": 2534 }, { "epoch": 8.311475409836065, "grad_norm": 6.938452243804932, "learning_rate": 1.985240033669725e-05, "loss": 1.731, "step": 2535 }, { "epoch": 8.314754098360655, "grad_norm": 8.470014572143555, "learning_rate": 1.985221850893652e-05, "loss": 1.8481, "step": 2536 }, { "epoch": 8.318032786885245, "grad_norm": 5.0153937339782715, "learning_rate": 1.985203657008178e-05, "loss": 1.7668, "step": 2537 }, { "epoch": 8.321311475409836, "grad_norm": 7.517397880554199, "learning_rate": 1.985185452013509e-05, "loss": 1.7168, "step": 2538 }, { "epoch": 8.324590163934426, "grad_norm": 7.340978622436523, "learning_rate": 1.985167235909849e-05, "loss": 1.8667, "step": 2539 }, { "epoch": 8.327868852459016, "grad_norm": 9.542835235595703, "learning_rate": 1.9851490086974045e-05, "loss": 1.9458, "step": 2540 }, { "epoch": 8.331147540983606, "grad_norm": 9.174046516418457, "learning_rate": 1.9851307703763806e-05, "loss": 1.9277, "step": 2541 }, { "epoch": 8.334426229508196, "grad_norm": 10.472065925598145, "learning_rate": 1.985112520946983e-05, "loss": 1.8989, "step": 2542 }, { "epoch": 8.337704918032786, "grad_norm": 8.379401206970215, "learning_rate": 1.9850942604094176e-05, "loss": 1.7993, "step": 2543 }, { "epoch": 8.340983606557376, "grad_norm": 7.965599536895752, "learning_rate": 1.9850759887638898e-05, "loss": 1.8638, "step": 2544 }, { "epoch": 8.344262295081966, "grad_norm": 6.120253086090088, "learning_rate": 1.985057706010606e-05, "loss": 1.6528, "step": 2545 }, { "epoch": 8.347540983606557, "grad_norm": 7.0298051834106445, "learning_rate": 1.9850394121497727e-05, "loss": 1.6909, "step": 2546 }, { "epoch": 8.350819672131147, "grad_norm": 6.195802211761475, "learning_rate": 1.9850211071815958e-05, "loss": 1.8381, "step": 2547 }, { "epoch": 8.354098360655737, "grad_norm": 7.4405364990234375, "learning_rate": 1.9850027911062816e-05, "loss": 1.8037, "step": 2548 }, { "epoch": 8.357377049180329, "grad_norm": 8.042814254760742, "learning_rate": 1.984984463924037e-05, "loss": 1.7573, "step": 2549 }, { "epoch": 8.360655737704919, "grad_norm": 7.074131965637207, "learning_rate": 1.9849661256350683e-05, "loss": 1.7852, "step": 2550 }, { "epoch": 8.363934426229509, "grad_norm": 8.50124740600586, "learning_rate": 1.9849477762395823e-05, "loss": 1.7217, "step": 2551 }, { "epoch": 8.3672131147541, "grad_norm": 7.563394546508789, "learning_rate": 1.9849294157377865e-05, "loss": 1.8042, "step": 2552 }, { "epoch": 8.37049180327869, "grad_norm": 8.466293334960938, "learning_rate": 1.984911044129887e-05, "loss": 1.8325, "step": 2553 }, { "epoch": 8.37377049180328, "grad_norm": 7.3806376457214355, "learning_rate": 1.9848926614160913e-05, "loss": 1.7583, "step": 2554 }, { "epoch": 8.37704918032787, "grad_norm": 6.69853401184082, "learning_rate": 1.984874267596607e-05, "loss": 1.6848, "step": 2555 }, { "epoch": 8.38032786885246, "grad_norm": 6.2837653160095215, "learning_rate": 1.9848558626716415e-05, "loss": 1.7935, "step": 2556 }, { "epoch": 8.38360655737705, "grad_norm": 6.9976325035095215, "learning_rate": 1.984837446641402e-05, "loss": 1.7153, "step": 2557 }, { "epoch": 8.38688524590164, "grad_norm": 5.616353511810303, "learning_rate": 1.9848190195060964e-05, "loss": 1.8657, "step": 2558 }, { "epoch": 8.39016393442623, "grad_norm": 9.410116195678711, "learning_rate": 1.9848005812659324e-05, "loss": 1.7188, "step": 2559 }, { "epoch": 8.39344262295082, "grad_norm": 5.636720180511475, "learning_rate": 1.9847821319211177e-05, "loss": 1.4907, "step": 2560 }, { "epoch": 8.39672131147541, "grad_norm": 8.503274917602539, "learning_rate": 1.9847636714718606e-05, "loss": 2.0249, "step": 2561 }, { "epoch": 8.4, "grad_norm": 7.7815351486206055, "learning_rate": 1.9847451999183692e-05, "loss": 1.8477, "step": 2562 }, { "epoch": 8.40327868852459, "grad_norm": 9.06685733795166, "learning_rate": 1.9847267172608518e-05, "loss": 1.731, "step": 2563 }, { "epoch": 8.40655737704918, "grad_norm": 5.500924587249756, "learning_rate": 1.9847082234995172e-05, "loss": 1.7766, "step": 2564 }, { "epoch": 8.40983606557377, "grad_norm": 9.048395156860352, "learning_rate": 1.9846897186345734e-05, "loss": 1.9038, "step": 2565 }, { "epoch": 8.41311475409836, "grad_norm": 5.7499098777771, "learning_rate": 1.984671202666229e-05, "loss": 1.8193, "step": 2566 }, { "epoch": 8.416393442622951, "grad_norm": 7.726580619812012, "learning_rate": 1.984652675594693e-05, "loss": 1.6982, "step": 2567 }, { "epoch": 8.419672131147541, "grad_norm": 4.95237398147583, "learning_rate": 1.9846341374201743e-05, "loss": 1.7915, "step": 2568 }, { "epoch": 8.422950819672131, "grad_norm": 5.508671283721924, "learning_rate": 1.984615588142882e-05, "loss": 1.6577, "step": 2569 }, { "epoch": 8.426229508196721, "grad_norm": 5.950633525848389, "learning_rate": 1.984597027763025e-05, "loss": 1.7656, "step": 2570 }, { "epoch": 8.429508196721311, "grad_norm": 5.143706798553467, "learning_rate": 1.984578456280813e-05, "loss": 1.8896, "step": 2571 }, { "epoch": 8.432786885245902, "grad_norm": 5.650483131408691, "learning_rate": 1.9845598736964553e-05, "loss": 1.7095, "step": 2572 }, { "epoch": 8.436065573770492, "grad_norm": 12.896806716918945, "learning_rate": 1.984541280010161e-05, "loss": 1.8359, "step": 2573 }, { "epoch": 8.439344262295082, "grad_norm": 5.264476299285889, "learning_rate": 1.9845226752221404e-05, "loss": 1.8174, "step": 2574 }, { "epoch": 8.442622950819672, "grad_norm": 7.180896282196045, "learning_rate": 1.9845040593326027e-05, "loss": 2.0054, "step": 2575 }, { "epoch": 8.445901639344262, "grad_norm": 5.308879375457764, "learning_rate": 1.9844854323417584e-05, "loss": 1.7979, "step": 2576 }, { "epoch": 8.449180327868852, "grad_norm": 6.485811710357666, "learning_rate": 1.984466794249817e-05, "loss": 1.9297, "step": 2577 }, { "epoch": 8.452459016393442, "grad_norm": 5.93226432800293, "learning_rate": 1.9844481450569894e-05, "loss": 1.6072, "step": 2578 }, { "epoch": 8.455737704918032, "grad_norm": 6.031450271606445, "learning_rate": 1.9844294847634848e-05, "loss": 1.7925, "step": 2579 }, { "epoch": 8.459016393442623, "grad_norm": 9.154953002929688, "learning_rate": 1.9844108133695146e-05, "loss": 1.7344, "step": 2580 }, { "epoch": 8.462295081967213, "grad_norm": 5.320608139038086, "learning_rate": 1.9843921308752887e-05, "loss": 2.064, "step": 2581 }, { "epoch": 8.465573770491803, "grad_norm": 5.161849498748779, "learning_rate": 1.984373437281018e-05, "loss": 1.6909, "step": 2582 }, { "epoch": 8.468852459016393, "grad_norm": 5.194894313812256, "learning_rate": 1.9843547325869136e-05, "loss": 1.7717, "step": 2583 }, { "epoch": 8.472131147540983, "grad_norm": 9.045968055725098, "learning_rate": 1.984336016793186e-05, "loss": 1.9009, "step": 2584 }, { "epoch": 8.475409836065573, "grad_norm": 5.98688268661499, "learning_rate": 1.9843172899000462e-05, "loss": 1.6016, "step": 2585 }, { "epoch": 8.478688524590163, "grad_norm": 6.0925374031066895, "learning_rate": 1.9842985519077052e-05, "loss": 1.595, "step": 2586 }, { "epoch": 8.481967213114753, "grad_norm": 7.699572563171387, "learning_rate": 1.984279802816375e-05, "loss": 1.7468, "step": 2587 }, { "epoch": 8.485245901639344, "grad_norm": 5.582244396209717, "learning_rate": 1.984261042626267e-05, "loss": 1.9077, "step": 2588 }, { "epoch": 8.488524590163934, "grad_norm": 23.800609588623047, "learning_rate": 1.984242271337592e-05, "loss": 1.679, "step": 2589 }, { "epoch": 8.491803278688524, "grad_norm": 5.607049465179443, "learning_rate": 1.984223488950562e-05, "loss": 1.8264, "step": 2590 }, { "epoch": 8.495081967213114, "grad_norm": 5.283965587615967, "learning_rate": 1.984204695465389e-05, "loss": 1.8484, "step": 2591 }, { "epoch": 8.498360655737706, "grad_norm": 8.641974449157715, "learning_rate": 1.9841858908822848e-05, "loss": 1.7012, "step": 2592 }, { "epoch": 8.501639344262294, "grad_norm": 13.901087760925293, "learning_rate": 1.984167075201461e-05, "loss": 1.9414, "step": 2593 }, { "epoch": 8.504918032786886, "grad_norm": 6.612516403198242, "learning_rate": 1.9841482484231304e-05, "loss": 1.771, "step": 2594 }, { "epoch": 8.508196721311476, "grad_norm": 7.081305027008057, "learning_rate": 1.984129410547505e-05, "loss": 1.6382, "step": 2595 }, { "epoch": 8.511475409836066, "grad_norm": 10.858263969421387, "learning_rate": 1.9841105615747974e-05, "loss": 1.6624, "step": 2596 }, { "epoch": 8.514754098360656, "grad_norm": 6.10556173324585, "learning_rate": 1.9840917015052197e-05, "loss": 1.8894, "step": 2597 }, { "epoch": 8.518032786885247, "grad_norm": 7.070224761962891, "learning_rate": 1.984072830338985e-05, "loss": 1.6265, "step": 2598 }, { "epoch": 8.521311475409837, "grad_norm": 6.479843616485596, "learning_rate": 1.984053948076306e-05, "loss": 1.6997, "step": 2599 }, { "epoch": 8.524590163934427, "grad_norm": 11.288848876953125, "learning_rate": 1.9840350547173954e-05, "loss": 1.6169, "step": 2600 }, { "epoch": 8.527868852459017, "grad_norm": 8.491297721862793, "learning_rate": 1.9840161502624665e-05, "loss": 1.813, "step": 2601 }, { "epoch": 8.531147540983607, "grad_norm": 10.073111534118652, "learning_rate": 1.9839972347117327e-05, "loss": 1.7979, "step": 2602 }, { "epoch": 8.534426229508197, "grad_norm": 7.3314409255981445, "learning_rate": 1.9839783080654067e-05, "loss": 1.6389, "step": 2603 }, { "epoch": 8.537704918032787, "grad_norm": 7.701419353485107, "learning_rate": 1.9839593703237022e-05, "loss": 1.8066, "step": 2604 }, { "epoch": 8.540983606557377, "grad_norm": 6.857725620269775, "learning_rate": 1.9839404214868328e-05, "loss": 1.9634, "step": 2605 }, { "epoch": 8.544262295081968, "grad_norm": 6.5144453048706055, "learning_rate": 1.983921461555012e-05, "loss": 1.8843, "step": 2606 }, { "epoch": 8.547540983606558, "grad_norm": 5.805550575256348, "learning_rate": 1.9839024905284538e-05, "loss": 1.959, "step": 2607 }, { "epoch": 8.550819672131148, "grad_norm": 7.7505669593811035, "learning_rate": 1.983883508407372e-05, "loss": 1.8672, "step": 2608 }, { "epoch": 8.554098360655738, "grad_norm": 6.1732940673828125, "learning_rate": 1.9838645151919808e-05, "loss": 1.7373, "step": 2609 }, { "epoch": 8.557377049180328, "grad_norm": 8.322806358337402, "learning_rate": 1.983845510882494e-05, "loss": 1.814, "step": 2610 }, { "epoch": 8.560655737704918, "grad_norm": 6.84957218170166, "learning_rate": 1.9838264954791263e-05, "loss": 1.8745, "step": 2611 }, { "epoch": 8.563934426229508, "grad_norm": 6.572484016418457, "learning_rate": 1.9838074689820916e-05, "loss": 1.4434, "step": 2612 }, { "epoch": 8.567213114754098, "grad_norm": 11.179036140441895, "learning_rate": 1.9837884313916053e-05, "loss": 1.7217, "step": 2613 }, { "epoch": 8.570491803278689, "grad_norm": 6.064435958862305, "learning_rate": 1.9837693827078812e-05, "loss": 1.731, "step": 2614 }, { "epoch": 8.573770491803279, "grad_norm": 7.8132429122924805, "learning_rate": 1.9837503229311347e-05, "loss": 1.5256, "step": 2615 }, { "epoch": 8.577049180327869, "grad_norm": 7.924384593963623, "learning_rate": 1.9837312520615798e-05, "loss": 1.8022, "step": 2616 }, { "epoch": 8.580327868852459, "grad_norm": 5.285773754119873, "learning_rate": 1.983712170099433e-05, "loss": 1.7639, "step": 2617 }, { "epoch": 8.583606557377049, "grad_norm": 6.232577800750732, "learning_rate": 1.9836930770449082e-05, "loss": 1.8657, "step": 2618 }, { "epoch": 8.58688524590164, "grad_norm": 6.35076379776001, "learning_rate": 1.9836739728982215e-05, "loss": 1.7227, "step": 2619 }, { "epoch": 8.59016393442623, "grad_norm": 10.101150512695312, "learning_rate": 1.9836548576595876e-05, "loss": 1.8628, "step": 2620 }, { "epoch": 8.59344262295082, "grad_norm": 7.185487270355225, "learning_rate": 1.983635731329223e-05, "loss": 1.7715, "step": 2621 }, { "epoch": 8.59672131147541, "grad_norm": 9.118483543395996, "learning_rate": 1.9836165939073423e-05, "loss": 1.614, "step": 2622 }, { "epoch": 8.6, "grad_norm": 11.445080757141113, "learning_rate": 1.9835974453941623e-05, "loss": 1.6807, "step": 2623 }, { "epoch": 8.60327868852459, "grad_norm": 6.839931964874268, "learning_rate": 1.983578285789898e-05, "loss": 1.7861, "step": 2624 }, { "epoch": 8.60655737704918, "grad_norm": 5.692868232727051, "learning_rate": 1.983559115094766e-05, "loss": 1.7522, "step": 2625 }, { "epoch": 8.60983606557377, "grad_norm": 6.107731819152832, "learning_rate": 1.9835399333089822e-05, "loss": 1.887, "step": 2626 }, { "epoch": 8.61311475409836, "grad_norm": 8.814759254455566, "learning_rate": 1.983520740432763e-05, "loss": 1.7708, "step": 2627 }, { "epoch": 8.61639344262295, "grad_norm": 5.671056270599365, "learning_rate": 1.983501536466325e-05, "loss": 1.8784, "step": 2628 }, { "epoch": 8.61967213114754, "grad_norm": 8.055173873901367, "learning_rate": 1.9834823214098844e-05, "loss": 1.9121, "step": 2629 }, { "epoch": 8.62295081967213, "grad_norm": 6.665326118469238, "learning_rate": 1.9834630952636584e-05, "loss": 1.885, "step": 2630 }, { "epoch": 8.62622950819672, "grad_norm": 6.825931549072266, "learning_rate": 1.983443858027863e-05, "loss": 1.7017, "step": 2631 }, { "epoch": 8.62950819672131, "grad_norm": 5.268820762634277, "learning_rate": 1.983424609702716e-05, "loss": 1.8655, "step": 2632 }, { "epoch": 8.6327868852459, "grad_norm": 7.830772399902344, "learning_rate": 1.9834053502884337e-05, "loss": 1.7085, "step": 2633 }, { "epoch": 8.636065573770491, "grad_norm": 7.839364051818848, "learning_rate": 1.983386079785234e-05, "loss": 1.7124, "step": 2634 }, { "epoch": 8.639344262295083, "grad_norm": 8.38796615600586, "learning_rate": 1.9833667981933335e-05, "loss": 1.6997, "step": 2635 }, { "epoch": 8.642622950819671, "grad_norm": 6.604795455932617, "learning_rate": 1.98334750551295e-05, "loss": 1.8389, "step": 2636 }, { "epoch": 8.645901639344263, "grad_norm": 6.869740009307861, "learning_rate": 1.983328201744301e-05, "loss": 1.7432, "step": 2637 }, { "epoch": 8.649180327868853, "grad_norm": 6.156607627868652, "learning_rate": 1.9833088868876042e-05, "loss": 1.9092, "step": 2638 }, { "epoch": 8.652459016393443, "grad_norm": 6.146914005279541, "learning_rate": 1.983289560943077e-05, "loss": 1.8376, "step": 2639 }, { "epoch": 8.655737704918034, "grad_norm": 6.332187652587891, "learning_rate": 1.9832702239109377e-05, "loss": 1.9424, "step": 2640 }, { "epoch": 8.659016393442624, "grad_norm": 9.743209838867188, "learning_rate": 1.9832508757914045e-05, "loss": 1.5627, "step": 2641 }, { "epoch": 8.662295081967214, "grad_norm": 6.70217752456665, "learning_rate": 1.983231516584695e-05, "loss": 1.6802, "step": 2642 }, { "epoch": 8.665573770491804, "grad_norm": 5.130467891693115, "learning_rate": 1.9832121462910282e-05, "loss": 1.5991, "step": 2643 }, { "epoch": 8.668852459016394, "grad_norm": 5.99024772644043, "learning_rate": 1.983192764910622e-05, "loss": 1.7156, "step": 2644 }, { "epoch": 8.672131147540984, "grad_norm": 7.979022979736328, "learning_rate": 1.983173372443695e-05, "loss": 1.6572, "step": 2645 }, { "epoch": 8.675409836065574, "grad_norm": 6.903846740722656, "learning_rate": 1.983153968890466e-05, "loss": 1.8457, "step": 2646 }, { "epoch": 8.678688524590164, "grad_norm": 5.983467102050781, "learning_rate": 1.9831345542511542e-05, "loss": 1.7263, "step": 2647 }, { "epoch": 8.681967213114755, "grad_norm": 7.301376819610596, "learning_rate": 1.983115128525978e-05, "loss": 1.844, "step": 2648 }, { "epoch": 8.685245901639345, "grad_norm": 6.089489936828613, "learning_rate": 1.983095691715156e-05, "loss": 1.543, "step": 2649 }, { "epoch": 8.688524590163935, "grad_norm": 9.718392372131348, "learning_rate": 1.9830762438189083e-05, "loss": 1.7598, "step": 2650 }, { "epoch": 8.691803278688525, "grad_norm": 11.202136993408203, "learning_rate": 1.9830567848374538e-05, "loss": 1.792, "step": 2651 }, { "epoch": 8.695081967213115, "grad_norm": 8.238272666931152, "learning_rate": 1.9830373147710117e-05, "loss": 1.812, "step": 2652 }, { "epoch": 8.698360655737705, "grad_norm": 5.640195846557617, "learning_rate": 1.983017833619802e-05, "loss": 1.8613, "step": 2653 }, { "epoch": 8.701639344262295, "grad_norm": 7.904164791107178, "learning_rate": 1.9829983413840442e-05, "loss": 1.8345, "step": 2654 }, { "epoch": 8.704918032786885, "grad_norm": 7.313251972198486, "learning_rate": 1.9829788380639576e-05, "loss": 1.8008, "step": 2655 }, { "epoch": 8.708196721311475, "grad_norm": 7.247381687164307, "learning_rate": 1.9829593236597632e-05, "loss": 1.8877, "step": 2656 }, { "epoch": 8.711475409836066, "grad_norm": 7.5160017013549805, "learning_rate": 1.98293979817168e-05, "loss": 1.5176, "step": 2657 }, { "epoch": 8.714754098360656, "grad_norm": 6.753724098205566, "learning_rate": 1.9829202615999285e-05, "loss": 1.6355, "step": 2658 }, { "epoch": 8.718032786885246, "grad_norm": 8.022068977355957, "learning_rate": 1.9829007139447294e-05, "loss": 1.9224, "step": 2659 }, { "epoch": 8.721311475409836, "grad_norm": 90.91688537597656, "learning_rate": 1.9828811552063026e-05, "loss": 1.8989, "step": 2660 }, { "epoch": 8.724590163934426, "grad_norm": 5.67863655090332, "learning_rate": 1.982861585384869e-05, "loss": 1.7241, "step": 2661 }, { "epoch": 8.727868852459016, "grad_norm": 6.470541477203369, "learning_rate": 1.982842004480649e-05, "loss": 1.5867, "step": 2662 }, { "epoch": 8.731147540983606, "grad_norm": 7.287519454956055, "learning_rate": 1.9828224124938634e-05, "loss": 1.9277, "step": 2663 }, { "epoch": 8.734426229508196, "grad_norm": 5.563647747039795, "learning_rate": 1.982802809424733e-05, "loss": 1.6978, "step": 2664 }, { "epoch": 8.737704918032787, "grad_norm": 10.36538028717041, "learning_rate": 1.9827831952734797e-05, "loss": 1.7915, "step": 2665 }, { "epoch": 8.740983606557377, "grad_norm": 6.117889404296875, "learning_rate": 1.9827635700403235e-05, "loss": 1.8589, "step": 2666 }, { "epoch": 8.744262295081967, "grad_norm": 6.573916912078857, "learning_rate": 1.9827439337254865e-05, "loss": 1.5059, "step": 2667 }, { "epoch": 8.747540983606557, "grad_norm": 7.883982181549072, "learning_rate": 1.9827242863291898e-05, "loss": 1.8521, "step": 2668 }, { "epoch": 8.750819672131147, "grad_norm": 10.327600479125977, "learning_rate": 1.982704627851655e-05, "loss": 1.9419, "step": 2669 }, { "epoch": 8.754098360655737, "grad_norm": 7.952382564544678, "learning_rate": 1.9826849582931038e-05, "loss": 1.8267, "step": 2670 }, { "epoch": 8.757377049180327, "grad_norm": 7.749011993408203, "learning_rate": 1.982665277653758e-05, "loss": 1.8103, "step": 2671 }, { "epoch": 8.760655737704917, "grad_norm": 5.757055282592773, "learning_rate": 1.9826455859338392e-05, "loss": 1.7295, "step": 2672 }, { "epoch": 8.763934426229508, "grad_norm": 6.746412754058838, "learning_rate": 1.9826258831335697e-05, "loss": 1.8174, "step": 2673 }, { "epoch": 8.767213114754098, "grad_norm": 5.97812557220459, "learning_rate": 1.982606169253172e-05, "loss": 1.9189, "step": 2674 }, { "epoch": 8.770491803278688, "grad_norm": 6.187286853790283, "learning_rate": 1.982586444292868e-05, "loss": 1.7666, "step": 2675 }, { "epoch": 8.773770491803278, "grad_norm": 7.220809459686279, "learning_rate": 1.98256670825288e-05, "loss": 1.8257, "step": 2676 }, { "epoch": 8.777049180327868, "grad_norm": 11.445677757263184, "learning_rate": 1.982546961133431e-05, "loss": 1.8516, "step": 2677 }, { "epoch": 8.780327868852458, "grad_norm": 9.191093444824219, "learning_rate": 1.9825272029347437e-05, "loss": 1.7651, "step": 2678 }, { "epoch": 8.783606557377048, "grad_norm": 12.060256004333496, "learning_rate": 1.98250743365704e-05, "loss": 1.8635, "step": 2679 }, { "epoch": 8.78688524590164, "grad_norm": 9.701147079467773, "learning_rate": 1.9824876533005438e-05, "loss": 1.9429, "step": 2680 }, { "epoch": 8.790163934426229, "grad_norm": 8.945667266845703, "learning_rate": 1.9824678618654775e-05, "loss": 1.7764, "step": 2681 }, { "epoch": 8.79344262295082, "grad_norm": 5.5519914627075195, "learning_rate": 1.9824480593520646e-05, "loss": 2.0938, "step": 2682 }, { "epoch": 8.79672131147541, "grad_norm": 8.285930633544922, "learning_rate": 1.9824282457605287e-05, "loss": 1.7764, "step": 2683 }, { "epoch": 8.8, "grad_norm": 8.129140853881836, "learning_rate": 1.9824084210910924e-05, "loss": 1.9507, "step": 2684 }, { "epoch": 8.80327868852459, "grad_norm": 6.944197654724121, "learning_rate": 1.98238858534398e-05, "loss": 1.7637, "step": 2685 }, { "epoch": 8.806557377049181, "grad_norm": 7.501062393188477, "learning_rate": 1.9823687385194147e-05, "loss": 1.7041, "step": 2686 }, { "epoch": 8.809836065573771, "grad_norm": 9.951888084411621, "learning_rate": 1.9823488806176206e-05, "loss": 1.6589, "step": 2687 }, { "epoch": 8.813114754098361, "grad_norm": 8.98727798461914, "learning_rate": 1.9823290116388215e-05, "loss": 1.9119, "step": 2688 }, { "epoch": 8.816393442622951, "grad_norm": 11.177579879760742, "learning_rate": 1.9823091315832415e-05, "loss": 1.876, "step": 2689 }, { "epoch": 8.819672131147541, "grad_norm": 5.385763168334961, "learning_rate": 1.9822892404511044e-05, "loss": 1.6626, "step": 2690 }, { "epoch": 8.822950819672132, "grad_norm": 5.866450309753418, "learning_rate": 1.982269338242635e-05, "loss": 1.8369, "step": 2691 }, { "epoch": 8.826229508196722, "grad_norm": 6.238783836364746, "learning_rate": 1.9822494249580578e-05, "loss": 1.6758, "step": 2692 }, { "epoch": 8.829508196721312, "grad_norm": 5.922269344329834, "learning_rate": 1.9822295005975964e-05, "loss": 1.7271, "step": 2693 }, { "epoch": 8.832786885245902, "grad_norm": 7.560097694396973, "learning_rate": 1.9822095651614766e-05, "loss": 1.6809, "step": 2694 }, { "epoch": 8.836065573770492, "grad_norm": 6.580049514770508, "learning_rate": 1.9821896186499226e-05, "loss": 1.6191, "step": 2695 }, { "epoch": 8.839344262295082, "grad_norm": 7.575460433959961, "learning_rate": 1.9821696610631594e-05, "loss": 1.8723, "step": 2696 }, { "epoch": 8.842622950819672, "grad_norm": 5.788534641265869, "learning_rate": 1.982149692401412e-05, "loss": 1.7139, "step": 2697 }, { "epoch": 8.845901639344262, "grad_norm": 9.289546966552734, "learning_rate": 1.9821297126649055e-05, "loss": 1.7705, "step": 2698 }, { "epoch": 8.849180327868853, "grad_norm": 10.658297538757324, "learning_rate": 1.9821097218538655e-05, "loss": 1.9341, "step": 2699 }, { "epoch": 8.852459016393443, "grad_norm": 7.25173282623291, "learning_rate": 1.9820897199685175e-05, "loss": 1.7236, "step": 2700 }, { "epoch": 8.855737704918033, "grad_norm": 14.775500297546387, "learning_rate": 1.9820697070090865e-05, "loss": 1.9001, "step": 2701 }, { "epoch": 8.859016393442623, "grad_norm": 5.265893936157227, "learning_rate": 1.9820496829757985e-05, "loss": 1.7185, "step": 2702 }, { "epoch": 8.862295081967213, "grad_norm": 6.181717872619629, "learning_rate": 1.982029647868879e-05, "loss": 1.5901, "step": 2703 }, { "epoch": 8.865573770491803, "grad_norm": 7.137194633483887, "learning_rate": 1.9820096016885547e-05, "loss": 1.686, "step": 2704 }, { "epoch": 8.868852459016393, "grad_norm": 8.477995872497559, "learning_rate": 1.981989544435051e-05, "loss": 1.6089, "step": 2705 }, { "epoch": 8.872131147540983, "grad_norm": 7.320926189422607, "learning_rate": 1.9819694761085937e-05, "loss": 2.0339, "step": 2706 }, { "epoch": 8.875409836065574, "grad_norm": 6.7083258628845215, "learning_rate": 1.9819493967094097e-05, "loss": 1.7949, "step": 2707 }, { "epoch": 8.878688524590164, "grad_norm": 7.466545104980469, "learning_rate": 1.9819293062377257e-05, "loss": 1.7598, "step": 2708 }, { "epoch": 8.881967213114754, "grad_norm": 7.2207489013671875, "learning_rate": 1.9819092046937676e-05, "loss": 1.5588, "step": 2709 }, { "epoch": 8.885245901639344, "grad_norm": 5.234469890594482, "learning_rate": 1.981889092077762e-05, "loss": 1.7729, "step": 2710 }, { "epoch": 8.888524590163934, "grad_norm": 6.923174858093262, "learning_rate": 1.9818689683899362e-05, "loss": 1.8223, "step": 2711 }, { "epoch": 8.891803278688524, "grad_norm": 7.807013511657715, "learning_rate": 1.981848833630517e-05, "loss": 1.7036, "step": 2712 }, { "epoch": 8.895081967213114, "grad_norm": 8.873449325561523, "learning_rate": 1.9818286877997315e-05, "loss": 1.5105, "step": 2713 }, { "epoch": 8.898360655737704, "grad_norm": 10.260123252868652, "learning_rate": 1.9818085308978064e-05, "loss": 1.897, "step": 2714 }, { "epoch": 8.901639344262295, "grad_norm": 8.438701629638672, "learning_rate": 1.9817883629249693e-05, "loss": 1.6187, "step": 2715 }, { "epoch": 8.904918032786885, "grad_norm": 6.040561676025391, "learning_rate": 1.9817681838814478e-05, "loss": 1.8296, "step": 2716 }, { "epoch": 8.908196721311475, "grad_norm": 7.27100944519043, "learning_rate": 1.9817479937674692e-05, "loss": 1.5381, "step": 2717 }, { "epoch": 8.911475409836065, "grad_norm": 6.002915859222412, "learning_rate": 1.981727792583261e-05, "loss": 1.908, "step": 2718 }, { "epoch": 8.914754098360655, "grad_norm": 8.672459602355957, "learning_rate": 1.9817075803290514e-05, "loss": 1.6797, "step": 2719 }, { "epoch": 8.918032786885245, "grad_norm": 6.4451422691345215, "learning_rate": 1.981687357005068e-05, "loss": 1.5647, "step": 2720 }, { "epoch": 8.921311475409835, "grad_norm": 6.403557777404785, "learning_rate": 1.9816671226115388e-05, "loss": 1.7251, "step": 2721 }, { "epoch": 8.924590163934425, "grad_norm": 8.621606826782227, "learning_rate": 1.9816468771486924e-05, "loss": 1.7593, "step": 2722 }, { "epoch": 8.927868852459017, "grad_norm": 5.514699935913086, "learning_rate": 1.9816266206167568e-05, "loss": 1.8232, "step": 2723 }, { "epoch": 8.931147540983606, "grad_norm": 7.021136283874512, "learning_rate": 1.9816063530159603e-05, "loss": 1.6553, "step": 2724 }, { "epoch": 8.934426229508198, "grad_norm": 6.020381927490234, "learning_rate": 1.9815860743465312e-05, "loss": 1.7305, "step": 2725 }, { "epoch": 8.937704918032788, "grad_norm": 9.408881187438965, "learning_rate": 1.981565784608699e-05, "loss": 1.8018, "step": 2726 }, { "epoch": 8.940983606557378, "grad_norm": 5.601431369781494, "learning_rate": 1.9815454838026918e-05, "loss": 1.9517, "step": 2727 }, { "epoch": 8.944262295081968, "grad_norm": 8.820427894592285, "learning_rate": 1.9815251719287388e-05, "loss": 1.8408, "step": 2728 }, { "epoch": 8.947540983606558, "grad_norm": 7.282981872558594, "learning_rate": 1.981504848987069e-05, "loss": 1.6685, "step": 2729 }, { "epoch": 8.950819672131148, "grad_norm": 6.001633644104004, "learning_rate": 1.9814845149779117e-05, "loss": 1.6191, "step": 2730 }, { "epoch": 8.954098360655738, "grad_norm": 6.559699535369873, "learning_rate": 1.9814641699014957e-05, "loss": 1.9067, "step": 2731 }, { "epoch": 8.957377049180328, "grad_norm": 6.947968482971191, "learning_rate": 1.9814438137580507e-05, "loss": 1.7886, "step": 2732 }, { "epoch": 8.960655737704919, "grad_norm": 10.122620582580566, "learning_rate": 1.9814234465478063e-05, "loss": 1.5242, "step": 2733 }, { "epoch": 8.963934426229509, "grad_norm": 6.3380231857299805, "learning_rate": 1.9814030682709923e-05, "loss": 1.7539, "step": 2734 }, { "epoch": 8.967213114754099, "grad_norm": 34.430198669433594, "learning_rate": 1.981382678927838e-05, "loss": 1.8147, "step": 2735 }, { "epoch": 8.970491803278689, "grad_norm": 5.992140769958496, "learning_rate": 1.981362278518574e-05, "loss": 2.0298, "step": 2736 }, { "epoch": 8.973770491803279, "grad_norm": 11.345464706420898, "learning_rate": 1.9813418670434298e-05, "loss": 1.5374, "step": 2737 }, { "epoch": 8.97704918032787, "grad_norm": 7.030801296234131, "learning_rate": 1.9813214445026357e-05, "loss": 1.6868, "step": 2738 }, { "epoch": 8.98032786885246, "grad_norm": 5.352192401885986, "learning_rate": 1.9813010108964218e-05, "loss": 1.8911, "step": 2739 }, { "epoch": 8.98360655737705, "grad_norm": 8.344388961791992, "learning_rate": 1.981280566225019e-05, "loss": 1.7856, "step": 2740 }, { "epoch": 8.98688524590164, "grad_norm": 6.815877914428711, "learning_rate": 1.9812601104886572e-05, "loss": 1.4592, "step": 2741 }, { "epoch": 8.99016393442623, "grad_norm": 9.29753303527832, "learning_rate": 1.9812396436875677e-05, "loss": 1.9658, "step": 2742 }, { "epoch": 8.99344262295082, "grad_norm": 10.076242446899414, "learning_rate": 1.9812191658219808e-05, "loss": 1.7729, "step": 2743 }, { "epoch": 8.99672131147541, "grad_norm": 6.8552937507629395, "learning_rate": 1.9811986768921278e-05, "loss": 1.8757, "step": 2744 }, { "epoch": 9.0, "grad_norm": 5.885742664337158, "learning_rate": 1.9811781768982392e-05, "loss": 1.5315, "step": 2745 }, { "epoch": 9.00327868852459, "grad_norm": 6.378337860107422, "learning_rate": 1.9811576658405465e-05, "loss": 1.5713, "step": 2746 }, { "epoch": 9.00655737704918, "grad_norm": 9.509010314941406, "learning_rate": 1.9811371437192815e-05, "loss": 1.7244, "step": 2747 }, { "epoch": 9.00983606557377, "grad_norm": 6.112339019775391, "learning_rate": 1.9811166105346746e-05, "loss": 1.7004, "step": 2748 }, { "epoch": 9.01311475409836, "grad_norm": 6.313111305236816, "learning_rate": 1.9810960662869578e-05, "loss": 1.6318, "step": 2749 }, { "epoch": 9.01639344262295, "grad_norm": 9.46192741394043, "learning_rate": 1.981075510976363e-05, "loss": 1.854, "step": 2750 }, { "epoch": 9.01967213114754, "grad_norm": 4.825533390045166, "learning_rate": 1.9810549446031216e-05, "loss": 1.6431, "step": 2751 }, { "epoch": 9.02295081967213, "grad_norm": 6.609074115753174, "learning_rate": 1.9810343671674657e-05, "loss": 1.5483, "step": 2752 }, { "epoch": 9.026229508196721, "grad_norm": 8.198391914367676, "learning_rate": 1.9810137786696273e-05, "loss": 1.4358, "step": 2753 }, { "epoch": 9.029508196721311, "grad_norm": 6.203546047210693, "learning_rate": 1.9809931791098384e-05, "loss": 1.8291, "step": 2754 }, { "epoch": 9.032786885245901, "grad_norm": 6.673219203948975, "learning_rate": 1.9809725684883315e-05, "loss": 1.4338, "step": 2755 }, { "epoch": 9.036065573770491, "grad_norm": 6.005139350891113, "learning_rate": 1.9809519468053394e-05, "loss": 1.606, "step": 2756 }, { "epoch": 9.039344262295081, "grad_norm": 9.03571891784668, "learning_rate": 1.9809313140610938e-05, "loss": 1.488, "step": 2757 }, { "epoch": 9.042622950819672, "grad_norm": 7.374305725097656, "learning_rate": 1.9809106702558277e-05, "loss": 1.6748, "step": 2758 }, { "epoch": 9.045901639344262, "grad_norm": 6.445350170135498, "learning_rate": 1.9808900153897737e-05, "loss": 1.7983, "step": 2759 }, { "epoch": 9.049180327868852, "grad_norm": 8.651741981506348, "learning_rate": 1.980869349463165e-05, "loss": 2.0278, "step": 2760 }, { "epoch": 9.052459016393442, "grad_norm": 5.663569927215576, "learning_rate": 1.980848672476235e-05, "loss": 1.6589, "step": 2761 }, { "epoch": 9.055737704918032, "grad_norm": 7.016798496246338, "learning_rate": 1.9808279844292156e-05, "loss": 1.6641, "step": 2762 }, { "epoch": 9.059016393442622, "grad_norm": 6.2070393562316895, "learning_rate": 1.9808072853223414e-05, "loss": 1.7227, "step": 2763 }, { "epoch": 9.062295081967212, "grad_norm": 11.82571792602539, "learning_rate": 1.980786575155845e-05, "loss": 1.7168, "step": 2764 }, { "epoch": 9.065573770491802, "grad_norm": 6.065202713012695, "learning_rate": 1.9807658539299605e-05, "loss": 1.7246, "step": 2765 }, { "epoch": 9.068852459016393, "grad_norm": 6.9876203536987305, "learning_rate": 1.9807451216449213e-05, "loss": 1.6057, "step": 2766 }, { "epoch": 9.072131147540984, "grad_norm": 5.550516128540039, "learning_rate": 1.980724378300961e-05, "loss": 1.6313, "step": 2767 }, { "epoch": 9.075409836065575, "grad_norm": 6.820189952850342, "learning_rate": 1.9807036238983137e-05, "loss": 1.7554, "step": 2768 }, { "epoch": 9.078688524590165, "grad_norm": 7.6287455558776855, "learning_rate": 1.980682858437213e-05, "loss": 1.7393, "step": 2769 }, { "epoch": 9.081967213114755, "grad_norm": 6.384125232696533, "learning_rate": 1.980662081917894e-05, "loss": 1.752, "step": 2770 }, { "epoch": 9.085245901639345, "grad_norm": 5.674630165100098, "learning_rate": 1.98064129434059e-05, "loss": 1.7036, "step": 2771 }, { "epoch": 9.088524590163935, "grad_norm": 7.919361114501953, "learning_rate": 1.980620495705536e-05, "loss": 1.6113, "step": 2772 }, { "epoch": 9.091803278688525, "grad_norm": 7.350612163543701, "learning_rate": 1.9805996860129658e-05, "loss": 1.7856, "step": 2773 }, { "epoch": 9.095081967213115, "grad_norm": 7.136216640472412, "learning_rate": 1.980578865263115e-05, "loss": 1.6108, "step": 2774 }, { "epoch": 9.098360655737705, "grad_norm": 11.409110069274902, "learning_rate": 1.9805580334562182e-05, "loss": 1.7988, "step": 2775 }, { "epoch": 9.101639344262296, "grad_norm": 5.892784595489502, "learning_rate": 1.9805371905925097e-05, "loss": 1.8364, "step": 2776 }, { "epoch": 9.104918032786886, "grad_norm": 8.120866775512695, "learning_rate": 1.9805163366722247e-05, "loss": 1.8833, "step": 2777 }, { "epoch": 9.108196721311476, "grad_norm": 5.311245918273926, "learning_rate": 1.980495471695599e-05, "loss": 1.6147, "step": 2778 }, { "epoch": 9.111475409836066, "grad_norm": 5.769567012786865, "learning_rate": 1.9804745956628674e-05, "loss": 1.6963, "step": 2779 }, { "epoch": 9.114754098360656, "grad_norm": 6.8525190353393555, "learning_rate": 1.980453708574265e-05, "loss": 1.7427, "step": 2780 }, { "epoch": 9.118032786885246, "grad_norm": 6.300224304199219, "learning_rate": 1.9804328104300275e-05, "loss": 1.5874, "step": 2781 }, { "epoch": 9.121311475409836, "grad_norm": 5.509880065917969, "learning_rate": 1.9804119012303907e-05, "loss": 1.7585, "step": 2782 }, { "epoch": 9.124590163934426, "grad_norm": 8.618725776672363, "learning_rate": 1.9803909809755905e-05, "loss": 1.5159, "step": 2783 }, { "epoch": 9.127868852459017, "grad_norm": 6.662092208862305, "learning_rate": 1.9803700496658627e-05, "loss": 1.7739, "step": 2784 }, { "epoch": 9.131147540983607, "grad_norm": 7.3980712890625, "learning_rate": 1.9803491073014433e-05, "loss": 1.5605, "step": 2785 }, { "epoch": 9.134426229508197, "grad_norm": 6.830699920654297, "learning_rate": 1.9803281538825683e-05, "loss": 1.6851, "step": 2786 }, { "epoch": 9.137704918032787, "grad_norm": 6.994100093841553, "learning_rate": 1.980307189409474e-05, "loss": 1.6245, "step": 2787 }, { "epoch": 9.140983606557377, "grad_norm": 7.179364204406738, "learning_rate": 1.980286213882397e-05, "loss": 1.7395, "step": 2788 }, { "epoch": 9.144262295081967, "grad_norm": 8.238813400268555, "learning_rate": 1.9802652273015735e-05, "loss": 1.6943, "step": 2789 }, { "epoch": 9.147540983606557, "grad_norm": 5.861392021179199, "learning_rate": 1.9802442296672402e-05, "loss": 1.9148, "step": 2790 }, { "epoch": 9.150819672131147, "grad_norm": 6.723029613494873, "learning_rate": 1.9802232209796345e-05, "loss": 1.6179, "step": 2791 }, { "epoch": 9.154098360655738, "grad_norm": 5.922755718231201, "learning_rate": 1.9802022012389925e-05, "loss": 1.5947, "step": 2792 }, { "epoch": 9.157377049180328, "grad_norm": 7.998676300048828, "learning_rate": 1.9801811704455517e-05, "loss": 1.9688, "step": 2793 }, { "epoch": 9.160655737704918, "grad_norm": 7.42930269241333, "learning_rate": 1.980160128599549e-05, "loss": 1.4365, "step": 2794 }, { "epoch": 9.163934426229508, "grad_norm": 5.482763290405273, "learning_rate": 1.9801390757012216e-05, "loss": 1.583, "step": 2795 }, { "epoch": 9.167213114754098, "grad_norm": 6.037469387054443, "learning_rate": 1.9801180117508076e-05, "loss": 1.6646, "step": 2796 }, { "epoch": 9.170491803278688, "grad_norm": 10.07630729675293, "learning_rate": 1.9800969367485435e-05, "loss": 1.6299, "step": 2797 }, { "epoch": 9.173770491803278, "grad_norm": 7.89139986038208, "learning_rate": 1.980075850694667e-05, "loss": 1.6318, "step": 2798 }, { "epoch": 9.177049180327868, "grad_norm": 6.85308837890625, "learning_rate": 1.9800547535894168e-05, "loss": 1.6655, "step": 2799 }, { "epoch": 9.180327868852459, "grad_norm": 5.817371845245361, "learning_rate": 1.98003364543303e-05, "loss": 1.708, "step": 2800 }, { "epoch": 9.183606557377049, "grad_norm": 7.028944492340088, "learning_rate": 1.9800125262257452e-05, "loss": 1.6196, "step": 2801 }, { "epoch": 9.186885245901639, "grad_norm": 13.507756233215332, "learning_rate": 1.9799913959678e-05, "loss": 1.769, "step": 2802 }, { "epoch": 9.190163934426229, "grad_norm": 5.871354579925537, "learning_rate": 1.979970254659433e-05, "loss": 1.678, "step": 2803 }, { "epoch": 9.193442622950819, "grad_norm": 5.795864582061768, "learning_rate": 1.979949102300882e-05, "loss": 1.6628, "step": 2804 }, { "epoch": 9.19672131147541, "grad_norm": 7.300677299499512, "learning_rate": 1.9799279388923866e-05, "loss": 1.6379, "step": 2805 }, { "epoch": 9.2, "grad_norm": 7.3214592933654785, "learning_rate": 1.9799067644341844e-05, "loss": 1.6824, "step": 2806 }, { "epoch": 9.20327868852459, "grad_norm": 7.046416759490967, "learning_rate": 1.979885578926515e-05, "loss": 1.7524, "step": 2807 }, { "epoch": 9.20655737704918, "grad_norm": 4.4946112632751465, "learning_rate": 1.9798643823696164e-05, "loss": 1.6013, "step": 2808 }, { "epoch": 9.20983606557377, "grad_norm": 7.474318027496338, "learning_rate": 1.9798431747637285e-05, "loss": 1.6089, "step": 2809 }, { "epoch": 9.21311475409836, "grad_norm": 10.444482803344727, "learning_rate": 1.9798219561090896e-05, "loss": 1.7026, "step": 2810 }, { "epoch": 9.216393442622952, "grad_norm": 22.31535530090332, "learning_rate": 1.9798007264059396e-05, "loss": 1.5315, "step": 2811 }, { "epoch": 9.219672131147542, "grad_norm": 7.181753158569336, "learning_rate": 1.9797794856545177e-05, "loss": 1.7803, "step": 2812 }, { "epoch": 9.222950819672132, "grad_norm": 7.321519374847412, "learning_rate": 1.9797582338550635e-05, "loss": 1.8306, "step": 2813 }, { "epoch": 9.226229508196722, "grad_norm": 6.66025447845459, "learning_rate": 1.979736971007816e-05, "loss": 1.7632, "step": 2814 }, { "epoch": 9.229508196721312, "grad_norm": 6.274580955505371, "learning_rate": 1.979715697113016e-05, "loss": 1.5562, "step": 2815 }, { "epoch": 9.232786885245902, "grad_norm": 8.277325630187988, "learning_rate": 1.9796944121709026e-05, "loss": 1.5645, "step": 2816 }, { "epoch": 9.236065573770492, "grad_norm": 6.802579402923584, "learning_rate": 1.9796731161817165e-05, "loss": 1.5491, "step": 2817 }, { "epoch": 9.239344262295083, "grad_norm": 6.035388946533203, "learning_rate": 1.9796518091456968e-05, "loss": 1.6533, "step": 2818 }, { "epoch": 9.242622950819673, "grad_norm": 9.725820541381836, "learning_rate": 1.9796304910630847e-05, "loss": 1.8218, "step": 2819 }, { "epoch": 9.245901639344263, "grad_norm": 6.626221656799316, "learning_rate": 1.9796091619341205e-05, "loss": 1.6626, "step": 2820 }, { "epoch": 9.249180327868853, "grad_norm": 9.734156608581543, "learning_rate": 1.979587821759044e-05, "loss": 1.7324, "step": 2821 }, { "epoch": 9.252459016393443, "grad_norm": 20.76857566833496, "learning_rate": 1.9795664705380963e-05, "loss": 1.5706, "step": 2822 }, { "epoch": 9.255737704918033, "grad_norm": 7.546860218048096, "learning_rate": 1.9795451082715186e-05, "loss": 1.4817, "step": 2823 }, { "epoch": 9.259016393442623, "grad_norm": 5.463933944702148, "learning_rate": 1.979523734959551e-05, "loss": 1.8857, "step": 2824 }, { "epoch": 9.262295081967213, "grad_norm": 13.432153701782227, "learning_rate": 1.979502350602435e-05, "loss": 1.7751, "step": 2825 }, { "epoch": 9.265573770491804, "grad_norm": 6.811150550842285, "learning_rate": 1.979480955200411e-05, "loss": 1.6865, "step": 2826 }, { "epoch": 9.268852459016394, "grad_norm": 6.510709762573242, "learning_rate": 1.9794595487537218e-05, "loss": 1.7109, "step": 2827 }, { "epoch": 9.272131147540984, "grad_norm": 5.698838710784912, "learning_rate": 1.979438131262607e-05, "loss": 1.7783, "step": 2828 }, { "epoch": 9.275409836065574, "grad_norm": 9.856182098388672, "learning_rate": 1.9794167027273096e-05, "loss": 1.7085, "step": 2829 }, { "epoch": 9.278688524590164, "grad_norm": 7.066774845123291, "learning_rate": 1.97939526314807e-05, "loss": 1.5652, "step": 2830 }, { "epoch": 9.281967213114754, "grad_norm": 8.228775978088379, "learning_rate": 1.9793738125251305e-05, "loss": 1.7686, "step": 2831 }, { "epoch": 9.285245901639344, "grad_norm": 9.308345794677734, "learning_rate": 1.9793523508587332e-05, "loss": 1.7148, "step": 2832 }, { "epoch": 9.288524590163934, "grad_norm": 4.6628522872924805, "learning_rate": 1.97933087814912e-05, "loss": 1.686, "step": 2833 }, { "epoch": 9.291803278688525, "grad_norm": 6.399942874908447, "learning_rate": 1.9793093943965324e-05, "loss": 1.6519, "step": 2834 }, { "epoch": 9.295081967213115, "grad_norm": 7.084451675415039, "learning_rate": 1.9792878996012136e-05, "loss": 1.7793, "step": 2835 }, { "epoch": 9.298360655737705, "grad_norm": 6.7501349449157715, "learning_rate": 1.979266393763405e-05, "loss": 1.7339, "step": 2836 }, { "epoch": 9.301639344262295, "grad_norm": 6.573880672454834, "learning_rate": 1.9792448768833502e-05, "loss": 1.5852, "step": 2837 }, { "epoch": 9.304918032786885, "grad_norm": 12.866227149963379, "learning_rate": 1.9792233489612908e-05, "loss": 1.7168, "step": 2838 }, { "epoch": 9.308196721311475, "grad_norm": 5.451666355133057, "learning_rate": 1.9792018099974705e-05, "loss": 1.5557, "step": 2839 }, { "epoch": 9.311475409836065, "grad_norm": 6.349833965301514, "learning_rate": 1.9791802599921315e-05, "loss": 1.7153, "step": 2840 }, { "epoch": 9.314754098360655, "grad_norm": 6.742641448974609, "learning_rate": 1.9791586989455168e-05, "loss": 1.6179, "step": 2841 }, { "epoch": 9.318032786885245, "grad_norm": 6.286189079284668, "learning_rate": 1.97913712685787e-05, "loss": 1.6074, "step": 2842 }, { "epoch": 9.321311475409836, "grad_norm": 7.50107479095459, "learning_rate": 1.9791155437294337e-05, "loss": 1.51, "step": 2843 }, { "epoch": 9.324590163934426, "grad_norm": 6.394493579864502, "learning_rate": 1.9790939495604518e-05, "loss": 1.6338, "step": 2844 }, { "epoch": 9.327868852459016, "grad_norm": 6.193881988525391, "learning_rate": 1.9790723443511675e-05, "loss": 1.8511, "step": 2845 }, { "epoch": 9.331147540983606, "grad_norm": 8.496002197265625, "learning_rate": 1.9790507281018246e-05, "loss": 1.5994, "step": 2846 }, { "epoch": 9.334426229508196, "grad_norm": 6.500321388244629, "learning_rate": 1.979029100812667e-05, "loss": 1.6028, "step": 2847 }, { "epoch": 9.337704918032786, "grad_norm": 5.875561237335205, "learning_rate": 1.979007462483938e-05, "loss": 1.6809, "step": 2848 }, { "epoch": 9.340983606557376, "grad_norm": 5.630218029022217, "learning_rate": 1.978985813115882e-05, "loss": 1.5725, "step": 2849 }, { "epoch": 9.344262295081966, "grad_norm": 5.806089401245117, "learning_rate": 1.978964152708743e-05, "loss": 1.7383, "step": 2850 }, { "epoch": 9.347540983606557, "grad_norm": 5.336593151092529, "learning_rate": 1.9789424812627658e-05, "loss": 1.6396, "step": 2851 }, { "epoch": 9.350819672131147, "grad_norm": 7.268382549285889, "learning_rate": 1.978920798778194e-05, "loss": 1.5784, "step": 2852 }, { "epoch": 9.354098360655737, "grad_norm": 5.7416863441467285, "learning_rate": 1.9788991052552724e-05, "loss": 1.7231, "step": 2853 }, { "epoch": 9.357377049180329, "grad_norm": 7.348258018493652, "learning_rate": 1.9788774006942453e-05, "loss": 1.6809, "step": 2854 }, { "epoch": 9.360655737704919, "grad_norm": 5.414692401885986, "learning_rate": 1.978855685095358e-05, "loss": 1.7183, "step": 2855 }, { "epoch": 9.363934426229509, "grad_norm": 6.195948600769043, "learning_rate": 1.978833958458855e-05, "loss": 1.7739, "step": 2856 }, { "epoch": 9.3672131147541, "grad_norm": 5.959105491638184, "learning_rate": 1.9788122207849815e-05, "loss": 1.7363, "step": 2857 }, { "epoch": 9.37049180327869, "grad_norm": 6.778439998626709, "learning_rate": 1.9787904720739825e-05, "loss": 1.6807, "step": 2858 }, { "epoch": 9.37377049180328, "grad_norm": 7.6800856590271, "learning_rate": 1.9787687123261032e-05, "loss": 1.5505, "step": 2859 }, { "epoch": 9.37704918032787, "grad_norm": 6.1894073486328125, "learning_rate": 1.978746941541589e-05, "loss": 1.6926, "step": 2860 }, { "epoch": 9.38032786885246, "grad_norm": 8.63194465637207, "learning_rate": 1.9787251597206853e-05, "loss": 1.8789, "step": 2861 }, { "epoch": 9.38360655737705, "grad_norm": 6.623157024383545, "learning_rate": 1.9787033668636376e-05, "loss": 1.7383, "step": 2862 }, { "epoch": 9.38688524590164, "grad_norm": 5.466366291046143, "learning_rate": 1.9786815629706925e-05, "loss": 1.7422, "step": 2863 }, { "epoch": 9.39016393442623, "grad_norm": 7.5422515869140625, "learning_rate": 1.9786597480420946e-05, "loss": 1.6011, "step": 2864 }, { "epoch": 9.39344262295082, "grad_norm": 6.354266166687012, "learning_rate": 1.978637922078091e-05, "loss": 1.5337, "step": 2865 }, { "epoch": 9.39672131147541, "grad_norm": 5.460873126983643, "learning_rate": 1.978616085078927e-05, "loss": 1.7849, "step": 2866 }, { "epoch": 9.4, "grad_norm": 6.672496795654297, "learning_rate": 1.978594237044849e-05, "loss": 1.6367, "step": 2867 }, { "epoch": 9.40327868852459, "grad_norm": 6.598453044891357, "learning_rate": 1.9785723779761037e-05, "loss": 1.7507, "step": 2868 }, { "epoch": 9.40655737704918, "grad_norm": 9.167969703674316, "learning_rate": 1.9785505078729374e-05, "loss": 1.6963, "step": 2869 }, { "epoch": 9.40983606557377, "grad_norm": 6.745041370391846, "learning_rate": 1.9785286267355962e-05, "loss": 1.6362, "step": 2870 }, { "epoch": 9.41311475409836, "grad_norm": 6.215709686279297, "learning_rate": 1.978506734564328e-05, "loss": 1.707, "step": 2871 }, { "epoch": 9.416393442622951, "grad_norm": 6.412309646606445, "learning_rate": 1.9784848313593788e-05, "loss": 1.7278, "step": 2872 }, { "epoch": 9.419672131147541, "grad_norm": 6.998968124389648, "learning_rate": 1.9784629171209956e-05, "loss": 1.6492, "step": 2873 }, { "epoch": 9.422950819672131, "grad_norm": 6.680891036987305, "learning_rate": 1.9784409918494255e-05, "loss": 1.6367, "step": 2874 }, { "epoch": 9.426229508196721, "grad_norm": 6.956087112426758, "learning_rate": 1.9784190555449162e-05, "loss": 1.7417, "step": 2875 }, { "epoch": 9.429508196721311, "grad_norm": 5.229959011077881, "learning_rate": 1.9783971082077147e-05, "loss": 1.7976, "step": 2876 }, { "epoch": 9.432786885245902, "grad_norm": 9.644671440124512, "learning_rate": 1.9783751498380686e-05, "loss": 1.8291, "step": 2877 }, { "epoch": 9.436065573770492, "grad_norm": 6.536865234375, "learning_rate": 1.9783531804362252e-05, "loss": 1.7974, "step": 2878 }, { "epoch": 9.439344262295082, "grad_norm": 5.645754814147949, "learning_rate": 1.9783312000024327e-05, "loss": 1.6953, "step": 2879 }, { "epoch": 9.442622950819672, "grad_norm": 6.023361682891846, "learning_rate": 1.9783092085369383e-05, "loss": 1.5408, "step": 2880 }, { "epoch": 9.445901639344262, "grad_norm": 7.201044082641602, "learning_rate": 1.978287206039991e-05, "loss": 1.7852, "step": 2881 }, { "epoch": 9.449180327868852, "grad_norm": 5.957468032836914, "learning_rate": 1.978265192511838e-05, "loss": 1.6499, "step": 2882 }, { "epoch": 9.452459016393442, "grad_norm": 10.032848358154297, "learning_rate": 1.9782431679527276e-05, "loss": 1.4753, "step": 2883 }, { "epoch": 9.455737704918032, "grad_norm": 5.385688781738281, "learning_rate": 1.9782211323629085e-05, "loss": 1.874, "step": 2884 }, { "epoch": 9.459016393442623, "grad_norm": 6.385501861572266, "learning_rate": 1.9781990857426293e-05, "loss": 1.6191, "step": 2885 }, { "epoch": 9.462295081967213, "grad_norm": 6.192411422729492, "learning_rate": 1.978177028092138e-05, "loss": 1.7148, "step": 2886 }, { "epoch": 9.465573770491803, "grad_norm": 6.192216873168945, "learning_rate": 1.978154959411684e-05, "loss": 1.3704, "step": 2887 }, { "epoch": 9.468852459016393, "grad_norm": 5.619734764099121, "learning_rate": 1.9781328797015153e-05, "loss": 1.6394, "step": 2888 }, { "epoch": 9.472131147540983, "grad_norm": 6.0687103271484375, "learning_rate": 1.9781107889618818e-05, "loss": 1.6782, "step": 2889 }, { "epoch": 9.475409836065573, "grad_norm": 7.867123126983643, "learning_rate": 1.978088687193032e-05, "loss": 1.5745, "step": 2890 }, { "epoch": 9.478688524590163, "grad_norm": 14.964004516601562, "learning_rate": 1.978066574395215e-05, "loss": 1.645, "step": 2891 }, { "epoch": 9.481967213114753, "grad_norm": 5.736128807067871, "learning_rate": 1.978044450568681e-05, "loss": 1.8628, "step": 2892 }, { "epoch": 9.485245901639344, "grad_norm": 8.208524703979492, "learning_rate": 1.9780223157136786e-05, "loss": 1.6904, "step": 2893 }, { "epoch": 9.488524590163934, "grad_norm": 7.419164657592773, "learning_rate": 1.978000169830458e-05, "loss": 1.7861, "step": 2894 }, { "epoch": 9.491803278688524, "grad_norm": 5.608860492706299, "learning_rate": 1.9779780129192682e-05, "loss": 1.9497, "step": 2895 }, { "epoch": 9.495081967213114, "grad_norm": 6.511112213134766, "learning_rate": 1.9779558449803597e-05, "loss": 1.5579, "step": 2896 }, { "epoch": 9.498360655737706, "grad_norm": 5.651193618774414, "learning_rate": 1.9779336660139824e-05, "loss": 1.7947, "step": 2897 }, { "epoch": 9.501639344262294, "grad_norm": 5.214173316955566, "learning_rate": 1.977911476020386e-05, "loss": 1.7568, "step": 2898 }, { "epoch": 9.504918032786886, "grad_norm": 5.836503982543945, "learning_rate": 1.9778892749998207e-05, "loss": 1.6819, "step": 2899 }, { "epoch": 9.508196721311476, "grad_norm": 7.291835308074951, "learning_rate": 1.9778670629525375e-05, "loss": 1.6729, "step": 2900 }, { "epoch": 9.511475409836066, "grad_norm": 7.551822185516357, "learning_rate": 1.9778448398787863e-05, "loss": 1.6223, "step": 2901 }, { "epoch": 9.514754098360656, "grad_norm": 8.859314918518066, "learning_rate": 1.9778226057788177e-05, "loss": 1.5818, "step": 2902 }, { "epoch": 9.518032786885247, "grad_norm": 8.104180335998535, "learning_rate": 1.9778003606528825e-05, "loss": 2.1274, "step": 2903 }, { "epoch": 9.521311475409837, "grad_norm": 9.047933578491211, "learning_rate": 1.9777781045012317e-05, "loss": 1.7017, "step": 2904 }, { "epoch": 9.524590163934427, "grad_norm": 8.037153244018555, "learning_rate": 1.9777558373241162e-05, "loss": 1.6304, "step": 2905 }, { "epoch": 9.527868852459017, "grad_norm": 5.963575839996338, "learning_rate": 1.977733559121787e-05, "loss": 1.573, "step": 2906 }, { "epoch": 9.531147540983607, "grad_norm": 5.847591876983643, "learning_rate": 1.977711269894495e-05, "loss": 1.7417, "step": 2907 }, { "epoch": 9.534426229508197, "grad_norm": 5.801488399505615, "learning_rate": 1.9776889696424923e-05, "loss": 1.5903, "step": 2908 }, { "epoch": 9.537704918032787, "grad_norm": 5.242936611175537, "learning_rate": 1.97766665836603e-05, "loss": 1.9531, "step": 2909 }, { "epoch": 9.540983606557377, "grad_norm": 7.554896831512451, "learning_rate": 1.9776443360653595e-05, "loss": 1.6743, "step": 2910 }, { "epoch": 9.544262295081968, "grad_norm": 9.002848625183105, "learning_rate": 1.9776220027407323e-05, "loss": 1.9155, "step": 2911 }, { "epoch": 9.547540983606558, "grad_norm": 6.836695194244385, "learning_rate": 1.977599658392401e-05, "loss": 1.8438, "step": 2912 }, { "epoch": 9.550819672131148, "grad_norm": 5.897667407989502, "learning_rate": 1.9775773030206168e-05, "loss": 1.5913, "step": 2913 }, { "epoch": 9.554098360655738, "grad_norm": 7.124727249145508, "learning_rate": 1.977554936625632e-05, "loss": 1.6277, "step": 2914 }, { "epoch": 9.557377049180328, "grad_norm": 5.708855628967285, "learning_rate": 1.977532559207699e-05, "loss": 1.6289, "step": 2915 }, { "epoch": 9.560655737704918, "grad_norm": 7.719854831695557, "learning_rate": 1.97751017076707e-05, "loss": 1.8032, "step": 2916 }, { "epoch": 9.563934426229508, "grad_norm": 8.897143363952637, "learning_rate": 1.977487771303998e-05, "loss": 1.6106, "step": 2917 }, { "epoch": 9.567213114754098, "grad_norm": 8.279589653015137, "learning_rate": 1.9774653608187343e-05, "loss": 1.6504, "step": 2918 }, { "epoch": 9.570491803278689, "grad_norm": 5.54513692855835, "learning_rate": 1.9774429393115327e-05, "loss": 1.5818, "step": 2919 }, { "epoch": 9.573770491803279, "grad_norm": 5.234227657318115, "learning_rate": 1.9774205067826453e-05, "loss": 1.7307, "step": 2920 }, { "epoch": 9.577049180327869, "grad_norm": 9.274821281433105, "learning_rate": 1.9773980632323257e-05, "loss": 1.7739, "step": 2921 }, { "epoch": 9.580327868852459, "grad_norm": 6.820525169372559, "learning_rate": 1.977375608660827e-05, "loss": 1.6797, "step": 2922 }, { "epoch": 9.583606557377049, "grad_norm": 4.682183265686035, "learning_rate": 1.9773531430684017e-05, "loss": 1.9941, "step": 2923 }, { "epoch": 9.58688524590164, "grad_norm": 5.191981792449951, "learning_rate": 1.9773306664553033e-05, "loss": 1.811, "step": 2924 }, { "epoch": 9.59016393442623, "grad_norm": 6.058061122894287, "learning_rate": 1.977308178821786e-05, "loss": 1.5454, "step": 2925 }, { "epoch": 9.59344262295082, "grad_norm": 6.913326263427734, "learning_rate": 1.9772856801681026e-05, "loss": 1.9878, "step": 2926 }, { "epoch": 9.59672131147541, "grad_norm": 5.674253463745117, "learning_rate": 1.977263170494507e-05, "loss": 1.5552, "step": 2927 }, { "epoch": 9.6, "grad_norm": 6.239050388336182, "learning_rate": 1.977240649801253e-05, "loss": 1.6968, "step": 2928 }, { "epoch": 9.60327868852459, "grad_norm": 7.254554271697998, "learning_rate": 1.9772181180885947e-05, "loss": 1.5852, "step": 2929 }, { "epoch": 9.60655737704918, "grad_norm": 5.330039978027344, "learning_rate": 1.9771955753567866e-05, "loss": 1.75, "step": 2930 }, { "epoch": 9.60983606557377, "grad_norm": 5.583973407745361, "learning_rate": 1.9771730216060816e-05, "loss": 1.7522, "step": 2931 }, { "epoch": 9.61311475409836, "grad_norm": 8.116130828857422, "learning_rate": 1.9771504568367347e-05, "loss": 1.5349, "step": 2932 }, { "epoch": 9.61639344262295, "grad_norm": 4.944185256958008, "learning_rate": 1.977127881049001e-05, "loss": 1.6694, "step": 2933 }, { "epoch": 9.61967213114754, "grad_norm": 11.347956657409668, "learning_rate": 1.977105294243134e-05, "loss": 1.5828, "step": 2934 }, { "epoch": 9.62295081967213, "grad_norm": 5.871432781219482, "learning_rate": 1.9770826964193892e-05, "loss": 1.7002, "step": 2935 }, { "epoch": 9.62622950819672, "grad_norm": 7.42799711227417, "learning_rate": 1.977060087578021e-05, "loss": 1.647, "step": 2936 }, { "epoch": 9.62950819672131, "grad_norm": 14.449338912963867, "learning_rate": 1.9770374677192845e-05, "loss": 1.6577, "step": 2937 }, { "epoch": 9.6327868852459, "grad_norm": 8.128450393676758, "learning_rate": 1.9770148368434345e-05, "loss": 1.6892, "step": 2938 }, { "epoch": 9.636065573770491, "grad_norm": 4.924927711486816, "learning_rate": 1.9769921949507264e-05, "loss": 1.6458, "step": 2939 }, { "epoch": 9.639344262295083, "grad_norm": 7.150628089904785, "learning_rate": 1.9769695420414156e-05, "loss": 1.5657, "step": 2940 }, { "epoch": 9.642622950819671, "grad_norm": 6.0972771644592285, "learning_rate": 1.9769468781157576e-05, "loss": 1.7061, "step": 2941 }, { "epoch": 9.645901639344263, "grad_norm": 7.156520366668701, "learning_rate": 1.976924203174007e-05, "loss": 1.812, "step": 2942 }, { "epoch": 9.649180327868853, "grad_norm": 6.892956256866455, "learning_rate": 1.976901517216421e-05, "loss": 1.6921, "step": 2943 }, { "epoch": 9.652459016393443, "grad_norm": 5.731619358062744, "learning_rate": 1.9768788202432546e-05, "loss": 1.5972, "step": 2944 }, { "epoch": 9.655737704918034, "grad_norm": 7.053267478942871, "learning_rate": 1.9768561122547635e-05, "loss": 1.8311, "step": 2945 }, { "epoch": 9.659016393442624, "grad_norm": 5.181550979614258, "learning_rate": 1.976833393251204e-05, "loss": 1.9888, "step": 2946 }, { "epoch": 9.662295081967214, "grad_norm": 6.95704460144043, "learning_rate": 1.976810663232833e-05, "loss": 1.6284, "step": 2947 }, { "epoch": 9.665573770491804, "grad_norm": 8.750046730041504, "learning_rate": 1.976787922199906e-05, "loss": 1.645, "step": 2948 }, { "epoch": 9.668852459016394, "grad_norm": 7.642910957336426, "learning_rate": 1.976765170152679e-05, "loss": 1.8579, "step": 2949 }, { "epoch": 9.672131147540984, "grad_norm": 6.310647010803223, "learning_rate": 1.9767424070914095e-05, "loss": 1.6228, "step": 2950 }, { "epoch": 9.675409836065574, "grad_norm": 6.0418596267700195, "learning_rate": 1.9767196330163538e-05, "loss": 1.7651, "step": 2951 }, { "epoch": 9.678688524590164, "grad_norm": 6.734859466552734, "learning_rate": 1.9766968479277684e-05, "loss": 1.6982, "step": 2952 }, { "epoch": 9.681967213114755, "grad_norm": 6.190521717071533, "learning_rate": 1.976674051825911e-05, "loss": 1.5884, "step": 2953 }, { "epoch": 9.685245901639345, "grad_norm": 5.4875311851501465, "learning_rate": 1.9766512447110377e-05, "loss": 1.5488, "step": 2954 }, { "epoch": 9.688524590163935, "grad_norm": 5.313456058502197, "learning_rate": 1.9766284265834064e-05, "loss": 1.6582, "step": 2955 }, { "epoch": 9.691803278688525, "grad_norm": 6.899127006530762, "learning_rate": 1.976605597443274e-05, "loss": 1.4561, "step": 2956 }, { "epoch": 9.695081967213115, "grad_norm": 6.8157267570495605, "learning_rate": 1.9765827572908983e-05, "loss": 1.5977, "step": 2957 }, { "epoch": 9.698360655737705, "grad_norm": 6.100060939788818, "learning_rate": 1.9765599061265364e-05, "loss": 1.6548, "step": 2958 }, { "epoch": 9.701639344262295, "grad_norm": 5.582887172698975, "learning_rate": 1.9765370439504465e-05, "loss": 1.7344, "step": 2959 }, { "epoch": 9.704918032786885, "grad_norm": 8.025569915771484, "learning_rate": 1.9765141707628858e-05, "loss": 1.5403, "step": 2960 }, { "epoch": 9.708196721311475, "grad_norm": 8.365468978881836, "learning_rate": 1.9764912865641127e-05, "loss": 1.7468, "step": 2961 }, { "epoch": 9.711475409836066, "grad_norm": 7.665487289428711, "learning_rate": 1.976468391354385e-05, "loss": 1.5576, "step": 2962 }, { "epoch": 9.714754098360656, "grad_norm": 8.616581916809082, "learning_rate": 1.9764454851339606e-05, "loss": 1.7095, "step": 2963 }, { "epoch": 9.718032786885246, "grad_norm": 6.068953990936279, "learning_rate": 1.9764225679030987e-05, "loss": 1.6865, "step": 2964 }, { "epoch": 9.721311475409836, "grad_norm": 7.241220951080322, "learning_rate": 1.9763996396620567e-05, "loss": 1.6406, "step": 2965 }, { "epoch": 9.724590163934426, "grad_norm": 6.24204683303833, "learning_rate": 1.9763767004110937e-05, "loss": 2.0532, "step": 2966 }, { "epoch": 9.727868852459016, "grad_norm": 4.933861255645752, "learning_rate": 1.976353750150468e-05, "loss": 1.5718, "step": 2967 }, { "epoch": 9.731147540983606, "grad_norm": 7.888499736785889, "learning_rate": 1.9763307888804387e-05, "loss": 1.5962, "step": 2968 }, { "epoch": 9.734426229508196, "grad_norm": 5.343472003936768, "learning_rate": 1.976307816601265e-05, "loss": 1.5435, "step": 2969 }, { "epoch": 9.737704918032787, "grad_norm": 6.5836029052734375, "learning_rate": 1.9762848333132056e-05, "loss": 1.5847, "step": 2970 }, { "epoch": 9.740983606557377, "grad_norm": 6.980310916900635, "learning_rate": 1.976261839016519e-05, "loss": 1.7207, "step": 2971 }, { "epoch": 9.744262295081967, "grad_norm": 5.610265254974365, "learning_rate": 1.9762388337114654e-05, "loss": 1.3955, "step": 2972 }, { "epoch": 9.747540983606557, "grad_norm": 6.180686950683594, "learning_rate": 1.9762158173983038e-05, "loss": 1.8296, "step": 2973 }, { "epoch": 9.750819672131147, "grad_norm": 6.648536205291748, "learning_rate": 1.976192790077294e-05, "loss": 1.5161, "step": 2974 }, { "epoch": 9.754098360655737, "grad_norm": 6.267494201660156, "learning_rate": 1.9761697517486955e-05, "loss": 1.4817, "step": 2975 }, { "epoch": 9.757377049180327, "grad_norm": 6.231285572052002, "learning_rate": 1.976146702412768e-05, "loss": 1.8882, "step": 2976 }, { "epoch": 9.760655737704917, "grad_norm": 15.886038780212402, "learning_rate": 1.976123642069772e-05, "loss": 1.5894, "step": 2977 }, { "epoch": 9.763934426229508, "grad_norm": 9.810314178466797, "learning_rate": 1.9761005707199662e-05, "loss": 1.5706, "step": 2978 }, { "epoch": 9.767213114754098, "grad_norm": 5.33796501159668, "learning_rate": 1.976077488363612e-05, "loss": 1.7075, "step": 2979 }, { "epoch": 9.770491803278688, "grad_norm": 27.122730255126953, "learning_rate": 1.9760543950009693e-05, "loss": 1.7097, "step": 2980 }, { "epoch": 9.773770491803278, "grad_norm": 6.933470249176025, "learning_rate": 1.9760312906322984e-05, "loss": 1.7041, "step": 2981 }, { "epoch": 9.777049180327868, "grad_norm": 7.028030872344971, "learning_rate": 1.9760081752578598e-05, "loss": 1.6343, "step": 2982 }, { "epoch": 9.780327868852458, "grad_norm": 6.931613922119141, "learning_rate": 1.9759850488779143e-05, "loss": 1.5378, "step": 2983 }, { "epoch": 9.783606557377048, "grad_norm": 7.952151298522949, "learning_rate": 1.9759619114927228e-05, "loss": 1.667, "step": 2984 }, { "epoch": 9.78688524590164, "grad_norm": 5.181184768676758, "learning_rate": 1.9759387631025455e-05, "loss": 1.7312, "step": 2985 }, { "epoch": 9.790163934426229, "grad_norm": 5.442166328430176, "learning_rate": 1.9759156037076443e-05, "loss": 1.5959, "step": 2986 }, { "epoch": 9.79344262295082, "grad_norm": 5.328776836395264, "learning_rate": 1.97589243330828e-05, "loss": 1.8877, "step": 2987 }, { "epoch": 9.79672131147541, "grad_norm": 6.871957778930664, "learning_rate": 1.9758692519047136e-05, "loss": 1.897, "step": 2988 }, { "epoch": 9.8, "grad_norm": 4.979539394378662, "learning_rate": 1.9758460594972068e-05, "loss": 1.731, "step": 2989 }, { "epoch": 9.80327868852459, "grad_norm": 5.91139030456543, "learning_rate": 1.9758228560860213e-05, "loss": 1.6621, "step": 2990 }, { "epoch": 9.806557377049181, "grad_norm": 7.601951599121094, "learning_rate": 1.975799641671418e-05, "loss": 1.5693, "step": 2991 }, { "epoch": 9.809836065573771, "grad_norm": 9.66270923614502, "learning_rate": 1.9757764162536597e-05, "loss": 1.6533, "step": 2992 }, { "epoch": 9.813114754098361, "grad_norm": 7.315945625305176, "learning_rate": 1.9757531798330074e-05, "loss": 1.71, "step": 2993 }, { "epoch": 9.816393442622951, "grad_norm": 5.630218505859375, "learning_rate": 1.9757299324097237e-05, "loss": 1.8542, "step": 2994 }, { "epoch": 9.819672131147541, "grad_norm": 5.616408824920654, "learning_rate": 1.9757066739840705e-05, "loss": 1.5842, "step": 2995 }, { "epoch": 9.822950819672132, "grad_norm": 6.308113098144531, "learning_rate": 1.97568340455631e-05, "loss": 1.6079, "step": 2996 }, { "epoch": 9.826229508196722, "grad_norm": 12.613653182983398, "learning_rate": 1.9756601241267045e-05, "loss": 1.8354, "step": 2997 }, { "epoch": 9.829508196721312, "grad_norm": 8.013218879699707, "learning_rate": 1.975636832695517e-05, "loss": 1.7554, "step": 2998 }, { "epoch": 9.832786885245902, "grad_norm": 6.397397518157959, "learning_rate": 1.9756135302630094e-05, "loss": 1.7771, "step": 2999 }, { "epoch": 9.836065573770492, "grad_norm": 7.860297679901123, "learning_rate": 1.9755902168294453e-05, "loss": 1.5977, "step": 3000 }, { "epoch": 9.839344262295082, "grad_norm": 9.036650657653809, "learning_rate": 1.9755668923950864e-05, "loss": 1.6416, "step": 3001 }, { "epoch": 9.842622950819672, "grad_norm": 10.166845321655273, "learning_rate": 1.9755435569601972e-05, "loss": 1.6357, "step": 3002 }, { "epoch": 9.845901639344262, "grad_norm": 9.559895515441895, "learning_rate": 1.97552021052504e-05, "loss": 1.5957, "step": 3003 }, { "epoch": 9.849180327868853, "grad_norm": 7.209537982940674, "learning_rate": 1.9754968530898776e-05, "loss": 1.6086, "step": 3004 }, { "epoch": 9.852459016393443, "grad_norm": 6.211094856262207, "learning_rate": 1.9754734846549744e-05, "loss": 1.688, "step": 3005 }, { "epoch": 9.855737704918033, "grad_norm": 7.1748456954956055, "learning_rate": 1.9754501052205935e-05, "loss": 1.6675, "step": 3006 }, { "epoch": 9.859016393442623, "grad_norm": 40.12404251098633, "learning_rate": 1.9754267147869982e-05, "loss": 1.6487, "step": 3007 }, { "epoch": 9.862295081967213, "grad_norm": 8.333292961120605, "learning_rate": 1.9754033133544527e-05, "loss": 1.5232, "step": 3008 }, { "epoch": 9.865573770491803, "grad_norm": 6.057692527770996, "learning_rate": 1.9753799009232203e-05, "loss": 1.8848, "step": 3009 }, { "epoch": 9.868852459016393, "grad_norm": 8.55333423614502, "learning_rate": 1.975356477493566e-05, "loss": 1.6045, "step": 3010 }, { "epoch": 9.872131147540983, "grad_norm": 7.496801376342773, "learning_rate": 1.975333043065753e-05, "loss": 1.5005, "step": 3011 }, { "epoch": 9.875409836065574, "grad_norm": 7.870967388153076, "learning_rate": 1.9753095976400457e-05, "loss": 1.7744, "step": 3012 }, { "epoch": 9.878688524590164, "grad_norm": 7.800453186035156, "learning_rate": 1.9752861412167084e-05, "loss": 1.8032, "step": 3013 }, { "epoch": 9.881967213114754, "grad_norm": 6.231133460998535, "learning_rate": 1.975262673796006e-05, "loss": 1.3684, "step": 3014 }, { "epoch": 9.885245901639344, "grad_norm": 6.467755317687988, "learning_rate": 1.9752391953782032e-05, "loss": 1.6284, "step": 3015 }, { "epoch": 9.888524590163934, "grad_norm": 8.378938674926758, "learning_rate": 1.9752157059635645e-05, "loss": 1.6226, "step": 3016 }, { "epoch": 9.891803278688524, "grad_norm": 6.880321025848389, "learning_rate": 1.9751922055523545e-05, "loss": 1.6257, "step": 3017 }, { "epoch": 9.895081967213114, "grad_norm": 5.558854579925537, "learning_rate": 1.9751686941448384e-05, "loss": 1.574, "step": 3018 }, { "epoch": 9.898360655737704, "grad_norm": 7.587809085845947, "learning_rate": 1.9751451717412815e-05, "loss": 1.7285, "step": 3019 }, { "epoch": 9.901639344262295, "grad_norm": 9.696605682373047, "learning_rate": 1.9751216383419488e-05, "loss": 1.613, "step": 3020 }, { "epoch": 9.904918032786885, "grad_norm": 9.2339448928833, "learning_rate": 1.975098093947106e-05, "loss": 1.7417, "step": 3021 }, { "epoch": 9.908196721311475, "grad_norm": 5.79645299911499, "learning_rate": 1.975074538557018e-05, "loss": 1.708, "step": 3022 }, { "epoch": 9.911475409836065, "grad_norm": 6.850959777832031, "learning_rate": 1.975050972171951e-05, "loss": 1.625, "step": 3023 }, { "epoch": 9.914754098360655, "grad_norm": 7.569963455200195, "learning_rate": 1.9750273947921702e-05, "loss": 1.6807, "step": 3024 }, { "epoch": 9.918032786885245, "grad_norm": 5.8374433517456055, "learning_rate": 1.975003806417942e-05, "loss": 1.7198, "step": 3025 }, { "epoch": 9.921311475409835, "grad_norm": 5.374895095825195, "learning_rate": 1.9749802070495324e-05, "loss": 1.6785, "step": 3026 }, { "epoch": 9.924590163934425, "grad_norm": 5.756499767303467, "learning_rate": 1.9749565966872065e-05, "loss": 1.8813, "step": 3027 }, { "epoch": 9.927868852459017, "grad_norm": 7.2700018882751465, "learning_rate": 1.974932975331232e-05, "loss": 1.7695, "step": 3028 }, { "epoch": 9.931147540983606, "grad_norm": 7.459142684936523, "learning_rate": 1.974909342981874e-05, "loss": 1.605, "step": 3029 }, { "epoch": 9.934426229508198, "grad_norm": 8.584403038024902, "learning_rate": 1.9748856996394e-05, "loss": 1.7791, "step": 3030 }, { "epoch": 9.937704918032788, "grad_norm": 7.843600273132324, "learning_rate": 1.9748620453040756e-05, "loss": 1.6719, "step": 3031 }, { "epoch": 9.940983606557378, "grad_norm": 8.41524887084961, "learning_rate": 1.9748383799761684e-05, "loss": 1.707, "step": 3032 }, { "epoch": 9.944262295081968, "grad_norm": 5.526680946350098, "learning_rate": 1.9748147036559446e-05, "loss": 1.5715, "step": 3033 }, { "epoch": 9.947540983606558, "grad_norm": 7.765352725982666, "learning_rate": 1.9747910163436717e-05, "loss": 1.6707, "step": 3034 }, { "epoch": 9.950819672131148, "grad_norm": 6.047243595123291, "learning_rate": 1.9747673180396164e-05, "loss": 1.8181, "step": 3035 }, { "epoch": 9.954098360655738, "grad_norm": 6.379255771636963, "learning_rate": 1.974743608744046e-05, "loss": 1.7009, "step": 3036 }, { "epoch": 9.957377049180328, "grad_norm": 4.822667121887207, "learning_rate": 1.9747198884572283e-05, "loss": 1.7466, "step": 3037 }, { "epoch": 9.960655737704919, "grad_norm": 8.782423973083496, "learning_rate": 1.9746961571794303e-05, "loss": 1.6792, "step": 3038 }, { "epoch": 9.963934426229509, "grad_norm": 7.714511871337891, "learning_rate": 1.9746724149109194e-05, "loss": 1.6582, "step": 3039 }, { "epoch": 9.967213114754099, "grad_norm": 6.8865861892700195, "learning_rate": 1.974648661651964e-05, "loss": 1.7363, "step": 3040 }, { "epoch": 9.970491803278689, "grad_norm": 8.392071723937988, "learning_rate": 1.974624897402831e-05, "loss": 1.5376, "step": 3041 }, { "epoch": 9.973770491803279, "grad_norm": 8.124414443969727, "learning_rate": 1.9746011221637897e-05, "loss": 1.741, "step": 3042 }, { "epoch": 9.97704918032787, "grad_norm": 6.22343635559082, "learning_rate": 1.974577335935107e-05, "loss": 1.6633, "step": 3043 }, { "epoch": 9.98032786885246, "grad_norm": 6.969302654266357, "learning_rate": 1.9745535387170515e-05, "loss": 1.6382, "step": 3044 }, { "epoch": 9.98360655737705, "grad_norm": 5.541525840759277, "learning_rate": 1.9745297305098917e-05, "loss": 1.7534, "step": 3045 }, { "epoch": 9.98688524590164, "grad_norm": 8.453337669372559, "learning_rate": 1.974505911313896e-05, "loss": 1.7466, "step": 3046 }, { "epoch": 9.99016393442623, "grad_norm": 5.837554454803467, "learning_rate": 1.9744820811293323e-05, "loss": 1.7737, "step": 3047 }, { "epoch": 9.99344262295082, "grad_norm": 22.71950340270996, "learning_rate": 1.9744582399564708e-05, "loss": 1.686, "step": 3048 }, { "epoch": 9.99672131147541, "grad_norm": 10.35642147064209, "learning_rate": 1.974434387795579e-05, "loss": 1.7051, "step": 3049 }, { "epoch": 10.0, "grad_norm": 6.252371311187744, "learning_rate": 1.9744105246469264e-05, "loss": 1.4841, "step": 3050 }, { "epoch": 10.00327868852459, "grad_norm": 9.09151554107666, "learning_rate": 1.974386650510782e-05, "loss": 1.5952, "step": 3051 }, { "epoch": 10.00655737704918, "grad_norm": 6.5613694190979, "learning_rate": 1.9743627653874147e-05, "loss": 1.5767, "step": 3052 }, { "epoch": 10.00983606557377, "grad_norm": 6.042884349822998, "learning_rate": 1.9743388692770943e-05, "loss": 1.6279, "step": 3053 }, { "epoch": 10.01311475409836, "grad_norm": 5.780897617340088, "learning_rate": 1.9743149621800905e-05, "loss": 1.8635, "step": 3054 }, { "epoch": 10.01639344262295, "grad_norm": 6.220804691314697, "learning_rate": 1.9742910440966722e-05, "loss": 1.4272, "step": 3055 }, { "epoch": 10.01967213114754, "grad_norm": 6.3201704025268555, "learning_rate": 1.9742671150271093e-05, "loss": 1.4878, "step": 3056 }, { "epoch": 10.02295081967213, "grad_norm": 5.358595371246338, "learning_rate": 1.9742431749716716e-05, "loss": 1.7268, "step": 3057 }, { "epoch": 10.026229508196721, "grad_norm": 5.695865154266357, "learning_rate": 1.9742192239306296e-05, "loss": 1.585, "step": 3058 }, { "epoch": 10.029508196721311, "grad_norm": 5.777499675750732, "learning_rate": 1.9741952619042524e-05, "loss": 1.6367, "step": 3059 }, { "epoch": 10.032786885245901, "grad_norm": 7.126350402832031, "learning_rate": 1.974171288892811e-05, "loss": 1.7332, "step": 3060 }, { "epoch": 10.036065573770491, "grad_norm": 10.08759593963623, "learning_rate": 1.974147304896575e-05, "loss": 1.6221, "step": 3061 }, { "epoch": 10.039344262295081, "grad_norm": 4.83104944229126, "learning_rate": 1.974123309915816e-05, "loss": 1.6262, "step": 3062 }, { "epoch": 10.042622950819672, "grad_norm": 10.572123527526855, "learning_rate": 1.9740993039508033e-05, "loss": 1.5598, "step": 3063 }, { "epoch": 10.045901639344262, "grad_norm": 10.108687400817871, "learning_rate": 1.9740752870018082e-05, "loss": 1.6138, "step": 3064 }, { "epoch": 10.049180327868852, "grad_norm": 5.311934947967529, "learning_rate": 1.9740512590691017e-05, "loss": 1.6455, "step": 3065 }, { "epoch": 10.052459016393442, "grad_norm": 6.72400426864624, "learning_rate": 1.9740272201529543e-05, "loss": 1.4907, "step": 3066 }, { "epoch": 10.055737704918032, "grad_norm": 9.347002029418945, "learning_rate": 1.9740031702536373e-05, "loss": 1.6011, "step": 3067 }, { "epoch": 10.059016393442622, "grad_norm": 11.31977367401123, "learning_rate": 1.9739791093714217e-05, "loss": 1.7617, "step": 3068 }, { "epoch": 10.062295081967212, "grad_norm": 6.684894561767578, "learning_rate": 1.9739550375065796e-05, "loss": 1.5593, "step": 3069 }, { "epoch": 10.065573770491802, "grad_norm": 8.969769477844238, "learning_rate": 1.9739309546593814e-05, "loss": 1.5864, "step": 3070 }, { "epoch": 10.068852459016393, "grad_norm": 5.909511089324951, "learning_rate": 1.9739068608300985e-05, "loss": 1.415, "step": 3071 }, { "epoch": 10.072131147540984, "grad_norm": 5.743276119232178, "learning_rate": 1.9738827560190036e-05, "loss": 1.6538, "step": 3072 }, { "epoch": 10.075409836065575, "grad_norm": 7.574795246124268, "learning_rate": 1.973858640226368e-05, "loss": 1.4995, "step": 3073 }, { "epoch": 10.078688524590165, "grad_norm": 7.256459712982178, "learning_rate": 1.9738345134524638e-05, "loss": 1.5735, "step": 3074 }, { "epoch": 10.081967213114755, "grad_norm": 7.90255880355835, "learning_rate": 1.973810375697563e-05, "loss": 1.553, "step": 3075 }, { "epoch": 10.085245901639345, "grad_norm": 8.38342571258545, "learning_rate": 1.9737862269619375e-05, "loss": 1.6099, "step": 3076 }, { "epoch": 10.088524590163935, "grad_norm": 4.910897731781006, "learning_rate": 1.97376206724586e-05, "loss": 1.5918, "step": 3077 }, { "epoch": 10.091803278688525, "grad_norm": 7.370486259460449, "learning_rate": 1.9737378965496026e-05, "loss": 1.5452, "step": 3078 }, { "epoch": 10.095081967213115, "grad_norm": 6.919717311859131, "learning_rate": 1.973713714873438e-05, "loss": 1.7534, "step": 3079 }, { "epoch": 10.098360655737705, "grad_norm": 7.111212730407715, "learning_rate": 1.9736895222176387e-05, "loss": 1.7507, "step": 3080 }, { "epoch": 10.101639344262296, "grad_norm": 6.373525619506836, "learning_rate": 1.973665318582478e-05, "loss": 1.5408, "step": 3081 }, { "epoch": 10.104918032786886, "grad_norm": 6.449493408203125, "learning_rate": 1.973641103968228e-05, "loss": 1.7493, "step": 3082 }, { "epoch": 10.108196721311476, "grad_norm": 6.151986598968506, "learning_rate": 1.9736168783751622e-05, "loss": 1.675, "step": 3083 }, { "epoch": 10.111475409836066, "grad_norm": 7.028993129730225, "learning_rate": 1.9735926418035543e-05, "loss": 1.5613, "step": 3084 }, { "epoch": 10.114754098360656, "grad_norm": 7.068805694580078, "learning_rate": 1.9735683942536768e-05, "loss": 1.4797, "step": 3085 }, { "epoch": 10.118032786885246, "grad_norm": 7.098053455352783, "learning_rate": 1.9735441357258036e-05, "loss": 1.7893, "step": 3086 }, { "epoch": 10.121311475409836, "grad_norm": 12.886722564697266, "learning_rate": 1.973519866220208e-05, "loss": 1.6025, "step": 3087 }, { "epoch": 10.124590163934426, "grad_norm": 6.462985515594482, "learning_rate": 1.9734955857371635e-05, "loss": 1.4944, "step": 3088 }, { "epoch": 10.127868852459017, "grad_norm": 5.801482677459717, "learning_rate": 1.973471294276944e-05, "loss": 1.3584, "step": 3089 }, { "epoch": 10.131147540983607, "grad_norm": 5.301521301269531, "learning_rate": 1.973446991839824e-05, "loss": 1.5977, "step": 3090 }, { "epoch": 10.134426229508197, "grad_norm": 6.902652740478516, "learning_rate": 1.9734226784260765e-05, "loss": 1.6428, "step": 3091 }, { "epoch": 10.137704918032787, "grad_norm": 6.528826713562012, "learning_rate": 1.9733983540359764e-05, "loss": 1.4932, "step": 3092 }, { "epoch": 10.140983606557377, "grad_norm": 10.091462135314941, "learning_rate": 1.9733740186697978e-05, "loss": 1.7136, "step": 3093 }, { "epoch": 10.144262295081967, "grad_norm": 5.429568767547607, "learning_rate": 1.973349672327815e-05, "loss": 1.7061, "step": 3094 }, { "epoch": 10.147540983606557, "grad_norm": 10.5806884765625, "learning_rate": 1.9733253150103028e-05, "loss": 1.5546, "step": 3095 }, { "epoch": 10.150819672131147, "grad_norm": 5.21464729309082, "learning_rate": 1.9733009467175354e-05, "loss": 1.7146, "step": 3096 }, { "epoch": 10.154098360655738, "grad_norm": 7.122311115264893, "learning_rate": 1.9732765674497882e-05, "loss": 1.5754, "step": 3097 }, { "epoch": 10.157377049180328, "grad_norm": 5.999794006347656, "learning_rate": 1.9732521772073352e-05, "loss": 1.7847, "step": 3098 }, { "epoch": 10.160655737704918, "grad_norm": 7.123395919799805, "learning_rate": 1.9732277759904527e-05, "loss": 1.856, "step": 3099 }, { "epoch": 10.163934426229508, "grad_norm": 6.408200740814209, "learning_rate": 1.9732033637994147e-05, "loss": 1.6216, "step": 3100 }, { "epoch": 10.167213114754098, "grad_norm": 7.684111595153809, "learning_rate": 1.9731789406344967e-05, "loss": 1.6094, "step": 3101 }, { "epoch": 10.170491803278688, "grad_norm": 8.211151123046875, "learning_rate": 1.973154506495975e-05, "loss": 1.6289, "step": 3102 }, { "epoch": 10.173770491803278, "grad_norm": 6.056899070739746, "learning_rate": 1.9731300613841235e-05, "loss": 1.5967, "step": 3103 }, { "epoch": 10.177049180327868, "grad_norm": 5.991641044616699, "learning_rate": 1.973105605299219e-05, "loss": 1.519, "step": 3104 }, { "epoch": 10.180327868852459, "grad_norm": 6.277364253997803, "learning_rate": 1.9730811382415377e-05, "loss": 1.5347, "step": 3105 }, { "epoch": 10.183606557377049, "grad_norm": 6.688467502593994, "learning_rate": 1.973056660211354e-05, "loss": 1.6106, "step": 3106 }, { "epoch": 10.186885245901639, "grad_norm": 9.436463356018066, "learning_rate": 1.9730321712089452e-05, "loss": 1.6416, "step": 3107 }, { "epoch": 10.190163934426229, "grad_norm": 6.2921037673950195, "learning_rate": 1.9730076712345866e-05, "loss": 1.6934, "step": 3108 }, { "epoch": 10.193442622950819, "grad_norm": 7.966132640838623, "learning_rate": 1.9729831602885553e-05, "loss": 1.6316, "step": 3109 }, { "epoch": 10.19672131147541, "grad_norm": 5.446600914001465, "learning_rate": 1.972958638371127e-05, "loss": 1.5303, "step": 3110 }, { "epoch": 10.2, "grad_norm": 6.649691581726074, "learning_rate": 1.9729341054825783e-05, "loss": 1.5876, "step": 3111 }, { "epoch": 10.20327868852459, "grad_norm": 6.621654510498047, "learning_rate": 1.972909561623186e-05, "loss": 1.5208, "step": 3112 }, { "epoch": 10.20655737704918, "grad_norm": 6.759517669677734, "learning_rate": 1.972885006793227e-05, "loss": 1.7964, "step": 3113 }, { "epoch": 10.20983606557377, "grad_norm": 6.633927822113037, "learning_rate": 1.9728604409929778e-05, "loss": 1.5854, "step": 3114 }, { "epoch": 10.21311475409836, "grad_norm": 8.26524543762207, "learning_rate": 1.9728358642227158e-05, "loss": 1.6183, "step": 3115 }, { "epoch": 10.216393442622952, "grad_norm": 6.116519451141357, "learning_rate": 1.9728112764827178e-05, "loss": 1.5945, "step": 3116 }, { "epoch": 10.219672131147542, "grad_norm": 6.338658809661865, "learning_rate": 1.9727866777732613e-05, "loss": 1.6064, "step": 3117 }, { "epoch": 10.222950819672132, "grad_norm": 7.3842644691467285, "learning_rate": 1.9727620680946234e-05, "loss": 1.5493, "step": 3118 }, { "epoch": 10.226229508196722, "grad_norm": 7.9148454666137695, "learning_rate": 1.972737447447082e-05, "loss": 1.5049, "step": 3119 }, { "epoch": 10.229508196721312, "grad_norm": 5.1120524406433105, "learning_rate": 1.9727128158309143e-05, "loss": 1.7866, "step": 3120 }, { "epoch": 10.232786885245902, "grad_norm": 5.920431137084961, "learning_rate": 1.9726881732463983e-05, "loss": 1.5605, "step": 3121 }, { "epoch": 10.236065573770492, "grad_norm": 5.299238204956055, "learning_rate": 1.972663519693812e-05, "loss": 1.5586, "step": 3122 }, { "epoch": 10.239344262295083, "grad_norm": 6.193784236907959, "learning_rate": 1.972638855173433e-05, "loss": 1.5938, "step": 3123 }, { "epoch": 10.242622950819673, "grad_norm": 4.866751670837402, "learning_rate": 1.9726141796855393e-05, "loss": 1.5833, "step": 3124 }, { "epoch": 10.245901639344263, "grad_norm": 5.947341442108154, "learning_rate": 1.9725894932304097e-05, "loss": 1.5374, "step": 3125 }, { "epoch": 10.249180327868853, "grad_norm": 26.426179885864258, "learning_rate": 1.9725647958083224e-05, "loss": 1.5942, "step": 3126 }, { "epoch": 10.252459016393443, "grad_norm": 7.942204475402832, "learning_rate": 1.972540087419556e-05, "loss": 1.4368, "step": 3127 }, { "epoch": 10.255737704918033, "grad_norm": 7.864777088165283, "learning_rate": 1.9725153680643886e-05, "loss": 1.4729, "step": 3128 }, { "epoch": 10.259016393442623, "grad_norm": 5.738066673278809, "learning_rate": 1.9724906377430994e-05, "loss": 1.6494, "step": 3129 }, { "epoch": 10.262295081967213, "grad_norm": 10.043670654296875, "learning_rate": 1.972465896455967e-05, "loss": 1.3892, "step": 3130 }, { "epoch": 10.265573770491804, "grad_norm": 7.142899990081787, "learning_rate": 1.9724411442032706e-05, "loss": 1.5195, "step": 3131 }, { "epoch": 10.268852459016394, "grad_norm": 5.734256744384766, "learning_rate": 1.9724163809852892e-05, "loss": 1.6904, "step": 3132 }, { "epoch": 10.272131147540984, "grad_norm": 6.5127716064453125, "learning_rate": 1.972391606802302e-05, "loss": 1.4077, "step": 3133 }, { "epoch": 10.275409836065574, "grad_norm": 5.077420711517334, "learning_rate": 1.9723668216545885e-05, "loss": 1.5713, "step": 3134 }, { "epoch": 10.278688524590164, "grad_norm": 6.80875825881958, "learning_rate": 1.9723420255424282e-05, "loss": 1.5022, "step": 3135 }, { "epoch": 10.281967213114754, "grad_norm": 7.183831691741943, "learning_rate": 1.9723172184661004e-05, "loss": 1.5524, "step": 3136 }, { "epoch": 10.285245901639344, "grad_norm": 5.0601630210876465, "learning_rate": 1.972292400425885e-05, "loss": 1.8213, "step": 3137 }, { "epoch": 10.288524590163934, "grad_norm": 6.182458877563477, "learning_rate": 1.972267571422062e-05, "loss": 1.6118, "step": 3138 }, { "epoch": 10.291803278688525, "grad_norm": 6.094703674316406, "learning_rate": 1.972242731454911e-05, "loss": 1.4624, "step": 3139 }, { "epoch": 10.295081967213115, "grad_norm": 5.743781566619873, "learning_rate": 1.9722178805247127e-05, "loss": 1.5176, "step": 3140 }, { "epoch": 10.298360655737705, "grad_norm": 6.443289756774902, "learning_rate": 1.9721930186317464e-05, "loss": 1.7742, "step": 3141 }, { "epoch": 10.301639344262295, "grad_norm": 5.547738552093506, "learning_rate": 1.9721681457762933e-05, "loss": 1.5752, "step": 3142 }, { "epoch": 10.304918032786885, "grad_norm": 6.0187458992004395, "learning_rate": 1.9721432619586337e-05, "loss": 1.7307, "step": 3143 }, { "epoch": 10.308196721311475, "grad_norm": 8.904315948486328, "learning_rate": 1.9721183671790477e-05, "loss": 1.4841, "step": 3144 }, { "epoch": 10.311475409836065, "grad_norm": 7.611761093139648, "learning_rate": 1.9720934614378166e-05, "loss": 1.6465, "step": 3145 }, { "epoch": 10.314754098360655, "grad_norm": 6.254424571990967, "learning_rate": 1.972068544735221e-05, "loss": 1.5217, "step": 3146 }, { "epoch": 10.318032786885245, "grad_norm": 5.755247592926025, "learning_rate": 1.9720436170715418e-05, "loss": 1.6248, "step": 3147 }, { "epoch": 10.321311475409836, "grad_norm": 5.965610027313232, "learning_rate": 1.9720186784470602e-05, "loss": 1.458, "step": 3148 }, { "epoch": 10.324590163934426, "grad_norm": 5.612600326538086, "learning_rate": 1.971993728862057e-05, "loss": 1.7988, "step": 3149 }, { "epoch": 10.327868852459016, "grad_norm": 5.119429111480713, "learning_rate": 1.9719687683168143e-05, "loss": 1.5889, "step": 3150 }, { "epoch": 10.331147540983606, "grad_norm": 5.827228546142578, "learning_rate": 1.9719437968116132e-05, "loss": 1.5583, "step": 3151 }, { "epoch": 10.334426229508196, "grad_norm": 6.053336143493652, "learning_rate": 1.9719188143467347e-05, "loss": 1.748, "step": 3152 }, { "epoch": 10.337704918032786, "grad_norm": 7.016112327575684, "learning_rate": 1.9718938209224615e-05, "loss": 1.7227, "step": 3153 }, { "epoch": 10.340983606557376, "grad_norm": 6.382828235626221, "learning_rate": 1.971868816539075e-05, "loss": 1.6692, "step": 3154 }, { "epoch": 10.344262295081966, "grad_norm": 23.729089736938477, "learning_rate": 1.9718438011968567e-05, "loss": 1.5242, "step": 3155 }, { "epoch": 10.347540983606557, "grad_norm": 4.650640964508057, "learning_rate": 1.9718187748960896e-05, "loss": 1.6006, "step": 3156 }, { "epoch": 10.350819672131147, "grad_norm": 8.215032577514648, "learning_rate": 1.971793737637055e-05, "loss": 1.5618, "step": 3157 }, { "epoch": 10.354098360655737, "grad_norm": 6.5078325271606445, "learning_rate": 1.9717686894200354e-05, "loss": 1.5122, "step": 3158 }, { "epoch": 10.357377049180329, "grad_norm": 7.980636119842529, "learning_rate": 1.9717436302453138e-05, "loss": 1.6575, "step": 3159 }, { "epoch": 10.360655737704919, "grad_norm": 5.946633815765381, "learning_rate": 1.9717185601131724e-05, "loss": 1.6013, "step": 3160 }, { "epoch": 10.363934426229509, "grad_norm": 9.610485076904297, "learning_rate": 1.9716934790238938e-05, "loss": 1.5557, "step": 3161 }, { "epoch": 10.3672131147541, "grad_norm": 7.497705459594727, "learning_rate": 1.971668386977761e-05, "loss": 1.8188, "step": 3162 }, { "epoch": 10.37049180327869, "grad_norm": 7.252406597137451, "learning_rate": 1.971643283975057e-05, "loss": 1.7676, "step": 3163 }, { "epoch": 10.37377049180328, "grad_norm": 6.731439590454102, "learning_rate": 1.9716181700160644e-05, "loss": 1.647, "step": 3164 }, { "epoch": 10.37704918032787, "grad_norm": 6.477404594421387, "learning_rate": 1.9715930451010666e-05, "loss": 1.689, "step": 3165 }, { "epoch": 10.38032786885246, "grad_norm": 12.86701774597168, "learning_rate": 1.9715679092303475e-05, "loss": 1.8027, "step": 3166 }, { "epoch": 10.38360655737705, "grad_norm": 7.471220970153809, "learning_rate": 1.9715427624041896e-05, "loss": 1.6348, "step": 3167 }, { "epoch": 10.38688524590164, "grad_norm": 7.112382411956787, "learning_rate": 1.9715176046228773e-05, "loss": 1.5415, "step": 3168 }, { "epoch": 10.39016393442623, "grad_norm": 5.718909740447998, "learning_rate": 1.9714924358866937e-05, "loss": 1.5361, "step": 3169 }, { "epoch": 10.39344262295082, "grad_norm": 6.199763774871826, "learning_rate": 1.9714672561959224e-05, "loss": 1.5129, "step": 3170 }, { "epoch": 10.39672131147541, "grad_norm": 5.560414791107178, "learning_rate": 1.971442065550848e-05, "loss": 1.6992, "step": 3171 }, { "epoch": 10.4, "grad_norm": 7.286962509155273, "learning_rate": 1.9714168639517543e-05, "loss": 1.5146, "step": 3172 }, { "epoch": 10.40327868852459, "grad_norm": 5.838881969451904, "learning_rate": 1.9713916513989257e-05, "loss": 1.8027, "step": 3173 }, { "epoch": 10.40655737704918, "grad_norm": 6.036506652832031, "learning_rate": 1.9713664278926458e-05, "loss": 1.541, "step": 3174 }, { "epoch": 10.40983606557377, "grad_norm": 5.4046711921691895, "learning_rate": 1.9713411934331995e-05, "loss": 1.7373, "step": 3175 }, { "epoch": 10.41311475409836, "grad_norm": 5.411284923553467, "learning_rate": 1.9713159480208718e-05, "loss": 1.5854, "step": 3176 }, { "epoch": 10.416393442622951, "grad_norm": 6.020194053649902, "learning_rate": 1.9712906916559463e-05, "loss": 1.4579, "step": 3177 }, { "epoch": 10.419672131147541, "grad_norm": 5.916782855987549, "learning_rate": 1.9712654243387087e-05, "loss": 1.4668, "step": 3178 }, { "epoch": 10.422950819672131, "grad_norm": 6.367114543914795, "learning_rate": 1.9712401460694438e-05, "loss": 1.4648, "step": 3179 }, { "epoch": 10.426229508196721, "grad_norm": 4.744009971618652, "learning_rate": 1.971214856848436e-05, "loss": 1.7896, "step": 3180 }, { "epoch": 10.429508196721311, "grad_norm": 6.84388542175293, "learning_rate": 1.9711895566759706e-05, "loss": 1.4465, "step": 3181 }, { "epoch": 10.432786885245902, "grad_norm": 6.80755090713501, "learning_rate": 1.9711642455523336e-05, "loss": 1.4377, "step": 3182 }, { "epoch": 10.436065573770492, "grad_norm": 6.287956237792969, "learning_rate": 1.97113892347781e-05, "loss": 1.5671, "step": 3183 }, { "epoch": 10.439344262295082, "grad_norm": 18.05760383605957, "learning_rate": 1.971113590452685e-05, "loss": 1.5918, "step": 3184 }, { "epoch": 10.442622950819672, "grad_norm": 6.902695655822754, "learning_rate": 1.971088246477245e-05, "loss": 1.5034, "step": 3185 }, { "epoch": 10.445901639344262, "grad_norm": 5.735990524291992, "learning_rate": 1.971062891551775e-05, "loss": 1.5105, "step": 3186 }, { "epoch": 10.449180327868852, "grad_norm": 5.479111194610596, "learning_rate": 1.9710375256765616e-05, "loss": 1.7754, "step": 3187 }, { "epoch": 10.452459016393442, "grad_norm": 7.314080715179443, "learning_rate": 1.97101214885189e-05, "loss": 1.7573, "step": 3188 }, { "epoch": 10.455737704918032, "grad_norm": 6.028777599334717, "learning_rate": 1.9709867610780475e-05, "loss": 1.5273, "step": 3189 }, { "epoch": 10.459016393442623, "grad_norm": 6.867299556732178, "learning_rate": 1.970961362355319e-05, "loss": 1.5027, "step": 3190 }, { "epoch": 10.462295081967213, "grad_norm": 6.416689395904541, "learning_rate": 1.9709359526839917e-05, "loss": 1.6255, "step": 3191 }, { "epoch": 10.465573770491803, "grad_norm": 7.194785118103027, "learning_rate": 1.9709105320643524e-05, "loss": 1.5156, "step": 3192 }, { "epoch": 10.468852459016393, "grad_norm": 5.029630184173584, "learning_rate": 1.970885100496687e-05, "loss": 1.738, "step": 3193 }, { "epoch": 10.472131147540983, "grad_norm": 13.872712135314941, "learning_rate": 1.9708596579812828e-05, "loss": 1.5784, "step": 3194 }, { "epoch": 10.475409836065573, "grad_norm": 5.8272624015808105, "learning_rate": 1.970834204518426e-05, "loss": 1.5237, "step": 3195 }, { "epoch": 10.478688524590163, "grad_norm": 6.647655010223389, "learning_rate": 1.970808740108405e-05, "loss": 1.5493, "step": 3196 }, { "epoch": 10.481967213114753, "grad_norm": 6.468630313873291, "learning_rate": 1.9707832647515057e-05, "loss": 1.6365, "step": 3197 }, { "epoch": 10.485245901639344, "grad_norm": 5.00201940536499, "learning_rate": 1.9707577784480155e-05, "loss": 1.6714, "step": 3198 }, { "epoch": 10.488524590163934, "grad_norm": 6.625330924987793, "learning_rate": 1.9707322811982223e-05, "loss": 1.6748, "step": 3199 }, { "epoch": 10.491803278688524, "grad_norm": 8.568720817565918, "learning_rate": 1.970706773002413e-05, "loss": 1.646, "step": 3200 }, { "epoch": 10.495081967213114, "grad_norm": 5.8176798820495605, "learning_rate": 1.9706812538608758e-05, "loss": 1.6021, "step": 3201 }, { "epoch": 10.498360655737706, "grad_norm": 8.071352005004883, "learning_rate": 1.9706557237738985e-05, "loss": 1.5129, "step": 3202 }, { "epoch": 10.501639344262294, "grad_norm": 7.3855390548706055, "learning_rate": 1.970630182741768e-05, "loss": 1.5979, "step": 3203 }, { "epoch": 10.504918032786886, "grad_norm": 5.549481391906738, "learning_rate": 1.9706046307647737e-05, "loss": 1.5527, "step": 3204 }, { "epoch": 10.508196721311476, "grad_norm": 6.702624320983887, "learning_rate": 1.9705790678432025e-05, "loss": 1.5488, "step": 3205 }, { "epoch": 10.511475409836066, "grad_norm": 58.05264663696289, "learning_rate": 1.9705534939773435e-05, "loss": 1.4753, "step": 3206 }, { "epoch": 10.514754098360656, "grad_norm": 6.300098419189453, "learning_rate": 1.9705279091674842e-05, "loss": 2.0176, "step": 3207 }, { "epoch": 10.518032786885247, "grad_norm": 7.811896324157715, "learning_rate": 1.9705023134139144e-05, "loss": 1.5918, "step": 3208 }, { "epoch": 10.521311475409837, "grad_norm": 5.4656758308410645, "learning_rate": 1.9704767067169212e-05, "loss": 1.4771, "step": 3209 }, { "epoch": 10.524590163934427, "grad_norm": 8.17127513885498, "learning_rate": 1.9704510890767947e-05, "loss": 1.5051, "step": 3210 }, { "epoch": 10.527868852459017, "grad_norm": 7.870996952056885, "learning_rate": 1.9704254604938227e-05, "loss": 1.4927, "step": 3211 }, { "epoch": 10.531147540983607, "grad_norm": 8.101133346557617, "learning_rate": 1.970399820968295e-05, "loss": 1.428, "step": 3212 }, { "epoch": 10.534426229508197, "grad_norm": 7.394786357879639, "learning_rate": 1.9703741705004998e-05, "loss": 1.7563, "step": 3213 }, { "epoch": 10.537704918032787, "grad_norm": 7.5224175453186035, "learning_rate": 1.9703485090907277e-05, "loss": 1.5505, "step": 3214 }, { "epoch": 10.540983606557377, "grad_norm": 6.368837833404541, "learning_rate": 1.9703228367392665e-05, "loss": 1.71, "step": 3215 }, { "epoch": 10.544262295081968, "grad_norm": 14.014622688293457, "learning_rate": 1.970297153446407e-05, "loss": 1.6484, "step": 3216 }, { "epoch": 10.547540983606558, "grad_norm": 8.883369445800781, "learning_rate": 1.9702714592124377e-05, "loss": 1.5491, "step": 3217 }, { "epoch": 10.550819672131148, "grad_norm": 7.418088912963867, "learning_rate": 1.9702457540376492e-05, "loss": 1.6343, "step": 3218 }, { "epoch": 10.554098360655738, "grad_norm": 9.398571014404297, "learning_rate": 1.970220037922331e-05, "loss": 1.9531, "step": 3219 }, { "epoch": 10.557377049180328, "grad_norm": 7.673293113708496, "learning_rate": 1.970194310866773e-05, "loss": 1.7432, "step": 3220 }, { "epoch": 10.560655737704918, "grad_norm": 8.108139038085938, "learning_rate": 1.9701685728712653e-05, "loss": 1.6963, "step": 3221 }, { "epoch": 10.563934426229508, "grad_norm": 10.08395004272461, "learning_rate": 1.9701428239360988e-05, "loss": 1.6533, "step": 3222 }, { "epoch": 10.567213114754098, "grad_norm": 7.790337562561035, "learning_rate": 1.9701170640615624e-05, "loss": 1.6047, "step": 3223 }, { "epoch": 10.570491803278689, "grad_norm": 31.984861373901367, "learning_rate": 1.9700912932479482e-05, "loss": 1.7004, "step": 3224 }, { "epoch": 10.573770491803279, "grad_norm": 7.225538730621338, "learning_rate": 1.9700655114955455e-05, "loss": 1.6261, "step": 3225 }, { "epoch": 10.577049180327869, "grad_norm": 5.625460624694824, "learning_rate": 1.9700397188046458e-05, "loss": 1.5874, "step": 3226 }, { "epoch": 10.580327868852459, "grad_norm": 6.251519680023193, "learning_rate": 1.9700139151755397e-05, "loss": 1.7148, "step": 3227 }, { "epoch": 10.583606557377049, "grad_norm": 7.114630222320557, "learning_rate": 1.969988100608518e-05, "loss": 1.7644, "step": 3228 }, { "epoch": 10.58688524590164, "grad_norm": 10.948811531066895, "learning_rate": 1.969962275103872e-05, "loss": 1.8462, "step": 3229 }, { "epoch": 10.59016393442623, "grad_norm": 6.49611234664917, "learning_rate": 1.969936438661893e-05, "loss": 1.3604, "step": 3230 }, { "epoch": 10.59344262295082, "grad_norm": 5.7091193199157715, "learning_rate": 1.969910591282872e-05, "loss": 1.7754, "step": 3231 }, { "epoch": 10.59672131147541, "grad_norm": 13.82352352142334, "learning_rate": 1.9698847329671004e-05, "loss": 1.7649, "step": 3232 }, { "epoch": 10.6, "grad_norm": 7.856581211090088, "learning_rate": 1.9698588637148705e-05, "loss": 1.5552, "step": 3233 }, { "epoch": 10.60327868852459, "grad_norm": 10.00484848022461, "learning_rate": 1.9698329835264732e-05, "loss": 1.6807, "step": 3234 }, { "epoch": 10.60655737704918, "grad_norm": 6.956050872802734, "learning_rate": 1.969807092402201e-05, "loss": 1.7778, "step": 3235 }, { "epoch": 10.60983606557377, "grad_norm": 7.548799991607666, "learning_rate": 1.969781190342345e-05, "loss": 1.5977, "step": 3236 }, { "epoch": 10.61311475409836, "grad_norm": 6.890161514282227, "learning_rate": 1.969755277347198e-05, "loss": 1.731, "step": 3237 }, { "epoch": 10.61639344262295, "grad_norm": 6.261634349822998, "learning_rate": 1.969729353417052e-05, "loss": 1.7114, "step": 3238 }, { "epoch": 10.61967213114754, "grad_norm": 6.1631083488464355, "learning_rate": 1.9697034185521992e-05, "loss": 1.6079, "step": 3239 }, { "epoch": 10.62295081967213, "grad_norm": 5.566563606262207, "learning_rate": 1.969677472752932e-05, "loss": 1.8977, "step": 3240 }, { "epoch": 10.62622950819672, "grad_norm": 8.446002960205078, "learning_rate": 1.969651516019543e-05, "loss": 1.7019, "step": 3241 }, { "epoch": 10.62950819672131, "grad_norm": 7.187227249145508, "learning_rate": 1.9696255483523252e-05, "loss": 1.9575, "step": 3242 }, { "epoch": 10.6327868852459, "grad_norm": 15.879354476928711, "learning_rate": 1.969599569751571e-05, "loss": 1.7012, "step": 3243 }, { "epoch": 10.636065573770491, "grad_norm": 5.949007511138916, "learning_rate": 1.9695735802175737e-05, "loss": 1.7041, "step": 3244 }, { "epoch": 10.639344262295083, "grad_norm": 7.234302997589111, "learning_rate": 1.9695475797506263e-05, "loss": 1.542, "step": 3245 }, { "epoch": 10.642622950819671, "grad_norm": 5.7141289710998535, "learning_rate": 1.969521568351022e-05, "loss": 1.8779, "step": 3246 }, { "epoch": 10.645901639344263, "grad_norm": 7.584816932678223, "learning_rate": 1.9694955460190534e-05, "loss": 1.7842, "step": 3247 }, { "epoch": 10.649180327868853, "grad_norm": 5.838107109069824, "learning_rate": 1.9694695127550147e-05, "loss": 1.6025, "step": 3248 }, { "epoch": 10.652459016393443, "grad_norm": 6.414444446563721, "learning_rate": 1.9694434685591993e-05, "loss": 1.7075, "step": 3249 }, { "epoch": 10.655737704918034, "grad_norm": 5.470775127410889, "learning_rate": 1.969417413431901e-05, "loss": 1.5452, "step": 3250 }, { "epoch": 10.659016393442624, "grad_norm": 5.474311351776123, "learning_rate": 1.9693913473734133e-05, "loss": 1.6357, "step": 3251 }, { "epoch": 10.662295081967214, "grad_norm": 5.184743881225586, "learning_rate": 1.96936527038403e-05, "loss": 1.5427, "step": 3252 }, { "epoch": 10.665573770491804, "grad_norm": 5.798811912536621, "learning_rate": 1.9693391824640455e-05, "loss": 1.6228, "step": 3253 }, { "epoch": 10.668852459016394, "grad_norm": 5.928319931030273, "learning_rate": 1.969313083613754e-05, "loss": 1.6543, "step": 3254 }, { "epoch": 10.672131147540984, "grad_norm": 7.669551849365234, "learning_rate": 1.9692869738334498e-05, "loss": 1.4751, "step": 3255 }, { "epoch": 10.675409836065574, "grad_norm": 4.4816999435424805, "learning_rate": 1.969260853123427e-05, "loss": 1.6411, "step": 3256 }, { "epoch": 10.678688524590164, "grad_norm": 8.33580207824707, "learning_rate": 1.96923472148398e-05, "loss": 1.4585, "step": 3257 }, { "epoch": 10.681967213114755, "grad_norm": 6.302737712860107, "learning_rate": 1.9692085789154044e-05, "loss": 1.5422, "step": 3258 }, { "epoch": 10.685245901639345, "grad_norm": 5.75726842880249, "learning_rate": 1.9691824254179936e-05, "loss": 1.7729, "step": 3259 }, { "epoch": 10.688524590163935, "grad_norm": 6.310609817504883, "learning_rate": 1.9691562609920435e-05, "loss": 1.5361, "step": 3260 }, { "epoch": 10.691803278688525, "grad_norm": 5.262107849121094, "learning_rate": 1.969130085637849e-05, "loss": 1.5476, "step": 3261 }, { "epoch": 10.695081967213115, "grad_norm": 8.382882118225098, "learning_rate": 1.9691038993557056e-05, "loss": 1.7295, "step": 3262 }, { "epoch": 10.698360655737705, "grad_norm": 19.890472412109375, "learning_rate": 1.9690777021459077e-05, "loss": 1.4514, "step": 3263 }, { "epoch": 10.701639344262295, "grad_norm": 6.451564788818359, "learning_rate": 1.9690514940087508e-05, "loss": 1.4441, "step": 3264 }, { "epoch": 10.704918032786885, "grad_norm": 6.087888717651367, "learning_rate": 1.969025274944531e-05, "loss": 1.8667, "step": 3265 }, { "epoch": 10.708196721311475, "grad_norm": 7.182992458343506, "learning_rate": 1.9689990449535437e-05, "loss": 1.5352, "step": 3266 }, { "epoch": 10.711475409836066, "grad_norm": 5.459536552429199, "learning_rate": 1.9689728040360848e-05, "loss": 1.6741, "step": 3267 }, { "epoch": 10.714754098360656, "grad_norm": 6.258115291595459, "learning_rate": 1.96894655219245e-05, "loss": 1.6709, "step": 3268 }, { "epoch": 10.718032786885246, "grad_norm": 5.542584419250488, "learning_rate": 1.9689202894229352e-05, "loss": 1.4795, "step": 3269 }, { "epoch": 10.721311475409836, "grad_norm": 6.066209316253662, "learning_rate": 1.9688940157278372e-05, "loss": 1.6245, "step": 3270 }, { "epoch": 10.724590163934426, "grad_norm": 5.760073184967041, "learning_rate": 1.968867731107451e-05, "loss": 1.5437, "step": 3271 }, { "epoch": 10.727868852459016, "grad_norm": 6.515119552612305, "learning_rate": 1.9688414355620743e-05, "loss": 1.4697, "step": 3272 }, { "epoch": 10.731147540983606, "grad_norm": 6.235326290130615, "learning_rate": 1.968815129092003e-05, "loss": 1.8259, "step": 3273 }, { "epoch": 10.734426229508196, "grad_norm": 5.557016849517822, "learning_rate": 1.9687888116975337e-05, "loss": 1.5718, "step": 3274 }, { "epoch": 10.737704918032787, "grad_norm": 6.120484828948975, "learning_rate": 1.9687624833789635e-05, "loss": 1.6694, "step": 3275 }, { "epoch": 10.740983606557377, "grad_norm": 6.438638210296631, "learning_rate": 1.9687361441365888e-05, "loss": 1.5771, "step": 3276 }, { "epoch": 10.744262295081967, "grad_norm": 13.40726089477539, "learning_rate": 1.9687097939707068e-05, "loss": 1.7058, "step": 3277 }, { "epoch": 10.747540983606557, "grad_norm": 7.686384201049805, "learning_rate": 1.9686834328816146e-05, "loss": 1.4951, "step": 3278 }, { "epoch": 10.750819672131147, "grad_norm": 7.137314319610596, "learning_rate": 1.9686570608696097e-05, "loss": 1.6245, "step": 3279 }, { "epoch": 10.754098360655737, "grad_norm": 5.738710880279541, "learning_rate": 1.9686306779349897e-05, "loss": 1.4324, "step": 3280 }, { "epoch": 10.757377049180327, "grad_norm": 4.961010456085205, "learning_rate": 1.968604284078051e-05, "loss": 1.6353, "step": 3281 }, { "epoch": 10.760655737704917, "grad_norm": 5.427943229675293, "learning_rate": 1.968577879299092e-05, "loss": 1.7305, "step": 3282 }, { "epoch": 10.763934426229508, "grad_norm": 5.189414024353027, "learning_rate": 1.9685514635984105e-05, "loss": 1.5881, "step": 3283 }, { "epoch": 10.767213114754098, "grad_norm": 11.377198219299316, "learning_rate": 1.9685250369763044e-05, "loss": 1.4473, "step": 3284 }, { "epoch": 10.770491803278688, "grad_norm": 7.2406768798828125, "learning_rate": 1.968498599433071e-05, "loss": 1.4639, "step": 3285 }, { "epoch": 10.773770491803278, "grad_norm": 4.697203636169434, "learning_rate": 1.9684721509690094e-05, "loss": 1.517, "step": 3286 }, { "epoch": 10.777049180327868, "grad_norm": 6.064977645874023, "learning_rate": 1.9684456915844173e-05, "loss": 1.5193, "step": 3287 }, { "epoch": 10.780327868852458, "grad_norm": 7.218037128448486, "learning_rate": 1.968419221279593e-05, "loss": 1.3354, "step": 3288 }, { "epoch": 10.783606557377048, "grad_norm": 6.2235541343688965, "learning_rate": 1.968392740054835e-05, "loss": 1.5574, "step": 3289 }, { "epoch": 10.78688524590164, "grad_norm": 6.690789222717285, "learning_rate": 1.968366247910442e-05, "loss": 1.6877, "step": 3290 }, { "epoch": 10.790163934426229, "grad_norm": 5.848775386810303, "learning_rate": 1.9683397448467124e-05, "loss": 1.592, "step": 3291 }, { "epoch": 10.79344262295082, "grad_norm": 5.824182510375977, "learning_rate": 1.9683132308639455e-05, "loss": 1.6616, "step": 3292 }, { "epoch": 10.79672131147541, "grad_norm": 4.832424640655518, "learning_rate": 1.9682867059624405e-05, "loss": 1.4873, "step": 3293 }, { "epoch": 10.8, "grad_norm": 6.578055381774902, "learning_rate": 1.9682601701424958e-05, "loss": 1.527, "step": 3294 }, { "epoch": 10.80327868852459, "grad_norm": 4.96098518371582, "learning_rate": 1.9682336234044112e-05, "loss": 1.5476, "step": 3295 }, { "epoch": 10.806557377049181, "grad_norm": 5.7812418937683105, "learning_rate": 1.9682070657484857e-05, "loss": 1.6731, "step": 3296 }, { "epoch": 10.809836065573771, "grad_norm": 6.6179728507995605, "learning_rate": 1.9681804971750186e-05, "loss": 1.8398, "step": 3297 }, { "epoch": 10.813114754098361, "grad_norm": 5.051854610443115, "learning_rate": 1.96815391768431e-05, "loss": 1.7874, "step": 3298 }, { "epoch": 10.816393442622951, "grad_norm": 6.372093677520752, "learning_rate": 1.968127327276659e-05, "loss": 1.5061, "step": 3299 }, { "epoch": 10.819672131147541, "grad_norm": 6.361581325531006, "learning_rate": 1.9681007259523664e-05, "loss": 1.6445, "step": 3300 }, { "epoch": 10.822950819672132, "grad_norm": 6.763793468475342, "learning_rate": 1.9680741137117312e-05, "loss": 1.502, "step": 3301 }, { "epoch": 10.826229508196722, "grad_norm": 9.72679328918457, "learning_rate": 1.9680474905550538e-05, "loss": 1.606, "step": 3302 }, { "epoch": 10.829508196721312, "grad_norm": 6.248546123504639, "learning_rate": 1.9680208564826344e-05, "loss": 1.6389, "step": 3303 }, { "epoch": 10.832786885245902, "grad_norm": 5.079258918762207, "learning_rate": 1.9679942114947734e-05, "loss": 1.6262, "step": 3304 }, { "epoch": 10.836065573770492, "grad_norm": 6.248411655426025, "learning_rate": 1.9679675555917714e-05, "loss": 1.6121, "step": 3305 }, { "epoch": 10.839344262295082, "grad_norm": 6.907846927642822, "learning_rate": 1.9679408887739282e-05, "loss": 1.6646, "step": 3306 }, { "epoch": 10.842622950819672, "grad_norm": 7.788307189941406, "learning_rate": 1.9679142110415455e-05, "loss": 1.7336, "step": 3307 }, { "epoch": 10.845901639344262, "grad_norm": 13.099963188171387, "learning_rate": 1.9678875223949237e-05, "loss": 1.5984, "step": 3308 }, { "epoch": 10.849180327868853, "grad_norm": 5.716592788696289, "learning_rate": 1.967860822834364e-05, "loss": 1.6936, "step": 3309 }, { "epoch": 10.852459016393443, "grad_norm": 5.7552313804626465, "learning_rate": 1.9678341123601666e-05, "loss": 1.644, "step": 3310 }, { "epoch": 10.855737704918033, "grad_norm": 7.517025947570801, "learning_rate": 1.9678073909726335e-05, "loss": 1.5078, "step": 3311 }, { "epoch": 10.859016393442623, "grad_norm": 6.44614315032959, "learning_rate": 1.9677806586720664e-05, "loss": 1.4438, "step": 3312 }, { "epoch": 10.862295081967213, "grad_norm": 5.137192249298096, "learning_rate": 1.9677539154587656e-05, "loss": 1.5444, "step": 3313 }, { "epoch": 10.865573770491803, "grad_norm": 10.278777122497559, "learning_rate": 1.967727161333033e-05, "loss": 1.3779, "step": 3314 }, { "epoch": 10.868852459016393, "grad_norm": 4.927164554595947, "learning_rate": 1.9677003962951706e-05, "loss": 1.6929, "step": 3315 }, { "epoch": 10.872131147540983, "grad_norm": 5.331173896789551, "learning_rate": 1.96767362034548e-05, "loss": 1.6748, "step": 3316 }, { "epoch": 10.875409836065574, "grad_norm": 5.2380900382995605, "learning_rate": 1.9676468334842637e-05, "loss": 1.47, "step": 3317 }, { "epoch": 10.878688524590164, "grad_norm": 5.412656784057617, "learning_rate": 1.9676200357118228e-05, "loss": 1.5903, "step": 3318 }, { "epoch": 10.881967213114754, "grad_norm": 7.4760565757751465, "learning_rate": 1.96759322702846e-05, "loss": 1.6255, "step": 3319 }, { "epoch": 10.885245901639344, "grad_norm": 5.300163269042969, "learning_rate": 1.9675664074344777e-05, "loss": 1.6572, "step": 3320 }, { "epoch": 10.888524590163934, "grad_norm": 6.245927333831787, "learning_rate": 1.9675395769301778e-05, "loss": 1.7883, "step": 3321 }, { "epoch": 10.891803278688524, "grad_norm": 7.941709518432617, "learning_rate": 1.9675127355158632e-05, "loss": 1.4778, "step": 3322 }, { "epoch": 10.895081967213114, "grad_norm": 33.911712646484375, "learning_rate": 1.9674858831918368e-05, "loss": 1.7107, "step": 3323 }, { "epoch": 10.898360655737704, "grad_norm": 9.31186294555664, "learning_rate": 1.967459019958401e-05, "loss": 1.8086, "step": 3324 }, { "epoch": 10.901639344262295, "grad_norm": 6.2590532302856445, "learning_rate": 1.967432145815859e-05, "loss": 1.5742, "step": 3325 }, { "epoch": 10.904918032786885, "grad_norm": 6.4504804611206055, "learning_rate": 1.9674052607645137e-05, "loss": 1.5012, "step": 3326 }, { "epoch": 10.908196721311475, "grad_norm": 6.432026386260986, "learning_rate": 1.967378364804668e-05, "loss": 1.5498, "step": 3327 }, { "epoch": 10.911475409836065, "grad_norm": 7.65653133392334, "learning_rate": 1.9673514579366256e-05, "loss": 1.7417, "step": 3328 }, { "epoch": 10.914754098360655, "grad_norm": 4.992860317230225, "learning_rate": 1.9673245401606896e-05, "loss": 1.6526, "step": 3329 }, { "epoch": 10.918032786885245, "grad_norm": 5.00253963470459, "learning_rate": 1.9672976114771637e-05, "loss": 1.5906, "step": 3330 }, { "epoch": 10.921311475409835, "grad_norm": 6.013515949249268, "learning_rate": 1.9672706718863512e-05, "loss": 1.5259, "step": 3331 }, { "epoch": 10.924590163934425, "grad_norm": 5.856995582580566, "learning_rate": 1.9672437213885566e-05, "loss": 1.7185, "step": 3332 }, { "epoch": 10.927868852459017, "grad_norm": 7.235840797424316, "learning_rate": 1.9672167599840833e-05, "loss": 1.5015, "step": 3333 }, { "epoch": 10.931147540983606, "grad_norm": 5.334482669830322, "learning_rate": 1.967189787673235e-05, "loss": 1.731, "step": 3334 }, { "epoch": 10.934426229508198, "grad_norm": 5.341074466705322, "learning_rate": 1.9671628044563165e-05, "loss": 1.4749, "step": 3335 }, { "epoch": 10.937704918032788, "grad_norm": 6.589937210083008, "learning_rate": 1.967135810333632e-05, "loss": 1.5964, "step": 3336 }, { "epoch": 10.940983606557378, "grad_norm": 7.214491844177246, "learning_rate": 1.9671088053054853e-05, "loss": 1.5337, "step": 3337 }, { "epoch": 10.944262295081968, "grad_norm": 16.98247528076172, "learning_rate": 1.9670817893721815e-05, "loss": 1.6753, "step": 3338 }, { "epoch": 10.947540983606558, "grad_norm": 6.541795253753662, "learning_rate": 1.967054762534025e-05, "loss": 1.6052, "step": 3339 }, { "epoch": 10.950819672131148, "grad_norm": 6.0477614402771, "learning_rate": 1.9670277247913205e-05, "loss": 1.6348, "step": 3340 }, { "epoch": 10.954098360655738, "grad_norm": 5.36768913269043, "learning_rate": 1.967000676144373e-05, "loss": 1.7285, "step": 3341 }, { "epoch": 10.957377049180328, "grad_norm": 6.336453914642334, "learning_rate": 1.9669736165934873e-05, "loss": 1.5007, "step": 3342 }, { "epoch": 10.960655737704919, "grad_norm": 5.688921928405762, "learning_rate": 1.9669465461389688e-05, "loss": 1.5486, "step": 3343 }, { "epoch": 10.963934426229509, "grad_norm": 4.886057376861572, "learning_rate": 1.9669194647811227e-05, "loss": 1.5457, "step": 3344 }, { "epoch": 10.967213114754099, "grad_norm": 10.19081974029541, "learning_rate": 1.966892372520254e-05, "loss": 1.5615, "step": 3345 }, { "epoch": 10.970491803278689, "grad_norm": 4.80082893371582, "learning_rate": 1.9668652693566687e-05, "loss": 1.6968, "step": 3346 }, { "epoch": 10.973770491803279, "grad_norm": 6.395028591156006, "learning_rate": 1.966838155290672e-05, "loss": 1.4934, "step": 3347 }, { "epoch": 10.97704918032787, "grad_norm": 7.78416633605957, "learning_rate": 1.9668110303225703e-05, "loss": 1.5867, "step": 3348 }, { "epoch": 10.98032786885246, "grad_norm": 4.928350448608398, "learning_rate": 1.9667838944526686e-05, "loss": 1.6733, "step": 3349 }, { "epoch": 10.98360655737705, "grad_norm": 7.059333801269531, "learning_rate": 1.9667567476812733e-05, "loss": 1.5908, "step": 3350 }, { "epoch": 10.98688524590164, "grad_norm": 5.137462615966797, "learning_rate": 1.966729590008691e-05, "loss": 1.7095, "step": 3351 }, { "epoch": 10.99016393442623, "grad_norm": 7.699253082275391, "learning_rate": 1.9667024214352267e-05, "loss": 1.6711, "step": 3352 }, { "epoch": 10.99344262295082, "grad_norm": 5.367777347564697, "learning_rate": 1.966675241961188e-05, "loss": 1.3367, "step": 3353 }, { "epoch": 10.99672131147541, "grad_norm": 6.028009414672852, "learning_rate": 1.9666480515868805e-05, "loss": 1.6257, "step": 3354 }, { "epoch": 11.0, "grad_norm": 5.150919437408447, "learning_rate": 1.9666208503126115e-05, "loss": 1.6541, "step": 3355 }, { "epoch": 11.00327868852459, "grad_norm": 5.534729480743408, "learning_rate": 1.966593638138687e-05, "loss": 1.5142, "step": 3356 }, { "epoch": 11.00655737704918, "grad_norm": 4.980964660644531, "learning_rate": 1.9665664150654146e-05, "loss": 1.4761, "step": 3357 }, { "epoch": 11.00983606557377, "grad_norm": 7.155509948730469, "learning_rate": 1.9665391810931006e-05, "loss": 1.5825, "step": 3358 }, { "epoch": 11.01311475409836, "grad_norm": 7.0406270027160645, "learning_rate": 1.9665119362220526e-05, "loss": 1.7222, "step": 3359 }, { "epoch": 11.01639344262295, "grad_norm": 20.207883834838867, "learning_rate": 1.9664846804525775e-05, "loss": 1.4951, "step": 3360 }, { "epoch": 11.01967213114754, "grad_norm": 6.100806713104248, "learning_rate": 1.9664574137849825e-05, "loss": 1.6345, "step": 3361 }, { "epoch": 11.02295081967213, "grad_norm": 5.981926918029785, "learning_rate": 1.9664301362195757e-05, "loss": 1.4675, "step": 3362 }, { "epoch": 11.026229508196721, "grad_norm": 6.22590446472168, "learning_rate": 1.9664028477566642e-05, "loss": 1.4513, "step": 3363 }, { "epoch": 11.029508196721311, "grad_norm": 8.743669509887695, "learning_rate": 1.9663755483965556e-05, "loss": 1.6282, "step": 3364 }, { "epoch": 11.032786885245901, "grad_norm": 4.932885646820068, "learning_rate": 1.966348238139558e-05, "loss": 1.6287, "step": 3365 }, { "epoch": 11.036065573770491, "grad_norm": 9.0145902633667, "learning_rate": 1.9663209169859792e-05, "loss": 1.4619, "step": 3366 }, { "epoch": 11.039344262295081, "grad_norm": 12.993011474609375, "learning_rate": 1.9662935849361275e-05, "loss": 1.3953, "step": 3367 }, { "epoch": 11.042622950819672, "grad_norm": 5.624324321746826, "learning_rate": 1.9662662419903106e-05, "loss": 1.4731, "step": 3368 }, { "epoch": 11.045901639344262, "grad_norm": 6.229587078094482, "learning_rate": 1.9662388881488374e-05, "loss": 1.3467, "step": 3369 }, { "epoch": 11.049180327868852, "grad_norm": 6.685810089111328, "learning_rate": 1.966211523412016e-05, "loss": 1.2356, "step": 3370 }, { "epoch": 11.052459016393442, "grad_norm": 6.046236038208008, "learning_rate": 1.9661841477801552e-05, "loss": 1.814, "step": 3371 }, { "epoch": 11.055737704918032, "grad_norm": 7.00143575668335, "learning_rate": 1.9661567612535638e-05, "loss": 1.4844, "step": 3372 }, { "epoch": 11.059016393442622, "grad_norm": 6.4838547706604, "learning_rate": 1.96612936383255e-05, "loss": 1.5156, "step": 3373 }, { "epoch": 11.062295081967212, "grad_norm": 6.030648231506348, "learning_rate": 1.9661019555174232e-05, "loss": 1.5679, "step": 3374 }, { "epoch": 11.065573770491802, "grad_norm": 5.4676690101623535, "learning_rate": 1.9660745363084924e-05, "loss": 1.4794, "step": 3375 }, { "epoch": 11.068852459016393, "grad_norm": 6.389129638671875, "learning_rate": 1.9660471062060664e-05, "loss": 1.601, "step": 3376 }, { "epoch": 11.072131147540984, "grad_norm": 6.027518272399902, "learning_rate": 1.966019665210455e-05, "loss": 1.4734, "step": 3377 }, { "epoch": 11.075409836065575, "grad_norm": 5.630916118621826, "learning_rate": 1.9659922133219676e-05, "loss": 1.3962, "step": 3378 }, { "epoch": 11.078688524590165, "grad_norm": 4.950157642364502, "learning_rate": 1.965964750540914e-05, "loss": 1.6038, "step": 3379 }, { "epoch": 11.081967213114755, "grad_norm": 5.946115493774414, "learning_rate": 1.965937276867603e-05, "loss": 1.5083, "step": 3380 }, { "epoch": 11.085245901639345, "grad_norm": 6.333712100982666, "learning_rate": 1.965909792302345e-05, "loss": 1.4253, "step": 3381 }, { "epoch": 11.088524590163935, "grad_norm": 5.735805988311768, "learning_rate": 1.9658822968454496e-05, "loss": 1.4812, "step": 3382 }, { "epoch": 11.091803278688525, "grad_norm": 5.067560195922852, "learning_rate": 1.965854790497227e-05, "loss": 1.6445, "step": 3383 }, { "epoch": 11.095081967213115, "grad_norm": 5.68037223815918, "learning_rate": 1.9658272732579878e-05, "loss": 1.365, "step": 3384 }, { "epoch": 11.098360655737705, "grad_norm": 5.471434593200684, "learning_rate": 1.9657997451280417e-05, "loss": 1.6567, "step": 3385 }, { "epoch": 11.101639344262296, "grad_norm": 5.211413860321045, "learning_rate": 1.9657722061076995e-05, "loss": 1.4519, "step": 3386 }, { "epoch": 11.104918032786886, "grad_norm": 5.01519775390625, "learning_rate": 1.965744656197271e-05, "loss": 1.3486, "step": 3387 }, { "epoch": 11.108196721311476, "grad_norm": 8.246633529663086, "learning_rate": 1.9657170953970677e-05, "loss": 1.5215, "step": 3388 }, { "epoch": 11.111475409836066, "grad_norm": 5.3167805671691895, "learning_rate": 1.9656895237074e-05, "loss": 1.5596, "step": 3389 }, { "epoch": 11.114754098360656, "grad_norm": 6.655368328094482, "learning_rate": 1.965661941128579e-05, "loss": 1.6814, "step": 3390 }, { "epoch": 11.118032786885246, "grad_norm": 8.073322296142578, "learning_rate": 1.9656343476609154e-05, "loss": 1.4456, "step": 3391 }, { "epoch": 11.121311475409836, "grad_norm": 6.203275680541992, "learning_rate": 1.9656067433047206e-05, "loss": 1.4612, "step": 3392 }, { "epoch": 11.124590163934426, "grad_norm": 4.822890758514404, "learning_rate": 1.965579128060306e-05, "loss": 1.4714, "step": 3393 }, { "epoch": 11.127868852459017, "grad_norm": 5.348603248596191, "learning_rate": 1.9655515019279825e-05, "loss": 1.7253, "step": 3394 }, { "epoch": 11.131147540983607, "grad_norm": 6.6402177810668945, "learning_rate": 1.9655238649080617e-05, "loss": 1.5461, "step": 3395 }, { "epoch": 11.134426229508197, "grad_norm": 5.830200672149658, "learning_rate": 1.965496217000856e-05, "loss": 1.6074, "step": 3396 }, { "epoch": 11.137704918032787, "grad_norm": 5.952487468719482, "learning_rate": 1.9654685582066763e-05, "loss": 1.6633, "step": 3397 }, { "epoch": 11.140983606557377, "grad_norm": 6.414702415466309, "learning_rate": 1.9654408885258346e-05, "loss": 1.4299, "step": 3398 }, { "epoch": 11.144262295081967, "grad_norm": 7.076379299163818, "learning_rate": 1.9654132079586433e-05, "loss": 1.5122, "step": 3399 }, { "epoch": 11.147540983606557, "grad_norm": 6.4872636795043945, "learning_rate": 1.965385516505414e-05, "loss": 1.2532, "step": 3400 }, { "epoch": 11.150819672131147, "grad_norm": 5.837189674377441, "learning_rate": 1.9653578141664598e-05, "loss": 1.542, "step": 3401 }, { "epoch": 11.154098360655738, "grad_norm": 5.641147136688232, "learning_rate": 1.965330100942092e-05, "loss": 1.5483, "step": 3402 }, { "epoch": 11.157377049180328, "grad_norm": 5.32330322265625, "learning_rate": 1.965302376832624e-05, "loss": 1.5669, "step": 3403 }, { "epoch": 11.160655737704918, "grad_norm": 19.42699432373047, "learning_rate": 1.9652746418383676e-05, "loss": 1.4609, "step": 3404 }, { "epoch": 11.163934426229508, "grad_norm": 5.616968631744385, "learning_rate": 1.9652468959596366e-05, "loss": 1.6528, "step": 3405 }, { "epoch": 11.167213114754098, "grad_norm": 5.915403842926025, "learning_rate": 1.9652191391967427e-05, "loss": 1.4702, "step": 3406 }, { "epoch": 11.170491803278688, "grad_norm": 7.722241401672363, "learning_rate": 1.9651913715499996e-05, "loss": 1.5044, "step": 3407 }, { "epoch": 11.173770491803278, "grad_norm": 6.787726879119873, "learning_rate": 1.9651635930197203e-05, "loss": 1.2999, "step": 3408 }, { "epoch": 11.177049180327868, "grad_norm": 5.618032932281494, "learning_rate": 1.965135803606218e-05, "loss": 1.5586, "step": 3409 }, { "epoch": 11.180327868852459, "grad_norm": 6.541698932647705, "learning_rate": 1.9651080033098057e-05, "loss": 1.5244, "step": 3410 }, { "epoch": 11.183606557377049, "grad_norm": 6.752976417541504, "learning_rate": 1.9650801921307977e-05, "loss": 1.562, "step": 3411 }, { "epoch": 11.186885245901639, "grad_norm": 9.367471694946289, "learning_rate": 1.9650523700695067e-05, "loss": 1.6145, "step": 3412 }, { "epoch": 11.190163934426229, "grad_norm": 6.397263050079346, "learning_rate": 1.965024537126247e-05, "loss": 1.2531, "step": 3413 }, { "epoch": 11.193442622950819, "grad_norm": 8.594038963317871, "learning_rate": 1.9649966933013324e-05, "loss": 1.4558, "step": 3414 }, { "epoch": 11.19672131147541, "grad_norm": 5.270386695861816, "learning_rate": 1.9649688385950765e-05, "loss": 1.5278, "step": 3415 }, { "epoch": 11.2, "grad_norm": 5.421783924102783, "learning_rate": 1.9649409730077934e-05, "loss": 1.4355, "step": 3416 }, { "epoch": 11.20327868852459, "grad_norm": 6.14069128036499, "learning_rate": 1.964913096539798e-05, "loss": 1.6233, "step": 3417 }, { "epoch": 11.20655737704918, "grad_norm": 6.495717525482178, "learning_rate": 1.9648852091914042e-05, "loss": 1.5168, "step": 3418 }, { "epoch": 11.20983606557377, "grad_norm": 5.613864421844482, "learning_rate": 1.964857310962926e-05, "loss": 1.6426, "step": 3419 }, { "epoch": 11.21311475409836, "grad_norm": 5.865532875061035, "learning_rate": 1.964829401854679e-05, "loss": 1.8237, "step": 3420 }, { "epoch": 11.216393442622952, "grad_norm": 4.861549377441406, "learning_rate": 1.964801481866977e-05, "loss": 1.665, "step": 3421 }, { "epoch": 11.219672131147542, "grad_norm": 6.08667516708374, "learning_rate": 1.964773551000135e-05, "loss": 1.5032, "step": 3422 }, { "epoch": 11.222950819672132, "grad_norm": 45.311500549316406, "learning_rate": 1.9647456092544683e-05, "loss": 1.5613, "step": 3423 }, { "epoch": 11.226229508196722, "grad_norm": 5.605258464813232, "learning_rate": 1.9647176566302913e-05, "loss": 1.5293, "step": 3424 }, { "epoch": 11.229508196721312, "grad_norm": 8.555882453918457, "learning_rate": 1.9646896931279206e-05, "loss": 1.4775, "step": 3425 }, { "epoch": 11.232786885245902, "grad_norm": 6.206857681274414, "learning_rate": 1.9646617187476698e-05, "loss": 1.4424, "step": 3426 }, { "epoch": 11.236065573770492, "grad_norm": 6.004255771636963, "learning_rate": 1.9646337334898555e-05, "loss": 1.5066, "step": 3427 }, { "epoch": 11.239344262295083, "grad_norm": 5.508604526519775, "learning_rate": 1.9646057373547927e-05, "loss": 1.6028, "step": 3428 }, { "epoch": 11.242622950819673, "grad_norm": 6.946715354919434, "learning_rate": 1.9645777303427972e-05, "loss": 1.572, "step": 3429 }, { "epoch": 11.245901639344263, "grad_norm": 5.085494041442871, "learning_rate": 1.964549712454185e-05, "loss": 1.5447, "step": 3430 }, { "epoch": 11.249180327868853, "grad_norm": 6.71180534362793, "learning_rate": 1.9645216836892723e-05, "loss": 1.4368, "step": 3431 }, { "epoch": 11.252459016393443, "grad_norm": 5.492619037628174, "learning_rate": 1.9644936440483744e-05, "loss": 1.4958, "step": 3432 }, { "epoch": 11.255737704918033, "grad_norm": 5.9237589836120605, "learning_rate": 1.964465593531808e-05, "loss": 1.6494, "step": 3433 }, { "epoch": 11.259016393442623, "grad_norm": 9.71612548828125, "learning_rate": 1.964437532139889e-05, "loss": 1.6768, "step": 3434 }, { "epoch": 11.262295081967213, "grad_norm": 14.061136245727539, "learning_rate": 1.964409459872934e-05, "loss": 1.7703, "step": 3435 }, { "epoch": 11.265573770491804, "grad_norm": 6.014132022857666, "learning_rate": 1.9643813767312597e-05, "loss": 1.2844, "step": 3436 }, { "epoch": 11.268852459016394, "grad_norm": 8.9331636428833, "learning_rate": 1.964353282715183e-05, "loss": 1.7141, "step": 3437 }, { "epoch": 11.272131147540984, "grad_norm": 5.195769309997559, "learning_rate": 1.9643251778250197e-05, "loss": 1.5269, "step": 3438 }, { "epoch": 11.275409836065574, "grad_norm": 12.052961349487305, "learning_rate": 1.9642970620610882e-05, "loss": 1.3383, "step": 3439 }, { "epoch": 11.278688524590164, "grad_norm": 7.249066352844238, "learning_rate": 1.964268935423704e-05, "loss": 1.3453, "step": 3440 }, { "epoch": 11.281967213114754, "grad_norm": 5.942902088165283, "learning_rate": 1.9642407979131855e-05, "loss": 1.4717, "step": 3441 }, { "epoch": 11.285245901639344, "grad_norm": 6.956826686859131, "learning_rate": 1.964212649529849e-05, "loss": 1.4385, "step": 3442 }, { "epoch": 11.288524590163934, "grad_norm": 6.28222131729126, "learning_rate": 1.9641844902740125e-05, "loss": 1.6628, "step": 3443 }, { "epoch": 11.291803278688525, "grad_norm": 6.361743450164795, "learning_rate": 1.9641563201459933e-05, "loss": 1.5337, "step": 3444 }, { "epoch": 11.295081967213115, "grad_norm": 7.235016822814941, "learning_rate": 1.9641281391461097e-05, "loss": 1.4321, "step": 3445 }, { "epoch": 11.298360655737705, "grad_norm": 5.564980983734131, "learning_rate": 1.9640999472746782e-05, "loss": 1.6101, "step": 3446 }, { "epoch": 11.301639344262295, "grad_norm": 6.467190742492676, "learning_rate": 1.9640717445320175e-05, "loss": 1.5378, "step": 3447 }, { "epoch": 11.304918032786885, "grad_norm": 5.661177158355713, "learning_rate": 1.9640435309184456e-05, "loss": 1.5386, "step": 3448 }, { "epoch": 11.308196721311475, "grad_norm": 5.5785040855407715, "learning_rate": 1.9640153064342805e-05, "loss": 1.6841, "step": 3449 }, { "epoch": 11.311475409836065, "grad_norm": 5.3882927894592285, "learning_rate": 1.9639870710798407e-05, "loss": 1.4106, "step": 3450 }, { "epoch": 11.314754098360655, "grad_norm": 5.6919474601745605, "learning_rate": 1.963958824855444e-05, "loss": 1.4827, "step": 3451 }, { "epoch": 11.318032786885245, "grad_norm": 7.402247428894043, "learning_rate": 1.9639305677614097e-05, "loss": 1.5579, "step": 3452 }, { "epoch": 11.321311475409836, "grad_norm": 6.770934581756592, "learning_rate": 1.963902299798056e-05, "loss": 1.3, "step": 3453 }, { "epoch": 11.324590163934426, "grad_norm": 16.057937622070312, "learning_rate": 1.9638740209657014e-05, "loss": 1.4026, "step": 3454 }, { "epoch": 11.327868852459016, "grad_norm": 6.208699703216553, "learning_rate": 1.963845731264665e-05, "loss": 1.4971, "step": 3455 }, { "epoch": 11.331147540983606, "grad_norm": 11.166407585144043, "learning_rate": 1.963817430695266e-05, "loss": 1.4946, "step": 3456 }, { "epoch": 11.334426229508196, "grad_norm": 7.983930587768555, "learning_rate": 1.9637891192578232e-05, "loss": 1.5823, "step": 3457 }, { "epoch": 11.337704918032786, "grad_norm": 6.95168399810791, "learning_rate": 1.963760796952656e-05, "loss": 1.665, "step": 3458 }, { "epoch": 11.340983606557376, "grad_norm": 7.008370399475098, "learning_rate": 1.963732463780084e-05, "loss": 1.5701, "step": 3459 }, { "epoch": 11.344262295081966, "grad_norm": 7.49396276473999, "learning_rate": 1.963704119740426e-05, "loss": 1.4785, "step": 3460 }, { "epoch": 11.347540983606557, "grad_norm": 5.875948905944824, "learning_rate": 1.9636757648340025e-05, "loss": 1.4636, "step": 3461 }, { "epoch": 11.350819672131147, "grad_norm": 10.049090385437012, "learning_rate": 1.9636473990611327e-05, "loss": 1.355, "step": 3462 }, { "epoch": 11.354098360655737, "grad_norm": 10.495200157165527, "learning_rate": 1.9636190224221364e-05, "loss": 1.5452, "step": 3463 }, { "epoch": 11.357377049180329, "grad_norm": 12.002668380737305, "learning_rate": 1.9635906349173336e-05, "loss": 1.519, "step": 3464 }, { "epoch": 11.360655737704919, "grad_norm": 5.641571998596191, "learning_rate": 1.9635622365470447e-05, "loss": 1.4895, "step": 3465 }, { "epoch": 11.363934426229509, "grad_norm": 6.06043815612793, "learning_rate": 1.9635338273115896e-05, "loss": 1.4456, "step": 3466 }, { "epoch": 11.3672131147541, "grad_norm": 8.176822662353516, "learning_rate": 1.963505407211289e-05, "loss": 1.7896, "step": 3467 }, { "epoch": 11.37049180327869, "grad_norm": 7.885281085968018, "learning_rate": 1.9634769762464628e-05, "loss": 1.5535, "step": 3468 }, { "epoch": 11.37377049180328, "grad_norm": 8.43960952758789, "learning_rate": 1.9634485344174324e-05, "loss": 1.5486, "step": 3469 }, { "epoch": 11.37704918032787, "grad_norm": 7.139896869659424, "learning_rate": 1.9634200817245176e-05, "loss": 1.4614, "step": 3470 }, { "epoch": 11.38032786885246, "grad_norm": 12.613565444946289, "learning_rate": 1.9633916181680397e-05, "loss": 1.5996, "step": 3471 }, { "epoch": 11.38360655737705, "grad_norm": 6.075810432434082, "learning_rate": 1.96336314374832e-05, "loss": 1.5657, "step": 3472 }, { "epoch": 11.38688524590164, "grad_norm": 6.534870147705078, "learning_rate": 1.9633346584656787e-05, "loss": 1.5986, "step": 3473 }, { "epoch": 11.39016393442623, "grad_norm": 5.761613368988037, "learning_rate": 1.963306162320438e-05, "loss": 1.6008, "step": 3474 }, { "epoch": 11.39344262295082, "grad_norm": 6.311721324920654, "learning_rate": 1.9632776553129185e-05, "loss": 1.7808, "step": 3475 }, { "epoch": 11.39672131147541, "grad_norm": 37.6386604309082, "learning_rate": 1.963249137443442e-05, "loss": 1.7383, "step": 3476 }, { "epoch": 11.4, "grad_norm": 6.499794960021973, "learning_rate": 1.9632206087123296e-05, "loss": 1.533, "step": 3477 }, { "epoch": 11.40327868852459, "grad_norm": 6.7929792404174805, "learning_rate": 1.9631920691199036e-05, "loss": 1.5969, "step": 3478 }, { "epoch": 11.40655737704918, "grad_norm": 8.094281196594238, "learning_rate": 1.9631635186664858e-05, "loss": 1.6333, "step": 3479 }, { "epoch": 11.40983606557377, "grad_norm": 8.651601791381836, "learning_rate": 1.9631349573523976e-05, "loss": 1.4473, "step": 3480 }, { "epoch": 11.41311475409836, "grad_norm": 6.815116882324219, "learning_rate": 1.963106385177961e-05, "loss": 1.5649, "step": 3481 }, { "epoch": 11.416393442622951, "grad_norm": 6.296660423278809, "learning_rate": 1.963077802143499e-05, "loss": 1.751, "step": 3482 }, { "epoch": 11.419672131147541, "grad_norm": 7.315540313720703, "learning_rate": 1.9630492082493334e-05, "loss": 1.7598, "step": 3483 }, { "epoch": 11.422950819672131, "grad_norm": 6.682549953460693, "learning_rate": 1.9630206034957867e-05, "loss": 1.6321, "step": 3484 }, { "epoch": 11.426229508196721, "grad_norm": 8.08096981048584, "learning_rate": 1.9629919878831813e-05, "loss": 1.3862, "step": 3485 }, { "epoch": 11.429508196721311, "grad_norm": 6.006147861480713, "learning_rate": 1.96296336141184e-05, "loss": 1.6997, "step": 3486 }, { "epoch": 11.432786885245902, "grad_norm": 7.088709354400635, "learning_rate": 1.9629347240820853e-05, "loss": 1.8345, "step": 3487 }, { "epoch": 11.436065573770492, "grad_norm": 5.8471574783325195, "learning_rate": 1.9629060758942407e-05, "loss": 1.5959, "step": 3488 }, { "epoch": 11.439344262295082, "grad_norm": 6.97538423538208, "learning_rate": 1.9628774168486288e-05, "loss": 1.3997, "step": 3489 }, { "epoch": 11.442622950819672, "grad_norm": 6.292507171630859, "learning_rate": 1.9628487469455727e-05, "loss": 1.4949, "step": 3490 }, { "epoch": 11.445901639344262, "grad_norm": 6.276614665985107, "learning_rate": 1.9628200661853964e-05, "loss": 1.6357, "step": 3491 }, { "epoch": 11.449180327868852, "grad_norm": 8.40165901184082, "learning_rate": 1.9627913745684223e-05, "loss": 1.6401, "step": 3492 }, { "epoch": 11.452459016393442, "grad_norm": 4.894691467285156, "learning_rate": 1.9627626720949748e-05, "loss": 1.7319, "step": 3493 }, { "epoch": 11.455737704918032, "grad_norm": 5.5705647468566895, "learning_rate": 1.9627339587653767e-05, "loss": 1.7271, "step": 3494 }, { "epoch": 11.459016393442623, "grad_norm": 6.987398147583008, "learning_rate": 1.9627052345799523e-05, "loss": 1.5413, "step": 3495 }, { "epoch": 11.462295081967213, "grad_norm": 7.423305034637451, "learning_rate": 1.9626764995390254e-05, "loss": 1.5688, "step": 3496 }, { "epoch": 11.465573770491803, "grad_norm": 5.993337154388428, "learning_rate": 1.9626477536429204e-05, "loss": 1.6265, "step": 3497 }, { "epoch": 11.468852459016393, "grad_norm": 6.910515785217285, "learning_rate": 1.9626189968919608e-05, "loss": 1.5198, "step": 3498 }, { "epoch": 11.472131147540983, "grad_norm": 6.549404621124268, "learning_rate": 1.9625902292864715e-05, "loss": 1.479, "step": 3499 }, { "epoch": 11.475409836065573, "grad_norm": 123.1619873046875, "learning_rate": 1.962561450826776e-05, "loss": 1.7983, "step": 3500 }, { "epoch": 11.478688524590163, "grad_norm": 6.982117176055908, "learning_rate": 1.9625326615131994e-05, "loss": 1.5474, "step": 3501 }, { "epoch": 11.481967213114753, "grad_norm": 8.727153778076172, "learning_rate": 1.9625038613460664e-05, "loss": 1.7029, "step": 3502 }, { "epoch": 11.485245901639344, "grad_norm": 7.298559188842773, "learning_rate": 1.9624750503257018e-05, "loss": 1.6155, "step": 3503 }, { "epoch": 11.488524590163934, "grad_norm": 6.869080066680908, "learning_rate": 1.96244622845243e-05, "loss": 1.3604, "step": 3504 }, { "epoch": 11.491803278688524, "grad_norm": 6.9532647132873535, "learning_rate": 1.9624173957265765e-05, "loss": 1.3445, "step": 3505 }, { "epoch": 11.495081967213114, "grad_norm": 12.149454116821289, "learning_rate": 1.962388552148466e-05, "loss": 1.6497, "step": 3506 }, { "epoch": 11.498360655737706, "grad_norm": 7.545653343200684, "learning_rate": 1.962359697718424e-05, "loss": 1.5248, "step": 3507 }, { "epoch": 11.501639344262294, "grad_norm": 5.500284671783447, "learning_rate": 1.9623308324367758e-05, "loss": 1.605, "step": 3508 }, { "epoch": 11.504918032786886, "grad_norm": 17.9703426361084, "learning_rate": 1.962301956303847e-05, "loss": 1.5417, "step": 3509 }, { "epoch": 11.508196721311476, "grad_norm": 7.850834846496582, "learning_rate": 1.962273069319963e-05, "loss": 1.5815, "step": 3510 }, { "epoch": 11.511475409836066, "grad_norm": 7.44675874710083, "learning_rate": 1.9622441714854495e-05, "loss": 1.7123, "step": 3511 }, { "epoch": 11.514754098360656, "grad_norm": 9.352910041809082, "learning_rate": 1.962215262800633e-05, "loss": 1.6531, "step": 3512 }, { "epoch": 11.518032786885247, "grad_norm": 7.898397922515869, "learning_rate": 1.9621863432658383e-05, "loss": 1.6675, "step": 3513 }, { "epoch": 11.521311475409837, "grad_norm": 6.993668079376221, "learning_rate": 1.9621574128813925e-05, "loss": 1.8096, "step": 3514 }, { "epoch": 11.524590163934427, "grad_norm": 9.82799243927002, "learning_rate": 1.9621284716476216e-05, "loss": 1.4041, "step": 3515 }, { "epoch": 11.527868852459017, "grad_norm": 11.755621910095215, "learning_rate": 1.9620995195648514e-05, "loss": 1.5796, "step": 3516 }, { "epoch": 11.531147540983607, "grad_norm": 7.631876468658447, "learning_rate": 1.962070556633409e-05, "loss": 1.5452, "step": 3517 }, { "epoch": 11.534426229508197, "grad_norm": 7.172030448913574, "learning_rate": 1.9620415828536208e-05, "loss": 1.5371, "step": 3518 }, { "epoch": 11.537704918032787, "grad_norm": 7.756645202636719, "learning_rate": 1.9620125982258136e-05, "loss": 1.595, "step": 3519 }, { "epoch": 11.540983606557377, "grad_norm": 7.880234718322754, "learning_rate": 1.961983602750314e-05, "loss": 1.5317, "step": 3520 }, { "epoch": 11.544262295081968, "grad_norm": 10.544830322265625, "learning_rate": 1.9619545964274488e-05, "loss": 1.322, "step": 3521 }, { "epoch": 11.547540983606558, "grad_norm": 8.864347457885742, "learning_rate": 1.9619255792575458e-05, "loss": 1.4441, "step": 3522 }, { "epoch": 11.550819672131148, "grad_norm": 7.063254356384277, "learning_rate": 1.9618965512409316e-05, "loss": 1.4751, "step": 3523 }, { "epoch": 11.554098360655738, "grad_norm": 7.028728485107422, "learning_rate": 1.9618675123779338e-05, "loss": 1.6389, "step": 3524 }, { "epoch": 11.557377049180328, "grad_norm": 8.132222175598145, "learning_rate": 1.9618384626688793e-05, "loss": 1.4358, "step": 3525 }, { "epoch": 11.560655737704918, "grad_norm": 6.835107803344727, "learning_rate": 1.9618094021140965e-05, "loss": 1.5266, "step": 3526 }, { "epoch": 11.563934426229508, "grad_norm": 6.292449474334717, "learning_rate": 1.9617803307139122e-05, "loss": 1.5955, "step": 3527 }, { "epoch": 11.567213114754098, "grad_norm": 7.0800275802612305, "learning_rate": 1.961751248468655e-05, "loss": 1.6226, "step": 3528 }, { "epoch": 11.570491803278689, "grad_norm": 9.191998481750488, "learning_rate": 1.9617221553786522e-05, "loss": 1.5977, "step": 3529 }, { "epoch": 11.573770491803279, "grad_norm": 6.242808818817139, "learning_rate": 1.9616930514442324e-05, "loss": 1.7134, "step": 3530 }, { "epoch": 11.577049180327869, "grad_norm": 6.472566604614258, "learning_rate": 1.9616639366657237e-05, "loss": 1.4749, "step": 3531 }, { "epoch": 11.580327868852459, "grad_norm": 6.677244663238525, "learning_rate": 1.961634811043454e-05, "loss": 1.8979, "step": 3532 }, { "epoch": 11.583606557377049, "grad_norm": 6.577874660491943, "learning_rate": 1.961605674577752e-05, "loss": 1.6445, "step": 3533 }, { "epoch": 11.58688524590164, "grad_norm": 7.129555702209473, "learning_rate": 1.961576527268946e-05, "loss": 1.4978, "step": 3534 }, { "epoch": 11.59016393442623, "grad_norm": 6.319579601287842, "learning_rate": 1.9615473691173652e-05, "loss": 1.5811, "step": 3535 }, { "epoch": 11.59344262295082, "grad_norm": 8.272706031799316, "learning_rate": 1.961518200123338e-05, "loss": 1.6392, "step": 3536 }, { "epoch": 11.59672131147541, "grad_norm": 10.171441078186035, "learning_rate": 1.9614890202871933e-05, "loss": 1.5233, "step": 3537 }, { "epoch": 11.6, "grad_norm": 7.928047180175781, "learning_rate": 1.9614598296092603e-05, "loss": 1.6572, "step": 3538 }, { "epoch": 11.60327868852459, "grad_norm": 7.020237922668457, "learning_rate": 1.961430628089868e-05, "loss": 1.644, "step": 3539 }, { "epoch": 11.60655737704918, "grad_norm": 5.725789546966553, "learning_rate": 1.9614014157293456e-05, "loss": 1.7993, "step": 3540 }, { "epoch": 11.60983606557377, "grad_norm": 7.231744766235352, "learning_rate": 1.9613721925280224e-05, "loss": 1.4006, "step": 3541 }, { "epoch": 11.61311475409836, "grad_norm": 6.565567493438721, "learning_rate": 1.9613429584862284e-05, "loss": 1.6584, "step": 3542 }, { "epoch": 11.61639344262295, "grad_norm": 7.187527179718018, "learning_rate": 1.9613137136042932e-05, "loss": 1.5811, "step": 3543 }, { "epoch": 11.61967213114754, "grad_norm": 6.3135294914245605, "learning_rate": 1.9612844578825463e-05, "loss": 1.6516, "step": 3544 }, { "epoch": 11.62295081967213, "grad_norm": 9.698102951049805, "learning_rate": 1.9612551913213175e-05, "loss": 1.3103, "step": 3545 }, { "epoch": 11.62622950819672, "grad_norm": 7.679905414581299, "learning_rate": 1.961225913920937e-05, "loss": 1.4412, "step": 3546 }, { "epoch": 11.62950819672131, "grad_norm": 5.658500671386719, "learning_rate": 1.961196625681735e-05, "loss": 1.6616, "step": 3547 }, { "epoch": 11.6327868852459, "grad_norm": 6.756407260894775, "learning_rate": 1.9611673266040414e-05, "loss": 1.6379, "step": 3548 }, { "epoch": 11.636065573770491, "grad_norm": 4.407103538513184, "learning_rate": 1.961138016688187e-05, "loss": 1.4849, "step": 3549 }, { "epoch": 11.639344262295083, "grad_norm": 6.845460891723633, "learning_rate": 1.961108695934502e-05, "loss": 1.6782, "step": 3550 }, { "epoch": 11.642622950819671, "grad_norm": 5.787448406219482, "learning_rate": 1.9610793643433175e-05, "loss": 1.4788, "step": 3551 }, { "epoch": 11.645901639344263, "grad_norm": 6.37493371963501, "learning_rate": 1.9610500219149637e-05, "loss": 1.4597, "step": 3552 }, { "epoch": 11.649180327868853, "grad_norm": 9.49094009399414, "learning_rate": 1.9610206686497717e-05, "loss": 1.4617, "step": 3553 }, { "epoch": 11.652459016393443, "grad_norm": 5.3984575271606445, "learning_rate": 1.9609913045480725e-05, "loss": 1.666, "step": 3554 }, { "epoch": 11.655737704918034, "grad_norm": 6.141320705413818, "learning_rate": 1.960961929610197e-05, "loss": 1.5444, "step": 3555 }, { "epoch": 11.659016393442624, "grad_norm": 7.367457389831543, "learning_rate": 1.9609325438364765e-05, "loss": 1.4817, "step": 3556 }, { "epoch": 11.662295081967214, "grad_norm": 7.717838287353516, "learning_rate": 1.9609031472272425e-05, "loss": 1.4893, "step": 3557 }, { "epoch": 11.665573770491804, "grad_norm": 6.348371505737305, "learning_rate": 1.9608737397828267e-05, "loss": 1.5186, "step": 3558 }, { "epoch": 11.668852459016394, "grad_norm": 6.96425199508667, "learning_rate": 1.96084432150356e-05, "loss": 1.6689, "step": 3559 }, { "epoch": 11.672131147540984, "grad_norm": 5.886472225189209, "learning_rate": 1.9608148923897752e-05, "loss": 1.6565, "step": 3560 }, { "epoch": 11.675409836065574, "grad_norm": 7.480196952819824, "learning_rate": 1.960785452441803e-05, "loss": 1.533, "step": 3561 }, { "epoch": 11.678688524590164, "grad_norm": 6.991106033325195, "learning_rate": 1.9607560016599758e-05, "loss": 1.624, "step": 3562 }, { "epoch": 11.681967213114755, "grad_norm": 6.1585774421691895, "learning_rate": 1.960726540044626e-05, "loss": 1.4275, "step": 3563 }, { "epoch": 11.685245901639345, "grad_norm": 5.256969451904297, "learning_rate": 1.9606970675960856e-05, "loss": 1.6011, "step": 3564 }, { "epoch": 11.688524590163935, "grad_norm": 7.541135311126709, "learning_rate": 1.9606675843146867e-05, "loss": 1.6455, "step": 3565 }, { "epoch": 11.691803278688525, "grad_norm": 5.679835319519043, "learning_rate": 1.960638090200762e-05, "loss": 1.5132, "step": 3566 }, { "epoch": 11.695081967213115, "grad_norm": 9.58225154876709, "learning_rate": 1.9606085852546438e-05, "loss": 1.5737, "step": 3567 }, { "epoch": 11.698360655737705, "grad_norm": 6.90753173828125, "learning_rate": 1.960579069476665e-05, "loss": 1.3658, "step": 3568 }, { "epoch": 11.701639344262295, "grad_norm": 6.597736358642578, "learning_rate": 1.9605495428671588e-05, "loss": 1.5039, "step": 3569 }, { "epoch": 11.704918032786885, "grad_norm": 7.467194557189941, "learning_rate": 1.9605200054264576e-05, "loss": 1.4023, "step": 3570 }, { "epoch": 11.708196721311475, "grad_norm": 7.057509899139404, "learning_rate": 1.960490457154895e-05, "loss": 1.5049, "step": 3571 }, { "epoch": 11.711475409836066, "grad_norm": 7.1811842918396, "learning_rate": 1.9604608980528034e-05, "loss": 1.3337, "step": 3572 }, { "epoch": 11.714754098360656, "grad_norm": 6.837899208068848, "learning_rate": 1.9604313281205164e-05, "loss": 1.4379, "step": 3573 }, { "epoch": 11.718032786885246, "grad_norm": 7.581155776977539, "learning_rate": 1.960401747358368e-05, "loss": 1.4358, "step": 3574 }, { "epoch": 11.721311475409836, "grad_norm": 6.045106887817383, "learning_rate": 1.9603721557666913e-05, "loss": 1.4238, "step": 3575 }, { "epoch": 11.724590163934426, "grad_norm": 5.770586013793945, "learning_rate": 1.9603425533458197e-05, "loss": 1.6948, "step": 3576 }, { "epoch": 11.727868852459016, "grad_norm": 7.946830749511719, "learning_rate": 1.9603129400960875e-05, "loss": 1.5596, "step": 3577 }, { "epoch": 11.731147540983606, "grad_norm": 6.325661659240723, "learning_rate": 1.9602833160178286e-05, "loss": 1.4883, "step": 3578 }, { "epoch": 11.734426229508196, "grad_norm": 5.502111434936523, "learning_rate": 1.9602536811113766e-05, "loss": 1.6206, "step": 3579 }, { "epoch": 11.737704918032787, "grad_norm": 6.234643459320068, "learning_rate": 1.9602240353770662e-05, "loss": 1.4485, "step": 3580 }, { "epoch": 11.740983606557377, "grad_norm": 6.147275924682617, "learning_rate": 1.960194378815231e-05, "loss": 1.4463, "step": 3581 }, { "epoch": 11.744262295081967, "grad_norm": 7.47806453704834, "learning_rate": 1.9601647114262062e-05, "loss": 1.5347, "step": 3582 }, { "epoch": 11.747540983606557, "grad_norm": 9.700533866882324, "learning_rate": 1.9601350332103257e-05, "loss": 1.5073, "step": 3583 }, { "epoch": 11.750819672131147, "grad_norm": 6.717498302459717, "learning_rate": 1.9601053441679244e-05, "loss": 1.5513, "step": 3584 }, { "epoch": 11.754098360655737, "grad_norm": 4.737052917480469, "learning_rate": 1.9600756442993373e-05, "loss": 1.6807, "step": 3585 }, { "epoch": 11.757377049180327, "grad_norm": 13.046181678771973, "learning_rate": 1.960045933604899e-05, "loss": 1.6975, "step": 3586 }, { "epoch": 11.760655737704917, "grad_norm": 6.0637946128845215, "learning_rate": 1.9600162120849445e-05, "loss": 1.3171, "step": 3587 }, { "epoch": 11.763934426229508, "grad_norm": 5.352948188781738, "learning_rate": 1.959986479739809e-05, "loss": 1.4888, "step": 3588 }, { "epoch": 11.767213114754098, "grad_norm": 7.10939359664917, "learning_rate": 1.9599567365698283e-05, "loss": 1.4048, "step": 3589 }, { "epoch": 11.770491803278688, "grad_norm": 7.493321418762207, "learning_rate": 1.9599269825753368e-05, "loss": 1.4436, "step": 3590 }, { "epoch": 11.773770491803278, "grad_norm": 7.095725059509277, "learning_rate": 1.9598972177566705e-05, "loss": 1.3906, "step": 3591 }, { "epoch": 11.777049180327868, "grad_norm": 10.142955780029297, "learning_rate": 1.9598674421141656e-05, "loss": 1.5986, "step": 3592 }, { "epoch": 11.780327868852458, "grad_norm": 6.4026031494140625, "learning_rate": 1.9598376556481567e-05, "loss": 1.6868, "step": 3593 }, { "epoch": 11.783606557377048, "grad_norm": 5.733099460601807, "learning_rate": 1.95980785835898e-05, "loss": 1.5408, "step": 3594 }, { "epoch": 11.78688524590164, "grad_norm": 5.350522518157959, "learning_rate": 1.9597780502469725e-05, "loss": 1.53, "step": 3595 }, { "epoch": 11.790163934426229, "grad_norm": 6.848847389221191, "learning_rate": 1.9597482313124693e-05, "loss": 1.5254, "step": 3596 }, { "epoch": 11.79344262295082, "grad_norm": 5.807425498962402, "learning_rate": 1.9597184015558066e-05, "loss": 1.541, "step": 3597 }, { "epoch": 11.79672131147541, "grad_norm": 4.840185642242432, "learning_rate": 1.9596885609773212e-05, "loss": 1.5146, "step": 3598 }, { "epoch": 11.8, "grad_norm": 7.0306549072265625, "learning_rate": 1.9596587095773496e-05, "loss": 1.5947, "step": 3599 }, { "epoch": 11.80327868852459, "grad_norm": 11.47508430480957, "learning_rate": 1.959628847356228e-05, "loss": 1.5156, "step": 3600 }, { "epoch": 11.806557377049181, "grad_norm": 5.862267971038818, "learning_rate": 1.9595989743142937e-05, "loss": 1.4814, "step": 3601 }, { "epoch": 11.809836065573771, "grad_norm": 7.236310005187988, "learning_rate": 1.9595690904518833e-05, "loss": 1.4695, "step": 3602 }, { "epoch": 11.813114754098361, "grad_norm": 5.887101173400879, "learning_rate": 1.9595391957693334e-05, "loss": 1.563, "step": 3603 }, { "epoch": 11.816393442622951, "grad_norm": 5.90653657913208, "learning_rate": 1.9595092902669815e-05, "loss": 1.5071, "step": 3604 }, { "epoch": 11.819672131147541, "grad_norm": 6.250109672546387, "learning_rate": 1.9594793739451647e-05, "loss": 1.5918, "step": 3605 }, { "epoch": 11.822950819672132, "grad_norm": 6.8418869972229, "learning_rate": 1.95944944680422e-05, "loss": 1.646, "step": 3606 }, { "epoch": 11.826229508196722, "grad_norm": 6.06036376953125, "learning_rate": 1.959419508844486e-05, "loss": 1.6338, "step": 3607 }, { "epoch": 11.829508196721312, "grad_norm": 7.402674198150635, "learning_rate": 1.9593895600662988e-05, "loss": 1.2109, "step": 3608 }, { "epoch": 11.832786885245902, "grad_norm": 7.262925624847412, "learning_rate": 1.959359600469997e-05, "loss": 1.4224, "step": 3609 }, { "epoch": 11.836065573770492, "grad_norm": 6.022946834564209, "learning_rate": 1.9593296300559182e-05, "loss": 1.5359, "step": 3610 }, { "epoch": 11.839344262295082, "grad_norm": 6.970133304595947, "learning_rate": 1.9592996488244007e-05, "loss": 1.4976, "step": 3611 }, { "epoch": 11.842622950819672, "grad_norm": 4.854939937591553, "learning_rate": 1.9592696567757818e-05, "loss": 1.7358, "step": 3612 }, { "epoch": 11.845901639344262, "grad_norm": 6.7781829833984375, "learning_rate": 1.9592396539104004e-05, "loss": 1.3687, "step": 3613 }, { "epoch": 11.849180327868853, "grad_norm": 5.988793849945068, "learning_rate": 1.9592096402285943e-05, "loss": 1.7446, "step": 3614 }, { "epoch": 11.852459016393443, "grad_norm": 6.649482727050781, "learning_rate": 1.9591796157307022e-05, "loss": 1.4944, "step": 3615 }, { "epoch": 11.855737704918033, "grad_norm": 5.832404613494873, "learning_rate": 1.959149580417063e-05, "loss": 1.394, "step": 3616 }, { "epoch": 11.859016393442623, "grad_norm": 6.000767230987549, "learning_rate": 1.9591195342880146e-05, "loss": 1.5017, "step": 3617 }, { "epoch": 11.862295081967213, "grad_norm": 6.676249027252197, "learning_rate": 1.9590894773438967e-05, "loss": 1.4927, "step": 3618 }, { "epoch": 11.865573770491803, "grad_norm": 6.088324546813965, "learning_rate": 1.9590594095850474e-05, "loss": 1.3743, "step": 3619 }, { "epoch": 11.868852459016393, "grad_norm": 7.503138542175293, "learning_rate": 1.959029331011806e-05, "loss": 1.6201, "step": 3620 }, { "epoch": 11.872131147540983, "grad_norm": 7.329133033752441, "learning_rate": 1.9589992416245118e-05, "loss": 1.6331, "step": 3621 }, { "epoch": 11.875409836065574, "grad_norm": 5.31311559677124, "learning_rate": 1.958969141423504e-05, "loss": 1.624, "step": 3622 }, { "epoch": 11.878688524590164, "grad_norm": 6.762495994567871, "learning_rate": 1.9589390304091223e-05, "loss": 1.7036, "step": 3623 }, { "epoch": 11.881967213114754, "grad_norm": 6.834388732910156, "learning_rate": 1.958908908581706e-05, "loss": 1.4536, "step": 3624 }, { "epoch": 11.885245901639344, "grad_norm": 4.917990684509277, "learning_rate": 1.9588787759415946e-05, "loss": 1.613, "step": 3625 }, { "epoch": 11.888524590163934, "grad_norm": 6.411722660064697, "learning_rate": 1.958848632489128e-05, "loss": 1.6089, "step": 3626 }, { "epoch": 11.891803278688524, "grad_norm": 6.924860954284668, "learning_rate": 1.958818478224646e-05, "loss": 1.5835, "step": 3627 }, { "epoch": 11.895081967213114, "grad_norm": 12.374764442443848, "learning_rate": 1.958788313148489e-05, "loss": 1.4736, "step": 3628 }, { "epoch": 11.898360655737704, "grad_norm": 6.29673433303833, "learning_rate": 1.9587581372609966e-05, "loss": 1.4255, "step": 3629 }, { "epoch": 11.901639344262295, "grad_norm": 5.99868106842041, "learning_rate": 1.9587279505625094e-05, "loss": 1.6636, "step": 3630 }, { "epoch": 11.904918032786885, "grad_norm": 7.059844970703125, "learning_rate": 1.9586977530533677e-05, "loss": 1.6958, "step": 3631 }, { "epoch": 11.908196721311475, "grad_norm": 9.29218864440918, "learning_rate": 1.9586675447339124e-05, "loss": 1.6389, "step": 3632 }, { "epoch": 11.911475409836065, "grad_norm": 7.065291881561279, "learning_rate": 1.9586373256044835e-05, "loss": 1.3962, "step": 3633 }, { "epoch": 11.914754098360655, "grad_norm": 15.392878532409668, "learning_rate": 1.9586070956654223e-05, "loss": 1.6257, "step": 3634 }, { "epoch": 11.918032786885245, "grad_norm": 7.243050575256348, "learning_rate": 1.9585768549170688e-05, "loss": 1.365, "step": 3635 }, { "epoch": 11.921311475409835, "grad_norm": 6.216207027435303, "learning_rate": 1.958546603359765e-05, "loss": 1.3278, "step": 3636 }, { "epoch": 11.924590163934425, "grad_norm": 5.829187393188477, "learning_rate": 1.9585163409938514e-05, "loss": 1.5454, "step": 3637 }, { "epoch": 11.927868852459017, "grad_norm": 5.526510715484619, "learning_rate": 1.95848606781967e-05, "loss": 1.6299, "step": 3638 }, { "epoch": 11.931147540983606, "grad_norm": 5.66349983215332, "learning_rate": 1.9584557838375608e-05, "loss": 1.4652, "step": 3639 }, { "epoch": 11.934426229508198, "grad_norm": 9.08676528930664, "learning_rate": 1.9584254890478665e-05, "loss": 1.5157, "step": 3640 }, { "epoch": 11.937704918032788, "grad_norm": 7.861013412475586, "learning_rate": 1.9583951834509284e-05, "loss": 1.7842, "step": 3641 }, { "epoch": 11.940983606557378, "grad_norm": 5.791637897491455, "learning_rate": 1.958364867047088e-05, "loss": 1.6858, "step": 3642 }, { "epoch": 11.944262295081968, "grad_norm": 6.427958965301514, "learning_rate": 1.9583345398366872e-05, "loss": 1.644, "step": 3643 }, { "epoch": 11.947540983606558, "grad_norm": 6.508256912231445, "learning_rate": 1.9583042018200682e-05, "loss": 1.7659, "step": 3644 }, { "epoch": 11.950819672131148, "grad_norm": 6.008739948272705, "learning_rate": 1.958273852997573e-05, "loss": 1.7092, "step": 3645 }, { "epoch": 11.954098360655738, "grad_norm": 6.25961971282959, "learning_rate": 1.9582434933695436e-05, "loss": 1.4727, "step": 3646 }, { "epoch": 11.957377049180328, "grad_norm": 6.989557266235352, "learning_rate": 1.9582131229363225e-05, "loss": 1.5396, "step": 3647 }, { "epoch": 11.960655737704919, "grad_norm": 5.0067596435546875, "learning_rate": 1.9581827416982522e-05, "loss": 1.6318, "step": 3648 }, { "epoch": 11.963934426229509, "grad_norm": 5.734844207763672, "learning_rate": 1.958152349655675e-05, "loss": 1.4285, "step": 3649 }, { "epoch": 11.967213114754099, "grad_norm": 6.971939563751221, "learning_rate": 1.958121946808934e-05, "loss": 1.6899, "step": 3650 }, { "epoch": 11.970491803278689, "grad_norm": 5.182717800140381, "learning_rate": 1.9580915331583717e-05, "loss": 1.5378, "step": 3651 }, { "epoch": 11.973770491803279, "grad_norm": 6.082070350646973, "learning_rate": 1.9580611087043315e-05, "loss": 1.4697, "step": 3652 }, { "epoch": 11.97704918032787, "grad_norm": 5.294895172119141, "learning_rate": 1.958030673447156e-05, "loss": 1.5898, "step": 3653 }, { "epoch": 11.98032786885246, "grad_norm": 5.671551704406738, "learning_rate": 1.9580002273871886e-05, "loss": 1.4883, "step": 3654 }, { "epoch": 11.98360655737705, "grad_norm": 5.984979152679443, "learning_rate": 1.9579697705247726e-05, "loss": 1.6145, "step": 3655 }, { "epoch": 11.98688524590164, "grad_norm": 7.101137638092041, "learning_rate": 1.9579393028602512e-05, "loss": 1.5994, "step": 3656 }, { "epoch": 11.99016393442623, "grad_norm": 7.2763776779174805, "learning_rate": 1.9579088243939686e-05, "loss": 1.6262, "step": 3657 }, { "epoch": 11.99344262295082, "grad_norm": 6.915921211242676, "learning_rate": 1.9578783351262676e-05, "loss": 1.4026, "step": 3658 }, { "epoch": 11.99672131147541, "grad_norm": 5.681069374084473, "learning_rate": 1.9578478350574925e-05, "loss": 1.7363, "step": 3659 }, { "epoch": 12.0, "grad_norm": 6.189259052276611, "learning_rate": 1.957817324187987e-05, "loss": 1.5251, "step": 3660 }, { "epoch": 12.00327868852459, "grad_norm": 6.186582088470459, "learning_rate": 1.9577868025180958e-05, "loss": 1.3462, "step": 3661 }, { "epoch": 12.00655737704918, "grad_norm": 6.408006191253662, "learning_rate": 1.9577562700481624e-05, "loss": 1.5791, "step": 3662 }, { "epoch": 12.00983606557377, "grad_norm": 6.104432106018066, "learning_rate": 1.9577257267785308e-05, "loss": 1.5562, "step": 3663 }, { "epoch": 12.01311475409836, "grad_norm": 5.1142659187316895, "learning_rate": 1.9576951727095462e-05, "loss": 1.364, "step": 3664 }, { "epoch": 12.01639344262295, "grad_norm": 9.300674438476562, "learning_rate": 1.9576646078415526e-05, "loss": 1.4186, "step": 3665 }, { "epoch": 12.01967213114754, "grad_norm": 5.546656608581543, "learning_rate": 1.957634032174895e-05, "loss": 1.5171, "step": 3666 }, { "epoch": 12.02295081967213, "grad_norm": 5.642866134643555, "learning_rate": 1.957603445709918e-05, "loss": 1.4287, "step": 3667 }, { "epoch": 12.026229508196721, "grad_norm": 7.844934463500977, "learning_rate": 1.9575728484469665e-05, "loss": 1.4702, "step": 3668 }, { "epoch": 12.029508196721311, "grad_norm": 6.582922458648682, "learning_rate": 1.957542240386385e-05, "loss": 1.3604, "step": 3669 }, { "epoch": 12.032786885245901, "grad_norm": 5.888630390167236, "learning_rate": 1.95751162152852e-05, "loss": 1.4016, "step": 3670 }, { "epoch": 12.036065573770491, "grad_norm": 6.643465042114258, "learning_rate": 1.9574809918737155e-05, "loss": 1.4368, "step": 3671 }, { "epoch": 12.039344262295081, "grad_norm": 6.855856418609619, "learning_rate": 1.957450351422317e-05, "loss": 1.2505, "step": 3672 }, { "epoch": 12.042622950819672, "grad_norm": 5.582939147949219, "learning_rate": 1.9574197001746705e-05, "loss": 1.6885, "step": 3673 }, { "epoch": 12.045901639344262, "grad_norm": 5.994034290313721, "learning_rate": 1.9573890381311216e-05, "loss": 1.4902, "step": 3674 }, { "epoch": 12.049180327868852, "grad_norm": 5.953763008117676, "learning_rate": 1.9573583652920157e-05, "loss": 1.4465, "step": 3675 }, { "epoch": 12.052459016393442, "grad_norm": 5.871893405914307, "learning_rate": 1.957327681657699e-05, "loss": 1.5383, "step": 3676 }, { "epoch": 12.055737704918032, "grad_norm": 5.442248344421387, "learning_rate": 1.9572969872285174e-05, "loss": 1.3313, "step": 3677 }, { "epoch": 12.059016393442622, "grad_norm": 6.844925880432129, "learning_rate": 1.9572662820048164e-05, "loss": 1.5088, "step": 3678 }, { "epoch": 12.062295081967212, "grad_norm": 6.324292182922363, "learning_rate": 1.957235565986943e-05, "loss": 1.5259, "step": 3679 }, { "epoch": 12.065573770491802, "grad_norm": 6.311607360839844, "learning_rate": 1.9572048391752436e-05, "loss": 1.2993, "step": 3680 }, { "epoch": 12.068852459016393, "grad_norm": 5.705207347869873, "learning_rate": 1.957174101570064e-05, "loss": 1.5098, "step": 3681 }, { "epoch": 12.072131147540984, "grad_norm": 5.2382941246032715, "learning_rate": 1.9571433531717513e-05, "loss": 1.3535, "step": 3682 }, { "epoch": 12.075409836065575, "grad_norm": 6.5443525314331055, "learning_rate": 1.9571125939806523e-05, "loss": 1.3428, "step": 3683 }, { "epoch": 12.078688524590165, "grad_norm": 5.270371437072754, "learning_rate": 1.9570818239971134e-05, "loss": 1.2849, "step": 3684 }, { "epoch": 12.081967213114755, "grad_norm": 6.505354881286621, "learning_rate": 1.957051043221482e-05, "loss": 1.1669, "step": 3685 }, { "epoch": 12.085245901639345, "grad_norm": 6.048213005065918, "learning_rate": 1.957020251654105e-05, "loss": 1.4189, "step": 3686 }, { "epoch": 12.088524590163935, "grad_norm": 5.430264949798584, "learning_rate": 1.9569894492953298e-05, "loss": 1.4165, "step": 3687 }, { "epoch": 12.091803278688525, "grad_norm": 8.069354057312012, "learning_rate": 1.9569586361455032e-05, "loss": 1.7339, "step": 3688 }, { "epoch": 12.095081967213115, "grad_norm": 5.424574375152588, "learning_rate": 1.9569278122049732e-05, "loss": 1.3555, "step": 3689 }, { "epoch": 12.098360655737705, "grad_norm": 5.9963226318359375, "learning_rate": 1.9568969774740868e-05, "loss": 1.2896, "step": 3690 }, { "epoch": 12.101639344262296, "grad_norm": 4.872133255004883, "learning_rate": 1.9568661319531922e-05, "loss": 1.3743, "step": 3691 }, { "epoch": 12.104918032786886, "grad_norm": 7.206067085266113, "learning_rate": 1.9568352756426374e-05, "loss": 1.394, "step": 3692 }, { "epoch": 12.108196721311476, "grad_norm": 5.175285816192627, "learning_rate": 1.95680440854277e-05, "loss": 1.636, "step": 3693 }, { "epoch": 12.111475409836066, "grad_norm": 5.783645153045654, "learning_rate": 1.956773530653938e-05, "loss": 1.6433, "step": 3694 }, { "epoch": 12.114754098360656, "grad_norm": 5.895451068878174, "learning_rate": 1.9567426419764893e-05, "loss": 1.3351, "step": 3695 }, { "epoch": 12.118032786885246, "grad_norm": 5.926326274871826, "learning_rate": 1.9567117425107728e-05, "loss": 1.4705, "step": 3696 }, { "epoch": 12.121311475409836, "grad_norm": 6.350883483886719, "learning_rate": 1.9566808322571365e-05, "loss": 1.5103, "step": 3697 }, { "epoch": 12.124590163934426, "grad_norm": 7.408817291259766, "learning_rate": 1.9566499112159292e-05, "loss": 1.3936, "step": 3698 }, { "epoch": 12.127868852459017, "grad_norm": 4.85091495513916, "learning_rate": 1.9566189793874998e-05, "loss": 1.6445, "step": 3699 }, { "epoch": 12.131147540983607, "grad_norm": 5.003332614898682, "learning_rate": 1.9565880367721963e-05, "loss": 1.3169, "step": 3700 }, { "epoch": 12.134426229508197, "grad_norm": 5.20874547958374, "learning_rate": 1.9565570833703684e-05, "loss": 1.3118, "step": 3701 }, { "epoch": 12.137704918032787, "grad_norm": 4.973940849304199, "learning_rate": 1.956526119182365e-05, "loss": 1.4546, "step": 3702 }, { "epoch": 12.140983606557377, "grad_norm": 6.540236949920654, "learning_rate": 1.9564951442085346e-05, "loss": 1.6248, "step": 3703 }, { "epoch": 12.144262295081967, "grad_norm": 5.3863749504089355, "learning_rate": 1.956464158449227e-05, "loss": 1.4121, "step": 3704 }, { "epoch": 12.147540983606557, "grad_norm": 6.83350133895874, "learning_rate": 1.9564331619047917e-05, "loss": 1.5889, "step": 3705 }, { "epoch": 12.150819672131147, "grad_norm": 5.252078533172607, "learning_rate": 1.956402154575578e-05, "loss": 1.436, "step": 3706 }, { "epoch": 12.154098360655738, "grad_norm": 5.123688697814941, "learning_rate": 1.9563711364619356e-05, "loss": 1.5244, "step": 3707 }, { "epoch": 12.157377049180328, "grad_norm": 5.130728244781494, "learning_rate": 1.9563401075642142e-05, "loss": 1.5947, "step": 3708 }, { "epoch": 12.160655737704918, "grad_norm": 6.16502046585083, "learning_rate": 1.956309067882764e-05, "loss": 1.5378, "step": 3709 }, { "epoch": 12.163934426229508, "grad_norm": 5.135807037353516, "learning_rate": 1.9562780174179346e-05, "loss": 1.5986, "step": 3710 }, { "epoch": 12.167213114754098, "grad_norm": 5.818803787231445, "learning_rate": 1.9562469561700757e-05, "loss": 1.5715, "step": 3711 }, { "epoch": 12.170491803278688, "grad_norm": 12.00204086303711, "learning_rate": 1.956215884139539e-05, "loss": 1.5105, "step": 3712 }, { "epoch": 12.173770491803278, "grad_norm": 6.860401630401611, "learning_rate": 1.9561848013266736e-05, "loss": 1.4407, "step": 3713 }, { "epoch": 12.177049180327868, "grad_norm": 5.816041946411133, "learning_rate": 1.9561537077318306e-05, "loss": 1.3582, "step": 3714 }, { "epoch": 12.180327868852459, "grad_norm": 7.817945957183838, "learning_rate": 1.95612260335536e-05, "loss": 1.1558, "step": 3715 }, { "epoch": 12.183606557377049, "grad_norm": 7.4950103759765625, "learning_rate": 1.9560914881976135e-05, "loss": 1.4795, "step": 3716 }, { "epoch": 12.186885245901639, "grad_norm": 5.352252006530762, "learning_rate": 1.9560603622589408e-05, "loss": 1.5007, "step": 3717 }, { "epoch": 12.190163934426229, "grad_norm": 5.965508937835693, "learning_rate": 1.956029225539694e-05, "loss": 1.5437, "step": 3718 }, { "epoch": 12.193442622950819, "grad_norm": 7.657482624053955, "learning_rate": 1.955998078040223e-05, "loss": 1.311, "step": 3719 }, { "epoch": 12.19672131147541, "grad_norm": 5.991833209991455, "learning_rate": 1.9559669197608802e-05, "loss": 1.5081, "step": 3720 }, { "epoch": 12.2, "grad_norm": 5.819677829742432, "learning_rate": 1.9559357507020163e-05, "loss": 1.502, "step": 3721 }, { "epoch": 12.20327868852459, "grad_norm": 5.582642555236816, "learning_rate": 1.955904570863983e-05, "loss": 1.5193, "step": 3722 }, { "epoch": 12.20655737704918, "grad_norm": 17.62660026550293, "learning_rate": 1.9558733802471313e-05, "loss": 1.2885, "step": 3723 }, { "epoch": 12.20983606557377, "grad_norm": 6.190887928009033, "learning_rate": 1.955842178851814e-05, "loss": 1.5022, "step": 3724 }, { "epoch": 12.21311475409836, "grad_norm": 6.846806049346924, "learning_rate": 1.9558109666783817e-05, "loss": 1.5095, "step": 3725 }, { "epoch": 12.216393442622952, "grad_norm": 5.244011402130127, "learning_rate": 1.955779743727187e-05, "loss": 1.5032, "step": 3726 }, { "epoch": 12.219672131147542, "grad_norm": 6.151026725769043, "learning_rate": 1.9557485099985825e-05, "loss": 1.5576, "step": 3727 }, { "epoch": 12.222950819672132, "grad_norm": 5.058536052703857, "learning_rate": 1.95571726549292e-05, "loss": 1.376, "step": 3728 }, { "epoch": 12.226229508196722, "grad_norm": 6.2970662117004395, "learning_rate": 1.955686010210551e-05, "loss": 1.4438, "step": 3729 }, { "epoch": 12.229508196721312, "grad_norm": 6.671950340270996, "learning_rate": 1.9556547441518285e-05, "loss": 1.563, "step": 3730 }, { "epoch": 12.232786885245902, "grad_norm": 4.9484076499938965, "learning_rate": 1.9556234673171053e-05, "loss": 1.3557, "step": 3731 }, { "epoch": 12.236065573770492, "grad_norm": 6.974908351898193, "learning_rate": 1.955592179706734e-05, "loss": 1.6025, "step": 3732 }, { "epoch": 12.239344262295083, "grad_norm": 6.439291477203369, "learning_rate": 1.9555608813210672e-05, "loss": 1.3228, "step": 3733 }, { "epoch": 12.242622950819673, "grad_norm": 6.943915367126465, "learning_rate": 1.955529572160458e-05, "loss": 1.5269, "step": 3734 }, { "epoch": 12.245901639344263, "grad_norm": 4.715085029602051, "learning_rate": 1.9554982522252596e-05, "loss": 1.3557, "step": 3735 }, { "epoch": 12.249180327868853, "grad_norm": 6.7373480796813965, "learning_rate": 1.9554669215158247e-05, "loss": 1.3513, "step": 3736 }, { "epoch": 12.252459016393443, "grad_norm": 6.246953964233398, "learning_rate": 1.9554355800325068e-05, "loss": 1.5229, "step": 3737 }, { "epoch": 12.255737704918033, "grad_norm": 6.113914966583252, "learning_rate": 1.9554042277756592e-05, "loss": 1.5503, "step": 3738 }, { "epoch": 12.259016393442623, "grad_norm": 6.688385486602783, "learning_rate": 1.9553728647456358e-05, "loss": 1.676, "step": 3739 }, { "epoch": 12.262295081967213, "grad_norm": 6.83485221862793, "learning_rate": 1.9553414909427898e-05, "loss": 1.4607, "step": 3740 }, { "epoch": 12.265573770491804, "grad_norm": 5.411276817321777, "learning_rate": 1.9553101063674753e-05, "loss": 1.4041, "step": 3741 }, { "epoch": 12.268852459016394, "grad_norm": 4.63851261138916, "learning_rate": 1.955278711020046e-05, "loss": 1.3577, "step": 3742 }, { "epoch": 12.272131147540984, "grad_norm": 6.159154891967773, "learning_rate": 1.9552473049008563e-05, "loss": 1.4175, "step": 3743 }, { "epoch": 12.275409836065574, "grad_norm": 6.521935939788818, "learning_rate": 1.9552158880102595e-05, "loss": 1.4924, "step": 3744 }, { "epoch": 12.278688524590164, "grad_norm": 6.927669048309326, "learning_rate": 1.955184460348611e-05, "loss": 1.7236, "step": 3745 }, { "epoch": 12.281967213114754, "grad_norm": 6.902406692504883, "learning_rate": 1.9551530219162643e-05, "loss": 1.4827, "step": 3746 }, { "epoch": 12.285245901639344, "grad_norm": 6.7692131996154785, "learning_rate": 1.9551215727135743e-05, "loss": 1.3755, "step": 3747 }, { "epoch": 12.288524590163934, "grad_norm": 6.200333118438721, "learning_rate": 1.9550901127408953e-05, "loss": 1.3909, "step": 3748 }, { "epoch": 12.291803278688525, "grad_norm": 5.478999614715576, "learning_rate": 1.955058641998582e-05, "loss": 1.6948, "step": 3749 }, { "epoch": 12.295081967213115, "grad_norm": 6.1638617515563965, "learning_rate": 1.95502716048699e-05, "loss": 1.4221, "step": 3750 }, { "epoch": 12.298360655737705, "grad_norm": 6.534964561462402, "learning_rate": 1.9549956682064733e-05, "loss": 1.4152, "step": 3751 }, { "epoch": 12.301639344262295, "grad_norm": 5.8858513832092285, "learning_rate": 1.954964165157388e-05, "loss": 1.5425, "step": 3752 }, { "epoch": 12.304918032786885, "grad_norm": 6.790421962738037, "learning_rate": 1.9549326513400883e-05, "loss": 1.5254, "step": 3753 }, { "epoch": 12.308196721311475, "grad_norm": 7.0891008377075195, "learning_rate": 1.9549011267549304e-05, "loss": 1.6415, "step": 3754 }, { "epoch": 12.311475409836065, "grad_norm": 5.542138576507568, "learning_rate": 1.9548695914022694e-05, "loss": 1.48, "step": 3755 }, { "epoch": 12.314754098360655, "grad_norm": 5.938378810882568, "learning_rate": 1.954838045282461e-05, "loss": 1.5797, "step": 3756 }, { "epoch": 12.318032786885245, "grad_norm": 5.282193183898926, "learning_rate": 1.9548064883958608e-05, "loss": 1.2922, "step": 3757 }, { "epoch": 12.321311475409836, "grad_norm": 6.171430587768555, "learning_rate": 1.9547749207428245e-05, "loss": 1.5232, "step": 3758 }, { "epoch": 12.324590163934426, "grad_norm": 5.529318332672119, "learning_rate": 1.9547433423237087e-05, "loss": 1.5061, "step": 3759 }, { "epoch": 12.327868852459016, "grad_norm": 7.535371780395508, "learning_rate": 1.9547117531388685e-05, "loss": 1.4595, "step": 3760 }, { "epoch": 12.331147540983606, "grad_norm": 11.10895824432373, "learning_rate": 1.954680153188661e-05, "loss": 1.6826, "step": 3761 }, { "epoch": 12.334426229508196, "grad_norm": 11.023377418518066, "learning_rate": 1.954648542473442e-05, "loss": 1.5149, "step": 3762 }, { "epoch": 12.337704918032786, "grad_norm": 9.97620677947998, "learning_rate": 1.954616920993568e-05, "loss": 1.3345, "step": 3763 }, { "epoch": 12.340983606557376, "grad_norm": 6.3620991706848145, "learning_rate": 1.954585288749396e-05, "loss": 1.3591, "step": 3764 }, { "epoch": 12.344262295081966, "grad_norm": 5.115563869476318, "learning_rate": 1.954553645741282e-05, "loss": 1.6494, "step": 3765 }, { "epoch": 12.347540983606557, "grad_norm": 5.440603256225586, "learning_rate": 1.9545219919695834e-05, "loss": 1.5557, "step": 3766 }, { "epoch": 12.350819672131147, "grad_norm": 4.905881404876709, "learning_rate": 1.9544903274346567e-05, "loss": 1.5095, "step": 3767 }, { "epoch": 12.354098360655737, "grad_norm": 5.389461040496826, "learning_rate": 1.9544586521368594e-05, "loss": 1.4886, "step": 3768 }, { "epoch": 12.357377049180329, "grad_norm": 6.627379894256592, "learning_rate": 1.9544269660765483e-05, "loss": 1.4509, "step": 3769 }, { "epoch": 12.360655737704919, "grad_norm": 6.43596076965332, "learning_rate": 1.954395269254081e-05, "loss": 1.3442, "step": 3770 }, { "epoch": 12.363934426229509, "grad_norm": 5.080197334289551, "learning_rate": 1.9543635616698142e-05, "loss": 1.4797, "step": 3771 }, { "epoch": 12.3672131147541, "grad_norm": 6.253623008728027, "learning_rate": 1.954331843324106e-05, "loss": 1.3647, "step": 3772 }, { "epoch": 12.37049180327869, "grad_norm": 5.462019920349121, "learning_rate": 1.9543001142173144e-05, "loss": 1.3455, "step": 3773 }, { "epoch": 12.37377049180328, "grad_norm": 9.444928169250488, "learning_rate": 1.9542683743497964e-05, "loss": 1.4148, "step": 3774 }, { "epoch": 12.37704918032787, "grad_norm": 6.486621379852295, "learning_rate": 1.9542366237219107e-05, "loss": 1.3606, "step": 3775 }, { "epoch": 12.38032786885246, "grad_norm": 5.169352054595947, "learning_rate": 1.954204862334015e-05, "loss": 1.7539, "step": 3776 }, { "epoch": 12.38360655737705, "grad_norm": 5.1156134605407715, "learning_rate": 1.9541730901864668e-05, "loss": 1.4185, "step": 3777 }, { "epoch": 12.38688524590164, "grad_norm": 5.657800674438477, "learning_rate": 1.9541413072796252e-05, "loss": 1.5381, "step": 3778 }, { "epoch": 12.39016393442623, "grad_norm": 6.073448657989502, "learning_rate": 1.9541095136138484e-05, "loss": 1.5056, "step": 3779 }, { "epoch": 12.39344262295082, "grad_norm": 8.523414611816406, "learning_rate": 1.9540777091894948e-05, "loss": 1.3413, "step": 3780 }, { "epoch": 12.39672131147541, "grad_norm": 7.417452812194824, "learning_rate": 1.9540458940069226e-05, "loss": 1.1824, "step": 3781 }, { "epoch": 12.4, "grad_norm": 6.453550815582275, "learning_rate": 1.9540140680664915e-05, "loss": 1.5411, "step": 3782 }, { "epoch": 12.40327868852459, "grad_norm": 6.0449910163879395, "learning_rate": 1.9539822313685597e-05, "loss": 1.334, "step": 3783 }, { "epoch": 12.40655737704918, "grad_norm": 6.123338222503662, "learning_rate": 1.9539503839134863e-05, "loss": 1.6699, "step": 3784 }, { "epoch": 12.40983606557377, "grad_norm": 6.104992866516113, "learning_rate": 1.9539185257016305e-05, "loss": 1.344, "step": 3785 }, { "epoch": 12.41311475409836, "grad_norm": 6.029599189758301, "learning_rate": 1.9538866567333514e-05, "loss": 1.5107, "step": 3786 }, { "epoch": 12.416393442622951, "grad_norm": 7.618459224700928, "learning_rate": 1.9538547770090085e-05, "loss": 1.1882, "step": 3787 }, { "epoch": 12.419672131147541, "grad_norm": 9.53576946258545, "learning_rate": 1.9538228865289613e-05, "loss": 1.4939, "step": 3788 }, { "epoch": 12.422950819672131, "grad_norm": 6.152660846710205, "learning_rate": 1.9537909852935692e-05, "loss": 1.5281, "step": 3789 }, { "epoch": 12.426229508196721, "grad_norm": 6.316854953765869, "learning_rate": 1.9537590733031925e-05, "loss": 1.2798, "step": 3790 }, { "epoch": 12.429508196721311, "grad_norm": 5.722578525543213, "learning_rate": 1.9537271505581902e-05, "loss": 1.6055, "step": 3791 }, { "epoch": 12.432786885245902, "grad_norm": 7.517200469970703, "learning_rate": 1.9536952170589225e-05, "loss": 1.4927, "step": 3792 }, { "epoch": 12.436065573770492, "grad_norm": 5.934058666229248, "learning_rate": 1.95366327280575e-05, "loss": 1.3843, "step": 3793 }, { "epoch": 12.439344262295082, "grad_norm": 6.266165733337402, "learning_rate": 1.9536313177990323e-05, "loss": 1.3394, "step": 3794 }, { "epoch": 12.442622950819672, "grad_norm": 4.870372772216797, "learning_rate": 1.9535993520391298e-05, "loss": 1.4497, "step": 3795 }, { "epoch": 12.445901639344262, "grad_norm": 6.82163667678833, "learning_rate": 1.9535673755264035e-05, "loss": 1.3157, "step": 3796 }, { "epoch": 12.449180327868852, "grad_norm": 5.4068169593811035, "learning_rate": 1.953535388261213e-05, "loss": 1.426, "step": 3797 }, { "epoch": 12.452459016393442, "grad_norm": 5.412114143371582, "learning_rate": 1.95350339024392e-05, "loss": 1.5444, "step": 3798 }, { "epoch": 12.455737704918032, "grad_norm": 6.154022216796875, "learning_rate": 1.9534713814748852e-05, "loss": 1.5701, "step": 3799 }, { "epoch": 12.459016393442623, "grad_norm": 5.900784969329834, "learning_rate": 1.9534393619544687e-05, "loss": 1.6299, "step": 3800 }, { "epoch": 12.462295081967213, "grad_norm": 4.779583930969238, "learning_rate": 1.9534073316830322e-05, "loss": 1.4993, "step": 3801 }, { "epoch": 12.465573770491803, "grad_norm": 5.689777851104736, "learning_rate": 1.953375290660937e-05, "loss": 1.5173, "step": 3802 }, { "epoch": 12.468852459016393, "grad_norm": 11.111231803894043, "learning_rate": 1.9533432388885436e-05, "loss": 1.4275, "step": 3803 }, { "epoch": 12.472131147540983, "grad_norm": 6.860554218292236, "learning_rate": 1.9533111763662145e-05, "loss": 1.6799, "step": 3804 }, { "epoch": 12.475409836065573, "grad_norm": 6.425690174102783, "learning_rate": 1.9532791030943102e-05, "loss": 1.6453, "step": 3805 }, { "epoch": 12.478688524590163, "grad_norm": 5.433074951171875, "learning_rate": 1.9532470190731932e-05, "loss": 1.291, "step": 3806 }, { "epoch": 12.481967213114753, "grad_norm": 5.555324077606201, "learning_rate": 1.9532149243032248e-05, "loss": 1.5059, "step": 3807 }, { "epoch": 12.485245901639344, "grad_norm": 4.984837532043457, "learning_rate": 1.9531828187847674e-05, "loss": 1.349, "step": 3808 }, { "epoch": 12.488524590163934, "grad_norm": 5.755589962005615, "learning_rate": 1.953150702518182e-05, "loss": 1.4131, "step": 3809 }, { "epoch": 12.491803278688524, "grad_norm": 5.070591449737549, "learning_rate": 1.9531185755038318e-05, "loss": 1.4663, "step": 3810 }, { "epoch": 12.495081967213114, "grad_norm": 6.462916374206543, "learning_rate": 1.9530864377420786e-05, "loss": 1.4211, "step": 3811 }, { "epoch": 12.498360655737706, "grad_norm": 5.814509391784668, "learning_rate": 1.953054289233285e-05, "loss": 1.4434, "step": 3812 }, { "epoch": 12.501639344262294, "grad_norm": 6.304711818695068, "learning_rate": 1.953022129977813e-05, "loss": 1.5732, "step": 3813 }, { "epoch": 12.504918032786886, "grad_norm": 7.845754623413086, "learning_rate": 1.9529899599760257e-05, "loss": 1.4646, "step": 3814 }, { "epoch": 12.508196721311476, "grad_norm": 6.5946526527404785, "learning_rate": 1.9529577792282863e-05, "loss": 1.4513, "step": 3815 }, { "epoch": 12.511475409836066, "grad_norm": 6.108829975128174, "learning_rate": 1.9529255877349564e-05, "loss": 1.6846, "step": 3816 }, { "epoch": 12.514754098360656, "grad_norm": 6.623331069946289, "learning_rate": 1.9528933854963997e-05, "loss": 1.4099, "step": 3817 }, { "epoch": 12.518032786885247, "grad_norm": 6.288127422332764, "learning_rate": 1.9528611725129797e-05, "loss": 1.5596, "step": 3818 }, { "epoch": 12.521311475409837, "grad_norm": 5.9356279373168945, "learning_rate": 1.9528289487850592e-05, "loss": 1.3196, "step": 3819 }, { "epoch": 12.524590163934427, "grad_norm": 6.932858467102051, "learning_rate": 1.9527967143130014e-05, "loss": 1.4209, "step": 3820 }, { "epoch": 12.527868852459017, "grad_norm": 6.77913761138916, "learning_rate": 1.9527644690971704e-05, "loss": 1.4229, "step": 3821 }, { "epoch": 12.531147540983607, "grad_norm": 10.109457969665527, "learning_rate": 1.9527322131379288e-05, "loss": 1.5779, "step": 3822 }, { "epoch": 12.534426229508197, "grad_norm": 8.099215507507324, "learning_rate": 1.9526999464356413e-05, "loss": 1.4875, "step": 3823 }, { "epoch": 12.537704918032787, "grad_norm": 6.893148422241211, "learning_rate": 1.9526676689906714e-05, "loss": 1.3835, "step": 3824 }, { "epoch": 12.540983606557377, "grad_norm": 6.055660724639893, "learning_rate": 1.9526353808033827e-05, "loss": 1.2566, "step": 3825 }, { "epoch": 12.544262295081968, "grad_norm": 5.345803260803223, "learning_rate": 1.9526030818741397e-05, "loss": 1.6821, "step": 3826 }, { "epoch": 12.547540983606558, "grad_norm": 6.459309101104736, "learning_rate": 1.9525707722033064e-05, "loss": 1.3601, "step": 3827 }, { "epoch": 12.550819672131148, "grad_norm": 7.537053108215332, "learning_rate": 1.9525384517912475e-05, "loss": 1.5557, "step": 3828 }, { "epoch": 12.554098360655738, "grad_norm": 6.240025043487549, "learning_rate": 1.952506120638327e-05, "loss": 1.3562, "step": 3829 }, { "epoch": 12.557377049180328, "grad_norm": 6.299570560455322, "learning_rate": 1.9524737787449096e-05, "loss": 1.3118, "step": 3830 }, { "epoch": 12.560655737704918, "grad_norm": 5.119077205657959, "learning_rate": 1.9524414261113604e-05, "loss": 1.5388, "step": 3831 }, { "epoch": 12.563934426229508, "grad_norm": 4.882315635681152, "learning_rate": 1.9524090627380432e-05, "loss": 1.3942, "step": 3832 }, { "epoch": 12.567213114754098, "grad_norm": 4.934244155883789, "learning_rate": 1.952376688625324e-05, "loss": 1.4985, "step": 3833 }, { "epoch": 12.570491803278689, "grad_norm": 6.116497039794922, "learning_rate": 1.952344303773567e-05, "loss": 1.593, "step": 3834 }, { "epoch": 12.573770491803279, "grad_norm": 4.805303573608398, "learning_rate": 1.952311908183138e-05, "loss": 1.4316, "step": 3835 }, { "epoch": 12.577049180327869, "grad_norm": 7.686423301696777, "learning_rate": 1.9522795018544026e-05, "loss": 1.3311, "step": 3836 }, { "epoch": 12.580327868852459, "grad_norm": 5.988722324371338, "learning_rate": 1.9522470847877252e-05, "loss": 1.4395, "step": 3837 }, { "epoch": 12.583606557377049, "grad_norm": 11.757791519165039, "learning_rate": 1.9522146569834717e-05, "loss": 1.6216, "step": 3838 }, { "epoch": 12.58688524590164, "grad_norm": 6.014592170715332, "learning_rate": 1.952182218442008e-05, "loss": 1.5405, "step": 3839 }, { "epoch": 12.59016393442623, "grad_norm": 7.06574821472168, "learning_rate": 1.9521497691637e-05, "loss": 1.3086, "step": 3840 }, { "epoch": 12.59344262295082, "grad_norm": 7.1215996742248535, "learning_rate": 1.952117309148913e-05, "loss": 1.4095, "step": 3841 }, { "epoch": 12.59672131147541, "grad_norm": 6.483234882354736, "learning_rate": 1.9520848383980136e-05, "loss": 1.4836, "step": 3842 }, { "epoch": 12.6, "grad_norm": 5.866533279418945, "learning_rate": 1.952052356911368e-05, "loss": 1.7341, "step": 3843 }, { "epoch": 12.60327868852459, "grad_norm": 5.8333282470703125, "learning_rate": 1.9520198646893415e-05, "loss": 1.4973, "step": 3844 }, { "epoch": 12.60655737704918, "grad_norm": 6.350971221923828, "learning_rate": 1.9519873617323015e-05, "loss": 1.4463, "step": 3845 }, { "epoch": 12.60983606557377, "grad_norm": 5.11051607131958, "learning_rate": 1.9519548480406144e-05, "loss": 1.4485, "step": 3846 }, { "epoch": 12.61311475409836, "grad_norm": 5.377652168273926, "learning_rate": 1.9519223236146464e-05, "loss": 1.4551, "step": 3847 }, { "epoch": 12.61639344262295, "grad_norm": 6.0491228103637695, "learning_rate": 1.9518897884547646e-05, "loss": 1.5203, "step": 3848 }, { "epoch": 12.61967213114754, "grad_norm": 7.679712772369385, "learning_rate": 1.9518572425613354e-05, "loss": 1.6729, "step": 3849 }, { "epoch": 12.62295081967213, "grad_norm": 6.942413806915283, "learning_rate": 1.9518246859347263e-05, "loss": 1.4517, "step": 3850 }, { "epoch": 12.62622950819672, "grad_norm": 4.554645538330078, "learning_rate": 1.9517921185753044e-05, "loss": 1.6091, "step": 3851 }, { "epoch": 12.62950819672131, "grad_norm": 7.7022318840026855, "learning_rate": 1.9517595404834363e-05, "loss": 1.2402, "step": 3852 }, { "epoch": 12.6327868852459, "grad_norm": 6.483682632446289, "learning_rate": 1.9517269516594904e-05, "loss": 1.4021, "step": 3853 }, { "epoch": 12.636065573770491, "grad_norm": 5.73321008682251, "learning_rate": 1.9516943521038332e-05, "loss": 1.5088, "step": 3854 }, { "epoch": 12.639344262295083, "grad_norm": 5.90377140045166, "learning_rate": 1.9516617418168327e-05, "loss": 1.49, "step": 3855 }, { "epoch": 12.642622950819671, "grad_norm": 6.592618942260742, "learning_rate": 1.9516291207988564e-05, "loss": 1.4443, "step": 3856 }, { "epoch": 12.645901639344263, "grad_norm": 6.4419660568237305, "learning_rate": 1.9515964890502728e-05, "loss": 1.4834, "step": 3857 }, { "epoch": 12.649180327868853, "grad_norm": 6.184971332550049, "learning_rate": 1.9515638465714487e-05, "loss": 1.4229, "step": 3858 }, { "epoch": 12.652459016393443, "grad_norm": 5.396666049957275, "learning_rate": 1.9515311933627534e-05, "loss": 1.4298, "step": 3859 }, { "epoch": 12.655737704918034, "grad_norm": 5.611474514007568, "learning_rate": 1.9514985294245545e-05, "loss": 1.4146, "step": 3860 }, { "epoch": 12.659016393442624, "grad_norm": 6.391238212585449, "learning_rate": 1.95146585475722e-05, "loss": 1.5059, "step": 3861 }, { "epoch": 12.662295081967214, "grad_norm": 6.763735771179199, "learning_rate": 1.9514331693611193e-05, "loss": 1.4425, "step": 3862 }, { "epoch": 12.665573770491804, "grad_norm": 4.398525238037109, "learning_rate": 1.9514004732366198e-05, "loss": 1.375, "step": 3863 }, { "epoch": 12.668852459016394, "grad_norm": 6.6694231033325195, "learning_rate": 1.951367766384091e-05, "loss": 1.2573, "step": 3864 }, { "epoch": 12.672131147540984, "grad_norm": 6.605361461639404, "learning_rate": 1.9513350488039017e-05, "loss": 1.5391, "step": 3865 }, { "epoch": 12.675409836065574, "grad_norm": 6.125646591186523, "learning_rate": 1.9513023204964203e-05, "loss": 1.5891, "step": 3866 }, { "epoch": 12.678688524590164, "grad_norm": 7.138208866119385, "learning_rate": 1.951269581462016e-05, "loss": 1.3562, "step": 3867 }, { "epoch": 12.681967213114755, "grad_norm": 6.807770252227783, "learning_rate": 1.951236831701058e-05, "loss": 1.2382, "step": 3868 }, { "epoch": 12.685245901639345, "grad_norm": 4.894894123077393, "learning_rate": 1.9512040712139164e-05, "loss": 1.627, "step": 3869 }, { "epoch": 12.688524590163935, "grad_norm": 5.846924304962158, "learning_rate": 1.9511713000009592e-05, "loss": 1.5247, "step": 3870 }, { "epoch": 12.691803278688525, "grad_norm": 6.794942855834961, "learning_rate": 1.9511385180625568e-05, "loss": 1.5156, "step": 3871 }, { "epoch": 12.695081967213115, "grad_norm": 6.64522647857666, "learning_rate": 1.9511057253990787e-05, "loss": 1.2815, "step": 3872 }, { "epoch": 12.698360655737705, "grad_norm": 6.22664737701416, "learning_rate": 1.9510729220108947e-05, "loss": 1.4084, "step": 3873 }, { "epoch": 12.701639344262295, "grad_norm": 6.418426036834717, "learning_rate": 1.9510401078983747e-05, "loss": 1.5498, "step": 3874 }, { "epoch": 12.704918032786885, "grad_norm": 8.265331268310547, "learning_rate": 1.9510072830618887e-05, "loss": 1.3495, "step": 3875 }, { "epoch": 12.708196721311475, "grad_norm": 5.868023872375488, "learning_rate": 1.9509744475018067e-05, "loss": 1.6133, "step": 3876 }, { "epoch": 12.711475409836066, "grad_norm": 5.478047847747803, "learning_rate": 1.950941601218499e-05, "loss": 1.3813, "step": 3877 }, { "epoch": 12.714754098360656, "grad_norm": 5.261471748352051, "learning_rate": 1.950908744212336e-05, "loss": 1.5314, "step": 3878 }, { "epoch": 12.718032786885246, "grad_norm": 6.367916107177734, "learning_rate": 1.950875876483688e-05, "loss": 1.3748, "step": 3879 }, { "epoch": 12.721311475409836, "grad_norm": 6.299368381500244, "learning_rate": 1.9508429980329262e-05, "loss": 1.4304, "step": 3880 }, { "epoch": 12.724590163934426, "grad_norm": 8.091215133666992, "learning_rate": 1.9508101088604207e-05, "loss": 1.6709, "step": 3881 }, { "epoch": 12.727868852459016, "grad_norm": 5.735444068908691, "learning_rate": 1.950777208966543e-05, "loss": 1.4089, "step": 3882 }, { "epoch": 12.731147540983606, "grad_norm": 5.757680892944336, "learning_rate": 1.9507442983516634e-05, "loss": 1.2747, "step": 3883 }, { "epoch": 12.734426229508196, "grad_norm": 6.174211025238037, "learning_rate": 1.9507113770161533e-05, "loss": 1.3721, "step": 3884 }, { "epoch": 12.737704918032787, "grad_norm": 5.966248512268066, "learning_rate": 1.950678444960384e-05, "loss": 1.3457, "step": 3885 }, { "epoch": 12.740983606557377, "grad_norm": 9.468894004821777, "learning_rate": 1.9506455021847267e-05, "loss": 1.5156, "step": 3886 }, { "epoch": 12.744262295081967, "grad_norm": 11.8883056640625, "learning_rate": 1.9506125486895532e-05, "loss": 1.4072, "step": 3887 }, { "epoch": 12.747540983606557, "grad_norm": 7.814558982849121, "learning_rate": 1.9505795844752343e-05, "loss": 1.5378, "step": 3888 }, { "epoch": 12.750819672131147, "grad_norm": 4.879797458648682, "learning_rate": 1.9505466095421428e-05, "loss": 1.335, "step": 3889 }, { "epoch": 12.754098360655737, "grad_norm": 8.17625617980957, "learning_rate": 1.9505136238906496e-05, "loss": 1.3982, "step": 3890 }, { "epoch": 12.757377049180327, "grad_norm": 5.062532901763916, "learning_rate": 1.9504806275211274e-05, "loss": 1.4663, "step": 3891 }, { "epoch": 12.760655737704917, "grad_norm": 5.59835147857666, "learning_rate": 1.9504476204339475e-05, "loss": 1.4355, "step": 3892 }, { "epoch": 12.763934426229508, "grad_norm": 7.1086273193359375, "learning_rate": 1.9504146026294824e-05, "loss": 1.5505, "step": 3893 }, { "epoch": 12.767213114754098, "grad_norm": 6.434091091156006, "learning_rate": 1.9503815741081047e-05, "loss": 1.479, "step": 3894 }, { "epoch": 12.770491803278688, "grad_norm": 6.801074981689453, "learning_rate": 1.9503485348701865e-05, "loss": 1.3879, "step": 3895 }, { "epoch": 12.773770491803278, "grad_norm": 6.266086101531982, "learning_rate": 1.9503154849161006e-05, "loss": 1.5381, "step": 3896 }, { "epoch": 12.777049180327868, "grad_norm": 7.4262213706970215, "learning_rate": 1.9502824242462193e-05, "loss": 1.4001, "step": 3897 }, { "epoch": 12.780327868852458, "grad_norm": 5.64981746673584, "learning_rate": 1.9502493528609155e-05, "loss": 1.3423, "step": 3898 }, { "epoch": 12.783606557377048, "grad_norm": 6.529388904571533, "learning_rate": 1.950216270760563e-05, "loss": 1.5129, "step": 3899 }, { "epoch": 12.78688524590164, "grad_norm": 5.306522846221924, "learning_rate": 1.9501831779455332e-05, "loss": 1.3022, "step": 3900 }, { "epoch": 12.790163934426229, "grad_norm": 5.104032039642334, "learning_rate": 1.9501500744162005e-05, "loss": 1.4552, "step": 3901 }, { "epoch": 12.79344262295082, "grad_norm": 5.357272148132324, "learning_rate": 1.9501169601729377e-05, "loss": 1.7097, "step": 3902 }, { "epoch": 12.79672131147541, "grad_norm": 7.214917182922363, "learning_rate": 1.9500838352161184e-05, "loss": 1.3904, "step": 3903 }, { "epoch": 12.8, "grad_norm": 4.885165691375732, "learning_rate": 1.950050699546116e-05, "loss": 1.573, "step": 3904 }, { "epoch": 12.80327868852459, "grad_norm": 6.700290203094482, "learning_rate": 1.950017553163304e-05, "loss": 1.1003, "step": 3905 }, { "epoch": 12.806557377049181, "grad_norm": 6.015804290771484, "learning_rate": 1.9499843960680568e-05, "loss": 1.5427, "step": 3906 }, { "epoch": 12.809836065573771, "grad_norm": 5.3053436279296875, "learning_rate": 1.9499512282607472e-05, "loss": 1.6111, "step": 3907 }, { "epoch": 12.813114754098361, "grad_norm": 4.630100250244141, "learning_rate": 1.9499180497417503e-05, "loss": 1.4092, "step": 3908 }, { "epoch": 12.816393442622951, "grad_norm": 6.458990097045898, "learning_rate": 1.9498848605114393e-05, "loss": 1.3564, "step": 3909 }, { "epoch": 12.819672131147541, "grad_norm": 6.816859722137451, "learning_rate": 1.949851660570189e-05, "loss": 1.2935, "step": 3910 }, { "epoch": 12.822950819672132, "grad_norm": 7.635761260986328, "learning_rate": 1.9498184499183738e-05, "loss": 1.4685, "step": 3911 }, { "epoch": 12.826229508196722, "grad_norm": 7.202475070953369, "learning_rate": 1.9497852285563677e-05, "loss": 1.3428, "step": 3912 }, { "epoch": 12.829508196721312, "grad_norm": 5.801948547363281, "learning_rate": 1.949751996484546e-05, "loss": 1.3635, "step": 3913 }, { "epoch": 12.832786885245902, "grad_norm": 5.2090535163879395, "learning_rate": 1.949718753703283e-05, "loss": 1.4983, "step": 3914 }, { "epoch": 12.836065573770492, "grad_norm": 5.0028395652771, "learning_rate": 1.9496855002129532e-05, "loss": 1.3962, "step": 3915 }, { "epoch": 12.839344262295082, "grad_norm": 5.639729976654053, "learning_rate": 1.9496522360139323e-05, "loss": 1.6045, "step": 3916 }, { "epoch": 12.842622950819672, "grad_norm": 6.6957621574401855, "learning_rate": 1.949618961106595e-05, "loss": 1.4834, "step": 3917 }, { "epoch": 12.845901639344262, "grad_norm": 5.272868633270264, "learning_rate": 1.949585675491316e-05, "loss": 1.4111, "step": 3918 }, { "epoch": 12.849180327868853, "grad_norm": 5.993705749511719, "learning_rate": 1.949552379168472e-05, "loss": 1.3713, "step": 3919 }, { "epoch": 12.852459016393443, "grad_norm": 5.986284255981445, "learning_rate": 1.9495190721384377e-05, "loss": 1.6538, "step": 3920 }, { "epoch": 12.855737704918033, "grad_norm": 4.743712425231934, "learning_rate": 1.9494857544015883e-05, "loss": 1.5415, "step": 3921 }, { "epoch": 12.859016393442623, "grad_norm": 6.560065746307373, "learning_rate": 1.9494524259582994e-05, "loss": 1.3894, "step": 3922 }, { "epoch": 12.862295081967213, "grad_norm": 6.788135528564453, "learning_rate": 1.9494190868089475e-05, "loss": 1.4185, "step": 3923 }, { "epoch": 12.865573770491803, "grad_norm": 5.1534552574157715, "learning_rate": 1.9493857369539084e-05, "loss": 1.4934, "step": 3924 }, { "epoch": 12.868852459016393, "grad_norm": 4.86951208114624, "learning_rate": 1.949352376393558e-05, "loss": 1.6143, "step": 3925 }, { "epoch": 12.872131147540983, "grad_norm": 6.624956130981445, "learning_rate": 1.9493190051282723e-05, "loss": 1.4717, "step": 3926 }, { "epoch": 12.875409836065574, "grad_norm": 5.204590320587158, "learning_rate": 1.9492856231584277e-05, "loss": 1.5132, "step": 3927 }, { "epoch": 12.878688524590164, "grad_norm": 5.377059459686279, "learning_rate": 1.949252230484401e-05, "loss": 1.4121, "step": 3928 }, { "epoch": 12.881967213114754, "grad_norm": 5.401641845703125, "learning_rate": 1.9492188271065685e-05, "loss": 1.3779, "step": 3929 }, { "epoch": 12.885245901639344, "grad_norm": 5.463314533233643, "learning_rate": 1.9491854130253064e-05, "loss": 1.6699, "step": 3930 }, { "epoch": 12.888524590163934, "grad_norm": 5.419313430786133, "learning_rate": 1.949151988240992e-05, "loss": 1.645, "step": 3931 }, { "epoch": 12.891803278688524, "grad_norm": 6.078241348266602, "learning_rate": 1.949118552754002e-05, "loss": 1.4583, "step": 3932 }, { "epoch": 12.895081967213114, "grad_norm": 8.189340591430664, "learning_rate": 1.949085106564714e-05, "loss": 1.4043, "step": 3933 }, { "epoch": 12.898360655737704, "grad_norm": 5.612695693969727, "learning_rate": 1.9490516496735038e-05, "loss": 1.5586, "step": 3934 }, { "epoch": 12.901639344262295, "grad_norm": 5.541013240814209, "learning_rate": 1.94901818208075e-05, "loss": 1.6298, "step": 3935 }, { "epoch": 12.904918032786885, "grad_norm": 5.998722553253174, "learning_rate": 1.948984703786829e-05, "loss": 1.6738, "step": 3936 }, { "epoch": 12.908196721311475, "grad_norm": 6.630997657775879, "learning_rate": 1.9489512147921195e-05, "loss": 1.4414, "step": 3937 }, { "epoch": 12.911475409836065, "grad_norm": 5.145431041717529, "learning_rate": 1.948917715096998e-05, "loss": 1.6943, "step": 3938 }, { "epoch": 12.914754098360655, "grad_norm": 5.390547275543213, "learning_rate": 1.9488842047018423e-05, "loss": 1.3384, "step": 3939 }, { "epoch": 12.918032786885245, "grad_norm": 5.270303726196289, "learning_rate": 1.9488506836070313e-05, "loss": 1.3723, "step": 3940 }, { "epoch": 12.921311475409835, "grad_norm": 5.653247356414795, "learning_rate": 1.948817151812942e-05, "loss": 1.2832, "step": 3941 }, { "epoch": 12.924590163934425, "grad_norm": 5.60671854019165, "learning_rate": 1.9487836093199525e-05, "loss": 1.4958, "step": 3942 }, { "epoch": 12.927868852459017, "grad_norm": 6.175465106964111, "learning_rate": 1.9487500561284417e-05, "loss": 1.4988, "step": 3943 }, { "epoch": 12.931147540983606, "grad_norm": 4.989861011505127, "learning_rate": 1.9487164922387875e-05, "loss": 1.5364, "step": 3944 }, { "epoch": 12.934426229508198, "grad_norm": 5.731817722320557, "learning_rate": 1.948682917651368e-05, "loss": 1.5176, "step": 3945 }, { "epoch": 12.937704918032788, "grad_norm": 7.3354387283325195, "learning_rate": 1.9486493323665626e-05, "loss": 1.4929, "step": 3946 }, { "epoch": 12.940983606557378, "grad_norm": 8.33067798614502, "learning_rate": 1.9486157363847494e-05, "loss": 1.4934, "step": 3947 }, { "epoch": 12.944262295081968, "grad_norm": 6.076749324798584, "learning_rate": 1.9485821297063077e-05, "loss": 1.3823, "step": 3948 }, { "epoch": 12.947540983606558, "grad_norm": 7.913854598999023, "learning_rate": 1.948548512331616e-05, "loss": 1.4371, "step": 3949 }, { "epoch": 12.950819672131148, "grad_norm": 5.398810386657715, "learning_rate": 1.948514884261054e-05, "loss": 1.5524, "step": 3950 }, { "epoch": 12.954098360655738, "grad_norm": 7.879519939422607, "learning_rate": 1.948481245495e-05, "loss": 1.4478, "step": 3951 }, { "epoch": 12.957377049180328, "grad_norm": 5.655909538269043, "learning_rate": 1.9484475960338344e-05, "loss": 1.5129, "step": 3952 }, { "epoch": 12.960655737704919, "grad_norm": 7.1394782066345215, "learning_rate": 1.9484139358779354e-05, "loss": 1.2852, "step": 3953 }, { "epoch": 12.963934426229509, "grad_norm": 6.252775192260742, "learning_rate": 1.9483802650276833e-05, "loss": 1.3474, "step": 3954 }, { "epoch": 12.967213114754099, "grad_norm": 5.884726047515869, "learning_rate": 1.9483465834834577e-05, "loss": 1.3638, "step": 3955 }, { "epoch": 12.970491803278689, "grad_norm": 6.140128135681152, "learning_rate": 1.9483128912456383e-05, "loss": 1.574, "step": 3956 }, { "epoch": 12.973770491803279, "grad_norm": 5.269857883453369, "learning_rate": 1.9482791883146053e-05, "loss": 1.3655, "step": 3957 }, { "epoch": 12.97704918032787, "grad_norm": 6.820924282073975, "learning_rate": 1.948245474690738e-05, "loss": 1.4805, "step": 3958 }, { "epoch": 12.98032786885246, "grad_norm": 9.618583679199219, "learning_rate": 1.9482117503744176e-05, "loss": 1.4092, "step": 3959 }, { "epoch": 12.98360655737705, "grad_norm": 6.830724716186523, "learning_rate": 1.9481780153660236e-05, "loss": 1.4165, "step": 3960 }, { "epoch": 12.98688524590164, "grad_norm": 7.540050983428955, "learning_rate": 1.9481442696659365e-05, "loss": 1.3789, "step": 3961 }, { "epoch": 12.99016393442623, "grad_norm": 7.585846424102783, "learning_rate": 1.948110513274537e-05, "loss": 1.2798, "step": 3962 }, { "epoch": 12.99344262295082, "grad_norm": 18.71476936340332, "learning_rate": 1.9480767461922053e-05, "loss": 1.6106, "step": 3963 }, { "epoch": 12.99672131147541, "grad_norm": 6.527870178222656, "learning_rate": 1.948042968419323e-05, "loss": 1.5896, "step": 3964 }, { "epoch": 13.0, "grad_norm": 6.399949550628662, "learning_rate": 1.9480091799562706e-05, "loss": 1.2798, "step": 3965 }, { "epoch": 13.00327868852459, "grad_norm": 5.360678672790527, "learning_rate": 1.947975380803429e-05, "loss": 1.2964, "step": 3966 }, { "epoch": 13.00655737704918, "grad_norm": 6.975161552429199, "learning_rate": 1.9479415709611792e-05, "loss": 1.1969, "step": 3967 }, { "epoch": 13.00983606557377, "grad_norm": 4.758316516876221, "learning_rate": 1.9479077504299024e-05, "loss": 1.4995, "step": 3968 }, { "epoch": 13.01311475409836, "grad_norm": 7.110759258270264, "learning_rate": 1.9478739192099802e-05, "loss": 1.4734, "step": 3969 }, { "epoch": 13.01639344262295, "grad_norm": 6.681360244750977, "learning_rate": 1.947840077301794e-05, "loss": 1.3673, "step": 3970 }, { "epoch": 13.01967213114754, "grad_norm": 5.260255813598633, "learning_rate": 1.9478062247057256e-05, "loss": 1.4248, "step": 3971 }, { "epoch": 13.02295081967213, "grad_norm": 5.364104270935059, "learning_rate": 1.9477723614221565e-05, "loss": 1.3562, "step": 3972 }, { "epoch": 13.026229508196721, "grad_norm": 6.226535797119141, "learning_rate": 1.9477384874514688e-05, "loss": 1.2455, "step": 3973 }, { "epoch": 13.029508196721311, "grad_norm": 4.768813610076904, "learning_rate": 1.947704602794044e-05, "loss": 1.4014, "step": 3974 }, { "epoch": 13.032786885245901, "grad_norm": 5.741642475128174, "learning_rate": 1.9476707074502645e-05, "loss": 1.5144, "step": 3975 }, { "epoch": 13.036065573770491, "grad_norm": 5.328936576843262, "learning_rate": 1.9476368014205123e-05, "loss": 1.3235, "step": 3976 }, { "epoch": 13.039344262295081, "grad_norm": 6.977009296417236, "learning_rate": 1.9476028847051702e-05, "loss": 1.3418, "step": 3977 }, { "epoch": 13.042622950819672, "grad_norm": 6.929871559143066, "learning_rate": 1.9475689573046195e-05, "loss": 1.4282, "step": 3978 }, { "epoch": 13.045901639344262, "grad_norm": 4.947092056274414, "learning_rate": 1.9475350192192443e-05, "loss": 1.5293, "step": 3979 }, { "epoch": 13.049180327868852, "grad_norm": 7.000669002532959, "learning_rate": 1.9475010704494266e-05, "loss": 1.3835, "step": 3980 }, { "epoch": 13.052459016393442, "grad_norm": 10.541566848754883, "learning_rate": 1.947467110995549e-05, "loss": 1.4233, "step": 3981 }, { "epoch": 13.055737704918032, "grad_norm": 6.294461727142334, "learning_rate": 1.9474331408579944e-05, "loss": 1.439, "step": 3982 }, { "epoch": 13.059016393442622, "grad_norm": 5.795291423797607, "learning_rate": 1.9473991600371463e-05, "loss": 1.4358, "step": 3983 }, { "epoch": 13.062295081967212, "grad_norm": 5.411056041717529, "learning_rate": 1.947365168533388e-05, "loss": 1.4338, "step": 3984 }, { "epoch": 13.065573770491802, "grad_norm": 5.492486953735352, "learning_rate": 1.9473311663471016e-05, "loss": 1.5541, "step": 3985 }, { "epoch": 13.068852459016393, "grad_norm": 5.370460510253906, "learning_rate": 1.9472971534786718e-05, "loss": 1.4602, "step": 3986 }, { "epoch": 13.072131147540984, "grad_norm": 6.595537185668945, "learning_rate": 1.947263129928482e-05, "loss": 1.4807, "step": 3987 }, { "epoch": 13.075409836065575, "grad_norm": 6.992790222167969, "learning_rate": 1.9472290956969146e-05, "loss": 1.3911, "step": 3988 }, { "epoch": 13.078688524590165, "grad_norm": 10.15477466583252, "learning_rate": 1.947195050784355e-05, "loss": 1.4539, "step": 3989 }, { "epoch": 13.081967213114755, "grad_norm": 7.278208255767822, "learning_rate": 1.9471609951911862e-05, "loss": 1.3561, "step": 3990 }, { "epoch": 13.085245901639345, "grad_norm": 7.522792339324951, "learning_rate": 1.9471269289177922e-05, "loss": 1.3162, "step": 3991 }, { "epoch": 13.088524590163935, "grad_norm": 4.720436096191406, "learning_rate": 1.9470928519645572e-05, "loss": 1.5205, "step": 3992 }, { "epoch": 13.091803278688525, "grad_norm": 5.9463701248168945, "learning_rate": 1.947058764331866e-05, "loss": 1.459, "step": 3993 }, { "epoch": 13.095081967213115, "grad_norm": 7.1969451904296875, "learning_rate": 1.947024666020102e-05, "loss": 1.417, "step": 3994 }, { "epoch": 13.098360655737705, "grad_norm": 6.436927318572998, "learning_rate": 1.946990557029651e-05, "loss": 1.4294, "step": 3995 }, { "epoch": 13.101639344262296, "grad_norm": 5.8259992599487305, "learning_rate": 1.9469564373608963e-05, "loss": 1.3757, "step": 3996 }, { "epoch": 13.104918032786886, "grad_norm": 6.09627103805542, "learning_rate": 1.9469223070142232e-05, "loss": 1.3125, "step": 3997 }, { "epoch": 13.108196721311476, "grad_norm": 6.572689056396484, "learning_rate": 1.9468881659900164e-05, "loss": 1.46, "step": 3998 }, { "epoch": 13.111475409836066, "grad_norm": 6.211003303527832, "learning_rate": 1.9468540142886616e-05, "loss": 1.2424, "step": 3999 }, { "epoch": 13.114754098360656, "grad_norm": 6.630084991455078, "learning_rate": 1.9468198519105428e-05, "loss": 1.3816, "step": 4000 }, { "epoch": 13.118032786885246, "grad_norm": 5.749484062194824, "learning_rate": 1.946785678856046e-05, "loss": 1.3445, "step": 4001 }, { "epoch": 13.121311475409836, "grad_norm": 5.585873126983643, "learning_rate": 1.946751495125556e-05, "loss": 1.2786, "step": 4002 }, { "epoch": 13.124590163934426, "grad_norm": 5.508633613586426, "learning_rate": 1.9467173007194588e-05, "loss": 1.4299, "step": 4003 }, { "epoch": 13.127868852459017, "grad_norm": 6.7005414962768555, "learning_rate": 1.9466830956381394e-05, "loss": 1.1958, "step": 4004 }, { "epoch": 13.131147540983607, "grad_norm": 5.992882251739502, "learning_rate": 1.9466488798819837e-05, "loss": 1.4426, "step": 4005 }, { "epoch": 13.134426229508197, "grad_norm": 5.136779308319092, "learning_rate": 1.9466146534513777e-05, "loss": 1.5466, "step": 4006 }, { "epoch": 13.137704918032787, "grad_norm": 7.005513668060303, "learning_rate": 1.9465804163467076e-05, "loss": 1.3079, "step": 4007 }, { "epoch": 13.140983606557377, "grad_norm": 6.162834644317627, "learning_rate": 1.946546168568359e-05, "loss": 1.3552, "step": 4008 }, { "epoch": 13.144262295081967, "grad_norm": 6.210702419281006, "learning_rate": 1.946511910116718e-05, "loss": 1.4131, "step": 4009 }, { "epoch": 13.147540983606557, "grad_norm": 7.126049518585205, "learning_rate": 1.946477640992171e-05, "loss": 1.4312, "step": 4010 }, { "epoch": 13.150819672131147, "grad_norm": 5.5128092765808105, "learning_rate": 1.946443361195105e-05, "loss": 1.5403, "step": 4011 }, { "epoch": 13.154098360655738, "grad_norm": 9.625110626220703, "learning_rate": 1.9464090707259055e-05, "loss": 1.2397, "step": 4012 }, { "epoch": 13.157377049180328, "grad_norm": 7.363924026489258, "learning_rate": 1.94637476958496e-05, "loss": 1.4038, "step": 4013 }, { "epoch": 13.160655737704918, "grad_norm": 7.053400039672852, "learning_rate": 1.9463404577726548e-05, "loss": 1.4587, "step": 4014 }, { "epoch": 13.163934426229508, "grad_norm": 5.706855297088623, "learning_rate": 1.9463061352893774e-05, "loss": 1.3711, "step": 4015 }, { "epoch": 13.167213114754098, "grad_norm": 5.555544853210449, "learning_rate": 1.946271802135514e-05, "loss": 1.4761, "step": 4016 }, { "epoch": 13.170491803278688, "grad_norm": 4.5607590675354, "learning_rate": 1.9462374583114524e-05, "loss": 1.4097, "step": 4017 }, { "epoch": 13.173770491803278, "grad_norm": 6.058002948760986, "learning_rate": 1.946203103817579e-05, "loss": 1.3257, "step": 4018 }, { "epoch": 13.177049180327868, "grad_norm": 6.18565034866333, "learning_rate": 1.9461687386542826e-05, "loss": 1.203, "step": 4019 }, { "epoch": 13.180327868852459, "grad_norm": 6.805873394012451, "learning_rate": 1.94613436282195e-05, "loss": 1.2814, "step": 4020 }, { "epoch": 13.183606557377049, "grad_norm": 5.638800621032715, "learning_rate": 1.9460999763209682e-05, "loss": 1.2695, "step": 4021 }, { "epoch": 13.186885245901639, "grad_norm": 5.637874603271484, "learning_rate": 1.9460655791517254e-05, "loss": 1.4465, "step": 4022 }, { "epoch": 13.190163934426229, "grad_norm": 5.582773685455322, "learning_rate": 1.94603117131461e-05, "loss": 1.3301, "step": 4023 }, { "epoch": 13.193442622950819, "grad_norm": 4.603658199310303, "learning_rate": 1.945996752810009e-05, "loss": 1.4551, "step": 4024 }, { "epoch": 13.19672131147541, "grad_norm": 6.256440162658691, "learning_rate": 1.9459623236383113e-05, "loss": 1.1626, "step": 4025 }, { "epoch": 13.2, "grad_norm": 5.6722412109375, "learning_rate": 1.9459278837999048e-05, "loss": 1.5073, "step": 4026 }, { "epoch": 13.20327868852459, "grad_norm": 4.9994215965271, "learning_rate": 1.945893433295178e-05, "loss": 1.2822, "step": 4027 }, { "epoch": 13.20655737704918, "grad_norm": 6.343899726867676, "learning_rate": 1.945858972124519e-05, "loss": 1.3601, "step": 4028 }, { "epoch": 13.20983606557377, "grad_norm": 6.290634632110596, "learning_rate": 1.9458245002883167e-05, "loss": 1.4722, "step": 4029 }, { "epoch": 13.21311475409836, "grad_norm": 7.0010480880737305, "learning_rate": 1.94579001778696e-05, "loss": 1.241, "step": 4030 }, { "epoch": 13.216393442622952, "grad_norm": 5.131061553955078, "learning_rate": 1.9457555246208373e-05, "loss": 1.4082, "step": 4031 }, { "epoch": 13.219672131147542, "grad_norm": 5.370796203613281, "learning_rate": 1.9457210207903378e-05, "loss": 1.3762, "step": 4032 }, { "epoch": 13.222950819672132, "grad_norm": 5.3790507316589355, "learning_rate": 1.9456865062958503e-05, "loss": 1.2378, "step": 4033 }, { "epoch": 13.226229508196722, "grad_norm": 6.042276859283447, "learning_rate": 1.9456519811377642e-05, "loss": 1.4158, "step": 4034 }, { "epoch": 13.229508196721312, "grad_norm": 5.614528179168701, "learning_rate": 1.945617445316469e-05, "loss": 1.3132, "step": 4035 }, { "epoch": 13.232786885245902, "grad_norm": 6.0207061767578125, "learning_rate": 1.945582898832354e-05, "loss": 1.4541, "step": 4036 }, { "epoch": 13.236065573770492, "grad_norm": 5.411855220794678, "learning_rate": 1.9455483416858084e-05, "loss": 1.4844, "step": 4037 }, { "epoch": 13.239344262295083, "grad_norm": 5.189587116241455, "learning_rate": 1.945513773877222e-05, "loss": 1.4221, "step": 4038 }, { "epoch": 13.242622950819673, "grad_norm": 5.203892707824707, "learning_rate": 1.9454791954069852e-05, "loss": 1.2911, "step": 4039 }, { "epoch": 13.245901639344263, "grad_norm": 5.746205806732178, "learning_rate": 1.945444606275487e-05, "loss": 1.2588, "step": 4040 }, { "epoch": 13.249180327868853, "grad_norm": 5.634666919708252, "learning_rate": 1.9454100064831177e-05, "loss": 1.3108, "step": 4041 }, { "epoch": 13.252459016393443, "grad_norm": 6.740765571594238, "learning_rate": 1.945375396030268e-05, "loss": 1.5229, "step": 4042 }, { "epoch": 13.255737704918033, "grad_norm": 5.7603068351745605, "learning_rate": 1.9453407749173278e-05, "loss": 1.1765, "step": 4043 }, { "epoch": 13.259016393442623, "grad_norm": 5.571951389312744, "learning_rate": 1.9453061431446872e-05, "loss": 1.3569, "step": 4044 }, { "epoch": 13.262295081967213, "grad_norm": 5.959230422973633, "learning_rate": 1.945271500712737e-05, "loss": 1.343, "step": 4045 }, { "epoch": 13.265573770491804, "grad_norm": 6.318362712860107, "learning_rate": 1.945236847621868e-05, "loss": 1.2615, "step": 4046 }, { "epoch": 13.268852459016394, "grad_norm": 6.230538368225098, "learning_rate": 1.9452021838724703e-05, "loss": 1.2874, "step": 4047 }, { "epoch": 13.272131147540984, "grad_norm": 5.983285427093506, "learning_rate": 1.9451675094649356e-05, "loss": 1.2725, "step": 4048 }, { "epoch": 13.275409836065574, "grad_norm": 5.943740367889404, "learning_rate": 1.9451328243996546e-05, "loss": 1.1646, "step": 4049 }, { "epoch": 13.278688524590164, "grad_norm": 5.298888683319092, "learning_rate": 1.945098128677018e-05, "loss": 1.1975, "step": 4050 }, { "epoch": 13.281967213114754, "grad_norm": 5.9233927726745605, "learning_rate": 1.9450634222974176e-05, "loss": 1.1989, "step": 4051 }, { "epoch": 13.285245901639344, "grad_norm": 5.250751972198486, "learning_rate": 1.945028705261244e-05, "loss": 1.5635, "step": 4052 }, { "epoch": 13.288524590163934, "grad_norm": 6.6655755043029785, "learning_rate": 1.94499397756889e-05, "loss": 1.3418, "step": 4053 }, { "epoch": 13.291803278688525, "grad_norm": 11.088285446166992, "learning_rate": 1.9449592392207455e-05, "loss": 1.5, "step": 4054 }, { "epoch": 13.295081967213115, "grad_norm": 4.935042381286621, "learning_rate": 1.9449244902172035e-05, "loss": 1.2773, "step": 4055 }, { "epoch": 13.298360655737705, "grad_norm": 6.190730094909668, "learning_rate": 1.9448897305586554e-05, "loss": 1.2463, "step": 4056 }, { "epoch": 13.301639344262295, "grad_norm": 6.663659572601318, "learning_rate": 1.944854960245493e-05, "loss": 1.5898, "step": 4057 }, { "epoch": 13.304918032786885, "grad_norm": 5.586486339569092, "learning_rate": 1.9448201792781084e-05, "loss": 1.385, "step": 4058 }, { "epoch": 13.308196721311475, "grad_norm": 6.146048069000244, "learning_rate": 1.9447853876568944e-05, "loss": 1.2952, "step": 4059 }, { "epoch": 13.311475409836065, "grad_norm": 6.7425312995910645, "learning_rate": 1.9447505853822425e-05, "loss": 1.2122, "step": 4060 }, { "epoch": 13.314754098360655, "grad_norm": 7.179121017456055, "learning_rate": 1.9447157724545452e-05, "loss": 1.4109, "step": 4061 }, { "epoch": 13.318032786885245, "grad_norm": 5.715258598327637, "learning_rate": 1.9446809488741957e-05, "loss": 1.4375, "step": 4062 }, { "epoch": 13.321311475409836, "grad_norm": 5.977914333343506, "learning_rate": 1.9446461146415863e-05, "loss": 1.3167, "step": 4063 }, { "epoch": 13.324590163934426, "grad_norm": 6.398069858551025, "learning_rate": 1.9446112697571097e-05, "loss": 1.4573, "step": 4064 }, { "epoch": 13.327868852459016, "grad_norm": 5.473909378051758, "learning_rate": 1.9445764142211588e-05, "loss": 1.2627, "step": 4065 }, { "epoch": 13.331147540983606, "grad_norm": 5.175118923187256, "learning_rate": 1.9445415480341267e-05, "loss": 1.312, "step": 4066 }, { "epoch": 13.334426229508196, "grad_norm": 5.469958305358887, "learning_rate": 1.9445066711964065e-05, "loss": 1.5356, "step": 4067 }, { "epoch": 13.337704918032786, "grad_norm": 5.42410135269165, "learning_rate": 1.944471783708392e-05, "loss": 1.4634, "step": 4068 }, { "epoch": 13.340983606557376, "grad_norm": 6.5265278816223145, "learning_rate": 1.9444368855704754e-05, "loss": 1.2341, "step": 4069 }, { "epoch": 13.344262295081966, "grad_norm": 5.016067028045654, "learning_rate": 1.9444019767830516e-05, "loss": 1.3455, "step": 4070 }, { "epoch": 13.347540983606557, "grad_norm": 5.248977184295654, "learning_rate": 1.9443670573465133e-05, "loss": 1.4529, "step": 4071 }, { "epoch": 13.350819672131147, "grad_norm": 7.077930927276611, "learning_rate": 1.9443321272612544e-05, "loss": 1.3029, "step": 4072 }, { "epoch": 13.354098360655737, "grad_norm": 5.733025550842285, "learning_rate": 1.9442971865276693e-05, "loss": 1.5225, "step": 4073 }, { "epoch": 13.357377049180329, "grad_norm": 4.712423324584961, "learning_rate": 1.9442622351461515e-05, "loss": 1.5881, "step": 4074 }, { "epoch": 13.360655737704919, "grad_norm": 4.928524017333984, "learning_rate": 1.9442272731170948e-05, "loss": 1.2607, "step": 4075 }, { "epoch": 13.363934426229509, "grad_norm": 9.15569019317627, "learning_rate": 1.944192300440894e-05, "loss": 1.3311, "step": 4076 }, { "epoch": 13.3672131147541, "grad_norm": 5.314361095428467, "learning_rate": 1.9441573171179433e-05, "loss": 1.3877, "step": 4077 }, { "epoch": 13.37049180327869, "grad_norm": 6.285161018371582, "learning_rate": 1.9441223231486375e-05, "loss": 1.46, "step": 4078 }, { "epoch": 13.37377049180328, "grad_norm": 5.939356803894043, "learning_rate": 1.9440873185333707e-05, "loss": 1.2084, "step": 4079 }, { "epoch": 13.37704918032787, "grad_norm": 8.606939315795898, "learning_rate": 1.9440523032725376e-05, "loss": 1.321, "step": 4080 }, { "epoch": 13.38032786885246, "grad_norm": 5.216427326202393, "learning_rate": 1.9440172773665336e-05, "loss": 1.231, "step": 4081 }, { "epoch": 13.38360655737705, "grad_norm": 6.639862060546875, "learning_rate": 1.943982240815753e-05, "loss": 1.3157, "step": 4082 }, { "epoch": 13.38688524590164, "grad_norm": 6.63145112991333, "learning_rate": 1.943947193620591e-05, "loss": 1.0774, "step": 4083 }, { "epoch": 13.39016393442623, "grad_norm": 5.948944091796875, "learning_rate": 1.943912135781443e-05, "loss": 1.366, "step": 4084 }, { "epoch": 13.39344262295082, "grad_norm": 6.492709636688232, "learning_rate": 1.9438770672987045e-05, "loss": 1.1577, "step": 4085 }, { "epoch": 13.39672131147541, "grad_norm": 5.697575569152832, "learning_rate": 1.9438419881727703e-05, "loss": 1.5339, "step": 4086 }, { "epoch": 13.4, "grad_norm": 6.618565082550049, "learning_rate": 1.9438068984040366e-05, "loss": 1.3364, "step": 4087 }, { "epoch": 13.40327868852459, "grad_norm": 5.194869041442871, "learning_rate": 1.9437717979928986e-05, "loss": 1.259, "step": 4088 }, { "epoch": 13.40655737704918, "grad_norm": 6.896469593048096, "learning_rate": 1.9437366869397526e-05, "loss": 1.4021, "step": 4089 }, { "epoch": 13.40983606557377, "grad_norm": 12.379973411560059, "learning_rate": 1.943701565244994e-05, "loss": 1.4258, "step": 4090 }, { "epoch": 13.41311475409836, "grad_norm": 5.079267978668213, "learning_rate": 1.943666432909019e-05, "loss": 1.5171, "step": 4091 }, { "epoch": 13.416393442622951, "grad_norm": 5.089606761932373, "learning_rate": 1.943631289932224e-05, "loss": 1.5297, "step": 4092 }, { "epoch": 13.419672131147541, "grad_norm": 6.252277851104736, "learning_rate": 1.943596136315005e-05, "loss": 1.3518, "step": 4093 }, { "epoch": 13.422950819672131, "grad_norm": 5.74683141708374, "learning_rate": 1.9435609720577585e-05, "loss": 1.4187, "step": 4094 }, { "epoch": 13.426229508196721, "grad_norm": 4.846704959869385, "learning_rate": 1.943525797160881e-05, "loss": 1.5691, "step": 4095 }, { "epoch": 13.429508196721311, "grad_norm": 5.376733779907227, "learning_rate": 1.943490611624769e-05, "loss": 1.2703, "step": 4096 }, { "epoch": 13.432786885245902, "grad_norm": 5.576265811920166, "learning_rate": 1.9434554154498196e-05, "loss": 1.2881, "step": 4097 }, { "epoch": 13.436065573770492, "grad_norm": 6.462350368499756, "learning_rate": 1.9434202086364292e-05, "loss": 1.3767, "step": 4098 }, { "epoch": 13.439344262295082, "grad_norm": 5.8830132484436035, "learning_rate": 1.9433849911849952e-05, "loss": 1.4839, "step": 4099 }, { "epoch": 13.442622950819672, "grad_norm": 4.899013519287109, "learning_rate": 1.9433497630959143e-05, "loss": 1.3007, "step": 4100 }, { "epoch": 13.445901639344262, "grad_norm": 4.839754581451416, "learning_rate": 1.9433145243695843e-05, "loss": 1.5212, "step": 4101 }, { "epoch": 13.449180327868852, "grad_norm": 6.361778736114502, "learning_rate": 1.943279275006402e-05, "loss": 1.3533, "step": 4102 }, { "epoch": 13.452459016393442, "grad_norm": 5.47199821472168, "learning_rate": 1.943244015006765e-05, "loss": 1.3037, "step": 4103 }, { "epoch": 13.455737704918032, "grad_norm": 5.318028450012207, "learning_rate": 1.9432087443710716e-05, "loss": 1.4069, "step": 4104 }, { "epoch": 13.459016393442623, "grad_norm": 6.491865158081055, "learning_rate": 1.9431734630997183e-05, "loss": 1.3931, "step": 4105 }, { "epoch": 13.462295081967213, "grad_norm": 6.668392658233643, "learning_rate": 1.9431381711931036e-05, "loss": 1.4326, "step": 4106 }, { "epoch": 13.465573770491803, "grad_norm": 5.902782917022705, "learning_rate": 1.9431028686516254e-05, "loss": 1.3499, "step": 4107 }, { "epoch": 13.468852459016393, "grad_norm": 5.831995010375977, "learning_rate": 1.943067555475682e-05, "loss": 1.2419, "step": 4108 }, { "epoch": 13.472131147540983, "grad_norm": 6.653365612030029, "learning_rate": 1.9430322316656714e-05, "loss": 1.1455, "step": 4109 }, { "epoch": 13.475409836065573, "grad_norm": 6.27600622177124, "learning_rate": 1.942996897221992e-05, "loss": 1.3748, "step": 4110 }, { "epoch": 13.478688524590163, "grad_norm": 7.299039363861084, "learning_rate": 1.942961552145042e-05, "loss": 1.3245, "step": 4111 }, { "epoch": 13.481967213114753, "grad_norm": 4.74837064743042, "learning_rate": 1.9429261964352198e-05, "loss": 1.5706, "step": 4112 }, { "epoch": 13.485245901639344, "grad_norm": 10.213367462158203, "learning_rate": 1.9428908300929246e-05, "loss": 1.2488, "step": 4113 }, { "epoch": 13.488524590163934, "grad_norm": 5.010441780090332, "learning_rate": 1.9428554531185545e-05, "loss": 1.5115, "step": 4114 }, { "epoch": 13.491803278688524, "grad_norm": 5.797788143157959, "learning_rate": 1.9428200655125095e-05, "loss": 1.4482, "step": 4115 }, { "epoch": 13.495081967213114, "grad_norm": 6.783174514770508, "learning_rate": 1.9427846672751873e-05, "loss": 1.3062, "step": 4116 }, { "epoch": 13.498360655737706, "grad_norm": 5.081302165985107, "learning_rate": 1.9427492584069883e-05, "loss": 1.459, "step": 4117 }, { "epoch": 13.501639344262294, "grad_norm": 7.2376813888549805, "learning_rate": 1.942713838908311e-05, "loss": 1.3403, "step": 4118 }, { "epoch": 13.504918032786886, "grad_norm": 6.7438178062438965, "learning_rate": 1.9426784087795546e-05, "loss": 1.2642, "step": 4119 }, { "epoch": 13.508196721311476, "grad_norm": 5.9676713943481445, "learning_rate": 1.9426429680211198e-05, "loss": 1.2495, "step": 4120 }, { "epoch": 13.511475409836066, "grad_norm": 5.114727973937988, "learning_rate": 1.9426075166334047e-05, "loss": 1.363, "step": 4121 }, { "epoch": 13.514754098360656, "grad_norm": 5.680177211761475, "learning_rate": 1.9425720546168102e-05, "loss": 1.5637, "step": 4122 }, { "epoch": 13.518032786885247, "grad_norm": 7.270750522613525, "learning_rate": 1.9425365819717354e-05, "loss": 1.4971, "step": 4123 }, { "epoch": 13.521311475409837, "grad_norm": 6.14804744720459, "learning_rate": 1.942501098698581e-05, "loss": 1.3997, "step": 4124 }, { "epoch": 13.524590163934427, "grad_norm": 7.336345195770264, "learning_rate": 1.9424656047977468e-05, "loss": 1.1814, "step": 4125 }, { "epoch": 13.527868852459017, "grad_norm": 6.3974080085754395, "learning_rate": 1.9424301002696325e-05, "loss": 1.29, "step": 4126 }, { "epoch": 13.531147540983607, "grad_norm": 8.599005699157715, "learning_rate": 1.9423945851146395e-05, "loss": 1.2988, "step": 4127 }, { "epoch": 13.534426229508197, "grad_norm": 7.01878023147583, "learning_rate": 1.9423590593331676e-05, "loss": 1.3262, "step": 4128 }, { "epoch": 13.537704918032787, "grad_norm": 6.444941520690918, "learning_rate": 1.9423235229256172e-05, "loss": 1.4568, "step": 4129 }, { "epoch": 13.540983606557377, "grad_norm": 4.882426738739014, "learning_rate": 1.9422879758923895e-05, "loss": 1.5229, "step": 4130 }, { "epoch": 13.544262295081968, "grad_norm": 5.245652675628662, "learning_rate": 1.942252418233885e-05, "loss": 1.4878, "step": 4131 }, { "epoch": 13.547540983606558, "grad_norm": 5.17719841003418, "learning_rate": 1.942216849950505e-05, "loss": 1.3071, "step": 4132 }, { "epoch": 13.550819672131148, "grad_norm": 5.507527828216553, "learning_rate": 1.9421812710426503e-05, "loss": 1.3015, "step": 4133 }, { "epoch": 13.554098360655738, "grad_norm": 5.56960916519165, "learning_rate": 1.9421456815107222e-05, "loss": 1.5298, "step": 4134 }, { "epoch": 13.557377049180328, "grad_norm": 9.027421951293945, "learning_rate": 1.9421100813551218e-05, "loss": 1.2561, "step": 4135 }, { "epoch": 13.560655737704918, "grad_norm": 5.292696475982666, "learning_rate": 1.9420744705762508e-05, "loss": 1.307, "step": 4136 }, { "epoch": 13.563934426229508, "grad_norm": 6.476856231689453, "learning_rate": 1.9420388491745105e-05, "loss": 1.5469, "step": 4137 }, { "epoch": 13.567213114754098, "grad_norm": 4.410252094268799, "learning_rate": 1.9420032171503028e-05, "loss": 1.3884, "step": 4138 }, { "epoch": 13.570491803278689, "grad_norm": 7.5258660316467285, "learning_rate": 1.9419675745040295e-05, "loss": 1.3621, "step": 4139 }, { "epoch": 13.573770491803279, "grad_norm": 8.366568565368652, "learning_rate": 1.941931921236092e-05, "loss": 1.4221, "step": 4140 }, { "epoch": 13.577049180327869, "grad_norm": 5.266853332519531, "learning_rate": 1.941896257346893e-05, "loss": 1.6272, "step": 4141 }, { "epoch": 13.580327868852459, "grad_norm": 5.73028564453125, "learning_rate": 1.9418605828368344e-05, "loss": 1.2822, "step": 4142 }, { "epoch": 13.583606557377049, "grad_norm": 6.690436840057373, "learning_rate": 1.9418248977063182e-05, "loss": 1.3209, "step": 4143 }, { "epoch": 13.58688524590164, "grad_norm": 5.6278605461120605, "learning_rate": 1.9417892019557473e-05, "loss": 1.3518, "step": 4144 }, { "epoch": 13.59016393442623, "grad_norm": 6.876418113708496, "learning_rate": 1.941753495585524e-05, "loss": 1.4175, "step": 4145 }, { "epoch": 13.59344262295082, "grad_norm": 5.599156856536865, "learning_rate": 1.9417177785960504e-05, "loss": 1.5488, "step": 4146 }, { "epoch": 13.59672131147541, "grad_norm": 5.438297748565674, "learning_rate": 1.94168205098773e-05, "loss": 1.0669, "step": 4147 }, { "epoch": 13.6, "grad_norm": 7.226170063018799, "learning_rate": 1.9416463127609655e-05, "loss": 1.4873, "step": 4148 }, { "epoch": 13.60327868852459, "grad_norm": 5.403006076812744, "learning_rate": 1.9416105639161598e-05, "loss": 1.437, "step": 4149 }, { "epoch": 13.60655737704918, "grad_norm": 4.9755120277404785, "learning_rate": 1.9415748044537157e-05, "loss": 1.3654, "step": 4150 }, { "epoch": 13.60983606557377, "grad_norm": 8.782368659973145, "learning_rate": 1.941539034374037e-05, "loss": 1.2136, "step": 4151 }, { "epoch": 13.61311475409836, "grad_norm": 5.278975486755371, "learning_rate": 1.9415032536775262e-05, "loss": 1.4624, "step": 4152 }, { "epoch": 13.61639344262295, "grad_norm": 6.134749889373779, "learning_rate": 1.941467462364588e-05, "loss": 1.5125, "step": 4153 }, { "epoch": 13.61967213114754, "grad_norm": 4.604854106903076, "learning_rate": 1.9414316604356248e-05, "loss": 1.5364, "step": 4154 }, { "epoch": 13.62295081967213, "grad_norm": 6.030173301696777, "learning_rate": 1.9413958478910408e-05, "loss": 1.3215, "step": 4155 }, { "epoch": 13.62622950819672, "grad_norm": 5.547104358673096, "learning_rate": 1.9413600247312397e-05, "loss": 1.2661, "step": 4156 }, { "epoch": 13.62950819672131, "grad_norm": 5.238527297973633, "learning_rate": 1.941324190956626e-05, "loss": 1.4705, "step": 4157 }, { "epoch": 13.6327868852459, "grad_norm": 8.17566204071045, "learning_rate": 1.941288346567603e-05, "loss": 1.4717, "step": 4158 }, { "epoch": 13.636065573770491, "grad_norm": 4.713554859161377, "learning_rate": 1.9412524915645753e-05, "loss": 1.2029, "step": 4159 }, { "epoch": 13.639344262295083, "grad_norm": 8.377291679382324, "learning_rate": 1.941216625947947e-05, "loss": 1.3892, "step": 4160 }, { "epoch": 13.642622950819671, "grad_norm": 4.988800048828125, "learning_rate": 1.9411807497181227e-05, "loss": 1.3013, "step": 4161 }, { "epoch": 13.645901639344263, "grad_norm": 4.781425476074219, "learning_rate": 1.9411448628755067e-05, "loss": 1.3723, "step": 4162 }, { "epoch": 13.649180327868853, "grad_norm": 5.707985877990723, "learning_rate": 1.941108965420504e-05, "loss": 1.2456, "step": 4163 }, { "epoch": 13.652459016393443, "grad_norm": 5.4019341468811035, "learning_rate": 1.9410730573535193e-05, "loss": 1.3269, "step": 4164 }, { "epoch": 13.655737704918034, "grad_norm": 6.420411109924316, "learning_rate": 1.9410371386749572e-05, "loss": 1.2949, "step": 4165 }, { "epoch": 13.659016393442624, "grad_norm": 5.6332268714904785, "learning_rate": 1.941001209385223e-05, "loss": 1.4685, "step": 4166 }, { "epoch": 13.662295081967214, "grad_norm": 7.088306427001953, "learning_rate": 1.9409652694847222e-05, "loss": 1.3066, "step": 4167 }, { "epoch": 13.665573770491804, "grad_norm": 6.522329807281494, "learning_rate": 1.940929318973859e-05, "loss": 1.4236, "step": 4168 }, { "epoch": 13.668852459016394, "grad_norm": 5.893106460571289, "learning_rate": 1.9408933578530395e-05, "loss": 1.4414, "step": 4169 }, { "epoch": 13.672131147540984, "grad_norm": 6.063662052154541, "learning_rate": 1.9408573861226694e-05, "loss": 1.3892, "step": 4170 }, { "epoch": 13.675409836065574, "grad_norm": 6.238983631134033, "learning_rate": 1.9408214037831537e-05, "loss": 1.4773, "step": 4171 }, { "epoch": 13.678688524590164, "grad_norm": 4.775546550750732, "learning_rate": 1.9407854108348987e-05, "loss": 1.5217, "step": 4172 }, { "epoch": 13.681967213114755, "grad_norm": 6.235365867614746, "learning_rate": 1.94074940727831e-05, "loss": 1.6172, "step": 4173 }, { "epoch": 13.685245901639345, "grad_norm": 6.211101055145264, "learning_rate": 1.9407133931137935e-05, "loss": 1.5471, "step": 4174 }, { "epoch": 13.688524590163935, "grad_norm": 5.326131820678711, "learning_rate": 1.9406773683417554e-05, "loss": 1.2952, "step": 4175 }, { "epoch": 13.691803278688525, "grad_norm": 6.566211223602295, "learning_rate": 1.940641332962602e-05, "loss": 1.4053, "step": 4176 }, { "epoch": 13.695081967213115, "grad_norm": 5.277928829193115, "learning_rate": 1.9406052869767395e-05, "loss": 1.4397, "step": 4177 }, { "epoch": 13.698360655737705, "grad_norm": 5.022453784942627, "learning_rate": 1.9405692303845742e-05, "loss": 1.4529, "step": 4178 }, { "epoch": 13.701639344262295, "grad_norm": 4.743651390075684, "learning_rate": 1.9405331631865132e-05, "loss": 1.4314, "step": 4179 }, { "epoch": 13.704918032786885, "grad_norm": 7.395517826080322, "learning_rate": 1.9404970853829627e-05, "loss": 1.283, "step": 4180 }, { "epoch": 13.708196721311475, "grad_norm": 4.802234172821045, "learning_rate": 1.9404609969743297e-05, "loss": 1.415, "step": 4181 }, { "epoch": 13.711475409836066, "grad_norm": 11.132512092590332, "learning_rate": 1.9404248979610213e-05, "loss": 1.2561, "step": 4182 }, { "epoch": 13.714754098360656, "grad_norm": 5.725765228271484, "learning_rate": 1.9403887883434442e-05, "loss": 1.5144, "step": 4183 }, { "epoch": 13.718032786885246, "grad_norm": 5.615833759307861, "learning_rate": 1.9403526681220057e-05, "loss": 1.5598, "step": 4184 }, { "epoch": 13.721311475409836, "grad_norm": 6.2988667488098145, "learning_rate": 1.9403165372971133e-05, "loss": 1.1226, "step": 4185 }, { "epoch": 13.724590163934426, "grad_norm": 5.841108798980713, "learning_rate": 1.9402803958691742e-05, "loss": 1.5312, "step": 4186 }, { "epoch": 13.727868852459016, "grad_norm": 5.513788223266602, "learning_rate": 1.940244243838596e-05, "loss": 1.5212, "step": 4187 }, { "epoch": 13.731147540983606, "grad_norm": 5.828296661376953, "learning_rate": 1.9402080812057863e-05, "loss": 1.6624, "step": 4188 }, { "epoch": 13.734426229508196, "grad_norm": 5.946946144104004, "learning_rate": 1.940171907971153e-05, "loss": 1.5439, "step": 4189 }, { "epoch": 13.737704918032787, "grad_norm": 5.658021926879883, "learning_rate": 1.940135724135104e-05, "loss": 1.489, "step": 4190 }, { "epoch": 13.740983606557377, "grad_norm": 6.844274520874023, "learning_rate": 1.940099529698047e-05, "loss": 1.3503, "step": 4191 }, { "epoch": 13.744262295081967, "grad_norm": 6.999078750610352, "learning_rate": 1.9400633246603904e-05, "loss": 1.4585, "step": 4192 }, { "epoch": 13.747540983606557, "grad_norm": 7.412806034088135, "learning_rate": 1.9400271090225423e-05, "loss": 1.3589, "step": 4193 }, { "epoch": 13.750819672131147, "grad_norm": 7.533016204833984, "learning_rate": 1.9399908827849114e-05, "loss": 1.5964, "step": 4194 }, { "epoch": 13.754098360655737, "grad_norm": 4.816036224365234, "learning_rate": 1.939954645947906e-05, "loss": 1.4722, "step": 4195 }, { "epoch": 13.757377049180327, "grad_norm": 4.9839019775390625, "learning_rate": 1.9399183985119344e-05, "loss": 1.6292, "step": 4196 }, { "epoch": 13.760655737704917, "grad_norm": 5.436607360839844, "learning_rate": 1.939882140477406e-05, "loss": 1.478, "step": 4197 }, { "epoch": 13.763934426229508, "grad_norm": 6.096150875091553, "learning_rate": 1.9398458718447287e-05, "loss": 1.4524, "step": 4198 }, { "epoch": 13.767213114754098, "grad_norm": 5.329448699951172, "learning_rate": 1.939809592614312e-05, "loss": 1.254, "step": 4199 }, { "epoch": 13.770491803278688, "grad_norm": 9.294218063354492, "learning_rate": 1.9397733027865652e-05, "loss": 1.5044, "step": 4200 }, { "epoch": 13.773770491803278, "grad_norm": 7.42577600479126, "learning_rate": 1.939737002361897e-05, "loss": 1.3022, "step": 4201 }, { "epoch": 13.777049180327868, "grad_norm": 8.157361030578613, "learning_rate": 1.9397006913407175e-05, "loss": 1.2469, "step": 4202 }, { "epoch": 13.780327868852458, "grad_norm": 7.83432674407959, "learning_rate": 1.9396643697234355e-05, "loss": 1.2815, "step": 4203 }, { "epoch": 13.783606557377048, "grad_norm": 4.6021504402160645, "learning_rate": 1.9396280375104605e-05, "loss": 1.4303, "step": 4204 }, { "epoch": 13.78688524590164, "grad_norm": 6.137490272521973, "learning_rate": 1.9395916947022028e-05, "loss": 1.366, "step": 4205 }, { "epoch": 13.790163934426229, "grad_norm": 5.931227207183838, "learning_rate": 1.9395553412990716e-05, "loss": 1.4976, "step": 4206 }, { "epoch": 13.79344262295082, "grad_norm": 5.088767051696777, "learning_rate": 1.939518977301477e-05, "loss": 1.4612, "step": 4207 }, { "epoch": 13.79672131147541, "grad_norm": 5.81878137588501, "learning_rate": 1.9394826027098292e-05, "loss": 1.3987, "step": 4208 }, { "epoch": 13.8, "grad_norm": 7.490156173706055, "learning_rate": 1.9394462175245382e-05, "loss": 1.4617, "step": 4209 }, { "epoch": 13.80327868852459, "grad_norm": 4.969046592712402, "learning_rate": 1.939409821746014e-05, "loss": 1.4268, "step": 4210 }, { "epoch": 13.806557377049181, "grad_norm": 5.077479362487793, "learning_rate": 1.9393734153746678e-05, "loss": 1.4033, "step": 4211 }, { "epoch": 13.809836065573771, "grad_norm": 5.627416133880615, "learning_rate": 1.9393369984109094e-05, "loss": 1.4241, "step": 4212 }, { "epoch": 13.813114754098361, "grad_norm": 5.509609222412109, "learning_rate": 1.93930057085515e-05, "loss": 1.48, "step": 4213 }, { "epoch": 13.816393442622951, "grad_norm": 6.205145359039307, "learning_rate": 1.9392641327077995e-05, "loss": 1.3796, "step": 4214 }, { "epoch": 13.819672131147541, "grad_norm": 7.125100612640381, "learning_rate": 1.9392276839692694e-05, "loss": 1.4829, "step": 4215 }, { "epoch": 13.822950819672132, "grad_norm": 6.750877380371094, "learning_rate": 1.939191224639971e-05, "loss": 1.2085, "step": 4216 }, { "epoch": 13.826229508196722, "grad_norm": 4.46464729309082, "learning_rate": 1.9391547547203146e-05, "loss": 1.3738, "step": 4217 }, { "epoch": 13.829508196721312, "grad_norm": 4.986719608306885, "learning_rate": 1.9391182742107123e-05, "loss": 1.218, "step": 4218 }, { "epoch": 13.832786885245902, "grad_norm": 5.600821495056152, "learning_rate": 1.9390817831115745e-05, "loss": 1.2231, "step": 4219 }, { "epoch": 13.836065573770492, "grad_norm": 7.300820350646973, "learning_rate": 1.9390452814233133e-05, "loss": 1.5596, "step": 4220 }, { "epoch": 13.839344262295082, "grad_norm": 10.509516716003418, "learning_rate": 1.9390087691463407e-05, "loss": 1.4724, "step": 4221 }, { "epoch": 13.842622950819672, "grad_norm": 5.136231422424316, "learning_rate": 1.9389722462810673e-05, "loss": 1.3696, "step": 4222 }, { "epoch": 13.845901639344262, "grad_norm": 6.888629913330078, "learning_rate": 1.938935712827906e-05, "loss": 1.2847, "step": 4223 }, { "epoch": 13.849180327868853, "grad_norm": 5.4094438552856445, "learning_rate": 1.938899168787268e-05, "loss": 1.3481, "step": 4224 }, { "epoch": 13.852459016393443, "grad_norm": 6.298682689666748, "learning_rate": 1.938862614159566e-05, "loss": 1.2891, "step": 4225 }, { "epoch": 13.855737704918033, "grad_norm": 6.911571979522705, "learning_rate": 1.9388260489452113e-05, "loss": 1.5513, "step": 4226 }, { "epoch": 13.859016393442623, "grad_norm": 6.523331165313721, "learning_rate": 1.9387894731446172e-05, "loss": 1.3665, "step": 4227 }, { "epoch": 13.862295081967213, "grad_norm": 4.543163299560547, "learning_rate": 1.9387528867581957e-05, "loss": 1.5325, "step": 4228 }, { "epoch": 13.865573770491803, "grad_norm": 6.31658935546875, "learning_rate": 1.938716289786359e-05, "loss": 1.3967, "step": 4229 }, { "epoch": 13.868852459016393, "grad_norm": 6.498244285583496, "learning_rate": 1.9386796822295206e-05, "loss": 1.4685, "step": 4230 }, { "epoch": 13.872131147540983, "grad_norm": 5.332920551300049, "learning_rate": 1.9386430640880923e-05, "loss": 1.4666, "step": 4231 }, { "epoch": 13.875409836065574, "grad_norm": 5.539114475250244, "learning_rate": 1.9386064353624877e-05, "loss": 1.4661, "step": 4232 }, { "epoch": 13.878688524590164, "grad_norm": 5.939239025115967, "learning_rate": 1.9385697960531196e-05, "loss": 1.324, "step": 4233 }, { "epoch": 13.881967213114754, "grad_norm": 6.4807820320129395, "learning_rate": 1.9385331461604012e-05, "loss": 1.3071, "step": 4234 }, { "epoch": 13.885245901639344, "grad_norm": 8.409679412841797, "learning_rate": 1.938496485684746e-05, "loss": 1.5366, "step": 4235 }, { "epoch": 13.888524590163934, "grad_norm": 7.405941486358643, "learning_rate": 1.9384598146265668e-05, "loss": 1.2242, "step": 4236 }, { "epoch": 13.891803278688524, "grad_norm": 6.837608814239502, "learning_rate": 1.9384231329862776e-05, "loss": 1.5566, "step": 4237 }, { "epoch": 13.895081967213114, "grad_norm": 6.417967319488525, "learning_rate": 1.9383864407642918e-05, "loss": 1.3306, "step": 4238 }, { "epoch": 13.898360655737704, "grad_norm": 7.282597541809082, "learning_rate": 1.9383497379610233e-05, "loss": 1.4373, "step": 4239 }, { "epoch": 13.901639344262295, "grad_norm": 5.335271835327148, "learning_rate": 1.938313024576886e-05, "loss": 1.489, "step": 4240 }, { "epoch": 13.904918032786885, "grad_norm": 6.537214279174805, "learning_rate": 1.9382763006122932e-05, "loss": 1.4067, "step": 4241 }, { "epoch": 13.908196721311475, "grad_norm": 7.268737316131592, "learning_rate": 1.9382395660676597e-05, "loss": 1.3616, "step": 4242 }, { "epoch": 13.911475409836065, "grad_norm": 6.326832294464111, "learning_rate": 1.9382028209434e-05, "loss": 1.2042, "step": 4243 }, { "epoch": 13.914754098360655, "grad_norm": 5.361358165740967, "learning_rate": 1.9381660652399276e-05, "loss": 1.6357, "step": 4244 }, { "epoch": 13.918032786885245, "grad_norm": 6.932002544403076, "learning_rate": 1.938129298957658e-05, "loss": 1.2893, "step": 4245 }, { "epoch": 13.921311475409835, "grad_norm": 6.085315227508545, "learning_rate": 1.9380925220970043e-05, "loss": 1.3152, "step": 4246 }, { "epoch": 13.924590163934425, "grad_norm": 5.188230514526367, "learning_rate": 1.9380557346583826e-05, "loss": 1.4443, "step": 4247 }, { "epoch": 13.927868852459017, "grad_norm": 5.955390930175781, "learning_rate": 1.938018936642207e-05, "loss": 1.4279, "step": 4248 }, { "epoch": 13.931147540983606, "grad_norm": 5.524491310119629, "learning_rate": 1.9379821280488924e-05, "loss": 1.4167, "step": 4249 }, { "epoch": 13.934426229508198, "grad_norm": 5.0030012130737305, "learning_rate": 1.9379453088788543e-05, "loss": 1.4385, "step": 4250 }, { "epoch": 13.937704918032788, "grad_norm": 5.493734836578369, "learning_rate": 1.9379084791325073e-05, "loss": 1.5161, "step": 4251 }, { "epoch": 13.940983606557378, "grad_norm": 7.077645301818848, "learning_rate": 1.9378716388102675e-05, "loss": 1.325, "step": 4252 }, { "epoch": 13.944262295081968, "grad_norm": 5.1558003425598145, "learning_rate": 1.9378347879125497e-05, "loss": 1.3721, "step": 4253 }, { "epoch": 13.947540983606558, "grad_norm": 5.438504695892334, "learning_rate": 1.937797926439769e-05, "loss": 1.3459, "step": 4254 }, { "epoch": 13.950819672131148, "grad_norm": 5.36304235458374, "learning_rate": 1.9377610543923423e-05, "loss": 1.344, "step": 4255 }, { "epoch": 13.954098360655738, "grad_norm": 5.264095783233643, "learning_rate": 1.9377241717706846e-05, "loss": 1.4517, "step": 4256 }, { "epoch": 13.957377049180328, "grad_norm": 7.578881740570068, "learning_rate": 1.9376872785752117e-05, "loss": 1.0981, "step": 4257 }, { "epoch": 13.960655737704919, "grad_norm": 6.174148082733154, "learning_rate": 1.9376503748063397e-05, "loss": 1.2577, "step": 4258 }, { "epoch": 13.963934426229509, "grad_norm": 6.473388671875, "learning_rate": 1.937613460464485e-05, "loss": 1.5488, "step": 4259 }, { "epoch": 13.967213114754099, "grad_norm": 5.38677453994751, "learning_rate": 1.9375765355500635e-05, "loss": 1.4978, "step": 4260 }, { "epoch": 13.970491803278689, "grad_norm": 5.56431770324707, "learning_rate": 1.937539600063492e-05, "loss": 1.4836, "step": 4261 }, { "epoch": 13.973770491803279, "grad_norm": 5.909606456756592, "learning_rate": 1.9375026540051864e-05, "loss": 1.731, "step": 4262 }, { "epoch": 13.97704918032787, "grad_norm": 4.902935028076172, "learning_rate": 1.9374656973755638e-05, "loss": 1.4236, "step": 4263 }, { "epoch": 13.98032786885246, "grad_norm": 6.199972629547119, "learning_rate": 1.9374287301750405e-05, "loss": 1.4185, "step": 4264 }, { "epoch": 13.98360655737705, "grad_norm": 12.244927406311035, "learning_rate": 1.937391752404034e-05, "loss": 1.3577, "step": 4265 }, { "epoch": 13.98688524590164, "grad_norm": 4.936150074005127, "learning_rate": 1.9373547640629604e-05, "loss": 1.4885, "step": 4266 }, { "epoch": 13.99016393442623, "grad_norm": 6.7956695556640625, "learning_rate": 1.9373177651522374e-05, "loss": 1.2288, "step": 4267 }, { "epoch": 13.99344262295082, "grad_norm": 8.418253898620605, "learning_rate": 1.937280755672282e-05, "loss": 1.3606, "step": 4268 }, { "epoch": 13.99672131147541, "grad_norm": 6.885841369628906, "learning_rate": 1.9372437356235117e-05, "loss": 1.301, "step": 4269 }, { "epoch": 14.0, "grad_norm": 10.029462814331055, "learning_rate": 1.937206705006344e-05, "loss": 1.2906, "step": 4270 }, { "epoch": 14.00327868852459, "grad_norm": 5.937808513641357, "learning_rate": 1.937169663821196e-05, "loss": 1.2861, "step": 4271 }, { "epoch": 14.00655737704918, "grad_norm": 6.627782821655273, "learning_rate": 1.9371326120684854e-05, "loss": 1.4834, "step": 4272 }, { "epoch": 14.00983606557377, "grad_norm": 5.875832557678223, "learning_rate": 1.9370955497486304e-05, "loss": 1.4795, "step": 4273 }, { "epoch": 14.01311475409836, "grad_norm": 5.094941139221191, "learning_rate": 1.9370584768620485e-05, "loss": 1.1558, "step": 4274 }, { "epoch": 14.01639344262295, "grad_norm": 4.933356761932373, "learning_rate": 1.9370213934091585e-05, "loss": 1.4138, "step": 4275 }, { "epoch": 14.01967213114754, "grad_norm": 6.778572082519531, "learning_rate": 1.9369842993903778e-05, "loss": 1.2572, "step": 4276 }, { "epoch": 14.02295081967213, "grad_norm": 9.454018592834473, "learning_rate": 1.9369471948061246e-05, "loss": 1.2634, "step": 4277 }, { "epoch": 14.026229508196721, "grad_norm": 4.545598030090332, "learning_rate": 1.9369100796568182e-05, "loss": 1.2821, "step": 4278 }, { "epoch": 14.029508196721311, "grad_norm": 7.138983249664307, "learning_rate": 1.936872953942876e-05, "loss": 1.2888, "step": 4279 }, { "epoch": 14.032786885245901, "grad_norm": 4.494211196899414, "learning_rate": 1.9368358176647174e-05, "loss": 1.3403, "step": 4280 }, { "epoch": 14.036065573770491, "grad_norm": 5.364751815795898, "learning_rate": 1.9367986708227608e-05, "loss": 1.1746, "step": 4281 }, { "epoch": 14.039344262295081, "grad_norm": 4.672427654266357, "learning_rate": 1.9367615134174252e-05, "loss": 1.4502, "step": 4282 }, { "epoch": 14.042622950819672, "grad_norm": 6.472332000732422, "learning_rate": 1.9367243454491297e-05, "loss": 1.1597, "step": 4283 }, { "epoch": 14.045901639344262, "grad_norm": 5.261764049530029, "learning_rate": 1.936687166918293e-05, "loss": 1.3665, "step": 4284 }, { "epoch": 14.049180327868852, "grad_norm": 8.31147289276123, "learning_rate": 1.9366499778253346e-05, "loss": 1.2859, "step": 4285 }, { "epoch": 14.052459016393442, "grad_norm": 5.260338306427002, "learning_rate": 1.936612778170674e-05, "loss": 1.5681, "step": 4286 }, { "epoch": 14.055737704918032, "grad_norm": 5.850723743438721, "learning_rate": 1.9365755679547304e-05, "loss": 1.405, "step": 4287 }, { "epoch": 14.059016393442622, "grad_norm": 6.776914119720459, "learning_rate": 1.9365383471779235e-05, "loss": 1.2524, "step": 4288 }, { "epoch": 14.062295081967212, "grad_norm": 7.487815856933594, "learning_rate": 1.9365011158406727e-05, "loss": 1.2925, "step": 4289 }, { "epoch": 14.065573770491802, "grad_norm": 7.904605865478516, "learning_rate": 1.9364638739433984e-05, "loss": 1.2186, "step": 4290 }, { "epoch": 14.068852459016393, "grad_norm": 5.003389358520508, "learning_rate": 1.93642662148652e-05, "loss": 1.4896, "step": 4291 }, { "epoch": 14.072131147540984, "grad_norm": 5.3611884117126465, "learning_rate": 1.936389358470458e-05, "loss": 1.3281, "step": 4292 }, { "epoch": 14.075409836065575, "grad_norm": 5.671719551086426, "learning_rate": 1.9363520848956325e-05, "loss": 1.439, "step": 4293 }, { "epoch": 14.078688524590165, "grad_norm": 5.204200744628906, "learning_rate": 1.9363148007624634e-05, "loss": 1.4517, "step": 4294 }, { "epoch": 14.081967213114755, "grad_norm": 6.353233337402344, "learning_rate": 1.9362775060713718e-05, "loss": 1.4683, "step": 4295 }, { "epoch": 14.085245901639345, "grad_norm": 4.165718078613281, "learning_rate": 1.9362402008227775e-05, "loss": 1.2576, "step": 4296 }, { "epoch": 14.088524590163935, "grad_norm": 5.872906684875488, "learning_rate": 1.9362028850171016e-05, "loss": 1.3049, "step": 4297 }, { "epoch": 14.091803278688525, "grad_norm": 5.866520881652832, "learning_rate": 1.9361655586547648e-05, "loss": 1.4371, "step": 4298 }, { "epoch": 14.095081967213115, "grad_norm": 6.101042747497559, "learning_rate": 1.936128221736188e-05, "loss": 1.137, "step": 4299 }, { "epoch": 14.098360655737705, "grad_norm": 6.040002346038818, "learning_rate": 1.936090874261792e-05, "loss": 1.4526, "step": 4300 }, { "epoch": 14.101639344262296, "grad_norm": 5.50862979888916, "learning_rate": 1.936053516231998e-05, "loss": 1.3384, "step": 4301 }, { "epoch": 14.104918032786886, "grad_norm": 5.313603401184082, "learning_rate": 1.9360161476472274e-05, "loss": 1.1479, "step": 4302 }, { "epoch": 14.108196721311476, "grad_norm": 5.676162242889404, "learning_rate": 1.935978768507902e-05, "loss": 1.2805, "step": 4303 }, { "epoch": 14.111475409836066, "grad_norm": 5.777557373046875, "learning_rate": 1.9359413788144423e-05, "loss": 1.2382, "step": 4304 }, { "epoch": 14.114754098360656, "grad_norm": 9.490338325500488, "learning_rate": 1.9359039785672707e-05, "loss": 1.2917, "step": 4305 }, { "epoch": 14.118032786885246, "grad_norm": 6.4924702644348145, "learning_rate": 1.9358665677668082e-05, "loss": 1.3254, "step": 4306 }, { "epoch": 14.121311475409836, "grad_norm": 10.12639045715332, "learning_rate": 1.9358291464134776e-05, "loss": 1.1985, "step": 4307 }, { "epoch": 14.124590163934426, "grad_norm": 5.998322486877441, "learning_rate": 1.9357917145077004e-05, "loss": 1.3677, "step": 4308 }, { "epoch": 14.127868852459017, "grad_norm": 5.825649261474609, "learning_rate": 1.9357542720498982e-05, "loss": 1.4663, "step": 4309 }, { "epoch": 14.131147540983607, "grad_norm": 5.583644390106201, "learning_rate": 1.9357168190404937e-05, "loss": 1.0262, "step": 4310 }, { "epoch": 14.134426229508197, "grad_norm": 6.622591018676758, "learning_rate": 1.9356793554799094e-05, "loss": 1.234, "step": 4311 }, { "epoch": 14.137704918032787, "grad_norm": 6.129324913024902, "learning_rate": 1.9356418813685676e-05, "loss": 1.3394, "step": 4312 }, { "epoch": 14.140983606557377, "grad_norm": 5.6547393798828125, "learning_rate": 1.9356043967068903e-05, "loss": 1.1792, "step": 4313 }, { "epoch": 14.144262295081967, "grad_norm": 6.78989839553833, "learning_rate": 1.9355669014953008e-05, "loss": 1.2676, "step": 4314 }, { "epoch": 14.147540983606557, "grad_norm": 6.3437652587890625, "learning_rate": 1.9355293957342222e-05, "loss": 1.5288, "step": 4315 }, { "epoch": 14.150819672131147, "grad_norm": 6.11290979385376, "learning_rate": 1.9354918794240768e-05, "loss": 1.2544, "step": 4316 }, { "epoch": 14.154098360655738, "grad_norm": 5.620145320892334, "learning_rate": 1.9354543525652872e-05, "loss": 1.569, "step": 4317 }, { "epoch": 14.157377049180328, "grad_norm": 7.679035663604736, "learning_rate": 1.9354168151582778e-05, "loss": 1.3921, "step": 4318 }, { "epoch": 14.160655737704918, "grad_norm": 6.508466720581055, "learning_rate": 1.935379267203471e-05, "loss": 1.386, "step": 4319 }, { "epoch": 14.163934426229508, "grad_norm": 5.210964202880859, "learning_rate": 1.9353417087012903e-05, "loss": 1.1636, "step": 4320 }, { "epoch": 14.167213114754098, "grad_norm": 6.402120590209961, "learning_rate": 1.9353041396521593e-05, "loss": 1.176, "step": 4321 }, { "epoch": 14.170491803278688, "grad_norm": 6.149623394012451, "learning_rate": 1.9352665600565018e-05, "loss": 1.1779, "step": 4322 }, { "epoch": 14.173770491803278, "grad_norm": 6.056100368499756, "learning_rate": 1.9352289699147416e-05, "loss": 1.3008, "step": 4323 }, { "epoch": 14.177049180327868, "grad_norm": 5.442596912384033, "learning_rate": 1.935191369227302e-05, "loss": 1.1982, "step": 4324 }, { "epoch": 14.180327868852459, "grad_norm": 7.036499977111816, "learning_rate": 1.9351537579946075e-05, "loss": 1.3279, "step": 4325 }, { "epoch": 14.183606557377049, "grad_norm": 6.786525249481201, "learning_rate": 1.935116136217082e-05, "loss": 1.4045, "step": 4326 }, { "epoch": 14.186885245901639, "grad_norm": 5.858677864074707, "learning_rate": 1.9350785038951498e-05, "loss": 1.3225, "step": 4327 }, { "epoch": 14.190163934426229, "grad_norm": 5.395212173461914, "learning_rate": 1.9350408610292353e-05, "loss": 1.3112, "step": 4328 }, { "epoch": 14.193442622950819, "grad_norm": 6.262006759643555, "learning_rate": 1.935003207619763e-05, "loss": 1.1349, "step": 4329 }, { "epoch": 14.19672131147541, "grad_norm": 7.975495338439941, "learning_rate": 1.9349655436671572e-05, "loss": 1.3574, "step": 4330 }, { "epoch": 14.2, "grad_norm": 5.804165840148926, "learning_rate": 1.9349278691718426e-05, "loss": 1.3076, "step": 4331 }, { "epoch": 14.20327868852459, "grad_norm": 7.001029014587402, "learning_rate": 1.9348901841342448e-05, "loss": 1.3784, "step": 4332 }, { "epoch": 14.20655737704918, "grad_norm": 7.302984714508057, "learning_rate": 1.9348524885547876e-05, "loss": 1.322, "step": 4333 }, { "epoch": 14.20983606557377, "grad_norm": 5.530000686645508, "learning_rate": 1.9348147824338967e-05, "loss": 1.5457, "step": 4334 }, { "epoch": 14.21311475409836, "grad_norm": 7.200451374053955, "learning_rate": 1.934777065771997e-05, "loss": 1.3755, "step": 4335 }, { "epoch": 14.216393442622952, "grad_norm": 5.513252258300781, "learning_rate": 1.9347393385695143e-05, "loss": 1.4634, "step": 4336 }, { "epoch": 14.219672131147542, "grad_norm": 6.551181793212891, "learning_rate": 1.9347016008268736e-05, "loss": 1.2688, "step": 4337 }, { "epoch": 14.222950819672132, "grad_norm": 6.826015949249268, "learning_rate": 1.9346638525445003e-05, "loss": 1.3181, "step": 4338 }, { "epoch": 14.226229508196722, "grad_norm": 5.903462886810303, "learning_rate": 1.9346260937228206e-05, "loss": 1.2781, "step": 4339 }, { "epoch": 14.229508196721312, "grad_norm": 7.602588653564453, "learning_rate": 1.9345883243622597e-05, "loss": 1.3254, "step": 4340 }, { "epoch": 14.232786885245902, "grad_norm": 5.341405391693115, "learning_rate": 1.9345505444632436e-05, "loss": 1.342, "step": 4341 }, { "epoch": 14.236065573770492, "grad_norm": 8.068038940429688, "learning_rate": 1.9345127540261984e-05, "loss": 1.5269, "step": 4342 }, { "epoch": 14.239344262295083, "grad_norm": 7.115002632141113, "learning_rate": 1.9344749530515504e-05, "loss": 1.2207, "step": 4343 }, { "epoch": 14.242622950819673, "grad_norm": 5.670547008514404, "learning_rate": 1.9344371415397258e-05, "loss": 1.2441, "step": 4344 }, { "epoch": 14.245901639344263, "grad_norm": 12.920042037963867, "learning_rate": 1.9343993194911508e-05, "loss": 1.2977, "step": 4345 }, { "epoch": 14.249180327868853, "grad_norm": 5.257274627685547, "learning_rate": 1.934361486906252e-05, "loss": 1.343, "step": 4346 }, { "epoch": 14.252459016393443, "grad_norm": 5.8231048583984375, "learning_rate": 1.934323643785456e-05, "loss": 1.4836, "step": 4347 }, { "epoch": 14.255737704918033, "grad_norm": 6.601258277893066, "learning_rate": 1.934285790129189e-05, "loss": 1.4133, "step": 4348 }, { "epoch": 14.259016393442623, "grad_norm": 5.4328460693359375, "learning_rate": 1.9342479259378787e-05, "loss": 1.429, "step": 4349 }, { "epoch": 14.262295081967213, "grad_norm": 7.188047885894775, "learning_rate": 1.9342100512119514e-05, "loss": 1.3486, "step": 4350 }, { "epoch": 14.265573770491804, "grad_norm": 5.861172676086426, "learning_rate": 1.9341721659518347e-05, "loss": 1.4124, "step": 4351 }, { "epoch": 14.268852459016394, "grad_norm": 10.115544319152832, "learning_rate": 1.9341342701579554e-05, "loss": 1.1831, "step": 4352 }, { "epoch": 14.272131147540984, "grad_norm": 6.678114414215088, "learning_rate": 1.934096363830741e-05, "loss": 1.0377, "step": 4353 }, { "epoch": 14.275409836065574, "grad_norm": 6.394542694091797, "learning_rate": 1.934058446970619e-05, "loss": 1.4509, "step": 4354 }, { "epoch": 14.278688524590164, "grad_norm": 5.624626159667969, "learning_rate": 1.9340205195780165e-05, "loss": 1.1628, "step": 4355 }, { "epoch": 14.281967213114754, "grad_norm": 6.4834303855896, "learning_rate": 1.9339825816533614e-05, "loss": 1.3374, "step": 4356 }, { "epoch": 14.285245901639344, "grad_norm": 5.41970157623291, "learning_rate": 1.9339446331970822e-05, "loss": 1.1947, "step": 4357 }, { "epoch": 14.288524590163934, "grad_norm": 6.231420516967773, "learning_rate": 1.933906674209606e-05, "loss": 1.0791, "step": 4358 }, { "epoch": 14.291803278688525, "grad_norm": 4.790335655212402, "learning_rate": 1.933868704691361e-05, "loss": 1.2954, "step": 4359 }, { "epoch": 14.295081967213115, "grad_norm": 5.729928970336914, "learning_rate": 1.9338307246427753e-05, "loss": 1.3604, "step": 4360 }, { "epoch": 14.298360655737705, "grad_norm": 19.706506729125977, "learning_rate": 1.933792734064277e-05, "loss": 1.4265, "step": 4361 }, { "epoch": 14.301639344262295, "grad_norm": 5.459381580352783, "learning_rate": 1.933754732956295e-05, "loss": 1.373, "step": 4362 }, { "epoch": 14.304918032786885, "grad_norm": 7.275863170623779, "learning_rate": 1.9337167213192573e-05, "loss": 1.2566, "step": 4363 }, { "epoch": 14.308196721311475, "grad_norm": 5.96568489074707, "learning_rate": 1.933678699153593e-05, "loss": 1.181, "step": 4364 }, { "epoch": 14.311475409836065, "grad_norm": 5.372664928436279, "learning_rate": 1.9336406664597307e-05, "loss": 1.1155, "step": 4365 }, { "epoch": 14.314754098360655, "grad_norm": 5.924442768096924, "learning_rate": 1.933602623238099e-05, "loss": 1.2998, "step": 4366 }, { "epoch": 14.318032786885245, "grad_norm": 5.55394172668457, "learning_rate": 1.9335645694891266e-05, "loss": 1.1885, "step": 4367 }, { "epoch": 14.321311475409836, "grad_norm": 6.203023910522461, "learning_rate": 1.9335265052132434e-05, "loss": 1.3041, "step": 4368 }, { "epoch": 14.324590163934426, "grad_norm": 5.9190168380737305, "learning_rate": 1.9334884304108782e-05, "loss": 1.3088, "step": 4369 }, { "epoch": 14.327868852459016, "grad_norm": 6.13668966293335, "learning_rate": 1.9334503450824607e-05, "loss": 1.2457, "step": 4370 }, { "epoch": 14.331147540983606, "grad_norm": 7.035022735595703, "learning_rate": 1.9334122492284194e-05, "loss": 1.4062, "step": 4371 }, { "epoch": 14.334426229508196, "grad_norm": 7.131096839904785, "learning_rate": 1.933374142849185e-05, "loss": 1.4392, "step": 4372 }, { "epoch": 14.337704918032786, "grad_norm": 5.522678375244141, "learning_rate": 1.9333360259451864e-05, "loss": 1.4663, "step": 4373 }, { "epoch": 14.340983606557376, "grad_norm": 6.16610050201416, "learning_rate": 1.9332978985168536e-05, "loss": 1.3257, "step": 4374 }, { "epoch": 14.344262295081966, "grad_norm": 7.262687683105469, "learning_rate": 1.933259760564617e-05, "loss": 1.2288, "step": 4375 }, { "epoch": 14.347540983606557, "grad_norm": 6.740689277648926, "learning_rate": 1.933221612088906e-05, "loss": 1.1362, "step": 4376 }, { "epoch": 14.350819672131147, "grad_norm": 7.065206527709961, "learning_rate": 1.933183453090151e-05, "loss": 1.3035, "step": 4377 }, { "epoch": 14.354098360655737, "grad_norm": 7.125499725341797, "learning_rate": 1.9331452835687823e-05, "loss": 1.3406, "step": 4378 }, { "epoch": 14.357377049180329, "grad_norm": 6.1391425132751465, "learning_rate": 1.93310710352523e-05, "loss": 1.1987, "step": 4379 }, { "epoch": 14.360655737704919, "grad_norm": 6.055422306060791, "learning_rate": 1.9330689129599257e-05, "loss": 1.3848, "step": 4380 }, { "epoch": 14.363934426229509, "grad_norm": 6.373858451843262, "learning_rate": 1.9330307118732985e-05, "loss": 1.2794, "step": 4381 }, { "epoch": 14.3672131147541, "grad_norm": 17.947242736816406, "learning_rate": 1.93299250026578e-05, "loss": 1.5002, "step": 4382 }, { "epoch": 14.37049180327869, "grad_norm": 7.05114221572876, "learning_rate": 1.9329542781378014e-05, "loss": 1.3137, "step": 4383 }, { "epoch": 14.37377049180328, "grad_norm": 10.74778938293457, "learning_rate": 1.932916045489793e-05, "loss": 1.3613, "step": 4384 }, { "epoch": 14.37704918032787, "grad_norm": 5.341408729553223, "learning_rate": 1.9328778023221866e-05, "loss": 1.4209, "step": 4385 }, { "epoch": 14.38032786885246, "grad_norm": 10.840303421020508, "learning_rate": 1.9328395486354127e-05, "loss": 1.2202, "step": 4386 }, { "epoch": 14.38360655737705, "grad_norm": 7.408684730529785, "learning_rate": 1.9328012844299028e-05, "loss": 1.2524, "step": 4387 }, { "epoch": 14.38688524590164, "grad_norm": 7.017950057983398, "learning_rate": 1.932763009706089e-05, "loss": 1.1719, "step": 4388 }, { "epoch": 14.39016393442623, "grad_norm": 6.110220432281494, "learning_rate": 1.932724724464402e-05, "loss": 1.2319, "step": 4389 }, { "epoch": 14.39344262295082, "grad_norm": 6.704636573791504, "learning_rate": 1.932686428705274e-05, "loss": 1.426, "step": 4390 }, { "epoch": 14.39672131147541, "grad_norm": 11.322576522827148, "learning_rate": 1.932648122429137e-05, "loss": 1.542, "step": 4391 }, { "epoch": 14.4, "grad_norm": 8.17076587677002, "learning_rate": 1.9326098056364224e-05, "loss": 1.4094, "step": 4392 }, { "epoch": 14.40327868852459, "grad_norm": 5.554458141326904, "learning_rate": 1.9325714783275627e-05, "loss": 1.1582, "step": 4393 }, { "epoch": 14.40655737704918, "grad_norm": 5.282264709472656, "learning_rate": 1.9325331405029903e-05, "loss": 1.3169, "step": 4394 }, { "epoch": 14.40983606557377, "grad_norm": 6.0825581550598145, "learning_rate": 1.9324947921631366e-05, "loss": 1.4526, "step": 4395 }, { "epoch": 14.41311475409836, "grad_norm": 6.53171443939209, "learning_rate": 1.932456433308435e-05, "loss": 1.3833, "step": 4396 }, { "epoch": 14.416393442622951, "grad_norm": 5.517569541931152, "learning_rate": 1.9324180639393173e-05, "loss": 1.4487, "step": 4397 }, { "epoch": 14.419672131147541, "grad_norm": 7.2568769454956055, "learning_rate": 1.9323796840562166e-05, "loss": 1.2727, "step": 4398 }, { "epoch": 14.422950819672131, "grad_norm": 6.893975257873535, "learning_rate": 1.9323412936595655e-05, "loss": 1.1729, "step": 4399 }, { "epoch": 14.426229508196721, "grad_norm": 5.021090984344482, "learning_rate": 1.9323028927497968e-05, "loss": 1.4104, "step": 4400 }, { "epoch": 14.429508196721311, "grad_norm": 8.65341567993164, "learning_rate": 1.932264481327344e-05, "loss": 1.5056, "step": 4401 }, { "epoch": 14.432786885245902, "grad_norm": 5.340765953063965, "learning_rate": 1.9322260593926394e-05, "loss": 1.4663, "step": 4402 }, { "epoch": 14.436065573770492, "grad_norm": 6.533968925476074, "learning_rate": 1.932187626946117e-05, "loss": 1.2588, "step": 4403 }, { "epoch": 14.439344262295082, "grad_norm": 6.568120956420898, "learning_rate": 1.93214918398821e-05, "loss": 1.2971, "step": 4404 }, { "epoch": 14.442622950819672, "grad_norm": 6.4681477546691895, "learning_rate": 1.9321107305193516e-05, "loss": 1.4651, "step": 4405 }, { "epoch": 14.445901639344262, "grad_norm": 7.941619396209717, "learning_rate": 1.9320722665399755e-05, "loss": 1.363, "step": 4406 }, { "epoch": 14.449180327868852, "grad_norm": 5.493140697479248, "learning_rate": 1.932033792050515e-05, "loss": 1.207, "step": 4407 }, { "epoch": 14.452459016393442, "grad_norm": 5.944809913635254, "learning_rate": 1.9319953070514052e-05, "loss": 1.0471, "step": 4408 }, { "epoch": 14.455737704918032, "grad_norm": 7.944741725921631, "learning_rate": 1.931956811543079e-05, "loss": 1.6711, "step": 4409 }, { "epoch": 14.459016393442623, "grad_norm": 6.770572662353516, "learning_rate": 1.9319183055259708e-05, "loss": 1.4353, "step": 4410 }, { "epoch": 14.462295081967213, "grad_norm": 13.005511283874512, "learning_rate": 1.931879789000515e-05, "loss": 1.2595, "step": 4411 }, { "epoch": 14.465573770491803, "grad_norm": 8.118657112121582, "learning_rate": 1.9318412619671453e-05, "loss": 1.1658, "step": 4412 }, { "epoch": 14.468852459016393, "grad_norm": 15.027040481567383, "learning_rate": 1.9318027244262964e-05, "loss": 1.4158, "step": 4413 }, { "epoch": 14.472131147540983, "grad_norm": 5.7605366706848145, "learning_rate": 1.9317641763784034e-05, "loss": 1.4043, "step": 4414 }, { "epoch": 14.475409836065573, "grad_norm": 7.749424457550049, "learning_rate": 1.9317256178239e-05, "loss": 1.3467, "step": 4415 }, { "epoch": 14.478688524590163, "grad_norm": 6.077833652496338, "learning_rate": 1.931687048763222e-05, "loss": 1.3779, "step": 4416 }, { "epoch": 14.481967213114753, "grad_norm": 8.453313827514648, "learning_rate": 1.9316484691968035e-05, "loss": 1.45, "step": 4417 }, { "epoch": 14.485245901639344, "grad_norm": 6.1947808265686035, "learning_rate": 1.93160987912508e-05, "loss": 1.2478, "step": 4418 }, { "epoch": 14.488524590163934, "grad_norm": 6.585542678833008, "learning_rate": 1.931571278548486e-05, "loss": 1.3286, "step": 4419 }, { "epoch": 14.491803278688524, "grad_norm": 5.966005802154541, "learning_rate": 1.931532667467458e-05, "loss": 1.2451, "step": 4420 }, { "epoch": 14.495081967213114, "grad_norm": 9.680480003356934, "learning_rate": 1.9314940458824303e-05, "loss": 1.3577, "step": 4421 }, { "epoch": 14.498360655737706, "grad_norm": 8.656535148620605, "learning_rate": 1.9314554137938388e-05, "loss": 1.0956, "step": 4422 }, { "epoch": 14.501639344262294, "grad_norm": 8.460689544677734, "learning_rate": 1.9314167712021186e-05, "loss": 1.1816, "step": 4423 }, { "epoch": 14.504918032786886, "grad_norm": 6.165332794189453, "learning_rate": 1.9313781181077066e-05, "loss": 1.217, "step": 4424 }, { "epoch": 14.508196721311476, "grad_norm": 5.859138488769531, "learning_rate": 1.9313394545110375e-05, "loss": 1.29, "step": 4425 }, { "epoch": 14.511475409836066, "grad_norm": 7.011918067932129, "learning_rate": 1.9313007804125476e-05, "loss": 1.3792, "step": 4426 }, { "epoch": 14.514754098360656, "grad_norm": 6.502308368682861, "learning_rate": 1.9312620958126733e-05, "loss": 1.2925, "step": 4427 }, { "epoch": 14.518032786885247, "grad_norm": 7.74381160736084, "learning_rate": 1.9312234007118503e-05, "loss": 1.2551, "step": 4428 }, { "epoch": 14.521311475409837, "grad_norm": 10.388935089111328, "learning_rate": 1.9311846951105154e-05, "loss": 1.4771, "step": 4429 }, { "epoch": 14.524590163934427, "grad_norm": 5.31510591506958, "learning_rate": 1.931145979009105e-05, "loss": 1.4651, "step": 4430 }, { "epoch": 14.527868852459017, "grad_norm": 7.002598285675049, "learning_rate": 1.9311072524080554e-05, "loss": 1.3401, "step": 4431 }, { "epoch": 14.531147540983607, "grad_norm": 6.784724712371826, "learning_rate": 1.9310685153078034e-05, "loss": 1.5972, "step": 4432 }, { "epoch": 14.534426229508197, "grad_norm": 4.902523517608643, "learning_rate": 1.931029767708786e-05, "loss": 1.4897, "step": 4433 }, { "epoch": 14.537704918032787, "grad_norm": 5.884494304656982, "learning_rate": 1.93099100961144e-05, "loss": 1.4036, "step": 4434 }, { "epoch": 14.540983606557377, "grad_norm": 7.450108051300049, "learning_rate": 1.930952241016202e-05, "loss": 1.4514, "step": 4435 }, { "epoch": 14.544262295081968, "grad_norm": 6.435813903808594, "learning_rate": 1.9309134619235096e-05, "loss": 1.3379, "step": 4436 }, { "epoch": 14.547540983606558, "grad_norm": 6.560095310211182, "learning_rate": 1.9308746723338005e-05, "loss": 1.5569, "step": 4437 }, { "epoch": 14.550819672131148, "grad_norm": 7.449155330657959, "learning_rate": 1.930835872247511e-05, "loss": 1.5862, "step": 4438 }, { "epoch": 14.554098360655738, "grad_norm": 8.724637985229492, "learning_rate": 1.9307970616650794e-05, "loss": 1.5718, "step": 4439 }, { "epoch": 14.557377049180328, "grad_norm": 6.050788879394531, "learning_rate": 1.9307582405869435e-05, "loss": 1.2804, "step": 4440 }, { "epoch": 14.560655737704918, "grad_norm": 6.848214149475098, "learning_rate": 1.9307194090135402e-05, "loss": 1.5085, "step": 4441 }, { "epoch": 14.563934426229508, "grad_norm": 6.274320602416992, "learning_rate": 1.9306805669453083e-05, "loss": 1.3884, "step": 4442 }, { "epoch": 14.567213114754098, "grad_norm": 9.900691986083984, "learning_rate": 1.9306417143826854e-05, "loss": 1.3601, "step": 4443 }, { "epoch": 14.570491803278689, "grad_norm": 6.000416278839111, "learning_rate": 1.930602851326109e-05, "loss": 1.4109, "step": 4444 }, { "epoch": 14.573770491803279, "grad_norm": 8.564974784851074, "learning_rate": 1.930563977776018e-05, "loss": 1.3333, "step": 4445 }, { "epoch": 14.577049180327869, "grad_norm": 6.132862567901611, "learning_rate": 1.930525093732851e-05, "loss": 1.394, "step": 4446 }, { "epoch": 14.580327868852459, "grad_norm": 8.283713340759277, "learning_rate": 1.9304861991970454e-05, "loss": 1.2727, "step": 4447 }, { "epoch": 14.583606557377049, "grad_norm": 9.059849739074707, "learning_rate": 1.9304472941690412e-05, "loss": 1.4836, "step": 4448 }, { "epoch": 14.58688524590164, "grad_norm": 5.708420276641846, "learning_rate": 1.930408378649276e-05, "loss": 1.3567, "step": 4449 }, { "epoch": 14.59016393442623, "grad_norm": 6.272709369659424, "learning_rate": 1.9303694526381886e-05, "loss": 1.4148, "step": 4450 }, { "epoch": 14.59344262295082, "grad_norm": 9.20096206665039, "learning_rate": 1.9303305161362188e-05, "loss": 1.5234, "step": 4451 }, { "epoch": 14.59672131147541, "grad_norm": 6.9108500480651855, "learning_rate": 1.930291569143805e-05, "loss": 1.4094, "step": 4452 }, { "epoch": 14.6, "grad_norm": 11.932204246520996, "learning_rate": 1.9302526116613863e-05, "loss": 1.1368, "step": 4453 }, { "epoch": 14.60327868852459, "grad_norm": 6.466622352600098, "learning_rate": 1.9302136436894022e-05, "loss": 1.4186, "step": 4454 }, { "epoch": 14.60655737704918, "grad_norm": 5.21495246887207, "learning_rate": 1.9301746652282923e-05, "loss": 1.3336, "step": 4455 }, { "epoch": 14.60983606557377, "grad_norm": 6.097749710083008, "learning_rate": 1.930135676278496e-05, "loss": 1.3298, "step": 4456 }, { "epoch": 14.61311475409836, "grad_norm": 6.34276008605957, "learning_rate": 1.9300966768404526e-05, "loss": 1.308, "step": 4457 }, { "epoch": 14.61639344262295, "grad_norm": 6.62117338180542, "learning_rate": 1.930057666914602e-05, "loss": 1.2288, "step": 4458 }, { "epoch": 14.61967213114754, "grad_norm": 6.996241569519043, "learning_rate": 1.9300186465013845e-05, "loss": 1.3906, "step": 4459 }, { "epoch": 14.62295081967213, "grad_norm": 5.387485504150391, "learning_rate": 1.9299796156012397e-05, "loss": 1.1652, "step": 4460 }, { "epoch": 14.62622950819672, "grad_norm": 8.11293888092041, "learning_rate": 1.9299405742146078e-05, "loss": 1.2493, "step": 4461 }, { "epoch": 14.62950819672131, "grad_norm": 5.687828063964844, "learning_rate": 1.929901522341929e-05, "loss": 1.209, "step": 4462 }, { "epoch": 14.6327868852459, "grad_norm": 6.699526786804199, "learning_rate": 1.9298624599836435e-05, "loss": 1.3298, "step": 4463 }, { "epoch": 14.636065573770491, "grad_norm": 5.057247638702393, "learning_rate": 1.929823387140192e-05, "loss": 1.4429, "step": 4464 }, { "epoch": 14.639344262295083, "grad_norm": 6.383164882659912, "learning_rate": 1.9297843038120153e-05, "loss": 1.3674, "step": 4465 }, { "epoch": 14.642622950819671, "grad_norm": 6.491387367248535, "learning_rate": 1.9297452099995534e-05, "loss": 1.3916, "step": 4466 }, { "epoch": 14.645901639344263, "grad_norm": 6.017464637756348, "learning_rate": 1.929706105703248e-05, "loss": 1.4685, "step": 4467 }, { "epoch": 14.649180327868853, "grad_norm": 4.728129863739014, "learning_rate": 1.9296669909235394e-05, "loss": 1.5125, "step": 4468 }, { "epoch": 14.652459016393443, "grad_norm": 7.998268127441406, "learning_rate": 1.929627865660869e-05, "loss": 1.2883, "step": 4469 }, { "epoch": 14.655737704918034, "grad_norm": 5.347118377685547, "learning_rate": 1.9295887299156777e-05, "loss": 1.2942, "step": 4470 }, { "epoch": 14.659016393442624, "grad_norm": 6.652716636657715, "learning_rate": 1.929549583688407e-05, "loss": 1.5635, "step": 4471 }, { "epoch": 14.662295081967214, "grad_norm": 5.368921279907227, "learning_rate": 1.929510426979498e-05, "loss": 1.23, "step": 4472 }, { "epoch": 14.665573770491804, "grad_norm": 7.533402442932129, "learning_rate": 1.9294712597893933e-05, "loss": 1.3174, "step": 4473 }, { "epoch": 14.668852459016394, "grad_norm": 5.554571151733398, "learning_rate": 1.9294320821185328e-05, "loss": 1.3296, "step": 4474 }, { "epoch": 14.672131147540984, "grad_norm": 6.190000057220459, "learning_rate": 1.92939289396736e-05, "loss": 1.3162, "step": 4475 }, { "epoch": 14.675409836065574, "grad_norm": 5.207376480102539, "learning_rate": 1.929353695336315e-05, "loss": 1.3052, "step": 4476 }, { "epoch": 14.678688524590164, "grad_norm": 9.432092666625977, "learning_rate": 1.9293144862258416e-05, "loss": 1.3257, "step": 4477 }, { "epoch": 14.681967213114755, "grad_norm": 5.7993550300598145, "learning_rate": 1.929275266636381e-05, "loss": 1.3921, "step": 4478 }, { "epoch": 14.685245901639345, "grad_norm": 7.469425678253174, "learning_rate": 1.9292360365683757e-05, "loss": 1.4045, "step": 4479 }, { "epoch": 14.688524590163935, "grad_norm": 5.414825439453125, "learning_rate": 1.9291967960222676e-05, "loss": 1.2198, "step": 4480 }, { "epoch": 14.691803278688525, "grad_norm": 5.641054153442383, "learning_rate": 1.9291575449984995e-05, "loss": 1.407, "step": 4481 }, { "epoch": 14.695081967213115, "grad_norm": 6.524509906768799, "learning_rate": 1.929118283497514e-05, "loss": 1.4473, "step": 4482 }, { "epoch": 14.698360655737705, "grad_norm": 5.80792760848999, "learning_rate": 1.929079011519754e-05, "loss": 1.1653, "step": 4483 }, { "epoch": 14.701639344262295, "grad_norm": 7.314389705657959, "learning_rate": 1.929039729065662e-05, "loss": 1.3323, "step": 4484 }, { "epoch": 14.704918032786885, "grad_norm": 5.36889123916626, "learning_rate": 1.9290004361356813e-05, "loss": 1.3049, "step": 4485 }, { "epoch": 14.708196721311475, "grad_norm": 6.704766273498535, "learning_rate": 1.9289611327302546e-05, "loss": 1.5679, "step": 4486 }, { "epoch": 14.711475409836066, "grad_norm": 6.613257884979248, "learning_rate": 1.9289218188498252e-05, "loss": 1.3691, "step": 4487 }, { "epoch": 14.714754098360656, "grad_norm": 5.329147815704346, "learning_rate": 1.9288824944948367e-05, "loss": 1.6519, "step": 4488 }, { "epoch": 14.718032786885246, "grad_norm": 5.615301609039307, "learning_rate": 1.928843159665732e-05, "loss": 1.4204, "step": 4489 }, { "epoch": 14.721311475409836, "grad_norm": 5.002199172973633, "learning_rate": 1.9288038143629547e-05, "loss": 1.288, "step": 4490 }, { "epoch": 14.724590163934426, "grad_norm": 5.676441669464111, "learning_rate": 1.9287644585869493e-05, "loss": 1.3029, "step": 4491 }, { "epoch": 14.727868852459016, "grad_norm": 4.602145195007324, "learning_rate": 1.9287250923381587e-05, "loss": 1.4478, "step": 4492 }, { "epoch": 14.731147540983606, "grad_norm": 5.980408191680908, "learning_rate": 1.928685715617027e-05, "loss": 1.3052, "step": 4493 }, { "epoch": 14.734426229508196, "grad_norm": 5.363458633422852, "learning_rate": 1.928646328423998e-05, "loss": 1.3506, "step": 4494 }, { "epoch": 14.737704918032787, "grad_norm": 5.992030620574951, "learning_rate": 1.9286069307595166e-05, "loss": 1.1287, "step": 4495 }, { "epoch": 14.740983606557377, "grad_norm": 6.193769454956055, "learning_rate": 1.9285675226240263e-05, "loss": 1.3855, "step": 4496 }, { "epoch": 14.744262295081967, "grad_norm": 5.353915691375732, "learning_rate": 1.9285281040179717e-05, "loss": 1.1609, "step": 4497 }, { "epoch": 14.747540983606557, "grad_norm": 5.670456886291504, "learning_rate": 1.9284886749417974e-05, "loss": 1.5208, "step": 4498 }, { "epoch": 14.750819672131147, "grad_norm": 5.685783863067627, "learning_rate": 1.928449235395948e-05, "loss": 1.4766, "step": 4499 }, { "epoch": 14.754098360655737, "grad_norm": 6.28975772857666, "learning_rate": 1.9284097853808678e-05, "loss": 1.3926, "step": 4500 }, { "epoch": 14.757377049180327, "grad_norm": 23.81737518310547, "learning_rate": 1.9283703248970022e-05, "loss": 1.3999, "step": 4501 }, { "epoch": 14.760655737704917, "grad_norm": 4.931875228881836, "learning_rate": 1.928330853944796e-05, "loss": 1.3486, "step": 4502 }, { "epoch": 14.763934426229508, "grad_norm": 5.287425518035889, "learning_rate": 1.9282913725246937e-05, "loss": 1.3669, "step": 4503 }, { "epoch": 14.767213114754098, "grad_norm": 14.009918212890625, "learning_rate": 1.928251880637141e-05, "loss": 1.3174, "step": 4504 }, { "epoch": 14.770491803278688, "grad_norm": 6.990057945251465, "learning_rate": 1.928212378282584e-05, "loss": 1.1881, "step": 4505 }, { "epoch": 14.773770491803278, "grad_norm": 5.941379070281982, "learning_rate": 1.9281728654614667e-05, "loss": 1.3601, "step": 4506 }, { "epoch": 14.777049180327868, "grad_norm": 6.598684310913086, "learning_rate": 1.9281333421742358e-05, "loss": 1.4238, "step": 4507 }, { "epoch": 14.780327868852458, "grad_norm": 5.79692268371582, "learning_rate": 1.9280938084213358e-05, "loss": 1.5432, "step": 4508 }, { "epoch": 14.783606557377048, "grad_norm": 7.4224772453308105, "learning_rate": 1.9280542642032137e-05, "loss": 1.2971, "step": 4509 }, { "epoch": 14.78688524590164, "grad_norm": 6.428098201751709, "learning_rate": 1.9280147095203143e-05, "loss": 1.2361, "step": 4510 }, { "epoch": 14.790163934426229, "grad_norm": 6.0195841789245605, "learning_rate": 1.9279751443730844e-05, "loss": 1.308, "step": 4511 }, { "epoch": 14.79344262295082, "grad_norm": 6.481630325317383, "learning_rate": 1.92793556876197e-05, "loss": 1.2825, "step": 4512 }, { "epoch": 14.79672131147541, "grad_norm": 6.315440654754639, "learning_rate": 1.9278959826874174e-05, "loss": 1.3066, "step": 4513 }, { "epoch": 14.8, "grad_norm": 5.915075302124023, "learning_rate": 1.9278563861498726e-05, "loss": 1.269, "step": 4514 }, { "epoch": 14.80327868852459, "grad_norm": 6.501873970031738, "learning_rate": 1.9278167791497823e-05, "loss": 1.2134, "step": 4515 }, { "epoch": 14.806557377049181, "grad_norm": 6.033059597015381, "learning_rate": 1.9277771616875933e-05, "loss": 1.3008, "step": 4516 }, { "epoch": 14.809836065573771, "grad_norm": 6.322947025299072, "learning_rate": 1.927737533763752e-05, "loss": 1.4573, "step": 4517 }, { "epoch": 14.813114754098361, "grad_norm": 5.949410915374756, "learning_rate": 1.9276978953787052e-05, "loss": 1.446, "step": 4518 }, { "epoch": 14.816393442622951, "grad_norm": 6.869070053100586, "learning_rate": 1.9276582465329003e-05, "loss": 1.3225, "step": 4519 }, { "epoch": 14.819672131147541, "grad_norm": 5.316800117492676, "learning_rate": 1.927618587226784e-05, "loss": 1.4548, "step": 4520 }, { "epoch": 14.822950819672132, "grad_norm": 6.374141216278076, "learning_rate": 1.927578917460804e-05, "loss": 1.2888, "step": 4521 }, { "epoch": 14.826229508196722, "grad_norm": 6.876203536987305, "learning_rate": 1.927539237235407e-05, "loss": 1.3484, "step": 4522 }, { "epoch": 14.829508196721312, "grad_norm": 7.112538814544678, "learning_rate": 1.9274995465510406e-05, "loss": 1.3406, "step": 4523 }, { "epoch": 14.832786885245902, "grad_norm": 5.017179489135742, "learning_rate": 1.9274598454081527e-05, "loss": 1.3811, "step": 4524 }, { "epoch": 14.836065573770492, "grad_norm": 6.874765872955322, "learning_rate": 1.927420133807191e-05, "loss": 1.3416, "step": 4525 }, { "epoch": 14.839344262295082, "grad_norm": 7.040253162384033, "learning_rate": 1.9273804117486024e-05, "loss": 1.4351, "step": 4526 }, { "epoch": 14.842622950819672, "grad_norm": 6.231764316558838, "learning_rate": 1.9273406792328355e-05, "loss": 1.177, "step": 4527 }, { "epoch": 14.845901639344262, "grad_norm": 5.636988639831543, "learning_rate": 1.9273009362603385e-05, "loss": 1.225, "step": 4528 }, { "epoch": 14.849180327868853, "grad_norm": 5.199662685394287, "learning_rate": 1.9272611828315592e-05, "loss": 1.2661, "step": 4529 }, { "epoch": 14.852459016393443, "grad_norm": 7.097002983093262, "learning_rate": 1.927221418946946e-05, "loss": 1.3438, "step": 4530 }, { "epoch": 14.855737704918033, "grad_norm": 5.491987705230713, "learning_rate": 1.9271816446069472e-05, "loss": 1.5796, "step": 4531 }, { "epoch": 14.859016393442623, "grad_norm": 5.077106952667236, "learning_rate": 1.9271418598120114e-05, "loss": 1.3494, "step": 4532 }, { "epoch": 14.862295081967213, "grad_norm": 6.297293663024902, "learning_rate": 1.927102064562587e-05, "loss": 1.3038, "step": 4533 }, { "epoch": 14.865573770491803, "grad_norm": 6.885687351226807, "learning_rate": 1.927062258859123e-05, "loss": 1.415, "step": 4534 }, { "epoch": 14.868852459016393, "grad_norm": 7.9267096519470215, "learning_rate": 1.927022442702068e-05, "loss": 1.3809, "step": 4535 }, { "epoch": 14.872131147540983, "grad_norm": 6.249356746673584, "learning_rate": 1.9269826160918714e-05, "loss": 1.468, "step": 4536 }, { "epoch": 14.875409836065574, "grad_norm": 7.867213726043701, "learning_rate": 1.9269427790289813e-05, "loss": 1.2521, "step": 4537 }, { "epoch": 14.878688524590164, "grad_norm": 5.6741509437561035, "learning_rate": 1.9269029315138483e-05, "loss": 1.335, "step": 4538 }, { "epoch": 14.881967213114754, "grad_norm": 6.418978214263916, "learning_rate": 1.9268630735469207e-05, "loss": 1.5537, "step": 4539 }, { "epoch": 14.885245901639344, "grad_norm": 5.344372749328613, "learning_rate": 1.9268232051286483e-05, "loss": 1.3596, "step": 4540 }, { "epoch": 14.888524590163934, "grad_norm": 6.494318962097168, "learning_rate": 1.9267833262594805e-05, "loss": 1.1526, "step": 4541 }, { "epoch": 14.891803278688524, "grad_norm": 6.362621784210205, "learning_rate": 1.9267434369398672e-05, "loss": 1.4448, "step": 4542 }, { "epoch": 14.895081967213114, "grad_norm": 7.468128204345703, "learning_rate": 1.926703537170258e-05, "loss": 1.2394, "step": 4543 }, { "epoch": 14.898360655737704, "grad_norm": 8.641178131103516, "learning_rate": 1.926663626951103e-05, "loss": 1.278, "step": 4544 }, { "epoch": 14.901639344262295, "grad_norm": 6.391946315765381, "learning_rate": 1.926623706282852e-05, "loss": 1.4983, "step": 4545 }, { "epoch": 14.904918032786885, "grad_norm": 5.787728786468506, "learning_rate": 1.9265837751659554e-05, "loss": 1.2651, "step": 4546 }, { "epoch": 14.908196721311475, "grad_norm": 7.263465881347656, "learning_rate": 1.9265438336008633e-05, "loss": 1.3025, "step": 4547 }, { "epoch": 14.911475409836065, "grad_norm": 6.328160285949707, "learning_rate": 1.926503881588026e-05, "loss": 1.3611, "step": 4548 }, { "epoch": 14.914754098360655, "grad_norm": 6.3157806396484375, "learning_rate": 1.9264639191278944e-05, "loss": 1.2412, "step": 4549 }, { "epoch": 14.918032786885245, "grad_norm": 6.342469692230225, "learning_rate": 1.9264239462209187e-05, "loss": 1.5852, "step": 4550 }, { "epoch": 14.921311475409835, "grad_norm": 9.780330657958984, "learning_rate": 1.9263839628675497e-05, "loss": 1.1575, "step": 4551 }, { "epoch": 14.924590163934425, "grad_norm": 6.443414211273193, "learning_rate": 1.9263439690682384e-05, "loss": 1.4382, "step": 4552 }, { "epoch": 14.927868852459017, "grad_norm": 6.5611443519592285, "learning_rate": 1.9263039648234354e-05, "loss": 1.1169, "step": 4553 }, { "epoch": 14.931147540983606, "grad_norm": 5.958252429962158, "learning_rate": 1.9262639501335926e-05, "loss": 1.332, "step": 4554 }, { "epoch": 14.934426229508198, "grad_norm": 6.62816047668457, "learning_rate": 1.9262239249991603e-05, "loss": 1.0259, "step": 4555 }, { "epoch": 14.937704918032788, "grad_norm": 5.500743389129639, "learning_rate": 1.9261838894205903e-05, "loss": 1.2665, "step": 4556 }, { "epoch": 14.940983606557378, "grad_norm": 5.172966003417969, "learning_rate": 1.9261438433983337e-05, "loss": 1.4087, "step": 4557 }, { "epoch": 14.944262295081968, "grad_norm": 5.600658893585205, "learning_rate": 1.926103786932843e-05, "loss": 1.2756, "step": 4558 }, { "epoch": 14.947540983606558, "grad_norm": 6.519221305847168, "learning_rate": 1.9260637200245685e-05, "loss": 1.2981, "step": 4559 }, { "epoch": 14.950819672131148, "grad_norm": 6.785202980041504, "learning_rate": 1.9260236426739628e-05, "loss": 1.3499, "step": 4560 }, { "epoch": 14.954098360655738, "grad_norm": 10.461712837219238, "learning_rate": 1.925983554881478e-05, "loss": 1.5039, "step": 4561 }, { "epoch": 14.957377049180328, "grad_norm": 6.62750768661499, "learning_rate": 1.9259434566475656e-05, "loss": 1.1707, "step": 4562 }, { "epoch": 14.960655737704919, "grad_norm": 6.361478328704834, "learning_rate": 1.9259033479726783e-05, "loss": 1.5291, "step": 4563 }, { "epoch": 14.963934426229509, "grad_norm": 5.336247444152832, "learning_rate": 1.9258632288572676e-05, "loss": 1.1785, "step": 4564 }, { "epoch": 14.967213114754099, "grad_norm": 6.925336837768555, "learning_rate": 1.9258230993017866e-05, "loss": 1.5818, "step": 4565 }, { "epoch": 14.970491803278689, "grad_norm": 7.122466564178467, "learning_rate": 1.9257829593066876e-05, "loss": 1.3177, "step": 4566 }, { "epoch": 14.973770491803279, "grad_norm": 7.01494836807251, "learning_rate": 1.925742808872423e-05, "loss": 1.3831, "step": 4567 }, { "epoch": 14.97704918032787, "grad_norm": 6.173234462738037, "learning_rate": 1.925702647999446e-05, "loss": 1.437, "step": 4568 }, { "epoch": 14.98032786885246, "grad_norm": 7.49747896194458, "learning_rate": 1.925662476688209e-05, "loss": 1.4453, "step": 4569 }, { "epoch": 14.98360655737705, "grad_norm": 6.824763298034668, "learning_rate": 1.9256222949391648e-05, "loss": 1.3591, "step": 4570 }, { "epoch": 14.98688524590164, "grad_norm": 6.069915771484375, "learning_rate": 1.9255821027527673e-05, "loss": 1.3823, "step": 4571 }, { "epoch": 14.99016393442623, "grad_norm": 6.430721759796143, "learning_rate": 1.9255419001294687e-05, "loss": 1.1494, "step": 4572 }, { "epoch": 14.99344262295082, "grad_norm": 6.766967296600342, "learning_rate": 1.9255016870697233e-05, "loss": 1.1699, "step": 4573 }, { "epoch": 14.99672131147541, "grad_norm": 10.127307891845703, "learning_rate": 1.925461463573984e-05, "loss": 1.1399, "step": 4574 }, { "epoch": 15.0, "grad_norm": 5.82437801361084, "learning_rate": 1.9254212296427043e-05, "loss": 1.2302, "step": 4575 }, { "epoch": 15.00327868852459, "grad_norm": 5.892556190490723, "learning_rate": 1.9253809852763382e-05, "loss": 1.2617, "step": 4576 }, { "epoch": 15.00655737704918, "grad_norm": 6.586758613586426, "learning_rate": 1.9253407304753395e-05, "loss": 1.3494, "step": 4577 }, { "epoch": 15.00983606557377, "grad_norm": 6.884945869445801, "learning_rate": 1.9253004652401616e-05, "loss": 1.1976, "step": 4578 }, { "epoch": 15.01311475409836, "grad_norm": 5.923006534576416, "learning_rate": 1.925260189571259e-05, "loss": 1.2222, "step": 4579 }, { "epoch": 15.01639344262295, "grad_norm": 5.7640180587768555, "learning_rate": 1.9252199034690857e-05, "loss": 1.2092, "step": 4580 }, { "epoch": 15.01967213114754, "grad_norm": 4.574153900146484, "learning_rate": 1.925179606934096e-05, "loss": 1.5278, "step": 4581 }, { "epoch": 15.02295081967213, "grad_norm": 5.991824626922607, "learning_rate": 1.9251392999667446e-05, "loss": 1.4158, "step": 4582 }, { "epoch": 15.026229508196721, "grad_norm": 4.9174323081970215, "learning_rate": 1.925098982567485e-05, "loss": 1.3481, "step": 4583 }, { "epoch": 15.029508196721311, "grad_norm": 6.567943096160889, "learning_rate": 1.9250586547367734e-05, "loss": 1.2571, "step": 4584 }, { "epoch": 15.032786885245901, "grad_norm": 4.897022247314453, "learning_rate": 1.925018316475063e-05, "loss": 1.3872, "step": 4585 }, { "epoch": 15.036065573770491, "grad_norm": 5.791433334350586, "learning_rate": 1.9249779677828096e-05, "loss": 1.244, "step": 4586 }, { "epoch": 15.039344262295081, "grad_norm": 6.546054840087891, "learning_rate": 1.9249376086604677e-05, "loss": 1.2236, "step": 4587 }, { "epoch": 15.042622950819672, "grad_norm": 4.847489833831787, "learning_rate": 1.9248972391084925e-05, "loss": 1.3212, "step": 4588 }, { "epoch": 15.045901639344262, "grad_norm": 7.273299217224121, "learning_rate": 1.9248568591273395e-05, "loss": 1.3738, "step": 4589 }, { "epoch": 15.049180327868852, "grad_norm": 5.451585292816162, "learning_rate": 1.9248164687174636e-05, "loss": 1.2831, "step": 4590 }, { "epoch": 15.052459016393442, "grad_norm": 6.0671162605285645, "learning_rate": 1.9247760678793206e-05, "loss": 1.0547, "step": 4591 }, { "epoch": 15.055737704918032, "grad_norm": 5.883884429931641, "learning_rate": 1.9247356566133662e-05, "loss": 1.2477, "step": 4592 }, { "epoch": 15.059016393442622, "grad_norm": 4.956162452697754, "learning_rate": 1.9246952349200553e-05, "loss": 1.4022, "step": 4593 }, { "epoch": 15.062295081967212, "grad_norm": 5.66074275970459, "learning_rate": 1.9246548027998444e-05, "loss": 1.2822, "step": 4594 }, { "epoch": 15.065573770491802, "grad_norm": 6.332279682159424, "learning_rate": 1.924614360253189e-05, "loss": 1.1836, "step": 4595 }, { "epoch": 15.068852459016393, "grad_norm": 5.498050212860107, "learning_rate": 1.9245739072805458e-05, "loss": 1.2861, "step": 4596 }, { "epoch": 15.072131147540984, "grad_norm": 12.311932563781738, "learning_rate": 1.92453344388237e-05, "loss": 1.1884, "step": 4597 }, { "epoch": 15.075409836065575, "grad_norm": 7.091182231903076, "learning_rate": 1.9244929700591185e-05, "loss": 1.1331, "step": 4598 }, { "epoch": 15.078688524590165, "grad_norm": 4.3558735847473145, "learning_rate": 1.9244524858112474e-05, "loss": 1.38, "step": 4599 }, { "epoch": 15.081967213114755, "grad_norm": 6.416442394256592, "learning_rate": 1.9244119911392136e-05, "loss": 1.0762, "step": 4600 }, { "epoch": 15.085245901639345, "grad_norm": 6.369469165802002, "learning_rate": 1.924371486043473e-05, "loss": 1.3162, "step": 4601 }, { "epoch": 15.088524590163935, "grad_norm": 6.423653602600098, "learning_rate": 1.9243309705244833e-05, "loss": 1.2167, "step": 4602 }, { "epoch": 15.091803278688525, "grad_norm": 7.985208988189697, "learning_rate": 1.9242904445827006e-05, "loss": 1.4844, "step": 4603 }, { "epoch": 15.095081967213115, "grad_norm": 8.808614730834961, "learning_rate": 1.9242499082185823e-05, "loss": 1.1404, "step": 4604 }, { "epoch": 15.098360655737705, "grad_norm": 6.799317836761475, "learning_rate": 1.9242093614325852e-05, "loss": 1.3721, "step": 4605 }, { "epoch": 15.101639344262296, "grad_norm": 5.579391002655029, "learning_rate": 1.9241688042251667e-05, "loss": 1.2773, "step": 4606 }, { "epoch": 15.104918032786886, "grad_norm": 5.546252727508545, "learning_rate": 1.9241282365967838e-05, "loss": 1.2096, "step": 4607 }, { "epoch": 15.108196721311476, "grad_norm": 7.391226291656494, "learning_rate": 1.9240876585478943e-05, "loss": 1.2213, "step": 4608 }, { "epoch": 15.111475409836066, "grad_norm": 4.643617630004883, "learning_rate": 1.9240470700789556e-05, "loss": 1.4626, "step": 4609 }, { "epoch": 15.114754098360656, "grad_norm": 7.591899871826172, "learning_rate": 1.9240064711904254e-05, "loss": 1.1755, "step": 4610 }, { "epoch": 15.118032786885246, "grad_norm": 5.267609596252441, "learning_rate": 1.9239658618827617e-05, "loss": 1.2629, "step": 4611 }, { "epoch": 15.121311475409836, "grad_norm": 6.514697074890137, "learning_rate": 1.923925242156422e-05, "loss": 1.314, "step": 4612 }, { "epoch": 15.124590163934426, "grad_norm": 6.294702053070068, "learning_rate": 1.9238846120118646e-05, "loss": 1.2235, "step": 4613 }, { "epoch": 15.127868852459017, "grad_norm": 4.627302169799805, "learning_rate": 1.9238439714495478e-05, "loss": 1.2617, "step": 4614 }, { "epoch": 15.131147540983607, "grad_norm": 7.0471601486206055, "learning_rate": 1.9238033204699293e-05, "loss": 1.2676, "step": 4615 }, { "epoch": 15.134426229508197, "grad_norm": 6.054557800292969, "learning_rate": 1.9237626590734684e-05, "loss": 1.202, "step": 4616 }, { "epoch": 15.137704918032787, "grad_norm": 5.321061134338379, "learning_rate": 1.9237219872606225e-05, "loss": 1.22, "step": 4617 }, { "epoch": 15.140983606557377, "grad_norm": 4.868913650512695, "learning_rate": 1.923681305031851e-05, "loss": 1.2855, "step": 4618 }, { "epoch": 15.144262295081967, "grad_norm": 6.896109580993652, "learning_rate": 1.9236406123876126e-05, "loss": 1.0664, "step": 4619 }, { "epoch": 15.147540983606557, "grad_norm": 5.510124206542969, "learning_rate": 1.9235999093283655e-05, "loss": 1.2905, "step": 4620 }, { "epoch": 15.150819672131147, "grad_norm": 5.221634387969971, "learning_rate": 1.9235591958545693e-05, "loss": 1.1826, "step": 4621 }, { "epoch": 15.154098360655738, "grad_norm": 6.234377384185791, "learning_rate": 1.923518471966683e-05, "loss": 1.2009, "step": 4622 }, { "epoch": 15.157377049180328, "grad_norm": 6.36950159072876, "learning_rate": 1.9234777376651656e-05, "loss": 1.2859, "step": 4623 }, { "epoch": 15.160655737704918, "grad_norm": 5.8666582107543945, "learning_rate": 1.923436992950477e-05, "loss": 1.0435, "step": 4624 }, { "epoch": 15.163934426229508, "grad_norm": 5.545718193054199, "learning_rate": 1.9233962378230753e-05, "loss": 1.2822, "step": 4625 }, { "epoch": 15.167213114754098, "grad_norm": 5.0787353515625, "learning_rate": 1.9233554722834213e-05, "loss": 1.2515, "step": 4626 }, { "epoch": 15.170491803278688, "grad_norm": 5.812142848968506, "learning_rate": 1.9233146963319746e-05, "loss": 1.2349, "step": 4627 }, { "epoch": 15.173770491803278, "grad_norm": 5.870377063751221, "learning_rate": 1.9232739099691942e-05, "loss": 1.251, "step": 4628 }, { "epoch": 15.177049180327868, "grad_norm": 6.889293193817139, "learning_rate": 1.923233113195541e-05, "loss": 1.2388, "step": 4629 }, { "epoch": 15.180327868852459, "grad_norm": 4.792454719543457, "learning_rate": 1.9231923060114742e-05, "loss": 1.2371, "step": 4630 }, { "epoch": 15.183606557377049, "grad_norm": 6.380695819854736, "learning_rate": 1.9231514884174544e-05, "loss": 1.3718, "step": 4631 }, { "epoch": 15.186885245901639, "grad_norm": 7.0921244621276855, "learning_rate": 1.923110660413942e-05, "loss": 1.0051, "step": 4632 }, { "epoch": 15.190163934426229, "grad_norm": 6.267615795135498, "learning_rate": 1.923069822001397e-05, "loss": 1.4175, "step": 4633 }, { "epoch": 15.193442622950819, "grad_norm": 7.081976413726807, "learning_rate": 1.9230289731802796e-05, "loss": 1.3534, "step": 4634 }, { "epoch": 15.19672131147541, "grad_norm": 5.798069953918457, "learning_rate": 1.9229881139510512e-05, "loss": 1.5447, "step": 4635 }, { "epoch": 15.2, "grad_norm": 6.1561384201049805, "learning_rate": 1.922947244314172e-05, "loss": 1.4333, "step": 4636 }, { "epoch": 15.20327868852459, "grad_norm": 6.341854572296143, "learning_rate": 1.922906364270103e-05, "loss": 1.306, "step": 4637 }, { "epoch": 15.20655737704918, "grad_norm": 5.805028438568115, "learning_rate": 1.9228654738193047e-05, "loss": 1.1523, "step": 4638 }, { "epoch": 15.20983606557377, "grad_norm": 5.436279296875, "learning_rate": 1.9228245729622393e-05, "loss": 1.2317, "step": 4639 }, { "epoch": 15.21311475409836, "grad_norm": 5.3015666007995605, "learning_rate": 1.922783661699367e-05, "loss": 1.0798, "step": 4640 }, { "epoch": 15.216393442622952, "grad_norm": 5.730832099914551, "learning_rate": 1.9227427400311497e-05, "loss": 1.129, "step": 4641 }, { "epoch": 15.219672131147542, "grad_norm": 6.73893928527832, "learning_rate": 1.9227018079580487e-05, "loss": 1.2913, "step": 4642 }, { "epoch": 15.222950819672132, "grad_norm": 6.285943508148193, "learning_rate": 1.922660865480525e-05, "loss": 1.2764, "step": 4643 }, { "epoch": 15.226229508196722, "grad_norm": 9.359155654907227, "learning_rate": 1.922619912599041e-05, "loss": 1.2539, "step": 4644 }, { "epoch": 15.229508196721312, "grad_norm": 4.583150863647461, "learning_rate": 1.922578949314058e-05, "loss": 1.2825, "step": 4645 }, { "epoch": 15.232786885245902, "grad_norm": 6.673872947692871, "learning_rate": 1.9225379756260383e-05, "loss": 1.2539, "step": 4646 }, { "epoch": 15.236065573770492, "grad_norm": 5.107909202575684, "learning_rate": 1.922496991535444e-05, "loss": 1.2968, "step": 4647 }, { "epoch": 15.239344262295083, "grad_norm": 5.381586074829102, "learning_rate": 1.9224559970427362e-05, "loss": 1.3193, "step": 4648 }, { "epoch": 15.242622950819673, "grad_norm": 5.3795623779296875, "learning_rate": 1.9224149921483784e-05, "loss": 1.2903, "step": 4649 }, { "epoch": 15.245901639344263, "grad_norm": 6.656342506408691, "learning_rate": 1.9223739768528328e-05, "loss": 1.3911, "step": 4650 }, { "epoch": 15.249180327868853, "grad_norm": 7.002657890319824, "learning_rate": 1.922332951156561e-05, "loss": 1.2579, "step": 4651 }, { "epoch": 15.252459016393443, "grad_norm": 5.2233099937438965, "learning_rate": 1.9222919150600266e-05, "loss": 1.2422, "step": 4652 }, { "epoch": 15.255737704918033, "grad_norm": 7.229186058044434, "learning_rate": 1.9222508685636917e-05, "loss": 1.2864, "step": 4653 }, { "epoch": 15.259016393442623, "grad_norm": 5.675781726837158, "learning_rate": 1.9222098116680193e-05, "loss": 1.1482, "step": 4654 }, { "epoch": 15.262295081967213, "grad_norm": 5.647179126739502, "learning_rate": 1.9221687443734724e-05, "loss": 1.5171, "step": 4655 }, { "epoch": 15.265573770491804, "grad_norm": 6.899447917938232, "learning_rate": 1.9221276666805142e-05, "loss": 1.067, "step": 4656 }, { "epoch": 15.268852459016394, "grad_norm": 5.279336929321289, "learning_rate": 1.922086578589608e-05, "loss": 1.3082, "step": 4657 }, { "epoch": 15.272131147540984, "grad_norm": 7.9003424644470215, "learning_rate": 1.9220454801012164e-05, "loss": 1.3186, "step": 4658 }, { "epoch": 15.275409836065574, "grad_norm": 6.019838809967041, "learning_rate": 1.9220043712158038e-05, "loss": 1.1383, "step": 4659 }, { "epoch": 15.278688524590164, "grad_norm": 6.019235134124756, "learning_rate": 1.9219632519338332e-05, "loss": 1.2483, "step": 4660 }, { "epoch": 15.281967213114754, "grad_norm": 5.792607307434082, "learning_rate": 1.921922122255768e-05, "loss": 1.4512, "step": 4661 }, { "epoch": 15.285245901639344, "grad_norm": 5.707829475402832, "learning_rate": 1.9218809821820725e-05, "loss": 1.322, "step": 4662 }, { "epoch": 15.288524590163934, "grad_norm": 6.310849189758301, "learning_rate": 1.9218398317132107e-05, "loss": 1.3442, "step": 4663 }, { "epoch": 15.291803278688525, "grad_norm": 6.441150188446045, "learning_rate": 1.921798670849646e-05, "loss": 1.4877, "step": 4664 }, { "epoch": 15.295081967213115, "grad_norm": 5.426593780517578, "learning_rate": 1.9217574995918428e-05, "loss": 1.2727, "step": 4665 }, { "epoch": 15.298360655737705, "grad_norm": 7.536440849304199, "learning_rate": 1.9217163179402657e-05, "loss": 1.246, "step": 4666 }, { "epoch": 15.301639344262295, "grad_norm": 5.531909465789795, "learning_rate": 1.9216751258953786e-05, "loss": 1.29, "step": 4667 }, { "epoch": 15.304918032786885, "grad_norm": 6.913450717926025, "learning_rate": 1.921633923457646e-05, "loss": 1.2175, "step": 4668 }, { "epoch": 15.308196721311475, "grad_norm": 5.329433917999268, "learning_rate": 1.921592710627533e-05, "loss": 1.3389, "step": 4669 }, { "epoch": 15.311475409836065, "grad_norm": 5.805376052856445, "learning_rate": 1.9215514874055037e-05, "loss": 1.1758, "step": 4670 }, { "epoch": 15.314754098360655, "grad_norm": 5.740683078765869, "learning_rate": 1.921510253792023e-05, "loss": 1.4031, "step": 4671 }, { "epoch": 15.318032786885245, "grad_norm": 5.2303242683410645, "learning_rate": 1.9214690097875565e-05, "loss": 1.4423, "step": 4672 }, { "epoch": 15.321311475409836, "grad_norm": 6.0747809410095215, "learning_rate": 1.9214277553925687e-05, "loss": 1.5583, "step": 4673 }, { "epoch": 15.324590163934426, "grad_norm": 5.518111228942871, "learning_rate": 1.921386490607525e-05, "loss": 1.3396, "step": 4674 }, { "epoch": 15.327868852459016, "grad_norm": 5.624899387359619, "learning_rate": 1.9213452154328905e-05, "loss": 1.5083, "step": 4675 }, { "epoch": 15.331147540983606, "grad_norm": 5.9632368087768555, "learning_rate": 1.9213039298691306e-05, "loss": 1.3262, "step": 4676 }, { "epoch": 15.334426229508196, "grad_norm": 6.831569671630859, "learning_rate": 1.9212626339167114e-05, "loss": 1.2666, "step": 4677 }, { "epoch": 15.337704918032786, "grad_norm": 5.420403957366943, "learning_rate": 1.9212213275760976e-05, "loss": 1.3582, "step": 4678 }, { "epoch": 15.340983606557376, "grad_norm": 4.758334636688232, "learning_rate": 1.921180010847756e-05, "loss": 1.1829, "step": 4679 }, { "epoch": 15.344262295081966, "grad_norm": 6.793691635131836, "learning_rate": 1.9211386837321517e-05, "loss": 1.2544, "step": 4680 }, { "epoch": 15.347540983606557, "grad_norm": 4.799959182739258, "learning_rate": 1.921097346229751e-05, "loss": 1.1908, "step": 4681 }, { "epoch": 15.350819672131147, "grad_norm": 6.078999996185303, "learning_rate": 1.9210559983410198e-05, "loss": 1.2642, "step": 4682 }, { "epoch": 15.354098360655737, "grad_norm": 6.8179144859313965, "learning_rate": 1.921014640066425e-05, "loss": 1.064, "step": 4683 }, { "epoch": 15.357377049180329, "grad_norm": 6.334630489349365, "learning_rate": 1.9209732714064324e-05, "loss": 1.0718, "step": 4684 }, { "epoch": 15.360655737704919, "grad_norm": 5.828545570373535, "learning_rate": 1.9209318923615085e-05, "loss": 1.1696, "step": 4685 }, { "epoch": 15.363934426229509, "grad_norm": 4.698159694671631, "learning_rate": 1.92089050293212e-05, "loss": 1.3075, "step": 4686 }, { "epoch": 15.3672131147541, "grad_norm": 5.927482604980469, "learning_rate": 1.9208491031187333e-05, "loss": 1.0745, "step": 4687 }, { "epoch": 15.37049180327869, "grad_norm": 5.665998935699463, "learning_rate": 1.920807692921816e-05, "loss": 1.0972, "step": 4688 }, { "epoch": 15.37377049180328, "grad_norm": 7.188030242919922, "learning_rate": 1.9207662723418343e-05, "loss": 1.2317, "step": 4689 }, { "epoch": 15.37704918032787, "grad_norm": 9.808059692382812, "learning_rate": 1.9207248413792555e-05, "loss": 1.1389, "step": 4690 }, { "epoch": 15.38032786885246, "grad_norm": 6.289336681365967, "learning_rate": 1.9206834000345468e-05, "loss": 1.0925, "step": 4691 }, { "epoch": 15.38360655737705, "grad_norm": 6.0324273109436035, "learning_rate": 1.9206419483081757e-05, "loss": 1.005, "step": 4692 }, { "epoch": 15.38688524590164, "grad_norm": 7.347117900848389, "learning_rate": 1.9206004862006094e-05, "loss": 0.8666, "step": 4693 }, { "epoch": 15.39016393442623, "grad_norm": 6.760676383972168, "learning_rate": 1.9205590137123152e-05, "loss": 1.2905, "step": 4694 }, { "epoch": 15.39344262295082, "grad_norm": 6.041611671447754, "learning_rate": 1.9205175308437614e-05, "loss": 1.162, "step": 4695 }, { "epoch": 15.39672131147541, "grad_norm": 5.048579692840576, "learning_rate": 1.9204760375954147e-05, "loss": 1.2454, "step": 4696 }, { "epoch": 15.4, "grad_norm": 5.849298477172852, "learning_rate": 1.9204345339677442e-05, "loss": 1.3169, "step": 4697 }, { "epoch": 15.40327868852459, "grad_norm": 7.617844104766846, "learning_rate": 1.920393019961217e-05, "loss": 0.9465, "step": 4698 }, { "epoch": 15.40655737704918, "grad_norm": 6.490930557250977, "learning_rate": 1.9203514955763018e-05, "loss": 1.2922, "step": 4699 }, { "epoch": 15.40983606557377, "grad_norm": 5.484167575836182, "learning_rate": 1.9203099608134663e-05, "loss": 1.3728, "step": 4700 }, { "epoch": 15.41311475409836, "grad_norm": 6.267960071563721, "learning_rate": 1.9202684156731793e-05, "loss": 1.3431, "step": 4701 }, { "epoch": 15.416393442622951, "grad_norm": 8.23141098022461, "learning_rate": 1.920226860155909e-05, "loss": 1.2422, "step": 4702 }, { "epoch": 15.419672131147541, "grad_norm": 6.668757915496826, "learning_rate": 1.9201852942621237e-05, "loss": 1.5371, "step": 4703 }, { "epoch": 15.422950819672131, "grad_norm": 17.416933059692383, "learning_rate": 1.9201437179922927e-05, "loss": 1.1654, "step": 4704 }, { "epoch": 15.426229508196721, "grad_norm": 8.520625114440918, "learning_rate": 1.9201021313468847e-05, "loss": 1.3519, "step": 4705 }, { "epoch": 15.429508196721311, "grad_norm": 5.344023704528809, "learning_rate": 1.9200605343263684e-05, "loss": 1.2357, "step": 4706 }, { "epoch": 15.432786885245902, "grad_norm": 5.648622035980225, "learning_rate": 1.9200189269312133e-05, "loss": 1.1953, "step": 4707 }, { "epoch": 15.436065573770492, "grad_norm": 6.3206467628479, "learning_rate": 1.919977309161888e-05, "loss": 1.5535, "step": 4708 }, { "epoch": 15.439344262295082, "grad_norm": 6.034040927886963, "learning_rate": 1.9199356810188617e-05, "loss": 1.3518, "step": 4709 }, { "epoch": 15.442622950819672, "grad_norm": 6.219759941101074, "learning_rate": 1.9198940425026045e-05, "loss": 1.2278, "step": 4710 }, { "epoch": 15.445901639344262, "grad_norm": 6.493440628051758, "learning_rate": 1.9198523936135852e-05, "loss": 1.3711, "step": 4711 }, { "epoch": 15.449180327868852, "grad_norm": 5.4762983322143555, "learning_rate": 1.9198107343522737e-05, "loss": 1.0693, "step": 4712 }, { "epoch": 15.452459016393442, "grad_norm": 6.81265926361084, "learning_rate": 1.9197690647191404e-05, "loss": 1.101, "step": 4713 }, { "epoch": 15.455737704918032, "grad_norm": 7.6394500732421875, "learning_rate": 1.919727384714654e-05, "loss": 1.2302, "step": 4714 }, { "epoch": 15.459016393442623, "grad_norm": 6.1923298835754395, "learning_rate": 1.919685694339285e-05, "loss": 1.2556, "step": 4715 }, { "epoch": 15.462295081967213, "grad_norm": 5.1632537841796875, "learning_rate": 1.9196439935935043e-05, "loss": 1.5823, "step": 4716 }, { "epoch": 15.465573770491803, "grad_norm": 4.906126499176025, "learning_rate": 1.9196022824777808e-05, "loss": 1.3425, "step": 4717 }, { "epoch": 15.468852459016393, "grad_norm": 8.278353691101074, "learning_rate": 1.9195605609925857e-05, "loss": 1.2939, "step": 4718 }, { "epoch": 15.472131147540983, "grad_norm": 6.330051898956299, "learning_rate": 1.9195188291383887e-05, "loss": 1.3385, "step": 4719 }, { "epoch": 15.475409836065573, "grad_norm": 5.252561569213867, "learning_rate": 1.919477086915661e-05, "loss": 1.1812, "step": 4720 }, { "epoch": 15.478688524590163, "grad_norm": 6.495214939117432, "learning_rate": 1.9194353343248733e-05, "loss": 1.2112, "step": 4721 }, { "epoch": 15.481967213114753, "grad_norm": 8.847373962402344, "learning_rate": 1.9193935713664965e-05, "loss": 1.1851, "step": 4722 }, { "epoch": 15.485245901639344, "grad_norm": 5.426539897918701, "learning_rate": 1.919351798041001e-05, "loss": 1.1284, "step": 4723 }, { "epoch": 15.488524590163934, "grad_norm": 6.048662185668945, "learning_rate": 1.9193100143488578e-05, "loss": 1.3157, "step": 4724 }, { "epoch": 15.491803278688524, "grad_norm": 6.391346454620361, "learning_rate": 1.9192682202905385e-05, "loss": 1.283, "step": 4725 }, { "epoch": 15.495081967213114, "grad_norm": 7.148429870605469, "learning_rate": 1.9192264158665145e-05, "loss": 1.1761, "step": 4726 }, { "epoch": 15.498360655737706, "grad_norm": 5.522749423980713, "learning_rate": 1.9191846010772566e-05, "loss": 1.2197, "step": 4727 }, { "epoch": 15.501639344262294, "grad_norm": 6.281272888183594, "learning_rate": 1.9191427759232366e-05, "loss": 1.322, "step": 4728 }, { "epoch": 15.504918032786886, "grad_norm": 8.760543823242188, "learning_rate": 1.9191009404049262e-05, "loss": 1.2305, "step": 4729 }, { "epoch": 15.508196721311476, "grad_norm": 5.740420818328857, "learning_rate": 1.919059094522797e-05, "loss": 1.1222, "step": 4730 }, { "epoch": 15.511475409836066, "grad_norm": 10.900397300720215, "learning_rate": 1.9190172382773215e-05, "loss": 1.3594, "step": 4731 }, { "epoch": 15.514754098360656, "grad_norm": 7.495136260986328, "learning_rate": 1.9189753716689707e-05, "loss": 1.2201, "step": 4732 }, { "epoch": 15.518032786885247, "grad_norm": 6.1900763511657715, "learning_rate": 1.9189334946982167e-05, "loss": 1.3865, "step": 4733 }, { "epoch": 15.521311475409837, "grad_norm": 6.291758060455322, "learning_rate": 1.9188916073655324e-05, "loss": 1.1008, "step": 4734 }, { "epoch": 15.524590163934427, "grad_norm": 5.215905666351318, "learning_rate": 1.91884970967139e-05, "loss": 1.4436, "step": 4735 }, { "epoch": 15.527868852459017, "grad_norm": 5.536752700805664, "learning_rate": 1.9188078016162615e-05, "loss": 1.3325, "step": 4736 }, { "epoch": 15.531147540983607, "grad_norm": 7.671133995056152, "learning_rate": 1.91876588320062e-05, "loss": 1.1606, "step": 4737 }, { "epoch": 15.534426229508197, "grad_norm": 6.978606224060059, "learning_rate": 1.9187239544249373e-05, "loss": 1.0217, "step": 4738 }, { "epoch": 15.537704918032787, "grad_norm": 4.921175956726074, "learning_rate": 1.9186820152896876e-05, "loss": 1.2321, "step": 4739 }, { "epoch": 15.540983606557377, "grad_norm": 5.092264175415039, "learning_rate": 1.9186400657953424e-05, "loss": 1.2971, "step": 4740 }, { "epoch": 15.544262295081968, "grad_norm": 6.981807231903076, "learning_rate": 1.9185981059423756e-05, "loss": 1.2988, "step": 4741 }, { "epoch": 15.547540983606558, "grad_norm": 10.918380737304688, "learning_rate": 1.91855613573126e-05, "loss": 1.1357, "step": 4742 }, { "epoch": 15.550819672131148, "grad_norm": 5.345022678375244, "learning_rate": 1.9185141551624687e-05, "loss": 1.3584, "step": 4743 }, { "epoch": 15.554098360655738, "grad_norm": 6.965592384338379, "learning_rate": 1.9184721642364753e-05, "loss": 1.3552, "step": 4744 }, { "epoch": 15.557377049180328, "grad_norm": 5.93300199508667, "learning_rate": 1.9184301629537537e-05, "loss": 1.4089, "step": 4745 }, { "epoch": 15.560655737704918, "grad_norm": 6.751558780670166, "learning_rate": 1.9183881513147768e-05, "loss": 1.1013, "step": 4746 }, { "epoch": 15.563934426229508, "grad_norm": 6.532781600952148, "learning_rate": 1.9183461293200184e-05, "loss": 1.1887, "step": 4747 }, { "epoch": 15.567213114754098, "grad_norm": 4.45811128616333, "learning_rate": 1.9183040969699525e-05, "loss": 1.3782, "step": 4748 }, { "epoch": 15.570491803278689, "grad_norm": 5.655259132385254, "learning_rate": 1.9182620542650536e-05, "loss": 1.2971, "step": 4749 }, { "epoch": 15.573770491803279, "grad_norm": 7.080542087554932, "learning_rate": 1.918220001205795e-05, "loss": 1.252, "step": 4750 }, { "epoch": 15.577049180327869, "grad_norm": 6.476240634918213, "learning_rate": 1.9181779377926513e-05, "loss": 1.2644, "step": 4751 }, { "epoch": 15.580327868852459, "grad_norm": 6.004882335662842, "learning_rate": 1.9181358640260964e-05, "loss": 1.2842, "step": 4752 }, { "epoch": 15.583606557377049, "grad_norm": 8.321659088134766, "learning_rate": 1.9180937799066053e-05, "loss": 1.1736, "step": 4753 }, { "epoch": 15.58688524590164, "grad_norm": 7.374931812286377, "learning_rate": 1.9180516854346525e-05, "loss": 1.2258, "step": 4754 }, { "epoch": 15.59016393442623, "grad_norm": 5.5085883140563965, "learning_rate": 1.918009580610712e-05, "loss": 1.2817, "step": 4755 }, { "epoch": 15.59344262295082, "grad_norm": 5.555426597595215, "learning_rate": 1.917967465435259e-05, "loss": 1.1696, "step": 4756 }, { "epoch": 15.59672131147541, "grad_norm": 6.017915725708008, "learning_rate": 1.9179253399087684e-05, "loss": 1.4071, "step": 4757 }, { "epoch": 15.6, "grad_norm": 5.372984886169434, "learning_rate": 1.9178832040317153e-05, "loss": 1.2003, "step": 4758 }, { "epoch": 15.60327868852459, "grad_norm": 6.703983783721924, "learning_rate": 1.9178410578045746e-05, "loss": 1.2791, "step": 4759 }, { "epoch": 15.60655737704918, "grad_norm": 5.99609899520874, "learning_rate": 1.917798901227822e-05, "loss": 1.4561, "step": 4760 }, { "epoch": 15.60983606557377, "grad_norm": 6.759332180023193, "learning_rate": 1.917756734301932e-05, "loss": 1.0778, "step": 4761 }, { "epoch": 15.61311475409836, "grad_norm": 6.008084774017334, "learning_rate": 1.9177145570273808e-05, "loss": 1.3027, "step": 4762 }, { "epoch": 15.61639344262295, "grad_norm": 5.96160364151001, "learning_rate": 1.917672369404644e-05, "loss": 1.3203, "step": 4763 }, { "epoch": 15.61967213114754, "grad_norm": 6.280120372772217, "learning_rate": 1.917630171434197e-05, "loss": 1.3086, "step": 4764 }, { "epoch": 15.62295081967213, "grad_norm": 7.404764175415039, "learning_rate": 1.9175879631165154e-05, "loss": 1.2676, "step": 4765 }, { "epoch": 15.62622950819672, "grad_norm": 5.6301469802856445, "learning_rate": 1.9175457444520758e-05, "loss": 1.293, "step": 4766 }, { "epoch": 15.62950819672131, "grad_norm": 8.301543235778809, "learning_rate": 1.9175035154413538e-05, "loss": 1.1685, "step": 4767 }, { "epoch": 15.6327868852459, "grad_norm": 6.899020671844482, "learning_rate": 1.9174612760848257e-05, "loss": 1.3501, "step": 4768 }, { "epoch": 15.636065573770491, "grad_norm": 6.095473289489746, "learning_rate": 1.9174190263829678e-05, "loss": 1.3406, "step": 4769 }, { "epoch": 15.639344262295083, "grad_norm": 5.534016132354736, "learning_rate": 1.9173767663362566e-05, "loss": 1.5457, "step": 4770 }, { "epoch": 15.642622950819671, "grad_norm": 5.746725559234619, "learning_rate": 1.9173344959451684e-05, "loss": 1.334, "step": 4771 }, { "epoch": 15.645901639344263, "grad_norm": 6.391883373260498, "learning_rate": 1.9172922152101802e-05, "loss": 1.2434, "step": 4772 }, { "epoch": 15.649180327868853, "grad_norm": 5.552792072296143, "learning_rate": 1.917249924131768e-05, "loss": 1.312, "step": 4773 }, { "epoch": 15.652459016393443, "grad_norm": 5.712964057922363, "learning_rate": 1.91720762271041e-05, "loss": 1.3656, "step": 4774 }, { "epoch": 15.655737704918034, "grad_norm": 5.984529972076416, "learning_rate": 1.9171653109465818e-05, "loss": 1.3137, "step": 4775 }, { "epoch": 15.659016393442624, "grad_norm": 6.036046028137207, "learning_rate": 1.917122988840761e-05, "loss": 1.3884, "step": 4776 }, { "epoch": 15.662295081967214, "grad_norm": 7.750387191772461, "learning_rate": 1.9170806563934254e-05, "loss": 1.3601, "step": 4777 }, { "epoch": 15.665573770491804, "grad_norm": 6.63201379776001, "learning_rate": 1.9170383136050515e-05, "loss": 1.2798, "step": 4778 }, { "epoch": 15.668852459016394, "grad_norm": 6.707050323486328, "learning_rate": 1.9169959604761174e-05, "loss": 1.3999, "step": 4779 }, { "epoch": 15.672131147540984, "grad_norm": 5.284810543060303, "learning_rate": 1.9169535970071004e-05, "loss": 1.3611, "step": 4780 }, { "epoch": 15.675409836065574, "grad_norm": 6.271422386169434, "learning_rate": 1.916911223198478e-05, "loss": 1.4106, "step": 4781 }, { "epoch": 15.678688524590164, "grad_norm": 4.597606182098389, "learning_rate": 1.9168688390507283e-05, "loss": 1.2104, "step": 4782 }, { "epoch": 15.681967213114755, "grad_norm": 5.39452600479126, "learning_rate": 1.916826444564329e-05, "loss": 1.5906, "step": 4783 }, { "epoch": 15.685245901639345, "grad_norm": 5.8518805503845215, "learning_rate": 1.9167840397397585e-05, "loss": 1.2976, "step": 4784 }, { "epoch": 15.688524590163935, "grad_norm": 5.834165096282959, "learning_rate": 1.9167416245774947e-05, "loss": 1.1198, "step": 4785 }, { "epoch": 15.691803278688525, "grad_norm": 7.128704071044922, "learning_rate": 1.916699199078016e-05, "loss": 1.0403, "step": 4786 }, { "epoch": 15.695081967213115, "grad_norm": 6.092923164367676, "learning_rate": 1.9166567632418004e-05, "loss": 1.2516, "step": 4787 }, { "epoch": 15.698360655737705, "grad_norm": 7.853362560272217, "learning_rate": 1.916614317069327e-05, "loss": 1.1799, "step": 4788 }, { "epoch": 15.701639344262295, "grad_norm": 5.8685808181762695, "learning_rate": 1.916571860561074e-05, "loss": 1.3624, "step": 4789 }, { "epoch": 15.704918032786885, "grad_norm": 7.098714828491211, "learning_rate": 1.91652939371752e-05, "loss": 1.0906, "step": 4790 }, { "epoch": 15.708196721311475, "grad_norm": 11.071489334106445, "learning_rate": 1.9164869165391445e-05, "loss": 1.1638, "step": 4791 }, { "epoch": 15.711475409836066, "grad_norm": 6.411388397216797, "learning_rate": 1.9164444290264262e-05, "loss": 1.5126, "step": 4792 }, { "epoch": 15.714754098360656, "grad_norm": 6.730912685394287, "learning_rate": 1.916401931179844e-05, "loss": 1.304, "step": 4793 }, { "epoch": 15.718032786885246, "grad_norm": 6.000550270080566, "learning_rate": 1.916359422999877e-05, "loss": 1.1583, "step": 4794 }, { "epoch": 15.721311475409836, "grad_norm": 5.6875128746032715, "learning_rate": 1.916316904487005e-05, "loss": 1.2332, "step": 4795 }, { "epoch": 15.724590163934426, "grad_norm": 6.490627288818359, "learning_rate": 1.9162743756417067e-05, "loss": 1.4081, "step": 4796 }, { "epoch": 15.727868852459016, "grad_norm": 8.227499008178711, "learning_rate": 1.9162318364644625e-05, "loss": 1.2805, "step": 4797 }, { "epoch": 15.731147540983606, "grad_norm": 8.617471694946289, "learning_rate": 1.916189286955752e-05, "loss": 1.1963, "step": 4798 }, { "epoch": 15.734426229508196, "grad_norm": 5.943732738494873, "learning_rate": 1.916146727116054e-05, "loss": 1.0742, "step": 4799 }, { "epoch": 15.737704918032787, "grad_norm": 5.937369346618652, "learning_rate": 1.9161041569458496e-05, "loss": 1.1125, "step": 4800 }, { "epoch": 15.740983606557377, "grad_norm": 6.9844489097595215, "learning_rate": 1.916061576445618e-05, "loss": 1.2205, "step": 4801 }, { "epoch": 15.744262295081967, "grad_norm": 8.950047492980957, "learning_rate": 1.91601898561584e-05, "loss": 1.2405, "step": 4802 }, { "epoch": 15.747540983606557, "grad_norm": 7.0362725257873535, "learning_rate": 1.9159763844569953e-05, "loss": 1.1575, "step": 4803 }, { "epoch": 15.750819672131147, "grad_norm": 9.462850570678711, "learning_rate": 1.9159337729695647e-05, "loss": 1.4048, "step": 4804 }, { "epoch": 15.754098360655737, "grad_norm": 6.848093032836914, "learning_rate": 1.9158911511540284e-05, "loss": 1.2913, "step": 4805 }, { "epoch": 15.757377049180327, "grad_norm": 9.401313781738281, "learning_rate": 1.915848519010867e-05, "loss": 1.2788, "step": 4806 }, { "epoch": 15.760655737704917, "grad_norm": 7.225612640380859, "learning_rate": 1.9158058765405613e-05, "loss": 1.2693, "step": 4807 }, { "epoch": 15.763934426229508, "grad_norm": 6.050390243530273, "learning_rate": 1.915763223743592e-05, "loss": 1.2856, "step": 4808 }, { "epoch": 15.767213114754098, "grad_norm": 7.677514553070068, "learning_rate": 1.9157205606204405e-05, "loss": 1.1423, "step": 4809 }, { "epoch": 15.770491803278688, "grad_norm": 8.521780014038086, "learning_rate": 1.9156778871715876e-05, "loss": 1.3501, "step": 4810 }, { "epoch": 15.773770491803278, "grad_norm": 8.089235305786133, "learning_rate": 1.915635203397514e-05, "loss": 1.3979, "step": 4811 }, { "epoch": 15.777049180327868, "grad_norm": 6.266726016998291, "learning_rate": 1.915592509298702e-05, "loss": 1.2336, "step": 4812 }, { "epoch": 15.780327868852458, "grad_norm": 5.9166951179504395, "learning_rate": 1.915549804875632e-05, "loss": 1.3398, "step": 4813 }, { "epoch": 15.783606557377048, "grad_norm": 7.421832084655762, "learning_rate": 1.915507090128786e-05, "loss": 1.1689, "step": 4814 }, { "epoch": 15.78688524590164, "grad_norm": 9.38991928100586, "learning_rate": 1.9154643650586463e-05, "loss": 1.3162, "step": 4815 }, { "epoch": 15.790163934426229, "grad_norm": 6.757399082183838, "learning_rate": 1.9154216296656936e-05, "loss": 1.3643, "step": 4816 }, { "epoch": 15.79344262295082, "grad_norm": 7.627668380737305, "learning_rate": 1.91537888395041e-05, "loss": 1.4473, "step": 4817 }, { "epoch": 15.79672131147541, "grad_norm": 7.567737579345703, "learning_rate": 1.9153361279132784e-05, "loss": 1.4827, "step": 4818 }, { "epoch": 15.8, "grad_norm": 7.262096405029297, "learning_rate": 1.91529336155478e-05, "loss": 1.3337, "step": 4819 }, { "epoch": 15.80327868852459, "grad_norm": 7.179909706115723, "learning_rate": 1.9152505848753966e-05, "loss": 1.2405, "step": 4820 }, { "epoch": 15.806557377049181, "grad_norm": 7.2630486488342285, "learning_rate": 1.9152077978756118e-05, "loss": 1.0806, "step": 4821 }, { "epoch": 15.809836065573771, "grad_norm": 6.474973201751709, "learning_rate": 1.9151650005559075e-05, "loss": 1.272, "step": 4822 }, { "epoch": 15.813114754098361, "grad_norm": 6.982957363128662, "learning_rate": 1.9151221929167663e-05, "loss": 1.1268, "step": 4823 }, { "epoch": 15.816393442622951, "grad_norm": 8.312536239624023, "learning_rate": 1.915079374958671e-05, "loss": 1.231, "step": 4824 }, { "epoch": 15.819672131147541, "grad_norm": 6.17711877822876, "learning_rate": 1.915036546682104e-05, "loss": 1.3723, "step": 4825 }, { "epoch": 15.822950819672132, "grad_norm": 6.760368824005127, "learning_rate": 1.9149937080875487e-05, "loss": 1.3042, "step": 4826 }, { "epoch": 15.826229508196722, "grad_norm": 6.899716854095459, "learning_rate": 1.9149508591754878e-05, "loss": 1.3279, "step": 4827 }, { "epoch": 15.829508196721312, "grad_norm": 5.934763431549072, "learning_rate": 1.914907999946405e-05, "loss": 1.2817, "step": 4828 }, { "epoch": 15.832786885245902, "grad_norm": 5.822116374969482, "learning_rate": 1.914865130400783e-05, "loss": 1.3096, "step": 4829 }, { "epoch": 15.836065573770492, "grad_norm": 4.975119113922119, "learning_rate": 1.9148222505391055e-05, "loss": 1.3311, "step": 4830 }, { "epoch": 15.839344262295082, "grad_norm": 5.849379539489746, "learning_rate": 1.914779360361856e-05, "loss": 1.4082, "step": 4831 }, { "epoch": 15.842622950819672, "grad_norm": 5.822503566741943, "learning_rate": 1.9147364598695176e-05, "loss": 1.23, "step": 4832 }, { "epoch": 15.845901639344262, "grad_norm": 8.246581077575684, "learning_rate": 1.9146935490625753e-05, "loss": 1.4102, "step": 4833 }, { "epoch": 15.849180327868853, "grad_norm": 7.637801170349121, "learning_rate": 1.914650627941512e-05, "loss": 1.0875, "step": 4834 }, { "epoch": 15.852459016393443, "grad_norm": 7.08363676071167, "learning_rate": 1.9146076965068114e-05, "loss": 1.3083, "step": 4835 }, { "epoch": 15.855737704918033, "grad_norm": 5.0338640213012695, "learning_rate": 1.9145647547589586e-05, "loss": 1.4514, "step": 4836 }, { "epoch": 15.859016393442623, "grad_norm": 7.156954765319824, "learning_rate": 1.914521802698437e-05, "loss": 1.301, "step": 4837 }, { "epoch": 15.862295081967213, "grad_norm": 8.983406066894531, "learning_rate": 1.914478840325731e-05, "loss": 1.3435, "step": 4838 }, { "epoch": 15.865573770491803, "grad_norm": 6.590577125549316, "learning_rate": 1.914435867641326e-05, "loss": 1.1709, "step": 4839 }, { "epoch": 15.868852459016393, "grad_norm": 6.22952127456665, "learning_rate": 1.914392884645705e-05, "loss": 1.157, "step": 4840 }, { "epoch": 15.872131147540983, "grad_norm": 7.259899616241455, "learning_rate": 1.9143498913393535e-05, "loss": 1.0461, "step": 4841 }, { "epoch": 15.875409836065574, "grad_norm": 5.207620143890381, "learning_rate": 1.914306887722757e-05, "loss": 1.1777, "step": 4842 }, { "epoch": 15.878688524590164, "grad_norm": 6.625607967376709, "learning_rate": 1.9142638737963994e-05, "loss": 1.4307, "step": 4843 }, { "epoch": 15.881967213114754, "grad_norm": 6.57364559173584, "learning_rate": 1.9142208495607658e-05, "loss": 1.0605, "step": 4844 }, { "epoch": 15.885245901639344, "grad_norm": 5.323868274688721, "learning_rate": 1.9141778150163417e-05, "loss": 1.3689, "step": 4845 }, { "epoch": 15.888524590163934, "grad_norm": 5.787618637084961, "learning_rate": 1.914134770163612e-05, "loss": 0.9824, "step": 4846 }, { "epoch": 15.891803278688524, "grad_norm": 5.925220012664795, "learning_rate": 1.9140917150030628e-05, "loss": 1.252, "step": 4847 }, { "epoch": 15.895081967213114, "grad_norm": 6.763308525085449, "learning_rate": 1.914048649535179e-05, "loss": 1.3954, "step": 4848 }, { "epoch": 15.898360655737704, "grad_norm": 5.60258150100708, "learning_rate": 1.914005573760446e-05, "loss": 1.3748, "step": 4849 }, { "epoch": 15.901639344262295, "grad_norm": 5.172190189361572, "learning_rate": 1.91396248767935e-05, "loss": 1.397, "step": 4850 }, { "epoch": 15.904918032786885, "grad_norm": 7.018204212188721, "learning_rate": 1.9139193912923764e-05, "loss": 1.1942, "step": 4851 }, { "epoch": 15.908196721311475, "grad_norm": 6.804849147796631, "learning_rate": 1.913876284600012e-05, "loss": 1.3419, "step": 4852 }, { "epoch": 15.911475409836065, "grad_norm": 6.631327152252197, "learning_rate": 1.9138331676027418e-05, "loss": 1.3376, "step": 4853 }, { "epoch": 15.914754098360655, "grad_norm": 7.621772766113281, "learning_rate": 1.9137900403010526e-05, "loss": 1.3545, "step": 4854 }, { "epoch": 15.918032786885245, "grad_norm": 6.453926086425781, "learning_rate": 1.9137469026954306e-05, "loss": 1.0968, "step": 4855 }, { "epoch": 15.921311475409835, "grad_norm": 6.422159671783447, "learning_rate": 1.9137037547863625e-05, "loss": 1.3748, "step": 4856 }, { "epoch": 15.924590163934425, "grad_norm": 6.671985149383545, "learning_rate": 1.9136605965743344e-05, "loss": 1.3269, "step": 4857 }, { "epoch": 15.927868852459017, "grad_norm": 6.1454949378967285, "learning_rate": 1.9136174280598326e-05, "loss": 1.2507, "step": 4858 }, { "epoch": 15.931147540983606, "grad_norm": 6.55383825302124, "learning_rate": 1.9135742492433448e-05, "loss": 1.1215, "step": 4859 }, { "epoch": 15.934426229508198, "grad_norm": 6.944574356079102, "learning_rate": 1.9135310601253575e-05, "loss": 1.2864, "step": 4860 }, { "epoch": 15.937704918032788, "grad_norm": 6.132347106933594, "learning_rate": 1.913487860706358e-05, "loss": 1.395, "step": 4861 }, { "epoch": 15.940983606557378, "grad_norm": 4.913037300109863, "learning_rate": 1.913444650986832e-05, "loss": 1.1653, "step": 4862 }, { "epoch": 15.944262295081968, "grad_norm": 6.674773693084717, "learning_rate": 1.913401430967269e-05, "loss": 1.468, "step": 4863 }, { "epoch": 15.947540983606558, "grad_norm": 5.274499416351318, "learning_rate": 1.9133582006481547e-05, "loss": 1.2546, "step": 4864 }, { "epoch": 15.950819672131148, "grad_norm": 5.751892566680908, "learning_rate": 1.913314960029977e-05, "loss": 1.428, "step": 4865 }, { "epoch": 15.954098360655738, "grad_norm": 6.815501689910889, "learning_rate": 1.9132717091132235e-05, "loss": 1.1694, "step": 4866 }, { "epoch": 15.957377049180328, "grad_norm": 8.521073341369629, "learning_rate": 1.913228447898382e-05, "loss": 1.5874, "step": 4867 }, { "epoch": 15.960655737704919, "grad_norm": 6.315528869628906, "learning_rate": 1.9131851763859402e-05, "loss": 1.1985, "step": 4868 }, { "epoch": 15.963934426229509, "grad_norm": 5.804703712463379, "learning_rate": 1.913141894576386e-05, "loss": 1.3835, "step": 4869 }, { "epoch": 15.967213114754099, "grad_norm": 5.671017646789551, "learning_rate": 1.9130986024702075e-05, "loss": 1.2534, "step": 4870 }, { "epoch": 15.970491803278689, "grad_norm": 5.939072132110596, "learning_rate": 1.9130553000678928e-05, "loss": 1.303, "step": 4871 }, { "epoch": 15.973770491803279, "grad_norm": 5.819947242736816, "learning_rate": 1.9130119873699303e-05, "loss": 1.3452, "step": 4872 }, { "epoch": 15.97704918032787, "grad_norm": 5.3490142822265625, "learning_rate": 1.9129686643768085e-05, "loss": 1.5338, "step": 4873 }, { "epoch": 15.98032786885246, "grad_norm": 5.547935962677002, "learning_rate": 1.9129253310890156e-05, "loss": 1.3337, "step": 4874 }, { "epoch": 15.98360655737705, "grad_norm": 7.12113618850708, "learning_rate": 1.9128819875070406e-05, "loss": 1.4021, "step": 4875 }, { "epoch": 15.98688524590164, "grad_norm": 6.212071895599365, "learning_rate": 1.9128386336313717e-05, "loss": 1.2288, "step": 4876 }, { "epoch": 15.99016393442623, "grad_norm": 6.9741902351379395, "learning_rate": 1.9127952694624983e-05, "loss": 1.2493, "step": 4877 }, { "epoch": 15.99344262295082, "grad_norm": 5.5609941482543945, "learning_rate": 1.9127518950009093e-05, "loss": 1.4409, "step": 4878 }, { "epoch": 15.99672131147541, "grad_norm": 5.898237705230713, "learning_rate": 1.9127085102470933e-05, "loss": 1.2859, "step": 4879 }, { "epoch": 16.0, "grad_norm": 7.3348917961120605, "learning_rate": 1.9126651152015404e-05, "loss": 1.3501, "step": 4880 }, { "epoch": 16.003278688524592, "grad_norm": 5.456066131591797, "learning_rate": 1.9126217098647388e-05, "loss": 1.0768, "step": 4881 }, { "epoch": 16.00655737704918, "grad_norm": 6.280579566955566, "learning_rate": 1.912578294237179e-05, "loss": 1.4036, "step": 4882 }, { "epoch": 16.009836065573772, "grad_norm": 6.329850196838379, "learning_rate": 1.9125348683193498e-05, "loss": 1.1313, "step": 4883 }, { "epoch": 16.01311475409836, "grad_norm": 7.3589301109313965, "learning_rate": 1.9124914321117413e-05, "loss": 1.0972, "step": 4884 }, { "epoch": 16.016393442622952, "grad_norm": 7.119327068328857, "learning_rate": 1.912447985614843e-05, "loss": 1.208, "step": 4885 }, { "epoch": 16.01967213114754, "grad_norm": 6.4790143966674805, "learning_rate": 1.912404528829145e-05, "loss": 1.104, "step": 4886 }, { "epoch": 16.022950819672133, "grad_norm": 8.944214820861816, "learning_rate": 1.9123610617551374e-05, "loss": 1.2825, "step": 4887 }, { "epoch": 16.02622950819672, "grad_norm": 6.909128665924072, "learning_rate": 1.9123175843933103e-05, "loss": 1.1948, "step": 4888 }, { "epoch": 16.029508196721313, "grad_norm": 6.417247295379639, "learning_rate": 1.912274096744154e-05, "loss": 1.1364, "step": 4889 }, { "epoch": 16.0327868852459, "grad_norm": 11.914085388183594, "learning_rate": 1.9122305988081584e-05, "loss": 1.2621, "step": 4890 }, { "epoch": 16.036065573770493, "grad_norm": 6.822451114654541, "learning_rate": 1.912187090585814e-05, "loss": 1.2751, "step": 4891 }, { "epoch": 16.03934426229508, "grad_norm": 6.287839889526367, "learning_rate": 1.9121435720776122e-05, "loss": 1.2104, "step": 4892 }, { "epoch": 16.042622950819673, "grad_norm": 31.49563980102539, "learning_rate": 1.912100043284043e-05, "loss": 1.1663, "step": 4893 }, { "epoch": 16.04590163934426, "grad_norm": 7.380756378173828, "learning_rate": 1.9120565042055977e-05, "loss": 1.0433, "step": 4894 }, { "epoch": 16.049180327868854, "grad_norm": 31.661832809448242, "learning_rate": 1.912012954842767e-05, "loss": 1.2823, "step": 4895 }, { "epoch": 16.052459016393442, "grad_norm": 7.249711990356445, "learning_rate": 1.9119693951960417e-05, "loss": 1.1221, "step": 4896 }, { "epoch": 16.055737704918034, "grad_norm": 6.877261638641357, "learning_rate": 1.9119258252659134e-05, "loss": 1.0963, "step": 4897 }, { "epoch": 16.059016393442622, "grad_norm": 7.08061408996582, "learning_rate": 1.9118822450528735e-05, "loss": 1.1794, "step": 4898 }, { "epoch": 16.062295081967214, "grad_norm": 7.870869159698486, "learning_rate": 1.911838654557413e-05, "loss": 1.2947, "step": 4899 }, { "epoch": 16.065573770491802, "grad_norm": 5.203034400939941, "learning_rate": 1.9117950537800235e-05, "loss": 1.2288, "step": 4900 }, { "epoch": 16.068852459016394, "grad_norm": 8.54223918914795, "learning_rate": 1.9117514427211966e-05, "loss": 1.2373, "step": 4901 }, { "epoch": 16.072131147540983, "grad_norm": 6.885888576507568, "learning_rate": 1.9117078213814243e-05, "loss": 1.1389, "step": 4902 }, { "epoch": 16.075409836065575, "grad_norm": 8.788763046264648, "learning_rate": 1.9116641897611986e-05, "loss": 1.1873, "step": 4903 }, { "epoch": 16.078688524590163, "grad_norm": 7.089413166046143, "learning_rate": 1.911620547861011e-05, "loss": 1.168, "step": 4904 }, { "epoch": 16.081967213114755, "grad_norm": 12.709538459777832, "learning_rate": 1.911576895681354e-05, "loss": 1.1823, "step": 4905 }, { "epoch": 16.085245901639343, "grad_norm": 6.604151248931885, "learning_rate": 1.91153323322272e-05, "loss": 0.998, "step": 4906 }, { "epoch": 16.088524590163935, "grad_norm": 6.728546619415283, "learning_rate": 1.9114895604856004e-05, "loss": 1.3572, "step": 4907 }, { "epoch": 16.091803278688523, "grad_norm": 10.340788841247559, "learning_rate": 1.911445877470489e-05, "loss": 1.2266, "step": 4908 }, { "epoch": 16.095081967213115, "grad_norm": 6.1786789894104, "learning_rate": 1.911402184177877e-05, "loss": 1.2842, "step": 4909 }, { "epoch": 16.098360655737704, "grad_norm": 7.897781848907471, "learning_rate": 1.9113584806082583e-05, "loss": 0.8992, "step": 4910 }, { "epoch": 16.101639344262296, "grad_norm": 7.485877990722656, "learning_rate": 1.9113147667621246e-05, "loss": 1.3862, "step": 4911 }, { "epoch": 16.104918032786884, "grad_norm": 5.757923603057861, "learning_rate": 1.9112710426399697e-05, "loss": 1.1703, "step": 4912 }, { "epoch": 16.108196721311476, "grad_norm": 5.367833137512207, "learning_rate": 1.9112273082422865e-05, "loss": 1.1976, "step": 4913 }, { "epoch": 16.111475409836064, "grad_norm": 5.41797399520874, "learning_rate": 1.9111835635695674e-05, "loss": 1.0363, "step": 4914 }, { "epoch": 16.114754098360656, "grad_norm": 6.436002254486084, "learning_rate": 1.911139808622307e-05, "loss": 1.1954, "step": 4915 }, { "epoch": 16.118032786885244, "grad_norm": 7.0531005859375, "learning_rate": 1.9110960434009973e-05, "loss": 1.2223, "step": 4916 }, { "epoch": 16.121311475409836, "grad_norm": 6.448566913604736, "learning_rate": 1.9110522679061326e-05, "loss": 1.1495, "step": 4917 }, { "epoch": 16.124590163934425, "grad_norm": 5.570141792297363, "learning_rate": 1.9110084821382064e-05, "loss": 1.3862, "step": 4918 }, { "epoch": 16.127868852459017, "grad_norm": 8.451797485351562, "learning_rate": 1.9109646860977122e-05, "loss": 1.3253, "step": 4919 }, { "epoch": 16.131147540983605, "grad_norm": 6.86973762512207, "learning_rate": 1.9109208797851438e-05, "loss": 1.2268, "step": 4920 }, { "epoch": 16.134426229508197, "grad_norm": 5.095297813415527, "learning_rate": 1.9108770632009958e-05, "loss": 1.2751, "step": 4921 }, { "epoch": 16.137704918032785, "grad_norm": 8.97947883605957, "learning_rate": 1.9108332363457618e-05, "loss": 1.4119, "step": 4922 }, { "epoch": 16.140983606557377, "grad_norm": 7.736321449279785, "learning_rate": 1.910789399219936e-05, "loss": 1.2517, "step": 4923 }, { "epoch": 16.14426229508197, "grad_norm": 8.141934394836426, "learning_rate": 1.9107455518240124e-05, "loss": 1.2477, "step": 4924 }, { "epoch": 16.147540983606557, "grad_norm": 5.48477840423584, "learning_rate": 1.910701694158486e-05, "loss": 1.2161, "step": 4925 }, { "epoch": 16.15081967213115, "grad_norm": 5.724758148193359, "learning_rate": 1.9106578262238515e-05, "loss": 1.5215, "step": 4926 }, { "epoch": 16.154098360655738, "grad_norm": 6.984986305236816, "learning_rate": 1.9106139480206024e-05, "loss": 1.2839, "step": 4927 }, { "epoch": 16.15737704918033, "grad_norm": 5.8692803382873535, "learning_rate": 1.9105700595492347e-05, "loss": 1.1033, "step": 4928 }, { "epoch": 16.160655737704918, "grad_norm": 6.89318323135376, "learning_rate": 1.9105261608102427e-05, "loss": 1.1711, "step": 4929 }, { "epoch": 16.16393442622951, "grad_norm": 5.859401702880859, "learning_rate": 1.9104822518041218e-05, "loss": 1.0965, "step": 4930 }, { "epoch": 16.167213114754098, "grad_norm": 8.95275592803955, "learning_rate": 1.9104383325313662e-05, "loss": 1.0331, "step": 4931 }, { "epoch": 16.17049180327869, "grad_norm": 5.874800682067871, "learning_rate": 1.9103944029924724e-05, "loss": 1.3217, "step": 4932 }, { "epoch": 16.17377049180328, "grad_norm": 6.794712543487549, "learning_rate": 1.910350463187935e-05, "loss": 1.0536, "step": 4933 }, { "epoch": 16.17704918032787, "grad_norm": 6.033184051513672, "learning_rate": 1.9103065131182494e-05, "loss": 1.3452, "step": 4934 }, { "epoch": 16.18032786885246, "grad_norm": 5.926632404327393, "learning_rate": 1.9102625527839116e-05, "loss": 1.0728, "step": 4935 }, { "epoch": 16.18360655737705, "grad_norm": 5.111370086669922, "learning_rate": 1.910218582185417e-05, "loss": 1.0797, "step": 4936 }, { "epoch": 16.18688524590164, "grad_norm": 6.636270999908447, "learning_rate": 1.9101746013232613e-05, "loss": 1.2913, "step": 4937 }, { "epoch": 16.19016393442623, "grad_norm": 5.43906307220459, "learning_rate": 1.910130610197941e-05, "loss": 1.4049, "step": 4938 }, { "epoch": 16.19344262295082, "grad_norm": 5.239634037017822, "learning_rate": 1.9100866088099513e-05, "loss": 1.1399, "step": 4939 }, { "epoch": 16.19672131147541, "grad_norm": 9.16508960723877, "learning_rate": 1.9100425971597893e-05, "loss": 1.0779, "step": 4940 }, { "epoch": 16.2, "grad_norm": 6.645820140838623, "learning_rate": 1.9099985752479505e-05, "loss": 1.1768, "step": 4941 }, { "epoch": 16.20327868852459, "grad_norm": 5.853351593017578, "learning_rate": 1.9099545430749317e-05, "loss": 1.2667, "step": 4942 }, { "epoch": 16.20655737704918, "grad_norm": 5.010162353515625, "learning_rate": 1.9099105006412295e-05, "loss": 0.9395, "step": 4943 }, { "epoch": 16.20983606557377, "grad_norm": 8.172714233398438, "learning_rate": 1.90986644794734e-05, "loss": 1.3699, "step": 4944 }, { "epoch": 16.21311475409836, "grad_norm": 5.948418617248535, "learning_rate": 1.909822384993761e-05, "loss": 1.3171, "step": 4945 }, { "epoch": 16.21639344262295, "grad_norm": 6.182907581329346, "learning_rate": 1.9097783117809874e-05, "loss": 0.9822, "step": 4946 }, { "epoch": 16.21967213114754, "grad_norm": 6.393546104431152, "learning_rate": 1.9097342283095185e-05, "loss": 1.0459, "step": 4947 }, { "epoch": 16.222950819672132, "grad_norm": 7.215763092041016, "learning_rate": 1.9096901345798496e-05, "loss": 1.2224, "step": 4948 }, { "epoch": 16.22622950819672, "grad_norm": 5.5407867431640625, "learning_rate": 1.909646030592479e-05, "loss": 1.0304, "step": 4949 }, { "epoch": 16.229508196721312, "grad_norm": 6.766398906707764, "learning_rate": 1.9096019163479033e-05, "loss": 1.1497, "step": 4950 }, { "epoch": 16.2327868852459, "grad_norm": 15.830694198608398, "learning_rate": 1.9095577918466205e-05, "loss": 1.179, "step": 4951 }, { "epoch": 16.236065573770492, "grad_norm": 8.738137245178223, "learning_rate": 1.9095136570891277e-05, "loss": 1.3682, "step": 4952 }, { "epoch": 16.23934426229508, "grad_norm": 6.055537700653076, "learning_rate": 1.909469512075923e-05, "loss": 1.3055, "step": 4953 }, { "epoch": 16.242622950819673, "grad_norm": 5.754078388214111, "learning_rate": 1.909425356807504e-05, "loss": 1.2556, "step": 4954 }, { "epoch": 16.24590163934426, "grad_norm": 5.070801734924316, "learning_rate": 1.9093811912843684e-05, "loss": 1.1997, "step": 4955 }, { "epoch": 16.249180327868853, "grad_norm": 7.028561115264893, "learning_rate": 1.909337015507014e-05, "loss": 1.2952, "step": 4956 }, { "epoch": 16.25245901639344, "grad_norm": 6.165749549865723, "learning_rate": 1.9092928294759397e-05, "loss": 1.1082, "step": 4957 }, { "epoch": 16.255737704918033, "grad_norm": 5.021775722503662, "learning_rate": 1.9092486331916432e-05, "loss": 1.2493, "step": 4958 }, { "epoch": 16.25901639344262, "grad_norm": 7.19464635848999, "learning_rate": 1.909204426654623e-05, "loss": 1.1804, "step": 4959 }, { "epoch": 16.262295081967213, "grad_norm": 5.593145847320557, "learning_rate": 1.9091602098653776e-05, "loss": 1.4226, "step": 4960 }, { "epoch": 16.2655737704918, "grad_norm": 6.567447662353516, "learning_rate": 1.9091159828244055e-05, "loss": 1.3442, "step": 4961 }, { "epoch": 16.268852459016394, "grad_norm": 5.929178714752197, "learning_rate": 1.9090717455322052e-05, "loss": 1.2532, "step": 4962 }, { "epoch": 16.272131147540982, "grad_norm": 5.744911193847656, "learning_rate": 1.909027497989276e-05, "loss": 1.2739, "step": 4963 }, { "epoch": 16.275409836065574, "grad_norm": 7.626625061035156, "learning_rate": 1.9089832401961165e-05, "loss": 1.0945, "step": 4964 }, { "epoch": 16.278688524590162, "grad_norm": 8.22742748260498, "learning_rate": 1.908938972153226e-05, "loss": 1.1875, "step": 4965 }, { "epoch": 16.281967213114754, "grad_norm": 5.6653642654418945, "learning_rate": 1.9088946938611034e-05, "loss": 1.0696, "step": 4966 }, { "epoch": 16.285245901639342, "grad_norm": 5.640312194824219, "learning_rate": 1.9088504053202485e-05, "loss": 1.377, "step": 4967 }, { "epoch": 16.288524590163934, "grad_norm": 6.709308624267578, "learning_rate": 1.9088061065311598e-05, "loss": 1.3206, "step": 4968 }, { "epoch": 16.291803278688526, "grad_norm": 5.9107136726379395, "learning_rate": 1.9087617974943376e-05, "loss": 1.2085, "step": 4969 }, { "epoch": 16.295081967213115, "grad_norm": 5.9414801597595215, "learning_rate": 1.9087174782102815e-05, "loss": 1.1995, "step": 4970 }, { "epoch": 16.298360655737707, "grad_norm": 15.740691184997559, "learning_rate": 1.9086731486794906e-05, "loss": 1.3496, "step": 4971 }, { "epoch": 16.301639344262295, "grad_norm": 6.372772693634033, "learning_rate": 1.908628808902465e-05, "loss": 1.3403, "step": 4972 }, { "epoch": 16.304918032786887, "grad_norm": 9.507746696472168, "learning_rate": 1.9085844588797054e-05, "loss": 1.006, "step": 4973 }, { "epoch": 16.308196721311475, "grad_norm": 6.967000484466553, "learning_rate": 1.9085400986117108e-05, "loss": 1.0769, "step": 4974 }, { "epoch": 16.311475409836067, "grad_norm": 5.587075233459473, "learning_rate": 1.9084957280989824e-05, "loss": 1.2909, "step": 4975 }, { "epoch": 16.314754098360655, "grad_norm": 7.652708530426025, "learning_rate": 1.9084513473420198e-05, "loss": 1.4126, "step": 4976 }, { "epoch": 16.318032786885247, "grad_norm": 6.007599353790283, "learning_rate": 1.9084069563413236e-05, "loss": 1.291, "step": 4977 }, { "epoch": 16.321311475409836, "grad_norm": 5.854412078857422, "learning_rate": 1.908362555097395e-05, "loss": 1.0991, "step": 4978 }, { "epoch": 16.324590163934428, "grad_norm": 5.517611503601074, "learning_rate": 1.9083181436107336e-05, "loss": 1.1564, "step": 4979 }, { "epoch": 16.327868852459016, "grad_norm": 7.521396160125732, "learning_rate": 1.908273721881841e-05, "loss": 1.0298, "step": 4980 }, { "epoch": 16.331147540983608, "grad_norm": 9.712078094482422, "learning_rate": 1.9082292899112175e-05, "loss": 1.3409, "step": 4981 }, { "epoch": 16.334426229508196, "grad_norm": 7.670865535736084, "learning_rate": 1.9081848476993647e-05, "loss": 1.2546, "step": 4982 }, { "epoch": 16.337704918032788, "grad_norm": 6.9925994873046875, "learning_rate": 1.9081403952467836e-05, "loss": 1.2324, "step": 4983 }, { "epoch": 16.340983606557376, "grad_norm": 5.342063903808594, "learning_rate": 1.9080959325539746e-05, "loss": 1.365, "step": 4984 }, { "epoch": 16.34426229508197, "grad_norm": 6.220664024353027, "learning_rate": 1.9080514596214406e-05, "loss": 1.0404, "step": 4985 }, { "epoch": 16.347540983606557, "grad_norm": 5.465656757354736, "learning_rate": 1.908006976449682e-05, "loss": 1.0728, "step": 4986 }, { "epoch": 16.35081967213115, "grad_norm": 5.688230514526367, "learning_rate": 1.9079624830392004e-05, "loss": 1.1699, "step": 4987 }, { "epoch": 16.354098360655737, "grad_norm": 5.8002095222473145, "learning_rate": 1.9079179793904982e-05, "loss": 1.1891, "step": 4988 }, { "epoch": 16.35737704918033, "grad_norm": 5.9525980949401855, "learning_rate": 1.9078734655040763e-05, "loss": 1.1879, "step": 4989 }, { "epoch": 16.360655737704917, "grad_norm": 6.631590843200684, "learning_rate": 1.9078289413804373e-05, "loss": 1.2034, "step": 4990 }, { "epoch": 16.36393442622951, "grad_norm": 5.691678047180176, "learning_rate": 1.907784407020083e-05, "loss": 1.109, "step": 4991 }, { "epoch": 16.367213114754097, "grad_norm": 5.278566837310791, "learning_rate": 1.907739862423516e-05, "loss": 0.9195, "step": 4992 }, { "epoch": 16.37049180327869, "grad_norm": 5.892734050750732, "learning_rate": 1.907695307591238e-05, "loss": 1.2954, "step": 4993 }, { "epoch": 16.373770491803278, "grad_norm": 5.003035545349121, "learning_rate": 1.9076507425237518e-05, "loss": 1.0974, "step": 4994 }, { "epoch": 16.37704918032787, "grad_norm": 6.471787929534912, "learning_rate": 1.9076061672215595e-05, "loss": 1.1863, "step": 4995 }, { "epoch": 16.380327868852458, "grad_norm": 6.399634838104248, "learning_rate": 1.907561581685164e-05, "loss": 1.2671, "step": 4996 }, { "epoch": 16.38360655737705, "grad_norm": 6.653284072875977, "learning_rate": 1.907516985915068e-05, "loss": 1.123, "step": 4997 }, { "epoch": 16.386885245901638, "grad_norm": 5.735847473144531, "learning_rate": 1.907472379911775e-05, "loss": 1.1874, "step": 4998 }, { "epoch": 16.39016393442623, "grad_norm": 4.793530464172363, "learning_rate": 1.9074277636757867e-05, "loss": 1.3503, "step": 4999 }, { "epoch": 16.39344262295082, "grad_norm": 4.94078254699707, "learning_rate": 1.907383137207607e-05, "loss": 1.2698, "step": 5000 }, { "epoch": 16.39672131147541, "grad_norm": 5.812944412231445, "learning_rate": 1.907338500507739e-05, "loss": 1.2366, "step": 5001 }, { "epoch": 16.4, "grad_norm": 6.301115989685059, "learning_rate": 1.9072938535766864e-05, "loss": 1.3615, "step": 5002 }, { "epoch": 16.40327868852459, "grad_norm": 5.737308502197266, "learning_rate": 1.907249196414952e-05, "loss": 1.2275, "step": 5003 }, { "epoch": 16.40655737704918, "grad_norm": 5.729306697845459, "learning_rate": 1.9072045290230394e-05, "loss": 1.125, "step": 5004 }, { "epoch": 16.40983606557377, "grad_norm": 6.342235565185547, "learning_rate": 1.9071598514014527e-05, "loss": 1.121, "step": 5005 }, { "epoch": 16.41311475409836, "grad_norm": 5.464046955108643, "learning_rate": 1.9071151635506954e-05, "loss": 1.5103, "step": 5006 }, { "epoch": 16.41639344262295, "grad_norm": 4.880171775817871, "learning_rate": 1.9070704654712715e-05, "loss": 1.3481, "step": 5007 }, { "epoch": 16.41967213114754, "grad_norm": 5.581352710723877, "learning_rate": 1.9070257571636852e-05, "loss": 1.2108, "step": 5008 }, { "epoch": 16.42295081967213, "grad_norm": 5.56561279296875, "learning_rate": 1.90698103862844e-05, "loss": 1.0685, "step": 5009 }, { "epoch": 16.42622950819672, "grad_norm": 5.8953633308410645, "learning_rate": 1.906936309866041e-05, "loss": 1.0637, "step": 5010 }, { "epoch": 16.42950819672131, "grad_norm": 5.247613906860352, "learning_rate": 1.9068915708769917e-05, "loss": 1.3453, "step": 5011 }, { "epoch": 16.432786885245903, "grad_norm": 5.19228982925415, "learning_rate": 1.906846821661797e-05, "loss": 1.0157, "step": 5012 }, { "epoch": 16.43606557377049, "grad_norm": 6.0259246826171875, "learning_rate": 1.9068020622209616e-05, "loss": 1.153, "step": 5013 }, { "epoch": 16.439344262295084, "grad_norm": 6.020227432250977, "learning_rate": 1.9067572925549903e-05, "loss": 1.2483, "step": 5014 }, { "epoch": 16.442622950819672, "grad_norm": 8.251675605773926, "learning_rate": 1.9067125126643878e-05, "loss": 1.196, "step": 5015 }, { "epoch": 16.445901639344264, "grad_norm": 6.190026760101318, "learning_rate": 1.9066677225496588e-05, "loss": 1.1141, "step": 5016 }, { "epoch": 16.449180327868852, "grad_norm": 5.037295341491699, "learning_rate": 1.9066229222113086e-05, "loss": 1.1174, "step": 5017 }, { "epoch": 16.452459016393444, "grad_norm": 5.091028690338135, "learning_rate": 1.9065781116498422e-05, "loss": 1.0912, "step": 5018 }, { "epoch": 16.455737704918032, "grad_norm": 5.93966817855835, "learning_rate": 1.906533290865765e-05, "loss": 1.2305, "step": 5019 }, { "epoch": 16.459016393442624, "grad_norm": 6.737738132476807, "learning_rate": 1.9064884598595826e-05, "loss": 1.0918, "step": 5020 }, { "epoch": 16.462295081967213, "grad_norm": 4.568091869354248, "learning_rate": 1.9064436186318e-05, "loss": 1.1832, "step": 5021 }, { "epoch": 16.465573770491805, "grad_norm": 7.222108840942383, "learning_rate": 1.9063987671829234e-05, "loss": 1.2097, "step": 5022 }, { "epoch": 16.468852459016393, "grad_norm": 5.368531227111816, "learning_rate": 1.906353905513458e-05, "loss": 1.0935, "step": 5023 }, { "epoch": 16.472131147540985, "grad_norm": 4.777165412902832, "learning_rate": 1.90630903362391e-05, "loss": 1.1401, "step": 5024 }, { "epoch": 16.475409836065573, "grad_norm": 4.874639511108398, "learning_rate": 1.9062641515147856e-05, "loss": 1.1765, "step": 5025 }, { "epoch": 16.478688524590165, "grad_norm": 5.936769008636475, "learning_rate": 1.90621925918659e-05, "loss": 1.1265, "step": 5026 }, { "epoch": 16.481967213114753, "grad_norm": 5.4167280197143555, "learning_rate": 1.9061743566398306e-05, "loss": 1.3336, "step": 5027 }, { "epoch": 16.485245901639345, "grad_norm": 7.817202091217041, "learning_rate": 1.906129443875013e-05, "loss": 1.0568, "step": 5028 }, { "epoch": 16.488524590163934, "grad_norm": 6.841742992401123, "learning_rate": 1.9060845208926438e-05, "loss": 1.0504, "step": 5029 }, { "epoch": 16.491803278688526, "grad_norm": 7.363117218017578, "learning_rate": 1.9060395876932296e-05, "loss": 1.21, "step": 5030 }, { "epoch": 16.495081967213114, "grad_norm": 6.214908599853516, "learning_rate": 1.9059946442772768e-05, "loss": 1.2695, "step": 5031 }, { "epoch": 16.498360655737706, "grad_norm": 6.788349628448486, "learning_rate": 1.9059496906452927e-05, "loss": 1.1677, "step": 5032 }, { "epoch": 16.501639344262294, "grad_norm": 6.105998992919922, "learning_rate": 1.9059047267977836e-05, "loss": 1.2186, "step": 5033 }, { "epoch": 16.504918032786886, "grad_norm": 7.111069202423096, "learning_rate": 1.9058597527352568e-05, "loss": 1.345, "step": 5034 }, { "epoch": 16.508196721311474, "grad_norm": 5.190918445587158, "learning_rate": 1.9058147684582194e-05, "loss": 1.4146, "step": 5035 }, { "epoch": 16.511475409836066, "grad_norm": 5.448409557342529, "learning_rate": 1.9057697739671786e-05, "loss": 1.1296, "step": 5036 }, { "epoch": 16.514754098360655, "grad_norm": 8.388707160949707, "learning_rate": 1.9057247692626423e-05, "loss": 1.0623, "step": 5037 }, { "epoch": 16.518032786885247, "grad_norm": 6.008148670196533, "learning_rate": 1.9056797543451174e-05, "loss": 1.342, "step": 5038 }, { "epoch": 16.521311475409835, "grad_norm": 8.575223922729492, "learning_rate": 1.905634729215111e-05, "loss": 1.2224, "step": 5039 }, { "epoch": 16.524590163934427, "grad_norm": 8.00417709350586, "learning_rate": 1.905589693873132e-05, "loss": 1.0585, "step": 5040 }, { "epoch": 16.527868852459015, "grad_norm": 5.423209190368652, "learning_rate": 1.9055446483196877e-05, "loss": 1.353, "step": 5041 }, { "epoch": 16.531147540983607, "grad_norm": 9.379209518432617, "learning_rate": 1.9054995925552857e-05, "loss": 1.1597, "step": 5042 }, { "epoch": 16.534426229508195, "grad_norm": 5.796346187591553, "learning_rate": 1.905454526580434e-05, "loss": 1.2823, "step": 5043 }, { "epoch": 16.537704918032787, "grad_norm": 5.7021026611328125, "learning_rate": 1.9054094503956416e-05, "loss": 1.2852, "step": 5044 }, { "epoch": 16.540983606557376, "grad_norm": 7.676151275634766, "learning_rate": 1.9053643640014162e-05, "loss": 1.1973, "step": 5045 }, { "epoch": 16.544262295081968, "grad_norm": 6.231141090393066, "learning_rate": 1.905319267398266e-05, "loss": 1.0955, "step": 5046 }, { "epoch": 16.547540983606556, "grad_norm": 6.75424337387085, "learning_rate": 1.9052741605867e-05, "loss": 1.0768, "step": 5047 }, { "epoch": 16.550819672131148, "grad_norm": 6.611103057861328, "learning_rate": 1.9052290435672264e-05, "loss": 1.2581, "step": 5048 }, { "epoch": 16.554098360655736, "grad_norm": 6.980668544769287, "learning_rate": 1.905183916340354e-05, "loss": 1.1429, "step": 5049 }, { "epoch": 16.557377049180328, "grad_norm": 6.542794227600098, "learning_rate": 1.9051387789065922e-05, "loss": 0.9581, "step": 5050 }, { "epoch": 16.560655737704916, "grad_norm": 7.17833948135376, "learning_rate": 1.9050936312664492e-05, "loss": 1.2736, "step": 5051 }, { "epoch": 16.56393442622951, "grad_norm": 7.3277974128723145, "learning_rate": 1.9050484734204347e-05, "loss": 1.2131, "step": 5052 }, { "epoch": 16.567213114754097, "grad_norm": 5.5325775146484375, "learning_rate": 1.9050033053690575e-05, "loss": 1.2673, "step": 5053 }, { "epoch": 16.57049180327869, "grad_norm": 5.456267833709717, "learning_rate": 1.904958127112827e-05, "loss": 1.155, "step": 5054 }, { "epoch": 16.57377049180328, "grad_norm": 5.713072776794434, "learning_rate": 1.904912938652253e-05, "loss": 1.2478, "step": 5055 }, { "epoch": 16.57704918032787, "grad_norm": 5.693882465362549, "learning_rate": 1.9048677399878444e-05, "loss": 1.1814, "step": 5056 }, { "epoch": 16.58032786885246, "grad_norm": 5.228651523590088, "learning_rate": 1.9048225311201114e-05, "loss": 1.2048, "step": 5057 }, { "epoch": 16.58360655737705, "grad_norm": 7.02666711807251, "learning_rate": 1.9047773120495634e-05, "loss": 0.9547, "step": 5058 }, { "epoch": 16.58688524590164, "grad_norm": 5.8427414894104, "learning_rate": 1.9047320827767106e-05, "loss": 1.2878, "step": 5059 }, { "epoch": 16.59016393442623, "grad_norm": 8.302895545959473, "learning_rate": 1.904686843302063e-05, "loss": 1.1614, "step": 5060 }, { "epoch": 16.59344262295082, "grad_norm": 6.714869499206543, "learning_rate": 1.9046415936261304e-05, "loss": 1.2761, "step": 5061 }, { "epoch": 16.59672131147541, "grad_norm": 7.33555793762207, "learning_rate": 1.9045963337494232e-05, "loss": 1.2639, "step": 5062 }, { "epoch": 16.6, "grad_norm": 5.954897880554199, "learning_rate": 1.904551063672452e-05, "loss": 1.0916, "step": 5063 }, { "epoch": 16.60327868852459, "grad_norm": 6.922745227813721, "learning_rate": 1.904505783395727e-05, "loss": 1.2386, "step": 5064 }, { "epoch": 16.60655737704918, "grad_norm": 8.968873977661133, "learning_rate": 1.9044604929197588e-05, "loss": 1.1716, "step": 5065 }, { "epoch": 16.60983606557377, "grad_norm": 6.337965488433838, "learning_rate": 1.904415192245058e-05, "loss": 1.4333, "step": 5066 }, { "epoch": 16.613114754098362, "grad_norm": 5.367619037628174, "learning_rate": 1.9043698813721357e-05, "loss": 1.1047, "step": 5067 }, { "epoch": 16.61639344262295, "grad_norm": 5.900163173675537, "learning_rate": 1.904324560301503e-05, "loss": 1.1221, "step": 5068 }, { "epoch": 16.619672131147542, "grad_norm": 5.87581205368042, "learning_rate": 1.9042792290336702e-05, "loss": 1.2913, "step": 5069 }, { "epoch": 16.62295081967213, "grad_norm": 6.947291851043701, "learning_rate": 1.904233887569149e-05, "loss": 1.4058, "step": 5070 }, { "epoch": 16.626229508196722, "grad_norm": 6.311145305633545, "learning_rate": 1.9041885359084507e-05, "loss": 1.231, "step": 5071 }, { "epoch": 16.62950819672131, "grad_norm": 6.453390121459961, "learning_rate": 1.904143174052086e-05, "loss": 1.3169, "step": 5072 }, { "epoch": 16.632786885245903, "grad_norm": 7.0491557121276855, "learning_rate": 1.9040978020005677e-05, "loss": 0.9895, "step": 5073 }, { "epoch": 16.63606557377049, "grad_norm": 6.944518566131592, "learning_rate": 1.9040524197544062e-05, "loss": 1.3506, "step": 5074 }, { "epoch": 16.639344262295083, "grad_norm": 7.601922988891602, "learning_rate": 1.9040070273141138e-05, "loss": 1.324, "step": 5075 }, { "epoch": 16.64262295081967, "grad_norm": 5.640069007873535, "learning_rate": 1.9039616246802023e-05, "loss": 0.8741, "step": 5076 }, { "epoch": 16.645901639344263, "grad_norm": 5.294717788696289, "learning_rate": 1.9039162118531834e-05, "loss": 1.4407, "step": 5077 }, { "epoch": 16.64918032786885, "grad_norm": 9.797799110412598, "learning_rate": 1.9038707888335694e-05, "loss": 1.2153, "step": 5078 }, { "epoch": 16.652459016393443, "grad_norm": 6.299435615539551, "learning_rate": 1.9038253556218725e-05, "loss": 1.1301, "step": 5079 }, { "epoch": 16.65573770491803, "grad_norm": 5.975564956665039, "learning_rate": 1.903779912218605e-05, "loss": 1.2963, "step": 5080 }, { "epoch": 16.659016393442624, "grad_norm": 8.68079662322998, "learning_rate": 1.903734458624279e-05, "loss": 1.1729, "step": 5081 }, { "epoch": 16.662295081967212, "grad_norm": 7.071132183074951, "learning_rate": 1.9036889948394077e-05, "loss": 1.0883, "step": 5082 }, { "epoch": 16.665573770491804, "grad_norm": 6.16436767578125, "learning_rate": 1.9036435208645034e-05, "loss": 1.2341, "step": 5083 }, { "epoch": 16.668852459016392, "grad_norm": 5.404183387756348, "learning_rate": 1.903598036700079e-05, "loss": 1.5583, "step": 5084 }, { "epoch": 16.672131147540984, "grad_norm": 6.764447212219238, "learning_rate": 1.9035525423466468e-05, "loss": 1.2437, "step": 5085 }, { "epoch": 16.675409836065572, "grad_norm": 5.449801445007324, "learning_rate": 1.9035070378047204e-05, "loss": 1.2339, "step": 5086 }, { "epoch": 16.678688524590164, "grad_norm": 5.563921928405762, "learning_rate": 1.903461523074813e-05, "loss": 1.219, "step": 5087 }, { "epoch": 16.681967213114753, "grad_norm": 5.863699436187744, "learning_rate": 1.9034159981574372e-05, "loss": 1.1582, "step": 5088 }, { "epoch": 16.685245901639345, "grad_norm": 5.6079936027526855, "learning_rate": 1.9033704630531066e-05, "loss": 1.2196, "step": 5089 }, { "epoch": 16.688524590163933, "grad_norm": 5.480492115020752, "learning_rate": 1.903324917762335e-05, "loss": 1.2781, "step": 5090 }, { "epoch": 16.691803278688525, "grad_norm": 6.030633449554443, "learning_rate": 1.903279362285636e-05, "loss": 1.2964, "step": 5091 }, { "epoch": 16.695081967213113, "grad_norm": 7.382988929748535, "learning_rate": 1.9032337966235225e-05, "loss": 1.4612, "step": 5092 }, { "epoch": 16.698360655737705, "grad_norm": 5.854630470275879, "learning_rate": 1.903188220776509e-05, "loss": 1.2727, "step": 5093 }, { "epoch": 16.701639344262293, "grad_norm": 5.975923538208008, "learning_rate": 1.9031426347451095e-05, "loss": 0.9609, "step": 5094 }, { "epoch": 16.704918032786885, "grad_norm": 5.756021499633789, "learning_rate": 1.9030970385298377e-05, "loss": 1.1853, "step": 5095 }, { "epoch": 16.708196721311474, "grad_norm": 6.4064621925354, "learning_rate": 1.9030514321312074e-05, "loss": 1.0566, "step": 5096 }, { "epoch": 16.711475409836066, "grad_norm": 7.0973639488220215, "learning_rate": 1.9030058155497334e-05, "loss": 1.209, "step": 5097 }, { "epoch": 16.714754098360658, "grad_norm": 6.366944789886475, "learning_rate": 1.90296018878593e-05, "loss": 1.3127, "step": 5098 }, { "epoch": 16.718032786885246, "grad_norm": 6.566442966461182, "learning_rate": 1.9029145518403117e-05, "loss": 1.066, "step": 5099 }, { "epoch": 16.721311475409838, "grad_norm": 5.466251850128174, "learning_rate": 1.902868904713393e-05, "loss": 1.1316, "step": 5100 }, { "epoch": 16.724590163934426, "grad_norm": 6.488228797912598, "learning_rate": 1.9028232474056886e-05, "loss": 1.2462, "step": 5101 }, { "epoch": 16.727868852459018, "grad_norm": 7.063770771026611, "learning_rate": 1.9027775799177133e-05, "loss": 1.4009, "step": 5102 }, { "epoch": 16.731147540983606, "grad_norm": 5.366828918457031, "learning_rate": 1.9027319022499823e-05, "loss": 1.259, "step": 5103 }, { "epoch": 16.7344262295082, "grad_norm": 5.611567974090576, "learning_rate": 1.9026862144030108e-05, "loss": 1.1104, "step": 5104 }, { "epoch": 16.737704918032787, "grad_norm": 6.094473838806152, "learning_rate": 1.902640516377313e-05, "loss": 1.3035, "step": 5105 }, { "epoch": 16.74098360655738, "grad_norm": 5.654389381408691, "learning_rate": 1.9025948081734053e-05, "loss": 1.4143, "step": 5106 }, { "epoch": 16.744262295081967, "grad_norm": 5.379783630371094, "learning_rate": 1.9025490897918027e-05, "loss": 1.2671, "step": 5107 }, { "epoch": 16.74754098360656, "grad_norm": 5.081586837768555, "learning_rate": 1.9025033612330203e-05, "loss": 1.3865, "step": 5108 }, { "epoch": 16.750819672131147, "grad_norm": 6.941141128540039, "learning_rate": 1.9024576224975747e-05, "loss": 1.1624, "step": 5109 }, { "epoch": 16.75409836065574, "grad_norm": 8.125192642211914, "learning_rate": 1.9024118735859807e-05, "loss": 1.1477, "step": 5110 }, { "epoch": 16.757377049180327, "grad_norm": 6.33914041519165, "learning_rate": 1.9023661144987547e-05, "loss": 1.325, "step": 5111 }, { "epoch": 16.76065573770492, "grad_norm": 6.83723258972168, "learning_rate": 1.9023203452364125e-05, "loss": 1.446, "step": 5112 }, { "epoch": 16.763934426229508, "grad_norm": 6.2849321365356445, "learning_rate": 1.9022745657994702e-05, "loss": 0.9897, "step": 5113 }, { "epoch": 16.7672131147541, "grad_norm": 5.22208309173584, "learning_rate": 1.902228776188444e-05, "loss": 1.1665, "step": 5114 }, { "epoch": 16.770491803278688, "grad_norm": 5.80234432220459, "learning_rate": 1.90218297640385e-05, "loss": 1.2292, "step": 5115 }, { "epoch": 16.77377049180328, "grad_norm": 6.352644920349121, "learning_rate": 1.9021371664462058e-05, "loss": 1.2026, "step": 5116 }, { "epoch": 16.777049180327868, "grad_norm": 6.864444732666016, "learning_rate": 1.9020913463160263e-05, "loss": 1.1448, "step": 5117 }, { "epoch": 16.78032786885246, "grad_norm": 6.525245666503906, "learning_rate": 1.902045516013829e-05, "loss": 1.1803, "step": 5118 }, { "epoch": 16.78360655737705, "grad_norm": 5.256924629211426, "learning_rate": 1.9019996755401306e-05, "loss": 1.1531, "step": 5119 }, { "epoch": 16.78688524590164, "grad_norm": 5.80098819732666, "learning_rate": 1.9019538248954483e-05, "loss": 0.9335, "step": 5120 }, { "epoch": 16.79016393442623, "grad_norm": 5.776705741882324, "learning_rate": 1.9019079640802988e-05, "loss": 1.4067, "step": 5121 }, { "epoch": 16.79344262295082, "grad_norm": 5.718236923217773, "learning_rate": 1.901862093095199e-05, "loss": 1.2396, "step": 5122 }, { "epoch": 16.79672131147541, "grad_norm": 5.070932865142822, "learning_rate": 1.9018162119406668e-05, "loss": 1.3042, "step": 5123 }, { "epoch": 16.8, "grad_norm": 4.761099338531494, "learning_rate": 1.9017703206172187e-05, "loss": 1.2253, "step": 5124 }, { "epoch": 16.80327868852459, "grad_norm": 6.4628167152404785, "learning_rate": 1.9017244191253726e-05, "loss": 1.1892, "step": 5125 }, { "epoch": 16.80655737704918, "grad_norm": 6.113093376159668, "learning_rate": 1.9016785074656464e-05, "loss": 1.0448, "step": 5126 }, { "epoch": 16.80983606557377, "grad_norm": 5.876452922821045, "learning_rate": 1.9016325856385575e-05, "loss": 1.1216, "step": 5127 }, { "epoch": 16.81311475409836, "grad_norm": 6.283257961273193, "learning_rate": 1.9015866536446236e-05, "loss": 1.4355, "step": 5128 }, { "epoch": 16.81639344262295, "grad_norm": 5.353209018707275, "learning_rate": 1.901540711484363e-05, "loss": 1.4277, "step": 5129 }, { "epoch": 16.81967213114754, "grad_norm": 5.191181659698486, "learning_rate": 1.9014947591582933e-05, "loss": 1.2179, "step": 5130 }, { "epoch": 16.82295081967213, "grad_norm": 5.249701499938965, "learning_rate": 1.9014487966669328e-05, "loss": 1.2571, "step": 5131 }, { "epoch": 16.82622950819672, "grad_norm": 4.979970932006836, "learning_rate": 1.9014028240108e-05, "loss": 1.2554, "step": 5132 }, { "epoch": 16.82950819672131, "grad_norm": 5.393504619598389, "learning_rate": 1.9013568411904133e-05, "loss": 1.1765, "step": 5133 }, { "epoch": 16.832786885245902, "grad_norm": 5.542590618133545, "learning_rate": 1.9013108482062908e-05, "loss": 1.4233, "step": 5134 }, { "epoch": 16.83606557377049, "grad_norm": 6.041392803192139, "learning_rate": 1.9012648450589513e-05, "loss": 1.2183, "step": 5135 }, { "epoch": 16.839344262295082, "grad_norm": 4.9416656494140625, "learning_rate": 1.9012188317489136e-05, "loss": 1.2467, "step": 5136 }, { "epoch": 16.84262295081967, "grad_norm": 6.353060722351074, "learning_rate": 1.9011728082766967e-05, "loss": 1.2717, "step": 5137 }, { "epoch": 16.845901639344262, "grad_norm": 5.605661869049072, "learning_rate": 1.9011267746428193e-05, "loss": 1.0576, "step": 5138 }, { "epoch": 16.84918032786885, "grad_norm": 5.829712867736816, "learning_rate": 1.901080730847801e-05, "loss": 1.1917, "step": 5139 }, { "epoch": 16.852459016393443, "grad_norm": 5.783937454223633, "learning_rate": 1.90103467689216e-05, "loss": 1.3018, "step": 5140 }, { "epoch": 16.855737704918035, "grad_norm": 6.142447471618652, "learning_rate": 1.9009886127764164e-05, "loss": 1.2375, "step": 5141 }, { "epoch": 16.859016393442623, "grad_norm": 5.316588401794434, "learning_rate": 1.9009425385010894e-05, "loss": 1.413, "step": 5142 }, { "epoch": 16.862295081967215, "grad_norm": 4.9007487297058105, "learning_rate": 1.9008964540666984e-05, "loss": 1.3242, "step": 5143 }, { "epoch": 16.865573770491803, "grad_norm": 6.416469097137451, "learning_rate": 1.9008503594737632e-05, "loss": 1.2429, "step": 5144 }, { "epoch": 16.868852459016395, "grad_norm": 5.496776103973389, "learning_rate": 1.9008042547228038e-05, "loss": 1.3313, "step": 5145 }, { "epoch": 16.872131147540983, "grad_norm": 5.243940830230713, "learning_rate": 1.9007581398143394e-05, "loss": 1.0438, "step": 5146 }, { "epoch": 16.875409836065575, "grad_norm": 5.46386194229126, "learning_rate": 1.9007120147488908e-05, "loss": 1.252, "step": 5147 }, { "epoch": 16.878688524590164, "grad_norm": 7.55790901184082, "learning_rate": 1.9006658795269775e-05, "loss": 0.9559, "step": 5148 }, { "epoch": 16.881967213114756, "grad_norm": 5.48533821105957, "learning_rate": 1.9006197341491202e-05, "loss": 1.1318, "step": 5149 }, { "epoch": 16.885245901639344, "grad_norm": 5.773113250732422, "learning_rate": 1.9005735786158386e-05, "loss": 1.1296, "step": 5150 }, { "epoch": 16.888524590163936, "grad_norm": 5.3519415855407715, "learning_rate": 1.9005274129276538e-05, "loss": 1.2629, "step": 5151 }, { "epoch": 16.891803278688524, "grad_norm": 5.567676544189453, "learning_rate": 1.900481237085086e-05, "loss": 1.3259, "step": 5152 }, { "epoch": 16.895081967213116, "grad_norm": 6.422274112701416, "learning_rate": 1.9004350510886564e-05, "loss": 1.1448, "step": 5153 }, { "epoch": 16.898360655737704, "grad_norm": 7.158276557922363, "learning_rate": 1.900388854938885e-05, "loss": 1.2478, "step": 5154 }, { "epoch": 16.901639344262296, "grad_norm": 5.490501880645752, "learning_rate": 1.900342648636293e-05, "loss": 1.1528, "step": 5155 }, { "epoch": 16.904918032786885, "grad_norm": 5.3069233894348145, "learning_rate": 1.9002964321814016e-05, "loss": 1.3704, "step": 5156 }, { "epoch": 16.908196721311477, "grad_norm": 5.040757179260254, "learning_rate": 1.900250205574732e-05, "loss": 1.3093, "step": 5157 }, { "epoch": 16.911475409836065, "grad_norm": 4.961241245269775, "learning_rate": 1.900203968816805e-05, "loss": 1.24, "step": 5158 }, { "epoch": 16.914754098360657, "grad_norm": 4.619396209716797, "learning_rate": 1.9001577219081428e-05, "loss": 1.3353, "step": 5159 }, { "epoch": 16.918032786885245, "grad_norm": 5.920142650604248, "learning_rate": 1.900111464849266e-05, "loss": 1.2349, "step": 5160 }, { "epoch": 16.921311475409837, "grad_norm": 7.028720378875732, "learning_rate": 1.9000651976406966e-05, "loss": 1.1732, "step": 5161 }, { "epoch": 16.924590163934425, "grad_norm": 7.521356582641602, "learning_rate": 1.9000189202829562e-05, "loss": 1.3203, "step": 5162 }, { "epoch": 16.927868852459017, "grad_norm": 5.836017608642578, "learning_rate": 1.899972632776567e-05, "loss": 1.3906, "step": 5163 }, { "epoch": 16.931147540983606, "grad_norm": 6.78883171081543, "learning_rate": 1.89992633512205e-05, "loss": 1.3479, "step": 5164 }, { "epoch": 16.934426229508198, "grad_norm": 6.212003231048584, "learning_rate": 1.899880027319928e-05, "loss": 0.9565, "step": 5165 }, { "epoch": 16.937704918032786, "grad_norm": 5.389747142791748, "learning_rate": 1.8998337093707237e-05, "loss": 1.3302, "step": 5166 }, { "epoch": 16.940983606557378, "grad_norm": 6.332284450531006, "learning_rate": 1.8997873812749583e-05, "loss": 1.1841, "step": 5167 }, { "epoch": 16.944262295081966, "grad_norm": 5.270400047302246, "learning_rate": 1.8997410430331548e-05, "loss": 1.0658, "step": 5168 }, { "epoch": 16.947540983606558, "grad_norm": 4.7964630126953125, "learning_rate": 1.8996946946458356e-05, "loss": 1.1583, "step": 5169 }, { "epoch": 16.950819672131146, "grad_norm": 5.244201183319092, "learning_rate": 1.8996483361135228e-05, "loss": 1.2483, "step": 5170 }, { "epoch": 16.95409836065574, "grad_norm": 5.942656993865967, "learning_rate": 1.89960196743674e-05, "loss": 1.2294, "step": 5171 }, { "epoch": 16.957377049180327, "grad_norm": 7.724979400634766, "learning_rate": 1.8995555886160094e-05, "loss": 1.1071, "step": 5172 }, { "epoch": 16.96065573770492, "grad_norm": 5.358799934387207, "learning_rate": 1.8995091996518544e-05, "loss": 1.2468, "step": 5173 }, { "epoch": 16.963934426229507, "grad_norm": 6.013625144958496, "learning_rate": 1.899462800544798e-05, "loss": 1.2271, "step": 5174 }, { "epoch": 16.9672131147541, "grad_norm": 6.134364604949951, "learning_rate": 1.8994163912953633e-05, "loss": 1.2703, "step": 5175 }, { "epoch": 16.970491803278687, "grad_norm": 6.517800807952881, "learning_rate": 1.8993699719040734e-05, "loss": 1.2927, "step": 5176 }, { "epoch": 16.97377049180328, "grad_norm": 4.470831871032715, "learning_rate": 1.8993235423714522e-05, "loss": 1.3782, "step": 5177 }, { "epoch": 16.977049180327867, "grad_norm": 10.800227165222168, "learning_rate": 1.899277102698023e-05, "loss": 1.1885, "step": 5178 }, { "epoch": 16.98032786885246, "grad_norm": 6.934588432312012, "learning_rate": 1.8992306528843094e-05, "loss": 1.2305, "step": 5179 }, { "epoch": 16.983606557377048, "grad_norm": 5.325123310089111, "learning_rate": 1.8991841929308353e-05, "loss": 1.2135, "step": 5180 }, { "epoch": 16.98688524590164, "grad_norm": 5.798659324645996, "learning_rate": 1.899137722838124e-05, "loss": 1.3411, "step": 5181 }, { "epoch": 16.990163934426228, "grad_norm": 10.20034408569336, "learning_rate": 1.8990912426067006e-05, "loss": 1.0577, "step": 5182 }, { "epoch": 16.99344262295082, "grad_norm": 6.069117546081543, "learning_rate": 1.8990447522370886e-05, "loss": 1.1543, "step": 5183 }, { "epoch": 16.99672131147541, "grad_norm": 5.271059513092041, "learning_rate": 1.898998251729812e-05, "loss": 1.3192, "step": 5184 }, { "epoch": 17.0, "grad_norm": 5.204840660095215, "learning_rate": 1.8989517410853956e-05, "loss": 1.1328, "step": 5185 }, { "epoch": 17.003278688524592, "grad_norm": 5.950666904449463, "learning_rate": 1.8989052203043633e-05, "loss": 0.8958, "step": 5186 }, { "epoch": 17.00655737704918, "grad_norm": 6.58958625793457, "learning_rate": 1.8988586893872404e-05, "loss": 0.9692, "step": 5187 }, { "epoch": 17.009836065573772, "grad_norm": 6.048765659332275, "learning_rate": 1.898812148334551e-05, "loss": 1.1138, "step": 5188 }, { "epoch": 17.01311475409836, "grad_norm": 5.028224945068359, "learning_rate": 1.89876559714682e-05, "loss": 1.2369, "step": 5189 }, { "epoch": 17.016393442622952, "grad_norm": 12.420877456665039, "learning_rate": 1.8987190358245727e-05, "loss": 1.2928, "step": 5190 }, { "epoch": 17.01967213114754, "grad_norm": 6.712452411651611, "learning_rate": 1.8986724643683337e-05, "loss": 1.0428, "step": 5191 }, { "epoch": 17.022950819672133, "grad_norm": 6.257495880126953, "learning_rate": 1.898625882778628e-05, "loss": 1.199, "step": 5192 }, { "epoch": 17.02622950819672, "grad_norm": 5.227756023406982, "learning_rate": 1.8985792910559814e-05, "loss": 1.2095, "step": 5193 }, { "epoch": 17.029508196721313, "grad_norm": 4.898775100708008, "learning_rate": 1.898532689200919e-05, "loss": 1.2096, "step": 5194 }, { "epoch": 17.0327868852459, "grad_norm": 7.212252616882324, "learning_rate": 1.8984860772139665e-05, "loss": 1.0051, "step": 5195 }, { "epoch": 17.036065573770493, "grad_norm": 6.8407793045043945, "learning_rate": 1.8984394550956488e-05, "loss": 1.111, "step": 5196 }, { "epoch": 17.03934426229508, "grad_norm": 5.483895301818848, "learning_rate": 1.8983928228464924e-05, "loss": 1.3433, "step": 5197 }, { "epoch": 17.042622950819673, "grad_norm": 6.2593584060668945, "learning_rate": 1.898346180467023e-05, "loss": 0.9602, "step": 5198 }, { "epoch": 17.04590163934426, "grad_norm": 6.177966594696045, "learning_rate": 1.8982995279577662e-05, "loss": 1.433, "step": 5199 }, { "epoch": 17.049180327868854, "grad_norm": 4.834202289581299, "learning_rate": 1.898252865319248e-05, "loss": 1.178, "step": 5200 }, { "epoch": 17.052459016393442, "grad_norm": 5.899739742279053, "learning_rate": 1.898206192551995e-05, "loss": 1.0388, "step": 5201 }, { "epoch": 17.055737704918034, "grad_norm": 7.264355659484863, "learning_rate": 1.898159509656533e-05, "loss": 1.015, "step": 5202 }, { "epoch": 17.059016393442622, "grad_norm": 5.762638568878174, "learning_rate": 1.8981128166333893e-05, "loss": 1.2155, "step": 5203 }, { "epoch": 17.062295081967214, "grad_norm": 4.877063274383545, "learning_rate": 1.8980661134830894e-05, "loss": 1.0455, "step": 5204 }, { "epoch": 17.065573770491802, "grad_norm": 6.074609756469727, "learning_rate": 1.8980194002061602e-05, "loss": 1.0883, "step": 5205 }, { "epoch": 17.068852459016394, "grad_norm": 5.436195373535156, "learning_rate": 1.8979726768031287e-05, "loss": 1.0984, "step": 5206 }, { "epoch": 17.072131147540983, "grad_norm": 6.110064506530762, "learning_rate": 1.8979259432745217e-05, "loss": 1.0872, "step": 5207 }, { "epoch": 17.075409836065575, "grad_norm": 4.9070820808410645, "learning_rate": 1.897879199620866e-05, "loss": 1.3115, "step": 5208 }, { "epoch": 17.078688524590163, "grad_norm": 6.185041904449463, "learning_rate": 1.897832445842689e-05, "loss": 1.0411, "step": 5209 }, { "epoch": 17.081967213114755, "grad_norm": 5.112009048461914, "learning_rate": 1.897785681940517e-05, "loss": 1.2896, "step": 5210 }, { "epoch": 17.085245901639343, "grad_norm": 5.147551536560059, "learning_rate": 1.8977389079148784e-05, "loss": 1.2253, "step": 5211 }, { "epoch": 17.088524590163935, "grad_norm": 5.72550630569458, "learning_rate": 1.8976921237663003e-05, "loss": 0.9851, "step": 5212 }, { "epoch": 17.091803278688523, "grad_norm": 6.737991809844971, "learning_rate": 1.89764532949531e-05, "loss": 1.113, "step": 5213 }, { "epoch": 17.095081967213115, "grad_norm": 6.382517337799072, "learning_rate": 1.8975985251024355e-05, "loss": 0.943, "step": 5214 }, { "epoch": 17.098360655737704, "grad_norm": 7.8574299812316895, "learning_rate": 1.897551710588204e-05, "loss": 1.3053, "step": 5215 }, { "epoch": 17.101639344262296, "grad_norm": 5.93196964263916, "learning_rate": 1.897504885953144e-05, "loss": 1.1617, "step": 5216 }, { "epoch": 17.104918032786884, "grad_norm": 5.629424095153809, "learning_rate": 1.897458051197783e-05, "loss": 1.0062, "step": 5217 }, { "epoch": 17.108196721311476, "grad_norm": 4.278794288635254, "learning_rate": 1.8974112063226498e-05, "loss": 1.1019, "step": 5218 }, { "epoch": 17.111475409836064, "grad_norm": 5.422066688537598, "learning_rate": 1.8973643513282716e-05, "loss": 1.2732, "step": 5219 }, { "epoch": 17.114754098360656, "grad_norm": 5.962251663208008, "learning_rate": 1.8973174862151776e-05, "loss": 1.2065, "step": 5220 }, { "epoch": 17.118032786885244, "grad_norm": 5.426375865936279, "learning_rate": 1.8972706109838962e-05, "loss": 1.2074, "step": 5221 }, { "epoch": 17.121311475409836, "grad_norm": 5.210721015930176, "learning_rate": 1.8972237256349553e-05, "loss": 1.0469, "step": 5222 }, { "epoch": 17.124590163934425, "grad_norm": 6.124940395355225, "learning_rate": 1.8971768301688846e-05, "loss": 1.1719, "step": 5223 }, { "epoch": 17.127868852459017, "grad_norm": 5.708293914794922, "learning_rate": 1.8971299245862117e-05, "loss": 1.0557, "step": 5224 }, { "epoch": 17.131147540983605, "grad_norm": 5.06571626663208, "learning_rate": 1.8970830088874664e-05, "loss": 1.2053, "step": 5225 }, { "epoch": 17.134426229508197, "grad_norm": 12.838958740234375, "learning_rate": 1.8970360830731772e-05, "loss": 1.2773, "step": 5226 }, { "epoch": 17.137704918032785, "grad_norm": 6.334457874298096, "learning_rate": 1.8969891471438736e-05, "loss": 1.0338, "step": 5227 }, { "epoch": 17.140983606557377, "grad_norm": 6.016249179840088, "learning_rate": 1.8969422011000852e-05, "loss": 1.2642, "step": 5228 }, { "epoch": 17.14426229508197, "grad_norm": 5.187460422515869, "learning_rate": 1.8968952449423403e-05, "loss": 1.2186, "step": 5229 }, { "epoch": 17.147540983606557, "grad_norm": 4.305524826049805, "learning_rate": 1.8968482786711693e-05, "loss": 1.0025, "step": 5230 }, { "epoch": 17.15081967213115, "grad_norm": 5.456974983215332, "learning_rate": 1.8968013022871012e-05, "loss": 0.8164, "step": 5231 }, { "epoch": 17.154098360655738, "grad_norm": 4.929464817047119, "learning_rate": 1.8967543157906662e-05, "loss": 1.0876, "step": 5232 }, { "epoch": 17.15737704918033, "grad_norm": 5.794517993927002, "learning_rate": 1.8967073191823936e-05, "loss": 1.2498, "step": 5233 }, { "epoch": 17.160655737704918, "grad_norm": 7.89599084854126, "learning_rate": 1.896660312462814e-05, "loss": 1.0826, "step": 5234 }, { "epoch": 17.16393442622951, "grad_norm": 7.207643985748291, "learning_rate": 1.896613295632457e-05, "loss": 1.324, "step": 5235 }, { "epoch": 17.167213114754098, "grad_norm": 4.644237995147705, "learning_rate": 1.896566268691853e-05, "loss": 1.0957, "step": 5236 }, { "epoch": 17.17049180327869, "grad_norm": 14.552820205688477, "learning_rate": 1.8965192316415318e-05, "loss": 1.0447, "step": 5237 }, { "epoch": 17.17377049180328, "grad_norm": 5.12597131729126, "learning_rate": 1.8964721844820242e-05, "loss": 1.0773, "step": 5238 }, { "epoch": 17.17704918032787, "grad_norm": 6.980552673339844, "learning_rate": 1.8964251272138604e-05, "loss": 0.9395, "step": 5239 }, { "epoch": 17.18032786885246, "grad_norm": 5.456212520599365, "learning_rate": 1.8963780598375717e-05, "loss": 0.9744, "step": 5240 }, { "epoch": 17.18360655737705, "grad_norm": 5.6055803298950195, "learning_rate": 1.8963309823536883e-05, "loss": 1.0271, "step": 5241 }, { "epoch": 17.18688524590164, "grad_norm": 6.614537239074707, "learning_rate": 1.8962838947627408e-05, "loss": 1.2654, "step": 5242 }, { "epoch": 17.19016393442623, "grad_norm": 6.026393413543701, "learning_rate": 1.8962367970652608e-05, "loss": 1.2434, "step": 5243 }, { "epoch": 17.19344262295082, "grad_norm": 6.3960113525390625, "learning_rate": 1.896189689261779e-05, "loss": 1.0049, "step": 5244 }, { "epoch": 17.19672131147541, "grad_norm": 4.5688300132751465, "learning_rate": 1.8961425713528262e-05, "loss": 1.3004, "step": 5245 }, { "epoch": 17.2, "grad_norm": 5.863027572631836, "learning_rate": 1.896095443338935e-05, "loss": 1.3044, "step": 5246 }, { "epoch": 17.20327868852459, "grad_norm": 6.327192306518555, "learning_rate": 1.896048305220635e-05, "loss": 1.1113, "step": 5247 }, { "epoch": 17.20655737704918, "grad_norm": 11.063252449035645, "learning_rate": 1.8960011569984592e-05, "loss": 1.024, "step": 5248 }, { "epoch": 17.20983606557377, "grad_norm": 11.250319480895996, "learning_rate": 1.8959539986729392e-05, "loss": 1.3403, "step": 5249 }, { "epoch": 17.21311475409836, "grad_norm": 6.518808364868164, "learning_rate": 1.895906830244606e-05, "loss": 0.9165, "step": 5250 }, { "epoch": 17.21639344262295, "grad_norm": 9.400972366333008, "learning_rate": 1.8958596517139914e-05, "loss": 0.9683, "step": 5251 }, { "epoch": 17.21967213114754, "grad_norm": 5.767450332641602, "learning_rate": 1.8958124630816278e-05, "loss": 1.1121, "step": 5252 }, { "epoch": 17.222950819672132, "grad_norm": 5.348955154418945, "learning_rate": 1.8957652643480476e-05, "loss": 1.2454, "step": 5253 }, { "epoch": 17.22622950819672, "grad_norm": 6.800206184387207, "learning_rate": 1.8957180555137825e-05, "loss": 1.1052, "step": 5254 }, { "epoch": 17.229508196721312, "grad_norm": 7.328993320465088, "learning_rate": 1.895670836579365e-05, "loss": 1.0132, "step": 5255 }, { "epoch": 17.2327868852459, "grad_norm": 6.910444736480713, "learning_rate": 1.8956236075453277e-05, "loss": 1.2153, "step": 5256 }, { "epoch": 17.236065573770492, "grad_norm": 7.626601696014404, "learning_rate": 1.895576368412203e-05, "loss": 1.1409, "step": 5257 }, { "epoch": 17.23934426229508, "grad_norm": 9.138999938964844, "learning_rate": 1.8955291191805237e-05, "loss": 1.092, "step": 5258 }, { "epoch": 17.242622950819673, "grad_norm": 5.888599395751953, "learning_rate": 1.895481859850822e-05, "loss": 1.1659, "step": 5259 }, { "epoch": 17.24590163934426, "grad_norm": 5.733506202697754, "learning_rate": 1.8954345904236317e-05, "loss": 1.1091, "step": 5260 }, { "epoch": 17.249180327868853, "grad_norm": 6.009711265563965, "learning_rate": 1.8953873108994852e-05, "loss": 1.1136, "step": 5261 }, { "epoch": 17.25245901639344, "grad_norm": 6.607558250427246, "learning_rate": 1.8953400212789158e-05, "loss": 1.1853, "step": 5262 }, { "epoch": 17.255737704918033, "grad_norm": 5.285210609436035, "learning_rate": 1.895292721562457e-05, "loss": 1.3555, "step": 5263 }, { "epoch": 17.25901639344262, "grad_norm": 5.559352874755859, "learning_rate": 1.8952454117506414e-05, "loss": 1.0719, "step": 5264 }, { "epoch": 17.262295081967213, "grad_norm": 6.770509719848633, "learning_rate": 1.8951980918440033e-05, "loss": 1.1489, "step": 5265 }, { "epoch": 17.2655737704918, "grad_norm": 16.189849853515625, "learning_rate": 1.8951507618430758e-05, "loss": 1.3987, "step": 5266 }, { "epoch": 17.268852459016394, "grad_norm": 7.544551849365234, "learning_rate": 1.8951034217483927e-05, "loss": 1.1786, "step": 5267 }, { "epoch": 17.272131147540982, "grad_norm": 11.763014793395996, "learning_rate": 1.895056071560488e-05, "loss": 1.1742, "step": 5268 }, { "epoch": 17.275409836065574, "grad_norm": 4.818278789520264, "learning_rate": 1.8950087112798955e-05, "loss": 1.2294, "step": 5269 }, { "epoch": 17.278688524590162, "grad_norm": 5.580166816711426, "learning_rate": 1.894961340907149e-05, "loss": 1.2769, "step": 5270 }, { "epoch": 17.281967213114754, "grad_norm": 5.380280017852783, "learning_rate": 1.8949139604427832e-05, "loss": 1.1518, "step": 5271 }, { "epoch": 17.285245901639342, "grad_norm": 7.0955634117126465, "learning_rate": 1.8948665698873317e-05, "loss": 0.9951, "step": 5272 }, { "epoch": 17.288524590163934, "grad_norm": 5.237166881561279, "learning_rate": 1.894819169241329e-05, "loss": 1.0814, "step": 5273 }, { "epoch": 17.291803278688526, "grad_norm": 7.032750129699707, "learning_rate": 1.8947717585053102e-05, "loss": 1.0, "step": 5274 }, { "epoch": 17.295081967213115, "grad_norm": 5.949202537536621, "learning_rate": 1.8947243376798095e-05, "loss": 1.1064, "step": 5275 }, { "epoch": 17.298360655737707, "grad_norm": 6.219604969024658, "learning_rate": 1.8946769067653614e-05, "loss": 1.2725, "step": 5276 }, { "epoch": 17.301639344262295, "grad_norm": 7.7059526443481445, "learning_rate": 1.894629465762501e-05, "loss": 0.947, "step": 5277 }, { "epoch": 17.304918032786887, "grad_norm": 10.176277160644531, "learning_rate": 1.894582014671763e-05, "loss": 1.207, "step": 5278 }, { "epoch": 17.308196721311475, "grad_norm": 6.3380045890808105, "learning_rate": 1.8945345534936834e-05, "loss": 1.1423, "step": 5279 }, { "epoch": 17.311475409836067, "grad_norm": 7.370195388793945, "learning_rate": 1.8944870822287957e-05, "loss": 1.1616, "step": 5280 }, { "epoch": 17.314754098360655, "grad_norm": 6.637462615966797, "learning_rate": 1.8944396008776366e-05, "loss": 1.1938, "step": 5281 }, { "epoch": 17.318032786885247, "grad_norm": 5.635525226593018, "learning_rate": 1.894392109440741e-05, "loss": 1.2396, "step": 5282 }, { "epoch": 17.321311475409836, "grad_norm": 12.318580627441406, "learning_rate": 1.8943446079186442e-05, "loss": 1.1649, "step": 5283 }, { "epoch": 17.324590163934428, "grad_norm": 5.88450813293457, "learning_rate": 1.8942970963118822e-05, "loss": 1.2207, "step": 5284 }, { "epoch": 17.327868852459016, "grad_norm": 5.754702091217041, "learning_rate": 1.8942495746209907e-05, "loss": 1.1836, "step": 5285 }, { "epoch": 17.331147540983608, "grad_norm": 7.885829925537109, "learning_rate": 1.8942020428465054e-05, "loss": 1.1686, "step": 5286 }, { "epoch": 17.334426229508196, "grad_norm": 8.578039169311523, "learning_rate": 1.8941545009889623e-05, "loss": 1.1681, "step": 5287 }, { "epoch": 17.337704918032788, "grad_norm": 5.982458114624023, "learning_rate": 1.8941069490488976e-05, "loss": 1.2656, "step": 5288 }, { "epoch": 17.340983606557376, "grad_norm": 5.714114665985107, "learning_rate": 1.894059387026847e-05, "loss": 1.0621, "step": 5289 }, { "epoch": 17.34426229508197, "grad_norm": 6.528292655944824, "learning_rate": 1.8940118149233478e-05, "loss": 1.1858, "step": 5290 }, { "epoch": 17.347540983606557, "grad_norm": 6.046191215515137, "learning_rate": 1.8939642327389353e-05, "loss": 1.1072, "step": 5291 }, { "epoch": 17.35081967213115, "grad_norm": 6.215454578399658, "learning_rate": 1.893916640474147e-05, "loss": 1.3281, "step": 5292 }, { "epoch": 17.354098360655737, "grad_norm": 14.245678901672363, "learning_rate": 1.8938690381295193e-05, "loss": 1.1179, "step": 5293 }, { "epoch": 17.35737704918033, "grad_norm": 6.956291198730469, "learning_rate": 1.893821425705588e-05, "loss": 1.0516, "step": 5294 }, { "epoch": 17.360655737704917, "grad_norm": 4.748647212982178, "learning_rate": 1.893773803202892e-05, "loss": 1.2764, "step": 5295 }, { "epoch": 17.36393442622951, "grad_norm": 6.620082378387451, "learning_rate": 1.893726170621966e-05, "loss": 1.1974, "step": 5296 }, { "epoch": 17.367213114754097, "grad_norm": 8.86440372467041, "learning_rate": 1.8936785279633488e-05, "loss": 1.2358, "step": 5297 }, { "epoch": 17.37049180327869, "grad_norm": 6.538543224334717, "learning_rate": 1.8936308752275767e-05, "loss": 1.2034, "step": 5298 }, { "epoch": 17.373770491803278, "grad_norm": 7.806949615478516, "learning_rate": 1.8935832124151872e-05, "loss": 1.1049, "step": 5299 }, { "epoch": 17.37704918032787, "grad_norm": 5.960905075073242, "learning_rate": 1.893535539526718e-05, "loss": 1.1556, "step": 5300 }, { "epoch": 17.380327868852458, "grad_norm": 6.089158535003662, "learning_rate": 1.893487856562707e-05, "loss": 1.0511, "step": 5301 }, { "epoch": 17.38360655737705, "grad_norm": 8.6816987991333, "learning_rate": 1.893440163523691e-05, "loss": 1.2306, "step": 5302 }, { "epoch": 17.386885245901638, "grad_norm": 5.135089874267578, "learning_rate": 1.8933924604102083e-05, "loss": 1.1682, "step": 5303 }, { "epoch": 17.39016393442623, "grad_norm": 4.734907627105713, "learning_rate": 1.8933447472227965e-05, "loss": 1.3613, "step": 5304 }, { "epoch": 17.39344262295082, "grad_norm": 19.578201293945312, "learning_rate": 1.8932970239619943e-05, "loss": 0.9667, "step": 5305 }, { "epoch": 17.39672131147541, "grad_norm": 6.460972785949707, "learning_rate": 1.893249290628339e-05, "loss": 1.2903, "step": 5306 }, { "epoch": 17.4, "grad_norm": 4.66145658493042, "learning_rate": 1.8932015472223692e-05, "loss": 0.974, "step": 5307 }, { "epoch": 17.40327868852459, "grad_norm": 6.291612148284912, "learning_rate": 1.8931537937446235e-05, "loss": 1.1711, "step": 5308 }, { "epoch": 17.40655737704918, "grad_norm": 4.890410423278809, "learning_rate": 1.89310603019564e-05, "loss": 1.1257, "step": 5309 }, { "epoch": 17.40983606557377, "grad_norm": 4.741175174713135, "learning_rate": 1.8930582565759576e-05, "loss": 1.1766, "step": 5310 }, { "epoch": 17.41311475409836, "grad_norm": 6.231001853942871, "learning_rate": 1.8930104728861142e-05, "loss": 1.2034, "step": 5311 }, { "epoch": 17.41639344262295, "grad_norm": 6.662304878234863, "learning_rate": 1.8929626791266494e-05, "loss": 1.2344, "step": 5312 }, { "epoch": 17.41967213114754, "grad_norm": 6.162705898284912, "learning_rate": 1.8929148752981024e-05, "loss": 1.2787, "step": 5313 }, { "epoch": 17.42295081967213, "grad_norm": 6.230904579162598, "learning_rate": 1.8928670614010116e-05, "loss": 1.2343, "step": 5314 }, { "epoch": 17.42622950819672, "grad_norm": 5.847381591796875, "learning_rate": 1.892819237435916e-05, "loss": 1.0238, "step": 5315 }, { "epoch": 17.42950819672131, "grad_norm": 4.868381977081299, "learning_rate": 1.8927714034033557e-05, "loss": 1.0997, "step": 5316 }, { "epoch": 17.432786885245903, "grad_norm": 8.38903522491455, "learning_rate": 1.8927235593038693e-05, "loss": 1.2618, "step": 5317 }, { "epoch": 17.43606557377049, "grad_norm": 8.756120681762695, "learning_rate": 1.8926757051379967e-05, "loss": 1.1021, "step": 5318 }, { "epoch": 17.439344262295084, "grad_norm": 5.521518707275391, "learning_rate": 1.8926278409062773e-05, "loss": 1.2493, "step": 5319 }, { "epoch": 17.442622950819672, "grad_norm": 6.1746320724487305, "learning_rate": 1.892579966609251e-05, "loss": 1.249, "step": 5320 }, { "epoch": 17.445901639344264, "grad_norm": 7.973886966705322, "learning_rate": 1.8925320822474573e-05, "loss": 1.052, "step": 5321 }, { "epoch": 17.449180327868852, "grad_norm": 5.786774635314941, "learning_rate": 1.8924841878214366e-05, "loss": 1.1162, "step": 5322 }, { "epoch": 17.452459016393444, "grad_norm": 6.26724100112915, "learning_rate": 1.8924362833317286e-05, "loss": 1.1184, "step": 5323 }, { "epoch": 17.455737704918032, "grad_norm": 5.420485496520996, "learning_rate": 1.8923883687788734e-05, "loss": 1.2939, "step": 5324 }, { "epoch": 17.459016393442624, "grad_norm": 6.170974254608154, "learning_rate": 1.892340444163412e-05, "loss": 1.0016, "step": 5325 }, { "epoch": 17.462295081967213, "grad_norm": 8.640681266784668, "learning_rate": 1.892292509485884e-05, "loss": 1.2758, "step": 5326 }, { "epoch": 17.465573770491805, "grad_norm": 6.070803642272949, "learning_rate": 1.8922445647468302e-05, "loss": 0.9205, "step": 5327 }, { "epoch": 17.468852459016393, "grad_norm": 5.4107279777526855, "learning_rate": 1.8921966099467912e-05, "loss": 1.0839, "step": 5328 }, { "epoch": 17.472131147540985, "grad_norm": 5.441486835479736, "learning_rate": 1.8921486450863078e-05, "loss": 1.1053, "step": 5329 }, { "epoch": 17.475409836065573, "grad_norm": 5.691211223602295, "learning_rate": 1.8921006701659207e-05, "loss": 1.1047, "step": 5330 }, { "epoch": 17.478688524590165, "grad_norm": 5.846643924713135, "learning_rate": 1.892052685186171e-05, "loss": 1.1018, "step": 5331 }, { "epoch": 17.481967213114753, "grad_norm": 6.135072708129883, "learning_rate": 1.8920046901476e-05, "loss": 1.2136, "step": 5332 }, { "epoch": 17.485245901639345, "grad_norm": 5.122457504272461, "learning_rate": 1.8919566850507488e-05, "loss": 1.2296, "step": 5333 }, { "epoch": 17.488524590163934, "grad_norm": 6.548494815826416, "learning_rate": 1.891908669896158e-05, "loss": 1.1213, "step": 5334 }, { "epoch": 17.491803278688526, "grad_norm": 6.03236722946167, "learning_rate": 1.8918606446843702e-05, "loss": 0.9757, "step": 5335 }, { "epoch": 17.495081967213114, "grad_norm": 7.051952362060547, "learning_rate": 1.891812609415926e-05, "loss": 0.9419, "step": 5336 }, { "epoch": 17.498360655737706, "grad_norm": 6.098530292510986, "learning_rate": 1.8917645640913673e-05, "loss": 0.9484, "step": 5337 }, { "epoch": 17.501639344262294, "grad_norm": 6.553592681884766, "learning_rate": 1.891716508711236e-05, "loss": 1.126, "step": 5338 }, { "epoch": 17.504918032786886, "grad_norm": 5.964664459228516, "learning_rate": 1.8916684432760745e-05, "loss": 1.1239, "step": 5339 }, { "epoch": 17.508196721311474, "grad_norm": 14.429243087768555, "learning_rate": 1.8916203677864234e-05, "loss": 1.1569, "step": 5340 }, { "epoch": 17.511475409836066, "grad_norm": 6.392255783081055, "learning_rate": 1.891572282242826e-05, "loss": 1.0527, "step": 5341 }, { "epoch": 17.514754098360655, "grad_norm": 6.247359752655029, "learning_rate": 1.891524186645824e-05, "loss": 1.1906, "step": 5342 }, { "epoch": 17.518032786885247, "grad_norm": 6.878375053405762, "learning_rate": 1.8914760809959597e-05, "loss": 1.3965, "step": 5343 }, { "epoch": 17.521311475409835, "grad_norm": 6.982459545135498, "learning_rate": 1.891427965293776e-05, "loss": 1.0618, "step": 5344 }, { "epoch": 17.524590163934427, "grad_norm": 6.073806285858154, "learning_rate": 1.8913798395398147e-05, "loss": 1.0176, "step": 5345 }, { "epoch": 17.527868852459015, "grad_norm": 5.8599958419799805, "learning_rate": 1.891331703734619e-05, "loss": 1.0176, "step": 5346 }, { "epoch": 17.531147540983607, "grad_norm": 6.029031276702881, "learning_rate": 1.8912835578787322e-05, "loss": 1.1317, "step": 5347 }, { "epoch": 17.534426229508195, "grad_norm": 6.5598626136779785, "learning_rate": 1.8912354019726963e-05, "loss": 1.0925, "step": 5348 }, { "epoch": 17.537704918032787, "grad_norm": 5.769912242889404, "learning_rate": 1.891187236017054e-05, "loss": 1.0276, "step": 5349 }, { "epoch": 17.540983606557376, "grad_norm": 6.482815742492676, "learning_rate": 1.8911390600123496e-05, "loss": 1.2502, "step": 5350 }, { "epoch": 17.544262295081968, "grad_norm": 8.440142631530762, "learning_rate": 1.8910908739591257e-05, "loss": 1.3331, "step": 5351 }, { "epoch": 17.547540983606556, "grad_norm": 7.250142574310303, "learning_rate": 1.8910426778579257e-05, "loss": 1.1946, "step": 5352 }, { "epoch": 17.550819672131148, "grad_norm": 7.9364471435546875, "learning_rate": 1.8909944717092927e-05, "loss": 1.366, "step": 5353 }, { "epoch": 17.554098360655736, "grad_norm": 6.761594772338867, "learning_rate": 1.890946255513771e-05, "loss": 1.2927, "step": 5354 }, { "epoch": 17.557377049180328, "grad_norm": 5.839101791381836, "learning_rate": 1.890898029271904e-05, "loss": 1.2712, "step": 5355 }, { "epoch": 17.560655737704916, "grad_norm": 6.916977405548096, "learning_rate": 1.890849792984235e-05, "loss": 1.0331, "step": 5356 }, { "epoch": 17.56393442622951, "grad_norm": 7.719583511352539, "learning_rate": 1.8908015466513086e-05, "loss": 1.0078, "step": 5357 }, { "epoch": 17.567213114754097, "grad_norm": 6.126312255859375, "learning_rate": 1.8907532902736686e-05, "loss": 1.2129, "step": 5358 }, { "epoch": 17.57049180327869, "grad_norm": 7.404585838317871, "learning_rate": 1.8907050238518587e-05, "loss": 1.1508, "step": 5359 }, { "epoch": 17.57377049180328, "grad_norm": 6.14016580581665, "learning_rate": 1.890656747386424e-05, "loss": 1.0798, "step": 5360 }, { "epoch": 17.57704918032787, "grad_norm": 6.420026779174805, "learning_rate": 1.8906084608779084e-05, "loss": 1.319, "step": 5361 }, { "epoch": 17.58032786885246, "grad_norm": 10.998543739318848, "learning_rate": 1.890560164326856e-05, "loss": 1.0646, "step": 5362 }, { "epoch": 17.58360655737705, "grad_norm": 5.598634243011475, "learning_rate": 1.8905118577338123e-05, "loss": 0.9724, "step": 5363 }, { "epoch": 17.58688524590164, "grad_norm": 5.575559139251709, "learning_rate": 1.890463541099321e-05, "loss": 1.236, "step": 5364 }, { "epoch": 17.59016393442623, "grad_norm": 8.066071510314941, "learning_rate": 1.890415214423928e-05, "loss": 1.2012, "step": 5365 }, { "epoch": 17.59344262295082, "grad_norm": 7.923338413238525, "learning_rate": 1.890366877708177e-05, "loss": 1.2263, "step": 5366 }, { "epoch": 17.59672131147541, "grad_norm": 6.53048849105835, "learning_rate": 1.8903185309526142e-05, "loss": 1.1047, "step": 5367 }, { "epoch": 17.6, "grad_norm": 6.983731746673584, "learning_rate": 1.8902701741577844e-05, "loss": 1.0396, "step": 5368 }, { "epoch": 17.60327868852459, "grad_norm": 5.522674560546875, "learning_rate": 1.8902218073242325e-05, "loss": 1.0468, "step": 5369 }, { "epoch": 17.60655737704918, "grad_norm": 6.856719970703125, "learning_rate": 1.890173430452504e-05, "loss": 1.0804, "step": 5370 }, { "epoch": 17.60983606557377, "grad_norm": 6.1339850425720215, "learning_rate": 1.8901250435431447e-05, "loss": 1.428, "step": 5371 }, { "epoch": 17.613114754098362, "grad_norm": 5.299254417419434, "learning_rate": 1.8900766465966998e-05, "loss": 1.1895, "step": 5372 }, { "epoch": 17.61639344262295, "grad_norm": 7.010680675506592, "learning_rate": 1.8900282396137153e-05, "loss": 1.1844, "step": 5373 }, { "epoch": 17.619672131147542, "grad_norm": 6.172682285308838, "learning_rate": 1.8899798225947373e-05, "loss": 0.8365, "step": 5374 }, { "epoch": 17.62295081967213, "grad_norm": 22.266765594482422, "learning_rate": 1.889931395540311e-05, "loss": 1.4158, "step": 5375 }, { "epoch": 17.626229508196722, "grad_norm": 5.453123092651367, "learning_rate": 1.8898829584509832e-05, "loss": 1.2756, "step": 5376 }, { "epoch": 17.62950819672131, "grad_norm": 6.102890491485596, "learning_rate": 1.8898345113273e-05, "loss": 1.1787, "step": 5377 }, { "epoch": 17.632786885245903, "grad_norm": 5.629245758056641, "learning_rate": 1.8897860541698073e-05, "loss": 1.1884, "step": 5378 }, { "epoch": 17.63606557377049, "grad_norm": 6.344771385192871, "learning_rate": 1.8897375869790515e-05, "loss": 1.1511, "step": 5379 }, { "epoch": 17.639344262295083, "grad_norm": 5.77396821975708, "learning_rate": 1.8896891097555797e-05, "loss": 1.2961, "step": 5380 }, { "epoch": 17.64262295081967, "grad_norm": 7.701314449310303, "learning_rate": 1.889640622499938e-05, "loss": 0.9648, "step": 5381 }, { "epoch": 17.645901639344263, "grad_norm": 5.8820319175720215, "learning_rate": 1.889592125212673e-05, "loss": 1.2743, "step": 5382 }, { "epoch": 17.64918032786885, "grad_norm": 5.792636394500732, "learning_rate": 1.889543617894332e-05, "loss": 1.1133, "step": 5383 }, { "epoch": 17.652459016393443, "grad_norm": 5.4806671142578125, "learning_rate": 1.889495100545462e-05, "loss": 1.1584, "step": 5384 }, { "epoch": 17.65573770491803, "grad_norm": 5.689225673675537, "learning_rate": 1.8894465731666095e-05, "loss": 0.9749, "step": 5385 }, { "epoch": 17.659016393442624, "grad_norm": 6.122067451477051, "learning_rate": 1.8893980357583227e-05, "loss": 1.1005, "step": 5386 }, { "epoch": 17.662295081967212, "grad_norm": 7.028914451599121, "learning_rate": 1.8893494883211478e-05, "loss": 1.3477, "step": 5387 }, { "epoch": 17.665573770491804, "grad_norm": 6.5998311042785645, "learning_rate": 1.8893009308556327e-05, "loss": 1.1981, "step": 5388 }, { "epoch": 17.668852459016392, "grad_norm": 7.3802971839904785, "learning_rate": 1.8892523633623252e-05, "loss": 1.2618, "step": 5389 }, { "epoch": 17.672131147540984, "grad_norm": 5.865564823150635, "learning_rate": 1.889203785841773e-05, "loss": 1.0552, "step": 5390 }, { "epoch": 17.675409836065572, "grad_norm": 6.939637660980225, "learning_rate": 1.889155198294523e-05, "loss": 1.0696, "step": 5391 }, { "epoch": 17.678688524590164, "grad_norm": 5.9861884117126465, "learning_rate": 1.8891066007211242e-05, "loss": 1.1923, "step": 5392 }, { "epoch": 17.681967213114753, "grad_norm": 7.970438480377197, "learning_rate": 1.8890579931221237e-05, "loss": 1.3147, "step": 5393 }, { "epoch": 17.685245901639345, "grad_norm": 5.2796525955200195, "learning_rate": 1.8890093754980698e-05, "loss": 1.2166, "step": 5394 }, { "epoch": 17.688524590163933, "grad_norm": 5.885576248168945, "learning_rate": 1.8889607478495107e-05, "loss": 1.3093, "step": 5395 }, { "epoch": 17.691803278688525, "grad_norm": 7.351493835449219, "learning_rate": 1.8889121101769958e-05, "loss": 1.1121, "step": 5396 }, { "epoch": 17.695081967213113, "grad_norm": 5.423109531402588, "learning_rate": 1.8888634624810717e-05, "loss": 1.2255, "step": 5397 }, { "epoch": 17.698360655737705, "grad_norm": 5.969770431518555, "learning_rate": 1.888814804762288e-05, "loss": 1.2678, "step": 5398 }, { "epoch": 17.701639344262293, "grad_norm": 6.3671345710754395, "learning_rate": 1.8887661370211937e-05, "loss": 1.2371, "step": 5399 }, { "epoch": 17.704918032786885, "grad_norm": 5.86403226852417, "learning_rate": 1.8887174592583365e-05, "loss": 1.553, "step": 5400 }, { "epoch": 17.708196721311474, "grad_norm": 9.299227714538574, "learning_rate": 1.8886687714742665e-05, "loss": 1.1431, "step": 5401 }, { "epoch": 17.711475409836066, "grad_norm": 8.67421817779541, "learning_rate": 1.888620073669532e-05, "loss": 1.3547, "step": 5402 }, { "epoch": 17.714754098360658, "grad_norm": 5.941191673278809, "learning_rate": 1.888571365844682e-05, "loss": 1.3521, "step": 5403 }, { "epoch": 17.718032786885246, "grad_norm": 5.372480869293213, "learning_rate": 1.888522648000266e-05, "loss": 1.1965, "step": 5404 }, { "epoch": 17.721311475409838, "grad_norm": 6.653146743774414, "learning_rate": 1.8884739201368337e-05, "loss": 1.1705, "step": 5405 }, { "epoch": 17.724590163934426, "grad_norm": 5.337889194488525, "learning_rate": 1.888425182254934e-05, "loss": 1.1611, "step": 5406 }, { "epoch": 17.727868852459018, "grad_norm": 5.6744818687438965, "learning_rate": 1.8883764343551165e-05, "loss": 1.2971, "step": 5407 }, { "epoch": 17.731147540983606, "grad_norm": 6.364655017852783, "learning_rate": 1.8883276764379307e-05, "loss": 1.1398, "step": 5408 }, { "epoch": 17.7344262295082, "grad_norm": 5.493879318237305, "learning_rate": 1.888278908503927e-05, "loss": 1.2236, "step": 5409 }, { "epoch": 17.737704918032787, "grad_norm": 6.833587646484375, "learning_rate": 1.888230130553655e-05, "loss": 1.1272, "step": 5410 }, { "epoch": 17.74098360655738, "grad_norm": 6.626623153686523, "learning_rate": 1.8881813425876648e-05, "loss": 1.4414, "step": 5411 }, { "epoch": 17.744262295081967, "grad_norm": 5.710596561431885, "learning_rate": 1.888132544606507e-05, "loss": 1.0508, "step": 5412 }, { "epoch": 17.74754098360656, "grad_norm": 5.392367362976074, "learning_rate": 1.8880837366107306e-05, "loss": 1.0535, "step": 5413 }, { "epoch": 17.750819672131147, "grad_norm": 8.29860782623291, "learning_rate": 1.888034918600887e-05, "loss": 1.1389, "step": 5414 }, { "epoch": 17.75409836065574, "grad_norm": 7.747216701507568, "learning_rate": 1.8879860905775264e-05, "loss": 1.2223, "step": 5415 }, { "epoch": 17.757377049180327, "grad_norm": 5.089217185974121, "learning_rate": 1.887937252541199e-05, "loss": 1.1086, "step": 5416 }, { "epoch": 17.76065573770492, "grad_norm": 6.891300201416016, "learning_rate": 1.8878884044924563e-05, "loss": 1.1577, "step": 5417 }, { "epoch": 17.763934426229508, "grad_norm": 7.00875186920166, "learning_rate": 1.8878395464318483e-05, "loss": 0.9906, "step": 5418 }, { "epoch": 17.7672131147541, "grad_norm": 5.898369789123535, "learning_rate": 1.8877906783599267e-05, "loss": 1.0416, "step": 5419 }, { "epoch": 17.770491803278688, "grad_norm": 5.428972244262695, "learning_rate": 1.887741800277242e-05, "loss": 1.2415, "step": 5420 }, { "epoch": 17.77377049180328, "grad_norm": 7.066887855529785, "learning_rate": 1.887692912184345e-05, "loss": 1.1697, "step": 5421 }, { "epoch": 17.777049180327868, "grad_norm": 5.8862128257751465, "learning_rate": 1.8876440140817883e-05, "loss": 1.2319, "step": 5422 }, { "epoch": 17.78032786885246, "grad_norm": 6.829597473144531, "learning_rate": 1.887595105970122e-05, "loss": 1.2322, "step": 5423 }, { "epoch": 17.78360655737705, "grad_norm": 7.568446159362793, "learning_rate": 1.8875461878498977e-05, "loss": 1.02, "step": 5424 }, { "epoch": 17.78688524590164, "grad_norm": 6.338860511779785, "learning_rate": 1.8874972597216678e-05, "loss": 1.2815, "step": 5425 }, { "epoch": 17.79016393442623, "grad_norm": 5.652091979980469, "learning_rate": 1.8874483215859835e-05, "loss": 1.4399, "step": 5426 }, { "epoch": 17.79344262295082, "grad_norm": 6.572691917419434, "learning_rate": 1.887399373443396e-05, "loss": 1.1757, "step": 5427 }, { "epoch": 17.79672131147541, "grad_norm": 6.966214656829834, "learning_rate": 1.8873504152944585e-05, "loss": 1.2241, "step": 5428 }, { "epoch": 17.8, "grad_norm": 6.916427135467529, "learning_rate": 1.8873014471397225e-05, "loss": 0.9216, "step": 5429 }, { "epoch": 17.80327868852459, "grad_norm": 6.507976531982422, "learning_rate": 1.88725246897974e-05, "loss": 1.217, "step": 5430 }, { "epoch": 17.80655737704918, "grad_norm": 5.677917957305908, "learning_rate": 1.8872034808150633e-05, "loss": 1.0065, "step": 5431 }, { "epoch": 17.80983606557377, "grad_norm": 6.010011672973633, "learning_rate": 1.8871544826462448e-05, "loss": 1.5554, "step": 5432 }, { "epoch": 17.81311475409836, "grad_norm": 5.647887229919434, "learning_rate": 1.8871054744738374e-05, "loss": 1.1013, "step": 5433 }, { "epoch": 17.81639344262295, "grad_norm": 5.9988322257995605, "learning_rate": 1.887056456298393e-05, "loss": 1.2305, "step": 5434 }, { "epoch": 17.81967213114754, "grad_norm": 4.948972225189209, "learning_rate": 1.8870074281204652e-05, "loss": 1.1584, "step": 5435 }, { "epoch": 17.82295081967213, "grad_norm": 5.09136962890625, "learning_rate": 1.886958389940606e-05, "loss": 1.1132, "step": 5436 }, { "epoch": 17.82622950819672, "grad_norm": 5.856156826019287, "learning_rate": 1.886909341759369e-05, "loss": 1.2525, "step": 5437 }, { "epoch": 17.82950819672131, "grad_norm": 4.942820072174072, "learning_rate": 1.886860283577307e-05, "loss": 1.2644, "step": 5438 }, { "epoch": 17.832786885245902, "grad_norm": 7.767524719238281, "learning_rate": 1.8868112153949735e-05, "loss": 1.1608, "step": 5439 }, { "epoch": 17.83606557377049, "grad_norm": 6.642439365386963, "learning_rate": 1.886762137212921e-05, "loss": 1.3002, "step": 5440 }, { "epoch": 17.839344262295082, "grad_norm": 4.679854393005371, "learning_rate": 1.8867130490317036e-05, "loss": 1.3735, "step": 5441 }, { "epoch": 17.84262295081967, "grad_norm": 4.828691482543945, "learning_rate": 1.8866639508518744e-05, "loss": 1.1382, "step": 5442 }, { "epoch": 17.845901639344262, "grad_norm": 6.507320880889893, "learning_rate": 1.8866148426739878e-05, "loss": 1.0818, "step": 5443 }, { "epoch": 17.84918032786885, "grad_norm": 6.262410640716553, "learning_rate": 1.8865657244985964e-05, "loss": 1.1053, "step": 5444 }, { "epoch": 17.852459016393443, "grad_norm": 6.367223262786865, "learning_rate": 1.8865165963262552e-05, "loss": 1.1901, "step": 5445 }, { "epoch": 17.855737704918035, "grad_norm": 5.84100341796875, "learning_rate": 1.886467458157517e-05, "loss": 1.1306, "step": 5446 }, { "epoch": 17.859016393442623, "grad_norm": 19.267568588256836, "learning_rate": 1.886418309992937e-05, "loss": 1.1519, "step": 5447 }, { "epoch": 17.862295081967215, "grad_norm": 6.295307159423828, "learning_rate": 1.8863691518330688e-05, "loss": 1.0694, "step": 5448 }, { "epoch": 17.865573770491803, "grad_norm": 7.1222968101501465, "learning_rate": 1.886319983678467e-05, "loss": 0.9124, "step": 5449 }, { "epoch": 17.868852459016395, "grad_norm": 6.226139545440674, "learning_rate": 1.8862708055296852e-05, "loss": 1.1499, "step": 5450 }, { "epoch": 17.872131147540983, "grad_norm": 5.851990699768066, "learning_rate": 1.8862216173872792e-05, "loss": 1.1431, "step": 5451 }, { "epoch": 17.875409836065575, "grad_norm": 5.586023330688477, "learning_rate": 1.8861724192518027e-05, "loss": 1.2865, "step": 5452 }, { "epoch": 17.878688524590164, "grad_norm": 5.467860698699951, "learning_rate": 1.886123211123811e-05, "loss": 1.2924, "step": 5453 }, { "epoch": 17.881967213114756, "grad_norm": 7.181783199310303, "learning_rate": 1.8860739930038585e-05, "loss": 1.1726, "step": 5454 }, { "epoch": 17.885245901639344, "grad_norm": 6.598668575286865, "learning_rate": 1.8860247648925007e-05, "loss": 1.2485, "step": 5455 }, { "epoch": 17.888524590163936, "grad_norm": 24.408662796020508, "learning_rate": 1.8859755267902923e-05, "loss": 1.0408, "step": 5456 }, { "epoch": 17.891803278688524, "grad_norm": 5.390960216522217, "learning_rate": 1.8859262786977888e-05, "loss": 1.0796, "step": 5457 }, { "epoch": 17.895081967213116, "grad_norm": 28.09283447265625, "learning_rate": 1.8858770206155454e-05, "loss": 1.1802, "step": 5458 }, { "epoch": 17.898360655737704, "grad_norm": 6.214836597442627, "learning_rate": 1.8858277525441174e-05, "loss": 1.1304, "step": 5459 }, { "epoch": 17.901639344262296, "grad_norm": 7.333873748779297, "learning_rate": 1.8857784744840605e-05, "loss": 1.0752, "step": 5460 }, { "epoch": 17.904918032786885, "grad_norm": 7.440240383148193, "learning_rate": 1.88572918643593e-05, "loss": 1.0515, "step": 5461 }, { "epoch": 17.908196721311477, "grad_norm": 6.9050116539001465, "learning_rate": 1.885679888400282e-05, "loss": 1.2852, "step": 5462 }, { "epoch": 17.911475409836065, "grad_norm": 4.710257053375244, "learning_rate": 1.8856305803776728e-05, "loss": 1.2839, "step": 5463 }, { "epoch": 17.914754098360657, "grad_norm": 6.46485710144043, "learning_rate": 1.8855812623686577e-05, "loss": 1.1982, "step": 5464 }, { "epoch": 17.918032786885245, "grad_norm": 6.04885721206665, "learning_rate": 1.8855319343737934e-05, "loss": 1.1859, "step": 5465 }, { "epoch": 17.921311475409837, "grad_norm": 5.433770179748535, "learning_rate": 1.8854825963936355e-05, "loss": 1.2677, "step": 5466 }, { "epoch": 17.924590163934425, "grad_norm": 5.736139297485352, "learning_rate": 1.885433248428741e-05, "loss": 1.2333, "step": 5467 }, { "epoch": 17.927868852459017, "grad_norm": 5.4161882400512695, "learning_rate": 1.8853838904796657e-05, "loss": 1.1885, "step": 5468 }, { "epoch": 17.931147540983606, "grad_norm": 5.7793192863464355, "learning_rate": 1.8853345225469665e-05, "loss": 1.3098, "step": 5469 }, { "epoch": 17.934426229508198, "grad_norm": 7.5321526527404785, "learning_rate": 1.8852851446312e-05, "loss": 1.1235, "step": 5470 }, { "epoch": 17.937704918032786, "grad_norm": 33.531124114990234, "learning_rate": 1.8852357567329232e-05, "loss": 1.2034, "step": 5471 }, { "epoch": 17.940983606557378, "grad_norm": 5.979525089263916, "learning_rate": 1.8851863588526932e-05, "loss": 1.0381, "step": 5472 }, { "epoch": 17.944262295081966, "grad_norm": 9.517597198486328, "learning_rate": 1.885136950991066e-05, "loss": 1.2881, "step": 5473 }, { "epoch": 17.947540983606558, "grad_norm": 6.722975730895996, "learning_rate": 1.8850875331485996e-05, "loss": 1.0305, "step": 5474 }, { "epoch": 17.950819672131146, "grad_norm": 5.962669372558594, "learning_rate": 1.8850381053258507e-05, "loss": 1.2197, "step": 5475 }, { "epoch": 17.95409836065574, "grad_norm": 7.400661945343018, "learning_rate": 1.8849886675233777e-05, "loss": 1.1649, "step": 5476 }, { "epoch": 17.957377049180327, "grad_norm": 9.734167098999023, "learning_rate": 1.8849392197417367e-05, "loss": 1.304, "step": 5477 }, { "epoch": 17.96065573770492, "grad_norm": 7.503365993499756, "learning_rate": 1.884889761981486e-05, "loss": 1.1506, "step": 5478 }, { "epoch": 17.963934426229507, "grad_norm": 7.471345901489258, "learning_rate": 1.884840294243183e-05, "loss": 1.0975, "step": 5479 }, { "epoch": 17.9672131147541, "grad_norm": 6.176530838012695, "learning_rate": 1.884790816527386e-05, "loss": 1.2996, "step": 5480 }, { "epoch": 17.970491803278687, "grad_norm": 8.077680587768555, "learning_rate": 1.8847413288346526e-05, "loss": 0.9696, "step": 5481 }, { "epoch": 17.97377049180328, "grad_norm": 8.722846984863281, "learning_rate": 1.8846918311655405e-05, "loss": 1.1077, "step": 5482 }, { "epoch": 17.977049180327867, "grad_norm": 5.662355422973633, "learning_rate": 1.8846423235206087e-05, "loss": 1.2545, "step": 5483 }, { "epoch": 17.98032786885246, "grad_norm": 7.1779656410217285, "learning_rate": 1.8845928059004144e-05, "loss": 1.0201, "step": 5484 }, { "epoch": 17.983606557377048, "grad_norm": 6.114141464233398, "learning_rate": 1.8845432783055165e-05, "loss": 1.2699, "step": 5485 }, { "epoch": 17.98688524590164, "grad_norm": 7.327999591827393, "learning_rate": 1.8844937407364735e-05, "loss": 1.057, "step": 5486 }, { "epoch": 17.990163934426228, "grad_norm": 9.251521110534668, "learning_rate": 1.884444193193844e-05, "loss": 1.0648, "step": 5487 }, { "epoch": 17.99344262295082, "grad_norm": 5.646684646606445, "learning_rate": 1.8843946356781865e-05, "loss": 1.2822, "step": 5488 }, { "epoch": 17.99672131147541, "grad_norm": 7.406488418579102, "learning_rate": 1.8843450681900605e-05, "loss": 1.209, "step": 5489 }, { "epoch": 18.0, "grad_norm": 6.227457046508789, "learning_rate": 1.8842954907300236e-05, "loss": 0.924, "step": 5490 }, { "epoch": 18.003278688524592, "grad_norm": 9.522916793823242, "learning_rate": 1.884245903298636e-05, "loss": 0.9527, "step": 5491 }, { "epoch": 18.00655737704918, "grad_norm": 10.578934669494629, "learning_rate": 1.8841963058964564e-05, "loss": 0.9904, "step": 5492 }, { "epoch": 18.009836065573772, "grad_norm": 6.416295528411865, "learning_rate": 1.8841466985240443e-05, "loss": 1.0061, "step": 5493 }, { "epoch": 18.01311475409836, "grad_norm": 5.825777530670166, "learning_rate": 1.8840970811819588e-05, "loss": 1.1027, "step": 5494 }, { "epoch": 18.016393442622952, "grad_norm": 6.274228096008301, "learning_rate": 1.884047453870759e-05, "loss": 1.0267, "step": 5495 }, { "epoch": 18.01967213114754, "grad_norm": 6.694442272186279, "learning_rate": 1.8839978165910057e-05, "loss": 1.1909, "step": 5496 }, { "epoch": 18.022950819672133, "grad_norm": 5.8979668617248535, "learning_rate": 1.8839481693432575e-05, "loss": 0.9552, "step": 5497 }, { "epoch": 18.02622950819672, "grad_norm": 7.6879754066467285, "learning_rate": 1.8838985121280745e-05, "loss": 1.0261, "step": 5498 }, { "epoch": 18.029508196721313, "grad_norm": 6.716588497161865, "learning_rate": 1.883848844946017e-05, "loss": 1.1276, "step": 5499 }, { "epoch": 18.0327868852459, "grad_norm": 7.0301103591918945, "learning_rate": 1.8837991677976447e-05, "loss": 1.0891, "step": 5500 }, { "epoch": 18.036065573770493, "grad_norm": 7.93990421295166, "learning_rate": 1.883749480683518e-05, "loss": 0.9832, "step": 5501 }, { "epoch": 18.03934426229508, "grad_norm": 10.743792533874512, "learning_rate": 1.8836997836041968e-05, "loss": 1.0206, "step": 5502 }, { "epoch": 18.042622950819673, "grad_norm": 6.201620101928711, "learning_rate": 1.883650076560242e-05, "loss": 1.1458, "step": 5503 }, { "epoch": 18.04590163934426, "grad_norm": 6.020015239715576, "learning_rate": 1.8836003595522135e-05, "loss": 1.0657, "step": 5504 }, { "epoch": 18.049180327868854, "grad_norm": 7.802656173706055, "learning_rate": 1.8835506325806726e-05, "loss": 0.9829, "step": 5505 }, { "epoch": 18.052459016393442, "grad_norm": 17.84089469909668, "learning_rate": 1.8835008956461795e-05, "loss": 1.1339, "step": 5506 }, { "epoch": 18.055737704918034, "grad_norm": 5.313963413238525, "learning_rate": 1.883451148749295e-05, "loss": 1.2166, "step": 5507 }, { "epoch": 18.059016393442622, "grad_norm": 7.722029209136963, "learning_rate": 1.8834013918905805e-05, "loss": 1.1309, "step": 5508 }, { "epoch": 18.062295081967214, "grad_norm": 5.3284735679626465, "learning_rate": 1.8833516250705966e-05, "loss": 1.161, "step": 5509 }, { "epoch": 18.065573770491802, "grad_norm": 7.572501182556152, "learning_rate": 1.8833018482899047e-05, "loss": 0.9963, "step": 5510 }, { "epoch": 18.068852459016394, "grad_norm": 6.242968559265137, "learning_rate": 1.883252061549066e-05, "loss": 0.9215, "step": 5511 }, { "epoch": 18.072131147540983, "grad_norm": 5.929424285888672, "learning_rate": 1.883202264848642e-05, "loss": 1.124, "step": 5512 }, { "epoch": 18.075409836065575, "grad_norm": 6.780281066894531, "learning_rate": 1.8831524581891946e-05, "loss": 1.1964, "step": 5513 }, { "epoch": 18.078688524590163, "grad_norm": 5.358807563781738, "learning_rate": 1.8831026415712845e-05, "loss": 1.3834, "step": 5514 }, { "epoch": 18.081967213114755, "grad_norm": 6.378330707550049, "learning_rate": 1.883052814995474e-05, "loss": 1.0145, "step": 5515 }, { "epoch": 18.085245901639343, "grad_norm": 6.062604904174805, "learning_rate": 1.8830029784623248e-05, "loss": 1.0378, "step": 5516 }, { "epoch": 18.088524590163935, "grad_norm": 5.9636945724487305, "learning_rate": 1.8829531319723992e-05, "loss": 1.2488, "step": 5517 }, { "epoch": 18.091803278688523, "grad_norm": 6.288236618041992, "learning_rate": 1.8829032755262585e-05, "loss": 1.05, "step": 5518 }, { "epoch": 18.095081967213115, "grad_norm": 6.9078826904296875, "learning_rate": 1.882853409124466e-05, "loss": 0.9395, "step": 5519 }, { "epoch": 18.098360655737704, "grad_norm": 8.366808891296387, "learning_rate": 1.882803532767583e-05, "loss": 1.0237, "step": 5520 }, { "epoch": 18.101639344262296, "grad_norm": 6.8149638175964355, "learning_rate": 1.8827536464561726e-05, "loss": 1.0032, "step": 5521 }, { "epoch": 18.104918032786884, "grad_norm": 8.402849197387695, "learning_rate": 1.8827037501907966e-05, "loss": 0.8817, "step": 5522 }, { "epoch": 18.108196721311476, "grad_norm": 6.066958427429199, "learning_rate": 1.882653843972018e-05, "loss": 1.1833, "step": 5523 }, { "epoch": 18.111475409836064, "grad_norm": 5.611450672149658, "learning_rate": 1.8826039278004e-05, "loss": 0.9445, "step": 5524 }, { "epoch": 18.114754098360656, "grad_norm": 7.380919933319092, "learning_rate": 1.8825540016765046e-05, "loss": 1.1943, "step": 5525 }, { "epoch": 18.118032786885244, "grad_norm": 7.146297931671143, "learning_rate": 1.8825040656008955e-05, "loss": 0.9666, "step": 5526 }, { "epoch": 18.121311475409836, "grad_norm": 5.294835090637207, "learning_rate": 1.8824541195741353e-05, "loss": 0.9764, "step": 5527 }, { "epoch": 18.124590163934425, "grad_norm": 8.35687255859375, "learning_rate": 1.8824041635967875e-05, "loss": 1.2666, "step": 5528 }, { "epoch": 18.127868852459017, "grad_norm": 5.828364372253418, "learning_rate": 1.8823541976694155e-05, "loss": 1.1829, "step": 5529 }, { "epoch": 18.131147540983605, "grad_norm": 6.007293224334717, "learning_rate": 1.8823042217925823e-05, "loss": 0.8795, "step": 5530 }, { "epoch": 18.134426229508197, "grad_norm": 5.967752456665039, "learning_rate": 1.8822542359668515e-05, "loss": 1.1525, "step": 5531 }, { "epoch": 18.137704918032785, "grad_norm": 6.3838276863098145, "learning_rate": 1.882204240192787e-05, "loss": 1.1167, "step": 5532 }, { "epoch": 18.140983606557377, "grad_norm": 5.624703407287598, "learning_rate": 1.8821542344709525e-05, "loss": 1.0488, "step": 5533 }, { "epoch": 18.14426229508197, "grad_norm": 6.336958408355713, "learning_rate": 1.8821042188019115e-05, "loss": 0.9709, "step": 5534 }, { "epoch": 18.147540983606557, "grad_norm": 6.073372840881348, "learning_rate": 1.8820541931862287e-05, "loss": 1.2603, "step": 5535 }, { "epoch": 18.15081967213115, "grad_norm": 5.909428119659424, "learning_rate": 1.8820041576244678e-05, "loss": 1.134, "step": 5536 }, { "epoch": 18.154098360655738, "grad_norm": 6.053889274597168, "learning_rate": 1.8819541121171926e-05, "loss": 1.0316, "step": 5537 }, { "epoch": 18.15737704918033, "grad_norm": 9.096320152282715, "learning_rate": 1.8819040566649677e-05, "loss": 1.2073, "step": 5538 }, { "epoch": 18.160655737704918, "grad_norm": 5.727476596832275, "learning_rate": 1.8818539912683577e-05, "loss": 1.1783, "step": 5539 }, { "epoch": 18.16393442622951, "grad_norm": 6.787683010101318, "learning_rate": 1.8818039159279272e-05, "loss": 1.2886, "step": 5540 }, { "epoch": 18.167213114754098, "grad_norm": 5.299698352813721, "learning_rate": 1.881753830644241e-05, "loss": 1.2134, "step": 5541 }, { "epoch": 18.17049180327869, "grad_norm": 5.872164249420166, "learning_rate": 1.881703735417863e-05, "loss": 0.9635, "step": 5542 }, { "epoch": 18.17377049180328, "grad_norm": 6.2763671875, "learning_rate": 1.8816536302493586e-05, "loss": 1.1962, "step": 5543 }, { "epoch": 18.17704918032787, "grad_norm": 6.288082122802734, "learning_rate": 1.8816035151392933e-05, "loss": 0.9134, "step": 5544 }, { "epoch": 18.18032786885246, "grad_norm": 4.824095249176025, "learning_rate": 1.8815533900882315e-05, "loss": 1.1779, "step": 5545 }, { "epoch": 18.18360655737705, "grad_norm": 6.730475902557373, "learning_rate": 1.8815032550967386e-05, "loss": 1.0939, "step": 5546 }, { "epoch": 18.18688524590164, "grad_norm": 6.442265033721924, "learning_rate": 1.8814531101653802e-05, "loss": 0.9994, "step": 5547 }, { "epoch": 18.19016393442623, "grad_norm": 7.188826084136963, "learning_rate": 1.8814029552947213e-05, "loss": 0.903, "step": 5548 }, { "epoch": 18.19344262295082, "grad_norm": 5.699820041656494, "learning_rate": 1.8813527904853276e-05, "loss": 0.9519, "step": 5549 }, { "epoch": 18.19672131147541, "grad_norm": 5.631203651428223, "learning_rate": 1.881302615737765e-05, "loss": 0.9933, "step": 5550 }, { "epoch": 18.2, "grad_norm": 6.63300895690918, "learning_rate": 1.881252431052599e-05, "loss": 0.9976, "step": 5551 }, { "epoch": 18.20327868852459, "grad_norm": 6.740891456604004, "learning_rate": 1.8812022364303953e-05, "loss": 1.1614, "step": 5552 }, { "epoch": 18.20655737704918, "grad_norm": 12.799420356750488, "learning_rate": 1.8811520318717206e-05, "loss": 0.8977, "step": 5553 }, { "epoch": 18.20983606557377, "grad_norm": 5.518776893615723, "learning_rate": 1.8811018173771404e-05, "loss": 1.1879, "step": 5554 }, { "epoch": 18.21311475409836, "grad_norm": 35.81496047973633, "learning_rate": 1.881051592947221e-05, "loss": 1.2495, "step": 5555 }, { "epoch": 18.21639344262295, "grad_norm": 5.12368631362915, "learning_rate": 1.881001358582529e-05, "loss": 1.1843, "step": 5556 }, { "epoch": 18.21967213114754, "grad_norm": 6.58686637878418, "learning_rate": 1.8809511142836304e-05, "loss": 1.1464, "step": 5557 }, { "epoch": 18.222950819672132, "grad_norm": 5.989483833312988, "learning_rate": 1.8809008600510924e-05, "loss": 1.1812, "step": 5558 }, { "epoch": 18.22622950819672, "grad_norm": 5.892103672027588, "learning_rate": 1.880850595885481e-05, "loss": 1.1213, "step": 5559 }, { "epoch": 18.229508196721312, "grad_norm": 5.357776165008545, "learning_rate": 1.8808003217873633e-05, "loss": 0.9836, "step": 5560 }, { "epoch": 18.2327868852459, "grad_norm": 5.931318283081055, "learning_rate": 1.8807500377573063e-05, "loss": 1.2012, "step": 5561 }, { "epoch": 18.236065573770492, "grad_norm": 9.12777328491211, "learning_rate": 1.8806997437958768e-05, "loss": 1.0741, "step": 5562 }, { "epoch": 18.23934426229508, "grad_norm": 5.9097113609313965, "learning_rate": 1.8806494399036422e-05, "loss": 1.0491, "step": 5563 }, { "epoch": 18.242622950819673, "grad_norm": 4.711833477020264, "learning_rate": 1.880599126081169e-05, "loss": 1.2051, "step": 5564 }, { "epoch": 18.24590163934426, "grad_norm": 7.111276149749756, "learning_rate": 1.8805488023290254e-05, "loss": 1.1846, "step": 5565 }, { "epoch": 18.249180327868853, "grad_norm": 7.161867141723633, "learning_rate": 1.8804984686477788e-05, "loss": 1.0447, "step": 5566 }, { "epoch": 18.25245901639344, "grad_norm": 6.330655574798584, "learning_rate": 1.8804481250379962e-05, "loss": 1.0603, "step": 5567 }, { "epoch": 18.255737704918033, "grad_norm": 8.497854232788086, "learning_rate": 1.8803977715002454e-05, "loss": 0.8649, "step": 5568 }, { "epoch": 18.25901639344262, "grad_norm": 6.2357354164123535, "learning_rate": 1.8803474080350944e-05, "loss": 1.027, "step": 5569 }, { "epoch": 18.262295081967213, "grad_norm": 7.691125869750977, "learning_rate": 1.880297034643111e-05, "loss": 1.1672, "step": 5570 }, { "epoch": 18.2655737704918, "grad_norm": 8.045016288757324, "learning_rate": 1.8802466513248635e-05, "loss": 1.177, "step": 5571 }, { "epoch": 18.268852459016394, "grad_norm": 8.803242683410645, "learning_rate": 1.8801962580809198e-05, "loss": 1.3311, "step": 5572 }, { "epoch": 18.272131147540982, "grad_norm": 8.328344345092773, "learning_rate": 1.8801458549118478e-05, "loss": 0.9402, "step": 5573 }, { "epoch": 18.275409836065574, "grad_norm": 6.073459625244141, "learning_rate": 1.880095441818216e-05, "loss": 1.203, "step": 5574 }, { "epoch": 18.278688524590162, "grad_norm": 6.346359729766846, "learning_rate": 1.8800450188005936e-05, "loss": 1.377, "step": 5575 }, { "epoch": 18.281967213114754, "grad_norm": 6.089292049407959, "learning_rate": 1.879994585859548e-05, "loss": 1.0748, "step": 5576 }, { "epoch": 18.285245901639342, "grad_norm": 6.038885593414307, "learning_rate": 1.879944142995649e-05, "loss": 0.8821, "step": 5577 }, { "epoch": 18.288524590163934, "grad_norm": 6.2229814529418945, "learning_rate": 1.8798936902094648e-05, "loss": 1.0054, "step": 5578 }, { "epoch": 18.291803278688526, "grad_norm": 5.716176986694336, "learning_rate": 1.8798432275015644e-05, "loss": 1.1211, "step": 5579 }, { "epoch": 18.295081967213115, "grad_norm": 7.580899715423584, "learning_rate": 1.8797927548725164e-05, "loss": 1.1875, "step": 5580 }, { "epoch": 18.298360655737707, "grad_norm": 6.690877914428711, "learning_rate": 1.8797422723228908e-05, "loss": 1.1038, "step": 5581 }, { "epoch": 18.301639344262295, "grad_norm": 5.232478141784668, "learning_rate": 1.8796917798532562e-05, "loss": 1.0981, "step": 5582 }, { "epoch": 18.304918032786887, "grad_norm": 12.791223526000977, "learning_rate": 1.879641277464182e-05, "loss": 1.0576, "step": 5583 }, { "epoch": 18.308196721311475, "grad_norm": 6.205768585205078, "learning_rate": 1.8795907651562378e-05, "loss": 1.1943, "step": 5584 }, { "epoch": 18.311475409836067, "grad_norm": 9.741069793701172, "learning_rate": 1.8795402429299935e-05, "loss": 1.0733, "step": 5585 }, { "epoch": 18.314754098360655, "grad_norm": 5.883788108825684, "learning_rate": 1.8794897107860183e-05, "loss": 1.1823, "step": 5586 }, { "epoch": 18.318032786885247, "grad_norm": 5.584466934204102, "learning_rate": 1.8794391687248824e-05, "loss": 1.1798, "step": 5587 }, { "epoch": 18.321311475409836, "grad_norm": 5.85667610168457, "learning_rate": 1.8793886167471548e-05, "loss": 1.2559, "step": 5588 }, { "epoch": 18.324590163934428, "grad_norm": 6.807621479034424, "learning_rate": 1.879338054853407e-05, "loss": 1.1891, "step": 5589 }, { "epoch": 18.327868852459016, "grad_norm": 5.939558506011963, "learning_rate": 1.879287483044208e-05, "loss": 1.1084, "step": 5590 }, { "epoch": 18.331147540983608, "grad_norm": 5.255509376525879, "learning_rate": 1.879236901320128e-05, "loss": 1.1888, "step": 5591 }, { "epoch": 18.334426229508196, "grad_norm": 15.715373039245605, "learning_rate": 1.8791863096817385e-05, "loss": 1.1462, "step": 5592 }, { "epoch": 18.337704918032788, "grad_norm": 6.535693645477295, "learning_rate": 1.8791357081296088e-05, "loss": 1.1244, "step": 5593 }, { "epoch": 18.340983606557376, "grad_norm": 5.901676177978516, "learning_rate": 1.87908509666431e-05, "loss": 1.1187, "step": 5594 }, { "epoch": 18.34426229508197, "grad_norm": 6.182778358459473, "learning_rate": 1.8790344752864126e-05, "loss": 0.9877, "step": 5595 }, { "epoch": 18.347540983606557, "grad_norm": 6.992114067077637, "learning_rate": 1.8789838439964876e-05, "loss": 0.9734, "step": 5596 }, { "epoch": 18.35081967213115, "grad_norm": 6.113780498504639, "learning_rate": 1.8789332027951058e-05, "loss": 1.0706, "step": 5597 }, { "epoch": 18.354098360655737, "grad_norm": 5.085817337036133, "learning_rate": 1.8788825516828385e-05, "loss": 1.2329, "step": 5598 }, { "epoch": 18.35737704918033, "grad_norm": 5.431139945983887, "learning_rate": 1.8788318906602566e-05, "loss": 1.1426, "step": 5599 }, { "epoch": 18.360655737704917, "grad_norm": 6.428829193115234, "learning_rate": 1.8787812197279313e-05, "loss": 1.0775, "step": 5600 }, { "epoch": 18.36393442622951, "grad_norm": 5.697199821472168, "learning_rate": 1.8787305388864338e-05, "loss": 0.9891, "step": 5601 }, { "epoch": 18.367213114754097, "grad_norm": 6.924066066741943, "learning_rate": 1.878679848136336e-05, "loss": 1.3569, "step": 5602 }, { "epoch": 18.37049180327869, "grad_norm": 7.442922115325928, "learning_rate": 1.8786291474782095e-05, "loss": 1.0388, "step": 5603 }, { "epoch": 18.373770491803278, "grad_norm": 4.805959224700928, "learning_rate": 1.8785784369126254e-05, "loss": 1.1992, "step": 5604 }, { "epoch": 18.37704918032787, "grad_norm": 5.948963165283203, "learning_rate": 1.878527716440156e-05, "loss": 1.2126, "step": 5605 }, { "epoch": 18.380327868852458, "grad_norm": 5.6797356605529785, "learning_rate": 1.8784769860613737e-05, "loss": 1.0867, "step": 5606 }, { "epoch": 18.38360655737705, "grad_norm": 5.335916996002197, "learning_rate": 1.8784262457768496e-05, "loss": 1.089, "step": 5607 }, { "epoch": 18.386885245901638, "grad_norm": 6.5361433029174805, "learning_rate": 1.8783754955871564e-05, "loss": 1.113, "step": 5608 }, { "epoch": 18.39016393442623, "grad_norm": 5.355642318725586, "learning_rate": 1.878324735492866e-05, "loss": 1.1354, "step": 5609 }, { "epoch": 18.39344262295082, "grad_norm": 8.49654483795166, "learning_rate": 1.878273965494551e-05, "loss": 1.0522, "step": 5610 }, { "epoch": 18.39672131147541, "grad_norm": 5.137337684631348, "learning_rate": 1.878223185592784e-05, "loss": 0.9996, "step": 5611 }, { "epoch": 18.4, "grad_norm": 7.544824123382568, "learning_rate": 1.8781723957881374e-05, "loss": 1.2183, "step": 5612 }, { "epoch": 18.40327868852459, "grad_norm": 6.556053638458252, "learning_rate": 1.8781215960811837e-05, "loss": 0.9703, "step": 5613 }, { "epoch": 18.40655737704918, "grad_norm": 6.6269330978393555, "learning_rate": 1.8780707864724966e-05, "loss": 1.2123, "step": 5614 }, { "epoch": 18.40983606557377, "grad_norm": 5.8381757736206055, "learning_rate": 1.878019966962648e-05, "loss": 1.1575, "step": 5615 }, { "epoch": 18.41311475409836, "grad_norm": 5.37737512588501, "learning_rate": 1.8779691375522114e-05, "loss": 1.2822, "step": 5616 }, { "epoch": 18.41639344262295, "grad_norm": 5.883443832397461, "learning_rate": 1.87791829824176e-05, "loss": 1.0353, "step": 5617 }, { "epoch": 18.41967213114754, "grad_norm": 7.0339131355285645, "learning_rate": 1.8778674490318668e-05, "loss": 1.1733, "step": 5618 }, { "epoch": 18.42295081967213, "grad_norm": 6.272650718688965, "learning_rate": 1.8778165899231055e-05, "loss": 1.1622, "step": 5619 }, { "epoch": 18.42622950819672, "grad_norm": 5.728464126586914, "learning_rate": 1.8777657209160492e-05, "loss": 1.2516, "step": 5620 }, { "epoch": 18.42950819672131, "grad_norm": 5.7654876708984375, "learning_rate": 1.877714842011272e-05, "loss": 1.145, "step": 5621 }, { "epoch": 18.432786885245903, "grad_norm": 4.429052352905273, "learning_rate": 1.8776639532093476e-05, "loss": 1.3799, "step": 5622 }, { "epoch": 18.43606557377049, "grad_norm": 6.876945972442627, "learning_rate": 1.877613054510849e-05, "loss": 1.2314, "step": 5623 }, { "epoch": 18.439344262295084, "grad_norm": 8.658990859985352, "learning_rate": 1.8775621459163514e-05, "loss": 1.2788, "step": 5624 }, { "epoch": 18.442622950819672, "grad_norm": 6.789599418640137, "learning_rate": 1.8775112274264276e-05, "loss": 1.1465, "step": 5625 }, { "epoch": 18.445901639344264, "grad_norm": 5.396176338195801, "learning_rate": 1.8774602990416527e-05, "loss": 1.113, "step": 5626 }, { "epoch": 18.449180327868852, "grad_norm": 5.426639080047607, "learning_rate": 1.8774093607626005e-05, "loss": 1.0471, "step": 5627 }, { "epoch": 18.452459016393444, "grad_norm": 9.205546379089355, "learning_rate": 1.8773584125898453e-05, "loss": 1.0684, "step": 5628 }, { "epoch": 18.455737704918032, "grad_norm": 4.807228088378906, "learning_rate": 1.8773074545239618e-05, "loss": 1.3651, "step": 5629 }, { "epoch": 18.459016393442624, "grad_norm": 5.700998783111572, "learning_rate": 1.8772564865655246e-05, "loss": 1.0469, "step": 5630 }, { "epoch": 18.462295081967213, "grad_norm": 8.903441429138184, "learning_rate": 1.8772055087151087e-05, "loss": 1.1053, "step": 5631 }, { "epoch": 18.465573770491805, "grad_norm": 6.216640949249268, "learning_rate": 1.877154520973288e-05, "loss": 1.201, "step": 5632 }, { "epoch": 18.468852459016393, "grad_norm": 6.902214527130127, "learning_rate": 1.8771035233406386e-05, "loss": 1.2222, "step": 5633 }, { "epoch": 18.472131147540985, "grad_norm": 6.242872714996338, "learning_rate": 1.877052515817735e-05, "loss": 1.0009, "step": 5634 }, { "epoch": 18.475409836065573, "grad_norm": 7.386353969573975, "learning_rate": 1.8770014984051524e-05, "loss": 1.0266, "step": 5635 }, { "epoch": 18.478688524590165, "grad_norm": 7.166576862335205, "learning_rate": 1.876950471103466e-05, "loss": 1.24, "step": 5636 }, { "epoch": 18.481967213114753, "grad_norm": 7.500884532928467, "learning_rate": 1.876899433913251e-05, "loss": 1.1842, "step": 5637 }, { "epoch": 18.485245901639345, "grad_norm": 6.449544429779053, "learning_rate": 1.876848386835083e-05, "loss": 1.2678, "step": 5638 }, { "epoch": 18.488524590163934, "grad_norm": 5.553869724273682, "learning_rate": 1.8767973298695384e-05, "loss": 1.1219, "step": 5639 }, { "epoch": 18.491803278688526, "grad_norm": 6.245901107788086, "learning_rate": 1.876746263017192e-05, "loss": 1.0625, "step": 5640 }, { "epoch": 18.495081967213114, "grad_norm": 5.952173709869385, "learning_rate": 1.8766951862786196e-05, "loss": 1.0594, "step": 5641 }, { "epoch": 18.498360655737706, "grad_norm": 5.164961338043213, "learning_rate": 1.8766440996543976e-05, "loss": 1.2526, "step": 5642 }, { "epoch": 18.501639344262294, "grad_norm": 5.1480712890625, "learning_rate": 1.876593003145102e-05, "loss": 0.9391, "step": 5643 }, { "epoch": 18.504918032786886, "grad_norm": 9.22033977508545, "learning_rate": 1.8765418967513085e-05, "loss": 0.9663, "step": 5644 }, { "epoch": 18.508196721311474, "grad_norm": 5.592316627502441, "learning_rate": 1.876490780473594e-05, "loss": 1.1608, "step": 5645 }, { "epoch": 18.511475409836066, "grad_norm": 5.921152591705322, "learning_rate": 1.876439654312535e-05, "loss": 1.0966, "step": 5646 }, { "epoch": 18.514754098360655, "grad_norm": 5.443417072296143, "learning_rate": 1.876388518268707e-05, "loss": 1.0913, "step": 5647 }, { "epoch": 18.518032786885247, "grad_norm": 6.376317977905273, "learning_rate": 1.8763373723426873e-05, "loss": 1.2856, "step": 5648 }, { "epoch": 18.521311475409835, "grad_norm": 6.0001606941223145, "learning_rate": 1.876286216535053e-05, "loss": 1.1101, "step": 5649 }, { "epoch": 18.524590163934427, "grad_norm": 5.625040054321289, "learning_rate": 1.8762350508463804e-05, "loss": 1.2954, "step": 5650 }, { "epoch": 18.527868852459015, "grad_norm": 11.370407104492188, "learning_rate": 1.8761838752772462e-05, "loss": 1.0002, "step": 5651 }, { "epoch": 18.531147540983607, "grad_norm": 5.266855716705322, "learning_rate": 1.8761326898282283e-05, "loss": 1.2136, "step": 5652 }, { "epoch": 18.534426229508195, "grad_norm": 5.346222877502441, "learning_rate": 1.876081494499903e-05, "loss": 1.0322, "step": 5653 }, { "epoch": 18.537704918032787, "grad_norm": 5.916358470916748, "learning_rate": 1.876030289292848e-05, "loss": 1.1318, "step": 5654 }, { "epoch": 18.540983606557376, "grad_norm": 5.314289093017578, "learning_rate": 1.8759790742076406e-05, "loss": 1.3401, "step": 5655 }, { "epoch": 18.544262295081968, "grad_norm": 6.558027267456055, "learning_rate": 1.8759278492448586e-05, "loss": 1.0217, "step": 5656 }, { "epoch": 18.547540983606556, "grad_norm": 5.63638973236084, "learning_rate": 1.875876614405079e-05, "loss": 0.9268, "step": 5657 }, { "epoch": 18.550819672131148, "grad_norm": 5.178272247314453, "learning_rate": 1.8758253696888803e-05, "loss": 1.1611, "step": 5658 }, { "epoch": 18.554098360655736, "grad_norm": 7.153955936431885, "learning_rate": 1.8757741150968397e-05, "loss": 0.9875, "step": 5659 }, { "epoch": 18.557377049180328, "grad_norm": 6.608218669891357, "learning_rate": 1.8757228506295354e-05, "loss": 1.1185, "step": 5660 }, { "epoch": 18.560655737704916, "grad_norm": 7.307682037353516, "learning_rate": 1.8756715762875454e-05, "loss": 1.2802, "step": 5661 }, { "epoch": 18.56393442622951, "grad_norm": 5.42232084274292, "learning_rate": 1.875620292071448e-05, "loss": 1.1442, "step": 5662 }, { "epoch": 18.567213114754097, "grad_norm": 5.715123653411865, "learning_rate": 1.8755689979818214e-05, "loss": 1.0469, "step": 5663 }, { "epoch": 18.57049180327869, "grad_norm": 7.072237491607666, "learning_rate": 1.875517694019244e-05, "loss": 1.0825, "step": 5664 }, { "epoch": 18.57377049180328, "grad_norm": 6.55800199508667, "learning_rate": 1.875466380184294e-05, "loss": 0.9202, "step": 5665 }, { "epoch": 18.57704918032787, "grad_norm": 5.750311851501465, "learning_rate": 1.8754150564775505e-05, "loss": 1.1521, "step": 5666 }, { "epoch": 18.58032786885246, "grad_norm": 10.841290473937988, "learning_rate": 1.875363722899592e-05, "loss": 0.9641, "step": 5667 }, { "epoch": 18.58360655737705, "grad_norm": 4.743048191070557, "learning_rate": 1.8753123794509974e-05, "loss": 1.316, "step": 5668 }, { "epoch": 18.58688524590164, "grad_norm": 6.0309553146362305, "learning_rate": 1.875261026132346e-05, "loss": 1.1743, "step": 5669 }, { "epoch": 18.59016393442623, "grad_norm": 7.087132453918457, "learning_rate": 1.875209662944216e-05, "loss": 1.1011, "step": 5670 }, { "epoch": 18.59344262295082, "grad_norm": 6.631762504577637, "learning_rate": 1.875158289887187e-05, "loss": 1.0514, "step": 5671 }, { "epoch": 18.59672131147541, "grad_norm": 7.551165580749512, "learning_rate": 1.875106906961839e-05, "loss": 1.0413, "step": 5672 }, { "epoch": 18.6, "grad_norm": 7.223155975341797, "learning_rate": 1.87505551416875e-05, "loss": 0.9629, "step": 5673 }, { "epoch": 18.60327868852459, "grad_norm": 5.455851078033447, "learning_rate": 1.8750041115085005e-05, "loss": 0.8251, "step": 5674 }, { "epoch": 18.60655737704918, "grad_norm": 5.768228054046631, "learning_rate": 1.87495269898167e-05, "loss": 0.9141, "step": 5675 }, { "epoch": 18.60983606557377, "grad_norm": 5.672031402587891, "learning_rate": 1.874901276588838e-05, "loss": 0.9892, "step": 5676 }, { "epoch": 18.613114754098362, "grad_norm": 5.739456653594971, "learning_rate": 1.8748498443305846e-05, "loss": 1.3528, "step": 5677 }, { "epoch": 18.61639344262295, "grad_norm": 6.555481433868408, "learning_rate": 1.8747984022074892e-05, "loss": 1.0858, "step": 5678 }, { "epoch": 18.619672131147542, "grad_norm": 5.341215133666992, "learning_rate": 1.8747469502201326e-05, "loss": 1.0789, "step": 5679 }, { "epoch": 18.62295081967213, "grad_norm": 10.523560523986816, "learning_rate": 1.8746954883690944e-05, "loss": 1.0811, "step": 5680 }, { "epoch": 18.626229508196722, "grad_norm": 8.748462677001953, "learning_rate": 1.8746440166549554e-05, "loss": 0.9447, "step": 5681 }, { "epoch": 18.62950819672131, "grad_norm": 5.678058624267578, "learning_rate": 1.8745925350782955e-05, "loss": 1.0039, "step": 5682 }, { "epoch": 18.632786885245903, "grad_norm": 6.085732460021973, "learning_rate": 1.8745410436396954e-05, "loss": 1.0355, "step": 5683 }, { "epoch": 18.63606557377049, "grad_norm": 6.051661491394043, "learning_rate": 1.8744895423397354e-05, "loss": 1.0992, "step": 5684 }, { "epoch": 18.639344262295083, "grad_norm": 6.870983123779297, "learning_rate": 1.8744380311789973e-05, "loss": 1.1638, "step": 5685 }, { "epoch": 18.64262295081967, "grad_norm": 5.810033798217773, "learning_rate": 1.874386510158061e-05, "loss": 1.037, "step": 5686 }, { "epoch": 18.645901639344263, "grad_norm": 6.61976432800293, "learning_rate": 1.8743349792775074e-05, "loss": 1.1888, "step": 5687 }, { "epoch": 18.64918032786885, "grad_norm": 6.0462965965271, "learning_rate": 1.8742834385379177e-05, "loss": 1.0159, "step": 5688 }, { "epoch": 18.652459016393443, "grad_norm": 7.164778709411621, "learning_rate": 1.8742318879398738e-05, "loss": 1.0389, "step": 5689 }, { "epoch": 18.65573770491803, "grad_norm": 6.517034530639648, "learning_rate": 1.8741803274839558e-05, "loss": 1.0095, "step": 5690 }, { "epoch": 18.659016393442624, "grad_norm": 7.425522804260254, "learning_rate": 1.874128757170746e-05, "loss": 1.1323, "step": 5691 }, { "epoch": 18.662295081967212, "grad_norm": 5.275187015533447, "learning_rate": 1.8740771770008256e-05, "loss": 1.1362, "step": 5692 }, { "epoch": 18.665573770491804, "grad_norm": 6.173318862915039, "learning_rate": 1.874025586974776e-05, "loss": 1.2461, "step": 5693 }, { "epoch": 18.668852459016392, "grad_norm": 5.099038600921631, "learning_rate": 1.873973987093179e-05, "loss": 1.1775, "step": 5694 }, { "epoch": 18.672131147540984, "grad_norm": 6.216764450073242, "learning_rate": 1.8739223773566173e-05, "loss": 0.9199, "step": 5695 }, { "epoch": 18.675409836065572, "grad_norm": 13.094006538391113, "learning_rate": 1.8738707577656717e-05, "loss": 1.2385, "step": 5696 }, { "epoch": 18.678688524590164, "grad_norm": 6.318499565124512, "learning_rate": 1.873819128320925e-05, "loss": 1.3311, "step": 5697 }, { "epoch": 18.681967213114753, "grad_norm": 5.830702781677246, "learning_rate": 1.8737674890229585e-05, "loss": 1.1487, "step": 5698 }, { "epoch": 18.685245901639345, "grad_norm": 5.385848045349121, "learning_rate": 1.8737158398723558e-05, "loss": 1.2129, "step": 5699 }, { "epoch": 18.688524590163933, "grad_norm": 6.004749298095703, "learning_rate": 1.873664180869698e-05, "loss": 1.261, "step": 5700 }, { "epoch": 18.691803278688525, "grad_norm": 6.574706554412842, "learning_rate": 1.8736125120155683e-05, "loss": 0.9551, "step": 5701 }, { "epoch": 18.695081967213113, "grad_norm": 5.908836364746094, "learning_rate": 1.873560833310549e-05, "loss": 1.186, "step": 5702 }, { "epoch": 18.698360655737705, "grad_norm": 5.232239246368408, "learning_rate": 1.8735091447552235e-05, "loss": 1.0763, "step": 5703 }, { "epoch": 18.701639344262293, "grad_norm": 5.843413352966309, "learning_rate": 1.873457446350174e-05, "loss": 0.9249, "step": 5704 }, { "epoch": 18.704918032786885, "grad_norm": 5.781546115875244, "learning_rate": 1.8734057380959834e-05, "loss": 1.0691, "step": 5705 }, { "epoch": 18.708196721311474, "grad_norm": 5.399068355560303, "learning_rate": 1.873354019993235e-05, "loss": 1.1039, "step": 5706 }, { "epoch": 18.711475409836066, "grad_norm": 6.384198188781738, "learning_rate": 1.8733022920425125e-05, "loss": 1.0756, "step": 5707 }, { "epoch": 18.714754098360658, "grad_norm": 9.994614601135254, "learning_rate": 1.8732505542443983e-05, "loss": 1.0815, "step": 5708 }, { "epoch": 18.718032786885246, "grad_norm": 7.025062561035156, "learning_rate": 1.8731988065994756e-05, "loss": 1.0579, "step": 5709 }, { "epoch": 18.721311475409838, "grad_norm": 5.859430313110352, "learning_rate": 1.8731470491083292e-05, "loss": 1.0598, "step": 5710 }, { "epoch": 18.724590163934426, "grad_norm": 5.3796305656433105, "learning_rate": 1.873095281771542e-05, "loss": 1.1643, "step": 5711 }, { "epoch": 18.727868852459018, "grad_norm": 6.304981231689453, "learning_rate": 1.8730435045896973e-05, "loss": 1.1368, "step": 5712 }, { "epoch": 18.731147540983606, "grad_norm": 5.556851863861084, "learning_rate": 1.8729917175633794e-05, "loss": 1.2417, "step": 5713 }, { "epoch": 18.7344262295082, "grad_norm": 5.97057580947876, "learning_rate": 1.8729399206931722e-05, "loss": 1.2877, "step": 5714 }, { "epoch": 18.737704918032787, "grad_norm": 6.109092712402344, "learning_rate": 1.87288811397966e-05, "loss": 1.1226, "step": 5715 }, { "epoch": 18.74098360655738, "grad_norm": 6.104038238525391, "learning_rate": 1.8728362974234268e-05, "loss": 1.0291, "step": 5716 }, { "epoch": 18.744262295081967, "grad_norm": 5.765781879425049, "learning_rate": 1.8727844710250564e-05, "loss": 0.9909, "step": 5717 }, { "epoch": 18.74754098360656, "grad_norm": 6.344590663909912, "learning_rate": 1.872732634785134e-05, "loss": 1.1094, "step": 5718 }, { "epoch": 18.750819672131147, "grad_norm": 5.618457794189453, "learning_rate": 1.8726807887042434e-05, "loss": 0.9791, "step": 5719 }, { "epoch": 18.75409836065574, "grad_norm": 7.384481906890869, "learning_rate": 1.87262893278297e-05, "loss": 0.9503, "step": 5720 }, { "epoch": 18.757377049180327, "grad_norm": 8.131205558776855, "learning_rate": 1.8725770670218978e-05, "loss": 1.4019, "step": 5721 }, { "epoch": 18.76065573770492, "grad_norm": 5.204817771911621, "learning_rate": 1.8725251914216115e-05, "loss": 1.1689, "step": 5722 }, { "epoch": 18.763934426229508, "grad_norm": 5.752047061920166, "learning_rate": 1.8724733059826968e-05, "loss": 0.9671, "step": 5723 }, { "epoch": 18.7672131147541, "grad_norm": 6.069904804229736, "learning_rate": 1.8724214107057386e-05, "loss": 1.1157, "step": 5724 }, { "epoch": 18.770491803278688, "grad_norm": 5.466434955596924, "learning_rate": 1.872369505591322e-05, "loss": 1.0029, "step": 5725 }, { "epoch": 18.77377049180328, "grad_norm": 6.036693096160889, "learning_rate": 1.872317590640032e-05, "loss": 1.0671, "step": 5726 }, { "epoch": 18.777049180327868, "grad_norm": 6.015136241912842, "learning_rate": 1.8722656658524544e-05, "loss": 1.1737, "step": 5727 }, { "epoch": 18.78032786885246, "grad_norm": 5.862606048583984, "learning_rate": 1.8722137312291743e-05, "loss": 1.1387, "step": 5728 }, { "epoch": 18.78360655737705, "grad_norm": 6.1319580078125, "learning_rate": 1.872161786770777e-05, "loss": 1.0336, "step": 5729 }, { "epoch": 18.78688524590164, "grad_norm": 7.235210418701172, "learning_rate": 1.8721098324778494e-05, "loss": 1.1375, "step": 5730 }, { "epoch": 18.79016393442623, "grad_norm": 10.483643531799316, "learning_rate": 1.8720578683509765e-05, "loss": 1.0474, "step": 5731 }, { "epoch": 18.79344262295082, "grad_norm": 5.563216209411621, "learning_rate": 1.8720058943907444e-05, "loss": 1.2115, "step": 5732 }, { "epoch": 18.79672131147541, "grad_norm": 6.231631278991699, "learning_rate": 1.8719539105977394e-05, "loss": 1.0529, "step": 5733 }, { "epoch": 18.8, "grad_norm": 7.380609035491943, "learning_rate": 1.871901916972547e-05, "loss": 1.1854, "step": 5734 }, { "epoch": 18.80327868852459, "grad_norm": 6.30365514755249, "learning_rate": 1.8718499135157545e-05, "loss": 1.2845, "step": 5735 }, { "epoch": 18.80655737704918, "grad_norm": 6.596072673797607, "learning_rate": 1.8717979002279473e-05, "loss": 1.0448, "step": 5736 }, { "epoch": 18.80983606557377, "grad_norm": 6.566147804260254, "learning_rate": 1.8717458771097125e-05, "loss": 1.011, "step": 5737 }, { "epoch": 18.81311475409836, "grad_norm": 6.7367472648620605, "learning_rate": 1.8716938441616362e-05, "loss": 0.9894, "step": 5738 }, { "epoch": 18.81639344262295, "grad_norm": 6.312222003936768, "learning_rate": 1.871641801384306e-05, "loss": 1.1118, "step": 5739 }, { "epoch": 18.81967213114754, "grad_norm": 5.975043296813965, "learning_rate": 1.871589748778308e-05, "loss": 1.1968, "step": 5740 }, { "epoch": 18.82295081967213, "grad_norm": 18.793201446533203, "learning_rate": 1.871537686344229e-05, "loss": 1.0105, "step": 5741 }, { "epoch": 18.82622950819672, "grad_norm": 6.526736736297607, "learning_rate": 1.8714856140826572e-05, "loss": 1.0571, "step": 5742 }, { "epoch": 18.82950819672131, "grad_norm": 6.139484405517578, "learning_rate": 1.8714335319941783e-05, "loss": 1.2206, "step": 5743 }, { "epoch": 18.832786885245902, "grad_norm": 6.622418403625488, "learning_rate": 1.8713814400793807e-05, "loss": 1.0454, "step": 5744 }, { "epoch": 18.83606557377049, "grad_norm": 6.361361503601074, "learning_rate": 1.871329338338851e-05, "loss": 1.189, "step": 5745 }, { "epoch": 18.839344262295082, "grad_norm": 5.249493598937988, "learning_rate": 1.871277226773177e-05, "loss": 1.259, "step": 5746 }, { "epoch": 18.84262295081967, "grad_norm": 4.940152645111084, "learning_rate": 1.871225105382947e-05, "loss": 1.1292, "step": 5747 }, { "epoch": 18.845901639344262, "grad_norm": 6.067388534545898, "learning_rate": 1.8711729741687475e-05, "loss": 1.2177, "step": 5748 }, { "epoch": 18.84918032786885, "grad_norm": 4.977659702301025, "learning_rate": 1.871120833131167e-05, "loss": 1.1926, "step": 5749 }, { "epoch": 18.852459016393443, "grad_norm": 6.3235344886779785, "learning_rate": 1.8710686822707935e-05, "loss": 1.1085, "step": 5750 }, { "epoch": 18.855737704918035, "grad_norm": 5.634079456329346, "learning_rate": 1.871016521588215e-05, "loss": 1.1285, "step": 5751 }, { "epoch": 18.859016393442623, "grad_norm": 7.59759521484375, "learning_rate": 1.8709643510840192e-05, "loss": 0.8583, "step": 5752 }, { "epoch": 18.862295081967215, "grad_norm": 5.365074634552002, "learning_rate": 1.870912170758795e-05, "loss": 1.1228, "step": 5753 }, { "epoch": 18.865573770491803, "grad_norm": 6.100320816040039, "learning_rate": 1.8708599806131308e-05, "loss": 0.8754, "step": 5754 }, { "epoch": 18.868852459016395, "grad_norm": 7.9905524253845215, "learning_rate": 1.8708077806476144e-05, "loss": 1.167, "step": 5755 }, { "epoch": 18.872131147540983, "grad_norm": 5.8107805252075195, "learning_rate": 1.8707555708628354e-05, "loss": 1.1031, "step": 5756 }, { "epoch": 18.875409836065575, "grad_norm": 5.362249374389648, "learning_rate": 1.8707033512593815e-05, "loss": 1.308, "step": 5757 }, { "epoch": 18.878688524590164, "grad_norm": 5.938754081726074, "learning_rate": 1.8706511218378424e-05, "loss": 0.9838, "step": 5758 }, { "epoch": 18.881967213114756, "grad_norm": 6.590133190155029, "learning_rate": 1.8705988825988062e-05, "loss": 1.2133, "step": 5759 }, { "epoch": 18.885245901639344, "grad_norm": 5.14252233505249, "learning_rate": 1.8705466335428624e-05, "loss": 1.0239, "step": 5760 }, { "epoch": 18.888524590163936, "grad_norm": 6.282638072967529, "learning_rate": 1.8704943746706007e-05, "loss": 1.1362, "step": 5761 }, { "epoch": 18.891803278688524, "grad_norm": 5.422593593597412, "learning_rate": 1.8704421059826094e-05, "loss": 1.186, "step": 5762 }, { "epoch": 18.895081967213116, "grad_norm": 7.02758264541626, "learning_rate": 1.8703898274794785e-05, "loss": 0.9197, "step": 5763 }, { "epoch": 18.898360655737704, "grad_norm": 6.201069355010986, "learning_rate": 1.870337539161797e-05, "loss": 0.9865, "step": 5764 }, { "epoch": 18.901639344262296, "grad_norm": 6.186995506286621, "learning_rate": 1.8702852410301556e-05, "loss": 1.1865, "step": 5765 }, { "epoch": 18.904918032786885, "grad_norm": 6.044063568115234, "learning_rate": 1.8702329330851426e-05, "loss": 1.2488, "step": 5766 }, { "epoch": 18.908196721311477, "grad_norm": 4.949295997619629, "learning_rate": 1.8701806153273486e-05, "loss": 1.1199, "step": 5767 }, { "epoch": 18.911475409836065, "grad_norm": 7.549813747406006, "learning_rate": 1.8701282877573632e-05, "loss": 0.959, "step": 5768 }, { "epoch": 18.914754098360657, "grad_norm": 6.936272144317627, "learning_rate": 1.8700759503757768e-05, "loss": 1.312, "step": 5769 }, { "epoch": 18.918032786885245, "grad_norm": 6.127206802368164, "learning_rate": 1.87002360318318e-05, "loss": 1.1107, "step": 5770 }, { "epoch": 18.921311475409837, "grad_norm": 8.034056663513184, "learning_rate": 1.8699712461801617e-05, "loss": 1.3112, "step": 5771 }, { "epoch": 18.924590163934425, "grad_norm": 5.68238639831543, "learning_rate": 1.869918879367313e-05, "loss": 1.09, "step": 5772 }, { "epoch": 18.927868852459017, "grad_norm": 4.131909370422363, "learning_rate": 1.869866502745225e-05, "loss": 1.3044, "step": 5773 }, { "epoch": 18.931147540983606, "grad_norm": 7.076353073120117, "learning_rate": 1.8698141163144873e-05, "loss": 1.2098, "step": 5774 }, { "epoch": 18.934426229508198, "grad_norm": 5.546924591064453, "learning_rate": 1.8697617200756914e-05, "loss": 1.0083, "step": 5775 }, { "epoch": 18.937704918032786, "grad_norm": 6.357365131378174, "learning_rate": 1.8697093140294272e-05, "loss": 1.1462, "step": 5776 }, { "epoch": 18.940983606557378, "grad_norm": 5.677276611328125, "learning_rate": 1.8696568981762867e-05, "loss": 1.2678, "step": 5777 }, { "epoch": 18.944262295081966, "grad_norm": 4.903616428375244, "learning_rate": 1.869604472516861e-05, "loss": 1.0682, "step": 5778 }, { "epoch": 18.947540983606558, "grad_norm": 5.085056781768799, "learning_rate": 1.8695520370517397e-05, "loss": 0.9873, "step": 5779 }, { "epoch": 18.950819672131146, "grad_norm": 5.604877948760986, "learning_rate": 1.8694995917815157e-05, "loss": 1.186, "step": 5780 }, { "epoch": 18.95409836065574, "grad_norm": 7.510365009307861, "learning_rate": 1.8694471367067795e-05, "loss": 1.0394, "step": 5781 }, { "epoch": 18.957377049180327, "grad_norm": 6.554582595825195, "learning_rate": 1.869394671828123e-05, "loss": 1.0416, "step": 5782 }, { "epoch": 18.96065573770492, "grad_norm": 5.3471879959106445, "learning_rate": 1.8693421971461373e-05, "loss": 1.3289, "step": 5783 }, { "epoch": 18.963934426229507, "grad_norm": 7.253870487213135, "learning_rate": 1.8692897126614146e-05, "loss": 1.056, "step": 5784 }, { "epoch": 18.9672131147541, "grad_norm": 7.197559833526611, "learning_rate": 1.8692372183745466e-05, "loss": 0.881, "step": 5785 }, { "epoch": 18.970491803278687, "grad_norm": 5.700599193572998, "learning_rate": 1.8691847142861253e-05, "loss": 0.991, "step": 5786 }, { "epoch": 18.97377049180328, "grad_norm": 5.430779933929443, "learning_rate": 1.8691322003967423e-05, "loss": 1.3905, "step": 5787 }, { "epoch": 18.977049180327867, "grad_norm": 6.73570442199707, "learning_rate": 1.8690796767069903e-05, "loss": 1.1034, "step": 5788 }, { "epoch": 18.98032786885246, "grad_norm": 5.3811116218566895, "learning_rate": 1.869027143217461e-05, "loss": 1.3164, "step": 5789 }, { "epoch": 18.983606557377048, "grad_norm": 5.390101432800293, "learning_rate": 1.8689745999287477e-05, "loss": 1.1593, "step": 5790 }, { "epoch": 18.98688524590164, "grad_norm": 5.205804347991943, "learning_rate": 1.868922046841442e-05, "loss": 1.0255, "step": 5791 }, { "epoch": 18.990163934426228, "grad_norm": 7.137233257293701, "learning_rate": 1.8688694839561368e-05, "loss": 1.0977, "step": 5792 }, { "epoch": 18.99344262295082, "grad_norm": 10.89370059967041, "learning_rate": 1.8688169112734248e-05, "loss": 1.0059, "step": 5793 }, { "epoch": 18.99672131147541, "grad_norm": 5.134988784790039, "learning_rate": 1.8687643287938982e-05, "loss": 1.1943, "step": 5794 }, { "epoch": 19.0, "grad_norm": 5.879851341247559, "learning_rate": 1.8687117365181514e-05, "loss": 0.9121, "step": 5795 }, { "epoch": 19.003278688524592, "grad_norm": 5.791091442108154, "learning_rate": 1.8686591344467758e-05, "loss": 0.8734, "step": 5796 }, { "epoch": 19.00655737704918, "grad_norm": 5.435515403747559, "learning_rate": 1.8686065225803657e-05, "loss": 1.0319, "step": 5797 }, { "epoch": 19.009836065573772, "grad_norm": 6.318763256072998, "learning_rate": 1.8685539009195138e-05, "loss": 0.9003, "step": 5798 }, { "epoch": 19.01311475409836, "grad_norm": 5.197866916656494, "learning_rate": 1.8685012694648136e-05, "loss": 1.1483, "step": 5799 }, { "epoch": 19.016393442622952, "grad_norm": 6.5855865478515625, "learning_rate": 1.8684486282168585e-05, "loss": 1.0146, "step": 5800 }, { "epoch": 19.01967213114754, "grad_norm": 5.1368913650512695, "learning_rate": 1.8683959771762425e-05, "loss": 1.2885, "step": 5801 }, { "epoch": 19.022950819672133, "grad_norm": 7.006054401397705, "learning_rate": 1.8683433163435588e-05, "loss": 1.0259, "step": 5802 }, { "epoch": 19.02622950819672, "grad_norm": 6.291146755218506, "learning_rate": 1.8682906457194012e-05, "loss": 0.941, "step": 5803 }, { "epoch": 19.029508196721313, "grad_norm": 5.856509685516357, "learning_rate": 1.8682379653043637e-05, "loss": 0.9652, "step": 5804 }, { "epoch": 19.0327868852459, "grad_norm": 4.819483280181885, "learning_rate": 1.868185275099041e-05, "loss": 1.2467, "step": 5805 }, { "epoch": 19.036065573770493, "grad_norm": 5.561692237854004, "learning_rate": 1.868132575104026e-05, "loss": 1.0028, "step": 5806 }, { "epoch": 19.03934426229508, "grad_norm": 5.116506099700928, "learning_rate": 1.8680798653199137e-05, "loss": 1.1051, "step": 5807 }, { "epoch": 19.042622950819673, "grad_norm": 7.9600019454956055, "learning_rate": 1.8680271457472986e-05, "loss": 0.9271, "step": 5808 }, { "epoch": 19.04590163934426, "grad_norm": 5.776375770568848, "learning_rate": 1.8679744163867743e-05, "loss": 1.0103, "step": 5809 }, { "epoch": 19.049180327868854, "grad_norm": 6.523338317871094, "learning_rate": 1.8679216772389364e-05, "loss": 0.9897, "step": 5810 }, { "epoch": 19.052459016393442, "grad_norm": 5.1506757736206055, "learning_rate": 1.8678689283043792e-05, "loss": 0.9324, "step": 5811 }, { "epoch": 19.055737704918034, "grad_norm": 5.91520881652832, "learning_rate": 1.8678161695836977e-05, "loss": 1.118, "step": 5812 }, { "epoch": 19.059016393442622, "grad_norm": 6.56306266784668, "learning_rate": 1.8677634010774864e-05, "loss": 0.8835, "step": 5813 }, { "epoch": 19.062295081967214, "grad_norm": 6.309080123901367, "learning_rate": 1.8677106227863404e-05, "loss": 0.9198, "step": 5814 }, { "epoch": 19.065573770491802, "grad_norm": 5.856454372406006, "learning_rate": 1.867657834710855e-05, "loss": 1.0723, "step": 5815 }, { "epoch": 19.068852459016394, "grad_norm": 7.011753559112549, "learning_rate": 1.8676050368516255e-05, "loss": 0.8862, "step": 5816 }, { "epoch": 19.072131147540983, "grad_norm": 5.014854907989502, "learning_rate": 1.8675522292092466e-05, "loss": 1.1182, "step": 5817 }, { "epoch": 19.075409836065575, "grad_norm": 5.285172939300537, "learning_rate": 1.8674994117843147e-05, "loss": 1.1235, "step": 5818 }, { "epoch": 19.078688524590163, "grad_norm": 6.350784778594971, "learning_rate": 1.8674465845774253e-05, "loss": 0.9075, "step": 5819 }, { "epoch": 19.081967213114755, "grad_norm": 5.640670299530029, "learning_rate": 1.867393747589173e-05, "loss": 1.0558, "step": 5820 }, { "epoch": 19.085245901639343, "grad_norm": 5.4295148849487305, "learning_rate": 1.867340900820155e-05, "loss": 1.0143, "step": 5821 }, { "epoch": 19.088524590163935, "grad_norm": 5.139407157897949, "learning_rate": 1.867288044270966e-05, "loss": 1.0532, "step": 5822 }, { "epoch": 19.091803278688523, "grad_norm": 5.145776271820068, "learning_rate": 1.867235177942203e-05, "loss": 1.262, "step": 5823 }, { "epoch": 19.095081967213115, "grad_norm": 4.873676300048828, "learning_rate": 1.8671823018344615e-05, "loss": 1.0708, "step": 5824 }, { "epoch": 19.098360655737704, "grad_norm": 6.578591346740723, "learning_rate": 1.867129415948338e-05, "loss": 1.0385, "step": 5825 }, { "epoch": 19.101639344262296, "grad_norm": 5.768252849578857, "learning_rate": 1.8670765202844284e-05, "loss": 1.1141, "step": 5826 }, { "epoch": 19.104918032786884, "grad_norm": 16.603708267211914, "learning_rate": 1.86702361484333e-05, "loss": 1.0956, "step": 5827 }, { "epoch": 19.108196721311476, "grad_norm": 6.805786609649658, "learning_rate": 1.8669706996256383e-05, "loss": 1.1938, "step": 5828 }, { "epoch": 19.111475409836064, "grad_norm": 6.516038417816162, "learning_rate": 1.8669177746319508e-05, "loss": 1.095, "step": 5829 }, { "epoch": 19.114754098360656, "grad_norm": 4.718663215637207, "learning_rate": 1.866864839862864e-05, "loss": 0.8989, "step": 5830 }, { "epoch": 19.118032786885244, "grad_norm": 6.320564270019531, "learning_rate": 1.8668118953189748e-05, "loss": 1.062, "step": 5831 }, { "epoch": 19.121311475409836, "grad_norm": 6.592081069946289, "learning_rate": 1.8667589410008802e-05, "loss": 1.1038, "step": 5832 }, { "epoch": 19.124590163934425, "grad_norm": 9.070575714111328, "learning_rate": 1.8667059769091778e-05, "loss": 0.8806, "step": 5833 }, { "epoch": 19.127868852459017, "grad_norm": 6.470545291900635, "learning_rate": 1.8666530030444638e-05, "loss": 1.0453, "step": 5834 }, { "epoch": 19.131147540983605, "grad_norm": 5.162046432495117, "learning_rate": 1.8666000194073365e-05, "loss": 0.8622, "step": 5835 }, { "epoch": 19.134426229508197, "grad_norm": 5.704163074493408, "learning_rate": 1.8665470259983926e-05, "loss": 1.026, "step": 5836 }, { "epoch": 19.137704918032785, "grad_norm": 5.373821258544922, "learning_rate": 1.86649402281823e-05, "loss": 1.0133, "step": 5837 }, { "epoch": 19.140983606557377, "grad_norm": 5.6570725440979, "learning_rate": 1.8664410098674467e-05, "loss": 1.1063, "step": 5838 }, { "epoch": 19.14426229508197, "grad_norm": 6.1995849609375, "learning_rate": 1.8663879871466397e-05, "loss": 0.9916, "step": 5839 }, { "epoch": 19.147540983606557, "grad_norm": 5.2701945304870605, "learning_rate": 1.8663349546564074e-05, "loss": 1.077, "step": 5840 }, { "epoch": 19.15081967213115, "grad_norm": 5.9923601150512695, "learning_rate": 1.866281912397348e-05, "loss": 1.1393, "step": 5841 }, { "epoch": 19.154098360655738, "grad_norm": 6.680043697357178, "learning_rate": 1.8662288603700595e-05, "loss": 1.0375, "step": 5842 }, { "epoch": 19.15737704918033, "grad_norm": 5.255483627319336, "learning_rate": 1.8661757985751398e-05, "loss": 0.709, "step": 5843 }, { "epoch": 19.160655737704918, "grad_norm": 6.085811614990234, "learning_rate": 1.866122727013187e-05, "loss": 1.1465, "step": 5844 }, { "epoch": 19.16393442622951, "grad_norm": 5.114092826843262, "learning_rate": 1.8660696456848e-05, "loss": 1.2493, "step": 5845 }, { "epoch": 19.167213114754098, "grad_norm": 4.77060604095459, "learning_rate": 1.8660165545905774e-05, "loss": 1.1715, "step": 5846 }, { "epoch": 19.17049180327869, "grad_norm": 6.169422626495361, "learning_rate": 1.865963453731118e-05, "loss": 0.8944, "step": 5847 }, { "epoch": 19.17377049180328, "grad_norm": 6.431337833404541, "learning_rate": 1.86591034310702e-05, "loss": 1.0264, "step": 5848 }, { "epoch": 19.17704918032787, "grad_norm": 5.29252815246582, "learning_rate": 1.8658572227188824e-05, "loss": 0.9582, "step": 5849 }, { "epoch": 19.18032786885246, "grad_norm": 6.4503560066223145, "learning_rate": 1.8658040925673044e-05, "loss": 1.0532, "step": 5850 }, { "epoch": 19.18360655737705, "grad_norm": 5.522218227386475, "learning_rate": 1.865750952652885e-05, "loss": 1.0443, "step": 5851 }, { "epoch": 19.18688524590164, "grad_norm": 4.938108921051025, "learning_rate": 1.8656978029762238e-05, "loss": 1.1764, "step": 5852 }, { "epoch": 19.19016393442623, "grad_norm": 6.021831035614014, "learning_rate": 1.8656446435379196e-05, "loss": 1.0513, "step": 5853 }, { "epoch": 19.19344262295082, "grad_norm": 9.023574829101562, "learning_rate": 1.865591474338572e-05, "loss": 1.1843, "step": 5854 }, { "epoch": 19.19672131147541, "grad_norm": 6.780888557434082, "learning_rate": 1.8655382953787805e-05, "loss": 1.13, "step": 5855 }, { "epoch": 19.2, "grad_norm": 6.495650768280029, "learning_rate": 1.865485106659145e-05, "loss": 0.8455, "step": 5856 }, { "epoch": 19.20327868852459, "grad_norm": 5.0628437995910645, "learning_rate": 1.8654319081802645e-05, "loss": 0.9778, "step": 5857 }, { "epoch": 19.20655737704918, "grad_norm": 5.675613880157471, "learning_rate": 1.8653786999427398e-05, "loss": 1.1571, "step": 5858 }, { "epoch": 19.20983606557377, "grad_norm": 6.251295566558838, "learning_rate": 1.8653254819471705e-05, "loss": 1.2312, "step": 5859 }, { "epoch": 19.21311475409836, "grad_norm": 5.63146448135376, "learning_rate": 1.8652722541941565e-05, "loss": 0.9679, "step": 5860 }, { "epoch": 19.21639344262295, "grad_norm": 5.364380836486816, "learning_rate": 1.8652190166842983e-05, "loss": 1.079, "step": 5861 }, { "epoch": 19.21967213114754, "grad_norm": 5.84882926940918, "learning_rate": 1.865165769418196e-05, "loss": 1.0752, "step": 5862 }, { "epoch": 19.222950819672132, "grad_norm": 6.058711051940918, "learning_rate": 1.8651125123964502e-05, "loss": 1.0242, "step": 5863 }, { "epoch": 19.22622950819672, "grad_norm": 6.204561710357666, "learning_rate": 1.8650592456196613e-05, "loss": 1.1079, "step": 5864 }, { "epoch": 19.229508196721312, "grad_norm": 6.171145915985107, "learning_rate": 1.86500596908843e-05, "loss": 0.9251, "step": 5865 }, { "epoch": 19.2327868852459, "grad_norm": 6.812965393066406, "learning_rate": 1.864952682803357e-05, "loss": 0.8986, "step": 5866 }, { "epoch": 19.236065573770492, "grad_norm": 5.578434944152832, "learning_rate": 1.8648993867650432e-05, "loss": 1.171, "step": 5867 }, { "epoch": 19.23934426229508, "grad_norm": 6.113478660583496, "learning_rate": 1.8648460809740895e-05, "loss": 0.98, "step": 5868 }, { "epoch": 19.242622950819673, "grad_norm": 6.025320529937744, "learning_rate": 1.864792765431097e-05, "loss": 0.8156, "step": 5869 }, { "epoch": 19.24590163934426, "grad_norm": 6.239116668701172, "learning_rate": 1.864739440136667e-05, "loss": 0.8356, "step": 5870 }, { "epoch": 19.249180327868853, "grad_norm": 7.073328971862793, "learning_rate": 1.8646861050914008e-05, "loss": 1.1605, "step": 5871 }, { "epoch": 19.25245901639344, "grad_norm": 6.232109546661377, "learning_rate": 1.8646327602958996e-05, "loss": 1.1618, "step": 5872 }, { "epoch": 19.255737704918033, "grad_norm": 6.6937947273254395, "learning_rate": 1.864579405750765e-05, "loss": 0.8055, "step": 5873 }, { "epoch": 19.25901639344262, "grad_norm": 5.659036159515381, "learning_rate": 1.8645260414565988e-05, "loss": 1.053, "step": 5874 }, { "epoch": 19.262295081967213, "grad_norm": 7.1548075675964355, "learning_rate": 1.8644726674140023e-05, "loss": 1.0707, "step": 5875 }, { "epoch": 19.2655737704918, "grad_norm": 6.676242828369141, "learning_rate": 1.864419283623578e-05, "loss": 1.035, "step": 5876 }, { "epoch": 19.268852459016394, "grad_norm": 11.659223556518555, "learning_rate": 1.8643658900859273e-05, "loss": 1.044, "step": 5877 }, { "epoch": 19.272131147540982, "grad_norm": 6.037240028381348, "learning_rate": 1.8643124868016525e-05, "loss": 0.9364, "step": 5878 }, { "epoch": 19.275409836065574, "grad_norm": 8.355077743530273, "learning_rate": 1.8642590737713556e-05, "loss": 0.9218, "step": 5879 }, { "epoch": 19.278688524590162, "grad_norm": 13.650286674499512, "learning_rate": 1.864205650995639e-05, "loss": 1.0875, "step": 5880 }, { "epoch": 19.281967213114754, "grad_norm": 7.137811183929443, "learning_rate": 1.8641522184751057e-05, "loss": 1.0022, "step": 5881 }, { "epoch": 19.285245901639342, "grad_norm": 7.464702606201172, "learning_rate": 1.8640987762103574e-05, "loss": 1.0835, "step": 5882 }, { "epoch": 19.288524590163934, "grad_norm": 5.980595111846924, "learning_rate": 1.8640453242019963e-05, "loss": 0.9902, "step": 5883 }, { "epoch": 19.291803278688526, "grad_norm": 137.2253875732422, "learning_rate": 1.8639918624506265e-05, "loss": 1.0189, "step": 5884 }, { "epoch": 19.295081967213115, "grad_norm": 6.442535400390625, "learning_rate": 1.86393839095685e-05, "loss": 0.9729, "step": 5885 }, { "epoch": 19.298360655737707, "grad_norm": 5.637268543243408, "learning_rate": 1.86388490972127e-05, "loss": 1.3035, "step": 5886 }, { "epoch": 19.301639344262295, "grad_norm": 5.656223773956299, "learning_rate": 1.8638314187444894e-05, "loss": 1.2993, "step": 5887 }, { "epoch": 19.304918032786887, "grad_norm": 8.41391372680664, "learning_rate": 1.863777918027111e-05, "loss": 1.1361, "step": 5888 }, { "epoch": 19.308196721311475, "grad_norm": 5.63496732711792, "learning_rate": 1.863724407569739e-05, "loss": 1.3832, "step": 5889 }, { "epoch": 19.311475409836067, "grad_norm": 7.122337341308594, "learning_rate": 1.8636708873729758e-05, "loss": 0.7844, "step": 5890 }, { "epoch": 19.314754098360655, "grad_norm": 5.876659870147705, "learning_rate": 1.8636173574374253e-05, "loss": 0.9553, "step": 5891 }, { "epoch": 19.318032786885247, "grad_norm": 6.522833347320557, "learning_rate": 1.8635638177636916e-05, "loss": 1.1663, "step": 5892 }, { "epoch": 19.321311475409836, "grad_norm": 5.935093879699707, "learning_rate": 1.8635102683523777e-05, "loss": 1.0513, "step": 5893 }, { "epoch": 19.324590163934428, "grad_norm": 10.26370620727539, "learning_rate": 1.863456709204088e-05, "loss": 1.2278, "step": 5894 }, { "epoch": 19.327868852459016, "grad_norm": 9.525900840759277, "learning_rate": 1.8634031403194255e-05, "loss": 0.9153, "step": 5895 }, { "epoch": 19.331147540983608, "grad_norm": 11.121297836303711, "learning_rate": 1.8633495616989953e-05, "loss": 0.9675, "step": 5896 }, { "epoch": 19.334426229508196, "grad_norm": 8.448031425476074, "learning_rate": 1.8632959733434012e-05, "loss": 0.9424, "step": 5897 }, { "epoch": 19.337704918032788, "grad_norm": 7.489994525909424, "learning_rate": 1.8632423752532474e-05, "loss": 0.9783, "step": 5898 }, { "epoch": 19.340983606557376, "grad_norm": 6.310576438903809, "learning_rate": 1.863188767429138e-05, "loss": 1.0221, "step": 5899 }, { "epoch": 19.34426229508197, "grad_norm": 8.32676887512207, "learning_rate": 1.8631351498716782e-05, "loss": 1.0021, "step": 5900 }, { "epoch": 19.347540983606557, "grad_norm": 6.984696865081787, "learning_rate": 1.8630815225814717e-05, "loss": 1.0203, "step": 5901 }, { "epoch": 19.35081967213115, "grad_norm": 6.948051929473877, "learning_rate": 1.8630278855591236e-05, "loss": 1.2507, "step": 5902 }, { "epoch": 19.354098360655737, "grad_norm": 8.638860702514648, "learning_rate": 1.862974238805239e-05, "loss": 1.1919, "step": 5903 }, { "epoch": 19.35737704918033, "grad_norm": 5.94219970703125, "learning_rate": 1.8629205823204225e-05, "loss": 1.1541, "step": 5904 }, { "epoch": 19.360655737704917, "grad_norm": 6.14625883102417, "learning_rate": 1.8628669161052793e-05, "loss": 1.0233, "step": 5905 }, { "epoch": 19.36393442622951, "grad_norm": 7.3875041007995605, "learning_rate": 1.8628132401604148e-05, "loss": 0.825, "step": 5906 }, { "epoch": 19.367213114754097, "grad_norm": 5.775538444519043, "learning_rate": 1.8627595544864335e-05, "loss": 1.1121, "step": 5907 }, { "epoch": 19.37049180327869, "grad_norm": 6.328117847442627, "learning_rate": 1.8627058590839415e-05, "loss": 1.0994, "step": 5908 }, { "epoch": 19.373770491803278, "grad_norm": 8.045417785644531, "learning_rate": 1.8626521539535436e-05, "loss": 1.097, "step": 5909 }, { "epoch": 19.37704918032787, "grad_norm": 6.093903541564941, "learning_rate": 1.862598439095846e-05, "loss": 1.3054, "step": 5910 }, { "epoch": 19.380327868852458, "grad_norm": 4.778768539428711, "learning_rate": 1.8625447145114536e-05, "loss": 1.3999, "step": 5911 }, { "epoch": 19.38360655737705, "grad_norm": 6.524412631988525, "learning_rate": 1.862490980200973e-05, "loss": 0.98, "step": 5912 }, { "epoch": 19.386885245901638, "grad_norm": 6.677066326141357, "learning_rate": 1.8624372361650103e-05, "loss": 1.1917, "step": 5913 }, { "epoch": 19.39016393442623, "grad_norm": 6.326841831207275, "learning_rate": 1.8623834824041704e-05, "loss": 1.1354, "step": 5914 }, { "epoch": 19.39344262295082, "grad_norm": 5.690197467803955, "learning_rate": 1.8623297189190603e-05, "loss": 1.2019, "step": 5915 }, { "epoch": 19.39672131147541, "grad_norm": 5.305305480957031, "learning_rate": 1.862275945710286e-05, "loss": 1.0925, "step": 5916 }, { "epoch": 19.4, "grad_norm": 5.780972957611084, "learning_rate": 1.862222162778454e-05, "loss": 1.073, "step": 5917 }, { "epoch": 19.40327868852459, "grad_norm": 11.291972160339355, "learning_rate": 1.8621683701241706e-05, "loss": 1.0321, "step": 5918 }, { "epoch": 19.40655737704918, "grad_norm": 5.518503189086914, "learning_rate": 1.8621145677480424e-05, "loss": 1.0621, "step": 5919 }, { "epoch": 19.40983606557377, "grad_norm": 5.613949298858643, "learning_rate": 1.862060755650676e-05, "loss": 1.2113, "step": 5920 }, { "epoch": 19.41311475409836, "grad_norm": 5.782866477966309, "learning_rate": 1.8620069338326786e-05, "loss": 1.1344, "step": 5921 }, { "epoch": 19.41639344262295, "grad_norm": 6.886890411376953, "learning_rate": 1.861953102294656e-05, "loss": 0.8838, "step": 5922 }, { "epoch": 19.41967213114754, "grad_norm": 5.89664888381958, "learning_rate": 1.8618992610372166e-05, "loss": 1.0879, "step": 5923 }, { "epoch": 19.42295081967213, "grad_norm": 6.494360446929932, "learning_rate": 1.8618454100609668e-05, "loss": 1.0477, "step": 5924 }, { "epoch": 19.42622950819672, "grad_norm": 8.972293853759766, "learning_rate": 1.8617915493665138e-05, "loss": 1.1239, "step": 5925 }, { "epoch": 19.42950819672131, "grad_norm": 6.380814075469971, "learning_rate": 1.8617376789544652e-05, "loss": 0.9613, "step": 5926 }, { "epoch": 19.432786885245903, "grad_norm": 5.719783306121826, "learning_rate": 1.861683798825428e-05, "loss": 1.0952, "step": 5927 }, { "epoch": 19.43606557377049, "grad_norm": 5.255747318267822, "learning_rate": 1.8616299089800103e-05, "loss": 1.5333, "step": 5928 }, { "epoch": 19.439344262295084, "grad_norm": 5.3062334060668945, "learning_rate": 1.8615760094188196e-05, "loss": 1.0496, "step": 5929 }, { "epoch": 19.442622950819672, "grad_norm": 5.762156963348389, "learning_rate": 1.8615221001424633e-05, "loss": 0.8275, "step": 5930 }, { "epoch": 19.445901639344264, "grad_norm": 6.72035551071167, "learning_rate": 1.8614681811515495e-05, "loss": 1.031, "step": 5931 }, { "epoch": 19.449180327868852, "grad_norm": 5.605056285858154, "learning_rate": 1.8614142524466863e-05, "loss": 1.1448, "step": 5932 }, { "epoch": 19.452459016393444, "grad_norm": 5.6376237869262695, "learning_rate": 1.861360314028482e-05, "loss": 1.0905, "step": 5933 }, { "epoch": 19.455737704918032, "grad_norm": 8.514162063598633, "learning_rate": 1.8613063658975443e-05, "loss": 1.2131, "step": 5934 }, { "epoch": 19.459016393442624, "grad_norm": 7.279539585113525, "learning_rate": 1.861252408054482e-05, "loss": 1.0171, "step": 5935 }, { "epoch": 19.462295081967213, "grad_norm": 6.503668785095215, "learning_rate": 1.8611984404999033e-05, "loss": 0.9368, "step": 5936 }, { "epoch": 19.465573770491805, "grad_norm": 6.487101078033447, "learning_rate": 1.8611444632344166e-05, "loss": 0.9517, "step": 5937 }, { "epoch": 19.468852459016393, "grad_norm": 6.453583717346191, "learning_rate": 1.861090476258631e-05, "loss": 0.8912, "step": 5938 }, { "epoch": 19.472131147540985, "grad_norm": 6.915359020233154, "learning_rate": 1.8610364795731545e-05, "loss": 0.9634, "step": 5939 }, { "epoch": 19.475409836065573, "grad_norm": 7.141716480255127, "learning_rate": 1.8609824731785968e-05, "loss": 0.9904, "step": 5940 }, { "epoch": 19.478688524590165, "grad_norm": 6.9860076904296875, "learning_rate": 1.8609284570755663e-05, "loss": 1.0381, "step": 5941 }, { "epoch": 19.481967213114753, "grad_norm": 5.268520355224609, "learning_rate": 1.8608744312646726e-05, "loss": 1.1534, "step": 5942 }, { "epoch": 19.485245901639345, "grad_norm": 6.055939674377441, "learning_rate": 1.8608203957465245e-05, "loss": 0.9656, "step": 5943 }, { "epoch": 19.488524590163934, "grad_norm": 5.361840724945068, "learning_rate": 1.860766350521731e-05, "loss": 1.0951, "step": 5944 }, { "epoch": 19.491803278688526, "grad_norm": 6.716110706329346, "learning_rate": 1.8607122955909024e-05, "loss": 1.0871, "step": 5945 }, { "epoch": 19.495081967213114, "grad_norm": 5.726433753967285, "learning_rate": 1.860658230954648e-05, "loss": 0.9888, "step": 5946 }, { "epoch": 19.498360655737706, "grad_norm": 5.833191394805908, "learning_rate": 1.8606041566135765e-05, "loss": 0.9738, "step": 5947 }, { "epoch": 19.501639344262294, "grad_norm": 5.907503604888916, "learning_rate": 1.860550072568299e-05, "loss": 1.0948, "step": 5948 }, { "epoch": 19.504918032786886, "grad_norm": 13.424002647399902, "learning_rate": 1.860495978819424e-05, "loss": 1.1619, "step": 5949 }, { "epoch": 19.508196721311474, "grad_norm": 6.400110721588135, "learning_rate": 1.8604418753675622e-05, "loss": 0.9041, "step": 5950 }, { "epoch": 19.511475409836066, "grad_norm": 5.609800338745117, "learning_rate": 1.8603877622133242e-05, "loss": 1.1466, "step": 5951 }, { "epoch": 19.514754098360655, "grad_norm": 5.111355304718018, "learning_rate": 1.8603336393573195e-05, "loss": 1.1448, "step": 5952 }, { "epoch": 19.518032786885247, "grad_norm": 5.325422763824463, "learning_rate": 1.860279506800158e-05, "loss": 1.1204, "step": 5953 }, { "epoch": 19.521311475409835, "grad_norm": 5.6025776863098145, "learning_rate": 1.8602253645424508e-05, "loss": 1.0458, "step": 5954 }, { "epoch": 19.524590163934427, "grad_norm": 7.086552619934082, "learning_rate": 1.8601712125848084e-05, "loss": 0.8414, "step": 5955 }, { "epoch": 19.527868852459015, "grad_norm": 5.216806888580322, "learning_rate": 1.8601170509278412e-05, "loss": 1.084, "step": 5956 }, { "epoch": 19.531147540983607, "grad_norm": 4.302893161773682, "learning_rate": 1.8600628795721598e-05, "loss": 0.9379, "step": 5957 }, { "epoch": 19.534426229508195, "grad_norm": 4.627374172210693, "learning_rate": 1.8600086985183753e-05, "loss": 1.2153, "step": 5958 }, { "epoch": 19.537704918032787, "grad_norm": 6.051316738128662, "learning_rate": 1.8599545077670983e-05, "loss": 0.9984, "step": 5959 }, { "epoch": 19.540983606557376, "grad_norm": 5.248006343841553, "learning_rate": 1.8599003073189404e-05, "loss": 0.9639, "step": 5960 }, { "epoch": 19.544262295081968, "grad_norm": 5.880740642547607, "learning_rate": 1.8598460971745124e-05, "loss": 1.2068, "step": 5961 }, { "epoch": 19.547540983606556, "grad_norm": 5.799496650695801, "learning_rate": 1.859791877334426e-05, "loss": 0.8961, "step": 5962 }, { "epoch": 19.550819672131148, "grad_norm": 5.5147600173950195, "learning_rate": 1.8597376477992913e-05, "loss": 0.9601, "step": 5963 }, { "epoch": 19.554098360655736, "grad_norm": 7.913723945617676, "learning_rate": 1.8596834085697214e-05, "loss": 0.902, "step": 5964 }, { "epoch": 19.557377049180328, "grad_norm": 18.62139129638672, "learning_rate": 1.859629159646327e-05, "loss": 1.1216, "step": 5965 }, { "epoch": 19.560655737704916, "grad_norm": 5.901493072509766, "learning_rate": 1.8595749010297203e-05, "loss": 1.0625, "step": 5966 }, { "epoch": 19.56393442622951, "grad_norm": 5.917150020599365, "learning_rate": 1.8595206327205125e-05, "loss": 1.1477, "step": 5967 }, { "epoch": 19.567213114754097, "grad_norm": 4.544974327087402, "learning_rate": 1.8594663547193163e-05, "loss": 1.1187, "step": 5968 }, { "epoch": 19.57049180327869, "grad_norm": 6.089466094970703, "learning_rate": 1.8594120670267427e-05, "loss": 0.9395, "step": 5969 }, { "epoch": 19.57377049180328, "grad_norm": 5.453704833984375, "learning_rate": 1.8593577696434048e-05, "loss": 0.9784, "step": 5970 }, { "epoch": 19.57704918032787, "grad_norm": 5.850151538848877, "learning_rate": 1.8593034625699148e-05, "loss": 0.9351, "step": 5971 }, { "epoch": 19.58032786885246, "grad_norm": 6.359012603759766, "learning_rate": 1.8592491458068846e-05, "loss": 1.0552, "step": 5972 }, { "epoch": 19.58360655737705, "grad_norm": 6.245216369628906, "learning_rate": 1.8591948193549267e-05, "loss": 0.9662, "step": 5973 }, { "epoch": 19.58688524590164, "grad_norm": 4.697674751281738, "learning_rate": 1.8591404832146544e-05, "loss": 1.166, "step": 5974 }, { "epoch": 19.59016393442623, "grad_norm": 5.486929416656494, "learning_rate": 1.8590861373866792e-05, "loss": 1.2063, "step": 5975 }, { "epoch": 19.59344262295082, "grad_norm": 5.368932723999023, "learning_rate": 1.8590317818716152e-05, "loss": 1.1096, "step": 5976 }, { "epoch": 19.59672131147541, "grad_norm": 5.642850399017334, "learning_rate": 1.858977416670074e-05, "loss": 1.0249, "step": 5977 }, { "epoch": 19.6, "grad_norm": 7.622422218322754, "learning_rate": 1.85892304178267e-05, "loss": 1.0458, "step": 5978 }, { "epoch": 19.60327868852459, "grad_norm": 5.522401809692383, "learning_rate": 1.8588686572100153e-05, "loss": 1.241, "step": 5979 }, { "epoch": 19.60655737704918, "grad_norm": 6.375765323638916, "learning_rate": 1.8588142629527233e-05, "loss": 1.0782, "step": 5980 }, { "epoch": 19.60983606557377, "grad_norm": 5.17997932434082, "learning_rate": 1.858759859011408e-05, "loss": 1.1855, "step": 5981 }, { "epoch": 19.613114754098362, "grad_norm": 6.937658309936523, "learning_rate": 1.858705445386682e-05, "loss": 1.2161, "step": 5982 }, { "epoch": 19.61639344262295, "grad_norm": 9.720660209655762, "learning_rate": 1.8586510220791596e-05, "loss": 1.0713, "step": 5983 }, { "epoch": 19.619672131147542, "grad_norm": 5.180199146270752, "learning_rate": 1.858596589089454e-05, "loss": 1.0151, "step": 5984 }, { "epoch": 19.62295081967213, "grad_norm": 5.2700910568237305, "learning_rate": 1.858542146418179e-05, "loss": 1.1061, "step": 5985 }, { "epoch": 19.626229508196722, "grad_norm": 5.469386100769043, "learning_rate": 1.858487694065949e-05, "loss": 1.1254, "step": 5986 }, { "epoch": 19.62950819672131, "grad_norm": 6.172003269195557, "learning_rate": 1.8584332320333775e-05, "loss": 0.9971, "step": 5987 }, { "epoch": 19.632786885245903, "grad_norm": 6.103825092315674, "learning_rate": 1.8583787603210787e-05, "loss": 1.0197, "step": 5988 }, { "epoch": 19.63606557377049, "grad_norm": 5.536308765411377, "learning_rate": 1.8583242789296668e-05, "loss": 0.739, "step": 5989 }, { "epoch": 19.639344262295083, "grad_norm": 5.829539775848389, "learning_rate": 1.858269787859756e-05, "loss": 1.0509, "step": 5990 }, { "epoch": 19.64262295081967, "grad_norm": 5.716652870178223, "learning_rate": 1.8582152871119615e-05, "loss": 0.9919, "step": 5991 }, { "epoch": 19.645901639344263, "grad_norm": 4.772240161895752, "learning_rate": 1.858160776686897e-05, "loss": 1.0023, "step": 5992 }, { "epoch": 19.64918032786885, "grad_norm": 4.952551364898682, "learning_rate": 1.858106256585178e-05, "loss": 1.0627, "step": 5993 }, { "epoch": 19.652459016393443, "grad_norm": 5.942915916442871, "learning_rate": 1.858051726807418e-05, "loss": 1.0207, "step": 5994 }, { "epoch": 19.65573770491803, "grad_norm": 5.928442478179932, "learning_rate": 1.857997187354233e-05, "loss": 0.9243, "step": 5995 }, { "epoch": 19.659016393442624, "grad_norm": 6.349141597747803, "learning_rate": 1.857942638226238e-05, "loss": 1.2324, "step": 5996 }, { "epoch": 19.662295081967212, "grad_norm": 5.598862171173096, "learning_rate": 1.857888079424047e-05, "loss": 1.0718, "step": 5997 }, { "epoch": 19.665573770491804, "grad_norm": 6.467174530029297, "learning_rate": 1.8578335109482763e-05, "loss": 1.1121, "step": 5998 }, { "epoch": 19.668852459016392, "grad_norm": 5.168078899383545, "learning_rate": 1.8577789327995406e-05, "loss": 1.3374, "step": 5999 }, { "epoch": 19.672131147540984, "grad_norm": 5.44432258605957, "learning_rate": 1.8577243449784558e-05, "loss": 1.066, "step": 6000 }, { "epoch": 19.675409836065572, "grad_norm": 5.474475383758545, "learning_rate": 1.857669747485637e-05, "loss": 1.1649, "step": 6001 }, { "epoch": 19.678688524590164, "grad_norm": 5.117712497711182, "learning_rate": 1.8576151403217003e-05, "loss": 1.1299, "step": 6002 }, { "epoch": 19.681967213114753, "grad_norm": 5.939112186431885, "learning_rate": 1.857560523487261e-05, "loss": 1.0837, "step": 6003 }, { "epoch": 19.685245901639345, "grad_norm": 5.673226833343506, "learning_rate": 1.8575058969829353e-05, "loss": 1.0594, "step": 6004 }, { "epoch": 19.688524590163933, "grad_norm": 5.04705810546875, "learning_rate": 1.857451260809339e-05, "loss": 1.1853, "step": 6005 }, { "epoch": 19.691803278688525, "grad_norm": 4.877986907958984, "learning_rate": 1.857396614967088e-05, "loss": 0.9215, "step": 6006 }, { "epoch": 19.695081967213113, "grad_norm": 6.2371697425842285, "learning_rate": 1.857341959456799e-05, "loss": 0.9424, "step": 6007 }, { "epoch": 19.698360655737705, "grad_norm": 5.114769458770752, "learning_rate": 1.857287294279088e-05, "loss": 1.2373, "step": 6008 }, { "epoch": 19.701639344262293, "grad_norm": 7.07954740524292, "learning_rate": 1.857232619434571e-05, "loss": 1.0292, "step": 6009 }, { "epoch": 19.704918032786885, "grad_norm": 6.320850372314453, "learning_rate": 1.8571779349238653e-05, "loss": 1.0164, "step": 6010 }, { "epoch": 19.708196721311474, "grad_norm": 5.04211950302124, "learning_rate": 1.857123240747587e-05, "loss": 0.9389, "step": 6011 }, { "epoch": 19.711475409836066, "grad_norm": 8.63526725769043, "learning_rate": 1.8570685369063528e-05, "loss": 0.9042, "step": 6012 }, { "epoch": 19.714754098360658, "grad_norm": 5.893457412719727, "learning_rate": 1.85701382340078e-05, "loss": 0.9938, "step": 6013 }, { "epoch": 19.718032786885246, "grad_norm": 7.082520961761475, "learning_rate": 1.856959100231485e-05, "loss": 0.9657, "step": 6014 }, { "epoch": 19.721311475409838, "grad_norm": 6.62134313583374, "learning_rate": 1.8569043673990854e-05, "loss": 1.001, "step": 6015 }, { "epoch": 19.724590163934426, "grad_norm": 71.09439086914062, "learning_rate": 1.8568496249041977e-05, "loss": 0.9786, "step": 6016 }, { "epoch": 19.727868852459018, "grad_norm": 5.897244930267334, "learning_rate": 1.85679487274744e-05, "loss": 0.9932, "step": 6017 }, { "epoch": 19.731147540983606, "grad_norm": 5.253571510314941, "learning_rate": 1.856740110929429e-05, "loss": 1.0354, "step": 6018 }, { "epoch": 19.7344262295082, "grad_norm": 5.6684184074401855, "learning_rate": 1.856685339450783e-05, "loss": 1.3397, "step": 6019 }, { "epoch": 19.737704918032787, "grad_norm": 6.161533832550049, "learning_rate": 1.8566305583121187e-05, "loss": 1.2372, "step": 6020 }, { "epoch": 19.74098360655738, "grad_norm": 6.4041428565979, "learning_rate": 1.856575767514054e-05, "loss": 1.0214, "step": 6021 }, { "epoch": 19.744262295081967, "grad_norm": 10.050030708312988, "learning_rate": 1.8565209670572072e-05, "loss": 1.0081, "step": 6022 }, { "epoch": 19.74754098360656, "grad_norm": 7.84328031539917, "learning_rate": 1.8564661569421956e-05, "loss": 1.0898, "step": 6023 }, { "epoch": 19.750819672131147, "grad_norm": 8.70715618133545, "learning_rate": 1.856411337169638e-05, "loss": 1.0996, "step": 6024 }, { "epoch": 19.75409836065574, "grad_norm": 7.509127140045166, "learning_rate": 1.856356507740152e-05, "loss": 1.2268, "step": 6025 }, { "epoch": 19.757377049180327, "grad_norm": 21.524412155151367, "learning_rate": 1.8563016686543557e-05, "loss": 0.8722, "step": 6026 }, { "epoch": 19.76065573770492, "grad_norm": 5.333108425140381, "learning_rate": 1.856246819912868e-05, "loss": 1.1289, "step": 6027 }, { "epoch": 19.763934426229508, "grad_norm": 6.67244291305542, "learning_rate": 1.856191961516307e-05, "loss": 1.1301, "step": 6028 }, { "epoch": 19.7672131147541, "grad_norm": 7.432878494262695, "learning_rate": 1.8561370934652915e-05, "loss": 1.1819, "step": 6029 }, { "epoch": 19.770491803278688, "grad_norm": 7.710842132568359, "learning_rate": 1.8560822157604402e-05, "loss": 1.2466, "step": 6030 }, { "epoch": 19.77377049180328, "grad_norm": 6.054474353790283, "learning_rate": 1.856027328402372e-05, "loss": 1.1262, "step": 6031 }, { "epoch": 19.777049180327868, "grad_norm": 6.0025200843811035, "learning_rate": 1.855972431391705e-05, "loss": 0.8831, "step": 6032 }, { "epoch": 19.78032786885246, "grad_norm": 8.378828048706055, "learning_rate": 1.8559175247290593e-05, "loss": 1.0759, "step": 6033 }, { "epoch": 19.78360655737705, "grad_norm": 7.486321926116943, "learning_rate": 1.8558626084150538e-05, "loss": 1.0708, "step": 6034 }, { "epoch": 19.78688524590164, "grad_norm": 6.727148532867432, "learning_rate": 1.8558076824503072e-05, "loss": 1.0732, "step": 6035 }, { "epoch": 19.79016393442623, "grad_norm": 5.861502647399902, "learning_rate": 1.8557527468354387e-05, "loss": 0.9741, "step": 6036 }, { "epoch": 19.79344262295082, "grad_norm": 8.223209381103516, "learning_rate": 1.8556978015710688e-05, "loss": 1.2129, "step": 6037 }, { "epoch": 19.79672131147541, "grad_norm": 6.488895893096924, "learning_rate": 1.8556428466578166e-05, "loss": 1.061, "step": 6038 }, { "epoch": 19.8, "grad_norm": 6.2804741859436035, "learning_rate": 1.8555878820963014e-05, "loss": 1.1996, "step": 6039 }, { "epoch": 19.80327868852459, "grad_norm": 6.691997528076172, "learning_rate": 1.855532907887143e-05, "loss": 1.0247, "step": 6040 }, { "epoch": 19.80655737704918, "grad_norm": 9.705714225769043, "learning_rate": 1.855477924030962e-05, "loss": 1.0645, "step": 6041 }, { "epoch": 19.80983606557377, "grad_norm": 5.843883037567139, "learning_rate": 1.8554229305283778e-05, "loss": 1.2379, "step": 6042 }, { "epoch": 19.81311475409836, "grad_norm": 5.807456016540527, "learning_rate": 1.8553679273800104e-05, "loss": 0.9922, "step": 6043 }, { "epoch": 19.81639344262295, "grad_norm": 5.505711555480957, "learning_rate": 1.8553129145864806e-05, "loss": 1.2142, "step": 6044 }, { "epoch": 19.81967213114754, "grad_norm": 6.165963172912598, "learning_rate": 1.8552578921484083e-05, "loss": 1.217, "step": 6045 }, { "epoch": 19.82295081967213, "grad_norm": 6.210695743560791, "learning_rate": 1.855202860066414e-05, "loss": 1.1589, "step": 6046 }, { "epoch": 19.82622950819672, "grad_norm": 5.779447555541992, "learning_rate": 1.855147818341118e-05, "loss": 1.0351, "step": 6047 }, { "epoch": 19.82950819672131, "grad_norm": 5.331858158111572, "learning_rate": 1.8550927669731417e-05, "loss": 1.3076, "step": 6048 }, { "epoch": 19.832786885245902, "grad_norm": 7.182527542114258, "learning_rate": 1.855037705963105e-05, "loss": 1.1074, "step": 6049 }, { "epoch": 19.83606557377049, "grad_norm": 7.2797532081604, "learning_rate": 1.854982635311629e-05, "loss": 1.0927, "step": 6050 }, { "epoch": 19.839344262295082, "grad_norm": 6.726433277130127, "learning_rate": 1.854927555019335e-05, "loss": 1.114, "step": 6051 }, { "epoch": 19.84262295081967, "grad_norm": 6.076471328735352, "learning_rate": 1.854872465086844e-05, "loss": 1.0947, "step": 6052 }, { "epoch": 19.845901639344262, "grad_norm": 6.236479759216309, "learning_rate": 1.8548173655147773e-05, "loss": 1.116, "step": 6053 }, { "epoch": 19.84918032786885, "grad_norm": 6.379154682159424, "learning_rate": 1.854762256303756e-05, "loss": 1.3165, "step": 6054 }, { "epoch": 19.852459016393443, "grad_norm": 5.482997417449951, "learning_rate": 1.854707137454401e-05, "loss": 1.0912, "step": 6055 }, { "epoch": 19.855737704918035, "grad_norm": 6.6227803230285645, "learning_rate": 1.854652008967335e-05, "loss": 0.9907, "step": 6056 }, { "epoch": 19.859016393442623, "grad_norm": 4.906563758850098, "learning_rate": 1.8545968708431785e-05, "loss": 0.9722, "step": 6057 }, { "epoch": 19.862295081967215, "grad_norm": 6.779810428619385, "learning_rate": 1.854541723082554e-05, "loss": 1.0468, "step": 6058 }, { "epoch": 19.865573770491803, "grad_norm": 6.978311061859131, "learning_rate": 1.854486565686083e-05, "loss": 1.0544, "step": 6059 }, { "epoch": 19.868852459016395, "grad_norm": 6.3279194831848145, "learning_rate": 1.8544313986543875e-05, "loss": 0.9548, "step": 6060 }, { "epoch": 19.872131147540983, "grad_norm": 5.892894268035889, "learning_rate": 1.8543762219880896e-05, "loss": 0.9707, "step": 6061 }, { "epoch": 19.875409836065575, "grad_norm": 6.787768363952637, "learning_rate": 1.8543210356878118e-05, "loss": 1.1697, "step": 6062 }, { "epoch": 19.878688524590164, "grad_norm": 6.130251407623291, "learning_rate": 1.8542658397541754e-05, "loss": 0.9675, "step": 6063 }, { "epoch": 19.881967213114756, "grad_norm": 6.523378849029541, "learning_rate": 1.854210634187804e-05, "loss": 1.3289, "step": 6064 }, { "epoch": 19.885245901639344, "grad_norm": 7.093616962432861, "learning_rate": 1.8541554189893192e-05, "loss": 1.1935, "step": 6065 }, { "epoch": 19.888524590163936, "grad_norm": 6.3323893547058105, "learning_rate": 1.8541001941593442e-05, "loss": 1.0621, "step": 6066 }, { "epoch": 19.891803278688524, "grad_norm": 6.527801990509033, "learning_rate": 1.8540449596985013e-05, "loss": 1.1194, "step": 6067 }, { "epoch": 19.895081967213116, "grad_norm": 6.233226299285889, "learning_rate": 1.8539897156074135e-05, "loss": 1.0966, "step": 6068 }, { "epoch": 19.898360655737704, "grad_norm": 5.276297092437744, "learning_rate": 1.8539344618867036e-05, "loss": 1.2013, "step": 6069 }, { "epoch": 19.901639344262296, "grad_norm": 6.525942325592041, "learning_rate": 1.853879198536995e-05, "loss": 1.2085, "step": 6070 }, { "epoch": 19.904918032786885, "grad_norm": 6.547286510467529, "learning_rate": 1.8538239255589107e-05, "loss": 1.2161, "step": 6071 }, { "epoch": 19.908196721311477, "grad_norm": 6.9885454177856445, "learning_rate": 1.853768642953074e-05, "loss": 1.0654, "step": 6072 }, { "epoch": 19.911475409836065, "grad_norm": 6.450183868408203, "learning_rate": 1.8537133507201075e-05, "loss": 0.9458, "step": 6073 }, { "epoch": 19.914754098360657, "grad_norm": 4.937309741973877, "learning_rate": 1.8536580488606358e-05, "loss": 1.1923, "step": 6074 }, { "epoch": 19.918032786885245, "grad_norm": 7.480810165405273, "learning_rate": 1.8536027373752818e-05, "loss": 1.0969, "step": 6075 }, { "epoch": 19.921311475409837, "grad_norm": 5.834571361541748, "learning_rate": 1.85354741626467e-05, "loss": 1.1196, "step": 6076 }, { "epoch": 19.924590163934425, "grad_norm": 5.428569793701172, "learning_rate": 1.8534920855294228e-05, "loss": 1.0848, "step": 6077 }, { "epoch": 19.927868852459017, "grad_norm": 6.604460716247559, "learning_rate": 1.8534367451701654e-05, "loss": 1.1855, "step": 6078 }, { "epoch": 19.931147540983606, "grad_norm": 6.834423065185547, "learning_rate": 1.8533813951875214e-05, "loss": 1.0684, "step": 6079 }, { "epoch": 19.934426229508198, "grad_norm": 6.884205341339111, "learning_rate": 1.8533260355821145e-05, "loss": 1.015, "step": 6080 }, { "epoch": 19.937704918032786, "grad_norm": 5.905259609222412, "learning_rate": 1.8532706663545695e-05, "loss": 0.8855, "step": 6081 }, { "epoch": 19.940983606557378, "grad_norm": 6.427189350128174, "learning_rate": 1.853215287505511e-05, "loss": 1.2924, "step": 6082 }, { "epoch": 19.944262295081966, "grad_norm": 6.410397052764893, "learning_rate": 1.8531598990355623e-05, "loss": 1.0168, "step": 6083 }, { "epoch": 19.947540983606558, "grad_norm": 6.826106548309326, "learning_rate": 1.853104500945349e-05, "loss": 1.0481, "step": 6084 }, { "epoch": 19.950819672131146, "grad_norm": 5.636416912078857, "learning_rate": 1.8530490932354953e-05, "loss": 1.1179, "step": 6085 }, { "epoch": 19.95409836065574, "grad_norm": 5.560190200805664, "learning_rate": 1.8529936759066264e-05, "loss": 0.983, "step": 6086 }, { "epoch": 19.957377049180327, "grad_norm": 5.219203948974609, "learning_rate": 1.8529382489593666e-05, "loss": 1.1659, "step": 6087 }, { "epoch": 19.96065573770492, "grad_norm": 5.695348739624023, "learning_rate": 1.8528828123943415e-05, "loss": 1.1436, "step": 6088 }, { "epoch": 19.963934426229507, "grad_norm": 5.76986837387085, "learning_rate": 1.8528273662121758e-05, "loss": 0.9849, "step": 6089 }, { "epoch": 19.9672131147541, "grad_norm": 5.457442760467529, "learning_rate": 1.8527719104134946e-05, "loss": 1.02, "step": 6090 }, { "epoch": 19.970491803278687, "grad_norm": 5.741635322570801, "learning_rate": 1.8527164449989237e-05, "loss": 1.0974, "step": 6091 }, { "epoch": 19.97377049180328, "grad_norm": 7.376955509185791, "learning_rate": 1.8526609699690886e-05, "loss": 1.0047, "step": 6092 }, { "epoch": 19.977049180327867, "grad_norm": 5.312355995178223, "learning_rate": 1.852605485324614e-05, "loss": 1.126, "step": 6093 }, { "epoch": 19.98032786885246, "grad_norm": 5.410157203674316, "learning_rate": 1.852549991066126e-05, "loss": 0.84, "step": 6094 }, { "epoch": 19.983606557377048, "grad_norm": 5.799681663513184, "learning_rate": 1.852494487194251e-05, "loss": 1.0571, "step": 6095 }, { "epoch": 19.98688524590164, "grad_norm": 5.516228675842285, "learning_rate": 1.852438973709614e-05, "loss": 1.1135, "step": 6096 }, { "epoch": 19.990163934426228, "grad_norm": 5.529789924621582, "learning_rate": 1.852383450612841e-05, "loss": 1.3717, "step": 6097 }, { "epoch": 19.99344262295082, "grad_norm": 6.801288604736328, "learning_rate": 1.8523279179045586e-05, "loss": 1.1041, "step": 6098 }, { "epoch": 19.99672131147541, "grad_norm": 5.99270486831665, "learning_rate": 1.8522723755853924e-05, "loss": 1.0465, "step": 6099 }, { "epoch": 20.0, "grad_norm": 5.310741901397705, "learning_rate": 1.8522168236559693e-05, "loss": 0.8059, "step": 6100 }, { "epoch": 20.003278688524592, "grad_norm": 6.839766502380371, "learning_rate": 1.8521612621169157e-05, "loss": 0.9276, "step": 6101 }, { "epoch": 20.00655737704918, "grad_norm": 5.863697052001953, "learning_rate": 1.852105690968857e-05, "loss": 0.9375, "step": 6102 }, { "epoch": 20.009836065573772, "grad_norm": 6.553555965423584, "learning_rate": 1.8520501102124217e-05, "loss": 0.951, "step": 6103 }, { "epoch": 20.01311475409836, "grad_norm": 5.568972110748291, "learning_rate": 1.851994519848235e-05, "loss": 0.8427, "step": 6104 }, { "epoch": 20.016393442622952, "grad_norm": 6.4241437911987305, "learning_rate": 1.851938919876924e-05, "loss": 0.9698, "step": 6105 }, { "epoch": 20.01967213114754, "grad_norm": 4.873533725738525, "learning_rate": 1.8518833102991163e-05, "loss": 0.9207, "step": 6106 }, { "epoch": 20.022950819672133, "grad_norm": 5.671757698059082, "learning_rate": 1.8518276911154384e-05, "loss": 1.0149, "step": 6107 }, { "epoch": 20.02622950819672, "grad_norm": 5.062848091125488, "learning_rate": 1.8517720623265174e-05, "loss": 1.1061, "step": 6108 }, { "epoch": 20.029508196721313, "grad_norm": 5.8945417404174805, "learning_rate": 1.851716423932981e-05, "loss": 1.059, "step": 6109 }, { "epoch": 20.0327868852459, "grad_norm": 5.8442912101745605, "learning_rate": 1.8516607759354562e-05, "loss": 1.0494, "step": 6110 }, { "epoch": 20.036065573770493, "grad_norm": 6.808475971221924, "learning_rate": 1.851605118334571e-05, "loss": 0.9847, "step": 6111 }, { "epoch": 20.03934426229508, "grad_norm": 5.335504531860352, "learning_rate": 1.8515494511309524e-05, "loss": 0.8661, "step": 6112 }, { "epoch": 20.042622950819673, "grad_norm": 5.400815486907959, "learning_rate": 1.8514937743252284e-05, "loss": 0.9525, "step": 6113 }, { "epoch": 20.04590163934426, "grad_norm": 5.2373552322387695, "learning_rate": 1.8514380879180265e-05, "loss": 0.9583, "step": 6114 }, { "epoch": 20.049180327868854, "grad_norm": 10.236716270446777, "learning_rate": 1.8513823919099752e-05, "loss": 0.8092, "step": 6115 }, { "epoch": 20.052459016393442, "grad_norm": 5.910849571228027, "learning_rate": 1.851326686301702e-05, "loss": 0.9894, "step": 6116 }, { "epoch": 20.055737704918034, "grad_norm": 5.6803178787231445, "learning_rate": 1.8512709710938355e-05, "loss": 1.0101, "step": 6117 }, { "epoch": 20.059016393442622, "grad_norm": 5.230236053466797, "learning_rate": 1.8512152462870035e-05, "loss": 1.0648, "step": 6118 }, { "epoch": 20.062295081967214, "grad_norm": 6.65630578994751, "learning_rate": 1.851159511881835e-05, "loss": 0.9508, "step": 6119 }, { "epoch": 20.065573770491802, "grad_norm": 5.6965012550354, "learning_rate": 1.8511037678789575e-05, "loss": 0.9545, "step": 6120 }, { "epoch": 20.068852459016394, "grad_norm": 4.852899551391602, "learning_rate": 1.8510480142790002e-05, "loss": 1.1315, "step": 6121 }, { "epoch": 20.072131147540983, "grad_norm": 5.608546733856201, "learning_rate": 1.8509922510825917e-05, "loss": 1.0537, "step": 6122 }, { "epoch": 20.075409836065575, "grad_norm": 6.907296180725098, "learning_rate": 1.8509364782903606e-05, "loss": 0.9806, "step": 6123 }, { "epoch": 20.078688524590163, "grad_norm": 5.854202747344971, "learning_rate": 1.8508806959029362e-05, "loss": 1.1771, "step": 6124 }, { "epoch": 20.081967213114755, "grad_norm": 5.2967424392700195, "learning_rate": 1.8508249039209474e-05, "loss": 1.0405, "step": 6125 }, { "epoch": 20.085245901639343, "grad_norm": 8.594086647033691, "learning_rate": 1.850769102345023e-05, "loss": 0.9585, "step": 6126 }, { "epoch": 20.088524590163935, "grad_norm": 9.250836372375488, "learning_rate": 1.8507132911757925e-05, "loss": 1.2296, "step": 6127 }, { "epoch": 20.091803278688523, "grad_norm": 7.937324047088623, "learning_rate": 1.8506574704138847e-05, "loss": 0.9604, "step": 6128 }, { "epoch": 20.095081967213115, "grad_norm": 5.494113445281982, "learning_rate": 1.85060164005993e-05, "loss": 1.0886, "step": 6129 }, { "epoch": 20.098360655737704, "grad_norm": 5.706589698791504, "learning_rate": 1.8505458001145567e-05, "loss": 0.8734, "step": 6130 }, { "epoch": 20.101639344262296, "grad_norm": 6.146677017211914, "learning_rate": 1.8504899505783956e-05, "loss": 0.7756, "step": 6131 }, { "epoch": 20.104918032786884, "grad_norm": 5.045994758605957, "learning_rate": 1.8504340914520763e-05, "loss": 1.0859, "step": 6132 }, { "epoch": 20.108196721311476, "grad_norm": 7.006516933441162, "learning_rate": 1.850378222736228e-05, "loss": 1.0786, "step": 6133 }, { "epoch": 20.111475409836064, "grad_norm": 7.374827861785889, "learning_rate": 1.850322344431481e-05, "loss": 0.8392, "step": 6134 }, { "epoch": 20.114754098360656, "grad_norm": 5.382654666900635, "learning_rate": 1.850266456538466e-05, "loss": 1.0769, "step": 6135 }, { "epoch": 20.118032786885244, "grad_norm": 5.333618640899658, "learning_rate": 1.8502105590578117e-05, "loss": 1.0912, "step": 6136 }, { "epoch": 20.121311475409836, "grad_norm": 4.9084248542785645, "learning_rate": 1.8501546519901503e-05, "loss": 0.9059, "step": 6137 }, { "epoch": 20.124590163934425, "grad_norm": 4.6073713302612305, "learning_rate": 1.8500987353361108e-05, "loss": 0.9611, "step": 6138 }, { "epoch": 20.127868852459017, "grad_norm": 7.153728485107422, "learning_rate": 1.8500428090963244e-05, "loss": 0.9708, "step": 6139 }, { "epoch": 20.131147540983605, "grad_norm": 5.955711841583252, "learning_rate": 1.849986873271421e-05, "loss": 0.9923, "step": 6140 }, { "epoch": 20.134426229508197, "grad_norm": 5.536265850067139, "learning_rate": 1.8499309278620323e-05, "loss": 1.0361, "step": 6141 }, { "epoch": 20.137704918032785, "grad_norm": 5.598681449890137, "learning_rate": 1.8498749728687886e-05, "loss": 0.9924, "step": 6142 }, { "epoch": 20.140983606557377, "grad_norm": 5.16771125793457, "learning_rate": 1.849819008292321e-05, "loss": 0.9821, "step": 6143 }, { "epoch": 20.14426229508197, "grad_norm": 7.35514497756958, "learning_rate": 1.8497630341332603e-05, "loss": 1.2378, "step": 6144 }, { "epoch": 20.147540983606557, "grad_norm": 5.106778621673584, "learning_rate": 1.849707050392238e-05, "loss": 0.9315, "step": 6145 }, { "epoch": 20.15081967213115, "grad_norm": 5.174920082092285, "learning_rate": 1.8496510570698852e-05, "loss": 1.0105, "step": 6146 }, { "epoch": 20.154098360655738, "grad_norm": 11.998414039611816, "learning_rate": 1.8495950541668334e-05, "loss": 0.9333, "step": 6147 }, { "epoch": 20.15737704918033, "grad_norm": 4.984881401062012, "learning_rate": 1.849539041683714e-05, "loss": 0.9398, "step": 6148 }, { "epoch": 20.160655737704918, "grad_norm": 5.135432720184326, "learning_rate": 1.8494830196211584e-05, "loss": 1.0491, "step": 6149 }, { "epoch": 20.16393442622951, "grad_norm": 9.468082427978516, "learning_rate": 1.8494269879797986e-05, "loss": 0.8867, "step": 6150 }, { "epoch": 20.167213114754098, "grad_norm": 5.4587836265563965, "learning_rate": 1.849370946760266e-05, "loss": 0.8395, "step": 6151 }, { "epoch": 20.17049180327869, "grad_norm": 7.005479335784912, "learning_rate": 1.8493148959631936e-05, "loss": 1.2395, "step": 6152 }, { "epoch": 20.17377049180328, "grad_norm": 7.4747233390808105, "learning_rate": 1.8492588355892125e-05, "loss": 0.9705, "step": 6153 }, { "epoch": 20.17704918032787, "grad_norm": 6.930615425109863, "learning_rate": 1.8492027656389547e-05, "loss": 0.7631, "step": 6154 }, { "epoch": 20.18032786885246, "grad_norm": 6.518565654754639, "learning_rate": 1.8491466861130528e-05, "loss": 0.9554, "step": 6155 }, { "epoch": 20.18360655737705, "grad_norm": 7.32848596572876, "learning_rate": 1.8490905970121393e-05, "loss": 0.9906, "step": 6156 }, { "epoch": 20.18688524590164, "grad_norm": 5.510814666748047, "learning_rate": 1.8490344983368462e-05, "loss": 0.8307, "step": 6157 }, { "epoch": 20.19016393442623, "grad_norm": 5.506610870361328, "learning_rate": 1.848978390087807e-05, "loss": 0.973, "step": 6158 }, { "epoch": 20.19344262295082, "grad_norm": 7.8591790199279785, "learning_rate": 1.848922272265653e-05, "loss": 1.1908, "step": 6159 }, { "epoch": 20.19672131147541, "grad_norm": 5.159714221954346, "learning_rate": 1.8488661448710183e-05, "loss": 1.2126, "step": 6160 }, { "epoch": 20.2, "grad_norm": 7.047665119171143, "learning_rate": 1.8488100079045345e-05, "loss": 1.0994, "step": 6161 }, { "epoch": 20.20327868852459, "grad_norm": 5.891313552856445, "learning_rate": 1.848753861366836e-05, "loss": 0.7568, "step": 6162 }, { "epoch": 20.20655737704918, "grad_norm": 5.497257709503174, "learning_rate": 1.848697705258555e-05, "loss": 1.088, "step": 6163 }, { "epoch": 20.20983606557377, "grad_norm": 5.843005180358887, "learning_rate": 1.8486415395803247e-05, "loss": 0.8805, "step": 6164 }, { "epoch": 20.21311475409836, "grad_norm": 8.886371612548828, "learning_rate": 1.848585364332779e-05, "loss": 0.9453, "step": 6165 }, { "epoch": 20.21639344262295, "grad_norm": 6.065206527709961, "learning_rate": 1.8485291795165508e-05, "loss": 0.9891, "step": 6166 }, { "epoch": 20.21967213114754, "grad_norm": 5.577242374420166, "learning_rate": 1.848472985132274e-05, "loss": 0.8423, "step": 6167 }, { "epoch": 20.222950819672132, "grad_norm": 6.476274013519287, "learning_rate": 1.848416781180582e-05, "loss": 0.9146, "step": 6168 }, { "epoch": 20.22622950819672, "grad_norm": 5.2125935554504395, "learning_rate": 1.848360567662109e-05, "loss": 1.0543, "step": 6169 }, { "epoch": 20.229508196721312, "grad_norm": 5.627808570861816, "learning_rate": 1.8483043445774883e-05, "loss": 1.0761, "step": 6170 }, { "epoch": 20.2327868852459, "grad_norm": 5.017903804779053, "learning_rate": 1.8482481119273538e-05, "loss": 1.011, "step": 6171 }, { "epoch": 20.236065573770492, "grad_norm": 4.976287364959717, "learning_rate": 1.8481918697123402e-05, "loss": 1.1176, "step": 6172 }, { "epoch": 20.23934426229508, "grad_norm": 5.125059127807617, "learning_rate": 1.8481356179330812e-05, "loss": 0.8483, "step": 6173 }, { "epoch": 20.242622950819673, "grad_norm": 6.922541618347168, "learning_rate": 1.8480793565902114e-05, "loss": 0.8203, "step": 6174 }, { "epoch": 20.24590163934426, "grad_norm": 13.420636177062988, "learning_rate": 1.848023085684365e-05, "loss": 1.1577, "step": 6175 }, { "epoch": 20.249180327868853, "grad_norm": 8.26529598236084, "learning_rate": 1.8479668052161764e-05, "loss": 0.9338, "step": 6176 }, { "epoch": 20.25245901639344, "grad_norm": 5.983084678649902, "learning_rate": 1.847910515186281e-05, "loss": 0.9302, "step": 6177 }, { "epoch": 20.255737704918033, "grad_norm": 7.634568691253662, "learning_rate": 1.8478542155953125e-05, "loss": 1.1177, "step": 6178 }, { "epoch": 20.25901639344262, "grad_norm": 5.424469470977783, "learning_rate": 1.8477979064439062e-05, "loss": 1.064, "step": 6179 }, { "epoch": 20.262295081967213, "grad_norm": 5.074636936187744, "learning_rate": 1.847741587732697e-05, "loss": 1.1868, "step": 6180 }, { "epoch": 20.2655737704918, "grad_norm": 6.726556777954102, "learning_rate": 1.8476852594623202e-05, "loss": 0.8557, "step": 6181 }, { "epoch": 20.268852459016394, "grad_norm": 5.651718616485596, "learning_rate": 1.8476289216334103e-05, "loss": 1.009, "step": 6182 }, { "epoch": 20.272131147540982, "grad_norm": 5.486967086791992, "learning_rate": 1.8475725742466034e-05, "loss": 1.0699, "step": 6183 }, { "epoch": 20.275409836065574, "grad_norm": 5.186209678649902, "learning_rate": 1.8475162173025346e-05, "loss": 1.1841, "step": 6184 }, { "epoch": 20.278688524590162, "grad_norm": 6.3730597496032715, "learning_rate": 1.8474598508018387e-05, "loss": 0.9156, "step": 6185 }, { "epoch": 20.281967213114754, "grad_norm": 6.693710803985596, "learning_rate": 1.847403474745152e-05, "loss": 0.9928, "step": 6186 }, { "epoch": 20.285245901639342, "grad_norm": 5.455808162689209, "learning_rate": 1.8473470891331103e-05, "loss": 0.9657, "step": 6187 }, { "epoch": 20.288524590163934, "grad_norm": 5.238801956176758, "learning_rate": 1.847290693966349e-05, "loss": 1.0046, "step": 6188 }, { "epoch": 20.291803278688526, "grad_norm": 6.1655049324035645, "learning_rate": 1.8472342892455043e-05, "loss": 1.1559, "step": 6189 }, { "epoch": 20.295081967213115, "grad_norm": 6.053051471710205, "learning_rate": 1.847177874971212e-05, "loss": 1.1238, "step": 6190 }, { "epoch": 20.298360655737707, "grad_norm": 5.610633850097656, "learning_rate": 1.8471214511441084e-05, "loss": 0.9482, "step": 6191 }, { "epoch": 20.301639344262295, "grad_norm": 4.932495594024658, "learning_rate": 1.8470650177648294e-05, "loss": 1.1456, "step": 6192 }, { "epoch": 20.304918032786887, "grad_norm": 5.881021022796631, "learning_rate": 1.8470085748340118e-05, "loss": 1.1578, "step": 6193 }, { "epoch": 20.308196721311475, "grad_norm": 6.6255693435668945, "learning_rate": 1.8469521223522915e-05, "loss": 0.913, "step": 6194 }, { "epoch": 20.311475409836067, "grad_norm": 4.958318710327148, "learning_rate": 1.846895660320306e-05, "loss": 1.0056, "step": 6195 }, { "epoch": 20.314754098360655, "grad_norm": 5.393502235412598, "learning_rate": 1.846839188738691e-05, "loss": 1.1864, "step": 6196 }, { "epoch": 20.318032786885247, "grad_norm": 6.833074569702148, "learning_rate": 1.8467827076080835e-05, "loss": 1.1407, "step": 6197 }, { "epoch": 20.321311475409836, "grad_norm": 5.07382345199585, "learning_rate": 1.8467262169291208e-05, "loss": 0.9033, "step": 6198 }, { "epoch": 20.324590163934428, "grad_norm": 6.33900260925293, "learning_rate": 1.8466697167024396e-05, "loss": 0.8718, "step": 6199 }, { "epoch": 20.327868852459016, "grad_norm": 6.1186323165893555, "learning_rate": 1.846613206928677e-05, "loss": 0.8694, "step": 6200 }, { "epoch": 20.331147540983608, "grad_norm": 5.945049285888672, "learning_rate": 1.8465566876084705e-05, "loss": 0.918, "step": 6201 }, { "epoch": 20.334426229508196, "grad_norm": 5.994840145111084, "learning_rate": 1.8465001587424565e-05, "loss": 0.9668, "step": 6202 }, { "epoch": 20.337704918032788, "grad_norm": 7.450390338897705, "learning_rate": 1.846443620331274e-05, "loss": 0.9899, "step": 6203 }, { "epoch": 20.340983606557376, "grad_norm": 5.61204719543457, "learning_rate": 1.8463870723755588e-05, "loss": 0.9222, "step": 6204 }, { "epoch": 20.34426229508197, "grad_norm": 6.034381866455078, "learning_rate": 1.8463305148759498e-05, "loss": 1.0078, "step": 6205 }, { "epoch": 20.347540983606557, "grad_norm": 5.699859142303467, "learning_rate": 1.8462739478330837e-05, "loss": 0.9193, "step": 6206 }, { "epoch": 20.35081967213115, "grad_norm": 4.264897346496582, "learning_rate": 1.8462173712475997e-05, "loss": 1.1775, "step": 6207 }, { "epoch": 20.354098360655737, "grad_norm": 6.2659711837768555, "learning_rate": 1.8461607851201348e-05, "loss": 1.0441, "step": 6208 }, { "epoch": 20.35737704918033, "grad_norm": 6.3633904457092285, "learning_rate": 1.8461041894513268e-05, "loss": 0.853, "step": 6209 }, { "epoch": 20.360655737704917, "grad_norm": 5.864890098571777, "learning_rate": 1.8460475842418148e-05, "loss": 0.9376, "step": 6210 }, { "epoch": 20.36393442622951, "grad_norm": 7.338570594787598, "learning_rate": 1.845990969492236e-05, "loss": 1.0544, "step": 6211 }, { "epoch": 20.367213114754097, "grad_norm": 5.265163421630859, "learning_rate": 1.84593434520323e-05, "loss": 0.8905, "step": 6212 }, { "epoch": 20.37049180327869, "grad_norm": 5.737496852874756, "learning_rate": 1.8458777113754343e-05, "loss": 0.8579, "step": 6213 }, { "epoch": 20.373770491803278, "grad_norm": 7.049496173858643, "learning_rate": 1.8458210680094882e-05, "loss": 0.8911, "step": 6214 }, { "epoch": 20.37704918032787, "grad_norm": 5.4087371826171875, "learning_rate": 1.8457644151060304e-05, "loss": 0.9719, "step": 6215 }, { "epoch": 20.380327868852458, "grad_norm": 5.223104000091553, "learning_rate": 1.8457077526656992e-05, "loss": 1.0202, "step": 6216 }, { "epoch": 20.38360655737705, "grad_norm": 5.51652193069458, "learning_rate": 1.8456510806891333e-05, "loss": 1.0754, "step": 6217 }, { "epoch": 20.386885245901638, "grad_norm": 7.175507068634033, "learning_rate": 1.845594399176973e-05, "loss": 0.9249, "step": 6218 }, { "epoch": 20.39016393442623, "grad_norm": 5.687621593475342, "learning_rate": 1.845537708129856e-05, "loss": 1.0509, "step": 6219 }, { "epoch": 20.39344262295082, "grad_norm": 5.745337009429932, "learning_rate": 1.8454810075484228e-05, "loss": 0.9977, "step": 6220 }, { "epoch": 20.39672131147541, "grad_norm": 6.837857723236084, "learning_rate": 1.8454242974333117e-05, "loss": 0.925, "step": 6221 }, { "epoch": 20.4, "grad_norm": 5.038492202758789, "learning_rate": 1.8453675777851627e-05, "loss": 1.1104, "step": 6222 }, { "epoch": 20.40327868852459, "grad_norm": 6.103412628173828, "learning_rate": 1.8453108486046153e-05, "loss": 0.8945, "step": 6223 }, { "epoch": 20.40655737704918, "grad_norm": 6.038001537322998, "learning_rate": 1.8452541098923093e-05, "loss": 0.8985, "step": 6224 }, { "epoch": 20.40983606557377, "grad_norm": 7.77236795425415, "learning_rate": 1.8451973616488846e-05, "loss": 1.194, "step": 6225 }, { "epoch": 20.41311475409836, "grad_norm": 5.495419979095459, "learning_rate": 1.8451406038749803e-05, "loss": 0.7369, "step": 6226 }, { "epoch": 20.41639344262295, "grad_norm": 6.7158966064453125, "learning_rate": 1.845083836571237e-05, "loss": 1.2595, "step": 6227 }, { "epoch": 20.41967213114754, "grad_norm": 7.866293907165527, "learning_rate": 1.8450270597382952e-05, "loss": 0.8868, "step": 6228 }, { "epoch": 20.42295081967213, "grad_norm": 6.1031365394592285, "learning_rate": 1.8449702733767948e-05, "loss": 1.1152, "step": 6229 }, { "epoch": 20.42622950819672, "grad_norm": 5.372050762176514, "learning_rate": 1.8449134774873755e-05, "loss": 1.0248, "step": 6230 }, { "epoch": 20.42950819672131, "grad_norm": 5.153038024902344, "learning_rate": 1.8448566720706784e-05, "loss": 1.0143, "step": 6231 }, { "epoch": 20.432786885245903, "grad_norm": 5.339638710021973, "learning_rate": 1.844799857127344e-05, "loss": 0.9487, "step": 6232 }, { "epoch": 20.43606557377049, "grad_norm": 5.66416597366333, "learning_rate": 1.8447430326580127e-05, "loss": 0.8655, "step": 6233 }, { "epoch": 20.439344262295084, "grad_norm": 5.6557183265686035, "learning_rate": 1.8446861986633255e-05, "loss": 0.8065, "step": 6234 }, { "epoch": 20.442622950819672, "grad_norm": 5.2844719886779785, "learning_rate": 1.8446293551439232e-05, "loss": 1.1338, "step": 6235 }, { "epoch": 20.445901639344264, "grad_norm": 5.340636253356934, "learning_rate": 1.8445725021004466e-05, "loss": 1.0603, "step": 6236 }, { "epoch": 20.449180327868852, "grad_norm": 5.472815990447998, "learning_rate": 1.844515639533537e-05, "loss": 1.2004, "step": 6237 }, { "epoch": 20.452459016393444, "grad_norm": 6.770509243011475, "learning_rate": 1.8444587674438357e-05, "loss": 0.8976, "step": 6238 }, { "epoch": 20.455737704918032, "grad_norm": 5.429866313934326, "learning_rate": 1.8444018858319835e-05, "loss": 1.0701, "step": 6239 }, { "epoch": 20.459016393442624, "grad_norm": 6.3778767585754395, "learning_rate": 1.844344994698622e-05, "loss": 0.8856, "step": 6240 }, { "epoch": 20.462295081967213, "grad_norm": 5.346410751342773, "learning_rate": 1.844288094044393e-05, "loss": 1.0327, "step": 6241 }, { "epoch": 20.465573770491805, "grad_norm": 5.468595504760742, "learning_rate": 1.8442311838699378e-05, "loss": 0.796, "step": 6242 }, { "epoch": 20.468852459016393, "grad_norm": 6.892105579376221, "learning_rate": 1.8441742641758983e-05, "loss": 1.116, "step": 6243 }, { "epoch": 20.472131147540985, "grad_norm": 6.902703285217285, "learning_rate": 1.844117334962916e-05, "loss": 0.9331, "step": 6244 }, { "epoch": 20.475409836065573, "grad_norm": 6.129274845123291, "learning_rate": 1.8440603962316333e-05, "loss": 1.0625, "step": 6245 }, { "epoch": 20.478688524590165, "grad_norm": 3.8362679481506348, "learning_rate": 1.8440034479826922e-05, "loss": 1.0002, "step": 6246 }, { "epoch": 20.481967213114753, "grad_norm": 5.953201770782471, "learning_rate": 1.8439464902167342e-05, "loss": 1.02, "step": 6247 }, { "epoch": 20.485245901639345, "grad_norm": 6.0412492752075195, "learning_rate": 1.8438895229344023e-05, "loss": 1.0678, "step": 6248 }, { "epoch": 20.488524590163934, "grad_norm": 6.295278072357178, "learning_rate": 1.8438325461363387e-05, "loss": 1.0536, "step": 6249 }, { "epoch": 20.491803278688526, "grad_norm": 5.856579303741455, "learning_rate": 1.8437755598231857e-05, "loss": 0.9578, "step": 6250 }, { "epoch": 20.495081967213114, "grad_norm": 5.247403144836426, "learning_rate": 1.8437185639955858e-05, "loss": 1.0317, "step": 6251 }, { "epoch": 20.498360655737706, "grad_norm": 6.506542682647705, "learning_rate": 1.8436615586541822e-05, "loss": 0.8862, "step": 6252 }, { "epoch": 20.501639344262294, "grad_norm": 5.880921363830566, "learning_rate": 1.843604543799617e-05, "loss": 1.1797, "step": 6253 }, { "epoch": 20.504918032786886, "grad_norm": 9.811944961547852, "learning_rate": 1.8435475194325338e-05, "loss": 0.979, "step": 6254 }, { "epoch": 20.508196721311474, "grad_norm": 5.096286296844482, "learning_rate": 1.8434904855535748e-05, "loss": 1.1879, "step": 6255 }, { "epoch": 20.511475409836066, "grad_norm": 5.338028907775879, "learning_rate": 1.8434334421633838e-05, "loss": 1.1978, "step": 6256 }, { "epoch": 20.514754098360655, "grad_norm": 5.131256580352783, "learning_rate": 1.843376389262604e-05, "loss": 1.1265, "step": 6257 }, { "epoch": 20.518032786885247, "grad_norm": 9.116606712341309, "learning_rate": 1.8433193268518784e-05, "loss": 1.001, "step": 6258 }, { "epoch": 20.521311475409835, "grad_norm": 5.878342151641846, "learning_rate": 1.8432622549318505e-05, "loss": 1.1277, "step": 6259 }, { "epoch": 20.524590163934427, "grad_norm": 5.410289287567139, "learning_rate": 1.8432051735031636e-05, "loss": 0.9163, "step": 6260 }, { "epoch": 20.527868852459015, "grad_norm": 6.044735431671143, "learning_rate": 1.843148082566462e-05, "loss": 0.7198, "step": 6261 }, { "epoch": 20.531147540983607, "grad_norm": 6.521061420440674, "learning_rate": 1.843090982122389e-05, "loss": 1.0812, "step": 6262 }, { "epoch": 20.534426229508195, "grad_norm": 4.665205955505371, "learning_rate": 1.8430338721715886e-05, "loss": 1.0543, "step": 6263 }, { "epoch": 20.537704918032787, "grad_norm": 5.546588897705078, "learning_rate": 1.842976752714705e-05, "loss": 0.87, "step": 6264 }, { "epoch": 20.540983606557376, "grad_norm": 5.060636043548584, "learning_rate": 1.8429196237523816e-05, "loss": 0.97, "step": 6265 }, { "epoch": 20.544262295081968, "grad_norm": 5.1597723960876465, "learning_rate": 1.8428624852852634e-05, "loss": 0.9219, "step": 6266 }, { "epoch": 20.547540983606556, "grad_norm": 5.835297584533691, "learning_rate": 1.842805337313994e-05, "loss": 0.8291, "step": 6267 }, { "epoch": 20.550819672131148, "grad_norm": 6.676853656768799, "learning_rate": 1.8427481798392183e-05, "loss": 1.2577, "step": 6268 }, { "epoch": 20.554098360655736, "grad_norm": 5.468021392822266, "learning_rate": 1.8426910128615806e-05, "loss": 0.976, "step": 6269 }, { "epoch": 20.557377049180328, "grad_norm": 5.857914924621582, "learning_rate": 1.8426338363817257e-05, "loss": 1.0435, "step": 6270 }, { "epoch": 20.560655737704916, "grad_norm": 5.63979959487915, "learning_rate": 1.842576650400298e-05, "loss": 0.7816, "step": 6271 }, { "epoch": 20.56393442622951, "grad_norm": 5.865429401397705, "learning_rate": 1.8425194549179423e-05, "loss": 1.1376, "step": 6272 }, { "epoch": 20.567213114754097, "grad_norm": 5.481944561004639, "learning_rate": 1.842462249935304e-05, "loss": 1.0844, "step": 6273 }, { "epoch": 20.57049180327869, "grad_norm": 6.094230651855469, "learning_rate": 1.842405035453028e-05, "loss": 0.8634, "step": 6274 }, { "epoch": 20.57377049180328, "grad_norm": 5.7934417724609375, "learning_rate": 1.8423478114717593e-05, "loss": 0.9525, "step": 6275 }, { "epoch": 20.57704918032787, "grad_norm": 7.5551347732543945, "learning_rate": 1.842290577992143e-05, "loss": 0.8029, "step": 6276 }, { "epoch": 20.58032786885246, "grad_norm": 5.881172180175781, "learning_rate": 1.842233335014825e-05, "loss": 0.8928, "step": 6277 }, { "epoch": 20.58360655737705, "grad_norm": 4.767359733581543, "learning_rate": 1.84217608254045e-05, "loss": 1.2211, "step": 6278 }, { "epoch": 20.58688524590164, "grad_norm": 5.778526782989502, "learning_rate": 1.8421188205696645e-05, "loss": 0.7988, "step": 6279 }, { "epoch": 20.59016393442623, "grad_norm": 6.176638126373291, "learning_rate": 1.8420615491031134e-05, "loss": 1.1063, "step": 6280 }, { "epoch": 20.59344262295082, "grad_norm": 4.683987617492676, "learning_rate": 1.8420042681414426e-05, "loss": 1.0953, "step": 6281 }, { "epoch": 20.59672131147541, "grad_norm": 5.984285354614258, "learning_rate": 1.841946977685299e-05, "loss": 1.1833, "step": 6282 }, { "epoch": 20.6, "grad_norm": 5.9800028800964355, "learning_rate": 1.8418896777353272e-05, "loss": 0.8933, "step": 6283 }, { "epoch": 20.60327868852459, "grad_norm": 5.806768894195557, "learning_rate": 1.841832368292174e-05, "loss": 1.0492, "step": 6284 }, { "epoch": 20.60655737704918, "grad_norm": 5.918825149536133, "learning_rate": 1.8417750493564857e-05, "loss": 1.187, "step": 6285 }, { "epoch": 20.60983606557377, "grad_norm": 6.35638952255249, "learning_rate": 1.8417177209289082e-05, "loss": 0.9547, "step": 6286 }, { "epoch": 20.613114754098362, "grad_norm": 5.5745086669921875, "learning_rate": 1.8416603830100885e-05, "loss": 0.9524, "step": 6287 }, { "epoch": 20.61639344262295, "grad_norm": 4.550591468811035, "learning_rate": 1.8416030356006728e-05, "loss": 1.0045, "step": 6288 }, { "epoch": 20.619672131147542, "grad_norm": 6.185609340667725, "learning_rate": 1.841545678701308e-05, "loss": 0.9677, "step": 6289 }, { "epoch": 20.62295081967213, "grad_norm": 6.182122230529785, "learning_rate": 1.8414883123126402e-05, "loss": 0.953, "step": 6290 }, { "epoch": 20.626229508196722, "grad_norm": 5.72707462310791, "learning_rate": 1.8414309364353173e-05, "loss": 1.2424, "step": 6291 }, { "epoch": 20.62950819672131, "grad_norm": 7.115115642547607, "learning_rate": 1.8413735510699856e-05, "loss": 0.9338, "step": 6292 }, { "epoch": 20.632786885245903, "grad_norm": 6.030523777008057, "learning_rate": 1.841316156217292e-05, "loss": 1.1071, "step": 6293 }, { "epoch": 20.63606557377049, "grad_norm": 6.2521138191223145, "learning_rate": 1.8412587518778845e-05, "loss": 0.9258, "step": 6294 }, { "epoch": 20.639344262295083, "grad_norm": 5.640839099884033, "learning_rate": 1.8412013380524095e-05, "loss": 0.8848, "step": 6295 }, { "epoch": 20.64262295081967, "grad_norm": 5.838527679443359, "learning_rate": 1.841143914741515e-05, "loss": 1.1389, "step": 6296 }, { "epoch": 20.645901639344263, "grad_norm": 5.528104305267334, "learning_rate": 1.841086481945848e-05, "loss": 1.0326, "step": 6297 }, { "epoch": 20.64918032786885, "grad_norm": 5.034243106842041, "learning_rate": 1.8410290396660567e-05, "loss": 1.2246, "step": 6298 }, { "epoch": 20.652459016393443, "grad_norm": 5.459237098693848, "learning_rate": 1.840971587902788e-05, "loss": 1.1899, "step": 6299 }, { "epoch": 20.65573770491803, "grad_norm": 6.085824966430664, "learning_rate": 1.8409141266566908e-05, "loss": 0.9883, "step": 6300 }, { "epoch": 20.659016393442624, "grad_norm": 5.915085315704346, "learning_rate": 1.840856655928412e-05, "loss": 0.842, "step": 6301 }, { "epoch": 20.662295081967212, "grad_norm": 4.718939781188965, "learning_rate": 1.8407991757186007e-05, "loss": 1.0693, "step": 6302 }, { "epoch": 20.665573770491804, "grad_norm": 5.925350666046143, "learning_rate": 1.8407416860279045e-05, "loss": 0.7618, "step": 6303 }, { "epoch": 20.668852459016392, "grad_norm": 4.870418071746826, "learning_rate": 1.8406841868569712e-05, "loss": 1.1067, "step": 6304 }, { "epoch": 20.672131147540984, "grad_norm": 5.8644914627075195, "learning_rate": 1.8406266782064498e-05, "loss": 0.8972, "step": 6305 }, { "epoch": 20.675409836065572, "grad_norm": 5.888441562652588, "learning_rate": 1.8405691600769886e-05, "loss": 0.9124, "step": 6306 }, { "epoch": 20.678688524590164, "grad_norm": 6.245746612548828, "learning_rate": 1.8405116324692362e-05, "loss": 0.8742, "step": 6307 }, { "epoch": 20.681967213114753, "grad_norm": 4.745670795440674, "learning_rate": 1.840454095383841e-05, "loss": 0.9681, "step": 6308 }, { "epoch": 20.685245901639345, "grad_norm": 5.246739387512207, "learning_rate": 1.840396548821452e-05, "loss": 1.1115, "step": 6309 }, { "epoch": 20.688524590163933, "grad_norm": 6.13297176361084, "learning_rate": 1.8403389927827186e-05, "loss": 0.9805, "step": 6310 }, { "epoch": 20.691803278688525, "grad_norm": 5.818902015686035, "learning_rate": 1.840281427268289e-05, "loss": 0.9814, "step": 6311 }, { "epoch": 20.695081967213113, "grad_norm": 5.872481346130371, "learning_rate": 1.8402238522788124e-05, "loss": 1.0425, "step": 6312 }, { "epoch": 20.698360655737705, "grad_norm": 6.591672897338867, "learning_rate": 1.8401662678149387e-05, "loss": 0.9257, "step": 6313 }, { "epoch": 20.701639344262293, "grad_norm": 6.182232856750488, "learning_rate": 1.8401086738773166e-05, "loss": 0.9658, "step": 6314 }, { "epoch": 20.704918032786885, "grad_norm": 5.066309452056885, "learning_rate": 1.8400510704665955e-05, "loss": 1.1323, "step": 6315 }, { "epoch": 20.708196721311474, "grad_norm": 5.135092735290527, "learning_rate": 1.8399934575834254e-05, "loss": 1.115, "step": 6316 }, { "epoch": 20.711475409836066, "grad_norm": 5.7318034172058105, "learning_rate": 1.8399358352284554e-05, "loss": 1.0426, "step": 6317 }, { "epoch": 20.714754098360658, "grad_norm": 6.092456340789795, "learning_rate": 1.8398782034023357e-05, "loss": 0.9073, "step": 6318 }, { "epoch": 20.718032786885246, "grad_norm": 4.70117712020874, "learning_rate": 1.8398205621057164e-05, "loss": 1.004, "step": 6319 }, { "epoch": 20.721311475409838, "grad_norm": 4.860965251922607, "learning_rate": 1.8397629113392464e-05, "loss": 0.6416, "step": 6320 }, { "epoch": 20.724590163934426, "grad_norm": 5.107070446014404, "learning_rate": 1.8397052511035766e-05, "loss": 1.0955, "step": 6321 }, { "epoch": 20.727868852459018, "grad_norm": 4.916102409362793, "learning_rate": 1.8396475813993574e-05, "loss": 1.1803, "step": 6322 }, { "epoch": 20.731147540983606, "grad_norm": 6.8947529792785645, "learning_rate": 1.8395899022272384e-05, "loss": 0.8209, "step": 6323 }, { "epoch": 20.7344262295082, "grad_norm": 5.002739429473877, "learning_rate": 1.8395322135878705e-05, "loss": 0.9794, "step": 6324 }, { "epoch": 20.737704918032787, "grad_norm": 6.004178047180176, "learning_rate": 1.8394745154819037e-05, "loss": 0.8324, "step": 6325 }, { "epoch": 20.74098360655738, "grad_norm": 4.974658966064453, "learning_rate": 1.839416807909989e-05, "loss": 1.3677, "step": 6326 }, { "epoch": 20.744262295081967, "grad_norm": 5.735434532165527, "learning_rate": 1.8393590908727773e-05, "loss": 1.0176, "step": 6327 }, { "epoch": 20.74754098360656, "grad_norm": 5.166877746582031, "learning_rate": 1.839301364370919e-05, "loss": 0.8312, "step": 6328 }, { "epoch": 20.750819672131147, "grad_norm": 5.116655349731445, "learning_rate": 1.839243628405065e-05, "loss": 0.8151, "step": 6329 }, { "epoch": 20.75409836065574, "grad_norm": 5.148130416870117, "learning_rate": 1.8391858829758667e-05, "loss": 1.072, "step": 6330 }, { "epoch": 20.757377049180327, "grad_norm": 4.965447902679443, "learning_rate": 1.8391281280839746e-05, "loss": 0.8165, "step": 6331 }, { "epoch": 20.76065573770492, "grad_norm": 6.571438312530518, "learning_rate": 1.8390703637300408e-05, "loss": 1.0162, "step": 6332 }, { "epoch": 20.763934426229508, "grad_norm": 5.223337173461914, "learning_rate": 1.839012589914716e-05, "loss": 0.9384, "step": 6333 }, { "epoch": 20.7672131147541, "grad_norm": 6.628055095672607, "learning_rate": 1.838954806638652e-05, "loss": 0.8149, "step": 6334 }, { "epoch": 20.770491803278688, "grad_norm": 5.81593132019043, "learning_rate": 1.8388970139025006e-05, "loss": 1.012, "step": 6335 }, { "epoch": 20.77377049180328, "grad_norm": 5.360565662384033, "learning_rate": 1.8388392117069128e-05, "loss": 1.1322, "step": 6336 }, { "epoch": 20.777049180327868, "grad_norm": 5.60485315322876, "learning_rate": 1.8387814000525406e-05, "loss": 1.105, "step": 6337 }, { "epoch": 20.78032786885246, "grad_norm": 6.738484859466553, "learning_rate": 1.8387235789400363e-05, "loss": 1.2404, "step": 6338 }, { "epoch": 20.78360655737705, "grad_norm": 5.944322109222412, "learning_rate": 1.8386657483700516e-05, "loss": 1.0009, "step": 6339 }, { "epoch": 20.78688524590164, "grad_norm": 5.267786026000977, "learning_rate": 1.8386079083432382e-05, "loss": 0.8263, "step": 6340 }, { "epoch": 20.79016393442623, "grad_norm": 5.405961036682129, "learning_rate": 1.838550058860249e-05, "loss": 1.0469, "step": 6341 }, { "epoch": 20.79344262295082, "grad_norm": 5.588339328765869, "learning_rate": 1.838492199921736e-05, "loss": 1.016, "step": 6342 }, { "epoch": 20.79672131147541, "grad_norm": 4.847718238830566, "learning_rate": 1.8384343315283515e-05, "loss": 1.0616, "step": 6343 }, { "epoch": 20.8, "grad_norm": 6.0730061531066895, "learning_rate": 1.8383764536807486e-05, "loss": 0.9978, "step": 6344 }, { "epoch": 20.80327868852459, "grad_norm": 5.542835712432861, "learning_rate": 1.838318566379579e-05, "loss": 0.9958, "step": 6345 }, { "epoch": 20.80655737704918, "grad_norm": 6.043570518493652, "learning_rate": 1.838260669625496e-05, "loss": 0.8954, "step": 6346 }, { "epoch": 20.80983606557377, "grad_norm": 7.046552658081055, "learning_rate": 1.8382027634191523e-05, "loss": 1.1182, "step": 6347 }, { "epoch": 20.81311475409836, "grad_norm": 5.129305839538574, "learning_rate": 1.838144847761201e-05, "loss": 0.9623, "step": 6348 }, { "epoch": 20.81639344262295, "grad_norm": 4.91363525390625, "learning_rate": 1.8380869226522954e-05, "loss": 0.9249, "step": 6349 }, { "epoch": 20.81967213114754, "grad_norm": 5.788430213928223, "learning_rate": 1.8380289880930878e-05, "loss": 1.0581, "step": 6350 }, { "epoch": 20.82295081967213, "grad_norm": 5.1715264320373535, "learning_rate": 1.8379710440842323e-05, "loss": 1.0427, "step": 6351 }, { "epoch": 20.82622950819672, "grad_norm": 7.234948635101318, "learning_rate": 1.8379130906263824e-05, "loss": 0.8833, "step": 6352 }, { "epoch": 20.82950819672131, "grad_norm": 6.765955448150635, "learning_rate": 1.8378551277201906e-05, "loss": 0.8869, "step": 6353 }, { "epoch": 20.832786885245902, "grad_norm": 8.129997253417969, "learning_rate": 1.8377971553663113e-05, "loss": 0.8876, "step": 6354 }, { "epoch": 20.83606557377049, "grad_norm": 6.19616174697876, "learning_rate": 1.8377391735653983e-05, "loss": 1.0142, "step": 6355 }, { "epoch": 20.839344262295082, "grad_norm": 6.998855113983154, "learning_rate": 1.8376811823181045e-05, "loss": 0.8008, "step": 6356 }, { "epoch": 20.84262295081967, "grad_norm": 4.52009391784668, "learning_rate": 1.837623181625085e-05, "loss": 0.905, "step": 6357 }, { "epoch": 20.845901639344262, "grad_norm": 11.487444877624512, "learning_rate": 1.8375651714869926e-05, "loss": 0.7914, "step": 6358 }, { "epoch": 20.84918032786885, "grad_norm": 6.495623588562012, "learning_rate": 1.8375071519044826e-05, "loss": 0.7317, "step": 6359 }, { "epoch": 20.852459016393443, "grad_norm": 5.547083854675293, "learning_rate": 1.837449122878208e-05, "loss": 1.0663, "step": 6360 }, { "epoch": 20.855737704918035, "grad_norm": 5.553553581237793, "learning_rate": 1.837391084408824e-05, "loss": 1.0664, "step": 6361 }, { "epoch": 20.859016393442623, "grad_norm": 5.9186015129089355, "learning_rate": 1.8373330364969856e-05, "loss": 0.939, "step": 6362 }, { "epoch": 20.862295081967215, "grad_norm": 6.094951629638672, "learning_rate": 1.837274979143346e-05, "loss": 1.106, "step": 6363 }, { "epoch": 20.865573770491803, "grad_norm": 6.164930820465088, "learning_rate": 1.8372169123485605e-05, "loss": 0.97, "step": 6364 }, { "epoch": 20.868852459016395, "grad_norm": 5.5219950675964355, "learning_rate": 1.837158836113284e-05, "loss": 1.0134, "step": 6365 }, { "epoch": 20.872131147540983, "grad_norm": 7.1164751052856445, "learning_rate": 1.837100750438171e-05, "loss": 0.9204, "step": 6366 }, { "epoch": 20.875409836065575, "grad_norm": 5.358199596405029, "learning_rate": 1.837042655323877e-05, "loss": 1.3045, "step": 6367 }, { "epoch": 20.878688524590164, "grad_norm": 6.580497741699219, "learning_rate": 1.836984550771056e-05, "loss": 1.0596, "step": 6368 }, { "epoch": 20.881967213114756, "grad_norm": 5.98673152923584, "learning_rate": 1.8369264367803646e-05, "loss": 0.9108, "step": 6369 }, { "epoch": 20.885245901639344, "grad_norm": 5.491272449493408, "learning_rate": 1.8368683133524576e-05, "loss": 1.0416, "step": 6370 }, { "epoch": 20.888524590163936, "grad_norm": 5.1513519287109375, "learning_rate": 1.83681018048799e-05, "loss": 1.0441, "step": 6371 }, { "epoch": 20.891803278688524, "grad_norm": 7.174558639526367, "learning_rate": 1.8367520381876173e-05, "loss": 0.958, "step": 6372 }, { "epoch": 20.895081967213116, "grad_norm": 5.927909851074219, "learning_rate": 1.8366938864519955e-05, "loss": 1.1217, "step": 6373 }, { "epoch": 20.898360655737704, "grad_norm": 5.1634440422058105, "learning_rate": 1.8366357252817803e-05, "loss": 1.0352, "step": 6374 }, { "epoch": 20.901639344262296, "grad_norm": 6.842897415161133, "learning_rate": 1.8365775546776272e-05, "loss": 0.9861, "step": 6375 }, { "epoch": 20.904918032786885, "grad_norm": 6.17972469329834, "learning_rate": 1.8365193746401926e-05, "loss": 0.8502, "step": 6376 }, { "epoch": 20.908196721311477, "grad_norm": 5.739527702331543, "learning_rate": 1.8364611851701322e-05, "loss": 1.3059, "step": 6377 }, { "epoch": 20.911475409836065, "grad_norm": 5.371199607849121, "learning_rate": 1.8364029862681022e-05, "loss": 0.9722, "step": 6378 }, { "epoch": 20.914754098360657, "grad_norm": 5.1282196044921875, "learning_rate": 1.836344777934759e-05, "loss": 1.0222, "step": 6379 }, { "epoch": 20.918032786885245, "grad_norm": 5.356537342071533, "learning_rate": 1.836286560170759e-05, "loss": 1.1548, "step": 6380 }, { "epoch": 20.921311475409837, "grad_norm": 7.479512691497803, "learning_rate": 1.8362283329767577e-05, "loss": 0.7501, "step": 6381 }, { "epoch": 20.924590163934425, "grad_norm": 5.3684821128845215, "learning_rate": 1.8361700963534134e-05, "loss": 1.0063, "step": 6382 }, { "epoch": 20.927868852459017, "grad_norm": 6.1150641441345215, "learning_rate": 1.8361118503013814e-05, "loss": 1.0229, "step": 6383 }, { "epoch": 20.931147540983606, "grad_norm": 6.0746378898620605, "learning_rate": 1.836053594821319e-05, "loss": 1.006, "step": 6384 }, { "epoch": 20.934426229508198, "grad_norm": 5.860284328460693, "learning_rate": 1.8359953299138832e-05, "loss": 1.0079, "step": 6385 }, { "epoch": 20.937704918032786, "grad_norm": 5.176154136657715, "learning_rate": 1.8359370555797308e-05, "loss": 1.1328, "step": 6386 }, { "epoch": 20.940983606557378, "grad_norm": 4.817923069000244, "learning_rate": 1.835878771819519e-05, "loss": 1.1562, "step": 6387 }, { "epoch": 20.944262295081966, "grad_norm": 6.603480815887451, "learning_rate": 1.8358204786339046e-05, "loss": 0.917, "step": 6388 }, { "epoch": 20.947540983606558, "grad_norm": 9.73315143585205, "learning_rate": 1.8357621760235454e-05, "loss": 0.9329, "step": 6389 }, { "epoch": 20.950819672131146, "grad_norm": 5.110047340393066, "learning_rate": 1.835703863989099e-05, "loss": 0.9238, "step": 6390 }, { "epoch": 20.95409836065574, "grad_norm": 5.330109596252441, "learning_rate": 1.8356455425312224e-05, "loss": 0.9803, "step": 6391 }, { "epoch": 20.957377049180327, "grad_norm": 7.464608192443848, "learning_rate": 1.8355872116505735e-05, "loss": 0.9632, "step": 6392 }, { "epoch": 20.96065573770492, "grad_norm": 5.907283306121826, "learning_rate": 1.83552887134781e-05, "loss": 0.8722, "step": 6393 }, { "epoch": 20.963934426229507, "grad_norm": 6.447669982910156, "learning_rate": 1.8354705216235896e-05, "loss": 0.864, "step": 6394 }, { "epoch": 20.9672131147541, "grad_norm": 8.271269798278809, "learning_rate": 1.8354121624785707e-05, "loss": 1.176, "step": 6395 }, { "epoch": 20.970491803278687, "grad_norm": 4.728301525115967, "learning_rate": 1.8353537939134107e-05, "loss": 0.9348, "step": 6396 }, { "epoch": 20.97377049180328, "grad_norm": 6.079099655151367, "learning_rate": 1.8352954159287683e-05, "loss": 0.9165, "step": 6397 }, { "epoch": 20.977049180327867, "grad_norm": 5.308145999908447, "learning_rate": 1.8352370285253018e-05, "loss": 0.7467, "step": 6398 }, { "epoch": 20.98032786885246, "grad_norm": 5.124764919281006, "learning_rate": 1.835178631703669e-05, "loss": 0.9891, "step": 6399 }, { "epoch": 20.983606557377048, "grad_norm": 5.466836452484131, "learning_rate": 1.8351202254645287e-05, "loss": 1.1638, "step": 6400 }, { "epoch": 20.98688524590164, "grad_norm": 4.8301615715026855, "learning_rate": 1.8350618098085398e-05, "loss": 1.1392, "step": 6401 }, { "epoch": 20.990163934426228, "grad_norm": 5.8566131591796875, "learning_rate": 1.8350033847363608e-05, "loss": 1.0103, "step": 6402 }, { "epoch": 20.99344262295082, "grad_norm": 4.928653717041016, "learning_rate": 1.8349449502486505e-05, "loss": 1.0209, "step": 6403 }, { "epoch": 20.99672131147541, "grad_norm": 4.89117956161499, "learning_rate": 1.8348865063460675e-05, "loss": 0.9744, "step": 6404 }, { "epoch": 21.0, "grad_norm": 5.38976526260376, "learning_rate": 1.8348280530292712e-05, "loss": 1.0579, "step": 6405 }, { "epoch": 21.003278688524592, "grad_norm": 4.400243759155273, "learning_rate": 1.8347695902989208e-05, "loss": 1.1083, "step": 6406 }, { "epoch": 21.00655737704918, "grad_norm": 18.23080062866211, "learning_rate": 1.834711118155675e-05, "loss": 0.8416, "step": 6407 }, { "epoch": 21.009836065573772, "grad_norm": 5.986701488494873, "learning_rate": 1.8346526366001936e-05, "loss": 0.9452, "step": 6408 }, { "epoch": 21.01311475409836, "grad_norm": 5.222447872161865, "learning_rate": 1.834594145633136e-05, "loss": 0.9366, "step": 6409 }, { "epoch": 21.016393442622952, "grad_norm": 5.591364860534668, "learning_rate": 1.8345356452551616e-05, "loss": 0.9306, "step": 6410 }, { "epoch": 21.01967213114754, "grad_norm": 5.904355525970459, "learning_rate": 1.83447713546693e-05, "loss": 0.7909, "step": 6411 }, { "epoch": 21.022950819672133, "grad_norm": 5.189318656921387, "learning_rate": 1.8344186162691012e-05, "loss": 0.8168, "step": 6412 }, { "epoch": 21.02622950819672, "grad_norm": 5.277698516845703, "learning_rate": 1.8343600876623347e-05, "loss": 0.7844, "step": 6413 }, { "epoch": 21.029508196721313, "grad_norm": 5.1093363761901855, "learning_rate": 1.834301549647291e-05, "loss": 0.9737, "step": 6414 }, { "epoch": 21.0327868852459, "grad_norm": 5.1956915855407715, "learning_rate": 1.8342430022246295e-05, "loss": 0.9611, "step": 6415 }, { "epoch": 21.036065573770493, "grad_norm": 7.763367176055908, "learning_rate": 1.8341844453950108e-05, "loss": 1.0355, "step": 6416 }, { "epoch": 21.03934426229508, "grad_norm": 4.6953911781311035, "learning_rate": 1.8341258791590953e-05, "loss": 0.9905, "step": 6417 }, { "epoch": 21.042622950819673, "grad_norm": 8.002156257629395, "learning_rate": 1.8340673035175433e-05, "loss": 0.7816, "step": 6418 }, { "epoch": 21.04590163934426, "grad_norm": 7.0600385665893555, "learning_rate": 1.8340087184710152e-05, "loss": 1.0062, "step": 6419 }, { "epoch": 21.049180327868854, "grad_norm": 7.904319763183594, "learning_rate": 1.8339501240201717e-05, "loss": 0.9104, "step": 6420 }, { "epoch": 21.052459016393442, "grad_norm": 5.974316120147705, "learning_rate": 1.8338915201656735e-05, "loss": 1.0996, "step": 6421 }, { "epoch": 21.055737704918034, "grad_norm": 4.914780616760254, "learning_rate": 1.833832906908181e-05, "loss": 0.9176, "step": 6422 }, { "epoch": 21.059016393442622, "grad_norm": 21.58787727355957, "learning_rate": 1.8337742842483558e-05, "loss": 0.9557, "step": 6423 }, { "epoch": 21.062295081967214, "grad_norm": 6.129678726196289, "learning_rate": 1.8337156521868587e-05, "loss": 1.0006, "step": 6424 }, { "epoch": 21.065573770491802, "grad_norm": 6.486820220947266, "learning_rate": 1.8336570107243507e-05, "loss": 0.7847, "step": 6425 }, { "epoch": 21.068852459016394, "grad_norm": 4.4276556968688965, "learning_rate": 1.833598359861493e-05, "loss": 0.905, "step": 6426 }, { "epoch": 21.072131147540983, "grad_norm": 5.681924819946289, "learning_rate": 1.8335396995989474e-05, "loss": 0.8469, "step": 6427 }, { "epoch": 21.075409836065575, "grad_norm": 5.166893482208252, "learning_rate": 1.8334810299373746e-05, "loss": 0.8525, "step": 6428 }, { "epoch": 21.078688524590163, "grad_norm": 5.496186256408691, "learning_rate": 1.8334223508774366e-05, "loss": 1.0134, "step": 6429 }, { "epoch": 21.081967213114755, "grad_norm": 6.286301136016846, "learning_rate": 1.8333636624197954e-05, "loss": 1.1782, "step": 6430 }, { "epoch": 21.085245901639343, "grad_norm": 6.078952789306641, "learning_rate": 1.833304964565112e-05, "loss": 1.0209, "step": 6431 }, { "epoch": 21.088524590163935, "grad_norm": 4.560314178466797, "learning_rate": 1.8332462573140494e-05, "loss": 1.2762, "step": 6432 }, { "epoch": 21.091803278688523, "grad_norm": 5.242687225341797, "learning_rate": 1.8331875406672683e-05, "loss": 0.9597, "step": 6433 }, { "epoch": 21.095081967213115, "grad_norm": 5.7170281410217285, "learning_rate": 1.8331288146254315e-05, "loss": 0.9235, "step": 6434 }, { "epoch": 21.098360655737704, "grad_norm": 5.1751298904418945, "learning_rate": 1.833070079189201e-05, "loss": 1.0308, "step": 6435 }, { "epoch": 21.101639344262296, "grad_norm": 5.411533832550049, "learning_rate": 1.8330113343592394e-05, "loss": 1.068, "step": 6436 }, { "epoch": 21.104918032786884, "grad_norm": 8.2308931350708, "learning_rate": 1.8329525801362086e-05, "loss": 1.1393, "step": 6437 }, { "epoch": 21.108196721311476, "grad_norm": 6.299222469329834, "learning_rate": 1.8328938165207716e-05, "loss": 0.7479, "step": 6438 }, { "epoch": 21.111475409836064, "grad_norm": 4.888100624084473, "learning_rate": 1.8328350435135908e-05, "loss": 0.9203, "step": 6439 }, { "epoch": 21.114754098360656, "grad_norm": 5.197320461273193, "learning_rate": 1.832776261115329e-05, "loss": 0.9337, "step": 6440 }, { "epoch": 21.118032786885244, "grad_norm": 5.884115695953369, "learning_rate": 1.832717469326649e-05, "loss": 1.0837, "step": 6441 }, { "epoch": 21.121311475409836, "grad_norm": 5.248736381530762, "learning_rate": 1.8326586681482137e-05, "loss": 1.094, "step": 6442 }, { "epoch": 21.124590163934425, "grad_norm": 5.938676357269287, "learning_rate": 1.8325998575806863e-05, "loss": 0.8636, "step": 6443 }, { "epoch": 21.127868852459017, "grad_norm": 9.19833755493164, "learning_rate": 1.8325410376247295e-05, "loss": 1.0317, "step": 6444 }, { "epoch": 21.131147540983605, "grad_norm": 4.705526828765869, "learning_rate": 1.832482208281007e-05, "loss": 1.1991, "step": 6445 }, { "epoch": 21.134426229508197, "grad_norm": 4.705691337585449, "learning_rate": 1.832423369550182e-05, "loss": 0.8152, "step": 6446 }, { "epoch": 21.137704918032785, "grad_norm": 7.318425178527832, "learning_rate": 1.8323645214329183e-05, "loss": 0.8381, "step": 6447 }, { "epoch": 21.140983606557377, "grad_norm": 5.700703144073486, "learning_rate": 1.8323056639298788e-05, "loss": 0.9836, "step": 6448 }, { "epoch": 21.14426229508197, "grad_norm": 7.53397274017334, "learning_rate": 1.8322467970417278e-05, "loss": 0.8657, "step": 6449 }, { "epoch": 21.147540983606557, "grad_norm": 6.78814172744751, "learning_rate": 1.8321879207691287e-05, "loss": 1.0012, "step": 6450 }, { "epoch": 21.15081967213115, "grad_norm": 4.6441521644592285, "learning_rate": 1.8321290351127455e-05, "loss": 0.8984, "step": 6451 }, { "epoch": 21.154098360655738, "grad_norm": 14.790263175964355, "learning_rate": 1.8320701400732424e-05, "loss": 0.7349, "step": 6452 }, { "epoch": 21.15737704918033, "grad_norm": 5.557155132293701, "learning_rate": 1.832011235651283e-05, "loss": 0.9771, "step": 6453 }, { "epoch": 21.160655737704918, "grad_norm": 5.746638774871826, "learning_rate": 1.8319523218475323e-05, "loss": 0.8884, "step": 6454 }, { "epoch": 21.16393442622951, "grad_norm": 5.287160873413086, "learning_rate": 1.831893398662654e-05, "loss": 0.7679, "step": 6455 }, { "epoch": 21.167213114754098, "grad_norm": 5.666332244873047, "learning_rate": 1.8318344660973125e-05, "loss": 0.8379, "step": 6456 }, { "epoch": 21.17049180327869, "grad_norm": 7.8693647384643555, "learning_rate": 1.8317755241521727e-05, "loss": 0.7634, "step": 6457 }, { "epoch": 21.17377049180328, "grad_norm": 6.248149394989014, "learning_rate": 1.831716572827899e-05, "loss": 0.9265, "step": 6458 }, { "epoch": 21.17704918032787, "grad_norm": 5.793575763702393, "learning_rate": 1.831657612125156e-05, "loss": 0.9208, "step": 6459 }, { "epoch": 21.18032786885246, "grad_norm": 10.31843090057373, "learning_rate": 1.831598642044609e-05, "loss": 0.969, "step": 6460 }, { "epoch": 21.18360655737705, "grad_norm": 5.503422260284424, "learning_rate": 1.8315396625869226e-05, "loss": 0.9598, "step": 6461 }, { "epoch": 21.18688524590164, "grad_norm": 5.4704999923706055, "learning_rate": 1.8314806737527616e-05, "loss": 0.7967, "step": 6462 }, { "epoch": 21.19016393442623, "grad_norm": 5.817468643188477, "learning_rate": 1.8314216755427922e-05, "loss": 0.9329, "step": 6463 }, { "epoch": 21.19344262295082, "grad_norm": 10.09897518157959, "learning_rate": 1.831362667957678e-05, "loss": 0.7889, "step": 6464 }, { "epoch": 21.19672131147541, "grad_norm": 5.093682289123535, "learning_rate": 1.831303650998086e-05, "loss": 1.0809, "step": 6465 }, { "epoch": 21.2, "grad_norm": 5.069019317626953, "learning_rate": 1.831244624664681e-05, "loss": 0.8788, "step": 6466 }, { "epoch": 21.20327868852459, "grad_norm": 6.565288066864014, "learning_rate": 1.8311855889581283e-05, "loss": 1.071, "step": 6467 }, { "epoch": 21.20655737704918, "grad_norm": 5.67744255065918, "learning_rate": 1.8311265438790937e-05, "loss": 0.8859, "step": 6468 }, { "epoch": 21.20983606557377, "grad_norm": 5.738905429840088, "learning_rate": 1.8310674894282433e-05, "loss": 0.9398, "step": 6469 }, { "epoch": 21.21311475409836, "grad_norm": 5.4251580238342285, "learning_rate": 1.831008425606243e-05, "loss": 0.963, "step": 6470 }, { "epoch": 21.21639344262295, "grad_norm": 5.1958513259887695, "learning_rate": 1.8309493524137587e-05, "loss": 1.0422, "step": 6471 }, { "epoch": 21.21967213114754, "grad_norm": 6.215082168579102, "learning_rate": 1.8308902698514565e-05, "loss": 1.0517, "step": 6472 }, { "epoch": 21.222950819672132, "grad_norm": 6.584722518920898, "learning_rate": 1.8308311779200027e-05, "loss": 0.9039, "step": 6473 }, { "epoch": 21.22622950819672, "grad_norm": 5.914769172668457, "learning_rate": 1.8307720766200633e-05, "loss": 1.129, "step": 6474 }, { "epoch": 21.229508196721312, "grad_norm": 6.087928771972656, "learning_rate": 1.830712965952305e-05, "loss": 0.8659, "step": 6475 }, { "epoch": 21.2327868852459, "grad_norm": 7.473366737365723, "learning_rate": 1.830653845917394e-05, "loss": 0.7506, "step": 6476 }, { "epoch": 21.236065573770492, "grad_norm": 6.744200229644775, "learning_rate": 1.8305947165159975e-05, "loss": 0.9476, "step": 6477 }, { "epoch": 21.23934426229508, "grad_norm": 5.316779136657715, "learning_rate": 1.830535577748782e-05, "loss": 1.0396, "step": 6478 }, { "epoch": 21.242622950819673, "grad_norm": 5.495310306549072, "learning_rate": 1.8304764296164145e-05, "loss": 0.8273, "step": 6479 }, { "epoch": 21.24590163934426, "grad_norm": 5.173651218414307, "learning_rate": 1.8304172721195615e-05, "loss": 0.9016, "step": 6480 }, { "epoch": 21.249180327868853, "grad_norm": 5.660210609436035, "learning_rate": 1.8303581052588905e-05, "loss": 0.9907, "step": 6481 }, { "epoch": 21.25245901639344, "grad_norm": 4.496129989624023, "learning_rate": 1.830298929035068e-05, "loss": 0.8741, "step": 6482 }, { "epoch": 21.255737704918033, "grad_norm": 4.0658135414123535, "learning_rate": 1.8302397434487625e-05, "loss": 1.0436, "step": 6483 }, { "epoch": 21.25901639344262, "grad_norm": 8.372333526611328, "learning_rate": 1.8301805485006402e-05, "loss": 0.7997, "step": 6484 }, { "epoch": 21.262295081967213, "grad_norm": 5.8186235427856445, "learning_rate": 1.8301213441913693e-05, "loss": 0.7118, "step": 6485 }, { "epoch": 21.2655737704918, "grad_norm": 7.1298441886901855, "learning_rate": 1.8300621305216172e-05, "loss": 0.9884, "step": 6486 }, { "epoch": 21.268852459016394, "grad_norm": 5.204191207885742, "learning_rate": 1.8300029074920512e-05, "loss": 0.8399, "step": 6487 }, { "epoch": 21.272131147540982, "grad_norm": 8.27045726776123, "learning_rate": 1.8299436751033396e-05, "loss": 0.7878, "step": 6488 }, { "epoch": 21.275409836065574, "grad_norm": 4.770337104797363, "learning_rate": 1.8298844333561502e-05, "loss": 0.8846, "step": 6489 }, { "epoch": 21.278688524590162, "grad_norm": 5.829885005950928, "learning_rate": 1.829825182251151e-05, "loss": 1.1182, "step": 6490 }, { "epoch": 21.281967213114754, "grad_norm": 6.422431945800781, "learning_rate": 1.8297659217890095e-05, "loss": 1.0872, "step": 6491 }, { "epoch": 21.285245901639342, "grad_norm": 4.933125972747803, "learning_rate": 1.829706651970395e-05, "loss": 0.8981, "step": 6492 }, { "epoch": 21.288524590163934, "grad_norm": 5.609850883483887, "learning_rate": 1.8296473727959755e-05, "loss": 0.8693, "step": 6493 }, { "epoch": 21.291803278688526, "grad_norm": 6.002315521240234, "learning_rate": 1.829588084266419e-05, "loss": 0.6147, "step": 6494 }, { "epoch": 21.295081967213115, "grad_norm": 5.683804988861084, "learning_rate": 1.8295287863823944e-05, "loss": 0.8818, "step": 6495 }, { "epoch": 21.298360655737707, "grad_norm": 5.633211612701416, "learning_rate": 1.82946947914457e-05, "loss": 1.1232, "step": 6496 }, { "epoch": 21.301639344262295, "grad_norm": 5.698216915130615, "learning_rate": 1.829410162553615e-05, "loss": 0.7987, "step": 6497 }, { "epoch": 21.304918032786887, "grad_norm": 4.620022773742676, "learning_rate": 1.8293508366101977e-05, "loss": 0.8687, "step": 6498 }, { "epoch": 21.308196721311475, "grad_norm": 5.634494781494141, "learning_rate": 1.8292915013149878e-05, "loss": 0.9332, "step": 6499 }, { "epoch": 21.311475409836067, "grad_norm": 5.413187503814697, "learning_rate": 1.8292321566686538e-05, "loss": 0.8485, "step": 6500 }, { "epoch": 21.314754098360655, "grad_norm": 5.738636016845703, "learning_rate": 1.829172802671865e-05, "loss": 0.8696, "step": 6501 }, { "epoch": 21.318032786885247, "grad_norm": 7.976901054382324, "learning_rate": 1.829113439325291e-05, "loss": 0.6334, "step": 6502 }, { "epoch": 21.321311475409836, "grad_norm": 5.002741813659668, "learning_rate": 1.8290540666296008e-05, "loss": 0.9852, "step": 6503 }, { "epoch": 21.324590163934428, "grad_norm": 4.8605875968933105, "learning_rate": 1.8289946845854638e-05, "loss": 1.2587, "step": 6504 }, { "epoch": 21.327868852459016, "grad_norm": 5.890091419219971, "learning_rate": 1.82893529319355e-05, "loss": 0.8691, "step": 6505 }, { "epoch": 21.331147540983608, "grad_norm": 6.477115154266357, "learning_rate": 1.8288758924545287e-05, "loss": 0.9038, "step": 6506 }, { "epoch": 21.334426229508196, "grad_norm": 8.029791831970215, "learning_rate": 1.82881648236907e-05, "loss": 1.218, "step": 6507 }, { "epoch": 21.337704918032788, "grad_norm": 5.439359188079834, "learning_rate": 1.828757062937844e-05, "loss": 0.9619, "step": 6508 }, { "epoch": 21.340983606557376, "grad_norm": 5.848730087280273, "learning_rate": 1.82869763416152e-05, "loss": 0.8344, "step": 6509 }, { "epoch": 21.34426229508197, "grad_norm": 4.980209827423096, "learning_rate": 1.828638196040769e-05, "loss": 0.8848, "step": 6510 }, { "epoch": 21.347540983606557, "grad_norm": 7.225955486297607, "learning_rate": 1.8285787485762602e-05, "loss": 0.857, "step": 6511 }, { "epoch": 21.35081967213115, "grad_norm": 5.511238098144531, "learning_rate": 1.828519291768665e-05, "loss": 1.2432, "step": 6512 }, { "epoch": 21.354098360655737, "grad_norm": 7.044803619384766, "learning_rate": 1.828459825618653e-05, "loss": 0.7813, "step": 6513 }, { "epoch": 21.35737704918033, "grad_norm": 6.921213626861572, "learning_rate": 1.8284003501268953e-05, "loss": 1.0905, "step": 6514 }, { "epoch": 21.360655737704917, "grad_norm": 6.648909568786621, "learning_rate": 1.8283408652940623e-05, "loss": 1.0183, "step": 6515 }, { "epoch": 21.36393442622951, "grad_norm": 5.583667278289795, "learning_rate": 1.8282813711208246e-05, "loss": 0.9514, "step": 6516 }, { "epoch": 21.367213114754097, "grad_norm": 5.97081184387207, "learning_rate": 1.8282218676078532e-05, "loss": 0.9957, "step": 6517 }, { "epoch": 21.37049180327869, "grad_norm": 5.577266693115234, "learning_rate": 1.8281623547558195e-05, "loss": 0.9097, "step": 6518 }, { "epoch": 21.373770491803278, "grad_norm": 5.396088123321533, "learning_rate": 1.828102832565394e-05, "loss": 0.9373, "step": 6519 }, { "epoch": 21.37704918032787, "grad_norm": 6.114706516265869, "learning_rate": 1.8280433010372476e-05, "loss": 0.9991, "step": 6520 }, { "epoch": 21.380327868852458, "grad_norm": 19.039575576782227, "learning_rate": 1.8279837601720525e-05, "loss": 0.824, "step": 6521 }, { "epoch": 21.38360655737705, "grad_norm": 6.3852434158325195, "learning_rate": 1.8279242099704798e-05, "loss": 1.1134, "step": 6522 }, { "epoch": 21.386885245901638, "grad_norm": 5.29438591003418, "learning_rate": 1.8278646504332006e-05, "loss": 0.9756, "step": 6523 }, { "epoch": 21.39016393442623, "grad_norm": 5.897635459899902, "learning_rate": 1.8278050815608865e-05, "loss": 0.9246, "step": 6524 }, { "epoch": 21.39344262295082, "grad_norm": 5.980428695678711, "learning_rate": 1.8277455033542097e-05, "loss": 1.2087, "step": 6525 }, { "epoch": 21.39672131147541, "grad_norm": 5.579648971557617, "learning_rate": 1.8276859158138414e-05, "loss": 0.9283, "step": 6526 }, { "epoch": 21.4, "grad_norm": 5.224280834197998, "learning_rate": 1.827626318940454e-05, "loss": 0.9967, "step": 6527 }, { "epoch": 21.40327868852459, "grad_norm": 5.150885581970215, "learning_rate": 1.8275667127347194e-05, "loss": 1.282, "step": 6528 }, { "epoch": 21.40655737704918, "grad_norm": 4.599416255950928, "learning_rate": 1.8275070971973097e-05, "loss": 0.9295, "step": 6529 }, { "epoch": 21.40983606557377, "grad_norm": 4.985888481140137, "learning_rate": 1.8274474723288967e-05, "loss": 0.7473, "step": 6530 }, { "epoch": 21.41311475409836, "grad_norm": 4.872303009033203, "learning_rate": 1.827387838130154e-05, "loss": 0.9782, "step": 6531 }, { "epoch": 21.41639344262295, "grad_norm": 5.711451053619385, "learning_rate": 1.8273281946017524e-05, "loss": 1.0922, "step": 6532 }, { "epoch": 21.41967213114754, "grad_norm": 5.1774725914001465, "learning_rate": 1.827268541744365e-05, "loss": 1.0408, "step": 6533 }, { "epoch": 21.42295081967213, "grad_norm": 4.979228496551514, "learning_rate": 1.8272088795586654e-05, "loss": 0.92, "step": 6534 }, { "epoch": 21.42622950819672, "grad_norm": 5.638589859008789, "learning_rate": 1.827149208045325e-05, "loss": 1.1717, "step": 6535 }, { "epoch": 21.42950819672131, "grad_norm": 6.123435020446777, "learning_rate": 1.8270895272050175e-05, "loss": 1.0203, "step": 6536 }, { "epoch": 21.432786885245903, "grad_norm": 6.735031604766846, "learning_rate": 1.8270298370384155e-05, "loss": 0.9623, "step": 6537 }, { "epoch": 21.43606557377049, "grad_norm": 4.900485992431641, "learning_rate": 1.8269701375461925e-05, "loss": 1.1334, "step": 6538 }, { "epoch": 21.439344262295084, "grad_norm": 5.6829752922058105, "learning_rate": 1.826910428729021e-05, "loss": 0.8481, "step": 6539 }, { "epoch": 21.442622950819672, "grad_norm": 5.884544372558594, "learning_rate": 1.826850710587575e-05, "loss": 1.03, "step": 6540 }, { "epoch": 21.445901639344264, "grad_norm": 5.2301225662231445, "learning_rate": 1.826790983122527e-05, "loss": 1.1373, "step": 6541 }, { "epoch": 21.449180327868852, "grad_norm": 5.334657669067383, "learning_rate": 1.8267312463345514e-05, "loss": 0.9878, "step": 6542 }, { "epoch": 21.452459016393444, "grad_norm": 6.452695846557617, "learning_rate": 1.8266715002243214e-05, "loss": 0.6725, "step": 6543 }, { "epoch": 21.455737704918032, "grad_norm": 4.874388694763184, "learning_rate": 1.8266117447925108e-05, "loss": 0.8503, "step": 6544 }, { "epoch": 21.459016393442624, "grad_norm": 5.92921257019043, "learning_rate": 1.826551980039793e-05, "loss": 1.0298, "step": 6545 }, { "epoch": 21.462295081967213, "grad_norm": 5.233575820922852, "learning_rate": 1.8264922059668425e-05, "loss": 0.9055, "step": 6546 }, { "epoch": 21.465573770491805, "grad_norm": 5.605851650238037, "learning_rate": 1.826432422574333e-05, "loss": 0.8763, "step": 6547 }, { "epoch": 21.468852459016393, "grad_norm": 4.867798805236816, "learning_rate": 1.8263726298629384e-05, "loss": 0.9213, "step": 6548 }, { "epoch": 21.472131147540985, "grad_norm": 4.360227584838867, "learning_rate": 1.8263128278333334e-05, "loss": 0.962, "step": 6549 }, { "epoch": 21.475409836065573, "grad_norm": 5.542084217071533, "learning_rate": 1.826253016486192e-05, "loss": 0.7966, "step": 6550 }, { "epoch": 21.478688524590165, "grad_norm": 5.782253742218018, "learning_rate": 1.8261931958221892e-05, "loss": 0.9791, "step": 6551 }, { "epoch": 21.481967213114753, "grad_norm": 5.23502779006958, "learning_rate": 1.8261333658419985e-05, "loss": 0.9091, "step": 6552 }, { "epoch": 21.485245901639345, "grad_norm": 4.7440900802612305, "learning_rate": 1.8260735265462957e-05, "loss": 1.0162, "step": 6553 }, { "epoch": 21.488524590163934, "grad_norm": 5.3788981437683105, "learning_rate": 1.8260136779357546e-05, "loss": 0.7621, "step": 6554 }, { "epoch": 21.491803278688526, "grad_norm": 5.843472957611084, "learning_rate": 1.8259538200110505e-05, "loss": 0.7476, "step": 6555 }, { "epoch": 21.495081967213114, "grad_norm": 8.072091102600098, "learning_rate": 1.8258939527728583e-05, "loss": 0.828, "step": 6556 }, { "epoch": 21.498360655737706, "grad_norm": 5.091602325439453, "learning_rate": 1.8258340762218532e-05, "loss": 1.1661, "step": 6557 }, { "epoch": 21.501639344262294, "grad_norm": 5.9877166748046875, "learning_rate": 1.82577419035871e-05, "loss": 0.8782, "step": 6558 }, { "epoch": 21.504918032786886, "grad_norm": 5.479403495788574, "learning_rate": 1.8257142951841043e-05, "loss": 1.1399, "step": 6559 }, { "epoch": 21.508196721311474, "grad_norm": 5.198081970214844, "learning_rate": 1.8256543906987116e-05, "loss": 0.9143, "step": 6560 }, { "epoch": 21.511475409836066, "grad_norm": 7.10084867477417, "learning_rate": 1.825594476903207e-05, "loss": 0.9072, "step": 6561 }, { "epoch": 21.514754098360655, "grad_norm": 5.01294469833374, "learning_rate": 1.8255345537982664e-05, "loss": 0.9282, "step": 6562 }, { "epoch": 21.518032786885247, "grad_norm": 5.395755767822266, "learning_rate": 1.825474621384565e-05, "loss": 0.9579, "step": 6563 }, { "epoch": 21.521311475409835, "grad_norm": 5.734954357147217, "learning_rate": 1.8254146796627793e-05, "loss": 0.9969, "step": 6564 }, { "epoch": 21.524590163934427, "grad_norm": 5.544089317321777, "learning_rate": 1.8253547286335848e-05, "loss": 1.0692, "step": 6565 }, { "epoch": 21.527868852459015, "grad_norm": 6.194305896759033, "learning_rate": 1.8252947682976575e-05, "loss": 0.968, "step": 6566 }, { "epoch": 21.531147540983607, "grad_norm": 5.947877407073975, "learning_rate": 1.8252347986556736e-05, "loss": 0.8215, "step": 6567 }, { "epoch": 21.534426229508195, "grad_norm": 4.665343761444092, "learning_rate": 1.8251748197083098e-05, "loss": 0.9437, "step": 6568 }, { "epoch": 21.537704918032787, "grad_norm": 7.045760154724121, "learning_rate": 1.8251148314562416e-05, "loss": 0.918, "step": 6569 }, { "epoch": 21.540983606557376, "grad_norm": 6.205850124359131, "learning_rate": 1.8250548339001456e-05, "loss": 1.1348, "step": 6570 }, { "epoch": 21.544262295081968, "grad_norm": 4.352831840515137, "learning_rate": 1.8249948270406985e-05, "loss": 1.104, "step": 6571 }, { "epoch": 21.547540983606556, "grad_norm": 7.016147136688232, "learning_rate": 1.8249348108785772e-05, "loss": 0.8571, "step": 6572 }, { "epoch": 21.550819672131148, "grad_norm": 5.8259501457214355, "learning_rate": 1.824874785414458e-05, "loss": 0.9449, "step": 6573 }, { "epoch": 21.554098360655736, "grad_norm": 5.294761657714844, "learning_rate": 1.8248147506490175e-05, "loss": 1.1183, "step": 6574 }, { "epoch": 21.557377049180328, "grad_norm": 4.730048656463623, "learning_rate": 1.824754706582934e-05, "loss": 0.7551, "step": 6575 }, { "epoch": 21.560655737704916, "grad_norm": 4.6038126945495605, "learning_rate": 1.824694653216883e-05, "loss": 0.9865, "step": 6576 }, { "epoch": 21.56393442622951, "grad_norm": 5.926415920257568, "learning_rate": 1.8246345905515427e-05, "loss": 1.0378, "step": 6577 }, { "epoch": 21.567213114754097, "grad_norm": 5.275763988494873, "learning_rate": 1.82457451858759e-05, "loss": 0.9079, "step": 6578 }, { "epoch": 21.57049180327869, "grad_norm": 4.496908664703369, "learning_rate": 1.8245144373257018e-05, "loss": 0.8671, "step": 6579 }, { "epoch": 21.57377049180328, "grad_norm": 5.79690408706665, "learning_rate": 1.8244543467665564e-05, "loss": 0.9796, "step": 6580 }, { "epoch": 21.57704918032787, "grad_norm": 5.056000232696533, "learning_rate": 1.8243942469108308e-05, "loss": 0.9351, "step": 6581 }, { "epoch": 21.58032786885246, "grad_norm": 5.226530075073242, "learning_rate": 1.8243341377592032e-05, "loss": 1.0382, "step": 6582 }, { "epoch": 21.58360655737705, "grad_norm": 5.962601184844971, "learning_rate": 1.824274019312351e-05, "loss": 0.9674, "step": 6583 }, { "epoch": 21.58688524590164, "grad_norm": 5.729540824890137, "learning_rate": 1.824213891570952e-05, "loss": 0.9073, "step": 6584 }, { "epoch": 21.59016393442623, "grad_norm": 5.926267147064209, "learning_rate": 1.8241537545356846e-05, "loss": 0.7568, "step": 6585 }, { "epoch": 21.59344262295082, "grad_norm": 6.317266941070557, "learning_rate": 1.824093608207227e-05, "loss": 0.8174, "step": 6586 }, { "epoch": 21.59672131147541, "grad_norm": 5.1104278564453125, "learning_rate": 1.8240334525862565e-05, "loss": 1.0969, "step": 6587 }, { "epoch": 21.6, "grad_norm": 6.469696521759033, "learning_rate": 1.8239732876734525e-05, "loss": 1.0391, "step": 6588 }, { "epoch": 21.60327868852459, "grad_norm": 5.524281024932861, "learning_rate": 1.823913113469493e-05, "loss": 0.9031, "step": 6589 }, { "epoch": 21.60655737704918, "grad_norm": 3.8291499614715576, "learning_rate": 1.823852929975056e-05, "loss": 1.2129, "step": 6590 }, { "epoch": 21.60983606557377, "grad_norm": 4.55397367477417, "learning_rate": 1.823792737190821e-05, "loss": 0.8323, "step": 6591 }, { "epoch": 21.613114754098362, "grad_norm": 5.932422161102295, "learning_rate": 1.8237325351174663e-05, "loss": 0.798, "step": 6592 }, { "epoch": 21.61639344262295, "grad_norm": 5.144437313079834, "learning_rate": 1.823672323755671e-05, "loss": 1.1569, "step": 6593 }, { "epoch": 21.619672131147542, "grad_norm": 4.996433734893799, "learning_rate": 1.8236121031061136e-05, "loss": 0.8601, "step": 6594 }, { "epoch": 21.62295081967213, "grad_norm": 6.69576358795166, "learning_rate": 1.8235518731694735e-05, "loss": 0.7301, "step": 6595 }, { "epoch": 21.626229508196722, "grad_norm": 5.763123989105225, "learning_rate": 1.8234916339464294e-05, "loss": 0.8608, "step": 6596 }, { "epoch": 21.62950819672131, "grad_norm": 6.073605060577393, "learning_rate": 1.8234313854376613e-05, "loss": 0.7467, "step": 6597 }, { "epoch": 21.632786885245903, "grad_norm": 6.5662841796875, "learning_rate": 1.823371127643848e-05, "loss": 0.8325, "step": 6598 }, { "epoch": 21.63606557377049, "grad_norm": 6.885468006134033, "learning_rate": 1.8233108605656694e-05, "loss": 0.8181, "step": 6599 }, { "epoch": 21.639344262295083, "grad_norm": 7.084211826324463, "learning_rate": 1.8232505842038047e-05, "loss": 0.668, "step": 6600 }, { "epoch": 21.64262295081967, "grad_norm": 4.78737211227417, "learning_rate": 1.8231902985589336e-05, "loss": 0.9674, "step": 6601 }, { "epoch": 21.645901639344263, "grad_norm": 7.997267246246338, "learning_rate": 1.823130003631736e-05, "loss": 1.1223, "step": 6602 }, { "epoch": 21.64918032786885, "grad_norm": 5.01796293258667, "learning_rate": 1.8230696994228917e-05, "loss": 1.1522, "step": 6603 }, { "epoch": 21.652459016393443, "grad_norm": 6.021756649017334, "learning_rate": 1.823009385933081e-05, "loss": 0.9061, "step": 6604 }, { "epoch": 21.65573770491803, "grad_norm": 5.217471122741699, "learning_rate": 1.8229490631629834e-05, "loss": 1.0061, "step": 6605 }, { "epoch": 21.659016393442624, "grad_norm": 6.755669593811035, "learning_rate": 1.8228887311132798e-05, "loss": 0.8638, "step": 6606 }, { "epoch": 21.662295081967212, "grad_norm": 6.547597885131836, "learning_rate": 1.82282838978465e-05, "loss": 0.7417, "step": 6607 }, { "epoch": 21.665573770491804, "grad_norm": 7.959666728973389, "learning_rate": 1.8227680391777746e-05, "loss": 1.1093, "step": 6608 }, { "epoch": 21.668852459016392, "grad_norm": 5.393048286437988, "learning_rate": 1.822707679293334e-05, "loss": 1.1262, "step": 6609 }, { "epoch": 21.672131147540984, "grad_norm": 5.510178089141846, "learning_rate": 1.822647310132009e-05, "loss": 1.0465, "step": 6610 }, { "epoch": 21.675409836065572, "grad_norm": 4.750712871551514, "learning_rate": 1.82258693169448e-05, "loss": 1.0758, "step": 6611 }, { "epoch": 21.678688524590164, "grad_norm": 5.014883995056152, "learning_rate": 1.8225265439814286e-05, "loss": 0.7298, "step": 6612 }, { "epoch": 21.681967213114753, "grad_norm": 5.081958293914795, "learning_rate": 1.8224661469935348e-05, "loss": 0.8578, "step": 6613 }, { "epoch": 21.685245901639345, "grad_norm": 6.050416946411133, "learning_rate": 1.8224057407314803e-05, "loss": 0.9362, "step": 6614 }, { "epoch": 21.688524590163933, "grad_norm": 4.919600486755371, "learning_rate": 1.822345325195946e-05, "loss": 0.8964, "step": 6615 }, { "epoch": 21.691803278688525, "grad_norm": 5.9299845695495605, "learning_rate": 1.822284900387613e-05, "loss": 1.0087, "step": 6616 }, { "epoch": 21.695081967213113, "grad_norm": 4.67531681060791, "learning_rate": 1.8222244663071628e-05, "loss": 0.9176, "step": 6617 }, { "epoch": 21.698360655737705, "grad_norm": 5.764005184173584, "learning_rate": 1.8221640229552767e-05, "loss": 0.9345, "step": 6618 }, { "epoch": 21.701639344262293, "grad_norm": 5.337629795074463, "learning_rate": 1.8221035703326367e-05, "loss": 1.1376, "step": 6619 }, { "epoch": 21.704918032786885, "grad_norm": 7.429557800292969, "learning_rate": 1.8220431084399238e-05, "loss": 1.0056, "step": 6620 }, { "epoch": 21.708196721311474, "grad_norm": 5.395418643951416, "learning_rate": 1.8219826372778203e-05, "loss": 1.0442, "step": 6621 }, { "epoch": 21.711475409836066, "grad_norm": 6.088280200958252, "learning_rate": 1.8219221568470076e-05, "loss": 0.6821, "step": 6622 }, { "epoch": 21.714754098360658, "grad_norm": 5.444976329803467, "learning_rate": 1.8218616671481685e-05, "loss": 1.1555, "step": 6623 }, { "epoch": 21.718032786885246, "grad_norm": 5.433916091918945, "learning_rate": 1.8218011681819846e-05, "loss": 0.9084, "step": 6624 }, { "epoch": 21.721311475409838, "grad_norm": 5.322361469268799, "learning_rate": 1.821740659949138e-05, "loss": 0.834, "step": 6625 }, { "epoch": 21.724590163934426, "grad_norm": 5.094183921813965, "learning_rate": 1.821680142450311e-05, "loss": 0.8046, "step": 6626 }, { "epoch": 21.727868852459018, "grad_norm": 6.876898288726807, "learning_rate": 1.8216196156861858e-05, "loss": 0.858, "step": 6627 }, { "epoch": 21.731147540983606, "grad_norm": 6.56553840637207, "learning_rate": 1.8215590796574454e-05, "loss": 0.9383, "step": 6628 }, { "epoch": 21.7344262295082, "grad_norm": 4.325312614440918, "learning_rate": 1.821498534364772e-05, "loss": 1.2468, "step": 6629 }, { "epoch": 21.737704918032787, "grad_norm": 4.651838779449463, "learning_rate": 1.821437979808849e-05, "loss": 0.9286, "step": 6630 }, { "epoch": 21.74098360655738, "grad_norm": 6.751614570617676, "learning_rate": 1.821377415990358e-05, "loss": 1.0156, "step": 6631 }, { "epoch": 21.744262295081967, "grad_norm": 6.0339765548706055, "learning_rate": 1.8213168429099833e-05, "loss": 0.9517, "step": 6632 }, { "epoch": 21.74754098360656, "grad_norm": 4.956977367401123, "learning_rate": 1.8212562605684067e-05, "loss": 1.1418, "step": 6633 }, { "epoch": 21.750819672131147, "grad_norm": 5.33377742767334, "learning_rate": 1.821195668966312e-05, "loss": 1.0146, "step": 6634 }, { "epoch": 21.75409836065574, "grad_norm": 6.35906982421875, "learning_rate": 1.8211350681043824e-05, "loss": 0.7489, "step": 6635 }, { "epoch": 21.757377049180327, "grad_norm": 5.824113845825195, "learning_rate": 1.821074457983301e-05, "loss": 1.0547, "step": 6636 }, { "epoch": 21.76065573770492, "grad_norm": 5.439515113830566, "learning_rate": 1.8210138386037516e-05, "loss": 0.9912, "step": 6637 }, { "epoch": 21.763934426229508, "grad_norm": 6.644305229187012, "learning_rate": 1.8209532099664177e-05, "loss": 0.9281, "step": 6638 }, { "epoch": 21.7672131147541, "grad_norm": 5.873106956481934, "learning_rate": 1.8208925720719823e-05, "loss": 0.9344, "step": 6639 }, { "epoch": 21.770491803278688, "grad_norm": 4.572545051574707, "learning_rate": 1.8208319249211298e-05, "loss": 1.1224, "step": 6640 }, { "epoch": 21.77377049180328, "grad_norm": 5.702168941497803, "learning_rate": 1.820771268514544e-05, "loss": 0.9523, "step": 6641 }, { "epoch": 21.777049180327868, "grad_norm": 4.781166076660156, "learning_rate": 1.8207106028529086e-05, "loss": 1.0563, "step": 6642 }, { "epoch": 21.78032786885246, "grad_norm": 5.423104763031006, "learning_rate": 1.8206499279369077e-05, "loss": 0.9657, "step": 6643 }, { "epoch": 21.78360655737705, "grad_norm": 6.365234851837158, "learning_rate": 1.820589243767226e-05, "loss": 1.1407, "step": 6644 }, { "epoch": 21.78688524590164, "grad_norm": 5.67294979095459, "learning_rate": 1.8205285503445473e-05, "loss": 0.9795, "step": 6645 }, { "epoch": 21.79016393442623, "grad_norm": 6.353556156158447, "learning_rate": 1.820467847669556e-05, "loss": 0.9475, "step": 6646 }, { "epoch": 21.79344262295082, "grad_norm": 5.7381391525268555, "learning_rate": 1.8204071357429365e-05, "loss": 0.8649, "step": 6647 }, { "epoch": 21.79672131147541, "grad_norm": 6.02487325668335, "learning_rate": 1.8203464145653737e-05, "loss": 0.9026, "step": 6648 }, { "epoch": 21.8, "grad_norm": 7.704397201538086, "learning_rate": 1.8202856841375517e-05, "loss": 1.0266, "step": 6649 }, { "epoch": 21.80327868852459, "grad_norm": 6.724269390106201, "learning_rate": 1.8202249444601564e-05, "loss": 0.8435, "step": 6650 }, { "epoch": 21.80655737704918, "grad_norm": 5.25297737121582, "learning_rate": 1.8201641955338714e-05, "loss": 0.925, "step": 6651 }, { "epoch": 21.80983606557377, "grad_norm": 7.067836761474609, "learning_rate": 1.820103437359383e-05, "loss": 0.8595, "step": 6652 }, { "epoch": 21.81311475409836, "grad_norm": 5.064701080322266, "learning_rate": 1.8200426699373753e-05, "loss": 1.0543, "step": 6653 }, { "epoch": 21.81639344262295, "grad_norm": 5.077996253967285, "learning_rate": 1.819981893268534e-05, "loss": 0.9781, "step": 6654 }, { "epoch": 21.81967213114754, "grad_norm": 5.897151470184326, "learning_rate": 1.819921107353544e-05, "loss": 0.7844, "step": 6655 }, { "epoch": 21.82295081967213, "grad_norm": 5.604306697845459, "learning_rate": 1.819860312193091e-05, "loss": 0.8639, "step": 6656 }, { "epoch": 21.82622950819672, "grad_norm": 5.695985794067383, "learning_rate": 1.819799507787861e-05, "loss": 0.908, "step": 6657 }, { "epoch": 21.82950819672131, "grad_norm": 4.733065605163574, "learning_rate": 1.8197386941385385e-05, "loss": 1.187, "step": 6658 }, { "epoch": 21.832786885245902, "grad_norm": 5.139185428619385, "learning_rate": 1.8196778712458106e-05, "loss": 1.0105, "step": 6659 }, { "epoch": 21.83606557377049, "grad_norm": 7.510830402374268, "learning_rate": 1.819617039110362e-05, "loss": 0.9268, "step": 6660 }, { "epoch": 21.839344262295082, "grad_norm": 5.740429878234863, "learning_rate": 1.8195561977328792e-05, "loss": 0.9104, "step": 6661 }, { "epoch": 21.84262295081967, "grad_norm": 5.939757347106934, "learning_rate": 1.819495347114048e-05, "loss": 0.7768, "step": 6662 }, { "epoch": 21.845901639344262, "grad_norm": 7.928373336791992, "learning_rate": 1.819434487254555e-05, "loss": 0.8796, "step": 6663 }, { "epoch": 21.84918032786885, "grad_norm": 5.585186958312988, "learning_rate": 1.819373618155086e-05, "loss": 0.8276, "step": 6664 }, { "epoch": 21.852459016393443, "grad_norm": 5.569778919219971, "learning_rate": 1.8193127398163277e-05, "loss": 0.7853, "step": 6665 }, { "epoch": 21.855737704918035, "grad_norm": 4.794190883636475, "learning_rate": 1.8192518522389662e-05, "loss": 0.9788, "step": 6666 }, { "epoch": 21.859016393442623, "grad_norm": 4.841946125030518, "learning_rate": 1.8191909554236885e-05, "loss": 0.8763, "step": 6667 }, { "epoch": 21.862295081967215, "grad_norm": 5.397153377532959, "learning_rate": 1.8191300493711808e-05, "loss": 1.0544, "step": 6668 }, { "epoch": 21.865573770491803, "grad_norm": 5.935656547546387, "learning_rate": 1.81906913408213e-05, "loss": 1.1106, "step": 6669 }, { "epoch": 21.868852459016395, "grad_norm": 5.722276210784912, "learning_rate": 1.8190082095572233e-05, "loss": 1.0653, "step": 6670 }, { "epoch": 21.872131147540983, "grad_norm": 5.411489486694336, "learning_rate": 1.8189472757971474e-05, "loss": 1.0243, "step": 6671 }, { "epoch": 21.875409836065575, "grad_norm": 5.642021656036377, "learning_rate": 1.8188863328025896e-05, "loss": 0.8578, "step": 6672 }, { "epoch": 21.878688524590164, "grad_norm": 6.289281845092773, "learning_rate": 1.8188253805742366e-05, "loss": 0.8963, "step": 6673 }, { "epoch": 21.881967213114756, "grad_norm": 5.873016357421875, "learning_rate": 1.8187644191127766e-05, "loss": 0.8912, "step": 6674 }, { "epoch": 21.885245901639344, "grad_norm": 5.629876613616943, "learning_rate": 1.8187034484188963e-05, "loss": 0.7534, "step": 6675 }, { "epoch": 21.888524590163936, "grad_norm": 5.835780620574951, "learning_rate": 1.8186424684932832e-05, "loss": 0.9019, "step": 6676 }, { "epoch": 21.891803278688524, "grad_norm": 5.082607269287109, "learning_rate": 1.818581479336625e-05, "loss": 1.0358, "step": 6677 }, { "epoch": 21.895081967213116, "grad_norm": 7.545527935028076, "learning_rate": 1.8185204809496097e-05, "loss": 0.9021, "step": 6678 }, { "epoch": 21.898360655737704, "grad_norm": 5.725517749786377, "learning_rate": 1.818459473332925e-05, "loss": 0.9585, "step": 6679 }, { "epoch": 21.901639344262296, "grad_norm": 5.353714942932129, "learning_rate": 1.8183984564872588e-05, "loss": 0.9305, "step": 6680 }, { "epoch": 21.904918032786885, "grad_norm": 6.159820556640625, "learning_rate": 1.818337430413299e-05, "loss": 0.9749, "step": 6681 }, { "epoch": 21.908196721311477, "grad_norm": 5.832191467285156, "learning_rate": 1.818276395111734e-05, "loss": 0.8093, "step": 6682 }, { "epoch": 21.911475409836065, "grad_norm": 5.500980377197266, "learning_rate": 1.8182153505832513e-05, "loss": 1.0081, "step": 6683 }, { "epoch": 21.914754098360657, "grad_norm": 7.839158058166504, "learning_rate": 1.8181542968285402e-05, "loss": 1.1113, "step": 6684 }, { "epoch": 21.918032786885245, "grad_norm": 5.983521461486816, "learning_rate": 1.8180932338482885e-05, "loss": 1.0221, "step": 6685 }, { "epoch": 21.921311475409837, "grad_norm": 4.592347621917725, "learning_rate": 1.8180321616431853e-05, "loss": 1.0851, "step": 6686 }, { "epoch": 21.924590163934425, "grad_norm": 5.373589038848877, "learning_rate": 1.8179710802139187e-05, "loss": 0.9609, "step": 6687 }, { "epoch": 21.927868852459017, "grad_norm": 4.365265846252441, "learning_rate": 1.817909989561178e-05, "loss": 1.271, "step": 6688 }, { "epoch": 21.931147540983606, "grad_norm": 4.541933059692383, "learning_rate": 1.8178488896856512e-05, "loss": 0.8353, "step": 6689 }, { "epoch": 21.934426229508198, "grad_norm": 6.727811336517334, "learning_rate": 1.8177877805880283e-05, "loss": 0.9059, "step": 6690 }, { "epoch": 21.937704918032786, "grad_norm": 5.331002712249756, "learning_rate": 1.8177266622689976e-05, "loss": 0.9126, "step": 6691 }, { "epoch": 21.940983606557378, "grad_norm": 5.2635979652404785, "learning_rate": 1.8176655347292485e-05, "loss": 0.9805, "step": 6692 }, { "epoch": 21.944262295081966, "grad_norm": 5.31040096282959, "learning_rate": 1.8176043979694706e-05, "loss": 0.8234, "step": 6693 }, { "epoch": 21.947540983606558, "grad_norm": 6.037671089172363, "learning_rate": 1.817543251990353e-05, "loss": 0.7677, "step": 6694 }, { "epoch": 21.950819672131146, "grad_norm": 5.927662372589111, "learning_rate": 1.8174820967925852e-05, "loss": 1.1132, "step": 6695 }, { "epoch": 21.95409836065574, "grad_norm": 6.118706703186035, "learning_rate": 1.8174209323768567e-05, "loss": 0.8418, "step": 6696 }, { "epoch": 21.957377049180327, "grad_norm": 5.159636497497559, "learning_rate": 1.8173597587438572e-05, "loss": 1.113, "step": 6697 }, { "epoch": 21.96065573770492, "grad_norm": 6.2848639488220215, "learning_rate": 1.817298575894277e-05, "loss": 0.6696, "step": 6698 }, { "epoch": 21.963934426229507, "grad_norm": 6.827221870422363, "learning_rate": 1.8172373838288052e-05, "loss": 0.8033, "step": 6699 }, { "epoch": 21.9672131147541, "grad_norm": 5.511017799377441, "learning_rate": 1.8171761825481323e-05, "loss": 0.906, "step": 6700 }, { "epoch": 21.970491803278687, "grad_norm": 5.600459098815918, "learning_rate": 1.817114972052948e-05, "loss": 0.9435, "step": 6701 }, { "epoch": 21.97377049180328, "grad_norm": 7.35441255569458, "learning_rate": 1.8170537523439432e-05, "loss": 0.9814, "step": 6702 }, { "epoch": 21.977049180327867, "grad_norm": 5.157516956329346, "learning_rate": 1.8169925234218076e-05, "loss": 0.8698, "step": 6703 }, { "epoch": 21.98032786885246, "grad_norm": 10.908531188964844, "learning_rate": 1.816931285287232e-05, "loss": 0.9194, "step": 6704 }, { "epoch": 21.983606557377048, "grad_norm": 4.809062957763672, "learning_rate": 1.8168700379409067e-05, "loss": 1.0792, "step": 6705 }, { "epoch": 21.98688524590164, "grad_norm": 6.186668395996094, "learning_rate": 1.8168087813835223e-05, "loss": 0.9214, "step": 6706 }, { "epoch": 21.990163934426228, "grad_norm": 5.873138904571533, "learning_rate": 1.8167475156157697e-05, "loss": 0.7349, "step": 6707 }, { "epoch": 21.99344262295082, "grad_norm": 6.403918743133545, "learning_rate": 1.8166862406383396e-05, "loss": 0.7454, "step": 6708 }, { "epoch": 21.99672131147541, "grad_norm": 4.564275741577148, "learning_rate": 1.8166249564519233e-05, "loss": 1.0552, "step": 6709 }, { "epoch": 22.0, "grad_norm": 9.04582691192627, "learning_rate": 1.816563663057211e-05, "loss": 1.1525, "step": 6710 }, { "epoch": 22.003278688524592, "grad_norm": 6.890483379364014, "learning_rate": 1.816502360454895e-05, "loss": 0.757, "step": 6711 }, { "epoch": 22.00655737704918, "grad_norm": 5.077732086181641, "learning_rate": 1.8164410486456655e-05, "loss": 0.9646, "step": 6712 }, { "epoch": 22.009836065573772, "grad_norm": 5.092843532562256, "learning_rate": 1.8163797276302143e-05, "loss": 0.9494, "step": 6713 }, { "epoch": 22.01311475409836, "grad_norm": 5.9281907081604, "learning_rate": 1.8163183974092327e-05, "loss": 0.8533, "step": 6714 }, { "epoch": 22.016393442622952, "grad_norm": 7.367257595062256, "learning_rate": 1.8162570579834126e-05, "loss": 0.7455, "step": 6715 }, { "epoch": 22.01967213114754, "grad_norm": 5.650345802307129, "learning_rate": 1.8161957093534456e-05, "loss": 0.6588, "step": 6716 }, { "epoch": 22.022950819672133, "grad_norm": 14.55127239227295, "learning_rate": 1.8161343515200234e-05, "loss": 0.9084, "step": 6717 }, { "epoch": 22.02622950819672, "grad_norm": 5.924037933349609, "learning_rate": 1.816072984483838e-05, "loss": 0.9191, "step": 6718 }, { "epoch": 22.029508196721313, "grad_norm": 7.653966903686523, "learning_rate": 1.8160116082455808e-05, "loss": 0.7137, "step": 6719 }, { "epoch": 22.0327868852459, "grad_norm": 5.334834098815918, "learning_rate": 1.8159502228059443e-05, "loss": 1.0164, "step": 6720 }, { "epoch": 22.036065573770493, "grad_norm": 6.063582897186279, "learning_rate": 1.8158888281656206e-05, "loss": 0.8032, "step": 6721 }, { "epoch": 22.03934426229508, "grad_norm": 4.427256107330322, "learning_rate": 1.8158274243253023e-05, "loss": 0.8663, "step": 6722 }, { "epoch": 22.042622950819673, "grad_norm": 5.133552551269531, "learning_rate": 1.8157660112856814e-05, "loss": 1.238, "step": 6723 }, { "epoch": 22.04590163934426, "grad_norm": 5.701894283294678, "learning_rate": 1.8157045890474505e-05, "loss": 0.9357, "step": 6724 }, { "epoch": 22.049180327868854, "grad_norm": 12.323554039001465, "learning_rate": 1.8156431576113026e-05, "loss": 0.7314, "step": 6725 }, { "epoch": 22.052459016393442, "grad_norm": 6.190957546234131, "learning_rate": 1.8155817169779294e-05, "loss": 0.8082, "step": 6726 }, { "epoch": 22.055737704918034, "grad_norm": 6.078810691833496, "learning_rate": 1.8155202671480247e-05, "loss": 0.8127, "step": 6727 }, { "epoch": 22.059016393442622, "grad_norm": 5.582296371459961, "learning_rate": 1.815458808122281e-05, "loss": 0.9273, "step": 6728 }, { "epoch": 22.062295081967214, "grad_norm": 14.838109016418457, "learning_rate": 1.8153973399013917e-05, "loss": 0.9636, "step": 6729 }, { "epoch": 22.065573770491802, "grad_norm": 5.170914649963379, "learning_rate": 1.815335862486049e-05, "loss": 0.9711, "step": 6730 }, { "epoch": 22.068852459016394, "grad_norm": 5.337457656860352, "learning_rate": 1.8152743758769472e-05, "loss": 0.807, "step": 6731 }, { "epoch": 22.072131147540983, "grad_norm": 5.402651786804199, "learning_rate": 1.815212880074779e-05, "loss": 0.9548, "step": 6732 }, { "epoch": 22.075409836065575, "grad_norm": 6.994699478149414, "learning_rate": 1.8151513750802377e-05, "loss": 0.822, "step": 6733 }, { "epoch": 22.078688524590163, "grad_norm": 6.9192705154418945, "learning_rate": 1.8150898608940175e-05, "loss": 1.0608, "step": 6734 }, { "epoch": 22.081967213114755, "grad_norm": 4.949306488037109, "learning_rate": 1.8150283375168112e-05, "loss": 1.1188, "step": 6735 }, { "epoch": 22.085245901639343, "grad_norm": 5.24221134185791, "learning_rate": 1.8149668049493137e-05, "loss": 0.9266, "step": 6736 }, { "epoch": 22.088524590163935, "grad_norm": 5.955596446990967, "learning_rate": 1.8149052631922175e-05, "loss": 0.84, "step": 6737 }, { "epoch": 22.091803278688523, "grad_norm": 9.747213363647461, "learning_rate": 1.8148437122462175e-05, "loss": 0.9265, "step": 6738 }, { "epoch": 22.095081967213115, "grad_norm": 5.989266872406006, "learning_rate": 1.8147821521120073e-05, "loss": 0.9367, "step": 6739 }, { "epoch": 22.098360655737704, "grad_norm": 5.774700164794922, "learning_rate": 1.8147205827902808e-05, "loss": 0.798, "step": 6740 }, { "epoch": 22.101639344262296, "grad_norm": 6.530672550201416, "learning_rate": 1.8146590042817332e-05, "loss": 0.9858, "step": 6741 }, { "epoch": 22.104918032786884, "grad_norm": 5.201679229736328, "learning_rate": 1.814597416587058e-05, "loss": 0.8115, "step": 6742 }, { "epoch": 22.108196721311476, "grad_norm": 4.507617950439453, "learning_rate": 1.81453581970695e-05, "loss": 0.7339, "step": 6743 }, { "epoch": 22.111475409836064, "grad_norm": 4.629678726196289, "learning_rate": 1.814474213642104e-05, "loss": 1.2693, "step": 6744 }, { "epoch": 22.114754098360656, "grad_norm": 5.266994953155518, "learning_rate": 1.814412598393214e-05, "loss": 0.7159, "step": 6745 }, { "epoch": 22.118032786885244, "grad_norm": 5.669022560119629, "learning_rate": 1.8143509739609753e-05, "loss": 0.8682, "step": 6746 }, { "epoch": 22.121311475409836, "grad_norm": 5.012202262878418, "learning_rate": 1.8142893403460827e-05, "loss": 1.0051, "step": 6747 }, { "epoch": 22.124590163934425, "grad_norm": 7.586751937866211, "learning_rate": 1.8142276975492312e-05, "loss": 0.8827, "step": 6748 }, { "epoch": 22.127868852459017, "grad_norm": 4.776618480682373, "learning_rate": 1.8141660455711156e-05, "loss": 1.1172, "step": 6749 }, { "epoch": 22.131147540983605, "grad_norm": 5.765000343322754, "learning_rate": 1.8141043844124316e-05, "loss": 0.8093, "step": 6750 }, { "epoch": 22.134426229508197, "grad_norm": 6.158092975616455, "learning_rate": 1.814042714073874e-05, "loss": 0.8127, "step": 6751 }, { "epoch": 22.137704918032785, "grad_norm": 5.24444580078125, "learning_rate": 1.8139810345561385e-05, "loss": 0.8113, "step": 6752 }, { "epoch": 22.140983606557377, "grad_norm": 6.285796165466309, "learning_rate": 1.8139193458599204e-05, "loss": 1.0631, "step": 6753 }, { "epoch": 22.14426229508197, "grad_norm": 6.225743293762207, "learning_rate": 1.8138576479859154e-05, "loss": 1.0247, "step": 6754 }, { "epoch": 22.147540983606557, "grad_norm": 5.587831974029541, "learning_rate": 1.8137959409348193e-05, "loss": 0.989, "step": 6755 }, { "epoch": 22.15081967213115, "grad_norm": 4.908051490783691, "learning_rate": 1.8137342247073278e-05, "loss": 0.9088, "step": 6756 }, { "epoch": 22.154098360655738, "grad_norm": 6.61565637588501, "learning_rate": 1.8136724993041374e-05, "loss": 1.1418, "step": 6757 }, { "epoch": 22.15737704918033, "grad_norm": 5.393927097320557, "learning_rate": 1.813610764725943e-05, "loss": 1.2136, "step": 6758 }, { "epoch": 22.160655737704918, "grad_norm": 9.02811336517334, "learning_rate": 1.813549020973441e-05, "loss": 0.8611, "step": 6759 }, { "epoch": 22.16393442622951, "grad_norm": 5.856217861175537, "learning_rate": 1.8134872680473285e-05, "loss": 0.9082, "step": 6760 }, { "epoch": 22.167213114754098, "grad_norm": 4.45147180557251, "learning_rate": 1.8134255059483008e-05, "loss": 1.0535, "step": 6761 }, { "epoch": 22.17049180327869, "grad_norm": 5.3635993003845215, "learning_rate": 1.8133637346770552e-05, "loss": 1.0524, "step": 6762 }, { "epoch": 22.17377049180328, "grad_norm": 4.9072041511535645, "learning_rate": 1.8133019542342873e-05, "loss": 0.9109, "step": 6763 }, { "epoch": 22.17704918032787, "grad_norm": 5.542000770568848, "learning_rate": 1.8132401646206947e-05, "loss": 0.7983, "step": 6764 }, { "epoch": 22.18032786885246, "grad_norm": 4.8948283195495605, "learning_rate": 1.8131783658369736e-05, "loss": 0.7599, "step": 6765 }, { "epoch": 22.18360655737705, "grad_norm": 6.1069488525390625, "learning_rate": 1.8131165578838207e-05, "loss": 0.7602, "step": 6766 }, { "epoch": 22.18688524590164, "grad_norm": 5.073257923126221, "learning_rate": 1.813054740761933e-05, "loss": 0.7631, "step": 6767 }, { "epoch": 22.19016393442623, "grad_norm": 5.177496910095215, "learning_rate": 1.8129929144720082e-05, "loss": 0.7397, "step": 6768 }, { "epoch": 22.19344262295082, "grad_norm": 6.811901569366455, "learning_rate": 1.8129310790147428e-05, "loss": 0.7743, "step": 6769 }, { "epoch": 22.19672131147541, "grad_norm": 6.866921901702881, "learning_rate": 1.8128692343908346e-05, "loss": 0.7749, "step": 6770 }, { "epoch": 22.2, "grad_norm": 5.900049686431885, "learning_rate": 1.81280738060098e-05, "loss": 1.1416, "step": 6771 }, { "epoch": 22.20327868852459, "grad_norm": 7.275408744812012, "learning_rate": 1.8127455176458775e-05, "loss": 0.769, "step": 6772 }, { "epoch": 22.20655737704918, "grad_norm": 5.402504920959473, "learning_rate": 1.812683645526224e-05, "loss": 0.8039, "step": 6773 }, { "epoch": 22.20983606557377, "grad_norm": 6.085797309875488, "learning_rate": 1.8126217642427174e-05, "loss": 0.8702, "step": 6774 }, { "epoch": 22.21311475409836, "grad_norm": 5.75750207901001, "learning_rate": 1.8125598737960558e-05, "loss": 0.7299, "step": 6775 }, { "epoch": 22.21639344262295, "grad_norm": 5.70616340637207, "learning_rate": 1.8124979741869368e-05, "loss": 0.8517, "step": 6776 }, { "epoch": 22.21967213114754, "grad_norm": 9.659333229064941, "learning_rate": 1.812436065416058e-05, "loss": 0.879, "step": 6777 }, { "epoch": 22.222950819672132, "grad_norm": 6.680561542510986, "learning_rate": 1.8123741474841178e-05, "loss": 1.1024, "step": 6778 }, { "epoch": 22.22622950819672, "grad_norm": 7.868546485900879, "learning_rate": 1.8123122203918147e-05, "loss": 0.8155, "step": 6779 }, { "epoch": 22.229508196721312, "grad_norm": 5.006355285644531, "learning_rate": 1.8122502841398467e-05, "loss": 0.9775, "step": 6780 }, { "epoch": 22.2327868852459, "grad_norm": 4.936673641204834, "learning_rate": 1.8121883387289122e-05, "loss": 0.5933, "step": 6781 }, { "epoch": 22.236065573770492, "grad_norm": 4.56491231918335, "learning_rate": 1.8121263841597097e-05, "loss": 0.9363, "step": 6782 }, { "epoch": 22.23934426229508, "grad_norm": 6.987270355224609, "learning_rate": 1.8120644204329376e-05, "loss": 0.8491, "step": 6783 }, { "epoch": 22.242622950819673, "grad_norm": 5.632044315338135, "learning_rate": 1.812002447549295e-05, "loss": 0.8169, "step": 6784 }, { "epoch": 22.24590163934426, "grad_norm": 5.30575704574585, "learning_rate": 1.811940465509481e-05, "loss": 0.6316, "step": 6785 }, { "epoch": 22.249180327868853, "grad_norm": 5.158592224121094, "learning_rate": 1.811878474314193e-05, "loss": 0.8423, "step": 6786 }, { "epoch": 22.25245901639344, "grad_norm": 5.304257392883301, "learning_rate": 1.811816473964132e-05, "loss": 0.8148, "step": 6787 }, { "epoch": 22.255737704918033, "grad_norm": 4.709700107574463, "learning_rate": 1.8117544644599955e-05, "loss": 0.9715, "step": 6788 }, { "epoch": 22.25901639344262, "grad_norm": 5.315721035003662, "learning_rate": 1.8116924458024838e-05, "loss": 0.9037, "step": 6789 }, { "epoch": 22.262295081967213, "grad_norm": 5.481518745422363, "learning_rate": 1.811630417992296e-05, "loss": 0.7974, "step": 6790 }, { "epoch": 22.2655737704918, "grad_norm": 6.018019199371338, "learning_rate": 1.811568381030131e-05, "loss": 0.7863, "step": 6791 }, { "epoch": 22.268852459016394, "grad_norm": 5.245169162750244, "learning_rate": 1.8115063349166887e-05, "loss": 0.789, "step": 6792 }, { "epoch": 22.272131147540982, "grad_norm": 4.883037090301514, "learning_rate": 1.811444279652669e-05, "loss": 0.7782, "step": 6793 }, { "epoch": 22.275409836065574, "grad_norm": 4.9863739013671875, "learning_rate": 1.811382215238771e-05, "loss": 1.1968, "step": 6794 }, { "epoch": 22.278688524590162, "grad_norm": 6.4580817222595215, "learning_rate": 1.811320141675695e-05, "loss": 0.6656, "step": 6795 }, { "epoch": 22.281967213114754, "grad_norm": 6.257705211639404, "learning_rate": 1.8112580589641412e-05, "loss": 0.8018, "step": 6796 }, { "epoch": 22.285245901639342, "grad_norm": 20.08660316467285, "learning_rate": 1.811195967104809e-05, "loss": 0.887, "step": 6797 }, { "epoch": 22.288524590163934, "grad_norm": 5.1904520988464355, "learning_rate": 1.8111338660983988e-05, "loss": 0.937, "step": 6798 }, { "epoch": 22.291803278688526, "grad_norm": 5.103592872619629, "learning_rate": 1.811071755945611e-05, "loss": 0.9567, "step": 6799 }, { "epoch": 22.295081967213115, "grad_norm": 4.623490810394287, "learning_rate": 1.8110096366471458e-05, "loss": 0.9598, "step": 6800 }, { "epoch": 22.298360655737707, "grad_norm": 4.947482585906982, "learning_rate": 1.8109475082037033e-05, "loss": 0.9328, "step": 6801 }, { "epoch": 22.301639344262295, "grad_norm": 5.781299591064453, "learning_rate": 1.810885370615985e-05, "loss": 0.8386, "step": 6802 }, { "epoch": 22.304918032786887, "grad_norm": 4.870994567871094, "learning_rate": 1.810823223884691e-05, "loss": 0.8153, "step": 6803 }, { "epoch": 22.308196721311475, "grad_norm": 5.82897424697876, "learning_rate": 1.810761068010522e-05, "loss": 0.84, "step": 6804 }, { "epoch": 22.311475409836067, "grad_norm": 5.68779993057251, "learning_rate": 1.810698902994179e-05, "loss": 1.0725, "step": 6805 }, { "epoch": 22.314754098360655, "grad_norm": 6.765839099884033, "learning_rate": 1.8106367288363625e-05, "loss": 0.8005, "step": 6806 }, { "epoch": 22.318032786885247, "grad_norm": 6.195949077606201, "learning_rate": 1.8105745455377748e-05, "loss": 1.0107, "step": 6807 }, { "epoch": 22.321311475409836, "grad_norm": 5.617513656616211, "learning_rate": 1.8105123530991158e-05, "loss": 0.9131, "step": 6808 }, { "epoch": 22.324590163934428, "grad_norm": 6.17894172668457, "learning_rate": 1.810450151521088e-05, "loss": 0.7978, "step": 6809 }, { "epoch": 22.327868852459016, "grad_norm": 5.667329788208008, "learning_rate": 1.8103879408043913e-05, "loss": 0.8491, "step": 6810 }, { "epoch": 22.331147540983608, "grad_norm": 9.257843017578125, "learning_rate": 1.810325720949728e-05, "loss": 0.8973, "step": 6811 }, { "epoch": 22.334426229508196, "grad_norm": 6.284240245819092, "learning_rate": 1.8102634919578e-05, "loss": 0.807, "step": 6812 }, { "epoch": 22.337704918032788, "grad_norm": 6.487415313720703, "learning_rate": 1.8102012538293087e-05, "loss": 0.7567, "step": 6813 }, { "epoch": 22.340983606557376, "grad_norm": 5.463231563568115, "learning_rate": 1.8101390065649555e-05, "loss": 1.0608, "step": 6814 }, { "epoch": 22.34426229508197, "grad_norm": 6.861771583557129, "learning_rate": 1.810076750165443e-05, "loss": 0.7391, "step": 6815 }, { "epoch": 22.347540983606557, "grad_norm": 5.587484359741211, "learning_rate": 1.8100144846314728e-05, "loss": 0.9059, "step": 6816 }, { "epoch": 22.35081967213115, "grad_norm": 5.304640769958496, "learning_rate": 1.809952209963747e-05, "loss": 0.9886, "step": 6817 }, { "epoch": 22.354098360655737, "grad_norm": 5.698103427886963, "learning_rate": 1.8098899261629678e-05, "loss": 0.7054, "step": 6818 }, { "epoch": 22.35737704918033, "grad_norm": 6.105955123901367, "learning_rate": 1.8098276332298378e-05, "loss": 1.0756, "step": 6819 }, { "epoch": 22.360655737704917, "grad_norm": 5.663609027862549, "learning_rate": 1.809765331165059e-05, "loss": 0.8906, "step": 6820 }, { "epoch": 22.36393442622951, "grad_norm": 6.116049766540527, "learning_rate": 1.8097030199693345e-05, "loss": 0.8994, "step": 6821 }, { "epoch": 22.367213114754097, "grad_norm": 4.739365577697754, "learning_rate": 1.8096406996433664e-05, "loss": 1.0297, "step": 6822 }, { "epoch": 22.37049180327869, "grad_norm": 5.0799880027771, "learning_rate": 1.809578370187858e-05, "loss": 1.1827, "step": 6823 }, { "epoch": 22.373770491803278, "grad_norm": 5.022462368011475, "learning_rate": 1.809516031603511e-05, "loss": 1.1943, "step": 6824 }, { "epoch": 22.37704918032787, "grad_norm": 6.1290507316589355, "learning_rate": 1.8094536838910294e-05, "loss": 0.7775, "step": 6825 }, { "epoch": 22.380327868852458, "grad_norm": 6.900681018829346, "learning_rate": 1.8093913270511158e-05, "loss": 0.811, "step": 6826 }, { "epoch": 22.38360655737705, "grad_norm": 5.121540069580078, "learning_rate": 1.8093289610844737e-05, "loss": 1.0146, "step": 6827 }, { "epoch": 22.386885245901638, "grad_norm": 5.8854241371154785, "learning_rate": 1.8092665859918058e-05, "loss": 1.0173, "step": 6828 }, { "epoch": 22.39016393442623, "grad_norm": 5.364675045013428, "learning_rate": 1.8092042017738158e-05, "loss": 0.8473, "step": 6829 }, { "epoch": 22.39344262295082, "grad_norm": 6.723122596740723, "learning_rate": 1.8091418084312067e-05, "loss": 0.871, "step": 6830 }, { "epoch": 22.39672131147541, "grad_norm": 13.32523250579834, "learning_rate": 1.809079405964683e-05, "loss": 0.9454, "step": 6831 }, { "epoch": 22.4, "grad_norm": 5.638190269470215, "learning_rate": 1.8090169943749477e-05, "loss": 0.8569, "step": 6832 }, { "epoch": 22.40327868852459, "grad_norm": 6.0144524574279785, "learning_rate": 1.8089545736627044e-05, "loss": 1.171, "step": 6833 }, { "epoch": 22.40655737704918, "grad_norm": 7.428286552429199, "learning_rate": 1.8088921438286573e-05, "loss": 0.926, "step": 6834 }, { "epoch": 22.40983606557377, "grad_norm": 6.31215763092041, "learning_rate": 1.80882970487351e-05, "loss": 0.9196, "step": 6835 }, { "epoch": 22.41311475409836, "grad_norm": 5.449139595031738, "learning_rate": 1.808767256797967e-05, "loss": 1.0478, "step": 6836 }, { "epoch": 22.41639344262295, "grad_norm": 5.078422546386719, "learning_rate": 1.8087047996027323e-05, "loss": 1.0983, "step": 6837 }, { "epoch": 22.41967213114754, "grad_norm": 12.361398696899414, "learning_rate": 1.80864233328851e-05, "loss": 0.8631, "step": 6838 }, { "epoch": 22.42295081967213, "grad_norm": 6.565329074859619, "learning_rate": 1.8085798578560047e-05, "loss": 0.7949, "step": 6839 }, { "epoch": 22.42622950819672, "grad_norm": 6.089427947998047, "learning_rate": 1.808517373305921e-05, "loss": 0.8512, "step": 6840 }, { "epoch": 22.42950819672131, "grad_norm": 5.971826076507568, "learning_rate": 1.808454879638963e-05, "loss": 0.7685, "step": 6841 }, { "epoch": 22.432786885245903, "grad_norm": 5.607061386108398, "learning_rate": 1.8083923768558354e-05, "loss": 1.1653, "step": 6842 }, { "epoch": 22.43606557377049, "grad_norm": 5.038521766662598, "learning_rate": 1.8083298649572438e-05, "loss": 0.8186, "step": 6843 }, { "epoch": 22.439344262295084, "grad_norm": 6.05923318862915, "learning_rate": 1.8082673439438926e-05, "loss": 1.0383, "step": 6844 }, { "epoch": 22.442622950819672, "grad_norm": 5.940454483032227, "learning_rate": 1.8082048138164862e-05, "loss": 0.6983, "step": 6845 }, { "epoch": 22.445901639344264, "grad_norm": 5.440424919128418, "learning_rate": 1.8081422745757303e-05, "loss": 0.7631, "step": 6846 }, { "epoch": 22.449180327868852, "grad_norm": 5.178029537200928, "learning_rate": 1.80807972622233e-05, "loss": 0.8978, "step": 6847 }, { "epoch": 22.452459016393444, "grad_norm": 5.4316253662109375, "learning_rate": 1.808017168756991e-05, "loss": 1.0074, "step": 6848 }, { "epoch": 22.455737704918032, "grad_norm": 5.436670303344727, "learning_rate": 1.8079546021804178e-05, "loss": 1.0222, "step": 6849 }, { "epoch": 22.459016393442624, "grad_norm": 7.461001396179199, "learning_rate": 1.8078920264933165e-05, "loss": 0.9073, "step": 6850 }, { "epoch": 22.462295081967213, "grad_norm": 6.478816986083984, "learning_rate": 1.8078294416963925e-05, "loss": 0.8857, "step": 6851 }, { "epoch": 22.465573770491805, "grad_norm": 8.778402328491211, "learning_rate": 1.8077668477903518e-05, "loss": 0.7928, "step": 6852 }, { "epoch": 22.468852459016393, "grad_norm": 6.125204086303711, "learning_rate": 1.8077042447759002e-05, "loss": 0.775, "step": 6853 }, { "epoch": 22.472131147540985, "grad_norm": 6.018715858459473, "learning_rate": 1.807641632653743e-05, "loss": 1.0449, "step": 6854 }, { "epoch": 22.475409836065573, "grad_norm": 5.646608352661133, "learning_rate": 1.8075790114245873e-05, "loss": 0.8878, "step": 6855 }, { "epoch": 22.478688524590165, "grad_norm": 5.149289131164551, "learning_rate": 1.8075163810891378e-05, "loss": 1.1189, "step": 6856 }, { "epoch": 22.481967213114753, "grad_norm": 5.695766448974609, "learning_rate": 1.807453741648102e-05, "loss": 0.8029, "step": 6857 }, { "epoch": 22.485245901639345, "grad_norm": 5.4318952560424805, "learning_rate": 1.8073910931021855e-05, "loss": 1.0681, "step": 6858 }, { "epoch": 22.488524590163934, "grad_norm": 5.023657321929932, "learning_rate": 1.8073284354520952e-05, "loss": 0.9206, "step": 6859 }, { "epoch": 22.491803278688526, "grad_norm": 5.791038990020752, "learning_rate": 1.807265768698537e-05, "loss": 0.9184, "step": 6860 }, { "epoch": 22.495081967213114, "grad_norm": 5.569212436676025, "learning_rate": 1.8072030928422183e-05, "loss": 0.9785, "step": 6861 }, { "epoch": 22.498360655737706, "grad_norm": 5.384968280792236, "learning_rate": 1.8071404078838454e-05, "loss": 0.7969, "step": 6862 }, { "epoch": 22.501639344262294, "grad_norm": 6.848830699920654, "learning_rate": 1.807077713824125e-05, "loss": 1.0016, "step": 6863 }, { "epoch": 22.504918032786886, "grad_norm": 5.576313495635986, "learning_rate": 1.807015010663764e-05, "loss": 0.8929, "step": 6864 }, { "epoch": 22.508196721311474, "grad_norm": 6.785794734954834, "learning_rate": 1.8069522984034703e-05, "loss": 0.9713, "step": 6865 }, { "epoch": 22.511475409836066, "grad_norm": 5.810428142547607, "learning_rate": 1.80688957704395e-05, "loss": 0.9865, "step": 6866 }, { "epoch": 22.514754098360655, "grad_norm": 4.880155086517334, "learning_rate": 1.806826846585911e-05, "loss": 0.9721, "step": 6867 }, { "epoch": 22.518032786885247, "grad_norm": 5.219918251037598, "learning_rate": 1.8067641070300602e-05, "loss": 0.7423, "step": 6868 }, { "epoch": 22.521311475409835, "grad_norm": 5.715041160583496, "learning_rate": 1.8067013583771052e-05, "loss": 0.9122, "step": 6869 }, { "epoch": 22.524590163934427, "grad_norm": 7.475972652435303, "learning_rate": 1.806638600627754e-05, "loss": 0.9111, "step": 6870 }, { "epoch": 22.527868852459015, "grad_norm": 5.64440393447876, "learning_rate": 1.8065758337827135e-05, "loss": 0.8452, "step": 6871 }, { "epoch": 22.531147540983607, "grad_norm": 7.689429759979248, "learning_rate": 1.806513057842692e-05, "loss": 0.8118, "step": 6872 }, { "epoch": 22.534426229508195, "grad_norm": 5.644529342651367, "learning_rate": 1.8064502728083973e-05, "loss": 0.8489, "step": 6873 }, { "epoch": 22.537704918032787, "grad_norm": 6.3821940422058105, "learning_rate": 1.806387478680537e-05, "loss": 1.0309, "step": 6874 }, { "epoch": 22.540983606557376, "grad_norm": 12.456870079040527, "learning_rate": 1.80632467545982e-05, "loss": 0.7675, "step": 6875 }, { "epoch": 22.544262295081968, "grad_norm": 7.425981044769287, "learning_rate": 1.806261863146953e-05, "loss": 0.8736, "step": 6876 }, { "epoch": 22.547540983606556, "grad_norm": 6.305254936218262, "learning_rate": 1.806199041742646e-05, "loss": 0.8717, "step": 6877 }, { "epoch": 22.550819672131148, "grad_norm": 7.1285881996154785, "learning_rate": 1.806136211247606e-05, "loss": 0.8829, "step": 6878 }, { "epoch": 22.554098360655736, "grad_norm": 5.238653659820557, "learning_rate": 1.8060733716625427e-05, "loss": 0.8439, "step": 6879 }, { "epoch": 22.557377049180328, "grad_norm": 57.04012680053711, "learning_rate": 1.8060105229881635e-05, "loss": 0.8075, "step": 6880 }, { "epoch": 22.560655737704916, "grad_norm": 5.513030052185059, "learning_rate": 1.8059476652251778e-05, "loss": 0.7917, "step": 6881 }, { "epoch": 22.56393442622951, "grad_norm": 5.661765098571777, "learning_rate": 1.8058847983742943e-05, "loss": 0.8821, "step": 6882 }, { "epoch": 22.567213114754097, "grad_norm": 6.521851539611816, "learning_rate": 1.8058219224362217e-05, "loss": 0.6955, "step": 6883 }, { "epoch": 22.57049180327869, "grad_norm": 6.580411911010742, "learning_rate": 1.805759037411669e-05, "loss": 1.0389, "step": 6884 }, { "epoch": 22.57377049180328, "grad_norm": 6.918982028961182, "learning_rate": 1.8056961433013455e-05, "loss": 0.7371, "step": 6885 }, { "epoch": 22.57704918032787, "grad_norm": 7.679631233215332, "learning_rate": 1.80563324010596e-05, "loss": 0.8486, "step": 6886 }, { "epoch": 22.58032786885246, "grad_norm": 9.810779571533203, "learning_rate": 1.805570327826222e-05, "loss": 0.9303, "step": 6887 }, { "epoch": 22.58360655737705, "grad_norm": 8.106207847595215, "learning_rate": 1.8055074064628416e-05, "loss": 0.7149, "step": 6888 }, { "epoch": 22.58688524590164, "grad_norm": 7.556046962738037, "learning_rate": 1.805444476016527e-05, "loss": 0.7817, "step": 6889 }, { "epoch": 22.59016393442623, "grad_norm": 6.585160255432129, "learning_rate": 1.805381536487989e-05, "loss": 0.8264, "step": 6890 }, { "epoch": 22.59344262295082, "grad_norm": 8.627057075500488, "learning_rate": 1.8053185878779364e-05, "loss": 0.9581, "step": 6891 }, { "epoch": 22.59672131147541, "grad_norm": 8.136783599853516, "learning_rate": 1.80525563018708e-05, "loss": 1.076, "step": 6892 }, { "epoch": 22.6, "grad_norm": 6.09505558013916, "learning_rate": 1.8051926634161282e-05, "loss": 1.0166, "step": 6893 }, { "epoch": 22.60327868852459, "grad_norm": 7.115209579467773, "learning_rate": 1.8051296875657928e-05, "loss": 1.1184, "step": 6894 }, { "epoch": 22.60655737704918, "grad_norm": 6.462308406829834, "learning_rate": 1.8050667026367827e-05, "loss": 0.7109, "step": 6895 }, { "epoch": 22.60983606557377, "grad_norm": 8.992422103881836, "learning_rate": 1.805003708629808e-05, "loss": 0.8757, "step": 6896 }, { "epoch": 22.613114754098362, "grad_norm": 8.277982711791992, "learning_rate": 1.8049407055455802e-05, "loss": 0.9124, "step": 6897 }, { "epoch": 22.61639344262295, "grad_norm": 6.272273540496826, "learning_rate": 1.804877693384809e-05, "loss": 0.9399, "step": 6898 }, { "epoch": 22.619672131147542, "grad_norm": 57.76407241821289, "learning_rate": 1.8048146721482044e-05, "loss": 1.0599, "step": 6899 }, { "epoch": 22.62295081967213, "grad_norm": 7.064625263214111, "learning_rate": 1.804751641836478e-05, "loss": 0.7275, "step": 6900 }, { "epoch": 22.626229508196722, "grad_norm": 6.599218845367432, "learning_rate": 1.80468860245034e-05, "loss": 0.9437, "step": 6901 }, { "epoch": 22.62950819672131, "grad_norm": 7.54197359085083, "learning_rate": 1.804625553990501e-05, "loss": 0.9561, "step": 6902 }, { "epoch": 22.632786885245903, "grad_norm": 9.145936965942383, "learning_rate": 1.8045624964576727e-05, "loss": 0.7281, "step": 6903 }, { "epoch": 22.63606557377049, "grad_norm": 9.498848915100098, "learning_rate": 1.8044994298525657e-05, "loss": 0.8898, "step": 6904 }, { "epoch": 22.639344262295083, "grad_norm": 7.9025468826293945, "learning_rate": 1.804436354175891e-05, "loss": 1.0149, "step": 6905 }, { "epoch": 22.64262295081967, "grad_norm": 8.33644962310791, "learning_rate": 1.80437326942836e-05, "loss": 0.9474, "step": 6906 }, { "epoch": 22.645901639344263, "grad_norm": 9.445107460021973, "learning_rate": 1.804310175610684e-05, "loss": 0.8195, "step": 6907 }, { "epoch": 22.64918032786885, "grad_norm": 6.649044990539551, "learning_rate": 1.8042470727235746e-05, "loss": 1.1672, "step": 6908 }, { "epoch": 22.652459016393443, "grad_norm": 6.8272705078125, "learning_rate": 1.8041839607677432e-05, "loss": 1.0554, "step": 6909 }, { "epoch": 22.65573770491803, "grad_norm": 11.602812767028809, "learning_rate": 1.8041208397439017e-05, "loss": 0.9158, "step": 6910 }, { "epoch": 22.659016393442624, "grad_norm": 6.946040630340576, "learning_rate": 1.8040577096527616e-05, "loss": 0.8632, "step": 6911 }, { "epoch": 22.662295081967212, "grad_norm": 8.362730026245117, "learning_rate": 1.803994570495035e-05, "loss": 1.0258, "step": 6912 }, { "epoch": 22.665573770491804, "grad_norm": 6.656410217285156, "learning_rate": 1.803931422271433e-05, "loss": 0.9933, "step": 6913 }, { "epoch": 22.668852459016392, "grad_norm": 5.689185619354248, "learning_rate": 1.8038682649826687e-05, "loss": 0.89, "step": 6914 }, { "epoch": 22.672131147540984, "grad_norm": 7.173707485198975, "learning_rate": 1.803805098629454e-05, "loss": 0.9527, "step": 6915 }, { "epoch": 22.675409836065572, "grad_norm": 5.9302659034729, "learning_rate": 1.803741923212501e-05, "loss": 1.0414, "step": 6916 }, { "epoch": 22.678688524590164, "grad_norm": 6.0836405754089355, "learning_rate": 1.803678738732522e-05, "loss": 0.8447, "step": 6917 }, { "epoch": 22.681967213114753, "grad_norm": 10.76025676727295, "learning_rate": 1.8036155451902298e-05, "loss": 0.6914, "step": 6918 }, { "epoch": 22.685245901639345, "grad_norm": 8.741257667541504, "learning_rate": 1.8035523425863368e-05, "loss": 0.7225, "step": 6919 }, { "epoch": 22.688524590163933, "grad_norm": 7.168607234954834, "learning_rate": 1.8034891309215555e-05, "loss": 0.9789, "step": 6920 }, { "epoch": 22.691803278688525, "grad_norm": 26.572620391845703, "learning_rate": 1.803425910196599e-05, "loss": 0.9355, "step": 6921 }, { "epoch": 22.695081967213113, "grad_norm": 6.950128555297852, "learning_rate": 1.80336268041218e-05, "loss": 1.0466, "step": 6922 }, { "epoch": 22.698360655737705, "grad_norm": 6.291616439819336, "learning_rate": 1.803299441569011e-05, "loss": 1.0972, "step": 6923 }, { "epoch": 22.701639344262293, "grad_norm": 161.67770385742188, "learning_rate": 1.8032361936678063e-05, "loss": 0.9243, "step": 6924 }, { "epoch": 22.704918032786885, "grad_norm": 11.763191223144531, "learning_rate": 1.803172936709278e-05, "loss": 0.9004, "step": 6925 }, { "epoch": 22.708196721311474, "grad_norm": 12.447731971740723, "learning_rate": 1.80310967069414e-05, "loss": 0.8507, "step": 6926 }, { "epoch": 22.711475409836066, "grad_norm": 25.991100311279297, "learning_rate": 1.803046395623105e-05, "loss": 0.9271, "step": 6927 }, { "epoch": 22.714754098360658, "grad_norm": 8.768335342407227, "learning_rate": 1.8029831114968872e-05, "loss": 1.0081, "step": 6928 }, { "epoch": 22.718032786885246, "grad_norm": 20.67568588256836, "learning_rate": 1.8029198183162e-05, "loss": 1.0796, "step": 6929 }, { "epoch": 22.721311475409838, "grad_norm": 16.084354400634766, "learning_rate": 1.8028565160817567e-05, "loss": 1.282, "step": 6930 }, { "epoch": 22.724590163934426, "grad_norm": 7.7188591957092285, "learning_rate": 1.8027932047942717e-05, "loss": 1.0029, "step": 6931 }, { "epoch": 22.727868852459018, "grad_norm": 12.375055313110352, "learning_rate": 1.8027298844544585e-05, "loss": 1.0042, "step": 6932 }, { "epoch": 22.731147540983606, "grad_norm": 9.541023254394531, "learning_rate": 1.802666555063031e-05, "loss": 1.0575, "step": 6933 }, { "epoch": 22.7344262295082, "grad_norm": 9.938715934753418, "learning_rate": 1.802603216620704e-05, "loss": 1.1034, "step": 6934 }, { "epoch": 22.737704918032787, "grad_norm": 9.143087387084961, "learning_rate": 1.802539869128191e-05, "loss": 1.0094, "step": 6935 }, { "epoch": 22.74098360655738, "grad_norm": 6.9230170249938965, "learning_rate": 1.8024765125862064e-05, "loss": 1.287, "step": 6936 }, { "epoch": 22.744262295081967, "grad_norm": 14.74780559539795, "learning_rate": 1.8024131469954652e-05, "loss": 0.8273, "step": 6937 }, { "epoch": 22.74754098360656, "grad_norm": 6.560512542724609, "learning_rate": 1.802349772356681e-05, "loss": 0.9399, "step": 6938 }, { "epoch": 22.750819672131147, "grad_norm": 6.334825038909912, "learning_rate": 1.8022863886705692e-05, "loss": 0.9019, "step": 6939 }, { "epoch": 22.75409836065574, "grad_norm": 14.367955207824707, "learning_rate": 1.8022229959378438e-05, "loss": 0.9656, "step": 6940 }, { "epoch": 22.757377049180327, "grad_norm": 13.700589179992676, "learning_rate": 1.8021595941592206e-05, "loss": 0.9104, "step": 6941 }, { "epoch": 22.76065573770492, "grad_norm": 5.583145618438721, "learning_rate": 1.8020961833354133e-05, "loss": 1.0398, "step": 6942 }, { "epoch": 22.763934426229508, "grad_norm": 13.401606559753418, "learning_rate": 1.802032763467138e-05, "loss": 0.9501, "step": 6943 }, { "epoch": 22.7672131147541, "grad_norm": 6.195559501647949, "learning_rate": 1.8019693345551093e-05, "loss": 1.1253, "step": 6944 }, { "epoch": 22.770491803278688, "grad_norm": 6.944559574127197, "learning_rate": 1.8019058966000426e-05, "loss": 1.1653, "step": 6945 }, { "epoch": 22.77377049180328, "grad_norm": 6.164637565612793, "learning_rate": 1.8018424496026528e-05, "loss": 1.0942, "step": 6946 }, { "epoch": 22.777049180327868, "grad_norm": 7.717836380004883, "learning_rate": 1.801778993563656e-05, "loss": 1.1589, "step": 6947 }, { "epoch": 22.78032786885246, "grad_norm": 17.53165626525879, "learning_rate": 1.8017155284837672e-05, "loss": 1.1581, "step": 6948 }, { "epoch": 22.78360655737705, "grad_norm": 8.270157814025879, "learning_rate": 1.8016520543637025e-05, "loss": 0.9525, "step": 6949 }, { "epoch": 22.78688524590164, "grad_norm": 8.346734046936035, "learning_rate": 1.801588571204177e-05, "loss": 1.0183, "step": 6950 }, { "epoch": 22.79016393442623, "grad_norm": 8.039310455322266, "learning_rate": 1.8015250790059075e-05, "loss": 1.1038, "step": 6951 }, { "epoch": 22.79344262295082, "grad_norm": 9.441753387451172, "learning_rate": 1.801461577769609e-05, "loss": 1.0015, "step": 6952 }, { "epoch": 22.79672131147541, "grad_norm": 8.56203842163086, "learning_rate": 1.8013980674959975e-05, "loss": 1.1034, "step": 6953 }, { "epoch": 22.8, "grad_norm": 6.995936870574951, "learning_rate": 1.8013345481857903e-05, "loss": 0.7405, "step": 6954 }, { "epoch": 22.80327868852459, "grad_norm": 8.712996482849121, "learning_rate": 1.8012710198397022e-05, "loss": 0.9034, "step": 6955 }, { "epoch": 22.80655737704918, "grad_norm": 10.733874320983887, "learning_rate": 1.801207482458451e-05, "loss": 0.7762, "step": 6956 }, { "epoch": 22.80983606557377, "grad_norm": 7.70270299911499, "learning_rate": 1.8011439360427517e-05, "loss": 1.0511, "step": 6957 }, { "epoch": 22.81311475409836, "grad_norm": 5.498237133026123, "learning_rate": 1.8010803805933217e-05, "loss": 0.9363, "step": 6958 }, { "epoch": 22.81639344262295, "grad_norm": 9.120363235473633, "learning_rate": 1.801016816110878e-05, "loss": 0.9746, "step": 6959 }, { "epoch": 22.81967213114754, "grad_norm": 9.287430763244629, "learning_rate": 1.800953242596136e-05, "loss": 0.8162, "step": 6960 }, { "epoch": 22.82295081967213, "grad_norm": 7.803481101989746, "learning_rate": 1.8008896600498142e-05, "loss": 0.8441, "step": 6961 }, { "epoch": 22.82622950819672, "grad_norm": 7.095364093780518, "learning_rate": 1.8008260684726282e-05, "loss": 1.0051, "step": 6962 }, { "epoch": 22.82950819672131, "grad_norm": 35.419471740722656, "learning_rate": 1.800762467865296e-05, "loss": 1.059, "step": 6963 }, { "epoch": 22.832786885245902, "grad_norm": 11.42062759399414, "learning_rate": 1.800698858228534e-05, "loss": 0.8802, "step": 6964 }, { "epoch": 22.83606557377049, "grad_norm": 6.975757122039795, "learning_rate": 1.8006352395630604e-05, "loss": 0.8745, "step": 6965 }, { "epoch": 22.839344262295082, "grad_norm": 7.0426025390625, "learning_rate": 1.8005716118695916e-05, "loss": 1.0337, "step": 6966 }, { "epoch": 22.84262295081967, "grad_norm": 8.363186836242676, "learning_rate": 1.8005079751488455e-05, "loss": 1.0967, "step": 6967 }, { "epoch": 22.845901639344262, "grad_norm": 12.237454414367676, "learning_rate": 1.8004443294015396e-05, "loss": 1.2556, "step": 6968 }, { "epoch": 22.84918032786885, "grad_norm": 6.167757511138916, "learning_rate": 1.800380674628392e-05, "loss": 0.8251, "step": 6969 }, { "epoch": 22.852459016393443, "grad_norm": 6.579389572143555, "learning_rate": 1.8003170108301198e-05, "loss": 0.9803, "step": 6970 }, { "epoch": 22.855737704918035, "grad_norm": 8.114043235778809, "learning_rate": 1.8002533380074413e-05, "loss": 1.0017, "step": 6971 }, { "epoch": 22.859016393442623, "grad_norm": 5.753868103027344, "learning_rate": 1.8001896561610746e-05, "loss": 0.9921, "step": 6972 }, { "epoch": 22.862295081967215, "grad_norm": 12.585323333740234, "learning_rate": 1.800125965291737e-05, "loss": 0.8103, "step": 6973 }, { "epoch": 22.865573770491803, "grad_norm": 5.987026691436768, "learning_rate": 1.8000622654001476e-05, "loss": 0.9364, "step": 6974 }, { "epoch": 22.868852459016395, "grad_norm": 6.653991222381592, "learning_rate": 1.7999985564870243e-05, "loss": 0.8986, "step": 6975 }, { "epoch": 22.872131147540983, "grad_norm": 7.477581977844238, "learning_rate": 1.799934838553085e-05, "loss": 0.8456, "step": 6976 }, { "epoch": 22.875409836065575, "grad_norm": 8.354751586914062, "learning_rate": 1.7998711115990494e-05, "loss": 0.9516, "step": 6977 }, { "epoch": 22.878688524590164, "grad_norm": 7.048787593841553, "learning_rate": 1.799807375625635e-05, "loss": 1.1466, "step": 6978 }, { "epoch": 22.881967213114756, "grad_norm": 6.539187908172607, "learning_rate": 1.7997436306335608e-05, "loss": 1.043, "step": 6979 }, { "epoch": 22.885245901639344, "grad_norm": 8.093921661376953, "learning_rate": 1.799679876623546e-05, "loss": 0.9976, "step": 6980 }, { "epoch": 22.888524590163936, "grad_norm": 9.19268798828125, "learning_rate": 1.7996161135963085e-05, "loss": 0.9553, "step": 6981 }, { "epoch": 22.891803278688524, "grad_norm": 8.630496978759766, "learning_rate": 1.7995523415525684e-05, "loss": 1.105, "step": 6982 }, { "epoch": 22.895081967213116, "grad_norm": 5.783855438232422, "learning_rate": 1.799488560493044e-05, "loss": 1.0635, "step": 6983 }, { "epoch": 22.898360655737704, "grad_norm": 6.611963748931885, "learning_rate": 1.799424770418455e-05, "loss": 0.9857, "step": 6984 }, { "epoch": 22.901639344262296, "grad_norm": 8.158426284790039, "learning_rate": 1.7993609713295204e-05, "loss": 0.9761, "step": 6985 }, { "epoch": 22.904918032786885, "grad_norm": 10.299657821655273, "learning_rate": 1.7992971632269603e-05, "loss": 1.0709, "step": 6986 }, { "epoch": 22.908196721311477, "grad_norm": 7.327781677246094, "learning_rate": 1.799233346111493e-05, "loss": 0.7892, "step": 6987 }, { "epoch": 22.911475409836065, "grad_norm": 5.930708408355713, "learning_rate": 1.7991695199838388e-05, "loss": 0.9781, "step": 6988 }, { "epoch": 22.914754098360657, "grad_norm": 6.755891799926758, "learning_rate": 1.7991056848447175e-05, "loss": 0.9044, "step": 6989 }, { "epoch": 22.918032786885245, "grad_norm": 6.282683849334717, "learning_rate": 1.7990418406948488e-05, "loss": 0.7275, "step": 6990 }, { "epoch": 22.921311475409837, "grad_norm": 4.50921630859375, "learning_rate": 1.7989779875349524e-05, "loss": 1.0657, "step": 6991 }, { "epoch": 22.924590163934425, "grad_norm": 8.640826225280762, "learning_rate": 1.7989141253657486e-05, "loss": 1.0479, "step": 6992 }, { "epoch": 22.927868852459017, "grad_norm": 5.622028350830078, "learning_rate": 1.798850254187957e-05, "loss": 1.0127, "step": 6993 }, { "epoch": 22.931147540983606, "grad_norm": 6.404699325561523, "learning_rate": 1.7987863740022985e-05, "loss": 0.8158, "step": 6994 }, { "epoch": 22.934426229508198, "grad_norm": 5.149402618408203, "learning_rate": 1.7987224848094932e-05, "loss": 0.9452, "step": 6995 }, { "epoch": 22.937704918032786, "grad_norm": 5.710061073303223, "learning_rate": 1.798658586610261e-05, "loss": 0.9926, "step": 6996 }, { "epoch": 22.940983606557378, "grad_norm": 6.897494792938232, "learning_rate": 1.7985946794053234e-05, "loss": 0.759, "step": 6997 }, { "epoch": 22.944262295081966, "grad_norm": 5.421336650848389, "learning_rate": 1.7985307631954e-05, "loss": 0.7194, "step": 6998 }, { "epoch": 22.947540983606558, "grad_norm": 5.513829708099365, "learning_rate": 1.798466837981212e-05, "loss": 0.9841, "step": 6999 }, { "epoch": 22.950819672131146, "grad_norm": 6.957974433898926, "learning_rate": 1.7984029037634804e-05, "loss": 0.9658, "step": 7000 }, { "epoch": 22.95409836065574, "grad_norm": 6.680821418762207, "learning_rate": 1.798338960542926e-05, "loss": 1.0246, "step": 7001 }, { "epoch": 22.957377049180327, "grad_norm": 6.308154106140137, "learning_rate": 1.7982750083202698e-05, "loss": 0.9573, "step": 7002 }, { "epoch": 22.96065573770492, "grad_norm": 6.809289455413818, "learning_rate": 1.7982110470962325e-05, "loss": 0.7219, "step": 7003 }, { "epoch": 22.963934426229507, "grad_norm": 7.159542083740234, "learning_rate": 1.798147076871536e-05, "loss": 1.0062, "step": 7004 }, { "epoch": 22.9672131147541, "grad_norm": 5.567622184753418, "learning_rate": 1.7980830976469015e-05, "loss": 1.0215, "step": 7005 }, { "epoch": 22.970491803278687, "grad_norm": 5.548670768737793, "learning_rate": 1.7980191094230497e-05, "loss": 0.9357, "step": 7006 }, { "epoch": 22.97377049180328, "grad_norm": 7.246184349060059, "learning_rate": 1.7979551122007035e-05, "loss": 0.9573, "step": 7007 }, { "epoch": 22.977049180327867, "grad_norm": 4.882528305053711, "learning_rate": 1.797891105980583e-05, "loss": 1.1738, "step": 7008 }, { "epoch": 22.98032786885246, "grad_norm": 6.3142619132995605, "learning_rate": 1.797827090763411e-05, "loss": 0.9823, "step": 7009 }, { "epoch": 22.983606557377048, "grad_norm": 8.273298263549805, "learning_rate": 1.797763066549909e-05, "loss": 0.9532, "step": 7010 }, { "epoch": 22.98688524590164, "grad_norm": 6.964803695678711, "learning_rate": 1.797699033340799e-05, "loss": 1.026, "step": 7011 }, { "epoch": 22.990163934426228, "grad_norm": 4.963547706604004, "learning_rate": 1.797634991136803e-05, "loss": 0.8807, "step": 7012 }, { "epoch": 22.99344262295082, "grad_norm": 4.8795294761657715, "learning_rate": 1.7975709399386432e-05, "loss": 0.9573, "step": 7013 }, { "epoch": 22.99672131147541, "grad_norm": 5.020573616027832, "learning_rate": 1.797506879747042e-05, "loss": 0.8658, "step": 7014 }, { "epoch": 23.0, "grad_norm": 7.282156944274902, "learning_rate": 1.797442810562721e-05, "loss": 0.7756, "step": 7015 }, { "epoch": 23.003278688524592, "grad_norm": 5.083094120025635, "learning_rate": 1.7973787323864035e-05, "loss": 0.7592, "step": 7016 }, { "epoch": 23.00655737704918, "grad_norm": 6.316598415374756, "learning_rate": 1.7973146452188114e-05, "loss": 0.8478, "step": 7017 }, { "epoch": 23.009836065573772, "grad_norm": 5.184215545654297, "learning_rate": 1.7972505490606682e-05, "loss": 0.8342, "step": 7018 }, { "epoch": 23.01311475409836, "grad_norm": 5.127924919128418, "learning_rate": 1.7971864439126957e-05, "loss": 0.7816, "step": 7019 }, { "epoch": 23.016393442622952, "grad_norm": 4.608743667602539, "learning_rate": 1.7971223297756172e-05, "loss": 1.109, "step": 7020 }, { "epoch": 23.01967213114754, "grad_norm": 4.939444065093994, "learning_rate": 1.7970582066501557e-05, "loss": 0.8981, "step": 7021 }, { "epoch": 23.022950819672133, "grad_norm": 6.778903961181641, "learning_rate": 1.7969940745370344e-05, "loss": 0.6761, "step": 7022 }, { "epoch": 23.02622950819672, "grad_norm": 6.761502265930176, "learning_rate": 1.796929933436976e-05, "loss": 0.8554, "step": 7023 }, { "epoch": 23.029508196721313, "grad_norm": 5.692233085632324, "learning_rate": 1.7968657833507043e-05, "loss": 0.9207, "step": 7024 }, { "epoch": 23.0327868852459, "grad_norm": 6.537456512451172, "learning_rate": 1.796801624278942e-05, "loss": 0.7425, "step": 7025 }, { "epoch": 23.036065573770493, "grad_norm": 6.909492015838623, "learning_rate": 1.796737456222413e-05, "loss": 0.8677, "step": 7026 }, { "epoch": 23.03934426229508, "grad_norm": 5.591643333435059, "learning_rate": 1.796673279181841e-05, "loss": 0.8075, "step": 7027 }, { "epoch": 23.042622950819673, "grad_norm": 5.798859596252441, "learning_rate": 1.7966090931579493e-05, "loss": 0.9333, "step": 7028 }, { "epoch": 23.04590163934426, "grad_norm": 5.178957939147949, "learning_rate": 1.7965448981514617e-05, "loss": 0.8317, "step": 7029 }, { "epoch": 23.049180327868854, "grad_norm": 4.879865646362305, "learning_rate": 1.7964806941631024e-05, "loss": 1.0223, "step": 7030 }, { "epoch": 23.052459016393442, "grad_norm": 5.519809722900391, "learning_rate": 1.796416481193595e-05, "loss": 0.8132, "step": 7031 }, { "epoch": 23.055737704918034, "grad_norm": 5.987165927886963, "learning_rate": 1.7963522592436638e-05, "loss": 1.0436, "step": 7032 }, { "epoch": 23.059016393442622, "grad_norm": 5.73972749710083, "learning_rate": 1.7962880283140328e-05, "loss": 0.8942, "step": 7033 }, { "epoch": 23.062295081967214, "grad_norm": 4.607187747955322, "learning_rate": 1.7962237884054264e-05, "loss": 1.1924, "step": 7034 }, { "epoch": 23.065573770491802, "grad_norm": 4.6068315505981445, "learning_rate": 1.7961595395185685e-05, "loss": 0.9557, "step": 7035 }, { "epoch": 23.068852459016394, "grad_norm": 5.8743157386779785, "learning_rate": 1.7960952816541847e-05, "loss": 0.816, "step": 7036 }, { "epoch": 23.072131147540983, "grad_norm": 5.832014083862305, "learning_rate": 1.796031014812998e-05, "loss": 0.8827, "step": 7037 }, { "epoch": 23.075409836065575, "grad_norm": 5.201381683349609, "learning_rate": 1.795966738995735e-05, "loss": 0.7855, "step": 7038 }, { "epoch": 23.078688524590163, "grad_norm": 7.197151184082031, "learning_rate": 1.7959024542031187e-05, "loss": 0.8198, "step": 7039 }, { "epoch": 23.081967213114755, "grad_norm": 5.344561576843262, "learning_rate": 1.795838160435875e-05, "loss": 0.8882, "step": 7040 }, { "epoch": 23.085245901639343, "grad_norm": 5.321225166320801, "learning_rate": 1.7957738576947283e-05, "loss": 0.7962, "step": 7041 }, { "epoch": 23.088524590163935, "grad_norm": 5.078014373779297, "learning_rate": 1.795709545980404e-05, "loss": 1.0656, "step": 7042 }, { "epoch": 23.091803278688523, "grad_norm": 8.132012367248535, "learning_rate": 1.7956452252936275e-05, "loss": 0.7586, "step": 7043 }, { "epoch": 23.095081967213115, "grad_norm": 7.498835563659668, "learning_rate": 1.7955808956351237e-05, "loss": 1.0084, "step": 7044 }, { "epoch": 23.098360655737704, "grad_norm": 5.573091506958008, "learning_rate": 1.7955165570056184e-05, "loss": 0.838, "step": 7045 }, { "epoch": 23.101639344262296, "grad_norm": 10.234796524047852, "learning_rate": 1.7954522094058363e-05, "loss": 1.0304, "step": 7046 }, { "epoch": 23.104918032786884, "grad_norm": 5.314974784851074, "learning_rate": 1.7953878528365035e-05, "loss": 0.8145, "step": 7047 }, { "epoch": 23.108196721311476, "grad_norm": 5.2900872230529785, "learning_rate": 1.795323487298346e-05, "loss": 0.7504, "step": 7048 }, { "epoch": 23.111475409836064, "grad_norm": 5.956100940704346, "learning_rate": 1.795259112792089e-05, "loss": 0.9513, "step": 7049 }, { "epoch": 23.114754098360656, "grad_norm": 6.318099498748779, "learning_rate": 1.7951947293184587e-05, "loss": 0.722, "step": 7050 }, { "epoch": 23.118032786885244, "grad_norm": 5.738046646118164, "learning_rate": 1.7951303368781808e-05, "loss": 1.1454, "step": 7051 }, { "epoch": 23.121311475409836, "grad_norm": 5.628579616546631, "learning_rate": 1.7950659354719823e-05, "loss": 0.9135, "step": 7052 }, { "epoch": 23.124590163934425, "grad_norm": 5.976893901824951, "learning_rate": 1.795001525100588e-05, "loss": 0.8427, "step": 7053 }, { "epoch": 23.127868852459017, "grad_norm": 5.052789211273193, "learning_rate": 1.7949371057647255e-05, "loss": 0.9475, "step": 7054 }, { "epoch": 23.131147540983605, "grad_norm": 6.455163955688477, "learning_rate": 1.79487267746512e-05, "loss": 0.9427, "step": 7055 }, { "epoch": 23.134426229508197, "grad_norm": 5.8110432624816895, "learning_rate": 1.794808240202499e-05, "loss": 0.8872, "step": 7056 }, { "epoch": 23.137704918032785, "grad_norm": 5.6614179611206055, "learning_rate": 1.7947437939775887e-05, "loss": 0.8038, "step": 7057 }, { "epoch": 23.140983606557377, "grad_norm": 5.51983642578125, "learning_rate": 1.7946793387911156e-05, "loss": 0.7538, "step": 7058 }, { "epoch": 23.14426229508197, "grad_norm": 5.900461196899414, "learning_rate": 1.794614874643807e-05, "loss": 1.0841, "step": 7059 }, { "epoch": 23.147540983606557, "grad_norm": 5.0782012939453125, "learning_rate": 1.7945504015363894e-05, "loss": 0.9254, "step": 7060 }, { "epoch": 23.15081967213115, "grad_norm": 7.2651872634887695, "learning_rate": 1.7944859194695896e-05, "loss": 0.8766, "step": 7061 }, { "epoch": 23.154098360655738, "grad_norm": 5.477117538452148, "learning_rate": 1.7944214284441353e-05, "loss": 0.6539, "step": 7062 }, { "epoch": 23.15737704918033, "grad_norm": 4.571362018585205, "learning_rate": 1.7943569284607533e-05, "loss": 0.8901, "step": 7063 }, { "epoch": 23.160655737704918, "grad_norm": 6.052667140960693, "learning_rate": 1.7942924195201707e-05, "loss": 1.0777, "step": 7064 }, { "epoch": 23.16393442622951, "grad_norm": 5.897639274597168, "learning_rate": 1.7942279016231156e-05, "loss": 0.9948, "step": 7065 }, { "epoch": 23.167213114754098, "grad_norm": 4.232841968536377, "learning_rate": 1.794163374770315e-05, "loss": 0.9303, "step": 7066 }, { "epoch": 23.17049180327869, "grad_norm": 7.640323638916016, "learning_rate": 1.7940988389624968e-05, "loss": 0.9641, "step": 7067 }, { "epoch": 23.17377049180328, "grad_norm": 11.178056716918945, "learning_rate": 1.7940342942003884e-05, "loss": 1.1096, "step": 7068 }, { "epoch": 23.17704918032787, "grad_norm": 4.799566745758057, "learning_rate": 1.7939697404847175e-05, "loss": 0.7478, "step": 7069 }, { "epoch": 23.18032786885246, "grad_norm": 6.014063835144043, "learning_rate": 1.7939051778162126e-05, "loss": 1.139, "step": 7070 }, { "epoch": 23.18360655737705, "grad_norm": 5.521416664123535, "learning_rate": 1.7938406061956012e-05, "loss": 0.9473, "step": 7071 }, { "epoch": 23.18688524590164, "grad_norm": 6.536008834838867, "learning_rate": 1.7937760256236117e-05, "loss": 0.9055, "step": 7072 }, { "epoch": 23.19016393442623, "grad_norm": 4.7888617515563965, "learning_rate": 1.793711436100972e-05, "loss": 0.9607, "step": 7073 }, { "epoch": 23.19344262295082, "grad_norm": 6.8582963943481445, "learning_rate": 1.793646837628411e-05, "loss": 0.551, "step": 7074 }, { "epoch": 23.19672131147541, "grad_norm": 5.559733867645264, "learning_rate": 1.7935822302066564e-05, "loss": 0.9915, "step": 7075 }, { "epoch": 23.2, "grad_norm": 5.029134273529053, "learning_rate": 1.793517613836437e-05, "loss": 1.0255, "step": 7076 }, { "epoch": 23.20327868852459, "grad_norm": 4.9402995109558105, "learning_rate": 1.7934529885184817e-05, "loss": 0.9297, "step": 7077 }, { "epoch": 23.20655737704918, "grad_norm": 5.605754375457764, "learning_rate": 1.7933883542535185e-05, "loss": 0.6212, "step": 7078 }, { "epoch": 23.20983606557377, "grad_norm": 5.09221887588501, "learning_rate": 1.7933237110422773e-05, "loss": 0.6515, "step": 7079 }, { "epoch": 23.21311475409836, "grad_norm": 5.322080135345459, "learning_rate": 1.793259058885486e-05, "loss": 0.8359, "step": 7080 }, { "epoch": 23.21639344262295, "grad_norm": 6.4895782470703125, "learning_rate": 1.7931943977838742e-05, "loss": 0.8018, "step": 7081 }, { "epoch": 23.21967213114754, "grad_norm": 8.048748016357422, "learning_rate": 1.793129727738171e-05, "loss": 0.5961, "step": 7082 }, { "epoch": 23.222950819672132, "grad_norm": 5.283876419067383, "learning_rate": 1.7930650487491047e-05, "loss": 0.9191, "step": 7083 }, { "epoch": 23.22622950819672, "grad_norm": 5.286937236785889, "learning_rate": 1.7930003608174062e-05, "loss": 0.8822, "step": 7084 }, { "epoch": 23.229508196721312, "grad_norm": 5.03430700302124, "learning_rate": 1.792935663943804e-05, "loss": 1.047, "step": 7085 }, { "epoch": 23.2327868852459, "grad_norm": 5.108869552612305, "learning_rate": 1.7928709581290276e-05, "loss": 0.9001, "step": 7086 }, { "epoch": 23.236065573770492, "grad_norm": 6.357734203338623, "learning_rate": 1.7928062433738065e-05, "loss": 0.7058, "step": 7087 }, { "epoch": 23.23934426229508, "grad_norm": 6.112579822540283, "learning_rate": 1.792741519678871e-05, "loss": 0.6809, "step": 7088 }, { "epoch": 23.242622950819673, "grad_norm": 5.7001776695251465, "learning_rate": 1.7926767870449507e-05, "loss": 0.6172, "step": 7089 }, { "epoch": 23.24590163934426, "grad_norm": 9.045286178588867, "learning_rate": 1.7926120454727753e-05, "loss": 0.8396, "step": 7090 }, { "epoch": 23.249180327868853, "grad_norm": 6.574052333831787, "learning_rate": 1.792547294963075e-05, "loss": 0.7508, "step": 7091 }, { "epoch": 23.25245901639344, "grad_norm": 6.109435081481934, "learning_rate": 1.79248253551658e-05, "loss": 1.0422, "step": 7092 }, { "epoch": 23.255737704918033, "grad_norm": 5.975630283355713, "learning_rate": 1.7924177671340205e-05, "loss": 0.815, "step": 7093 }, { "epoch": 23.25901639344262, "grad_norm": 4.779273986816406, "learning_rate": 1.7923529898161263e-05, "loss": 0.9724, "step": 7094 }, { "epoch": 23.262295081967213, "grad_norm": 5.1238579750061035, "learning_rate": 1.7922882035636287e-05, "loss": 0.7042, "step": 7095 }, { "epoch": 23.2655737704918, "grad_norm": 5.83358097076416, "learning_rate": 1.7922234083772577e-05, "loss": 0.8568, "step": 7096 }, { "epoch": 23.268852459016394, "grad_norm": 5.95493745803833, "learning_rate": 1.7921586042577442e-05, "loss": 0.6868, "step": 7097 }, { "epoch": 23.272131147540982, "grad_norm": 5.635855674743652, "learning_rate": 1.7920937912058187e-05, "loss": 0.8154, "step": 7098 }, { "epoch": 23.275409836065574, "grad_norm": 4.934078693389893, "learning_rate": 1.7920289692222123e-05, "loss": 0.9946, "step": 7099 }, { "epoch": 23.278688524590162, "grad_norm": 6.324696063995361, "learning_rate": 1.791964138307656e-05, "loss": 1.021, "step": 7100 }, { "epoch": 23.281967213114754, "grad_norm": 5.196893692016602, "learning_rate": 1.7918992984628798e-05, "loss": 0.8206, "step": 7101 }, { "epoch": 23.285245901639342, "grad_norm": 5.798224449157715, "learning_rate": 1.791834449688616e-05, "loss": 0.9056, "step": 7102 }, { "epoch": 23.288524590163934, "grad_norm": 6.180045127868652, "learning_rate": 1.7917695919855957e-05, "loss": 0.6725, "step": 7103 }, { "epoch": 23.291803278688526, "grad_norm": 6.0213236808776855, "learning_rate": 1.79170472535455e-05, "loss": 0.9142, "step": 7104 }, { "epoch": 23.295081967213115, "grad_norm": 6.605349063873291, "learning_rate": 1.79163984979621e-05, "loss": 0.8021, "step": 7105 }, { "epoch": 23.298360655737707, "grad_norm": 5.941660404205322, "learning_rate": 1.7915749653113078e-05, "loss": 1.0454, "step": 7106 }, { "epoch": 23.301639344262295, "grad_norm": 5.029720306396484, "learning_rate": 1.7915100719005748e-05, "loss": 0.9877, "step": 7107 }, { "epoch": 23.304918032786887, "grad_norm": 5.702342510223389, "learning_rate": 1.791445169564743e-05, "loss": 0.8264, "step": 7108 }, { "epoch": 23.308196721311475, "grad_norm": 5.065276145935059, "learning_rate": 1.791380258304544e-05, "loss": 0.7712, "step": 7109 }, { "epoch": 23.311475409836067, "grad_norm": 5.246519088745117, "learning_rate": 1.7913153381207095e-05, "loss": 0.8771, "step": 7110 }, { "epoch": 23.314754098360655, "grad_norm": 5.343735218048096, "learning_rate": 1.791250409013972e-05, "loss": 0.7465, "step": 7111 }, { "epoch": 23.318032786885247, "grad_norm": 5.377294063568115, "learning_rate": 1.791185470985063e-05, "loss": 1.0156, "step": 7112 }, { "epoch": 23.321311475409836, "grad_norm": 12.556446075439453, "learning_rate": 1.7911205240347157e-05, "loss": 0.7271, "step": 7113 }, { "epoch": 23.324590163934428, "grad_norm": 4.875969409942627, "learning_rate": 1.7910555681636616e-05, "loss": 0.8408, "step": 7114 }, { "epoch": 23.327868852459016, "grad_norm": 5.287415981292725, "learning_rate": 1.7909906033726337e-05, "loss": 0.9709, "step": 7115 }, { "epoch": 23.331147540983608, "grad_norm": 5.5266804695129395, "learning_rate": 1.790925629662364e-05, "loss": 0.8611, "step": 7116 }, { "epoch": 23.334426229508196, "grad_norm": 5.309285640716553, "learning_rate": 1.790860647033586e-05, "loss": 0.826, "step": 7117 }, { "epoch": 23.337704918032788, "grad_norm": 13.638751029968262, "learning_rate": 1.790795655487032e-05, "loss": 0.6729, "step": 7118 }, { "epoch": 23.340983606557376, "grad_norm": 6.754258632659912, "learning_rate": 1.790730655023434e-05, "loss": 0.9121, "step": 7119 }, { "epoch": 23.34426229508197, "grad_norm": 5.893278121948242, "learning_rate": 1.7906656456435263e-05, "loss": 0.9271, "step": 7120 }, { "epoch": 23.347540983606557, "grad_norm": 5.964935779571533, "learning_rate": 1.790600627348041e-05, "loss": 0.7313, "step": 7121 }, { "epoch": 23.35081967213115, "grad_norm": 5.098182201385498, "learning_rate": 1.790535600137712e-05, "loss": 0.8405, "step": 7122 }, { "epoch": 23.354098360655737, "grad_norm": 5.1277360916137695, "learning_rate": 1.7904705640132717e-05, "loss": 0.8986, "step": 7123 }, { "epoch": 23.35737704918033, "grad_norm": 6.591854095458984, "learning_rate": 1.7904055189754544e-05, "loss": 0.7115, "step": 7124 }, { "epoch": 23.360655737704917, "grad_norm": 6.162096977233887, "learning_rate": 1.790340465024993e-05, "loss": 0.8249, "step": 7125 }, { "epoch": 23.36393442622951, "grad_norm": 5.150374889373779, "learning_rate": 1.7902754021626206e-05, "loss": 0.9124, "step": 7126 }, { "epoch": 23.367213114754097, "grad_norm": 6.033200740814209, "learning_rate": 1.790210330389072e-05, "loss": 0.9199, "step": 7127 }, { "epoch": 23.37049180327869, "grad_norm": 6.033899784088135, "learning_rate": 1.79014524970508e-05, "loss": 1.0018, "step": 7128 }, { "epoch": 23.373770491803278, "grad_norm": 4.369616508483887, "learning_rate": 1.7900801601113786e-05, "loss": 0.9727, "step": 7129 }, { "epoch": 23.37704918032787, "grad_norm": 6.088437080383301, "learning_rate": 1.790015061608702e-05, "loss": 0.9431, "step": 7130 }, { "epoch": 23.380327868852458, "grad_norm": 5.190618991851807, "learning_rate": 1.7899499541977844e-05, "loss": 0.9047, "step": 7131 }, { "epoch": 23.38360655737705, "grad_norm": 5.2790422439575195, "learning_rate": 1.7898848378793595e-05, "loss": 0.9462, "step": 7132 }, { "epoch": 23.386885245901638, "grad_norm": 4.764057636260986, "learning_rate": 1.789819712654162e-05, "loss": 0.6395, "step": 7133 }, { "epoch": 23.39016393442623, "grad_norm": 4.602517604827881, "learning_rate": 1.789754578522926e-05, "loss": 1.0829, "step": 7134 }, { "epoch": 23.39344262295082, "grad_norm": 5.7218852043151855, "learning_rate": 1.7896894354863855e-05, "loss": 0.7672, "step": 7135 }, { "epoch": 23.39672131147541, "grad_norm": 5.534334659576416, "learning_rate": 1.789624283545276e-05, "loss": 0.7474, "step": 7136 }, { "epoch": 23.4, "grad_norm": 5.412461280822754, "learning_rate": 1.7895591227003316e-05, "loss": 0.8162, "step": 7137 }, { "epoch": 23.40327868852459, "grad_norm": 6.817436218261719, "learning_rate": 1.789493952952287e-05, "loss": 0.7834, "step": 7138 }, { "epoch": 23.40655737704918, "grad_norm": 5.631534099578857, "learning_rate": 1.7894287743018775e-05, "loss": 0.7712, "step": 7139 }, { "epoch": 23.40983606557377, "grad_norm": 12.417404174804688, "learning_rate": 1.7893635867498378e-05, "loss": 0.8722, "step": 7140 }, { "epoch": 23.41311475409836, "grad_norm": 5.648946285247803, "learning_rate": 1.7892983902969028e-05, "loss": 0.823, "step": 7141 }, { "epoch": 23.41639344262295, "grad_norm": 5.4816813468933105, "learning_rate": 1.7892331849438077e-05, "loss": 0.9642, "step": 7142 }, { "epoch": 23.41967213114754, "grad_norm": 5.6113176345825195, "learning_rate": 1.789167970691288e-05, "loss": 0.9843, "step": 7143 }, { "epoch": 23.42295081967213, "grad_norm": 4.9730143547058105, "learning_rate": 1.7891027475400785e-05, "loss": 0.8708, "step": 7144 }, { "epoch": 23.42622950819672, "grad_norm": 5.218611717224121, "learning_rate": 1.7890375154909155e-05, "loss": 0.7847, "step": 7145 }, { "epoch": 23.42950819672131, "grad_norm": 6.855926513671875, "learning_rate": 1.788972274544534e-05, "loss": 1.0122, "step": 7146 }, { "epoch": 23.432786885245903, "grad_norm": 5.264576435089111, "learning_rate": 1.7889070247016697e-05, "loss": 0.8169, "step": 7147 }, { "epoch": 23.43606557377049, "grad_norm": 6.22582483291626, "learning_rate": 1.7888417659630587e-05, "loss": 0.8013, "step": 7148 }, { "epoch": 23.439344262295084, "grad_norm": 6.01300048828125, "learning_rate": 1.7887764983294365e-05, "loss": 0.7563, "step": 7149 }, { "epoch": 23.442622950819672, "grad_norm": 5.253568172454834, "learning_rate": 1.7887112218015387e-05, "loss": 0.7482, "step": 7150 }, { "epoch": 23.445901639344264, "grad_norm": 6.066300392150879, "learning_rate": 1.7886459363801024e-05, "loss": 0.9684, "step": 7151 }, { "epoch": 23.449180327868852, "grad_norm": 6.563246250152588, "learning_rate": 1.788580642065863e-05, "loss": 0.8574, "step": 7152 }, { "epoch": 23.452459016393444, "grad_norm": 4.967577934265137, "learning_rate": 1.788515338859557e-05, "loss": 1.0452, "step": 7153 }, { "epoch": 23.455737704918032, "grad_norm": 6.668243408203125, "learning_rate": 1.7884500267619204e-05, "loss": 0.7485, "step": 7154 }, { "epoch": 23.459016393442624, "grad_norm": 6.428305625915527, "learning_rate": 1.78838470577369e-05, "loss": 0.9894, "step": 7155 }, { "epoch": 23.462295081967213, "grad_norm": 6.6913042068481445, "learning_rate": 1.7883193758956028e-05, "loss": 0.9202, "step": 7156 }, { "epoch": 23.465573770491805, "grad_norm": 4.956277370452881, "learning_rate": 1.7882540371283946e-05, "loss": 0.7499, "step": 7157 }, { "epoch": 23.468852459016393, "grad_norm": 5.642459869384766, "learning_rate": 1.7881886894728028e-05, "loss": 0.8129, "step": 7158 }, { "epoch": 23.472131147540985, "grad_norm": 4.914809703826904, "learning_rate": 1.7881233329295637e-05, "loss": 0.9293, "step": 7159 }, { "epoch": 23.475409836065573, "grad_norm": 6.4558892250061035, "learning_rate": 1.7880579674994147e-05, "loss": 0.8569, "step": 7160 }, { "epoch": 23.478688524590165, "grad_norm": 6.583141326904297, "learning_rate": 1.787992593183093e-05, "loss": 0.8542, "step": 7161 }, { "epoch": 23.481967213114753, "grad_norm": 4.117868423461914, "learning_rate": 1.7879272099813353e-05, "loss": 1.1473, "step": 7162 }, { "epoch": 23.485245901639345, "grad_norm": 5.713847637176514, "learning_rate": 1.787861817894879e-05, "loss": 1.0862, "step": 7163 }, { "epoch": 23.488524590163934, "grad_norm": 6.607454299926758, "learning_rate": 1.7877964169244615e-05, "loss": 0.7856, "step": 7164 }, { "epoch": 23.491803278688526, "grad_norm": 4.9527130126953125, "learning_rate": 1.7877310070708206e-05, "loss": 0.7823, "step": 7165 }, { "epoch": 23.495081967213114, "grad_norm": 48.8953742980957, "learning_rate": 1.787665588334693e-05, "loss": 0.7711, "step": 7166 }, { "epoch": 23.498360655737706, "grad_norm": 6.193827152252197, "learning_rate": 1.7876001607168178e-05, "loss": 0.8608, "step": 7167 }, { "epoch": 23.501639344262294, "grad_norm": 5.443946838378906, "learning_rate": 1.787534724217931e-05, "loss": 0.7436, "step": 7168 }, { "epoch": 23.504918032786886, "grad_norm": 6.060565948486328, "learning_rate": 1.7874692788387718e-05, "loss": 0.7812, "step": 7169 }, { "epoch": 23.508196721311474, "grad_norm": 4.427140712738037, "learning_rate": 1.7874038245800775e-05, "loss": 0.9424, "step": 7170 }, { "epoch": 23.511475409836066, "grad_norm": 5.251713275909424, "learning_rate": 1.7873383614425866e-05, "loss": 0.8104, "step": 7171 }, { "epoch": 23.514754098360655, "grad_norm": 6.357522487640381, "learning_rate": 1.787272889427037e-05, "loss": 1.0026, "step": 7172 }, { "epoch": 23.518032786885247, "grad_norm": 5.624875068664551, "learning_rate": 1.787207408534167e-05, "loss": 0.6595, "step": 7173 }, { "epoch": 23.521311475409835, "grad_norm": 5.906907558441162, "learning_rate": 1.787141918764715e-05, "loss": 0.7804, "step": 7174 }, { "epoch": 23.524590163934427, "grad_norm": 5.984419822692871, "learning_rate": 1.7870764201194194e-05, "loss": 1.0435, "step": 7175 }, { "epoch": 23.527868852459015, "grad_norm": 5.121947288513184, "learning_rate": 1.7870109125990186e-05, "loss": 1.0911, "step": 7176 }, { "epoch": 23.531147540983607, "grad_norm": 5.3209733963012695, "learning_rate": 1.786945396204252e-05, "loss": 0.7712, "step": 7177 }, { "epoch": 23.534426229508195, "grad_norm": 6.065683841705322, "learning_rate": 1.7868798709358572e-05, "loss": 0.8334, "step": 7178 }, { "epoch": 23.537704918032787, "grad_norm": 6.086121559143066, "learning_rate": 1.7868143367945742e-05, "loss": 0.9099, "step": 7179 }, { "epoch": 23.540983606557376, "grad_norm": 5.1363115310668945, "learning_rate": 1.7867487937811412e-05, "loss": 1.0165, "step": 7180 }, { "epoch": 23.544262295081968, "grad_norm": 4.346246242523193, "learning_rate": 1.7866832418962978e-05, "loss": 1.0017, "step": 7181 }, { "epoch": 23.547540983606556, "grad_norm": 5.022402763366699, "learning_rate": 1.786617681140783e-05, "loss": 0.9281, "step": 7182 }, { "epoch": 23.550819672131148, "grad_norm": 6.098505973815918, "learning_rate": 1.7865521115153354e-05, "loss": 0.863, "step": 7183 }, { "epoch": 23.554098360655736, "grad_norm": 5.756143093109131, "learning_rate": 1.786486533020695e-05, "loss": 1.051, "step": 7184 }, { "epoch": 23.557377049180328, "grad_norm": 6.744861125946045, "learning_rate": 1.786420945657602e-05, "loss": 0.4473, "step": 7185 }, { "epoch": 23.560655737704916, "grad_norm": 8.945691108703613, "learning_rate": 1.786355349426794e-05, "loss": 1.0156, "step": 7186 }, { "epoch": 23.56393442622951, "grad_norm": 6.798580169677734, "learning_rate": 1.786289744329013e-05, "loss": 0.7083, "step": 7187 }, { "epoch": 23.567213114754097, "grad_norm": 5.24274206161499, "learning_rate": 1.7862241303649967e-05, "loss": 0.8492, "step": 7188 }, { "epoch": 23.57049180327869, "grad_norm": 5.930305480957031, "learning_rate": 1.786158507535486e-05, "loss": 1.0226, "step": 7189 }, { "epoch": 23.57377049180328, "grad_norm": 5.704438209533691, "learning_rate": 1.786092875841221e-05, "loss": 0.6661, "step": 7190 }, { "epoch": 23.57704918032787, "grad_norm": 5.476617813110352, "learning_rate": 1.7860272352829415e-05, "loss": 0.9481, "step": 7191 }, { "epoch": 23.58032786885246, "grad_norm": 44.285037994384766, "learning_rate": 1.785961585861388e-05, "loss": 0.729, "step": 7192 }, { "epoch": 23.58360655737705, "grad_norm": 5.865227699279785, "learning_rate": 1.7858959275772997e-05, "loss": 0.8196, "step": 7193 }, { "epoch": 23.58688524590164, "grad_norm": 6.066885948181152, "learning_rate": 1.7858302604314183e-05, "loss": 0.881, "step": 7194 }, { "epoch": 23.59016393442623, "grad_norm": 5.442193031311035, "learning_rate": 1.785764584424483e-05, "loss": 1.1399, "step": 7195 }, { "epoch": 23.59344262295082, "grad_norm": 6.721230506896973, "learning_rate": 1.7856988995572354e-05, "loss": 0.8657, "step": 7196 }, { "epoch": 23.59672131147541, "grad_norm": 8.064674377441406, "learning_rate": 1.785633205830416e-05, "loss": 0.8495, "step": 7197 }, { "epoch": 23.6, "grad_norm": 4.916460990905762, "learning_rate": 1.7855675032447648e-05, "loss": 0.875, "step": 7198 }, { "epoch": 23.60327868852459, "grad_norm": 10.991976737976074, "learning_rate": 1.7855017918010237e-05, "loss": 0.9883, "step": 7199 }, { "epoch": 23.60655737704918, "grad_norm": 5.644162654876709, "learning_rate": 1.785436071499933e-05, "loss": 0.8502, "step": 7200 }, { "epoch": 23.60983606557377, "grad_norm": 5.483100891113281, "learning_rate": 1.7853703423422337e-05, "loss": 0.7968, "step": 7201 }, { "epoch": 23.613114754098362, "grad_norm": 7.046675205230713, "learning_rate": 1.7853046043286676e-05, "loss": 0.9018, "step": 7202 }, { "epoch": 23.61639344262295, "grad_norm": 6.375696182250977, "learning_rate": 1.7852388574599754e-05, "loss": 0.6629, "step": 7203 }, { "epoch": 23.619672131147542, "grad_norm": 23.16229248046875, "learning_rate": 1.7851731017368985e-05, "loss": 0.9155, "step": 7204 }, { "epoch": 23.62295081967213, "grad_norm": 8.316524505615234, "learning_rate": 1.7851073371601786e-05, "loss": 0.8386, "step": 7205 }, { "epoch": 23.626229508196722, "grad_norm": 7.042294025421143, "learning_rate": 1.7850415637305572e-05, "loss": 0.9961, "step": 7206 }, { "epoch": 23.62950819672131, "grad_norm": 7.826927661895752, "learning_rate": 1.784975781448776e-05, "loss": 0.7024, "step": 7207 }, { "epoch": 23.632786885245903, "grad_norm": 8.21361255645752, "learning_rate": 1.784909990315577e-05, "loss": 0.9597, "step": 7208 }, { "epoch": 23.63606557377049, "grad_norm": 8.087798118591309, "learning_rate": 1.784844190331701e-05, "loss": 0.9991, "step": 7209 }, { "epoch": 23.639344262295083, "grad_norm": 5.206655979156494, "learning_rate": 1.7847783814978916e-05, "loss": 0.9496, "step": 7210 }, { "epoch": 23.64262295081967, "grad_norm": 6.535554885864258, "learning_rate": 1.7847125638148892e-05, "loss": 0.8065, "step": 7211 }, { "epoch": 23.645901639344263, "grad_norm": 5.820077896118164, "learning_rate": 1.784646737283437e-05, "loss": 0.8727, "step": 7212 }, { "epoch": 23.64918032786885, "grad_norm": 6.0091447830200195, "learning_rate": 1.7845809019042774e-05, "loss": 1.0409, "step": 7213 }, { "epoch": 23.652459016393443, "grad_norm": 6.855261325836182, "learning_rate": 1.784515057678152e-05, "loss": 0.7582, "step": 7214 }, { "epoch": 23.65573770491803, "grad_norm": 6.530843257904053, "learning_rate": 1.7844492046058036e-05, "loss": 0.9894, "step": 7215 }, { "epoch": 23.659016393442624, "grad_norm": 5.397341728210449, "learning_rate": 1.7843833426879747e-05, "loss": 1.0663, "step": 7216 }, { "epoch": 23.662295081967212, "grad_norm": 6.691868305206299, "learning_rate": 1.7843174719254084e-05, "loss": 0.9512, "step": 7217 }, { "epoch": 23.665573770491804, "grad_norm": 9.284786224365234, "learning_rate": 1.784251592318847e-05, "loss": 0.6516, "step": 7218 }, { "epoch": 23.668852459016392, "grad_norm": 5.570718765258789, "learning_rate": 1.7841857038690332e-05, "loss": 0.8029, "step": 7219 }, { "epoch": 23.672131147540984, "grad_norm": 7.744438171386719, "learning_rate": 1.7841198065767107e-05, "loss": 0.7647, "step": 7220 }, { "epoch": 23.675409836065572, "grad_norm": 6.681369304656982, "learning_rate": 1.7840539004426218e-05, "loss": 1.0065, "step": 7221 }, { "epoch": 23.678688524590164, "grad_norm": 8.420952796936035, "learning_rate": 1.7839879854675103e-05, "loss": 0.9326, "step": 7222 }, { "epoch": 23.681967213114753, "grad_norm": 8.813811302185059, "learning_rate": 1.7839220616521186e-05, "loss": 0.7502, "step": 7223 }, { "epoch": 23.685245901639345, "grad_norm": 5.611009120941162, "learning_rate": 1.783856128997191e-05, "loss": 0.9804, "step": 7224 }, { "epoch": 23.688524590163933, "grad_norm": 6.3586859703063965, "learning_rate": 1.78379018750347e-05, "loss": 0.8214, "step": 7225 }, { "epoch": 23.691803278688525, "grad_norm": 6.671968936920166, "learning_rate": 1.7837242371717e-05, "loss": 0.929, "step": 7226 }, { "epoch": 23.695081967213113, "grad_norm": 5.211374282836914, "learning_rate": 1.7836582780026246e-05, "loss": 0.7883, "step": 7227 }, { "epoch": 23.698360655737705, "grad_norm": 4.858123779296875, "learning_rate": 1.783592309996987e-05, "loss": 0.947, "step": 7228 }, { "epoch": 23.701639344262293, "grad_norm": 7.166254997253418, "learning_rate": 1.7835263331555317e-05, "loss": 0.6528, "step": 7229 }, { "epoch": 23.704918032786885, "grad_norm": 6.093628406524658, "learning_rate": 1.783460347479002e-05, "loss": 1.0619, "step": 7230 }, { "epoch": 23.708196721311474, "grad_norm": 6.484720230102539, "learning_rate": 1.7833943529681425e-05, "loss": 0.6808, "step": 7231 }, { "epoch": 23.711475409836066, "grad_norm": 5.838448524475098, "learning_rate": 1.7833283496236974e-05, "loss": 0.8058, "step": 7232 }, { "epoch": 23.714754098360658, "grad_norm": 8.052468299865723, "learning_rate": 1.7832623374464103e-05, "loss": 0.8743, "step": 7233 }, { "epoch": 23.718032786885246, "grad_norm": 5.676220893859863, "learning_rate": 1.7831963164370257e-05, "loss": 0.713, "step": 7234 }, { "epoch": 23.721311475409838, "grad_norm": 5.610249042510986, "learning_rate": 1.783130286596289e-05, "loss": 0.8665, "step": 7235 }, { "epoch": 23.724590163934426, "grad_norm": 5.486862659454346, "learning_rate": 1.7830642479249436e-05, "loss": 0.9653, "step": 7236 }, { "epoch": 23.727868852459018, "grad_norm": 5.975827217102051, "learning_rate": 1.7829982004237348e-05, "loss": 0.5936, "step": 7237 }, { "epoch": 23.731147540983606, "grad_norm": 5.78178596496582, "learning_rate": 1.782932144093407e-05, "loss": 0.8949, "step": 7238 }, { "epoch": 23.7344262295082, "grad_norm": 30.482622146606445, "learning_rate": 1.7828660789347054e-05, "loss": 0.8098, "step": 7239 }, { "epoch": 23.737704918032787, "grad_norm": 7.399083614349365, "learning_rate": 1.7828000049483745e-05, "loss": 0.8761, "step": 7240 }, { "epoch": 23.74098360655738, "grad_norm": 5.280264377593994, "learning_rate": 1.7827339221351598e-05, "loss": 0.8055, "step": 7241 }, { "epoch": 23.744262295081967, "grad_norm": 6.54689884185791, "learning_rate": 1.7826678304958065e-05, "loss": 0.8798, "step": 7242 }, { "epoch": 23.74754098360656, "grad_norm": 6.145811080932617, "learning_rate": 1.7826017300310593e-05, "loss": 0.7145, "step": 7243 }, { "epoch": 23.750819672131147, "grad_norm": 5.228785514831543, "learning_rate": 1.782535620741664e-05, "loss": 0.9635, "step": 7244 }, { "epoch": 23.75409836065574, "grad_norm": 5.245724201202393, "learning_rate": 1.782469502628366e-05, "loss": 0.819, "step": 7245 }, { "epoch": 23.757377049180327, "grad_norm": 5.269730567932129, "learning_rate": 1.7824033756919112e-05, "loss": 0.931, "step": 7246 }, { "epoch": 23.76065573770492, "grad_norm": 5.231870651245117, "learning_rate": 1.7823372399330443e-05, "loss": 1.0475, "step": 7247 }, { "epoch": 23.763934426229508, "grad_norm": 5.128109455108643, "learning_rate": 1.782271095352512e-05, "loss": 0.6157, "step": 7248 }, { "epoch": 23.7672131147541, "grad_norm": 6.075936794281006, "learning_rate": 1.7822049419510594e-05, "loss": 0.8798, "step": 7249 }, { "epoch": 23.770491803278688, "grad_norm": 6.8730950355529785, "learning_rate": 1.782138779729433e-05, "loss": 0.8035, "step": 7250 }, { "epoch": 23.77377049180328, "grad_norm": 5.813043594360352, "learning_rate": 1.7820726086883783e-05, "loss": 1.0256, "step": 7251 }, { "epoch": 23.777049180327868, "grad_norm": 5.371934413909912, "learning_rate": 1.782006428828642e-05, "loss": 0.9772, "step": 7252 }, { "epoch": 23.78032786885246, "grad_norm": 6.044444561004639, "learning_rate": 1.7819402401509703e-05, "loss": 0.808, "step": 7253 }, { "epoch": 23.78360655737705, "grad_norm": 5.758303642272949, "learning_rate": 1.781874042656109e-05, "loss": 1.0267, "step": 7254 }, { "epoch": 23.78688524590164, "grad_norm": 5.58863639831543, "learning_rate": 1.7818078363448053e-05, "loss": 0.9062, "step": 7255 }, { "epoch": 23.79016393442623, "grad_norm": 5.123717784881592, "learning_rate": 1.781741621217805e-05, "loss": 0.9067, "step": 7256 }, { "epoch": 23.79344262295082, "grad_norm": 5.925631999969482, "learning_rate": 1.7816753972758552e-05, "loss": 0.7485, "step": 7257 }, { "epoch": 23.79672131147541, "grad_norm": 5.376339435577393, "learning_rate": 1.781609164519703e-05, "loss": 0.9741, "step": 7258 }, { "epoch": 23.8, "grad_norm": 5.484385013580322, "learning_rate": 1.7815429229500946e-05, "loss": 1.2494, "step": 7259 }, { "epoch": 23.80327868852459, "grad_norm": 6.808892726898193, "learning_rate": 1.781476672567777e-05, "loss": 0.9325, "step": 7260 }, { "epoch": 23.80655737704918, "grad_norm": 5.145605087280273, "learning_rate": 1.7814104133734976e-05, "loss": 0.9594, "step": 7261 }, { "epoch": 23.80983606557377, "grad_norm": 8.841425895690918, "learning_rate": 1.781344145368003e-05, "loss": 0.75, "step": 7262 }, { "epoch": 23.81311475409836, "grad_norm": 6.245835781097412, "learning_rate": 1.781277868552041e-05, "loss": 1.0521, "step": 7263 }, { "epoch": 23.81639344262295, "grad_norm": 7.9889397621154785, "learning_rate": 1.7812115829263585e-05, "loss": 0.5693, "step": 7264 }, { "epoch": 23.81967213114754, "grad_norm": 5.536527156829834, "learning_rate": 1.781145288491703e-05, "loss": 0.7974, "step": 7265 }, { "epoch": 23.82295081967213, "grad_norm": 5.309640884399414, "learning_rate": 1.7810789852488225e-05, "loss": 0.9541, "step": 7266 }, { "epoch": 23.82622950819672, "grad_norm": 4.999553680419922, "learning_rate": 1.781012673198464e-05, "loss": 0.9096, "step": 7267 }, { "epoch": 23.82950819672131, "grad_norm": 6.97667121887207, "learning_rate": 1.7809463523413762e-05, "loss": 0.866, "step": 7268 }, { "epoch": 23.832786885245902, "grad_norm": 4.7223310470581055, "learning_rate": 1.7808800226783058e-05, "loss": 0.8047, "step": 7269 }, { "epoch": 23.83606557377049, "grad_norm": 30.160106658935547, "learning_rate": 1.7808136842100015e-05, "loss": 0.9594, "step": 7270 }, { "epoch": 23.839344262295082, "grad_norm": 6.983041763305664, "learning_rate": 1.780747336937211e-05, "loss": 1.2209, "step": 7271 }, { "epoch": 23.84262295081967, "grad_norm": 5.220701217651367, "learning_rate": 1.7806809808606825e-05, "loss": 0.9276, "step": 7272 }, { "epoch": 23.845901639344262, "grad_norm": 4.789504051208496, "learning_rate": 1.780614615981164e-05, "loss": 1.1671, "step": 7273 }, { "epoch": 23.84918032786885, "grad_norm": 12.39383602142334, "learning_rate": 1.7805482422994042e-05, "loss": 0.7114, "step": 7274 }, { "epoch": 23.852459016393443, "grad_norm": 5.535405158996582, "learning_rate": 1.7804818598161517e-05, "loss": 0.8703, "step": 7275 }, { "epoch": 23.855737704918035, "grad_norm": 7.919665813446045, "learning_rate": 1.7804154685321543e-05, "loss": 0.8048, "step": 7276 }, { "epoch": 23.859016393442623, "grad_norm": 6.528443813323975, "learning_rate": 1.780349068448161e-05, "loss": 0.9738, "step": 7277 }, { "epoch": 23.862295081967215, "grad_norm": 4.928613662719727, "learning_rate": 1.780282659564921e-05, "loss": 0.9202, "step": 7278 }, { "epoch": 23.865573770491803, "grad_norm": 6.925018787384033, "learning_rate": 1.7802162418831826e-05, "loss": 0.9849, "step": 7279 }, { "epoch": 23.868852459016395, "grad_norm": 5.206142425537109, "learning_rate": 1.7801498154036946e-05, "loss": 0.8277, "step": 7280 }, { "epoch": 23.872131147540983, "grad_norm": 5.906132698059082, "learning_rate": 1.7800833801272064e-05, "loss": 0.8558, "step": 7281 }, { "epoch": 23.875409836065575, "grad_norm": 6.5988874435424805, "learning_rate": 1.780016936054467e-05, "loss": 0.7033, "step": 7282 }, { "epoch": 23.878688524590164, "grad_norm": 6.231072902679443, "learning_rate": 1.779950483186226e-05, "loss": 0.7179, "step": 7283 }, { "epoch": 23.881967213114756, "grad_norm": 6.703927993774414, "learning_rate": 1.779884021523232e-05, "loss": 0.8733, "step": 7284 }, { "epoch": 23.885245901639344, "grad_norm": 5.681050777435303, "learning_rate": 1.779817551066235e-05, "loss": 0.9919, "step": 7285 }, { "epoch": 23.888524590163936, "grad_norm": 5.478031635284424, "learning_rate": 1.779751071815984e-05, "loss": 0.8367, "step": 7286 }, { "epoch": 23.891803278688524, "grad_norm": 5.730085849761963, "learning_rate": 1.779684583773229e-05, "loss": 1.0596, "step": 7287 }, { "epoch": 23.895081967213116, "grad_norm": 7.274540901184082, "learning_rate": 1.77961808693872e-05, "loss": 0.7262, "step": 7288 }, { "epoch": 23.898360655737704, "grad_norm": 7.687193870544434, "learning_rate": 1.7795515813132063e-05, "loss": 0.7417, "step": 7289 }, { "epoch": 23.901639344262296, "grad_norm": 5.804431915283203, "learning_rate": 1.7794850668974378e-05, "loss": 0.917, "step": 7290 }, { "epoch": 23.904918032786885, "grad_norm": 5.565606594085693, "learning_rate": 1.7794185436921646e-05, "loss": 0.973, "step": 7291 }, { "epoch": 23.908196721311477, "grad_norm": 5.415492057800293, "learning_rate": 1.7793520116981372e-05, "loss": 0.8933, "step": 7292 }, { "epoch": 23.911475409836065, "grad_norm": 6.604628086090088, "learning_rate": 1.7792854709161057e-05, "loss": 0.8784, "step": 7293 }, { "epoch": 23.914754098360657, "grad_norm": 5.39424991607666, "learning_rate": 1.7792189213468195e-05, "loss": 1.0666, "step": 7294 }, { "epoch": 23.918032786885245, "grad_norm": 6.267399311065674, "learning_rate": 1.7791523629910305e-05, "loss": 0.6719, "step": 7295 }, { "epoch": 23.921311475409837, "grad_norm": 5.075406551361084, "learning_rate": 1.7790857958494885e-05, "loss": 0.8053, "step": 7296 }, { "epoch": 23.924590163934425, "grad_norm": 5.056168556213379, "learning_rate": 1.779019219922944e-05, "loss": 0.9753, "step": 7297 }, { "epoch": 23.927868852459017, "grad_norm": 5.690150260925293, "learning_rate": 1.7789526352121477e-05, "loss": 0.7535, "step": 7298 }, { "epoch": 23.931147540983606, "grad_norm": 6.132216453552246, "learning_rate": 1.7788860417178508e-05, "loss": 1.0894, "step": 7299 }, { "epoch": 23.934426229508198, "grad_norm": 4.872580051422119, "learning_rate": 1.7788194394408034e-05, "loss": 0.936, "step": 7300 }, { "epoch": 23.937704918032786, "grad_norm": 4.89820671081543, "learning_rate": 1.7787528283817575e-05, "loss": 0.749, "step": 7301 }, { "epoch": 23.940983606557378, "grad_norm": 5.503674507141113, "learning_rate": 1.7786862085414633e-05, "loss": 0.9415, "step": 7302 }, { "epoch": 23.944262295081966, "grad_norm": 5.397604465484619, "learning_rate": 1.778619579920673e-05, "loss": 0.9115, "step": 7303 }, { "epoch": 23.947540983606558, "grad_norm": 7.094313144683838, "learning_rate": 1.778552942520137e-05, "loss": 0.847, "step": 7304 }, { "epoch": 23.950819672131146, "grad_norm": 6.138317108154297, "learning_rate": 1.7784862963406075e-05, "loss": 0.6547, "step": 7305 }, { "epoch": 23.95409836065574, "grad_norm": 6.39305305480957, "learning_rate": 1.7784196413828352e-05, "loss": 0.9211, "step": 7306 }, { "epoch": 23.957377049180327, "grad_norm": 5.695804595947266, "learning_rate": 1.7783529776475722e-05, "loss": 0.8004, "step": 7307 }, { "epoch": 23.96065573770492, "grad_norm": 5.927872180938721, "learning_rate": 1.77828630513557e-05, "loss": 1.0051, "step": 7308 }, { "epoch": 23.963934426229507, "grad_norm": 8.463623046875, "learning_rate": 1.7782196238475807e-05, "loss": 0.5071, "step": 7309 }, { "epoch": 23.9672131147541, "grad_norm": 5.961519241333008, "learning_rate": 1.778152933784356e-05, "loss": 0.9631, "step": 7310 }, { "epoch": 23.970491803278687, "grad_norm": 7.797673225402832, "learning_rate": 1.7780862349466475e-05, "loss": 0.955, "step": 7311 }, { "epoch": 23.97377049180328, "grad_norm": 5.057248592376709, "learning_rate": 1.778019527335208e-05, "loss": 0.7762, "step": 7312 }, { "epoch": 23.977049180327867, "grad_norm": 5.237808704376221, "learning_rate": 1.7779528109507894e-05, "loss": 1.1238, "step": 7313 }, { "epoch": 23.98032786885246, "grad_norm": 6.327554702758789, "learning_rate": 1.777886085794144e-05, "loss": 0.6614, "step": 7314 }, { "epoch": 23.983606557377048, "grad_norm": 10.28608512878418, "learning_rate": 1.7778193518660242e-05, "loss": 1.0751, "step": 7315 }, { "epoch": 23.98688524590164, "grad_norm": 5.302609443664551, "learning_rate": 1.7777526091671823e-05, "loss": 0.7758, "step": 7316 }, { "epoch": 23.990163934426228, "grad_norm": 6.823005676269531, "learning_rate": 1.7776858576983713e-05, "loss": 0.8904, "step": 7317 }, { "epoch": 23.99344262295082, "grad_norm": 6.864485263824463, "learning_rate": 1.7776190974603435e-05, "loss": 0.9338, "step": 7318 }, { "epoch": 23.99672131147541, "grad_norm": 5.520724296569824, "learning_rate": 1.7775523284538522e-05, "loss": 0.9755, "step": 7319 }, { "epoch": 24.0, "grad_norm": 5.334290504455566, "learning_rate": 1.7774855506796497e-05, "loss": 0.6896, "step": 7320 }, { "epoch": 24.003278688524592, "grad_norm": 10.328673362731934, "learning_rate": 1.777418764138489e-05, "loss": 0.732, "step": 7321 }, { "epoch": 24.00655737704918, "grad_norm": 6.844033718109131, "learning_rate": 1.7773519688311235e-05, "loss": 0.9701, "step": 7322 }, { "epoch": 24.009836065573772, "grad_norm": 4.828906536102295, "learning_rate": 1.7772851647583068e-05, "loss": 0.9143, "step": 7323 }, { "epoch": 24.01311475409836, "grad_norm": 5.494847774505615, "learning_rate": 1.777218351920791e-05, "loss": 0.6803, "step": 7324 }, { "epoch": 24.016393442622952, "grad_norm": 5.071415901184082, "learning_rate": 1.7771515303193304e-05, "loss": 0.5258, "step": 7325 }, { "epoch": 24.01967213114754, "grad_norm": 5.873124122619629, "learning_rate": 1.7770846999546784e-05, "loss": 0.7352, "step": 7326 }, { "epoch": 24.022950819672133, "grad_norm": 5.281012535095215, "learning_rate": 1.7770178608275885e-05, "loss": 1.054, "step": 7327 }, { "epoch": 24.02622950819672, "grad_norm": 4.934489727020264, "learning_rate": 1.7769510129388142e-05, "loss": 0.8101, "step": 7328 }, { "epoch": 24.029508196721313, "grad_norm": 5.048592567443848, "learning_rate": 1.7768841562891094e-05, "loss": 0.8562, "step": 7329 }, { "epoch": 24.0327868852459, "grad_norm": 5.5506672859191895, "learning_rate": 1.776817290879228e-05, "loss": 0.6857, "step": 7330 }, { "epoch": 24.036065573770493, "grad_norm": 5.804925441741943, "learning_rate": 1.7767504167099238e-05, "loss": 0.7145, "step": 7331 }, { "epoch": 24.03934426229508, "grad_norm": 5.636873722076416, "learning_rate": 1.7766835337819514e-05, "loss": 0.814, "step": 7332 }, { "epoch": 24.042622950819673, "grad_norm": 5.591866493225098, "learning_rate": 1.7766166420960643e-05, "loss": 0.8498, "step": 7333 }, { "epoch": 24.04590163934426, "grad_norm": 6.0179219245910645, "learning_rate": 1.7765497416530173e-05, "loss": 0.7123, "step": 7334 }, { "epoch": 24.049180327868854, "grad_norm": 5.21230411529541, "learning_rate": 1.7764828324535645e-05, "loss": 0.7537, "step": 7335 }, { "epoch": 24.052459016393442, "grad_norm": 5.1434855461120605, "learning_rate": 1.7764159144984603e-05, "loss": 0.671, "step": 7336 }, { "epoch": 24.055737704918034, "grad_norm": 4.948975563049316, "learning_rate": 1.7763489877884598e-05, "loss": 0.7922, "step": 7337 }, { "epoch": 24.059016393442622, "grad_norm": 4.6254377365112305, "learning_rate": 1.776282052324317e-05, "loss": 0.9144, "step": 7338 }, { "epoch": 24.062295081967214, "grad_norm": 4.973206043243408, "learning_rate": 1.7762151081067863e-05, "loss": 0.9057, "step": 7339 }, { "epoch": 24.065573770491802, "grad_norm": 4.937098979949951, "learning_rate": 1.7761481551366238e-05, "loss": 0.9218, "step": 7340 }, { "epoch": 24.068852459016394, "grad_norm": 5.084620475769043, "learning_rate": 1.7760811934145842e-05, "loss": 0.718, "step": 7341 }, { "epoch": 24.072131147540983, "grad_norm": 4.838199138641357, "learning_rate": 1.7760142229414213e-05, "loss": 0.8184, "step": 7342 }, { "epoch": 24.075409836065575, "grad_norm": 5.3246283531188965, "learning_rate": 1.775947243717892e-05, "loss": 0.9068, "step": 7343 }, { "epoch": 24.078688524590163, "grad_norm": 4.818875789642334, "learning_rate": 1.7758802557447503e-05, "loss": 1.0252, "step": 7344 }, { "epoch": 24.081967213114755, "grad_norm": 4.9171271324157715, "learning_rate": 1.7758132590227522e-05, "loss": 0.9119, "step": 7345 }, { "epoch": 24.085245901639343, "grad_norm": 5.869847774505615, "learning_rate": 1.7757462535526532e-05, "loss": 0.5832, "step": 7346 }, { "epoch": 24.088524590163935, "grad_norm": 5.261012077331543, "learning_rate": 1.775679239335208e-05, "loss": 0.7221, "step": 7347 }, { "epoch": 24.091803278688523, "grad_norm": 5.76719331741333, "learning_rate": 1.7756122163711734e-05, "loss": 0.9005, "step": 7348 }, { "epoch": 24.095081967213115, "grad_norm": 5.0698041915893555, "learning_rate": 1.7755451846613045e-05, "loss": 0.9908, "step": 7349 }, { "epoch": 24.098360655737704, "grad_norm": 5.100347518920898, "learning_rate": 1.775478144206357e-05, "loss": 0.5638, "step": 7350 }, { "epoch": 24.101639344262296, "grad_norm": 4.449403762817383, "learning_rate": 1.7754110950070874e-05, "loss": 1.0618, "step": 7351 }, { "epoch": 24.104918032786884, "grad_norm": 4.285492897033691, "learning_rate": 1.7753440370642513e-05, "loss": 0.9332, "step": 7352 }, { "epoch": 24.108196721311476, "grad_norm": 4.97303581237793, "learning_rate": 1.7752769703786055e-05, "loss": 0.6406, "step": 7353 }, { "epoch": 24.111475409836064, "grad_norm": 5.375770568847656, "learning_rate": 1.7752098949509053e-05, "loss": 0.776, "step": 7354 }, { "epoch": 24.114754098360656, "grad_norm": 5.266770362854004, "learning_rate": 1.7751428107819075e-05, "loss": 0.8887, "step": 7355 }, { "epoch": 24.118032786885244, "grad_norm": 4.954979419708252, "learning_rate": 1.7750757178723686e-05, "loss": 0.765, "step": 7356 }, { "epoch": 24.121311475409836, "grad_norm": 6.406671047210693, "learning_rate": 1.7750086162230455e-05, "loss": 0.9448, "step": 7357 }, { "epoch": 24.124590163934425, "grad_norm": 4.919301986694336, "learning_rate": 1.774941505834694e-05, "loss": 0.8171, "step": 7358 }, { "epoch": 24.127868852459017, "grad_norm": 5.889680862426758, "learning_rate": 1.7748743867080715e-05, "loss": 0.5906, "step": 7359 }, { "epoch": 24.131147540983605, "grad_norm": 4.81962251663208, "learning_rate": 1.7748072588439345e-05, "loss": 0.783, "step": 7360 }, { "epoch": 24.134426229508197, "grad_norm": 5.567442893981934, "learning_rate": 1.7747401222430403e-05, "loss": 0.7192, "step": 7361 }, { "epoch": 24.137704918032785, "grad_norm": 4.737745761871338, "learning_rate": 1.7746729769061454e-05, "loss": 0.6945, "step": 7362 }, { "epoch": 24.140983606557377, "grad_norm": 5.120189666748047, "learning_rate": 1.7746058228340073e-05, "loss": 0.8466, "step": 7363 }, { "epoch": 24.14426229508197, "grad_norm": 6.568039894104004, "learning_rate": 1.774538660027383e-05, "loss": 0.7931, "step": 7364 }, { "epoch": 24.147540983606557, "grad_norm": 5.318478107452393, "learning_rate": 1.7744714884870303e-05, "loss": 0.7833, "step": 7365 }, { "epoch": 24.15081967213115, "grad_norm": 4.68771505355835, "learning_rate": 1.7744043082137062e-05, "loss": 0.637, "step": 7366 }, { "epoch": 24.154098360655738, "grad_norm": 5.412518501281738, "learning_rate": 1.7743371192081683e-05, "loss": 0.6229, "step": 7367 }, { "epoch": 24.15737704918033, "grad_norm": 4.921260356903076, "learning_rate": 1.7742699214711745e-05, "loss": 0.6901, "step": 7368 }, { "epoch": 24.160655737704918, "grad_norm": 4.724396228790283, "learning_rate": 1.774202715003482e-05, "loss": 0.7571, "step": 7369 }, { "epoch": 24.16393442622951, "grad_norm": 5.685887336730957, "learning_rate": 1.774135499805849e-05, "loss": 0.8761, "step": 7370 }, { "epoch": 24.167213114754098, "grad_norm": 5.559779167175293, "learning_rate": 1.7740682758790334e-05, "loss": 0.673, "step": 7371 }, { "epoch": 24.17049180327869, "grad_norm": 4.66281270980835, "learning_rate": 1.774001043223793e-05, "loss": 0.7998, "step": 7372 }, { "epoch": 24.17377049180328, "grad_norm": 5.199752330780029, "learning_rate": 1.7739338018408864e-05, "loss": 0.6887, "step": 7373 }, { "epoch": 24.17704918032787, "grad_norm": 6.627533435821533, "learning_rate": 1.7738665517310713e-05, "loss": 0.5851, "step": 7374 }, { "epoch": 24.18032786885246, "grad_norm": 5.034623622894287, "learning_rate": 1.7737992928951066e-05, "loss": 0.9138, "step": 7375 }, { "epoch": 24.18360655737705, "grad_norm": 5.654867649078369, "learning_rate": 1.7737320253337497e-05, "loss": 0.6331, "step": 7376 }, { "epoch": 24.18688524590164, "grad_norm": 6.787798881530762, "learning_rate": 1.77366474904776e-05, "loss": 0.8643, "step": 7377 }, { "epoch": 24.19016393442623, "grad_norm": 5.2899088859558105, "learning_rate": 1.7735974640378958e-05, "loss": 0.7722, "step": 7378 }, { "epoch": 24.19344262295082, "grad_norm": 5.637021064758301, "learning_rate": 1.7735301703049156e-05, "loss": 0.7899, "step": 7379 }, { "epoch": 24.19672131147541, "grad_norm": 4.996993541717529, "learning_rate": 1.7734628678495787e-05, "loss": 0.7167, "step": 7380 }, { "epoch": 24.2, "grad_norm": 4.748942852020264, "learning_rate": 1.7733955566726438e-05, "loss": 0.9681, "step": 7381 }, { "epoch": 24.20327868852459, "grad_norm": 5.83690071105957, "learning_rate": 1.77332823677487e-05, "loss": 0.6594, "step": 7382 }, { "epoch": 24.20655737704918, "grad_norm": 5.250882148742676, "learning_rate": 1.7732609081570162e-05, "loss": 0.678, "step": 7383 }, { "epoch": 24.20983606557377, "grad_norm": 7.838682174682617, "learning_rate": 1.7731935708198417e-05, "loss": 0.8777, "step": 7384 }, { "epoch": 24.21311475409836, "grad_norm": 5.38614559173584, "learning_rate": 1.7731262247641056e-05, "loss": 0.6025, "step": 7385 }, { "epoch": 24.21639344262295, "grad_norm": 7.041114330291748, "learning_rate": 1.7730588699905675e-05, "loss": 0.8323, "step": 7386 }, { "epoch": 24.21967213114754, "grad_norm": 5.701378345489502, "learning_rate": 1.772991506499987e-05, "loss": 0.9886, "step": 7387 }, { "epoch": 24.222950819672132, "grad_norm": 7.27174711227417, "learning_rate": 1.7729241342931235e-05, "loss": 0.7616, "step": 7388 }, { "epoch": 24.22622950819672, "grad_norm": 5.711893081665039, "learning_rate": 1.7728567533707367e-05, "loss": 0.6504, "step": 7389 }, { "epoch": 24.229508196721312, "grad_norm": 3.9585745334625244, "learning_rate": 1.7727893637335864e-05, "loss": 1.0612, "step": 7390 }, { "epoch": 24.2327868852459, "grad_norm": 5.204208850860596, "learning_rate": 1.7727219653824326e-05, "loss": 1.0137, "step": 7391 }, { "epoch": 24.236065573770492, "grad_norm": 6.7898640632629395, "learning_rate": 1.772654558318035e-05, "loss": 1.0069, "step": 7392 }, { "epoch": 24.23934426229508, "grad_norm": 5.390816688537598, "learning_rate": 1.7725871425411544e-05, "loss": 0.8653, "step": 7393 }, { "epoch": 24.242622950819673, "grad_norm": 5.599405765533447, "learning_rate": 1.77251971805255e-05, "loss": 0.8961, "step": 7394 }, { "epoch": 24.24590163934426, "grad_norm": 6.384785175323486, "learning_rate": 1.7724522848529827e-05, "loss": 0.9635, "step": 7395 }, { "epoch": 24.249180327868853, "grad_norm": 5.119091510772705, "learning_rate": 1.7723848429432127e-05, "loss": 0.9726, "step": 7396 }, { "epoch": 24.25245901639344, "grad_norm": 5.885898590087891, "learning_rate": 1.7723173923240006e-05, "loss": 0.8627, "step": 7397 }, { "epoch": 24.255737704918033, "grad_norm": 7.944623947143555, "learning_rate": 1.772249932996107e-05, "loss": 1.0246, "step": 7398 }, { "epoch": 24.25901639344262, "grad_norm": 6.497435569763184, "learning_rate": 1.7721824649602923e-05, "loss": 0.8701, "step": 7399 }, { "epoch": 24.262295081967213, "grad_norm": 5.127861976623535, "learning_rate": 1.7721149882173176e-05, "loss": 0.8605, "step": 7400 }, { "epoch": 24.2655737704918, "grad_norm": 6.697866439819336, "learning_rate": 1.7720475027679433e-05, "loss": 0.7072, "step": 7401 }, { "epoch": 24.268852459016394, "grad_norm": 5.682217121124268, "learning_rate": 1.771980008612931e-05, "loss": 0.9011, "step": 7402 }, { "epoch": 24.272131147540982, "grad_norm": 6.084877967834473, "learning_rate": 1.7719125057530413e-05, "loss": 0.6439, "step": 7403 }, { "epoch": 24.275409836065574, "grad_norm": 5.369731903076172, "learning_rate": 1.7718449941890355e-05, "loss": 0.8344, "step": 7404 }, { "epoch": 24.278688524590162, "grad_norm": 6.239985942840576, "learning_rate": 1.771777473921675e-05, "loss": 0.8596, "step": 7405 }, { "epoch": 24.281967213114754, "grad_norm": 4.970897197723389, "learning_rate": 1.7717099449517208e-05, "loss": 0.7952, "step": 7406 }, { "epoch": 24.285245901639342, "grad_norm": 4.763228416442871, "learning_rate": 1.771642407279935e-05, "loss": 0.744, "step": 7407 }, { "epoch": 24.288524590163934, "grad_norm": 4.892712593078613, "learning_rate": 1.7715748609070782e-05, "loss": 0.8872, "step": 7408 }, { "epoch": 24.291803278688526, "grad_norm": 5.061582088470459, "learning_rate": 1.771507305833913e-05, "loss": 0.7675, "step": 7409 }, { "epoch": 24.295081967213115, "grad_norm": 5.133486747741699, "learning_rate": 1.7714397420612007e-05, "loss": 0.9072, "step": 7410 }, { "epoch": 24.298360655737707, "grad_norm": 5.5157294273376465, "learning_rate": 1.7713721695897028e-05, "loss": 0.7681, "step": 7411 }, { "epoch": 24.301639344262295, "grad_norm": 5.649496555328369, "learning_rate": 1.771304588420182e-05, "loss": 0.7909, "step": 7412 }, { "epoch": 24.304918032786887, "grad_norm": 6.2999796867370605, "learning_rate": 1.7712369985534e-05, "loss": 0.6508, "step": 7413 }, { "epoch": 24.308196721311475, "grad_norm": 4.589176654815674, "learning_rate": 1.771169399990119e-05, "loss": 0.9473, "step": 7414 }, { "epoch": 24.311475409836067, "grad_norm": 5.565499305725098, "learning_rate": 1.771101792731101e-05, "loss": 0.711, "step": 7415 }, { "epoch": 24.314754098360655, "grad_norm": 5.233538627624512, "learning_rate": 1.7710341767771088e-05, "loss": 0.9647, "step": 7416 }, { "epoch": 24.318032786885247, "grad_norm": 5.335744380950928, "learning_rate": 1.7709665521289045e-05, "loss": 0.8728, "step": 7417 }, { "epoch": 24.321311475409836, "grad_norm": 5.358399391174316, "learning_rate": 1.770898918787251e-05, "loss": 0.6851, "step": 7418 }, { "epoch": 24.324590163934428, "grad_norm": 5.734777927398682, "learning_rate": 1.7708312767529104e-05, "loss": 0.6796, "step": 7419 }, { "epoch": 24.327868852459016, "grad_norm": 5.7129130363464355, "learning_rate": 1.7707636260266453e-05, "loss": 1.1089, "step": 7420 }, { "epoch": 24.331147540983608, "grad_norm": 5.391568660736084, "learning_rate": 1.7706959666092195e-05, "loss": 0.7994, "step": 7421 }, { "epoch": 24.334426229508196, "grad_norm": 5.384868144989014, "learning_rate": 1.770628298501395e-05, "loss": 0.7958, "step": 7422 }, { "epoch": 24.337704918032788, "grad_norm": 4.755181312561035, "learning_rate": 1.7705606217039354e-05, "loss": 0.944, "step": 7423 }, { "epoch": 24.340983606557376, "grad_norm": 4.244866371154785, "learning_rate": 1.7704929362176035e-05, "loss": 0.6507, "step": 7424 }, { "epoch": 24.34426229508197, "grad_norm": 5.3887763023376465, "learning_rate": 1.770425242043163e-05, "loss": 0.7694, "step": 7425 }, { "epoch": 24.347540983606557, "grad_norm": 5.055049896240234, "learning_rate": 1.7703575391813764e-05, "loss": 0.9822, "step": 7426 }, { "epoch": 24.35081967213115, "grad_norm": 5.820952415466309, "learning_rate": 1.7702898276330077e-05, "loss": 0.6951, "step": 7427 }, { "epoch": 24.354098360655737, "grad_norm": 6.338258266448975, "learning_rate": 1.77022210739882e-05, "loss": 0.7583, "step": 7428 }, { "epoch": 24.35737704918033, "grad_norm": 5.004817008972168, "learning_rate": 1.770154378479578e-05, "loss": 0.8057, "step": 7429 }, { "epoch": 24.360655737704917, "grad_norm": 5.080676555633545, "learning_rate": 1.770086640876044e-05, "loss": 1.0416, "step": 7430 }, { "epoch": 24.36393442622951, "grad_norm": 7.234163761138916, "learning_rate": 1.7700188945889825e-05, "loss": 0.5411, "step": 7431 }, { "epoch": 24.367213114754097, "grad_norm": 9.027088165283203, "learning_rate": 1.7699511396191573e-05, "loss": 0.7929, "step": 7432 }, { "epoch": 24.37049180327869, "grad_norm": 6.364138603210449, "learning_rate": 1.7698833759673328e-05, "loss": 0.7753, "step": 7433 }, { "epoch": 24.373770491803278, "grad_norm": 5.165746688842773, "learning_rate": 1.7698156036342723e-05, "loss": 0.8258, "step": 7434 }, { "epoch": 24.37704918032787, "grad_norm": 4.971004009246826, "learning_rate": 1.7697478226207407e-05, "loss": 0.6549, "step": 7435 }, { "epoch": 24.380327868852458, "grad_norm": 5.370655536651611, "learning_rate": 1.769680032927502e-05, "loss": 1.1145, "step": 7436 }, { "epoch": 24.38360655737705, "grad_norm": 9.02771282196045, "learning_rate": 1.7696122345553205e-05, "loss": 0.7949, "step": 7437 }, { "epoch": 24.386885245901638, "grad_norm": 5.642470359802246, "learning_rate": 1.7695444275049613e-05, "loss": 0.8065, "step": 7438 }, { "epoch": 24.39016393442623, "grad_norm": 6.366202354431152, "learning_rate": 1.7694766117771884e-05, "loss": 0.7332, "step": 7439 }, { "epoch": 24.39344262295082, "grad_norm": 5.2770562171936035, "learning_rate": 1.7694087873727663e-05, "loss": 0.7606, "step": 7440 }, { "epoch": 24.39672131147541, "grad_norm": 5.720729351043701, "learning_rate": 1.7693409542924606e-05, "loss": 1.1388, "step": 7441 }, { "epoch": 24.4, "grad_norm": 5.404600620269775, "learning_rate": 1.7692731125370355e-05, "loss": 0.9905, "step": 7442 }, { "epoch": 24.40327868852459, "grad_norm": 5.131901741027832, "learning_rate": 1.7692052621072558e-05, "loss": 0.8316, "step": 7443 }, { "epoch": 24.40655737704918, "grad_norm": 4.281422138214111, "learning_rate": 1.769137403003888e-05, "loss": 0.9781, "step": 7444 }, { "epoch": 24.40983606557377, "grad_norm": 4.413338661193848, "learning_rate": 1.7690695352276953e-05, "loss": 1.0146, "step": 7445 }, { "epoch": 24.41311475409836, "grad_norm": 5.778479099273682, "learning_rate": 1.7690016587794447e-05, "loss": 0.7661, "step": 7446 }, { "epoch": 24.41639344262295, "grad_norm": 5.820570468902588, "learning_rate": 1.7689337736599002e-05, "loss": 0.922, "step": 7447 }, { "epoch": 24.41967213114754, "grad_norm": 5.282520294189453, "learning_rate": 1.7688658798698283e-05, "loss": 0.8705, "step": 7448 }, { "epoch": 24.42295081967213, "grad_norm": 5.3992791175842285, "learning_rate": 1.768797977409994e-05, "loss": 0.9316, "step": 7449 }, { "epoch": 24.42622950819672, "grad_norm": 5.381279468536377, "learning_rate": 1.7687300662811636e-05, "loss": 0.7234, "step": 7450 }, { "epoch": 24.42950819672131, "grad_norm": 5.063138961791992, "learning_rate": 1.7686621464841017e-05, "loss": 0.8863, "step": 7451 }, { "epoch": 24.432786885245903, "grad_norm": 5.483259201049805, "learning_rate": 1.7685942180195757e-05, "loss": 0.9863, "step": 7452 }, { "epoch": 24.43606557377049, "grad_norm": 5.299267292022705, "learning_rate": 1.7685262808883502e-05, "loss": 0.8909, "step": 7453 }, { "epoch": 24.439344262295084, "grad_norm": 6.488478183746338, "learning_rate": 1.7684583350911917e-05, "loss": 0.8339, "step": 7454 }, { "epoch": 24.442622950819672, "grad_norm": 6.545997619628906, "learning_rate": 1.768390380628867e-05, "loss": 0.8423, "step": 7455 }, { "epoch": 24.445901639344264, "grad_norm": 5.999199867248535, "learning_rate": 1.7683224175021414e-05, "loss": 0.7744, "step": 7456 }, { "epoch": 24.449180327868852, "grad_norm": 5.877775192260742, "learning_rate": 1.768254445711782e-05, "loss": 0.8093, "step": 7457 }, { "epoch": 24.452459016393444, "grad_norm": 5.937796592712402, "learning_rate": 1.7681864652585546e-05, "loss": 0.8232, "step": 7458 }, { "epoch": 24.455737704918032, "grad_norm": 5.364645957946777, "learning_rate": 1.768118476143226e-05, "loss": 0.9643, "step": 7459 }, { "epoch": 24.459016393442624, "grad_norm": 5.410261631011963, "learning_rate": 1.7680504783665635e-05, "loss": 0.6577, "step": 7460 }, { "epoch": 24.462295081967213, "grad_norm": 6.026125431060791, "learning_rate": 1.767982471929333e-05, "loss": 0.6324, "step": 7461 }, { "epoch": 24.465573770491805, "grad_norm": 6.4236159324646, "learning_rate": 1.7679144568323016e-05, "loss": 0.824, "step": 7462 }, { "epoch": 24.468852459016393, "grad_norm": 5.117640018463135, "learning_rate": 1.7678464330762363e-05, "loss": 0.9504, "step": 7463 }, { "epoch": 24.472131147540985, "grad_norm": 5.923989295959473, "learning_rate": 1.7677784006619042e-05, "loss": 0.7574, "step": 7464 }, { "epoch": 24.475409836065573, "grad_norm": 5.845634460449219, "learning_rate": 1.767710359590072e-05, "loss": 0.5442, "step": 7465 }, { "epoch": 24.478688524590165, "grad_norm": 4.94904088973999, "learning_rate": 1.7676423098615078e-05, "loss": 0.6965, "step": 7466 }, { "epoch": 24.481967213114753, "grad_norm": 4.805882453918457, "learning_rate": 1.767574251476978e-05, "loss": 0.8875, "step": 7467 }, { "epoch": 24.485245901639345, "grad_norm": 7.405672073364258, "learning_rate": 1.7675061844372504e-05, "loss": 0.9618, "step": 7468 }, { "epoch": 24.488524590163934, "grad_norm": 5.720219135284424, "learning_rate": 1.7674381087430927e-05, "loss": 0.9042, "step": 7469 }, { "epoch": 24.491803278688526, "grad_norm": 5.714809894561768, "learning_rate": 1.7673700243952726e-05, "loss": 0.5894, "step": 7470 }, { "epoch": 24.495081967213114, "grad_norm": 4.575840473175049, "learning_rate": 1.7673019313945573e-05, "loss": 0.9346, "step": 7471 }, { "epoch": 24.498360655737706, "grad_norm": 5.3874335289001465, "learning_rate": 1.767233829741715e-05, "loss": 0.8113, "step": 7472 }, { "epoch": 24.501639344262294, "grad_norm": 5.570457458496094, "learning_rate": 1.7671657194375137e-05, "loss": 0.7347, "step": 7473 }, { "epoch": 24.504918032786886, "grad_norm": 5.034276962280273, "learning_rate": 1.7670976004827212e-05, "loss": 0.6071, "step": 7474 }, { "epoch": 24.508196721311474, "grad_norm": 5.795191287994385, "learning_rate": 1.7670294728781055e-05, "loss": 0.8093, "step": 7475 }, { "epoch": 24.511475409836066, "grad_norm": 6.284567832946777, "learning_rate": 1.766961336624435e-05, "loss": 0.5603, "step": 7476 }, { "epoch": 24.514754098360655, "grad_norm": 5.8038740158081055, "learning_rate": 1.766893191722478e-05, "loss": 0.6929, "step": 7477 }, { "epoch": 24.518032786885247, "grad_norm": 5.880605220794678, "learning_rate": 1.7668250381730036e-05, "loss": 0.939, "step": 7478 }, { "epoch": 24.521311475409835, "grad_norm": 4.504035472869873, "learning_rate": 1.7667568759767787e-05, "loss": 0.7461, "step": 7479 }, { "epoch": 24.524590163934427, "grad_norm": 33.48543930053711, "learning_rate": 1.7666887051345727e-05, "loss": 0.6294, "step": 7480 }, { "epoch": 24.527868852459015, "grad_norm": 5.051793575286865, "learning_rate": 1.766620525647155e-05, "loss": 0.8861, "step": 7481 }, { "epoch": 24.531147540983607, "grad_norm": 6.030722141265869, "learning_rate": 1.7665523375152934e-05, "loss": 0.5172, "step": 7482 }, { "epoch": 24.534426229508195, "grad_norm": 5.028306484222412, "learning_rate": 1.7664841407397575e-05, "loss": 0.8454, "step": 7483 }, { "epoch": 24.537704918032787, "grad_norm": 5.241796016693115, "learning_rate": 1.766415935321316e-05, "loss": 0.9172, "step": 7484 }, { "epoch": 24.540983606557376, "grad_norm": 7.031108856201172, "learning_rate": 1.7663477212607375e-05, "loss": 0.8623, "step": 7485 }, { "epoch": 24.544262295081968, "grad_norm": 4.68691873550415, "learning_rate": 1.766279498558792e-05, "loss": 0.8104, "step": 7486 }, { "epoch": 24.547540983606556, "grad_norm": 5.021788120269775, "learning_rate": 1.7662112672162485e-05, "loss": 0.8707, "step": 7487 }, { "epoch": 24.550819672131148, "grad_norm": 5.808605670928955, "learning_rate": 1.7661430272338764e-05, "loss": 0.651, "step": 7488 }, { "epoch": 24.554098360655736, "grad_norm": 10.664929389953613, "learning_rate": 1.7660747786124445e-05, "loss": 0.7421, "step": 7489 }, { "epoch": 24.557377049180328, "grad_norm": 6.47233772277832, "learning_rate": 1.7660065213527233e-05, "loss": 0.715, "step": 7490 }, { "epoch": 24.560655737704916, "grad_norm": 5.329986572265625, "learning_rate": 1.7659382554554822e-05, "loss": 0.8858, "step": 7491 }, { "epoch": 24.56393442622951, "grad_norm": 5.757015228271484, "learning_rate": 1.7658699809214906e-05, "loss": 0.6516, "step": 7492 }, { "epoch": 24.567213114754097, "grad_norm": 4.513872146606445, "learning_rate": 1.765801697751519e-05, "loss": 0.7912, "step": 7493 }, { "epoch": 24.57049180327869, "grad_norm": 4.7334065437316895, "learning_rate": 1.765733405946337e-05, "loss": 0.6974, "step": 7494 }, { "epoch": 24.57377049180328, "grad_norm": 5.642387866973877, "learning_rate": 1.7656651055067146e-05, "loss": 0.7892, "step": 7495 }, { "epoch": 24.57704918032787, "grad_norm": 5.707745552062988, "learning_rate": 1.7655967964334218e-05, "loss": 0.7643, "step": 7496 }, { "epoch": 24.58032786885246, "grad_norm": 6.297203540802002, "learning_rate": 1.7655284787272292e-05, "loss": 0.7731, "step": 7497 }, { "epoch": 24.58360655737705, "grad_norm": 6.663912296295166, "learning_rate": 1.7654601523889073e-05, "loss": 0.8824, "step": 7498 }, { "epoch": 24.58688524590164, "grad_norm": 5.235828876495361, "learning_rate": 1.765391817419226e-05, "loss": 0.6191, "step": 7499 }, { "epoch": 24.59016393442623, "grad_norm": 5.8159565925598145, "learning_rate": 1.7653234738189565e-05, "loss": 0.6718, "step": 7500 }, { "epoch": 24.59344262295082, "grad_norm": 5.8742570877075195, "learning_rate": 1.7652551215888688e-05, "loss": 0.9329, "step": 7501 }, { "epoch": 24.59672131147541, "grad_norm": 5.9665141105651855, "learning_rate": 1.765186760729734e-05, "loss": 0.744, "step": 7502 }, { "epoch": 24.6, "grad_norm": 7.371143817901611, "learning_rate": 1.7651183912423228e-05, "loss": 0.9224, "step": 7503 }, { "epoch": 24.60327868852459, "grad_norm": 5.462342262268066, "learning_rate": 1.7650500131274064e-05, "loss": 0.8207, "step": 7504 }, { "epoch": 24.60655737704918, "grad_norm": 4.518479824066162, "learning_rate": 1.764981626385756e-05, "loss": 0.8451, "step": 7505 }, { "epoch": 24.60983606557377, "grad_norm": 4.719971656799316, "learning_rate": 1.7649132310181416e-05, "loss": 0.8108, "step": 7506 }, { "epoch": 24.613114754098362, "grad_norm": 5.442831039428711, "learning_rate": 1.7648448270253356e-05, "loss": 0.7166, "step": 7507 }, { "epoch": 24.61639344262295, "grad_norm": 5.771867275238037, "learning_rate": 1.764776414408109e-05, "loss": 0.7199, "step": 7508 }, { "epoch": 24.619672131147542, "grad_norm": 5.941043376922607, "learning_rate": 1.764707993167233e-05, "loss": 0.8247, "step": 7509 }, { "epoch": 24.62295081967213, "grad_norm": 5.58025598526001, "learning_rate": 1.7646395633034793e-05, "loss": 0.7691, "step": 7510 }, { "epoch": 24.626229508196722, "grad_norm": 4.974597454071045, "learning_rate": 1.7645711248176198e-05, "loss": 1.1276, "step": 7511 }, { "epoch": 24.62950819672131, "grad_norm": 5.506832599639893, "learning_rate": 1.7645026777104254e-05, "loss": 0.8413, "step": 7512 }, { "epoch": 24.632786885245903, "grad_norm": 5.580687999725342, "learning_rate": 1.7644342219826688e-05, "loss": 0.7563, "step": 7513 }, { "epoch": 24.63606557377049, "grad_norm": 4.320910453796387, "learning_rate": 1.7643657576351213e-05, "loss": 0.8735, "step": 7514 }, { "epoch": 24.639344262295083, "grad_norm": 5.896734237670898, "learning_rate": 1.7642972846685552e-05, "loss": 0.6785, "step": 7515 }, { "epoch": 24.64262295081967, "grad_norm": 5.62209415435791, "learning_rate": 1.764228803083743e-05, "loss": 0.868, "step": 7516 }, { "epoch": 24.645901639344263, "grad_norm": 6.194134712219238, "learning_rate": 1.764160312881456e-05, "loss": 0.9957, "step": 7517 }, { "epoch": 24.64918032786885, "grad_norm": 5.361535549163818, "learning_rate": 1.764091814062467e-05, "loss": 0.8856, "step": 7518 }, { "epoch": 24.652459016393443, "grad_norm": 5.937859058380127, "learning_rate": 1.7640233066275484e-05, "loss": 0.7937, "step": 7519 }, { "epoch": 24.65573770491803, "grad_norm": 5.003681182861328, "learning_rate": 1.7639547905774724e-05, "loss": 0.8475, "step": 7520 }, { "epoch": 24.659016393442624, "grad_norm": 8.1160249710083, "learning_rate": 1.763886265913012e-05, "loss": 0.9686, "step": 7521 }, { "epoch": 24.662295081967212, "grad_norm": 10.310317993164062, "learning_rate": 1.7638177326349394e-05, "loss": 0.7991, "step": 7522 }, { "epoch": 24.665573770491804, "grad_norm": 6.170689105987549, "learning_rate": 1.763749190744028e-05, "loss": 0.9407, "step": 7523 }, { "epoch": 24.668852459016392, "grad_norm": 6.196609973907471, "learning_rate": 1.7636806402410503e-05, "loss": 0.8846, "step": 7524 }, { "epoch": 24.672131147540984, "grad_norm": 5.620493412017822, "learning_rate": 1.7636120811267798e-05, "loss": 1.0011, "step": 7525 }, { "epoch": 24.675409836065572, "grad_norm": 4.9476847648620605, "learning_rate": 1.7635435134019887e-05, "loss": 0.871, "step": 7526 }, { "epoch": 24.678688524590164, "grad_norm": 5.635811805725098, "learning_rate": 1.7634749370674506e-05, "loss": 0.8435, "step": 7527 }, { "epoch": 24.681967213114753, "grad_norm": 5.496586322784424, "learning_rate": 1.763406352123939e-05, "loss": 0.8004, "step": 7528 }, { "epoch": 24.685245901639345, "grad_norm": 5.4851250648498535, "learning_rate": 1.763337758572227e-05, "loss": 0.8791, "step": 7529 }, { "epoch": 24.688524590163933, "grad_norm": 4.824306964874268, "learning_rate": 1.763269156413088e-05, "loss": 0.8407, "step": 7530 }, { "epoch": 24.691803278688525, "grad_norm": 5.193410396575928, "learning_rate": 1.7632005456472954e-05, "loss": 0.8599, "step": 7531 }, { "epoch": 24.695081967213113, "grad_norm": 5.313209533691406, "learning_rate": 1.763131926275623e-05, "loss": 0.7644, "step": 7532 }, { "epoch": 24.698360655737705, "grad_norm": 4.525504112243652, "learning_rate": 1.763063298298845e-05, "loss": 0.9224, "step": 7533 }, { "epoch": 24.701639344262293, "grad_norm": 7.708477020263672, "learning_rate": 1.7629946617177355e-05, "loss": 0.9249, "step": 7534 }, { "epoch": 24.704918032786885, "grad_norm": 4.890624523162842, "learning_rate": 1.762926016533067e-05, "loss": 0.9025, "step": 7535 }, { "epoch": 24.708196721311474, "grad_norm": 5.35330867767334, "learning_rate": 1.7628573627456148e-05, "loss": 0.943, "step": 7536 }, { "epoch": 24.711475409836066, "grad_norm": 4.783838272094727, "learning_rate": 1.7627887003561525e-05, "loss": 0.6948, "step": 7537 }, { "epoch": 24.714754098360658, "grad_norm": 5.297825813293457, "learning_rate": 1.7627200293654545e-05, "loss": 0.8054, "step": 7538 }, { "epoch": 24.718032786885246, "grad_norm": 4.940356254577637, "learning_rate": 1.7626513497742954e-05, "loss": 0.711, "step": 7539 }, { "epoch": 24.721311475409838, "grad_norm": 5.169850826263428, "learning_rate": 1.7625826615834492e-05, "loss": 0.9926, "step": 7540 }, { "epoch": 24.724590163934426, "grad_norm": 5.285417556762695, "learning_rate": 1.7625139647936906e-05, "loss": 0.6317, "step": 7541 }, { "epoch": 24.727868852459018, "grad_norm": 5.793093681335449, "learning_rate": 1.7624452594057944e-05, "loss": 0.7686, "step": 7542 }, { "epoch": 24.731147540983606, "grad_norm": 8.491374015808105, "learning_rate": 1.7623765454205348e-05, "loss": 0.7636, "step": 7543 }, { "epoch": 24.7344262295082, "grad_norm": 5.221920013427734, "learning_rate": 1.7623078228386873e-05, "loss": 0.7434, "step": 7544 }, { "epoch": 24.737704918032787, "grad_norm": 5.198134422302246, "learning_rate": 1.762239091661026e-05, "loss": 0.8112, "step": 7545 }, { "epoch": 24.74098360655738, "grad_norm": 6.047178268432617, "learning_rate": 1.7621703518883272e-05, "loss": 0.8916, "step": 7546 }, { "epoch": 24.744262295081967, "grad_norm": 8.572222709655762, "learning_rate": 1.7621016035213646e-05, "loss": 1.0026, "step": 7547 }, { "epoch": 24.74754098360656, "grad_norm": 7.808871269226074, "learning_rate": 1.7620328465609144e-05, "loss": 0.8062, "step": 7548 }, { "epoch": 24.750819672131147, "grad_norm": 6.538837909698486, "learning_rate": 1.7619640810077512e-05, "loss": 0.7772, "step": 7549 }, { "epoch": 24.75409836065574, "grad_norm": 6.279270648956299, "learning_rate": 1.761895306862651e-05, "loss": 0.5992, "step": 7550 }, { "epoch": 24.757377049180327, "grad_norm": 4.777764320373535, "learning_rate": 1.761826524126389e-05, "loss": 0.9579, "step": 7551 }, { "epoch": 24.76065573770492, "grad_norm": 5.730072498321533, "learning_rate": 1.7617577327997408e-05, "loss": 0.8658, "step": 7552 }, { "epoch": 24.763934426229508, "grad_norm": 5.139437675476074, "learning_rate": 1.761688932883482e-05, "loss": 0.9035, "step": 7553 }, { "epoch": 24.7672131147541, "grad_norm": 5.970108509063721, "learning_rate": 1.7616201243783887e-05, "loss": 0.8642, "step": 7554 }, { "epoch": 24.770491803278688, "grad_norm": 4.658074855804443, "learning_rate": 1.7615513072852365e-05, "loss": 0.7933, "step": 7555 }, { "epoch": 24.77377049180328, "grad_norm": 6.6338958740234375, "learning_rate": 1.7614824816048016e-05, "loss": 0.9265, "step": 7556 }, { "epoch": 24.777049180327868, "grad_norm": 5.745189666748047, "learning_rate": 1.76141364733786e-05, "loss": 1.113, "step": 7557 }, { "epoch": 24.78032786885246, "grad_norm": 4.934422016143799, "learning_rate": 1.7613448044851876e-05, "loss": 0.676, "step": 7558 }, { "epoch": 24.78360655737705, "grad_norm": 6.456411838531494, "learning_rate": 1.7612759530475613e-05, "loss": 0.6603, "step": 7559 }, { "epoch": 24.78688524590164, "grad_norm": 5.533995151519775, "learning_rate": 1.761207093025757e-05, "loss": 1.0324, "step": 7560 }, { "epoch": 24.79016393442623, "grad_norm": 4.662745475769043, "learning_rate": 1.761138224420551e-05, "loss": 0.9103, "step": 7561 }, { "epoch": 24.79344262295082, "grad_norm": 5.005630970001221, "learning_rate": 1.7610693472327206e-05, "loss": 0.9174, "step": 7562 }, { "epoch": 24.79672131147541, "grad_norm": 5.921098709106445, "learning_rate": 1.7610004614630414e-05, "loss": 0.7451, "step": 7563 }, { "epoch": 24.8, "grad_norm": 4.988302230834961, "learning_rate": 1.7609315671122912e-05, "loss": 0.9288, "step": 7564 }, { "epoch": 24.80327868852459, "grad_norm": 4.888702869415283, "learning_rate": 1.7608626641812464e-05, "loss": 1.1388, "step": 7565 }, { "epoch": 24.80655737704918, "grad_norm": 5.635329246520996, "learning_rate": 1.7607937526706838e-05, "loss": 0.8341, "step": 7566 }, { "epoch": 24.80983606557377, "grad_norm": 4.727504253387451, "learning_rate": 1.7607248325813806e-05, "loss": 0.887, "step": 7567 }, { "epoch": 24.81311475409836, "grad_norm": 5.671595096588135, "learning_rate": 1.760655903914114e-05, "loss": 0.8698, "step": 7568 }, { "epoch": 24.81639344262295, "grad_norm": 4.634734153747559, "learning_rate": 1.7605869666696613e-05, "loss": 0.9451, "step": 7569 }, { "epoch": 24.81967213114754, "grad_norm": 6.641570568084717, "learning_rate": 1.7605180208487996e-05, "loss": 0.5391, "step": 7570 }, { "epoch": 24.82295081967213, "grad_norm": 5.654496192932129, "learning_rate": 1.7604490664523066e-05, "loss": 0.8541, "step": 7571 }, { "epoch": 24.82622950819672, "grad_norm": 5.016869068145752, "learning_rate": 1.7603801034809597e-05, "loss": 0.8518, "step": 7572 }, { "epoch": 24.82950819672131, "grad_norm": 5.40194034576416, "learning_rate": 1.7603111319355366e-05, "loss": 0.8378, "step": 7573 }, { "epoch": 24.832786885245902, "grad_norm": 5.596675872802734, "learning_rate": 1.7602421518168147e-05, "loss": 0.9135, "step": 7574 }, { "epoch": 24.83606557377049, "grad_norm": 5.699249267578125, "learning_rate": 1.760173163125572e-05, "loss": 0.868, "step": 7575 }, { "epoch": 24.839344262295082, "grad_norm": 6.693991184234619, "learning_rate": 1.760104165862587e-05, "loss": 0.8758, "step": 7576 }, { "epoch": 24.84262295081967, "grad_norm": 6.010105133056641, "learning_rate": 1.760035160028637e-05, "loss": 0.7467, "step": 7577 }, { "epoch": 24.845901639344262, "grad_norm": 5.483595848083496, "learning_rate": 1.7599661456245e-05, "loss": 0.9731, "step": 7578 }, { "epoch": 24.84918032786885, "grad_norm": 4.265044689178467, "learning_rate": 1.759897122650955e-05, "loss": 0.77, "step": 7579 }, { "epoch": 24.852459016393443, "grad_norm": 4.8170247077941895, "learning_rate": 1.7598280911087794e-05, "loss": 0.7875, "step": 7580 }, { "epoch": 24.855737704918035, "grad_norm": 6.738106727600098, "learning_rate": 1.7597590509987524e-05, "loss": 0.9877, "step": 7581 }, { "epoch": 24.859016393442623, "grad_norm": 5.5819854736328125, "learning_rate": 1.7596900023216523e-05, "loss": 0.7819, "step": 7582 }, { "epoch": 24.862295081967215, "grad_norm": 9.373191833496094, "learning_rate": 1.7596209450782573e-05, "loss": 1.012, "step": 7583 }, { "epoch": 24.865573770491803, "grad_norm": 5.506166934967041, "learning_rate": 1.7595518792693463e-05, "loss": 0.8334, "step": 7584 }, { "epoch": 24.868852459016395, "grad_norm": 6.249789237976074, "learning_rate": 1.7594828048956983e-05, "loss": 0.94, "step": 7585 }, { "epoch": 24.872131147540983, "grad_norm": 6.067966461181641, "learning_rate": 1.7594137219580917e-05, "loss": 0.8246, "step": 7586 }, { "epoch": 24.875409836065575, "grad_norm": 5.416373252868652, "learning_rate": 1.759344630457306e-05, "loss": 0.8261, "step": 7587 }, { "epoch": 24.878688524590164, "grad_norm": 5.103262901306152, "learning_rate": 1.75927553039412e-05, "loss": 0.8405, "step": 7588 }, { "epoch": 24.881967213114756, "grad_norm": 4.740615367889404, "learning_rate": 1.759206421769313e-05, "loss": 0.7588, "step": 7589 }, { "epoch": 24.885245901639344, "grad_norm": 5.813396453857422, "learning_rate": 1.759137304583664e-05, "loss": 0.932, "step": 7590 }, { "epoch": 24.888524590163936, "grad_norm": 5.420207500457764, "learning_rate": 1.759068178837953e-05, "loss": 0.7908, "step": 7591 }, { "epoch": 24.891803278688524, "grad_norm": 7.2941813468933105, "learning_rate": 1.758999044532959e-05, "loss": 0.746, "step": 7592 }, { "epoch": 24.895081967213116, "grad_norm": 5.284756660461426, "learning_rate": 1.758929901669461e-05, "loss": 0.595, "step": 7593 }, { "epoch": 24.898360655737704, "grad_norm": 6.061948299407959, "learning_rate": 1.75886075024824e-05, "loss": 0.7144, "step": 7594 }, { "epoch": 24.901639344262296, "grad_norm": 6.363539218902588, "learning_rate": 1.7587915902700748e-05, "loss": 0.7819, "step": 7595 }, { "epoch": 24.904918032786885, "grad_norm": 5.679895877838135, "learning_rate": 1.7587224217357456e-05, "loss": 0.8156, "step": 7596 }, { "epoch": 24.908196721311477, "grad_norm": 5.853405952453613, "learning_rate": 1.758653244646032e-05, "loss": 0.7307, "step": 7597 }, { "epoch": 24.911475409836065, "grad_norm": 5.371512413024902, "learning_rate": 1.7585840590017143e-05, "loss": 0.7388, "step": 7598 }, { "epoch": 24.914754098360657, "grad_norm": 8.065546035766602, "learning_rate": 1.7585148648035726e-05, "loss": 0.9734, "step": 7599 }, { "epoch": 24.918032786885245, "grad_norm": 6.512619495391846, "learning_rate": 1.7584456620523873e-05, "loss": 0.8671, "step": 7600 }, { "epoch": 24.921311475409837, "grad_norm": 4.316613674163818, "learning_rate": 1.7583764507489386e-05, "loss": 0.9159, "step": 7601 }, { "epoch": 24.924590163934425, "grad_norm": 4.626768589019775, "learning_rate": 1.7583072308940066e-05, "loss": 0.9241, "step": 7602 }, { "epoch": 24.927868852459017, "grad_norm": 4.516655445098877, "learning_rate": 1.7582380024883722e-05, "loss": 0.7119, "step": 7603 }, { "epoch": 24.931147540983606, "grad_norm": 6.230130672454834, "learning_rate": 1.758168765532816e-05, "loss": 1.0827, "step": 7604 }, { "epoch": 24.934426229508198, "grad_norm": 5.055631160736084, "learning_rate": 1.7580995200281187e-05, "loss": 0.8242, "step": 7605 }, { "epoch": 24.937704918032786, "grad_norm": 5.123078346252441, "learning_rate": 1.758030265975061e-05, "loss": 0.65, "step": 7606 }, { "epoch": 24.940983606557378, "grad_norm": 7.488478183746338, "learning_rate": 1.757961003374424e-05, "loss": 0.7418, "step": 7607 }, { "epoch": 24.944262295081966, "grad_norm": 5.268954277038574, "learning_rate": 1.7578917322269885e-05, "loss": 0.9789, "step": 7608 }, { "epoch": 24.947540983606558, "grad_norm": 7.025399684906006, "learning_rate": 1.757822452533536e-05, "loss": 0.7181, "step": 7609 }, { "epoch": 24.950819672131146, "grad_norm": 4.935618877410889, "learning_rate": 1.757753164294847e-05, "loss": 0.8075, "step": 7610 }, { "epoch": 24.95409836065574, "grad_norm": 6.712396621704102, "learning_rate": 1.7576838675117038e-05, "loss": 0.9368, "step": 7611 }, { "epoch": 24.957377049180327, "grad_norm": 4.341930389404297, "learning_rate": 1.7576145621848865e-05, "loss": 0.8624, "step": 7612 }, { "epoch": 24.96065573770492, "grad_norm": 5.761560440063477, "learning_rate": 1.7575452483151778e-05, "loss": 0.9752, "step": 7613 }, { "epoch": 24.963934426229507, "grad_norm": 4.8437981605529785, "learning_rate": 1.7574759259033586e-05, "loss": 0.9305, "step": 7614 }, { "epoch": 24.9672131147541, "grad_norm": 5.144506931304932, "learning_rate": 1.7574065949502107e-05, "loss": 1.0357, "step": 7615 }, { "epoch": 24.970491803278687, "grad_norm": 4.3898024559021, "learning_rate": 1.757337255456516e-05, "loss": 0.8149, "step": 7616 }, { "epoch": 24.97377049180328, "grad_norm": 4.207645893096924, "learning_rate": 1.757267907423056e-05, "loss": 0.7146, "step": 7617 }, { "epoch": 24.977049180327867, "grad_norm": 5.2263031005859375, "learning_rate": 1.757198550850613e-05, "loss": 0.7279, "step": 7618 }, { "epoch": 24.98032786885246, "grad_norm": 5.505634784698486, "learning_rate": 1.7571291857399696e-05, "loss": 0.7157, "step": 7619 }, { "epoch": 24.983606557377048, "grad_norm": 5.634832382202148, "learning_rate": 1.757059812091907e-05, "loss": 0.6902, "step": 7620 }, { "epoch": 24.98688524590164, "grad_norm": 4.476668357849121, "learning_rate": 1.7569904299072084e-05, "loss": 0.9674, "step": 7621 }, { "epoch": 24.990163934426228, "grad_norm": 6.083218097686768, "learning_rate": 1.756921039186655e-05, "loss": 0.8105, "step": 7622 }, { "epoch": 24.99344262295082, "grad_norm": 15.884045600891113, "learning_rate": 1.75685163993103e-05, "loss": 0.7178, "step": 7623 }, { "epoch": 24.99672131147541, "grad_norm": 4.786759376525879, "learning_rate": 1.756782232141116e-05, "loss": 0.8852, "step": 7624 }, { "epoch": 25.0, "grad_norm": 5.564047336578369, "learning_rate": 1.7567128158176955e-05, "loss": 0.6491, "step": 7625 }, { "epoch": 25.003278688524592, "grad_norm": 4.798304557800293, "learning_rate": 1.756643390961551e-05, "loss": 0.6637, "step": 7626 }, { "epoch": 25.00655737704918, "grad_norm": 5.352961540222168, "learning_rate": 1.7565739575734657e-05, "loss": 0.8096, "step": 7627 }, { "epoch": 25.009836065573772, "grad_norm": 5.594393730163574, "learning_rate": 1.7565045156542224e-05, "loss": 0.7506, "step": 7628 }, { "epoch": 25.01311475409836, "grad_norm": 4.680536270141602, "learning_rate": 1.756435065204604e-05, "loss": 0.5996, "step": 7629 }, { "epoch": 25.016393442622952, "grad_norm": 5.0904998779296875, "learning_rate": 1.756365606225394e-05, "loss": 0.8345, "step": 7630 }, { "epoch": 25.01967213114754, "grad_norm": 4.747957229614258, "learning_rate": 1.756296138717375e-05, "loss": 0.7688, "step": 7631 }, { "epoch": 25.022950819672133, "grad_norm": 4.838944435119629, "learning_rate": 1.7562266626813312e-05, "loss": 0.9326, "step": 7632 }, { "epoch": 25.02622950819672, "grad_norm": 5.1969990730285645, "learning_rate": 1.756157178118045e-05, "loss": 0.8815, "step": 7633 }, { "epoch": 25.029508196721313, "grad_norm": 4.507811546325684, "learning_rate": 1.7560876850283007e-05, "loss": 0.9275, "step": 7634 }, { "epoch": 25.0327868852459, "grad_norm": 5.759202480316162, "learning_rate": 1.7560181834128817e-05, "loss": 0.6152, "step": 7635 }, { "epoch": 25.036065573770493, "grad_norm": 5.631265640258789, "learning_rate": 1.7559486732725714e-05, "loss": 0.8047, "step": 7636 }, { "epoch": 25.03934426229508, "grad_norm": 4.682046890258789, "learning_rate": 1.755879154608154e-05, "loss": 0.8599, "step": 7637 }, { "epoch": 25.042622950819673, "grad_norm": 5.953055381774902, "learning_rate": 1.7558096274204128e-05, "loss": 0.6655, "step": 7638 }, { "epoch": 25.04590163934426, "grad_norm": 4.861567974090576, "learning_rate": 1.7557400917101324e-05, "loss": 0.9756, "step": 7639 }, { "epoch": 25.049180327868854, "grad_norm": 5.6869659423828125, "learning_rate": 1.755670547478097e-05, "loss": 0.7625, "step": 7640 }, { "epoch": 25.052459016393442, "grad_norm": 5.420792579650879, "learning_rate": 1.75560099472509e-05, "loss": 0.5529, "step": 7641 }, { "epoch": 25.055737704918034, "grad_norm": 5.285811901092529, "learning_rate": 1.7555314334518965e-05, "loss": 0.79, "step": 7642 }, { "epoch": 25.059016393442622, "grad_norm": 5.573616027832031, "learning_rate": 1.7554618636593004e-05, "loss": 0.6124, "step": 7643 }, { "epoch": 25.062295081967214, "grad_norm": 4.910099029541016, "learning_rate": 1.7553922853480862e-05, "loss": 0.9486, "step": 7644 }, { "epoch": 25.065573770491802, "grad_norm": 4.593179702758789, "learning_rate": 1.755322698519039e-05, "loss": 0.9992, "step": 7645 }, { "epoch": 25.068852459016394, "grad_norm": 6.534600257873535, "learning_rate": 1.7552531031729424e-05, "loss": 0.8139, "step": 7646 }, { "epoch": 25.072131147540983, "grad_norm": 5.1005682945251465, "learning_rate": 1.7551834993105825e-05, "loss": 0.7873, "step": 7647 }, { "epoch": 25.075409836065575, "grad_norm": 5.34377384185791, "learning_rate": 1.755113886932743e-05, "loss": 0.8223, "step": 7648 }, { "epoch": 25.078688524590163, "grad_norm": 5.253283977508545, "learning_rate": 1.7550442660402094e-05, "loss": 0.6828, "step": 7649 }, { "epoch": 25.081967213114755, "grad_norm": 6.1447930335998535, "learning_rate": 1.7549746366337665e-05, "loss": 0.6897, "step": 7650 }, { "epoch": 25.085245901639343, "grad_norm": 5.137468338012695, "learning_rate": 1.7549049987141997e-05, "loss": 0.9023, "step": 7651 }, { "epoch": 25.088524590163935, "grad_norm": 6.106145858764648, "learning_rate": 1.7548353522822946e-05, "loss": 0.6038, "step": 7652 }, { "epoch": 25.091803278688523, "grad_norm": 5.563755989074707, "learning_rate": 1.7547656973388353e-05, "loss": 0.895, "step": 7653 }, { "epoch": 25.095081967213115, "grad_norm": 5.92116641998291, "learning_rate": 1.7546960338846085e-05, "loss": 0.8086, "step": 7654 }, { "epoch": 25.098360655737704, "grad_norm": 4.957339286804199, "learning_rate": 1.7546263619203992e-05, "loss": 0.8527, "step": 7655 }, { "epoch": 25.101639344262296, "grad_norm": 6.590178489685059, "learning_rate": 1.7545566814469928e-05, "loss": 0.9071, "step": 7656 }, { "epoch": 25.104918032786884, "grad_norm": 5.381069183349609, "learning_rate": 1.7544869924651754e-05, "loss": 0.6346, "step": 7657 }, { "epoch": 25.108196721311476, "grad_norm": 5.028166770935059, "learning_rate": 1.754417294975733e-05, "loss": 0.8055, "step": 7658 }, { "epoch": 25.111475409836064, "grad_norm": 5.8579301834106445, "learning_rate": 1.754347588979451e-05, "loss": 0.743, "step": 7659 }, { "epoch": 25.114754098360656, "grad_norm": 4.737020492553711, "learning_rate": 1.754277874477115e-05, "loss": 0.7644, "step": 7660 }, { "epoch": 25.118032786885244, "grad_norm": 5.044886112213135, "learning_rate": 1.7542081514695122e-05, "loss": 0.6358, "step": 7661 }, { "epoch": 25.121311475409836, "grad_norm": 5.092952251434326, "learning_rate": 1.7541384199574285e-05, "loss": 0.7769, "step": 7662 }, { "epoch": 25.124590163934425, "grad_norm": 5.463547229766846, "learning_rate": 1.75406867994165e-05, "loss": 0.9083, "step": 7663 }, { "epoch": 25.127868852459017, "grad_norm": 23.57935905456543, "learning_rate": 1.7539989314229628e-05, "loss": 0.8371, "step": 7664 }, { "epoch": 25.131147540983605, "grad_norm": 5.409430503845215, "learning_rate": 1.753929174402154e-05, "loss": 0.9069, "step": 7665 }, { "epoch": 25.134426229508197, "grad_norm": 4.9894795417785645, "learning_rate": 1.7538594088800097e-05, "loss": 0.6172, "step": 7666 }, { "epoch": 25.137704918032785, "grad_norm": 5.822718620300293, "learning_rate": 1.7537896348573166e-05, "loss": 0.8317, "step": 7667 }, { "epoch": 25.140983606557377, "grad_norm": 6.040454387664795, "learning_rate": 1.7537198523348617e-05, "loss": 0.4939, "step": 7668 }, { "epoch": 25.14426229508197, "grad_norm": 4.799689292907715, "learning_rate": 1.753650061313432e-05, "loss": 0.5389, "step": 7669 }, { "epoch": 25.147540983606557, "grad_norm": 4.791372776031494, "learning_rate": 1.7535802617938143e-05, "loss": 0.646, "step": 7670 }, { "epoch": 25.15081967213115, "grad_norm": 5.045995235443115, "learning_rate": 1.7535104537767952e-05, "loss": 0.9794, "step": 7671 }, { "epoch": 25.154098360655738, "grad_norm": 7.299939155578613, "learning_rate": 1.7534406372631626e-05, "loss": 0.704, "step": 7672 }, { "epoch": 25.15737704918033, "grad_norm": 5.544373512268066, "learning_rate": 1.7533708122537034e-05, "loss": 0.627, "step": 7673 }, { "epoch": 25.160655737704918, "grad_norm": 7.191282272338867, "learning_rate": 1.7533009787492048e-05, "loss": 0.6675, "step": 7674 }, { "epoch": 25.16393442622951, "grad_norm": 4.652393341064453, "learning_rate": 1.7532311367504548e-05, "loss": 0.7147, "step": 7675 }, { "epoch": 25.167213114754098, "grad_norm": 5.343196392059326, "learning_rate": 1.75316128625824e-05, "loss": 0.6473, "step": 7676 }, { "epoch": 25.17049180327869, "grad_norm": 4.690279483795166, "learning_rate": 1.7530914272733493e-05, "loss": 0.7396, "step": 7677 }, { "epoch": 25.17377049180328, "grad_norm": 6.021032810211182, "learning_rate": 1.7530215597965692e-05, "loss": 0.6989, "step": 7678 }, { "epoch": 25.17704918032787, "grad_norm": 6.383136749267578, "learning_rate": 1.752951683828688e-05, "loss": 0.9255, "step": 7679 }, { "epoch": 25.18032786885246, "grad_norm": 5.687963008880615, "learning_rate": 1.7528817993704942e-05, "loss": 0.6826, "step": 7680 }, { "epoch": 25.18360655737705, "grad_norm": 5.895388126373291, "learning_rate": 1.752811906422775e-05, "loss": 0.7823, "step": 7681 }, { "epoch": 25.18688524590164, "grad_norm": 4.942539215087891, "learning_rate": 1.7527420049863192e-05, "loss": 0.5756, "step": 7682 }, { "epoch": 25.19016393442623, "grad_norm": 4.755619049072266, "learning_rate": 1.752672095061914e-05, "loss": 0.7444, "step": 7683 }, { "epoch": 25.19344262295082, "grad_norm": 5.139002323150635, "learning_rate": 1.7526021766503487e-05, "loss": 0.9016, "step": 7684 }, { "epoch": 25.19672131147541, "grad_norm": 5.7590227127075195, "learning_rate": 1.7525322497524114e-05, "loss": 0.8187, "step": 7685 }, { "epoch": 25.2, "grad_norm": 5.680117130279541, "learning_rate": 1.7524623143688905e-05, "loss": 0.8747, "step": 7686 }, { "epoch": 25.20327868852459, "grad_norm": 5.067161560058594, "learning_rate": 1.7523923705005742e-05, "loss": 0.7033, "step": 7687 }, { "epoch": 25.20655737704918, "grad_norm": 5.199286937713623, "learning_rate": 1.7523224181482522e-05, "loss": 0.8354, "step": 7688 }, { "epoch": 25.20983606557377, "grad_norm": 5.788562774658203, "learning_rate": 1.7522524573127127e-05, "loss": 0.8855, "step": 7689 }, { "epoch": 25.21311475409836, "grad_norm": 6.396856307983398, "learning_rate": 1.7521824879947446e-05, "loss": 0.9612, "step": 7690 }, { "epoch": 25.21639344262295, "grad_norm": 6.214943885803223, "learning_rate": 1.7521125101951365e-05, "loss": 0.7281, "step": 7691 }, { "epoch": 25.21967213114754, "grad_norm": 5.292761325836182, "learning_rate": 1.752042523914678e-05, "loss": 0.9319, "step": 7692 }, { "epoch": 25.222950819672132, "grad_norm": 5.234715461730957, "learning_rate": 1.7519725291541584e-05, "loss": 1.0149, "step": 7693 }, { "epoch": 25.22622950819672, "grad_norm": 5.684630393981934, "learning_rate": 1.7519025259143663e-05, "loss": 0.8886, "step": 7694 }, { "epoch": 25.229508196721312, "grad_norm": 6.057587623596191, "learning_rate": 1.7518325141960916e-05, "loss": 0.6003, "step": 7695 }, { "epoch": 25.2327868852459, "grad_norm": 4.383965492248535, "learning_rate": 1.7517624940001236e-05, "loss": 0.8474, "step": 7696 }, { "epoch": 25.236065573770492, "grad_norm": 4.699536323547363, "learning_rate": 1.7516924653272518e-05, "loss": 0.677, "step": 7697 }, { "epoch": 25.23934426229508, "grad_norm": 5.295085906982422, "learning_rate": 1.751622428178266e-05, "loss": 0.7991, "step": 7698 }, { "epoch": 25.242622950819673, "grad_norm": 5.926428318023682, "learning_rate": 1.751552382553956e-05, "loss": 0.9532, "step": 7699 }, { "epoch": 25.24590163934426, "grad_norm": 4.623112678527832, "learning_rate": 1.751482328455111e-05, "loss": 0.9247, "step": 7700 }, { "epoch": 25.249180327868853, "grad_norm": 6.512124538421631, "learning_rate": 1.7514122658825217e-05, "loss": 0.5078, "step": 7701 }, { "epoch": 25.25245901639344, "grad_norm": 6.762616157531738, "learning_rate": 1.7513421948369775e-05, "loss": 0.8674, "step": 7702 }, { "epoch": 25.255737704918033, "grad_norm": 5.067263126373291, "learning_rate": 1.751272115319269e-05, "loss": 0.835, "step": 7703 }, { "epoch": 25.25901639344262, "grad_norm": 5.096512794494629, "learning_rate": 1.7512020273301868e-05, "loss": 0.7332, "step": 7704 }, { "epoch": 25.262295081967213, "grad_norm": 4.529747486114502, "learning_rate": 1.7511319308705198e-05, "loss": 0.6872, "step": 7705 }, { "epoch": 25.2655737704918, "grad_norm": 4.7265214920043945, "learning_rate": 1.75106182594106e-05, "loss": 0.879, "step": 7706 }, { "epoch": 25.268852459016394, "grad_norm": 5.096888542175293, "learning_rate": 1.750991712542597e-05, "loss": 0.6878, "step": 7707 }, { "epoch": 25.272131147540982, "grad_norm": 7.753660202026367, "learning_rate": 1.7509215906759215e-05, "loss": 0.7765, "step": 7708 }, { "epoch": 25.275409836065574, "grad_norm": 4.645839691162109, "learning_rate": 1.7508514603418244e-05, "loss": 0.6428, "step": 7709 }, { "epoch": 25.278688524590162, "grad_norm": 6.455714702606201, "learning_rate": 1.7507813215410966e-05, "loss": 0.6295, "step": 7710 }, { "epoch": 25.281967213114754, "grad_norm": 5.132798194885254, "learning_rate": 1.7507111742745282e-05, "loss": 0.6683, "step": 7711 }, { "epoch": 25.285245901639342, "grad_norm": 4.4267473220825195, "learning_rate": 1.7506410185429112e-05, "loss": 0.9387, "step": 7712 }, { "epoch": 25.288524590163934, "grad_norm": 6.926528453826904, "learning_rate": 1.7505708543470362e-05, "loss": 0.5508, "step": 7713 }, { "epoch": 25.291803278688526, "grad_norm": 5.8421406745910645, "learning_rate": 1.7505006816876944e-05, "loss": 0.735, "step": 7714 }, { "epoch": 25.295081967213115, "grad_norm": 6.896578311920166, "learning_rate": 1.7504305005656772e-05, "loss": 0.6793, "step": 7715 }, { "epoch": 25.298360655737707, "grad_norm": 6.177912712097168, "learning_rate": 1.750360310981776e-05, "loss": 0.5294, "step": 7716 }, { "epoch": 25.301639344262295, "grad_norm": 5.26971435546875, "learning_rate": 1.7502901129367814e-05, "loss": 0.6924, "step": 7717 }, { "epoch": 25.304918032786887, "grad_norm": 5.017978191375732, "learning_rate": 1.750219906431486e-05, "loss": 0.7432, "step": 7718 }, { "epoch": 25.308196721311475, "grad_norm": 6.494349002838135, "learning_rate": 1.7501496914666814e-05, "loss": 0.7057, "step": 7719 }, { "epoch": 25.311475409836067, "grad_norm": 5.293187618255615, "learning_rate": 1.750079468043159e-05, "loss": 0.7355, "step": 7720 }, { "epoch": 25.314754098360655, "grad_norm": 5.776474475860596, "learning_rate": 1.7500092361617105e-05, "loss": 0.6741, "step": 7721 }, { "epoch": 25.318032786885247, "grad_norm": 4.7321577072143555, "learning_rate": 1.7499389958231284e-05, "loss": 0.8984, "step": 7722 }, { "epoch": 25.321311475409836, "grad_norm": 4.792735576629639, "learning_rate": 1.749868747028204e-05, "loss": 0.8232, "step": 7723 }, { "epoch": 25.324590163934428, "grad_norm": 5.150420188903809, "learning_rate": 1.74979848977773e-05, "loss": 0.8411, "step": 7724 }, { "epoch": 25.327868852459016, "grad_norm": 4.898209095001221, "learning_rate": 1.749728224072498e-05, "loss": 0.6509, "step": 7725 }, { "epoch": 25.331147540983608, "grad_norm": 5.384110450744629, "learning_rate": 1.7496579499133016e-05, "loss": 0.8075, "step": 7726 }, { "epoch": 25.334426229508196, "grad_norm": 5.421907901763916, "learning_rate": 1.7495876673009314e-05, "loss": 0.6509, "step": 7727 }, { "epoch": 25.337704918032788, "grad_norm": 5.378060340881348, "learning_rate": 1.7495173762361817e-05, "loss": 0.9495, "step": 7728 }, { "epoch": 25.340983606557376, "grad_norm": 4.916167259216309, "learning_rate": 1.749447076719844e-05, "loss": 0.9749, "step": 7729 }, { "epoch": 25.34426229508197, "grad_norm": 5.153604984283447, "learning_rate": 1.749376768752711e-05, "loss": 0.79, "step": 7730 }, { "epoch": 25.347540983606557, "grad_norm": 6.322790145874023, "learning_rate": 1.749306452335576e-05, "loss": 0.9529, "step": 7731 }, { "epoch": 25.35081967213115, "grad_norm": 4.970439434051514, "learning_rate": 1.749236127469232e-05, "loss": 0.8153, "step": 7732 }, { "epoch": 25.354098360655737, "grad_norm": 4.630865097045898, "learning_rate": 1.7491657941544714e-05, "loss": 0.8212, "step": 7733 }, { "epoch": 25.35737704918033, "grad_norm": 4.826854228973389, "learning_rate": 1.7490954523920872e-05, "loss": 1.0285, "step": 7734 }, { "epoch": 25.360655737704917, "grad_norm": 4.656881332397461, "learning_rate": 1.7490251021828734e-05, "loss": 0.9114, "step": 7735 }, { "epoch": 25.36393442622951, "grad_norm": 5.391351222991943, "learning_rate": 1.7489547435276223e-05, "loss": 0.7244, "step": 7736 }, { "epoch": 25.367213114754097, "grad_norm": 5.224879264831543, "learning_rate": 1.748884376427128e-05, "loss": 0.9439, "step": 7737 }, { "epoch": 25.37049180327869, "grad_norm": 5.173906326293945, "learning_rate": 1.7488140008821837e-05, "loss": 0.5806, "step": 7738 }, { "epoch": 25.373770491803278, "grad_norm": 5.106823444366455, "learning_rate": 1.7487436168935832e-05, "loss": 0.7614, "step": 7739 }, { "epoch": 25.37704918032787, "grad_norm": 5.498600482940674, "learning_rate": 1.7486732244621195e-05, "loss": 0.7495, "step": 7740 }, { "epoch": 25.380327868852458, "grad_norm": 5.779775619506836, "learning_rate": 1.748602823588587e-05, "loss": 0.6393, "step": 7741 }, { "epoch": 25.38360655737705, "grad_norm": 5.103318214416504, "learning_rate": 1.7485324142737793e-05, "loss": 0.8024, "step": 7742 }, { "epoch": 25.386885245901638, "grad_norm": 5.262899398803711, "learning_rate": 1.7484619965184903e-05, "loss": 0.7325, "step": 7743 }, { "epoch": 25.39016393442623, "grad_norm": 4.859023094177246, "learning_rate": 1.748391570323514e-05, "loss": 0.5432, "step": 7744 }, { "epoch": 25.39344262295082, "grad_norm": 6.544926166534424, "learning_rate": 1.7483211356896447e-05, "loss": 0.6975, "step": 7745 }, { "epoch": 25.39672131147541, "grad_norm": 26.32547378540039, "learning_rate": 1.748250692617677e-05, "loss": 0.8108, "step": 7746 }, { "epoch": 25.4, "grad_norm": 5.5319671630859375, "learning_rate": 1.748180241108404e-05, "loss": 0.7485, "step": 7747 }, { "epoch": 25.40327868852459, "grad_norm": 4.887552738189697, "learning_rate": 1.7481097811626214e-05, "loss": 0.9397, "step": 7748 }, { "epoch": 25.40655737704918, "grad_norm": 5.086965560913086, "learning_rate": 1.7480393127811228e-05, "loss": 0.9294, "step": 7749 }, { "epoch": 25.40983606557377, "grad_norm": 7.560501575469971, "learning_rate": 1.7479688359647037e-05, "loss": 1.0178, "step": 7750 }, { "epoch": 25.41311475409836, "grad_norm": 5.0052490234375, "learning_rate": 1.7478983507141577e-05, "loss": 0.582, "step": 7751 }, { "epoch": 25.41639344262295, "grad_norm": 5.348884582519531, "learning_rate": 1.7478278570302802e-05, "loss": 0.8879, "step": 7752 }, { "epoch": 25.41967213114754, "grad_norm": 5.149572372436523, "learning_rate": 1.7477573549138666e-05, "loss": 0.7451, "step": 7753 }, { "epoch": 25.42295081967213, "grad_norm": 6.379999160766602, "learning_rate": 1.747686844365711e-05, "loss": 0.7809, "step": 7754 }, { "epoch": 25.42622950819672, "grad_norm": 5.746959209442139, "learning_rate": 1.7476163253866087e-05, "loss": 0.6544, "step": 7755 }, { "epoch": 25.42950819672131, "grad_norm": 5.627737045288086, "learning_rate": 1.7475457979773552e-05, "loss": 0.7046, "step": 7756 }, { "epoch": 25.432786885245903, "grad_norm": 9.259369850158691, "learning_rate": 1.7474752621387456e-05, "loss": 0.6387, "step": 7757 }, { "epoch": 25.43606557377049, "grad_norm": 21.549943923950195, "learning_rate": 1.747404717871575e-05, "loss": 0.732, "step": 7758 }, { "epoch": 25.439344262295084, "grad_norm": 7.389974594116211, "learning_rate": 1.7473341651766394e-05, "loss": 0.6484, "step": 7759 }, { "epoch": 25.442622950819672, "grad_norm": 5.713603496551514, "learning_rate": 1.747263604054734e-05, "loss": 0.8431, "step": 7760 }, { "epoch": 25.445901639344264, "grad_norm": 6.137709140777588, "learning_rate": 1.7471930345066542e-05, "loss": 0.8179, "step": 7761 }, { "epoch": 25.449180327868852, "grad_norm": 4.5408477783203125, "learning_rate": 1.7471224565331966e-05, "loss": 0.9102, "step": 7762 }, { "epoch": 25.452459016393444, "grad_norm": 4.995704174041748, "learning_rate": 1.747051870135156e-05, "loss": 0.7739, "step": 7763 }, { "epoch": 25.455737704918032, "grad_norm": 6.607404708862305, "learning_rate": 1.746981275313329e-05, "loss": 0.7104, "step": 7764 }, { "epoch": 25.459016393442624, "grad_norm": 6.6736674308776855, "learning_rate": 1.7469106720685113e-05, "loss": 0.8444, "step": 7765 }, { "epoch": 25.462295081967213, "grad_norm": 5.741933822631836, "learning_rate": 1.7468400604014997e-05, "loss": 0.783, "step": 7766 }, { "epoch": 25.465573770491805, "grad_norm": 6.4559173583984375, "learning_rate": 1.7467694403130893e-05, "loss": 0.9324, "step": 7767 }, { "epoch": 25.468852459016393, "grad_norm": 4.917204856872559, "learning_rate": 1.7466988118040775e-05, "loss": 0.8795, "step": 7768 }, { "epoch": 25.472131147540985, "grad_norm": 4.956875324249268, "learning_rate": 1.74662817487526e-05, "loss": 0.7496, "step": 7769 }, { "epoch": 25.475409836065573, "grad_norm": 5.866541385650635, "learning_rate": 1.7465575295274333e-05, "loss": 0.7177, "step": 7770 }, { "epoch": 25.478688524590165, "grad_norm": 5.342478275299072, "learning_rate": 1.7464868757613948e-05, "loss": 0.9722, "step": 7771 }, { "epoch": 25.481967213114753, "grad_norm": 4.991090297698975, "learning_rate": 1.74641621357794e-05, "loss": 0.9708, "step": 7772 }, { "epoch": 25.485245901639345, "grad_norm": 7.302638530731201, "learning_rate": 1.7463455429778666e-05, "loss": 0.8672, "step": 7773 }, { "epoch": 25.488524590163934, "grad_norm": 6.09715461730957, "learning_rate": 1.746274863961971e-05, "loss": 0.6577, "step": 7774 }, { "epoch": 25.491803278688526, "grad_norm": 6.143939018249512, "learning_rate": 1.746204176531051e-05, "loss": 0.7028, "step": 7775 }, { "epoch": 25.495081967213114, "grad_norm": 4.708550930023193, "learning_rate": 1.7461334806859023e-05, "loss": 0.7495, "step": 7776 }, { "epoch": 25.498360655737706, "grad_norm": 5.51059627532959, "learning_rate": 1.746062776427323e-05, "loss": 0.8618, "step": 7777 }, { "epoch": 25.501639344262294, "grad_norm": 5.865034580230713, "learning_rate": 1.7459920637561107e-05, "loss": 0.7617, "step": 7778 }, { "epoch": 25.504918032786886, "grad_norm": 5.890259265899658, "learning_rate": 1.7459213426730617e-05, "loss": 0.7636, "step": 7779 }, { "epoch": 25.508196721311474, "grad_norm": 4.8325653076171875, "learning_rate": 1.745850613178974e-05, "loss": 0.8471, "step": 7780 }, { "epoch": 25.511475409836066, "grad_norm": 6.218537330627441, "learning_rate": 1.7457798752746453e-05, "loss": 0.7613, "step": 7781 }, { "epoch": 25.514754098360655, "grad_norm": 6.378767967224121, "learning_rate": 1.745709128960873e-05, "loss": 0.7638, "step": 7782 }, { "epoch": 25.518032786885247, "grad_norm": 7.495121002197266, "learning_rate": 1.7456383742384552e-05, "loss": 0.5391, "step": 7783 }, { "epoch": 25.521311475409835, "grad_norm": 5.066561698913574, "learning_rate": 1.7455676111081894e-05, "loss": 0.746, "step": 7784 }, { "epoch": 25.524590163934427, "grad_norm": 5.215322971343994, "learning_rate": 1.7454968395708735e-05, "loss": 0.6823, "step": 7785 }, { "epoch": 25.527868852459015, "grad_norm": 5.793514251708984, "learning_rate": 1.7454260596273055e-05, "loss": 0.616, "step": 7786 }, { "epoch": 25.531147540983607, "grad_norm": 6.57490873336792, "learning_rate": 1.7453552712782837e-05, "loss": 0.7374, "step": 7787 }, { "epoch": 25.534426229508195, "grad_norm": 6.747349739074707, "learning_rate": 1.7452844745246062e-05, "loss": 0.6516, "step": 7788 }, { "epoch": 25.537704918032787, "grad_norm": 5.902955532073975, "learning_rate": 1.7452136693670714e-05, "loss": 0.5214, "step": 7789 }, { "epoch": 25.540983606557376, "grad_norm": 5.657497406005859, "learning_rate": 1.7451428558064778e-05, "loss": 0.7328, "step": 7790 }, { "epoch": 25.544262295081968, "grad_norm": 6.040675163269043, "learning_rate": 1.7450720338436236e-05, "loss": 0.5818, "step": 7791 }, { "epoch": 25.547540983606556, "grad_norm": 5.34188175201416, "learning_rate": 1.7450012034793074e-05, "loss": 0.7333, "step": 7792 }, { "epoch": 25.550819672131148, "grad_norm": 5.573256015777588, "learning_rate": 1.744930364714328e-05, "loss": 0.8152, "step": 7793 }, { "epoch": 25.554098360655736, "grad_norm": 5.3141608238220215, "learning_rate": 1.7448595175494846e-05, "loss": 0.9508, "step": 7794 }, { "epoch": 25.557377049180328, "grad_norm": 4.529529571533203, "learning_rate": 1.744788661985575e-05, "loss": 1.0048, "step": 7795 }, { "epoch": 25.560655737704916, "grad_norm": 11.861809730529785, "learning_rate": 1.7447177980233996e-05, "loss": 0.7798, "step": 7796 }, { "epoch": 25.56393442622951, "grad_norm": 6.901881217956543, "learning_rate": 1.7446469256637564e-05, "loss": 0.5868, "step": 7797 }, { "epoch": 25.567213114754097, "grad_norm": 5.9233574867248535, "learning_rate": 1.7445760449074448e-05, "loss": 0.7776, "step": 7798 }, { "epoch": 25.57049180327869, "grad_norm": 4.780013561248779, "learning_rate": 1.744505155755264e-05, "loss": 1.2426, "step": 7799 }, { "epoch": 25.57377049180328, "grad_norm": 5.92659854888916, "learning_rate": 1.7444342582080137e-05, "loss": 0.8457, "step": 7800 }, { "epoch": 25.57704918032787, "grad_norm": 5.221203327178955, "learning_rate": 1.7443633522664933e-05, "loss": 0.7883, "step": 7801 }, { "epoch": 25.58032786885246, "grad_norm": 6.363925457000732, "learning_rate": 1.744292437931502e-05, "loss": 0.8573, "step": 7802 }, { "epoch": 25.58360655737705, "grad_norm": 5.358489513397217, "learning_rate": 1.7442215152038397e-05, "loss": 0.9949, "step": 7803 }, { "epoch": 25.58688524590164, "grad_norm": 5.4874444007873535, "learning_rate": 1.7441505840843057e-05, "loss": 0.783, "step": 7804 }, { "epoch": 25.59016393442623, "grad_norm": 5.293353080749512, "learning_rate": 1.7440796445737004e-05, "loss": 1.0386, "step": 7805 }, { "epoch": 25.59344262295082, "grad_norm": 4.573122501373291, "learning_rate": 1.7440086966728235e-05, "loss": 0.9307, "step": 7806 }, { "epoch": 25.59672131147541, "grad_norm": 4.99511194229126, "learning_rate": 1.7439377403824748e-05, "loss": 0.871, "step": 7807 }, { "epoch": 25.6, "grad_norm": 6.083991527557373, "learning_rate": 1.7438667757034547e-05, "loss": 0.6194, "step": 7808 }, { "epoch": 25.60327868852459, "grad_norm": 6.720706462860107, "learning_rate": 1.743795802636563e-05, "loss": 0.6215, "step": 7809 }, { "epoch": 25.60655737704918, "grad_norm": 5.509552955627441, "learning_rate": 1.7437248211826007e-05, "loss": 0.8242, "step": 7810 }, { "epoch": 25.60983606557377, "grad_norm": 5.780636787414551, "learning_rate": 1.7436538313423673e-05, "loss": 0.7871, "step": 7811 }, { "epoch": 25.613114754098362, "grad_norm": 5.602629661560059, "learning_rate": 1.743582833116664e-05, "loss": 0.9539, "step": 7812 }, { "epoch": 25.61639344262295, "grad_norm": 4.891325950622559, "learning_rate": 1.7435118265062912e-05, "loss": 0.8559, "step": 7813 }, { "epoch": 25.619672131147542, "grad_norm": 4.737668514251709, "learning_rate": 1.7434408115120494e-05, "loss": 0.631, "step": 7814 }, { "epoch": 25.62295081967213, "grad_norm": 6.234978199005127, "learning_rate": 1.7433697881347394e-05, "loss": 0.8621, "step": 7815 }, { "epoch": 25.626229508196722, "grad_norm": 6.243137359619141, "learning_rate": 1.7432987563751623e-05, "loss": 0.7365, "step": 7816 }, { "epoch": 25.62950819672131, "grad_norm": 5.184805870056152, "learning_rate": 1.7432277162341186e-05, "loss": 0.9064, "step": 7817 }, { "epoch": 25.632786885245903, "grad_norm": 5.130687236785889, "learning_rate": 1.74315666771241e-05, "loss": 0.6875, "step": 7818 }, { "epoch": 25.63606557377049, "grad_norm": 5.983776569366455, "learning_rate": 1.743085610810837e-05, "loss": 0.827, "step": 7819 }, { "epoch": 25.639344262295083, "grad_norm": 7.454291820526123, "learning_rate": 1.7430145455302013e-05, "loss": 0.8808, "step": 7820 }, { "epoch": 25.64262295081967, "grad_norm": 5.1896491050720215, "learning_rate": 1.742943471871304e-05, "loss": 0.8393, "step": 7821 }, { "epoch": 25.645901639344263, "grad_norm": 5.823718547821045, "learning_rate": 1.7428723898349464e-05, "loss": 0.526, "step": 7822 }, { "epoch": 25.64918032786885, "grad_norm": 5.632214069366455, "learning_rate": 1.7428012994219304e-05, "loss": 0.8491, "step": 7823 }, { "epoch": 25.652459016393443, "grad_norm": 6.455202579498291, "learning_rate": 1.7427302006330572e-05, "loss": 0.709, "step": 7824 }, { "epoch": 25.65573770491803, "grad_norm": 5.532683849334717, "learning_rate": 1.7426590934691292e-05, "loss": 0.8332, "step": 7825 }, { "epoch": 25.659016393442624, "grad_norm": 5.125144958496094, "learning_rate": 1.7425879779309473e-05, "loss": 0.744, "step": 7826 }, { "epoch": 25.662295081967212, "grad_norm": 6.028670787811279, "learning_rate": 1.7425168540193144e-05, "loss": 0.7677, "step": 7827 }, { "epoch": 25.665573770491804, "grad_norm": 5.2336626052856445, "learning_rate": 1.7424457217350316e-05, "loss": 1.025, "step": 7828 }, { "epoch": 25.668852459016392, "grad_norm": 6.011702060699463, "learning_rate": 1.742374581078901e-05, "loss": 1.0532, "step": 7829 }, { "epoch": 25.672131147540984, "grad_norm": 5.318094253540039, "learning_rate": 1.7423034320517256e-05, "loss": 0.6586, "step": 7830 }, { "epoch": 25.675409836065572, "grad_norm": 5.489062309265137, "learning_rate": 1.742232274654307e-05, "loss": 0.7333, "step": 7831 }, { "epoch": 25.678688524590164, "grad_norm": 5.218588829040527, "learning_rate": 1.742161108887448e-05, "loss": 0.8116, "step": 7832 }, { "epoch": 25.681967213114753, "grad_norm": 5.152379989624023, "learning_rate": 1.7420899347519504e-05, "loss": 1.0606, "step": 7833 }, { "epoch": 25.685245901639345, "grad_norm": 5.987560272216797, "learning_rate": 1.7420187522486176e-05, "loss": 0.5514, "step": 7834 }, { "epoch": 25.688524590163933, "grad_norm": 4.5596113204956055, "learning_rate": 1.7419475613782516e-05, "loss": 0.8766, "step": 7835 }, { "epoch": 25.691803278688525, "grad_norm": 5.665600299835205, "learning_rate": 1.7418763621416556e-05, "loss": 0.6309, "step": 7836 }, { "epoch": 25.695081967213113, "grad_norm": 4.803447246551514, "learning_rate": 1.7418051545396323e-05, "loss": 0.7124, "step": 7837 }, { "epoch": 25.698360655737705, "grad_norm": 7.026984214782715, "learning_rate": 1.7417339385729846e-05, "loss": 0.8528, "step": 7838 }, { "epoch": 25.701639344262293, "grad_norm": 6.118686199188232, "learning_rate": 1.7416627142425154e-05, "loss": 0.7001, "step": 7839 }, { "epoch": 25.704918032786885, "grad_norm": 5.520501136779785, "learning_rate": 1.741591481549028e-05, "loss": 0.8732, "step": 7840 }, { "epoch": 25.708196721311474, "grad_norm": 5.322074890136719, "learning_rate": 1.7415202404933256e-05, "loss": 0.7836, "step": 7841 }, { "epoch": 25.711475409836066, "grad_norm": 5.215490818023682, "learning_rate": 1.7414489910762114e-05, "loss": 0.7111, "step": 7842 }, { "epoch": 25.714754098360658, "grad_norm": 11.29732894897461, "learning_rate": 1.741377733298489e-05, "loss": 0.9688, "step": 7843 }, { "epoch": 25.718032786885246, "grad_norm": 5.642852783203125, "learning_rate": 1.7413064671609618e-05, "loss": 1.0308, "step": 7844 }, { "epoch": 25.721311475409838, "grad_norm": 5.643774509429932, "learning_rate": 1.7412351926644336e-05, "loss": 0.799, "step": 7845 }, { "epoch": 25.724590163934426, "grad_norm": 5.251443386077881, "learning_rate": 1.7411639098097076e-05, "loss": 0.8036, "step": 7846 }, { "epoch": 25.727868852459018, "grad_norm": 11.914447784423828, "learning_rate": 1.741092618597588e-05, "loss": 0.743, "step": 7847 }, { "epoch": 25.731147540983606, "grad_norm": 5.240929126739502, "learning_rate": 1.7410213190288788e-05, "loss": 0.6438, "step": 7848 }, { "epoch": 25.7344262295082, "grad_norm": 5.659868240356445, "learning_rate": 1.7409500111043834e-05, "loss": 0.7587, "step": 7849 }, { "epoch": 25.737704918032787, "grad_norm": 4.616711616516113, "learning_rate": 1.7408786948249065e-05, "loss": 0.655, "step": 7850 }, { "epoch": 25.74098360655738, "grad_norm": 5.887938022613525, "learning_rate": 1.740807370191252e-05, "loss": 0.791, "step": 7851 }, { "epoch": 25.744262295081967, "grad_norm": 7.067307472229004, "learning_rate": 1.740736037204224e-05, "loss": 0.7097, "step": 7852 }, { "epoch": 25.74754098360656, "grad_norm": 5.696677207946777, "learning_rate": 1.740664695864627e-05, "loss": 0.888, "step": 7853 }, { "epoch": 25.750819672131147, "grad_norm": 5.328399181365967, "learning_rate": 1.7405933461732658e-05, "loss": 0.7894, "step": 7854 }, { "epoch": 25.75409836065574, "grad_norm": 5.616023540496826, "learning_rate": 1.740521988130944e-05, "loss": 0.5634, "step": 7855 }, { "epoch": 25.757377049180327, "grad_norm": 5.266993522644043, "learning_rate": 1.7404506217384672e-05, "loss": 0.8376, "step": 7856 }, { "epoch": 25.76065573770492, "grad_norm": 5.794459819793701, "learning_rate": 1.7403792469966397e-05, "loss": 0.8218, "step": 7857 }, { "epoch": 25.763934426229508, "grad_norm": 5.5143561363220215, "learning_rate": 1.7403078639062668e-05, "loss": 0.6429, "step": 7858 }, { "epoch": 25.7672131147541, "grad_norm": 5.199601650238037, "learning_rate": 1.7402364724681524e-05, "loss": 0.8195, "step": 7859 }, { "epoch": 25.770491803278688, "grad_norm": 5.73212194442749, "learning_rate": 1.7401650726831023e-05, "loss": 0.8764, "step": 7860 }, { "epoch": 25.77377049180328, "grad_norm": 5.489308834075928, "learning_rate": 1.7400936645519215e-05, "loss": 0.7285, "step": 7861 }, { "epoch": 25.777049180327868, "grad_norm": 4.5998687744140625, "learning_rate": 1.7400222480754152e-05, "loss": 0.9325, "step": 7862 }, { "epoch": 25.78032786885246, "grad_norm": 5.813803672790527, "learning_rate": 1.7399508232543883e-05, "loss": 0.7022, "step": 7863 }, { "epoch": 25.78360655737705, "grad_norm": 5.398983478546143, "learning_rate": 1.7398793900896468e-05, "loss": 0.7432, "step": 7864 }, { "epoch": 25.78688524590164, "grad_norm": 4.82870626449585, "learning_rate": 1.739807948581996e-05, "loss": 0.8185, "step": 7865 }, { "epoch": 25.79016393442623, "grad_norm": 5.3334269523620605, "learning_rate": 1.7397364987322412e-05, "loss": 0.8699, "step": 7866 }, { "epoch": 25.79344262295082, "grad_norm": 4.7138991355896, "learning_rate": 1.7396650405411882e-05, "loss": 0.989, "step": 7867 }, { "epoch": 25.79672131147541, "grad_norm": 5.466228008270264, "learning_rate": 1.7395935740096426e-05, "loss": 1.0115, "step": 7868 }, { "epoch": 25.8, "grad_norm": 5.193724155426025, "learning_rate": 1.739522099138411e-05, "loss": 0.6762, "step": 7869 }, { "epoch": 25.80327868852459, "grad_norm": 4.2386980056762695, "learning_rate": 1.7394506159282984e-05, "loss": 0.7551, "step": 7870 }, { "epoch": 25.80655737704918, "grad_norm": 4.486341953277588, "learning_rate": 1.7393791243801115e-05, "loss": 0.8395, "step": 7871 }, { "epoch": 25.80983606557377, "grad_norm": 4.998424530029297, "learning_rate": 1.7393076244946562e-05, "loss": 0.5785, "step": 7872 }, { "epoch": 25.81311475409836, "grad_norm": 5.149435520172119, "learning_rate": 1.7392361162727385e-05, "loss": 0.8459, "step": 7873 }, { "epoch": 25.81639344262295, "grad_norm": 6.8728437423706055, "learning_rate": 1.7391645997151652e-05, "loss": 0.8093, "step": 7874 }, { "epoch": 25.81967213114754, "grad_norm": 5.175372123718262, "learning_rate": 1.7390930748227423e-05, "loss": 0.6785, "step": 7875 }, { "epoch": 25.82295081967213, "grad_norm": 4.868385314941406, "learning_rate": 1.739021541596277e-05, "loss": 0.9643, "step": 7876 }, { "epoch": 25.82622950819672, "grad_norm": 6.908138275146484, "learning_rate": 1.738950000036575e-05, "loss": 0.8068, "step": 7877 }, { "epoch": 25.82950819672131, "grad_norm": 6.057936668395996, "learning_rate": 1.7388784501444435e-05, "loss": 0.7238, "step": 7878 }, { "epoch": 25.832786885245902, "grad_norm": 7.161067008972168, "learning_rate": 1.7388068919206893e-05, "loss": 0.7674, "step": 7879 }, { "epoch": 25.83606557377049, "grad_norm": 5.185455322265625, "learning_rate": 1.738735325366119e-05, "loss": 0.5841, "step": 7880 }, { "epoch": 25.839344262295082, "grad_norm": 4.9659223556518555, "learning_rate": 1.73866375048154e-05, "loss": 0.7994, "step": 7881 }, { "epoch": 25.84262295081967, "grad_norm": 5.389986991882324, "learning_rate": 1.7385921672677595e-05, "loss": 0.5482, "step": 7882 }, { "epoch": 25.845901639344262, "grad_norm": 5.970175266265869, "learning_rate": 1.738520575725584e-05, "loss": 0.81, "step": 7883 }, { "epoch": 25.84918032786885, "grad_norm": 4.875625133514404, "learning_rate": 1.738448975855821e-05, "loss": 0.67, "step": 7884 }, { "epoch": 25.852459016393443, "grad_norm": 5.573169708251953, "learning_rate": 1.7383773676592782e-05, "loss": 0.8756, "step": 7885 }, { "epoch": 25.855737704918035, "grad_norm": 5.723033428192139, "learning_rate": 1.7383057511367633e-05, "loss": 0.5992, "step": 7886 }, { "epoch": 25.859016393442623, "grad_norm": 4.62445592880249, "learning_rate": 1.7382341262890827e-05, "loss": 0.6639, "step": 7887 }, { "epoch": 25.862295081967215, "grad_norm": 7.390789985656738, "learning_rate": 1.738162493117045e-05, "loss": 0.7148, "step": 7888 }, { "epoch": 25.865573770491803, "grad_norm": 5.7139153480529785, "learning_rate": 1.7380908516214577e-05, "loss": 0.7708, "step": 7889 }, { "epoch": 25.868852459016395, "grad_norm": 5.534101963043213, "learning_rate": 1.7380192018031287e-05, "loss": 1.0436, "step": 7890 }, { "epoch": 25.872131147540983, "grad_norm": 5.537330627441406, "learning_rate": 1.7379475436628656e-05, "loss": 0.8713, "step": 7891 }, { "epoch": 25.875409836065575, "grad_norm": 4.564701080322266, "learning_rate": 1.7378758772014772e-05, "loss": 0.8894, "step": 7892 }, { "epoch": 25.878688524590164, "grad_norm": 6.198728084564209, "learning_rate": 1.7378042024197705e-05, "loss": 0.6113, "step": 7893 }, { "epoch": 25.881967213114756, "grad_norm": 5.415867328643799, "learning_rate": 1.7377325193185547e-05, "loss": 0.5934, "step": 7894 }, { "epoch": 25.885245901639344, "grad_norm": 5.179255962371826, "learning_rate": 1.7376608278986375e-05, "loss": 1.0753, "step": 7895 }, { "epoch": 25.888524590163936, "grad_norm": 5.319212913513184, "learning_rate": 1.7375891281608276e-05, "loss": 0.7167, "step": 7896 }, { "epoch": 25.891803278688524, "grad_norm": 5.784801959991455, "learning_rate": 1.737517420105933e-05, "loss": 0.9987, "step": 7897 }, { "epoch": 25.895081967213116, "grad_norm": 6.421074390411377, "learning_rate": 1.7374457037347634e-05, "loss": 0.7899, "step": 7898 }, { "epoch": 25.898360655737704, "grad_norm": 4.250716209411621, "learning_rate": 1.7373739790481263e-05, "loss": 0.9856, "step": 7899 }, { "epoch": 25.901639344262296, "grad_norm": 6.082488059997559, "learning_rate": 1.737302246046831e-05, "loss": 0.7905, "step": 7900 }, { "epoch": 25.904918032786885, "grad_norm": 6.178430080413818, "learning_rate": 1.7372305047316863e-05, "loss": 0.7579, "step": 7901 }, { "epoch": 25.908196721311477, "grad_norm": 5.199283599853516, "learning_rate": 1.7371587551035006e-05, "loss": 0.8076, "step": 7902 }, { "epoch": 25.911475409836065, "grad_norm": 6.038541793823242, "learning_rate": 1.7370869971630842e-05, "loss": 0.9131, "step": 7903 }, { "epoch": 25.914754098360657, "grad_norm": 5.124237537384033, "learning_rate": 1.7370152309112454e-05, "loss": 0.8766, "step": 7904 }, { "epoch": 25.918032786885245, "grad_norm": 5.724233627319336, "learning_rate": 1.7369434563487933e-05, "loss": 0.8853, "step": 7905 }, { "epoch": 25.921311475409837, "grad_norm": 5.317521572113037, "learning_rate": 1.7368716734765377e-05, "loss": 0.9382, "step": 7906 }, { "epoch": 25.924590163934425, "grad_norm": 6.401578426361084, "learning_rate": 1.7367998822952876e-05, "loss": 0.8311, "step": 7907 }, { "epoch": 25.927868852459017, "grad_norm": 5.430952072143555, "learning_rate": 1.736728082805853e-05, "loss": 0.8409, "step": 7908 }, { "epoch": 25.931147540983606, "grad_norm": 4.9579315185546875, "learning_rate": 1.7366562750090433e-05, "loss": 1.0652, "step": 7909 }, { "epoch": 25.934426229508198, "grad_norm": 5.0452399253845215, "learning_rate": 1.736584458905668e-05, "loss": 0.7161, "step": 7910 }, { "epoch": 25.937704918032786, "grad_norm": 5.240499019622803, "learning_rate": 1.736512634496537e-05, "loss": 0.6878, "step": 7911 }, { "epoch": 25.940983606557378, "grad_norm": 8.644003868103027, "learning_rate": 1.7364408017824603e-05, "loss": 0.7411, "step": 7912 }, { "epoch": 25.944262295081966, "grad_norm": 6.14586877822876, "learning_rate": 1.736368960764248e-05, "loss": 0.645, "step": 7913 }, { "epoch": 25.947540983606558, "grad_norm": 5.947971343994141, "learning_rate": 1.7362971114427097e-05, "loss": 0.761, "step": 7914 }, { "epoch": 25.950819672131146, "grad_norm": 6.069510459899902, "learning_rate": 1.736225253818656e-05, "loss": 0.756, "step": 7915 }, { "epoch": 25.95409836065574, "grad_norm": 5.340587615966797, "learning_rate": 1.7361533878928976e-05, "loss": 0.7929, "step": 7916 }, { "epoch": 25.957377049180327, "grad_norm": 4.9995551109313965, "learning_rate": 1.736081513666244e-05, "loss": 0.8351, "step": 7917 }, { "epoch": 25.96065573770492, "grad_norm": 4.707962989807129, "learning_rate": 1.7360096311395057e-05, "loss": 0.7242, "step": 7918 }, { "epoch": 25.963934426229507, "grad_norm": 6.139939308166504, "learning_rate": 1.7359377403134942e-05, "loss": 0.9073, "step": 7919 }, { "epoch": 25.9672131147541, "grad_norm": 4.897884368896484, "learning_rate": 1.735865841189019e-05, "loss": 0.7462, "step": 7920 }, { "epoch": 25.970491803278687, "grad_norm": 5.674143314361572, "learning_rate": 1.7357939337668914e-05, "loss": 0.6796, "step": 7921 }, { "epoch": 25.97377049180328, "grad_norm": 4.543027400970459, "learning_rate": 1.7357220180479223e-05, "loss": 0.7681, "step": 7922 }, { "epoch": 25.977049180327867, "grad_norm": 5.16259765625, "learning_rate": 1.7356500940329224e-05, "loss": 0.5655, "step": 7923 }, { "epoch": 25.98032786885246, "grad_norm": 5.429602146148682, "learning_rate": 1.735578161722703e-05, "loss": 0.8715, "step": 7924 }, { "epoch": 25.983606557377048, "grad_norm": 5.644184589385986, "learning_rate": 1.7355062211180745e-05, "loss": 0.7136, "step": 7925 }, { "epoch": 25.98688524590164, "grad_norm": 6.871780872344971, "learning_rate": 1.735434272219849e-05, "loss": 0.7228, "step": 7926 }, { "epoch": 25.990163934426228, "grad_norm": 5.2755866050720215, "learning_rate": 1.7353623150288374e-05, "loss": 0.7166, "step": 7927 }, { "epoch": 25.99344262295082, "grad_norm": 5.0475358963012695, "learning_rate": 1.735290349545851e-05, "loss": 0.7448, "step": 7928 }, { "epoch": 25.99672131147541, "grad_norm": 4.723892688751221, "learning_rate": 1.7352183757717016e-05, "loss": 0.8801, "step": 7929 }, { "epoch": 26.0, "grad_norm": 5.530404090881348, "learning_rate": 1.7351463937072008e-05, "loss": 0.6218, "step": 7930 }, { "epoch": 26.003278688524592, "grad_norm": 8.220407485961914, "learning_rate": 1.7350744033531595e-05, "loss": 0.6876, "step": 7931 }, { "epoch": 26.00655737704918, "grad_norm": 4.286952972412109, "learning_rate": 1.7350024047103903e-05, "loss": 0.9625, "step": 7932 }, { "epoch": 26.009836065573772, "grad_norm": 4.969887733459473, "learning_rate": 1.7349303977797048e-05, "loss": 0.7554, "step": 7933 }, { "epoch": 26.01311475409836, "grad_norm": 4.565502166748047, "learning_rate": 1.7348583825619147e-05, "loss": 0.5839, "step": 7934 }, { "epoch": 26.016393442622952, "grad_norm": 4.853638648986816, "learning_rate": 1.7347863590578326e-05, "loss": 0.5021, "step": 7935 }, { "epoch": 26.01967213114754, "grad_norm": 5.290579319000244, "learning_rate": 1.7347143272682697e-05, "loss": 0.6376, "step": 7936 }, { "epoch": 26.022950819672133, "grad_norm": 5.937170505523682, "learning_rate": 1.7346422871940392e-05, "loss": 0.7109, "step": 7937 }, { "epoch": 26.02622950819672, "grad_norm": 6.000253200531006, "learning_rate": 1.7345702388359535e-05, "loss": 0.6867, "step": 7938 }, { "epoch": 26.029508196721313, "grad_norm": 5.465527057647705, "learning_rate": 1.734498182194824e-05, "loss": 0.7894, "step": 7939 }, { "epoch": 26.0327868852459, "grad_norm": 5.333098411560059, "learning_rate": 1.7344261172714642e-05, "loss": 0.7354, "step": 7940 }, { "epoch": 26.036065573770493, "grad_norm": 5.084988594055176, "learning_rate": 1.734354044066686e-05, "loss": 0.6741, "step": 7941 }, { "epoch": 26.03934426229508, "grad_norm": 5.262646198272705, "learning_rate": 1.734281962581303e-05, "loss": 0.7242, "step": 7942 }, { "epoch": 26.042622950819673, "grad_norm": 4.842655181884766, "learning_rate": 1.734209872816127e-05, "loss": 0.6778, "step": 7943 }, { "epoch": 26.04590163934426, "grad_norm": 6.076463222503662, "learning_rate": 1.7341377747719713e-05, "loss": 0.6493, "step": 7944 }, { "epoch": 26.049180327868854, "grad_norm": 4.454833984375, "learning_rate": 1.7340656684496487e-05, "loss": 0.7412, "step": 7945 }, { "epoch": 26.052459016393442, "grad_norm": 5.277955055236816, "learning_rate": 1.7339935538499725e-05, "loss": 0.5304, "step": 7946 }, { "epoch": 26.055737704918034, "grad_norm": 4.452683448791504, "learning_rate": 1.733921430973756e-05, "loss": 0.5124, "step": 7947 }, { "epoch": 26.059016393442622, "grad_norm": 6.698962688446045, "learning_rate": 1.7338492998218125e-05, "loss": 0.8303, "step": 7948 }, { "epoch": 26.062295081967214, "grad_norm": 6.2552618980407715, "learning_rate": 1.7337771603949547e-05, "loss": 0.6552, "step": 7949 }, { "epoch": 26.065573770491802, "grad_norm": 5.251135349273682, "learning_rate": 1.7337050126939966e-05, "loss": 0.6107, "step": 7950 }, { "epoch": 26.068852459016394, "grad_norm": 7.189659118652344, "learning_rate": 1.733632856719752e-05, "loss": 0.787, "step": 7951 }, { "epoch": 26.072131147540983, "grad_norm": 5.0820536613464355, "learning_rate": 1.7335606924730334e-05, "loss": 0.7314, "step": 7952 }, { "epoch": 26.075409836065575, "grad_norm": 5.051118850708008, "learning_rate": 1.7334885199546557e-05, "loss": 0.9163, "step": 7953 }, { "epoch": 26.078688524590163, "grad_norm": 4.922946929931641, "learning_rate": 1.7334163391654323e-05, "loss": 0.8209, "step": 7954 }, { "epoch": 26.081967213114755, "grad_norm": 5.102685451507568, "learning_rate": 1.7333441501061772e-05, "loss": 0.7582, "step": 7955 }, { "epoch": 26.085245901639343, "grad_norm": 4.809590816497803, "learning_rate": 1.7332719527777044e-05, "loss": 1.048, "step": 7956 }, { "epoch": 26.088524590163935, "grad_norm": 5.733288764953613, "learning_rate": 1.7331997471808276e-05, "loss": 0.7318, "step": 7957 }, { "epoch": 26.091803278688523, "grad_norm": 4.912856578826904, "learning_rate": 1.7331275333163614e-05, "loss": 0.7406, "step": 7958 }, { "epoch": 26.095081967213115, "grad_norm": 5.0605950355529785, "learning_rate": 1.73305531118512e-05, "loss": 0.588, "step": 7959 }, { "epoch": 26.098360655737704, "grad_norm": 4.943544864654541, "learning_rate": 1.732983080787918e-05, "loss": 0.9929, "step": 7960 }, { "epoch": 26.101639344262296, "grad_norm": 4.797834396362305, "learning_rate": 1.7329108421255694e-05, "loss": 0.7137, "step": 7961 }, { "epoch": 26.104918032786884, "grad_norm": 5.709222316741943, "learning_rate": 1.7328385951988892e-05, "loss": 0.5479, "step": 7962 }, { "epoch": 26.108196721311476, "grad_norm": 4.646869659423828, "learning_rate": 1.7327663400086918e-05, "loss": 0.7043, "step": 7963 }, { "epoch": 26.111475409836064, "grad_norm": 5.547156810760498, "learning_rate": 1.732694076555792e-05, "loss": 0.8193, "step": 7964 }, { "epoch": 26.114754098360656, "grad_norm": 4.8214311599731445, "learning_rate": 1.7326218048410047e-05, "loss": 0.6418, "step": 7965 }, { "epoch": 26.118032786885244, "grad_norm": 5.141108989715576, "learning_rate": 1.732549524865145e-05, "loss": 0.5545, "step": 7966 }, { "epoch": 26.121311475409836, "grad_norm": 6.250420093536377, "learning_rate": 1.7324772366290274e-05, "loss": 0.8488, "step": 7967 }, { "epoch": 26.124590163934425, "grad_norm": 5.120135307312012, "learning_rate": 1.7324049401334676e-05, "loss": 0.5188, "step": 7968 }, { "epoch": 26.127868852459017, "grad_norm": 4.54120397567749, "learning_rate": 1.7323326353792806e-05, "loss": 0.8301, "step": 7969 }, { "epoch": 26.131147540983605, "grad_norm": 4.902698993682861, "learning_rate": 1.7322603223672816e-05, "loss": 0.7154, "step": 7970 }, { "epoch": 26.134426229508197, "grad_norm": 5.809162616729736, "learning_rate": 1.732188001098286e-05, "loss": 0.7285, "step": 7971 }, { "epoch": 26.137704918032785, "grad_norm": 4.777559757232666, "learning_rate": 1.7321156715731096e-05, "loss": 0.7493, "step": 7972 }, { "epoch": 26.140983606557377, "grad_norm": 4.647351264953613, "learning_rate": 1.7320433337925676e-05, "loss": 0.7114, "step": 7973 }, { "epoch": 26.14426229508197, "grad_norm": 5.450384616851807, "learning_rate": 1.731970987757476e-05, "loss": 0.6002, "step": 7974 }, { "epoch": 26.147540983606557, "grad_norm": 6.28573751449585, "learning_rate": 1.7318986334686505e-05, "loss": 0.5543, "step": 7975 }, { "epoch": 26.15081967213115, "grad_norm": 5.620277404785156, "learning_rate": 1.731826270926907e-05, "loss": 0.6643, "step": 7976 }, { "epoch": 26.154098360655738, "grad_norm": 5.358323574066162, "learning_rate": 1.731753900133061e-05, "loss": 0.5182, "step": 7977 }, { "epoch": 26.15737704918033, "grad_norm": 5.305294036865234, "learning_rate": 1.7316815210879295e-05, "loss": 0.5532, "step": 7978 }, { "epoch": 26.160655737704918, "grad_norm": 4.5080437660217285, "learning_rate": 1.7316091337923276e-05, "loss": 0.9534, "step": 7979 }, { "epoch": 26.16393442622951, "grad_norm": 5.403864860534668, "learning_rate": 1.7315367382470724e-05, "loss": 0.7363, "step": 7980 }, { "epoch": 26.167213114754098, "grad_norm": 5.776086330413818, "learning_rate": 1.7314643344529797e-05, "loss": 0.6885, "step": 7981 }, { "epoch": 26.17049180327869, "grad_norm": 5.144223690032959, "learning_rate": 1.731391922410866e-05, "loss": 0.8217, "step": 7982 }, { "epoch": 26.17377049180328, "grad_norm": 4.6753950119018555, "learning_rate": 1.731319502121548e-05, "loss": 0.7961, "step": 7983 }, { "epoch": 26.17704918032787, "grad_norm": 4.775324821472168, "learning_rate": 1.731247073585842e-05, "loss": 0.6874, "step": 7984 }, { "epoch": 26.18032786885246, "grad_norm": 5.700845241546631, "learning_rate": 1.7311746368045653e-05, "loss": 1.0555, "step": 7985 }, { "epoch": 26.18360655737705, "grad_norm": 5.911497116088867, "learning_rate": 1.7311021917785343e-05, "loss": 0.6537, "step": 7986 }, { "epoch": 26.18688524590164, "grad_norm": 5.27922248840332, "learning_rate": 1.7310297385085658e-05, "loss": 0.6866, "step": 7987 }, { "epoch": 26.19016393442623, "grad_norm": 4.680788040161133, "learning_rate": 1.730957276995477e-05, "loss": 0.7307, "step": 7988 }, { "epoch": 26.19344262295082, "grad_norm": 6.300714492797852, "learning_rate": 1.730884807240085e-05, "loss": 0.8271, "step": 7989 }, { "epoch": 26.19672131147541, "grad_norm": 4.824316501617432, "learning_rate": 1.7308123292432068e-05, "loss": 0.6203, "step": 7990 }, { "epoch": 26.2, "grad_norm": 6.0861897468566895, "learning_rate": 1.7307398430056595e-05, "loss": 0.6878, "step": 7991 }, { "epoch": 26.20327868852459, "grad_norm": 4.420683860778809, "learning_rate": 1.7306673485282612e-05, "loss": 0.7217, "step": 7992 }, { "epoch": 26.20655737704918, "grad_norm": 5.675441265106201, "learning_rate": 1.7305948458118282e-05, "loss": 0.7594, "step": 7993 }, { "epoch": 26.20983606557377, "grad_norm": 4.859981536865234, "learning_rate": 1.7305223348571792e-05, "loss": 0.9016, "step": 7994 }, { "epoch": 26.21311475409836, "grad_norm": 5.2924628257751465, "learning_rate": 1.730449815665131e-05, "loss": 0.6303, "step": 7995 }, { "epoch": 26.21639344262295, "grad_norm": 5.016909122467041, "learning_rate": 1.7303772882365018e-05, "loss": 0.9375, "step": 7996 }, { "epoch": 26.21967213114754, "grad_norm": 4.024514675140381, "learning_rate": 1.730304752572109e-05, "loss": 0.7908, "step": 7997 }, { "epoch": 26.222950819672132, "grad_norm": 4.998186111450195, "learning_rate": 1.7302322086727712e-05, "loss": 0.7912, "step": 7998 }, { "epoch": 26.22622950819672, "grad_norm": 4.923032283782959, "learning_rate": 1.730159656539306e-05, "loss": 0.7588, "step": 7999 }, { "epoch": 26.229508196721312, "grad_norm": 5.7505574226379395, "learning_rate": 1.730087096172531e-05, "loss": 0.7872, "step": 8000 }, { "epoch": 26.2327868852459, "grad_norm": 5.230181694030762, "learning_rate": 1.7300145275732654e-05, "loss": 0.692, "step": 8001 }, { "epoch": 26.236065573770492, "grad_norm": 4.773606300354004, "learning_rate": 1.7299419507423267e-05, "loss": 0.8426, "step": 8002 }, { "epoch": 26.23934426229508, "grad_norm": 4.741995811462402, "learning_rate": 1.7298693656805338e-05, "loss": 0.7398, "step": 8003 }, { "epoch": 26.242622950819673, "grad_norm": 7.3258819580078125, "learning_rate": 1.7297967723887044e-05, "loss": 0.5724, "step": 8004 }, { "epoch": 26.24590163934426, "grad_norm": 4.836188316345215, "learning_rate": 1.7297241708676583e-05, "loss": 0.6345, "step": 8005 }, { "epoch": 26.249180327868853, "grad_norm": 5.379550457000732, "learning_rate": 1.7296515611182133e-05, "loss": 0.8389, "step": 8006 }, { "epoch": 26.25245901639344, "grad_norm": 5.311740875244141, "learning_rate": 1.729578943141188e-05, "loss": 0.8729, "step": 8007 }, { "epoch": 26.255737704918033, "grad_norm": 5.220086097717285, "learning_rate": 1.7295063169374015e-05, "loss": 0.6395, "step": 8008 }, { "epoch": 26.25901639344262, "grad_norm": 6.084230422973633, "learning_rate": 1.7294336825076728e-05, "loss": 0.8041, "step": 8009 }, { "epoch": 26.262295081967213, "grad_norm": 6.091377258300781, "learning_rate": 1.729361039852821e-05, "loss": 0.7609, "step": 8010 }, { "epoch": 26.2655737704918, "grad_norm": 5.553313255310059, "learning_rate": 1.7292883889736654e-05, "loss": 0.4525, "step": 8011 }, { "epoch": 26.268852459016394, "grad_norm": 6.4488139152526855, "learning_rate": 1.7292157298710247e-05, "loss": 0.699, "step": 8012 }, { "epoch": 26.272131147540982, "grad_norm": 4.738976001739502, "learning_rate": 1.7291430625457186e-05, "loss": 0.6635, "step": 8013 }, { "epoch": 26.275409836065574, "grad_norm": 6.1691412925720215, "learning_rate": 1.7290703869985665e-05, "loss": 0.5311, "step": 8014 }, { "epoch": 26.278688524590162, "grad_norm": 5.409060955047607, "learning_rate": 1.728997703230387e-05, "loss": 0.7772, "step": 8015 }, { "epoch": 26.281967213114754, "grad_norm": 5.355844974517822, "learning_rate": 1.7289250112420012e-05, "loss": 0.7936, "step": 8016 }, { "epoch": 26.285245901639342, "grad_norm": 5.510108947753906, "learning_rate": 1.7288523110342276e-05, "loss": 0.9267, "step": 8017 }, { "epoch": 26.288524590163934, "grad_norm": 5.638671398162842, "learning_rate": 1.7287796026078864e-05, "loss": 0.7718, "step": 8018 }, { "epoch": 26.291803278688526, "grad_norm": 5.24574089050293, "learning_rate": 1.7287068859637975e-05, "loss": 0.9285, "step": 8019 }, { "epoch": 26.295081967213115, "grad_norm": 5.1414361000061035, "learning_rate": 1.728634161102781e-05, "loss": 0.6472, "step": 8020 }, { "epoch": 26.298360655737707, "grad_norm": 5.156736373901367, "learning_rate": 1.7285614280256566e-05, "loss": 0.7187, "step": 8021 }, { "epoch": 26.301639344262295, "grad_norm": 5.646481513977051, "learning_rate": 1.7284886867332444e-05, "loss": 0.5456, "step": 8022 }, { "epoch": 26.304918032786887, "grad_norm": 5.1624016761779785, "learning_rate": 1.7284159372263653e-05, "loss": 0.6941, "step": 8023 }, { "epoch": 26.308196721311475, "grad_norm": 6.3454694747924805, "learning_rate": 1.7283431795058385e-05, "loss": 0.6375, "step": 8024 }, { "epoch": 26.311475409836067, "grad_norm": 5.18880033493042, "learning_rate": 1.7282704135724854e-05, "loss": 0.791, "step": 8025 }, { "epoch": 26.314754098360655, "grad_norm": 4.580716133117676, "learning_rate": 1.728197639427126e-05, "loss": 0.759, "step": 8026 }, { "epoch": 26.318032786885247, "grad_norm": 5.176844596862793, "learning_rate": 1.7281248570705814e-05, "loss": 0.7189, "step": 8027 }, { "epoch": 26.321311475409836, "grad_norm": 5.531274795532227, "learning_rate": 1.7280520665036717e-05, "loss": 0.6413, "step": 8028 }, { "epoch": 26.324590163934428, "grad_norm": 5.771851539611816, "learning_rate": 1.727979267727218e-05, "loss": 0.5433, "step": 8029 }, { "epoch": 26.327868852459016, "grad_norm": 5.08815336227417, "learning_rate": 1.7279064607420415e-05, "loss": 0.8533, "step": 8030 }, { "epoch": 26.331147540983608, "grad_norm": 5.032818794250488, "learning_rate": 1.7278336455489625e-05, "loss": 0.7888, "step": 8031 }, { "epoch": 26.334426229508196, "grad_norm": 4.52095365524292, "learning_rate": 1.7277608221488024e-05, "loss": 0.8217, "step": 8032 }, { "epoch": 26.337704918032788, "grad_norm": 5.384937286376953, "learning_rate": 1.7276879905423824e-05, "loss": 0.7362, "step": 8033 }, { "epoch": 26.340983606557376, "grad_norm": 5.497521877288818, "learning_rate": 1.7276151507305235e-05, "loss": 0.7025, "step": 8034 }, { "epoch": 26.34426229508197, "grad_norm": 4.780543804168701, "learning_rate": 1.7275423027140474e-05, "loss": 0.7866, "step": 8035 }, { "epoch": 26.347540983606557, "grad_norm": 4.755852222442627, "learning_rate": 1.7274694464937756e-05, "loss": 0.5836, "step": 8036 }, { "epoch": 26.35081967213115, "grad_norm": 4.843266487121582, "learning_rate": 1.727396582070529e-05, "loss": 0.8538, "step": 8037 }, { "epoch": 26.354098360655737, "grad_norm": 4.054784774780273, "learning_rate": 1.72732370944513e-05, "loss": 0.8657, "step": 8038 }, { "epoch": 26.35737704918033, "grad_norm": 4.9417314529418945, "learning_rate": 1.7272508286184e-05, "loss": 0.7574, "step": 8039 }, { "epoch": 26.360655737704917, "grad_norm": 7.518777370452881, "learning_rate": 1.7271779395911604e-05, "loss": 0.7726, "step": 8040 }, { "epoch": 26.36393442622951, "grad_norm": 6.158746242523193, "learning_rate": 1.7271050423642334e-05, "loss": 0.5139, "step": 8041 }, { "epoch": 26.367213114754097, "grad_norm": 4.757289886474609, "learning_rate": 1.7270321369384414e-05, "loss": 0.7274, "step": 8042 }, { "epoch": 26.37049180327869, "grad_norm": 5.235974311828613, "learning_rate": 1.726959223314606e-05, "loss": 0.6828, "step": 8043 }, { "epoch": 26.373770491803278, "grad_norm": 5.137020587921143, "learning_rate": 1.7268863014935497e-05, "loss": 0.5892, "step": 8044 }, { "epoch": 26.37704918032787, "grad_norm": 6.2497382164001465, "learning_rate": 1.7268133714760945e-05, "loss": 1.0152, "step": 8045 }, { "epoch": 26.380327868852458, "grad_norm": 5.255640983581543, "learning_rate": 1.7267404332630625e-05, "loss": 0.6224, "step": 8046 }, { "epoch": 26.38360655737705, "grad_norm": 6.730890274047852, "learning_rate": 1.7266674868552765e-05, "loss": 0.5955, "step": 8047 }, { "epoch": 26.386885245901638, "grad_norm": 5.205132961273193, "learning_rate": 1.7265945322535594e-05, "loss": 0.762, "step": 8048 }, { "epoch": 26.39016393442623, "grad_norm": 4.080275058746338, "learning_rate": 1.7265215694587335e-05, "loss": 0.7505, "step": 8049 }, { "epoch": 26.39344262295082, "grad_norm": 4.974253177642822, "learning_rate": 1.7264485984716214e-05, "loss": 0.7238, "step": 8050 }, { "epoch": 26.39672131147541, "grad_norm": 6.481887340545654, "learning_rate": 1.7263756192930458e-05, "loss": 1.0217, "step": 8051 }, { "epoch": 26.4, "grad_norm": 5.382483005523682, "learning_rate": 1.72630263192383e-05, "loss": 0.6722, "step": 8052 }, { "epoch": 26.40327868852459, "grad_norm": 5.924650192260742, "learning_rate": 1.726229636364797e-05, "loss": 0.9839, "step": 8053 }, { "epoch": 26.40655737704918, "grad_norm": 4.633831024169922, "learning_rate": 1.7261566326167697e-05, "loss": 0.7125, "step": 8054 }, { "epoch": 26.40983606557377, "grad_norm": 4.602766036987305, "learning_rate": 1.726083620680571e-05, "loss": 0.9332, "step": 8055 }, { "epoch": 26.41311475409836, "grad_norm": 4.581053733825684, "learning_rate": 1.7260106005570252e-05, "loss": 0.8415, "step": 8056 }, { "epoch": 26.41639344262295, "grad_norm": 5.608421325683594, "learning_rate": 1.7259375722469547e-05, "loss": 0.7707, "step": 8057 }, { "epoch": 26.41967213114754, "grad_norm": 4.877066135406494, "learning_rate": 1.7258645357511832e-05, "loss": 0.7318, "step": 8058 }, { "epoch": 26.42295081967213, "grad_norm": 5.0672993659973145, "learning_rate": 1.7257914910705343e-05, "loss": 0.5758, "step": 8059 }, { "epoch": 26.42622950819672, "grad_norm": 4.731900691986084, "learning_rate": 1.7257184382058318e-05, "loss": 0.8094, "step": 8060 }, { "epoch": 26.42950819672131, "grad_norm": 6.005192279815674, "learning_rate": 1.7256453771578993e-05, "loss": 0.72, "step": 8061 }, { "epoch": 26.432786885245903, "grad_norm": 5.245547771453857, "learning_rate": 1.7255723079275607e-05, "loss": 0.6783, "step": 8062 }, { "epoch": 26.43606557377049, "grad_norm": 5.547356605529785, "learning_rate": 1.72549923051564e-05, "loss": 0.7327, "step": 8063 }, { "epoch": 26.439344262295084, "grad_norm": 5.743470668792725, "learning_rate": 1.725426144922961e-05, "loss": 0.503, "step": 8064 }, { "epoch": 26.442622950819672, "grad_norm": 4.9926629066467285, "learning_rate": 1.7253530511503483e-05, "loss": 0.6808, "step": 8065 }, { "epoch": 26.445901639344264, "grad_norm": 4.68770694732666, "learning_rate": 1.7252799491986256e-05, "loss": 0.6242, "step": 8066 }, { "epoch": 26.449180327868852, "grad_norm": 5.737264633178711, "learning_rate": 1.7252068390686174e-05, "loss": 0.5574, "step": 8067 }, { "epoch": 26.452459016393444, "grad_norm": 4.325894355773926, "learning_rate": 1.725133720761148e-05, "loss": 0.6936, "step": 8068 }, { "epoch": 26.455737704918032, "grad_norm": 4.875389575958252, "learning_rate": 1.725060594277042e-05, "loss": 0.9011, "step": 8069 }, { "epoch": 26.459016393442624, "grad_norm": 5.599881649017334, "learning_rate": 1.7249874596171236e-05, "loss": 0.9851, "step": 8070 }, { "epoch": 26.462295081967213, "grad_norm": 5.1766462326049805, "learning_rate": 1.7249143167822182e-05, "loss": 0.71, "step": 8071 }, { "epoch": 26.465573770491805, "grad_norm": 8.31230640411377, "learning_rate": 1.72484116577315e-05, "loss": 0.7678, "step": 8072 }, { "epoch": 26.468852459016393, "grad_norm": 4.86145544052124, "learning_rate": 1.7247680065907443e-05, "loss": 0.7901, "step": 8073 }, { "epoch": 26.472131147540985, "grad_norm": 5.4036641120910645, "learning_rate": 1.7246948392358255e-05, "loss": 0.4929, "step": 8074 }, { "epoch": 26.475409836065573, "grad_norm": 5.119252681732178, "learning_rate": 1.7246216637092184e-05, "loss": 0.7856, "step": 8075 }, { "epoch": 26.478688524590165, "grad_norm": 5.021341323852539, "learning_rate": 1.7245484800117492e-05, "loss": 0.6424, "step": 8076 }, { "epoch": 26.481967213114753, "grad_norm": 4.818675994873047, "learning_rate": 1.7244752881442424e-05, "loss": 0.8307, "step": 8077 }, { "epoch": 26.485245901639345, "grad_norm": 4.3915696144104, "learning_rate": 1.7244020881075236e-05, "loss": 0.8389, "step": 8078 }, { "epoch": 26.488524590163934, "grad_norm": 4.2708330154418945, "learning_rate": 1.724328879902418e-05, "loss": 0.8958, "step": 8079 }, { "epoch": 26.491803278688526, "grad_norm": 6.9756388664245605, "learning_rate": 1.7242556635297512e-05, "loss": 0.5792, "step": 8080 }, { "epoch": 26.495081967213114, "grad_norm": 4.439616680145264, "learning_rate": 1.7241824389903486e-05, "loss": 0.6931, "step": 8081 }, { "epoch": 26.498360655737706, "grad_norm": 5.6627197265625, "learning_rate": 1.724109206285036e-05, "loss": 0.691, "step": 8082 }, { "epoch": 26.501639344262294, "grad_norm": 4.598470211029053, "learning_rate": 1.7240359654146394e-05, "loss": 0.7589, "step": 8083 }, { "epoch": 26.504918032786886, "grad_norm": 6.265660762786865, "learning_rate": 1.7239627163799845e-05, "loss": 0.5901, "step": 8084 }, { "epoch": 26.508196721311474, "grad_norm": 4.776093006134033, "learning_rate": 1.7238894591818975e-05, "loss": 0.7427, "step": 8085 }, { "epoch": 26.511475409836066, "grad_norm": 7.007157325744629, "learning_rate": 1.7238161938212036e-05, "loss": 0.7924, "step": 8086 }, { "epoch": 26.514754098360655, "grad_norm": 5.423185348510742, "learning_rate": 1.7237429202987297e-05, "loss": 0.5779, "step": 8087 }, { "epoch": 26.518032786885247, "grad_norm": 4.81086540222168, "learning_rate": 1.723669638615302e-05, "loss": 0.9316, "step": 8088 }, { "epoch": 26.521311475409835, "grad_norm": 6.849590301513672, "learning_rate": 1.7235963487717466e-05, "loss": 0.8053, "step": 8089 }, { "epoch": 26.524590163934427, "grad_norm": 5.09978723526001, "learning_rate": 1.72352305076889e-05, "loss": 0.577, "step": 8090 }, { "epoch": 26.527868852459015, "grad_norm": 4.935347080230713, "learning_rate": 1.7234497446075588e-05, "loss": 0.8744, "step": 8091 }, { "epoch": 26.531147540983607, "grad_norm": 5.985371112823486, "learning_rate": 1.7233764302885794e-05, "loss": 0.5547, "step": 8092 }, { "epoch": 26.534426229508195, "grad_norm": 5.45805025100708, "learning_rate": 1.723303107812779e-05, "loss": 0.6954, "step": 8093 }, { "epoch": 26.537704918032787, "grad_norm": 4.0200114250183105, "learning_rate": 1.7232297771809834e-05, "loss": 0.7943, "step": 8094 }, { "epoch": 26.540983606557376, "grad_norm": 5.403634071350098, "learning_rate": 1.7231564383940205e-05, "loss": 0.757, "step": 8095 }, { "epoch": 26.544262295081968, "grad_norm": 6.818768501281738, "learning_rate": 1.723083091452717e-05, "loss": 0.6702, "step": 8096 }, { "epoch": 26.547540983606556, "grad_norm": 4.912941932678223, "learning_rate": 1.7230097363579e-05, "loss": 0.9755, "step": 8097 }, { "epoch": 26.550819672131148, "grad_norm": 4.418812274932861, "learning_rate": 1.722936373110396e-05, "loss": 0.7029, "step": 8098 }, { "epoch": 26.554098360655736, "grad_norm": 4.644084453582764, "learning_rate": 1.7228630017110328e-05, "loss": 0.7106, "step": 8099 }, { "epoch": 26.557377049180328, "grad_norm": 5.730286121368408, "learning_rate": 1.7227896221606378e-05, "loss": 0.6111, "step": 8100 }, { "epoch": 26.560655737704916, "grad_norm": 4.688045978546143, "learning_rate": 1.7227162344600382e-05, "loss": 0.5603, "step": 8101 }, { "epoch": 26.56393442622951, "grad_norm": 5.334647178649902, "learning_rate": 1.7226428386100614e-05, "loss": 0.533, "step": 8102 }, { "epoch": 26.567213114754097, "grad_norm": 3.9680421352386475, "learning_rate": 1.7225694346115355e-05, "loss": 0.605, "step": 8103 }, { "epoch": 26.57049180327869, "grad_norm": 5.297041893005371, "learning_rate": 1.722496022465288e-05, "loss": 0.627, "step": 8104 }, { "epoch": 26.57377049180328, "grad_norm": 4.609912872314453, "learning_rate": 1.7224226021721467e-05, "loss": 0.7799, "step": 8105 }, { "epoch": 26.57704918032787, "grad_norm": 4.100105285644531, "learning_rate": 1.722349173732939e-05, "loss": 0.7276, "step": 8106 }, { "epoch": 26.58032786885246, "grad_norm": 6.075005054473877, "learning_rate": 1.7222757371484937e-05, "loss": 0.8858, "step": 8107 }, { "epoch": 26.58360655737705, "grad_norm": 5.456150531768799, "learning_rate": 1.7222022924196384e-05, "loss": 0.6073, "step": 8108 }, { "epoch": 26.58688524590164, "grad_norm": 4.341593265533447, "learning_rate": 1.7221288395472013e-05, "loss": 0.6364, "step": 8109 }, { "epoch": 26.59016393442623, "grad_norm": 5.145501136779785, "learning_rate": 1.722055378532011e-05, "loss": 0.7404, "step": 8110 }, { "epoch": 26.59344262295082, "grad_norm": 4.68424654006958, "learning_rate": 1.7219819093748952e-05, "loss": 0.7142, "step": 8111 }, { "epoch": 26.59672131147541, "grad_norm": 5.233693599700928, "learning_rate": 1.721908432076683e-05, "loss": 0.9106, "step": 8112 }, { "epoch": 26.6, "grad_norm": 5.217652797698975, "learning_rate": 1.7218349466382024e-05, "loss": 0.7181, "step": 8113 }, { "epoch": 26.60327868852459, "grad_norm": 4.657253742218018, "learning_rate": 1.721761453060282e-05, "loss": 0.6592, "step": 8114 }, { "epoch": 26.60655737704918, "grad_norm": 5.042227745056152, "learning_rate": 1.7216879513437512e-05, "loss": 0.7806, "step": 8115 }, { "epoch": 26.60983606557377, "grad_norm": 4.47282600402832, "learning_rate": 1.7216144414894383e-05, "loss": 0.611, "step": 8116 }, { "epoch": 26.613114754098362, "grad_norm": 4.876448631286621, "learning_rate": 1.721540923498172e-05, "loss": 0.5993, "step": 8117 }, { "epoch": 26.61639344262295, "grad_norm": 4.510856628417969, "learning_rate": 1.7214673973707818e-05, "loss": 0.5587, "step": 8118 }, { "epoch": 26.619672131147542, "grad_norm": 4.79296350479126, "learning_rate": 1.7213938631080968e-05, "loss": 0.8043, "step": 8119 }, { "epoch": 26.62295081967213, "grad_norm": 4.867465496063232, "learning_rate": 1.7213203207109454e-05, "loss": 0.9014, "step": 8120 }, { "epoch": 26.626229508196722, "grad_norm": 5.089147090911865, "learning_rate": 1.7212467701801577e-05, "loss": 0.7457, "step": 8121 }, { "epoch": 26.62950819672131, "grad_norm": 7.211783409118652, "learning_rate": 1.721173211516563e-05, "loss": 0.8622, "step": 8122 }, { "epoch": 26.632786885245903, "grad_norm": 6.096306800842285, "learning_rate": 1.7210996447209904e-05, "loss": 0.6688, "step": 8123 }, { "epoch": 26.63606557377049, "grad_norm": 5.72508430480957, "learning_rate": 1.7210260697942695e-05, "loss": 0.8635, "step": 8124 }, { "epoch": 26.639344262295083, "grad_norm": 4.360736846923828, "learning_rate": 1.7209524867372296e-05, "loss": 0.8024, "step": 8125 }, { "epoch": 26.64262295081967, "grad_norm": 4.564045429229736, "learning_rate": 1.7208788955507013e-05, "loss": 1.0279, "step": 8126 }, { "epoch": 26.645901639344263, "grad_norm": 6.616612911224365, "learning_rate": 1.7208052962355135e-05, "loss": 0.6158, "step": 8127 }, { "epoch": 26.64918032786885, "grad_norm": 4.461193084716797, "learning_rate": 1.7207316887924968e-05, "loss": 0.6798, "step": 8128 }, { "epoch": 26.652459016393443, "grad_norm": 6.002246379852295, "learning_rate": 1.7206580732224808e-05, "loss": 0.9543, "step": 8129 }, { "epoch": 26.65573770491803, "grad_norm": 5.555492877960205, "learning_rate": 1.7205844495262957e-05, "loss": 0.6675, "step": 8130 }, { "epoch": 26.659016393442624, "grad_norm": 4.779819965362549, "learning_rate": 1.720510817704772e-05, "loss": 0.7002, "step": 8131 }, { "epoch": 26.662295081967212, "grad_norm": 5.371736526489258, "learning_rate": 1.7204371777587394e-05, "loss": 0.7034, "step": 8132 }, { "epoch": 26.665573770491804, "grad_norm": 5.1738104820251465, "learning_rate": 1.7203635296890288e-05, "loss": 0.9079, "step": 8133 }, { "epoch": 26.668852459016392, "grad_norm": 4.235679626464844, "learning_rate": 1.72028987349647e-05, "loss": 0.7504, "step": 8134 }, { "epoch": 26.672131147540984, "grad_norm": 4.9282073974609375, "learning_rate": 1.7202162091818943e-05, "loss": 0.8341, "step": 8135 }, { "epoch": 26.675409836065572, "grad_norm": 4.632916450500488, "learning_rate": 1.7201425367461323e-05, "loss": 0.681, "step": 8136 }, { "epoch": 26.678688524590164, "grad_norm": 5.308551788330078, "learning_rate": 1.720068856190014e-05, "loss": 0.6718, "step": 8137 }, { "epoch": 26.681967213114753, "grad_norm": 4.352926731109619, "learning_rate": 1.7199951675143708e-05, "loss": 0.8909, "step": 8138 }, { "epoch": 26.685245901639345, "grad_norm": 5.554736614227295, "learning_rate": 1.7199214707200335e-05, "loss": 0.5831, "step": 8139 }, { "epoch": 26.688524590163933, "grad_norm": 5.105560779571533, "learning_rate": 1.719847765807833e-05, "loss": 0.5652, "step": 8140 }, { "epoch": 26.691803278688525, "grad_norm": 5.214602947235107, "learning_rate": 1.7197740527786005e-05, "loss": 0.7032, "step": 8141 }, { "epoch": 26.695081967213113, "grad_norm": 4.700232028961182, "learning_rate": 1.719700331633167e-05, "loss": 0.5856, "step": 8142 }, { "epoch": 26.698360655737705, "grad_norm": 4.999650955200195, "learning_rate": 1.719626602372364e-05, "loss": 0.8923, "step": 8143 }, { "epoch": 26.701639344262293, "grad_norm": 5.152347087860107, "learning_rate": 1.719552864997023e-05, "loss": 0.717, "step": 8144 }, { "epoch": 26.704918032786885, "grad_norm": 5.249163627624512, "learning_rate": 1.7194791195079754e-05, "loss": 0.8297, "step": 8145 }, { "epoch": 26.708196721311474, "grad_norm": 4.718889236450195, "learning_rate": 1.7194053659060524e-05, "loss": 0.5906, "step": 8146 }, { "epoch": 26.711475409836066, "grad_norm": 5.451462268829346, "learning_rate": 1.719331604192086e-05, "loss": 0.6915, "step": 8147 }, { "epoch": 26.714754098360658, "grad_norm": 4.519597053527832, "learning_rate": 1.719257834366908e-05, "loss": 0.6598, "step": 8148 }, { "epoch": 26.718032786885246, "grad_norm": 5.211546421051025, "learning_rate": 1.71918405643135e-05, "loss": 0.5996, "step": 8149 }, { "epoch": 26.721311475409838, "grad_norm": 5.513017177581787, "learning_rate": 1.719110270386244e-05, "loss": 0.5458, "step": 8150 }, { "epoch": 26.724590163934426, "grad_norm": 4.511483192443848, "learning_rate": 1.719036476232422e-05, "loss": 0.9075, "step": 8151 }, { "epoch": 26.727868852459018, "grad_norm": 4.926835060119629, "learning_rate": 1.718962673970716e-05, "loss": 0.8427, "step": 8152 }, { "epoch": 26.731147540983606, "grad_norm": 4.4821929931640625, "learning_rate": 1.7188888636019586e-05, "loss": 0.7686, "step": 8153 }, { "epoch": 26.7344262295082, "grad_norm": 4.974206447601318, "learning_rate": 1.7188150451269816e-05, "loss": 0.5591, "step": 8154 }, { "epoch": 26.737704918032787, "grad_norm": 5.2514328956604, "learning_rate": 1.7187412185466175e-05, "loss": 0.6471, "step": 8155 }, { "epoch": 26.74098360655738, "grad_norm": 4.830451011657715, "learning_rate": 1.718667383861699e-05, "loss": 0.7816, "step": 8156 }, { "epoch": 26.744262295081967, "grad_norm": 4.7252278327941895, "learning_rate": 1.7185935410730582e-05, "loss": 0.7779, "step": 8157 }, { "epoch": 26.74754098360656, "grad_norm": 5.372834205627441, "learning_rate": 1.7185196901815286e-05, "loss": 0.802, "step": 8158 }, { "epoch": 26.750819672131147, "grad_norm": 6.1266279220581055, "learning_rate": 1.718445831187942e-05, "loss": 0.7874, "step": 8159 }, { "epoch": 26.75409836065574, "grad_norm": 4.5724873542785645, "learning_rate": 1.718371964093132e-05, "loss": 0.7908, "step": 8160 }, { "epoch": 26.757377049180327, "grad_norm": 4.762553691864014, "learning_rate": 1.718298088897931e-05, "loss": 0.7216, "step": 8161 }, { "epoch": 26.76065573770492, "grad_norm": 5.355874061584473, "learning_rate": 1.7182242056031722e-05, "loss": 0.8381, "step": 8162 }, { "epoch": 26.763934426229508, "grad_norm": 4.853092670440674, "learning_rate": 1.7181503142096888e-05, "loss": 0.7385, "step": 8163 }, { "epoch": 26.7672131147541, "grad_norm": 4.005867004394531, "learning_rate": 1.7180764147183137e-05, "loss": 0.8431, "step": 8164 }, { "epoch": 26.770491803278688, "grad_norm": 4.94480562210083, "learning_rate": 1.718002507129881e-05, "loss": 0.6444, "step": 8165 }, { "epoch": 26.77377049180328, "grad_norm": 5.24068546295166, "learning_rate": 1.717928591445223e-05, "loss": 0.5959, "step": 8166 }, { "epoch": 26.777049180327868, "grad_norm": 5.123281478881836, "learning_rate": 1.7178546676651735e-05, "loss": 0.6281, "step": 8167 }, { "epoch": 26.78032786885246, "grad_norm": 4.410493850708008, "learning_rate": 1.7177807357905663e-05, "loss": 0.7925, "step": 8168 }, { "epoch": 26.78360655737705, "grad_norm": 6.104818820953369, "learning_rate": 1.7177067958222354e-05, "loss": 0.7349, "step": 8169 }, { "epoch": 26.78688524590164, "grad_norm": 4.478062629699707, "learning_rate": 1.717632847761014e-05, "loss": 0.8641, "step": 8170 }, { "epoch": 26.79016393442623, "grad_norm": 4.985456943511963, "learning_rate": 1.7175588916077357e-05, "loss": 0.7257, "step": 8171 }, { "epoch": 26.79344262295082, "grad_norm": 5.885586261749268, "learning_rate": 1.717484927363235e-05, "loss": 0.7462, "step": 8172 }, { "epoch": 26.79672131147541, "grad_norm": 5.584410190582275, "learning_rate": 1.7174109550283462e-05, "loss": 0.9998, "step": 8173 }, { "epoch": 26.8, "grad_norm": 5.694964408874512, "learning_rate": 1.7173369746039026e-05, "loss": 0.5058, "step": 8174 }, { "epoch": 26.80327868852459, "grad_norm": 5.510812759399414, "learning_rate": 1.717262986090739e-05, "loss": 0.6868, "step": 8175 }, { "epoch": 26.80655737704918, "grad_norm": 5.623562335968018, "learning_rate": 1.7171889894896893e-05, "loss": 0.7101, "step": 8176 }, { "epoch": 26.80983606557377, "grad_norm": 4.823542594909668, "learning_rate": 1.717114984801588e-05, "loss": 0.6883, "step": 8177 }, { "epoch": 26.81311475409836, "grad_norm": 5.47898530960083, "learning_rate": 1.7170409720272697e-05, "loss": 0.753, "step": 8178 }, { "epoch": 26.81639344262295, "grad_norm": 4.324151039123535, "learning_rate": 1.7169669511675688e-05, "loss": 1.0491, "step": 8179 }, { "epoch": 26.81967213114754, "grad_norm": 6.039496898651123, "learning_rate": 1.7168929222233202e-05, "loss": 0.7395, "step": 8180 }, { "epoch": 26.82295081967213, "grad_norm": 7.405332088470459, "learning_rate": 1.7168188851953585e-05, "loss": 0.7474, "step": 8181 }, { "epoch": 26.82622950819672, "grad_norm": 4.867709636688232, "learning_rate": 1.7167448400845185e-05, "loss": 0.6469, "step": 8182 }, { "epoch": 26.82950819672131, "grad_norm": 6.026517868041992, "learning_rate": 1.7166707868916354e-05, "loss": 0.8168, "step": 8183 }, { "epoch": 26.832786885245902, "grad_norm": 4.759296417236328, "learning_rate": 1.7165967256175436e-05, "loss": 0.6982, "step": 8184 }, { "epoch": 26.83606557377049, "grad_norm": 5.050386905670166, "learning_rate": 1.7165226562630787e-05, "loss": 0.9645, "step": 8185 }, { "epoch": 26.839344262295082, "grad_norm": 4.442123889923096, "learning_rate": 1.7164485788290762e-05, "loss": 0.7565, "step": 8186 }, { "epoch": 26.84262295081967, "grad_norm": 4.608466148376465, "learning_rate": 1.716374493316371e-05, "loss": 0.7353, "step": 8187 }, { "epoch": 26.845901639344262, "grad_norm": 4.423882484436035, "learning_rate": 1.7163003997257984e-05, "loss": 0.7505, "step": 8188 }, { "epoch": 26.84918032786885, "grad_norm": 5.082762241363525, "learning_rate": 1.716226298058194e-05, "loss": 0.7426, "step": 8189 }, { "epoch": 26.852459016393443, "grad_norm": 4.8651862144470215, "learning_rate": 1.7161521883143936e-05, "loss": 0.6783, "step": 8190 }, { "epoch": 26.855737704918035, "grad_norm": 4.854366779327393, "learning_rate": 1.7160780704952324e-05, "loss": 0.9833, "step": 8191 }, { "epoch": 26.859016393442623, "grad_norm": 4.1089067459106445, "learning_rate": 1.7160039446015466e-05, "loss": 0.9039, "step": 8192 }, { "epoch": 26.862295081967215, "grad_norm": 4.587819576263428, "learning_rate": 1.715929810634172e-05, "loss": 0.8857, "step": 8193 }, { "epoch": 26.865573770491803, "grad_norm": 5.4283342361450195, "learning_rate": 1.715855668593944e-05, "loss": 0.7958, "step": 8194 }, { "epoch": 26.868852459016395, "grad_norm": 4.661022186279297, "learning_rate": 1.7157815184816993e-05, "loss": 0.9457, "step": 8195 }, { "epoch": 26.872131147540983, "grad_norm": 4.568231105804443, "learning_rate": 1.7157073602982736e-05, "loss": 0.7712, "step": 8196 }, { "epoch": 26.875409836065575, "grad_norm": 5.315059185028076, "learning_rate": 1.7156331940445037e-05, "loss": 0.7893, "step": 8197 }, { "epoch": 26.878688524590164, "grad_norm": 4.719767093658447, "learning_rate": 1.715559019721225e-05, "loss": 0.8355, "step": 8198 }, { "epoch": 26.881967213114756, "grad_norm": 5.063414096832275, "learning_rate": 1.7154848373292744e-05, "loss": 0.739, "step": 8199 }, { "epoch": 26.885245901639344, "grad_norm": 8.22568416595459, "learning_rate": 1.7154106468694885e-05, "loss": 0.6019, "step": 8200 }, { "epoch": 26.888524590163936, "grad_norm": 5.361215114593506, "learning_rate": 1.715336448342704e-05, "loss": 0.7916, "step": 8201 }, { "epoch": 26.891803278688524, "grad_norm": 4.954283237457275, "learning_rate": 1.7152622417497565e-05, "loss": 0.8397, "step": 8202 }, { "epoch": 26.895081967213116, "grad_norm": 4.97587776184082, "learning_rate": 1.715188027091484e-05, "loss": 0.606, "step": 8203 }, { "epoch": 26.898360655737704, "grad_norm": 4.860137462615967, "learning_rate": 1.715113804368723e-05, "loss": 0.7601, "step": 8204 }, { "epoch": 26.901639344262296, "grad_norm": 4.71724271774292, "learning_rate": 1.7150395735823102e-05, "loss": 0.5508, "step": 8205 }, { "epoch": 26.904918032786885, "grad_norm": 6.702202796936035, "learning_rate": 1.7149653347330828e-05, "loss": 1.1781, "step": 8206 }, { "epoch": 26.908196721311477, "grad_norm": 4.649405479431152, "learning_rate": 1.7148910878218778e-05, "loss": 0.9917, "step": 8207 }, { "epoch": 26.911475409836065, "grad_norm": 5.252254962921143, "learning_rate": 1.7148168328495324e-05, "loss": 0.6871, "step": 8208 }, { "epoch": 26.914754098360657, "grad_norm": 4.885874271392822, "learning_rate": 1.714742569816884e-05, "loss": 0.7236, "step": 8209 }, { "epoch": 26.918032786885245, "grad_norm": 4.678791046142578, "learning_rate": 1.7146682987247702e-05, "loss": 0.5847, "step": 8210 }, { "epoch": 26.921311475409837, "grad_norm": 5.691012859344482, "learning_rate": 1.7145940195740282e-05, "loss": 0.5912, "step": 8211 }, { "epoch": 26.924590163934425, "grad_norm": 5.232816219329834, "learning_rate": 1.7145197323654957e-05, "loss": 0.754, "step": 8212 }, { "epoch": 26.927868852459017, "grad_norm": 4.617044448852539, "learning_rate": 1.71444543710001e-05, "loss": 0.6397, "step": 8213 }, { "epoch": 26.931147540983606, "grad_norm": 5.118398189544678, "learning_rate": 1.714371133778409e-05, "loss": 0.9423, "step": 8214 }, { "epoch": 26.934426229508198, "grad_norm": 5.2503814697265625, "learning_rate": 1.7142968224015316e-05, "loss": 0.6324, "step": 8215 }, { "epoch": 26.937704918032786, "grad_norm": 4.562446594238281, "learning_rate": 1.714222502970214e-05, "loss": 0.8553, "step": 8216 }, { "epoch": 26.940983606557378, "grad_norm": 5.01725959777832, "learning_rate": 1.7141481754852957e-05, "loss": 0.9474, "step": 8217 }, { "epoch": 26.944262295081966, "grad_norm": 5.141895771026611, "learning_rate": 1.7140738399476138e-05, "loss": 0.7782, "step": 8218 }, { "epoch": 26.947540983606558, "grad_norm": 5.436066150665283, "learning_rate": 1.713999496358007e-05, "loss": 0.6761, "step": 8219 }, { "epoch": 26.950819672131146, "grad_norm": 5.277507305145264, "learning_rate": 1.7139251447173134e-05, "loss": 0.8746, "step": 8220 }, { "epoch": 26.95409836065574, "grad_norm": 4.794351100921631, "learning_rate": 1.7138507850263715e-05, "loss": 0.819, "step": 8221 }, { "epoch": 26.957377049180327, "grad_norm": 4.708646774291992, "learning_rate": 1.71377641728602e-05, "loss": 0.6611, "step": 8222 }, { "epoch": 26.96065573770492, "grad_norm": 5.187784671783447, "learning_rate": 1.713702041497097e-05, "loss": 0.8746, "step": 8223 }, { "epoch": 26.963934426229507, "grad_norm": 5.393805503845215, "learning_rate": 1.7136276576604413e-05, "loss": 0.6079, "step": 8224 }, { "epoch": 26.9672131147541, "grad_norm": 5.5166802406311035, "learning_rate": 1.713553265776892e-05, "loss": 0.7693, "step": 8225 }, { "epoch": 26.970491803278687, "grad_norm": 5.134838104248047, "learning_rate": 1.7134788658472877e-05, "loss": 0.5956, "step": 8226 }, { "epoch": 26.97377049180328, "grad_norm": 5.153406143188477, "learning_rate": 1.7134044578724673e-05, "loss": 0.8854, "step": 8227 }, { "epoch": 26.977049180327867, "grad_norm": 5.240243434906006, "learning_rate": 1.71333004185327e-05, "loss": 0.6315, "step": 8228 }, { "epoch": 26.98032786885246, "grad_norm": 5.08864688873291, "learning_rate": 1.7132556177905348e-05, "loss": 0.6877, "step": 8229 }, { "epoch": 26.983606557377048, "grad_norm": 5.241415500640869, "learning_rate": 1.713181185685101e-05, "loss": 0.6768, "step": 8230 }, { "epoch": 26.98688524590164, "grad_norm": 4.1095781326293945, "learning_rate": 1.7131067455378074e-05, "loss": 0.9952, "step": 8231 }, { "epoch": 26.990163934426228, "grad_norm": 4.414435863494873, "learning_rate": 1.713032297349494e-05, "loss": 0.6225, "step": 8232 }, { "epoch": 26.99344262295082, "grad_norm": 5.610693454742432, "learning_rate": 1.7129578411210002e-05, "loss": 0.6536, "step": 8233 }, { "epoch": 26.99672131147541, "grad_norm": 5.051898002624512, "learning_rate": 1.7128833768531653e-05, "loss": 0.6766, "step": 8234 }, { "epoch": 27.0, "grad_norm": 4.947016716003418, "learning_rate": 1.7128089045468294e-05, "loss": 0.7036, "step": 8235 }, { "epoch": 27.003278688524592, "grad_norm": 4.474545955657959, "learning_rate": 1.712734424202832e-05, "loss": 0.9017, "step": 8236 }, { "epoch": 27.00655737704918, "grad_norm": 6.571520805358887, "learning_rate": 1.7126599358220124e-05, "loss": 0.934, "step": 8237 }, { "epoch": 27.009836065573772, "grad_norm": 5.955050468444824, "learning_rate": 1.7125854394052113e-05, "loss": 0.7421, "step": 8238 }, { "epoch": 27.01311475409836, "grad_norm": 4.9598708152771, "learning_rate": 1.7125109349532687e-05, "loss": 0.6088, "step": 8239 }, { "epoch": 27.016393442622952, "grad_norm": 5.1064934730529785, "learning_rate": 1.712436422467024e-05, "loss": 0.8236, "step": 8240 }, { "epoch": 27.01967213114754, "grad_norm": 4.9719085693359375, "learning_rate": 1.7123619019473184e-05, "loss": 0.7305, "step": 8241 }, { "epoch": 27.022950819672133, "grad_norm": 5.2521138191223145, "learning_rate": 1.7122873733949913e-05, "loss": 0.5035, "step": 8242 }, { "epoch": 27.02622950819672, "grad_norm": 4.645502090454102, "learning_rate": 1.7122128368108836e-05, "loss": 0.4791, "step": 8243 }, { "epoch": 27.029508196721313, "grad_norm": 4.407013416290283, "learning_rate": 1.7121382921958357e-05, "loss": 0.7003, "step": 8244 }, { "epoch": 27.0327868852459, "grad_norm": 4.772476673126221, "learning_rate": 1.712063739550688e-05, "loss": 0.4631, "step": 8245 }, { "epoch": 27.036065573770493, "grad_norm": 4.357748031616211, "learning_rate": 1.7119891788762814e-05, "loss": 0.5727, "step": 8246 }, { "epoch": 27.03934426229508, "grad_norm": 4.647970676422119, "learning_rate": 1.7119146101734565e-05, "loss": 0.9383, "step": 8247 }, { "epoch": 27.042622950819673, "grad_norm": 6.015129566192627, "learning_rate": 1.7118400334430544e-05, "loss": 0.7003, "step": 8248 }, { "epoch": 27.04590163934426, "grad_norm": 5.8126139640808105, "learning_rate": 1.7117654486859154e-05, "loss": 0.6548, "step": 8249 }, { "epoch": 27.049180327868854, "grad_norm": 4.244621276855469, "learning_rate": 1.711690855902881e-05, "loss": 0.7102, "step": 8250 }, { "epoch": 27.052459016393442, "grad_norm": 7.304745674133301, "learning_rate": 1.7116162550947922e-05, "loss": 0.6824, "step": 8251 }, { "epoch": 27.055737704918034, "grad_norm": 5.430219650268555, "learning_rate": 1.7115416462624902e-05, "loss": 0.838, "step": 8252 }, { "epoch": 27.059016393442622, "grad_norm": 4.811685562133789, "learning_rate": 1.7114670294068167e-05, "loss": 0.7846, "step": 8253 }, { "epoch": 27.062295081967214, "grad_norm": 4.925612449645996, "learning_rate": 1.7113924045286126e-05, "loss": 0.6784, "step": 8254 }, { "epoch": 27.065573770491802, "grad_norm": 4.381997108459473, "learning_rate": 1.711317771628719e-05, "loss": 0.7552, "step": 8255 }, { "epoch": 27.068852459016394, "grad_norm": 5.298519134521484, "learning_rate": 1.7112431307079785e-05, "loss": 0.8369, "step": 8256 }, { "epoch": 27.072131147540983, "grad_norm": 5.508828163146973, "learning_rate": 1.711168481767232e-05, "loss": 0.7264, "step": 8257 }, { "epoch": 27.075409836065575, "grad_norm": 5.526416301727295, "learning_rate": 1.7110938248073212e-05, "loss": 0.6633, "step": 8258 }, { "epoch": 27.078688524590163, "grad_norm": 4.768121719360352, "learning_rate": 1.7110191598290883e-05, "loss": 0.759, "step": 8259 }, { "epoch": 27.081967213114755, "grad_norm": 4.29084587097168, "learning_rate": 1.7109444868333752e-05, "loss": 0.6202, "step": 8260 }, { "epoch": 27.085245901639343, "grad_norm": 5.540261745452881, "learning_rate": 1.7108698058210238e-05, "loss": 0.6968, "step": 8261 }, { "epoch": 27.088524590163935, "grad_norm": 5.2314605712890625, "learning_rate": 1.7107951167928763e-05, "loss": 0.894, "step": 8262 }, { "epoch": 27.091803278688523, "grad_norm": 4.911632537841797, "learning_rate": 1.7107204197497748e-05, "loss": 0.6439, "step": 8263 }, { "epoch": 27.095081967213115, "grad_norm": 5.090764045715332, "learning_rate": 1.7106457146925612e-05, "loss": 0.6969, "step": 8264 }, { "epoch": 27.098360655737704, "grad_norm": 5.478898525238037, "learning_rate": 1.7105710016220788e-05, "loss": 0.8761, "step": 8265 }, { "epoch": 27.101639344262296, "grad_norm": 4.775479316711426, "learning_rate": 1.7104962805391695e-05, "loss": 0.5869, "step": 8266 }, { "epoch": 27.104918032786884, "grad_norm": 5.23712682723999, "learning_rate": 1.7104215514446757e-05, "loss": 0.6531, "step": 8267 }, { "epoch": 27.108196721311476, "grad_norm": 4.594727516174316, "learning_rate": 1.7103468143394403e-05, "loss": 0.6728, "step": 8268 }, { "epoch": 27.111475409836064, "grad_norm": 6.26879358291626, "learning_rate": 1.710272069224306e-05, "loss": 0.8362, "step": 8269 }, { "epoch": 27.114754098360656, "grad_norm": 5.414676666259766, "learning_rate": 1.7101973161001156e-05, "loss": 0.7011, "step": 8270 }, { "epoch": 27.118032786885244, "grad_norm": 5.336802959442139, "learning_rate": 1.710122554967712e-05, "loss": 0.7676, "step": 8271 }, { "epoch": 27.121311475409836, "grad_norm": 4.251275539398193, "learning_rate": 1.7100477858279384e-05, "loss": 0.796, "step": 8272 }, { "epoch": 27.124590163934425, "grad_norm": 4.598999500274658, "learning_rate": 1.7099730086816375e-05, "loss": 0.7378, "step": 8273 }, { "epoch": 27.127868852459017, "grad_norm": 4.9094953536987305, "learning_rate": 1.7098982235296528e-05, "loss": 0.779, "step": 8274 }, { "epoch": 27.131147540983605, "grad_norm": 4.552796363830566, "learning_rate": 1.709823430372828e-05, "loss": 0.6233, "step": 8275 }, { "epoch": 27.134426229508197, "grad_norm": 4.838168144226074, "learning_rate": 1.7097486292120056e-05, "loss": 0.8692, "step": 8276 }, { "epoch": 27.137704918032785, "grad_norm": 4.7121076583862305, "learning_rate": 1.709673820048029e-05, "loss": 0.6576, "step": 8277 }, { "epoch": 27.140983606557377, "grad_norm": 4.7154717445373535, "learning_rate": 1.709599002881743e-05, "loss": 0.7234, "step": 8278 }, { "epoch": 27.14426229508197, "grad_norm": 4.460397243499756, "learning_rate": 1.70952417771399e-05, "loss": 0.5688, "step": 8279 }, { "epoch": 27.147540983606557, "grad_norm": 4.357869625091553, "learning_rate": 1.7094493445456143e-05, "loss": 0.619, "step": 8280 }, { "epoch": 27.15081967213115, "grad_norm": 5.1875, "learning_rate": 1.7093745033774597e-05, "loss": 0.639, "step": 8281 }, { "epoch": 27.154098360655738, "grad_norm": 4.885227203369141, "learning_rate": 1.7092996542103698e-05, "loss": 0.6706, "step": 8282 }, { "epoch": 27.15737704918033, "grad_norm": 4.914400100708008, "learning_rate": 1.709224797045189e-05, "loss": 0.8312, "step": 8283 }, { "epoch": 27.160655737704918, "grad_norm": 6.47430419921875, "learning_rate": 1.709149931882761e-05, "loss": 0.5836, "step": 8284 }, { "epoch": 27.16393442622951, "grad_norm": 5.288905143737793, "learning_rate": 1.70907505872393e-05, "loss": 0.7372, "step": 8285 }, { "epoch": 27.167213114754098, "grad_norm": 5.264772891998291, "learning_rate": 1.709000177569541e-05, "loss": 0.5773, "step": 8286 }, { "epoch": 27.17049180327869, "grad_norm": 4.728588581085205, "learning_rate": 1.7089252884204376e-05, "loss": 0.8248, "step": 8287 }, { "epoch": 27.17377049180328, "grad_norm": 4.6543288230896, "learning_rate": 1.7088503912774643e-05, "loss": 0.6951, "step": 8288 }, { "epoch": 27.17704918032787, "grad_norm": 4.257772445678711, "learning_rate": 1.708775486141466e-05, "loss": 0.6253, "step": 8289 }, { "epoch": 27.18032786885246, "grad_norm": 5.618310928344727, "learning_rate": 1.7087005730132874e-05, "loss": 0.8498, "step": 8290 }, { "epoch": 27.18360655737705, "grad_norm": 5.131984233856201, "learning_rate": 1.7086256518937726e-05, "loss": 0.7319, "step": 8291 }, { "epoch": 27.18688524590164, "grad_norm": 6.94996452331543, "learning_rate": 1.708550722783767e-05, "loss": 0.8821, "step": 8292 }, { "epoch": 27.19016393442623, "grad_norm": 5.148225784301758, "learning_rate": 1.708475785684115e-05, "loss": 0.6204, "step": 8293 }, { "epoch": 27.19344262295082, "grad_norm": 5.013064861297607, "learning_rate": 1.7084008405956623e-05, "loss": 0.7323, "step": 8294 }, { "epoch": 27.19672131147541, "grad_norm": 6.600743293762207, "learning_rate": 1.7083258875192532e-05, "loss": 0.7748, "step": 8295 }, { "epoch": 27.2, "grad_norm": 4.920974254608154, "learning_rate": 1.7082509264557333e-05, "loss": 0.6923, "step": 8296 }, { "epoch": 27.20327868852459, "grad_norm": 4.774862289428711, "learning_rate": 1.7081759574059478e-05, "loss": 0.7533, "step": 8297 }, { "epoch": 27.20655737704918, "grad_norm": 8.694355010986328, "learning_rate": 1.708100980370742e-05, "loss": 0.6257, "step": 8298 }, { "epoch": 27.20983606557377, "grad_norm": 6.379755020141602, "learning_rate": 1.7080259953509618e-05, "loss": 0.6204, "step": 8299 }, { "epoch": 27.21311475409836, "grad_norm": 5.368001461029053, "learning_rate": 1.707951002347452e-05, "loss": 0.6526, "step": 8300 }, { "epoch": 27.21639344262295, "grad_norm": 6.700963020324707, "learning_rate": 1.7078760013610587e-05, "loss": 0.6385, "step": 8301 }, { "epoch": 27.21967213114754, "grad_norm": 7.538840293884277, "learning_rate": 1.7078009923926276e-05, "loss": 0.7537, "step": 8302 }, { "epoch": 27.222950819672132, "grad_norm": 5.283840179443359, "learning_rate": 1.707725975443004e-05, "loss": 0.7681, "step": 8303 }, { "epoch": 27.22622950819672, "grad_norm": 5.567588806152344, "learning_rate": 1.7076509505130344e-05, "loss": 0.7257, "step": 8304 }, { "epoch": 27.229508196721312, "grad_norm": 4.777244567871094, "learning_rate": 1.7075759176035647e-05, "loss": 0.7893, "step": 8305 }, { "epoch": 27.2327868852459, "grad_norm": 4.8620758056640625, "learning_rate": 1.707500876715441e-05, "loss": 0.6146, "step": 8306 }, { "epoch": 27.236065573770492, "grad_norm": 5.148923873901367, "learning_rate": 1.7074258278495087e-05, "loss": 0.7822, "step": 8307 }, { "epoch": 27.23934426229508, "grad_norm": 4.372652530670166, "learning_rate": 1.7073507710066152e-05, "loss": 0.8461, "step": 8308 }, { "epoch": 27.242622950819673, "grad_norm": 4.883421421051025, "learning_rate": 1.707275706187606e-05, "loss": 0.6891, "step": 8309 }, { "epoch": 27.24590163934426, "grad_norm": 7.229972839355469, "learning_rate": 1.7072006333933275e-05, "loss": 0.7397, "step": 8310 }, { "epoch": 27.249180327868853, "grad_norm": 5.533650875091553, "learning_rate": 1.707125552624627e-05, "loss": 0.6948, "step": 8311 }, { "epoch": 27.25245901639344, "grad_norm": 4.869878768920898, "learning_rate": 1.7070504638823507e-05, "loss": 0.7472, "step": 8312 }, { "epoch": 27.255737704918033, "grad_norm": 6.058694362640381, "learning_rate": 1.706975367167345e-05, "loss": 0.7324, "step": 8313 }, { "epoch": 27.25901639344262, "grad_norm": 4.97723388671875, "learning_rate": 1.7069002624804574e-05, "loss": 0.6158, "step": 8314 }, { "epoch": 27.262295081967213, "grad_norm": 5.505105018615723, "learning_rate": 1.706825149822534e-05, "loss": 0.7784, "step": 8315 }, { "epoch": 27.2655737704918, "grad_norm": 5.377954483032227, "learning_rate": 1.7067500291944222e-05, "loss": 0.6391, "step": 8316 }, { "epoch": 27.268852459016394, "grad_norm": 5.227511405944824, "learning_rate": 1.706674900596969e-05, "loss": 0.6832, "step": 8317 }, { "epoch": 27.272131147540982, "grad_norm": 6.310449123382568, "learning_rate": 1.7065997640310214e-05, "loss": 0.7058, "step": 8318 }, { "epoch": 27.275409836065574, "grad_norm": 5.836594104766846, "learning_rate": 1.706524619497427e-05, "loss": 0.7426, "step": 8319 }, { "epoch": 27.278688524590162, "grad_norm": 4.3695149421691895, "learning_rate": 1.7064494669970328e-05, "loss": 0.8714, "step": 8320 }, { "epoch": 27.281967213114754, "grad_norm": 5.113916873931885, "learning_rate": 1.7063743065306865e-05, "loss": 0.7446, "step": 8321 }, { "epoch": 27.285245901639342, "grad_norm": 5.550252437591553, "learning_rate": 1.7062991380992355e-05, "loss": 0.7025, "step": 8322 }, { "epoch": 27.288524590163934, "grad_norm": 4.401626110076904, "learning_rate": 1.7062239617035266e-05, "loss": 0.5527, "step": 8323 }, { "epoch": 27.291803278688526, "grad_norm": 4.741933345794678, "learning_rate": 1.706148777344409e-05, "loss": 0.5673, "step": 8324 }, { "epoch": 27.295081967213115, "grad_norm": 5.309301376342773, "learning_rate": 1.7060735850227298e-05, "loss": 0.8277, "step": 8325 }, { "epoch": 27.298360655737707, "grad_norm": 5.174943923950195, "learning_rate": 1.7059983847393363e-05, "loss": 0.9229, "step": 8326 }, { "epoch": 27.301639344262295, "grad_norm": 4.885037422180176, "learning_rate": 1.705923176495077e-05, "loss": 0.5263, "step": 8327 }, { "epoch": 27.304918032786887, "grad_norm": 4.257493495941162, "learning_rate": 1.7058479602908e-05, "loss": 0.7371, "step": 8328 }, { "epoch": 27.308196721311475, "grad_norm": 14.806519508361816, "learning_rate": 1.7057727361273536e-05, "loss": 0.4706, "step": 8329 }, { "epoch": 27.311475409836067, "grad_norm": 4.71671199798584, "learning_rate": 1.7056975040055857e-05, "loss": 0.5898, "step": 8330 }, { "epoch": 27.314754098360655, "grad_norm": 4.9936628341674805, "learning_rate": 1.7056222639263447e-05, "loss": 0.4276, "step": 8331 }, { "epoch": 27.318032786885247, "grad_norm": 7.3007073402404785, "learning_rate": 1.705547015890479e-05, "loss": 0.7263, "step": 8332 }, { "epoch": 27.321311475409836, "grad_norm": 4.9275898933410645, "learning_rate": 1.7054717598988372e-05, "loss": 0.5945, "step": 8333 }, { "epoch": 27.324590163934428, "grad_norm": 4.909800052642822, "learning_rate": 1.7053964959522675e-05, "loss": 0.5121, "step": 8334 }, { "epoch": 27.327868852459016, "grad_norm": 4.928659439086914, "learning_rate": 1.705321224051619e-05, "loss": 0.7605, "step": 8335 }, { "epoch": 27.331147540983608, "grad_norm": 5.985926151275635, "learning_rate": 1.7052459441977402e-05, "loss": 0.664, "step": 8336 }, { "epoch": 27.334426229508196, "grad_norm": 5.966934680938721, "learning_rate": 1.70517065639148e-05, "loss": 0.6545, "step": 8337 }, { "epoch": 27.337704918032788, "grad_norm": 7.306964874267578, "learning_rate": 1.7050953606336878e-05, "loss": 0.6091, "step": 8338 }, { "epoch": 27.340983606557376, "grad_norm": 4.8285369873046875, "learning_rate": 1.7050200569252126e-05, "loss": 0.4908, "step": 8339 }, { "epoch": 27.34426229508197, "grad_norm": 5.054708480834961, "learning_rate": 1.704944745266903e-05, "loss": 0.6134, "step": 8340 }, { "epoch": 27.347540983606557, "grad_norm": 5.5325026512146, "learning_rate": 1.704869425659608e-05, "loss": 0.9103, "step": 8341 }, { "epoch": 27.35081967213115, "grad_norm": 4.900631904602051, "learning_rate": 1.704794098104178e-05, "loss": 0.7368, "step": 8342 }, { "epoch": 27.354098360655737, "grad_norm": 6.025928020477295, "learning_rate": 1.704718762601461e-05, "loss": 0.8476, "step": 8343 }, { "epoch": 27.35737704918033, "grad_norm": 5.207576751708984, "learning_rate": 1.7046434191523077e-05, "loss": 0.6307, "step": 8344 }, { "epoch": 27.360655737704917, "grad_norm": 4.505213737487793, "learning_rate": 1.704568067757567e-05, "loss": 0.7253, "step": 8345 }, { "epoch": 27.36393442622951, "grad_norm": 5.63283634185791, "learning_rate": 1.704492708418089e-05, "loss": 0.8739, "step": 8346 }, { "epoch": 27.367213114754097, "grad_norm": 6.641963481903076, "learning_rate": 1.704417341134723e-05, "loss": 0.6563, "step": 8347 }, { "epoch": 27.37049180327869, "grad_norm": 4.622913360595703, "learning_rate": 1.7043419659083193e-05, "loss": 0.8165, "step": 8348 }, { "epoch": 27.373770491803278, "grad_norm": 5.115401744842529, "learning_rate": 1.7042665827397273e-05, "loss": 0.6464, "step": 8349 }, { "epoch": 27.37704918032787, "grad_norm": 4.684834003448486, "learning_rate": 1.7041911916297976e-05, "loss": 0.567, "step": 8350 }, { "epoch": 27.380327868852458, "grad_norm": 7.982934474945068, "learning_rate": 1.70411579257938e-05, "loss": 0.8087, "step": 8351 }, { "epoch": 27.38360655737705, "grad_norm": 5.3748884201049805, "learning_rate": 1.7040403855893246e-05, "loss": 0.742, "step": 8352 }, { "epoch": 27.386885245901638, "grad_norm": 5.27285623550415, "learning_rate": 1.7039649706604818e-05, "loss": 0.7186, "step": 8353 }, { "epoch": 27.39016393442623, "grad_norm": 6.253305435180664, "learning_rate": 1.7038895477937023e-05, "loss": 0.71, "step": 8354 }, { "epoch": 27.39344262295082, "grad_norm": 5.63261604309082, "learning_rate": 1.703814116989836e-05, "loss": 0.6244, "step": 8355 }, { "epoch": 27.39672131147541, "grad_norm": 4.784344673156738, "learning_rate": 1.703738678249734e-05, "loss": 0.4046, "step": 8356 }, { "epoch": 27.4, "grad_norm": 6.258378505706787, "learning_rate": 1.7036632315742464e-05, "loss": 0.7983, "step": 8357 }, { "epoch": 27.40327868852459, "grad_norm": 5.170739650726318, "learning_rate": 1.7035877769642245e-05, "loss": 0.8418, "step": 8358 }, { "epoch": 27.40655737704918, "grad_norm": 4.850306034088135, "learning_rate": 1.7035123144205186e-05, "loss": 0.842, "step": 8359 }, { "epoch": 27.40983606557377, "grad_norm": 6.188500881195068, "learning_rate": 1.7034368439439802e-05, "loss": 0.7357, "step": 8360 }, { "epoch": 27.41311475409836, "grad_norm": 5.123194694519043, "learning_rate": 1.7033613655354597e-05, "loss": 0.6472, "step": 8361 }, { "epoch": 27.41639344262295, "grad_norm": 5.206306457519531, "learning_rate": 1.703285879195809e-05, "loss": 0.9996, "step": 8362 }, { "epoch": 27.41967213114754, "grad_norm": 5.427577972412109, "learning_rate": 1.703210384925878e-05, "loss": 0.8421, "step": 8363 }, { "epoch": 27.42295081967213, "grad_norm": 6.348792552947998, "learning_rate": 1.7031348827265193e-05, "loss": 0.6555, "step": 8364 }, { "epoch": 27.42622950819672, "grad_norm": 5.638155937194824, "learning_rate": 1.7030593725985834e-05, "loss": 0.7665, "step": 8365 }, { "epoch": 27.42950819672131, "grad_norm": 5.351243019104004, "learning_rate": 1.7029838545429226e-05, "loss": 0.7564, "step": 8366 }, { "epoch": 27.432786885245903, "grad_norm": 5.66604471206665, "learning_rate": 1.7029083285603873e-05, "loss": 0.8806, "step": 8367 }, { "epoch": 27.43606557377049, "grad_norm": 5.024462699890137, "learning_rate": 1.70283279465183e-05, "loss": 1.0287, "step": 8368 }, { "epoch": 27.439344262295084, "grad_norm": 5.295956134796143, "learning_rate": 1.702757252818102e-05, "loss": 0.6773, "step": 8369 }, { "epoch": 27.442622950819672, "grad_norm": 5.684757232666016, "learning_rate": 1.7026817030600557e-05, "loss": 0.5978, "step": 8370 }, { "epoch": 27.445901639344264, "grad_norm": 5.328881740570068, "learning_rate": 1.7026061453785426e-05, "loss": 0.6277, "step": 8371 }, { "epoch": 27.449180327868852, "grad_norm": 5.095757007598877, "learning_rate": 1.7025305797744142e-05, "loss": 0.6049, "step": 8372 }, { "epoch": 27.452459016393444, "grad_norm": 8.174590110778809, "learning_rate": 1.7024550062485237e-05, "loss": 0.82, "step": 8373 }, { "epoch": 27.455737704918032, "grad_norm": 4.930953502655029, "learning_rate": 1.7023794248017222e-05, "loss": 0.7053, "step": 8374 }, { "epoch": 27.459016393442624, "grad_norm": 5.920534133911133, "learning_rate": 1.7023038354348624e-05, "loss": 0.6672, "step": 8375 }, { "epoch": 27.462295081967213, "grad_norm": 7.157432556152344, "learning_rate": 1.7022282381487968e-05, "loss": 0.6263, "step": 8376 }, { "epoch": 27.465573770491805, "grad_norm": 5.704804420471191, "learning_rate": 1.7021526329443775e-05, "loss": 0.5735, "step": 8377 }, { "epoch": 27.468852459016393, "grad_norm": 5.29218053817749, "learning_rate": 1.702077019822457e-05, "loss": 0.7818, "step": 8378 }, { "epoch": 27.472131147540985, "grad_norm": 4.884810924530029, "learning_rate": 1.702001398783889e-05, "loss": 0.7523, "step": 8379 }, { "epoch": 27.475409836065573, "grad_norm": 5.001998424530029, "learning_rate": 1.7019257698295243e-05, "loss": 0.7575, "step": 8380 }, { "epoch": 27.478688524590165, "grad_norm": 4.973542213439941, "learning_rate": 1.7018501329602176e-05, "loss": 0.5259, "step": 8381 }, { "epoch": 27.481967213114753, "grad_norm": 4.619908809661865, "learning_rate": 1.7017744881768205e-05, "loss": 0.8946, "step": 8382 }, { "epoch": 27.485245901639345, "grad_norm": 5.0373125076293945, "learning_rate": 1.701698835480186e-05, "loss": 0.603, "step": 8383 }, { "epoch": 27.488524590163934, "grad_norm": 5.408868789672852, "learning_rate": 1.701623174871168e-05, "loss": 0.6469, "step": 8384 }, { "epoch": 27.491803278688526, "grad_norm": 5.295040607452393, "learning_rate": 1.7015475063506192e-05, "loss": 0.5231, "step": 8385 }, { "epoch": 27.495081967213114, "grad_norm": 4.836710453033447, "learning_rate": 1.7014718299193925e-05, "loss": 0.6948, "step": 8386 }, { "epoch": 27.498360655737706, "grad_norm": 5.487279415130615, "learning_rate": 1.7013961455783418e-05, "loss": 0.733, "step": 8387 }, { "epoch": 27.501639344262294, "grad_norm": 4.732760429382324, "learning_rate": 1.7013204533283208e-05, "loss": 0.684, "step": 8388 }, { "epoch": 27.504918032786886, "grad_norm": 4.977419376373291, "learning_rate": 1.7012447531701817e-05, "loss": 0.8761, "step": 8389 }, { "epoch": 27.508196721311474, "grad_norm": 4.183011531829834, "learning_rate": 1.7011690451047793e-05, "loss": 0.8998, "step": 8390 }, { "epoch": 27.511475409836066, "grad_norm": 6.066915035247803, "learning_rate": 1.7010933291329668e-05, "loss": 0.6007, "step": 8391 }, { "epoch": 27.514754098360655, "grad_norm": 4.3888044357299805, "learning_rate": 1.701017605255598e-05, "loss": 1.0258, "step": 8392 }, { "epoch": 27.518032786885247, "grad_norm": 5.690793037414551, "learning_rate": 1.7009418734735267e-05, "loss": 0.7049, "step": 8393 }, { "epoch": 27.521311475409835, "grad_norm": 4.983498573303223, "learning_rate": 1.7008661337876074e-05, "loss": 0.7072, "step": 8394 }, { "epoch": 27.524590163934427, "grad_norm": 4.714466571807861, "learning_rate": 1.7007903861986933e-05, "loss": 0.6327, "step": 8395 }, { "epoch": 27.527868852459015, "grad_norm": 4.35863733291626, "learning_rate": 1.7007146307076394e-05, "loss": 0.8492, "step": 8396 }, { "epoch": 27.531147540983607, "grad_norm": 4.714658260345459, "learning_rate": 1.7006388673152988e-05, "loss": 0.53, "step": 8397 }, { "epoch": 27.534426229508195, "grad_norm": 5.19088888168335, "learning_rate": 1.700563096022527e-05, "loss": 0.7042, "step": 8398 }, { "epoch": 27.537704918032787, "grad_norm": 4.842300891876221, "learning_rate": 1.700487316830178e-05, "loss": 0.8185, "step": 8399 }, { "epoch": 27.540983606557376, "grad_norm": 5.443787574768066, "learning_rate": 1.700411529739106e-05, "loss": 0.6147, "step": 8400 }, { "epoch": 27.544262295081968, "grad_norm": 4.956154823303223, "learning_rate": 1.7003357347501656e-05, "loss": 0.7158, "step": 8401 }, { "epoch": 27.547540983606556, "grad_norm": 4.321585178375244, "learning_rate": 1.7002599318642115e-05, "loss": 0.7542, "step": 8402 }, { "epoch": 27.550819672131148, "grad_norm": 5.80507230758667, "learning_rate": 1.7001841210820994e-05, "loss": 0.5135, "step": 8403 }, { "epoch": 27.554098360655736, "grad_norm": 4.716460704803467, "learning_rate": 1.7001083024046824e-05, "loss": 0.6945, "step": 8404 }, { "epoch": 27.557377049180328, "grad_norm": 5.631516933441162, "learning_rate": 1.700032475832817e-05, "loss": 0.7056, "step": 8405 }, { "epoch": 27.560655737704916, "grad_norm": 5.8688483238220215, "learning_rate": 1.699956641367357e-05, "loss": 0.6015, "step": 8406 }, { "epoch": 27.56393442622951, "grad_norm": 4.6347975730896, "learning_rate": 1.6998807990091587e-05, "loss": 0.6852, "step": 8407 }, { "epoch": 27.567213114754097, "grad_norm": 6.307651996612549, "learning_rate": 1.699804948759076e-05, "loss": 0.8168, "step": 8408 }, { "epoch": 27.57049180327869, "grad_norm": 5.6072282791137695, "learning_rate": 1.699729090617966e-05, "loss": 0.665, "step": 8409 }, { "epoch": 27.57377049180328, "grad_norm": 8.109440803527832, "learning_rate": 1.699653224586682e-05, "loss": 0.7605, "step": 8410 }, { "epoch": 27.57704918032787, "grad_norm": 4.965253829956055, "learning_rate": 1.6995773506660805e-05, "loss": 0.7366, "step": 8411 }, { "epoch": 27.58032786885246, "grad_norm": 4.814770698547363, "learning_rate": 1.6995014688570173e-05, "loss": 0.7021, "step": 8412 }, { "epoch": 27.58360655737705, "grad_norm": 5.950358867645264, "learning_rate": 1.6994255791603475e-05, "loss": 0.7133, "step": 8413 }, { "epoch": 27.58688524590164, "grad_norm": 5.883810520172119, "learning_rate": 1.699349681576927e-05, "loss": 0.5923, "step": 8414 }, { "epoch": 27.59016393442623, "grad_norm": 5.025627613067627, "learning_rate": 1.699273776107612e-05, "loss": 0.5925, "step": 8415 }, { "epoch": 27.59344262295082, "grad_norm": 5.989953517913818, "learning_rate": 1.699197862753258e-05, "loss": 0.6634, "step": 8416 }, { "epoch": 27.59672131147541, "grad_norm": 6.322274684906006, "learning_rate": 1.699121941514721e-05, "loss": 0.7695, "step": 8417 }, { "epoch": 27.6, "grad_norm": 4.438777923583984, "learning_rate": 1.6990460123928577e-05, "loss": 0.7213, "step": 8418 }, { "epoch": 27.60327868852459, "grad_norm": 4.672339916229248, "learning_rate": 1.6989700753885233e-05, "loss": 0.787, "step": 8419 }, { "epoch": 27.60655737704918, "grad_norm": 4.46783971786499, "learning_rate": 1.6988941305025748e-05, "loss": 0.9939, "step": 8420 }, { "epoch": 27.60983606557377, "grad_norm": 4.818180561065674, "learning_rate": 1.6988181777358683e-05, "loss": 1.0702, "step": 8421 }, { "epoch": 27.613114754098362, "grad_norm": 4.6261796951293945, "learning_rate": 1.6987422170892596e-05, "loss": 0.6196, "step": 8422 }, { "epoch": 27.61639344262295, "grad_norm": 4.80049991607666, "learning_rate": 1.6986662485636065e-05, "loss": 0.6352, "step": 8423 }, { "epoch": 27.619672131147542, "grad_norm": 5.562760353088379, "learning_rate": 1.6985902721597652e-05, "loss": 0.7214, "step": 8424 }, { "epoch": 27.62295081967213, "grad_norm": 5.140350341796875, "learning_rate": 1.698514287878592e-05, "loss": 0.6533, "step": 8425 }, { "epoch": 27.626229508196722, "grad_norm": 4.3942084312438965, "learning_rate": 1.6984382957209437e-05, "loss": 0.8189, "step": 8426 }, { "epoch": 27.62950819672131, "grad_norm": 6.975147724151611, "learning_rate": 1.6983622956876778e-05, "loss": 0.7697, "step": 8427 }, { "epoch": 27.632786885245903, "grad_norm": 5.084574222564697, "learning_rate": 1.6982862877796505e-05, "loss": 0.7368, "step": 8428 }, { "epoch": 27.63606557377049, "grad_norm": 5.339658260345459, "learning_rate": 1.6982102719977195e-05, "loss": 0.8368, "step": 8429 }, { "epoch": 27.639344262295083, "grad_norm": 6.23110294342041, "learning_rate": 1.698134248342742e-05, "loss": 0.5157, "step": 8430 }, { "epoch": 27.64262295081967, "grad_norm": 6.312103271484375, "learning_rate": 1.6980582168155748e-05, "loss": 0.5169, "step": 8431 }, { "epoch": 27.645901639344263, "grad_norm": 5.954864025115967, "learning_rate": 1.697982177417075e-05, "loss": 0.9633, "step": 8432 }, { "epoch": 27.64918032786885, "grad_norm": 4.5630950927734375, "learning_rate": 1.697906130148101e-05, "loss": 0.7306, "step": 8433 }, { "epoch": 27.652459016393443, "grad_norm": 5.176898956298828, "learning_rate": 1.697830075009509e-05, "loss": 0.7047, "step": 8434 }, { "epoch": 27.65573770491803, "grad_norm": 4.99648380279541, "learning_rate": 1.6977540120021584e-05, "loss": 0.7846, "step": 8435 }, { "epoch": 27.659016393442624, "grad_norm": 6.057483196258545, "learning_rate": 1.6976779411269047e-05, "loss": 0.4888, "step": 8436 }, { "epoch": 27.662295081967212, "grad_norm": 5.708541393280029, "learning_rate": 1.6976018623846075e-05, "loss": 0.7429, "step": 8437 }, { "epoch": 27.665573770491804, "grad_norm": 5.125787258148193, "learning_rate": 1.697525775776124e-05, "loss": 0.9074, "step": 8438 }, { "epoch": 27.668852459016392, "grad_norm": 4.877549648284912, "learning_rate": 1.6974496813023118e-05, "loss": 0.6593, "step": 8439 }, { "epoch": 27.672131147540984, "grad_norm": 4.855284690856934, "learning_rate": 1.6973735789640294e-05, "loss": 0.6967, "step": 8440 }, { "epoch": 27.675409836065572, "grad_norm": 4.956364154815674, "learning_rate": 1.6972974687621347e-05, "loss": 0.6467, "step": 8441 }, { "epoch": 27.678688524590164, "grad_norm": 6.816883087158203, "learning_rate": 1.6972213506974866e-05, "loss": 0.7409, "step": 8442 }, { "epoch": 27.681967213114753, "grad_norm": 5.42759370803833, "learning_rate": 1.6971452247709422e-05, "loss": 0.9211, "step": 8443 }, { "epoch": 27.685245901639345, "grad_norm": 5.202248573303223, "learning_rate": 1.697069090983361e-05, "loss": 0.7144, "step": 8444 }, { "epoch": 27.688524590163933, "grad_norm": 5.005364894866943, "learning_rate": 1.6969929493356008e-05, "loss": 0.6931, "step": 8445 }, { "epoch": 27.691803278688525, "grad_norm": 4.6283040046691895, "learning_rate": 1.6969167998285205e-05, "loss": 0.7769, "step": 8446 }, { "epoch": 27.695081967213113, "grad_norm": 3.9950995445251465, "learning_rate": 1.6968406424629785e-05, "loss": 0.5325, "step": 8447 }, { "epoch": 27.698360655737705, "grad_norm": 5.742214202880859, "learning_rate": 1.6967644772398337e-05, "loss": 0.7259, "step": 8448 }, { "epoch": 27.701639344262293, "grad_norm": 6.187466621398926, "learning_rate": 1.696688304159945e-05, "loss": 0.6654, "step": 8449 }, { "epoch": 27.704918032786885, "grad_norm": 5.255598068237305, "learning_rate": 1.6966121232241714e-05, "loss": 0.7454, "step": 8450 }, { "epoch": 27.708196721311474, "grad_norm": 4.870529651641846, "learning_rate": 1.696535934433372e-05, "loss": 0.6729, "step": 8451 }, { "epoch": 27.711475409836066, "grad_norm": 5.144138813018799, "learning_rate": 1.6964597377884053e-05, "loss": 0.6325, "step": 8452 }, { "epoch": 27.714754098360658, "grad_norm": 4.930802822113037, "learning_rate": 1.6963835332901313e-05, "loss": 0.7573, "step": 8453 }, { "epoch": 27.718032786885246, "grad_norm": 5.260807991027832, "learning_rate": 1.6963073209394086e-05, "loss": 0.626, "step": 8454 }, { "epoch": 27.721311475409838, "grad_norm": 4.97371768951416, "learning_rate": 1.696231100737097e-05, "loss": 0.5569, "step": 8455 }, { "epoch": 27.724590163934426, "grad_norm": 5.570669651031494, "learning_rate": 1.696154872684056e-05, "loss": 0.6226, "step": 8456 }, { "epoch": 27.727868852459018, "grad_norm": 4.94579553604126, "learning_rate": 1.6960786367811447e-05, "loss": 0.7409, "step": 8457 }, { "epoch": 27.731147540983606, "grad_norm": 5.549431800842285, "learning_rate": 1.696002393029223e-05, "loss": 0.7021, "step": 8458 }, { "epoch": 27.7344262295082, "grad_norm": 24.787174224853516, "learning_rate": 1.695926141429151e-05, "loss": 0.6253, "step": 8459 }, { "epoch": 27.737704918032787, "grad_norm": 4.623440265655518, "learning_rate": 1.695849881981788e-05, "loss": 0.59, "step": 8460 }, { "epoch": 27.74098360655738, "grad_norm": 5.452479839324951, "learning_rate": 1.695773614687994e-05, "loss": 0.6959, "step": 8461 }, { "epoch": 27.744262295081967, "grad_norm": 5.630815029144287, "learning_rate": 1.695697339548629e-05, "loss": 0.6592, "step": 8462 }, { "epoch": 27.74754098360656, "grad_norm": 5.374855041503906, "learning_rate": 1.6956210565645535e-05, "loss": 0.552, "step": 8463 }, { "epoch": 27.750819672131147, "grad_norm": 6.359845161437988, "learning_rate": 1.695544765736627e-05, "loss": 0.6965, "step": 8464 }, { "epoch": 27.75409836065574, "grad_norm": 5.386277198791504, "learning_rate": 1.6954684670657105e-05, "loss": 0.9025, "step": 8465 }, { "epoch": 27.757377049180327, "grad_norm": 4.745492458343506, "learning_rate": 1.6953921605526637e-05, "loss": 0.5616, "step": 8466 }, { "epoch": 27.76065573770492, "grad_norm": 5.010069847106934, "learning_rate": 1.695315846198347e-05, "loss": 0.9461, "step": 8467 }, { "epoch": 27.763934426229508, "grad_norm": 7.309394359588623, "learning_rate": 1.6952395240036215e-05, "loss": 0.7003, "step": 8468 }, { "epoch": 27.7672131147541, "grad_norm": 5.902930736541748, "learning_rate": 1.6951631939693473e-05, "loss": 0.8499, "step": 8469 }, { "epoch": 27.770491803278688, "grad_norm": 6.29485559463501, "learning_rate": 1.6950868560963852e-05, "loss": 0.6453, "step": 8470 }, { "epoch": 27.77377049180328, "grad_norm": 5.923398017883301, "learning_rate": 1.6950105103855966e-05, "loss": 0.737, "step": 8471 }, { "epoch": 27.777049180327868, "grad_norm": 5.985838413238525, "learning_rate": 1.6949341568378416e-05, "loss": 0.8179, "step": 8472 }, { "epoch": 27.78032786885246, "grad_norm": 5.256649017333984, "learning_rate": 1.694857795453981e-05, "loss": 0.5428, "step": 8473 }, { "epoch": 27.78360655737705, "grad_norm": 5.659832954406738, "learning_rate": 1.6947814262348766e-05, "loss": 0.6556, "step": 8474 }, { "epoch": 27.78688524590164, "grad_norm": 5.286026477813721, "learning_rate": 1.6947050491813894e-05, "loss": 0.9262, "step": 8475 }, { "epoch": 27.79016393442623, "grad_norm": 6.309700012207031, "learning_rate": 1.6946286642943803e-05, "loss": 0.5829, "step": 8476 }, { "epoch": 27.79344262295082, "grad_norm": 5.473072052001953, "learning_rate": 1.6945522715747112e-05, "loss": 0.5748, "step": 8477 }, { "epoch": 27.79672131147541, "grad_norm": 6.537419319152832, "learning_rate": 1.6944758710232423e-05, "loss": 0.5204, "step": 8478 }, { "epoch": 27.8, "grad_norm": 8.067626953125, "learning_rate": 1.6943994626408365e-05, "loss": 0.8056, "step": 8479 }, { "epoch": 27.80327868852459, "grad_norm": 4.5743513107299805, "learning_rate": 1.6943230464283545e-05, "loss": 0.767, "step": 8480 }, { "epoch": 27.80655737704918, "grad_norm": 6.26767635345459, "learning_rate": 1.6942466223866582e-05, "loss": 0.8239, "step": 8481 }, { "epoch": 27.80983606557377, "grad_norm": 5.374115467071533, "learning_rate": 1.6941701905166096e-05, "loss": 0.663, "step": 8482 }, { "epoch": 27.81311475409836, "grad_norm": 6.054136276245117, "learning_rate": 1.6940937508190702e-05, "loss": 0.6413, "step": 8483 }, { "epoch": 27.81639344262295, "grad_norm": 5.388138294219971, "learning_rate": 1.694017303294902e-05, "loss": 0.8123, "step": 8484 }, { "epoch": 27.81967213114754, "grad_norm": 5.497708320617676, "learning_rate": 1.6939408479449672e-05, "loss": 0.8396, "step": 8485 }, { "epoch": 27.82295081967213, "grad_norm": 10.443965911865234, "learning_rate": 1.693864384770128e-05, "loss": 0.7203, "step": 8486 }, { "epoch": 27.82622950819672, "grad_norm": 5.528305530548096, "learning_rate": 1.693787913771246e-05, "loss": 0.7416, "step": 8487 }, { "epoch": 27.82950819672131, "grad_norm": 5.4736528396606445, "learning_rate": 1.6937114349491844e-05, "loss": 0.6512, "step": 8488 }, { "epoch": 27.832786885245902, "grad_norm": 6.6881818771362305, "learning_rate": 1.693634948304805e-05, "loss": 0.7041, "step": 8489 }, { "epoch": 27.83606557377049, "grad_norm": 8.343311309814453, "learning_rate": 1.69355845383897e-05, "loss": 0.7059, "step": 8490 }, { "epoch": 27.839344262295082, "grad_norm": 6.47249698638916, "learning_rate": 1.6934819515525425e-05, "loss": 0.6832, "step": 8491 }, { "epoch": 27.84262295081967, "grad_norm": 5.563149452209473, "learning_rate": 1.693405441446385e-05, "loss": 0.6259, "step": 8492 }, { "epoch": 27.845901639344262, "grad_norm": 5.790935039520264, "learning_rate": 1.69332892352136e-05, "loss": 0.5907, "step": 8493 }, { "epoch": 27.84918032786885, "grad_norm": 6.3732709884643555, "learning_rate": 1.693252397778331e-05, "loss": 0.6696, "step": 8494 }, { "epoch": 27.852459016393443, "grad_norm": 5.060567378997803, "learning_rate": 1.6931758642181598e-05, "loss": 0.675, "step": 8495 }, { "epoch": 27.855737704918035, "grad_norm": 4.852691173553467, "learning_rate": 1.6930993228417107e-05, "loss": 0.9164, "step": 8496 }, { "epoch": 27.859016393442623, "grad_norm": 4.595860004425049, "learning_rate": 1.6930227736498454e-05, "loss": 0.8309, "step": 8497 }, { "epoch": 27.862295081967215, "grad_norm": 5.764837265014648, "learning_rate": 1.6929462166434284e-05, "loss": 0.7442, "step": 8498 }, { "epoch": 27.865573770491803, "grad_norm": 5.829869747161865, "learning_rate": 1.6928696518233217e-05, "loss": 0.5089, "step": 8499 }, { "epoch": 27.868852459016395, "grad_norm": 5.7262115478515625, "learning_rate": 1.6927930791903896e-05, "loss": 0.6909, "step": 8500 }, { "epoch": 27.872131147540983, "grad_norm": 5.691304683685303, "learning_rate": 1.692716498745495e-05, "loss": 0.6858, "step": 8501 }, { "epoch": 27.875409836065575, "grad_norm": 7.467803478240967, "learning_rate": 1.692639910489502e-05, "loss": 0.625, "step": 8502 }, { "epoch": 27.878688524590164, "grad_norm": 6.922425746917725, "learning_rate": 1.692563314423274e-05, "loss": 0.7389, "step": 8503 }, { "epoch": 27.881967213114756, "grad_norm": 6.260294437408447, "learning_rate": 1.6924867105476738e-05, "loss": 0.764, "step": 8504 }, { "epoch": 27.885245901639344, "grad_norm": 6.789814472198486, "learning_rate": 1.6924100988635666e-05, "loss": 0.7626, "step": 8505 }, { "epoch": 27.888524590163936, "grad_norm": 6.289170742034912, "learning_rate": 1.692333479371815e-05, "loss": 0.8679, "step": 8506 }, { "epoch": 27.891803278688524, "grad_norm": 6.464887619018555, "learning_rate": 1.692256852073284e-05, "loss": 0.5582, "step": 8507 }, { "epoch": 27.895081967213116, "grad_norm": 5.52897834777832, "learning_rate": 1.6921802169688374e-05, "loss": 0.5485, "step": 8508 }, { "epoch": 27.898360655737704, "grad_norm": 5.828079700469971, "learning_rate": 1.6921035740593386e-05, "loss": 0.8016, "step": 8509 }, { "epoch": 27.901639344262296, "grad_norm": 4.960132122039795, "learning_rate": 1.6920269233456527e-05, "loss": 0.7029, "step": 8510 }, { "epoch": 27.904918032786885, "grad_norm": 4.779764175415039, "learning_rate": 1.691950264828644e-05, "loss": 0.9621, "step": 8511 }, { "epoch": 27.908196721311477, "grad_norm": 4.9890642166137695, "learning_rate": 1.691873598509176e-05, "loss": 0.7508, "step": 8512 }, { "epoch": 27.911475409836065, "grad_norm": 6.193405628204346, "learning_rate": 1.6917969243881143e-05, "loss": 0.6089, "step": 8513 }, { "epoch": 27.914754098360657, "grad_norm": 5.4973978996276855, "learning_rate": 1.6917202424663228e-05, "loss": 0.7503, "step": 8514 }, { "epoch": 27.918032786885245, "grad_norm": 5.081664562225342, "learning_rate": 1.6916435527446663e-05, "loss": 0.693, "step": 8515 }, { "epoch": 27.921311475409837, "grad_norm": 5.454559326171875, "learning_rate": 1.6915668552240098e-05, "loss": 0.6498, "step": 8516 }, { "epoch": 27.924590163934425, "grad_norm": 7.310030937194824, "learning_rate": 1.6914901499052177e-05, "loss": 0.5874, "step": 8517 }, { "epoch": 27.927868852459017, "grad_norm": 7.365672588348389, "learning_rate": 1.6914134367891553e-05, "loss": 0.8449, "step": 8518 }, { "epoch": 27.931147540983606, "grad_norm": 4.896435260772705, "learning_rate": 1.6913367158766876e-05, "loss": 0.8925, "step": 8519 }, { "epoch": 27.934426229508198, "grad_norm": 4.956545352935791, "learning_rate": 1.6912599871686795e-05, "loss": 0.5264, "step": 8520 }, { "epoch": 27.937704918032786, "grad_norm": 7.198556423187256, "learning_rate": 1.6911832506659968e-05, "loss": 0.5892, "step": 8521 }, { "epoch": 27.940983606557378, "grad_norm": 5.693394184112549, "learning_rate": 1.6911065063695037e-05, "loss": 0.694, "step": 8522 }, { "epoch": 27.944262295081966, "grad_norm": 5.064615249633789, "learning_rate": 1.6910297542800662e-05, "loss": 0.8267, "step": 8523 }, { "epoch": 27.947540983606558, "grad_norm": 4.815850257873535, "learning_rate": 1.6909529943985498e-05, "loss": 0.7886, "step": 8524 }, { "epoch": 27.950819672131146, "grad_norm": 5.407998561859131, "learning_rate": 1.69087622672582e-05, "loss": 0.8027, "step": 8525 }, { "epoch": 27.95409836065574, "grad_norm": 6.965697765350342, "learning_rate": 1.6907994512627425e-05, "loss": 0.6808, "step": 8526 }, { "epoch": 27.957377049180327, "grad_norm": 5.849834442138672, "learning_rate": 1.6907226680101833e-05, "loss": 0.7282, "step": 8527 }, { "epoch": 27.96065573770492, "grad_norm": 4.625887870788574, "learning_rate": 1.6906458769690074e-05, "loss": 0.8296, "step": 8528 }, { "epoch": 27.963934426229507, "grad_norm": 6.284618854522705, "learning_rate": 1.6905690781400812e-05, "loss": 0.5335, "step": 8529 }, { "epoch": 27.9672131147541, "grad_norm": 4.050642013549805, "learning_rate": 1.6904922715242703e-05, "loss": 0.8783, "step": 8530 }, { "epoch": 27.970491803278687, "grad_norm": 5.186222553253174, "learning_rate": 1.6904154571224417e-05, "loss": 0.895, "step": 8531 }, { "epoch": 27.97377049180328, "grad_norm": 5.007858753204346, "learning_rate": 1.6903386349354605e-05, "loss": 0.5646, "step": 8532 }, { "epoch": 27.977049180327867, "grad_norm": 4.585184574127197, "learning_rate": 1.6902618049641937e-05, "loss": 0.4325, "step": 8533 }, { "epoch": 27.98032786885246, "grad_norm": 5.3217644691467285, "learning_rate": 1.6901849672095072e-05, "loss": 0.936, "step": 8534 }, { "epoch": 27.983606557377048, "grad_norm": 8.464008331298828, "learning_rate": 1.6901081216722678e-05, "loss": 0.8782, "step": 8535 }, { "epoch": 27.98688524590164, "grad_norm": 5.472362995147705, "learning_rate": 1.6900312683533414e-05, "loss": 0.7214, "step": 8536 }, { "epoch": 27.990163934426228, "grad_norm": 5.677659034729004, "learning_rate": 1.6899544072535955e-05, "loss": 0.5694, "step": 8537 }, { "epoch": 27.99344262295082, "grad_norm": 4.916808128356934, "learning_rate": 1.689877538373896e-05, "loss": 0.7826, "step": 8538 }, { "epoch": 27.99672131147541, "grad_norm": 5.562504291534424, "learning_rate": 1.68980066171511e-05, "loss": 0.7297, "step": 8539 }, { "epoch": 28.0, "grad_norm": 6.4071044921875, "learning_rate": 1.6897237772781046e-05, "loss": 0.7744, "step": 8540 }, { "epoch": 28.003278688524592, "grad_norm": 7.794445037841797, "learning_rate": 1.689646885063746e-05, "loss": 0.5608, "step": 8541 }, { "epoch": 28.00655737704918, "grad_norm": 6.207058429718018, "learning_rate": 1.689569985072902e-05, "loss": 0.6699, "step": 8542 }, { "epoch": 28.009836065573772, "grad_norm": 4.898711204528809, "learning_rate": 1.6894930773064394e-05, "loss": 0.5244, "step": 8543 }, { "epoch": 28.01311475409836, "grad_norm": 4.94246244430542, "learning_rate": 1.6894161617652253e-05, "loss": 0.5922, "step": 8544 }, { "epoch": 28.016393442622952, "grad_norm": 4.520877361297607, "learning_rate": 1.6893392384501273e-05, "loss": 0.4606, "step": 8545 }, { "epoch": 28.01967213114754, "grad_norm": 5.061443328857422, "learning_rate": 1.6892623073620127e-05, "loss": 0.7352, "step": 8546 }, { "epoch": 28.022950819672133, "grad_norm": 4.4260430335998535, "learning_rate": 1.689185368501749e-05, "loss": 0.6509, "step": 8547 }, { "epoch": 28.02622950819672, "grad_norm": 4.7063469886779785, "learning_rate": 1.6891084218702035e-05, "loss": 0.6913, "step": 8548 }, { "epoch": 28.029508196721313, "grad_norm": 4.459053039550781, "learning_rate": 1.689031467468244e-05, "loss": 0.8152, "step": 8549 }, { "epoch": 28.0327868852459, "grad_norm": 4.775468826293945, "learning_rate": 1.6889545052967384e-05, "loss": 0.5366, "step": 8550 }, { "epoch": 28.036065573770493, "grad_norm": 5.384983062744141, "learning_rate": 1.6888775353565547e-05, "loss": 0.6683, "step": 8551 }, { "epoch": 28.03934426229508, "grad_norm": 6.210578918457031, "learning_rate": 1.68880055764856e-05, "loss": 0.6618, "step": 8552 }, { "epoch": 28.042622950819673, "grad_norm": 5.079370975494385, "learning_rate": 1.688723572173623e-05, "loss": 0.7247, "step": 8553 }, { "epoch": 28.04590163934426, "grad_norm": 4.850236892700195, "learning_rate": 1.688646578932612e-05, "loss": 0.5739, "step": 8554 }, { "epoch": 28.049180327868854, "grad_norm": 5.891907691955566, "learning_rate": 1.6885695779263945e-05, "loss": 0.5762, "step": 8555 }, { "epoch": 28.052459016393442, "grad_norm": 4.372962951660156, "learning_rate": 1.6884925691558393e-05, "loss": 0.6422, "step": 8556 }, { "epoch": 28.055737704918034, "grad_norm": 6.528442859649658, "learning_rate": 1.688415552621814e-05, "loss": 0.7551, "step": 8557 }, { "epoch": 28.059016393442622, "grad_norm": 4.472232818603516, "learning_rate": 1.688338528325188e-05, "loss": 0.6605, "step": 8558 }, { "epoch": 28.062295081967214, "grad_norm": 6.243743419647217, "learning_rate": 1.6882614962668294e-05, "loss": 0.5284, "step": 8559 }, { "epoch": 28.065573770491802, "grad_norm": 4.572780132293701, "learning_rate": 1.6881844564476067e-05, "loss": 0.9535, "step": 8560 }, { "epoch": 28.068852459016394, "grad_norm": 5.617516040802002, "learning_rate": 1.6881074088683888e-05, "loss": 0.9413, "step": 8561 }, { "epoch": 28.072131147540983, "grad_norm": 6.995020866394043, "learning_rate": 1.6880303535300445e-05, "loss": 0.6301, "step": 8562 }, { "epoch": 28.075409836065575, "grad_norm": 5.432850360870361, "learning_rate": 1.6879532904334422e-05, "loss": 0.7005, "step": 8563 }, { "epoch": 28.078688524590163, "grad_norm": 4.3936309814453125, "learning_rate": 1.687876219579452e-05, "loss": 0.4982, "step": 8564 }, { "epoch": 28.081967213114755, "grad_norm": 5.578522205352783, "learning_rate": 1.6877991409689415e-05, "loss": 0.6735, "step": 8565 }, { "epoch": 28.085245901639343, "grad_norm": 5.81766414642334, "learning_rate": 1.687722054602781e-05, "loss": 0.6626, "step": 8566 }, { "epoch": 28.088524590163935, "grad_norm": 8.31134033203125, "learning_rate": 1.6876449604818388e-05, "loss": 0.4818, "step": 8567 }, { "epoch": 28.091803278688523, "grad_norm": 5.12778377532959, "learning_rate": 1.6875678586069853e-05, "loss": 0.7733, "step": 8568 }, { "epoch": 28.095081967213115, "grad_norm": 5.502584934234619, "learning_rate": 1.6874907489790885e-05, "loss": 0.6156, "step": 8569 }, { "epoch": 28.098360655737704, "grad_norm": 5.497848987579346, "learning_rate": 1.6874136315990195e-05, "loss": 0.8016, "step": 8570 }, { "epoch": 28.101639344262296, "grad_norm": 4.43825101852417, "learning_rate": 1.687336506467647e-05, "loss": 0.5964, "step": 8571 }, { "epoch": 28.104918032786884, "grad_norm": 5.507388114929199, "learning_rate": 1.68725937358584e-05, "loss": 0.7234, "step": 8572 }, { "epoch": 28.108196721311476, "grad_norm": 5.8171186447143555, "learning_rate": 1.6871822329544697e-05, "loss": 0.5809, "step": 8573 }, { "epoch": 28.111475409836064, "grad_norm": 14.032148361206055, "learning_rate": 1.687105084574405e-05, "loss": 0.6267, "step": 8574 }, { "epoch": 28.114754098360656, "grad_norm": 4.356039524078369, "learning_rate": 1.687027928446516e-05, "loss": 0.5546, "step": 8575 }, { "epoch": 28.118032786885244, "grad_norm": 5.429539203643799, "learning_rate": 1.6869507645716727e-05, "loss": 0.6534, "step": 8576 }, { "epoch": 28.121311475409836, "grad_norm": 4.577311992645264, "learning_rate": 1.6868735929507453e-05, "loss": 0.6413, "step": 8577 }, { "epoch": 28.124590163934425, "grad_norm": 4.596468448638916, "learning_rate": 1.6867964135846043e-05, "loss": 0.5457, "step": 8578 }, { "epoch": 28.127868852459017, "grad_norm": 4.437417030334473, "learning_rate": 1.6867192264741196e-05, "loss": 0.6562, "step": 8579 }, { "epoch": 28.131147540983605, "grad_norm": 5.27927303314209, "learning_rate": 1.6866420316201614e-05, "loss": 0.53, "step": 8580 }, { "epoch": 28.134426229508197, "grad_norm": 6.212447643280029, "learning_rate": 1.6865648290236007e-05, "loss": 0.5438, "step": 8581 }, { "epoch": 28.137704918032785, "grad_norm": 5.718352794647217, "learning_rate": 1.6864876186853072e-05, "loss": 0.5869, "step": 8582 }, { "epoch": 28.140983606557377, "grad_norm": 5.125409126281738, "learning_rate": 1.6864104006061525e-05, "loss": 0.6832, "step": 8583 }, { "epoch": 28.14426229508197, "grad_norm": 29.050315856933594, "learning_rate": 1.686333174787006e-05, "loss": 0.6259, "step": 8584 }, { "epoch": 28.147540983606557, "grad_norm": 4.7884039878845215, "learning_rate": 1.6862559412287403e-05, "loss": 0.5494, "step": 8585 }, { "epoch": 28.15081967213115, "grad_norm": 5.974005222320557, "learning_rate": 1.6861786999322248e-05, "loss": 0.4482, "step": 8586 }, { "epoch": 28.154098360655738, "grad_norm": 4.702669143676758, "learning_rate": 1.6861014508983313e-05, "loss": 0.7932, "step": 8587 }, { "epoch": 28.15737704918033, "grad_norm": 5.989809989929199, "learning_rate": 1.6860241941279305e-05, "loss": 0.5722, "step": 8588 }, { "epoch": 28.160655737704918, "grad_norm": 4.5442681312561035, "learning_rate": 1.6859469296218937e-05, "loss": 0.5487, "step": 8589 }, { "epoch": 28.16393442622951, "grad_norm": 4.915657997131348, "learning_rate": 1.6858696573810917e-05, "loss": 0.6797, "step": 8590 }, { "epoch": 28.167213114754098, "grad_norm": 5.510461330413818, "learning_rate": 1.6857923774063965e-05, "loss": 0.7418, "step": 8591 }, { "epoch": 28.17049180327869, "grad_norm": 6.888730049133301, "learning_rate": 1.6857150896986795e-05, "loss": 0.7831, "step": 8592 }, { "epoch": 28.17377049180328, "grad_norm": 6.042365074157715, "learning_rate": 1.6856377942588114e-05, "loss": 0.727, "step": 8593 }, { "epoch": 28.17704918032787, "grad_norm": 5.3614606857299805, "learning_rate": 1.6855604910876645e-05, "loss": 0.5931, "step": 8594 }, { "epoch": 28.18032786885246, "grad_norm": 7.027738571166992, "learning_rate": 1.6854831801861102e-05, "loss": 0.5871, "step": 8595 }, { "epoch": 28.18360655737705, "grad_norm": 4.732132911682129, "learning_rate": 1.6854058615550203e-05, "loss": 0.6122, "step": 8596 }, { "epoch": 28.18688524590164, "grad_norm": 5.985917568206787, "learning_rate": 1.6853285351952665e-05, "loss": 0.5991, "step": 8597 }, { "epoch": 28.19016393442623, "grad_norm": 5.152946949005127, "learning_rate": 1.6852512011077213e-05, "loss": 0.7186, "step": 8598 }, { "epoch": 28.19344262295082, "grad_norm": 4.833130836486816, "learning_rate": 1.685173859293256e-05, "loss": 0.6219, "step": 8599 }, { "epoch": 28.19672131147541, "grad_norm": 5.074745178222656, "learning_rate": 1.685096509752743e-05, "loss": 0.7762, "step": 8600 }, { "epoch": 28.2, "grad_norm": 4.79227352142334, "learning_rate": 1.6850191524870548e-05, "loss": 0.4905, "step": 8601 }, { "epoch": 28.20327868852459, "grad_norm": 5.1140594482421875, "learning_rate": 1.6849417874970633e-05, "loss": 0.6129, "step": 8602 }, { "epoch": 28.20655737704918, "grad_norm": 4.886425971984863, "learning_rate": 1.684864414783641e-05, "loss": 0.7462, "step": 8603 }, { "epoch": 28.20983606557377, "grad_norm": 5.906988620758057, "learning_rate": 1.6847870343476603e-05, "loss": 0.9019, "step": 8604 }, { "epoch": 28.21311475409836, "grad_norm": 6.116228103637695, "learning_rate": 1.684709646189994e-05, "loss": 0.5449, "step": 8605 }, { "epoch": 28.21639344262295, "grad_norm": 4.928338050842285, "learning_rate": 1.684632250311514e-05, "loss": 0.6642, "step": 8606 }, { "epoch": 28.21967213114754, "grad_norm": 7.14915657043457, "learning_rate": 1.684554846713094e-05, "loss": 0.7365, "step": 8607 }, { "epoch": 28.222950819672132, "grad_norm": 136.8570556640625, "learning_rate": 1.6844774353956062e-05, "loss": 0.5267, "step": 8608 }, { "epoch": 28.22622950819672, "grad_norm": 4.760806083679199, "learning_rate": 1.6844000163599237e-05, "loss": 0.6897, "step": 8609 }, { "epoch": 28.229508196721312, "grad_norm": 5.644662380218506, "learning_rate": 1.6843225896069192e-05, "loss": 0.4965, "step": 8610 }, { "epoch": 28.2327868852459, "grad_norm": 6.109655857086182, "learning_rate": 1.6842451551374663e-05, "loss": 0.9078, "step": 8611 }, { "epoch": 28.236065573770492, "grad_norm": 9.935291290283203, "learning_rate": 1.6841677129524377e-05, "loss": 0.7294, "step": 8612 }, { "epoch": 28.23934426229508, "grad_norm": 19.75516700744629, "learning_rate": 1.6840902630527066e-05, "loss": 0.7067, "step": 8613 }, { "epoch": 28.242622950819673, "grad_norm": 27.235858917236328, "learning_rate": 1.6840128054391467e-05, "loss": 1.0599, "step": 8614 }, { "epoch": 28.24590163934426, "grad_norm": 8.570258140563965, "learning_rate": 1.683935340112631e-05, "loss": 0.9196, "step": 8615 }, { "epoch": 28.249180327868853, "grad_norm": 10.809975624084473, "learning_rate": 1.6838578670740337e-05, "loss": 0.7665, "step": 8616 }, { "epoch": 28.25245901639344, "grad_norm": 9.797262191772461, "learning_rate": 1.6837803863242276e-05, "loss": 1.0055, "step": 8617 }, { "epoch": 28.255737704918033, "grad_norm": 7.102574348449707, "learning_rate": 1.683702897864087e-05, "loss": 0.8698, "step": 8618 }, { "epoch": 28.25901639344262, "grad_norm": 8.723477363586426, "learning_rate": 1.6836254016944848e-05, "loss": 0.635, "step": 8619 }, { "epoch": 28.262295081967213, "grad_norm": 6.848689556121826, "learning_rate": 1.683547897816296e-05, "loss": 0.6383, "step": 8620 }, { "epoch": 28.2655737704918, "grad_norm": 29.970434188842773, "learning_rate": 1.6834703862303933e-05, "loss": 0.6806, "step": 8621 }, { "epoch": 28.268852459016394, "grad_norm": 6.731567859649658, "learning_rate": 1.683392866937652e-05, "loss": 0.9146, "step": 8622 }, { "epoch": 28.272131147540982, "grad_norm": 8.036831855773926, "learning_rate": 1.683315339938945e-05, "loss": 0.7399, "step": 8623 }, { "epoch": 28.275409836065574, "grad_norm": 18.349817276000977, "learning_rate": 1.6832378052351472e-05, "loss": 0.7282, "step": 8624 }, { "epoch": 28.278688524590162, "grad_norm": 6.1081624031066895, "learning_rate": 1.6831602628271332e-05, "loss": 0.8846, "step": 8625 }, { "epoch": 28.281967213114754, "grad_norm": 4.73009729385376, "learning_rate": 1.6830827127157764e-05, "loss": 0.9639, "step": 8626 }, { "epoch": 28.285245901639342, "grad_norm": 5.879464626312256, "learning_rate": 1.683005154901952e-05, "loss": 0.9005, "step": 8627 }, { "epoch": 28.288524590163934, "grad_norm": 5.080660343170166, "learning_rate": 1.682927589386534e-05, "loss": 0.7502, "step": 8628 }, { "epoch": 28.291803278688526, "grad_norm": 5.595675468444824, "learning_rate": 1.6828500161703983e-05, "loss": 1.0026, "step": 8629 }, { "epoch": 28.295081967213115, "grad_norm": 8.062348365783691, "learning_rate": 1.682772435254418e-05, "loss": 1.0272, "step": 8630 }, { "epoch": 28.298360655737707, "grad_norm": 5.271210670471191, "learning_rate": 1.6826948466394685e-05, "loss": 0.8468, "step": 8631 }, { "epoch": 28.301639344262295, "grad_norm": 4.757280349731445, "learning_rate": 1.682617250326425e-05, "loss": 0.8786, "step": 8632 }, { "epoch": 28.304918032786887, "grad_norm": 6.063356876373291, "learning_rate": 1.6825396463161623e-05, "loss": 0.6019, "step": 8633 }, { "epoch": 28.308196721311475, "grad_norm": 8.66766357421875, "learning_rate": 1.682462034609555e-05, "loss": 0.8302, "step": 8634 }, { "epoch": 28.311475409836067, "grad_norm": 4.661159515380859, "learning_rate": 1.6823844152074795e-05, "loss": 0.918, "step": 8635 }, { "epoch": 28.314754098360655, "grad_norm": 6.1775689125061035, "learning_rate": 1.6823067881108095e-05, "loss": 0.7206, "step": 8636 }, { "epoch": 28.318032786885247, "grad_norm": 5.858091354370117, "learning_rate": 1.6822291533204213e-05, "loss": 0.6843, "step": 8637 }, { "epoch": 28.321311475409836, "grad_norm": 6.200379848480225, "learning_rate": 1.6821515108371898e-05, "loss": 0.7244, "step": 8638 }, { "epoch": 28.324590163934428, "grad_norm": 6.946844100952148, "learning_rate": 1.682073860661991e-05, "loss": 0.8973, "step": 8639 }, { "epoch": 28.327868852459016, "grad_norm": 5.355607032775879, "learning_rate": 1.6819962027957004e-05, "loss": 0.8128, "step": 8640 }, { "epoch": 28.331147540983608, "grad_norm": 5.5454277992248535, "learning_rate": 1.6819185372391932e-05, "loss": 0.7305, "step": 8641 }, { "epoch": 28.334426229508196, "grad_norm": 6.083963871002197, "learning_rate": 1.6818408639933456e-05, "loss": 0.6982, "step": 8642 }, { "epoch": 28.337704918032788, "grad_norm": 4.547393798828125, "learning_rate": 1.6817631830590335e-05, "loss": 0.8145, "step": 8643 }, { "epoch": 28.340983606557376, "grad_norm": 5.304600715637207, "learning_rate": 1.6816854944371326e-05, "loss": 0.6808, "step": 8644 }, { "epoch": 28.34426229508197, "grad_norm": 6.306334972381592, "learning_rate": 1.6816077981285186e-05, "loss": 0.6719, "step": 8645 }, { "epoch": 28.347540983606557, "grad_norm": 4.864986419677734, "learning_rate": 1.6815300941340686e-05, "loss": 0.8154, "step": 8646 }, { "epoch": 28.35081967213115, "grad_norm": 8.128622055053711, "learning_rate": 1.6814523824546577e-05, "loss": 0.7134, "step": 8647 }, { "epoch": 28.354098360655737, "grad_norm": 5.908952713012695, "learning_rate": 1.681374663091163e-05, "loss": 0.6862, "step": 8648 }, { "epoch": 28.35737704918033, "grad_norm": 5.92065954208374, "learning_rate": 1.68129693604446e-05, "loss": 0.6938, "step": 8649 }, { "epoch": 28.360655737704917, "grad_norm": 5.125844955444336, "learning_rate": 1.6812192013154262e-05, "loss": 0.6066, "step": 8650 }, { "epoch": 28.36393442622951, "grad_norm": 4.756032466888428, "learning_rate": 1.681141458904937e-05, "loss": 0.7098, "step": 8651 }, { "epoch": 28.367213114754097, "grad_norm": 5.498105049133301, "learning_rate": 1.6810637088138704e-05, "loss": 0.7362, "step": 8652 }, { "epoch": 28.37049180327869, "grad_norm": 5.078878402709961, "learning_rate": 1.680985951043102e-05, "loss": 0.7594, "step": 8653 }, { "epoch": 28.373770491803278, "grad_norm": 4.113529682159424, "learning_rate": 1.6809081855935087e-05, "loss": 1.0751, "step": 8654 }, { "epoch": 28.37704918032787, "grad_norm": 6.102570533752441, "learning_rate": 1.680830412465968e-05, "loss": 0.7655, "step": 8655 }, { "epoch": 28.380327868852458, "grad_norm": 5.726990699768066, "learning_rate": 1.6807526316613562e-05, "loss": 0.791, "step": 8656 }, { "epoch": 28.38360655737705, "grad_norm": 5.955456256866455, "learning_rate": 1.6806748431805512e-05, "loss": 0.7503, "step": 8657 }, { "epoch": 28.386885245901638, "grad_norm": 5.1357269287109375, "learning_rate": 1.680597047024429e-05, "loss": 0.7102, "step": 8658 }, { "epoch": 28.39016393442623, "grad_norm": 4.940741062164307, "learning_rate": 1.6805192431938676e-05, "loss": 0.8067, "step": 8659 }, { "epoch": 28.39344262295082, "grad_norm": 5.030428886413574, "learning_rate": 1.6804414316897448e-05, "loss": 0.8006, "step": 8660 }, { "epoch": 28.39672131147541, "grad_norm": 6.007197856903076, "learning_rate": 1.6803636125129366e-05, "loss": 0.6674, "step": 8661 }, { "epoch": 28.4, "grad_norm": 4.99419641494751, "learning_rate": 1.6802857856643214e-05, "loss": 0.7064, "step": 8662 }, { "epoch": 28.40327868852459, "grad_norm": 6.148425102233887, "learning_rate": 1.6802079511447772e-05, "loss": 0.618, "step": 8663 }, { "epoch": 28.40655737704918, "grad_norm": 4.735400676727295, "learning_rate": 1.6801301089551803e-05, "loss": 0.7009, "step": 8664 }, { "epoch": 28.40983606557377, "grad_norm": 6.514192581176758, "learning_rate": 1.68005225909641e-05, "loss": 0.6042, "step": 8665 }, { "epoch": 28.41311475409836, "grad_norm": 4.214549541473389, "learning_rate": 1.6799744015693428e-05, "loss": 0.6729, "step": 8666 }, { "epoch": 28.41639344262295, "grad_norm": 4.509930610656738, "learning_rate": 1.679896536374858e-05, "loss": 0.8079, "step": 8667 }, { "epoch": 28.41967213114754, "grad_norm": 86.92931365966797, "learning_rate": 1.679818663513832e-05, "loss": 0.6829, "step": 8668 }, { "epoch": 28.42295081967213, "grad_norm": 5.197043418884277, "learning_rate": 1.679740782987144e-05, "loss": 0.856, "step": 8669 }, { "epoch": 28.42622950819672, "grad_norm": 5.3410844802856445, "learning_rate": 1.679662894795672e-05, "loss": 0.5806, "step": 8670 }, { "epoch": 28.42950819672131, "grad_norm": 6.064105987548828, "learning_rate": 1.6795849989402943e-05, "loss": 0.5936, "step": 8671 }, { "epoch": 28.432786885245903, "grad_norm": 6.425778865814209, "learning_rate": 1.6795070954218888e-05, "loss": 0.8825, "step": 8672 }, { "epoch": 28.43606557377049, "grad_norm": 5.042659759521484, "learning_rate": 1.6794291842413346e-05, "loss": 0.7243, "step": 8673 }, { "epoch": 28.439344262295084, "grad_norm": 5.613293647766113, "learning_rate": 1.67935126539951e-05, "loss": 0.7795, "step": 8674 }, { "epoch": 28.442622950819672, "grad_norm": 5.960788726806641, "learning_rate": 1.679273338897293e-05, "loss": 0.6923, "step": 8675 }, { "epoch": 28.445901639344264, "grad_norm": 9.940940856933594, "learning_rate": 1.6791954047355635e-05, "loss": 0.6529, "step": 8676 }, { "epoch": 28.449180327868852, "grad_norm": 6.420094966888428, "learning_rate": 1.6791174629151995e-05, "loss": 0.6014, "step": 8677 }, { "epoch": 28.452459016393444, "grad_norm": 5.652757167816162, "learning_rate": 1.6790395134370797e-05, "loss": 0.8932, "step": 8678 }, { "epoch": 28.455737704918032, "grad_norm": 7.293579578399658, "learning_rate": 1.6789615563020832e-05, "loss": 0.9008, "step": 8679 }, { "epoch": 28.459016393442624, "grad_norm": 5.469519138336182, "learning_rate": 1.67888359151109e-05, "loss": 0.7059, "step": 8680 }, { "epoch": 28.462295081967213, "grad_norm": 7.140987873077393, "learning_rate": 1.678805619064978e-05, "loss": 0.6979, "step": 8681 }, { "epoch": 28.465573770491805, "grad_norm": 7.377917766571045, "learning_rate": 1.6787276389646264e-05, "loss": 0.6304, "step": 8682 }, { "epoch": 28.468852459016393, "grad_norm": 6.494983196258545, "learning_rate": 1.6786496512109156e-05, "loss": 0.7167, "step": 8683 }, { "epoch": 28.472131147540985, "grad_norm": 13.036919593811035, "learning_rate": 1.6785716558047242e-05, "loss": 0.6837, "step": 8684 }, { "epoch": 28.475409836065573, "grad_norm": 6.339929103851318, "learning_rate": 1.6784936527469318e-05, "loss": 0.6336, "step": 8685 }, { "epoch": 28.478688524590165, "grad_norm": 6.5237040519714355, "learning_rate": 1.678415642038418e-05, "loss": 0.7094, "step": 8686 }, { "epoch": 28.481967213114753, "grad_norm": 4.722304821014404, "learning_rate": 1.6783376236800624e-05, "loss": 0.7856, "step": 8687 }, { "epoch": 28.485245901639345, "grad_norm": 4.491647243499756, "learning_rate": 1.6782595976727448e-05, "loss": 0.7168, "step": 8688 }, { "epoch": 28.488524590163934, "grad_norm": 5.256954193115234, "learning_rate": 1.6781815640173448e-05, "loss": 0.5438, "step": 8689 }, { "epoch": 28.491803278688526, "grad_norm": 5.234504699707031, "learning_rate": 1.678103522714743e-05, "loss": 0.8115, "step": 8690 }, { "epoch": 28.495081967213114, "grad_norm": 5.494527339935303, "learning_rate": 1.678025473765819e-05, "loss": 0.679, "step": 8691 }, { "epoch": 28.498360655737706, "grad_norm": 8.424992561340332, "learning_rate": 1.6779474171714524e-05, "loss": 0.7326, "step": 8692 }, { "epoch": 28.501639344262294, "grad_norm": 4.705105304718018, "learning_rate": 1.6778693529325237e-05, "loss": 0.7458, "step": 8693 }, { "epoch": 28.504918032786886, "grad_norm": 5.510371208190918, "learning_rate": 1.6777912810499136e-05, "loss": 1.0709, "step": 8694 }, { "epoch": 28.508196721311474, "grad_norm": 5.350783348083496, "learning_rate": 1.6777132015245017e-05, "loss": 0.8231, "step": 8695 }, { "epoch": 28.511475409836066, "grad_norm": 5.531411170959473, "learning_rate": 1.6776351143571695e-05, "loss": 0.7432, "step": 8696 }, { "epoch": 28.514754098360655, "grad_norm": 7.6439948081970215, "learning_rate": 1.677557019548796e-05, "loss": 0.6584, "step": 8697 }, { "epoch": 28.518032786885247, "grad_norm": 4.918362617492676, "learning_rate": 1.6774789171002634e-05, "loss": 0.7631, "step": 8698 }, { "epoch": 28.521311475409835, "grad_norm": 9.010960578918457, "learning_rate": 1.677400807012451e-05, "loss": 0.7256, "step": 8699 }, { "epoch": 28.524590163934427, "grad_norm": 5.141944885253906, "learning_rate": 1.6773226892862405e-05, "loss": 0.7031, "step": 8700 }, { "epoch": 28.527868852459015, "grad_norm": 7.613285541534424, "learning_rate": 1.6772445639225125e-05, "loss": 0.6533, "step": 8701 }, { "epoch": 28.531147540983607, "grad_norm": 5.870441436767578, "learning_rate": 1.6771664309221475e-05, "loss": 0.6477, "step": 8702 }, { "epoch": 28.534426229508195, "grad_norm": 5.344351768493652, "learning_rate": 1.6770882902860272e-05, "loss": 0.8213, "step": 8703 }, { "epoch": 28.537704918032787, "grad_norm": 7.646379470825195, "learning_rate": 1.6770101420150324e-05, "loss": 0.6338, "step": 8704 }, { "epoch": 28.540983606557376, "grad_norm": 6.254685878753662, "learning_rate": 1.676931986110044e-05, "loss": 0.7971, "step": 8705 }, { "epoch": 28.544262295081968, "grad_norm": 5.078038215637207, "learning_rate": 1.676853822571944e-05, "loss": 0.5635, "step": 8706 }, { "epoch": 28.547540983606556, "grad_norm": 5.131629943847656, "learning_rate": 1.6767756514016132e-05, "loss": 0.7124, "step": 8707 }, { "epoch": 28.550819672131148, "grad_norm": 5.629550457000732, "learning_rate": 1.6766974725999338e-05, "loss": 0.6844, "step": 8708 }, { "epoch": 28.554098360655736, "grad_norm": 5.549307823181152, "learning_rate": 1.6766192861677863e-05, "loss": 0.5933, "step": 8709 }, { "epoch": 28.557377049180328, "grad_norm": 5.689754962921143, "learning_rate": 1.676541092106053e-05, "loss": 0.9534, "step": 8710 }, { "epoch": 28.560655737704916, "grad_norm": 5.103662967681885, "learning_rate": 1.6764628904156153e-05, "loss": 0.6491, "step": 8711 }, { "epoch": 28.56393442622951, "grad_norm": 5.843778133392334, "learning_rate": 1.676384681097355e-05, "loss": 0.5555, "step": 8712 }, { "epoch": 28.567213114754097, "grad_norm": 5.183019161224365, "learning_rate": 1.6763064641521548e-05, "loss": 0.6151, "step": 8713 }, { "epoch": 28.57049180327869, "grad_norm": 6.139160633087158, "learning_rate": 1.6762282395808956e-05, "loss": 0.5042, "step": 8714 }, { "epoch": 28.57377049180328, "grad_norm": 5.835981845855713, "learning_rate": 1.67615000738446e-05, "loss": 0.5733, "step": 8715 }, { "epoch": 28.57704918032787, "grad_norm": 13.809354782104492, "learning_rate": 1.6760717675637298e-05, "loss": 0.5846, "step": 8716 }, { "epoch": 28.58032786885246, "grad_norm": 5.704048156738281, "learning_rate": 1.675993520119588e-05, "loss": 0.7189, "step": 8717 }, { "epoch": 28.58360655737705, "grad_norm": 5.21662712097168, "learning_rate": 1.675915265052916e-05, "loss": 0.802, "step": 8718 }, { "epoch": 28.58688524590164, "grad_norm": 6.762964725494385, "learning_rate": 1.6758370023645968e-05, "loss": 0.6182, "step": 8719 }, { "epoch": 28.59016393442623, "grad_norm": 5.146982669830322, "learning_rate": 1.6757587320555124e-05, "loss": 0.7226, "step": 8720 }, { "epoch": 28.59344262295082, "grad_norm": 5.118579864501953, "learning_rate": 1.675680454126546e-05, "loss": 1.028, "step": 8721 }, { "epoch": 28.59672131147541, "grad_norm": 5.000270366668701, "learning_rate": 1.6756021685785797e-05, "loss": 0.6436, "step": 8722 }, { "epoch": 28.6, "grad_norm": 5.153162479400635, "learning_rate": 1.6755238754124965e-05, "loss": 0.6245, "step": 8723 }, { "epoch": 28.60327868852459, "grad_norm": 7.330872535705566, "learning_rate": 1.6754455746291792e-05, "loss": 0.6233, "step": 8724 }, { "epoch": 28.60655737704918, "grad_norm": 5.251475811004639, "learning_rate": 1.675367266229511e-05, "loss": 0.7696, "step": 8725 }, { "epoch": 28.60983606557377, "grad_norm": 5.4676690101623535, "learning_rate": 1.6752889502143747e-05, "loss": 0.5996, "step": 8726 }, { "epoch": 28.613114754098362, "grad_norm": 5.996411323547363, "learning_rate": 1.6752106265846533e-05, "loss": 0.8712, "step": 8727 }, { "epoch": 28.61639344262295, "grad_norm": 4.802218914031982, "learning_rate": 1.67513229534123e-05, "loss": 0.5993, "step": 8728 }, { "epoch": 28.619672131147542, "grad_norm": 4.078405380249023, "learning_rate": 1.675053956484988e-05, "loss": 0.751, "step": 8729 }, { "epoch": 28.62295081967213, "grad_norm": 5.35197114944458, "learning_rate": 1.6749756100168107e-05, "loss": 0.728, "step": 8730 }, { "epoch": 28.626229508196722, "grad_norm": 5.444883346557617, "learning_rate": 1.674897255937582e-05, "loss": 0.7486, "step": 8731 }, { "epoch": 28.62950819672131, "grad_norm": 4.667680263519287, "learning_rate": 1.6748188942481848e-05, "loss": 0.7999, "step": 8732 }, { "epoch": 28.632786885245903, "grad_norm": 4.937647342681885, "learning_rate": 1.674740524949503e-05, "loss": 0.6868, "step": 8733 }, { "epoch": 28.63606557377049, "grad_norm": 5.050197601318359, "learning_rate": 1.67466214804242e-05, "loss": 0.6371, "step": 8734 }, { "epoch": 28.639344262295083, "grad_norm": 5.005286693572998, "learning_rate": 1.67458376352782e-05, "loss": 0.8475, "step": 8735 }, { "epoch": 28.64262295081967, "grad_norm": 4.603224754333496, "learning_rate": 1.6745053714065866e-05, "loss": 0.7214, "step": 8736 }, { "epoch": 28.645901639344263, "grad_norm": 4.030297756195068, "learning_rate": 1.674426971679604e-05, "loss": 0.8433, "step": 8737 }, { "epoch": 28.64918032786885, "grad_norm": 4.900867938995361, "learning_rate": 1.6743485643477556e-05, "loss": 0.7245, "step": 8738 }, { "epoch": 28.652459016393443, "grad_norm": 5.810232162475586, "learning_rate": 1.6742701494119266e-05, "loss": 0.735, "step": 8739 }, { "epoch": 28.65573770491803, "grad_norm": 5.558290004730225, "learning_rate": 1.674191726873e-05, "loss": 0.6533, "step": 8740 }, { "epoch": 28.659016393442624, "grad_norm": 5.174046039581299, "learning_rate": 1.674113296731861e-05, "loss": 0.9177, "step": 8741 }, { "epoch": 28.662295081967212, "grad_norm": 6.288798809051514, "learning_rate": 1.674034858989394e-05, "loss": 0.632, "step": 8742 }, { "epoch": 28.665573770491804, "grad_norm": 5.691580295562744, "learning_rate": 1.6739564136464827e-05, "loss": 0.7579, "step": 8743 }, { "epoch": 28.668852459016392, "grad_norm": 4.904033184051514, "learning_rate": 1.673877960704012e-05, "loss": 0.8091, "step": 8744 }, { "epoch": 28.672131147540984, "grad_norm": 4.883443832397461, "learning_rate": 1.673799500162867e-05, "loss": 0.6593, "step": 8745 }, { "epoch": 28.675409836065572, "grad_norm": 4.83967924118042, "learning_rate": 1.6737210320239322e-05, "loss": 0.7886, "step": 8746 }, { "epoch": 28.678688524590164, "grad_norm": 4.678777694702148, "learning_rate": 1.6736425562880918e-05, "loss": 0.7133, "step": 8747 }, { "epoch": 28.681967213114753, "grad_norm": 6.192256450653076, "learning_rate": 1.6735640729562314e-05, "loss": 0.9326, "step": 8748 }, { "epoch": 28.685245901639345, "grad_norm": 5.110138893127441, "learning_rate": 1.6734855820292356e-05, "loss": 0.6783, "step": 8749 }, { "epoch": 28.688524590163933, "grad_norm": 4.141746520996094, "learning_rate": 1.6734070835079897e-05, "loss": 0.6863, "step": 8750 }, { "epoch": 28.691803278688525, "grad_norm": 5.370892524719238, "learning_rate": 1.673328577393379e-05, "loss": 0.6494, "step": 8751 }, { "epoch": 28.695081967213113, "grad_norm": 4.932370662689209, "learning_rate": 1.6732500636862883e-05, "loss": 0.6894, "step": 8752 }, { "epoch": 28.698360655737705, "grad_norm": 5.372616767883301, "learning_rate": 1.6731715423876028e-05, "loss": 0.9564, "step": 8753 }, { "epoch": 28.701639344262293, "grad_norm": 5.464081764221191, "learning_rate": 1.6730930134982088e-05, "loss": 0.6098, "step": 8754 }, { "epoch": 28.704918032786885, "grad_norm": 5.779255390167236, "learning_rate": 1.673014477018991e-05, "loss": 0.9091, "step": 8755 }, { "epoch": 28.708196721311474, "grad_norm": 4.752089023590088, "learning_rate": 1.6729359329508353e-05, "loss": 0.472, "step": 8756 }, { "epoch": 28.711475409836066, "grad_norm": 5.370855331420898, "learning_rate": 1.672857381294627e-05, "loss": 0.6753, "step": 8757 }, { "epoch": 28.714754098360658, "grad_norm": 5.0373711585998535, "learning_rate": 1.6727788220512522e-05, "loss": 0.7908, "step": 8758 }, { "epoch": 28.718032786885246, "grad_norm": 7.2506513595581055, "learning_rate": 1.672700255221597e-05, "loss": 0.6456, "step": 8759 }, { "epoch": 28.721311475409838, "grad_norm": 5.3920512199401855, "learning_rate": 1.6726216808065467e-05, "loss": 0.9053, "step": 8760 }, { "epoch": 28.724590163934426, "grad_norm": 5.1584038734436035, "learning_rate": 1.6725430988069875e-05, "loss": 0.6984, "step": 8761 }, { "epoch": 28.727868852459018, "grad_norm": 6.060545444488525, "learning_rate": 1.6724645092238058e-05, "loss": 0.9852, "step": 8762 }, { "epoch": 28.731147540983606, "grad_norm": 6.887720584869385, "learning_rate": 1.6723859120578873e-05, "loss": 0.7037, "step": 8763 }, { "epoch": 28.7344262295082, "grad_norm": 4.908333778381348, "learning_rate": 1.672307307310119e-05, "loss": 0.7704, "step": 8764 }, { "epoch": 28.737704918032787, "grad_norm": 5.019277572631836, "learning_rate": 1.6722286949813866e-05, "loss": 0.629, "step": 8765 }, { "epoch": 28.74098360655738, "grad_norm": 5.047391414642334, "learning_rate": 1.6721500750725764e-05, "loss": 0.8376, "step": 8766 }, { "epoch": 28.744262295081967, "grad_norm": 5.307724952697754, "learning_rate": 1.6720714475845755e-05, "loss": 0.7607, "step": 8767 }, { "epoch": 28.74754098360656, "grad_norm": 4.3843302726745605, "learning_rate": 1.6719928125182703e-05, "loss": 0.8099, "step": 8768 }, { "epoch": 28.750819672131147, "grad_norm": 4.981196403503418, "learning_rate": 1.6719141698745477e-05, "loss": 0.8196, "step": 8769 }, { "epoch": 28.75409836065574, "grad_norm": 14.914430618286133, "learning_rate": 1.6718355196542936e-05, "loss": 0.4936, "step": 8770 }, { "epoch": 28.757377049180327, "grad_norm": 4.9632110595703125, "learning_rate": 1.671756861858396e-05, "loss": 0.5663, "step": 8771 }, { "epoch": 28.76065573770492, "grad_norm": 5.571085453033447, "learning_rate": 1.6716781964877413e-05, "loss": 0.6293, "step": 8772 }, { "epoch": 28.763934426229508, "grad_norm": 4.816007614135742, "learning_rate": 1.671599523543216e-05, "loss": 0.5533, "step": 8773 }, { "epoch": 28.7672131147541, "grad_norm": 4.666957378387451, "learning_rate": 1.6715208430257085e-05, "loss": 0.7516, "step": 8774 }, { "epoch": 28.770491803278688, "grad_norm": 5.124387741088867, "learning_rate": 1.6714421549361048e-05, "loss": 0.5744, "step": 8775 }, { "epoch": 28.77377049180328, "grad_norm": 5.261018753051758, "learning_rate": 1.6713634592752926e-05, "loss": 0.5455, "step": 8776 }, { "epoch": 28.777049180327868, "grad_norm": 4.843128204345703, "learning_rate": 1.6712847560441598e-05, "loss": 0.7592, "step": 8777 }, { "epoch": 28.78032786885246, "grad_norm": 6.817595481872559, "learning_rate": 1.6712060452435933e-05, "loss": 0.7626, "step": 8778 }, { "epoch": 28.78360655737705, "grad_norm": 8.03968620300293, "learning_rate": 1.6711273268744804e-05, "loss": 0.7307, "step": 8779 }, { "epoch": 28.78688524590164, "grad_norm": 6.566368579864502, "learning_rate": 1.6710486009377092e-05, "loss": 0.7329, "step": 8780 }, { "epoch": 28.79016393442623, "grad_norm": 5.4596028327941895, "learning_rate": 1.6709698674341677e-05, "loss": 0.6125, "step": 8781 }, { "epoch": 28.79344262295082, "grad_norm": 4.983172416687012, "learning_rate": 1.6708911263647433e-05, "loss": 0.774, "step": 8782 }, { "epoch": 28.79672131147541, "grad_norm": 4.417037010192871, "learning_rate": 1.6708123777303233e-05, "loss": 0.6825, "step": 8783 }, { "epoch": 28.8, "grad_norm": 6.196507453918457, "learning_rate": 1.6707336215317968e-05, "loss": 0.5017, "step": 8784 }, { "epoch": 28.80327868852459, "grad_norm": 6.285959243774414, "learning_rate": 1.6706548577700514e-05, "loss": 0.9103, "step": 8785 }, { "epoch": 28.80655737704918, "grad_norm": 5.399222373962402, "learning_rate": 1.6705760864459748e-05, "loss": 0.6865, "step": 8786 }, { "epoch": 28.80983606557377, "grad_norm": 8.547877311706543, "learning_rate": 1.6704973075604558e-05, "loss": 0.5938, "step": 8787 }, { "epoch": 28.81311475409836, "grad_norm": 4.398695468902588, "learning_rate": 1.6704185211143824e-05, "loss": 0.4603, "step": 8788 }, { "epoch": 28.81639344262295, "grad_norm": 5.289072036743164, "learning_rate": 1.670339727108643e-05, "loss": 0.7223, "step": 8789 }, { "epoch": 28.81967213114754, "grad_norm": 5.100119113922119, "learning_rate": 1.6702609255441267e-05, "loss": 0.6343, "step": 8790 }, { "epoch": 28.82295081967213, "grad_norm": 4.667019844055176, "learning_rate": 1.6701821164217212e-05, "loss": 0.7587, "step": 8791 }, { "epoch": 28.82622950819672, "grad_norm": 4.859954357147217, "learning_rate": 1.6701032997423155e-05, "loss": 0.8521, "step": 8792 }, { "epoch": 28.82950819672131, "grad_norm": 4.49280309677124, "learning_rate": 1.670024475506798e-05, "loss": 0.5745, "step": 8793 }, { "epoch": 28.832786885245902, "grad_norm": 5.399994850158691, "learning_rate": 1.6699456437160587e-05, "loss": 0.7032, "step": 8794 }, { "epoch": 28.83606557377049, "grad_norm": 4.087086200714111, "learning_rate": 1.6698668043709854e-05, "loss": 0.8344, "step": 8795 }, { "epoch": 28.839344262295082, "grad_norm": 4.961815357208252, "learning_rate": 1.6697879574724673e-05, "loss": 0.9656, "step": 8796 }, { "epoch": 28.84262295081967, "grad_norm": 5.018675327301025, "learning_rate": 1.6697091030213935e-05, "loss": 0.5821, "step": 8797 }, { "epoch": 28.845901639344262, "grad_norm": 5.860288619995117, "learning_rate": 1.6696302410186533e-05, "loss": 0.5168, "step": 8798 }, { "epoch": 28.84918032786885, "grad_norm": 7.416215419769287, "learning_rate": 1.669551371465136e-05, "loss": 0.6948, "step": 8799 }, { "epoch": 28.852459016393443, "grad_norm": 5.518752098083496, "learning_rate": 1.6694724943617306e-05, "loss": 0.6778, "step": 8800 }, { "epoch": 28.855737704918035, "grad_norm": 5.004767894744873, "learning_rate": 1.669393609709327e-05, "loss": 0.6079, "step": 8801 }, { "epoch": 28.859016393442623, "grad_norm": 5.362035751342773, "learning_rate": 1.6693147175088144e-05, "loss": 0.8064, "step": 8802 }, { "epoch": 28.862295081967215, "grad_norm": 5.075319766998291, "learning_rate": 1.6692358177610823e-05, "loss": 0.7627, "step": 8803 }, { "epoch": 28.865573770491803, "grad_norm": 5.447176933288574, "learning_rate": 1.6691569104670206e-05, "loss": 0.6728, "step": 8804 }, { "epoch": 28.868852459016395, "grad_norm": 5.027618885040283, "learning_rate": 1.669077995627519e-05, "loss": 0.6969, "step": 8805 }, { "epoch": 28.872131147540983, "grad_norm": 6.889418601989746, "learning_rate": 1.6689990732434672e-05, "loss": 0.5056, "step": 8806 }, { "epoch": 28.875409836065575, "grad_norm": 21.37763786315918, "learning_rate": 1.6689201433157554e-05, "loss": 0.8239, "step": 8807 }, { "epoch": 28.878688524590164, "grad_norm": 5.379560947418213, "learning_rate": 1.6688412058452738e-05, "loss": 0.5895, "step": 8808 }, { "epoch": 28.881967213114756, "grad_norm": 5.147031307220459, "learning_rate": 1.6687622608329118e-05, "loss": 0.6749, "step": 8809 }, { "epoch": 28.885245901639344, "grad_norm": 4.847590446472168, "learning_rate": 1.6686833082795598e-05, "loss": 0.5904, "step": 8810 }, { "epoch": 28.888524590163936, "grad_norm": 5.983795166015625, "learning_rate": 1.6686043481861086e-05, "loss": 0.5966, "step": 8811 }, { "epoch": 28.891803278688524, "grad_norm": 5.605137348175049, "learning_rate": 1.668525380553448e-05, "loss": 0.6472, "step": 8812 }, { "epoch": 28.895081967213116, "grad_norm": 5.896368980407715, "learning_rate": 1.668446405382469e-05, "loss": 0.6637, "step": 8813 }, { "epoch": 28.898360655737704, "grad_norm": 6.079278945922852, "learning_rate": 1.6683674226740613e-05, "loss": 0.8237, "step": 8814 }, { "epoch": 28.901639344262296, "grad_norm": 5.044096946716309, "learning_rate": 1.6682884324291164e-05, "loss": 0.7585, "step": 8815 }, { "epoch": 28.904918032786885, "grad_norm": 5.047567844390869, "learning_rate": 1.6682094346485242e-05, "loss": 0.6232, "step": 8816 }, { "epoch": 28.908196721311477, "grad_norm": 5.200798511505127, "learning_rate": 1.668130429333176e-05, "loss": 0.7338, "step": 8817 }, { "epoch": 28.911475409836065, "grad_norm": 4.860358715057373, "learning_rate": 1.6680514164839624e-05, "loss": 0.7006, "step": 8818 }, { "epoch": 28.914754098360657, "grad_norm": 5.238250255584717, "learning_rate": 1.667972396101775e-05, "loss": 0.6072, "step": 8819 }, { "epoch": 28.918032786885245, "grad_norm": 4.978642463684082, "learning_rate": 1.6678933681875035e-05, "loss": 0.7609, "step": 8820 }, { "epoch": 28.921311475409837, "grad_norm": 4.654418468475342, "learning_rate": 1.6678143327420406e-05, "loss": 0.4917, "step": 8821 }, { "epoch": 28.924590163934425, "grad_norm": 4.9593634605407715, "learning_rate": 1.6677352897662762e-05, "loss": 0.5287, "step": 8822 }, { "epoch": 28.927868852459017, "grad_norm": 5.001006603240967, "learning_rate": 1.6676562392611025e-05, "loss": 0.825, "step": 8823 }, { "epoch": 28.931147540983606, "grad_norm": 5.072381973266602, "learning_rate": 1.6675771812274104e-05, "loss": 0.7909, "step": 8824 }, { "epoch": 28.934426229508198, "grad_norm": 5.605606555938721, "learning_rate": 1.6674981156660916e-05, "loss": 0.5785, "step": 8825 }, { "epoch": 28.937704918032786, "grad_norm": 5.983192443847656, "learning_rate": 1.6674190425780372e-05, "loss": 0.6142, "step": 8826 }, { "epoch": 28.940983606557378, "grad_norm": 5.418100357055664, "learning_rate": 1.6673399619641392e-05, "loss": 0.5896, "step": 8827 }, { "epoch": 28.944262295081966, "grad_norm": 5.455547332763672, "learning_rate": 1.6672608738252896e-05, "loss": 0.7498, "step": 8828 }, { "epoch": 28.947540983606558, "grad_norm": 5.163539886474609, "learning_rate": 1.6671817781623794e-05, "loss": 0.6342, "step": 8829 }, { "epoch": 28.950819672131146, "grad_norm": 6.423779487609863, "learning_rate": 1.6671026749763012e-05, "loss": 0.6234, "step": 8830 }, { "epoch": 28.95409836065574, "grad_norm": 5.246523380279541, "learning_rate": 1.667023564267947e-05, "loss": 0.6848, "step": 8831 }, { "epoch": 28.957377049180327, "grad_norm": 6.505122661590576, "learning_rate": 1.6669444460382082e-05, "loss": 1.0046, "step": 8832 }, { "epoch": 28.96065573770492, "grad_norm": 4.883874416351318, "learning_rate": 1.6668653202879773e-05, "loss": 0.9202, "step": 8833 }, { "epoch": 28.963934426229507, "grad_norm": 4.76108980178833, "learning_rate": 1.666786187018147e-05, "loss": 0.6283, "step": 8834 }, { "epoch": 28.9672131147541, "grad_norm": 5.377939224243164, "learning_rate": 1.6667070462296088e-05, "loss": 0.6551, "step": 8835 }, { "epoch": 28.970491803278687, "grad_norm": 5.673854351043701, "learning_rate": 1.6666278979232554e-05, "loss": 0.723, "step": 8836 }, { "epoch": 28.97377049180328, "grad_norm": 20.44075584411621, "learning_rate": 1.6665487420999796e-05, "loss": 0.7131, "step": 8837 }, { "epoch": 28.977049180327867, "grad_norm": 5.2892231941223145, "learning_rate": 1.6664695787606735e-05, "loss": 0.5723, "step": 8838 }, { "epoch": 28.98032786885246, "grad_norm": 4.88477087020874, "learning_rate": 1.6663904079062302e-05, "loss": 0.8222, "step": 8839 }, { "epoch": 28.983606557377048, "grad_norm": 5.096414089202881, "learning_rate": 1.6663112295375418e-05, "loss": 0.7488, "step": 8840 }, { "epoch": 28.98688524590164, "grad_norm": 6.992254734039307, "learning_rate": 1.6662320436555014e-05, "loss": 0.7573, "step": 8841 }, { "epoch": 28.990163934426228, "grad_norm": 4.805820465087891, "learning_rate": 1.6661528502610025e-05, "loss": 0.6596, "step": 8842 }, { "epoch": 28.99344262295082, "grad_norm": 4.50244140625, "learning_rate": 1.6660736493549374e-05, "loss": 0.853, "step": 8843 }, { "epoch": 28.99672131147541, "grad_norm": 4.278300762176514, "learning_rate": 1.665994440938199e-05, "loss": 0.997, "step": 8844 }, { "epoch": 29.0, "grad_norm": 6.035704135894775, "learning_rate": 1.665915225011681e-05, "loss": 0.5123, "step": 8845 }, { "epoch": 29.003278688524592, "grad_norm": 5.557611465454102, "learning_rate": 1.665836001576277e-05, "loss": 0.6315, "step": 8846 }, { "epoch": 29.00655737704918, "grad_norm": 5.238670825958252, "learning_rate": 1.6657567706328792e-05, "loss": 0.5891, "step": 8847 }, { "epoch": 29.009836065573772, "grad_norm": 6.449426174163818, "learning_rate": 1.6656775321823817e-05, "loss": 0.7419, "step": 8848 }, { "epoch": 29.01311475409836, "grad_norm": 6.769680976867676, "learning_rate": 1.665598286225678e-05, "loss": 0.532, "step": 8849 }, { "epoch": 29.016393442622952, "grad_norm": 5.008584976196289, "learning_rate": 1.6655190327636615e-05, "loss": 0.6739, "step": 8850 }, { "epoch": 29.01967213114754, "grad_norm": 6.442235946655273, "learning_rate": 1.6654397717972258e-05, "loss": 0.7596, "step": 8851 }, { "epoch": 29.022950819672133, "grad_norm": 4.979940414428711, "learning_rate": 1.6653605033272653e-05, "loss": 0.6654, "step": 8852 }, { "epoch": 29.02622950819672, "grad_norm": 5.097231864929199, "learning_rate": 1.665281227354673e-05, "loss": 0.8163, "step": 8853 }, { "epoch": 29.029508196721313, "grad_norm": 6.781599044799805, "learning_rate": 1.665201943880343e-05, "loss": 0.6633, "step": 8854 }, { "epoch": 29.0327868852459, "grad_norm": 5.567934513092041, "learning_rate": 1.6651226529051695e-05, "loss": 0.5347, "step": 8855 }, { "epoch": 29.036065573770493, "grad_norm": 5.064854145050049, "learning_rate": 1.6650433544300468e-05, "loss": 0.4884, "step": 8856 }, { "epoch": 29.03934426229508, "grad_norm": 4.941887855529785, "learning_rate": 1.6649640484558686e-05, "loss": 0.7247, "step": 8857 }, { "epoch": 29.042622950819673, "grad_norm": 5.0049967765808105, "learning_rate": 1.6648847349835294e-05, "loss": 0.6976, "step": 8858 }, { "epoch": 29.04590163934426, "grad_norm": 4.6762824058532715, "learning_rate": 1.6648054140139234e-05, "loss": 0.4318, "step": 8859 }, { "epoch": 29.049180327868854, "grad_norm": 4.606443405151367, "learning_rate": 1.664726085547945e-05, "loss": 0.6752, "step": 8860 }, { "epoch": 29.052459016393442, "grad_norm": 5.373260498046875, "learning_rate": 1.6646467495864892e-05, "loss": 0.6758, "step": 8861 }, { "epoch": 29.055737704918034, "grad_norm": 4.790811538696289, "learning_rate": 1.6645674061304502e-05, "loss": 0.6554, "step": 8862 }, { "epoch": 29.059016393442622, "grad_norm": 4.5327348709106445, "learning_rate": 1.664488055180723e-05, "loss": 0.5446, "step": 8863 }, { "epoch": 29.062295081967214, "grad_norm": 4.91353702545166, "learning_rate": 1.6644086967382015e-05, "loss": 0.6149, "step": 8864 }, { "epoch": 29.065573770491802, "grad_norm": 5.16680908203125, "learning_rate": 1.6643293308037813e-05, "loss": 0.6313, "step": 8865 }, { "epoch": 29.068852459016394, "grad_norm": 5.0141282081604, "learning_rate": 1.664249957378357e-05, "loss": 0.4613, "step": 8866 }, { "epoch": 29.072131147540983, "grad_norm": 4.512136459350586, "learning_rate": 1.6641705764628243e-05, "loss": 0.8683, "step": 8867 }, { "epoch": 29.075409836065575, "grad_norm": 5.503065586090088, "learning_rate": 1.6640911880580773e-05, "loss": 0.6154, "step": 8868 }, { "epoch": 29.078688524590163, "grad_norm": 4.922616004943848, "learning_rate": 1.664011792165012e-05, "loss": 0.6717, "step": 8869 }, { "epoch": 29.081967213114755, "grad_norm": 5.271553993225098, "learning_rate": 1.663932388784523e-05, "loss": 0.6143, "step": 8870 }, { "epoch": 29.085245901639343, "grad_norm": 5.786732196807861, "learning_rate": 1.663852977917506e-05, "loss": 0.4366, "step": 8871 }, { "epoch": 29.088524590163935, "grad_norm": 5.0965070724487305, "learning_rate": 1.663773559564857e-05, "loss": 0.8968, "step": 8872 }, { "epoch": 29.091803278688523, "grad_norm": 4.9833221435546875, "learning_rate": 1.6636941337274705e-05, "loss": 0.7924, "step": 8873 }, { "epoch": 29.095081967213115, "grad_norm": 5.966686725616455, "learning_rate": 1.6636147004062424e-05, "loss": 0.7786, "step": 8874 }, { "epoch": 29.098360655737704, "grad_norm": 9.11623764038086, "learning_rate": 1.663535259602069e-05, "loss": 0.6923, "step": 8875 }, { "epoch": 29.101639344262296, "grad_norm": 5.362161159515381, "learning_rate": 1.6634558113158455e-05, "loss": 0.7985, "step": 8876 }, { "epoch": 29.104918032786884, "grad_norm": 5.4506988525390625, "learning_rate": 1.6633763555484676e-05, "loss": 0.5934, "step": 8877 }, { "epoch": 29.108196721311476, "grad_norm": 4.381328582763672, "learning_rate": 1.663296892300832e-05, "loss": 0.5634, "step": 8878 }, { "epoch": 29.111475409836064, "grad_norm": 5.972904682159424, "learning_rate": 1.663217421573834e-05, "loss": 0.4659, "step": 8879 }, { "epoch": 29.114754098360656, "grad_norm": 5.923353672027588, "learning_rate": 1.6631379433683705e-05, "loss": 0.4107, "step": 8880 }, { "epoch": 29.118032786885244, "grad_norm": 5.447046756744385, "learning_rate": 1.6630584576853367e-05, "loss": 0.6367, "step": 8881 }, { "epoch": 29.121311475409836, "grad_norm": 4.5258002281188965, "learning_rate": 1.6629789645256297e-05, "loss": 0.5967, "step": 8882 }, { "epoch": 29.124590163934425, "grad_norm": 4.724497318267822, "learning_rate": 1.662899463890145e-05, "loss": 0.6949, "step": 8883 }, { "epoch": 29.127868852459017, "grad_norm": 5.832554817199707, "learning_rate": 1.66281995577978e-05, "loss": 0.8319, "step": 8884 }, { "epoch": 29.131147540983605, "grad_norm": 4.385213375091553, "learning_rate": 1.662740440195431e-05, "loss": 0.8751, "step": 8885 }, { "epoch": 29.134426229508197, "grad_norm": 4.5369181632995605, "learning_rate": 1.6626609171379938e-05, "loss": 0.5208, "step": 8886 }, { "epoch": 29.137704918032785, "grad_norm": 4.586091995239258, "learning_rate": 1.6625813866083665e-05, "loss": 0.619, "step": 8887 }, { "epoch": 29.140983606557377, "grad_norm": 4.858433723449707, "learning_rate": 1.6625018486074448e-05, "loss": 0.5679, "step": 8888 }, { "epoch": 29.14426229508197, "grad_norm": 4.201754093170166, "learning_rate": 1.6624223031361258e-05, "loss": 0.5339, "step": 8889 }, { "epoch": 29.147540983606557, "grad_norm": 6.37907600402832, "learning_rate": 1.662342750195307e-05, "loss": 0.6341, "step": 8890 }, { "epoch": 29.15081967213115, "grad_norm": 5.000215530395508, "learning_rate": 1.6622631897858848e-05, "loss": 0.6384, "step": 8891 }, { "epoch": 29.154098360655738, "grad_norm": 5.295529365539551, "learning_rate": 1.6621836219087565e-05, "loss": 0.5013, "step": 8892 }, { "epoch": 29.15737704918033, "grad_norm": 4.6557817459106445, "learning_rate": 1.6621040465648196e-05, "loss": 0.7188, "step": 8893 }, { "epoch": 29.160655737704918, "grad_norm": 5.626043796539307, "learning_rate": 1.6620244637549706e-05, "loss": 0.8226, "step": 8894 }, { "epoch": 29.16393442622951, "grad_norm": 4.6543498039245605, "learning_rate": 1.6619448734801082e-05, "loss": 0.6545, "step": 8895 }, { "epoch": 29.167213114754098, "grad_norm": 4.878859043121338, "learning_rate": 1.6618652757411287e-05, "loss": 0.5461, "step": 8896 }, { "epoch": 29.17049180327869, "grad_norm": 5.000424385070801, "learning_rate": 1.66178567053893e-05, "loss": 0.7859, "step": 8897 }, { "epoch": 29.17377049180328, "grad_norm": 5.002409934997559, "learning_rate": 1.66170605787441e-05, "loss": 0.624, "step": 8898 }, { "epoch": 29.17704918032787, "grad_norm": 4.66563606262207, "learning_rate": 1.6616264377484658e-05, "loss": 0.5794, "step": 8899 }, { "epoch": 29.18032786885246, "grad_norm": 4.950863361358643, "learning_rate": 1.661546810161996e-05, "loss": 0.8694, "step": 8900 }, { "epoch": 29.18360655737705, "grad_norm": 5.41664457321167, "learning_rate": 1.6614671751158978e-05, "loss": 0.6385, "step": 8901 }, { "epoch": 29.18688524590164, "grad_norm": 4.591574668884277, "learning_rate": 1.66138753261107e-05, "loss": 0.4328, "step": 8902 }, { "epoch": 29.19016393442623, "grad_norm": 4.702449321746826, "learning_rate": 1.6613078826484096e-05, "loss": 0.7631, "step": 8903 }, { "epoch": 29.19344262295082, "grad_norm": 4.541688919067383, "learning_rate": 1.661228225228815e-05, "loss": 0.683, "step": 8904 }, { "epoch": 29.19672131147541, "grad_norm": 4.8900299072265625, "learning_rate": 1.6611485603531853e-05, "loss": 0.4164, "step": 8905 }, { "epoch": 29.2, "grad_norm": 4.97788143157959, "learning_rate": 1.6610688880224178e-05, "loss": 0.5485, "step": 8906 }, { "epoch": 29.20327868852459, "grad_norm": 5.143381118774414, "learning_rate": 1.660989208237411e-05, "loss": 0.6237, "step": 8907 }, { "epoch": 29.20655737704918, "grad_norm": 5.7567572593688965, "learning_rate": 1.660909520999064e-05, "loss": 0.5907, "step": 8908 }, { "epoch": 29.20983606557377, "grad_norm": 4.983427047729492, "learning_rate": 1.6608298263082748e-05, "loss": 0.822, "step": 8909 }, { "epoch": 29.21311475409836, "grad_norm": 4.75292444229126, "learning_rate": 1.6607501241659424e-05, "loss": 0.8929, "step": 8910 }, { "epoch": 29.21639344262295, "grad_norm": 4.825085163116455, "learning_rate": 1.660670414572965e-05, "loss": 0.4349, "step": 8911 }, { "epoch": 29.21967213114754, "grad_norm": 4.292789459228516, "learning_rate": 1.6605906975302422e-05, "loss": 0.595, "step": 8912 }, { "epoch": 29.222950819672132, "grad_norm": 6.127376079559326, "learning_rate": 1.6605109730386718e-05, "loss": 0.6388, "step": 8913 }, { "epoch": 29.22622950819672, "grad_norm": 4.944276809692383, "learning_rate": 1.6604312410991542e-05, "loss": 0.7625, "step": 8914 }, { "epoch": 29.229508196721312, "grad_norm": 4.43077278137207, "learning_rate": 1.6603515017125873e-05, "loss": 0.7775, "step": 8915 }, { "epoch": 29.2327868852459, "grad_norm": 4.505919933319092, "learning_rate": 1.6602717548798707e-05, "loss": 0.6643, "step": 8916 }, { "epoch": 29.236065573770492, "grad_norm": 4.121018409729004, "learning_rate": 1.6601920006019036e-05, "loss": 0.7115, "step": 8917 }, { "epoch": 29.23934426229508, "grad_norm": 8.771735191345215, "learning_rate": 1.660112238879585e-05, "loss": 0.5959, "step": 8918 }, { "epoch": 29.242622950819673, "grad_norm": 5.711862564086914, "learning_rate": 1.6600324697138148e-05, "loss": 0.4953, "step": 8919 }, { "epoch": 29.24590163934426, "grad_norm": 4.24009370803833, "learning_rate": 1.659952693105492e-05, "loss": 0.8736, "step": 8920 }, { "epoch": 29.249180327868853, "grad_norm": 5.703122138977051, "learning_rate": 1.6598729090555168e-05, "loss": 0.6353, "step": 8921 }, { "epoch": 29.25245901639344, "grad_norm": 4.918450832366943, "learning_rate": 1.659793117564788e-05, "loss": 0.5796, "step": 8922 }, { "epoch": 29.255737704918033, "grad_norm": 4.903970718383789, "learning_rate": 1.6597133186342062e-05, "loss": 0.6709, "step": 8923 }, { "epoch": 29.25901639344262, "grad_norm": 5.6881103515625, "learning_rate": 1.6596335122646706e-05, "loss": 0.7161, "step": 8924 }, { "epoch": 29.262295081967213, "grad_norm": 4.860029220581055, "learning_rate": 1.6595536984570816e-05, "loss": 0.4147, "step": 8925 }, { "epoch": 29.2655737704918, "grad_norm": 4.985410690307617, "learning_rate": 1.6594738772123382e-05, "loss": 0.7719, "step": 8926 }, { "epoch": 29.268852459016394, "grad_norm": 5.017411231994629, "learning_rate": 1.6593940485313416e-05, "loss": 0.4187, "step": 8927 }, { "epoch": 29.272131147540982, "grad_norm": 4.798947811126709, "learning_rate": 1.6593142124149918e-05, "loss": 0.8595, "step": 8928 }, { "epoch": 29.275409836065574, "grad_norm": 7.147234916687012, "learning_rate": 1.659234368864188e-05, "loss": 0.4872, "step": 8929 }, { "epoch": 29.278688524590162, "grad_norm": 4.523397922515869, "learning_rate": 1.659154517879832e-05, "loss": 0.7397, "step": 8930 }, { "epoch": 29.281967213114754, "grad_norm": 7.2637200355529785, "learning_rate": 1.6590746594628228e-05, "loss": 0.8149, "step": 8931 }, { "epoch": 29.285245901639342, "grad_norm": 5.070878982543945, "learning_rate": 1.658994793614062e-05, "loss": 0.6862, "step": 8932 }, { "epoch": 29.288524590163934, "grad_norm": 6.032137393951416, "learning_rate": 1.6589149203344493e-05, "loss": 0.6245, "step": 8933 }, { "epoch": 29.291803278688526, "grad_norm": 5.684913635253906, "learning_rate": 1.6588350396248865e-05, "loss": 0.7606, "step": 8934 }, { "epoch": 29.295081967213115, "grad_norm": 5.230346202850342, "learning_rate": 1.658755151486273e-05, "loss": 0.5624, "step": 8935 }, { "epoch": 29.298360655737707, "grad_norm": 5.0260796546936035, "learning_rate": 1.6586752559195106e-05, "loss": 0.5895, "step": 8936 }, { "epoch": 29.301639344262295, "grad_norm": 4.929717063903809, "learning_rate": 1.6585953529254993e-05, "loss": 0.8724, "step": 8937 }, { "epoch": 29.304918032786887, "grad_norm": 5.665063858032227, "learning_rate": 1.6585154425051412e-05, "loss": 0.6812, "step": 8938 }, { "epoch": 29.308196721311475, "grad_norm": 5.944118499755859, "learning_rate": 1.6584355246593365e-05, "loss": 0.6046, "step": 8939 }, { "epoch": 29.311475409836067, "grad_norm": 4.933685779571533, "learning_rate": 1.6583555993889868e-05, "loss": 0.7755, "step": 8940 }, { "epoch": 29.314754098360655, "grad_norm": 4.780801773071289, "learning_rate": 1.6582756666949934e-05, "loss": 0.5981, "step": 8941 }, { "epoch": 29.318032786885247, "grad_norm": 6.351215839385986, "learning_rate": 1.6581957265782568e-05, "loss": 0.6266, "step": 8942 }, { "epoch": 29.321311475409836, "grad_norm": 6.217222213745117, "learning_rate": 1.6581157790396796e-05, "loss": 0.5875, "step": 8943 }, { "epoch": 29.324590163934428, "grad_norm": 4.889880657196045, "learning_rate": 1.6580358240801624e-05, "loss": 0.4712, "step": 8944 }, { "epoch": 29.327868852459016, "grad_norm": 5.027284622192383, "learning_rate": 1.657955861700607e-05, "loss": 0.9901, "step": 8945 }, { "epoch": 29.331147540983608, "grad_norm": 4.434191703796387, "learning_rate": 1.6578758919019157e-05, "loss": 1.0394, "step": 8946 }, { "epoch": 29.334426229508196, "grad_norm": 10.011956214904785, "learning_rate": 1.6577959146849893e-05, "loss": 0.8267, "step": 8947 }, { "epoch": 29.337704918032788, "grad_norm": 4.385512351989746, "learning_rate": 1.65771593005073e-05, "loss": 0.6371, "step": 8948 }, { "epoch": 29.340983606557376, "grad_norm": 5.6793951988220215, "learning_rate": 1.65763593800004e-05, "loss": 0.5803, "step": 8949 }, { "epoch": 29.34426229508197, "grad_norm": 5.911528587341309, "learning_rate": 1.657555938533821e-05, "loss": 1.0799, "step": 8950 }, { "epoch": 29.347540983606557, "grad_norm": 4.830749034881592, "learning_rate": 1.6574759316529748e-05, "loss": 0.4939, "step": 8951 }, { "epoch": 29.35081967213115, "grad_norm": 4.9093194007873535, "learning_rate": 1.657395917358404e-05, "loss": 0.7077, "step": 8952 }, { "epoch": 29.354098360655737, "grad_norm": 6.605448246002197, "learning_rate": 1.6573158956510107e-05, "loss": 0.5507, "step": 8953 }, { "epoch": 29.35737704918033, "grad_norm": 6.039813041687012, "learning_rate": 1.6572358665316973e-05, "loss": 0.4828, "step": 8954 }, { "epoch": 29.360655737704917, "grad_norm": 5.12237548828125, "learning_rate": 1.657155830001366e-05, "loss": 0.6056, "step": 8955 }, { "epoch": 29.36393442622951, "grad_norm": 4.214903831481934, "learning_rate": 1.6570757860609198e-05, "loss": 0.8396, "step": 8956 }, { "epoch": 29.367213114754097, "grad_norm": 4.618077278137207, "learning_rate": 1.6569957347112606e-05, "loss": 0.7311, "step": 8957 }, { "epoch": 29.37049180327869, "grad_norm": 5.185934543609619, "learning_rate": 1.6569156759532916e-05, "loss": 0.7946, "step": 8958 }, { "epoch": 29.373770491803278, "grad_norm": 5.439398765563965, "learning_rate": 1.656835609787915e-05, "loss": 0.5336, "step": 8959 }, { "epoch": 29.37704918032787, "grad_norm": 4.808021545410156, "learning_rate": 1.6567555362160345e-05, "loss": 0.6761, "step": 8960 }, { "epoch": 29.380327868852458, "grad_norm": 4.168517112731934, "learning_rate": 1.656675455238552e-05, "loss": 0.752, "step": 8961 }, { "epoch": 29.38360655737705, "grad_norm": 5.103165149688721, "learning_rate": 1.6565953668563713e-05, "loss": 0.5829, "step": 8962 }, { "epoch": 29.386885245901638, "grad_norm": 4.8263115882873535, "learning_rate": 1.656515271070395e-05, "loss": 0.5349, "step": 8963 }, { "epoch": 29.39016393442623, "grad_norm": 4.385153293609619, "learning_rate": 1.6564351678815263e-05, "loss": 0.7298, "step": 8964 }, { "epoch": 29.39344262295082, "grad_norm": 3.9023168087005615, "learning_rate": 1.6563550572906687e-05, "loss": 0.7282, "step": 8965 }, { "epoch": 29.39672131147541, "grad_norm": 7.8498053550720215, "learning_rate": 1.6562749392987255e-05, "loss": 0.4901, "step": 8966 }, { "epoch": 29.4, "grad_norm": 4.320003032684326, "learning_rate": 1.6561948139065997e-05, "loss": 0.6508, "step": 8967 }, { "epoch": 29.40327868852459, "grad_norm": 4.4511590003967285, "learning_rate": 1.6561146811151953e-05, "loss": 0.7961, "step": 8968 }, { "epoch": 29.40655737704918, "grad_norm": 4.899720191955566, "learning_rate": 1.6560345409254154e-05, "loss": 0.8845, "step": 8969 }, { "epoch": 29.40983606557377, "grad_norm": 4.663225173950195, "learning_rate": 1.6559543933381645e-05, "loss": 0.7794, "step": 8970 }, { "epoch": 29.41311475409836, "grad_norm": 4.8483123779296875, "learning_rate": 1.655874238354345e-05, "loss": 0.582, "step": 8971 }, { "epoch": 29.41639344262295, "grad_norm": 4.582091331481934, "learning_rate": 1.6557940759748623e-05, "loss": 0.477, "step": 8972 }, { "epoch": 29.41967213114754, "grad_norm": 11.281800270080566, "learning_rate": 1.655713906200619e-05, "loss": 0.6794, "step": 8973 }, { "epoch": 29.42295081967213, "grad_norm": 8.267921447753906, "learning_rate": 1.6556337290325202e-05, "loss": 0.6169, "step": 8974 }, { "epoch": 29.42622950819672, "grad_norm": 5.2004570960998535, "learning_rate": 1.655553544471469e-05, "loss": 0.883, "step": 8975 }, { "epoch": 29.42950819672131, "grad_norm": 6.878609657287598, "learning_rate": 1.65547335251837e-05, "loss": 0.6508, "step": 8976 }, { "epoch": 29.432786885245903, "grad_norm": 4.824264049530029, "learning_rate": 1.6553931531741276e-05, "loss": 0.7069, "step": 8977 }, { "epoch": 29.43606557377049, "grad_norm": 5.408151149749756, "learning_rate": 1.6553129464396457e-05, "loss": 0.6766, "step": 8978 }, { "epoch": 29.439344262295084, "grad_norm": 4.732307434082031, "learning_rate": 1.6552327323158294e-05, "loss": 0.6924, "step": 8979 }, { "epoch": 29.442622950819672, "grad_norm": 5.825427532196045, "learning_rate": 1.6551525108035824e-05, "loss": 0.8524, "step": 8980 }, { "epoch": 29.445901639344264, "grad_norm": 4.8247785568237305, "learning_rate": 1.6550722819038096e-05, "loss": 0.6623, "step": 8981 }, { "epoch": 29.449180327868852, "grad_norm": 10.92055892944336, "learning_rate": 1.654992045617416e-05, "loss": 0.692, "step": 8982 }, { "epoch": 29.452459016393444, "grad_norm": 4.947007179260254, "learning_rate": 1.654911801945306e-05, "loss": 0.7433, "step": 8983 }, { "epoch": 29.455737704918032, "grad_norm": 11.355402946472168, "learning_rate": 1.6548315508883845e-05, "loss": 0.4831, "step": 8984 }, { "epoch": 29.459016393442624, "grad_norm": 16.986148834228516, "learning_rate": 1.654751292447556e-05, "loss": 0.5197, "step": 8985 }, { "epoch": 29.462295081967213, "grad_norm": 6.82462739944458, "learning_rate": 1.6546710266237264e-05, "loss": 0.7614, "step": 8986 }, { "epoch": 29.465573770491805, "grad_norm": 4.621527194976807, "learning_rate": 1.6545907534178e-05, "loss": 0.5483, "step": 8987 }, { "epoch": 29.468852459016393, "grad_norm": 4.561800479888916, "learning_rate": 1.6545104728306825e-05, "loss": 0.7973, "step": 8988 }, { "epoch": 29.472131147540985, "grad_norm": 6.114077568054199, "learning_rate": 1.654430184863279e-05, "loss": 0.5583, "step": 8989 }, { "epoch": 29.475409836065573, "grad_norm": 4.593318462371826, "learning_rate": 1.6543498895164944e-05, "loss": 0.712, "step": 8990 }, { "epoch": 29.478688524590165, "grad_norm": 5.792817115783691, "learning_rate": 1.6542695867912346e-05, "loss": 0.5647, "step": 8991 }, { "epoch": 29.481967213114753, "grad_norm": 5.09850549697876, "learning_rate": 1.654189276688405e-05, "loss": 0.6376, "step": 8992 }, { "epoch": 29.485245901639345, "grad_norm": 4.524419784545898, "learning_rate": 1.654108959208911e-05, "loss": 0.7426, "step": 8993 }, { "epoch": 29.488524590163934, "grad_norm": 4.7544169425964355, "learning_rate": 1.6540286343536583e-05, "loss": 0.7704, "step": 8994 }, { "epoch": 29.491803278688526, "grad_norm": 4.976182460784912, "learning_rate": 1.653948302123553e-05, "loss": 0.6156, "step": 8995 }, { "epoch": 29.495081967213114, "grad_norm": 4.939798355102539, "learning_rate": 1.6538679625195002e-05, "loss": 0.7015, "step": 8996 }, { "epoch": 29.498360655737706, "grad_norm": 6.434833526611328, "learning_rate": 1.653787615542407e-05, "loss": 0.7425, "step": 8997 }, { "epoch": 29.501639344262294, "grad_norm": 7.66232442855835, "learning_rate": 1.6537072611931778e-05, "loss": 0.5269, "step": 8998 }, { "epoch": 29.504918032786886, "grad_norm": 8.307172775268555, "learning_rate": 1.65362689947272e-05, "loss": 0.6211, "step": 8999 }, { "epoch": 29.508196721311474, "grad_norm": 5.595070838928223, "learning_rate": 1.6535465303819394e-05, "loss": 0.582, "step": 9000 }, { "epoch": 29.511475409836066, "grad_norm": 4.233602523803711, "learning_rate": 1.653466153921742e-05, "loss": 0.6527, "step": 9001 }, { "epoch": 29.514754098360655, "grad_norm": 4.907293796539307, "learning_rate": 1.6533857700930345e-05, "loss": 0.3659, "step": 9002 }, { "epoch": 29.518032786885247, "grad_norm": 4.3492326736450195, "learning_rate": 1.6533053788967227e-05, "loss": 0.5899, "step": 9003 }, { "epoch": 29.521311475409835, "grad_norm": 5.200998306274414, "learning_rate": 1.6532249803337138e-05, "loss": 0.6227, "step": 9004 }, { "epoch": 29.524590163934427, "grad_norm": 4.860777854919434, "learning_rate": 1.653144574404914e-05, "loss": 0.5642, "step": 9005 }, { "epoch": 29.527868852459015, "grad_norm": 5.132019996643066, "learning_rate": 1.65306416111123e-05, "loss": 0.6027, "step": 9006 }, { "epoch": 29.531147540983607, "grad_norm": 5.1220808029174805, "learning_rate": 1.6529837404535685e-05, "loss": 0.834, "step": 9007 }, { "epoch": 29.534426229508195, "grad_norm": 5.100808143615723, "learning_rate": 1.6529033124328364e-05, "loss": 0.5153, "step": 9008 }, { "epoch": 29.537704918032787, "grad_norm": 5.087825775146484, "learning_rate": 1.6528228770499406e-05, "loss": 0.6449, "step": 9009 }, { "epoch": 29.540983606557376, "grad_norm": 4.921532154083252, "learning_rate": 1.6527424343057884e-05, "loss": 0.7909, "step": 9010 }, { "epoch": 29.544262295081968, "grad_norm": 4.724952697753906, "learning_rate": 1.652661984201286e-05, "loss": 0.493, "step": 9011 }, { "epoch": 29.547540983606556, "grad_norm": 5.360681533813477, "learning_rate": 1.6525815267373415e-05, "loss": 0.7485, "step": 9012 }, { "epoch": 29.550819672131148, "grad_norm": 4.523497104644775, "learning_rate": 1.652501061914862e-05, "loss": 0.9035, "step": 9013 }, { "epoch": 29.554098360655736, "grad_norm": 7.955570220947266, "learning_rate": 1.652420589734754e-05, "loss": 0.5703, "step": 9014 }, { "epoch": 29.557377049180328, "grad_norm": 5.111156463623047, "learning_rate": 1.6523401101979258e-05, "loss": 0.6115, "step": 9015 }, { "epoch": 29.560655737704916, "grad_norm": 8.019867897033691, "learning_rate": 1.6522596233052847e-05, "loss": 0.7424, "step": 9016 }, { "epoch": 29.56393442622951, "grad_norm": 4.412481784820557, "learning_rate": 1.6521791290577384e-05, "loss": 0.6958, "step": 9017 }, { "epoch": 29.567213114754097, "grad_norm": 5.225757122039795, "learning_rate": 1.6520986274561937e-05, "loss": 0.8434, "step": 9018 }, { "epoch": 29.57049180327869, "grad_norm": 4.348138332366943, "learning_rate": 1.652018118501559e-05, "loss": 0.6836, "step": 9019 }, { "epoch": 29.57377049180328, "grad_norm": 6.007503032684326, "learning_rate": 1.6519376021947424e-05, "loss": 0.6127, "step": 9020 }, { "epoch": 29.57704918032787, "grad_norm": 4.647149562835693, "learning_rate": 1.6518570785366515e-05, "loss": 0.7596, "step": 9021 }, { "epoch": 29.58032786885246, "grad_norm": 5.574053764343262, "learning_rate": 1.651776547528194e-05, "loss": 0.7236, "step": 9022 }, { "epoch": 29.58360655737705, "grad_norm": 5.278926849365234, "learning_rate": 1.6516960091702787e-05, "loss": 0.598, "step": 9023 }, { "epoch": 29.58688524590164, "grad_norm": 10.781737327575684, "learning_rate": 1.6516154634638128e-05, "loss": 0.9318, "step": 9024 }, { "epoch": 29.59016393442623, "grad_norm": 4.407628059387207, "learning_rate": 1.6515349104097058e-05, "loss": 0.7161, "step": 9025 }, { "epoch": 29.59344262295082, "grad_norm": 6.667639255523682, "learning_rate": 1.6514543500088645e-05, "loss": 0.825, "step": 9026 }, { "epoch": 29.59672131147541, "grad_norm": 6.107189655303955, "learning_rate": 1.651373782262198e-05, "loss": 0.5155, "step": 9027 }, { "epoch": 29.6, "grad_norm": 6.065254211425781, "learning_rate": 1.6512932071706153e-05, "loss": 0.5648, "step": 9028 }, { "epoch": 29.60327868852459, "grad_norm": 4.623809337615967, "learning_rate": 1.6512126247350245e-05, "loss": 0.6226, "step": 9029 }, { "epoch": 29.60655737704918, "grad_norm": 4.9199700355529785, "learning_rate": 1.6511320349563345e-05, "loss": 0.577, "step": 9030 }, { "epoch": 29.60983606557377, "grad_norm": 8.09756851196289, "learning_rate": 1.6510514378354532e-05, "loss": 0.676, "step": 9031 }, { "epoch": 29.613114754098362, "grad_norm": 5.212972640991211, "learning_rate": 1.65097083337329e-05, "loss": 0.6966, "step": 9032 }, { "epoch": 29.61639344262295, "grad_norm": 5.83280611038208, "learning_rate": 1.6508902215707544e-05, "loss": 0.48, "step": 9033 }, { "epoch": 29.619672131147542, "grad_norm": 4.580689430236816, "learning_rate": 1.6508096024287543e-05, "loss": 0.6965, "step": 9034 }, { "epoch": 29.62295081967213, "grad_norm": 5.043497562408447, "learning_rate": 1.6507289759481992e-05, "loss": 0.6643, "step": 9035 }, { "epoch": 29.626229508196722, "grad_norm": 9.132221221923828, "learning_rate": 1.6506483421299987e-05, "loss": 0.7664, "step": 9036 }, { "epoch": 29.62950819672131, "grad_norm": 6.171043872833252, "learning_rate": 1.6505677009750614e-05, "loss": 0.6404, "step": 9037 }, { "epoch": 29.632786885245903, "grad_norm": 4.546595573425293, "learning_rate": 1.650487052484297e-05, "loss": 0.6851, "step": 9038 }, { "epoch": 29.63606557377049, "grad_norm": 5.312391757965088, "learning_rate": 1.6504063966586148e-05, "loss": 0.7113, "step": 9039 }, { "epoch": 29.639344262295083, "grad_norm": 6.8193488121032715, "learning_rate": 1.650325733498924e-05, "loss": 0.6783, "step": 9040 }, { "epoch": 29.64262295081967, "grad_norm": 5.416409015655518, "learning_rate": 1.6502450630061348e-05, "loss": 0.5157, "step": 9041 }, { "epoch": 29.645901639344263, "grad_norm": 6.687575817108154, "learning_rate": 1.6501643851811557e-05, "loss": 0.5621, "step": 9042 }, { "epoch": 29.64918032786885, "grad_norm": 4.354883193969727, "learning_rate": 1.6500837000248978e-05, "loss": 1.0711, "step": 9043 }, { "epoch": 29.652459016393443, "grad_norm": 4.665003776550293, "learning_rate": 1.65000300753827e-05, "loss": 0.6598, "step": 9044 }, { "epoch": 29.65573770491803, "grad_norm": 5.027308940887451, "learning_rate": 1.6499223077221824e-05, "loss": 0.483, "step": 9045 }, { "epoch": 29.659016393442624, "grad_norm": 4.794105052947998, "learning_rate": 1.649841600577545e-05, "loss": 0.4737, "step": 9046 }, { "epoch": 29.662295081967212, "grad_norm": 5.490495204925537, "learning_rate": 1.6497608861052682e-05, "loss": 0.6035, "step": 9047 }, { "epoch": 29.665573770491804, "grad_norm": 6.692791938781738, "learning_rate": 1.6496801643062616e-05, "loss": 0.6309, "step": 9048 }, { "epoch": 29.668852459016392, "grad_norm": 4.98020601272583, "learning_rate": 1.6495994351814358e-05, "loss": 0.533, "step": 9049 }, { "epoch": 29.672131147540984, "grad_norm": 5.895736217498779, "learning_rate": 1.649518698731701e-05, "loss": 0.4733, "step": 9050 }, { "epoch": 29.675409836065572, "grad_norm": 4.990926265716553, "learning_rate": 1.649437954957967e-05, "loss": 0.6907, "step": 9051 }, { "epoch": 29.678688524590164, "grad_norm": 4.6254496574401855, "learning_rate": 1.6493572038611452e-05, "loss": 0.6604, "step": 9052 }, { "epoch": 29.681967213114753, "grad_norm": 5.142348766326904, "learning_rate": 1.649276445442146e-05, "loss": 0.5309, "step": 9053 }, { "epoch": 29.685245901639345, "grad_norm": 4.532542705535889, "learning_rate": 1.649195679701879e-05, "loss": 0.7039, "step": 9054 }, { "epoch": 29.688524590163933, "grad_norm": 5.526631832122803, "learning_rate": 1.6491149066412566e-05, "loss": 0.6978, "step": 9055 }, { "epoch": 29.691803278688525, "grad_norm": 5.487100601196289, "learning_rate": 1.6490341262611885e-05, "loss": 0.563, "step": 9056 }, { "epoch": 29.695081967213113, "grad_norm": 5.08480978012085, "learning_rate": 1.6489533385625856e-05, "loss": 0.7231, "step": 9057 }, { "epoch": 29.698360655737705, "grad_norm": 8.34067153930664, "learning_rate": 1.648872543546359e-05, "loss": 0.8959, "step": 9058 }, { "epoch": 29.701639344262293, "grad_norm": 4.896616458892822, "learning_rate": 1.6487917412134202e-05, "loss": 0.6567, "step": 9059 }, { "epoch": 29.704918032786885, "grad_norm": 5.729114532470703, "learning_rate": 1.6487109315646795e-05, "loss": 0.4691, "step": 9060 }, { "epoch": 29.708196721311474, "grad_norm": 5.475156307220459, "learning_rate": 1.6486301146010487e-05, "loss": 0.7561, "step": 9061 }, { "epoch": 29.711475409836066, "grad_norm": 9.712982177734375, "learning_rate": 1.648549290323439e-05, "loss": 0.7549, "step": 9062 }, { "epoch": 29.714754098360658, "grad_norm": 4.524073123931885, "learning_rate": 1.648468458732762e-05, "loss": 0.6353, "step": 9063 }, { "epoch": 29.718032786885246, "grad_norm": 4.625755310058594, "learning_rate": 1.6483876198299284e-05, "loss": 0.5428, "step": 9064 }, { "epoch": 29.721311475409838, "grad_norm": 6.229811191558838, "learning_rate": 1.6483067736158504e-05, "loss": 0.5748, "step": 9065 }, { "epoch": 29.724590163934426, "grad_norm": 4.504016876220703, "learning_rate": 1.6482259200914397e-05, "loss": 0.6344, "step": 9066 }, { "epoch": 29.727868852459018, "grad_norm": 5.551501274108887, "learning_rate": 1.6481450592576076e-05, "loss": 0.644, "step": 9067 }, { "epoch": 29.731147540983606, "grad_norm": 5.525891304016113, "learning_rate": 1.6480641911152662e-05, "loss": 0.6036, "step": 9068 }, { "epoch": 29.7344262295082, "grad_norm": 4.746159076690674, "learning_rate": 1.647983315665327e-05, "loss": 0.727, "step": 9069 }, { "epoch": 29.737704918032787, "grad_norm": 4.211921691894531, "learning_rate": 1.647902432908702e-05, "loss": 0.7675, "step": 9070 }, { "epoch": 29.74098360655738, "grad_norm": 4.696943283081055, "learning_rate": 1.647821542846304e-05, "loss": 0.6544, "step": 9071 }, { "epoch": 29.744262295081967, "grad_norm": 4.424302577972412, "learning_rate": 1.647740645479044e-05, "loss": 0.4866, "step": 9072 }, { "epoch": 29.74754098360656, "grad_norm": 5.57074499130249, "learning_rate": 1.6476597408078352e-05, "loss": 0.6254, "step": 9073 }, { "epoch": 29.750819672131147, "grad_norm": 4.065980911254883, "learning_rate": 1.647578828833589e-05, "loss": 0.8617, "step": 9074 }, { "epoch": 29.75409836065574, "grad_norm": 4.6416707038879395, "learning_rate": 1.6474979095572184e-05, "loss": 0.6315, "step": 9075 }, { "epoch": 29.757377049180327, "grad_norm": 4.657674312591553, "learning_rate": 1.6474169829796353e-05, "loss": 0.5335, "step": 9076 }, { "epoch": 29.76065573770492, "grad_norm": 4.337388515472412, "learning_rate": 1.6473360491017533e-05, "loss": 0.7637, "step": 9077 }, { "epoch": 29.763934426229508, "grad_norm": 4.770705223083496, "learning_rate": 1.6472551079244836e-05, "loss": 0.4985, "step": 9078 }, { "epoch": 29.7672131147541, "grad_norm": 4.753401279449463, "learning_rate": 1.64717415944874e-05, "loss": 0.825, "step": 9079 }, { "epoch": 29.770491803278688, "grad_norm": 3.7411186695098877, "learning_rate": 1.6470932036754348e-05, "loss": 0.7049, "step": 9080 }, { "epoch": 29.77377049180328, "grad_norm": 5.072325706481934, "learning_rate": 1.647012240605481e-05, "loss": 0.6808, "step": 9081 }, { "epoch": 29.777049180327868, "grad_norm": 5.231274604797363, "learning_rate": 1.6469312702397912e-05, "loss": 0.7162, "step": 9082 }, { "epoch": 29.78032786885246, "grad_norm": 5.080100059509277, "learning_rate": 1.6468502925792787e-05, "loss": 0.443, "step": 9083 }, { "epoch": 29.78360655737705, "grad_norm": 4.771543502807617, "learning_rate": 1.6467693076248567e-05, "loss": 0.595, "step": 9084 }, { "epoch": 29.78688524590164, "grad_norm": 6.183279514312744, "learning_rate": 1.6466883153774383e-05, "loss": 0.7155, "step": 9085 }, { "epoch": 29.79016393442623, "grad_norm": 4.60988187789917, "learning_rate": 1.6466073158379367e-05, "loss": 0.7085, "step": 9086 }, { "epoch": 29.79344262295082, "grad_norm": 4.727724075317383, "learning_rate": 1.6465263090072652e-05, "loss": 0.9537, "step": 9087 }, { "epoch": 29.79672131147541, "grad_norm": 4.739785194396973, "learning_rate": 1.6464452948863377e-05, "loss": 0.623, "step": 9088 }, { "epoch": 29.8, "grad_norm": 7.657869815826416, "learning_rate": 1.646364273476067e-05, "loss": 0.4268, "step": 9089 }, { "epoch": 29.80327868852459, "grad_norm": 8.229116439819336, "learning_rate": 1.646283244777367e-05, "loss": 0.4603, "step": 9090 }, { "epoch": 29.80655737704918, "grad_norm": 4.294071197509766, "learning_rate": 1.6462022087911516e-05, "loss": 0.7417, "step": 9091 }, { "epoch": 29.80983606557377, "grad_norm": 5.7779741287231445, "learning_rate": 1.6461211655183347e-05, "loss": 0.7221, "step": 9092 }, { "epoch": 29.81311475409836, "grad_norm": 6.992790699005127, "learning_rate": 1.6460401149598296e-05, "loss": 0.8635, "step": 9093 }, { "epoch": 29.81639344262295, "grad_norm": 4.45933723449707, "learning_rate": 1.6459590571165504e-05, "loss": 0.5574, "step": 9094 }, { "epoch": 29.81967213114754, "grad_norm": 4.654877185821533, "learning_rate": 1.6458779919894117e-05, "loss": 0.6396, "step": 9095 }, { "epoch": 29.82295081967213, "grad_norm": 6.883938312530518, "learning_rate": 1.6457969195793264e-05, "loss": 0.6412, "step": 9096 }, { "epoch": 29.82622950819672, "grad_norm": 4.545663833618164, "learning_rate": 1.6457158398872098e-05, "loss": 0.5898, "step": 9097 }, { "epoch": 29.82950819672131, "grad_norm": 5.271470546722412, "learning_rate": 1.6456347529139756e-05, "loss": 0.5753, "step": 9098 }, { "epoch": 29.832786885245902, "grad_norm": 5.153072834014893, "learning_rate": 1.6455536586605384e-05, "loss": 0.6787, "step": 9099 }, { "epoch": 29.83606557377049, "grad_norm": 10.62690544128418, "learning_rate": 1.6454725571278124e-05, "loss": 0.5622, "step": 9100 }, { "epoch": 29.839344262295082, "grad_norm": 5.026702880859375, "learning_rate": 1.645391448316712e-05, "loss": 0.4932, "step": 9101 }, { "epoch": 29.84262295081967, "grad_norm": 5.522712707519531, "learning_rate": 1.645310332228152e-05, "loss": 0.7665, "step": 9102 }, { "epoch": 29.845901639344262, "grad_norm": 5.6325249671936035, "learning_rate": 1.6452292088630475e-05, "loss": 0.7401, "step": 9103 }, { "epoch": 29.84918032786885, "grad_norm": 4.537435054779053, "learning_rate": 1.645148078222312e-05, "loss": 0.581, "step": 9104 }, { "epoch": 29.852459016393443, "grad_norm": 4.536801815032959, "learning_rate": 1.645066940306862e-05, "loss": 0.5742, "step": 9105 }, { "epoch": 29.855737704918035, "grad_norm": 4.6357340812683105, "learning_rate": 1.644985795117611e-05, "loss": 0.7091, "step": 9106 }, { "epoch": 29.859016393442623, "grad_norm": 4.766788005828857, "learning_rate": 1.6449046426554747e-05, "loss": 0.6078, "step": 9107 }, { "epoch": 29.862295081967215, "grad_norm": 4.973330974578857, "learning_rate": 1.6448234829213684e-05, "loss": 0.3713, "step": 9108 }, { "epoch": 29.865573770491803, "grad_norm": 4.833821773529053, "learning_rate": 1.6447423159162062e-05, "loss": 0.8397, "step": 9109 }, { "epoch": 29.868852459016395, "grad_norm": 12.452250480651855, "learning_rate": 1.6446611416409047e-05, "loss": 0.5936, "step": 9110 }, { "epoch": 29.872131147540983, "grad_norm": 5.2778754234313965, "learning_rate": 1.6445799600963782e-05, "loss": 0.7224, "step": 9111 }, { "epoch": 29.875409836065575, "grad_norm": 4.149826526641846, "learning_rate": 1.6444987712835424e-05, "loss": 0.6055, "step": 9112 }, { "epoch": 29.878688524590164, "grad_norm": 4.9878950119018555, "learning_rate": 1.644417575203313e-05, "loss": 0.4178, "step": 9113 }, { "epoch": 29.881967213114756, "grad_norm": 6.708062648773193, "learning_rate": 1.6443363718566053e-05, "loss": 0.6347, "step": 9114 }, { "epoch": 29.885245901639344, "grad_norm": 5.709973335266113, "learning_rate": 1.644255161244335e-05, "loss": 0.5643, "step": 9115 }, { "epoch": 29.888524590163936, "grad_norm": 3.5432705879211426, "learning_rate": 1.644173943367418e-05, "loss": 0.489, "step": 9116 }, { "epoch": 29.891803278688524, "grad_norm": 5.342555046081543, "learning_rate": 1.6440927182267698e-05, "loss": 0.641, "step": 9117 }, { "epoch": 29.895081967213116, "grad_norm": 4.713717460632324, "learning_rate": 1.6440114858233068e-05, "loss": 0.7152, "step": 9118 }, { "epoch": 29.898360655737704, "grad_norm": 5.311267375946045, "learning_rate": 1.6439302461579447e-05, "loss": 0.5465, "step": 9119 }, { "epoch": 29.901639344262296, "grad_norm": 5.982922554016113, "learning_rate": 1.6438489992315993e-05, "loss": 0.6442, "step": 9120 }, { "epoch": 29.904918032786885, "grad_norm": 5.357583999633789, "learning_rate": 1.6437677450451875e-05, "loss": 0.6667, "step": 9121 }, { "epoch": 29.908196721311477, "grad_norm": 5.887548923492432, "learning_rate": 1.6436864835996243e-05, "loss": 0.4939, "step": 9122 }, { "epoch": 29.911475409836065, "grad_norm": 4.186419486999512, "learning_rate": 1.6436052148958274e-05, "loss": 0.4911, "step": 9123 }, { "epoch": 29.914754098360657, "grad_norm": 5.9883623123168945, "learning_rate": 1.643523938934712e-05, "loss": 0.5988, "step": 9124 }, { "epoch": 29.918032786885245, "grad_norm": 5.107462406158447, "learning_rate": 1.6434426557171955e-05, "loss": 0.4958, "step": 9125 }, { "epoch": 29.921311475409837, "grad_norm": 7.087255954742432, "learning_rate": 1.6433613652441937e-05, "loss": 0.4914, "step": 9126 }, { "epoch": 29.924590163934425, "grad_norm": 5.261238098144531, "learning_rate": 1.6432800675166238e-05, "loss": 0.5043, "step": 9127 }, { "epoch": 29.927868852459017, "grad_norm": 5.6240692138671875, "learning_rate": 1.6431987625354022e-05, "loss": 0.5219, "step": 9128 }, { "epoch": 29.931147540983606, "grad_norm": 4.789888858795166, "learning_rate": 1.6431174503014458e-05, "loss": 0.4581, "step": 9129 }, { "epoch": 29.934426229508198, "grad_norm": 5.498802185058594, "learning_rate": 1.6430361308156716e-05, "loss": 0.7674, "step": 9130 }, { "epoch": 29.937704918032786, "grad_norm": 4.820075988769531, "learning_rate": 1.6429548040789963e-05, "loss": 0.7377, "step": 9131 }, { "epoch": 29.940983606557378, "grad_norm": 5.356418132781982, "learning_rate": 1.642873470092337e-05, "loss": 0.5809, "step": 9132 }, { "epoch": 29.944262295081966, "grad_norm": 4.648069858551025, "learning_rate": 1.6427921288566114e-05, "loss": 0.8854, "step": 9133 }, { "epoch": 29.947540983606558, "grad_norm": 5.228470325469971, "learning_rate": 1.6427107803727354e-05, "loss": 0.6458, "step": 9134 }, { "epoch": 29.950819672131146, "grad_norm": 4.898388385772705, "learning_rate": 1.6426294246416276e-05, "loss": 0.6486, "step": 9135 }, { "epoch": 29.95409836065574, "grad_norm": 5.551548957824707, "learning_rate": 1.642548061664205e-05, "loss": 0.7522, "step": 9136 }, { "epoch": 29.957377049180327, "grad_norm": 4.634061336517334, "learning_rate": 1.6424666914413848e-05, "loss": 0.6079, "step": 9137 }, { "epoch": 29.96065573770492, "grad_norm": 4.659641265869141, "learning_rate": 1.6423853139740845e-05, "loss": 0.7556, "step": 9138 }, { "epoch": 29.963934426229507, "grad_norm": 4.844051837921143, "learning_rate": 1.642303929263222e-05, "loss": 0.6681, "step": 9139 }, { "epoch": 29.9672131147541, "grad_norm": 5.321391582489014, "learning_rate": 1.6422225373097148e-05, "loss": 0.5285, "step": 9140 }, { "epoch": 29.970491803278687, "grad_norm": 4.777748107910156, "learning_rate": 1.6421411381144808e-05, "loss": 0.7029, "step": 9141 }, { "epoch": 29.97377049180328, "grad_norm": 4.788484573364258, "learning_rate": 1.6420597316784378e-05, "loss": 0.5411, "step": 9142 }, { "epoch": 29.977049180327867, "grad_norm": 5.959916591644287, "learning_rate": 1.6419783180025034e-05, "loss": 0.8397, "step": 9143 }, { "epoch": 29.98032786885246, "grad_norm": 4.415522575378418, "learning_rate": 1.6418968970875966e-05, "loss": 0.643, "step": 9144 }, { "epoch": 29.983606557377048, "grad_norm": 5.767632007598877, "learning_rate": 1.6418154689346345e-05, "loss": 0.4354, "step": 9145 }, { "epoch": 29.98688524590164, "grad_norm": 4.812347888946533, "learning_rate": 1.6417340335445358e-05, "loss": 0.4414, "step": 9146 }, { "epoch": 29.990163934426228, "grad_norm": 5.022370338439941, "learning_rate": 1.6416525909182187e-05, "loss": 0.3968, "step": 9147 }, { "epoch": 29.99344262295082, "grad_norm": 4.151702880859375, "learning_rate": 1.641571141056601e-05, "loss": 0.7843, "step": 9148 }, { "epoch": 29.99672131147541, "grad_norm": 4.8863325119018555, "learning_rate": 1.6414896839606024e-05, "loss": 0.5731, "step": 9149 }, { "epoch": 30.0, "grad_norm": 6.646637439727783, "learning_rate": 1.6414082196311402e-05, "loss": 0.559, "step": 9150 }, { "epoch": 30.003278688524592, "grad_norm": 4.42409086227417, "learning_rate": 1.6413267480691334e-05, "loss": 0.7675, "step": 9151 }, { "epoch": 30.00655737704918, "grad_norm": 4.532552719116211, "learning_rate": 1.6412452692755008e-05, "loss": 0.5212, "step": 9152 }, { "epoch": 30.009836065573772, "grad_norm": 5.686970233917236, "learning_rate": 1.6411637832511613e-05, "loss": 0.7549, "step": 9153 }, { "epoch": 30.01311475409836, "grad_norm": 4.495794773101807, "learning_rate": 1.6410822899970327e-05, "loss": 0.6247, "step": 9154 }, { "epoch": 30.016393442622952, "grad_norm": 4.670166492462158, "learning_rate": 1.6410007895140352e-05, "loss": 0.7566, "step": 9155 }, { "epoch": 30.01967213114754, "grad_norm": 6.019744396209717, "learning_rate": 1.6409192818030875e-05, "loss": 0.4275, "step": 9156 }, { "epoch": 30.022950819672133, "grad_norm": 5.254430770874023, "learning_rate": 1.6408377668651082e-05, "loss": 0.4115, "step": 9157 }, { "epoch": 30.02622950819672, "grad_norm": 5.074186325073242, "learning_rate": 1.640756244701017e-05, "loss": 0.6245, "step": 9158 }, { "epoch": 30.029508196721313, "grad_norm": 9.933045387268066, "learning_rate": 1.6406747153117328e-05, "loss": 0.6976, "step": 9159 }, { "epoch": 30.0327868852459, "grad_norm": 4.38885498046875, "learning_rate": 1.6405931786981753e-05, "loss": 0.5969, "step": 9160 }, { "epoch": 30.036065573770493, "grad_norm": 4.778743743896484, "learning_rate": 1.6405116348612636e-05, "loss": 0.6738, "step": 9161 }, { "epoch": 30.03934426229508, "grad_norm": 10.308232307434082, "learning_rate": 1.640430083801917e-05, "loss": 0.5611, "step": 9162 }, { "epoch": 30.042622950819673, "grad_norm": 4.933079719543457, "learning_rate": 1.6403485255210555e-05, "loss": 0.458, "step": 9163 }, { "epoch": 30.04590163934426, "grad_norm": 4.892847537994385, "learning_rate": 1.6402669600195986e-05, "loss": 0.4506, "step": 9164 }, { "epoch": 30.049180327868854, "grad_norm": 4.017905235290527, "learning_rate": 1.640185387298466e-05, "loss": 0.6358, "step": 9165 }, { "epoch": 30.052459016393442, "grad_norm": 4.285683631896973, "learning_rate": 1.6401038073585772e-05, "loss": 0.595, "step": 9166 }, { "epoch": 30.055737704918034, "grad_norm": 4.642520904541016, "learning_rate": 1.6400222202008528e-05, "loss": 0.6418, "step": 9167 }, { "epoch": 30.059016393442622, "grad_norm": 4.464980602264404, "learning_rate": 1.6399406258262125e-05, "loss": 0.6828, "step": 9168 }, { "epoch": 30.062295081967214, "grad_norm": 4.046326160430908, "learning_rate": 1.639859024235576e-05, "loss": 0.5071, "step": 9169 }, { "epoch": 30.065573770491802, "grad_norm": 5.154852867126465, "learning_rate": 1.639777415429864e-05, "loss": 0.4195, "step": 9170 }, { "epoch": 30.068852459016394, "grad_norm": 4.5825886726379395, "learning_rate": 1.6396957994099962e-05, "loss": 0.8288, "step": 9171 }, { "epoch": 30.072131147540983, "grad_norm": 4.414795875549316, "learning_rate": 1.639614176176893e-05, "loss": 0.6702, "step": 9172 }, { "epoch": 30.075409836065575, "grad_norm": 5.668283939361572, "learning_rate": 1.6395325457314752e-05, "loss": 0.7119, "step": 9173 }, { "epoch": 30.078688524590163, "grad_norm": 4.8356614112854, "learning_rate": 1.639450908074663e-05, "loss": 0.6325, "step": 9174 }, { "epoch": 30.081967213114755, "grad_norm": 5.003342628479004, "learning_rate": 1.6393692632073766e-05, "loss": 0.4411, "step": 9175 }, { "epoch": 30.085245901639343, "grad_norm": 4.552889823913574, "learning_rate": 1.6392876111305372e-05, "loss": 0.6125, "step": 9176 }, { "epoch": 30.088524590163935, "grad_norm": 4.432124137878418, "learning_rate": 1.6392059518450655e-05, "loss": 0.4835, "step": 9177 }, { "epoch": 30.091803278688523, "grad_norm": 4.25898551940918, "learning_rate": 1.6391242853518822e-05, "loss": 0.5474, "step": 9178 }, { "epoch": 30.095081967213115, "grad_norm": 6.194394588470459, "learning_rate": 1.6390426116519075e-05, "loss": 0.5689, "step": 9179 }, { "epoch": 30.098360655737704, "grad_norm": 4.019896984100342, "learning_rate": 1.638960930746063e-05, "loss": 0.8018, "step": 9180 }, { "epoch": 30.101639344262296, "grad_norm": 5.335803985595703, "learning_rate": 1.6388792426352702e-05, "loss": 0.4468, "step": 9181 }, { "epoch": 30.104918032786884, "grad_norm": 5.827856063842773, "learning_rate": 1.6387975473204495e-05, "loss": 0.5171, "step": 9182 }, { "epoch": 30.108196721311476, "grad_norm": 4.724811553955078, "learning_rate": 1.638715844802522e-05, "loss": 0.5027, "step": 9183 }, { "epoch": 30.111475409836064, "grad_norm": 5.526468276977539, "learning_rate": 1.6386341350824094e-05, "loss": 0.5037, "step": 9184 }, { "epoch": 30.114754098360656, "grad_norm": 4.745365142822266, "learning_rate": 1.6385524181610325e-05, "loss": 0.5524, "step": 9185 }, { "epoch": 30.118032786885244, "grad_norm": 5.398172378540039, "learning_rate": 1.6384706940393138e-05, "loss": 0.7301, "step": 9186 }, { "epoch": 30.121311475409836, "grad_norm": 3.7530219554901123, "learning_rate": 1.6383889627181738e-05, "loss": 0.7888, "step": 9187 }, { "epoch": 30.124590163934425, "grad_norm": 4.764353275299072, "learning_rate": 1.638307224198535e-05, "loss": 0.5573, "step": 9188 }, { "epoch": 30.127868852459017, "grad_norm": 4.227126121520996, "learning_rate": 1.6382254784813175e-05, "loss": 0.6827, "step": 9189 }, { "epoch": 30.131147540983605, "grad_norm": 4.260341167449951, "learning_rate": 1.6381437255674452e-05, "loss": 0.5943, "step": 9190 }, { "epoch": 30.134426229508197, "grad_norm": 6.028984546661377, "learning_rate": 1.6380619654578384e-05, "loss": 0.5485, "step": 9191 }, { "epoch": 30.137704918032785, "grad_norm": 5.043233394622803, "learning_rate": 1.6379801981534198e-05, "loss": 0.7818, "step": 9192 }, { "epoch": 30.140983606557377, "grad_norm": 4.724013805389404, "learning_rate": 1.6378984236551108e-05, "loss": 0.4557, "step": 9193 }, { "epoch": 30.14426229508197, "grad_norm": 4.717303276062012, "learning_rate": 1.6378166419638342e-05, "loss": 0.414, "step": 9194 }, { "epoch": 30.147540983606557, "grad_norm": 5.11812162399292, "learning_rate": 1.6377348530805114e-05, "loss": 0.8401, "step": 9195 }, { "epoch": 30.15081967213115, "grad_norm": 6.7477126121521, "learning_rate": 1.637653057006065e-05, "loss": 0.7449, "step": 9196 }, { "epoch": 30.154098360655738, "grad_norm": 6.118381500244141, "learning_rate": 1.6375712537414178e-05, "loss": 0.6389, "step": 9197 }, { "epoch": 30.15737704918033, "grad_norm": 5.453896522521973, "learning_rate": 1.6374894432874915e-05, "loss": 0.4701, "step": 9198 }, { "epoch": 30.160655737704918, "grad_norm": 5.620576858520508, "learning_rate": 1.637407625645209e-05, "loss": 0.712, "step": 9199 }, { "epoch": 30.16393442622951, "grad_norm": 3.8706538677215576, "learning_rate": 1.6373258008154928e-05, "loss": 0.7311, "step": 9200 }, { "epoch": 30.167213114754098, "grad_norm": 4.596757411956787, "learning_rate": 1.6372439687992658e-05, "loss": 0.9484, "step": 9201 }, { "epoch": 30.17049180327869, "grad_norm": 4.794179916381836, "learning_rate": 1.6371621295974503e-05, "loss": 0.5812, "step": 9202 }, { "epoch": 30.17377049180328, "grad_norm": 4.630975723266602, "learning_rate": 1.6370802832109692e-05, "loss": 0.6646, "step": 9203 }, { "epoch": 30.17704918032787, "grad_norm": 5.378143787384033, "learning_rate": 1.6369984296407454e-05, "loss": 0.7814, "step": 9204 }, { "epoch": 30.18032786885246, "grad_norm": 4.992450714111328, "learning_rate": 1.6369165688877022e-05, "loss": 0.519, "step": 9205 }, { "epoch": 30.18360655737705, "grad_norm": 4.021152019500732, "learning_rate": 1.6368347009527626e-05, "loss": 0.7836, "step": 9206 }, { "epoch": 30.18688524590164, "grad_norm": 4.736091613769531, "learning_rate": 1.6367528258368493e-05, "loss": 0.5595, "step": 9207 }, { "epoch": 30.19016393442623, "grad_norm": 4.7711405754089355, "learning_rate": 1.636670943540886e-05, "loss": 0.5358, "step": 9208 }, { "epoch": 30.19344262295082, "grad_norm": 4.616166591644287, "learning_rate": 1.6365890540657957e-05, "loss": 0.8229, "step": 9209 }, { "epoch": 30.19672131147541, "grad_norm": 4.2184343338012695, "learning_rate": 1.636507157412502e-05, "loss": 0.488, "step": 9210 }, { "epoch": 30.2, "grad_norm": 4.802011966705322, "learning_rate": 1.6364252535819284e-05, "loss": 0.6067, "step": 9211 }, { "epoch": 30.20327868852459, "grad_norm": 5.051175117492676, "learning_rate": 1.6363433425749984e-05, "loss": 0.833, "step": 9212 }, { "epoch": 30.20655737704918, "grad_norm": 4.842356204986572, "learning_rate": 1.6362614243926352e-05, "loss": 0.5336, "step": 9213 }, { "epoch": 30.20983606557377, "grad_norm": 5.2239670753479, "learning_rate": 1.6361794990357634e-05, "loss": 0.432, "step": 9214 }, { "epoch": 30.21311475409836, "grad_norm": 7.265089988708496, "learning_rate": 1.6360975665053058e-05, "loss": 0.486, "step": 9215 }, { "epoch": 30.21639344262295, "grad_norm": 11.480746269226074, "learning_rate": 1.636015626802187e-05, "loss": 0.657, "step": 9216 }, { "epoch": 30.21967213114754, "grad_norm": 4.106452465057373, "learning_rate": 1.6359336799273306e-05, "loss": 0.599, "step": 9217 }, { "epoch": 30.222950819672132, "grad_norm": 4.990832328796387, "learning_rate": 1.6358517258816608e-05, "loss": 0.8199, "step": 9218 }, { "epoch": 30.22622950819672, "grad_norm": 5.439542293548584, "learning_rate": 1.6357697646661018e-05, "loss": 0.6369, "step": 9219 }, { "epoch": 30.229508196721312, "grad_norm": 4.895003318786621, "learning_rate": 1.6356877962815774e-05, "loss": 0.8653, "step": 9220 }, { "epoch": 30.2327868852459, "grad_norm": 4.764125347137451, "learning_rate": 1.6356058207290127e-05, "loss": 0.4661, "step": 9221 }, { "epoch": 30.236065573770492, "grad_norm": 5.046658039093018, "learning_rate": 1.635523838009331e-05, "loss": 0.7267, "step": 9222 }, { "epoch": 30.23934426229508, "grad_norm": 5.219499588012695, "learning_rate": 1.6354418481234576e-05, "loss": 0.8096, "step": 9223 }, { "epoch": 30.242622950819673, "grad_norm": 4.429641246795654, "learning_rate": 1.6353598510723164e-05, "loss": 0.6134, "step": 9224 }, { "epoch": 30.24590163934426, "grad_norm": 4.720158100128174, "learning_rate": 1.6352778468568323e-05, "loss": 0.6772, "step": 9225 }, { "epoch": 30.249180327868853, "grad_norm": 4.433740139007568, "learning_rate": 1.63519583547793e-05, "loss": 0.2403, "step": 9226 }, { "epoch": 30.25245901639344, "grad_norm": 5.1401472091674805, "learning_rate": 1.6351138169365343e-05, "loss": 0.6339, "step": 9227 }, { "epoch": 30.255737704918033, "grad_norm": 4.795014381408691, "learning_rate": 1.6350317912335696e-05, "loss": 0.4314, "step": 9228 }, { "epoch": 30.25901639344262, "grad_norm": 11.676196098327637, "learning_rate": 1.6349497583699618e-05, "loss": 0.9437, "step": 9229 }, { "epoch": 30.262295081967213, "grad_norm": 4.228180408477783, "learning_rate": 1.6348677183466346e-05, "loss": 0.5499, "step": 9230 }, { "epoch": 30.2655737704918, "grad_norm": 4.961150169372559, "learning_rate": 1.6347856711645142e-05, "loss": 0.4146, "step": 9231 }, { "epoch": 30.268852459016394, "grad_norm": 4.324923992156982, "learning_rate": 1.6347036168245253e-05, "loss": 0.7972, "step": 9232 }, { "epoch": 30.272131147540982, "grad_norm": 4.428983211517334, "learning_rate": 1.634621555327593e-05, "loss": 0.6025, "step": 9233 }, { "epoch": 30.275409836065574, "grad_norm": 6.005762100219727, "learning_rate": 1.634539486674643e-05, "loss": 0.5466, "step": 9234 }, { "epoch": 30.278688524590162, "grad_norm": 4.850904941558838, "learning_rate": 1.6344574108666007e-05, "loss": 0.6853, "step": 9235 }, { "epoch": 30.281967213114754, "grad_norm": 6.0005669593811035, "learning_rate": 1.634375327904391e-05, "loss": 0.7887, "step": 9236 }, { "epoch": 30.285245901639342, "grad_norm": 4.3505682945251465, "learning_rate": 1.6342932377889404e-05, "loss": 0.5096, "step": 9237 }, { "epoch": 30.288524590163934, "grad_norm": 5.33385705947876, "learning_rate": 1.634211140521174e-05, "loss": 0.6657, "step": 9238 }, { "epoch": 30.291803278688526, "grad_norm": 5.67031192779541, "learning_rate": 1.6341290361020172e-05, "loss": 0.7879, "step": 9239 }, { "epoch": 30.295081967213115, "grad_norm": 3.7269136905670166, "learning_rate": 1.6340469245323963e-05, "loss": 0.6071, "step": 9240 }, { "epoch": 30.298360655737707, "grad_norm": 4.824344635009766, "learning_rate": 1.6339648058132372e-05, "loss": 0.4603, "step": 9241 }, { "epoch": 30.301639344262295, "grad_norm": 4.886895179748535, "learning_rate": 1.6338826799454657e-05, "loss": 0.7126, "step": 9242 }, { "epoch": 30.304918032786887, "grad_norm": 4.806293964385986, "learning_rate": 1.6338005469300077e-05, "loss": 0.5651, "step": 9243 }, { "epoch": 30.308196721311475, "grad_norm": 11.1897554397583, "learning_rate": 1.6337184067677898e-05, "loss": 0.642, "step": 9244 }, { "epoch": 30.311475409836067, "grad_norm": 4.77782678604126, "learning_rate": 1.633636259459738e-05, "loss": 0.7213, "step": 9245 }, { "epoch": 30.314754098360655, "grad_norm": 4.102348804473877, "learning_rate": 1.6335541050067784e-05, "loss": 0.4244, "step": 9246 }, { "epoch": 30.318032786885247, "grad_norm": 5.137932777404785, "learning_rate": 1.6334719434098375e-05, "loss": 0.6072, "step": 9247 }, { "epoch": 30.321311475409836, "grad_norm": 5.103407859802246, "learning_rate": 1.633389774669842e-05, "loss": 0.5331, "step": 9248 }, { "epoch": 30.324590163934428, "grad_norm": 4.620232105255127, "learning_rate": 1.6333075987877182e-05, "loss": 0.6453, "step": 9249 }, { "epoch": 30.327868852459016, "grad_norm": 5.017899036407471, "learning_rate": 1.6332254157643928e-05, "loss": 0.7444, "step": 9250 }, { "epoch": 30.331147540983608, "grad_norm": 5.129714012145996, "learning_rate": 1.633143225600792e-05, "loss": 0.746, "step": 9251 }, { "epoch": 30.334426229508196, "grad_norm": 5.009064674377441, "learning_rate": 1.6330610282978434e-05, "loss": 0.5119, "step": 9252 }, { "epoch": 30.337704918032788, "grad_norm": 4.927800178527832, "learning_rate": 1.6329788238564734e-05, "loss": 0.3804, "step": 9253 }, { "epoch": 30.340983606557376, "grad_norm": 6.902196884155273, "learning_rate": 1.6328966122776094e-05, "loss": 0.37, "step": 9254 }, { "epoch": 30.34426229508197, "grad_norm": 4.545997142791748, "learning_rate": 1.6328143935621773e-05, "loss": 0.5952, "step": 9255 }, { "epoch": 30.347540983606557, "grad_norm": 4.752719879150391, "learning_rate": 1.6327321677111053e-05, "loss": 0.5615, "step": 9256 }, { "epoch": 30.35081967213115, "grad_norm": 4.640316009521484, "learning_rate": 1.6326499347253206e-05, "loss": 0.6909, "step": 9257 }, { "epoch": 30.354098360655737, "grad_norm": 4.804101467132568, "learning_rate": 1.63256769460575e-05, "loss": 0.6936, "step": 9258 }, { "epoch": 30.35737704918033, "grad_norm": 5.037571907043457, "learning_rate": 1.6324854473533204e-05, "loss": 0.8238, "step": 9259 }, { "epoch": 30.360655737704917, "grad_norm": 7.330496788024902, "learning_rate": 1.63240319296896e-05, "loss": 0.5357, "step": 9260 }, { "epoch": 30.36393442622951, "grad_norm": 4.822771072387695, "learning_rate": 1.6323209314535962e-05, "loss": 0.4137, "step": 9261 }, { "epoch": 30.367213114754097, "grad_norm": 8.060385704040527, "learning_rate": 1.6322386628081564e-05, "loss": 0.8089, "step": 9262 }, { "epoch": 30.37049180327869, "grad_norm": 5.263943195343018, "learning_rate": 1.6321563870335686e-05, "loss": 0.6187, "step": 9263 }, { "epoch": 30.373770491803278, "grad_norm": 5.623281478881836, "learning_rate": 1.6320741041307598e-05, "loss": 0.7277, "step": 9264 }, { "epoch": 30.37704918032787, "grad_norm": 4.526220321655273, "learning_rate": 1.6319918141006583e-05, "loss": 0.5485, "step": 9265 }, { "epoch": 30.380327868852458, "grad_norm": 4.139386177062988, "learning_rate": 1.631909516944192e-05, "loss": 0.6002, "step": 9266 }, { "epoch": 30.38360655737705, "grad_norm": 23.14643669128418, "learning_rate": 1.631827212662289e-05, "loss": 0.4985, "step": 9267 }, { "epoch": 30.386885245901638, "grad_norm": 52.0564079284668, "learning_rate": 1.6317449012558776e-05, "loss": 0.401, "step": 9268 }, { "epoch": 30.39016393442623, "grad_norm": 5.2440266609191895, "learning_rate": 1.6316625827258852e-05, "loss": 0.6476, "step": 9269 }, { "epoch": 30.39344262295082, "grad_norm": 4.914034366607666, "learning_rate": 1.6315802570732405e-05, "loss": 0.5264, "step": 9270 }, { "epoch": 30.39672131147541, "grad_norm": 4.600362300872803, "learning_rate": 1.631497924298872e-05, "loss": 0.4505, "step": 9271 }, { "epoch": 30.4, "grad_norm": 8.88757610321045, "learning_rate": 1.6314155844037074e-05, "loss": 0.6154, "step": 9272 }, { "epoch": 30.40327868852459, "grad_norm": 6.361639499664307, "learning_rate": 1.6313332373886756e-05, "loss": 0.6747, "step": 9273 }, { "epoch": 30.40655737704918, "grad_norm": 4.967477321624756, "learning_rate": 1.6312508832547053e-05, "loss": 0.604, "step": 9274 }, { "epoch": 30.40983606557377, "grad_norm": 5.056888580322266, "learning_rate": 1.6311685220027248e-05, "loss": 0.5635, "step": 9275 }, { "epoch": 30.41311475409836, "grad_norm": 5.152839660644531, "learning_rate": 1.631086153633663e-05, "loss": 0.6101, "step": 9276 }, { "epoch": 30.41639344262295, "grad_norm": 5.4964070320129395, "learning_rate": 1.6310037781484485e-05, "loss": 0.6404, "step": 9277 }, { "epoch": 30.41967213114754, "grad_norm": 5.048109531402588, "learning_rate": 1.6309213955480105e-05, "loss": 0.5532, "step": 9278 }, { "epoch": 30.42295081967213, "grad_norm": 4.935185432434082, "learning_rate": 1.6308390058332778e-05, "loss": 0.5751, "step": 9279 }, { "epoch": 30.42622950819672, "grad_norm": 5.173807144165039, "learning_rate": 1.6307566090051793e-05, "loss": 0.5988, "step": 9280 }, { "epoch": 30.42950819672131, "grad_norm": 4.930876731872559, "learning_rate": 1.6306742050646444e-05, "loss": 0.6825, "step": 9281 }, { "epoch": 30.432786885245903, "grad_norm": 5.662764549255371, "learning_rate": 1.6305917940126018e-05, "loss": 0.7003, "step": 9282 }, { "epoch": 30.43606557377049, "grad_norm": 5.112129211425781, "learning_rate": 1.6305093758499815e-05, "loss": 0.5951, "step": 9283 }, { "epoch": 30.439344262295084, "grad_norm": 4.085355758666992, "learning_rate": 1.6304269505777123e-05, "loss": 0.7614, "step": 9284 }, { "epoch": 30.442622950819672, "grad_norm": 5.529591083526611, "learning_rate": 1.6303445181967234e-05, "loss": 0.6204, "step": 9285 }, { "epoch": 30.445901639344264, "grad_norm": 5.270167350769043, "learning_rate": 1.6302620787079447e-05, "loss": 0.5967, "step": 9286 }, { "epoch": 30.449180327868852, "grad_norm": 4.721025466918945, "learning_rate": 1.630179632112306e-05, "loss": 0.5845, "step": 9287 }, { "epoch": 30.452459016393444, "grad_norm": 4.733482837677002, "learning_rate": 1.630097178410737e-05, "loss": 0.4497, "step": 9288 }, { "epoch": 30.455737704918032, "grad_norm": 5.691894054412842, "learning_rate": 1.6300147176041668e-05, "loss": 0.7755, "step": 9289 }, { "epoch": 30.459016393442624, "grad_norm": 4.6677141189575195, "learning_rate": 1.629932249693526e-05, "loss": 0.5715, "step": 9290 }, { "epoch": 30.462295081967213, "grad_norm": 5.052703380584717, "learning_rate": 1.629849774679743e-05, "loss": 0.4218, "step": 9291 }, { "epoch": 30.465573770491805, "grad_norm": 5.227814674377441, "learning_rate": 1.62976729256375e-05, "loss": 0.621, "step": 9292 }, { "epoch": 30.468852459016393, "grad_norm": 4.407793045043945, "learning_rate": 1.6296848033464755e-05, "loss": 0.6242, "step": 9293 }, { "epoch": 30.472131147540985, "grad_norm": 5.399720668792725, "learning_rate": 1.6296023070288506e-05, "loss": 0.7993, "step": 9294 }, { "epoch": 30.475409836065573, "grad_norm": 4.485662460327148, "learning_rate": 1.6295198036118045e-05, "loss": 0.8573, "step": 9295 }, { "epoch": 30.478688524590165, "grad_norm": 9.098580360412598, "learning_rate": 1.6294372930962685e-05, "loss": 0.8211, "step": 9296 }, { "epoch": 30.481967213114753, "grad_norm": 4.359411716461182, "learning_rate": 1.629354775483172e-05, "loss": 0.4411, "step": 9297 }, { "epoch": 30.485245901639345, "grad_norm": 6.011110305786133, "learning_rate": 1.629272250773447e-05, "loss": 0.609, "step": 9298 }, { "epoch": 30.488524590163934, "grad_norm": 5.013959884643555, "learning_rate": 1.6291897189680222e-05, "loss": 0.6176, "step": 9299 }, { "epoch": 30.491803278688526, "grad_norm": 4.681021690368652, "learning_rate": 1.6291071800678295e-05, "loss": 0.568, "step": 9300 }, { "epoch": 30.495081967213114, "grad_norm": 5.404876232147217, "learning_rate": 1.6290246340737995e-05, "loss": 0.7571, "step": 9301 }, { "epoch": 30.498360655737706, "grad_norm": 5.632426738739014, "learning_rate": 1.6289420809868623e-05, "loss": 0.8223, "step": 9302 }, { "epoch": 30.501639344262294, "grad_norm": 5.107807159423828, "learning_rate": 1.6288595208079493e-05, "loss": 0.5198, "step": 9303 }, { "epoch": 30.504918032786886, "grad_norm": 5.149975299835205, "learning_rate": 1.6287769535379916e-05, "loss": 0.5925, "step": 9304 }, { "epoch": 30.508196721311474, "grad_norm": 4.976259231567383, "learning_rate": 1.62869437917792e-05, "loss": 0.6037, "step": 9305 }, { "epoch": 30.511475409836066, "grad_norm": 4.774158000946045, "learning_rate": 1.6286117977286648e-05, "loss": 0.6953, "step": 9306 }, { "epoch": 30.514754098360655, "grad_norm": 4.902290344238281, "learning_rate": 1.6285292091911585e-05, "loss": 0.7474, "step": 9307 }, { "epoch": 30.518032786885247, "grad_norm": 4.863598346710205, "learning_rate": 1.628446613566332e-05, "loss": 0.7169, "step": 9308 }, { "epoch": 30.521311475409835, "grad_norm": 5.008255958557129, "learning_rate": 1.6283640108551166e-05, "loss": 0.4124, "step": 9309 }, { "epoch": 30.524590163934427, "grad_norm": 5.3352155685424805, "learning_rate": 1.6282814010584433e-05, "loss": 0.5742, "step": 9310 }, { "epoch": 30.527868852459015, "grad_norm": 4.785151958465576, "learning_rate": 1.6281987841772444e-05, "loss": 0.5211, "step": 9311 }, { "epoch": 30.531147540983607, "grad_norm": 5.133955955505371, "learning_rate": 1.6281161602124507e-05, "loss": 0.6884, "step": 9312 }, { "epoch": 30.534426229508195, "grad_norm": 5.041179656982422, "learning_rate": 1.6280335291649944e-05, "loss": 0.7408, "step": 9313 }, { "epoch": 30.537704918032787, "grad_norm": 4.3462324142456055, "learning_rate": 1.627950891035807e-05, "loss": 0.8804, "step": 9314 }, { "epoch": 30.540983606557376, "grad_norm": 4.952993869781494, "learning_rate": 1.6278682458258202e-05, "loss": 0.4452, "step": 9315 }, { "epoch": 30.544262295081968, "grad_norm": 4.878548622131348, "learning_rate": 1.627785593535966e-05, "loss": 0.5674, "step": 9316 }, { "epoch": 30.547540983606556, "grad_norm": 4.3419904708862305, "learning_rate": 1.6277029341671772e-05, "loss": 0.5199, "step": 9317 }, { "epoch": 30.550819672131148, "grad_norm": 5.364060401916504, "learning_rate": 1.6276202677203845e-05, "loss": 0.4478, "step": 9318 }, { "epoch": 30.554098360655736, "grad_norm": 4.895165920257568, "learning_rate": 1.627537594196521e-05, "loss": 0.5167, "step": 9319 }, { "epoch": 30.557377049180328, "grad_norm": 4.982016086578369, "learning_rate": 1.627454913596519e-05, "loss": 0.5481, "step": 9320 }, { "epoch": 30.560655737704916, "grad_norm": 5.177812576293945, "learning_rate": 1.6273722259213095e-05, "loss": 0.4459, "step": 9321 }, { "epoch": 30.56393442622951, "grad_norm": 5.096415042877197, "learning_rate": 1.627289531171827e-05, "loss": 0.5625, "step": 9322 }, { "epoch": 30.567213114754097, "grad_norm": 5.823636531829834, "learning_rate": 1.6272068293490017e-05, "loss": 0.4821, "step": 9323 }, { "epoch": 30.57049180327869, "grad_norm": 5.248676300048828, "learning_rate": 1.627124120453768e-05, "loss": 0.5096, "step": 9324 }, { "epoch": 30.57377049180328, "grad_norm": 4.719134330749512, "learning_rate": 1.6270414044870575e-05, "loss": 0.5862, "step": 9325 }, { "epoch": 30.57704918032787, "grad_norm": 4.847756385803223, "learning_rate": 1.6269586814498035e-05, "loss": 0.4795, "step": 9326 }, { "epoch": 30.58032786885246, "grad_norm": 6.456963539123535, "learning_rate": 1.6268759513429384e-05, "loss": 0.7736, "step": 9327 }, { "epoch": 30.58360655737705, "grad_norm": 13.800947189331055, "learning_rate": 1.6267932141673946e-05, "loss": 0.6139, "step": 9328 }, { "epoch": 30.58688524590164, "grad_norm": 5.773799419403076, "learning_rate": 1.6267104699241066e-05, "loss": 0.6763, "step": 9329 }, { "epoch": 30.59016393442623, "grad_norm": 4.676631450653076, "learning_rate": 1.6266277186140058e-05, "loss": 0.6251, "step": 9330 }, { "epoch": 30.59344262295082, "grad_norm": 6.394673824310303, "learning_rate": 1.626544960238026e-05, "loss": 0.6959, "step": 9331 }, { "epoch": 30.59672131147541, "grad_norm": 4.912835597991943, "learning_rate": 1.6264621947971e-05, "loss": 0.8201, "step": 9332 }, { "epoch": 30.6, "grad_norm": 4.470970630645752, "learning_rate": 1.626379422292162e-05, "loss": 0.6179, "step": 9333 }, { "epoch": 30.60327868852459, "grad_norm": 4.542900085449219, "learning_rate": 1.6262966427241447e-05, "loss": 0.9872, "step": 9334 }, { "epoch": 30.60655737704918, "grad_norm": 7.762466907501221, "learning_rate": 1.6262138560939813e-05, "loss": 0.5271, "step": 9335 }, { "epoch": 30.60983606557377, "grad_norm": 4.913107872009277, "learning_rate": 1.6261310624026056e-05, "loss": 0.3566, "step": 9336 }, { "epoch": 30.613114754098362, "grad_norm": 4.2695631980896, "learning_rate": 1.626048261650951e-05, "loss": 0.7043, "step": 9337 }, { "epoch": 30.61639344262295, "grad_norm": 5.340053081512451, "learning_rate": 1.625965453839952e-05, "loss": 0.5648, "step": 9338 }, { "epoch": 30.619672131147542, "grad_norm": 6.987539768218994, "learning_rate": 1.6258826389705407e-05, "loss": 0.6323, "step": 9339 }, { "epoch": 30.62295081967213, "grad_norm": 5.324636936187744, "learning_rate": 1.6257998170436528e-05, "loss": 0.7935, "step": 9340 }, { "epoch": 30.626229508196722, "grad_norm": 4.701737880706787, "learning_rate": 1.6257169880602207e-05, "loss": 0.7163, "step": 9341 }, { "epoch": 30.62950819672131, "grad_norm": 4.898714542388916, "learning_rate": 1.625634152021179e-05, "loss": 0.6127, "step": 9342 }, { "epoch": 30.632786885245903, "grad_norm": 4.862240314483643, "learning_rate": 1.6255513089274622e-05, "loss": 0.5178, "step": 9343 }, { "epoch": 30.63606557377049, "grad_norm": 9.881221771240234, "learning_rate": 1.6254684587800037e-05, "loss": 0.475, "step": 9344 }, { "epoch": 30.639344262295083, "grad_norm": 5.244969367980957, "learning_rate": 1.625385601579738e-05, "loss": 0.6612, "step": 9345 }, { "epoch": 30.64262295081967, "grad_norm": 5.109424114227295, "learning_rate": 1.6253027373275994e-05, "loss": 0.4109, "step": 9346 }, { "epoch": 30.645901639344263, "grad_norm": 6.465315818786621, "learning_rate": 1.6252198660245223e-05, "loss": 0.5269, "step": 9347 }, { "epoch": 30.64918032786885, "grad_norm": 4.918891906738281, "learning_rate": 1.625136987671441e-05, "loss": 0.7943, "step": 9348 }, { "epoch": 30.652459016393443, "grad_norm": 4.301011562347412, "learning_rate": 1.6250541022692902e-05, "loss": 0.6031, "step": 9349 }, { "epoch": 30.65573770491803, "grad_norm": 5.764031887054443, "learning_rate": 1.624971209819005e-05, "loss": 0.5119, "step": 9350 }, { "epoch": 30.659016393442624, "grad_norm": 9.353511810302734, "learning_rate": 1.624888310321519e-05, "loss": 0.628, "step": 9351 }, { "epoch": 30.662295081967212, "grad_norm": 5.38772439956665, "learning_rate": 1.6248054037777678e-05, "loss": 0.6031, "step": 9352 }, { "epoch": 30.665573770491804, "grad_norm": 4.549745082855225, "learning_rate": 1.624722490188686e-05, "loss": 0.4162, "step": 9353 }, { "epoch": 30.668852459016392, "grad_norm": 5.654331684112549, "learning_rate": 1.6246395695552086e-05, "loss": 0.4474, "step": 9354 }, { "epoch": 30.672131147540984, "grad_norm": 4.818665027618408, "learning_rate": 1.6245566418782707e-05, "loss": 0.6147, "step": 9355 }, { "epoch": 30.675409836065572, "grad_norm": 4.690947532653809, "learning_rate": 1.6244737071588072e-05, "loss": 0.5257, "step": 9356 }, { "epoch": 30.678688524590164, "grad_norm": 5.531101226806641, "learning_rate": 1.6243907653977535e-05, "loss": 0.6059, "step": 9357 }, { "epoch": 30.681967213114753, "grad_norm": 5.149104595184326, "learning_rate": 1.6243078165960443e-05, "loss": 0.4957, "step": 9358 }, { "epoch": 30.685245901639345, "grad_norm": 4.271491527557373, "learning_rate": 1.6242248607546155e-05, "loss": 0.762, "step": 9359 }, { "epoch": 30.688524590163933, "grad_norm": 6.3456597328186035, "learning_rate": 1.6241418978744025e-05, "loss": 1.0646, "step": 9360 }, { "epoch": 30.691803278688525, "grad_norm": 4.966337203979492, "learning_rate": 1.624058927956341e-05, "loss": 0.8712, "step": 9361 }, { "epoch": 30.695081967213113, "grad_norm": 5.323174953460693, "learning_rate": 1.6239759510013657e-05, "loss": 0.5582, "step": 9362 }, { "epoch": 30.698360655737705, "grad_norm": 4.776826858520508, "learning_rate": 1.623892967010413e-05, "loss": 0.8068, "step": 9363 }, { "epoch": 30.701639344262293, "grad_norm": 4.3913140296936035, "learning_rate": 1.623809975984418e-05, "loss": 0.587, "step": 9364 }, { "epoch": 30.704918032786885, "grad_norm": 5.061715126037598, "learning_rate": 1.6237269779243176e-05, "loss": 0.4969, "step": 9365 }, { "epoch": 30.708196721311474, "grad_norm": 6.59266471862793, "learning_rate": 1.6236439728310467e-05, "loss": 0.8506, "step": 9366 }, { "epoch": 30.711475409836066, "grad_norm": 4.503658771514893, "learning_rate": 1.6235609607055414e-05, "loss": 0.4669, "step": 9367 }, { "epoch": 30.714754098360658, "grad_norm": 4.81048583984375, "learning_rate": 1.6234779415487382e-05, "loss": 0.7231, "step": 9368 }, { "epoch": 30.718032786885246, "grad_norm": 7.617380619049072, "learning_rate": 1.6233949153615728e-05, "loss": 0.698, "step": 9369 }, { "epoch": 30.721311475409838, "grad_norm": 4.367655277252197, "learning_rate": 1.623311882144982e-05, "loss": 0.5386, "step": 9370 }, { "epoch": 30.724590163934426, "grad_norm": 4.643325328826904, "learning_rate": 1.623228841899901e-05, "loss": 0.77, "step": 9371 }, { "epoch": 30.727868852459018, "grad_norm": 6.630308151245117, "learning_rate": 1.623145794627267e-05, "loss": 0.4763, "step": 9372 }, { "epoch": 30.731147540983606, "grad_norm": 4.290456771850586, "learning_rate": 1.6230627403280163e-05, "loss": 0.7871, "step": 9373 }, { "epoch": 30.7344262295082, "grad_norm": 4.471423149108887, "learning_rate": 1.6229796790030853e-05, "loss": 0.685, "step": 9374 }, { "epoch": 30.737704918032787, "grad_norm": 5.385819911956787, "learning_rate": 1.622896610653411e-05, "loss": 0.513, "step": 9375 }, { "epoch": 30.74098360655738, "grad_norm": 4.960444927215576, "learning_rate": 1.6228135352799296e-05, "loss": 0.6683, "step": 9376 }, { "epoch": 30.744262295081967, "grad_norm": 6.008176803588867, "learning_rate": 1.6227304528835775e-05, "loss": 0.6145, "step": 9377 }, { "epoch": 30.74754098360656, "grad_norm": 4.164052486419678, "learning_rate": 1.6226473634652928e-05, "loss": 0.5564, "step": 9378 }, { "epoch": 30.750819672131147, "grad_norm": 5.117547035217285, "learning_rate": 1.6225642670260116e-05, "loss": 0.56, "step": 9379 }, { "epoch": 30.75409836065574, "grad_norm": 5.182724475860596, "learning_rate": 1.6224811635666705e-05, "loss": 0.5599, "step": 9380 }, { "epoch": 30.757377049180327, "grad_norm": 5.117766380310059, "learning_rate": 1.622398053088208e-05, "loss": 0.5721, "step": 9381 }, { "epoch": 30.76065573770492, "grad_norm": 5.201872825622559, "learning_rate": 1.6223149355915592e-05, "loss": 0.6952, "step": 9382 }, { "epoch": 30.763934426229508, "grad_norm": 5.219587802886963, "learning_rate": 1.622231811077663e-05, "loss": 0.6949, "step": 9383 }, { "epoch": 30.7672131147541, "grad_norm": 5.860654354095459, "learning_rate": 1.6221486795474562e-05, "loss": 0.4982, "step": 9384 }, { "epoch": 30.770491803278688, "grad_norm": 4.645493984222412, "learning_rate": 1.622065541001876e-05, "loss": 0.6484, "step": 9385 }, { "epoch": 30.77377049180328, "grad_norm": 5.977803707122803, "learning_rate": 1.62198239544186e-05, "loss": 0.6904, "step": 9386 }, { "epoch": 30.777049180327868, "grad_norm": 4.432331085205078, "learning_rate": 1.621899242868346e-05, "loss": 0.8159, "step": 9387 }, { "epoch": 30.78032786885246, "grad_norm": 3.9528968334198, "learning_rate": 1.6218160832822713e-05, "loss": 0.5714, "step": 9388 }, { "epoch": 30.78360655737705, "grad_norm": 4.209991455078125, "learning_rate": 1.621732916684574e-05, "loss": 0.6747, "step": 9389 }, { "epoch": 30.78688524590164, "grad_norm": 5.196920871734619, "learning_rate": 1.6216497430761914e-05, "loss": 0.6024, "step": 9390 }, { "epoch": 30.79016393442623, "grad_norm": 4.536916255950928, "learning_rate": 1.6215665624580617e-05, "loss": 0.8083, "step": 9391 }, { "epoch": 30.79344262295082, "grad_norm": 5.148886203765869, "learning_rate": 1.6214833748311226e-05, "loss": 0.6007, "step": 9392 }, { "epoch": 30.79672131147541, "grad_norm": 5.096654415130615, "learning_rate": 1.621400180196312e-05, "loss": 0.6827, "step": 9393 }, { "epoch": 30.8, "grad_norm": 3.8888776302337646, "learning_rate": 1.6213169785545688e-05, "loss": 0.6318, "step": 9394 }, { "epoch": 30.80327868852459, "grad_norm": 5.79014253616333, "learning_rate": 1.6212337699068304e-05, "loss": 0.3962, "step": 9395 }, { "epoch": 30.80655737704918, "grad_norm": 4.81852388381958, "learning_rate": 1.6211505542540353e-05, "loss": 0.7973, "step": 9396 }, { "epoch": 30.80983606557377, "grad_norm": 4.888646602630615, "learning_rate": 1.6210673315971218e-05, "loss": 0.6255, "step": 9397 }, { "epoch": 30.81311475409836, "grad_norm": 4.504540920257568, "learning_rate": 1.6209841019370286e-05, "loss": 0.6148, "step": 9398 }, { "epoch": 30.81639344262295, "grad_norm": 6.0893168449401855, "learning_rate": 1.620900865274694e-05, "loss": 0.6866, "step": 9399 }, { "epoch": 30.81967213114754, "grad_norm": 4.242323875427246, "learning_rate": 1.6208176216110566e-05, "loss": 0.8467, "step": 9400 }, { "epoch": 30.82295081967213, "grad_norm": 5.803475379943848, "learning_rate": 1.6207343709470545e-05, "loss": 0.5208, "step": 9401 }, { "epoch": 30.82622950819672, "grad_norm": 4.4129109382629395, "learning_rate": 1.6206511132836276e-05, "loss": 0.8034, "step": 9402 }, { "epoch": 30.82950819672131, "grad_norm": 4.951484203338623, "learning_rate": 1.6205678486217142e-05, "loss": 0.6086, "step": 9403 }, { "epoch": 30.832786885245902, "grad_norm": 4.876893520355225, "learning_rate": 1.6204845769622526e-05, "loss": 0.8078, "step": 9404 }, { "epoch": 30.83606557377049, "grad_norm": 8.806879997253418, "learning_rate": 1.6204012983061823e-05, "loss": 0.7681, "step": 9405 }, { "epoch": 30.839344262295082, "grad_norm": 7.532629489898682, "learning_rate": 1.620318012654442e-05, "loss": 0.7778, "step": 9406 }, { "epoch": 30.84262295081967, "grad_norm": 5.056978225708008, "learning_rate": 1.620234720007972e-05, "loss": 0.3974, "step": 9407 }, { "epoch": 30.845901639344262, "grad_norm": 4.296582221984863, "learning_rate": 1.62015142036771e-05, "loss": 0.75, "step": 9408 }, { "epoch": 30.84918032786885, "grad_norm": 13.412745475769043, "learning_rate": 1.6200681137345962e-05, "loss": 0.637, "step": 9409 }, { "epoch": 30.852459016393443, "grad_norm": 6.027973175048828, "learning_rate": 1.6199848001095696e-05, "loss": 0.4747, "step": 9410 }, { "epoch": 30.855737704918035, "grad_norm": 4.8073577880859375, "learning_rate": 1.6199014794935698e-05, "loss": 0.7974, "step": 9411 }, { "epoch": 30.859016393442623, "grad_norm": 4.1389570236206055, "learning_rate": 1.6198181518875363e-05, "loss": 0.6585, "step": 9412 }, { "epoch": 30.862295081967215, "grad_norm": 4.630960464477539, "learning_rate": 1.6197348172924086e-05, "loss": 0.6074, "step": 9413 }, { "epoch": 30.865573770491803, "grad_norm": 5.258893966674805, "learning_rate": 1.6196514757091263e-05, "loss": 0.5486, "step": 9414 }, { "epoch": 30.868852459016395, "grad_norm": 5.231849193572998, "learning_rate": 1.6195681271386296e-05, "loss": 0.3579, "step": 9415 }, { "epoch": 30.872131147540983, "grad_norm": 7.883889198303223, "learning_rate": 1.6194847715818584e-05, "loss": 0.5169, "step": 9416 }, { "epoch": 30.875409836065575, "grad_norm": 5.762238025665283, "learning_rate": 1.6194014090397517e-05, "loss": 0.5873, "step": 9417 }, { "epoch": 30.878688524590164, "grad_norm": 4.420882225036621, "learning_rate": 1.6193180395132503e-05, "loss": 0.4289, "step": 9418 }, { "epoch": 30.881967213114756, "grad_norm": 5.138909816741943, "learning_rate": 1.6192346630032942e-05, "loss": 0.6209, "step": 9419 }, { "epoch": 30.885245901639344, "grad_norm": 5.448409080505371, "learning_rate": 1.6191512795108234e-05, "loss": 0.5928, "step": 9420 }, { "epoch": 30.888524590163936, "grad_norm": 5.391141891479492, "learning_rate": 1.619067889036778e-05, "loss": 0.721, "step": 9421 }, { "epoch": 30.891803278688524, "grad_norm": 4.3747239112854, "learning_rate": 1.6189844915820987e-05, "loss": 0.7444, "step": 9422 }, { "epoch": 30.895081967213116, "grad_norm": 5.203355312347412, "learning_rate": 1.6189010871477252e-05, "loss": 0.6178, "step": 9423 }, { "epoch": 30.898360655737704, "grad_norm": 4.505265235900879, "learning_rate": 1.618817675734599e-05, "loss": 0.6864, "step": 9424 }, { "epoch": 30.901639344262296, "grad_norm": 5.018491268157959, "learning_rate": 1.61873425734366e-05, "loss": 0.442, "step": 9425 }, { "epoch": 30.904918032786885, "grad_norm": 4.4425883293151855, "learning_rate": 1.618650831975849e-05, "loss": 0.5217, "step": 9426 }, { "epoch": 30.908196721311477, "grad_norm": 11.384321212768555, "learning_rate": 1.6185673996321064e-05, "loss": 0.7288, "step": 9427 }, { "epoch": 30.911475409836065, "grad_norm": 8.115010261535645, "learning_rate": 1.618483960313373e-05, "loss": 0.7157, "step": 9428 }, { "epoch": 30.914754098360657, "grad_norm": 4.369086742401123, "learning_rate": 1.6184005140205904e-05, "loss": 0.599, "step": 9429 }, { "epoch": 30.918032786885245, "grad_norm": 5.163203239440918, "learning_rate": 1.6183170607546988e-05, "loss": 0.4104, "step": 9430 }, { "epoch": 30.921311475409837, "grad_norm": 5.743776798248291, "learning_rate": 1.6182336005166394e-05, "loss": 0.626, "step": 9431 }, { "epoch": 30.924590163934425, "grad_norm": 5.339649200439453, "learning_rate": 1.618150133307353e-05, "loss": 0.6195, "step": 9432 }, { "epoch": 30.927868852459017, "grad_norm": 6.8722453117370605, "learning_rate": 1.618066659127782e-05, "loss": 0.4902, "step": 9433 }, { "epoch": 30.931147540983606, "grad_norm": 4.502033233642578, "learning_rate": 1.617983177978866e-05, "loss": 0.5594, "step": 9434 }, { "epoch": 30.934426229508198, "grad_norm": 5.412801742553711, "learning_rate": 1.6178996898615476e-05, "loss": 0.5419, "step": 9435 }, { "epoch": 30.937704918032786, "grad_norm": 4.495089530944824, "learning_rate": 1.6178161947767676e-05, "loss": 0.7779, "step": 9436 }, { "epoch": 30.940983606557378, "grad_norm": 4.460510730743408, "learning_rate": 1.6177326927254678e-05, "loss": 0.8112, "step": 9437 }, { "epoch": 30.944262295081966, "grad_norm": 6.595144271850586, "learning_rate": 1.6176491837085896e-05, "loss": 0.5711, "step": 9438 }, { "epoch": 30.947540983606558, "grad_norm": 3.9926724433898926, "learning_rate": 1.617565667727074e-05, "loss": 0.6559, "step": 9439 }, { "epoch": 30.950819672131146, "grad_norm": 5.064861297607422, "learning_rate": 1.6174821447818642e-05, "loss": 0.6557, "step": 9440 }, { "epoch": 30.95409836065574, "grad_norm": 4.686442852020264, "learning_rate": 1.617398614873901e-05, "loss": 0.6344, "step": 9441 }, { "epoch": 30.957377049180327, "grad_norm": 4.553707122802734, "learning_rate": 1.6173150780041263e-05, "loss": 0.7277, "step": 9442 }, { "epoch": 30.96065573770492, "grad_norm": 4.055496692657471, "learning_rate": 1.6172315341734825e-05, "loss": 0.6367, "step": 9443 }, { "epoch": 30.963934426229507, "grad_norm": 4.99641752243042, "learning_rate": 1.6171479833829108e-05, "loss": 0.8474, "step": 9444 }, { "epoch": 30.9672131147541, "grad_norm": 6.524580478668213, "learning_rate": 1.6170644256333547e-05, "loss": 0.7151, "step": 9445 }, { "epoch": 30.970491803278687, "grad_norm": 4.483695030212402, "learning_rate": 1.6169808609257552e-05, "loss": 0.5026, "step": 9446 }, { "epoch": 30.97377049180328, "grad_norm": 6.77352237701416, "learning_rate": 1.6168972892610547e-05, "loss": 0.6954, "step": 9447 }, { "epoch": 30.977049180327867, "grad_norm": 5.611575603485107, "learning_rate": 1.616813710640196e-05, "loss": 0.6042, "step": 9448 }, { "epoch": 30.98032786885246, "grad_norm": 4.470705032348633, "learning_rate": 1.616730125064122e-05, "loss": 0.6589, "step": 9449 }, { "epoch": 30.983606557377048, "grad_norm": 5.999727725982666, "learning_rate": 1.616646532533774e-05, "loss": 0.8744, "step": 9450 }, { "epoch": 30.98688524590164, "grad_norm": 4.523062229156494, "learning_rate": 1.6165629330500952e-05, "loss": 0.4743, "step": 9451 }, { "epoch": 30.990163934426228, "grad_norm": 6.92891788482666, "learning_rate": 1.6164793266140285e-05, "loss": 0.4189, "step": 9452 }, { "epoch": 30.99344262295082, "grad_norm": 4.224332332611084, "learning_rate": 1.6163957132265166e-05, "loss": 0.5677, "step": 9453 }, { "epoch": 30.99672131147541, "grad_norm": 5.062761306762695, "learning_rate": 1.6163120928885016e-05, "loss": 0.506, "step": 9454 }, { "epoch": 31.0, "grad_norm": 4.409224033355713, "learning_rate": 1.6162284656009276e-05, "loss": 0.6228, "step": 9455 }, { "epoch": 31.003278688524592, "grad_norm": 4.746485233306885, "learning_rate": 1.6161448313647365e-05, "loss": 0.5679, "step": 9456 }, { "epoch": 31.00655737704918, "grad_norm": 4.47867488861084, "learning_rate": 1.6160611901808717e-05, "loss": 0.4764, "step": 9457 }, { "epoch": 31.009836065573772, "grad_norm": 4.456779479980469, "learning_rate": 1.6159775420502767e-05, "loss": 0.5109, "step": 9458 }, { "epoch": 31.01311475409836, "grad_norm": 5.126523017883301, "learning_rate": 1.6158938869738942e-05, "loss": 0.644, "step": 9459 }, { "epoch": 31.016393442622952, "grad_norm": 5.119886875152588, "learning_rate": 1.615810224952668e-05, "loss": 0.6664, "step": 9460 }, { "epoch": 31.01967213114754, "grad_norm": 3.890439510345459, "learning_rate": 1.615726555987541e-05, "loss": 0.4882, "step": 9461 }, { "epoch": 31.022950819672133, "grad_norm": 4.704347610473633, "learning_rate": 1.6156428800794574e-05, "loss": 0.4126, "step": 9462 }, { "epoch": 31.02622950819672, "grad_norm": 5.00974178314209, "learning_rate": 1.61555919722936e-05, "loss": 0.5968, "step": 9463 }, { "epoch": 31.029508196721313, "grad_norm": 7.791601657867432, "learning_rate": 1.615475507438192e-05, "loss": 0.422, "step": 9464 }, { "epoch": 31.0327868852459, "grad_norm": 4.515369415283203, "learning_rate": 1.6153918107068983e-05, "loss": 0.424, "step": 9465 }, { "epoch": 31.036065573770493, "grad_norm": 5.031991481781006, "learning_rate": 1.615308107036422e-05, "loss": 0.7848, "step": 9466 }, { "epoch": 31.03934426229508, "grad_norm": 4.164474964141846, "learning_rate": 1.615224396427707e-05, "loss": 0.6789, "step": 9467 }, { "epoch": 31.042622950819673, "grad_norm": 5.3877854347229, "learning_rate": 1.6151406788816975e-05, "loss": 0.4029, "step": 9468 }, { "epoch": 31.04590163934426, "grad_norm": 5.027912616729736, "learning_rate": 1.6150569543993367e-05, "loss": 0.6971, "step": 9469 }, { "epoch": 31.049180327868854, "grad_norm": 4.449073791503906, "learning_rate": 1.6149732229815698e-05, "loss": 0.6481, "step": 9470 }, { "epoch": 31.052459016393442, "grad_norm": 4.597449779510498, "learning_rate": 1.61488948462934e-05, "loss": 0.3114, "step": 9471 }, { "epoch": 31.055737704918034, "grad_norm": 4.528837203979492, "learning_rate": 1.6148057393435922e-05, "loss": 0.6317, "step": 9472 }, { "epoch": 31.059016393442622, "grad_norm": 4.418788433074951, "learning_rate": 1.6147219871252705e-05, "loss": 0.4259, "step": 9473 }, { "epoch": 31.062295081967214, "grad_norm": 4.49637508392334, "learning_rate": 1.614638227975319e-05, "loss": 0.5424, "step": 9474 }, { "epoch": 31.065573770491802, "grad_norm": 3.8738670349121094, "learning_rate": 1.6145544618946826e-05, "loss": 0.7567, "step": 9475 }, { "epoch": 31.068852459016394, "grad_norm": 5.984095573425293, "learning_rate": 1.6144706888843057e-05, "loss": 0.5258, "step": 9476 }, { "epoch": 31.072131147540983, "grad_norm": 5.829484462738037, "learning_rate": 1.6143869089451326e-05, "loss": 0.2899, "step": 9477 }, { "epoch": 31.075409836065575, "grad_norm": 4.527059555053711, "learning_rate": 1.614303122078109e-05, "loss": 0.6593, "step": 9478 }, { "epoch": 31.078688524590163, "grad_norm": 4.954570770263672, "learning_rate": 1.614219328284178e-05, "loss": 0.4589, "step": 9479 }, { "epoch": 31.081967213114755, "grad_norm": 5.871252059936523, "learning_rate": 1.614135527564286e-05, "loss": 0.4235, "step": 9480 }, { "epoch": 31.085245901639343, "grad_norm": 5.662847518920898, "learning_rate": 1.6140517199193776e-05, "loss": 0.3948, "step": 9481 }, { "epoch": 31.088524590163935, "grad_norm": 4.942902088165283, "learning_rate": 1.613967905350397e-05, "loss": 0.5252, "step": 9482 }, { "epoch": 31.091803278688523, "grad_norm": 5.020353317260742, "learning_rate": 1.6138840838582904e-05, "loss": 0.4348, "step": 9483 }, { "epoch": 31.095081967213115, "grad_norm": 4.235051155090332, "learning_rate": 1.613800255444002e-05, "loss": 0.6829, "step": 9484 }, { "epoch": 31.098360655737704, "grad_norm": 5.77724027633667, "learning_rate": 1.613716420108478e-05, "loss": 0.8021, "step": 9485 }, { "epoch": 31.101639344262296, "grad_norm": 4.454185485839844, "learning_rate": 1.613632577852663e-05, "loss": 0.3793, "step": 9486 }, { "epoch": 31.104918032786884, "grad_norm": 4.92774772644043, "learning_rate": 1.6135487286775028e-05, "loss": 0.629, "step": 9487 }, { "epoch": 31.108196721311476, "grad_norm": 5.506100177764893, "learning_rate": 1.6134648725839427e-05, "loss": 0.6651, "step": 9488 }, { "epoch": 31.111475409836064, "grad_norm": 4.288032531738281, "learning_rate": 1.6133810095729284e-05, "loss": 0.6504, "step": 9489 }, { "epoch": 31.114754098360656, "grad_norm": 4.291989803314209, "learning_rate": 1.6132971396454052e-05, "loss": 0.6629, "step": 9490 }, { "epoch": 31.118032786885244, "grad_norm": 4.744936943054199, "learning_rate": 1.6132132628023192e-05, "loss": 0.5192, "step": 9491 }, { "epoch": 31.121311475409836, "grad_norm": 4.559010028839111, "learning_rate": 1.6131293790446162e-05, "loss": 0.6186, "step": 9492 }, { "epoch": 31.124590163934425, "grad_norm": 4.421444892883301, "learning_rate": 1.6130454883732417e-05, "loss": 0.752, "step": 9493 }, { "epoch": 31.127868852459017, "grad_norm": 4.314370632171631, "learning_rate": 1.6129615907891424e-05, "loss": 0.4309, "step": 9494 }, { "epoch": 31.131147540983605, "grad_norm": 4.623456954956055, "learning_rate": 1.6128776862932635e-05, "loss": 0.7443, "step": 9495 }, { "epoch": 31.134426229508197, "grad_norm": 5.896031379699707, "learning_rate": 1.6127937748865515e-05, "loss": 0.7754, "step": 9496 }, { "epoch": 31.137704918032785, "grad_norm": 5.460696220397949, "learning_rate": 1.6127098565699524e-05, "loss": 0.5711, "step": 9497 }, { "epoch": 31.140983606557377, "grad_norm": 4.454035758972168, "learning_rate": 1.612625931344413e-05, "loss": 0.6981, "step": 9498 }, { "epoch": 31.14426229508197, "grad_norm": 4.941022872924805, "learning_rate": 1.6125419992108788e-05, "loss": 0.4544, "step": 9499 }, { "epoch": 31.147540983606557, "grad_norm": 4.427962303161621, "learning_rate": 1.612458060170297e-05, "loss": 0.7187, "step": 9500 }, { "epoch": 31.15081967213115, "grad_norm": 4.797988414764404, "learning_rate": 1.6123741142236132e-05, "loss": 0.7096, "step": 9501 }, { "epoch": 31.154098360655738, "grad_norm": 5.011356830596924, "learning_rate": 1.6122901613717753e-05, "loss": 0.5638, "step": 9502 }, { "epoch": 31.15737704918033, "grad_norm": 5.095968246459961, "learning_rate": 1.6122062016157288e-05, "loss": 0.5223, "step": 9503 }, { "epoch": 31.160655737704918, "grad_norm": 4.816681861877441, "learning_rate": 1.612122234956421e-05, "loss": 0.5417, "step": 9504 }, { "epoch": 31.16393442622951, "grad_norm": 5.496860504150391, "learning_rate": 1.6120382613947986e-05, "loss": 0.6553, "step": 9505 }, { "epoch": 31.167213114754098, "grad_norm": 5.943999290466309, "learning_rate": 1.6119542809318082e-05, "loss": 0.6314, "step": 9506 }, { "epoch": 31.17049180327869, "grad_norm": 5.625068664550781, "learning_rate": 1.6118702935683975e-05, "loss": 0.5905, "step": 9507 }, { "epoch": 31.17377049180328, "grad_norm": 3.9861347675323486, "learning_rate": 1.6117862993055125e-05, "loss": 0.7758, "step": 9508 }, { "epoch": 31.17704918032787, "grad_norm": 4.960081577301025, "learning_rate": 1.611702298144101e-05, "loss": 0.5767, "step": 9509 }, { "epoch": 31.18032786885246, "grad_norm": 4.551383018493652, "learning_rate": 1.6116182900851104e-05, "loss": 0.8427, "step": 9510 }, { "epoch": 31.18360655737705, "grad_norm": 6.235569477081299, "learning_rate": 1.6115342751294873e-05, "loss": 0.8109, "step": 9511 }, { "epoch": 31.18688524590164, "grad_norm": 3.9976654052734375, "learning_rate": 1.6114502532781794e-05, "loss": 0.7258, "step": 9512 }, { "epoch": 31.19016393442623, "grad_norm": 4.839090824127197, "learning_rate": 1.6113662245321342e-05, "loss": 0.71, "step": 9513 }, { "epoch": 31.19344262295082, "grad_norm": 7.187140464782715, "learning_rate": 1.6112821888922992e-05, "loss": 0.5379, "step": 9514 }, { "epoch": 31.19672131147541, "grad_norm": 4.416884422302246, "learning_rate": 1.611198146359622e-05, "loss": 0.5989, "step": 9515 }, { "epoch": 31.2, "grad_norm": 4.799860954284668, "learning_rate": 1.6111140969350504e-05, "loss": 0.5627, "step": 9516 }, { "epoch": 31.20327868852459, "grad_norm": 4.579836368560791, "learning_rate": 1.6110300406195318e-05, "loss": 0.6703, "step": 9517 }, { "epoch": 31.20655737704918, "grad_norm": 4.172616004943848, "learning_rate": 1.6109459774140138e-05, "loss": 0.7176, "step": 9518 }, { "epoch": 31.20983606557377, "grad_norm": 4.151080131530762, "learning_rate": 1.6108619073194454e-05, "loss": 0.4645, "step": 9519 }, { "epoch": 31.21311475409836, "grad_norm": 5.3007049560546875, "learning_rate": 1.6107778303367735e-05, "loss": 0.5703, "step": 9520 }, { "epoch": 31.21639344262295, "grad_norm": 4.443827152252197, "learning_rate": 1.6106937464669462e-05, "loss": 0.6493, "step": 9521 }, { "epoch": 31.21967213114754, "grad_norm": 4.725500583648682, "learning_rate": 1.6106096557109125e-05, "loss": 0.6608, "step": 9522 }, { "epoch": 31.222950819672132, "grad_norm": 17.280879974365234, "learning_rate": 1.6105255580696197e-05, "loss": 0.6017, "step": 9523 }, { "epoch": 31.22622950819672, "grad_norm": 8.721057891845703, "learning_rate": 1.6104414535440164e-05, "loss": 0.7273, "step": 9524 }, { "epoch": 31.229508196721312, "grad_norm": 4.380885601043701, "learning_rate": 1.610357342135051e-05, "loss": 0.4333, "step": 9525 }, { "epoch": 31.2327868852459, "grad_norm": 4.622711181640625, "learning_rate": 1.6102732238436724e-05, "loss": 0.6612, "step": 9526 }, { "epoch": 31.236065573770492, "grad_norm": 4.175507068634033, "learning_rate": 1.6101890986708282e-05, "loss": 0.6184, "step": 9527 }, { "epoch": 31.23934426229508, "grad_norm": 5.206476211547852, "learning_rate": 1.6101049666174677e-05, "loss": 0.9534, "step": 9528 }, { "epoch": 31.242622950819673, "grad_norm": 5.305301189422607, "learning_rate": 1.6100208276845394e-05, "loss": 0.4211, "step": 9529 }, { "epoch": 31.24590163934426, "grad_norm": 4.660996437072754, "learning_rate": 1.6099366818729918e-05, "loss": 0.7574, "step": 9530 }, { "epoch": 31.249180327868853, "grad_norm": 4.7483720779418945, "learning_rate": 1.6098525291837738e-05, "loss": 0.7168, "step": 9531 }, { "epoch": 31.25245901639344, "grad_norm": 4.7307000160217285, "learning_rate": 1.609768369617835e-05, "loss": 0.5772, "step": 9532 }, { "epoch": 31.255737704918033, "grad_norm": 3.7585103511810303, "learning_rate": 1.6096842031761235e-05, "loss": 0.3814, "step": 9533 }, { "epoch": 31.25901639344262, "grad_norm": 5.693427562713623, "learning_rate": 1.6096000298595885e-05, "loss": 0.6085, "step": 9534 }, { "epoch": 31.262295081967213, "grad_norm": 5.710404872894287, "learning_rate": 1.6095158496691795e-05, "loss": 0.4352, "step": 9535 }, { "epoch": 31.2655737704918, "grad_norm": 4.7855072021484375, "learning_rate": 1.6094316626058456e-05, "loss": 0.5755, "step": 9536 }, { "epoch": 31.268852459016394, "grad_norm": 5.073544502258301, "learning_rate": 1.6093474686705365e-05, "loss": 0.4994, "step": 9537 }, { "epoch": 31.272131147540982, "grad_norm": 4.540652275085449, "learning_rate": 1.6092632678642004e-05, "loss": 0.6298, "step": 9538 }, { "epoch": 31.275409836065574, "grad_norm": 4.641119003295898, "learning_rate": 1.609179060187788e-05, "loss": 0.712, "step": 9539 }, { "epoch": 31.278688524590162, "grad_norm": 4.00256872177124, "learning_rate": 1.6090948456422477e-05, "loss": 0.5253, "step": 9540 }, { "epoch": 31.281967213114754, "grad_norm": 4.826475143432617, "learning_rate": 1.6090106242285304e-05, "loss": 0.6743, "step": 9541 }, { "epoch": 31.285245901639342, "grad_norm": 4.5833659172058105, "learning_rate": 1.6089263959475847e-05, "loss": 0.6353, "step": 9542 }, { "epoch": 31.288524590163934, "grad_norm": 4.451108455657959, "learning_rate": 1.6088421608003608e-05, "loss": 0.7154, "step": 9543 }, { "epoch": 31.291803278688526, "grad_norm": 4.752187252044678, "learning_rate": 1.6087579187878085e-05, "loss": 0.6865, "step": 9544 }, { "epoch": 31.295081967213115, "grad_norm": 7.223480224609375, "learning_rate": 1.6086736699108782e-05, "loss": 0.6676, "step": 9545 }, { "epoch": 31.298360655737707, "grad_norm": 6.662850856781006, "learning_rate": 1.6085894141705188e-05, "loss": 0.9704, "step": 9546 }, { "epoch": 31.301639344262295, "grad_norm": 5.294304370880127, "learning_rate": 1.608505151567681e-05, "loss": 0.7252, "step": 9547 }, { "epoch": 31.304918032786887, "grad_norm": 4.830316066741943, "learning_rate": 1.6084208821033152e-05, "loss": 0.7741, "step": 9548 }, { "epoch": 31.308196721311475, "grad_norm": 5.079685211181641, "learning_rate": 1.6083366057783713e-05, "loss": 0.5937, "step": 9549 }, { "epoch": 31.311475409836067, "grad_norm": 4.393616676330566, "learning_rate": 1.6082523225937995e-05, "loss": 0.4986, "step": 9550 }, { "epoch": 31.314754098360655, "grad_norm": 5.147226810455322, "learning_rate": 1.6081680325505503e-05, "loss": 0.3968, "step": 9551 }, { "epoch": 31.318032786885247, "grad_norm": 4.532220363616943, "learning_rate": 1.6080837356495745e-05, "loss": 0.5941, "step": 9552 }, { "epoch": 31.321311475409836, "grad_norm": 5.214383125305176, "learning_rate": 1.607999431891822e-05, "loss": 0.5432, "step": 9553 }, { "epoch": 31.324590163934428, "grad_norm": 5.018869400024414, "learning_rate": 1.607915121278244e-05, "loss": 0.7622, "step": 9554 }, { "epoch": 31.327868852459016, "grad_norm": 3.9364521503448486, "learning_rate": 1.607830803809791e-05, "loss": 0.6682, "step": 9555 }, { "epoch": 31.331147540983608, "grad_norm": 4.28605318069458, "learning_rate": 1.6077464794874137e-05, "loss": 0.4422, "step": 9556 }, { "epoch": 31.334426229508196, "grad_norm": 12.146242141723633, "learning_rate": 1.6076621483120626e-05, "loss": 0.5832, "step": 9557 }, { "epoch": 31.337704918032788, "grad_norm": 5.230282306671143, "learning_rate": 1.6075778102846892e-05, "loss": 0.6286, "step": 9558 }, { "epoch": 31.340983606557376, "grad_norm": 6.111329555511475, "learning_rate": 1.607493465406244e-05, "loss": 0.4724, "step": 9559 }, { "epoch": 31.34426229508197, "grad_norm": 3.9459915161132812, "learning_rate": 1.6074091136776788e-05, "loss": 0.6431, "step": 9560 }, { "epoch": 31.347540983606557, "grad_norm": 4.514785289764404, "learning_rate": 1.607324755099944e-05, "loss": 0.6317, "step": 9561 }, { "epoch": 31.35081967213115, "grad_norm": 5.090768337249756, "learning_rate": 1.607240389673991e-05, "loss": 0.6012, "step": 9562 }, { "epoch": 31.354098360655737, "grad_norm": 5.915844917297363, "learning_rate": 1.6071560174007717e-05, "loss": 0.5611, "step": 9563 }, { "epoch": 31.35737704918033, "grad_norm": 4.096938133239746, "learning_rate": 1.607071638281237e-05, "loss": 0.6309, "step": 9564 }, { "epoch": 31.360655737704917, "grad_norm": 4.8214569091796875, "learning_rate": 1.6069872523163378e-05, "loss": 0.664, "step": 9565 }, { "epoch": 31.36393442622951, "grad_norm": 4.979528903961182, "learning_rate": 1.6069028595070266e-05, "loss": 0.5573, "step": 9566 }, { "epoch": 31.367213114754097, "grad_norm": 6.3163275718688965, "learning_rate": 1.606818459854255e-05, "loss": 0.4437, "step": 9567 }, { "epoch": 31.37049180327869, "grad_norm": 4.930306911468506, "learning_rate": 1.6067340533589737e-05, "loss": 0.5901, "step": 9568 }, { "epoch": 31.373770491803278, "grad_norm": 5.256237983703613, "learning_rate": 1.6066496400221355e-05, "loss": 0.502, "step": 9569 }, { "epoch": 31.37704918032787, "grad_norm": 4.550839424133301, "learning_rate": 1.6065652198446914e-05, "loss": 0.6619, "step": 9570 }, { "epoch": 31.380327868852458, "grad_norm": 5.309696674346924, "learning_rate": 1.606480792827594e-05, "loss": 0.6805, "step": 9571 }, { "epoch": 31.38360655737705, "grad_norm": 5.268314838409424, "learning_rate": 1.606396358971795e-05, "loss": 0.6417, "step": 9572 }, { "epoch": 31.386885245901638, "grad_norm": 7.636632919311523, "learning_rate": 1.606311918278247e-05, "loss": 0.5607, "step": 9573 }, { "epoch": 31.39016393442623, "grad_norm": 4.941364765167236, "learning_rate": 1.6062274707479013e-05, "loss": 0.5882, "step": 9574 }, { "epoch": 31.39344262295082, "grad_norm": 4.774905681610107, "learning_rate": 1.6061430163817108e-05, "loss": 0.6581, "step": 9575 }, { "epoch": 31.39672131147541, "grad_norm": 5.317511558532715, "learning_rate": 1.6060585551806274e-05, "loss": 0.5887, "step": 9576 }, { "epoch": 31.4, "grad_norm": 4.525757312774658, "learning_rate": 1.6059740871456035e-05, "loss": 0.7203, "step": 9577 }, { "epoch": 31.40327868852459, "grad_norm": 6.308027267456055, "learning_rate": 1.605889612277592e-05, "loss": 0.5802, "step": 9578 }, { "epoch": 31.40655737704918, "grad_norm": 4.263449192047119, "learning_rate": 1.6058051305775452e-05, "loss": 0.703, "step": 9579 }, { "epoch": 31.40983606557377, "grad_norm": 5.654738426208496, "learning_rate": 1.6057206420464156e-05, "loss": 0.4298, "step": 9580 }, { "epoch": 31.41311475409836, "grad_norm": 7.15740442276001, "learning_rate": 1.6056361466851554e-05, "loss": 0.659, "step": 9581 }, { "epoch": 31.41639344262295, "grad_norm": 11.777498245239258, "learning_rate": 1.605551644494719e-05, "loss": 0.7118, "step": 9582 }, { "epoch": 31.41967213114754, "grad_norm": 4.532447814941406, "learning_rate": 1.605467135476057e-05, "loss": 0.7524, "step": 9583 }, { "epoch": 31.42295081967213, "grad_norm": 4.7879252433776855, "learning_rate": 1.6053826196301244e-05, "loss": 0.7323, "step": 9584 }, { "epoch": 31.42622950819672, "grad_norm": 5.05560302734375, "learning_rate": 1.6052980969578732e-05, "loss": 0.6489, "step": 9585 }, { "epoch": 31.42950819672131, "grad_norm": 4.452167510986328, "learning_rate": 1.6052135674602563e-05, "loss": 0.7844, "step": 9586 }, { "epoch": 31.432786885245903, "grad_norm": 5.169841766357422, "learning_rate": 1.6051290311382274e-05, "loss": 0.5112, "step": 9587 }, { "epoch": 31.43606557377049, "grad_norm": 4.983062267303467, "learning_rate": 1.6050444879927392e-05, "loss": 0.6777, "step": 9588 }, { "epoch": 31.439344262295084, "grad_norm": 4.725902557373047, "learning_rate": 1.6049599380247456e-05, "loss": 0.4406, "step": 9589 }, { "epoch": 31.442622950819672, "grad_norm": 6.813853740692139, "learning_rate": 1.6048753812352e-05, "loss": 0.5641, "step": 9590 }, { "epoch": 31.445901639344264, "grad_norm": 5.2350077629089355, "learning_rate": 1.604790817625055e-05, "loss": 0.7097, "step": 9591 }, { "epoch": 31.449180327868852, "grad_norm": 4.8915019035339355, "learning_rate": 1.6047062471952647e-05, "loss": 0.3964, "step": 9592 }, { "epoch": 31.452459016393444, "grad_norm": 5.3196492195129395, "learning_rate": 1.604621669946783e-05, "loss": 0.5979, "step": 9593 }, { "epoch": 31.455737704918032, "grad_norm": 5.116546154022217, "learning_rate": 1.6045370858805633e-05, "loss": 0.7075, "step": 9594 }, { "epoch": 31.459016393442624, "grad_norm": 4.469849586486816, "learning_rate": 1.6044524949975593e-05, "loss": 0.4919, "step": 9595 }, { "epoch": 31.462295081967213, "grad_norm": 4.512690544128418, "learning_rate": 1.604367897298725e-05, "loss": 0.6411, "step": 9596 }, { "epoch": 31.465573770491805, "grad_norm": 5.189019203186035, "learning_rate": 1.6042832927850142e-05, "loss": 0.6538, "step": 9597 }, { "epoch": 31.468852459016393, "grad_norm": 5.158541679382324, "learning_rate": 1.604198681457381e-05, "loss": 0.4756, "step": 9598 }, { "epoch": 31.472131147540985, "grad_norm": 5.908121109008789, "learning_rate": 1.6041140633167795e-05, "loss": 0.6162, "step": 9599 }, { "epoch": 31.475409836065573, "grad_norm": 4.6484222412109375, "learning_rate": 1.604029438364164e-05, "loss": 0.4168, "step": 9600 }, { "epoch": 31.478688524590165, "grad_norm": 4.298432350158691, "learning_rate": 1.6039448066004882e-05, "loss": 0.5285, "step": 9601 }, { "epoch": 31.481967213114753, "grad_norm": 4.602365970611572, "learning_rate": 1.603860168026707e-05, "loss": 0.363, "step": 9602 }, { "epoch": 31.485245901639345, "grad_norm": 4.502681255340576, "learning_rate": 1.6037755226437742e-05, "loss": 0.5534, "step": 9603 }, { "epoch": 31.488524590163934, "grad_norm": 5.324665546417236, "learning_rate": 1.603690870452645e-05, "loss": 0.4667, "step": 9604 }, { "epoch": 31.491803278688526, "grad_norm": 6.098994731903076, "learning_rate": 1.6036062114542734e-05, "loss": 0.4054, "step": 9605 }, { "epoch": 31.495081967213114, "grad_norm": 4.3550944328308105, "learning_rate": 1.6035215456496145e-05, "loss": 0.3876, "step": 9606 }, { "epoch": 31.498360655737706, "grad_norm": 4.809147834777832, "learning_rate": 1.6034368730396225e-05, "loss": 0.5833, "step": 9607 }, { "epoch": 31.501639344262294, "grad_norm": 5.314957141876221, "learning_rate": 1.6033521936252522e-05, "loss": 0.5867, "step": 9608 }, { "epoch": 31.504918032786886, "grad_norm": 4.808416843414307, "learning_rate": 1.6032675074074588e-05, "loss": 0.6943, "step": 9609 }, { "epoch": 31.508196721311474, "grad_norm": 4.758782863616943, "learning_rate": 1.6031828143871962e-05, "loss": 0.7079, "step": 9610 }, { "epoch": 31.511475409836066, "grad_norm": 7.013787746429443, "learning_rate": 1.603098114565421e-05, "loss": 0.5968, "step": 9611 }, { "epoch": 31.514754098360655, "grad_norm": 4.5342817306518555, "learning_rate": 1.6030134079430874e-05, "loss": 0.5983, "step": 9612 }, { "epoch": 31.518032786885247, "grad_norm": 5.352228164672852, "learning_rate": 1.6029286945211507e-05, "loss": 0.498, "step": 9613 }, { "epoch": 31.521311475409835, "grad_norm": 5.706198692321777, "learning_rate": 1.6028439743005657e-05, "loss": 0.6625, "step": 9614 }, { "epoch": 31.524590163934427, "grad_norm": 4.766877174377441, "learning_rate": 1.6027592472822885e-05, "loss": 0.5277, "step": 9615 }, { "epoch": 31.527868852459015, "grad_norm": 4.299432754516602, "learning_rate": 1.602674513467274e-05, "loss": 0.6059, "step": 9616 }, { "epoch": 31.531147540983607, "grad_norm": 5.4696125984191895, "learning_rate": 1.6025897728564775e-05, "loss": 0.4343, "step": 9617 }, { "epoch": 31.534426229508195, "grad_norm": 4.452974319458008, "learning_rate": 1.602505025450855e-05, "loss": 0.5777, "step": 9618 }, { "epoch": 31.537704918032787, "grad_norm": 4.519428730010986, "learning_rate": 1.602420271251362e-05, "loss": 0.5428, "step": 9619 }, { "epoch": 31.540983606557376, "grad_norm": 4.429287910461426, "learning_rate": 1.6023355102589534e-05, "loss": 0.5046, "step": 9620 }, { "epoch": 31.544262295081968, "grad_norm": 4.559747695922852, "learning_rate": 1.6022507424745864e-05, "loss": 0.7193, "step": 9621 }, { "epoch": 31.547540983606556, "grad_norm": 4.94162654876709, "learning_rate": 1.6021659678992162e-05, "loss": 0.5749, "step": 9622 }, { "epoch": 31.550819672131148, "grad_norm": 4.841658115386963, "learning_rate": 1.602081186533798e-05, "loss": 0.5542, "step": 9623 }, { "epoch": 31.554098360655736, "grad_norm": 4.935677528381348, "learning_rate": 1.601996398379289e-05, "loss": 0.4176, "step": 9624 }, { "epoch": 31.557377049180328, "grad_norm": 4.470315933227539, "learning_rate": 1.6019116034366442e-05, "loss": 0.3892, "step": 9625 }, { "epoch": 31.560655737704916, "grad_norm": 4.901475429534912, "learning_rate": 1.6018268017068203e-05, "loss": 0.5368, "step": 9626 }, { "epoch": 31.56393442622951, "grad_norm": 5.072343349456787, "learning_rate": 1.6017419931907734e-05, "loss": 0.5519, "step": 9627 }, { "epoch": 31.567213114754097, "grad_norm": 5.845360279083252, "learning_rate": 1.60165717788946e-05, "loss": 0.6569, "step": 9628 }, { "epoch": 31.57049180327869, "grad_norm": 5.707867622375488, "learning_rate": 1.6015723558038366e-05, "loss": 0.4527, "step": 9629 }, { "epoch": 31.57377049180328, "grad_norm": 4.491844177246094, "learning_rate": 1.601487526934859e-05, "loss": 0.4202, "step": 9630 }, { "epoch": 31.57704918032787, "grad_norm": 5.111096382141113, "learning_rate": 1.6014026912834845e-05, "loss": 0.7902, "step": 9631 }, { "epoch": 31.58032786885246, "grad_norm": 6.3209547996521, "learning_rate": 1.6013178488506694e-05, "loss": 0.7535, "step": 9632 }, { "epoch": 31.58360655737705, "grad_norm": 4.370884895324707, "learning_rate": 1.6012329996373697e-05, "loss": 0.6219, "step": 9633 }, { "epoch": 31.58688524590164, "grad_norm": 4.04661226272583, "learning_rate": 1.6011481436445434e-05, "loss": 0.4738, "step": 9634 }, { "epoch": 31.59016393442623, "grad_norm": 4.473816871643066, "learning_rate": 1.601063280873147e-05, "loss": 0.602, "step": 9635 }, { "epoch": 31.59344262295082, "grad_norm": 5.368789196014404, "learning_rate": 1.6009784113241366e-05, "loss": 0.4304, "step": 9636 }, { "epoch": 31.59672131147541, "grad_norm": 4.82747745513916, "learning_rate": 1.6008935349984697e-05, "loss": 0.7158, "step": 9637 }, { "epoch": 31.6, "grad_norm": 5.07503080368042, "learning_rate": 1.6008086518971037e-05, "loss": 0.3982, "step": 9638 }, { "epoch": 31.60327868852459, "grad_norm": 4.639828205108643, "learning_rate": 1.6007237620209954e-05, "loss": 0.6733, "step": 9639 }, { "epoch": 31.60655737704918, "grad_norm": 4.921619415283203, "learning_rate": 1.600638865371102e-05, "loss": 0.3995, "step": 9640 }, { "epoch": 31.60983606557377, "grad_norm": 4.555599689483643, "learning_rate": 1.6005539619483812e-05, "loss": 0.6485, "step": 9641 }, { "epoch": 31.613114754098362, "grad_norm": 5.555481433868408, "learning_rate": 1.60046905175379e-05, "loss": 0.5002, "step": 9642 }, { "epoch": 31.61639344262295, "grad_norm": 6.150696754455566, "learning_rate": 1.6003841347882855e-05, "loss": 0.6716, "step": 9643 }, { "epoch": 31.619672131147542, "grad_norm": 4.425997734069824, "learning_rate": 1.6002992110528256e-05, "loss": 0.6353, "step": 9644 }, { "epoch": 31.62295081967213, "grad_norm": 5.070331573486328, "learning_rate": 1.6002142805483686e-05, "loss": 0.6163, "step": 9645 }, { "epoch": 31.626229508196722, "grad_norm": 4.430163860321045, "learning_rate": 1.6001293432758707e-05, "loss": 0.5502, "step": 9646 }, { "epoch": 31.62950819672131, "grad_norm": 4.1691060066223145, "learning_rate": 1.600044399236291e-05, "loss": 0.5258, "step": 9647 }, { "epoch": 31.632786885245903, "grad_norm": 4.970241069793701, "learning_rate": 1.599959448430587e-05, "loss": 0.7079, "step": 9648 }, { "epoch": 31.63606557377049, "grad_norm": 5.178867340087891, "learning_rate": 1.599874490859716e-05, "loss": 0.6371, "step": 9649 }, { "epoch": 31.639344262295083, "grad_norm": 5.01676607131958, "learning_rate": 1.5997895265246366e-05, "loss": 0.542, "step": 9650 }, { "epoch": 31.64262295081967, "grad_norm": 5.488036155700684, "learning_rate": 1.5997045554263066e-05, "loss": 0.6289, "step": 9651 }, { "epoch": 31.645901639344263, "grad_norm": 7.803127765655518, "learning_rate": 1.5996195775656843e-05, "loss": 0.7661, "step": 9652 }, { "epoch": 31.64918032786885, "grad_norm": 4.204998970031738, "learning_rate": 1.5995345929437275e-05, "loss": 1.039, "step": 9653 }, { "epoch": 31.652459016393443, "grad_norm": 3.9975063800811768, "learning_rate": 1.599449601561395e-05, "loss": 0.7885, "step": 9654 }, { "epoch": 31.65573770491803, "grad_norm": 5.165309429168701, "learning_rate": 1.599364603419645e-05, "loss": 0.4724, "step": 9655 }, { "epoch": 31.659016393442624, "grad_norm": 4.552426338195801, "learning_rate": 1.599279598519436e-05, "loss": 0.5082, "step": 9656 }, { "epoch": 31.662295081967212, "grad_norm": 4.005012035369873, "learning_rate": 1.5991945868617263e-05, "loss": 0.5991, "step": 9657 }, { "epoch": 31.665573770491804, "grad_norm": 6.419414520263672, "learning_rate": 1.5991095684474748e-05, "loss": 0.5354, "step": 9658 }, { "epoch": 31.668852459016392, "grad_norm": 4.199661731719971, "learning_rate": 1.5990245432776395e-05, "loss": 0.4899, "step": 9659 }, { "epoch": 31.672131147540984, "grad_norm": 5.0311737060546875, "learning_rate": 1.59893951135318e-05, "loss": 0.4143, "step": 9660 }, { "epoch": 31.675409836065572, "grad_norm": 4.673586368560791, "learning_rate": 1.598854472675055e-05, "loss": 0.5833, "step": 9661 }, { "epoch": 31.678688524590164, "grad_norm": 4.364255905151367, "learning_rate": 1.5987694272442228e-05, "loss": 0.6871, "step": 9662 }, { "epoch": 31.681967213114753, "grad_norm": 5.58050537109375, "learning_rate": 1.5986843750616432e-05, "loss": 0.8031, "step": 9663 }, { "epoch": 31.685245901639345, "grad_norm": 4.650010585784912, "learning_rate": 1.5985993161282744e-05, "loss": 0.8143, "step": 9664 }, { "epoch": 31.688524590163933, "grad_norm": 4.415690898895264, "learning_rate": 1.5985142504450762e-05, "loss": 0.4962, "step": 9665 }, { "epoch": 31.691803278688525, "grad_norm": 4.458963394165039, "learning_rate": 1.598429178013007e-05, "loss": 0.5546, "step": 9666 }, { "epoch": 31.695081967213113, "grad_norm": 4.727219104766846, "learning_rate": 1.598344098833027e-05, "loss": 1.1132, "step": 9667 }, { "epoch": 31.698360655737705, "grad_norm": 4.60409688949585, "learning_rate": 1.598259012906095e-05, "loss": 0.5962, "step": 9668 }, { "epoch": 31.701639344262293, "grad_norm": 4.279510974884033, "learning_rate": 1.598173920233171e-05, "loss": 0.6438, "step": 9669 }, { "epoch": 31.704918032786885, "grad_norm": 4.977507591247559, "learning_rate": 1.5980888208152135e-05, "loss": 0.5335, "step": 9670 }, { "epoch": 31.708196721311474, "grad_norm": 10.69422721862793, "learning_rate": 1.5980037146531832e-05, "loss": 0.7413, "step": 9671 }, { "epoch": 31.711475409836066, "grad_norm": 4.2500386238098145, "learning_rate": 1.5979186017480388e-05, "loss": 0.4165, "step": 9672 }, { "epoch": 31.714754098360658, "grad_norm": 4.559986114501953, "learning_rate": 1.5978334821007408e-05, "loss": 0.6134, "step": 9673 }, { "epoch": 31.718032786885246, "grad_norm": 4.943024158477783, "learning_rate": 1.5977483557122488e-05, "loss": 0.5901, "step": 9674 }, { "epoch": 31.721311475409838, "grad_norm": 4.8599090576171875, "learning_rate": 1.5976632225835223e-05, "loss": 0.5245, "step": 9675 }, { "epoch": 31.724590163934426, "grad_norm": 4.142881870269775, "learning_rate": 1.5975780827155218e-05, "loss": 0.5974, "step": 9676 }, { "epoch": 31.727868852459018, "grad_norm": 4.2819085121154785, "learning_rate": 1.5974929361092068e-05, "loss": 0.4463, "step": 9677 }, { "epoch": 31.731147540983606, "grad_norm": 4.041182994842529, "learning_rate": 1.597407782765538e-05, "loss": 0.5073, "step": 9678 }, { "epoch": 31.7344262295082, "grad_norm": 4.7330803871154785, "learning_rate": 1.597322622685475e-05, "loss": 0.5867, "step": 9679 }, { "epoch": 31.737704918032787, "grad_norm": 4.749802589416504, "learning_rate": 1.5972374558699786e-05, "loss": 0.5893, "step": 9680 }, { "epoch": 31.74098360655738, "grad_norm": 4.480035781860352, "learning_rate": 1.5971522823200088e-05, "loss": 0.4041, "step": 9681 }, { "epoch": 31.744262295081967, "grad_norm": 8.311211585998535, "learning_rate": 1.5970671020365264e-05, "loss": 0.7584, "step": 9682 }, { "epoch": 31.74754098360656, "grad_norm": 4.8875322341918945, "learning_rate": 1.596981915020491e-05, "loss": 0.6056, "step": 9683 }, { "epoch": 31.750819672131147, "grad_norm": 5.107083320617676, "learning_rate": 1.5968967212728644e-05, "loss": 0.4359, "step": 9684 }, { "epoch": 31.75409836065574, "grad_norm": 5.10557222366333, "learning_rate": 1.5968115207946065e-05, "loss": 0.4625, "step": 9685 }, { "epoch": 31.757377049180327, "grad_norm": 8.194005012512207, "learning_rate": 1.5967263135866783e-05, "loss": 0.7807, "step": 9686 }, { "epoch": 31.76065573770492, "grad_norm": 4.354273796081543, "learning_rate": 1.5966410996500402e-05, "loss": 0.4787, "step": 9687 }, { "epoch": 31.763934426229508, "grad_norm": 5.562289237976074, "learning_rate": 1.5965558789856533e-05, "loss": 0.6387, "step": 9688 }, { "epoch": 31.7672131147541, "grad_norm": 5.049499988555908, "learning_rate": 1.596470651594479e-05, "loss": 0.8025, "step": 9689 }, { "epoch": 31.770491803278688, "grad_norm": 5.548237323760986, "learning_rate": 1.5963854174774778e-05, "loss": 0.5605, "step": 9690 }, { "epoch": 31.77377049180328, "grad_norm": 4.569095134735107, "learning_rate": 1.5963001766356107e-05, "loss": 0.5375, "step": 9691 }, { "epoch": 31.777049180327868, "grad_norm": 4.590085029602051, "learning_rate": 1.5962149290698392e-05, "loss": 0.6007, "step": 9692 }, { "epoch": 31.78032786885246, "grad_norm": 5.971963405609131, "learning_rate": 1.5961296747811245e-05, "loss": 0.5433, "step": 9693 }, { "epoch": 31.78360655737705, "grad_norm": 5.212219715118408, "learning_rate": 1.5960444137704278e-05, "loss": 0.5936, "step": 9694 }, { "epoch": 31.78688524590164, "grad_norm": 5.103086948394775, "learning_rate": 1.5959591460387107e-05, "loss": 0.7341, "step": 9695 }, { "epoch": 31.79016393442623, "grad_norm": 4.683741092681885, "learning_rate": 1.5958738715869347e-05, "loss": 0.4177, "step": 9696 }, { "epoch": 31.79344262295082, "grad_norm": 4.23539924621582, "learning_rate": 1.5957885904160614e-05, "loss": 0.7928, "step": 9697 }, { "epoch": 31.79672131147541, "grad_norm": 4.637007713317871, "learning_rate": 1.5957033025270517e-05, "loss": 0.372, "step": 9698 }, { "epoch": 31.8, "grad_norm": 5.469669818878174, "learning_rate": 1.5956180079208684e-05, "loss": 0.5643, "step": 9699 }, { "epoch": 31.80327868852459, "grad_norm": 5.226521968841553, "learning_rate": 1.5955327065984727e-05, "loss": 0.56, "step": 9700 }, { "epoch": 31.80655737704918, "grad_norm": 5.600770473480225, "learning_rate": 1.5954473985608263e-05, "loss": 0.7906, "step": 9701 }, { "epoch": 31.80983606557377, "grad_norm": 4.190561771392822, "learning_rate": 1.5953620838088913e-05, "loss": 0.5893, "step": 9702 }, { "epoch": 31.81311475409836, "grad_norm": 6.281290054321289, "learning_rate": 1.59527676234363e-05, "loss": 0.5105, "step": 9703 }, { "epoch": 31.81639344262295, "grad_norm": 4.170708656311035, "learning_rate": 1.5951914341660044e-05, "loss": 0.5401, "step": 9704 }, { "epoch": 31.81967213114754, "grad_norm": 4.666689872741699, "learning_rate": 1.595106099276976e-05, "loss": 0.6458, "step": 9705 }, { "epoch": 31.82295081967213, "grad_norm": 5.145435810089111, "learning_rate": 1.5950207576775082e-05, "loss": 0.7776, "step": 9706 }, { "epoch": 31.82622950819672, "grad_norm": 4.289701461791992, "learning_rate": 1.5949354093685626e-05, "loss": 0.4329, "step": 9707 }, { "epoch": 31.82950819672131, "grad_norm": 5.9271135330200195, "learning_rate": 1.5948500543511015e-05, "loss": 0.6339, "step": 9708 }, { "epoch": 31.832786885245902, "grad_norm": 5.1554059982299805, "learning_rate": 1.5947646926260874e-05, "loss": 0.4511, "step": 9709 }, { "epoch": 31.83606557377049, "grad_norm": 5.082061767578125, "learning_rate": 1.594679324194483e-05, "loss": 0.4996, "step": 9710 }, { "epoch": 31.839344262295082, "grad_norm": 5.562197685241699, "learning_rate": 1.5945939490572514e-05, "loss": 0.4817, "step": 9711 }, { "epoch": 31.84262295081967, "grad_norm": 3.8927321434020996, "learning_rate": 1.5945085672153546e-05, "loss": 0.4383, "step": 9712 }, { "epoch": 31.845901639344262, "grad_norm": 6.066981792449951, "learning_rate": 1.5944231786697554e-05, "loss": 0.4983, "step": 9713 }, { "epoch": 31.84918032786885, "grad_norm": 5.009055137634277, "learning_rate": 1.5943377834214165e-05, "loss": 0.5672, "step": 9714 }, { "epoch": 31.852459016393443, "grad_norm": 4.848581314086914, "learning_rate": 1.5942523814713018e-05, "loss": 0.7411, "step": 9715 }, { "epoch": 31.855737704918035, "grad_norm": 4.7171125411987305, "learning_rate": 1.5941669728203734e-05, "loss": 0.498, "step": 9716 }, { "epoch": 31.859016393442623, "grad_norm": 4.109276294708252, "learning_rate": 1.5940815574695943e-05, "loss": 0.7752, "step": 9717 }, { "epoch": 31.862295081967215, "grad_norm": 4.985367298126221, "learning_rate": 1.593996135419928e-05, "loss": 0.7047, "step": 9718 }, { "epoch": 31.865573770491803, "grad_norm": 5.6317572593688965, "learning_rate": 1.5939107066723384e-05, "loss": 0.4866, "step": 9719 }, { "epoch": 31.868852459016395, "grad_norm": 6.47466516494751, "learning_rate": 1.5938252712277874e-05, "loss": 0.4872, "step": 9720 }, { "epoch": 31.872131147540983, "grad_norm": 5.607895374298096, "learning_rate": 1.5937398290872387e-05, "loss": 0.4741, "step": 9721 }, { "epoch": 31.875409836065575, "grad_norm": 5.350908279418945, "learning_rate": 1.5936543802516568e-05, "loss": 0.7122, "step": 9722 }, { "epoch": 31.878688524590164, "grad_norm": 4.218810081481934, "learning_rate": 1.5935689247220044e-05, "loss": 0.5276, "step": 9723 }, { "epoch": 31.881967213114756, "grad_norm": 3.9377052783966064, "learning_rate": 1.593483462499245e-05, "loss": 0.6498, "step": 9724 }, { "epoch": 31.885245901639344, "grad_norm": 5.13820743560791, "learning_rate": 1.5933979935843423e-05, "loss": 0.689, "step": 9725 }, { "epoch": 31.888524590163936, "grad_norm": 4.735152244567871, "learning_rate": 1.5933125179782608e-05, "loss": 0.5865, "step": 9726 }, { "epoch": 31.891803278688524, "grad_norm": 5.921972751617432, "learning_rate": 1.5932270356819633e-05, "loss": 0.4951, "step": 9727 }, { "epoch": 31.895081967213116, "grad_norm": 10.272222518920898, "learning_rate": 1.5931415466964147e-05, "loss": 0.4803, "step": 9728 }, { "epoch": 31.898360655737704, "grad_norm": 3.878838062286377, "learning_rate": 1.593056051022578e-05, "loss": 0.4527, "step": 9729 }, { "epoch": 31.901639344262296, "grad_norm": 3.966975212097168, "learning_rate": 1.592970548661418e-05, "loss": 0.7608, "step": 9730 }, { "epoch": 31.904918032786885, "grad_norm": 4.331671714782715, "learning_rate": 1.592885039613898e-05, "loss": 0.5184, "step": 9731 }, { "epoch": 31.908196721311477, "grad_norm": 3.927217483520508, "learning_rate": 1.5927995238809833e-05, "loss": 0.4939, "step": 9732 }, { "epoch": 31.911475409836065, "grad_norm": 5.076011657714844, "learning_rate": 1.592714001463637e-05, "loss": 0.6943, "step": 9733 }, { "epoch": 31.914754098360657, "grad_norm": 6.118181228637695, "learning_rate": 1.592628472362825e-05, "loss": 0.449, "step": 9734 }, { "epoch": 31.918032786885245, "grad_norm": 5.504521369934082, "learning_rate": 1.59254293657951e-05, "loss": 0.4527, "step": 9735 }, { "epoch": 31.921311475409837, "grad_norm": 6.451460838317871, "learning_rate": 1.5924573941146574e-05, "loss": 0.5971, "step": 9736 }, { "epoch": 31.924590163934425, "grad_norm": 4.914211273193359, "learning_rate": 1.592371844969232e-05, "loss": 0.6503, "step": 9737 }, { "epoch": 31.927868852459017, "grad_norm": 4.9787468910217285, "learning_rate": 1.592286289144198e-05, "loss": 0.5161, "step": 9738 }, { "epoch": 31.931147540983606, "grad_norm": 6.586679935455322, "learning_rate": 1.5922007266405205e-05, "loss": 0.511, "step": 9739 }, { "epoch": 31.934426229508198, "grad_norm": 6.214733123779297, "learning_rate": 1.5921151574591632e-05, "loss": 0.4337, "step": 9740 }, { "epoch": 31.937704918032786, "grad_norm": 4.103379249572754, "learning_rate": 1.592029581601093e-05, "loss": 0.4484, "step": 9741 }, { "epoch": 31.940983606557378, "grad_norm": 4.607685565948486, "learning_rate": 1.591943999067273e-05, "loss": 0.6715, "step": 9742 }, { "epoch": 31.944262295081966, "grad_norm": 5.933618068695068, "learning_rate": 1.591858409858669e-05, "loss": 0.7016, "step": 9743 }, { "epoch": 31.947540983606558, "grad_norm": 5.897209167480469, "learning_rate": 1.5917728139762464e-05, "loss": 0.3975, "step": 9744 }, { "epoch": 31.950819672131146, "grad_norm": 5.277443885803223, "learning_rate": 1.5916872114209698e-05, "loss": 0.4493, "step": 9745 }, { "epoch": 31.95409836065574, "grad_norm": 5.327122211456299, "learning_rate": 1.5916016021938047e-05, "loss": 0.3944, "step": 9746 }, { "epoch": 31.957377049180327, "grad_norm": 4.891926288604736, "learning_rate": 1.591515986295716e-05, "loss": 0.5705, "step": 9747 }, { "epoch": 31.96065573770492, "grad_norm": 4.761019706726074, "learning_rate": 1.5914303637276703e-05, "loss": 0.7268, "step": 9748 }, { "epoch": 31.963934426229507, "grad_norm": 5.425639629364014, "learning_rate": 1.5913447344906318e-05, "loss": 0.6389, "step": 9749 }, { "epoch": 31.9672131147541, "grad_norm": 5.784884452819824, "learning_rate": 1.5912590985855667e-05, "loss": 0.4434, "step": 9750 }, { "epoch": 31.970491803278687, "grad_norm": 5.117349147796631, "learning_rate": 1.5911734560134403e-05, "loss": 0.4959, "step": 9751 }, { "epoch": 31.97377049180328, "grad_norm": 4.963817119598389, "learning_rate": 1.591087806775219e-05, "loss": 0.5677, "step": 9752 }, { "epoch": 31.977049180327867, "grad_norm": 5.527471542358398, "learning_rate": 1.5910021508718677e-05, "loss": 0.5717, "step": 9753 }, { "epoch": 31.98032786885246, "grad_norm": 4.54756498336792, "learning_rate": 1.590916488304353e-05, "loss": 0.3798, "step": 9754 }, { "epoch": 31.983606557377048, "grad_norm": 5.48799467086792, "learning_rate": 1.5908308190736404e-05, "loss": 0.7424, "step": 9755 }, { "epoch": 31.98688524590164, "grad_norm": 5.295036792755127, "learning_rate": 1.590745143180696e-05, "loss": 0.4419, "step": 9756 }, { "epoch": 31.990163934426228, "grad_norm": 5.2258453369140625, "learning_rate": 1.5906594606264857e-05, "loss": 0.6527, "step": 9757 }, { "epoch": 31.99344262295082, "grad_norm": 5.308283805847168, "learning_rate": 1.590573771411976e-05, "loss": 0.6998, "step": 9758 }, { "epoch": 31.99672131147541, "grad_norm": 4.606551170349121, "learning_rate": 1.590488075538133e-05, "loss": 0.6354, "step": 9759 }, { "epoch": 32.0, "grad_norm": 5.327939510345459, "learning_rate": 1.5904023730059227e-05, "loss": 0.5926, "step": 9760 }, { "epoch": 32.00327868852459, "grad_norm": 4.272768020629883, "learning_rate": 1.590316663816312e-05, "loss": 0.6277, "step": 9761 }, { "epoch": 32.006557377049184, "grad_norm": 4.743110179901123, "learning_rate": 1.5902309479702673e-05, "loss": 0.3718, "step": 9762 }, { "epoch": 32.00983606557377, "grad_norm": 4.808796405792236, "learning_rate": 1.590145225468755e-05, "loss": 0.5628, "step": 9763 }, { "epoch": 32.01311475409836, "grad_norm": 4.182736873626709, "learning_rate": 1.5900594963127414e-05, "loss": 0.6581, "step": 9764 }, { "epoch": 32.01639344262295, "grad_norm": 4.52048921585083, "learning_rate": 1.5899737605031935e-05, "loss": 0.6852, "step": 9765 }, { "epoch": 32.019672131147544, "grad_norm": 4.829336643218994, "learning_rate": 1.589888018041078e-05, "loss": 0.7187, "step": 9766 }, { "epoch": 32.02295081967213, "grad_norm": 4.143379211425781, "learning_rate": 1.589802268927362e-05, "loss": 0.3489, "step": 9767 }, { "epoch": 32.02622950819672, "grad_norm": 4.5925984382629395, "learning_rate": 1.589716513163012e-05, "loss": 0.6145, "step": 9768 }, { "epoch": 32.02950819672131, "grad_norm": 7.700671195983887, "learning_rate": 1.5896307507489953e-05, "loss": 0.4832, "step": 9769 }, { "epoch": 32.032786885245905, "grad_norm": 4.038084983825684, "learning_rate": 1.5895449816862787e-05, "loss": 0.6515, "step": 9770 }, { "epoch": 32.03606557377049, "grad_norm": 4.873740196228027, "learning_rate": 1.5894592059758296e-05, "loss": 0.4678, "step": 9771 }, { "epoch": 32.03934426229508, "grad_norm": 4.723178386688232, "learning_rate": 1.5893734236186148e-05, "loss": 0.7328, "step": 9772 }, { "epoch": 32.04262295081967, "grad_norm": 4.549765110015869, "learning_rate": 1.5892876346156022e-05, "loss": 0.7016, "step": 9773 }, { "epoch": 32.045901639344265, "grad_norm": 4.450799942016602, "learning_rate": 1.5892018389677588e-05, "loss": 0.703, "step": 9774 }, { "epoch": 32.049180327868854, "grad_norm": 14.469578742980957, "learning_rate": 1.5891160366760518e-05, "loss": 0.5684, "step": 9775 }, { "epoch": 32.05245901639344, "grad_norm": 4.453449249267578, "learning_rate": 1.589030227741449e-05, "loss": 0.5799, "step": 9776 }, { "epoch": 32.05573770491803, "grad_norm": 4.856966972351074, "learning_rate": 1.588944412164918e-05, "loss": 0.4046, "step": 9777 }, { "epoch": 32.059016393442626, "grad_norm": 4.512576580047607, "learning_rate": 1.5888585899474266e-05, "loss": 0.6475, "step": 9778 }, { "epoch": 32.062295081967214, "grad_norm": 4.950222015380859, "learning_rate": 1.588772761089942e-05, "loss": 0.5186, "step": 9779 }, { "epoch": 32.0655737704918, "grad_norm": 5.271261692047119, "learning_rate": 1.5886869255934326e-05, "loss": 0.3518, "step": 9780 }, { "epoch": 32.06885245901639, "grad_norm": 5.917337417602539, "learning_rate": 1.588601083458866e-05, "loss": 0.577, "step": 9781 }, { "epoch": 32.072131147540986, "grad_norm": 5.207993507385254, "learning_rate": 1.5885152346872098e-05, "loss": 0.4311, "step": 9782 }, { "epoch": 32.075409836065575, "grad_norm": 4.801064491271973, "learning_rate": 1.5884293792794328e-05, "loss": 0.4421, "step": 9783 }, { "epoch": 32.07868852459016, "grad_norm": 4.683412075042725, "learning_rate": 1.588343517236503e-05, "loss": 0.6201, "step": 9784 }, { "epoch": 32.08196721311475, "grad_norm": 3.9995415210723877, "learning_rate": 1.5882576485593875e-05, "loss": 0.6806, "step": 9785 }, { "epoch": 32.08524590163935, "grad_norm": 4.624195098876953, "learning_rate": 1.588171773249056e-05, "loss": 0.6154, "step": 9786 }, { "epoch": 32.088524590163935, "grad_norm": 4.0348076820373535, "learning_rate": 1.5880858913064764e-05, "loss": 0.6889, "step": 9787 }, { "epoch": 32.09180327868852, "grad_norm": 23.709712982177734, "learning_rate": 1.5880000027326164e-05, "loss": 0.4238, "step": 9788 }, { "epoch": 32.09508196721311, "grad_norm": 4.387264251708984, "learning_rate": 1.587914107528445e-05, "loss": 0.5051, "step": 9789 }, { "epoch": 32.09836065573771, "grad_norm": 5.608233451843262, "learning_rate": 1.587828205694931e-05, "loss": 0.4919, "step": 9790 }, { "epoch": 32.101639344262296, "grad_norm": 4.211345672607422, "learning_rate": 1.587742297233043e-05, "loss": 0.3927, "step": 9791 }, { "epoch": 32.104918032786884, "grad_norm": 4.429272174835205, "learning_rate": 1.587656382143749e-05, "loss": 0.3071, "step": 9792 }, { "epoch": 32.10819672131147, "grad_norm": 4.581762313842773, "learning_rate": 1.5875704604280188e-05, "loss": 0.5401, "step": 9793 }, { "epoch": 32.11147540983607, "grad_norm": 4.697472095489502, "learning_rate": 1.5874845320868205e-05, "loss": 0.6383, "step": 9794 }, { "epoch": 32.114754098360656, "grad_norm": 4.979023456573486, "learning_rate": 1.5873985971211233e-05, "loss": 0.6151, "step": 9795 }, { "epoch": 32.118032786885244, "grad_norm": 4.715385913848877, "learning_rate": 1.5873126555318957e-05, "loss": 0.7356, "step": 9796 }, { "epoch": 32.12131147540983, "grad_norm": 4.075236797332764, "learning_rate": 1.5872267073201082e-05, "loss": 0.476, "step": 9797 }, { "epoch": 32.12459016393443, "grad_norm": 5.740445137023926, "learning_rate": 1.5871407524867284e-05, "loss": 0.747, "step": 9798 }, { "epoch": 32.12786885245902, "grad_norm": 4.2894606590271, "learning_rate": 1.5870547910327262e-05, "loss": 0.6399, "step": 9799 }, { "epoch": 32.131147540983605, "grad_norm": 4.586593151092529, "learning_rate": 1.586968822959071e-05, "loss": 0.4972, "step": 9800 }, { "epoch": 32.13442622950819, "grad_norm": 5.637854099273682, "learning_rate": 1.5868828482667318e-05, "loss": 0.642, "step": 9801 }, { "epoch": 32.13770491803279, "grad_norm": 4.240332126617432, "learning_rate": 1.5867968669566782e-05, "loss": 0.7966, "step": 9802 }, { "epoch": 32.14098360655738, "grad_norm": 4.464478492736816, "learning_rate": 1.5867108790298804e-05, "loss": 0.4319, "step": 9803 }, { "epoch": 32.144262295081965, "grad_norm": 4.982384204864502, "learning_rate": 1.5866248844873066e-05, "loss": 0.4989, "step": 9804 }, { "epoch": 32.14754098360656, "grad_norm": 4.678116321563721, "learning_rate": 1.5865388833299276e-05, "loss": 0.589, "step": 9805 }, { "epoch": 32.15081967213115, "grad_norm": 5.2629570960998535, "learning_rate": 1.586452875558713e-05, "loss": 0.5818, "step": 9806 }, { "epoch": 32.15409836065574, "grad_norm": 5.481082916259766, "learning_rate": 1.5863668611746325e-05, "loss": 0.8269, "step": 9807 }, { "epoch": 32.157377049180326, "grad_norm": 5.080588340759277, "learning_rate": 1.586280840178656e-05, "loss": 0.4301, "step": 9808 }, { "epoch": 32.16065573770492, "grad_norm": 3.8341012001037598, "learning_rate": 1.5861948125717534e-05, "loss": 0.5182, "step": 9809 }, { "epoch": 32.16393442622951, "grad_norm": 4.61167573928833, "learning_rate": 1.5861087783548947e-05, "loss": 0.6475, "step": 9810 }, { "epoch": 32.1672131147541, "grad_norm": 4.394932746887207, "learning_rate": 1.5860227375290502e-05, "loss": 0.662, "step": 9811 }, { "epoch": 32.170491803278686, "grad_norm": 4.912989139556885, "learning_rate": 1.58593669009519e-05, "loss": 0.6099, "step": 9812 }, { "epoch": 32.17377049180328, "grad_norm": 4.09428596496582, "learning_rate": 1.5858506360542844e-05, "loss": 0.6993, "step": 9813 }, { "epoch": 32.17704918032787, "grad_norm": 4.952300548553467, "learning_rate": 1.5857645754073038e-05, "loss": 0.4542, "step": 9814 }, { "epoch": 32.18032786885246, "grad_norm": 5.674692153930664, "learning_rate": 1.5856785081552182e-05, "loss": 0.432, "step": 9815 }, { "epoch": 32.18360655737705, "grad_norm": 5.423886299133301, "learning_rate": 1.585592434298999e-05, "loss": 0.6293, "step": 9816 }, { "epoch": 32.18688524590164, "grad_norm": 5.111644744873047, "learning_rate": 1.585506353839616e-05, "loss": 0.3896, "step": 9817 }, { "epoch": 32.19016393442623, "grad_norm": 5.39193868637085, "learning_rate": 1.58542026677804e-05, "loss": 0.5971, "step": 9818 }, { "epoch": 32.19344262295082, "grad_norm": 5.817603588104248, "learning_rate": 1.5853341731152418e-05, "loss": 0.5995, "step": 9819 }, { "epoch": 32.19672131147541, "grad_norm": 4.101292610168457, "learning_rate": 1.5852480728521925e-05, "loss": 0.3595, "step": 9820 }, { "epoch": 32.2, "grad_norm": 6.668491363525391, "learning_rate": 1.5851619659898623e-05, "loss": 0.6522, "step": 9821 }, { "epoch": 32.20327868852459, "grad_norm": 4.730857849121094, "learning_rate": 1.5850758525292228e-05, "loss": 0.4527, "step": 9822 }, { "epoch": 32.20655737704918, "grad_norm": 5.256827354431152, "learning_rate": 1.5849897324712446e-05, "loss": 0.6663, "step": 9823 }, { "epoch": 32.20983606557377, "grad_norm": 4.52020263671875, "learning_rate": 1.584903605816899e-05, "loss": 0.6569, "step": 9824 }, { "epoch": 32.21311475409836, "grad_norm": 19.94947624206543, "learning_rate": 1.584817472567157e-05, "loss": 0.5704, "step": 9825 }, { "epoch": 32.21639344262295, "grad_norm": 5.219054222106934, "learning_rate": 1.5847313327229897e-05, "loss": 0.6483, "step": 9826 }, { "epoch": 32.21967213114754, "grad_norm": 4.70859956741333, "learning_rate": 1.5846451862853694e-05, "loss": 0.5957, "step": 9827 }, { "epoch": 32.22295081967213, "grad_norm": 4.186412811279297, "learning_rate": 1.5845590332552662e-05, "loss": 0.581, "step": 9828 }, { "epoch": 32.226229508196724, "grad_norm": 4.787449836730957, "learning_rate": 1.584472873633652e-05, "loss": 0.6831, "step": 9829 }, { "epoch": 32.22950819672131, "grad_norm": 4.191562175750732, "learning_rate": 1.584386707421499e-05, "loss": 0.7087, "step": 9830 }, { "epoch": 32.2327868852459, "grad_norm": 5.271337032318115, "learning_rate": 1.5843005346197776e-05, "loss": 0.3948, "step": 9831 }, { "epoch": 32.23606557377049, "grad_norm": 4.116720676422119, "learning_rate": 1.5842143552294606e-05, "loss": 0.6225, "step": 9832 }, { "epoch": 32.239344262295084, "grad_norm": 5.791341781616211, "learning_rate": 1.5841281692515193e-05, "loss": 0.6344, "step": 9833 }, { "epoch": 32.24262295081967, "grad_norm": 4.80037260055542, "learning_rate": 1.584041976686925e-05, "loss": 0.695, "step": 9834 }, { "epoch": 32.24590163934426, "grad_norm": 5.313959121704102, "learning_rate": 1.583955777536651e-05, "loss": 0.475, "step": 9835 }, { "epoch": 32.24918032786885, "grad_norm": 6.6608099937438965, "learning_rate": 1.583869571801668e-05, "loss": 0.6377, "step": 9836 }, { "epoch": 32.252459016393445, "grad_norm": 6.629693508148193, "learning_rate": 1.5837833594829487e-05, "loss": 0.4718, "step": 9837 }, { "epoch": 32.25573770491803, "grad_norm": 4.876755714416504, "learning_rate": 1.583697140581465e-05, "loss": 0.6417, "step": 9838 }, { "epoch": 32.25901639344262, "grad_norm": 5.509284973144531, "learning_rate": 1.5836109150981885e-05, "loss": 0.4239, "step": 9839 }, { "epoch": 32.26229508196721, "grad_norm": 5.089576721191406, "learning_rate": 1.5835246830340933e-05, "loss": 0.7193, "step": 9840 }, { "epoch": 32.265573770491805, "grad_norm": 4.403780937194824, "learning_rate": 1.58343844439015e-05, "loss": 0.347, "step": 9841 }, { "epoch": 32.268852459016394, "grad_norm": 5.839195251464844, "learning_rate": 1.5833521991673314e-05, "loss": 0.6337, "step": 9842 }, { "epoch": 32.27213114754098, "grad_norm": 4.511570453643799, "learning_rate": 1.5832659473666102e-05, "loss": 0.5993, "step": 9843 }, { "epoch": 32.27540983606557, "grad_norm": 4.769787788391113, "learning_rate": 1.583179688988959e-05, "loss": 0.655, "step": 9844 }, { "epoch": 32.278688524590166, "grad_norm": 5.073380947113037, "learning_rate": 1.5830934240353508e-05, "loss": 0.4039, "step": 9845 }, { "epoch": 32.281967213114754, "grad_norm": 4.553647041320801, "learning_rate": 1.583007152506758e-05, "loss": 0.4593, "step": 9846 }, { "epoch": 32.28524590163934, "grad_norm": 4.677957534790039, "learning_rate": 1.582920874404153e-05, "loss": 0.6149, "step": 9847 }, { "epoch": 32.28852459016394, "grad_norm": 4.036916255950928, "learning_rate": 1.5828345897285093e-05, "loss": 0.5251, "step": 9848 }, { "epoch": 32.291803278688526, "grad_norm": 4.348800182342529, "learning_rate": 1.5827482984807997e-05, "loss": 0.6837, "step": 9849 }, { "epoch": 32.295081967213115, "grad_norm": 4.4587531089782715, "learning_rate": 1.582662000661997e-05, "loss": 0.6948, "step": 9850 }, { "epoch": 32.2983606557377, "grad_norm": 4.215157508850098, "learning_rate": 1.5825756962730743e-05, "loss": 0.8183, "step": 9851 }, { "epoch": 32.3016393442623, "grad_norm": 5.96632719039917, "learning_rate": 1.582489385315005e-05, "loss": 0.676, "step": 9852 }, { "epoch": 32.30491803278689, "grad_norm": 4.746922969818115, "learning_rate": 1.5824030677887622e-05, "loss": 0.6035, "step": 9853 }, { "epoch": 32.308196721311475, "grad_norm": 4.592174053192139, "learning_rate": 1.5823167436953192e-05, "loss": 0.6962, "step": 9854 }, { "epoch": 32.31147540983606, "grad_norm": 4.252824783325195, "learning_rate": 1.5822304130356497e-05, "loss": 0.5993, "step": 9855 }, { "epoch": 32.31475409836066, "grad_norm": 3.931375026702881, "learning_rate": 1.5821440758107268e-05, "loss": 0.6148, "step": 9856 }, { "epoch": 32.31803278688525, "grad_norm": 10.13552188873291, "learning_rate": 1.5820577320215242e-05, "loss": 0.7193, "step": 9857 }, { "epoch": 32.321311475409836, "grad_norm": 4.2297210693359375, "learning_rate": 1.5819713816690153e-05, "loss": 0.443, "step": 9858 }, { "epoch": 32.324590163934424, "grad_norm": 5.274296760559082, "learning_rate": 1.5818850247541742e-05, "loss": 0.5233, "step": 9859 }, { "epoch": 32.32786885245902, "grad_norm": 4.6251301765441895, "learning_rate": 1.5817986612779746e-05, "loss": 0.4248, "step": 9860 }, { "epoch": 32.33114754098361, "grad_norm": 4.744899272918701, "learning_rate": 1.5817122912413897e-05, "loss": 0.4646, "step": 9861 }, { "epoch": 32.334426229508196, "grad_norm": 4.488003730773926, "learning_rate": 1.5816259146453942e-05, "loss": 0.5129, "step": 9862 }, { "epoch": 32.337704918032784, "grad_norm": 5.373057842254639, "learning_rate": 1.5815395314909615e-05, "loss": 0.5547, "step": 9863 }, { "epoch": 32.34098360655738, "grad_norm": 5.085632801055908, "learning_rate": 1.5814531417790664e-05, "loss": 0.5735, "step": 9864 }, { "epoch": 32.34426229508197, "grad_norm": 4.231005668640137, "learning_rate": 1.5813667455106822e-05, "loss": 0.3889, "step": 9865 }, { "epoch": 32.34754098360656, "grad_norm": 5.613468170166016, "learning_rate": 1.5812803426867834e-05, "loss": 0.531, "step": 9866 }, { "epoch": 32.350819672131145, "grad_norm": 4.372595310211182, "learning_rate": 1.581193933308345e-05, "loss": 0.6376, "step": 9867 }, { "epoch": 32.35409836065574, "grad_norm": 4.415919780731201, "learning_rate": 1.58110751737634e-05, "loss": 0.391, "step": 9868 }, { "epoch": 32.35737704918033, "grad_norm": 4.480316162109375, "learning_rate": 1.581021094891744e-05, "loss": 0.6181, "step": 9869 }, { "epoch": 32.36065573770492, "grad_norm": 5.512209415435791, "learning_rate": 1.5809346658555303e-05, "loss": 0.5947, "step": 9870 }, { "epoch": 32.363934426229505, "grad_norm": 5.399904251098633, "learning_rate": 1.580848230268675e-05, "loss": 0.5204, "step": 9871 }, { "epoch": 32.3672131147541, "grad_norm": 7.116243362426758, "learning_rate": 1.5807617881321516e-05, "loss": 0.6801, "step": 9872 }, { "epoch": 32.37049180327869, "grad_norm": 4.317626953125, "learning_rate": 1.5806753394469353e-05, "loss": 0.6922, "step": 9873 }, { "epoch": 32.37377049180328, "grad_norm": 4.384377479553223, "learning_rate": 1.580588884214001e-05, "loss": 0.6167, "step": 9874 }, { "epoch": 32.377049180327866, "grad_norm": 4.839547634124756, "learning_rate": 1.5805024224343233e-05, "loss": 0.6026, "step": 9875 }, { "epoch": 32.38032786885246, "grad_norm": 4.294506072998047, "learning_rate": 1.5804159541088768e-05, "loss": 0.4421, "step": 9876 }, { "epoch": 32.38360655737705, "grad_norm": 5.0359601974487305, "learning_rate": 1.5803294792386375e-05, "loss": 0.6422, "step": 9877 }, { "epoch": 32.38688524590164, "grad_norm": 4.976582050323486, "learning_rate": 1.5802429978245797e-05, "loss": 0.5053, "step": 9878 }, { "epoch": 32.390163934426226, "grad_norm": 5.172052383422852, "learning_rate": 1.5801565098676786e-05, "loss": 0.3722, "step": 9879 }, { "epoch": 32.39344262295082, "grad_norm": 4.902065753936768, "learning_rate": 1.58007001536891e-05, "loss": 0.5573, "step": 9880 }, { "epoch": 32.39672131147541, "grad_norm": 4.498410224914551, "learning_rate": 1.5799835143292486e-05, "loss": 0.4842, "step": 9881 }, { "epoch": 32.4, "grad_norm": 4.479353427886963, "learning_rate": 1.57989700674967e-05, "loss": 0.4888, "step": 9882 }, { "epoch": 32.40327868852459, "grad_norm": 5.553475379943848, "learning_rate": 1.57981049263115e-05, "loss": 0.6133, "step": 9883 }, { "epoch": 32.40655737704918, "grad_norm": 4.179698944091797, "learning_rate": 1.5797239719746635e-05, "loss": 0.5074, "step": 9884 }, { "epoch": 32.40983606557377, "grad_norm": 4.106118202209473, "learning_rate": 1.5796374447811868e-05, "loss": 0.5331, "step": 9885 }, { "epoch": 32.41311475409836, "grad_norm": 5.120647430419922, "learning_rate": 1.579550911051695e-05, "loss": 0.5309, "step": 9886 }, { "epoch": 32.41639344262295, "grad_norm": 4.549633026123047, "learning_rate": 1.5794643707871638e-05, "loss": 0.6428, "step": 9887 }, { "epoch": 32.41967213114754, "grad_norm": 4.700777053833008, "learning_rate": 1.5793778239885698e-05, "loss": 0.4401, "step": 9888 }, { "epoch": 32.42295081967213, "grad_norm": 4.575345039367676, "learning_rate": 1.5792912706568883e-05, "loss": 0.6353, "step": 9889 }, { "epoch": 32.42622950819672, "grad_norm": 4.862353801727295, "learning_rate": 1.5792047107930953e-05, "loss": 0.6644, "step": 9890 }, { "epoch": 32.429508196721315, "grad_norm": 4.252007007598877, "learning_rate": 1.579118144398167e-05, "loss": 0.5563, "step": 9891 }, { "epoch": 32.4327868852459, "grad_norm": 4.817765235900879, "learning_rate": 1.5790315714730797e-05, "loss": 0.5181, "step": 9892 }, { "epoch": 32.43606557377049, "grad_norm": 5.9954833984375, "learning_rate": 1.5789449920188092e-05, "loss": 0.4956, "step": 9893 }, { "epoch": 32.43934426229508, "grad_norm": 5.6587042808532715, "learning_rate": 1.578858406036332e-05, "loss": 0.3596, "step": 9894 }, { "epoch": 32.442622950819676, "grad_norm": 4.970775604248047, "learning_rate": 1.5787718135266246e-05, "loss": 0.7757, "step": 9895 }, { "epoch": 32.445901639344264, "grad_norm": 4.80876350402832, "learning_rate": 1.5786852144906634e-05, "loss": 0.8404, "step": 9896 }, { "epoch": 32.44918032786885, "grad_norm": 5.181568622589111, "learning_rate": 1.578598608929424e-05, "loss": 0.7906, "step": 9897 }, { "epoch": 32.45245901639344, "grad_norm": 4.845080852508545, "learning_rate": 1.578511996843884e-05, "loss": 0.7956, "step": 9898 }, { "epoch": 32.455737704918036, "grad_norm": 4.637954235076904, "learning_rate": 1.57842537823502e-05, "loss": 0.7027, "step": 9899 }, { "epoch": 32.459016393442624, "grad_norm": 4.835512638092041, "learning_rate": 1.578338753103808e-05, "loss": 0.5162, "step": 9900 }, { "epoch": 32.46229508196721, "grad_norm": 4.960812568664551, "learning_rate": 1.5782521214512257e-05, "loss": 0.5997, "step": 9901 }, { "epoch": 32.4655737704918, "grad_norm": 5.296941757202148, "learning_rate": 1.5781654832782495e-05, "loss": 0.5673, "step": 9902 }, { "epoch": 32.4688524590164, "grad_norm": 5.2610979080200195, "learning_rate": 1.578078838585856e-05, "loss": 0.3779, "step": 9903 }, { "epoch": 32.472131147540985, "grad_norm": 4.295828819274902, "learning_rate": 1.5779921873750225e-05, "loss": 0.6377, "step": 9904 }, { "epoch": 32.47540983606557, "grad_norm": 4.745842933654785, "learning_rate": 1.5779055296467264e-05, "loss": 0.575, "step": 9905 }, { "epoch": 32.47868852459016, "grad_norm": 4.896256923675537, "learning_rate": 1.5778188654019446e-05, "loss": 0.5873, "step": 9906 }, { "epoch": 32.48196721311476, "grad_norm": 4.498252868652344, "learning_rate": 1.5777321946416542e-05, "loss": 0.5464, "step": 9907 }, { "epoch": 32.485245901639345, "grad_norm": 4.567569732666016, "learning_rate": 1.5776455173668325e-05, "loss": 0.5169, "step": 9908 }, { "epoch": 32.488524590163934, "grad_norm": 5.629577159881592, "learning_rate": 1.5775588335784574e-05, "loss": 0.6189, "step": 9909 }, { "epoch": 32.49180327868852, "grad_norm": 4.567876815795898, "learning_rate": 1.5774721432775053e-05, "loss": 0.5888, "step": 9910 }, { "epoch": 32.49508196721312, "grad_norm": 6.573241710662842, "learning_rate": 1.5773854464649548e-05, "loss": 0.4987, "step": 9911 }, { "epoch": 32.498360655737706, "grad_norm": 3.954976797103882, "learning_rate": 1.577298743141783e-05, "loss": 0.6406, "step": 9912 }, { "epoch": 32.501639344262294, "grad_norm": 3.623753309249878, "learning_rate": 1.5772120333089675e-05, "loss": 0.4994, "step": 9913 }, { "epoch": 32.50491803278688, "grad_norm": 4.3187575340271, "learning_rate": 1.5771253169674862e-05, "loss": 0.4997, "step": 9914 }, { "epoch": 32.50819672131148, "grad_norm": 4.376950740814209, "learning_rate": 1.577038594118317e-05, "loss": 0.6392, "step": 9915 }, { "epoch": 32.511475409836066, "grad_norm": 4.125265121459961, "learning_rate": 1.5769518647624378e-05, "loss": 0.3701, "step": 9916 }, { "epoch": 32.514754098360655, "grad_norm": 4.806239128112793, "learning_rate": 1.5768651289008265e-05, "loss": 0.5225, "step": 9917 }, { "epoch": 32.51803278688524, "grad_norm": 4.722141265869141, "learning_rate": 1.5767783865344605e-05, "loss": 0.6725, "step": 9918 }, { "epoch": 32.52131147540984, "grad_norm": 3.788609027862549, "learning_rate": 1.5766916376643192e-05, "loss": 0.6908, "step": 9919 }, { "epoch": 32.52459016393443, "grad_norm": 4.029853343963623, "learning_rate": 1.5766048822913795e-05, "loss": 0.5139, "step": 9920 }, { "epoch": 32.527868852459015, "grad_norm": 4.4992804527282715, "learning_rate": 1.5765181204166203e-05, "loss": 0.5366, "step": 9921 }, { "epoch": 32.5311475409836, "grad_norm": 4.8202409744262695, "learning_rate": 1.5764313520410205e-05, "loss": 0.4392, "step": 9922 }, { "epoch": 32.5344262295082, "grad_norm": 4.389009952545166, "learning_rate": 1.576344577165557e-05, "loss": 0.7463, "step": 9923 }, { "epoch": 32.53770491803279, "grad_norm": 4.959370136260986, "learning_rate": 1.57625779579121e-05, "loss": 0.679, "step": 9924 }, { "epoch": 32.540983606557376, "grad_norm": 4.547249794006348, "learning_rate": 1.5761710079189563e-05, "loss": 0.5595, "step": 9925 }, { "epoch": 32.544262295081964, "grad_norm": 10.004907608032227, "learning_rate": 1.576084213549776e-05, "loss": 0.4757, "step": 9926 }, { "epoch": 32.54754098360656, "grad_norm": 4.328404426574707, "learning_rate": 1.575997412684647e-05, "loss": 0.381, "step": 9927 }, { "epoch": 32.55081967213115, "grad_norm": 4.313799858093262, "learning_rate": 1.5759106053245483e-05, "loss": 0.4698, "step": 9928 }, { "epoch": 32.554098360655736, "grad_norm": 4.588548183441162, "learning_rate": 1.5758237914704587e-05, "loss": 0.4108, "step": 9929 }, { "epoch": 32.557377049180324, "grad_norm": 4.306911468505859, "learning_rate": 1.5757369711233574e-05, "loss": 0.5024, "step": 9930 }, { "epoch": 32.56065573770492, "grad_norm": 4.450580596923828, "learning_rate": 1.575650144284223e-05, "loss": 0.7239, "step": 9931 }, { "epoch": 32.56393442622951, "grad_norm": 3.778826951980591, "learning_rate": 1.575563310954035e-05, "loss": 0.8172, "step": 9932 }, { "epoch": 32.5672131147541, "grad_norm": 4.1911091804504395, "learning_rate": 1.575476471133772e-05, "loss": 0.5612, "step": 9933 }, { "epoch": 32.570491803278685, "grad_norm": 3.9565603733062744, "learning_rate": 1.575389624824413e-05, "loss": 0.4966, "step": 9934 }, { "epoch": 32.57377049180328, "grad_norm": 4.6605963706970215, "learning_rate": 1.575302772026938e-05, "loss": 0.7781, "step": 9935 }, { "epoch": 32.57704918032787, "grad_norm": 5.383401393890381, "learning_rate": 1.5752159127423262e-05, "loss": 0.4284, "step": 9936 }, { "epoch": 32.58032786885246, "grad_norm": 4.570641040802002, "learning_rate": 1.575129046971557e-05, "loss": 0.6756, "step": 9937 }, { "epoch": 32.58360655737705, "grad_norm": 4.0008649826049805, "learning_rate": 1.5750421747156096e-05, "loss": 0.4143, "step": 9938 }, { "epoch": 32.58688524590164, "grad_norm": 6.204034328460693, "learning_rate": 1.574955295975464e-05, "loss": 0.4991, "step": 9939 }, { "epoch": 32.59016393442623, "grad_norm": 4.635389804840088, "learning_rate": 1.5748684107520994e-05, "loss": 0.3973, "step": 9940 }, { "epoch": 32.59344262295082, "grad_norm": 5.500078201293945, "learning_rate": 1.5747815190464956e-05, "loss": 0.5447, "step": 9941 }, { "epoch": 32.59672131147541, "grad_norm": 4.5814642906188965, "learning_rate": 1.5746946208596326e-05, "loss": 0.4673, "step": 9942 }, { "epoch": 32.6, "grad_norm": 4.18301248550415, "learning_rate": 1.5746077161924905e-05, "loss": 0.5023, "step": 9943 }, { "epoch": 32.60327868852459, "grad_norm": 5.5455803871154785, "learning_rate": 1.5745208050460492e-05, "loss": 0.387, "step": 9944 }, { "epoch": 32.60655737704918, "grad_norm": 4.073360443115234, "learning_rate": 1.574433887421288e-05, "loss": 0.5401, "step": 9945 }, { "epoch": 32.609836065573774, "grad_norm": 6.117594242095947, "learning_rate": 1.5743469633191878e-05, "loss": 0.5569, "step": 9946 }, { "epoch": 32.61311475409836, "grad_norm": 4.14094352722168, "learning_rate": 1.574260032740728e-05, "loss": 0.5744, "step": 9947 }, { "epoch": 32.61639344262295, "grad_norm": 5.069639682769775, "learning_rate": 1.5741730956868896e-05, "loss": 0.4615, "step": 9948 }, { "epoch": 32.61967213114754, "grad_norm": 4.782094478607178, "learning_rate": 1.5740861521586525e-05, "loss": 0.6118, "step": 9949 }, { "epoch": 32.622950819672134, "grad_norm": 4.464630126953125, "learning_rate": 1.5739992021569968e-05, "loss": 0.6032, "step": 9950 }, { "epoch": 32.62622950819672, "grad_norm": 5.043236255645752, "learning_rate": 1.5739122456829036e-05, "loss": 0.7066, "step": 9951 }, { "epoch": 32.62950819672131, "grad_norm": 4.640513896942139, "learning_rate": 1.573825282737353e-05, "loss": 0.3158, "step": 9952 }, { "epoch": 32.6327868852459, "grad_norm": 3.9003684520721436, "learning_rate": 1.573738313321326e-05, "loss": 0.5648, "step": 9953 }, { "epoch": 32.636065573770495, "grad_norm": 3.663421154022217, "learning_rate": 1.5736513374358025e-05, "loss": 0.423, "step": 9954 }, { "epoch": 32.63934426229508, "grad_norm": 4.012012481689453, "learning_rate": 1.573564355081764e-05, "loss": 0.595, "step": 9955 }, { "epoch": 32.64262295081967, "grad_norm": 6.287089824676514, "learning_rate": 1.573477366260191e-05, "loss": 0.5992, "step": 9956 }, { "epoch": 32.64590163934426, "grad_norm": 4.580203056335449, "learning_rate": 1.5733903709720646e-05, "loss": 0.7174, "step": 9957 }, { "epoch": 32.649180327868855, "grad_norm": 7.219116687774658, "learning_rate": 1.5733033692183656e-05, "loss": 0.5346, "step": 9958 }, { "epoch": 32.65245901639344, "grad_norm": 4.4843573570251465, "learning_rate": 1.5732163610000745e-05, "loss": 0.4599, "step": 9959 }, { "epoch": 32.65573770491803, "grad_norm": 4.527775764465332, "learning_rate": 1.5731293463181736e-05, "loss": 0.4336, "step": 9960 }, { "epoch": 32.65901639344262, "grad_norm": 3.94797945022583, "learning_rate": 1.5730423251736427e-05, "loss": 0.5533, "step": 9961 }, { "epoch": 32.662295081967216, "grad_norm": 6.867072105407715, "learning_rate": 1.5729552975674644e-05, "loss": 0.3462, "step": 9962 }, { "epoch": 32.665573770491804, "grad_norm": 4.751415729522705, "learning_rate": 1.572868263500619e-05, "loss": 0.702, "step": 9963 }, { "epoch": 32.66885245901639, "grad_norm": 5.45114803314209, "learning_rate": 1.5727812229740887e-05, "loss": 0.6272, "step": 9964 }, { "epoch": 32.67213114754098, "grad_norm": 4.720691204071045, "learning_rate": 1.572694175988854e-05, "loss": 0.4783, "step": 9965 }, { "epoch": 32.675409836065576, "grad_norm": 4.226512908935547, "learning_rate": 1.5726071225458977e-05, "loss": 0.352, "step": 9966 }, { "epoch": 32.678688524590164, "grad_norm": 4.55392599105835, "learning_rate": 1.5725200626462e-05, "loss": 0.5809, "step": 9967 }, { "epoch": 32.68196721311475, "grad_norm": 4.784919738769531, "learning_rate": 1.5724329962907438e-05, "loss": 0.4871, "step": 9968 }, { "epoch": 32.68524590163934, "grad_norm": 5.0262627601623535, "learning_rate": 1.5723459234805103e-05, "loss": 0.7399, "step": 9969 }, { "epoch": 32.68852459016394, "grad_norm": 4.3680243492126465, "learning_rate": 1.5722588442164813e-05, "loss": 0.3608, "step": 9970 }, { "epoch": 32.691803278688525, "grad_norm": 5.409823417663574, "learning_rate": 1.5721717584996392e-05, "loss": 0.4134, "step": 9971 }, { "epoch": 32.69508196721311, "grad_norm": 15.006057739257812, "learning_rate": 1.5720846663309654e-05, "loss": 0.4879, "step": 9972 }, { "epoch": 32.6983606557377, "grad_norm": 4.904348373413086, "learning_rate": 1.571997567711442e-05, "loss": 0.419, "step": 9973 }, { "epoch": 32.7016393442623, "grad_norm": 4.587015628814697, "learning_rate": 1.5719104626420513e-05, "loss": 0.5453, "step": 9974 }, { "epoch": 32.704918032786885, "grad_norm": 5.791821002960205, "learning_rate": 1.571823351123776e-05, "loss": 0.7418, "step": 9975 }, { "epoch": 32.708196721311474, "grad_norm": 5.180624485015869, "learning_rate": 1.5717362331575973e-05, "loss": 0.6589, "step": 9976 }, { "epoch": 32.71147540983607, "grad_norm": 3.6979596614837646, "learning_rate": 1.571649108744498e-05, "loss": 0.4673, "step": 9977 }, { "epoch": 32.71475409836066, "grad_norm": 4.696988582611084, "learning_rate": 1.5715619778854613e-05, "loss": 0.5226, "step": 9978 }, { "epoch": 32.718032786885246, "grad_norm": 4.3411970138549805, "learning_rate": 1.5714748405814683e-05, "loss": 0.5406, "step": 9979 }, { "epoch": 32.721311475409834, "grad_norm": 11.248299598693848, "learning_rate": 1.5713876968335028e-05, "loss": 0.5762, "step": 9980 }, { "epoch": 32.72459016393443, "grad_norm": 4.266378879547119, "learning_rate": 1.5713005466425466e-05, "loss": 0.651, "step": 9981 }, { "epoch": 32.72786885245902, "grad_norm": 3.831602096557617, "learning_rate": 1.5712133900095826e-05, "loss": 0.5802, "step": 9982 }, { "epoch": 32.731147540983606, "grad_norm": 4.965867042541504, "learning_rate": 1.5711262269355944e-05, "loss": 0.4451, "step": 9983 }, { "epoch": 32.734426229508195, "grad_norm": 4.940347671508789, "learning_rate": 1.571039057421564e-05, "loss": 0.4933, "step": 9984 }, { "epoch": 32.73770491803279, "grad_norm": 4.7744975090026855, "learning_rate": 1.5709518814684737e-05, "loss": 0.6505, "step": 9985 }, { "epoch": 32.74098360655738, "grad_norm": 4.171703338623047, "learning_rate": 1.5708646990773083e-05, "loss": 0.3602, "step": 9986 }, { "epoch": 32.74426229508197, "grad_norm": 4.7734479904174805, "learning_rate": 1.5707775102490493e-05, "loss": 0.7768, "step": 9987 }, { "epoch": 32.747540983606555, "grad_norm": 4.3497233390808105, "learning_rate": 1.5706903149846805e-05, "loss": 0.629, "step": 9988 }, { "epoch": 32.75081967213115, "grad_norm": 4.728837966918945, "learning_rate": 1.5706031132851852e-05, "loss": 0.6639, "step": 9989 }, { "epoch": 32.75409836065574, "grad_norm": 5.057740688323975, "learning_rate": 1.5705159051515464e-05, "loss": 0.5748, "step": 9990 }, { "epoch": 32.75737704918033, "grad_norm": 5.552755355834961, "learning_rate": 1.5704286905847476e-05, "loss": 0.7185, "step": 9991 }, { "epoch": 32.760655737704916, "grad_norm": 5.093021869659424, "learning_rate": 1.5703414695857723e-05, "loss": 0.6284, "step": 9992 }, { "epoch": 32.76393442622951, "grad_norm": 12.811629295349121, "learning_rate": 1.5702542421556035e-05, "loss": 0.5675, "step": 9993 }, { "epoch": 32.7672131147541, "grad_norm": 5.033692836761475, "learning_rate": 1.5701670082952258e-05, "loss": 0.595, "step": 9994 }, { "epoch": 32.77049180327869, "grad_norm": 5.1351141929626465, "learning_rate": 1.5700797680056216e-05, "loss": 0.3895, "step": 9995 }, { "epoch": 32.773770491803276, "grad_norm": 4.089172840118408, "learning_rate": 1.5699925212877757e-05, "loss": 0.3714, "step": 9996 }, { "epoch": 32.77704918032787, "grad_norm": 5.705941677093506, "learning_rate": 1.5699052681426716e-05, "loss": 0.5194, "step": 9997 }, { "epoch": 32.78032786885246, "grad_norm": 6.0807976722717285, "learning_rate": 1.5698180085712928e-05, "loss": 0.9238, "step": 9998 }, { "epoch": 32.78360655737705, "grad_norm": 5.815952301025391, "learning_rate": 1.5697307425746236e-05, "loss": 0.5374, "step": 9999 }, { "epoch": 32.78688524590164, "grad_norm": 4.62792444229126, "learning_rate": 1.569643470153648e-05, "loss": 0.4854, "step": 10000 }, { "epoch": 32.79016393442623, "grad_norm": 5.325009346008301, "learning_rate": 1.5695561913093497e-05, "loss": 0.5958, "step": 10001 }, { "epoch": 32.79344262295082, "grad_norm": 5.448163032531738, "learning_rate": 1.5694689060427135e-05, "loss": 0.7674, "step": 10002 }, { "epoch": 32.79672131147541, "grad_norm": 5.060937881469727, "learning_rate": 1.5693816143547232e-05, "loss": 0.5412, "step": 10003 }, { "epoch": 32.8, "grad_norm": 4.585668087005615, "learning_rate": 1.5692943162463628e-05, "loss": 0.5504, "step": 10004 }, { "epoch": 32.80327868852459, "grad_norm": 3.8736655712127686, "learning_rate": 1.5692070117186174e-05, "loss": 0.4746, "step": 10005 }, { "epoch": 32.80655737704918, "grad_norm": 4.658707141876221, "learning_rate": 1.569119700772471e-05, "loss": 0.7188, "step": 10006 }, { "epoch": 32.80983606557377, "grad_norm": 4.936240196228027, "learning_rate": 1.5690323834089085e-05, "loss": 0.805, "step": 10007 }, { "epoch": 32.81311475409836, "grad_norm": 6.232104301452637, "learning_rate": 1.568945059628914e-05, "loss": 0.6592, "step": 10008 }, { "epoch": 32.81639344262295, "grad_norm": 4.11478328704834, "learning_rate": 1.5688577294334725e-05, "loss": 0.4975, "step": 10009 }, { "epoch": 32.81967213114754, "grad_norm": 5.2114033699035645, "learning_rate": 1.5687703928235686e-05, "loss": 0.5046, "step": 10010 }, { "epoch": 32.82295081967213, "grad_norm": 5.488430023193359, "learning_rate": 1.5686830498001873e-05, "loss": 0.523, "step": 10011 }, { "epoch": 32.82622950819672, "grad_norm": 5.590409755706787, "learning_rate": 1.568595700364313e-05, "loss": 0.5673, "step": 10012 }, { "epoch": 32.829508196721314, "grad_norm": 5.385897636413574, "learning_rate": 1.5685083445169313e-05, "loss": 0.5792, "step": 10013 }, { "epoch": 32.8327868852459, "grad_norm": 5.442275047302246, "learning_rate": 1.568420982259027e-05, "loss": 0.4137, "step": 10014 }, { "epoch": 32.83606557377049, "grad_norm": 6.762298583984375, "learning_rate": 1.5683336135915843e-05, "loss": 0.6558, "step": 10015 }, { "epoch": 32.83934426229508, "grad_norm": 5.788129806518555, "learning_rate": 1.56824623851559e-05, "loss": 0.5854, "step": 10016 }, { "epoch": 32.842622950819674, "grad_norm": 5.3627400398254395, "learning_rate": 1.5681588570320283e-05, "loss": 0.8209, "step": 10017 }, { "epoch": 32.84590163934426, "grad_norm": 7.189132213592529, "learning_rate": 1.5680714691418848e-05, "loss": 0.4637, "step": 10018 }, { "epoch": 32.84918032786885, "grad_norm": 5.859954357147217, "learning_rate": 1.5679840748461444e-05, "loss": 0.548, "step": 10019 }, { "epoch": 32.85245901639344, "grad_norm": 8.233059883117676, "learning_rate": 1.5678966741457938e-05, "loss": 0.4126, "step": 10020 }, { "epoch": 32.855737704918035, "grad_norm": 5.150799751281738, "learning_rate": 1.567809267041817e-05, "loss": 0.423, "step": 10021 }, { "epoch": 32.85901639344262, "grad_norm": 6.427093505859375, "learning_rate": 1.567721853535201e-05, "loss": 0.4258, "step": 10022 }, { "epoch": 32.86229508196721, "grad_norm": 6.716111183166504, "learning_rate": 1.5676344336269303e-05, "loss": 0.6108, "step": 10023 }, { "epoch": 32.86557377049181, "grad_norm": 5.7939534187316895, "learning_rate": 1.5675470073179913e-05, "loss": 0.7008, "step": 10024 }, { "epoch": 32.868852459016395, "grad_norm": 5.767581462860107, "learning_rate": 1.5674595746093698e-05, "loss": 0.6761, "step": 10025 }, { "epoch": 32.87213114754098, "grad_norm": 8.81692886352539, "learning_rate": 1.5673721355020517e-05, "loss": 0.5401, "step": 10026 }, { "epoch": 32.87540983606557, "grad_norm": 10.731027603149414, "learning_rate": 1.5672846899970226e-05, "loss": 0.4246, "step": 10027 }, { "epoch": 32.87868852459017, "grad_norm": 5.311206817626953, "learning_rate": 1.567197238095269e-05, "loss": 0.7747, "step": 10028 }, { "epoch": 32.881967213114756, "grad_norm": 4.311196327209473, "learning_rate": 1.5671097797977764e-05, "loss": 0.4502, "step": 10029 }, { "epoch": 32.885245901639344, "grad_norm": 4.713652610778809, "learning_rate": 1.5670223151055316e-05, "loss": 0.6014, "step": 10030 }, { "epoch": 32.88852459016393, "grad_norm": 4.315144062042236, "learning_rate": 1.566934844019521e-05, "loss": 0.5874, "step": 10031 }, { "epoch": 32.89180327868853, "grad_norm": 4.345402240753174, "learning_rate": 1.56684736654073e-05, "loss": 0.4894, "step": 10032 }, { "epoch": 32.895081967213116, "grad_norm": 5.32860803604126, "learning_rate": 1.5667598826701463e-05, "loss": 0.7251, "step": 10033 }, { "epoch": 32.898360655737704, "grad_norm": 4.5122761726379395, "learning_rate": 1.566672392408755e-05, "loss": 0.4246, "step": 10034 }, { "epoch": 32.90163934426229, "grad_norm": 4.923772811889648, "learning_rate": 1.5665848957575436e-05, "loss": 0.536, "step": 10035 }, { "epoch": 32.90491803278689, "grad_norm": 4.692349910736084, "learning_rate": 1.5664973927174983e-05, "loss": 0.6342, "step": 10036 }, { "epoch": 32.90819672131148, "grad_norm": 4.9463419914245605, "learning_rate": 1.5664098832896058e-05, "loss": 0.8394, "step": 10037 }, { "epoch": 32.911475409836065, "grad_norm": 4.968618869781494, "learning_rate": 1.566322367474853e-05, "loss": 0.4984, "step": 10038 }, { "epoch": 32.91475409836065, "grad_norm": 5.495232105255127, "learning_rate": 1.5662348452742267e-05, "loss": 0.4236, "step": 10039 }, { "epoch": 32.91803278688525, "grad_norm": 4.727354526519775, "learning_rate": 1.566147316688714e-05, "loss": 0.6942, "step": 10040 }, { "epoch": 32.92131147540984, "grad_norm": 4.358510971069336, "learning_rate": 1.5660597817193012e-05, "loss": 0.4601, "step": 10041 }, { "epoch": 32.924590163934425, "grad_norm": 4.713139533996582, "learning_rate": 1.5659722403669762e-05, "loss": 0.5812, "step": 10042 }, { "epoch": 32.927868852459014, "grad_norm": 4.121937274932861, "learning_rate": 1.5658846926327255e-05, "loss": 0.5574, "step": 10043 }, { "epoch": 32.93114754098361, "grad_norm": 4.3942484855651855, "learning_rate": 1.5657971385175367e-05, "loss": 0.6271, "step": 10044 }, { "epoch": 32.9344262295082, "grad_norm": 4.713679790496826, "learning_rate": 1.5657095780223965e-05, "loss": 0.4821, "step": 10045 }, { "epoch": 32.937704918032786, "grad_norm": 5.857980728149414, "learning_rate": 1.5656220111482928e-05, "loss": 0.5801, "step": 10046 }, { "epoch": 32.940983606557374, "grad_norm": 4.922041416168213, "learning_rate": 1.5655344378962133e-05, "loss": 0.6836, "step": 10047 }, { "epoch": 32.94426229508197, "grad_norm": 4.145935535430908, "learning_rate": 1.5654468582671443e-05, "loss": 0.5282, "step": 10048 }, { "epoch": 32.94754098360656, "grad_norm": 5.0533647537231445, "learning_rate": 1.565359272262074e-05, "loss": 0.4568, "step": 10049 }, { "epoch": 32.950819672131146, "grad_norm": 5.428788661956787, "learning_rate": 1.565271679881991e-05, "loss": 0.5515, "step": 10050 }, { "epoch": 32.954098360655735, "grad_norm": 4.525021553039551, "learning_rate": 1.5651840811278813e-05, "loss": 0.4018, "step": 10051 }, { "epoch": 32.95737704918033, "grad_norm": 6.6592302322387695, "learning_rate": 1.5650964760007337e-05, "loss": 0.7491, "step": 10052 }, { "epoch": 32.96065573770492, "grad_norm": 4.098918914794922, "learning_rate": 1.5650088645015357e-05, "loss": 0.6382, "step": 10053 }, { "epoch": 32.96393442622951, "grad_norm": 4.564246654510498, "learning_rate": 1.5649212466312754e-05, "loss": 0.5227, "step": 10054 }, { "epoch": 32.967213114754095, "grad_norm": 5.1039838790893555, "learning_rate": 1.5648336223909405e-05, "loss": 0.3939, "step": 10055 }, { "epoch": 32.97049180327869, "grad_norm": 5.423557281494141, "learning_rate": 1.5647459917815195e-05, "loss": 0.3558, "step": 10056 }, { "epoch": 32.97377049180328, "grad_norm": 5.740471839904785, "learning_rate": 1.564658354804e-05, "loss": 0.5659, "step": 10057 }, { "epoch": 32.97704918032787, "grad_norm": 4.959656238555908, "learning_rate": 1.5645707114593706e-05, "loss": 0.4312, "step": 10058 }, { "epoch": 32.980327868852456, "grad_norm": 4.7794013023376465, "learning_rate": 1.5644830617486194e-05, "loss": 0.5232, "step": 10059 }, { "epoch": 32.98360655737705, "grad_norm": 4.343735218048096, "learning_rate": 1.5643954056727347e-05, "loss": 0.3298, "step": 10060 }, { "epoch": 32.98688524590164, "grad_norm": 4.604254722595215, "learning_rate": 1.5643077432327046e-05, "loss": 0.5418, "step": 10061 }, { "epoch": 32.99016393442623, "grad_norm": 6.071413516998291, "learning_rate": 1.5642200744295187e-05, "loss": 0.5305, "step": 10062 }, { "epoch": 32.993442622950816, "grad_norm": 4.945558547973633, "learning_rate": 1.5641323992641643e-05, "loss": 0.566, "step": 10063 }, { "epoch": 32.99672131147541, "grad_norm": 4.240699768066406, "learning_rate": 1.5640447177376308e-05, "loss": 0.7595, "step": 10064 }, { "epoch": 33.0, "grad_norm": 4.303366184234619, "learning_rate": 1.5639570298509067e-05, "loss": 0.4874, "step": 10065 }, { "epoch": 33.00327868852459, "grad_norm": 5.081435203552246, "learning_rate": 1.563869335604981e-05, "loss": 0.6206, "step": 10066 }, { "epoch": 33.006557377049184, "grad_norm": 5.502679824829102, "learning_rate": 1.5637816350008415e-05, "loss": 0.5515, "step": 10067 }, { "epoch": 33.00983606557377, "grad_norm": 4.16209077835083, "learning_rate": 1.563693928039478e-05, "loss": 0.6197, "step": 10068 }, { "epoch": 33.01311475409836, "grad_norm": 10.794719696044922, "learning_rate": 1.5636062147218796e-05, "loss": 0.6661, "step": 10069 }, { "epoch": 33.01639344262295, "grad_norm": 5.087299823760986, "learning_rate": 1.5635184950490353e-05, "loss": 0.4017, "step": 10070 }, { "epoch": 33.019672131147544, "grad_norm": 5.694509506225586, "learning_rate": 1.563430769021934e-05, "loss": 0.6404, "step": 10071 }, { "epoch": 33.02295081967213, "grad_norm": 5.150879859924316, "learning_rate": 1.563343036641565e-05, "loss": 0.6125, "step": 10072 }, { "epoch": 33.02622950819672, "grad_norm": 4.8798112869262695, "learning_rate": 1.563255297908917e-05, "loss": 0.5717, "step": 10073 }, { "epoch": 33.02950819672131, "grad_norm": 4.339451313018799, "learning_rate": 1.5631675528249804e-05, "loss": 0.3257, "step": 10074 }, { "epoch": 33.032786885245905, "grad_norm": 4.3733649253845215, "learning_rate": 1.563079801390744e-05, "loss": 0.2535, "step": 10075 }, { "epoch": 33.03606557377049, "grad_norm": 5.2455549240112305, "learning_rate": 1.5629920436071974e-05, "loss": 0.3626, "step": 10076 }, { "epoch": 33.03934426229508, "grad_norm": 4.386184215545654, "learning_rate": 1.56290427947533e-05, "loss": 0.4984, "step": 10077 }, { "epoch": 33.04262295081967, "grad_norm": 5.494114398956299, "learning_rate": 1.5628165089961314e-05, "loss": 0.5568, "step": 10078 }, { "epoch": 33.045901639344265, "grad_norm": 4.585800647735596, "learning_rate": 1.562728732170592e-05, "loss": 0.4843, "step": 10079 }, { "epoch": 33.049180327868854, "grad_norm": 4.895934104919434, "learning_rate": 1.5626409489997008e-05, "loss": 0.6198, "step": 10080 }, { "epoch": 33.05245901639344, "grad_norm": 4.745464324951172, "learning_rate": 1.562553159484448e-05, "loss": 0.731, "step": 10081 }, { "epoch": 33.05573770491803, "grad_norm": 6.801229476928711, "learning_rate": 1.562465363625823e-05, "loss": 0.7128, "step": 10082 }, { "epoch": 33.059016393442626, "grad_norm": 4.937385559082031, "learning_rate": 1.5623775614248167e-05, "loss": 0.6592, "step": 10083 }, { "epoch": 33.062295081967214, "grad_norm": 5.569639205932617, "learning_rate": 1.5622897528824185e-05, "loss": 0.6632, "step": 10084 }, { "epoch": 33.0655737704918, "grad_norm": 4.28713321685791, "learning_rate": 1.5622019379996184e-05, "loss": 0.4714, "step": 10085 }, { "epoch": 33.06885245901639, "grad_norm": 5.11442756652832, "learning_rate": 1.5621141167774073e-05, "loss": 0.4721, "step": 10086 }, { "epoch": 33.072131147540986, "grad_norm": 5.127574920654297, "learning_rate": 1.562026289216775e-05, "loss": 0.556, "step": 10087 }, { "epoch": 33.075409836065575, "grad_norm": 4.897861480712891, "learning_rate": 1.561938455318712e-05, "loss": 0.6706, "step": 10088 }, { "epoch": 33.07868852459016, "grad_norm": 5.641843795776367, "learning_rate": 1.5618506150842083e-05, "loss": 0.493, "step": 10089 }, { "epoch": 33.08196721311475, "grad_norm": 5.998987674713135, "learning_rate": 1.5617627685142554e-05, "loss": 0.5256, "step": 10090 }, { "epoch": 33.08524590163935, "grad_norm": 4.3720703125, "learning_rate": 1.5616749156098424e-05, "loss": 0.4617, "step": 10091 }, { "epoch": 33.088524590163935, "grad_norm": 4.486802577972412, "learning_rate": 1.5615870563719612e-05, "loss": 0.5771, "step": 10092 }, { "epoch": 33.09180327868852, "grad_norm": 4.977354526519775, "learning_rate": 1.561499190801602e-05, "loss": 0.3783, "step": 10093 }, { "epoch": 33.09508196721311, "grad_norm": 3.97080135345459, "learning_rate": 1.5614113188997556e-05, "loss": 0.4074, "step": 10094 }, { "epoch": 33.09836065573771, "grad_norm": 4.409687519073486, "learning_rate": 1.5613234406674126e-05, "loss": 0.3929, "step": 10095 }, { "epoch": 33.101639344262296, "grad_norm": 4.560356140136719, "learning_rate": 1.5612355561055644e-05, "loss": 0.5196, "step": 10096 }, { "epoch": 33.104918032786884, "grad_norm": 4.902633190155029, "learning_rate": 1.5611476652152017e-05, "loss": 0.6281, "step": 10097 }, { "epoch": 33.10819672131147, "grad_norm": 5.239652633666992, "learning_rate": 1.561059767997316e-05, "loss": 0.4775, "step": 10098 }, { "epoch": 33.11147540983607, "grad_norm": 4.921475410461426, "learning_rate": 1.5609718644528976e-05, "loss": 0.723, "step": 10099 }, { "epoch": 33.114754098360656, "grad_norm": 4.702960014343262, "learning_rate": 1.5608839545829382e-05, "loss": 0.5943, "step": 10100 }, { "epoch": 33.118032786885244, "grad_norm": 7.985311508178711, "learning_rate": 1.5607960383884294e-05, "loss": 0.596, "step": 10101 }, { "epoch": 33.12131147540983, "grad_norm": 5.2654500007629395, "learning_rate": 1.560708115870362e-05, "loss": 0.5861, "step": 10102 }, { "epoch": 33.12459016393443, "grad_norm": 4.368033409118652, "learning_rate": 1.5606201870297276e-05, "loss": 0.3607, "step": 10103 }, { "epoch": 33.12786885245902, "grad_norm": 5.144751071929932, "learning_rate": 1.5605322518675175e-05, "loss": 0.5396, "step": 10104 }, { "epoch": 33.131147540983605, "grad_norm": 4.002232074737549, "learning_rate": 1.5604443103847236e-05, "loss": 0.4817, "step": 10105 }, { "epoch": 33.13442622950819, "grad_norm": 3.7081384658813477, "learning_rate": 1.5603563625823374e-05, "loss": 0.3397, "step": 10106 }, { "epoch": 33.13770491803279, "grad_norm": 4.970585346221924, "learning_rate": 1.5602684084613504e-05, "loss": 0.4385, "step": 10107 }, { "epoch": 33.14098360655738, "grad_norm": 4.572484016418457, "learning_rate": 1.5601804480227543e-05, "loss": 0.4196, "step": 10108 }, { "epoch": 33.144262295081965, "grad_norm": 5.099598407745361, "learning_rate": 1.560092481267542e-05, "loss": 0.5706, "step": 10109 }, { "epoch": 33.14754098360656, "grad_norm": 5.055237770080566, "learning_rate": 1.5600045081967042e-05, "loss": 0.652, "step": 10110 }, { "epoch": 33.15081967213115, "grad_norm": 4.236312389373779, "learning_rate": 1.5599165288112333e-05, "loss": 0.5264, "step": 10111 }, { "epoch": 33.15409836065574, "grad_norm": 4.020155429840088, "learning_rate": 1.5598285431121215e-05, "loss": 0.6239, "step": 10112 }, { "epoch": 33.157377049180326, "grad_norm": 4.91616678237915, "learning_rate": 1.5597405511003608e-05, "loss": 0.4707, "step": 10113 }, { "epoch": 33.16065573770492, "grad_norm": 5.303276062011719, "learning_rate": 1.559652552776943e-05, "loss": 0.6169, "step": 10114 }, { "epoch": 33.16393442622951, "grad_norm": 4.760218620300293, "learning_rate": 1.5595645481428614e-05, "loss": 0.3751, "step": 10115 }, { "epoch": 33.1672131147541, "grad_norm": 4.5294928550720215, "learning_rate": 1.5594765371991073e-05, "loss": 0.4663, "step": 10116 }, { "epoch": 33.170491803278686, "grad_norm": 30.25650405883789, "learning_rate": 1.5593885199466737e-05, "loss": 0.494, "step": 10117 }, { "epoch": 33.17377049180328, "grad_norm": 5.431346416473389, "learning_rate": 1.559300496386553e-05, "loss": 0.6662, "step": 10118 }, { "epoch": 33.17704918032787, "grad_norm": 5.055188179016113, "learning_rate": 1.5592124665197374e-05, "loss": 0.4872, "step": 10119 }, { "epoch": 33.18032786885246, "grad_norm": 4.201295852661133, "learning_rate": 1.55912443034722e-05, "loss": 0.5897, "step": 10120 }, { "epoch": 33.18360655737705, "grad_norm": 4.296782970428467, "learning_rate": 1.5590363878699932e-05, "loss": 0.5332, "step": 10121 }, { "epoch": 33.18688524590164, "grad_norm": 4.36672306060791, "learning_rate": 1.55894833908905e-05, "loss": 0.5081, "step": 10122 }, { "epoch": 33.19016393442623, "grad_norm": 3.8230323791503906, "learning_rate": 1.558860284005383e-05, "loss": 0.4927, "step": 10123 }, { "epoch": 33.19344262295082, "grad_norm": 6.295041561126709, "learning_rate": 1.5587722226199854e-05, "loss": 0.5417, "step": 10124 }, { "epoch": 33.19672131147541, "grad_norm": 4.96061897277832, "learning_rate": 1.55868415493385e-05, "loss": 0.7355, "step": 10125 }, { "epoch": 33.2, "grad_norm": 4.821801662445068, "learning_rate": 1.5585960809479698e-05, "loss": 0.5351, "step": 10126 }, { "epoch": 33.20327868852459, "grad_norm": 7.355944633483887, "learning_rate": 1.558508000663338e-05, "loss": 0.6494, "step": 10127 }, { "epoch": 33.20655737704918, "grad_norm": 5.325799942016602, "learning_rate": 1.5584199140809476e-05, "loss": 0.5024, "step": 10128 }, { "epoch": 33.20983606557377, "grad_norm": 6.0391340255737305, "learning_rate": 1.5583318212017923e-05, "loss": 0.6584, "step": 10129 }, { "epoch": 33.21311475409836, "grad_norm": 5.082486152648926, "learning_rate": 1.5582437220268648e-05, "loss": 0.4737, "step": 10130 }, { "epoch": 33.21639344262295, "grad_norm": 4.734295845031738, "learning_rate": 1.5581556165571593e-05, "loss": 0.494, "step": 10131 }, { "epoch": 33.21967213114754, "grad_norm": 3.3163318634033203, "learning_rate": 1.5580675047936688e-05, "loss": 0.3362, "step": 10132 }, { "epoch": 33.22295081967213, "grad_norm": 4.265559673309326, "learning_rate": 1.5579793867373868e-05, "loss": 0.4148, "step": 10133 }, { "epoch": 33.226229508196724, "grad_norm": 4.861754894256592, "learning_rate": 1.557891262389307e-05, "loss": 0.65, "step": 10134 }, { "epoch": 33.22950819672131, "grad_norm": 4.748917102813721, "learning_rate": 1.557803131750424e-05, "loss": 0.8114, "step": 10135 }, { "epoch": 33.2327868852459, "grad_norm": 5.120484352111816, "learning_rate": 1.55771499482173e-05, "loss": 0.5329, "step": 10136 }, { "epoch": 33.23606557377049, "grad_norm": 4.8725409507751465, "learning_rate": 1.5576268516042193e-05, "loss": 0.6062, "step": 10137 }, { "epoch": 33.239344262295084, "grad_norm": 4.524280548095703, "learning_rate": 1.5575387020988864e-05, "loss": 0.4025, "step": 10138 }, { "epoch": 33.24262295081967, "grad_norm": 4.373327732086182, "learning_rate": 1.5574505463067252e-05, "loss": 0.6571, "step": 10139 }, { "epoch": 33.24590163934426, "grad_norm": 13.101645469665527, "learning_rate": 1.557362384228729e-05, "loss": 0.4283, "step": 10140 }, { "epoch": 33.24918032786885, "grad_norm": 4.632628440856934, "learning_rate": 1.5572742158658923e-05, "loss": 0.4134, "step": 10141 }, { "epoch": 33.252459016393445, "grad_norm": 4.758519649505615, "learning_rate": 1.55718604121921e-05, "loss": 0.6055, "step": 10142 }, { "epoch": 33.25573770491803, "grad_norm": 5.906380653381348, "learning_rate": 1.5570978602896754e-05, "loss": 0.4261, "step": 10143 }, { "epoch": 33.25901639344262, "grad_norm": 4.50745964050293, "learning_rate": 1.5570096730782833e-05, "loss": 0.5755, "step": 10144 }, { "epoch": 33.26229508196721, "grad_norm": 4.140499114990234, "learning_rate": 1.556921479586028e-05, "loss": 0.3886, "step": 10145 }, { "epoch": 33.265573770491805, "grad_norm": 3.9566683769226074, "learning_rate": 1.556833279813904e-05, "loss": 0.4018, "step": 10146 }, { "epoch": 33.268852459016394, "grad_norm": 4.170814514160156, "learning_rate": 1.5567450737629057e-05, "loss": 0.5604, "step": 10147 }, { "epoch": 33.27213114754098, "grad_norm": 5.552990436553955, "learning_rate": 1.5566568614340278e-05, "loss": 0.5114, "step": 10148 }, { "epoch": 33.27540983606557, "grad_norm": 5.003469944000244, "learning_rate": 1.556568642828265e-05, "loss": 0.6093, "step": 10149 }, { "epoch": 33.278688524590166, "grad_norm": 4.566401481628418, "learning_rate": 1.5564804179466124e-05, "loss": 0.4361, "step": 10150 }, { "epoch": 33.281967213114754, "grad_norm": 4.740884304046631, "learning_rate": 1.556392186790064e-05, "loss": 0.6895, "step": 10151 }, { "epoch": 33.28524590163934, "grad_norm": 4.860287189483643, "learning_rate": 1.556303949359615e-05, "loss": 0.5573, "step": 10152 }, { "epoch": 33.28852459016394, "grad_norm": 4.675597667694092, "learning_rate": 1.5562157056562614e-05, "loss": 0.6931, "step": 10153 }, { "epoch": 33.291803278688526, "grad_norm": 3.958411693572998, "learning_rate": 1.5561274556809968e-05, "loss": 0.4871, "step": 10154 }, { "epoch": 33.295081967213115, "grad_norm": 4.180731296539307, "learning_rate": 1.5560391994348172e-05, "loss": 0.6239, "step": 10155 }, { "epoch": 33.2983606557377, "grad_norm": 5.616351127624512, "learning_rate": 1.555950936918717e-05, "loss": 0.5532, "step": 10156 }, { "epoch": 33.3016393442623, "grad_norm": 4.113106727600098, "learning_rate": 1.5558626681336926e-05, "loss": 0.6047, "step": 10157 }, { "epoch": 33.30491803278689, "grad_norm": 4.819357395172119, "learning_rate": 1.555774393080738e-05, "loss": 0.6771, "step": 10158 }, { "epoch": 33.308196721311475, "grad_norm": 4.319346904754639, "learning_rate": 1.55568611176085e-05, "loss": 0.4645, "step": 10159 }, { "epoch": 33.31147540983606, "grad_norm": 4.928284645080566, "learning_rate": 1.5555978241750228e-05, "loss": 0.4432, "step": 10160 }, { "epoch": 33.31475409836066, "grad_norm": 4.996612071990967, "learning_rate": 1.5555095303242528e-05, "loss": 0.6581, "step": 10161 }, { "epoch": 33.31803278688525, "grad_norm": 4.645545959472656, "learning_rate": 1.555421230209535e-05, "loss": 0.6862, "step": 10162 }, { "epoch": 33.321311475409836, "grad_norm": 5.181867599487305, "learning_rate": 1.5553329238318654e-05, "loss": 0.4662, "step": 10163 }, { "epoch": 33.324590163934424, "grad_norm": 4.422606468200684, "learning_rate": 1.5552446111922396e-05, "loss": 0.3422, "step": 10164 }, { "epoch": 33.32786885245902, "grad_norm": 5.002449989318848, "learning_rate": 1.5551562922916537e-05, "loss": 0.5133, "step": 10165 }, { "epoch": 33.33114754098361, "grad_norm": 4.055215358734131, "learning_rate": 1.5550679671311032e-05, "loss": 0.3874, "step": 10166 }, { "epoch": 33.334426229508196, "grad_norm": 3.9541549682617188, "learning_rate": 1.5549796357115844e-05, "loss": 0.4602, "step": 10167 }, { "epoch": 33.337704918032784, "grad_norm": 4.931694984436035, "learning_rate": 1.554891298034093e-05, "loss": 0.3859, "step": 10168 }, { "epoch": 33.34098360655738, "grad_norm": 5.761638164520264, "learning_rate": 1.5548029540996254e-05, "loss": 0.3595, "step": 10169 }, { "epoch": 33.34426229508197, "grad_norm": 7.904671669006348, "learning_rate": 1.5547146039091775e-05, "loss": 0.6418, "step": 10170 }, { "epoch": 33.34754098360656, "grad_norm": 4.815914154052734, "learning_rate": 1.554626247463746e-05, "loss": 0.4092, "step": 10171 }, { "epoch": 33.350819672131145, "grad_norm": 5.948140621185303, "learning_rate": 1.5545378847643267e-05, "loss": 0.4543, "step": 10172 }, { "epoch": 33.35409836065574, "grad_norm": 4.221286296844482, "learning_rate": 1.554449515811916e-05, "loss": 0.5601, "step": 10173 }, { "epoch": 33.35737704918033, "grad_norm": 12.369024276733398, "learning_rate": 1.5543611406075108e-05, "loss": 0.4391, "step": 10174 }, { "epoch": 33.36065573770492, "grad_norm": 3.9943723678588867, "learning_rate": 1.554272759152107e-05, "loss": 0.4761, "step": 10175 }, { "epoch": 33.363934426229505, "grad_norm": 5.7983293533325195, "learning_rate": 1.5541843714467018e-05, "loss": 0.4588, "step": 10176 }, { "epoch": 33.3672131147541, "grad_norm": 4.0542120933532715, "learning_rate": 1.5540959774922915e-05, "loss": 0.4386, "step": 10177 }, { "epoch": 33.37049180327869, "grad_norm": 4.944047927856445, "learning_rate": 1.5540075772898732e-05, "loss": 0.5321, "step": 10178 }, { "epoch": 33.37377049180328, "grad_norm": 4.444085597991943, "learning_rate": 1.5539191708404432e-05, "loss": 0.4557, "step": 10179 }, { "epoch": 33.377049180327866, "grad_norm": 5.059161186218262, "learning_rate": 1.5538307581449984e-05, "loss": 0.4235, "step": 10180 }, { "epoch": 33.38032786885246, "grad_norm": 4.3951616287231445, "learning_rate": 1.5537423392045365e-05, "loss": 0.4983, "step": 10181 }, { "epoch": 33.38360655737705, "grad_norm": 5.762041091918945, "learning_rate": 1.5536539140200537e-05, "loss": 0.484, "step": 10182 }, { "epoch": 33.38688524590164, "grad_norm": 4.430397033691406, "learning_rate": 1.553565482592547e-05, "loss": 0.3874, "step": 10183 }, { "epoch": 33.390163934426226, "grad_norm": 4.788419246673584, "learning_rate": 1.5534770449230145e-05, "loss": 0.404, "step": 10184 }, { "epoch": 33.39344262295082, "grad_norm": 4.780545711517334, "learning_rate": 1.553388601012453e-05, "loss": 0.8497, "step": 10185 }, { "epoch": 33.39672131147541, "grad_norm": 4.412076473236084, "learning_rate": 1.553300150861859e-05, "loss": 0.7045, "step": 10186 }, { "epoch": 33.4, "grad_norm": 4.94720458984375, "learning_rate": 1.5532116944722308e-05, "loss": 0.6041, "step": 10187 }, { "epoch": 33.40327868852459, "grad_norm": 4.282658100128174, "learning_rate": 1.5531232318445654e-05, "loss": 0.6696, "step": 10188 }, { "epoch": 33.40655737704918, "grad_norm": 4.535087585449219, "learning_rate": 1.5530347629798606e-05, "loss": 0.4823, "step": 10189 }, { "epoch": 33.40983606557377, "grad_norm": 5.920085430145264, "learning_rate": 1.552946287879114e-05, "loss": 0.6901, "step": 10190 }, { "epoch": 33.41311475409836, "grad_norm": 3.970754384994507, "learning_rate": 1.552857806543323e-05, "loss": 0.3838, "step": 10191 }, { "epoch": 33.41639344262295, "grad_norm": 4.20810604095459, "learning_rate": 1.5527693189734853e-05, "loss": 0.5817, "step": 10192 }, { "epoch": 33.41967213114754, "grad_norm": 4.092385768890381, "learning_rate": 1.5526808251705988e-05, "loss": 0.5351, "step": 10193 }, { "epoch": 33.42295081967213, "grad_norm": 5.205174446105957, "learning_rate": 1.5525923251356613e-05, "loss": 0.4479, "step": 10194 }, { "epoch": 33.42622950819672, "grad_norm": 5.660512924194336, "learning_rate": 1.552503818869671e-05, "loss": 0.5574, "step": 10195 }, { "epoch": 33.429508196721315, "grad_norm": 4.352353572845459, "learning_rate": 1.5524153063736255e-05, "loss": 0.5485, "step": 10196 }, { "epoch": 33.4327868852459, "grad_norm": 3.487745523452759, "learning_rate": 1.552326787648523e-05, "loss": 0.4345, "step": 10197 }, { "epoch": 33.43606557377049, "grad_norm": 4.871814250946045, "learning_rate": 1.5522382626953618e-05, "loss": 0.5868, "step": 10198 }, { "epoch": 33.43934426229508, "grad_norm": 4.873475074768066, "learning_rate": 1.55214973151514e-05, "loss": 0.6933, "step": 10199 }, { "epoch": 33.442622950819676, "grad_norm": 4.205678939819336, "learning_rate": 1.5520611941088558e-05, "loss": 0.6536, "step": 10200 }, { "epoch": 33.445901639344264, "grad_norm": 4.825569152832031, "learning_rate": 1.5519726504775076e-05, "loss": 0.5316, "step": 10201 }, { "epoch": 33.44918032786885, "grad_norm": 5.3765339851379395, "learning_rate": 1.5518841006220942e-05, "loss": 0.6538, "step": 10202 }, { "epoch": 33.45245901639344, "grad_norm": 4.949324131011963, "learning_rate": 1.5517955445436138e-05, "loss": 0.7264, "step": 10203 }, { "epoch": 33.455737704918036, "grad_norm": 4.387296199798584, "learning_rate": 1.551706982243064e-05, "loss": 0.5541, "step": 10204 }, { "epoch": 33.459016393442624, "grad_norm": 3.7269365787506104, "learning_rate": 1.5516184137214454e-05, "loss": 0.4924, "step": 10205 }, { "epoch": 33.46229508196721, "grad_norm": 4.277004718780518, "learning_rate": 1.551529838979755e-05, "loss": 0.716, "step": 10206 }, { "epoch": 33.4655737704918, "grad_norm": 4.267120838165283, "learning_rate": 1.5514412580189926e-05, "loss": 0.48, "step": 10207 }, { "epoch": 33.4688524590164, "grad_norm": 3.652655601501465, "learning_rate": 1.5513526708401566e-05, "loss": 0.7598, "step": 10208 }, { "epoch": 33.472131147540985, "grad_norm": 4.02885627746582, "learning_rate": 1.5512640774442455e-05, "loss": 0.5333, "step": 10209 }, { "epoch": 33.47540983606557, "grad_norm": 5.324887752532959, "learning_rate": 1.551175477832259e-05, "loss": 0.7311, "step": 10210 }, { "epoch": 33.47868852459016, "grad_norm": 5.494205951690674, "learning_rate": 1.5510868720051965e-05, "loss": 0.506, "step": 10211 }, { "epoch": 33.48196721311476, "grad_norm": 4.574641704559326, "learning_rate": 1.5509982599640556e-05, "loss": 0.3791, "step": 10212 }, { "epoch": 33.485245901639345, "grad_norm": 4.014054775238037, "learning_rate": 1.5509096417098372e-05, "loss": 0.6663, "step": 10213 }, { "epoch": 33.488524590163934, "grad_norm": 4.6108245849609375, "learning_rate": 1.5508210172435392e-05, "loss": 0.4445, "step": 10214 }, { "epoch": 33.49180327868852, "grad_norm": 4.035258769989014, "learning_rate": 1.550732386566162e-05, "loss": 0.4581, "step": 10215 }, { "epoch": 33.49508196721312, "grad_norm": 5.459244251251221, "learning_rate": 1.5506437496787045e-05, "loss": 0.5634, "step": 10216 }, { "epoch": 33.498360655737706, "grad_norm": 4.660975456237793, "learning_rate": 1.550555106582166e-05, "loss": 0.3754, "step": 10217 }, { "epoch": 33.501639344262294, "grad_norm": 4.48290491104126, "learning_rate": 1.5504664572775462e-05, "loss": 0.6349, "step": 10218 }, { "epoch": 33.50491803278688, "grad_norm": 6.524414539337158, "learning_rate": 1.5503778017658447e-05, "loss": 0.535, "step": 10219 }, { "epoch": 33.50819672131148, "grad_norm": 4.729135990142822, "learning_rate": 1.5502891400480612e-05, "loss": 0.561, "step": 10220 }, { "epoch": 33.511475409836066, "grad_norm": 4.283715724945068, "learning_rate": 1.550200472125196e-05, "loss": 0.4171, "step": 10221 }, { "epoch": 33.514754098360655, "grad_norm": 4.654603958129883, "learning_rate": 1.550111797998248e-05, "loss": 0.4853, "step": 10222 }, { "epoch": 33.51803278688524, "grad_norm": 4.608450889587402, "learning_rate": 1.5500231176682175e-05, "loss": 0.4266, "step": 10223 }, { "epoch": 33.52131147540984, "grad_norm": 4.85013484954834, "learning_rate": 1.5499344311361044e-05, "loss": 0.5319, "step": 10224 }, { "epoch": 33.52459016393443, "grad_norm": 4.790493965148926, "learning_rate": 1.5498457384029088e-05, "loss": 0.5883, "step": 10225 }, { "epoch": 33.527868852459015, "grad_norm": 4.762648582458496, "learning_rate": 1.549757039469631e-05, "loss": 0.331, "step": 10226 }, { "epoch": 33.5311475409836, "grad_norm": 5.010842800140381, "learning_rate": 1.549668334337271e-05, "loss": 0.7729, "step": 10227 }, { "epoch": 33.5344262295082, "grad_norm": 4.892334461212158, "learning_rate": 1.549579623006829e-05, "loss": 0.3793, "step": 10228 }, { "epoch": 33.53770491803279, "grad_norm": 5.094531059265137, "learning_rate": 1.549490905479305e-05, "loss": 0.5656, "step": 10229 }, { "epoch": 33.540983606557376, "grad_norm": 4.161854267120361, "learning_rate": 1.5494021817557002e-05, "loss": 0.3401, "step": 10230 }, { "epoch": 33.544262295081964, "grad_norm": 4.440273284912109, "learning_rate": 1.5493134518370142e-05, "loss": 0.5776, "step": 10231 }, { "epoch": 33.54754098360656, "grad_norm": 5.755524158477783, "learning_rate": 1.549224715724248e-05, "loss": 0.7063, "step": 10232 }, { "epoch": 33.55081967213115, "grad_norm": 5.016541481018066, "learning_rate": 1.549135973418402e-05, "loss": 0.3542, "step": 10233 }, { "epoch": 33.554098360655736, "grad_norm": 5.372189044952393, "learning_rate": 1.549047224920477e-05, "loss": 0.7056, "step": 10234 }, { "epoch": 33.557377049180324, "grad_norm": 4.24453067779541, "learning_rate": 1.5489584702314737e-05, "loss": 0.542, "step": 10235 }, { "epoch": 33.56065573770492, "grad_norm": 4.405117511749268, "learning_rate": 1.548869709352393e-05, "loss": 0.6427, "step": 10236 }, { "epoch": 33.56393442622951, "grad_norm": 4.919482707977295, "learning_rate": 1.5487809422842356e-05, "loss": 0.5313, "step": 10237 }, { "epoch": 33.5672131147541, "grad_norm": 4.999966144561768, "learning_rate": 1.5486921690280024e-05, "loss": 0.6992, "step": 10238 }, { "epoch": 33.570491803278685, "grad_norm": 4.633549690246582, "learning_rate": 1.5486033895846945e-05, "loss": 0.495, "step": 10239 }, { "epoch": 33.57377049180328, "grad_norm": 4.610803127288818, "learning_rate": 1.548514603955313e-05, "loss": 0.6557, "step": 10240 }, { "epoch": 33.57704918032787, "grad_norm": 4.489815711975098, "learning_rate": 1.5484258121408592e-05, "loss": 0.772, "step": 10241 }, { "epoch": 33.58032786885246, "grad_norm": 5.428541660308838, "learning_rate": 1.5483370141423338e-05, "loss": 0.5666, "step": 10242 }, { "epoch": 33.58360655737705, "grad_norm": 4.554998874664307, "learning_rate": 1.5482482099607382e-05, "loss": 0.5314, "step": 10243 }, { "epoch": 33.58688524590164, "grad_norm": 4.172296524047852, "learning_rate": 1.5481593995970747e-05, "loss": 0.54, "step": 10244 }, { "epoch": 33.59016393442623, "grad_norm": 4.4658203125, "learning_rate": 1.5480705830523438e-05, "loss": 0.3423, "step": 10245 }, { "epoch": 33.59344262295082, "grad_norm": 5.179442405700684, "learning_rate": 1.547981760327547e-05, "loss": 0.8214, "step": 10246 }, { "epoch": 33.59672131147541, "grad_norm": 4.271982669830322, "learning_rate": 1.5478929314236865e-05, "loss": 0.6985, "step": 10247 }, { "epoch": 33.6, "grad_norm": 4.173342704772949, "learning_rate": 1.547804096341763e-05, "loss": 0.4846, "step": 10248 }, { "epoch": 33.60327868852459, "grad_norm": 3.958977460861206, "learning_rate": 1.5477152550827792e-05, "loss": 0.5765, "step": 10249 }, { "epoch": 33.60655737704918, "grad_norm": 4.032271385192871, "learning_rate": 1.5476264076477362e-05, "loss": 0.7257, "step": 10250 }, { "epoch": 33.609836065573774, "grad_norm": 6.423512935638428, "learning_rate": 1.547537554037636e-05, "loss": 0.5292, "step": 10251 }, { "epoch": 33.61311475409836, "grad_norm": 4.667898654937744, "learning_rate": 1.5474486942534808e-05, "loss": 0.4387, "step": 10252 }, { "epoch": 33.61639344262295, "grad_norm": 4.963640213012695, "learning_rate": 1.547359828296272e-05, "loss": 0.7315, "step": 10253 }, { "epoch": 33.61967213114754, "grad_norm": 4.251762390136719, "learning_rate": 1.5472709561670125e-05, "loss": 0.4645, "step": 10254 }, { "epoch": 33.622950819672134, "grad_norm": 4.199655055999756, "learning_rate": 1.5471820778667036e-05, "loss": 0.6759, "step": 10255 }, { "epoch": 33.62622950819672, "grad_norm": 4.7262115478515625, "learning_rate": 1.547093193396348e-05, "loss": 0.4401, "step": 10256 }, { "epoch": 33.62950819672131, "grad_norm": 3.987962245941162, "learning_rate": 1.547004302756948e-05, "loss": 0.5541, "step": 10257 }, { "epoch": 33.6327868852459, "grad_norm": 5.017124652862549, "learning_rate": 1.5469154059495054e-05, "loss": 0.6572, "step": 10258 }, { "epoch": 33.636065573770495, "grad_norm": 4.184423923492432, "learning_rate": 1.546826502975023e-05, "loss": 0.6726, "step": 10259 }, { "epoch": 33.63934426229508, "grad_norm": 4.495270729064941, "learning_rate": 1.5467375938345032e-05, "loss": 0.3667, "step": 10260 }, { "epoch": 33.64262295081967, "grad_norm": 4.290936470031738, "learning_rate": 1.5466486785289487e-05, "loss": 0.5226, "step": 10261 }, { "epoch": 33.64590163934426, "grad_norm": 5.4618449211120605, "learning_rate": 1.546559757059362e-05, "loss": 0.4318, "step": 10262 }, { "epoch": 33.649180327868855, "grad_norm": 5.194026947021484, "learning_rate": 1.546470829426746e-05, "loss": 0.7771, "step": 10263 }, { "epoch": 33.65245901639344, "grad_norm": 4.0056586265563965, "learning_rate": 1.5463818956321026e-05, "loss": 0.522, "step": 10264 }, { "epoch": 33.65573770491803, "grad_norm": 4.0569353103637695, "learning_rate": 1.5462929556764358e-05, "loss": 0.5782, "step": 10265 }, { "epoch": 33.65901639344262, "grad_norm": 4.501752853393555, "learning_rate": 1.5462040095607473e-05, "loss": 0.4132, "step": 10266 }, { "epoch": 33.662295081967216, "grad_norm": 5.129546642303467, "learning_rate": 1.5461150572860414e-05, "loss": 0.5723, "step": 10267 }, { "epoch": 33.665573770491804, "grad_norm": 4.518723487854004, "learning_rate": 1.54602609885332e-05, "loss": 0.7699, "step": 10268 }, { "epoch": 33.66885245901639, "grad_norm": 4.832139015197754, "learning_rate": 1.5459371342635866e-05, "loss": 0.481, "step": 10269 }, { "epoch": 33.67213114754098, "grad_norm": 4.858911991119385, "learning_rate": 1.5458481635178443e-05, "loss": 0.6007, "step": 10270 }, { "epoch": 33.675409836065576, "grad_norm": 4.471814155578613, "learning_rate": 1.5457591866170963e-05, "loss": 0.4963, "step": 10271 }, { "epoch": 33.678688524590164, "grad_norm": 4.946550369262695, "learning_rate": 1.5456702035623464e-05, "loss": 0.4424, "step": 10272 }, { "epoch": 33.68196721311475, "grad_norm": 4.759815216064453, "learning_rate": 1.5455812143545977e-05, "loss": 0.6499, "step": 10273 }, { "epoch": 33.68524590163934, "grad_norm": 4.066322326660156, "learning_rate": 1.5454922189948535e-05, "loss": 0.6089, "step": 10274 }, { "epoch": 33.68852459016394, "grad_norm": 4.509555339813232, "learning_rate": 1.545403217484117e-05, "loss": 0.5585, "step": 10275 }, { "epoch": 33.691803278688525, "grad_norm": 7.49911642074585, "learning_rate": 1.5453142098233925e-05, "loss": 0.5945, "step": 10276 }, { "epoch": 33.69508196721311, "grad_norm": 7.375755786895752, "learning_rate": 1.545225196013683e-05, "loss": 0.5555, "step": 10277 }, { "epoch": 33.6983606557377, "grad_norm": 4.157995223999023, "learning_rate": 1.5451361760559925e-05, "loss": 0.5791, "step": 10278 }, { "epoch": 33.7016393442623, "grad_norm": 4.203466892242432, "learning_rate": 1.545047149951325e-05, "loss": 0.5188, "step": 10279 }, { "epoch": 33.704918032786885, "grad_norm": 4.824878215789795, "learning_rate": 1.5449581177006843e-05, "loss": 0.321, "step": 10280 }, { "epoch": 33.708196721311474, "grad_norm": 4.792767524719238, "learning_rate": 1.544869079305074e-05, "loss": 0.4717, "step": 10281 }, { "epoch": 33.71147540983607, "grad_norm": 5.513547897338867, "learning_rate": 1.5447800347654985e-05, "loss": 0.6945, "step": 10282 }, { "epoch": 33.71475409836066, "grad_norm": 4.6984477043151855, "learning_rate": 1.5446909840829618e-05, "loss": 0.5585, "step": 10283 }, { "epoch": 33.718032786885246, "grad_norm": 5.601987838745117, "learning_rate": 1.5446019272584675e-05, "loss": 0.6839, "step": 10284 }, { "epoch": 33.721311475409834, "grad_norm": 3.702758550643921, "learning_rate": 1.5445128642930203e-05, "loss": 0.5508, "step": 10285 }, { "epoch": 33.72459016393443, "grad_norm": 6.795778751373291, "learning_rate": 1.5444237951876244e-05, "loss": 0.5348, "step": 10286 }, { "epoch": 33.72786885245902, "grad_norm": 4.798057556152344, "learning_rate": 1.5443347199432844e-05, "loss": 0.531, "step": 10287 }, { "epoch": 33.731147540983606, "grad_norm": 5.369071006774902, "learning_rate": 1.544245638561004e-05, "loss": 0.7912, "step": 10288 }, { "epoch": 33.734426229508195, "grad_norm": 4.297369480133057, "learning_rate": 1.5441565510417886e-05, "loss": 0.5616, "step": 10289 }, { "epoch": 33.73770491803279, "grad_norm": 4.9180989265441895, "learning_rate": 1.5440674573866423e-05, "loss": 0.6106, "step": 10290 }, { "epoch": 33.74098360655738, "grad_norm": 4.677528381347656, "learning_rate": 1.5439783575965695e-05, "loss": 0.6321, "step": 10291 }, { "epoch": 33.74426229508197, "grad_norm": 4.040208339691162, "learning_rate": 1.5438892516725755e-05, "loss": 0.6026, "step": 10292 }, { "epoch": 33.747540983606555, "grad_norm": 4.077701091766357, "learning_rate": 1.543800139615664e-05, "loss": 0.5703, "step": 10293 }, { "epoch": 33.75081967213115, "grad_norm": 4.578202247619629, "learning_rate": 1.543711021426841e-05, "loss": 0.3867, "step": 10294 }, { "epoch": 33.75409836065574, "grad_norm": 5.0046257972717285, "learning_rate": 1.543621897107111e-05, "loss": 0.6466, "step": 10295 }, { "epoch": 33.75737704918033, "grad_norm": 4.625925064086914, "learning_rate": 1.543532766657479e-05, "loss": 0.627, "step": 10296 }, { "epoch": 33.760655737704916, "grad_norm": 4.260404586791992, "learning_rate": 1.543443630078949e-05, "loss": 0.3691, "step": 10297 }, { "epoch": 33.76393442622951, "grad_norm": 4.561126232147217, "learning_rate": 1.543354487372528e-05, "loss": 0.5377, "step": 10298 }, { "epoch": 33.7672131147541, "grad_norm": 4.485787868499756, "learning_rate": 1.54326533853922e-05, "loss": 0.5038, "step": 10299 }, { "epoch": 33.77049180327869, "grad_norm": 7.737585544586182, "learning_rate": 1.5431761835800305e-05, "loss": 0.5643, "step": 10300 }, { "epoch": 33.773770491803276, "grad_norm": 5.528022289276123, "learning_rate": 1.543087022495964e-05, "loss": 0.5098, "step": 10301 }, { "epoch": 33.77704918032787, "grad_norm": 4.571584224700928, "learning_rate": 1.5429978552880275e-05, "loss": 0.5701, "step": 10302 }, { "epoch": 33.78032786885246, "grad_norm": 4.345888137817383, "learning_rate": 1.5429086819572254e-05, "loss": 0.4827, "step": 10303 }, { "epoch": 33.78360655737705, "grad_norm": 4.093635559082031, "learning_rate": 1.5428195025045635e-05, "loss": 0.7113, "step": 10304 }, { "epoch": 33.78688524590164, "grad_norm": 4.085801124572754, "learning_rate": 1.5427303169310474e-05, "loss": 0.4189, "step": 10305 }, { "epoch": 33.79016393442623, "grad_norm": 4.34311580657959, "learning_rate": 1.5426411252376823e-05, "loss": 0.3129, "step": 10306 }, { "epoch": 33.79344262295082, "grad_norm": 3.746328592300415, "learning_rate": 1.5425519274254745e-05, "loss": 0.7471, "step": 10307 }, { "epoch": 33.79672131147541, "grad_norm": 5.44546365737915, "learning_rate": 1.5424627234954294e-05, "loss": 0.5235, "step": 10308 }, { "epoch": 33.8, "grad_norm": 4.575127601623535, "learning_rate": 1.5423735134485537e-05, "loss": 0.4806, "step": 10309 }, { "epoch": 33.80327868852459, "grad_norm": 4.478314399719238, "learning_rate": 1.542284297285852e-05, "loss": 0.6585, "step": 10310 }, { "epoch": 33.80655737704918, "grad_norm": 4.093988418579102, "learning_rate": 1.5421950750083313e-05, "loss": 0.6261, "step": 10311 }, { "epoch": 33.80983606557377, "grad_norm": 5.783020973205566, "learning_rate": 1.5421058466169972e-05, "loss": 0.5972, "step": 10312 }, { "epoch": 33.81311475409836, "grad_norm": 5.031706809997559, "learning_rate": 1.5420166121128566e-05, "loss": 0.4499, "step": 10313 }, { "epoch": 33.81639344262295, "grad_norm": 5.309452056884766, "learning_rate": 1.5419273714969146e-05, "loss": 0.4515, "step": 10314 }, { "epoch": 33.81967213114754, "grad_norm": 5.210287570953369, "learning_rate": 1.5418381247701784e-05, "loss": 0.7567, "step": 10315 }, { "epoch": 33.82295081967213, "grad_norm": 4.30472993850708, "learning_rate": 1.5417488719336537e-05, "loss": 0.4809, "step": 10316 }, { "epoch": 33.82622950819672, "grad_norm": 5.075959205627441, "learning_rate": 1.541659612988347e-05, "loss": 0.5661, "step": 10317 }, { "epoch": 33.829508196721314, "grad_norm": 4.421672821044922, "learning_rate": 1.5415703479352655e-05, "loss": 0.4919, "step": 10318 }, { "epoch": 33.8327868852459, "grad_norm": 4.566534042358398, "learning_rate": 1.5414810767754147e-05, "loss": 0.3751, "step": 10319 }, { "epoch": 33.83606557377049, "grad_norm": 4.785824775695801, "learning_rate": 1.541391799509802e-05, "loss": 0.4692, "step": 10320 }, { "epoch": 33.83934426229508, "grad_norm": 4.299961566925049, "learning_rate": 1.541302516139434e-05, "loss": 0.525, "step": 10321 }, { "epoch": 33.842622950819674, "grad_norm": 3.776732921600342, "learning_rate": 1.5412132266653174e-05, "loss": 0.5742, "step": 10322 }, { "epoch": 33.84590163934426, "grad_norm": 4.892772674560547, "learning_rate": 1.5411239310884587e-05, "loss": 0.5421, "step": 10323 }, { "epoch": 33.84918032786885, "grad_norm": 7.193851947784424, "learning_rate": 1.541034629409865e-05, "loss": 0.8117, "step": 10324 }, { "epoch": 33.85245901639344, "grad_norm": 3.723240852355957, "learning_rate": 1.5409453216305435e-05, "loss": 0.5081, "step": 10325 }, { "epoch": 33.855737704918035, "grad_norm": 4.4143476486206055, "learning_rate": 1.5408560077515008e-05, "loss": 0.5605, "step": 10326 }, { "epoch": 33.85901639344262, "grad_norm": 4.7133870124816895, "learning_rate": 1.5407666877737443e-05, "loss": 0.4818, "step": 10327 }, { "epoch": 33.86229508196721, "grad_norm": 4.499551296234131, "learning_rate": 1.5406773616982816e-05, "loss": 0.6647, "step": 10328 }, { "epoch": 33.86557377049181, "grad_norm": 6.146030902862549, "learning_rate": 1.540588029526119e-05, "loss": 0.484, "step": 10329 }, { "epoch": 33.868852459016395, "grad_norm": 4.928830623626709, "learning_rate": 1.5404986912582646e-05, "loss": 0.3814, "step": 10330 }, { "epoch": 33.87213114754098, "grad_norm": 6.003345966339111, "learning_rate": 1.540409346895725e-05, "loss": 0.6958, "step": 10331 }, { "epoch": 33.87540983606557, "grad_norm": 4.8265533447265625, "learning_rate": 1.5403199964395087e-05, "loss": 0.6631, "step": 10332 }, { "epoch": 33.87868852459017, "grad_norm": 4.351953506469727, "learning_rate": 1.5402306398906222e-05, "loss": 0.448, "step": 10333 }, { "epoch": 33.881967213114756, "grad_norm": 5.139678478240967, "learning_rate": 1.540141277250074e-05, "loss": 0.5488, "step": 10334 }, { "epoch": 33.885245901639344, "grad_norm": 5.363748550415039, "learning_rate": 1.540051908518871e-05, "loss": 0.6997, "step": 10335 }, { "epoch": 33.88852459016393, "grad_norm": 4.173107147216797, "learning_rate": 1.5399625336980212e-05, "loss": 0.6918, "step": 10336 }, { "epoch": 33.89180327868853, "grad_norm": 4.805824279785156, "learning_rate": 1.5398731527885326e-05, "loss": 0.5914, "step": 10337 }, { "epoch": 33.895081967213116, "grad_norm": 4.743828773498535, "learning_rate": 1.5397837657914124e-05, "loss": 0.5284, "step": 10338 }, { "epoch": 33.898360655737704, "grad_norm": 4.82993745803833, "learning_rate": 1.5396943727076696e-05, "loss": 0.7312, "step": 10339 }, { "epoch": 33.90163934426229, "grad_norm": 4.356595993041992, "learning_rate": 1.5396049735383112e-05, "loss": 0.5361, "step": 10340 }, { "epoch": 33.90491803278689, "grad_norm": 4.445355415344238, "learning_rate": 1.5395155682843462e-05, "loss": 0.7054, "step": 10341 }, { "epoch": 33.90819672131148, "grad_norm": 4.551380634307861, "learning_rate": 1.5394261569467815e-05, "loss": 0.4767, "step": 10342 }, { "epoch": 33.911475409836065, "grad_norm": 5.221229076385498, "learning_rate": 1.5393367395266262e-05, "loss": 0.5808, "step": 10343 }, { "epoch": 33.91475409836065, "grad_norm": 4.849370956420898, "learning_rate": 1.539247316024889e-05, "loss": 0.5091, "step": 10344 }, { "epoch": 33.91803278688525, "grad_norm": 5.314342021942139, "learning_rate": 1.5391578864425773e-05, "loss": 0.3832, "step": 10345 }, { "epoch": 33.92131147540984, "grad_norm": 4.799633979797363, "learning_rate": 1.5390684507806993e-05, "loss": 0.4711, "step": 10346 }, { "epoch": 33.924590163934425, "grad_norm": 5.105965614318848, "learning_rate": 1.5389790090402646e-05, "loss": 0.4709, "step": 10347 }, { "epoch": 33.927868852459014, "grad_norm": 4.7777204513549805, "learning_rate": 1.538889561222281e-05, "loss": 0.4967, "step": 10348 }, { "epoch": 33.93114754098361, "grad_norm": 5.165247917175293, "learning_rate": 1.5388001073277574e-05, "loss": 0.3171, "step": 10349 }, { "epoch": 33.9344262295082, "grad_norm": 4.0647125244140625, "learning_rate": 1.5387106473577022e-05, "loss": 0.2559, "step": 10350 }, { "epoch": 33.937704918032786, "grad_norm": 4.611153602600098, "learning_rate": 1.5386211813131245e-05, "loss": 0.3367, "step": 10351 }, { "epoch": 33.940983606557374, "grad_norm": 23.33939552307129, "learning_rate": 1.538531709195033e-05, "loss": 0.5141, "step": 10352 }, { "epoch": 33.94426229508197, "grad_norm": 5.749543190002441, "learning_rate": 1.538442231004436e-05, "loss": 0.7611, "step": 10353 }, { "epoch": 33.94754098360656, "grad_norm": 5.706076145172119, "learning_rate": 1.538352746742344e-05, "loss": 0.6659, "step": 10354 }, { "epoch": 33.950819672131146, "grad_norm": 4.896246433258057, "learning_rate": 1.538263256409764e-05, "loss": 0.5195, "step": 10355 }, { "epoch": 33.954098360655735, "grad_norm": 6.306041717529297, "learning_rate": 1.5381737600077066e-05, "loss": 0.6867, "step": 10356 }, { "epoch": 33.95737704918033, "grad_norm": 6.617498874664307, "learning_rate": 1.5380842575371807e-05, "loss": 0.6426, "step": 10357 }, { "epoch": 33.96065573770492, "grad_norm": 4.743381977081299, "learning_rate": 1.537994748999195e-05, "loss": 0.5803, "step": 10358 }, { "epoch": 33.96393442622951, "grad_norm": 4.989950656890869, "learning_rate": 1.5379052343947596e-05, "loss": 0.5108, "step": 10359 }, { "epoch": 33.967213114754095, "grad_norm": 5.750900745391846, "learning_rate": 1.5378157137248828e-05, "loss": 0.8147, "step": 10360 }, { "epoch": 33.97049180327869, "grad_norm": 6.065168380737305, "learning_rate": 1.537726186990575e-05, "loss": 0.6624, "step": 10361 }, { "epoch": 33.97377049180328, "grad_norm": 4.4276862144470215, "learning_rate": 1.5376366541928455e-05, "loss": 0.5649, "step": 10362 }, { "epoch": 33.97704918032787, "grad_norm": 5.415041923522949, "learning_rate": 1.5375471153327034e-05, "loss": 0.2767, "step": 10363 }, { "epoch": 33.980327868852456, "grad_norm": 4.589475154876709, "learning_rate": 1.5374575704111586e-05, "loss": 0.7846, "step": 10364 }, { "epoch": 33.98360655737705, "grad_norm": 6.6050238609313965, "learning_rate": 1.5373680194292208e-05, "loss": 0.5067, "step": 10365 }, { "epoch": 33.98688524590164, "grad_norm": 4.61708402633667, "learning_rate": 1.5372784623879003e-05, "loss": 0.3824, "step": 10366 }, { "epoch": 33.99016393442623, "grad_norm": 4.589280605316162, "learning_rate": 1.537188899288206e-05, "loss": 0.4965, "step": 10367 }, { "epoch": 33.993442622950816, "grad_norm": 4.647453784942627, "learning_rate": 1.5370993301311486e-05, "loss": 0.5338, "step": 10368 }, { "epoch": 33.99672131147541, "grad_norm": 4.244741439819336, "learning_rate": 1.537009754917738e-05, "loss": 0.5725, "step": 10369 }, { "epoch": 34.0, "grad_norm": 4.489151954650879, "learning_rate": 1.536920173648984e-05, "loss": 0.4305, "step": 10370 }, { "epoch": 34.00327868852459, "grad_norm": 4.775716304779053, "learning_rate": 1.5368305863258965e-05, "loss": 0.4079, "step": 10371 }, { "epoch": 34.006557377049184, "grad_norm": 3.8873727321624756, "learning_rate": 1.5367409929494863e-05, "loss": 0.5906, "step": 10372 }, { "epoch": 34.00983606557377, "grad_norm": 4.353804588317871, "learning_rate": 1.5366513935207632e-05, "loss": 0.6139, "step": 10373 }, { "epoch": 34.01311475409836, "grad_norm": 4.4615702629089355, "learning_rate": 1.5365617880407377e-05, "loss": 0.6585, "step": 10374 }, { "epoch": 34.01639344262295, "grad_norm": 4.185202121734619, "learning_rate": 1.53647217651042e-05, "loss": 0.4306, "step": 10375 }, { "epoch": 34.019672131147544, "grad_norm": 3.7652828693389893, "learning_rate": 1.5363825589308206e-05, "loss": 0.5815, "step": 10376 }, { "epoch": 34.02295081967213, "grad_norm": 4.063276290893555, "learning_rate": 1.5362929353029506e-05, "loss": 0.3429, "step": 10377 }, { "epoch": 34.02622950819672, "grad_norm": 3.6981735229492188, "learning_rate": 1.5362033056278197e-05, "loss": 0.5748, "step": 10378 }, { "epoch": 34.02950819672131, "grad_norm": 5.177964210510254, "learning_rate": 1.5361136699064392e-05, "loss": 0.7491, "step": 10379 }, { "epoch": 34.032786885245905, "grad_norm": 5.050268173217773, "learning_rate": 1.5360240281398198e-05, "loss": 0.4596, "step": 10380 }, { "epoch": 34.03606557377049, "grad_norm": 7.975889682769775, "learning_rate": 1.5359343803289718e-05, "loss": 0.4525, "step": 10381 }, { "epoch": 34.03934426229508, "grad_norm": 4.521063804626465, "learning_rate": 1.535844726474907e-05, "loss": 0.4903, "step": 10382 }, { "epoch": 34.04262295081967, "grad_norm": 10.171436309814453, "learning_rate": 1.5357550665786355e-05, "loss": 0.455, "step": 10383 }, { "epoch": 34.045901639344265, "grad_norm": 5.09520959854126, "learning_rate": 1.5356654006411683e-05, "loss": 0.5273, "step": 10384 }, { "epoch": 34.049180327868854, "grad_norm": 3.835533380508423, "learning_rate": 1.5355757286635172e-05, "loss": 0.5245, "step": 10385 }, { "epoch": 34.05245901639344, "grad_norm": 3.903134822845459, "learning_rate": 1.5354860506466923e-05, "loss": 0.4564, "step": 10386 }, { "epoch": 34.05573770491803, "grad_norm": 4.332409858703613, "learning_rate": 1.535396366591706e-05, "loss": 0.5859, "step": 10387 }, { "epoch": 34.059016393442626, "grad_norm": 4.584662914276123, "learning_rate": 1.5353066764995686e-05, "loss": 0.674, "step": 10388 }, { "epoch": 34.062295081967214, "grad_norm": 4.463944911956787, "learning_rate": 1.535216980371292e-05, "loss": 0.4416, "step": 10389 }, { "epoch": 34.0655737704918, "grad_norm": 4.636621475219727, "learning_rate": 1.5351272782078876e-05, "loss": 0.768, "step": 10390 }, { "epoch": 34.06885245901639, "grad_norm": 4.086310386657715, "learning_rate": 1.5350375700103664e-05, "loss": 0.4386, "step": 10391 }, { "epoch": 34.072131147540986, "grad_norm": 4.338959693908691, "learning_rate": 1.534947855779741e-05, "loss": 0.4554, "step": 10392 }, { "epoch": 34.075409836065575, "grad_norm": 5.656444549560547, "learning_rate": 1.5348581355170217e-05, "loss": 0.4234, "step": 10393 }, { "epoch": 34.07868852459016, "grad_norm": 5.561027526855469, "learning_rate": 1.534768409223221e-05, "loss": 0.5065, "step": 10394 }, { "epoch": 34.08196721311475, "grad_norm": 5.3045501708984375, "learning_rate": 1.5346786768993503e-05, "loss": 0.6222, "step": 10395 }, { "epoch": 34.08524590163935, "grad_norm": 4.2885870933532715, "learning_rate": 1.5345889385464218e-05, "loss": 0.6893, "step": 10396 }, { "epoch": 34.088524590163935, "grad_norm": 4.849600791931152, "learning_rate": 1.534499194165447e-05, "loss": 0.4291, "step": 10397 }, { "epoch": 34.09180327868852, "grad_norm": 5.431268215179443, "learning_rate": 1.5344094437574383e-05, "loss": 0.412, "step": 10398 }, { "epoch": 34.09508196721311, "grad_norm": 6.403240203857422, "learning_rate": 1.5343196873234073e-05, "loss": 0.4712, "step": 10399 }, { "epoch": 34.09836065573771, "grad_norm": 5.229787826538086, "learning_rate": 1.5342299248643663e-05, "loss": 0.4748, "step": 10400 }, { "epoch": 34.101639344262296, "grad_norm": 4.156607627868652, "learning_rate": 1.5341401563813273e-05, "loss": 0.3398, "step": 10401 }, { "epoch": 34.104918032786884, "grad_norm": 5.946981906890869, "learning_rate": 1.534050381875303e-05, "loss": 0.553, "step": 10402 }, { "epoch": 34.10819672131147, "grad_norm": 4.5711822509765625, "learning_rate": 1.533960601347305e-05, "loss": 0.4066, "step": 10403 }, { "epoch": 34.11147540983607, "grad_norm": 4.282895088195801, "learning_rate": 1.533870814798346e-05, "loss": 0.4912, "step": 10404 }, { "epoch": 34.114754098360656, "grad_norm": 4.323729991912842, "learning_rate": 1.533781022229439e-05, "loss": 0.4226, "step": 10405 }, { "epoch": 34.118032786885244, "grad_norm": 4.733475208282471, "learning_rate": 1.5336912236415954e-05, "loss": 0.446, "step": 10406 }, { "epoch": 34.12131147540983, "grad_norm": 4.865721702575684, "learning_rate": 1.5336014190358287e-05, "loss": 0.7466, "step": 10407 }, { "epoch": 34.12459016393443, "grad_norm": 5.208970546722412, "learning_rate": 1.5335116084131512e-05, "loss": 0.4708, "step": 10408 }, { "epoch": 34.12786885245902, "grad_norm": 6.443398475646973, "learning_rate": 1.5334217917745754e-05, "loss": 0.4713, "step": 10409 }, { "epoch": 34.131147540983605, "grad_norm": 4.083374500274658, "learning_rate": 1.5333319691211143e-05, "loss": 0.4335, "step": 10410 }, { "epoch": 34.13442622950819, "grad_norm": 4.905692100524902, "learning_rate": 1.5332421404537813e-05, "loss": 0.8251, "step": 10411 }, { "epoch": 34.13770491803279, "grad_norm": 4.621713638305664, "learning_rate": 1.5331523057735877e-05, "loss": 0.6078, "step": 10412 }, { "epoch": 34.14098360655738, "grad_norm": 4.291578769683838, "learning_rate": 1.5330624650815482e-05, "loss": 0.4002, "step": 10413 }, { "epoch": 34.144262295081965, "grad_norm": 6.20152473449707, "learning_rate": 1.532972618378675e-05, "loss": 0.4318, "step": 10414 }, { "epoch": 34.14754098360656, "grad_norm": 4.077728271484375, "learning_rate": 1.5328827656659818e-05, "loss": 0.2627, "step": 10415 }, { "epoch": 34.15081967213115, "grad_norm": 4.66466760635376, "learning_rate": 1.5327929069444808e-05, "loss": 0.5346, "step": 10416 }, { "epoch": 34.15409836065574, "grad_norm": 4.870886325836182, "learning_rate": 1.532703042215186e-05, "loss": 0.9031, "step": 10417 }, { "epoch": 34.157377049180326, "grad_norm": 5.0361175537109375, "learning_rate": 1.53261317147911e-05, "loss": 0.5193, "step": 10418 }, { "epoch": 34.16065573770492, "grad_norm": 4.814099311828613, "learning_rate": 1.5325232947372675e-05, "loss": 0.5029, "step": 10419 }, { "epoch": 34.16393442622951, "grad_norm": 5.508472442626953, "learning_rate": 1.5324334119906707e-05, "loss": 0.4187, "step": 10420 }, { "epoch": 34.1672131147541, "grad_norm": 4.426412582397461, "learning_rate": 1.532343523240334e-05, "loss": 0.6177, "step": 10421 }, { "epoch": 34.170491803278686, "grad_norm": 6.089330673217773, "learning_rate": 1.5322536284872704e-05, "loss": 0.4445, "step": 10422 }, { "epoch": 34.17377049180328, "grad_norm": 5.383927345275879, "learning_rate": 1.5321637277324937e-05, "loss": 0.6536, "step": 10423 }, { "epoch": 34.17704918032787, "grad_norm": 4.283030033111572, "learning_rate": 1.5320738209770177e-05, "loss": 0.4521, "step": 10424 }, { "epoch": 34.18032786885246, "grad_norm": 4.818795204162598, "learning_rate": 1.531983908221856e-05, "loss": 0.4237, "step": 10425 }, { "epoch": 34.18360655737705, "grad_norm": 4.245959758758545, "learning_rate": 1.5318939894680228e-05, "loss": 0.6614, "step": 10426 }, { "epoch": 34.18688524590164, "grad_norm": 4.697752952575684, "learning_rate": 1.5318040647165316e-05, "loss": 0.4142, "step": 10427 }, { "epoch": 34.19016393442623, "grad_norm": 4.4950175285339355, "learning_rate": 1.5317141339683968e-05, "loss": 0.5621, "step": 10428 }, { "epoch": 34.19344262295082, "grad_norm": 4.623659610748291, "learning_rate": 1.531624197224632e-05, "loss": 0.403, "step": 10429 }, { "epoch": 34.19672131147541, "grad_norm": 4.6496968269348145, "learning_rate": 1.531534254486252e-05, "loss": 0.6298, "step": 10430 }, { "epoch": 34.2, "grad_norm": 4.737113952636719, "learning_rate": 1.5314443057542703e-05, "loss": 0.4233, "step": 10431 }, { "epoch": 34.20327868852459, "grad_norm": 5.417752742767334, "learning_rate": 1.5313543510297018e-05, "loss": 0.3262, "step": 10432 }, { "epoch": 34.20655737704918, "grad_norm": 5.304774284362793, "learning_rate": 1.5312643903135606e-05, "loss": 0.7207, "step": 10433 }, { "epoch": 34.20983606557377, "grad_norm": 4.180652618408203, "learning_rate": 1.5311744236068606e-05, "loss": 0.4248, "step": 10434 }, { "epoch": 34.21311475409836, "grad_norm": 3.793886423110962, "learning_rate": 1.531084450910617e-05, "loss": 0.5276, "step": 10435 }, { "epoch": 34.21639344262295, "grad_norm": 4.993895530700684, "learning_rate": 1.5309944722258442e-05, "loss": 0.6849, "step": 10436 }, { "epoch": 34.21967213114754, "grad_norm": 5.526662349700928, "learning_rate": 1.5309044875535564e-05, "loss": 0.6793, "step": 10437 }, { "epoch": 34.22295081967213, "grad_norm": 4.013091564178467, "learning_rate": 1.5308144968947684e-05, "loss": 0.4939, "step": 10438 }, { "epoch": 34.226229508196724, "grad_norm": 4.647974014282227, "learning_rate": 1.5307245002504952e-05, "loss": 0.5109, "step": 10439 }, { "epoch": 34.22950819672131, "grad_norm": 4.769322395324707, "learning_rate": 1.530634497621751e-05, "loss": 0.3491, "step": 10440 }, { "epoch": 34.2327868852459, "grad_norm": 5.2151198387146, "learning_rate": 1.5305444890095514e-05, "loss": 0.3085, "step": 10441 }, { "epoch": 34.23606557377049, "grad_norm": 4.057600975036621, "learning_rate": 1.5304544744149113e-05, "loss": 0.5864, "step": 10442 }, { "epoch": 34.239344262295084, "grad_norm": 5.582568168640137, "learning_rate": 1.5303644538388453e-05, "loss": 0.3999, "step": 10443 }, { "epoch": 34.24262295081967, "grad_norm": 4.306905269622803, "learning_rate": 1.5302744272823687e-05, "loss": 0.6045, "step": 10444 }, { "epoch": 34.24590163934426, "grad_norm": 4.475961208343506, "learning_rate": 1.5301843947464967e-05, "loss": 0.4301, "step": 10445 }, { "epoch": 34.24918032786885, "grad_norm": 4.112032890319824, "learning_rate": 1.530094356232244e-05, "loss": 0.3655, "step": 10446 }, { "epoch": 34.252459016393445, "grad_norm": 4.981161117553711, "learning_rate": 1.5300043117406268e-05, "loss": 0.6949, "step": 10447 }, { "epoch": 34.25573770491803, "grad_norm": 4.148924350738525, "learning_rate": 1.5299142612726597e-05, "loss": 0.487, "step": 10448 }, { "epoch": 34.25901639344262, "grad_norm": 3.9689478874206543, "learning_rate": 1.529824204829358e-05, "loss": 0.2604, "step": 10449 }, { "epoch": 34.26229508196721, "grad_norm": 3.6914169788360596, "learning_rate": 1.529734142411738e-05, "loss": 0.5804, "step": 10450 }, { "epoch": 34.265573770491805, "grad_norm": 4.3460516929626465, "learning_rate": 1.529644074020814e-05, "loss": 0.4582, "step": 10451 }, { "epoch": 34.268852459016394, "grad_norm": 5.016635417938232, "learning_rate": 1.5295539996576034e-05, "loss": 0.4289, "step": 10452 }, { "epoch": 34.27213114754098, "grad_norm": 5.538448333740234, "learning_rate": 1.52946391932312e-05, "loss": 0.4863, "step": 10453 }, { "epoch": 34.27540983606557, "grad_norm": 4.160910129547119, "learning_rate": 1.529373833018381e-05, "loss": 0.4821, "step": 10454 }, { "epoch": 34.278688524590166, "grad_norm": 4.598090648651123, "learning_rate": 1.5292837407444015e-05, "loss": 0.3945, "step": 10455 }, { "epoch": 34.281967213114754, "grad_norm": 4.347146034240723, "learning_rate": 1.5291936425021973e-05, "loss": 0.5693, "step": 10456 }, { "epoch": 34.28524590163934, "grad_norm": 4.240715503692627, "learning_rate": 1.5291035382927846e-05, "loss": 0.4791, "step": 10457 }, { "epoch": 34.28852459016394, "grad_norm": 6.236790657043457, "learning_rate": 1.5290134281171795e-05, "loss": 0.5756, "step": 10458 }, { "epoch": 34.291803278688526, "grad_norm": 5.181521892547607, "learning_rate": 1.5289233119763977e-05, "loss": 0.6617, "step": 10459 }, { "epoch": 34.295081967213115, "grad_norm": 4.424931049346924, "learning_rate": 1.5288331898714556e-05, "loss": 0.4568, "step": 10460 }, { "epoch": 34.2983606557377, "grad_norm": 4.238801956176758, "learning_rate": 1.52874306180337e-05, "loss": 0.4794, "step": 10461 }, { "epoch": 34.3016393442623, "grad_norm": 5.6630024909973145, "learning_rate": 1.5286529277731562e-05, "loss": 0.478, "step": 10462 }, { "epoch": 34.30491803278689, "grad_norm": 4.064426422119141, "learning_rate": 1.528562787781831e-05, "loss": 0.5111, "step": 10463 }, { "epoch": 34.308196721311475, "grad_norm": 3.44760799407959, "learning_rate": 1.5284726418304108e-05, "loss": 0.5142, "step": 10464 }, { "epoch": 34.31147540983606, "grad_norm": 4.409022808074951, "learning_rate": 1.528382489919912e-05, "loss": 0.3621, "step": 10465 }, { "epoch": 34.31475409836066, "grad_norm": 4.281805992126465, "learning_rate": 1.5282923320513515e-05, "loss": 0.7743, "step": 10466 }, { "epoch": 34.31803278688525, "grad_norm": 4.312090873718262, "learning_rate": 1.5282021682257457e-05, "loss": 0.534, "step": 10467 }, { "epoch": 34.321311475409836, "grad_norm": 4.709609031677246, "learning_rate": 1.528111998444111e-05, "loss": 0.4246, "step": 10468 }, { "epoch": 34.324590163934424, "grad_norm": 3.6267287731170654, "learning_rate": 1.5280218227074645e-05, "loss": 0.4068, "step": 10469 }, { "epoch": 34.32786885245902, "grad_norm": 4.787257194519043, "learning_rate": 1.527931641016823e-05, "loss": 0.564, "step": 10470 }, { "epoch": 34.33114754098361, "grad_norm": 4.644695281982422, "learning_rate": 1.5278414533732032e-05, "loss": 0.5092, "step": 10471 }, { "epoch": 34.334426229508196, "grad_norm": 5.0089850425720215, "learning_rate": 1.5277512597776227e-05, "loss": 0.5314, "step": 10472 }, { "epoch": 34.337704918032784, "grad_norm": 3.8837108612060547, "learning_rate": 1.5276610602310973e-05, "loss": 0.5781, "step": 10473 }, { "epoch": 34.34098360655738, "grad_norm": 4.466926574707031, "learning_rate": 1.527570854734645e-05, "loss": 0.4259, "step": 10474 }, { "epoch": 34.34426229508197, "grad_norm": 5.367173671722412, "learning_rate": 1.527480643289283e-05, "loss": 0.4117, "step": 10475 }, { "epoch": 34.34754098360656, "grad_norm": 5.014766216278076, "learning_rate": 1.5273904258960283e-05, "loss": 0.46, "step": 10476 }, { "epoch": 34.350819672131145, "grad_norm": 4.474450588226318, "learning_rate": 1.5273002025558982e-05, "loss": 0.5344, "step": 10477 }, { "epoch": 34.35409836065574, "grad_norm": 4.058294773101807, "learning_rate": 1.5272099732699098e-05, "loss": 0.4969, "step": 10478 }, { "epoch": 34.35737704918033, "grad_norm": 4.155172824859619, "learning_rate": 1.527119738039081e-05, "loss": 0.6342, "step": 10479 }, { "epoch": 34.36065573770492, "grad_norm": 5.493752479553223, "learning_rate": 1.5270294968644292e-05, "loss": 0.5594, "step": 10480 }, { "epoch": 34.363934426229505, "grad_norm": 4.240842819213867, "learning_rate": 1.526939249746972e-05, "loss": 0.5653, "step": 10481 }, { "epoch": 34.3672131147541, "grad_norm": 4.487255096435547, "learning_rate": 1.5268489966877265e-05, "loss": 0.5024, "step": 10482 }, { "epoch": 34.37049180327869, "grad_norm": 4.493730545043945, "learning_rate": 1.5267587376877105e-05, "loss": 0.5001, "step": 10483 }, { "epoch": 34.37377049180328, "grad_norm": 3.540287733078003, "learning_rate": 1.5266684727479427e-05, "loss": 0.5022, "step": 10484 }, { "epoch": 34.377049180327866, "grad_norm": 4.769618034362793, "learning_rate": 1.5265782018694396e-05, "loss": 0.5409, "step": 10485 }, { "epoch": 34.38032786885246, "grad_norm": 4.2937846183776855, "learning_rate": 1.5264879250532203e-05, "loss": 0.5821, "step": 10486 }, { "epoch": 34.38360655737705, "grad_norm": 4.3235764503479, "learning_rate": 1.5263976423003022e-05, "loss": 0.6051, "step": 10487 }, { "epoch": 34.38688524590164, "grad_norm": 4.858687877655029, "learning_rate": 1.526307353611703e-05, "loss": 0.3235, "step": 10488 }, { "epoch": 34.390163934426226, "grad_norm": 4.888407230377197, "learning_rate": 1.5262170589884416e-05, "loss": 0.5952, "step": 10489 }, { "epoch": 34.39344262295082, "grad_norm": 4.694697856903076, "learning_rate": 1.5261267584315354e-05, "loss": 0.6962, "step": 10490 }, { "epoch": 34.39672131147541, "grad_norm": 4.052370548248291, "learning_rate": 1.526036451942003e-05, "loss": 0.6547, "step": 10491 }, { "epoch": 34.4, "grad_norm": 4.885592937469482, "learning_rate": 1.5259461395208628e-05, "loss": 0.4226, "step": 10492 }, { "epoch": 34.40327868852459, "grad_norm": 4.474143028259277, "learning_rate": 1.5258558211691331e-05, "loss": 0.5457, "step": 10493 }, { "epoch": 34.40655737704918, "grad_norm": 4.663239002227783, "learning_rate": 1.5257654968878322e-05, "loss": 0.423, "step": 10494 }, { "epoch": 34.40983606557377, "grad_norm": 4.026709079742432, "learning_rate": 1.5256751666779788e-05, "loss": 0.505, "step": 10495 }, { "epoch": 34.41311475409836, "grad_norm": 4.372245788574219, "learning_rate": 1.525584830540591e-05, "loss": 0.4166, "step": 10496 }, { "epoch": 34.41639344262295, "grad_norm": 4.899763584136963, "learning_rate": 1.5254944884766878e-05, "loss": 0.4998, "step": 10497 }, { "epoch": 34.41967213114754, "grad_norm": 4.289584159851074, "learning_rate": 1.525404140487288e-05, "loss": 0.4583, "step": 10498 }, { "epoch": 34.42295081967213, "grad_norm": 4.472045421600342, "learning_rate": 1.5253137865734102e-05, "loss": 0.5805, "step": 10499 }, { "epoch": 34.42622950819672, "grad_norm": 4.531060695648193, "learning_rate": 1.5252234267360735e-05, "loss": 0.5591, "step": 10500 }, { "epoch": 34.429508196721315, "grad_norm": 3.7745139598846436, "learning_rate": 1.5251330609762962e-05, "loss": 0.6141, "step": 10501 }, { "epoch": 34.4327868852459, "grad_norm": 4.6453046798706055, "learning_rate": 1.525042689295098e-05, "loss": 0.6752, "step": 10502 }, { "epoch": 34.43606557377049, "grad_norm": 5.461554050445557, "learning_rate": 1.5249523116934972e-05, "loss": 0.6707, "step": 10503 }, { "epoch": 34.43934426229508, "grad_norm": 4.161657333374023, "learning_rate": 1.5248619281725137e-05, "loss": 0.4527, "step": 10504 }, { "epoch": 34.442622950819676, "grad_norm": 4.1114397048950195, "learning_rate": 1.5247715387331657e-05, "loss": 0.4824, "step": 10505 }, { "epoch": 34.445901639344264, "grad_norm": 4.919296741485596, "learning_rate": 1.5246811433764732e-05, "loss": 0.5804, "step": 10506 }, { "epoch": 34.44918032786885, "grad_norm": 5.559308052062988, "learning_rate": 1.5245907421034554e-05, "loss": 0.4657, "step": 10507 }, { "epoch": 34.45245901639344, "grad_norm": 4.652949333190918, "learning_rate": 1.5245003349151315e-05, "loss": 0.4159, "step": 10508 }, { "epoch": 34.455737704918036, "grad_norm": 4.523249626159668, "learning_rate": 1.524409921812521e-05, "loss": 0.7626, "step": 10509 }, { "epoch": 34.459016393442624, "grad_norm": 5.609543800354004, "learning_rate": 1.5243195027966434e-05, "loss": 0.4557, "step": 10510 }, { "epoch": 34.46229508196721, "grad_norm": 4.282262802124023, "learning_rate": 1.524229077868518e-05, "loss": 0.4976, "step": 10511 }, { "epoch": 34.4655737704918, "grad_norm": 4.26815938949585, "learning_rate": 1.5241386470291648e-05, "loss": 0.5695, "step": 10512 }, { "epoch": 34.4688524590164, "grad_norm": 4.75238561630249, "learning_rate": 1.5240482102796037e-05, "loss": 0.5046, "step": 10513 }, { "epoch": 34.472131147540985, "grad_norm": 4.407052516937256, "learning_rate": 1.5239577676208537e-05, "loss": 0.3014, "step": 10514 }, { "epoch": 34.47540983606557, "grad_norm": 5.446534156799316, "learning_rate": 1.5238673190539355e-05, "loss": 0.6682, "step": 10515 }, { "epoch": 34.47868852459016, "grad_norm": 4.342656135559082, "learning_rate": 1.523776864579868e-05, "loss": 0.4164, "step": 10516 }, { "epoch": 34.48196721311476, "grad_norm": 4.706861972808838, "learning_rate": 1.5236864041996723e-05, "loss": 0.4651, "step": 10517 }, { "epoch": 34.485245901639345, "grad_norm": 4.648932933807373, "learning_rate": 1.523595937914368e-05, "loss": 0.4099, "step": 10518 }, { "epoch": 34.488524590163934, "grad_norm": 5.418273448944092, "learning_rate": 1.5235054657249749e-05, "loss": 0.5859, "step": 10519 }, { "epoch": 34.49180327868852, "grad_norm": 4.09972620010376, "learning_rate": 1.523414987632513e-05, "loss": 0.6127, "step": 10520 }, { "epoch": 34.49508196721312, "grad_norm": 3.9785714149475098, "learning_rate": 1.5233245036380033e-05, "loss": 0.5338, "step": 10521 }, { "epoch": 34.498360655737706, "grad_norm": 4.091445446014404, "learning_rate": 1.5232340137424654e-05, "loss": 0.6968, "step": 10522 }, { "epoch": 34.501639344262294, "grad_norm": 3.912609338760376, "learning_rate": 1.5231435179469202e-05, "loss": 0.4844, "step": 10523 }, { "epoch": 34.50491803278688, "grad_norm": 4.722777843475342, "learning_rate": 1.5230530162523882e-05, "loss": 0.5182, "step": 10524 }, { "epoch": 34.50819672131148, "grad_norm": 4.950273036956787, "learning_rate": 1.522962508659889e-05, "loss": 0.4438, "step": 10525 }, { "epoch": 34.511475409836066, "grad_norm": 4.056789398193359, "learning_rate": 1.5228719951704443e-05, "loss": 0.4475, "step": 10526 }, { "epoch": 34.514754098360655, "grad_norm": 4.087510108947754, "learning_rate": 1.5227814757850739e-05, "loss": 0.6575, "step": 10527 }, { "epoch": 34.51803278688524, "grad_norm": 4.307493686676025, "learning_rate": 1.5226909505047989e-05, "loss": 0.5056, "step": 10528 }, { "epoch": 34.52131147540984, "grad_norm": 4.332648754119873, "learning_rate": 1.5226004193306399e-05, "loss": 0.383, "step": 10529 }, { "epoch": 34.52459016393443, "grad_norm": 4.610828876495361, "learning_rate": 1.522509882263618e-05, "loss": 0.4365, "step": 10530 }, { "epoch": 34.527868852459015, "grad_norm": 3.936922550201416, "learning_rate": 1.5224193393047535e-05, "loss": 0.4463, "step": 10531 }, { "epoch": 34.5311475409836, "grad_norm": 6.410731792449951, "learning_rate": 1.522328790455068e-05, "loss": 0.6159, "step": 10532 }, { "epoch": 34.5344262295082, "grad_norm": 4.239925861358643, "learning_rate": 1.5222382357155826e-05, "loss": 0.4739, "step": 10533 }, { "epoch": 34.53770491803279, "grad_norm": 4.799026966094971, "learning_rate": 1.5221476750873177e-05, "loss": 0.4781, "step": 10534 }, { "epoch": 34.540983606557376, "grad_norm": 3.9651401042938232, "learning_rate": 1.522057108571295e-05, "loss": 0.6014, "step": 10535 }, { "epoch": 34.544262295081964, "grad_norm": 4.156039237976074, "learning_rate": 1.5219665361685356e-05, "loss": 0.6374, "step": 10536 }, { "epoch": 34.54754098360656, "grad_norm": 4.700117111206055, "learning_rate": 1.5218759578800608e-05, "loss": 0.3844, "step": 10537 }, { "epoch": 34.55081967213115, "grad_norm": 4.872420310974121, "learning_rate": 1.521785373706892e-05, "loss": 0.6361, "step": 10538 }, { "epoch": 34.554098360655736, "grad_norm": 4.0718560218811035, "learning_rate": 1.5216947836500504e-05, "loss": 0.3442, "step": 10539 }, { "epoch": 34.557377049180324, "grad_norm": 4.4239091873168945, "learning_rate": 1.5216041877105577e-05, "loss": 0.5512, "step": 10540 }, { "epoch": 34.56065573770492, "grad_norm": 4.338921070098877, "learning_rate": 1.5215135858894355e-05, "loss": 0.4002, "step": 10541 }, { "epoch": 34.56393442622951, "grad_norm": 4.349797248840332, "learning_rate": 1.5214229781877054e-05, "loss": 0.4576, "step": 10542 }, { "epoch": 34.5672131147541, "grad_norm": 4.367559909820557, "learning_rate": 1.5213323646063893e-05, "loss": 0.5852, "step": 10543 }, { "epoch": 34.570491803278685, "grad_norm": 5.147790431976318, "learning_rate": 1.5212417451465085e-05, "loss": 0.5272, "step": 10544 }, { "epoch": 34.57377049180328, "grad_norm": 4.93145751953125, "learning_rate": 1.5211511198090852e-05, "loss": 0.5194, "step": 10545 }, { "epoch": 34.57704918032787, "grad_norm": 3.6191139221191406, "learning_rate": 1.521060488595141e-05, "loss": 0.4113, "step": 10546 }, { "epoch": 34.58032786885246, "grad_norm": 4.269949436187744, "learning_rate": 1.5209698515056982e-05, "loss": 0.5703, "step": 10547 }, { "epoch": 34.58360655737705, "grad_norm": 3.770226240158081, "learning_rate": 1.5208792085417785e-05, "loss": 0.5815, "step": 10548 }, { "epoch": 34.58688524590164, "grad_norm": 4.036175727844238, "learning_rate": 1.5207885597044044e-05, "loss": 0.6028, "step": 10549 }, { "epoch": 34.59016393442623, "grad_norm": 4.199341773986816, "learning_rate": 1.5206979049945977e-05, "loss": 0.4693, "step": 10550 }, { "epoch": 34.59344262295082, "grad_norm": 4.108384132385254, "learning_rate": 1.5206072444133804e-05, "loss": 0.438, "step": 10551 }, { "epoch": 34.59672131147541, "grad_norm": 4.161809921264648, "learning_rate": 1.5205165779617755e-05, "loss": 0.6387, "step": 10552 }, { "epoch": 34.6, "grad_norm": 4.4617085456848145, "learning_rate": 1.5204259056408046e-05, "loss": 0.5198, "step": 10553 }, { "epoch": 34.60327868852459, "grad_norm": 4.683351516723633, "learning_rate": 1.5203352274514909e-05, "loss": 0.4896, "step": 10554 }, { "epoch": 34.60655737704918, "grad_norm": 3.855102777481079, "learning_rate": 1.5202445433948562e-05, "loss": 0.3525, "step": 10555 }, { "epoch": 34.609836065573774, "grad_norm": 4.954136848449707, "learning_rate": 1.5201538534719235e-05, "loss": 0.759, "step": 10556 }, { "epoch": 34.61311475409836, "grad_norm": 3.6758153438568115, "learning_rate": 1.5200631576837155e-05, "loss": 0.5253, "step": 10557 }, { "epoch": 34.61639344262295, "grad_norm": 4.29384708404541, "learning_rate": 1.5199724560312544e-05, "loss": 0.5617, "step": 10558 }, { "epoch": 34.61967213114754, "grad_norm": 4.34787654876709, "learning_rate": 1.5198817485155633e-05, "loss": 0.6339, "step": 10559 }, { "epoch": 34.622950819672134, "grad_norm": 4.534114837646484, "learning_rate": 1.519791035137665e-05, "loss": 0.4453, "step": 10560 }, { "epoch": 34.62622950819672, "grad_norm": 4.496253490447998, "learning_rate": 1.5197003158985821e-05, "loss": 0.7847, "step": 10561 }, { "epoch": 34.62950819672131, "grad_norm": 4.878684997558594, "learning_rate": 1.519609590799338e-05, "loss": 0.3664, "step": 10562 }, { "epoch": 34.6327868852459, "grad_norm": 3.7311408519744873, "learning_rate": 1.5195188598409553e-05, "loss": 0.4307, "step": 10563 }, { "epoch": 34.636065573770495, "grad_norm": 4.480340003967285, "learning_rate": 1.5194281230244576e-05, "loss": 0.5413, "step": 10564 }, { "epoch": 34.63934426229508, "grad_norm": 4.857325077056885, "learning_rate": 1.5193373803508675e-05, "loss": 0.4742, "step": 10565 }, { "epoch": 34.64262295081967, "grad_norm": 4.757161617279053, "learning_rate": 1.5192466318212087e-05, "loss": 0.512, "step": 10566 }, { "epoch": 34.64590163934426, "grad_norm": 4.254969120025635, "learning_rate": 1.519155877436504e-05, "loss": 0.5639, "step": 10567 }, { "epoch": 34.649180327868855, "grad_norm": 4.164283752441406, "learning_rate": 1.5190651171977768e-05, "loss": 0.6906, "step": 10568 }, { "epoch": 34.65245901639344, "grad_norm": 4.145966053009033, "learning_rate": 1.518974351106051e-05, "loss": 0.537, "step": 10569 }, { "epoch": 34.65573770491803, "grad_norm": 4.32603120803833, "learning_rate": 1.51888357916235e-05, "loss": 0.6224, "step": 10570 }, { "epoch": 34.65901639344262, "grad_norm": 4.152637004852295, "learning_rate": 1.5187928013676969e-05, "loss": 0.4426, "step": 10571 }, { "epoch": 34.662295081967216, "grad_norm": 4.3062543869018555, "learning_rate": 1.5187020177231155e-05, "loss": 0.4718, "step": 10572 }, { "epoch": 34.665573770491804, "grad_norm": 4.4180073738098145, "learning_rate": 1.5186112282296298e-05, "loss": 0.5013, "step": 10573 }, { "epoch": 34.66885245901639, "grad_norm": 4.532461166381836, "learning_rate": 1.5185204328882628e-05, "loss": 0.5274, "step": 10574 }, { "epoch": 34.67213114754098, "grad_norm": 4.026179790496826, "learning_rate": 1.5184296317000394e-05, "loss": 0.8523, "step": 10575 }, { "epoch": 34.675409836065576, "grad_norm": 4.324542045593262, "learning_rate": 1.5183388246659828e-05, "loss": 0.5838, "step": 10576 }, { "epoch": 34.678688524590164, "grad_norm": 4.178464889526367, "learning_rate": 1.5182480117871166e-05, "loss": 0.4438, "step": 10577 }, { "epoch": 34.68196721311475, "grad_norm": 5.047579288482666, "learning_rate": 1.5181571930644655e-05, "loss": 0.4213, "step": 10578 }, { "epoch": 34.68524590163934, "grad_norm": 5.886630058288574, "learning_rate": 1.5180663684990529e-05, "loss": 0.5156, "step": 10579 }, { "epoch": 34.68852459016394, "grad_norm": 3.989912986755371, "learning_rate": 1.517975538091904e-05, "loss": 0.4655, "step": 10580 }, { "epoch": 34.691803278688525, "grad_norm": 5.15812873840332, "learning_rate": 1.5178847018440417e-05, "loss": 0.4161, "step": 10581 }, { "epoch": 34.69508196721311, "grad_norm": 4.352181911468506, "learning_rate": 1.5177938597564912e-05, "loss": 0.4673, "step": 10582 }, { "epoch": 34.6983606557377, "grad_norm": 4.704372406005859, "learning_rate": 1.5177030118302765e-05, "loss": 0.6231, "step": 10583 }, { "epoch": 34.7016393442623, "grad_norm": 4.652238368988037, "learning_rate": 1.5176121580664222e-05, "loss": 0.5986, "step": 10584 }, { "epoch": 34.704918032786885, "grad_norm": 4.855283737182617, "learning_rate": 1.5175212984659524e-05, "loss": 0.3737, "step": 10585 }, { "epoch": 34.708196721311474, "grad_norm": 4.386813163757324, "learning_rate": 1.5174304330298923e-05, "loss": 0.4087, "step": 10586 }, { "epoch": 34.71147540983607, "grad_norm": 4.959553241729736, "learning_rate": 1.5173395617592656e-05, "loss": 0.5863, "step": 10587 }, { "epoch": 34.71475409836066, "grad_norm": 3.9909262657165527, "learning_rate": 1.517248684655098e-05, "loss": 0.4334, "step": 10588 }, { "epoch": 34.718032786885246, "grad_norm": 4.434257984161377, "learning_rate": 1.5171578017184132e-05, "loss": 0.6506, "step": 10589 }, { "epoch": 34.721311475409834, "grad_norm": 4.310784816741943, "learning_rate": 1.5170669129502366e-05, "loss": 0.589, "step": 10590 }, { "epoch": 34.72459016393443, "grad_norm": 4.786805152893066, "learning_rate": 1.516976018351593e-05, "loss": 0.4506, "step": 10591 }, { "epoch": 34.72786885245902, "grad_norm": 4.938960552215576, "learning_rate": 1.5168851179235074e-05, "loss": 0.5431, "step": 10592 }, { "epoch": 34.731147540983606, "grad_norm": 5.985193252563477, "learning_rate": 1.5167942116670043e-05, "loss": 0.3488, "step": 10593 }, { "epoch": 34.734426229508195, "grad_norm": 4.0670623779296875, "learning_rate": 1.5167032995831093e-05, "loss": 0.3992, "step": 10594 }, { "epoch": 34.73770491803279, "grad_norm": 4.520923614501953, "learning_rate": 1.5166123816728475e-05, "loss": 0.5026, "step": 10595 }, { "epoch": 34.74098360655738, "grad_norm": 4.387091636657715, "learning_rate": 1.5165214579372437e-05, "loss": 0.3398, "step": 10596 }, { "epoch": 34.74426229508197, "grad_norm": 5.16612434387207, "learning_rate": 1.5164305283773236e-05, "loss": 0.6263, "step": 10597 }, { "epoch": 34.747540983606555, "grad_norm": 6.125960826873779, "learning_rate": 1.516339592994112e-05, "loss": 0.4037, "step": 10598 }, { "epoch": 34.75081967213115, "grad_norm": 4.461660861968994, "learning_rate": 1.5162486517886352e-05, "loss": 0.7291, "step": 10599 }, { "epoch": 34.75409836065574, "grad_norm": 3.8823256492614746, "learning_rate": 1.5161577047619175e-05, "loss": 0.4595, "step": 10600 }, { "epoch": 34.75737704918033, "grad_norm": 4.637864112854004, "learning_rate": 1.5160667519149856e-05, "loss": 0.4909, "step": 10601 }, { "epoch": 34.760655737704916, "grad_norm": 4.624027252197266, "learning_rate": 1.5159757932488644e-05, "loss": 0.5337, "step": 10602 }, { "epoch": 34.76393442622951, "grad_norm": 4.5880513191223145, "learning_rate": 1.5158848287645793e-05, "loss": 0.4302, "step": 10603 }, { "epoch": 34.7672131147541, "grad_norm": 4.638768196105957, "learning_rate": 1.5157938584631566e-05, "loss": 0.4141, "step": 10604 }, { "epoch": 34.77049180327869, "grad_norm": 4.7901201248168945, "learning_rate": 1.5157028823456216e-05, "loss": 0.5717, "step": 10605 }, { "epoch": 34.773770491803276, "grad_norm": 4.508681297302246, "learning_rate": 1.515611900413001e-05, "loss": 0.4712, "step": 10606 }, { "epoch": 34.77704918032787, "grad_norm": 4.328388214111328, "learning_rate": 1.5155209126663196e-05, "loss": 0.5025, "step": 10607 }, { "epoch": 34.78032786885246, "grad_norm": 4.555559158325195, "learning_rate": 1.5154299191066039e-05, "loss": 0.3602, "step": 10608 }, { "epoch": 34.78360655737705, "grad_norm": 4.377427101135254, "learning_rate": 1.5153389197348802e-05, "loss": 0.5988, "step": 10609 }, { "epoch": 34.78688524590164, "grad_norm": 9.152422904968262, "learning_rate": 1.5152479145521741e-05, "loss": 0.4712, "step": 10610 }, { "epoch": 34.79016393442623, "grad_norm": 5.310202598571777, "learning_rate": 1.5151569035595124e-05, "loss": 0.3931, "step": 10611 }, { "epoch": 34.79344262295082, "grad_norm": 4.07009220123291, "learning_rate": 1.515065886757921e-05, "loss": 0.7875, "step": 10612 }, { "epoch": 34.79672131147541, "grad_norm": 4.373379230499268, "learning_rate": 1.5149748641484254e-05, "loss": 0.469, "step": 10613 }, { "epoch": 34.8, "grad_norm": 4.0643205642700195, "learning_rate": 1.5148838357320537e-05, "loss": 0.2918, "step": 10614 }, { "epoch": 34.80327868852459, "grad_norm": 4.574882984161377, "learning_rate": 1.5147928015098309e-05, "loss": 0.4282, "step": 10615 }, { "epoch": 34.80655737704918, "grad_norm": 4.333842754364014, "learning_rate": 1.514701761482784e-05, "loss": 0.5022, "step": 10616 }, { "epoch": 34.80983606557377, "grad_norm": 4.309013843536377, "learning_rate": 1.5146107156519398e-05, "loss": 0.769, "step": 10617 }, { "epoch": 34.81311475409836, "grad_norm": 4.730603218078613, "learning_rate": 1.5145196640183249e-05, "loss": 0.5367, "step": 10618 }, { "epoch": 34.81639344262295, "grad_norm": 4.926801681518555, "learning_rate": 1.5144286065829654e-05, "loss": 0.5233, "step": 10619 }, { "epoch": 34.81967213114754, "grad_norm": 4.506659984588623, "learning_rate": 1.5143375433468887e-05, "loss": 0.5539, "step": 10620 }, { "epoch": 34.82295081967213, "grad_norm": 5.32069730758667, "learning_rate": 1.5142464743111211e-05, "loss": 0.5923, "step": 10621 }, { "epoch": 34.82622950819672, "grad_norm": 4.186882495880127, "learning_rate": 1.5141553994766902e-05, "loss": 0.3843, "step": 10622 }, { "epoch": 34.829508196721314, "grad_norm": 4.31832218170166, "learning_rate": 1.5140643188446226e-05, "loss": 0.5916, "step": 10623 }, { "epoch": 34.8327868852459, "grad_norm": 4.388087749481201, "learning_rate": 1.513973232415945e-05, "loss": 0.4171, "step": 10624 }, { "epoch": 34.83606557377049, "grad_norm": 4.170733451843262, "learning_rate": 1.5138821401916853e-05, "loss": 0.4809, "step": 10625 }, { "epoch": 34.83934426229508, "grad_norm": 3.858140230178833, "learning_rate": 1.5137910421728697e-05, "loss": 0.4784, "step": 10626 }, { "epoch": 34.842622950819674, "grad_norm": 3.544698476791382, "learning_rate": 1.5136999383605262e-05, "loss": 0.3371, "step": 10627 }, { "epoch": 34.84590163934426, "grad_norm": 4.005337715148926, "learning_rate": 1.5136088287556816e-05, "loss": 0.6717, "step": 10628 }, { "epoch": 34.84918032786885, "grad_norm": 3.9134509563446045, "learning_rate": 1.5135177133593634e-05, "loss": 0.4612, "step": 10629 }, { "epoch": 34.85245901639344, "grad_norm": 4.6805033683776855, "learning_rate": 1.5134265921725992e-05, "loss": 0.5219, "step": 10630 }, { "epoch": 34.855737704918035, "grad_norm": 3.921433210372925, "learning_rate": 1.5133354651964162e-05, "loss": 0.313, "step": 10631 }, { "epoch": 34.85901639344262, "grad_norm": 5.5031633377075195, "learning_rate": 1.5132443324318423e-05, "loss": 0.4883, "step": 10632 }, { "epoch": 34.86229508196721, "grad_norm": 5.5658040046691895, "learning_rate": 1.5131531938799047e-05, "loss": 0.6149, "step": 10633 }, { "epoch": 34.86557377049181, "grad_norm": 4.6584672927856445, "learning_rate": 1.5130620495416315e-05, "loss": 0.5964, "step": 10634 }, { "epoch": 34.868852459016395, "grad_norm": 4.661678314208984, "learning_rate": 1.51297089941805e-05, "loss": 0.5509, "step": 10635 }, { "epoch": 34.87213114754098, "grad_norm": 3.954571485519409, "learning_rate": 1.5128797435101885e-05, "loss": 0.4264, "step": 10636 }, { "epoch": 34.87540983606557, "grad_norm": 3.9926085472106934, "learning_rate": 1.5127885818190744e-05, "loss": 0.3996, "step": 10637 }, { "epoch": 34.87868852459017, "grad_norm": 4.540480136871338, "learning_rate": 1.512697414345736e-05, "loss": 0.6904, "step": 10638 }, { "epoch": 34.881967213114756, "grad_norm": 4.632876873016357, "learning_rate": 1.512606241091201e-05, "loss": 0.4932, "step": 10639 }, { "epoch": 34.885245901639344, "grad_norm": 4.4959869384765625, "learning_rate": 1.5125150620564979e-05, "loss": 0.4924, "step": 10640 }, { "epoch": 34.88852459016393, "grad_norm": 4.492227077484131, "learning_rate": 1.5124238772426547e-05, "loss": 0.348, "step": 10641 }, { "epoch": 34.89180327868853, "grad_norm": 5.836606979370117, "learning_rate": 1.5123326866506991e-05, "loss": 0.4789, "step": 10642 }, { "epoch": 34.895081967213116, "grad_norm": 5.039754390716553, "learning_rate": 1.5122414902816602e-05, "loss": 0.4796, "step": 10643 }, { "epoch": 34.898360655737704, "grad_norm": 4.633624076843262, "learning_rate": 1.5121502881365655e-05, "loss": 0.8061, "step": 10644 }, { "epoch": 34.90163934426229, "grad_norm": 4.329128265380859, "learning_rate": 1.512059080216444e-05, "loss": 0.5793, "step": 10645 }, { "epoch": 34.90491803278689, "grad_norm": 4.195728302001953, "learning_rate": 1.511967866522324e-05, "loss": 0.7072, "step": 10646 }, { "epoch": 34.90819672131148, "grad_norm": 4.743607997894287, "learning_rate": 1.5118766470552337e-05, "loss": 0.4971, "step": 10647 }, { "epoch": 34.911475409836065, "grad_norm": 6.771108150482178, "learning_rate": 1.5117854218162023e-05, "loss": 0.7456, "step": 10648 }, { "epoch": 34.91475409836065, "grad_norm": 4.304021835327148, "learning_rate": 1.5116941908062579e-05, "loss": 0.3326, "step": 10649 }, { "epoch": 34.91803278688525, "grad_norm": 4.537993431091309, "learning_rate": 1.5116029540264294e-05, "loss": 0.5453, "step": 10650 }, { "epoch": 34.92131147540984, "grad_norm": 4.266768455505371, "learning_rate": 1.5115117114777462e-05, "loss": 0.6234, "step": 10651 }, { "epoch": 34.924590163934425, "grad_norm": 4.669530391693115, "learning_rate": 1.5114204631612361e-05, "loss": 0.496, "step": 10652 }, { "epoch": 34.927868852459014, "grad_norm": 4.3135151863098145, "learning_rate": 1.5113292090779289e-05, "loss": 0.5074, "step": 10653 }, { "epoch": 34.93114754098361, "grad_norm": 4.3495635986328125, "learning_rate": 1.511237949228853e-05, "loss": 0.5947, "step": 10654 }, { "epoch": 34.9344262295082, "grad_norm": 4.499393463134766, "learning_rate": 1.5111466836150378e-05, "loss": 0.3956, "step": 10655 }, { "epoch": 34.937704918032786, "grad_norm": 4.548275470733643, "learning_rate": 1.511055412237512e-05, "loss": 0.4233, "step": 10656 }, { "epoch": 34.940983606557374, "grad_norm": 4.6886420249938965, "learning_rate": 1.5109641350973054e-05, "loss": 0.466, "step": 10657 }, { "epoch": 34.94426229508197, "grad_norm": 4.245448589324951, "learning_rate": 1.5108728521954468e-05, "loss": 0.6245, "step": 10658 }, { "epoch": 34.94754098360656, "grad_norm": 4.274789810180664, "learning_rate": 1.5107815635329655e-05, "loss": 0.4092, "step": 10659 }, { "epoch": 34.950819672131146, "grad_norm": 4.2466888427734375, "learning_rate": 1.5106902691108913e-05, "loss": 0.5231, "step": 10660 }, { "epoch": 34.954098360655735, "grad_norm": 4.122866153717041, "learning_rate": 1.510598968930253e-05, "loss": 0.5406, "step": 10661 }, { "epoch": 34.95737704918033, "grad_norm": 4.7318501472473145, "learning_rate": 1.5105076629920805e-05, "loss": 0.5224, "step": 10662 }, { "epoch": 34.96065573770492, "grad_norm": 4.106291770935059, "learning_rate": 1.5104163512974033e-05, "loss": 0.6542, "step": 10663 }, { "epoch": 34.96393442622951, "grad_norm": 3.7660231590270996, "learning_rate": 1.5103250338472512e-05, "loss": 0.5665, "step": 10664 }, { "epoch": 34.967213114754095, "grad_norm": 4.585150718688965, "learning_rate": 1.5102337106426536e-05, "loss": 0.4904, "step": 10665 }, { "epoch": 34.97049180327869, "grad_norm": 9.265569686889648, "learning_rate": 1.5101423816846406e-05, "loss": 0.4571, "step": 10666 }, { "epoch": 34.97377049180328, "grad_norm": 4.546318531036377, "learning_rate": 1.5100510469742417e-05, "loss": 0.4188, "step": 10667 }, { "epoch": 34.97704918032787, "grad_norm": 4.38789701461792, "learning_rate": 1.5099597065124867e-05, "loss": 0.5768, "step": 10668 }, { "epoch": 34.980327868852456, "grad_norm": 21.980823516845703, "learning_rate": 1.5098683603004061e-05, "loss": 0.5397, "step": 10669 }, { "epoch": 34.98360655737705, "grad_norm": 4.282190322875977, "learning_rate": 1.5097770083390293e-05, "loss": 0.3542, "step": 10670 }, { "epoch": 34.98688524590164, "grad_norm": 4.455111980438232, "learning_rate": 1.509685650629387e-05, "loss": 0.5643, "step": 10671 }, { "epoch": 34.99016393442623, "grad_norm": 4.350095748901367, "learning_rate": 1.5095942871725088e-05, "loss": 0.6248, "step": 10672 }, { "epoch": 34.993442622950816, "grad_norm": 7.848423957824707, "learning_rate": 1.5095029179694251e-05, "loss": 0.4427, "step": 10673 }, { "epoch": 34.99672131147541, "grad_norm": 4.033292293548584, "learning_rate": 1.5094115430211667e-05, "loss": 0.5282, "step": 10674 }, { "epoch": 35.0, "grad_norm": 4.98663330078125, "learning_rate": 1.5093201623287631e-05, "loss": 0.4211, "step": 10675 }, { "epoch": 35.00327868852459, "grad_norm": 4.553827285766602, "learning_rate": 1.5092287758932451e-05, "loss": 0.3298, "step": 10676 }, { "epoch": 35.006557377049184, "grad_norm": 4.48374080657959, "learning_rate": 1.5091373837156433e-05, "loss": 0.4041, "step": 10677 }, { "epoch": 35.00983606557377, "grad_norm": 3.321861982345581, "learning_rate": 1.5090459857969886e-05, "loss": 0.4108, "step": 10678 }, { "epoch": 35.01311475409836, "grad_norm": 4.244114398956299, "learning_rate": 1.5089545821383102e-05, "loss": 0.4413, "step": 10679 }, { "epoch": 35.01639344262295, "grad_norm": 5.392570495605469, "learning_rate": 1.5088631727406404e-05, "loss": 0.5959, "step": 10680 }, { "epoch": 35.019672131147544, "grad_norm": 4.945605278015137, "learning_rate": 1.508771757605009e-05, "loss": 0.5138, "step": 10681 }, { "epoch": 35.02295081967213, "grad_norm": 4.23435115814209, "learning_rate": 1.5086803367324471e-05, "loss": 0.3798, "step": 10682 }, { "epoch": 35.02622950819672, "grad_norm": 4.479604244232178, "learning_rate": 1.5085889101239857e-05, "loss": 0.4999, "step": 10683 }, { "epoch": 35.02950819672131, "grad_norm": 4.896543979644775, "learning_rate": 1.5084974777806553e-05, "loss": 0.41, "step": 10684 }, { "epoch": 35.032786885245905, "grad_norm": 4.039168357849121, "learning_rate": 1.508406039703487e-05, "loss": 0.4066, "step": 10685 }, { "epoch": 35.03606557377049, "grad_norm": 4.035120964050293, "learning_rate": 1.5083145958935123e-05, "loss": 0.5827, "step": 10686 }, { "epoch": 35.03934426229508, "grad_norm": 4.617123603820801, "learning_rate": 1.5082231463517619e-05, "loss": 0.648, "step": 10687 }, { "epoch": 35.04262295081967, "grad_norm": 4.229197978973389, "learning_rate": 1.508131691079267e-05, "loss": 0.4278, "step": 10688 }, { "epoch": 35.045901639344265, "grad_norm": 4.233074188232422, "learning_rate": 1.508040230077059e-05, "loss": 0.5768, "step": 10689 }, { "epoch": 35.049180327868854, "grad_norm": 4.33323860168457, "learning_rate": 1.5079487633461694e-05, "loss": 0.4379, "step": 10690 }, { "epoch": 35.05245901639344, "grad_norm": 4.808473110198975, "learning_rate": 1.5078572908876291e-05, "loss": 0.7108, "step": 10691 }, { "epoch": 35.05573770491803, "grad_norm": 4.754706382751465, "learning_rate": 1.50776581270247e-05, "loss": 0.5993, "step": 10692 }, { "epoch": 35.059016393442626, "grad_norm": 4.415325164794922, "learning_rate": 1.5076743287917233e-05, "loss": 0.2888, "step": 10693 }, { "epoch": 35.062295081967214, "grad_norm": 4.4085774421691895, "learning_rate": 1.5075828391564206e-05, "loss": 0.5345, "step": 10694 }, { "epoch": 35.0655737704918, "grad_norm": 4.634662628173828, "learning_rate": 1.5074913437975938e-05, "loss": 0.4379, "step": 10695 }, { "epoch": 35.06885245901639, "grad_norm": 4.062276840209961, "learning_rate": 1.5073998427162742e-05, "loss": 0.46, "step": 10696 }, { "epoch": 35.072131147540986, "grad_norm": 5.62350606918335, "learning_rate": 1.5073083359134941e-05, "loss": 0.5055, "step": 10697 }, { "epoch": 35.075409836065575, "grad_norm": 4.255585193634033, "learning_rate": 1.5072168233902848e-05, "loss": 0.6384, "step": 10698 }, { "epoch": 35.07868852459016, "grad_norm": 5.606477737426758, "learning_rate": 1.5071253051476786e-05, "loss": 0.5778, "step": 10699 }, { "epoch": 35.08196721311475, "grad_norm": 5.147245407104492, "learning_rate": 1.5070337811867073e-05, "loss": 0.3275, "step": 10700 }, { "epoch": 35.08524590163935, "grad_norm": 4.368385314941406, "learning_rate": 1.5069422515084027e-05, "loss": 0.6687, "step": 10701 }, { "epoch": 35.088524590163935, "grad_norm": 3.748570442199707, "learning_rate": 1.5068507161137975e-05, "loss": 0.4794, "step": 10702 }, { "epoch": 35.09180327868852, "grad_norm": 4.03159761428833, "learning_rate": 1.5067591750039231e-05, "loss": 0.4774, "step": 10703 }, { "epoch": 35.09508196721311, "grad_norm": 4.61425256729126, "learning_rate": 1.5066676281798124e-05, "loss": 0.4212, "step": 10704 }, { "epoch": 35.09836065573771, "grad_norm": 4.245705604553223, "learning_rate": 1.5065760756424972e-05, "loss": 0.5356, "step": 10705 }, { "epoch": 35.101639344262296, "grad_norm": 4.123607158660889, "learning_rate": 1.5064845173930103e-05, "loss": 0.5101, "step": 10706 }, { "epoch": 35.104918032786884, "grad_norm": 3.9865031242370605, "learning_rate": 1.5063929534323837e-05, "loss": 0.3033, "step": 10707 }, { "epoch": 35.10819672131147, "grad_norm": 4.544210910797119, "learning_rate": 1.5063013837616502e-05, "loss": 0.4439, "step": 10708 }, { "epoch": 35.11147540983607, "grad_norm": 5.56214714050293, "learning_rate": 1.5062098083818417e-05, "loss": 0.6609, "step": 10709 }, { "epoch": 35.114754098360656, "grad_norm": 3.9470317363739014, "learning_rate": 1.5061182272939918e-05, "loss": 0.4461, "step": 10710 }, { "epoch": 35.118032786885244, "grad_norm": 5.163371562957764, "learning_rate": 1.5060266404991323e-05, "loss": 0.5598, "step": 10711 }, { "epoch": 35.12131147540983, "grad_norm": 4.513160705566406, "learning_rate": 1.5059350479982966e-05, "loss": 0.3243, "step": 10712 }, { "epoch": 35.12459016393443, "grad_norm": 5.35383939743042, "learning_rate": 1.505843449792517e-05, "loss": 0.3444, "step": 10713 }, { "epoch": 35.12786885245902, "grad_norm": 7.242471694946289, "learning_rate": 1.5057518458828266e-05, "loss": 0.5685, "step": 10714 }, { "epoch": 35.131147540983605, "grad_norm": 4.281981945037842, "learning_rate": 1.5056602362702584e-05, "loss": 0.2817, "step": 10715 }, { "epoch": 35.13442622950819, "grad_norm": 4.689357280731201, "learning_rate": 1.505568620955845e-05, "loss": 0.544, "step": 10716 }, { "epoch": 35.13770491803279, "grad_norm": 4.6426286697387695, "learning_rate": 1.5054769999406201e-05, "loss": 0.4392, "step": 10717 }, { "epoch": 35.14098360655738, "grad_norm": 4.404138565063477, "learning_rate": 1.5053853732256163e-05, "loss": 0.3999, "step": 10718 }, { "epoch": 35.144262295081965, "grad_norm": 4.070619583129883, "learning_rate": 1.5052937408118669e-05, "loss": 0.4639, "step": 10719 }, { "epoch": 35.14754098360656, "grad_norm": 4.750659942626953, "learning_rate": 1.505202102700405e-05, "loss": 0.322, "step": 10720 }, { "epoch": 35.15081967213115, "grad_norm": 4.192971229553223, "learning_rate": 1.5051104588922645e-05, "loss": 0.6279, "step": 10721 }, { "epoch": 35.15409836065574, "grad_norm": 4.556456565856934, "learning_rate": 1.505018809388478e-05, "loss": 0.3601, "step": 10722 }, { "epoch": 35.157377049180326, "grad_norm": 4.308846473693848, "learning_rate": 1.5049271541900798e-05, "loss": 0.5139, "step": 10723 }, { "epoch": 35.16065573770492, "grad_norm": 4.03952693939209, "learning_rate": 1.5048354932981027e-05, "loss": 0.618, "step": 10724 }, { "epoch": 35.16393442622951, "grad_norm": 4.102809429168701, "learning_rate": 1.5047438267135806e-05, "loss": 0.6463, "step": 10725 }, { "epoch": 35.1672131147541, "grad_norm": 4.736813545227051, "learning_rate": 1.5046521544375468e-05, "loss": 0.4308, "step": 10726 }, { "epoch": 35.170491803278686, "grad_norm": 5.095054626464844, "learning_rate": 1.5045604764710354e-05, "loss": 0.5262, "step": 10727 }, { "epoch": 35.17377049180328, "grad_norm": 20.230783462524414, "learning_rate": 1.50446879281508e-05, "loss": 0.4304, "step": 10728 }, { "epoch": 35.17704918032787, "grad_norm": 5.293105125427246, "learning_rate": 1.5043771034707143e-05, "loss": 0.4919, "step": 10729 }, { "epoch": 35.18032786885246, "grad_norm": 4.431867599487305, "learning_rate": 1.5042854084389729e-05, "loss": 0.3877, "step": 10730 }, { "epoch": 35.18360655737705, "grad_norm": 3.9792582988739014, "learning_rate": 1.5041937077208886e-05, "loss": 0.5684, "step": 10731 }, { "epoch": 35.18688524590164, "grad_norm": 4.006687164306641, "learning_rate": 1.5041020013174962e-05, "loss": 0.5304, "step": 10732 }, { "epoch": 35.19016393442623, "grad_norm": 4.863847732543945, "learning_rate": 1.5040102892298295e-05, "loss": 0.5879, "step": 10733 }, { "epoch": 35.19344262295082, "grad_norm": 4.431341171264648, "learning_rate": 1.5039185714589231e-05, "loss": 0.3145, "step": 10734 }, { "epoch": 35.19672131147541, "grad_norm": 4.01945686340332, "learning_rate": 1.5038268480058107e-05, "loss": 0.4672, "step": 10735 }, { "epoch": 35.2, "grad_norm": 4.0203633308410645, "learning_rate": 1.5037351188715265e-05, "loss": 0.6565, "step": 10736 }, { "epoch": 35.20327868852459, "grad_norm": 4.598611354827881, "learning_rate": 1.5036433840571052e-05, "loss": 0.3776, "step": 10737 }, { "epoch": 35.20655737704918, "grad_norm": 4.404995918273926, "learning_rate": 1.5035516435635813e-05, "loss": 0.8411, "step": 10738 }, { "epoch": 35.20983606557377, "grad_norm": 4.669663906097412, "learning_rate": 1.5034598973919887e-05, "loss": 0.4997, "step": 10739 }, { "epoch": 35.21311475409836, "grad_norm": 4.457508563995361, "learning_rate": 1.5033681455433628e-05, "loss": 0.4474, "step": 10740 }, { "epoch": 35.21639344262295, "grad_norm": 4.081623554229736, "learning_rate": 1.5032763880187374e-05, "loss": 0.7819, "step": 10741 }, { "epoch": 35.21967213114754, "grad_norm": 5.444774150848389, "learning_rate": 1.5031846248191473e-05, "loss": 0.8094, "step": 10742 }, { "epoch": 35.22295081967213, "grad_norm": 4.842304229736328, "learning_rate": 1.5030928559456275e-05, "loss": 0.4986, "step": 10743 }, { "epoch": 35.226229508196724, "grad_norm": 4.8953423500061035, "learning_rate": 1.5030010813992126e-05, "loss": 0.3713, "step": 10744 }, { "epoch": 35.22950819672131, "grad_norm": 3.56675124168396, "learning_rate": 1.502909301180938e-05, "loss": 0.4992, "step": 10745 }, { "epoch": 35.2327868852459, "grad_norm": 4.3604350090026855, "learning_rate": 1.5028175152918374e-05, "loss": 0.5136, "step": 10746 }, { "epoch": 35.23606557377049, "grad_norm": 4.267138481140137, "learning_rate": 1.5027257237329469e-05, "loss": 0.4445, "step": 10747 }, { "epoch": 35.239344262295084, "grad_norm": 4.646742343902588, "learning_rate": 1.502633926505301e-05, "loss": 0.5308, "step": 10748 }, { "epoch": 35.24262295081967, "grad_norm": 4.086306571960449, "learning_rate": 1.5025421236099354e-05, "loss": 0.6373, "step": 10749 }, { "epoch": 35.24590163934426, "grad_norm": 4.402742862701416, "learning_rate": 1.5024503150478842e-05, "loss": 0.4732, "step": 10750 }, { "epoch": 35.24918032786885, "grad_norm": 4.15333366394043, "learning_rate": 1.5023585008201837e-05, "loss": 0.4986, "step": 10751 }, { "epoch": 35.252459016393445, "grad_norm": 4.615882396697998, "learning_rate": 1.5022666809278686e-05, "loss": 0.3069, "step": 10752 }, { "epoch": 35.25573770491803, "grad_norm": 3.6915950775146484, "learning_rate": 1.5021748553719746e-05, "loss": 0.3583, "step": 10753 }, { "epoch": 35.25901639344262, "grad_norm": 4.423025608062744, "learning_rate": 1.5020830241535369e-05, "loss": 0.4808, "step": 10754 }, { "epoch": 35.26229508196721, "grad_norm": 5.0055742263793945, "learning_rate": 1.5019911872735908e-05, "loss": 0.6792, "step": 10755 }, { "epoch": 35.265573770491805, "grad_norm": 4.18892765045166, "learning_rate": 1.5018993447331727e-05, "loss": 0.7675, "step": 10756 }, { "epoch": 35.268852459016394, "grad_norm": 3.9897775650024414, "learning_rate": 1.5018074965333172e-05, "loss": 0.4381, "step": 10757 }, { "epoch": 35.27213114754098, "grad_norm": 11.02109432220459, "learning_rate": 1.5017156426750605e-05, "loss": 0.3099, "step": 10758 }, { "epoch": 35.27540983606557, "grad_norm": 3.5676369667053223, "learning_rate": 1.5016237831594383e-05, "loss": 0.6717, "step": 10759 }, { "epoch": 35.278688524590166, "grad_norm": 4.142388820648193, "learning_rate": 1.5015319179874865e-05, "loss": 0.4709, "step": 10760 }, { "epoch": 35.281967213114754, "grad_norm": 4.576352119445801, "learning_rate": 1.5014400471602408e-05, "loss": 0.437, "step": 10761 }, { "epoch": 35.28524590163934, "grad_norm": 3.8897922039031982, "learning_rate": 1.5013481706787372e-05, "loss": 0.6568, "step": 10762 }, { "epoch": 35.28852459016394, "grad_norm": 4.0349225997924805, "learning_rate": 1.5012562885440117e-05, "loss": 0.4355, "step": 10763 }, { "epoch": 35.291803278688526, "grad_norm": 4.23388671875, "learning_rate": 1.5011644007571003e-05, "loss": 0.5191, "step": 10764 }, { "epoch": 35.295081967213115, "grad_norm": 5.37676477432251, "learning_rate": 1.5010725073190391e-05, "loss": 0.2797, "step": 10765 }, { "epoch": 35.2983606557377, "grad_norm": 4.177201747894287, "learning_rate": 1.5009806082308647e-05, "loss": 0.5241, "step": 10766 }, { "epoch": 35.3016393442623, "grad_norm": 4.541319847106934, "learning_rate": 1.5008887034936129e-05, "loss": 0.489, "step": 10767 }, { "epoch": 35.30491803278689, "grad_norm": 3.8071160316467285, "learning_rate": 1.5007967931083202e-05, "loss": 0.5098, "step": 10768 }, { "epoch": 35.308196721311475, "grad_norm": 4.4742431640625, "learning_rate": 1.5007048770760231e-05, "loss": 0.3973, "step": 10769 }, { "epoch": 35.31147540983606, "grad_norm": 6.299671173095703, "learning_rate": 1.5006129553977577e-05, "loss": 0.3678, "step": 10770 }, { "epoch": 35.31475409836066, "grad_norm": 3.785794496536255, "learning_rate": 1.5005210280745608e-05, "loss": 0.3102, "step": 10771 }, { "epoch": 35.31803278688525, "grad_norm": 3.6152865886688232, "learning_rate": 1.5004290951074688e-05, "loss": 0.2718, "step": 10772 }, { "epoch": 35.321311475409836, "grad_norm": 4.417469501495361, "learning_rate": 1.5003371564975187e-05, "loss": 0.5286, "step": 10773 }, { "epoch": 35.324590163934424, "grad_norm": 5.258586406707764, "learning_rate": 1.5002452122457465e-05, "loss": 0.6739, "step": 10774 }, { "epoch": 35.32786885245902, "grad_norm": 4.801309108734131, "learning_rate": 1.5001532623531897e-05, "loss": 0.3988, "step": 10775 }, { "epoch": 35.33114754098361, "grad_norm": 4.5355730056762695, "learning_rate": 1.500061306820885e-05, "loss": 0.5449, "step": 10776 }, { "epoch": 35.334426229508196, "grad_norm": 3.829465627670288, "learning_rate": 1.4999693456498687e-05, "loss": 0.6286, "step": 10777 }, { "epoch": 35.337704918032784, "grad_norm": 3.840264320373535, "learning_rate": 1.4998773788411785e-05, "loss": 0.4762, "step": 10778 }, { "epoch": 35.34098360655738, "grad_norm": 5.000136375427246, "learning_rate": 1.4997854063958511e-05, "loss": 0.3515, "step": 10779 }, { "epoch": 35.34426229508197, "grad_norm": 3.707174777984619, "learning_rate": 1.4996934283149233e-05, "loss": 0.3713, "step": 10780 }, { "epoch": 35.34754098360656, "grad_norm": 4.019268989562988, "learning_rate": 1.499601444599433e-05, "loss": 0.5535, "step": 10781 }, { "epoch": 35.350819672131145, "grad_norm": 4.234826564788818, "learning_rate": 1.4995094552504165e-05, "loss": 0.4977, "step": 10782 }, { "epoch": 35.35409836065574, "grad_norm": 4.67532205581665, "learning_rate": 1.4994174602689118e-05, "loss": 0.6282, "step": 10783 }, { "epoch": 35.35737704918033, "grad_norm": 4.311595439910889, "learning_rate": 1.4993254596559559e-05, "loss": 0.5494, "step": 10784 }, { "epoch": 35.36065573770492, "grad_norm": 4.6420159339904785, "learning_rate": 1.4992334534125861e-05, "loss": 0.6426, "step": 10785 }, { "epoch": 35.363934426229505, "grad_norm": 4.550228118896484, "learning_rate": 1.4991414415398405e-05, "loss": 0.5035, "step": 10786 }, { "epoch": 35.3672131147541, "grad_norm": 4.0577898025512695, "learning_rate": 1.4990494240387556e-05, "loss": 0.6846, "step": 10787 }, { "epoch": 35.37049180327869, "grad_norm": 5.116581439971924, "learning_rate": 1.4989574009103702e-05, "loss": 0.5011, "step": 10788 }, { "epoch": 35.37377049180328, "grad_norm": 4.24144983291626, "learning_rate": 1.4988653721557209e-05, "loss": 0.3933, "step": 10789 }, { "epoch": 35.377049180327866, "grad_norm": 4.103484630584717, "learning_rate": 1.498773337775846e-05, "loss": 0.5313, "step": 10790 }, { "epoch": 35.38032786885246, "grad_norm": 4.058091163635254, "learning_rate": 1.4986812977717828e-05, "loss": 0.4001, "step": 10791 }, { "epoch": 35.38360655737705, "grad_norm": 4.599402904510498, "learning_rate": 1.49858925214457e-05, "loss": 0.5811, "step": 10792 }, { "epoch": 35.38688524590164, "grad_norm": 4.4268364906311035, "learning_rate": 1.4984972008952445e-05, "loss": 0.7147, "step": 10793 }, { "epoch": 35.390163934426226, "grad_norm": 3.932922840118408, "learning_rate": 1.4984051440248451e-05, "loss": 0.3613, "step": 10794 }, { "epoch": 35.39344262295082, "grad_norm": 4.502823352813721, "learning_rate": 1.4983130815344094e-05, "loss": 0.4878, "step": 10795 }, { "epoch": 35.39672131147541, "grad_norm": 4.87229061126709, "learning_rate": 1.4982210134249754e-05, "loss": 0.5599, "step": 10796 }, { "epoch": 35.4, "grad_norm": 4.441518306732178, "learning_rate": 1.4981289396975818e-05, "loss": 0.3785, "step": 10797 }, { "epoch": 35.40327868852459, "grad_norm": 3.945728063583374, "learning_rate": 1.498036860353266e-05, "loss": 0.3779, "step": 10798 }, { "epoch": 35.40655737704918, "grad_norm": 4.165140628814697, "learning_rate": 1.4979447753930672e-05, "loss": 0.2413, "step": 10799 }, { "epoch": 35.40983606557377, "grad_norm": 4.811776161193848, "learning_rate": 1.497852684818023e-05, "loss": 0.5502, "step": 10800 }, { "epoch": 35.41311475409836, "grad_norm": 4.471138000488281, "learning_rate": 1.4977605886291726e-05, "loss": 0.3087, "step": 10801 }, { "epoch": 35.41639344262295, "grad_norm": 4.384248733520508, "learning_rate": 1.4976684868275535e-05, "loss": 0.3833, "step": 10802 }, { "epoch": 35.41967213114754, "grad_norm": 3.7062530517578125, "learning_rate": 1.497576379414205e-05, "loss": 0.4101, "step": 10803 }, { "epoch": 35.42295081967213, "grad_norm": 4.617863655090332, "learning_rate": 1.4974842663901657e-05, "loss": 0.7009, "step": 10804 }, { "epoch": 35.42622950819672, "grad_norm": 4.098151683807373, "learning_rate": 1.4973921477564736e-05, "loss": 0.53, "step": 10805 }, { "epoch": 35.429508196721315, "grad_norm": 3.904825448989868, "learning_rate": 1.4973000235141681e-05, "loss": 0.4654, "step": 10806 }, { "epoch": 35.4327868852459, "grad_norm": 4.484102725982666, "learning_rate": 1.497207893664288e-05, "loss": 0.3425, "step": 10807 }, { "epoch": 35.43606557377049, "grad_norm": 3.966822862625122, "learning_rate": 1.4971157582078714e-05, "loss": 0.644, "step": 10808 }, { "epoch": 35.43934426229508, "grad_norm": 4.387925148010254, "learning_rate": 1.497023617145958e-05, "loss": 0.3959, "step": 10809 }, { "epoch": 35.442622950819676, "grad_norm": 4.742157459259033, "learning_rate": 1.4969314704795867e-05, "loss": 0.4339, "step": 10810 }, { "epoch": 35.445901639344264, "grad_norm": 4.719576358795166, "learning_rate": 1.4968393182097962e-05, "loss": 0.307, "step": 10811 }, { "epoch": 35.44918032786885, "grad_norm": 4.118162631988525, "learning_rate": 1.496747160337626e-05, "loss": 0.5648, "step": 10812 }, { "epoch": 35.45245901639344, "grad_norm": 4.010156631469727, "learning_rate": 1.4966549968641148e-05, "loss": 0.467, "step": 10813 }, { "epoch": 35.455737704918036, "grad_norm": 3.627268075942993, "learning_rate": 1.4965628277903024e-05, "loss": 0.4102, "step": 10814 }, { "epoch": 35.459016393442624, "grad_norm": 4.9474358558654785, "learning_rate": 1.4964706531172275e-05, "loss": 0.3823, "step": 10815 }, { "epoch": 35.46229508196721, "grad_norm": 5.241086006164551, "learning_rate": 1.49637847284593e-05, "loss": 0.4535, "step": 10816 }, { "epoch": 35.4655737704918, "grad_norm": 4.165347576141357, "learning_rate": 1.496286286977449e-05, "loss": 0.6045, "step": 10817 }, { "epoch": 35.4688524590164, "grad_norm": 4.743769645690918, "learning_rate": 1.4961940955128245e-05, "loss": 0.4321, "step": 10818 }, { "epoch": 35.472131147540985, "grad_norm": 4.420999050140381, "learning_rate": 1.4961018984530952e-05, "loss": 0.6033, "step": 10819 }, { "epoch": 35.47540983606557, "grad_norm": 3.903815507888794, "learning_rate": 1.4960096957993015e-05, "loss": 0.3354, "step": 10820 }, { "epoch": 35.47868852459016, "grad_norm": 4.504626274108887, "learning_rate": 1.4959174875524826e-05, "loss": 0.5606, "step": 10821 }, { "epoch": 35.48196721311476, "grad_norm": 3.8293278217315674, "learning_rate": 1.4958252737136784e-05, "loss": 0.4684, "step": 10822 }, { "epoch": 35.485245901639345, "grad_norm": 4.279985427856445, "learning_rate": 1.4957330542839288e-05, "loss": 0.5015, "step": 10823 }, { "epoch": 35.488524590163934, "grad_norm": 3.869070291519165, "learning_rate": 1.4956408292642734e-05, "loss": 0.5473, "step": 10824 }, { "epoch": 35.49180327868852, "grad_norm": 4.0615715980529785, "learning_rate": 1.4955485986557526e-05, "loss": 0.3974, "step": 10825 }, { "epoch": 35.49508196721312, "grad_norm": 5.281948566436768, "learning_rate": 1.4954563624594057e-05, "loss": 0.6065, "step": 10826 }, { "epoch": 35.498360655737706, "grad_norm": 4.142714500427246, "learning_rate": 1.4953641206762734e-05, "loss": 0.4682, "step": 10827 }, { "epoch": 35.501639344262294, "grad_norm": 8.93558120727539, "learning_rate": 1.4952718733073957e-05, "loss": 0.5155, "step": 10828 }, { "epoch": 35.50491803278688, "grad_norm": 4.121977806091309, "learning_rate": 1.4951796203538125e-05, "loss": 0.381, "step": 10829 }, { "epoch": 35.50819672131148, "grad_norm": 4.357462406158447, "learning_rate": 1.4950873618165642e-05, "loss": 0.6842, "step": 10830 }, { "epoch": 35.511475409836066, "grad_norm": 3.6510865688323975, "learning_rate": 1.4949950976966914e-05, "loss": 0.4333, "step": 10831 }, { "epoch": 35.514754098360655, "grad_norm": 4.531999588012695, "learning_rate": 1.4949028279952338e-05, "loss": 0.4794, "step": 10832 }, { "epoch": 35.51803278688524, "grad_norm": 3.937436580657959, "learning_rate": 1.4948105527132326e-05, "loss": 0.3976, "step": 10833 }, { "epoch": 35.52131147540984, "grad_norm": 4.376774787902832, "learning_rate": 1.4947182718517277e-05, "loss": 0.7297, "step": 10834 }, { "epoch": 35.52459016393443, "grad_norm": 4.717273235321045, "learning_rate": 1.4946259854117601e-05, "loss": 0.5653, "step": 10835 }, { "epoch": 35.527868852459015, "grad_norm": 5.026129722595215, "learning_rate": 1.4945336933943702e-05, "loss": 0.5195, "step": 10836 }, { "epoch": 35.5311475409836, "grad_norm": 4.3052263259887695, "learning_rate": 1.4944413958005984e-05, "loss": 0.6446, "step": 10837 }, { "epoch": 35.5344262295082, "grad_norm": 4.466485500335693, "learning_rate": 1.4943490926314861e-05, "loss": 0.502, "step": 10838 }, { "epoch": 35.53770491803279, "grad_norm": 4.620545387268066, "learning_rate": 1.4942567838880738e-05, "loss": 0.4152, "step": 10839 }, { "epoch": 35.540983606557376, "grad_norm": 3.767104148864746, "learning_rate": 1.4941644695714024e-05, "loss": 0.4133, "step": 10840 }, { "epoch": 35.544262295081964, "grad_norm": 3.891195774078369, "learning_rate": 1.4940721496825124e-05, "loss": 0.6556, "step": 10841 }, { "epoch": 35.54754098360656, "grad_norm": 4.385026454925537, "learning_rate": 1.4939798242224457e-05, "loss": 0.4606, "step": 10842 }, { "epoch": 35.55081967213115, "grad_norm": 3.9496796131134033, "learning_rate": 1.4938874931922424e-05, "loss": 0.7823, "step": 10843 }, { "epoch": 35.554098360655736, "grad_norm": 4.426577091217041, "learning_rate": 1.4937951565929445e-05, "loss": 0.4256, "step": 10844 }, { "epoch": 35.557377049180324, "grad_norm": 3.638624668121338, "learning_rate": 1.4937028144255928e-05, "loss": 0.5768, "step": 10845 }, { "epoch": 35.56065573770492, "grad_norm": 3.3200907707214355, "learning_rate": 1.4936104666912284e-05, "loss": 0.3207, "step": 10846 }, { "epoch": 35.56393442622951, "grad_norm": 3.6112794876098633, "learning_rate": 1.4935181133908926e-05, "loss": 0.4207, "step": 10847 }, { "epoch": 35.5672131147541, "grad_norm": 4.822194576263428, "learning_rate": 1.4934257545256271e-05, "loss": 0.3657, "step": 10848 }, { "epoch": 35.570491803278685, "grad_norm": 4.019379138946533, "learning_rate": 1.4933333900964733e-05, "loss": 0.3814, "step": 10849 }, { "epoch": 35.57377049180328, "grad_norm": 4.414553165435791, "learning_rate": 1.4932410201044725e-05, "loss": 0.4507, "step": 10850 }, { "epoch": 35.57704918032787, "grad_norm": 4.108489513397217, "learning_rate": 1.4931486445506665e-05, "loss": 0.5371, "step": 10851 }, { "epoch": 35.58032786885246, "grad_norm": 4.226382255554199, "learning_rate": 1.4930562634360964e-05, "loss": 0.4447, "step": 10852 }, { "epoch": 35.58360655737705, "grad_norm": 4.31514835357666, "learning_rate": 1.4929638767618046e-05, "loss": 0.5867, "step": 10853 }, { "epoch": 35.58688524590164, "grad_norm": 4.865523338317871, "learning_rate": 1.4928714845288324e-05, "loss": 0.5247, "step": 10854 }, { "epoch": 35.59016393442623, "grad_norm": 4.154989242553711, "learning_rate": 1.492779086738222e-05, "loss": 0.5152, "step": 10855 }, { "epoch": 35.59344262295082, "grad_norm": 4.8648881912231445, "learning_rate": 1.4926866833910145e-05, "loss": 0.5789, "step": 10856 }, { "epoch": 35.59672131147541, "grad_norm": 4.060085773468018, "learning_rate": 1.492594274488253e-05, "loss": 0.4333, "step": 10857 }, { "epoch": 35.6, "grad_norm": 4.542491436004639, "learning_rate": 1.4925018600309784e-05, "loss": 0.5294, "step": 10858 }, { "epoch": 35.60327868852459, "grad_norm": 5.960625648498535, "learning_rate": 1.4924094400202336e-05, "loss": 0.6487, "step": 10859 }, { "epoch": 35.60655737704918, "grad_norm": 4.261531352996826, "learning_rate": 1.49231701445706e-05, "loss": 0.3407, "step": 10860 }, { "epoch": 35.609836065573774, "grad_norm": 4.744043350219727, "learning_rate": 1.4922245833425006e-05, "loss": 0.6035, "step": 10861 }, { "epoch": 35.61311475409836, "grad_norm": 4.421313762664795, "learning_rate": 1.4921321466775969e-05, "loss": 0.5435, "step": 10862 }, { "epoch": 35.61639344262295, "grad_norm": 3.744051218032837, "learning_rate": 1.4920397044633919e-05, "loss": 0.3748, "step": 10863 }, { "epoch": 35.61967213114754, "grad_norm": 4.213745594024658, "learning_rate": 1.4919472567009273e-05, "loss": 0.6496, "step": 10864 }, { "epoch": 35.622950819672134, "grad_norm": 4.781640529632568, "learning_rate": 1.4918548033912459e-05, "loss": 0.3765, "step": 10865 }, { "epoch": 35.62622950819672, "grad_norm": 4.936595916748047, "learning_rate": 1.4917623445353903e-05, "loss": 0.4089, "step": 10866 }, { "epoch": 35.62950819672131, "grad_norm": 3.897949457168579, "learning_rate": 1.491669880134403e-05, "loss": 0.441, "step": 10867 }, { "epoch": 35.6327868852459, "grad_norm": 4.6189093589782715, "learning_rate": 1.4915774101893265e-05, "loss": 0.4128, "step": 10868 }, { "epoch": 35.636065573770495, "grad_norm": 4.703091144561768, "learning_rate": 1.4914849347012037e-05, "loss": 0.6531, "step": 10869 }, { "epoch": 35.63934426229508, "grad_norm": 4.386790752410889, "learning_rate": 1.4913924536710774e-05, "loss": 0.5531, "step": 10870 }, { "epoch": 35.64262295081967, "grad_norm": 3.625525951385498, "learning_rate": 1.4912999670999899e-05, "loss": 0.3589, "step": 10871 }, { "epoch": 35.64590163934426, "grad_norm": 3.9069011211395264, "learning_rate": 1.4912074749889848e-05, "loss": 0.7946, "step": 10872 }, { "epoch": 35.649180327868855, "grad_norm": 4.964866638183594, "learning_rate": 1.4911149773391045e-05, "loss": 0.4862, "step": 10873 }, { "epoch": 35.65245901639344, "grad_norm": 3.6709673404693604, "learning_rate": 1.4910224741513922e-05, "loss": 0.4129, "step": 10874 }, { "epoch": 35.65573770491803, "grad_norm": 4.48141622543335, "learning_rate": 1.490929965426891e-05, "loss": 0.5292, "step": 10875 }, { "epoch": 35.65901639344262, "grad_norm": 5.160655498504639, "learning_rate": 1.4908374511666439e-05, "loss": 0.3303, "step": 10876 }, { "epoch": 35.662295081967216, "grad_norm": 6.146000385284424, "learning_rate": 1.4907449313716944e-05, "loss": 0.3911, "step": 10877 }, { "epoch": 35.665573770491804, "grad_norm": 5.148119926452637, "learning_rate": 1.4906524060430853e-05, "loss": 0.5658, "step": 10878 }, { "epoch": 35.66885245901639, "grad_norm": 11.813277244567871, "learning_rate": 1.4905598751818603e-05, "loss": 0.3908, "step": 10879 }, { "epoch": 35.67213114754098, "grad_norm": 4.709049701690674, "learning_rate": 1.4904673387890626e-05, "loss": 0.4353, "step": 10880 }, { "epoch": 35.675409836065576, "grad_norm": 4.096610069274902, "learning_rate": 1.4903747968657359e-05, "loss": 0.486, "step": 10881 }, { "epoch": 35.678688524590164, "grad_norm": 4.025539875030518, "learning_rate": 1.4902822494129232e-05, "loss": 0.397, "step": 10882 }, { "epoch": 35.68196721311475, "grad_norm": 3.9982268810272217, "learning_rate": 1.4901896964316686e-05, "loss": 0.3402, "step": 10883 }, { "epoch": 35.68524590163934, "grad_norm": 3.938762903213501, "learning_rate": 1.4900971379230156e-05, "loss": 0.4152, "step": 10884 }, { "epoch": 35.68852459016394, "grad_norm": 3.9857163429260254, "learning_rate": 1.4900045738880075e-05, "loss": 0.374, "step": 10885 }, { "epoch": 35.691803278688525, "grad_norm": 4.644453048706055, "learning_rate": 1.4899120043276886e-05, "loss": 0.4064, "step": 10886 }, { "epoch": 35.69508196721311, "grad_norm": 4.176014423370361, "learning_rate": 1.4898194292431023e-05, "loss": 0.5196, "step": 10887 }, { "epoch": 35.6983606557377, "grad_norm": 3.8901798725128174, "learning_rate": 1.4897268486352925e-05, "loss": 0.579, "step": 10888 }, { "epoch": 35.7016393442623, "grad_norm": 4.081429481506348, "learning_rate": 1.4896342625053035e-05, "loss": 0.4905, "step": 10889 }, { "epoch": 35.704918032786885, "grad_norm": 5.1755242347717285, "learning_rate": 1.4895416708541792e-05, "loss": 0.2846, "step": 10890 }, { "epoch": 35.708196721311474, "grad_norm": 4.424622058868408, "learning_rate": 1.4894490736829635e-05, "loss": 0.5624, "step": 10891 }, { "epoch": 35.71147540983607, "grad_norm": 4.540256023406982, "learning_rate": 1.4893564709927005e-05, "loss": 0.4417, "step": 10892 }, { "epoch": 35.71475409836066, "grad_norm": 4.531832218170166, "learning_rate": 1.4892638627844345e-05, "loss": 0.5358, "step": 10893 }, { "epoch": 35.718032786885246, "grad_norm": 4.241086959838867, "learning_rate": 1.4891712490592096e-05, "loss": 0.5508, "step": 10894 }, { "epoch": 35.721311475409834, "grad_norm": 4.0896430015563965, "learning_rate": 1.4890786298180703e-05, "loss": 0.5714, "step": 10895 }, { "epoch": 35.72459016393443, "grad_norm": 4.755134105682373, "learning_rate": 1.4889860050620612e-05, "loss": 0.4863, "step": 10896 }, { "epoch": 35.72786885245902, "grad_norm": 4.352140426635742, "learning_rate": 1.4888933747922265e-05, "loss": 0.5274, "step": 10897 }, { "epoch": 35.731147540983606, "grad_norm": 5.28397798538208, "learning_rate": 1.4888007390096104e-05, "loss": 0.5957, "step": 10898 }, { "epoch": 35.734426229508195, "grad_norm": 4.838186740875244, "learning_rate": 1.4887080977152582e-05, "loss": 0.5688, "step": 10899 }, { "epoch": 35.73770491803279, "grad_norm": 4.481767654418945, "learning_rate": 1.4886154509102135e-05, "loss": 0.5884, "step": 10900 }, { "epoch": 35.74098360655738, "grad_norm": 4.331503391265869, "learning_rate": 1.4885227985955219e-05, "loss": 0.4209, "step": 10901 }, { "epoch": 35.74426229508197, "grad_norm": 4.562607765197754, "learning_rate": 1.4884301407722274e-05, "loss": 0.5635, "step": 10902 }, { "epoch": 35.747540983606555, "grad_norm": 4.857097148895264, "learning_rate": 1.488337477441376e-05, "loss": 0.3948, "step": 10903 }, { "epoch": 35.75081967213115, "grad_norm": 4.571249961853027, "learning_rate": 1.488244808604011e-05, "loss": 0.554, "step": 10904 }, { "epoch": 35.75409836065574, "grad_norm": 5.114328384399414, "learning_rate": 1.4881521342611787e-05, "loss": 0.4623, "step": 10905 }, { "epoch": 35.75737704918033, "grad_norm": 8.5247220993042, "learning_rate": 1.488059454413923e-05, "loss": 0.4177, "step": 10906 }, { "epoch": 35.760655737704916, "grad_norm": 4.369992256164551, "learning_rate": 1.48796676906329e-05, "loss": 0.4403, "step": 10907 }, { "epoch": 35.76393442622951, "grad_norm": 4.259359359741211, "learning_rate": 1.487874078210324e-05, "loss": 0.5709, "step": 10908 }, { "epoch": 35.7672131147541, "grad_norm": 3.9770970344543457, "learning_rate": 1.4877813818560706e-05, "loss": 0.5874, "step": 10909 }, { "epoch": 35.77049180327869, "grad_norm": 4.032358169555664, "learning_rate": 1.487688680001575e-05, "loss": 0.5679, "step": 10910 }, { "epoch": 35.773770491803276, "grad_norm": 4.495434284210205, "learning_rate": 1.4875959726478826e-05, "loss": 0.5142, "step": 10911 }, { "epoch": 35.77704918032787, "grad_norm": 4.317048072814941, "learning_rate": 1.4875032597960383e-05, "loss": 0.4516, "step": 10912 }, { "epoch": 35.78032786885246, "grad_norm": 4.405447959899902, "learning_rate": 1.487410541447088e-05, "loss": 0.5962, "step": 10913 }, { "epoch": 35.78360655737705, "grad_norm": 4.555153846740723, "learning_rate": 1.4873178176020773e-05, "loss": 0.598, "step": 10914 }, { "epoch": 35.78688524590164, "grad_norm": 4.247063636779785, "learning_rate": 1.4872250882620511e-05, "loss": 0.5007, "step": 10915 }, { "epoch": 35.79016393442623, "grad_norm": 5.032130718231201, "learning_rate": 1.4871323534280557e-05, "loss": 0.53, "step": 10916 }, { "epoch": 35.79344262295082, "grad_norm": 4.73917818069458, "learning_rate": 1.4870396131011365e-05, "loss": 0.4277, "step": 10917 }, { "epoch": 35.79672131147541, "grad_norm": 8.011101722717285, "learning_rate": 1.4869468672823393e-05, "loss": 0.6611, "step": 10918 }, { "epoch": 35.8, "grad_norm": 4.179879665374756, "learning_rate": 1.4868541159727097e-05, "loss": 0.5071, "step": 10919 }, { "epoch": 35.80327868852459, "grad_norm": 4.144082546234131, "learning_rate": 1.486761359173294e-05, "loss": 0.2631, "step": 10920 }, { "epoch": 35.80655737704918, "grad_norm": 3.893540143966675, "learning_rate": 1.4866685968851376e-05, "loss": 0.3872, "step": 10921 }, { "epoch": 35.80983606557377, "grad_norm": 4.785672187805176, "learning_rate": 1.4865758291092868e-05, "loss": 0.4394, "step": 10922 }, { "epoch": 35.81311475409836, "grad_norm": 4.695703506469727, "learning_rate": 1.486483055846788e-05, "loss": 0.5124, "step": 10923 }, { "epoch": 35.81639344262295, "grad_norm": 4.1862006187438965, "learning_rate": 1.4863902770986865e-05, "loss": 0.6388, "step": 10924 }, { "epoch": 35.81967213114754, "grad_norm": 3.981658697128296, "learning_rate": 1.4862974928660291e-05, "loss": 0.4996, "step": 10925 }, { "epoch": 35.82295081967213, "grad_norm": 4.75945520401001, "learning_rate": 1.4862047031498619e-05, "loss": 0.548, "step": 10926 }, { "epoch": 35.82622950819672, "grad_norm": 4.392313003540039, "learning_rate": 1.486111907951231e-05, "loss": 0.3595, "step": 10927 }, { "epoch": 35.829508196721314, "grad_norm": 4.064277648925781, "learning_rate": 1.486019107271183e-05, "loss": 0.4295, "step": 10928 }, { "epoch": 35.8327868852459, "grad_norm": 4.447865962982178, "learning_rate": 1.485926301110764e-05, "loss": 0.5218, "step": 10929 }, { "epoch": 35.83606557377049, "grad_norm": 4.319239616394043, "learning_rate": 1.4858334894710212e-05, "loss": 0.5761, "step": 10930 }, { "epoch": 35.83934426229508, "grad_norm": 4.569207668304443, "learning_rate": 1.4857406723530003e-05, "loss": 0.6614, "step": 10931 }, { "epoch": 35.842622950819674, "grad_norm": 4.247715473175049, "learning_rate": 1.4856478497577483e-05, "loss": 0.4946, "step": 10932 }, { "epoch": 35.84590163934426, "grad_norm": 5.046332836151123, "learning_rate": 1.485555021686312e-05, "loss": 0.4366, "step": 10933 }, { "epoch": 35.84918032786885, "grad_norm": 14.30068302154541, "learning_rate": 1.4854621881397378e-05, "loss": 0.5983, "step": 10934 }, { "epoch": 35.85245901639344, "grad_norm": 4.150369644165039, "learning_rate": 1.4853693491190729e-05, "loss": 0.4663, "step": 10935 }, { "epoch": 35.855737704918035, "grad_norm": 4.6243085861206055, "learning_rate": 1.4852765046253637e-05, "loss": 0.4016, "step": 10936 }, { "epoch": 35.85901639344262, "grad_norm": 4.517592906951904, "learning_rate": 1.4851836546596574e-05, "loss": 0.4636, "step": 10937 }, { "epoch": 35.86229508196721, "grad_norm": 4.3102521896362305, "learning_rate": 1.485090799223001e-05, "loss": 0.5057, "step": 10938 }, { "epoch": 35.86557377049181, "grad_norm": 4.255703926086426, "learning_rate": 1.4849979383164415e-05, "loss": 0.3994, "step": 10939 }, { "epoch": 35.868852459016395, "grad_norm": 5.478427410125732, "learning_rate": 1.4849050719410259e-05, "loss": 0.5591, "step": 10940 }, { "epoch": 35.87213114754098, "grad_norm": 3.9984288215637207, "learning_rate": 1.4848122000978014e-05, "loss": 0.3213, "step": 10941 }, { "epoch": 35.87540983606557, "grad_norm": 4.938858509063721, "learning_rate": 1.4847193227878151e-05, "loss": 0.6592, "step": 10942 }, { "epoch": 35.87868852459017, "grad_norm": 5.275289058685303, "learning_rate": 1.4846264400121148e-05, "loss": 0.4344, "step": 10943 }, { "epoch": 35.881967213114756, "grad_norm": 5.178070545196533, "learning_rate": 1.4845335517717472e-05, "loss": 0.4426, "step": 10944 }, { "epoch": 35.885245901639344, "grad_norm": 6.637628555297852, "learning_rate": 1.4844406580677604e-05, "loss": 0.5776, "step": 10945 }, { "epoch": 35.88852459016393, "grad_norm": 4.528432369232178, "learning_rate": 1.4843477589012012e-05, "loss": 0.5149, "step": 10946 }, { "epoch": 35.89180327868853, "grad_norm": 3.6343069076538086, "learning_rate": 1.4842548542731172e-05, "loss": 0.6171, "step": 10947 }, { "epoch": 35.895081967213116, "grad_norm": 6.671514987945557, "learning_rate": 1.4841619441845568e-05, "loss": 0.3967, "step": 10948 }, { "epoch": 35.898360655737704, "grad_norm": 4.217695713043213, "learning_rate": 1.4840690286365669e-05, "loss": 0.5043, "step": 10949 }, { "epoch": 35.90163934426229, "grad_norm": 4.254124164581299, "learning_rate": 1.483976107630195e-05, "loss": 0.6951, "step": 10950 }, { "epoch": 35.90491803278689, "grad_norm": 4.4537434577941895, "learning_rate": 1.4838831811664898e-05, "loss": 0.6118, "step": 10951 }, { "epoch": 35.90819672131148, "grad_norm": 4.413791656494141, "learning_rate": 1.4837902492464982e-05, "loss": 0.5592, "step": 10952 }, { "epoch": 35.911475409836065, "grad_norm": 3.752821445465088, "learning_rate": 1.4836973118712687e-05, "loss": 0.7776, "step": 10953 }, { "epoch": 35.91475409836065, "grad_norm": 4.127963542938232, "learning_rate": 1.483604369041849e-05, "loss": 0.4188, "step": 10954 }, { "epoch": 35.91803278688525, "grad_norm": 4.596115589141846, "learning_rate": 1.4835114207592876e-05, "loss": 0.4993, "step": 10955 }, { "epoch": 35.92131147540984, "grad_norm": 5.119013786315918, "learning_rate": 1.483418467024632e-05, "loss": 0.6184, "step": 10956 }, { "epoch": 35.924590163934425, "grad_norm": 4.281917095184326, "learning_rate": 1.4833255078389304e-05, "loss": 0.4373, "step": 10957 }, { "epoch": 35.927868852459014, "grad_norm": 4.983221530914307, "learning_rate": 1.4832325432032311e-05, "loss": 0.4386, "step": 10958 }, { "epoch": 35.93114754098361, "grad_norm": 4.31329345703125, "learning_rate": 1.4831395731185826e-05, "loss": 0.5467, "step": 10959 }, { "epoch": 35.9344262295082, "grad_norm": 4.014253616333008, "learning_rate": 1.483046597586033e-05, "loss": 0.4649, "step": 10960 }, { "epoch": 35.937704918032786, "grad_norm": 4.4738030433654785, "learning_rate": 1.482953616606631e-05, "loss": 0.4514, "step": 10961 }, { "epoch": 35.940983606557374, "grad_norm": 4.436838150024414, "learning_rate": 1.4828606301814245e-05, "loss": 0.6385, "step": 10962 }, { "epoch": 35.94426229508197, "grad_norm": 4.446076393127441, "learning_rate": 1.4827676383114626e-05, "loss": 0.5223, "step": 10963 }, { "epoch": 35.94754098360656, "grad_norm": 3.8520519733428955, "learning_rate": 1.4826746409977937e-05, "loss": 0.421, "step": 10964 }, { "epoch": 35.950819672131146, "grad_norm": 3.587106466293335, "learning_rate": 1.4825816382414659e-05, "loss": 0.3473, "step": 10965 }, { "epoch": 35.954098360655735, "grad_norm": 4.223624229431152, "learning_rate": 1.4824886300435288e-05, "loss": 0.4406, "step": 10966 }, { "epoch": 35.95737704918033, "grad_norm": 4.583962917327881, "learning_rate": 1.4823956164050306e-05, "loss": 0.6412, "step": 10967 }, { "epoch": 35.96065573770492, "grad_norm": 4.643543720245361, "learning_rate": 1.4823025973270201e-05, "loss": 0.7106, "step": 10968 }, { "epoch": 35.96393442622951, "grad_norm": 4.04338264465332, "learning_rate": 1.4822095728105465e-05, "loss": 0.345, "step": 10969 }, { "epoch": 35.967213114754095, "grad_norm": 5.075414657592773, "learning_rate": 1.4821165428566586e-05, "loss": 0.6005, "step": 10970 }, { "epoch": 35.97049180327869, "grad_norm": 4.570501327514648, "learning_rate": 1.4820235074664055e-05, "loss": 0.6211, "step": 10971 }, { "epoch": 35.97377049180328, "grad_norm": 4.294088840484619, "learning_rate": 1.481930466640836e-05, "loss": 0.5058, "step": 10972 }, { "epoch": 35.97704918032787, "grad_norm": 4.043171405792236, "learning_rate": 1.4818374203809994e-05, "loss": 0.5923, "step": 10973 }, { "epoch": 35.980327868852456, "grad_norm": 5.143324851989746, "learning_rate": 1.4817443686879452e-05, "loss": 0.4129, "step": 10974 }, { "epoch": 35.98360655737705, "grad_norm": 4.519105434417725, "learning_rate": 1.4816513115627221e-05, "loss": 0.2982, "step": 10975 }, { "epoch": 35.98688524590164, "grad_norm": 3.489655017852783, "learning_rate": 1.4815582490063795e-05, "loss": 0.6348, "step": 10976 }, { "epoch": 35.99016393442623, "grad_norm": 5.273484230041504, "learning_rate": 1.4814651810199672e-05, "loss": 0.4438, "step": 10977 }, { "epoch": 35.993442622950816, "grad_norm": 4.966569900512695, "learning_rate": 1.4813721076045342e-05, "loss": 0.4592, "step": 10978 }, { "epoch": 35.99672131147541, "grad_norm": 3.879953145980835, "learning_rate": 1.4812790287611305e-05, "loss": 0.4735, "step": 10979 }, { "epoch": 36.0, "grad_norm": 4.264014720916748, "learning_rate": 1.4811859444908053e-05, "loss": 0.3314, "step": 10980 }, { "epoch": 36.00327868852459, "grad_norm": 4.296621322631836, "learning_rate": 1.481092854794608e-05, "loss": 0.4463, "step": 10981 }, { "epoch": 36.006557377049184, "grad_norm": 3.8781237602233887, "learning_rate": 1.4809997596735888e-05, "loss": 0.4967, "step": 10982 }, { "epoch": 36.00983606557377, "grad_norm": 4.732931613922119, "learning_rate": 1.480906659128797e-05, "loss": 0.6717, "step": 10983 }, { "epoch": 36.01311475409836, "grad_norm": 4.425993919372559, "learning_rate": 1.4808135531612827e-05, "loss": 0.4521, "step": 10984 }, { "epoch": 36.01639344262295, "grad_norm": 3.696932077407837, "learning_rate": 1.4807204417720958e-05, "loss": 0.3497, "step": 10985 }, { "epoch": 36.019672131147544, "grad_norm": 4.116261005401611, "learning_rate": 1.4806273249622858e-05, "loss": 0.4978, "step": 10986 }, { "epoch": 36.02295081967213, "grad_norm": 4.521469593048096, "learning_rate": 1.480534202732903e-05, "loss": 0.3966, "step": 10987 }, { "epoch": 36.02622950819672, "grad_norm": 4.748873710632324, "learning_rate": 1.4804410750849978e-05, "loss": 0.6097, "step": 10988 }, { "epoch": 36.02950819672131, "grad_norm": 4.1369733810424805, "learning_rate": 1.4803479420196197e-05, "loss": 0.3327, "step": 10989 }, { "epoch": 36.032786885245905, "grad_norm": 5.255887508392334, "learning_rate": 1.480254803537819e-05, "loss": 0.5468, "step": 10990 }, { "epoch": 36.03606557377049, "grad_norm": 3.894751787185669, "learning_rate": 1.480161659640646e-05, "loss": 0.3571, "step": 10991 }, { "epoch": 36.03934426229508, "grad_norm": 4.6148905754089355, "learning_rate": 1.4800685103291515e-05, "loss": 0.4858, "step": 10992 }, { "epoch": 36.04262295081967, "grad_norm": 3.656649589538574, "learning_rate": 1.479975355604385e-05, "loss": 0.3945, "step": 10993 }, { "epoch": 36.045901639344265, "grad_norm": 4.4894256591796875, "learning_rate": 1.4798821954673974e-05, "loss": 0.5069, "step": 10994 }, { "epoch": 36.049180327868854, "grad_norm": 3.6931052207946777, "learning_rate": 1.4797890299192392e-05, "loss": 0.6599, "step": 10995 }, { "epoch": 36.05245901639344, "grad_norm": 4.275323867797852, "learning_rate": 1.4796958589609608e-05, "loss": 0.5669, "step": 10996 }, { "epoch": 36.05573770491803, "grad_norm": 4.947656154632568, "learning_rate": 1.4796026825936128e-05, "loss": 0.649, "step": 10997 }, { "epoch": 36.059016393442626, "grad_norm": 4.994645595550537, "learning_rate": 1.4795095008182458e-05, "loss": 0.3119, "step": 10998 }, { "epoch": 36.062295081967214, "grad_norm": 4.481091022491455, "learning_rate": 1.4794163136359108e-05, "loss": 0.463, "step": 10999 }, { "epoch": 36.0655737704918, "grad_norm": 3.791480779647827, "learning_rate": 1.4793231210476582e-05, "loss": 0.2936, "step": 11000 }, { "epoch": 36.06885245901639, "grad_norm": 3.6547207832336426, "learning_rate": 1.4792299230545393e-05, "loss": 0.3333, "step": 11001 }, { "epoch": 36.072131147540986, "grad_norm": 4.011898040771484, "learning_rate": 1.4791367196576044e-05, "loss": 0.4498, "step": 11002 }, { "epoch": 36.075409836065575, "grad_norm": 3.859243392944336, "learning_rate": 1.4790435108579048e-05, "loss": 0.218, "step": 11003 }, { "epoch": 36.07868852459016, "grad_norm": 4.181240081787109, "learning_rate": 1.4789502966564917e-05, "loss": 0.3711, "step": 11004 }, { "epoch": 36.08196721311475, "grad_norm": 4.138740539550781, "learning_rate": 1.4788570770544163e-05, "loss": 0.5585, "step": 11005 }, { "epoch": 36.08524590163935, "grad_norm": 4.364659786224365, "learning_rate": 1.4787638520527292e-05, "loss": 0.4582, "step": 11006 }, { "epoch": 36.088524590163935, "grad_norm": 4.080654621124268, "learning_rate": 1.4786706216524818e-05, "loss": 0.4078, "step": 11007 }, { "epoch": 36.09180327868852, "grad_norm": 4.726481914520264, "learning_rate": 1.4785773858547255e-05, "loss": 0.4145, "step": 11008 }, { "epoch": 36.09508196721311, "grad_norm": 4.256450176239014, "learning_rate": 1.4784841446605116e-05, "loss": 0.3924, "step": 11009 }, { "epoch": 36.09836065573771, "grad_norm": 4.072765827178955, "learning_rate": 1.4783908980708917e-05, "loss": 0.4802, "step": 11010 }, { "epoch": 36.101639344262296, "grad_norm": 3.80104398727417, "learning_rate": 1.4782976460869168e-05, "loss": 0.6868, "step": 11011 }, { "epoch": 36.104918032786884, "grad_norm": 3.9327480792999268, "learning_rate": 1.4782043887096385e-05, "loss": 0.6776, "step": 11012 }, { "epoch": 36.10819672131147, "grad_norm": 3.716130256652832, "learning_rate": 1.4781111259401087e-05, "loss": 0.4914, "step": 11013 }, { "epoch": 36.11147540983607, "grad_norm": 4.4371337890625, "learning_rate": 1.4780178577793789e-05, "loss": 0.5394, "step": 11014 }, { "epoch": 36.114754098360656, "grad_norm": 4.956510543823242, "learning_rate": 1.4779245842285006e-05, "loss": 0.6161, "step": 11015 }, { "epoch": 36.118032786885244, "grad_norm": 3.8801794052124023, "learning_rate": 1.4778313052885259e-05, "loss": 0.5093, "step": 11016 }, { "epoch": 36.12131147540983, "grad_norm": 3.7743022441864014, "learning_rate": 1.477738020960506e-05, "loss": 0.3692, "step": 11017 }, { "epoch": 36.12459016393443, "grad_norm": 3.950601816177368, "learning_rate": 1.4776447312454937e-05, "loss": 0.6859, "step": 11018 }, { "epoch": 36.12786885245902, "grad_norm": 4.588886737823486, "learning_rate": 1.47755143614454e-05, "loss": 0.5566, "step": 11019 }, { "epoch": 36.131147540983605, "grad_norm": 4.811233997344971, "learning_rate": 1.4774581356586975e-05, "loss": 0.4925, "step": 11020 }, { "epoch": 36.13442622950819, "grad_norm": 4.481309413909912, "learning_rate": 1.477364829789018e-05, "loss": 0.4569, "step": 11021 }, { "epoch": 36.13770491803279, "grad_norm": 4.995378494262695, "learning_rate": 1.4772715185365538e-05, "loss": 0.5066, "step": 11022 }, { "epoch": 36.14098360655738, "grad_norm": 4.486570358276367, "learning_rate": 1.4771782019023571e-05, "loss": 0.3724, "step": 11023 }, { "epoch": 36.144262295081965, "grad_norm": 3.887489080429077, "learning_rate": 1.4770848798874796e-05, "loss": 0.4069, "step": 11024 }, { "epoch": 36.14754098360656, "grad_norm": 4.19795560836792, "learning_rate": 1.4769915524929745e-05, "loss": 0.2524, "step": 11025 }, { "epoch": 36.15081967213115, "grad_norm": 4.15377140045166, "learning_rate": 1.4768982197198936e-05, "loss": 0.4563, "step": 11026 }, { "epoch": 36.15409836065574, "grad_norm": 4.725667953491211, "learning_rate": 1.4768048815692892e-05, "loss": 0.3806, "step": 11027 }, { "epoch": 36.157377049180326, "grad_norm": 4.340325832366943, "learning_rate": 1.4767115380422143e-05, "loss": 0.5222, "step": 11028 }, { "epoch": 36.16065573770492, "grad_norm": 5.279648780822754, "learning_rate": 1.4766181891397212e-05, "loss": 0.5891, "step": 11029 }, { "epoch": 36.16393442622951, "grad_norm": 4.448694705963135, "learning_rate": 1.476524834862862e-05, "loss": 0.5174, "step": 11030 }, { "epoch": 36.1672131147541, "grad_norm": 4.226524353027344, "learning_rate": 1.4764314752126902e-05, "loss": 0.4982, "step": 11031 }, { "epoch": 36.170491803278686, "grad_norm": 4.256301403045654, "learning_rate": 1.4763381101902581e-05, "loss": 0.4818, "step": 11032 }, { "epoch": 36.17377049180328, "grad_norm": 4.104901313781738, "learning_rate": 1.4762447397966187e-05, "loss": 0.4499, "step": 11033 }, { "epoch": 36.17704918032787, "grad_norm": 3.8181631565093994, "learning_rate": 1.4761513640328243e-05, "loss": 0.3365, "step": 11034 }, { "epoch": 36.18032786885246, "grad_norm": 3.566758632659912, "learning_rate": 1.4760579828999284e-05, "loss": 0.5019, "step": 11035 }, { "epoch": 36.18360655737705, "grad_norm": 4.098773956298828, "learning_rate": 1.4759645963989837e-05, "loss": 0.2462, "step": 11036 }, { "epoch": 36.18688524590164, "grad_norm": 4.744250774383545, "learning_rate": 1.4758712045310434e-05, "loss": 0.3967, "step": 11037 }, { "epoch": 36.19016393442623, "grad_norm": 4.356387138366699, "learning_rate": 1.4757778072971605e-05, "loss": 0.438, "step": 11038 }, { "epoch": 36.19344262295082, "grad_norm": 4.460747241973877, "learning_rate": 1.475684404698388e-05, "loss": 0.426, "step": 11039 }, { "epoch": 36.19672131147541, "grad_norm": 4.623227596282959, "learning_rate": 1.4755909967357796e-05, "loss": 0.5898, "step": 11040 }, { "epoch": 36.2, "grad_norm": 4.34638786315918, "learning_rate": 1.4754975834103877e-05, "loss": 0.5768, "step": 11041 }, { "epoch": 36.20327868852459, "grad_norm": 4.2591233253479, "learning_rate": 1.4754041647232666e-05, "loss": 0.5832, "step": 11042 }, { "epoch": 36.20655737704918, "grad_norm": 4.463833808898926, "learning_rate": 1.475310740675469e-05, "loss": 0.494, "step": 11043 }, { "epoch": 36.20983606557377, "grad_norm": 5.534736156463623, "learning_rate": 1.4752173112680485e-05, "loss": 0.3797, "step": 11044 }, { "epoch": 36.21311475409836, "grad_norm": 4.429696559906006, "learning_rate": 1.475123876502059e-05, "loss": 0.4445, "step": 11045 }, { "epoch": 36.21639344262295, "grad_norm": 4.066906452178955, "learning_rate": 1.4750304363785537e-05, "loss": 0.4542, "step": 11046 }, { "epoch": 36.21967213114754, "grad_norm": 4.0434794425964355, "learning_rate": 1.4749369908985862e-05, "loss": 0.4435, "step": 11047 }, { "epoch": 36.22295081967213, "grad_norm": 4.354463577270508, "learning_rate": 1.4748435400632107e-05, "loss": 0.2946, "step": 11048 }, { "epoch": 36.226229508196724, "grad_norm": 4.126436233520508, "learning_rate": 1.47475008387348e-05, "loss": 0.7244, "step": 11049 }, { "epoch": 36.22950819672131, "grad_norm": 4.802468299865723, "learning_rate": 1.4746566223304488e-05, "loss": 0.4134, "step": 11050 }, { "epoch": 36.2327868852459, "grad_norm": 7.31674861907959, "learning_rate": 1.4745631554351705e-05, "loss": 0.4319, "step": 11051 }, { "epoch": 36.23606557377049, "grad_norm": 4.4865241050720215, "learning_rate": 1.4744696831886994e-05, "loss": 0.6354, "step": 11052 }, { "epoch": 36.239344262295084, "grad_norm": 4.63390588760376, "learning_rate": 1.4743762055920892e-05, "loss": 0.3086, "step": 11053 }, { "epoch": 36.24262295081967, "grad_norm": 4.648349285125732, "learning_rate": 1.4742827226463941e-05, "loss": 0.5617, "step": 11054 }, { "epoch": 36.24590163934426, "grad_norm": 4.946567058563232, "learning_rate": 1.474189234352668e-05, "loss": 0.2461, "step": 11055 }, { "epoch": 36.24918032786885, "grad_norm": 4.494657039642334, "learning_rate": 1.4740957407119653e-05, "loss": 0.3995, "step": 11056 }, { "epoch": 36.252459016393445, "grad_norm": 4.831202983856201, "learning_rate": 1.4740022417253403e-05, "loss": 0.561, "step": 11057 }, { "epoch": 36.25573770491803, "grad_norm": 4.422691345214844, "learning_rate": 1.4739087373938472e-05, "loss": 0.6476, "step": 11058 }, { "epoch": 36.25901639344262, "grad_norm": 4.538552284240723, "learning_rate": 1.4738152277185404e-05, "loss": 0.4747, "step": 11059 }, { "epoch": 36.26229508196721, "grad_norm": 3.7717511653900146, "learning_rate": 1.4737217127004742e-05, "loss": 0.4104, "step": 11060 }, { "epoch": 36.265573770491805, "grad_norm": 4.471156120300293, "learning_rate": 1.4736281923407033e-05, "loss": 0.36, "step": 11061 }, { "epoch": 36.268852459016394, "grad_norm": 4.545731544494629, "learning_rate": 1.4735346666402817e-05, "loss": 0.4062, "step": 11062 }, { "epoch": 36.27213114754098, "grad_norm": 4.0168633460998535, "learning_rate": 1.4734411356002644e-05, "loss": 0.5666, "step": 11063 }, { "epoch": 36.27540983606557, "grad_norm": 3.7809765338897705, "learning_rate": 1.4733475992217063e-05, "loss": 0.4612, "step": 11064 }, { "epoch": 36.278688524590166, "grad_norm": 4.892650127410889, "learning_rate": 1.473254057505662e-05, "loss": 0.3992, "step": 11065 }, { "epoch": 36.281967213114754, "grad_norm": 7.981266021728516, "learning_rate": 1.4731605104531858e-05, "loss": 0.4145, "step": 11066 }, { "epoch": 36.28524590163934, "grad_norm": 4.7141618728637695, "learning_rate": 1.473066958065333e-05, "loss": 0.5244, "step": 11067 }, { "epoch": 36.28852459016394, "grad_norm": 4.892693519592285, "learning_rate": 1.4729734003431583e-05, "loss": 0.4963, "step": 11068 }, { "epoch": 36.291803278688526, "grad_norm": 3.628199577331543, "learning_rate": 1.472879837287717e-05, "loss": 0.3646, "step": 11069 }, { "epoch": 36.295081967213115, "grad_norm": 3.735703229904175, "learning_rate": 1.4727862689000636e-05, "loss": 0.3684, "step": 11070 }, { "epoch": 36.2983606557377, "grad_norm": 4.502073287963867, "learning_rate": 1.4726926951812535e-05, "loss": 0.4103, "step": 11071 }, { "epoch": 36.3016393442623, "grad_norm": 3.9212827682495117, "learning_rate": 1.4725991161323418e-05, "loss": 0.4429, "step": 11072 }, { "epoch": 36.30491803278689, "grad_norm": 4.154329776763916, "learning_rate": 1.4725055317543837e-05, "loss": 0.2253, "step": 11073 }, { "epoch": 36.308196721311475, "grad_norm": 4.052835464477539, "learning_rate": 1.4724119420484347e-05, "loss": 0.3165, "step": 11074 }, { "epoch": 36.31147540983606, "grad_norm": 3.428917407989502, "learning_rate": 1.4723183470155494e-05, "loss": 0.3964, "step": 11075 }, { "epoch": 36.31475409836066, "grad_norm": 4.068251132965088, "learning_rate": 1.4722247466567838e-05, "loss": 0.411, "step": 11076 }, { "epoch": 36.31803278688525, "grad_norm": 3.8138513565063477, "learning_rate": 1.4721311409731933e-05, "loss": 0.4739, "step": 11077 }, { "epoch": 36.321311475409836, "grad_norm": 4.112339973449707, "learning_rate": 1.4720375299658334e-05, "loss": 0.2563, "step": 11078 }, { "epoch": 36.324590163934424, "grad_norm": 5.3617119789123535, "learning_rate": 1.4719439136357592e-05, "loss": 0.5172, "step": 11079 }, { "epoch": 36.32786885245902, "grad_norm": 4.303701400756836, "learning_rate": 1.4718502919840268e-05, "loss": 0.4926, "step": 11080 }, { "epoch": 36.33114754098361, "grad_norm": 5.097962856292725, "learning_rate": 1.471756665011692e-05, "loss": 0.3166, "step": 11081 }, { "epoch": 36.334426229508196, "grad_norm": 3.801459550857544, "learning_rate": 1.47166303271981e-05, "loss": 0.7051, "step": 11082 }, { "epoch": 36.337704918032784, "grad_norm": 4.045881748199463, "learning_rate": 1.471569395109437e-05, "loss": 0.3747, "step": 11083 }, { "epoch": 36.34098360655738, "grad_norm": 3.8025131225585938, "learning_rate": 1.4714757521816288e-05, "loss": 0.2937, "step": 11084 }, { "epoch": 36.34426229508197, "grad_norm": 4.488104343414307, "learning_rate": 1.4713821039374413e-05, "loss": 0.4939, "step": 11085 }, { "epoch": 36.34754098360656, "grad_norm": 3.9685611724853516, "learning_rate": 1.4712884503779304e-05, "loss": 0.4534, "step": 11086 }, { "epoch": 36.350819672131145, "grad_norm": 4.327630043029785, "learning_rate": 1.4711947915041522e-05, "loss": 0.3212, "step": 11087 }, { "epoch": 36.35409836065574, "grad_norm": 4.234482288360596, "learning_rate": 1.471101127317163e-05, "loss": 0.3379, "step": 11088 }, { "epoch": 36.35737704918033, "grad_norm": 4.388004779815674, "learning_rate": 1.4710074578180184e-05, "loss": 0.4558, "step": 11089 }, { "epoch": 36.36065573770492, "grad_norm": 4.061814785003662, "learning_rate": 1.4709137830077754e-05, "loss": 0.4147, "step": 11090 }, { "epoch": 36.363934426229505, "grad_norm": 4.050270080566406, "learning_rate": 1.4708201028874896e-05, "loss": 0.6053, "step": 11091 }, { "epoch": 36.3672131147541, "grad_norm": 3.762735605239868, "learning_rate": 1.4707264174582178e-05, "loss": 0.3873, "step": 11092 }, { "epoch": 36.37049180327869, "grad_norm": 3.978452205657959, "learning_rate": 1.470632726721016e-05, "loss": 0.4194, "step": 11093 }, { "epoch": 36.37377049180328, "grad_norm": 4.163637638092041, "learning_rate": 1.4705390306769412e-05, "loss": 0.2985, "step": 11094 }, { "epoch": 36.377049180327866, "grad_norm": 4.181921005249023, "learning_rate": 1.470445329327049e-05, "loss": 0.4476, "step": 11095 }, { "epoch": 36.38032786885246, "grad_norm": 4.853713512420654, "learning_rate": 1.4703516226723974e-05, "loss": 0.6374, "step": 11096 }, { "epoch": 36.38360655737705, "grad_norm": 4.462122917175293, "learning_rate": 1.4702579107140413e-05, "loss": 0.6253, "step": 11097 }, { "epoch": 36.38688524590164, "grad_norm": 4.243595600128174, "learning_rate": 1.470164193453039e-05, "loss": 0.5882, "step": 11098 }, { "epoch": 36.390163934426226, "grad_norm": 3.9791245460510254, "learning_rate": 1.4700704708904461e-05, "loss": 0.4657, "step": 11099 }, { "epoch": 36.39344262295082, "grad_norm": 4.0507426261901855, "learning_rate": 1.4699767430273202e-05, "loss": 0.598, "step": 11100 }, { "epoch": 36.39672131147541, "grad_norm": 4.182534694671631, "learning_rate": 1.469883009864718e-05, "loss": 0.3346, "step": 11101 }, { "epoch": 36.4, "grad_norm": 3.843161106109619, "learning_rate": 1.4697892714036959e-05, "loss": 0.4184, "step": 11102 }, { "epoch": 36.40327868852459, "grad_norm": 4.082205772399902, "learning_rate": 1.4696955276453113e-05, "loss": 0.662, "step": 11103 }, { "epoch": 36.40655737704918, "grad_norm": 4.1253509521484375, "learning_rate": 1.4696017785906218e-05, "loss": 0.4737, "step": 11104 }, { "epoch": 36.40983606557377, "grad_norm": 9.009295463562012, "learning_rate": 1.4695080242406834e-05, "loss": 0.3122, "step": 11105 }, { "epoch": 36.41311475409836, "grad_norm": 3.9068987369537354, "learning_rate": 1.469414264596554e-05, "loss": 0.6443, "step": 11106 }, { "epoch": 36.41639344262295, "grad_norm": 4.9808526039123535, "learning_rate": 1.4693204996592909e-05, "loss": 0.4484, "step": 11107 }, { "epoch": 36.41967213114754, "grad_norm": 4.6973557472229, "learning_rate": 1.4692267294299512e-05, "loss": 0.5202, "step": 11108 }, { "epoch": 36.42295081967213, "grad_norm": 5.005378246307373, "learning_rate": 1.469132953909592e-05, "loss": 0.3517, "step": 11109 }, { "epoch": 36.42622950819672, "grad_norm": 4.074984550476074, "learning_rate": 1.4690391730992711e-05, "loss": 0.4344, "step": 11110 }, { "epoch": 36.429508196721315, "grad_norm": 4.110089302062988, "learning_rate": 1.4689453870000461e-05, "loss": 0.5331, "step": 11111 }, { "epoch": 36.4327868852459, "grad_norm": 4.313088417053223, "learning_rate": 1.4688515956129738e-05, "loss": 0.4086, "step": 11112 }, { "epoch": 36.43606557377049, "grad_norm": 4.134970188140869, "learning_rate": 1.4687577989391127e-05, "loss": 0.4302, "step": 11113 }, { "epoch": 36.43934426229508, "grad_norm": 4.426980972290039, "learning_rate": 1.4686639969795199e-05, "loss": 0.511, "step": 11114 }, { "epoch": 36.442622950819676, "grad_norm": 3.546196222305298, "learning_rate": 1.4685701897352532e-05, "loss": 0.5797, "step": 11115 }, { "epoch": 36.445901639344264, "grad_norm": 4.67168664932251, "learning_rate": 1.4684763772073702e-05, "loss": 0.4253, "step": 11116 }, { "epoch": 36.44918032786885, "grad_norm": 4.129098415374756, "learning_rate": 1.4683825593969293e-05, "loss": 0.4968, "step": 11117 }, { "epoch": 36.45245901639344, "grad_norm": 3.6079516410827637, "learning_rate": 1.468288736304988e-05, "loss": 0.2924, "step": 11118 }, { "epoch": 36.455737704918036, "grad_norm": 3.8812639713287354, "learning_rate": 1.4681949079326041e-05, "loss": 0.3641, "step": 11119 }, { "epoch": 36.459016393442624, "grad_norm": 4.570429801940918, "learning_rate": 1.468101074280836e-05, "loss": 0.5644, "step": 11120 }, { "epoch": 36.46229508196721, "grad_norm": 4.327184677124023, "learning_rate": 1.4680072353507414e-05, "loss": 0.4697, "step": 11121 }, { "epoch": 36.4655737704918, "grad_norm": 4.013756275177002, "learning_rate": 1.4679133911433789e-05, "loss": 0.6063, "step": 11122 }, { "epoch": 36.4688524590164, "grad_norm": 3.8453609943389893, "learning_rate": 1.467819541659806e-05, "loss": 0.2176, "step": 11123 }, { "epoch": 36.472131147540985, "grad_norm": 4.375153064727783, "learning_rate": 1.4677256869010817e-05, "loss": 0.5802, "step": 11124 }, { "epoch": 36.47540983606557, "grad_norm": 3.630250930786133, "learning_rate": 1.4676318268682637e-05, "loss": 0.4499, "step": 11125 }, { "epoch": 36.47868852459016, "grad_norm": 8.210724830627441, "learning_rate": 1.467537961562411e-05, "loss": 0.3299, "step": 11126 }, { "epoch": 36.48196721311476, "grad_norm": 4.596469879150391, "learning_rate": 1.4674440909845813e-05, "loss": 0.3663, "step": 11127 }, { "epoch": 36.485245901639345, "grad_norm": 4.251748085021973, "learning_rate": 1.4673502151358338e-05, "loss": 0.475, "step": 11128 }, { "epoch": 36.488524590163934, "grad_norm": 3.853239059448242, "learning_rate": 1.4672563340172265e-05, "loss": 0.4046, "step": 11129 }, { "epoch": 36.49180327868852, "grad_norm": 4.860714912414551, "learning_rate": 1.4671624476298182e-05, "loss": 0.4787, "step": 11130 }, { "epoch": 36.49508196721312, "grad_norm": 4.5297040939331055, "learning_rate": 1.4670685559746675e-05, "loss": 0.7245, "step": 11131 }, { "epoch": 36.498360655737706, "grad_norm": 6.805135250091553, "learning_rate": 1.4669746590528334e-05, "loss": 0.5631, "step": 11132 }, { "epoch": 36.501639344262294, "grad_norm": 4.280125617980957, "learning_rate": 1.4668807568653743e-05, "loss": 0.3236, "step": 11133 }, { "epoch": 36.50491803278688, "grad_norm": 4.510365962982178, "learning_rate": 1.4667868494133493e-05, "loss": 0.544, "step": 11134 }, { "epoch": 36.50819672131148, "grad_norm": 3.9411160945892334, "learning_rate": 1.4666929366978172e-05, "loss": 0.29, "step": 11135 }, { "epoch": 36.511475409836066, "grad_norm": 4.512916564941406, "learning_rate": 1.4665990187198372e-05, "loss": 0.5499, "step": 11136 }, { "epoch": 36.514754098360655, "grad_norm": 4.371188163757324, "learning_rate": 1.4665050954804679e-05, "loss": 0.535, "step": 11137 }, { "epoch": 36.51803278688524, "grad_norm": 3.583937644958496, "learning_rate": 1.4664111669807686e-05, "loss": 0.2214, "step": 11138 }, { "epoch": 36.52131147540984, "grad_norm": 4.303656101226807, "learning_rate": 1.4663172332217986e-05, "loss": 0.3785, "step": 11139 }, { "epoch": 36.52459016393443, "grad_norm": 4.243200302124023, "learning_rate": 1.4662232942046169e-05, "loss": 0.5144, "step": 11140 }, { "epoch": 36.527868852459015, "grad_norm": 4.147496700286865, "learning_rate": 1.466129349930283e-05, "loss": 0.484, "step": 11141 }, { "epoch": 36.5311475409836, "grad_norm": 4.519460678100586, "learning_rate": 1.4660354003998558e-05, "loss": 0.4983, "step": 11142 }, { "epoch": 36.5344262295082, "grad_norm": 4.45239782333374, "learning_rate": 1.4659414456143948e-05, "loss": 0.4074, "step": 11143 }, { "epoch": 36.53770491803279, "grad_norm": 5.160368919372559, "learning_rate": 1.4658474855749597e-05, "loss": 0.6279, "step": 11144 }, { "epoch": 36.540983606557376, "grad_norm": 4.8564772605896, "learning_rate": 1.4657535202826097e-05, "loss": 0.5384, "step": 11145 }, { "epoch": 36.544262295081964, "grad_norm": 3.333672285079956, "learning_rate": 1.4656595497384048e-05, "loss": 0.4167, "step": 11146 }, { "epoch": 36.54754098360656, "grad_norm": 4.733686923980713, "learning_rate": 1.4655655739434038e-05, "loss": 0.5302, "step": 11147 }, { "epoch": 36.55081967213115, "grad_norm": 3.7586724758148193, "learning_rate": 1.4654715928986673e-05, "loss": 0.6507, "step": 11148 }, { "epoch": 36.554098360655736, "grad_norm": 5.326087951660156, "learning_rate": 1.4653776066052545e-05, "loss": 0.4485, "step": 11149 }, { "epoch": 36.557377049180324, "grad_norm": 4.4190354347229, "learning_rate": 1.4652836150642253e-05, "loss": 0.5346, "step": 11150 }, { "epoch": 36.56065573770492, "grad_norm": 4.329877853393555, "learning_rate": 1.4651896182766396e-05, "loss": 0.5857, "step": 11151 }, { "epoch": 36.56393442622951, "grad_norm": 4.446213722229004, "learning_rate": 1.4650956162435575e-05, "loss": 0.5107, "step": 11152 }, { "epoch": 36.5672131147541, "grad_norm": 4.938108921051025, "learning_rate": 1.4650016089660383e-05, "loss": 0.3912, "step": 11153 }, { "epoch": 36.570491803278685, "grad_norm": 3.732754707336426, "learning_rate": 1.464907596445143e-05, "loss": 0.4681, "step": 11154 }, { "epoch": 36.57377049180328, "grad_norm": 4.038129806518555, "learning_rate": 1.4648135786819308e-05, "loss": 0.6658, "step": 11155 }, { "epoch": 36.57704918032787, "grad_norm": 5.12061071395874, "learning_rate": 1.4647195556774623e-05, "loss": 0.4122, "step": 11156 }, { "epoch": 36.58032786885246, "grad_norm": 3.7538368701934814, "learning_rate": 1.4646255274327974e-05, "loss": 0.3697, "step": 11157 }, { "epoch": 36.58360655737705, "grad_norm": 3.8815650939941406, "learning_rate": 1.464531493948997e-05, "loss": 0.549, "step": 11158 }, { "epoch": 36.58688524590164, "grad_norm": 4.213329792022705, "learning_rate": 1.4644374552271208e-05, "loss": 0.7256, "step": 11159 }, { "epoch": 36.59016393442623, "grad_norm": 3.8618221282958984, "learning_rate": 1.4643434112682293e-05, "loss": 0.2292, "step": 11160 }, { "epoch": 36.59344262295082, "grad_norm": 4.2762861251831055, "learning_rate": 1.4642493620733831e-05, "loss": 0.3919, "step": 11161 }, { "epoch": 36.59672131147541, "grad_norm": 4.371321678161621, "learning_rate": 1.4641553076436426e-05, "loss": 0.6805, "step": 11162 }, { "epoch": 36.6, "grad_norm": 4.060225009918213, "learning_rate": 1.4640612479800686e-05, "loss": 0.4353, "step": 11163 }, { "epoch": 36.60327868852459, "grad_norm": 3.9673571586608887, "learning_rate": 1.4639671830837214e-05, "loss": 0.5695, "step": 11164 }, { "epoch": 36.60655737704918, "grad_norm": 4.6624555587768555, "learning_rate": 1.4638731129556617e-05, "loss": 0.5059, "step": 11165 }, { "epoch": 36.609836065573774, "grad_norm": 7.154501438140869, "learning_rate": 1.4637790375969506e-05, "loss": 0.6064, "step": 11166 }, { "epoch": 36.61311475409836, "grad_norm": 8.186904907226562, "learning_rate": 1.4636849570086487e-05, "loss": 0.3464, "step": 11167 }, { "epoch": 36.61639344262295, "grad_norm": 4.187107086181641, "learning_rate": 1.4635908711918165e-05, "loss": 0.4303, "step": 11168 }, { "epoch": 36.61967213114754, "grad_norm": 4.216549873352051, "learning_rate": 1.4634967801475152e-05, "loss": 0.4181, "step": 11169 }, { "epoch": 36.622950819672134, "grad_norm": 4.986663818359375, "learning_rate": 1.463402683876806e-05, "loss": 0.5449, "step": 11170 }, { "epoch": 36.62622950819672, "grad_norm": 4.949542999267578, "learning_rate": 1.4633085823807496e-05, "loss": 0.6307, "step": 11171 }, { "epoch": 36.62950819672131, "grad_norm": 5.017312049865723, "learning_rate": 1.4632144756604074e-05, "loss": 0.5783, "step": 11172 }, { "epoch": 36.6327868852459, "grad_norm": 4.938824653625488, "learning_rate": 1.4631203637168403e-05, "loss": 0.5152, "step": 11173 }, { "epoch": 36.636065573770495, "grad_norm": 4.140318393707275, "learning_rate": 1.4630262465511095e-05, "loss": 0.4314, "step": 11174 }, { "epoch": 36.63934426229508, "grad_norm": 4.758303165435791, "learning_rate": 1.4629321241642762e-05, "loss": 0.5661, "step": 11175 }, { "epoch": 36.64262295081967, "grad_norm": 5.622071743011475, "learning_rate": 1.462837996557402e-05, "loss": 0.3598, "step": 11176 }, { "epoch": 36.64590163934426, "grad_norm": 3.9906890392303467, "learning_rate": 1.4627438637315483e-05, "loss": 0.436, "step": 11177 }, { "epoch": 36.649180327868855, "grad_norm": 4.311677932739258, "learning_rate": 1.4626497256877762e-05, "loss": 0.5945, "step": 11178 }, { "epoch": 36.65245901639344, "grad_norm": 3.858400344848633, "learning_rate": 1.4625555824271474e-05, "loss": 0.5885, "step": 11179 }, { "epoch": 36.65573770491803, "grad_norm": 4.264899730682373, "learning_rate": 1.4624614339507237e-05, "loss": 0.5048, "step": 11180 }, { "epoch": 36.65901639344262, "grad_norm": 4.814651966094971, "learning_rate": 1.4623672802595663e-05, "loss": 0.4734, "step": 11181 }, { "epoch": 36.662295081967216, "grad_norm": 4.028127193450928, "learning_rate": 1.4622731213547372e-05, "loss": 0.6632, "step": 11182 }, { "epoch": 36.665573770491804, "grad_norm": 4.275416851043701, "learning_rate": 1.462178957237298e-05, "loss": 0.3768, "step": 11183 }, { "epoch": 36.66885245901639, "grad_norm": 4.460550308227539, "learning_rate": 1.4620847879083103e-05, "loss": 0.4303, "step": 11184 }, { "epoch": 36.67213114754098, "grad_norm": 5.279131889343262, "learning_rate": 1.4619906133688362e-05, "loss": 0.4337, "step": 11185 }, { "epoch": 36.675409836065576, "grad_norm": 4.3235039710998535, "learning_rate": 1.4618964336199378e-05, "loss": 0.3427, "step": 11186 }, { "epoch": 36.678688524590164, "grad_norm": 4.298533916473389, "learning_rate": 1.4618022486626766e-05, "loss": 0.4008, "step": 11187 }, { "epoch": 36.68196721311475, "grad_norm": 4.423828125, "learning_rate": 1.4617080584981151e-05, "loss": 0.6083, "step": 11188 }, { "epoch": 36.68524590163934, "grad_norm": 4.760589599609375, "learning_rate": 1.4616138631273152e-05, "loss": 0.3865, "step": 11189 }, { "epoch": 36.68852459016394, "grad_norm": 4.45236349105835, "learning_rate": 1.4615196625513389e-05, "loss": 0.4425, "step": 11190 }, { "epoch": 36.691803278688525, "grad_norm": 3.703584671020508, "learning_rate": 1.4614254567712487e-05, "loss": 0.6649, "step": 11191 }, { "epoch": 36.69508196721311, "grad_norm": 4.344639778137207, "learning_rate": 1.4613312457881063e-05, "loss": 0.4585, "step": 11192 }, { "epoch": 36.6983606557377, "grad_norm": 4.333463668823242, "learning_rate": 1.461237029602975e-05, "loss": 0.5533, "step": 11193 }, { "epoch": 36.7016393442623, "grad_norm": 4.706463813781738, "learning_rate": 1.4611428082169166e-05, "loss": 0.336, "step": 11194 }, { "epoch": 36.704918032786885, "grad_norm": 3.9413440227508545, "learning_rate": 1.4610485816309931e-05, "loss": 0.2686, "step": 11195 }, { "epoch": 36.708196721311474, "grad_norm": 4.382339000701904, "learning_rate": 1.460954349846268e-05, "loss": 0.3079, "step": 11196 }, { "epoch": 36.71147540983607, "grad_norm": 4.712825775146484, "learning_rate": 1.4608601128638027e-05, "loss": 0.7398, "step": 11197 }, { "epoch": 36.71475409836066, "grad_norm": 5.045916557312012, "learning_rate": 1.460765870684661e-05, "loss": 0.5798, "step": 11198 }, { "epoch": 36.718032786885246, "grad_norm": 3.8107166290283203, "learning_rate": 1.4606716233099048e-05, "loss": 0.5981, "step": 11199 }, { "epoch": 36.721311475409834, "grad_norm": 4.581722736358643, "learning_rate": 1.4605773707405972e-05, "loss": 0.391, "step": 11200 }, { "epoch": 36.72459016393443, "grad_norm": 4.578255653381348, "learning_rate": 1.4604831129778005e-05, "loss": 0.51, "step": 11201 }, { "epoch": 36.72786885245902, "grad_norm": 4.123518466949463, "learning_rate": 1.4603888500225785e-05, "loss": 0.4332, "step": 11202 }, { "epoch": 36.731147540983606, "grad_norm": 3.8218023777008057, "learning_rate": 1.4602945818759931e-05, "loss": 0.3105, "step": 11203 }, { "epoch": 36.734426229508195, "grad_norm": 6.208738803863525, "learning_rate": 1.4602003085391079e-05, "loss": 0.4462, "step": 11204 }, { "epoch": 36.73770491803279, "grad_norm": 3.7639718055725098, "learning_rate": 1.4601060300129857e-05, "loss": 0.5731, "step": 11205 }, { "epoch": 36.74098360655738, "grad_norm": 5.4026007652282715, "learning_rate": 1.4600117462986895e-05, "loss": 0.5189, "step": 11206 }, { "epoch": 36.74426229508197, "grad_norm": 3.7833456993103027, "learning_rate": 1.4599174573972828e-05, "loss": 0.283, "step": 11207 }, { "epoch": 36.747540983606555, "grad_norm": 14.344467163085938, "learning_rate": 1.4598231633098283e-05, "loss": 0.3125, "step": 11208 }, { "epoch": 36.75081967213115, "grad_norm": 4.629121780395508, "learning_rate": 1.4597288640373898e-05, "loss": 0.6506, "step": 11209 }, { "epoch": 36.75409836065574, "grad_norm": 4.123857498168945, "learning_rate": 1.4596345595810301e-05, "loss": 0.6735, "step": 11210 }, { "epoch": 36.75737704918033, "grad_norm": 4.793495178222656, "learning_rate": 1.4595402499418132e-05, "loss": 0.5082, "step": 11211 }, { "epoch": 36.760655737704916, "grad_norm": 3.997758150100708, "learning_rate": 1.4594459351208019e-05, "loss": 0.3846, "step": 11212 }, { "epoch": 36.76393442622951, "grad_norm": 4.611693859100342, "learning_rate": 1.45935161511906e-05, "loss": 0.43, "step": 11213 }, { "epoch": 36.7672131147541, "grad_norm": 3.928011894226074, "learning_rate": 1.4592572899376509e-05, "loss": 0.5181, "step": 11214 }, { "epoch": 36.77049180327869, "grad_norm": 3.871108055114746, "learning_rate": 1.4591629595776384e-05, "loss": 0.5393, "step": 11215 }, { "epoch": 36.773770491803276, "grad_norm": 4.845241069793701, "learning_rate": 1.4590686240400862e-05, "loss": 0.3989, "step": 11216 }, { "epoch": 36.77704918032787, "grad_norm": 4.680750846862793, "learning_rate": 1.4589742833260581e-05, "loss": 0.3427, "step": 11217 }, { "epoch": 36.78032786885246, "grad_norm": 4.384733200073242, "learning_rate": 1.4588799374366174e-05, "loss": 0.4831, "step": 11218 }, { "epoch": 36.78360655737705, "grad_norm": 3.929743528366089, "learning_rate": 1.4587855863728287e-05, "loss": 0.4268, "step": 11219 }, { "epoch": 36.78688524590164, "grad_norm": 4.294590950012207, "learning_rate": 1.4586912301357553e-05, "loss": 0.5446, "step": 11220 }, { "epoch": 36.79016393442623, "grad_norm": 4.049216270446777, "learning_rate": 1.4585968687264613e-05, "loss": 0.558, "step": 11221 }, { "epoch": 36.79344262295082, "grad_norm": 4.1031389236450195, "learning_rate": 1.458502502146011e-05, "loss": 0.5816, "step": 11222 }, { "epoch": 36.79672131147541, "grad_norm": 5.0122270584106445, "learning_rate": 1.4584081303954681e-05, "loss": 0.5551, "step": 11223 }, { "epoch": 36.8, "grad_norm": 4.76260232925415, "learning_rate": 1.4583137534758968e-05, "loss": 0.359, "step": 11224 }, { "epoch": 36.80327868852459, "grad_norm": 4.9027934074401855, "learning_rate": 1.4582193713883617e-05, "loss": 0.7, "step": 11225 }, { "epoch": 36.80655737704918, "grad_norm": 5.034874439239502, "learning_rate": 1.4581249841339267e-05, "loss": 0.4393, "step": 11226 }, { "epoch": 36.80983606557377, "grad_norm": 4.619413375854492, "learning_rate": 1.4580305917136559e-05, "loss": 0.5093, "step": 11227 }, { "epoch": 36.81311475409836, "grad_norm": 5.794478893280029, "learning_rate": 1.4579361941286142e-05, "loss": 0.6259, "step": 11228 }, { "epoch": 36.81639344262295, "grad_norm": 4.445394039154053, "learning_rate": 1.4578417913798655e-05, "loss": 0.4587, "step": 11229 }, { "epoch": 36.81967213114754, "grad_norm": 4.402431488037109, "learning_rate": 1.4577473834684749e-05, "loss": 0.4585, "step": 11230 }, { "epoch": 36.82295081967213, "grad_norm": 4.850799083709717, "learning_rate": 1.4576529703955062e-05, "loss": 0.3839, "step": 11231 }, { "epoch": 36.82622950819672, "grad_norm": 4.258645057678223, "learning_rate": 1.4575585521620248e-05, "loss": 0.4862, "step": 11232 }, { "epoch": 36.829508196721314, "grad_norm": 4.785347938537598, "learning_rate": 1.4574641287690949e-05, "loss": 0.3694, "step": 11233 }, { "epoch": 36.8327868852459, "grad_norm": 4.818384647369385, "learning_rate": 1.4573697002177814e-05, "loss": 0.2765, "step": 11234 }, { "epoch": 36.83606557377049, "grad_norm": 4.283445835113525, "learning_rate": 1.4572752665091487e-05, "loss": 0.4714, "step": 11235 }, { "epoch": 36.83934426229508, "grad_norm": 3.92681622505188, "learning_rate": 1.4571808276442621e-05, "loss": 0.445, "step": 11236 }, { "epoch": 36.842622950819674, "grad_norm": 5.635227680206299, "learning_rate": 1.4570863836241865e-05, "loss": 0.6746, "step": 11237 }, { "epoch": 36.84590163934426, "grad_norm": 4.470309734344482, "learning_rate": 1.4569919344499863e-05, "loss": 0.6139, "step": 11238 }, { "epoch": 36.84918032786885, "grad_norm": 3.9807639122009277, "learning_rate": 1.4568974801227268e-05, "loss": 0.4799, "step": 11239 }, { "epoch": 36.85245901639344, "grad_norm": 4.1691999435424805, "learning_rate": 1.4568030206434735e-05, "loss": 0.3522, "step": 11240 }, { "epoch": 36.855737704918035, "grad_norm": 4.389660835266113, "learning_rate": 1.4567085560132912e-05, "loss": 0.3834, "step": 11241 }, { "epoch": 36.85901639344262, "grad_norm": 4.467559814453125, "learning_rate": 1.456614086233245e-05, "loss": 0.3896, "step": 11242 }, { "epoch": 36.86229508196721, "grad_norm": 3.9054036140441895, "learning_rate": 1.4565196113044002e-05, "loss": 0.7334, "step": 11243 }, { "epoch": 36.86557377049181, "grad_norm": 3.944934606552124, "learning_rate": 1.456425131227822e-05, "loss": 0.5702, "step": 11244 }, { "epoch": 36.868852459016395, "grad_norm": 4.28951358795166, "learning_rate": 1.456330646004576e-05, "loss": 0.3278, "step": 11245 }, { "epoch": 36.87213114754098, "grad_norm": 5.08500862121582, "learning_rate": 1.4562361556357276e-05, "loss": 0.3667, "step": 11246 }, { "epoch": 36.87540983606557, "grad_norm": 4.68801212310791, "learning_rate": 1.456141660122342e-05, "loss": 0.4655, "step": 11247 }, { "epoch": 36.87868852459017, "grad_norm": 4.303793907165527, "learning_rate": 1.456047159465485e-05, "loss": 0.3837, "step": 11248 }, { "epoch": 36.881967213114756, "grad_norm": 4.41317892074585, "learning_rate": 1.4559526536662221e-05, "loss": 0.5121, "step": 11249 }, { "epoch": 36.885245901639344, "grad_norm": 5.270346641540527, "learning_rate": 1.4558581427256191e-05, "loss": 0.5271, "step": 11250 }, { "epoch": 36.88852459016393, "grad_norm": 4.219232082366943, "learning_rate": 1.4557636266447413e-05, "loss": 0.6541, "step": 11251 }, { "epoch": 36.89180327868853, "grad_norm": 4.177365779876709, "learning_rate": 1.4556691054246547e-05, "loss": 0.5238, "step": 11252 }, { "epoch": 36.895081967213116, "grad_norm": 5.2631611824035645, "learning_rate": 1.4555745790664253e-05, "loss": 0.4949, "step": 11253 }, { "epoch": 36.898360655737704, "grad_norm": 5.027670383453369, "learning_rate": 1.4554800475711191e-05, "loss": 0.4576, "step": 11254 }, { "epoch": 36.90163934426229, "grad_norm": 4.059379577636719, "learning_rate": 1.4553855109398013e-05, "loss": 0.2748, "step": 11255 }, { "epoch": 36.90491803278689, "grad_norm": 4.045928001403809, "learning_rate": 1.4552909691735388e-05, "loss": 0.4899, "step": 11256 }, { "epoch": 36.90819672131148, "grad_norm": 4.629226207733154, "learning_rate": 1.455196422273397e-05, "loss": 0.5121, "step": 11257 }, { "epoch": 36.911475409836065, "grad_norm": 4.772089958190918, "learning_rate": 1.4551018702404424e-05, "loss": 0.6814, "step": 11258 }, { "epoch": 36.91475409836065, "grad_norm": 5.429154872894287, "learning_rate": 1.4550073130757409e-05, "loss": 0.7155, "step": 11259 }, { "epoch": 36.91803278688525, "grad_norm": 4.811806678771973, "learning_rate": 1.4549127507803589e-05, "loss": 0.5572, "step": 11260 }, { "epoch": 36.92131147540984, "grad_norm": 4.591114521026611, "learning_rate": 1.4548181833553628e-05, "loss": 0.4706, "step": 11261 }, { "epoch": 36.924590163934425, "grad_norm": 4.463209629058838, "learning_rate": 1.4547236108018185e-05, "loss": 0.4533, "step": 11262 }, { "epoch": 36.927868852459014, "grad_norm": 5.077394008636475, "learning_rate": 1.4546290331207929e-05, "loss": 0.5625, "step": 11263 }, { "epoch": 36.93114754098361, "grad_norm": 4.665553092956543, "learning_rate": 1.454534450313352e-05, "loss": 0.6522, "step": 11264 }, { "epoch": 36.9344262295082, "grad_norm": 3.9593141078948975, "learning_rate": 1.4544398623805629e-05, "loss": 0.5119, "step": 11265 }, { "epoch": 36.937704918032786, "grad_norm": 4.52690315246582, "learning_rate": 1.4543452693234916e-05, "loss": 0.5585, "step": 11266 }, { "epoch": 36.940983606557374, "grad_norm": 4.641176700592041, "learning_rate": 1.4542506711432049e-05, "loss": 0.344, "step": 11267 }, { "epoch": 36.94426229508197, "grad_norm": 14.052974700927734, "learning_rate": 1.45415606784077e-05, "loss": 0.5979, "step": 11268 }, { "epoch": 36.94754098360656, "grad_norm": 6.3016791343688965, "learning_rate": 1.4540614594172529e-05, "loss": 0.3771, "step": 11269 }, { "epoch": 36.950819672131146, "grad_norm": 4.320488929748535, "learning_rate": 1.4539668458737206e-05, "loss": 0.4965, "step": 11270 }, { "epoch": 36.954098360655735, "grad_norm": 4.25077486038208, "learning_rate": 1.4538722272112406e-05, "loss": 0.3849, "step": 11271 }, { "epoch": 36.95737704918033, "grad_norm": 3.896449565887451, "learning_rate": 1.4537776034308791e-05, "loss": 0.2808, "step": 11272 }, { "epoch": 36.96065573770492, "grad_norm": 3.9596567153930664, "learning_rate": 1.4536829745337034e-05, "loss": 0.5366, "step": 11273 }, { "epoch": 36.96393442622951, "grad_norm": 4.141494274139404, "learning_rate": 1.4535883405207802e-05, "loss": 0.3303, "step": 11274 }, { "epoch": 36.967213114754095, "grad_norm": 4.429478168487549, "learning_rate": 1.4534937013931769e-05, "loss": 0.5231, "step": 11275 }, { "epoch": 36.97049180327869, "grad_norm": 4.063099384307861, "learning_rate": 1.4533990571519607e-05, "loss": 0.6938, "step": 11276 }, { "epoch": 36.97377049180328, "grad_norm": 3.80747652053833, "learning_rate": 1.4533044077981987e-05, "loss": 0.5025, "step": 11277 }, { "epoch": 36.97704918032787, "grad_norm": 4.850834846496582, "learning_rate": 1.4532097533329583e-05, "loss": 0.4408, "step": 11278 }, { "epoch": 36.980327868852456, "grad_norm": 4.091037273406982, "learning_rate": 1.4531150937573064e-05, "loss": 0.3564, "step": 11279 }, { "epoch": 36.98360655737705, "grad_norm": 4.916962146759033, "learning_rate": 1.4530204290723112e-05, "loss": 0.4817, "step": 11280 }, { "epoch": 36.98688524590164, "grad_norm": 4.725515365600586, "learning_rate": 1.4529257592790391e-05, "loss": 0.5913, "step": 11281 }, { "epoch": 36.99016393442623, "grad_norm": 4.062726020812988, "learning_rate": 1.4528310843785584e-05, "loss": 0.4802, "step": 11282 }, { "epoch": 36.993442622950816, "grad_norm": 4.688033103942871, "learning_rate": 1.4527364043719362e-05, "loss": 0.535, "step": 11283 }, { "epoch": 36.99672131147541, "grad_norm": 4.007526397705078, "learning_rate": 1.4526417192602407e-05, "loss": 0.5099, "step": 11284 }, { "epoch": 37.0, "grad_norm": 3.870711088180542, "learning_rate": 1.4525470290445392e-05, "loss": 0.4113, "step": 11285 }, { "epoch": 37.00327868852459, "grad_norm": 3.624638795852661, "learning_rate": 1.4524523337258991e-05, "loss": 0.4146, "step": 11286 }, { "epoch": 37.006557377049184, "grad_norm": 4.482990741729736, "learning_rate": 1.4523576333053885e-05, "loss": 0.4822, "step": 11287 }, { "epoch": 37.00983606557377, "grad_norm": 3.7845027446746826, "learning_rate": 1.4522629277840754e-05, "loss": 0.5163, "step": 11288 }, { "epoch": 37.01311475409836, "grad_norm": 4.474481105804443, "learning_rate": 1.4521682171630276e-05, "loss": 0.3899, "step": 11289 }, { "epoch": 37.01639344262295, "grad_norm": 4.8420515060424805, "learning_rate": 1.4520735014433127e-05, "loss": 0.4728, "step": 11290 }, { "epoch": 37.019672131147544, "grad_norm": 3.7910118103027344, "learning_rate": 1.4519787806259992e-05, "loss": 0.5274, "step": 11291 }, { "epoch": 37.02295081967213, "grad_norm": 4.472302436828613, "learning_rate": 1.451884054712155e-05, "loss": 0.6218, "step": 11292 }, { "epoch": 37.02622950819672, "grad_norm": 3.9545540809631348, "learning_rate": 1.4517893237028483e-05, "loss": 0.3446, "step": 11293 }, { "epoch": 37.02950819672131, "grad_norm": 5.044738292694092, "learning_rate": 1.4516945875991472e-05, "loss": 0.6088, "step": 11294 }, { "epoch": 37.032786885245905, "grad_norm": 3.8346264362335205, "learning_rate": 1.4515998464021199e-05, "loss": 0.3656, "step": 11295 }, { "epoch": 37.03606557377049, "grad_norm": 4.419549942016602, "learning_rate": 1.4515051001128352e-05, "loss": 0.383, "step": 11296 }, { "epoch": 37.03934426229508, "grad_norm": 3.9318087100982666, "learning_rate": 1.4514103487323605e-05, "loss": 0.6219, "step": 11297 }, { "epoch": 37.04262295081967, "grad_norm": 5.103921890258789, "learning_rate": 1.4513155922617652e-05, "loss": 0.445, "step": 11298 }, { "epoch": 37.045901639344265, "grad_norm": 3.9468367099761963, "learning_rate": 1.451220830702117e-05, "loss": 0.4806, "step": 11299 }, { "epoch": 37.049180327868854, "grad_norm": 4.154815673828125, "learning_rate": 1.4511260640544851e-05, "loss": 0.352, "step": 11300 }, { "epoch": 37.05245901639344, "grad_norm": 4.162461280822754, "learning_rate": 1.4510312923199375e-05, "loss": 0.3454, "step": 11301 }, { "epoch": 37.05573770491803, "grad_norm": 4.478365898132324, "learning_rate": 1.4509365154995431e-05, "loss": 0.4573, "step": 11302 }, { "epoch": 37.059016393442626, "grad_norm": 4.478999137878418, "learning_rate": 1.4508417335943707e-05, "loss": 0.3977, "step": 11303 }, { "epoch": 37.062295081967214, "grad_norm": 4.13320255279541, "learning_rate": 1.4507469466054893e-05, "loss": 0.4142, "step": 11304 }, { "epoch": 37.0655737704918, "grad_norm": 4.108426570892334, "learning_rate": 1.4506521545339671e-05, "loss": 0.4033, "step": 11305 }, { "epoch": 37.06885245901639, "grad_norm": 5.205480575561523, "learning_rate": 1.4505573573808733e-05, "loss": 0.5441, "step": 11306 }, { "epoch": 37.072131147540986, "grad_norm": 5.763161659240723, "learning_rate": 1.450462555147277e-05, "loss": 0.4138, "step": 11307 }, { "epoch": 37.075409836065575, "grad_norm": 6.289999008178711, "learning_rate": 1.450367747834247e-05, "loss": 0.5127, "step": 11308 }, { "epoch": 37.07868852459016, "grad_norm": 3.7818243503570557, "learning_rate": 1.4502729354428521e-05, "loss": 0.2169, "step": 11309 }, { "epoch": 37.08196721311475, "grad_norm": 3.980595827102661, "learning_rate": 1.4501781179741622e-05, "loss": 0.2327, "step": 11310 }, { "epoch": 37.08524590163935, "grad_norm": 4.380128383636475, "learning_rate": 1.4500832954292456e-05, "loss": 0.4756, "step": 11311 }, { "epoch": 37.088524590163935, "grad_norm": 3.8242247104644775, "learning_rate": 1.4499884678091717e-05, "loss": 0.4096, "step": 11312 }, { "epoch": 37.09180327868852, "grad_norm": 3.897749423980713, "learning_rate": 1.4498936351150103e-05, "loss": 0.3406, "step": 11313 }, { "epoch": 37.09508196721311, "grad_norm": 4.5401411056518555, "learning_rate": 1.4497987973478305e-05, "loss": 0.4827, "step": 11314 }, { "epoch": 37.09836065573771, "grad_norm": 4.041698932647705, "learning_rate": 1.4497039545087013e-05, "loss": 0.7194, "step": 11315 }, { "epoch": 37.101639344262296, "grad_norm": 3.8867037296295166, "learning_rate": 1.4496091065986923e-05, "loss": 0.4174, "step": 11316 }, { "epoch": 37.104918032786884, "grad_norm": 4.196876525878906, "learning_rate": 1.4495142536188735e-05, "loss": 0.4021, "step": 11317 }, { "epoch": 37.10819672131147, "grad_norm": 4.248269557952881, "learning_rate": 1.4494193955703138e-05, "loss": 0.345, "step": 11318 }, { "epoch": 37.11147540983607, "grad_norm": 4.115214824676514, "learning_rate": 1.4493245324540835e-05, "loss": 0.4797, "step": 11319 }, { "epoch": 37.114754098360656, "grad_norm": 3.3392770290374756, "learning_rate": 1.4492296642712516e-05, "loss": 0.2214, "step": 11320 }, { "epoch": 37.118032786885244, "grad_norm": 3.6750621795654297, "learning_rate": 1.4491347910228881e-05, "loss": 0.4089, "step": 11321 }, { "epoch": 37.12131147540983, "grad_norm": 4.952128887176514, "learning_rate": 1.4490399127100631e-05, "loss": 0.4764, "step": 11322 }, { "epoch": 37.12459016393443, "grad_norm": 4.158364772796631, "learning_rate": 1.448945029333846e-05, "loss": 0.5042, "step": 11323 }, { "epoch": 37.12786885245902, "grad_norm": 4.472314357757568, "learning_rate": 1.4488501408953073e-05, "loss": 0.3489, "step": 11324 }, { "epoch": 37.131147540983605, "grad_norm": 3.406674861907959, "learning_rate": 1.448755247395516e-05, "loss": 0.4922, "step": 11325 }, { "epoch": 37.13442622950819, "grad_norm": 4.080804824829102, "learning_rate": 1.448660348835543e-05, "loss": 0.4107, "step": 11326 }, { "epoch": 37.13770491803279, "grad_norm": 4.79632568359375, "learning_rate": 1.4485654452164581e-05, "loss": 0.3418, "step": 11327 }, { "epoch": 37.14098360655738, "grad_norm": 4.151604175567627, "learning_rate": 1.4484705365393314e-05, "loss": 0.695, "step": 11328 }, { "epoch": 37.144262295081965, "grad_norm": 5.30532693862915, "learning_rate": 1.448375622805233e-05, "loss": 0.4062, "step": 11329 }, { "epoch": 37.14754098360656, "grad_norm": 3.6339316368103027, "learning_rate": 1.4482807040152335e-05, "loss": 0.2552, "step": 11330 }, { "epoch": 37.15081967213115, "grad_norm": 4.435564041137695, "learning_rate": 1.4481857801704027e-05, "loss": 0.2502, "step": 11331 }, { "epoch": 37.15409836065574, "grad_norm": 3.8480472564697266, "learning_rate": 1.4480908512718115e-05, "loss": 0.3363, "step": 11332 }, { "epoch": 37.157377049180326, "grad_norm": 3.7831828594207764, "learning_rate": 1.4479959173205298e-05, "loss": 0.2861, "step": 11333 }, { "epoch": 37.16065573770492, "grad_norm": 4.437252044677734, "learning_rate": 1.4479009783176285e-05, "loss": 0.3861, "step": 11334 }, { "epoch": 37.16393442622951, "grad_norm": 4.065118789672852, "learning_rate": 1.447806034264178e-05, "loss": 0.3464, "step": 11335 }, { "epoch": 37.1672131147541, "grad_norm": 4.357631206512451, "learning_rate": 1.4477110851612488e-05, "loss": 0.5595, "step": 11336 }, { "epoch": 37.170491803278686, "grad_norm": 3.9532663822174072, "learning_rate": 1.4476161310099115e-05, "loss": 0.4139, "step": 11337 }, { "epoch": 37.17377049180328, "grad_norm": 3.9962005615234375, "learning_rate": 1.447521171811237e-05, "loss": 0.5964, "step": 11338 }, { "epoch": 37.17704918032787, "grad_norm": 4.2027153968811035, "learning_rate": 1.4474262075662962e-05, "loss": 0.444, "step": 11339 }, { "epoch": 37.18032786885246, "grad_norm": 4.217596054077148, "learning_rate": 1.4473312382761592e-05, "loss": 0.2795, "step": 11340 }, { "epoch": 37.18360655737705, "grad_norm": 3.9882874488830566, "learning_rate": 1.4472362639418978e-05, "loss": 0.3259, "step": 11341 }, { "epoch": 37.18688524590164, "grad_norm": 4.344355583190918, "learning_rate": 1.4471412845645822e-05, "loss": 0.4489, "step": 11342 }, { "epoch": 37.19016393442623, "grad_norm": 3.514432668685913, "learning_rate": 1.4470463001452841e-05, "loss": 0.4188, "step": 11343 }, { "epoch": 37.19344262295082, "grad_norm": 4.414395332336426, "learning_rate": 1.446951310685074e-05, "loss": 0.4074, "step": 11344 }, { "epoch": 37.19672131147541, "grad_norm": 3.7747340202331543, "learning_rate": 1.4468563161850232e-05, "loss": 0.517, "step": 11345 }, { "epoch": 37.2, "grad_norm": 3.5412604808807373, "learning_rate": 1.4467613166462024e-05, "loss": 0.5369, "step": 11346 }, { "epoch": 37.20327868852459, "grad_norm": 3.798053503036499, "learning_rate": 1.4466663120696837e-05, "loss": 0.5664, "step": 11347 }, { "epoch": 37.20655737704918, "grad_norm": 3.523815393447876, "learning_rate": 1.4465713024565378e-05, "loss": 0.2787, "step": 11348 }, { "epoch": 37.20983606557377, "grad_norm": 3.9917337894439697, "learning_rate": 1.446476287807836e-05, "loss": 0.5986, "step": 11349 }, { "epoch": 37.21311475409836, "grad_norm": 3.9306726455688477, "learning_rate": 1.4463812681246499e-05, "loss": 0.3789, "step": 11350 }, { "epoch": 37.21639344262295, "grad_norm": 3.794943332672119, "learning_rate": 1.4462862434080509e-05, "loss": 0.4122, "step": 11351 }, { "epoch": 37.21967213114754, "grad_norm": 4.166192531585693, "learning_rate": 1.4461912136591106e-05, "loss": 0.4148, "step": 11352 }, { "epoch": 37.22295081967213, "grad_norm": 5.181794166564941, "learning_rate": 1.4460961788789003e-05, "loss": 0.5128, "step": 11353 }, { "epoch": 37.226229508196724, "grad_norm": 3.9666805267333984, "learning_rate": 1.4460011390684918e-05, "loss": 0.3737, "step": 11354 }, { "epoch": 37.22950819672131, "grad_norm": 4.402736663818359, "learning_rate": 1.4459060942289567e-05, "loss": 0.3356, "step": 11355 }, { "epoch": 37.2327868852459, "grad_norm": 4.366949558258057, "learning_rate": 1.445811044361367e-05, "loss": 0.4311, "step": 11356 }, { "epoch": 37.23606557377049, "grad_norm": 7.560091018676758, "learning_rate": 1.4457159894667938e-05, "loss": 0.695, "step": 11357 }, { "epoch": 37.239344262295084, "grad_norm": 3.9600865840911865, "learning_rate": 1.44562092954631e-05, "loss": 0.4285, "step": 11358 }, { "epoch": 37.24262295081967, "grad_norm": 3.7436203956604004, "learning_rate": 1.4455258646009864e-05, "loss": 0.348, "step": 11359 }, { "epoch": 37.24590163934426, "grad_norm": 3.9149084091186523, "learning_rate": 1.4454307946318955e-05, "loss": 0.3198, "step": 11360 }, { "epoch": 37.24918032786885, "grad_norm": 4.125056266784668, "learning_rate": 1.4453357196401096e-05, "loss": 0.4049, "step": 11361 }, { "epoch": 37.252459016393445, "grad_norm": 3.6309943199157715, "learning_rate": 1.4452406396267e-05, "loss": 0.6921, "step": 11362 }, { "epoch": 37.25573770491803, "grad_norm": 4.17243766784668, "learning_rate": 1.4451455545927396e-05, "loss": 0.6551, "step": 11363 }, { "epoch": 37.25901639344262, "grad_norm": 4.084831237792969, "learning_rate": 1.4450504645393e-05, "loss": 0.5822, "step": 11364 }, { "epoch": 37.26229508196721, "grad_norm": 3.5411365032196045, "learning_rate": 1.4449553694674539e-05, "loss": 0.2408, "step": 11365 }, { "epoch": 37.265573770491805, "grad_norm": 4.2928290367126465, "learning_rate": 1.444860269378273e-05, "loss": 0.3858, "step": 11366 }, { "epoch": 37.268852459016394, "grad_norm": 4.074140548706055, "learning_rate": 1.4447651642728304e-05, "loss": 0.6128, "step": 11367 }, { "epoch": 37.27213114754098, "grad_norm": 3.6414828300476074, "learning_rate": 1.4446700541521978e-05, "loss": 0.3315, "step": 11368 }, { "epoch": 37.27540983606557, "grad_norm": 4.03471040725708, "learning_rate": 1.4445749390174484e-05, "loss": 0.4, "step": 11369 }, { "epoch": 37.278688524590166, "grad_norm": 4.3816609382629395, "learning_rate": 1.4444798188696539e-05, "loss": 0.375, "step": 11370 }, { "epoch": 37.281967213114754, "grad_norm": 3.8490149974823, "learning_rate": 1.4443846937098875e-05, "loss": 0.6544, "step": 11371 }, { "epoch": 37.28524590163934, "grad_norm": 3.601839542388916, "learning_rate": 1.4442895635392216e-05, "loss": 0.2867, "step": 11372 }, { "epoch": 37.28852459016394, "grad_norm": 4.0615234375, "learning_rate": 1.444194428358729e-05, "loss": 0.5114, "step": 11373 }, { "epoch": 37.291803278688526, "grad_norm": 4.170941352844238, "learning_rate": 1.4440992881694822e-05, "loss": 0.309, "step": 11374 }, { "epoch": 37.295081967213115, "grad_norm": 4.972428798675537, "learning_rate": 1.4440041429725545e-05, "loss": 0.4324, "step": 11375 }, { "epoch": 37.2983606557377, "grad_norm": 4.427602291107178, "learning_rate": 1.4439089927690183e-05, "loss": 0.3912, "step": 11376 }, { "epoch": 37.3016393442623, "grad_norm": 3.867952585220337, "learning_rate": 1.4438138375599465e-05, "loss": 0.4089, "step": 11377 }, { "epoch": 37.30491803278689, "grad_norm": 3.982896327972412, "learning_rate": 1.4437186773464122e-05, "loss": 0.4523, "step": 11378 }, { "epoch": 37.308196721311475, "grad_norm": 4.788732528686523, "learning_rate": 1.4436235121294886e-05, "loss": 0.8027, "step": 11379 }, { "epoch": 37.31147540983606, "grad_norm": 5.107153415679932, "learning_rate": 1.4435283419102487e-05, "loss": 0.5196, "step": 11380 }, { "epoch": 37.31475409836066, "grad_norm": 4.229018688201904, "learning_rate": 1.4434331666897654e-05, "loss": 0.451, "step": 11381 }, { "epoch": 37.31803278688525, "grad_norm": 3.216728925704956, "learning_rate": 1.4433379864691123e-05, "loss": 0.3817, "step": 11382 }, { "epoch": 37.321311475409836, "grad_norm": 3.5345988273620605, "learning_rate": 1.4432428012493622e-05, "loss": 0.4001, "step": 11383 }, { "epoch": 37.324590163934424, "grad_norm": 4.373704433441162, "learning_rate": 1.4431476110315892e-05, "loss": 0.3369, "step": 11384 }, { "epoch": 37.32786885245902, "grad_norm": 4.048672676086426, "learning_rate": 1.4430524158168655e-05, "loss": 0.4443, "step": 11385 }, { "epoch": 37.33114754098361, "grad_norm": 5.493334770202637, "learning_rate": 1.4429572156062654e-05, "loss": 0.4166, "step": 11386 }, { "epoch": 37.334426229508196, "grad_norm": 4.386414527893066, "learning_rate": 1.4428620104008623e-05, "loss": 0.6193, "step": 11387 }, { "epoch": 37.337704918032784, "grad_norm": 4.928889274597168, "learning_rate": 1.4427668002017294e-05, "loss": 0.3617, "step": 11388 }, { "epoch": 37.34098360655738, "grad_norm": 3.6033456325531006, "learning_rate": 1.4426715850099404e-05, "loss": 0.4911, "step": 11389 }, { "epoch": 37.34426229508197, "grad_norm": 3.824611186981201, "learning_rate": 1.442576364826569e-05, "loss": 0.2897, "step": 11390 }, { "epoch": 37.34754098360656, "grad_norm": 3.805441379547119, "learning_rate": 1.4424811396526892e-05, "loss": 0.5, "step": 11391 }, { "epoch": 37.350819672131145, "grad_norm": 4.542629718780518, "learning_rate": 1.4423859094893744e-05, "loss": 0.5377, "step": 11392 }, { "epoch": 37.35409836065574, "grad_norm": 4.526952743530273, "learning_rate": 1.4422906743376984e-05, "loss": 0.423, "step": 11393 }, { "epoch": 37.35737704918033, "grad_norm": 3.7697200775146484, "learning_rate": 1.4421954341987351e-05, "loss": 0.4858, "step": 11394 }, { "epoch": 37.36065573770492, "grad_norm": 3.892990827560425, "learning_rate": 1.4421001890735586e-05, "loss": 0.6846, "step": 11395 }, { "epoch": 37.363934426229505, "grad_norm": 4.83734655380249, "learning_rate": 1.4420049389632426e-05, "loss": 0.4332, "step": 11396 }, { "epoch": 37.3672131147541, "grad_norm": 6.605886459350586, "learning_rate": 1.4419096838688617e-05, "loss": 0.2838, "step": 11397 }, { "epoch": 37.37049180327869, "grad_norm": 5.577828884124756, "learning_rate": 1.4418144237914896e-05, "loss": 0.6615, "step": 11398 }, { "epoch": 37.37377049180328, "grad_norm": 3.353536605834961, "learning_rate": 1.4417191587322004e-05, "loss": 0.4729, "step": 11399 }, { "epoch": 37.377049180327866, "grad_norm": 3.4240882396698, "learning_rate": 1.4416238886920684e-05, "loss": 0.3643, "step": 11400 }, { "epoch": 37.38032786885246, "grad_norm": 4.415001392364502, "learning_rate": 1.4415286136721678e-05, "loss": 0.3031, "step": 11401 }, { "epoch": 37.38360655737705, "grad_norm": 4.233002662658691, "learning_rate": 1.4414333336735731e-05, "loss": 0.5535, "step": 11402 }, { "epoch": 37.38688524590164, "grad_norm": 3.5050671100616455, "learning_rate": 1.4413380486973585e-05, "loss": 0.4201, "step": 11403 }, { "epoch": 37.390163934426226, "grad_norm": 4.505057334899902, "learning_rate": 1.4412427587445987e-05, "loss": 0.4172, "step": 11404 }, { "epoch": 37.39344262295082, "grad_norm": 3.76886248588562, "learning_rate": 1.4411474638163675e-05, "loss": 0.3209, "step": 11405 }, { "epoch": 37.39672131147541, "grad_norm": 3.694401264190674, "learning_rate": 1.4410521639137407e-05, "loss": 0.7155, "step": 11406 }, { "epoch": 37.4, "grad_norm": 4.682692527770996, "learning_rate": 1.4409568590377918e-05, "loss": 0.5107, "step": 11407 }, { "epoch": 37.40327868852459, "grad_norm": 4.986664295196533, "learning_rate": 1.4408615491895958e-05, "loss": 0.3737, "step": 11408 }, { "epoch": 37.40655737704918, "grad_norm": 4.377413272857666, "learning_rate": 1.4407662343702275e-05, "loss": 0.52, "step": 11409 }, { "epoch": 37.40983606557377, "grad_norm": 4.37222146987915, "learning_rate": 1.4406709145807618e-05, "loss": 0.4264, "step": 11410 }, { "epoch": 37.41311475409836, "grad_norm": 4.173192501068115, "learning_rate": 1.4405755898222733e-05, "loss": 0.3543, "step": 11411 }, { "epoch": 37.41639344262295, "grad_norm": 4.297584056854248, "learning_rate": 1.440480260095837e-05, "loss": 0.5121, "step": 11412 }, { "epoch": 37.41967213114754, "grad_norm": 4.382373809814453, "learning_rate": 1.4403849254025276e-05, "loss": 0.7127, "step": 11413 }, { "epoch": 37.42295081967213, "grad_norm": 4.262608051300049, "learning_rate": 1.4402895857434202e-05, "loss": 0.4517, "step": 11414 }, { "epoch": 37.42622950819672, "grad_norm": 3.6596453189849854, "learning_rate": 1.4401942411195902e-05, "loss": 0.6609, "step": 11415 }, { "epoch": 37.429508196721315, "grad_norm": 3.893510341644287, "learning_rate": 1.4400988915321122e-05, "loss": 0.4606, "step": 11416 }, { "epoch": 37.4327868852459, "grad_norm": 3.734610080718994, "learning_rate": 1.4400035369820617e-05, "loss": 0.573, "step": 11417 }, { "epoch": 37.43606557377049, "grad_norm": 3.7817533016204834, "learning_rate": 1.439908177470514e-05, "loss": 0.4763, "step": 11418 }, { "epoch": 37.43934426229508, "grad_norm": 4.292201519012451, "learning_rate": 1.439812812998544e-05, "loss": 0.3749, "step": 11419 }, { "epoch": 37.442622950819676, "grad_norm": 4.084265232086182, "learning_rate": 1.4397174435672273e-05, "loss": 0.6494, "step": 11420 }, { "epoch": 37.445901639344264, "grad_norm": 4.280900001525879, "learning_rate": 1.4396220691776393e-05, "loss": 0.2735, "step": 11421 }, { "epoch": 37.44918032786885, "grad_norm": 5.80042839050293, "learning_rate": 1.439526689830855e-05, "loss": 0.3126, "step": 11422 }, { "epoch": 37.45245901639344, "grad_norm": 4.070262908935547, "learning_rate": 1.4394313055279507e-05, "loss": 0.5479, "step": 11423 }, { "epoch": 37.455737704918036, "grad_norm": 4.253600597381592, "learning_rate": 1.4393359162700014e-05, "loss": 0.342, "step": 11424 }, { "epoch": 37.459016393442624, "grad_norm": 4.162567615509033, "learning_rate": 1.439240522058083e-05, "loss": 0.5258, "step": 11425 }, { "epoch": 37.46229508196721, "grad_norm": 4.0937066078186035, "learning_rate": 1.439145122893271e-05, "loss": 0.2737, "step": 11426 }, { "epoch": 37.4655737704918, "grad_norm": 4.197179794311523, "learning_rate": 1.4390497187766409e-05, "loss": 0.4904, "step": 11427 }, { "epoch": 37.4688524590164, "grad_norm": 3.4085798263549805, "learning_rate": 1.4389543097092689e-05, "loss": 0.3206, "step": 11428 }, { "epoch": 37.472131147540985, "grad_norm": 3.8279221057891846, "learning_rate": 1.4388588956922304e-05, "loss": 0.3503, "step": 11429 }, { "epoch": 37.47540983606557, "grad_norm": 5.248456954956055, "learning_rate": 1.4387634767266019e-05, "loss": 0.5214, "step": 11430 }, { "epoch": 37.47868852459016, "grad_norm": 4.364169120788574, "learning_rate": 1.4386680528134586e-05, "loss": 0.437, "step": 11431 }, { "epoch": 37.48196721311476, "grad_norm": 11.092743873596191, "learning_rate": 1.4385726239538772e-05, "loss": 0.5918, "step": 11432 }, { "epoch": 37.485245901639345, "grad_norm": 4.4402265548706055, "learning_rate": 1.4384771901489336e-05, "loss": 0.4963, "step": 11433 }, { "epoch": 37.488524590163934, "grad_norm": 3.3634941577911377, "learning_rate": 1.4383817513997035e-05, "loss": 0.316, "step": 11434 }, { "epoch": 37.49180327868852, "grad_norm": 3.9634978771209717, "learning_rate": 1.4382863077072635e-05, "loss": 0.366, "step": 11435 }, { "epoch": 37.49508196721312, "grad_norm": 4.265398979187012, "learning_rate": 1.4381908590726897e-05, "loss": 0.3168, "step": 11436 }, { "epoch": 37.498360655737706, "grad_norm": 4.448906421661377, "learning_rate": 1.4380954054970584e-05, "loss": 0.4024, "step": 11437 }, { "epoch": 37.501639344262294, "grad_norm": 4.753836631774902, "learning_rate": 1.4379999469814458e-05, "loss": 0.5356, "step": 11438 }, { "epoch": 37.50491803278688, "grad_norm": 4.4282450675964355, "learning_rate": 1.4379044835269285e-05, "loss": 0.5919, "step": 11439 }, { "epoch": 37.50819672131148, "grad_norm": 4.219893932342529, "learning_rate": 1.4378090151345827e-05, "loss": 0.7091, "step": 11440 }, { "epoch": 37.511475409836066, "grad_norm": 4.605697154998779, "learning_rate": 1.437713541805485e-05, "loss": 0.3561, "step": 11441 }, { "epoch": 37.514754098360655, "grad_norm": 3.899204730987549, "learning_rate": 1.437618063540712e-05, "loss": 0.434, "step": 11442 }, { "epoch": 37.51803278688524, "grad_norm": 4.147603988647461, "learning_rate": 1.4375225803413406e-05, "loss": 0.3942, "step": 11443 }, { "epoch": 37.52131147540984, "grad_norm": 3.836968421936035, "learning_rate": 1.4374270922084468e-05, "loss": 0.2745, "step": 11444 }, { "epoch": 37.52459016393443, "grad_norm": 5.344334125518799, "learning_rate": 1.437331599143108e-05, "loss": 0.6187, "step": 11445 }, { "epoch": 37.527868852459015, "grad_norm": 3.9885146617889404, "learning_rate": 1.4372361011464006e-05, "loss": 0.4828, "step": 11446 }, { "epoch": 37.5311475409836, "grad_norm": 4.829722881317139, "learning_rate": 1.4371405982194018e-05, "loss": 0.2793, "step": 11447 }, { "epoch": 37.5344262295082, "grad_norm": 3.6262311935424805, "learning_rate": 1.4370450903631879e-05, "loss": 0.5231, "step": 11448 }, { "epoch": 37.53770491803279, "grad_norm": 3.94488525390625, "learning_rate": 1.4369495775788364e-05, "loss": 0.5853, "step": 11449 }, { "epoch": 37.540983606557376, "grad_norm": 3.7902750968933105, "learning_rate": 1.4368540598674238e-05, "loss": 0.6857, "step": 11450 }, { "epoch": 37.544262295081964, "grad_norm": 4.324631214141846, "learning_rate": 1.436758537230028e-05, "loss": 0.5352, "step": 11451 }, { "epoch": 37.54754098360656, "grad_norm": 4.240448951721191, "learning_rate": 1.4366630096677252e-05, "loss": 0.5295, "step": 11452 }, { "epoch": 37.55081967213115, "grad_norm": 8.573121070861816, "learning_rate": 1.436567477181593e-05, "loss": 0.4011, "step": 11453 }, { "epoch": 37.554098360655736, "grad_norm": 4.533307075500488, "learning_rate": 1.4364719397727085e-05, "loss": 0.4039, "step": 11454 }, { "epoch": 37.557377049180324, "grad_norm": 3.818110704421997, "learning_rate": 1.4363763974421492e-05, "loss": 0.4978, "step": 11455 }, { "epoch": 37.56065573770492, "grad_norm": 5.726572513580322, "learning_rate": 1.4362808501909921e-05, "loss": 0.4907, "step": 11456 }, { "epoch": 37.56393442622951, "grad_norm": 3.882281541824341, "learning_rate": 1.4361852980203153e-05, "loss": 0.448, "step": 11457 }, { "epoch": 37.5672131147541, "grad_norm": 4.3030900955200195, "learning_rate": 1.4360897409311951e-05, "loss": 0.4129, "step": 11458 }, { "epoch": 37.570491803278685, "grad_norm": 3.956315755844116, "learning_rate": 1.4359941789247098e-05, "loss": 0.2243, "step": 11459 }, { "epoch": 37.57377049180328, "grad_norm": 4.518610954284668, "learning_rate": 1.4358986120019372e-05, "loss": 0.4437, "step": 11460 }, { "epoch": 37.57704918032787, "grad_norm": 5.228852272033691, "learning_rate": 1.4358030401639543e-05, "loss": 0.492, "step": 11461 }, { "epoch": 37.58032786885246, "grad_norm": 4.595433712005615, "learning_rate": 1.4357074634118391e-05, "loss": 0.4662, "step": 11462 }, { "epoch": 37.58360655737705, "grad_norm": 4.863694667816162, "learning_rate": 1.4356118817466689e-05, "loss": 0.4519, "step": 11463 }, { "epoch": 37.58688524590164, "grad_norm": 4.432868957519531, "learning_rate": 1.435516295169522e-05, "loss": 0.5059, "step": 11464 }, { "epoch": 37.59016393442623, "grad_norm": 4.35555362701416, "learning_rate": 1.4354207036814764e-05, "loss": 0.5439, "step": 11465 }, { "epoch": 37.59344262295082, "grad_norm": 4.377981662750244, "learning_rate": 1.4353251072836092e-05, "loss": 0.3361, "step": 11466 }, { "epoch": 37.59672131147541, "grad_norm": 8.352417945861816, "learning_rate": 1.435229505976999e-05, "loss": 0.4491, "step": 11467 }, { "epoch": 37.6, "grad_norm": 4.275842189788818, "learning_rate": 1.4351338997627233e-05, "loss": 0.5128, "step": 11468 }, { "epoch": 37.60327868852459, "grad_norm": 3.8385257720947266, "learning_rate": 1.4350382886418609e-05, "loss": 0.4515, "step": 11469 }, { "epoch": 37.60655737704918, "grad_norm": 4.203395366668701, "learning_rate": 1.4349426726154891e-05, "loss": 0.6896, "step": 11470 }, { "epoch": 37.609836065573774, "grad_norm": 4.491133689880371, "learning_rate": 1.4348470516846866e-05, "loss": 0.4331, "step": 11471 }, { "epoch": 37.61311475409836, "grad_norm": 4.236077785491943, "learning_rate": 1.4347514258505314e-05, "loss": 0.5372, "step": 11472 }, { "epoch": 37.61639344262295, "grad_norm": 4.280946254730225, "learning_rate": 1.434655795114102e-05, "loss": 0.4789, "step": 11473 }, { "epoch": 37.61967213114754, "grad_norm": 3.812354564666748, "learning_rate": 1.4345601594764765e-05, "loss": 0.3534, "step": 11474 }, { "epoch": 37.622950819672134, "grad_norm": 3.808526039123535, "learning_rate": 1.4344645189387332e-05, "loss": 0.4359, "step": 11475 }, { "epoch": 37.62622950819672, "grad_norm": 3.8448503017425537, "learning_rate": 1.4343688735019508e-05, "loss": 0.4816, "step": 11476 }, { "epoch": 37.62950819672131, "grad_norm": 3.962559700012207, "learning_rate": 1.4342732231672078e-05, "loss": 0.4891, "step": 11477 }, { "epoch": 37.6327868852459, "grad_norm": 3.892357349395752, "learning_rate": 1.4341775679355827e-05, "loss": 0.3366, "step": 11478 }, { "epoch": 37.636065573770495, "grad_norm": 3.8529951572418213, "learning_rate": 1.434081907808154e-05, "loss": 0.4818, "step": 11479 }, { "epoch": 37.63934426229508, "grad_norm": 3.8673765659332275, "learning_rate": 1.4339862427860007e-05, "loss": 0.5448, "step": 11480 }, { "epoch": 37.64262295081967, "grad_norm": 4.238547325134277, "learning_rate": 1.433890572870201e-05, "loss": 0.5405, "step": 11481 }, { "epoch": 37.64590163934426, "grad_norm": 5.111361026763916, "learning_rate": 1.4337948980618339e-05, "loss": 0.4353, "step": 11482 }, { "epoch": 37.649180327868855, "grad_norm": 3.6692683696746826, "learning_rate": 1.4336992183619785e-05, "loss": 0.3306, "step": 11483 }, { "epoch": 37.65245901639344, "grad_norm": 3.8169174194335938, "learning_rate": 1.4336035337717134e-05, "loss": 0.3783, "step": 11484 }, { "epoch": 37.65573770491803, "grad_norm": 3.83793044090271, "learning_rate": 1.4335078442921176e-05, "loss": 0.6279, "step": 11485 }, { "epoch": 37.65901639344262, "grad_norm": 4.627160549163818, "learning_rate": 1.4334121499242701e-05, "loss": 0.5826, "step": 11486 }, { "epoch": 37.662295081967216, "grad_norm": 4.021293640136719, "learning_rate": 1.4333164506692499e-05, "loss": 0.4251, "step": 11487 }, { "epoch": 37.665573770491804, "grad_norm": 4.352323055267334, "learning_rate": 1.4332207465281365e-05, "loss": 0.3618, "step": 11488 }, { "epoch": 37.66885245901639, "grad_norm": 4.372779846191406, "learning_rate": 1.4331250375020083e-05, "loss": 0.3975, "step": 11489 }, { "epoch": 37.67213114754098, "grad_norm": 5.93162727355957, "learning_rate": 1.4330293235919455e-05, "loss": 0.5298, "step": 11490 }, { "epoch": 37.675409836065576, "grad_norm": 4.393845081329346, "learning_rate": 1.4329336047990265e-05, "loss": 0.4176, "step": 11491 }, { "epoch": 37.678688524590164, "grad_norm": 4.799495697021484, "learning_rate": 1.4328378811243311e-05, "loss": 0.6816, "step": 11492 }, { "epoch": 37.68196721311475, "grad_norm": 3.8486227989196777, "learning_rate": 1.4327421525689384e-05, "loss": 0.4451, "step": 11493 }, { "epoch": 37.68524590163934, "grad_norm": 4.168676853179932, "learning_rate": 1.432646419133928e-05, "loss": 0.4025, "step": 11494 }, { "epoch": 37.68852459016394, "grad_norm": 4.2010498046875, "learning_rate": 1.4325506808203795e-05, "loss": 0.3115, "step": 11495 }, { "epoch": 37.691803278688525, "grad_norm": 4.71823263168335, "learning_rate": 1.4324549376293723e-05, "loss": 0.525, "step": 11496 }, { "epoch": 37.69508196721311, "grad_norm": 4.8779072761535645, "learning_rate": 1.4323591895619862e-05, "loss": 0.6144, "step": 11497 }, { "epoch": 37.6983606557377, "grad_norm": 4.5404438972473145, "learning_rate": 1.4322634366193003e-05, "loss": 0.6901, "step": 11498 }, { "epoch": 37.7016393442623, "grad_norm": 4.327544689178467, "learning_rate": 1.4321676788023951e-05, "loss": 0.3864, "step": 11499 }, { "epoch": 37.704918032786885, "grad_norm": 3.9560227394104004, "learning_rate": 1.4320719161123497e-05, "loss": 0.3647, "step": 11500 }, { "epoch": 37.708196721311474, "grad_norm": 4.188723564147949, "learning_rate": 1.4319761485502444e-05, "loss": 0.41, "step": 11501 }, { "epoch": 37.71147540983607, "grad_norm": 3.892138957977295, "learning_rate": 1.4318803761171586e-05, "loss": 0.3538, "step": 11502 }, { "epoch": 37.71475409836066, "grad_norm": 4.488576889038086, "learning_rate": 1.431784598814173e-05, "loss": 0.3877, "step": 11503 }, { "epoch": 37.718032786885246, "grad_norm": 4.318531036376953, "learning_rate": 1.4316888166423669e-05, "loss": 0.3596, "step": 11504 }, { "epoch": 37.721311475409834, "grad_norm": 3.8514034748077393, "learning_rate": 1.4315930296028201e-05, "loss": 0.3586, "step": 11505 }, { "epoch": 37.72459016393443, "grad_norm": 3.9228358268737793, "learning_rate": 1.431497237696614e-05, "loss": 0.7305, "step": 11506 }, { "epoch": 37.72786885245902, "grad_norm": 4.3687944412231445, "learning_rate": 1.4314014409248272e-05, "loss": 0.3188, "step": 11507 }, { "epoch": 37.731147540983606, "grad_norm": 4.180024147033691, "learning_rate": 1.4313056392885409e-05, "loss": 0.4694, "step": 11508 }, { "epoch": 37.734426229508195, "grad_norm": 4.328769207000732, "learning_rate": 1.4312098327888349e-05, "loss": 0.6355, "step": 11509 }, { "epoch": 37.73770491803279, "grad_norm": 5.508169651031494, "learning_rate": 1.4311140214267896e-05, "loss": 0.3994, "step": 11510 }, { "epoch": 37.74098360655738, "grad_norm": 4.654092788696289, "learning_rate": 1.4310182052034858e-05, "loss": 0.4004, "step": 11511 }, { "epoch": 37.74426229508197, "grad_norm": 4.20170783996582, "learning_rate": 1.4309223841200032e-05, "loss": 0.5054, "step": 11512 }, { "epoch": 37.747540983606555, "grad_norm": 4.6825785636901855, "learning_rate": 1.4308265581774228e-05, "loss": 0.5817, "step": 11513 }, { "epoch": 37.75081967213115, "grad_norm": 4.090367317199707, "learning_rate": 1.4307307273768252e-05, "loss": 0.3511, "step": 11514 }, { "epoch": 37.75409836065574, "grad_norm": 4.645042896270752, "learning_rate": 1.4306348917192907e-05, "loss": 0.4159, "step": 11515 }, { "epoch": 37.75737704918033, "grad_norm": 5.212395668029785, "learning_rate": 1.4305390512059e-05, "loss": 0.4977, "step": 11516 }, { "epoch": 37.760655737704916, "grad_norm": 3.6961634159088135, "learning_rate": 1.4304432058377342e-05, "loss": 0.4298, "step": 11517 }, { "epoch": 37.76393442622951, "grad_norm": 4.349883079528809, "learning_rate": 1.4303473556158734e-05, "loss": 0.3003, "step": 11518 }, { "epoch": 37.7672131147541, "grad_norm": 4.398117542266846, "learning_rate": 1.4302515005413989e-05, "loss": 0.4036, "step": 11519 }, { "epoch": 37.77049180327869, "grad_norm": 3.937034845352173, "learning_rate": 1.430155640615391e-05, "loss": 0.4378, "step": 11520 }, { "epoch": 37.773770491803276, "grad_norm": 4.711793422698975, "learning_rate": 1.4300597758389315e-05, "loss": 0.4241, "step": 11521 }, { "epoch": 37.77704918032787, "grad_norm": 4.213230133056641, "learning_rate": 1.4299639062131003e-05, "loss": 0.4733, "step": 11522 }, { "epoch": 37.78032786885246, "grad_norm": 4.425346851348877, "learning_rate": 1.4298680317389794e-05, "loss": 0.359, "step": 11523 }, { "epoch": 37.78360655737705, "grad_norm": 4.793191432952881, "learning_rate": 1.4297721524176493e-05, "loss": 0.6544, "step": 11524 }, { "epoch": 37.78688524590164, "grad_norm": 4.653625011444092, "learning_rate": 1.4296762682501914e-05, "loss": 0.5719, "step": 11525 }, { "epoch": 37.79016393442623, "grad_norm": 4.6836395263671875, "learning_rate": 1.4295803792376868e-05, "loss": 0.502, "step": 11526 }, { "epoch": 37.79344262295082, "grad_norm": 3.8961434364318848, "learning_rate": 1.4294844853812171e-05, "loss": 0.7614, "step": 11527 }, { "epoch": 37.79672131147541, "grad_norm": 4.024440288543701, "learning_rate": 1.429388586681863e-05, "loss": 0.4092, "step": 11528 }, { "epoch": 37.8, "grad_norm": 12.254985809326172, "learning_rate": 1.429292683140706e-05, "loss": 0.4922, "step": 11529 }, { "epoch": 37.80327868852459, "grad_norm": 3.967529773712158, "learning_rate": 1.429196774758828e-05, "loss": 0.3367, "step": 11530 }, { "epoch": 37.80655737704918, "grad_norm": 3.760561227798462, "learning_rate": 1.4291008615373096e-05, "loss": 0.5319, "step": 11531 }, { "epoch": 37.80983606557377, "grad_norm": 4.473695278167725, "learning_rate": 1.4290049434772333e-05, "loss": 0.4789, "step": 11532 }, { "epoch": 37.81311475409836, "grad_norm": 4.135116100311279, "learning_rate": 1.4289090205796798e-05, "loss": 0.2208, "step": 11533 }, { "epoch": 37.81639344262295, "grad_norm": 4.37799072265625, "learning_rate": 1.4288130928457315e-05, "loss": 0.5204, "step": 11534 }, { "epoch": 37.81967213114754, "grad_norm": 4.482376575469971, "learning_rate": 1.4287171602764695e-05, "loss": 0.3368, "step": 11535 }, { "epoch": 37.82295081967213, "grad_norm": 5.087439060211182, "learning_rate": 1.4286212228729758e-05, "loss": 0.3423, "step": 11536 }, { "epoch": 37.82622950819672, "grad_norm": 4.405075550079346, "learning_rate": 1.4285252806363323e-05, "loss": 0.5479, "step": 11537 }, { "epoch": 37.829508196721314, "grad_norm": 3.8669278621673584, "learning_rate": 1.4284293335676207e-05, "loss": 0.4104, "step": 11538 }, { "epoch": 37.8327868852459, "grad_norm": 4.240614891052246, "learning_rate": 1.4283333816679228e-05, "loss": 0.5079, "step": 11539 }, { "epoch": 37.83606557377049, "grad_norm": 5.412987232208252, "learning_rate": 1.4282374249383206e-05, "loss": 0.4135, "step": 11540 }, { "epoch": 37.83934426229508, "grad_norm": 4.3208723068237305, "learning_rate": 1.4281414633798964e-05, "loss": 0.4478, "step": 11541 }, { "epoch": 37.842622950819674, "grad_norm": 4.52449369430542, "learning_rate": 1.4280454969937319e-05, "loss": 0.5951, "step": 11542 }, { "epoch": 37.84590163934426, "grad_norm": 3.9811322689056396, "learning_rate": 1.4279495257809095e-05, "loss": 0.3878, "step": 11543 }, { "epoch": 37.84918032786885, "grad_norm": 4.271897315979004, "learning_rate": 1.427853549742511e-05, "loss": 0.6547, "step": 11544 }, { "epoch": 37.85245901639344, "grad_norm": 4.038230895996094, "learning_rate": 1.427757568879619e-05, "loss": 0.3311, "step": 11545 }, { "epoch": 37.855737704918035, "grad_norm": 4.058940410614014, "learning_rate": 1.4276615831933156e-05, "loss": 0.3608, "step": 11546 }, { "epoch": 37.85901639344262, "grad_norm": 4.153714656829834, "learning_rate": 1.4275655926846834e-05, "loss": 0.6079, "step": 11547 }, { "epoch": 37.86229508196721, "grad_norm": 5.316329479217529, "learning_rate": 1.4274695973548043e-05, "loss": 0.321, "step": 11548 }, { "epoch": 37.86557377049181, "grad_norm": 3.927157402038574, "learning_rate": 1.4273735972047613e-05, "loss": 0.2957, "step": 11549 }, { "epoch": 37.868852459016395, "grad_norm": 4.1946845054626465, "learning_rate": 1.4272775922356363e-05, "loss": 0.33, "step": 11550 }, { "epoch": 37.87213114754098, "grad_norm": 4.151603698730469, "learning_rate": 1.4271815824485127e-05, "loss": 0.422, "step": 11551 }, { "epoch": 37.87540983606557, "grad_norm": 4.39167594909668, "learning_rate": 1.427085567844472e-05, "loss": 0.5767, "step": 11552 }, { "epoch": 37.87868852459017, "grad_norm": 3.923696517944336, "learning_rate": 1.426989548424598e-05, "loss": 0.4577, "step": 11553 }, { "epoch": 37.881967213114756, "grad_norm": 4.686347484588623, "learning_rate": 1.4268935241899727e-05, "loss": 0.6591, "step": 11554 }, { "epoch": 37.885245901639344, "grad_norm": 6.324679374694824, "learning_rate": 1.426797495141679e-05, "loss": 0.4271, "step": 11555 }, { "epoch": 37.88852459016393, "grad_norm": 3.9220330715179443, "learning_rate": 1.4267014612807997e-05, "loss": 0.2399, "step": 11556 }, { "epoch": 37.89180327868853, "grad_norm": 3.7262251377105713, "learning_rate": 1.4266054226084178e-05, "loss": 0.4616, "step": 11557 }, { "epoch": 37.895081967213116, "grad_norm": 4.550666809082031, "learning_rate": 1.4265093791256164e-05, "loss": 0.4892, "step": 11558 }, { "epoch": 37.898360655737704, "grad_norm": 4.215848922729492, "learning_rate": 1.426413330833478e-05, "loss": 0.3949, "step": 11559 }, { "epoch": 37.90163934426229, "grad_norm": 4.200677871704102, "learning_rate": 1.4263172777330861e-05, "loss": 0.2165, "step": 11560 }, { "epoch": 37.90491803278689, "grad_norm": 3.6495015621185303, "learning_rate": 1.4262212198255236e-05, "loss": 0.5195, "step": 11561 }, { "epoch": 37.90819672131148, "grad_norm": 4.5899553298950195, "learning_rate": 1.4261251571118737e-05, "loss": 0.5194, "step": 11562 }, { "epoch": 37.911475409836065, "grad_norm": 4.000706195831299, "learning_rate": 1.4260290895932196e-05, "loss": 0.4849, "step": 11563 }, { "epoch": 37.91475409836065, "grad_norm": 4.953778266906738, "learning_rate": 1.4259330172706443e-05, "loss": 0.4103, "step": 11564 }, { "epoch": 37.91803278688525, "grad_norm": 5.233482837677002, "learning_rate": 1.4258369401452318e-05, "loss": 0.4815, "step": 11565 }, { "epoch": 37.92131147540984, "grad_norm": 4.408151149749756, "learning_rate": 1.4257408582180648e-05, "loss": 0.5769, "step": 11566 }, { "epoch": 37.924590163934425, "grad_norm": 4.451292514801025, "learning_rate": 1.425644771490227e-05, "loss": 0.2733, "step": 11567 }, { "epoch": 37.927868852459014, "grad_norm": 5.122498989105225, "learning_rate": 1.4255486799628018e-05, "loss": 0.4108, "step": 11568 }, { "epoch": 37.93114754098361, "grad_norm": 3.925205945968628, "learning_rate": 1.4254525836368732e-05, "loss": 0.651, "step": 11569 }, { "epoch": 37.9344262295082, "grad_norm": 3.556279182434082, "learning_rate": 1.4253564825135238e-05, "loss": 0.4099, "step": 11570 }, { "epoch": 37.937704918032786, "grad_norm": 3.713507890701294, "learning_rate": 1.425260376593838e-05, "loss": 0.2708, "step": 11571 }, { "epoch": 37.940983606557374, "grad_norm": 4.284059524536133, "learning_rate": 1.4251642658788991e-05, "loss": 0.6355, "step": 11572 }, { "epoch": 37.94426229508197, "grad_norm": 4.553977966308594, "learning_rate": 1.4250681503697914e-05, "loss": 0.6668, "step": 11573 }, { "epoch": 37.94754098360656, "grad_norm": 4.340782642364502, "learning_rate": 1.4249720300675983e-05, "loss": 0.3621, "step": 11574 }, { "epoch": 37.950819672131146, "grad_norm": 4.7199931144714355, "learning_rate": 1.4248759049734035e-05, "loss": 0.4994, "step": 11575 }, { "epoch": 37.954098360655735, "grad_norm": 4.178985595703125, "learning_rate": 1.424779775088291e-05, "loss": 0.3404, "step": 11576 }, { "epoch": 37.95737704918033, "grad_norm": 4.611755847930908, "learning_rate": 1.4246836404133453e-05, "loss": 0.5325, "step": 11577 }, { "epoch": 37.96065573770492, "grad_norm": 4.319588661193848, "learning_rate": 1.4245875009496497e-05, "loss": 0.3479, "step": 11578 }, { "epoch": 37.96393442622951, "grad_norm": 5.4383225440979, "learning_rate": 1.4244913566982886e-05, "loss": 0.4364, "step": 11579 }, { "epoch": 37.967213114754095, "grad_norm": 4.835711479187012, "learning_rate": 1.4243952076603462e-05, "loss": 0.454, "step": 11580 }, { "epoch": 37.97049180327869, "grad_norm": 6.1997575759887695, "learning_rate": 1.4242990538369066e-05, "loss": 0.399, "step": 11581 }, { "epoch": 37.97377049180328, "grad_norm": 4.3005452156066895, "learning_rate": 1.4242028952290538e-05, "loss": 0.4701, "step": 11582 }, { "epoch": 37.97704918032787, "grad_norm": 4.814422130584717, "learning_rate": 1.4241067318378722e-05, "loss": 0.4207, "step": 11583 }, { "epoch": 37.980327868852456, "grad_norm": 3.929945230484009, "learning_rate": 1.4240105636644468e-05, "loss": 0.389, "step": 11584 }, { "epoch": 37.98360655737705, "grad_norm": 3.179764986038208, "learning_rate": 1.423914390709861e-05, "loss": 0.2954, "step": 11585 }, { "epoch": 37.98688524590164, "grad_norm": 4.135224342346191, "learning_rate": 1.4238182129751996e-05, "loss": 0.6129, "step": 11586 }, { "epoch": 37.99016393442623, "grad_norm": 3.9776546955108643, "learning_rate": 1.4237220304615473e-05, "loss": 0.3726, "step": 11587 }, { "epoch": 37.993442622950816, "grad_norm": 4.961541175842285, "learning_rate": 1.4236258431699887e-05, "loss": 0.3914, "step": 11588 }, { "epoch": 37.99672131147541, "grad_norm": 4.9995551109313965, "learning_rate": 1.4235296511016081e-05, "loss": 0.3549, "step": 11589 }, { "epoch": 38.0, "grad_norm": 3.9540581703186035, "learning_rate": 1.4234334542574906e-05, "loss": 0.399, "step": 11590 }, { "epoch": 38.00327868852459, "grad_norm": 3.7561087608337402, "learning_rate": 1.4233372526387202e-05, "loss": 0.6201, "step": 11591 }, { "epoch": 38.006557377049184, "grad_norm": 4.398841857910156, "learning_rate": 1.4232410462463821e-05, "loss": 0.442, "step": 11592 }, { "epoch": 38.00983606557377, "grad_norm": 3.799541711807251, "learning_rate": 1.4231448350815616e-05, "loss": 0.6686, "step": 11593 }, { "epoch": 38.01311475409836, "grad_norm": 4.2292962074279785, "learning_rate": 1.4230486191453428e-05, "loss": 0.3112, "step": 11594 }, { "epoch": 38.01639344262295, "grad_norm": 5.140963077545166, "learning_rate": 1.4229523984388111e-05, "loss": 0.4017, "step": 11595 }, { "epoch": 38.019672131147544, "grad_norm": 4.324506759643555, "learning_rate": 1.4228561729630513e-05, "loss": 0.3342, "step": 11596 }, { "epoch": 38.02295081967213, "grad_norm": 3.8969531059265137, "learning_rate": 1.4227599427191485e-05, "loss": 0.4337, "step": 11597 }, { "epoch": 38.02622950819672, "grad_norm": 4.739080905914307, "learning_rate": 1.4226637077081877e-05, "loss": 0.4281, "step": 11598 }, { "epoch": 38.02950819672131, "grad_norm": 4.3314433097839355, "learning_rate": 1.4225674679312542e-05, "loss": 0.3481, "step": 11599 }, { "epoch": 38.032786885245905, "grad_norm": 3.7769858837127686, "learning_rate": 1.422471223389433e-05, "loss": 0.3367, "step": 11600 }, { "epoch": 38.03606557377049, "grad_norm": 3.8248414993286133, "learning_rate": 1.42237497408381e-05, "loss": 0.6881, "step": 11601 }, { "epoch": 38.03934426229508, "grad_norm": 6.982877731323242, "learning_rate": 1.4222787200154695e-05, "loss": 0.3863, "step": 11602 }, { "epoch": 38.04262295081967, "grad_norm": 4.781035423278809, "learning_rate": 1.4221824611854975e-05, "loss": 0.3699, "step": 11603 }, { "epoch": 38.045901639344265, "grad_norm": 4.760797500610352, "learning_rate": 1.4220861975949793e-05, "loss": 0.5194, "step": 11604 }, { "epoch": 38.049180327868854, "grad_norm": 3.563729763031006, "learning_rate": 1.4219899292450005e-05, "loss": 0.2712, "step": 11605 }, { "epoch": 38.05245901639344, "grad_norm": 4.070822715759277, "learning_rate": 1.4218936561366465e-05, "loss": 0.2823, "step": 11606 }, { "epoch": 38.05573770491803, "grad_norm": 4.482203960418701, "learning_rate": 1.4217973782710025e-05, "loss": 0.807, "step": 11607 }, { "epoch": 38.059016393442626, "grad_norm": 4.185330867767334, "learning_rate": 1.4217010956491551e-05, "loss": 0.4926, "step": 11608 }, { "epoch": 38.062295081967214, "grad_norm": 4.661567211151123, "learning_rate": 1.4216048082721887e-05, "loss": 0.363, "step": 11609 }, { "epoch": 38.0655737704918, "grad_norm": 5.464538097381592, "learning_rate": 1.42150851614119e-05, "loss": 0.2634, "step": 11610 }, { "epoch": 38.06885245901639, "grad_norm": 7.03654146194458, "learning_rate": 1.4214122192572446e-05, "loss": 0.6765, "step": 11611 }, { "epoch": 38.072131147540986, "grad_norm": 4.697499752044678, "learning_rate": 1.4213159176214384e-05, "loss": 0.2734, "step": 11612 }, { "epoch": 38.075409836065575, "grad_norm": 3.9305853843688965, "learning_rate": 1.4212196112348568e-05, "loss": 0.245, "step": 11613 }, { "epoch": 38.07868852459016, "grad_norm": 4.055503845214844, "learning_rate": 1.4211233000985867e-05, "loss": 0.5757, "step": 11614 }, { "epoch": 38.08196721311475, "grad_norm": 3.7050063610076904, "learning_rate": 1.421026984213713e-05, "loss": 0.3184, "step": 11615 }, { "epoch": 38.08524590163935, "grad_norm": 5.1503825187683105, "learning_rate": 1.4209306635813225e-05, "loss": 0.4135, "step": 11616 }, { "epoch": 38.088524590163935, "grad_norm": 3.783228874206543, "learning_rate": 1.4208343382025009e-05, "loss": 0.437, "step": 11617 }, { "epoch": 38.09180327868852, "grad_norm": 3.883500814437866, "learning_rate": 1.4207380080783346e-05, "loss": 0.3821, "step": 11618 }, { "epoch": 38.09508196721311, "grad_norm": 3.68715238571167, "learning_rate": 1.4206416732099101e-05, "loss": 0.468, "step": 11619 }, { "epoch": 38.09836065573771, "grad_norm": 5.169449806213379, "learning_rate": 1.4205453335983133e-05, "loss": 0.5633, "step": 11620 }, { "epoch": 38.101639344262296, "grad_norm": 2.9985170364379883, "learning_rate": 1.4204489892446308e-05, "loss": 0.2997, "step": 11621 }, { "epoch": 38.104918032786884, "grad_norm": 4.5374531745910645, "learning_rate": 1.4203526401499482e-05, "loss": 0.297, "step": 11622 }, { "epoch": 38.10819672131147, "grad_norm": 3.856886863708496, "learning_rate": 1.420256286315353e-05, "loss": 0.2796, "step": 11623 }, { "epoch": 38.11147540983607, "grad_norm": 3.9768483638763428, "learning_rate": 1.420159927741931e-05, "loss": 0.4622, "step": 11624 }, { "epoch": 38.114754098360656, "grad_norm": 9.61745834350586, "learning_rate": 1.420063564430769e-05, "loss": 0.4506, "step": 11625 }, { "epoch": 38.118032786885244, "grad_norm": 4.550299644470215, "learning_rate": 1.4199671963829536e-05, "loss": 0.6454, "step": 11626 }, { "epoch": 38.12131147540983, "grad_norm": 4.368936061859131, "learning_rate": 1.4198708235995712e-05, "loss": 0.4382, "step": 11627 }, { "epoch": 38.12459016393443, "grad_norm": 3.914292573928833, "learning_rate": 1.4197744460817089e-05, "loss": 0.4573, "step": 11628 }, { "epoch": 38.12786885245902, "grad_norm": 4.107287883758545, "learning_rate": 1.4196780638304535e-05, "loss": 0.4534, "step": 11629 }, { "epoch": 38.131147540983605, "grad_norm": 3.5926575660705566, "learning_rate": 1.4195816768468911e-05, "loss": 0.305, "step": 11630 }, { "epoch": 38.13442622950819, "grad_norm": 3.8453376293182373, "learning_rate": 1.4194852851321095e-05, "loss": 0.484, "step": 11631 }, { "epoch": 38.13770491803279, "grad_norm": 4.041004657745361, "learning_rate": 1.419388888687195e-05, "loss": 0.3832, "step": 11632 }, { "epoch": 38.14098360655738, "grad_norm": 4.772102355957031, "learning_rate": 1.4192924875132345e-05, "loss": 0.2787, "step": 11633 }, { "epoch": 38.144262295081965, "grad_norm": 3.836263656616211, "learning_rate": 1.4191960816113154e-05, "loss": 0.4597, "step": 11634 }, { "epoch": 38.14754098360656, "grad_norm": 4.112797737121582, "learning_rate": 1.4190996709825247e-05, "loss": 0.3336, "step": 11635 }, { "epoch": 38.15081967213115, "grad_norm": 4.561160087585449, "learning_rate": 1.4190032556279493e-05, "loss": 0.4674, "step": 11636 }, { "epoch": 38.15409836065574, "grad_norm": 4.4088826179504395, "learning_rate": 1.4189068355486765e-05, "loss": 0.4703, "step": 11637 }, { "epoch": 38.157377049180326, "grad_norm": 3.9116744995117188, "learning_rate": 1.4188104107457939e-05, "loss": 0.3729, "step": 11638 }, { "epoch": 38.16065573770492, "grad_norm": 4.298638343811035, "learning_rate": 1.4187139812203881e-05, "loss": 0.3408, "step": 11639 }, { "epoch": 38.16393442622951, "grad_norm": 4.494269371032715, "learning_rate": 1.4186175469735473e-05, "loss": 0.5356, "step": 11640 }, { "epoch": 38.1672131147541, "grad_norm": 4.216956615447998, "learning_rate": 1.418521108006358e-05, "loss": 0.4599, "step": 11641 }, { "epoch": 38.170491803278686, "grad_norm": 4.138431072235107, "learning_rate": 1.418424664319908e-05, "loss": 0.4468, "step": 11642 }, { "epoch": 38.17377049180328, "grad_norm": 4.174580097198486, "learning_rate": 1.4183282159152848e-05, "loss": 0.5032, "step": 11643 }, { "epoch": 38.17704918032787, "grad_norm": 3.947491407394409, "learning_rate": 1.4182317627935764e-05, "loss": 0.2621, "step": 11644 }, { "epoch": 38.18032786885246, "grad_norm": 5.2317352294921875, "learning_rate": 1.4181353049558698e-05, "loss": 0.4357, "step": 11645 }, { "epoch": 38.18360655737705, "grad_norm": 3.775944948196411, "learning_rate": 1.4180388424032528e-05, "loss": 0.3409, "step": 11646 }, { "epoch": 38.18688524590164, "grad_norm": 6.143288612365723, "learning_rate": 1.4179423751368133e-05, "loss": 0.3656, "step": 11647 }, { "epoch": 38.19016393442623, "grad_norm": 4.474798202514648, "learning_rate": 1.4178459031576388e-05, "loss": 0.5543, "step": 11648 }, { "epoch": 38.19344262295082, "grad_norm": 4.244925022125244, "learning_rate": 1.4177494264668177e-05, "loss": 0.2533, "step": 11649 }, { "epoch": 38.19672131147541, "grad_norm": 3.898507833480835, "learning_rate": 1.417652945065437e-05, "loss": 0.5836, "step": 11650 }, { "epoch": 38.2, "grad_norm": 4.207574844360352, "learning_rate": 1.4175564589545853e-05, "loss": 0.4284, "step": 11651 }, { "epoch": 38.20327868852459, "grad_norm": 3.943399667739868, "learning_rate": 1.4174599681353505e-05, "loss": 0.3227, "step": 11652 }, { "epoch": 38.20655737704918, "grad_norm": 4.350831508636475, "learning_rate": 1.4173634726088205e-05, "loss": 0.3134, "step": 11653 }, { "epoch": 38.20983606557377, "grad_norm": 4.53294563293457, "learning_rate": 1.4172669723760833e-05, "loss": 0.3501, "step": 11654 }, { "epoch": 38.21311475409836, "grad_norm": 3.827204465866089, "learning_rate": 1.4171704674382272e-05, "loss": 0.3547, "step": 11655 }, { "epoch": 38.21639344262295, "grad_norm": 4.379194259643555, "learning_rate": 1.4170739577963402e-05, "loss": 0.5423, "step": 11656 }, { "epoch": 38.21967213114754, "grad_norm": 4.0633721351623535, "learning_rate": 1.4169774434515108e-05, "loss": 0.4678, "step": 11657 }, { "epoch": 38.22295081967213, "grad_norm": 3.723428964614868, "learning_rate": 1.4168809244048271e-05, "loss": 0.5122, "step": 11658 }, { "epoch": 38.226229508196724, "grad_norm": 4.706803321838379, "learning_rate": 1.4167844006573776e-05, "loss": 0.585, "step": 11659 }, { "epoch": 38.22950819672131, "grad_norm": 3.7420551776885986, "learning_rate": 1.4166878722102506e-05, "loss": 0.3457, "step": 11660 }, { "epoch": 38.2327868852459, "grad_norm": 5.502471923828125, "learning_rate": 1.4165913390645346e-05, "loss": 0.5582, "step": 11661 }, { "epoch": 38.23606557377049, "grad_norm": 3.833632230758667, "learning_rate": 1.4164948012213182e-05, "loss": 0.2681, "step": 11662 }, { "epoch": 38.239344262295084, "grad_norm": 6.541121006011963, "learning_rate": 1.4163982586816895e-05, "loss": 0.5399, "step": 11663 }, { "epoch": 38.24262295081967, "grad_norm": 3.7483062744140625, "learning_rate": 1.4163017114467378e-05, "loss": 0.3782, "step": 11664 }, { "epoch": 38.24590163934426, "grad_norm": 3.6078317165374756, "learning_rate": 1.4162051595175514e-05, "loss": 0.4315, "step": 11665 }, { "epoch": 38.24918032786885, "grad_norm": 4.111734867095947, "learning_rate": 1.4161086028952193e-05, "loss": 0.6189, "step": 11666 }, { "epoch": 38.252459016393445, "grad_norm": 4.412516117095947, "learning_rate": 1.4160120415808298e-05, "loss": 0.3907, "step": 11667 }, { "epoch": 38.25573770491803, "grad_norm": 4.4631757736206055, "learning_rate": 1.4159154755754721e-05, "loss": 0.4836, "step": 11668 }, { "epoch": 38.25901639344262, "grad_norm": 3.926572799682617, "learning_rate": 1.4158189048802346e-05, "loss": 0.4073, "step": 11669 }, { "epoch": 38.26229508196721, "grad_norm": 4.546887397766113, "learning_rate": 1.4157223294962073e-05, "loss": 0.5697, "step": 11670 }, { "epoch": 38.265573770491805, "grad_norm": 4.877507209777832, "learning_rate": 1.4156257494244778e-05, "loss": 0.2474, "step": 11671 }, { "epoch": 38.268852459016394, "grad_norm": 4.233856678009033, "learning_rate": 1.4155291646661363e-05, "loss": 0.3124, "step": 11672 }, { "epoch": 38.27213114754098, "grad_norm": 4.403831958770752, "learning_rate": 1.4154325752222712e-05, "loss": 0.6333, "step": 11673 }, { "epoch": 38.27540983606557, "grad_norm": 3.644242286682129, "learning_rate": 1.4153359810939718e-05, "loss": 0.5068, "step": 11674 }, { "epoch": 38.278688524590166, "grad_norm": 4.351736068725586, "learning_rate": 1.4152393822823276e-05, "loss": 0.4449, "step": 11675 }, { "epoch": 38.281967213114754, "grad_norm": 3.603299856185913, "learning_rate": 1.4151427787884276e-05, "loss": 0.42, "step": 11676 }, { "epoch": 38.28524590163934, "grad_norm": 3.802485704421997, "learning_rate": 1.4150461706133612e-05, "loss": 0.5026, "step": 11677 }, { "epoch": 38.28852459016394, "grad_norm": 3.856107711791992, "learning_rate": 1.4149495577582174e-05, "loss": 0.3446, "step": 11678 }, { "epoch": 38.291803278688526, "grad_norm": 3.7489893436431885, "learning_rate": 1.414852940224086e-05, "loss": 0.4054, "step": 11679 }, { "epoch": 38.295081967213115, "grad_norm": 3.6252176761627197, "learning_rate": 1.4147563180120562e-05, "loss": 0.3624, "step": 11680 }, { "epoch": 38.2983606557377, "grad_norm": 4.524418354034424, "learning_rate": 1.414659691123218e-05, "loss": 0.4639, "step": 11681 }, { "epoch": 38.3016393442623, "grad_norm": 3.8864083290100098, "learning_rate": 1.4145630595586607e-05, "loss": 0.4961, "step": 11682 }, { "epoch": 38.30491803278689, "grad_norm": 4.068143367767334, "learning_rate": 1.4144664233194737e-05, "loss": 0.332, "step": 11683 }, { "epoch": 38.308196721311475, "grad_norm": 4.87323522567749, "learning_rate": 1.4143697824067468e-05, "loss": 0.3378, "step": 11684 }, { "epoch": 38.31147540983606, "grad_norm": 3.8596267700195312, "learning_rate": 1.4142731368215696e-05, "loss": 0.5157, "step": 11685 }, { "epoch": 38.31475409836066, "grad_norm": 5.127470016479492, "learning_rate": 1.4141764865650325e-05, "loss": 0.407, "step": 11686 }, { "epoch": 38.31803278688525, "grad_norm": 4.324706554412842, "learning_rate": 1.4140798316382245e-05, "loss": 0.3573, "step": 11687 }, { "epoch": 38.321311475409836, "grad_norm": 4.132841110229492, "learning_rate": 1.4139831720422362e-05, "loss": 0.2798, "step": 11688 }, { "epoch": 38.324590163934424, "grad_norm": 4.182116985321045, "learning_rate": 1.4138865077781567e-05, "loss": 0.3648, "step": 11689 }, { "epoch": 38.32786885245902, "grad_norm": 3.475463390350342, "learning_rate": 1.413789838847077e-05, "loss": 0.2122, "step": 11690 }, { "epoch": 38.33114754098361, "grad_norm": 4.027954578399658, "learning_rate": 1.4136931652500863e-05, "loss": 0.4728, "step": 11691 }, { "epoch": 38.334426229508196, "grad_norm": 4.272472858428955, "learning_rate": 1.4135964869882752e-05, "loss": 0.3559, "step": 11692 }, { "epoch": 38.337704918032784, "grad_norm": 3.860490322113037, "learning_rate": 1.4134998040627334e-05, "loss": 0.4666, "step": 11693 }, { "epoch": 38.34098360655738, "grad_norm": 5.848329067230225, "learning_rate": 1.4134031164745517e-05, "loss": 0.4538, "step": 11694 }, { "epoch": 38.34426229508197, "grad_norm": 3.6325862407684326, "learning_rate": 1.4133064242248198e-05, "loss": 0.6708, "step": 11695 }, { "epoch": 38.34754098360656, "grad_norm": 4.097883701324463, "learning_rate": 1.4132097273146285e-05, "loss": 0.4011, "step": 11696 }, { "epoch": 38.350819672131145, "grad_norm": 4.1194305419921875, "learning_rate": 1.4131130257450677e-05, "loss": 0.3203, "step": 11697 }, { "epoch": 38.35409836065574, "grad_norm": 5.123293876647949, "learning_rate": 1.4130163195172283e-05, "loss": 0.326, "step": 11698 }, { "epoch": 38.35737704918033, "grad_norm": 4.550055027008057, "learning_rate": 1.4129196086322e-05, "loss": 0.5416, "step": 11699 }, { "epoch": 38.36065573770492, "grad_norm": 5.367691516876221, "learning_rate": 1.4128228930910739e-05, "loss": 0.4319, "step": 11700 }, { "epoch": 38.363934426229505, "grad_norm": 3.853245258331299, "learning_rate": 1.4127261728949407e-05, "loss": 0.4423, "step": 11701 }, { "epoch": 38.3672131147541, "grad_norm": 3.848200559616089, "learning_rate": 1.4126294480448906e-05, "loss": 0.2477, "step": 11702 }, { "epoch": 38.37049180327869, "grad_norm": 3.5343892574310303, "learning_rate": 1.4125327185420146e-05, "loss": 0.3069, "step": 11703 }, { "epoch": 38.37377049180328, "grad_norm": 3.570542573928833, "learning_rate": 1.4124359843874029e-05, "loss": 0.406, "step": 11704 }, { "epoch": 38.377049180327866, "grad_norm": 4.55974006652832, "learning_rate": 1.4123392455821469e-05, "loss": 0.3841, "step": 11705 }, { "epoch": 38.38032786885246, "grad_norm": 3.5931665897369385, "learning_rate": 1.4122425021273372e-05, "loss": 0.3284, "step": 11706 }, { "epoch": 38.38360655737705, "grad_norm": 4.46909761428833, "learning_rate": 1.4121457540240649e-05, "loss": 0.5781, "step": 11707 }, { "epoch": 38.38688524590164, "grad_norm": 3.9161217212677, "learning_rate": 1.4120490012734203e-05, "loss": 0.2762, "step": 11708 }, { "epoch": 38.390163934426226, "grad_norm": 4.464936256408691, "learning_rate": 1.411952243876495e-05, "loss": 0.4665, "step": 11709 }, { "epoch": 38.39344262295082, "grad_norm": 5.547101974487305, "learning_rate": 1.4118554818343797e-05, "loss": 0.3363, "step": 11710 }, { "epoch": 38.39672131147541, "grad_norm": 4.135512828826904, "learning_rate": 1.4117587151481656e-05, "loss": 0.3076, "step": 11711 }, { "epoch": 38.4, "grad_norm": 3.4607510566711426, "learning_rate": 1.411661943818944e-05, "loss": 0.3517, "step": 11712 }, { "epoch": 38.40327868852459, "grad_norm": 4.3111677169799805, "learning_rate": 1.411565167847806e-05, "loss": 0.3498, "step": 11713 }, { "epoch": 38.40655737704918, "grad_norm": 4.257147789001465, "learning_rate": 1.4114683872358428e-05, "loss": 0.452, "step": 11714 }, { "epoch": 38.40983606557377, "grad_norm": 4.483437538146973, "learning_rate": 1.4113716019841454e-05, "loss": 0.2879, "step": 11715 }, { "epoch": 38.41311475409836, "grad_norm": 4.882845401763916, "learning_rate": 1.4112748120938057e-05, "loss": 0.5643, "step": 11716 }, { "epoch": 38.41639344262295, "grad_norm": 4.348034858703613, "learning_rate": 1.4111780175659149e-05, "loss": 0.4315, "step": 11717 }, { "epoch": 38.41967213114754, "grad_norm": 4.499452114105225, "learning_rate": 1.4110812184015646e-05, "loss": 0.3546, "step": 11718 }, { "epoch": 38.42295081967213, "grad_norm": 4.44625997543335, "learning_rate": 1.4109844146018458e-05, "loss": 0.4959, "step": 11719 }, { "epoch": 38.42622950819672, "grad_norm": 4.650733947753906, "learning_rate": 1.4108876061678507e-05, "loss": 0.5467, "step": 11720 }, { "epoch": 38.429508196721315, "grad_norm": 5.133798122406006, "learning_rate": 1.4107907931006703e-05, "loss": 0.4828, "step": 11721 }, { "epoch": 38.4327868852459, "grad_norm": 3.8335907459259033, "learning_rate": 1.4106939754013968e-05, "loss": 0.6905, "step": 11722 }, { "epoch": 38.43606557377049, "grad_norm": 3.724961280822754, "learning_rate": 1.410597153071122e-05, "loss": 0.4198, "step": 11723 }, { "epoch": 38.43934426229508, "grad_norm": 3.8654134273529053, "learning_rate": 1.410500326110937e-05, "loss": 0.4343, "step": 11724 }, { "epoch": 38.442622950819676, "grad_norm": 4.4422221183776855, "learning_rate": 1.4104034945219338e-05, "loss": 0.5954, "step": 11725 }, { "epoch": 38.445901639344264, "grad_norm": 4.259356498718262, "learning_rate": 1.410306658305205e-05, "loss": 0.2364, "step": 11726 }, { "epoch": 38.44918032786885, "grad_norm": 3.4798412322998047, "learning_rate": 1.4102098174618417e-05, "loss": 0.4693, "step": 11727 }, { "epoch": 38.45245901639344, "grad_norm": 3.6226208209991455, "learning_rate": 1.410112971992936e-05, "loss": 0.3772, "step": 11728 }, { "epoch": 38.455737704918036, "grad_norm": 4.398629188537598, "learning_rate": 1.4100161218995807e-05, "loss": 0.4004, "step": 11729 }, { "epoch": 38.459016393442624, "grad_norm": 3.6928045749664307, "learning_rate": 1.409919267182867e-05, "loss": 0.5012, "step": 11730 }, { "epoch": 38.46229508196721, "grad_norm": 3.912645101547241, "learning_rate": 1.4098224078438873e-05, "loss": 0.6142, "step": 11731 }, { "epoch": 38.4655737704918, "grad_norm": 3.6995723247528076, "learning_rate": 1.4097255438837338e-05, "loss": 0.3782, "step": 11732 }, { "epoch": 38.4688524590164, "grad_norm": 4.551455974578857, "learning_rate": 1.4096286753034992e-05, "loss": 0.4749, "step": 11733 }, { "epoch": 38.472131147540985, "grad_norm": 4.373166084289551, "learning_rate": 1.409531802104275e-05, "loss": 0.2889, "step": 11734 }, { "epoch": 38.47540983606557, "grad_norm": 3.8887510299682617, "learning_rate": 1.4094349242871541e-05, "loss": 0.5077, "step": 11735 }, { "epoch": 38.47868852459016, "grad_norm": 5.120720863342285, "learning_rate": 1.4093380418532288e-05, "loss": 0.3042, "step": 11736 }, { "epoch": 38.48196721311476, "grad_norm": 3.7739830017089844, "learning_rate": 1.4092411548035912e-05, "loss": 0.5628, "step": 11737 }, { "epoch": 38.485245901639345, "grad_norm": 4.713281154632568, "learning_rate": 1.4091442631393341e-05, "loss": 0.5348, "step": 11738 }, { "epoch": 38.488524590163934, "grad_norm": 4.10711669921875, "learning_rate": 1.40904736686155e-05, "loss": 0.4916, "step": 11739 }, { "epoch": 38.49180327868852, "grad_norm": 4.829769134521484, "learning_rate": 1.4089504659713317e-05, "loss": 0.4772, "step": 11740 }, { "epoch": 38.49508196721312, "grad_norm": 4.168327331542969, "learning_rate": 1.4088535604697717e-05, "loss": 0.4302, "step": 11741 }, { "epoch": 38.498360655737706, "grad_norm": 3.861734628677368, "learning_rate": 1.4087566503579628e-05, "loss": 0.452, "step": 11742 }, { "epoch": 38.501639344262294, "grad_norm": 4.550919055938721, "learning_rate": 1.4086597356369973e-05, "loss": 0.4951, "step": 11743 }, { "epoch": 38.50491803278688, "grad_norm": 3.5950543880462646, "learning_rate": 1.4085628163079688e-05, "loss": 0.4752, "step": 11744 }, { "epoch": 38.50819672131148, "grad_norm": 4.177773475646973, "learning_rate": 1.4084658923719694e-05, "loss": 0.5679, "step": 11745 }, { "epoch": 38.511475409836066, "grad_norm": 4.322771072387695, "learning_rate": 1.4083689638300926e-05, "loss": 0.4451, "step": 11746 }, { "epoch": 38.514754098360655, "grad_norm": 3.640028715133667, "learning_rate": 1.4082720306834312e-05, "loss": 0.5703, "step": 11747 }, { "epoch": 38.51803278688524, "grad_norm": 4.288067817687988, "learning_rate": 1.408175092933078e-05, "loss": 0.4866, "step": 11748 }, { "epoch": 38.52131147540984, "grad_norm": 4.508444309234619, "learning_rate": 1.4080781505801263e-05, "loss": 0.4745, "step": 11749 }, { "epoch": 38.52459016393443, "grad_norm": 5.058890342712402, "learning_rate": 1.4079812036256691e-05, "loss": 0.4581, "step": 11750 }, { "epoch": 38.527868852459015, "grad_norm": 4.770357131958008, "learning_rate": 1.4078842520707993e-05, "loss": 0.5388, "step": 11751 }, { "epoch": 38.5311475409836, "grad_norm": 4.345212459564209, "learning_rate": 1.407787295916611e-05, "loss": 0.4208, "step": 11752 }, { "epoch": 38.5344262295082, "grad_norm": 4.0083513259887695, "learning_rate": 1.4076903351641966e-05, "loss": 0.4306, "step": 11753 }, { "epoch": 38.53770491803279, "grad_norm": 4.101165294647217, "learning_rate": 1.40759336981465e-05, "loss": 0.2788, "step": 11754 }, { "epoch": 38.540983606557376, "grad_norm": 5.042975902557373, "learning_rate": 1.4074963998690643e-05, "loss": 0.3718, "step": 11755 }, { "epoch": 38.544262295081964, "grad_norm": 4.6689043045043945, "learning_rate": 1.4073994253285328e-05, "loss": 0.3771, "step": 11756 }, { "epoch": 38.54754098360656, "grad_norm": 4.199175834655762, "learning_rate": 1.4073024461941496e-05, "loss": 0.4507, "step": 11757 }, { "epoch": 38.55081967213115, "grad_norm": 3.7561087608337402, "learning_rate": 1.4072054624670074e-05, "loss": 0.3523, "step": 11758 }, { "epoch": 38.554098360655736, "grad_norm": 5.282982349395752, "learning_rate": 1.4071084741482006e-05, "loss": 0.5422, "step": 11759 }, { "epoch": 38.557377049180324, "grad_norm": 4.5891432762146, "learning_rate": 1.4070114812388225e-05, "loss": 0.5065, "step": 11760 }, { "epoch": 38.56065573770492, "grad_norm": 4.797507286071777, "learning_rate": 1.4069144837399664e-05, "loss": 0.472, "step": 11761 }, { "epoch": 38.56393442622951, "grad_norm": 4.664265155792236, "learning_rate": 1.4068174816527267e-05, "loss": 0.4996, "step": 11762 }, { "epoch": 38.5672131147541, "grad_norm": 4.378461837768555, "learning_rate": 1.4067204749781966e-05, "loss": 0.3168, "step": 11763 }, { "epoch": 38.570491803278685, "grad_norm": 3.9038820266723633, "learning_rate": 1.4066234637174704e-05, "loss": 0.3648, "step": 11764 }, { "epoch": 38.57377049180328, "grad_norm": 3.6838266849517822, "learning_rate": 1.406526447871642e-05, "loss": 0.298, "step": 11765 }, { "epoch": 38.57704918032787, "grad_norm": 3.784306764602661, "learning_rate": 1.4064294274418053e-05, "loss": 0.3618, "step": 11766 }, { "epoch": 38.58032786885246, "grad_norm": 4.493579387664795, "learning_rate": 1.4063324024290539e-05, "loss": 0.484, "step": 11767 }, { "epoch": 38.58360655737705, "grad_norm": 3.165684461593628, "learning_rate": 1.4062353728344825e-05, "loss": 0.3024, "step": 11768 }, { "epoch": 38.58688524590164, "grad_norm": 3.535048246383667, "learning_rate": 1.4061383386591845e-05, "loss": 0.4045, "step": 11769 }, { "epoch": 38.59016393442623, "grad_norm": 4.767302989959717, "learning_rate": 1.4060412999042547e-05, "loss": 0.4767, "step": 11770 }, { "epoch": 38.59344262295082, "grad_norm": 3.4754199981689453, "learning_rate": 1.405944256570787e-05, "loss": 0.4019, "step": 11771 }, { "epoch": 38.59672131147541, "grad_norm": 3.578874349594116, "learning_rate": 1.405847208659876e-05, "loss": 0.3779, "step": 11772 }, { "epoch": 38.6, "grad_norm": 4.470702171325684, "learning_rate": 1.4057501561726157e-05, "loss": 0.3755, "step": 11773 }, { "epoch": 38.60327868852459, "grad_norm": 3.879865884780884, "learning_rate": 1.4056530991101e-05, "loss": 0.4695, "step": 11774 }, { "epoch": 38.60655737704918, "grad_norm": 4.28241491317749, "learning_rate": 1.4055560374734242e-05, "loss": 0.409, "step": 11775 }, { "epoch": 38.609836065573774, "grad_norm": 3.945481538772583, "learning_rate": 1.4054589712636824e-05, "loss": 0.5429, "step": 11776 }, { "epoch": 38.61311475409836, "grad_norm": 4.63301420211792, "learning_rate": 1.4053619004819691e-05, "loss": 0.4652, "step": 11777 }, { "epoch": 38.61639344262295, "grad_norm": 4.99429178237915, "learning_rate": 1.405264825129379e-05, "loss": 0.2959, "step": 11778 }, { "epoch": 38.61967213114754, "grad_norm": 3.7876646518707275, "learning_rate": 1.4051677452070064e-05, "loss": 0.2958, "step": 11779 }, { "epoch": 38.622950819672134, "grad_norm": 5.040876388549805, "learning_rate": 1.4050706607159463e-05, "loss": 0.3124, "step": 11780 }, { "epoch": 38.62622950819672, "grad_norm": 5.143779277801514, "learning_rate": 1.4049735716572934e-05, "loss": 0.4678, "step": 11781 }, { "epoch": 38.62950819672131, "grad_norm": 9.115337371826172, "learning_rate": 1.4048764780321425e-05, "loss": 0.6551, "step": 11782 }, { "epoch": 38.6327868852459, "grad_norm": 4.159949779510498, "learning_rate": 1.404779379841588e-05, "loss": 0.4139, "step": 11783 }, { "epoch": 38.636065573770495, "grad_norm": 4.073860168457031, "learning_rate": 1.4046822770867254e-05, "loss": 0.3348, "step": 11784 }, { "epoch": 38.63934426229508, "grad_norm": 4.066927909851074, "learning_rate": 1.4045851697686495e-05, "loss": 0.6365, "step": 11785 }, { "epoch": 38.64262295081967, "grad_norm": 5.080766201019287, "learning_rate": 1.404488057888455e-05, "loss": 0.5731, "step": 11786 }, { "epoch": 38.64590163934426, "grad_norm": 4.511198043823242, "learning_rate": 1.404390941447237e-05, "loss": 0.6099, "step": 11787 }, { "epoch": 38.649180327868855, "grad_norm": 4.120630264282227, "learning_rate": 1.4042938204460907e-05, "loss": 0.3118, "step": 11788 }, { "epoch": 38.65245901639344, "grad_norm": 3.9805333614349365, "learning_rate": 1.4041966948861113e-05, "loss": 0.5914, "step": 11789 }, { "epoch": 38.65573770491803, "grad_norm": 4.251260757446289, "learning_rate": 1.404099564768394e-05, "loss": 0.2856, "step": 11790 }, { "epoch": 38.65901639344262, "grad_norm": 4.202016830444336, "learning_rate": 1.4040024300940335e-05, "loss": 0.4578, "step": 11791 }, { "epoch": 38.662295081967216, "grad_norm": 4.076506614685059, "learning_rate": 1.403905290864126e-05, "loss": 0.5445, "step": 11792 }, { "epoch": 38.665573770491804, "grad_norm": 4.031450271606445, "learning_rate": 1.403808147079766e-05, "loss": 0.2726, "step": 11793 }, { "epoch": 38.66885245901639, "grad_norm": 4.167288780212402, "learning_rate": 1.4037109987420499e-05, "loss": 0.6487, "step": 11794 }, { "epoch": 38.67213114754098, "grad_norm": 4.90630578994751, "learning_rate": 1.4036138458520718e-05, "loss": 0.4448, "step": 11795 }, { "epoch": 38.675409836065576, "grad_norm": 4.754471778869629, "learning_rate": 1.4035166884109286e-05, "loss": 0.5709, "step": 11796 }, { "epoch": 38.678688524590164, "grad_norm": 4.270260334014893, "learning_rate": 1.4034195264197145e-05, "loss": 0.463, "step": 11797 }, { "epoch": 38.68196721311475, "grad_norm": 4.868592262268066, "learning_rate": 1.4033223598795264e-05, "loss": 0.4144, "step": 11798 }, { "epoch": 38.68524590163934, "grad_norm": 3.9496421813964844, "learning_rate": 1.4032251887914591e-05, "loss": 0.421, "step": 11799 }, { "epoch": 38.68852459016394, "grad_norm": 3.7741239070892334, "learning_rate": 1.4031280131566085e-05, "loss": 0.4407, "step": 11800 }, { "epoch": 38.691803278688525, "grad_norm": 4.314666271209717, "learning_rate": 1.4030308329760706e-05, "loss": 0.575, "step": 11801 }, { "epoch": 38.69508196721311, "grad_norm": 3.97361159324646, "learning_rate": 1.4029336482509408e-05, "loss": 0.3381, "step": 11802 }, { "epoch": 38.6983606557377, "grad_norm": 4.008190155029297, "learning_rate": 1.4028364589823154e-05, "loss": 0.4041, "step": 11803 }, { "epoch": 38.7016393442623, "grad_norm": 4.144268035888672, "learning_rate": 1.4027392651712896e-05, "loss": 0.4716, "step": 11804 }, { "epoch": 38.704918032786885, "grad_norm": 3.5612118244171143, "learning_rate": 1.4026420668189604e-05, "loss": 0.3737, "step": 11805 }, { "epoch": 38.708196721311474, "grad_norm": 3.6340177059173584, "learning_rate": 1.4025448639264228e-05, "loss": 0.3726, "step": 11806 }, { "epoch": 38.71147540983607, "grad_norm": 3.870107412338257, "learning_rate": 1.4024476564947735e-05, "loss": 0.4417, "step": 11807 }, { "epoch": 38.71475409836066, "grad_norm": 3.686002254486084, "learning_rate": 1.4023504445251086e-05, "loss": 0.4584, "step": 11808 }, { "epoch": 38.718032786885246, "grad_norm": 4.116558074951172, "learning_rate": 1.4022532280185237e-05, "loss": 0.5094, "step": 11809 }, { "epoch": 38.721311475409834, "grad_norm": 3.881591320037842, "learning_rate": 1.4021560069761158e-05, "loss": 0.4939, "step": 11810 }, { "epoch": 38.72459016393443, "grad_norm": 4.900210380554199, "learning_rate": 1.4020587813989806e-05, "loss": 0.5796, "step": 11811 }, { "epoch": 38.72786885245902, "grad_norm": 3.981586217880249, "learning_rate": 1.4019615512882147e-05, "loss": 0.5124, "step": 11812 }, { "epoch": 38.731147540983606, "grad_norm": 3.487553358078003, "learning_rate": 1.4018643166449144e-05, "loss": 0.7137, "step": 11813 }, { "epoch": 38.734426229508195, "grad_norm": 3.958101987838745, "learning_rate": 1.4017670774701762e-05, "loss": 0.232, "step": 11814 }, { "epoch": 38.73770491803279, "grad_norm": 3.5175509452819824, "learning_rate": 1.401669833765096e-05, "loss": 0.2774, "step": 11815 }, { "epoch": 38.74098360655738, "grad_norm": 4.170539379119873, "learning_rate": 1.4015725855307714e-05, "loss": 0.3364, "step": 11816 }, { "epoch": 38.74426229508197, "grad_norm": 4.595202922821045, "learning_rate": 1.401475332768298e-05, "loss": 0.5596, "step": 11817 }, { "epoch": 38.747540983606555, "grad_norm": 4.165707588195801, "learning_rate": 1.4013780754787727e-05, "loss": 0.5636, "step": 11818 }, { "epoch": 38.75081967213115, "grad_norm": 5.004760265350342, "learning_rate": 1.4012808136632922e-05, "loss": 0.5849, "step": 11819 }, { "epoch": 38.75409836065574, "grad_norm": 4.483168601989746, "learning_rate": 1.4011835473229538e-05, "loss": 0.5097, "step": 11820 }, { "epoch": 38.75737704918033, "grad_norm": 3.914466142654419, "learning_rate": 1.4010862764588535e-05, "loss": 0.3476, "step": 11821 }, { "epoch": 38.760655737704916, "grad_norm": 3.978421211242676, "learning_rate": 1.4009890010720883e-05, "loss": 0.3954, "step": 11822 }, { "epoch": 38.76393442622951, "grad_norm": 4.980034828186035, "learning_rate": 1.4008917211637554e-05, "loss": 0.3864, "step": 11823 }, { "epoch": 38.7672131147541, "grad_norm": 3.964548349380493, "learning_rate": 1.4007944367349513e-05, "loss": 0.4367, "step": 11824 }, { "epoch": 38.77049180327869, "grad_norm": 3.906262159347534, "learning_rate": 1.4006971477867737e-05, "loss": 0.6281, "step": 11825 }, { "epoch": 38.773770491803276, "grad_norm": 3.9284026622772217, "learning_rate": 1.4005998543203185e-05, "loss": 0.4078, "step": 11826 }, { "epoch": 38.77704918032787, "grad_norm": 3.9652931690216064, "learning_rate": 1.4005025563366838e-05, "loss": 0.4685, "step": 11827 }, { "epoch": 38.78032786885246, "grad_norm": 4.809998512268066, "learning_rate": 1.4004052538369661e-05, "loss": 0.4447, "step": 11828 }, { "epoch": 38.78360655737705, "grad_norm": 4.441239356994629, "learning_rate": 1.4003079468222631e-05, "loss": 0.4457, "step": 11829 }, { "epoch": 38.78688524590164, "grad_norm": 4.1009321212768555, "learning_rate": 1.4002106352936715e-05, "loss": 0.4052, "step": 11830 }, { "epoch": 38.79016393442623, "grad_norm": 4.701230049133301, "learning_rate": 1.4001133192522894e-05, "loss": 0.5466, "step": 11831 }, { "epoch": 38.79344262295082, "grad_norm": 4.392306804656982, "learning_rate": 1.400015998699213e-05, "loss": 0.5441, "step": 11832 }, { "epoch": 38.79672131147541, "grad_norm": 4.463340759277344, "learning_rate": 1.3999186736355404e-05, "loss": 0.4333, "step": 11833 }, { "epoch": 38.8, "grad_norm": 4.109631538391113, "learning_rate": 1.3998213440623691e-05, "loss": 0.3567, "step": 11834 }, { "epoch": 38.80327868852459, "grad_norm": 4.442773818969727, "learning_rate": 1.3997240099807966e-05, "loss": 0.3452, "step": 11835 }, { "epoch": 38.80655737704918, "grad_norm": 5.445509433746338, "learning_rate": 1.39962667139192e-05, "loss": 0.5638, "step": 11836 }, { "epoch": 38.80983606557377, "grad_norm": 3.9128453731536865, "learning_rate": 1.3995293282968372e-05, "loss": 0.4296, "step": 11837 }, { "epoch": 38.81311475409836, "grad_norm": 4.389983654022217, "learning_rate": 1.399431980696646e-05, "loss": 0.2762, "step": 11838 }, { "epoch": 38.81639344262295, "grad_norm": 3.7100014686584473, "learning_rate": 1.3993346285924438e-05, "loss": 0.6636, "step": 11839 }, { "epoch": 38.81967213114754, "grad_norm": 4.820771217346191, "learning_rate": 1.3992372719853285e-05, "loss": 0.4858, "step": 11840 }, { "epoch": 38.82295081967213, "grad_norm": 4.172950267791748, "learning_rate": 1.3991399108763979e-05, "loss": 0.3597, "step": 11841 }, { "epoch": 38.82622950819672, "grad_norm": 3.8010003566741943, "learning_rate": 1.3990425452667498e-05, "loss": 0.3004, "step": 11842 }, { "epoch": 38.829508196721314, "grad_norm": 3.6478378772735596, "learning_rate": 1.3989451751574819e-05, "loss": 0.4728, "step": 11843 }, { "epoch": 38.8327868852459, "grad_norm": 3.9323208332061768, "learning_rate": 1.3988478005496927e-05, "loss": 0.4058, "step": 11844 }, { "epoch": 38.83606557377049, "grad_norm": 5.843747138977051, "learning_rate": 1.3987504214444794e-05, "loss": 0.4182, "step": 11845 }, { "epoch": 38.83934426229508, "grad_norm": 4.122426509857178, "learning_rate": 1.3986530378429409e-05, "loss": 0.6628, "step": 11846 }, { "epoch": 38.842622950819674, "grad_norm": 4.30539608001709, "learning_rate": 1.3985556497461745e-05, "loss": 0.5519, "step": 11847 }, { "epoch": 38.84590163934426, "grad_norm": 3.4688456058502197, "learning_rate": 1.3984582571552792e-05, "loss": 0.4245, "step": 11848 }, { "epoch": 38.84918032786885, "grad_norm": 5.668643951416016, "learning_rate": 1.3983608600713523e-05, "loss": 0.5614, "step": 11849 }, { "epoch": 38.85245901639344, "grad_norm": 4.5892510414123535, "learning_rate": 1.3982634584954931e-05, "loss": 0.3258, "step": 11850 }, { "epoch": 38.855737704918035, "grad_norm": 4.3141093254089355, "learning_rate": 1.398166052428799e-05, "loss": 0.4688, "step": 11851 }, { "epoch": 38.85901639344262, "grad_norm": 4.656017303466797, "learning_rate": 1.3980686418723685e-05, "loss": 0.3528, "step": 11852 }, { "epoch": 38.86229508196721, "grad_norm": 4.529847145080566, "learning_rate": 1.3979712268273004e-05, "loss": 0.465, "step": 11853 }, { "epoch": 38.86557377049181, "grad_norm": 3.9298598766326904, "learning_rate": 1.3978738072946927e-05, "loss": 0.4133, "step": 11854 }, { "epoch": 38.868852459016395, "grad_norm": 5.264461517333984, "learning_rate": 1.3977763832756446e-05, "loss": 0.4993, "step": 11855 }, { "epoch": 38.87213114754098, "grad_norm": 3.5119988918304443, "learning_rate": 1.3976789547712537e-05, "loss": 0.3473, "step": 11856 }, { "epoch": 38.87540983606557, "grad_norm": 4.704339981079102, "learning_rate": 1.3975815217826195e-05, "loss": 0.5675, "step": 11857 }, { "epoch": 38.87868852459017, "grad_norm": 5.029753684997559, "learning_rate": 1.39748408431084e-05, "loss": 0.6087, "step": 11858 }, { "epoch": 38.881967213114756, "grad_norm": 4.562621116638184, "learning_rate": 1.3973866423570142e-05, "loss": 0.5408, "step": 11859 }, { "epoch": 38.885245901639344, "grad_norm": 3.9599874019622803, "learning_rate": 1.3972891959222407e-05, "loss": 0.3366, "step": 11860 }, { "epoch": 38.88852459016393, "grad_norm": 3.532396078109741, "learning_rate": 1.3971917450076185e-05, "loss": 0.4428, "step": 11861 }, { "epoch": 38.89180327868853, "grad_norm": 6.677402973175049, "learning_rate": 1.3970942896142466e-05, "loss": 0.5444, "step": 11862 }, { "epoch": 38.895081967213116, "grad_norm": 5.376512050628662, "learning_rate": 1.3969968297432236e-05, "loss": 0.4551, "step": 11863 }, { "epoch": 38.898360655737704, "grad_norm": 3.7286875247955322, "learning_rate": 1.3968993653956487e-05, "loss": 0.2802, "step": 11864 }, { "epoch": 38.90163934426229, "grad_norm": 3.8464014530181885, "learning_rate": 1.3968018965726204e-05, "loss": 0.2676, "step": 11865 }, { "epoch": 38.90491803278689, "grad_norm": 4.6288981437683105, "learning_rate": 1.3967044232752384e-05, "loss": 0.5389, "step": 11866 }, { "epoch": 38.90819672131148, "grad_norm": 4.530725955963135, "learning_rate": 1.3966069455046017e-05, "loss": 0.439, "step": 11867 }, { "epoch": 38.911475409836065, "grad_norm": 4.032203197479248, "learning_rate": 1.3965094632618093e-05, "loss": 0.3425, "step": 11868 }, { "epoch": 38.91475409836065, "grad_norm": 3.6315178871154785, "learning_rate": 1.39641197654796e-05, "loss": 0.4074, "step": 11869 }, { "epoch": 38.91803278688525, "grad_norm": 4.616957664489746, "learning_rate": 1.3963144853641541e-05, "loss": 0.3973, "step": 11870 }, { "epoch": 38.92131147540984, "grad_norm": 4.251234531402588, "learning_rate": 1.3962169897114899e-05, "loss": 0.4239, "step": 11871 }, { "epoch": 38.924590163934425, "grad_norm": 4.1647491455078125, "learning_rate": 1.3961194895910675e-05, "loss": 0.3613, "step": 11872 }, { "epoch": 38.927868852459014, "grad_norm": 5.210562705993652, "learning_rate": 1.3960219850039856e-05, "loss": 0.5407, "step": 11873 }, { "epoch": 38.93114754098361, "grad_norm": 3.4212820529937744, "learning_rate": 1.3959244759513445e-05, "loss": 0.48, "step": 11874 }, { "epoch": 38.9344262295082, "grad_norm": 3.984618663787842, "learning_rate": 1.3958269624342427e-05, "loss": 0.3715, "step": 11875 }, { "epoch": 38.937704918032786, "grad_norm": 4.161910057067871, "learning_rate": 1.3957294444537808e-05, "loss": 0.2967, "step": 11876 }, { "epoch": 38.940983606557374, "grad_norm": 3.8434367179870605, "learning_rate": 1.3956319220110579e-05, "loss": 0.3364, "step": 11877 }, { "epoch": 38.94426229508197, "grad_norm": 4.521726608276367, "learning_rate": 1.3955343951071735e-05, "loss": 0.4876, "step": 11878 }, { "epoch": 38.94754098360656, "grad_norm": 4.124401092529297, "learning_rate": 1.3954368637432278e-05, "loss": 0.5022, "step": 11879 }, { "epoch": 38.950819672131146, "grad_norm": 4.070663928985596, "learning_rate": 1.3953393279203201e-05, "loss": 0.4454, "step": 11880 }, { "epoch": 38.954098360655735, "grad_norm": 5.13842248916626, "learning_rate": 1.3952417876395507e-05, "loss": 0.3934, "step": 11881 }, { "epoch": 38.95737704918033, "grad_norm": 4.630457878112793, "learning_rate": 1.395144242902019e-05, "loss": 0.3386, "step": 11882 }, { "epoch": 38.96065573770492, "grad_norm": 4.070234298706055, "learning_rate": 1.3950466937088254e-05, "loss": 0.2849, "step": 11883 }, { "epoch": 38.96393442622951, "grad_norm": 4.528509616851807, "learning_rate": 1.3949491400610691e-05, "loss": 0.3393, "step": 11884 }, { "epoch": 38.967213114754095, "grad_norm": 4.224006175994873, "learning_rate": 1.3948515819598509e-05, "loss": 0.4104, "step": 11885 }, { "epoch": 38.97049180327869, "grad_norm": 4.015697956085205, "learning_rate": 1.3947540194062705e-05, "loss": 0.5317, "step": 11886 }, { "epoch": 38.97377049180328, "grad_norm": 4.937441349029541, "learning_rate": 1.3946564524014281e-05, "loss": 0.4302, "step": 11887 }, { "epoch": 38.97704918032787, "grad_norm": 3.9750161170959473, "learning_rate": 1.3945588809464239e-05, "loss": 0.626, "step": 11888 }, { "epoch": 38.980327868852456, "grad_norm": 4.6372785568237305, "learning_rate": 1.3944613050423579e-05, "loss": 0.5038, "step": 11889 }, { "epoch": 38.98360655737705, "grad_norm": 3.7378106117248535, "learning_rate": 1.3943637246903307e-05, "loss": 0.2732, "step": 11890 }, { "epoch": 38.98688524590164, "grad_norm": 4.591150283813477, "learning_rate": 1.3942661398914423e-05, "loss": 0.3359, "step": 11891 }, { "epoch": 38.99016393442623, "grad_norm": 3.920804262161255, "learning_rate": 1.3941685506467936e-05, "loss": 0.4473, "step": 11892 }, { "epoch": 38.993442622950816, "grad_norm": 3.8545031547546387, "learning_rate": 1.3940709569574844e-05, "loss": 0.423, "step": 11893 }, { "epoch": 38.99672131147541, "grad_norm": 4.1907830238342285, "learning_rate": 1.3939733588246155e-05, "loss": 0.5692, "step": 11894 }, { "epoch": 39.0, "grad_norm": 4.8074517250061035, "learning_rate": 1.3938757562492873e-05, "loss": 0.6829, "step": 11895 }, { "epoch": 39.00327868852459, "grad_norm": 4.453250408172607, "learning_rate": 1.3937781492326005e-05, "loss": 0.2862, "step": 11896 }, { "epoch": 39.006557377049184, "grad_norm": 4.329342842102051, "learning_rate": 1.3936805377756554e-05, "loss": 0.378, "step": 11897 }, { "epoch": 39.00983606557377, "grad_norm": 4.053344249725342, "learning_rate": 1.3935829218795533e-05, "loss": 0.297, "step": 11898 }, { "epoch": 39.01311475409836, "grad_norm": 3.7118306159973145, "learning_rate": 1.3934853015453942e-05, "loss": 0.3211, "step": 11899 }, { "epoch": 39.01639344262295, "grad_norm": 4.2430806159973145, "learning_rate": 1.3933876767742796e-05, "loss": 0.3948, "step": 11900 }, { "epoch": 39.019672131147544, "grad_norm": 4.135587215423584, "learning_rate": 1.3932900475673095e-05, "loss": 0.3327, "step": 11901 }, { "epoch": 39.02295081967213, "grad_norm": 3.8001906871795654, "learning_rate": 1.3931924139255854e-05, "loss": 0.2737, "step": 11902 }, { "epoch": 39.02622950819672, "grad_norm": 4.085233688354492, "learning_rate": 1.3930947758502081e-05, "loss": 0.6838, "step": 11903 }, { "epoch": 39.02950819672131, "grad_norm": 7.095936298370361, "learning_rate": 1.3929971333422783e-05, "loss": 0.381, "step": 11904 }, { "epoch": 39.032786885245905, "grad_norm": 4.410447597503662, "learning_rate": 1.3928994864028974e-05, "loss": 0.5429, "step": 11905 }, { "epoch": 39.03606557377049, "grad_norm": 5.4562273025512695, "learning_rate": 1.3928018350331658e-05, "loss": 0.5864, "step": 11906 }, { "epoch": 39.03934426229508, "grad_norm": 3.395949363708496, "learning_rate": 1.3927041792341856e-05, "loss": 0.2038, "step": 11907 }, { "epoch": 39.04262295081967, "grad_norm": 4.652392387390137, "learning_rate": 1.3926065190070573e-05, "loss": 0.6154, "step": 11908 }, { "epoch": 39.045901639344265, "grad_norm": 4.219604015350342, "learning_rate": 1.3925088543528823e-05, "loss": 0.5146, "step": 11909 }, { "epoch": 39.049180327868854, "grad_norm": 4.114264965057373, "learning_rate": 1.3924111852727617e-05, "loss": 0.4879, "step": 11910 }, { "epoch": 39.05245901639344, "grad_norm": 4.830990314483643, "learning_rate": 1.3923135117677974e-05, "loss": 0.6295, "step": 11911 }, { "epoch": 39.05573770491803, "grad_norm": 4.950469970703125, "learning_rate": 1.3922158338390901e-05, "loss": 0.4128, "step": 11912 }, { "epoch": 39.059016393442626, "grad_norm": 4.478994369506836, "learning_rate": 1.3921181514877415e-05, "loss": 0.3766, "step": 11913 }, { "epoch": 39.062295081967214, "grad_norm": 3.856813430786133, "learning_rate": 1.392020464714853e-05, "loss": 0.6047, "step": 11914 }, { "epoch": 39.0655737704918, "grad_norm": 3.6154251098632812, "learning_rate": 1.3919227735215262e-05, "loss": 0.4743, "step": 11915 }, { "epoch": 39.06885245901639, "grad_norm": 5.722387313842773, "learning_rate": 1.3918250779088625e-05, "loss": 0.3901, "step": 11916 }, { "epoch": 39.072131147540986, "grad_norm": 3.068993091583252, "learning_rate": 1.3917273778779636e-05, "loss": 0.2886, "step": 11917 }, { "epoch": 39.075409836065575, "grad_norm": 4.853532314300537, "learning_rate": 1.3916296734299315e-05, "loss": 0.3776, "step": 11918 }, { "epoch": 39.07868852459016, "grad_norm": 5.702731132507324, "learning_rate": 1.3915319645658674e-05, "loss": 0.2186, "step": 11919 }, { "epoch": 39.08196721311475, "grad_norm": 4.178450107574463, "learning_rate": 1.3914342512868736e-05, "loss": 0.3575, "step": 11920 }, { "epoch": 39.08524590163935, "grad_norm": 4.428042888641357, "learning_rate": 1.3913365335940511e-05, "loss": 0.494, "step": 11921 }, { "epoch": 39.088524590163935, "grad_norm": 4.025694847106934, "learning_rate": 1.3912388114885026e-05, "loss": 0.5005, "step": 11922 }, { "epoch": 39.09180327868852, "grad_norm": 3.420802593231201, "learning_rate": 1.3911410849713298e-05, "loss": 0.3074, "step": 11923 }, { "epoch": 39.09508196721311, "grad_norm": 4.317192077636719, "learning_rate": 1.3910433540436348e-05, "loss": 0.2906, "step": 11924 }, { "epoch": 39.09836065573771, "grad_norm": 4.046220779418945, "learning_rate": 1.390945618706519e-05, "loss": 0.3598, "step": 11925 }, { "epoch": 39.101639344262296, "grad_norm": 3.7901573181152344, "learning_rate": 1.3908478789610851e-05, "loss": 0.5459, "step": 11926 }, { "epoch": 39.104918032786884, "grad_norm": 4.070654392242432, "learning_rate": 1.3907501348084347e-05, "loss": 0.3565, "step": 11927 }, { "epoch": 39.10819672131147, "grad_norm": 3.6875483989715576, "learning_rate": 1.3906523862496705e-05, "loss": 0.5571, "step": 11928 }, { "epoch": 39.11147540983607, "grad_norm": 4.9004807472229, "learning_rate": 1.3905546332858946e-05, "loss": 0.4992, "step": 11929 }, { "epoch": 39.114754098360656, "grad_norm": 4.204878330230713, "learning_rate": 1.3904568759182088e-05, "loss": 0.3821, "step": 11930 }, { "epoch": 39.118032786885244, "grad_norm": 4.707936763763428, "learning_rate": 1.390359114147716e-05, "loss": 0.4954, "step": 11931 }, { "epoch": 39.12131147540983, "grad_norm": 3.2634260654449463, "learning_rate": 1.390261347975518e-05, "loss": 0.473, "step": 11932 }, { "epoch": 39.12459016393443, "grad_norm": 4.677069664001465, "learning_rate": 1.390163577402718e-05, "loss": 0.6035, "step": 11933 }, { "epoch": 39.12786885245902, "grad_norm": 5.2155375480651855, "learning_rate": 1.3900658024304176e-05, "loss": 0.4682, "step": 11934 }, { "epoch": 39.131147540983605, "grad_norm": 3.9732325077056885, "learning_rate": 1.3899680230597199e-05, "loss": 0.5345, "step": 11935 }, { "epoch": 39.13442622950819, "grad_norm": 3.3766181468963623, "learning_rate": 1.3898702392917271e-05, "loss": 0.2161, "step": 11936 }, { "epoch": 39.13770491803279, "grad_norm": 4.0421037673950195, "learning_rate": 1.3897724511275423e-05, "loss": 0.4685, "step": 11937 }, { "epoch": 39.14098360655738, "grad_norm": 6.212882041931152, "learning_rate": 1.3896746585682674e-05, "loss": 0.5899, "step": 11938 }, { "epoch": 39.144262295081965, "grad_norm": 4.865421772003174, "learning_rate": 1.3895768616150061e-05, "loss": 0.4751, "step": 11939 }, { "epoch": 39.14754098360656, "grad_norm": 4.381752967834473, "learning_rate": 1.3894790602688601e-05, "loss": 0.4085, "step": 11940 }, { "epoch": 39.15081967213115, "grad_norm": 4.8456854820251465, "learning_rate": 1.389381254530933e-05, "loss": 0.4108, "step": 11941 }, { "epoch": 39.15409836065574, "grad_norm": 4.318366050720215, "learning_rate": 1.3892834444023275e-05, "loss": 0.3841, "step": 11942 }, { "epoch": 39.157377049180326, "grad_norm": 4.2596001625061035, "learning_rate": 1.3891856298841462e-05, "loss": 0.3089, "step": 11943 }, { "epoch": 39.16065573770492, "grad_norm": 3.697249174118042, "learning_rate": 1.3890878109774924e-05, "loss": 0.2798, "step": 11944 }, { "epoch": 39.16393442622951, "grad_norm": 4.0133538246154785, "learning_rate": 1.388989987683469e-05, "loss": 0.2871, "step": 11945 }, { "epoch": 39.1672131147541, "grad_norm": 5.3863396644592285, "learning_rate": 1.3888921600031791e-05, "loss": 0.2978, "step": 11946 }, { "epoch": 39.170491803278686, "grad_norm": 4.627725601196289, "learning_rate": 1.3887943279377254e-05, "loss": 0.4516, "step": 11947 }, { "epoch": 39.17377049180328, "grad_norm": 4.132181167602539, "learning_rate": 1.388696491488212e-05, "loss": 0.4017, "step": 11948 }, { "epoch": 39.17704918032787, "grad_norm": 3.9903829097747803, "learning_rate": 1.388598650655741e-05, "loss": 0.343, "step": 11949 }, { "epoch": 39.18032786885246, "grad_norm": 3.990633487701416, "learning_rate": 1.3885008054414164e-05, "loss": 0.3106, "step": 11950 }, { "epoch": 39.18360655737705, "grad_norm": 3.8099234104156494, "learning_rate": 1.388402955846341e-05, "loss": 0.5413, "step": 11951 }, { "epoch": 39.18688524590164, "grad_norm": 4.602307319641113, "learning_rate": 1.3883051018716188e-05, "loss": 0.5667, "step": 11952 }, { "epoch": 39.19016393442623, "grad_norm": 3.7876346111297607, "learning_rate": 1.3882072435183526e-05, "loss": 0.737, "step": 11953 }, { "epoch": 39.19344262295082, "grad_norm": 3.5228781700134277, "learning_rate": 1.3881093807876465e-05, "loss": 0.2288, "step": 11954 }, { "epoch": 39.19672131147541, "grad_norm": 4.256753921508789, "learning_rate": 1.3880115136806032e-05, "loss": 0.5873, "step": 11955 }, { "epoch": 39.2, "grad_norm": 4.143209934234619, "learning_rate": 1.3879136421983265e-05, "loss": 0.3887, "step": 11956 }, { "epoch": 39.20327868852459, "grad_norm": 4.297154426574707, "learning_rate": 1.3878157663419207e-05, "loss": 0.4067, "step": 11957 }, { "epoch": 39.20655737704918, "grad_norm": 4.204482555389404, "learning_rate": 1.3877178861124885e-05, "loss": 0.3224, "step": 11958 }, { "epoch": 39.20983606557377, "grad_norm": 4.3862175941467285, "learning_rate": 1.387620001511134e-05, "loss": 0.3533, "step": 11959 }, { "epoch": 39.21311475409836, "grad_norm": 3.726588010787964, "learning_rate": 1.387522112538961e-05, "loss": 0.228, "step": 11960 }, { "epoch": 39.21639344262295, "grad_norm": 4.045422554016113, "learning_rate": 1.3874242191970737e-05, "loss": 0.4969, "step": 11961 }, { "epoch": 39.21967213114754, "grad_norm": 4.206539154052734, "learning_rate": 1.3873263214865749e-05, "loss": 0.2518, "step": 11962 }, { "epoch": 39.22295081967213, "grad_norm": 5.296684265136719, "learning_rate": 1.3872284194085695e-05, "loss": 0.502, "step": 11963 }, { "epoch": 39.226229508196724, "grad_norm": 3.966862440109253, "learning_rate": 1.3871305129641607e-05, "loss": 0.5455, "step": 11964 }, { "epoch": 39.22950819672131, "grad_norm": 4.305211067199707, "learning_rate": 1.3870326021544531e-05, "loss": 0.4603, "step": 11965 }, { "epoch": 39.2327868852459, "grad_norm": 4.33313512802124, "learning_rate": 1.3869346869805506e-05, "loss": 0.5636, "step": 11966 }, { "epoch": 39.23606557377049, "grad_norm": 5.267148971557617, "learning_rate": 1.386836767443557e-05, "loss": 0.4453, "step": 11967 }, { "epoch": 39.239344262295084, "grad_norm": 4.895334243774414, "learning_rate": 1.3867388435445765e-05, "loss": 0.3596, "step": 11968 }, { "epoch": 39.24262295081967, "grad_norm": 3.611356735229492, "learning_rate": 1.3866409152847137e-05, "loss": 0.5269, "step": 11969 }, { "epoch": 39.24590163934426, "grad_norm": 4.884506702423096, "learning_rate": 1.3865429826650724e-05, "loss": 0.3883, "step": 11970 }, { "epoch": 39.24918032786885, "grad_norm": 4.0343337059021, "learning_rate": 1.3864450456867572e-05, "loss": 0.3417, "step": 11971 }, { "epoch": 39.252459016393445, "grad_norm": 4.177248477935791, "learning_rate": 1.3863471043508722e-05, "loss": 0.5953, "step": 11972 }, { "epoch": 39.25573770491803, "grad_norm": 3.921685218811035, "learning_rate": 1.386249158658522e-05, "loss": 0.645, "step": 11973 }, { "epoch": 39.25901639344262, "grad_norm": 4.701279163360596, "learning_rate": 1.386151208610811e-05, "loss": 0.4258, "step": 11974 }, { "epoch": 39.26229508196721, "grad_norm": 4.760870456695557, "learning_rate": 1.3860532542088435e-05, "loss": 0.4895, "step": 11975 }, { "epoch": 39.265573770491805, "grad_norm": 4.200434684753418, "learning_rate": 1.3859552954537243e-05, "loss": 0.3437, "step": 11976 }, { "epoch": 39.268852459016394, "grad_norm": 3.508521556854248, "learning_rate": 1.3858573323465576e-05, "loss": 0.4852, "step": 11977 }, { "epoch": 39.27213114754098, "grad_norm": 4.940845489501953, "learning_rate": 1.3857593648884484e-05, "loss": 0.5483, "step": 11978 }, { "epoch": 39.27540983606557, "grad_norm": 5.262228012084961, "learning_rate": 1.3856613930805013e-05, "loss": 0.4429, "step": 11979 }, { "epoch": 39.278688524590166, "grad_norm": 4.350301742553711, "learning_rate": 1.385563416923821e-05, "loss": 0.4768, "step": 11980 }, { "epoch": 39.281967213114754, "grad_norm": 3.2530434131622314, "learning_rate": 1.3854654364195126e-05, "loss": 0.1956, "step": 11981 }, { "epoch": 39.28524590163934, "grad_norm": 4.507320404052734, "learning_rate": 1.38536745156868e-05, "loss": 0.3067, "step": 11982 }, { "epoch": 39.28852459016394, "grad_norm": 3.975008249282837, "learning_rate": 1.3852694623724292e-05, "loss": 0.2512, "step": 11983 }, { "epoch": 39.291803278688526, "grad_norm": 5.870582580566406, "learning_rate": 1.3851714688318643e-05, "loss": 0.3115, "step": 11984 }, { "epoch": 39.295081967213115, "grad_norm": 4.096336364746094, "learning_rate": 1.3850734709480908e-05, "loss": 0.3572, "step": 11985 }, { "epoch": 39.2983606557377, "grad_norm": 4.062102794647217, "learning_rate": 1.3849754687222135e-05, "loss": 0.323, "step": 11986 }, { "epoch": 39.3016393442623, "grad_norm": 3.4717109203338623, "learning_rate": 1.3848774621553376e-05, "loss": 0.266, "step": 11987 }, { "epoch": 39.30491803278689, "grad_norm": 4.104525566101074, "learning_rate": 1.3847794512485679e-05, "loss": 0.4992, "step": 11988 }, { "epoch": 39.308196721311475, "grad_norm": 5.0904130935668945, "learning_rate": 1.3846814360030102e-05, "loss": 0.3205, "step": 11989 }, { "epoch": 39.31147540983606, "grad_norm": 3.3356595039367676, "learning_rate": 1.384583416419769e-05, "loss": 0.3052, "step": 11990 }, { "epoch": 39.31475409836066, "grad_norm": 4.115958213806152, "learning_rate": 1.3844853924999501e-05, "loss": 0.3876, "step": 11991 }, { "epoch": 39.31803278688525, "grad_norm": 4.020668983459473, "learning_rate": 1.3843873642446585e-05, "loss": 0.6899, "step": 11992 }, { "epoch": 39.321311475409836, "grad_norm": 4.082957744598389, "learning_rate": 1.3842893316549999e-05, "loss": 0.6558, "step": 11993 }, { "epoch": 39.324590163934424, "grad_norm": 4.862818717956543, "learning_rate": 1.3841912947320793e-05, "loss": 0.7912, "step": 11994 }, { "epoch": 39.32786885245902, "grad_norm": 3.830996036529541, "learning_rate": 1.3840932534770024e-05, "loss": 0.4296, "step": 11995 }, { "epoch": 39.33114754098361, "grad_norm": 4.559528350830078, "learning_rate": 1.3839952078908747e-05, "loss": 0.5635, "step": 11996 }, { "epoch": 39.334426229508196, "grad_norm": 3.7842934131622314, "learning_rate": 1.383897157974802e-05, "loss": 0.3823, "step": 11997 }, { "epoch": 39.337704918032784, "grad_norm": 3.510902166366577, "learning_rate": 1.3837991037298895e-05, "loss": 0.2933, "step": 11998 }, { "epoch": 39.34098360655738, "grad_norm": 5.0702924728393555, "learning_rate": 1.383701045157243e-05, "loss": 0.52, "step": 11999 }, { "epoch": 39.34426229508197, "grad_norm": 3.7398319244384766, "learning_rate": 1.3836029822579686e-05, "loss": 0.4452, "step": 12000 }, { "epoch": 39.34754098360656, "grad_norm": 5.00272798538208, "learning_rate": 1.3835049150331714e-05, "loss": 0.6157, "step": 12001 }, { "epoch": 39.350819672131145, "grad_norm": 3.8559675216674805, "learning_rate": 1.3834068434839576e-05, "loss": 0.4797, "step": 12002 }, { "epoch": 39.35409836065574, "grad_norm": 4.083382606506348, "learning_rate": 1.383308767611433e-05, "loss": 0.4233, "step": 12003 }, { "epoch": 39.35737704918033, "grad_norm": 4.499754428863525, "learning_rate": 1.3832106874167037e-05, "loss": 0.3135, "step": 12004 }, { "epoch": 39.36065573770492, "grad_norm": 4.040576934814453, "learning_rate": 1.3831126029008754e-05, "loss": 0.3698, "step": 12005 }, { "epoch": 39.363934426229505, "grad_norm": 3.780571222305298, "learning_rate": 1.3830145140650539e-05, "loss": 0.6352, "step": 12006 }, { "epoch": 39.3672131147541, "grad_norm": 3.602449655532837, "learning_rate": 1.3829164209103459e-05, "loss": 0.5146, "step": 12007 }, { "epoch": 39.37049180327869, "grad_norm": 4.854301452636719, "learning_rate": 1.3828183234378568e-05, "loss": 0.4156, "step": 12008 }, { "epoch": 39.37377049180328, "grad_norm": 4.094320774078369, "learning_rate": 1.3827202216486933e-05, "loss": 0.3653, "step": 12009 }, { "epoch": 39.377049180327866, "grad_norm": 4.8342485427856445, "learning_rate": 1.3826221155439611e-05, "loss": 0.4591, "step": 12010 }, { "epoch": 39.38032786885246, "grad_norm": 4.188351631164551, "learning_rate": 1.382524005124767e-05, "loss": 0.439, "step": 12011 }, { "epoch": 39.38360655737705, "grad_norm": 4.481844425201416, "learning_rate": 1.3824258903922168e-05, "loss": 0.3431, "step": 12012 }, { "epoch": 39.38688524590164, "grad_norm": 5.179032325744629, "learning_rate": 1.3823277713474172e-05, "loss": 0.4268, "step": 12013 }, { "epoch": 39.390163934426226, "grad_norm": 5.098698139190674, "learning_rate": 1.3822296479914743e-05, "loss": 0.5535, "step": 12014 }, { "epoch": 39.39344262295082, "grad_norm": 4.242452144622803, "learning_rate": 1.382131520325495e-05, "loss": 0.4873, "step": 12015 }, { "epoch": 39.39672131147541, "grad_norm": 4.257389068603516, "learning_rate": 1.3820333883505851e-05, "loss": 0.3649, "step": 12016 }, { "epoch": 39.4, "grad_norm": 4.385958194732666, "learning_rate": 1.3819352520678519e-05, "loss": 0.5166, "step": 12017 }, { "epoch": 39.40327868852459, "grad_norm": 4.674267768859863, "learning_rate": 1.3818371114784015e-05, "loss": 0.3581, "step": 12018 }, { "epoch": 39.40655737704918, "grad_norm": 4.20328426361084, "learning_rate": 1.3817389665833405e-05, "loss": 0.3639, "step": 12019 }, { "epoch": 39.40983606557377, "grad_norm": 3.2188668251037598, "learning_rate": 1.3816408173837762e-05, "loss": 0.3724, "step": 12020 }, { "epoch": 39.41311475409836, "grad_norm": 3.7317473888397217, "learning_rate": 1.3815426638808143e-05, "loss": 0.2678, "step": 12021 }, { "epoch": 39.41639344262295, "grad_norm": 3.7858405113220215, "learning_rate": 1.3814445060755621e-05, "loss": 0.3725, "step": 12022 }, { "epoch": 39.41967213114754, "grad_norm": 4.675452709197998, "learning_rate": 1.381346343969127e-05, "loss": 0.3855, "step": 12023 }, { "epoch": 39.42295081967213, "grad_norm": 4.275396823883057, "learning_rate": 1.3812481775626152e-05, "loss": 0.4082, "step": 12024 }, { "epoch": 39.42622950819672, "grad_norm": 4.1131591796875, "learning_rate": 1.3811500068571335e-05, "loss": 0.4526, "step": 12025 }, { "epoch": 39.429508196721315, "grad_norm": 4.523764133453369, "learning_rate": 1.3810518318537895e-05, "loss": 0.5621, "step": 12026 }, { "epoch": 39.4327868852459, "grad_norm": 3.336512327194214, "learning_rate": 1.3809536525536897e-05, "loss": 0.3959, "step": 12027 }, { "epoch": 39.43606557377049, "grad_norm": 3.8328988552093506, "learning_rate": 1.3808554689579417e-05, "loss": 0.5117, "step": 12028 }, { "epoch": 39.43934426229508, "grad_norm": 4.131654739379883, "learning_rate": 1.380757281067652e-05, "loss": 0.4852, "step": 12029 }, { "epoch": 39.442622950819676, "grad_norm": 3.6142430305480957, "learning_rate": 1.3806590888839284e-05, "loss": 0.3689, "step": 12030 }, { "epoch": 39.445901639344264, "grad_norm": 4.093175888061523, "learning_rate": 1.3805608924078775e-05, "loss": 0.4746, "step": 12031 }, { "epoch": 39.44918032786885, "grad_norm": 5.583327770233154, "learning_rate": 1.3804626916406068e-05, "loss": 0.4825, "step": 12032 }, { "epoch": 39.45245901639344, "grad_norm": 2.9626388549804688, "learning_rate": 1.380364486583224e-05, "loss": 0.4345, "step": 12033 }, { "epoch": 39.455737704918036, "grad_norm": 3.8641269207000732, "learning_rate": 1.3802662772368359e-05, "loss": 0.4416, "step": 12034 }, { "epoch": 39.459016393442624, "grad_norm": 4.07253360748291, "learning_rate": 1.38016806360255e-05, "loss": 0.402, "step": 12035 }, { "epoch": 39.46229508196721, "grad_norm": 3.7113702297210693, "learning_rate": 1.380069845681474e-05, "loss": 0.2178, "step": 12036 }, { "epoch": 39.4655737704918, "grad_norm": 3.787684440612793, "learning_rate": 1.3799716234747156e-05, "loss": 0.5255, "step": 12037 }, { "epoch": 39.4688524590164, "grad_norm": 4.299625396728516, "learning_rate": 1.3798733969833818e-05, "loss": 0.4478, "step": 12038 }, { "epoch": 39.472131147540985, "grad_norm": 3.808720588684082, "learning_rate": 1.3797751662085805e-05, "loss": 0.6105, "step": 12039 }, { "epoch": 39.47540983606557, "grad_norm": 3.9024717807769775, "learning_rate": 1.3796769311514193e-05, "loss": 0.3926, "step": 12040 }, { "epoch": 39.47868852459016, "grad_norm": 3.9011056423187256, "learning_rate": 1.3795786918130061e-05, "loss": 0.4105, "step": 12041 }, { "epoch": 39.48196721311476, "grad_norm": 4.186605453491211, "learning_rate": 1.3794804481944484e-05, "loss": 0.5165, "step": 12042 }, { "epoch": 39.485245901639345, "grad_norm": 4.832653045654297, "learning_rate": 1.3793822002968542e-05, "loss": 0.6155, "step": 12043 }, { "epoch": 39.488524590163934, "grad_norm": 3.875927686691284, "learning_rate": 1.3792839481213312e-05, "loss": 0.2947, "step": 12044 }, { "epoch": 39.49180327868852, "grad_norm": 3.8778374195098877, "learning_rate": 1.3791856916689871e-05, "loss": 0.5519, "step": 12045 }, { "epoch": 39.49508196721312, "grad_norm": 4.543728351593018, "learning_rate": 1.3790874309409304e-05, "loss": 0.4557, "step": 12046 }, { "epoch": 39.498360655737706, "grad_norm": 4.332139015197754, "learning_rate": 1.3789891659382683e-05, "loss": 0.483, "step": 12047 }, { "epoch": 39.501639344262294, "grad_norm": 4.684072494506836, "learning_rate": 1.3788908966621097e-05, "loss": 0.2683, "step": 12048 }, { "epoch": 39.50491803278688, "grad_norm": 3.3628485202789307, "learning_rate": 1.3787926231135622e-05, "loss": 0.4688, "step": 12049 }, { "epoch": 39.50819672131148, "grad_norm": 4.021100997924805, "learning_rate": 1.3786943452937337e-05, "loss": 0.3452, "step": 12050 }, { "epoch": 39.511475409836066, "grad_norm": 3.733670473098755, "learning_rate": 1.3785960632037334e-05, "loss": 0.2934, "step": 12051 }, { "epoch": 39.514754098360655, "grad_norm": 4.362321853637695, "learning_rate": 1.3784977768446682e-05, "loss": 0.5905, "step": 12052 }, { "epoch": 39.51803278688524, "grad_norm": 4.324230194091797, "learning_rate": 1.3783994862176472e-05, "loss": 0.457, "step": 12053 }, { "epoch": 39.52131147540984, "grad_norm": 4.608434200286865, "learning_rate": 1.3783011913237788e-05, "loss": 0.4221, "step": 12054 }, { "epoch": 39.52459016393443, "grad_norm": 4.571341514587402, "learning_rate": 1.3782028921641708e-05, "loss": 0.4936, "step": 12055 }, { "epoch": 39.527868852459015, "grad_norm": 8.109626770019531, "learning_rate": 1.3781045887399323e-05, "loss": 0.4259, "step": 12056 }, { "epoch": 39.5311475409836, "grad_norm": 3.860921859741211, "learning_rate": 1.3780062810521716e-05, "loss": 0.5195, "step": 12057 }, { "epoch": 39.5344262295082, "grad_norm": 4.6727294921875, "learning_rate": 1.3779079691019965e-05, "loss": 0.5156, "step": 12058 }, { "epoch": 39.53770491803279, "grad_norm": 5.321224212646484, "learning_rate": 1.3778096528905164e-05, "loss": 0.4454, "step": 12059 }, { "epoch": 39.540983606557376, "grad_norm": 4.138877868652344, "learning_rate": 1.3777113324188394e-05, "loss": 0.3899, "step": 12060 }, { "epoch": 39.544262295081964, "grad_norm": 4.334977149963379, "learning_rate": 1.3776130076880748e-05, "loss": 0.3001, "step": 12061 }, { "epoch": 39.54754098360656, "grad_norm": 4.602794647216797, "learning_rate": 1.3775146786993306e-05, "loss": 0.2379, "step": 12062 }, { "epoch": 39.55081967213115, "grad_norm": 4.908514022827148, "learning_rate": 1.3774163454537161e-05, "loss": 0.4095, "step": 12063 }, { "epoch": 39.554098360655736, "grad_norm": 4.55049991607666, "learning_rate": 1.3773180079523396e-05, "loss": 0.3374, "step": 12064 }, { "epoch": 39.557377049180324, "grad_norm": 5.039113998413086, "learning_rate": 1.3772196661963105e-05, "loss": 0.6653, "step": 12065 }, { "epoch": 39.56065573770492, "grad_norm": 4.392734050750732, "learning_rate": 1.3771213201867372e-05, "loss": 0.4934, "step": 12066 }, { "epoch": 39.56393442622951, "grad_norm": 4.659109592437744, "learning_rate": 1.3770229699247292e-05, "loss": 0.3899, "step": 12067 }, { "epoch": 39.5672131147541, "grad_norm": 4.805996417999268, "learning_rate": 1.3769246154113951e-05, "loss": 0.3865, "step": 12068 }, { "epoch": 39.570491803278685, "grad_norm": 4.038372039794922, "learning_rate": 1.3768262566478443e-05, "loss": 0.3902, "step": 12069 }, { "epoch": 39.57377049180328, "grad_norm": 4.4957380294799805, "learning_rate": 1.3767278936351853e-05, "loss": 0.5398, "step": 12070 }, { "epoch": 39.57704918032787, "grad_norm": 4.923756122589111, "learning_rate": 1.3766295263745277e-05, "loss": 0.4632, "step": 12071 }, { "epoch": 39.58032786885246, "grad_norm": 4.459714889526367, "learning_rate": 1.3765311548669807e-05, "loss": 0.3618, "step": 12072 }, { "epoch": 39.58360655737705, "grad_norm": 4.98431396484375, "learning_rate": 1.3764327791136532e-05, "loss": 0.3858, "step": 12073 }, { "epoch": 39.58688524590164, "grad_norm": 3.9150550365448, "learning_rate": 1.376334399115655e-05, "loss": 0.5881, "step": 12074 }, { "epoch": 39.59016393442623, "grad_norm": 4.296228408813477, "learning_rate": 1.376236014874095e-05, "loss": 0.2843, "step": 12075 }, { "epoch": 39.59344262295082, "grad_norm": 4.364564895629883, "learning_rate": 1.3761376263900826e-05, "loss": 0.5151, "step": 12076 }, { "epoch": 39.59672131147541, "grad_norm": 4.118314743041992, "learning_rate": 1.3760392336647278e-05, "loss": 0.5283, "step": 12077 }, { "epoch": 39.6, "grad_norm": 4.0661797523498535, "learning_rate": 1.3759408366991391e-05, "loss": 0.3047, "step": 12078 }, { "epoch": 39.60327868852459, "grad_norm": 4.118204116821289, "learning_rate": 1.375842435494427e-05, "loss": 0.2879, "step": 12079 }, { "epoch": 39.60655737704918, "grad_norm": 3.7146830558776855, "learning_rate": 1.3757440300517004e-05, "loss": 0.432, "step": 12080 }, { "epoch": 39.609836065573774, "grad_norm": 3.905263900756836, "learning_rate": 1.3756456203720695e-05, "loss": 0.5343, "step": 12081 }, { "epoch": 39.61311475409836, "grad_norm": 3.732175827026367, "learning_rate": 1.3755472064566436e-05, "loss": 0.3444, "step": 12082 }, { "epoch": 39.61639344262295, "grad_norm": 3.7955691814422607, "learning_rate": 1.3754487883065324e-05, "loss": 0.6025, "step": 12083 }, { "epoch": 39.61967213114754, "grad_norm": 4.649402618408203, "learning_rate": 1.3753503659228456e-05, "loss": 0.6869, "step": 12084 }, { "epoch": 39.622950819672134, "grad_norm": 5.152512073516846, "learning_rate": 1.3752519393066935e-05, "loss": 0.2485, "step": 12085 }, { "epoch": 39.62622950819672, "grad_norm": 4.350345134735107, "learning_rate": 1.3751535084591852e-05, "loss": 0.3572, "step": 12086 }, { "epoch": 39.62950819672131, "grad_norm": 4.088338851928711, "learning_rate": 1.3750550733814312e-05, "loss": 0.503, "step": 12087 }, { "epoch": 39.6327868852459, "grad_norm": 4.288074970245361, "learning_rate": 1.3749566340745413e-05, "loss": 0.4403, "step": 12088 }, { "epoch": 39.636065573770495, "grad_norm": 4.581837177276611, "learning_rate": 1.3748581905396257e-05, "loss": 0.3774, "step": 12089 }, { "epoch": 39.63934426229508, "grad_norm": 3.872677803039551, "learning_rate": 1.3747597427777937e-05, "loss": 0.488, "step": 12090 }, { "epoch": 39.64262295081967, "grad_norm": 3.8879599571228027, "learning_rate": 1.3746612907901564e-05, "loss": 0.4293, "step": 12091 }, { "epoch": 39.64590163934426, "grad_norm": 3.991211175918579, "learning_rate": 1.3745628345778235e-05, "loss": 0.4811, "step": 12092 }, { "epoch": 39.649180327868855, "grad_norm": 3.5952510833740234, "learning_rate": 1.374464374141905e-05, "loss": 0.2794, "step": 12093 }, { "epoch": 39.65245901639344, "grad_norm": 4.979724884033203, "learning_rate": 1.3743659094835113e-05, "loss": 0.3329, "step": 12094 }, { "epoch": 39.65573770491803, "grad_norm": 4.686960697174072, "learning_rate": 1.374267440603753e-05, "loss": 0.5922, "step": 12095 }, { "epoch": 39.65901639344262, "grad_norm": 3.4327802658081055, "learning_rate": 1.3741689675037402e-05, "loss": 0.2625, "step": 12096 }, { "epoch": 39.662295081967216, "grad_norm": 4.563035488128662, "learning_rate": 1.374070490184583e-05, "loss": 0.4029, "step": 12097 }, { "epoch": 39.665573770491804, "grad_norm": 4.593659400939941, "learning_rate": 1.3739720086473922e-05, "loss": 0.5027, "step": 12098 }, { "epoch": 39.66885245901639, "grad_norm": 4.521425247192383, "learning_rate": 1.373873522893278e-05, "loss": 0.3304, "step": 12099 }, { "epoch": 39.67213114754098, "grad_norm": 3.3695645332336426, "learning_rate": 1.3737750329233515e-05, "loss": 0.3306, "step": 12100 }, { "epoch": 39.675409836065576, "grad_norm": 3.2541310787200928, "learning_rate": 1.3736765387387223e-05, "loss": 0.4864, "step": 12101 }, { "epoch": 39.678688524590164, "grad_norm": 4.583159923553467, "learning_rate": 1.3735780403405022e-05, "loss": 0.3961, "step": 12102 }, { "epoch": 39.68196721311475, "grad_norm": 3.904475450515747, "learning_rate": 1.3734795377298006e-05, "loss": 0.51, "step": 12103 }, { "epoch": 39.68524590163934, "grad_norm": 3.3941993713378906, "learning_rate": 1.3733810309077294e-05, "loss": 0.3387, "step": 12104 }, { "epoch": 39.68852459016394, "grad_norm": 4.114695072174072, "learning_rate": 1.3732825198753987e-05, "loss": 0.4763, "step": 12105 }, { "epoch": 39.691803278688525, "grad_norm": 3.9910051822662354, "learning_rate": 1.3731840046339193e-05, "loss": 0.5903, "step": 12106 }, { "epoch": 39.69508196721311, "grad_norm": 4.261906147003174, "learning_rate": 1.3730854851844025e-05, "loss": 0.253, "step": 12107 }, { "epoch": 39.6983606557377, "grad_norm": 4.934193134307861, "learning_rate": 1.372986961527959e-05, "loss": 0.4573, "step": 12108 }, { "epoch": 39.7016393442623, "grad_norm": 4.7302327156066895, "learning_rate": 1.3728884336656995e-05, "loss": 0.4043, "step": 12109 }, { "epoch": 39.704918032786885, "grad_norm": 3.5131235122680664, "learning_rate": 1.3727899015987352e-05, "loss": 0.4436, "step": 12110 }, { "epoch": 39.708196721311474, "grad_norm": 3.567286252975464, "learning_rate": 1.3726913653281773e-05, "loss": 0.4352, "step": 12111 }, { "epoch": 39.71147540983607, "grad_norm": 3.9406590461730957, "learning_rate": 1.3725928248551366e-05, "loss": 0.4801, "step": 12112 }, { "epoch": 39.71475409836066, "grad_norm": 4.199075222015381, "learning_rate": 1.3724942801807246e-05, "loss": 0.3709, "step": 12113 }, { "epoch": 39.718032786885246, "grad_norm": 3.935734748840332, "learning_rate": 1.3723957313060521e-05, "loss": 0.4497, "step": 12114 }, { "epoch": 39.721311475409834, "grad_norm": 4.374147415161133, "learning_rate": 1.3722971782322308e-05, "loss": 0.3545, "step": 12115 }, { "epoch": 39.72459016393443, "grad_norm": 4.244344234466553, "learning_rate": 1.3721986209603712e-05, "loss": 0.4247, "step": 12116 }, { "epoch": 39.72786885245902, "grad_norm": 4.412962436676025, "learning_rate": 1.3721000594915857e-05, "loss": 0.3706, "step": 12117 }, { "epoch": 39.731147540983606, "grad_norm": 4.5560078620910645, "learning_rate": 1.3720014938269848e-05, "loss": 0.3524, "step": 12118 }, { "epoch": 39.734426229508195, "grad_norm": 4.16020393371582, "learning_rate": 1.3719029239676807e-05, "loss": 0.5547, "step": 12119 }, { "epoch": 39.73770491803279, "grad_norm": 4.1061625480651855, "learning_rate": 1.371804349914784e-05, "loss": 0.5645, "step": 12120 }, { "epoch": 39.74098360655738, "grad_norm": 3.849074125289917, "learning_rate": 1.3717057716694069e-05, "loss": 0.405, "step": 12121 }, { "epoch": 39.74426229508197, "grad_norm": 12.430516242980957, "learning_rate": 1.3716071892326607e-05, "loss": 0.33, "step": 12122 }, { "epoch": 39.747540983606555, "grad_norm": 4.0779194831848145, "learning_rate": 1.371508602605657e-05, "loss": 0.3009, "step": 12123 }, { "epoch": 39.75081967213115, "grad_norm": 4.096920013427734, "learning_rate": 1.3714100117895077e-05, "loss": 0.3041, "step": 12124 }, { "epoch": 39.75409836065574, "grad_norm": 4.645848274230957, "learning_rate": 1.371311416785324e-05, "loss": 0.3996, "step": 12125 }, { "epoch": 39.75737704918033, "grad_norm": 3.438581943511963, "learning_rate": 1.3712128175942186e-05, "loss": 0.3556, "step": 12126 }, { "epoch": 39.760655737704916, "grad_norm": 4.353823184967041, "learning_rate": 1.3711142142173021e-05, "loss": 0.3638, "step": 12127 }, { "epoch": 39.76393442622951, "grad_norm": 4.2192769050598145, "learning_rate": 1.3710156066556875e-05, "loss": 0.3075, "step": 12128 }, { "epoch": 39.7672131147541, "grad_norm": 3.8823025226593018, "learning_rate": 1.3709169949104857e-05, "loss": 0.4327, "step": 12129 }, { "epoch": 39.77049180327869, "grad_norm": 3.8814632892608643, "learning_rate": 1.3708183789828093e-05, "loss": 0.6136, "step": 12130 }, { "epoch": 39.773770491803276, "grad_norm": 3.7318055629730225, "learning_rate": 1.3707197588737702e-05, "loss": 0.2614, "step": 12131 }, { "epoch": 39.77704918032787, "grad_norm": 4.042036533355713, "learning_rate": 1.3706211345844802e-05, "loss": 0.4186, "step": 12132 }, { "epoch": 39.78032786885246, "grad_norm": 4.576431751251221, "learning_rate": 1.3705225061160516e-05, "loss": 0.3833, "step": 12133 }, { "epoch": 39.78360655737705, "grad_norm": 4.2367472648620605, "learning_rate": 1.3704238734695966e-05, "loss": 0.4427, "step": 12134 }, { "epoch": 39.78688524590164, "grad_norm": 3.5340723991394043, "learning_rate": 1.3703252366462274e-05, "loss": 0.2368, "step": 12135 }, { "epoch": 39.79016393442623, "grad_norm": 3.819174289703369, "learning_rate": 1.3702265956470558e-05, "loss": 0.4286, "step": 12136 }, { "epoch": 39.79344262295082, "grad_norm": 4.622171401977539, "learning_rate": 1.3701279504731946e-05, "loss": 0.3278, "step": 12137 }, { "epoch": 39.79672131147541, "grad_norm": 3.970546007156372, "learning_rate": 1.3700293011257557e-05, "loss": 0.5077, "step": 12138 }, { "epoch": 39.8, "grad_norm": 3.3174238204956055, "learning_rate": 1.3699306476058523e-05, "loss": 0.4217, "step": 12139 }, { "epoch": 39.80327868852459, "grad_norm": 5.179874897003174, "learning_rate": 1.3698319899145956e-05, "loss": 0.4471, "step": 12140 }, { "epoch": 39.80655737704918, "grad_norm": 4.077692031860352, "learning_rate": 1.3697333280530989e-05, "loss": 0.4258, "step": 12141 }, { "epoch": 39.80983606557377, "grad_norm": 4.35296630859375, "learning_rate": 1.3696346620224743e-05, "loss": 0.3924, "step": 12142 }, { "epoch": 39.81311475409836, "grad_norm": 4.459133625030518, "learning_rate": 1.3695359918238349e-05, "loss": 0.4892, "step": 12143 }, { "epoch": 39.81639344262295, "grad_norm": 3.3976423740386963, "learning_rate": 1.3694373174582926e-05, "loss": 0.3833, "step": 12144 }, { "epoch": 39.81967213114754, "grad_norm": 4.333765029907227, "learning_rate": 1.369338638926961e-05, "loss": 0.524, "step": 12145 }, { "epoch": 39.82295081967213, "grad_norm": 3.7008042335510254, "learning_rate": 1.3692399562309517e-05, "loss": 0.3088, "step": 12146 }, { "epoch": 39.82622950819672, "grad_norm": 3.3061811923980713, "learning_rate": 1.3691412693713782e-05, "loss": 0.1809, "step": 12147 }, { "epoch": 39.829508196721314, "grad_norm": 4.2483367919921875, "learning_rate": 1.3690425783493533e-05, "loss": 0.3721, "step": 12148 }, { "epoch": 39.8327868852459, "grad_norm": 4.221951007843018, "learning_rate": 1.3689438831659891e-05, "loss": 0.3783, "step": 12149 }, { "epoch": 39.83606557377049, "grad_norm": 4.071383953094482, "learning_rate": 1.3688451838223995e-05, "loss": 0.3059, "step": 12150 }, { "epoch": 39.83934426229508, "grad_norm": 4.291261672973633, "learning_rate": 1.3687464803196963e-05, "loss": 0.5268, "step": 12151 }, { "epoch": 39.842622950819674, "grad_norm": 3.56680965423584, "learning_rate": 1.368647772658994e-05, "loss": 0.5176, "step": 12152 }, { "epoch": 39.84590163934426, "grad_norm": 5.087568759918213, "learning_rate": 1.3685490608414041e-05, "loss": 0.4162, "step": 12153 }, { "epoch": 39.84918032786885, "grad_norm": 5.185503005981445, "learning_rate": 1.3684503448680407e-05, "loss": 0.2958, "step": 12154 }, { "epoch": 39.85245901639344, "grad_norm": 3.92433762550354, "learning_rate": 1.3683516247400164e-05, "loss": 0.3159, "step": 12155 }, { "epoch": 39.855737704918035, "grad_norm": 4.184677600860596, "learning_rate": 1.3682529004584445e-05, "loss": 0.4859, "step": 12156 }, { "epoch": 39.85901639344262, "grad_norm": 4.40618896484375, "learning_rate": 1.3681541720244382e-05, "loss": 0.2489, "step": 12157 }, { "epoch": 39.86229508196721, "grad_norm": 4.341991901397705, "learning_rate": 1.368055439439111e-05, "loss": 0.3988, "step": 12158 }, { "epoch": 39.86557377049181, "grad_norm": 4.420719623565674, "learning_rate": 1.367956702703576e-05, "loss": 0.4073, "step": 12159 }, { "epoch": 39.868852459016395, "grad_norm": 4.145147323608398, "learning_rate": 1.3678579618189465e-05, "loss": 0.4723, "step": 12160 }, { "epoch": 39.87213114754098, "grad_norm": 4.47098970413208, "learning_rate": 1.367759216786336e-05, "loss": 0.2151, "step": 12161 }, { "epoch": 39.87540983606557, "grad_norm": 5.075201511383057, "learning_rate": 1.3676604676068581e-05, "loss": 0.2944, "step": 12162 }, { "epoch": 39.87868852459017, "grad_norm": 3.9178504943847656, "learning_rate": 1.3675617142816262e-05, "loss": 0.4005, "step": 12163 }, { "epoch": 39.881967213114756, "grad_norm": 13.61291790008545, "learning_rate": 1.3674629568117536e-05, "loss": 0.4485, "step": 12164 }, { "epoch": 39.885245901639344, "grad_norm": 4.422699928283691, "learning_rate": 1.3673641951983543e-05, "loss": 0.498, "step": 12165 }, { "epoch": 39.88852459016393, "grad_norm": 4.4903340339660645, "learning_rate": 1.3672654294425416e-05, "loss": 0.4193, "step": 12166 }, { "epoch": 39.89180327868853, "grad_norm": 4.393299579620361, "learning_rate": 1.3671666595454296e-05, "loss": 0.5466, "step": 12167 }, { "epoch": 39.895081967213116, "grad_norm": 4.9642014503479, "learning_rate": 1.3670678855081315e-05, "loss": 0.3235, "step": 12168 }, { "epoch": 39.898360655737704, "grad_norm": 4.286275386810303, "learning_rate": 1.3669691073317613e-05, "loss": 0.334, "step": 12169 }, { "epoch": 39.90163934426229, "grad_norm": 5.484767436981201, "learning_rate": 1.3668703250174327e-05, "loss": 0.4337, "step": 12170 }, { "epoch": 39.90491803278689, "grad_norm": 5.350493431091309, "learning_rate": 1.36677153856626e-05, "loss": 0.4379, "step": 12171 }, { "epoch": 39.90819672131148, "grad_norm": 3.9852781295776367, "learning_rate": 1.3666727479793571e-05, "loss": 0.482, "step": 12172 }, { "epoch": 39.911475409836065, "grad_norm": 4.608606815338135, "learning_rate": 1.3665739532578373e-05, "loss": 0.5466, "step": 12173 }, { "epoch": 39.91475409836065, "grad_norm": 4.193286895751953, "learning_rate": 1.3664751544028152e-05, "loss": 0.4289, "step": 12174 }, { "epoch": 39.91803278688525, "grad_norm": 5.166491508483887, "learning_rate": 1.3663763514154047e-05, "loss": 0.4737, "step": 12175 }, { "epoch": 39.92131147540984, "grad_norm": 4.245103359222412, "learning_rate": 1.36627754429672e-05, "loss": 0.4386, "step": 12176 }, { "epoch": 39.924590163934425, "grad_norm": 3.921342611312866, "learning_rate": 1.3661787330478749e-05, "loss": 0.5036, "step": 12177 }, { "epoch": 39.927868852459014, "grad_norm": 4.925056457519531, "learning_rate": 1.366079917669984e-05, "loss": 0.3703, "step": 12178 }, { "epoch": 39.93114754098361, "grad_norm": 5.7347540855407715, "learning_rate": 1.3659810981641612e-05, "loss": 0.4378, "step": 12179 }, { "epoch": 39.9344262295082, "grad_norm": 4.178492069244385, "learning_rate": 1.3658822745315213e-05, "loss": 0.4372, "step": 12180 }, { "epoch": 39.937704918032786, "grad_norm": 4.385348320007324, "learning_rate": 1.3657834467731781e-05, "loss": 0.4401, "step": 12181 }, { "epoch": 39.940983606557374, "grad_norm": 4.915949821472168, "learning_rate": 1.3656846148902465e-05, "loss": 0.3757, "step": 12182 }, { "epoch": 39.94426229508197, "grad_norm": 7.452437400817871, "learning_rate": 1.3655857788838404e-05, "loss": 0.4584, "step": 12183 }, { "epoch": 39.94754098360656, "grad_norm": 5.017645835876465, "learning_rate": 1.3654869387550747e-05, "loss": 0.6314, "step": 12184 }, { "epoch": 39.950819672131146, "grad_norm": 4.801267147064209, "learning_rate": 1.3653880945050634e-05, "loss": 0.5259, "step": 12185 }, { "epoch": 39.954098360655735, "grad_norm": 5.027233600616455, "learning_rate": 1.3652892461349216e-05, "loss": 0.3334, "step": 12186 }, { "epoch": 39.95737704918033, "grad_norm": 4.203488349914551, "learning_rate": 1.365190393645764e-05, "loss": 0.4227, "step": 12187 }, { "epoch": 39.96065573770492, "grad_norm": 4.442028045654297, "learning_rate": 1.3650915370387048e-05, "loss": 0.4094, "step": 12188 }, { "epoch": 39.96393442622951, "grad_norm": 3.8008363246917725, "learning_rate": 1.3649926763148588e-05, "loss": 0.484, "step": 12189 }, { "epoch": 39.967213114754095, "grad_norm": 4.074499607086182, "learning_rate": 1.3648938114753411e-05, "loss": 0.4594, "step": 12190 }, { "epoch": 39.97049180327869, "grad_norm": 4.344906330108643, "learning_rate": 1.3647949425212663e-05, "loss": 0.301, "step": 12191 }, { "epoch": 39.97377049180328, "grad_norm": 3.8967113494873047, "learning_rate": 1.364696069453749e-05, "loss": 0.2538, "step": 12192 }, { "epoch": 39.97704918032787, "grad_norm": 4.219771385192871, "learning_rate": 1.3645971922739048e-05, "loss": 0.4587, "step": 12193 }, { "epoch": 39.980327868852456, "grad_norm": 4.930840492248535, "learning_rate": 1.3644983109828476e-05, "loss": 0.3266, "step": 12194 }, { "epoch": 39.98360655737705, "grad_norm": 3.9789745807647705, "learning_rate": 1.3643994255816932e-05, "loss": 0.3605, "step": 12195 }, { "epoch": 39.98688524590164, "grad_norm": 4.357304573059082, "learning_rate": 1.3643005360715564e-05, "loss": 0.4838, "step": 12196 }, { "epoch": 39.99016393442623, "grad_norm": 5.033857822418213, "learning_rate": 1.3642016424535525e-05, "loss": 0.4702, "step": 12197 }, { "epoch": 39.993442622950816, "grad_norm": 3.7925302982330322, "learning_rate": 1.3641027447287963e-05, "loss": 0.1974, "step": 12198 }, { "epoch": 39.99672131147541, "grad_norm": 3.8593807220458984, "learning_rate": 1.3640038428984028e-05, "loss": 0.3439, "step": 12199 }, { "epoch": 40.0, "grad_norm": 4.165156841278076, "learning_rate": 1.3639049369634878e-05, "loss": 0.2395, "step": 12200 }, { "epoch": 40.00327868852459, "grad_norm": 4.362977027893066, "learning_rate": 1.363806026925166e-05, "loss": 0.3506, "step": 12201 }, { "epoch": 40.006557377049184, "grad_norm": 4.082536220550537, "learning_rate": 1.3637071127845531e-05, "loss": 0.4042, "step": 12202 }, { "epoch": 40.00983606557377, "grad_norm": 4.516852378845215, "learning_rate": 1.3636081945427643e-05, "loss": 0.5088, "step": 12203 }, { "epoch": 40.01311475409836, "grad_norm": 4.11345911026001, "learning_rate": 1.3635092722009154e-05, "loss": 0.3124, "step": 12204 }, { "epoch": 40.01639344262295, "grad_norm": 4.529116630554199, "learning_rate": 1.363410345760121e-05, "loss": 0.3111, "step": 12205 }, { "epoch": 40.019672131147544, "grad_norm": 3.9949755668640137, "learning_rate": 1.3633114152214975e-05, "loss": 0.2753, "step": 12206 }, { "epoch": 40.02295081967213, "grad_norm": 4.229132175445557, "learning_rate": 1.3632124805861598e-05, "loss": 0.4239, "step": 12207 }, { "epoch": 40.02622950819672, "grad_norm": 4.246119976043701, "learning_rate": 1.3631135418552237e-05, "loss": 0.4204, "step": 12208 }, { "epoch": 40.02950819672131, "grad_norm": 3.8535637855529785, "learning_rate": 1.363014599029805e-05, "loss": 0.2579, "step": 12209 }, { "epoch": 40.032786885245905, "grad_norm": 4.830264091491699, "learning_rate": 1.3629156521110191e-05, "loss": 0.3283, "step": 12210 }, { "epoch": 40.03606557377049, "grad_norm": 4.453370094299316, "learning_rate": 1.362816701099982e-05, "loss": 0.4982, "step": 12211 }, { "epoch": 40.03934426229508, "grad_norm": 3.7143032550811768, "learning_rate": 1.3627177459978095e-05, "loss": 0.4509, "step": 12212 }, { "epoch": 40.04262295081967, "grad_norm": 4.005520343780518, "learning_rate": 1.362618786805617e-05, "loss": 0.5692, "step": 12213 }, { "epoch": 40.045901639344265, "grad_norm": 4.5832109451293945, "learning_rate": 1.3625198235245208e-05, "loss": 0.3457, "step": 12214 }, { "epoch": 40.049180327868854, "grad_norm": 4.0815534591674805, "learning_rate": 1.3624208561556367e-05, "loss": 0.4037, "step": 12215 }, { "epoch": 40.05245901639344, "grad_norm": 4.040181636810303, "learning_rate": 1.3623218847000805e-05, "loss": 0.5952, "step": 12216 }, { "epoch": 40.05573770491803, "grad_norm": 3.6815850734710693, "learning_rate": 1.3622229091589685e-05, "loss": 0.2311, "step": 12217 }, { "epoch": 40.059016393442626, "grad_norm": 4.794063568115234, "learning_rate": 1.3621239295334166e-05, "loss": 0.4127, "step": 12218 }, { "epoch": 40.062295081967214, "grad_norm": 4.107503890991211, "learning_rate": 1.3620249458245407e-05, "loss": 0.4197, "step": 12219 }, { "epoch": 40.0655737704918, "grad_norm": 3.8132121562957764, "learning_rate": 1.3619259580334571e-05, "loss": 0.3101, "step": 12220 }, { "epoch": 40.06885245901639, "grad_norm": 3.923464775085449, "learning_rate": 1.3618269661612822e-05, "loss": 0.4723, "step": 12221 }, { "epoch": 40.072131147540986, "grad_norm": 3.489114284515381, "learning_rate": 1.3617279702091319e-05, "loss": 0.5456, "step": 12222 }, { "epoch": 40.075409836065575, "grad_norm": 5.619318008422852, "learning_rate": 1.3616289701781229e-05, "loss": 0.388, "step": 12223 }, { "epoch": 40.07868852459016, "grad_norm": 3.9412128925323486, "learning_rate": 1.361529966069371e-05, "loss": 0.2981, "step": 12224 }, { "epoch": 40.08196721311475, "grad_norm": 3.7465908527374268, "learning_rate": 1.3614309578839928e-05, "loss": 0.3266, "step": 12225 }, { "epoch": 40.08524590163935, "grad_norm": 4.282801151275635, "learning_rate": 1.361331945623105e-05, "loss": 0.3319, "step": 12226 }, { "epoch": 40.088524590163935, "grad_norm": 3.9003231525421143, "learning_rate": 1.3612329292878234e-05, "loss": 0.4881, "step": 12227 }, { "epoch": 40.09180327868852, "grad_norm": 4.395322322845459, "learning_rate": 1.3611339088792654e-05, "loss": 0.3327, "step": 12228 }, { "epoch": 40.09508196721311, "grad_norm": 4.912512302398682, "learning_rate": 1.361034884398547e-05, "loss": 0.2875, "step": 12229 }, { "epoch": 40.09836065573771, "grad_norm": 4.240170955657959, "learning_rate": 1.3609358558467847e-05, "loss": 0.447, "step": 12230 }, { "epoch": 40.101639344262296, "grad_norm": 3.889336585998535, "learning_rate": 1.3608368232250955e-05, "loss": 0.4548, "step": 12231 }, { "epoch": 40.104918032786884, "grad_norm": 4.701277256011963, "learning_rate": 1.3607377865345959e-05, "loss": 0.4429, "step": 12232 }, { "epoch": 40.10819672131147, "grad_norm": 5.187977313995361, "learning_rate": 1.3606387457764025e-05, "loss": 0.4046, "step": 12233 }, { "epoch": 40.11147540983607, "grad_norm": 3.7457692623138428, "learning_rate": 1.3605397009516326e-05, "loss": 0.3589, "step": 12234 }, { "epoch": 40.114754098360656, "grad_norm": 3.851605176925659, "learning_rate": 1.3604406520614025e-05, "loss": 0.2415, "step": 12235 }, { "epoch": 40.118032786885244, "grad_norm": 4.0717244148254395, "learning_rate": 1.3603415991068295e-05, "loss": 0.5004, "step": 12236 }, { "epoch": 40.12131147540983, "grad_norm": 3.886749505996704, "learning_rate": 1.36024254208903e-05, "loss": 0.175, "step": 12237 }, { "epoch": 40.12459016393443, "grad_norm": 4.520644187927246, "learning_rate": 1.3601434810091214e-05, "loss": 0.2635, "step": 12238 }, { "epoch": 40.12786885245902, "grad_norm": 4.102972507476807, "learning_rate": 1.3600444158682207e-05, "loss": 0.2891, "step": 12239 }, { "epoch": 40.131147540983605, "grad_norm": 4.7219085693359375, "learning_rate": 1.3599453466674446e-05, "loss": 0.3178, "step": 12240 }, { "epoch": 40.13442622950819, "grad_norm": 4.371878623962402, "learning_rate": 1.3598462734079108e-05, "loss": 0.392, "step": 12241 }, { "epoch": 40.13770491803279, "grad_norm": 4.951931476593018, "learning_rate": 1.3597471960907358e-05, "loss": 0.3111, "step": 12242 }, { "epoch": 40.14098360655738, "grad_norm": 4.279941082000732, "learning_rate": 1.3596481147170373e-05, "loss": 0.3831, "step": 12243 }, { "epoch": 40.144262295081965, "grad_norm": 3.6185531616210938, "learning_rate": 1.3595490292879322e-05, "loss": 0.3011, "step": 12244 }, { "epoch": 40.14754098360656, "grad_norm": 4.666097164154053, "learning_rate": 1.3594499398045382e-05, "loss": 0.3142, "step": 12245 }, { "epoch": 40.15081967213115, "grad_norm": 4.802308082580566, "learning_rate": 1.3593508462679723e-05, "loss": 0.3764, "step": 12246 }, { "epoch": 40.15409836065574, "grad_norm": 4.471870422363281, "learning_rate": 1.359251748679352e-05, "loss": 0.7284, "step": 12247 }, { "epoch": 40.157377049180326, "grad_norm": 4.002406120300293, "learning_rate": 1.3591526470397946e-05, "loss": 0.7148, "step": 12248 }, { "epoch": 40.16065573770492, "grad_norm": 4.627079010009766, "learning_rate": 1.3590535413504177e-05, "loss": 0.3634, "step": 12249 }, { "epoch": 40.16393442622951, "grad_norm": 4.3916802406311035, "learning_rate": 1.358954431612339e-05, "loss": 0.3661, "step": 12250 }, { "epoch": 40.1672131147541, "grad_norm": 4.373827934265137, "learning_rate": 1.358855317826676e-05, "loss": 0.4825, "step": 12251 }, { "epoch": 40.170491803278686, "grad_norm": 4.144105911254883, "learning_rate": 1.3587561999945457e-05, "loss": 0.3985, "step": 12252 }, { "epoch": 40.17377049180328, "grad_norm": 4.445622444152832, "learning_rate": 1.3586570781170665e-05, "loss": 0.4192, "step": 12253 }, { "epoch": 40.17704918032787, "grad_norm": 4.019815921783447, "learning_rate": 1.3585579521953557e-05, "loss": 0.4446, "step": 12254 }, { "epoch": 40.18032786885246, "grad_norm": 4.174724102020264, "learning_rate": 1.3584588222305312e-05, "loss": 0.2744, "step": 12255 }, { "epoch": 40.18360655737705, "grad_norm": 3.789381265640259, "learning_rate": 1.358359688223711e-05, "loss": 0.464, "step": 12256 }, { "epoch": 40.18688524590164, "grad_norm": 3.4443790912628174, "learning_rate": 1.3582605501760124e-05, "loss": 0.2855, "step": 12257 }, { "epoch": 40.19016393442623, "grad_norm": 4.050920486450195, "learning_rate": 1.3581614080885538e-05, "loss": 0.4602, "step": 12258 }, { "epoch": 40.19344262295082, "grad_norm": 3.662527084350586, "learning_rate": 1.3580622619624528e-05, "loss": 0.4047, "step": 12259 }, { "epoch": 40.19672131147541, "grad_norm": 3.665243148803711, "learning_rate": 1.3579631117988277e-05, "loss": 0.3866, "step": 12260 }, { "epoch": 40.2, "grad_norm": 4.2859907150268555, "learning_rate": 1.357863957598796e-05, "loss": 0.4203, "step": 12261 }, { "epoch": 40.20327868852459, "grad_norm": 3.8809168338775635, "learning_rate": 1.3577647993634764e-05, "loss": 0.3999, "step": 12262 }, { "epoch": 40.20655737704918, "grad_norm": 3.804497480392456, "learning_rate": 1.357665637093987e-05, "loss": 0.4923, "step": 12263 }, { "epoch": 40.20983606557377, "grad_norm": 4.03047513961792, "learning_rate": 1.357566470791445e-05, "loss": 0.2847, "step": 12264 }, { "epoch": 40.21311475409836, "grad_norm": 3.6159873008728027, "learning_rate": 1.3574673004569694e-05, "loss": 0.3335, "step": 12265 }, { "epoch": 40.21639344262295, "grad_norm": 4.477160930633545, "learning_rate": 1.3573681260916785e-05, "loss": 0.3796, "step": 12266 }, { "epoch": 40.21967213114754, "grad_norm": 3.5423245429992676, "learning_rate": 1.3572689476966903e-05, "loss": 0.2953, "step": 12267 }, { "epoch": 40.22295081967213, "grad_norm": 3.9711670875549316, "learning_rate": 1.357169765273123e-05, "loss": 0.3013, "step": 12268 }, { "epoch": 40.226229508196724, "grad_norm": 4.569041728973389, "learning_rate": 1.3570705788220957e-05, "loss": 0.387, "step": 12269 }, { "epoch": 40.22950819672131, "grad_norm": 4.39108419418335, "learning_rate": 1.3569713883447262e-05, "loss": 0.4707, "step": 12270 }, { "epoch": 40.2327868852459, "grad_norm": 3.894606828689575, "learning_rate": 1.3568721938421333e-05, "loss": 0.4578, "step": 12271 }, { "epoch": 40.23606557377049, "grad_norm": 4.272561073303223, "learning_rate": 1.3567729953154349e-05, "loss": 0.3998, "step": 12272 }, { "epoch": 40.239344262295084, "grad_norm": 4.310171604156494, "learning_rate": 1.3566737927657505e-05, "loss": 0.3944, "step": 12273 }, { "epoch": 40.24262295081967, "grad_norm": 3.145819664001465, "learning_rate": 1.3565745861941978e-05, "loss": 0.3601, "step": 12274 }, { "epoch": 40.24590163934426, "grad_norm": 5.0942511558532715, "learning_rate": 1.3564753756018965e-05, "loss": 0.5111, "step": 12275 }, { "epoch": 40.24918032786885, "grad_norm": 3.667316198348999, "learning_rate": 1.3563761609899643e-05, "loss": 0.368, "step": 12276 }, { "epoch": 40.252459016393445, "grad_norm": 4.267965793609619, "learning_rate": 1.3562769423595204e-05, "loss": 0.2481, "step": 12277 }, { "epoch": 40.25573770491803, "grad_norm": 4.2062506675720215, "learning_rate": 1.3561777197116837e-05, "loss": 0.4027, "step": 12278 }, { "epoch": 40.25901639344262, "grad_norm": 4.066040992736816, "learning_rate": 1.3560784930475725e-05, "loss": 0.5306, "step": 12279 }, { "epoch": 40.26229508196721, "grad_norm": 4.689840316772461, "learning_rate": 1.3559792623683063e-05, "loss": 0.3996, "step": 12280 }, { "epoch": 40.265573770491805, "grad_norm": 4.657528877258301, "learning_rate": 1.3558800276750038e-05, "loss": 0.3614, "step": 12281 }, { "epoch": 40.268852459016394, "grad_norm": 4.0621562004089355, "learning_rate": 1.355780788968784e-05, "loss": 0.474, "step": 12282 }, { "epoch": 40.27213114754098, "grad_norm": 4.420801639556885, "learning_rate": 1.3556815462507658e-05, "loss": 0.3851, "step": 12283 }, { "epoch": 40.27540983606557, "grad_norm": 4.604193210601807, "learning_rate": 1.3555822995220684e-05, "loss": 0.4458, "step": 12284 }, { "epoch": 40.278688524590166, "grad_norm": 4.078088760375977, "learning_rate": 1.3554830487838109e-05, "loss": 0.2441, "step": 12285 }, { "epoch": 40.281967213114754, "grad_norm": 3.7791097164154053, "learning_rate": 1.3553837940371125e-05, "loss": 0.3829, "step": 12286 }, { "epoch": 40.28524590163934, "grad_norm": 3.762397527694702, "learning_rate": 1.3552845352830918e-05, "loss": 0.4033, "step": 12287 }, { "epoch": 40.28852459016394, "grad_norm": 4.668145656585693, "learning_rate": 1.3551852725228691e-05, "loss": 0.4585, "step": 12288 }, { "epoch": 40.291803278688526, "grad_norm": 3.2477076053619385, "learning_rate": 1.355086005757563e-05, "loss": 0.4111, "step": 12289 }, { "epoch": 40.295081967213115, "grad_norm": 4.030693054199219, "learning_rate": 1.3549867349882927e-05, "loss": 0.5163, "step": 12290 }, { "epoch": 40.2983606557377, "grad_norm": 3.6165032386779785, "learning_rate": 1.3548874602161784e-05, "loss": 0.5114, "step": 12291 }, { "epoch": 40.3016393442623, "grad_norm": 3.693786144256592, "learning_rate": 1.3547881814423388e-05, "loss": 0.2366, "step": 12292 }, { "epoch": 40.30491803278689, "grad_norm": 4.589442729949951, "learning_rate": 1.3546888986678932e-05, "loss": 0.5598, "step": 12293 }, { "epoch": 40.308196721311475, "grad_norm": 3.8305041790008545, "learning_rate": 1.354589611893962e-05, "loss": 0.3068, "step": 12294 }, { "epoch": 40.31147540983606, "grad_norm": 4.11452579498291, "learning_rate": 1.3544903211216638e-05, "loss": 0.4524, "step": 12295 }, { "epoch": 40.31475409836066, "grad_norm": 4.021814823150635, "learning_rate": 1.354391026352119e-05, "loss": 0.2928, "step": 12296 }, { "epoch": 40.31803278688525, "grad_norm": 3.902515172958374, "learning_rate": 1.3542917275864467e-05, "loss": 0.2877, "step": 12297 }, { "epoch": 40.321311475409836, "grad_norm": 3.9107251167297363, "learning_rate": 1.3541924248257668e-05, "loss": 0.2933, "step": 12298 }, { "epoch": 40.324590163934424, "grad_norm": 3.5205984115600586, "learning_rate": 1.3540931180711993e-05, "loss": 0.3958, "step": 12299 }, { "epoch": 40.32786885245902, "grad_norm": 4.146237850189209, "learning_rate": 1.3539938073238634e-05, "loss": 0.4782, "step": 12300 }, { "epoch": 40.33114754098361, "grad_norm": 3.74878191947937, "learning_rate": 1.3538944925848796e-05, "loss": 0.3282, "step": 12301 }, { "epoch": 40.334426229508196, "grad_norm": 4.061086654663086, "learning_rate": 1.3537951738553674e-05, "loss": 0.3642, "step": 12302 }, { "epoch": 40.337704918032784, "grad_norm": 4.560856342315674, "learning_rate": 1.3536958511364464e-05, "loss": 0.3657, "step": 12303 }, { "epoch": 40.34098360655738, "grad_norm": 4.300168037414551, "learning_rate": 1.3535965244292372e-05, "loss": 0.1912, "step": 12304 }, { "epoch": 40.34426229508197, "grad_norm": 3.5345706939697266, "learning_rate": 1.3534971937348593e-05, "loss": 0.3596, "step": 12305 }, { "epoch": 40.34754098360656, "grad_norm": 3.8594253063201904, "learning_rate": 1.3533978590544335e-05, "loss": 0.4216, "step": 12306 }, { "epoch": 40.350819672131145, "grad_norm": 3.9844584465026855, "learning_rate": 1.3532985203890788e-05, "loss": 0.217, "step": 12307 }, { "epoch": 40.35409836065574, "grad_norm": 3.264521837234497, "learning_rate": 1.3531991777399162e-05, "loss": 0.212, "step": 12308 }, { "epoch": 40.35737704918033, "grad_norm": 3.58835768699646, "learning_rate": 1.3530998311080657e-05, "loss": 0.3882, "step": 12309 }, { "epoch": 40.36065573770492, "grad_norm": 4.440226078033447, "learning_rate": 1.3530004804946476e-05, "loss": 0.3097, "step": 12310 }, { "epoch": 40.363934426229505, "grad_norm": 4.140727996826172, "learning_rate": 1.3529011259007815e-05, "loss": 0.3825, "step": 12311 }, { "epoch": 40.3672131147541, "grad_norm": 3.6190061569213867, "learning_rate": 1.3528017673275891e-05, "loss": 0.5445, "step": 12312 }, { "epoch": 40.37049180327869, "grad_norm": 5.358807563781738, "learning_rate": 1.3527024047761893e-05, "loss": 0.4953, "step": 12313 }, { "epoch": 40.37377049180328, "grad_norm": 3.6362714767456055, "learning_rate": 1.3526030382477035e-05, "loss": 0.534, "step": 12314 }, { "epoch": 40.377049180327866, "grad_norm": 3.332836866378784, "learning_rate": 1.3525036677432516e-05, "loss": 0.4951, "step": 12315 }, { "epoch": 40.38032786885246, "grad_norm": 4.578973293304443, "learning_rate": 1.3524042932639545e-05, "loss": 0.3431, "step": 12316 }, { "epoch": 40.38360655737705, "grad_norm": 3.6906871795654297, "learning_rate": 1.3523049148109326e-05, "loss": 0.3683, "step": 12317 }, { "epoch": 40.38688524590164, "grad_norm": 3.712517499923706, "learning_rate": 1.3522055323853063e-05, "loss": 0.4082, "step": 12318 }, { "epoch": 40.390163934426226, "grad_norm": 4.1941680908203125, "learning_rate": 1.3521061459881963e-05, "loss": 0.6008, "step": 12319 }, { "epoch": 40.39344262295082, "grad_norm": 5.709843158721924, "learning_rate": 1.3520067556207238e-05, "loss": 0.5109, "step": 12320 }, { "epoch": 40.39672131147541, "grad_norm": 4.186605930328369, "learning_rate": 1.3519073612840091e-05, "loss": 0.427, "step": 12321 }, { "epoch": 40.4, "grad_norm": 3.8432109355926514, "learning_rate": 1.3518079629791725e-05, "loss": 0.4554, "step": 12322 }, { "epoch": 40.40327868852459, "grad_norm": 4.483599662780762, "learning_rate": 1.3517085607073359e-05, "loss": 0.2552, "step": 12323 }, { "epoch": 40.40655737704918, "grad_norm": 4.441377639770508, "learning_rate": 1.3516091544696193e-05, "loss": 0.5603, "step": 12324 }, { "epoch": 40.40983606557377, "grad_norm": 3.949993133544922, "learning_rate": 1.3515097442671442e-05, "loss": 0.3731, "step": 12325 }, { "epoch": 40.41311475409836, "grad_norm": 4.764122486114502, "learning_rate": 1.3514103301010308e-05, "loss": 0.439, "step": 12326 }, { "epoch": 40.41639344262295, "grad_norm": 2.7472448348999023, "learning_rate": 1.351310911972401e-05, "loss": 0.2875, "step": 12327 }, { "epoch": 40.41967213114754, "grad_norm": 4.209522724151611, "learning_rate": 1.3512114898823754e-05, "loss": 0.2918, "step": 12328 }, { "epoch": 40.42295081967213, "grad_norm": 3.4050185680389404, "learning_rate": 1.3511120638320747e-05, "loss": 0.3404, "step": 12329 }, { "epoch": 40.42622950819672, "grad_norm": 3.89778470993042, "learning_rate": 1.3510126338226206e-05, "loss": 0.6512, "step": 12330 }, { "epoch": 40.429508196721315, "grad_norm": 4.96191930770874, "learning_rate": 1.3509131998551342e-05, "loss": 0.3719, "step": 12331 }, { "epoch": 40.4327868852459, "grad_norm": 4.95749568939209, "learning_rate": 1.3508137619307365e-05, "loss": 0.4728, "step": 12332 }, { "epoch": 40.43606557377049, "grad_norm": 3.9444024562835693, "learning_rate": 1.3507143200505488e-05, "loss": 0.4977, "step": 12333 }, { "epoch": 40.43934426229508, "grad_norm": 3.528390645980835, "learning_rate": 1.3506148742156927e-05, "loss": 0.1732, "step": 12334 }, { "epoch": 40.442622950819676, "grad_norm": 4.555029392242432, "learning_rate": 1.350515424427289e-05, "loss": 0.4885, "step": 12335 }, { "epoch": 40.445901639344264, "grad_norm": 4.656202793121338, "learning_rate": 1.3504159706864597e-05, "loss": 0.307, "step": 12336 }, { "epoch": 40.44918032786885, "grad_norm": 4.751631736755371, "learning_rate": 1.350316512994326e-05, "loss": 0.558, "step": 12337 }, { "epoch": 40.45245901639344, "grad_norm": 3.727993965148926, "learning_rate": 1.3502170513520094e-05, "loss": 0.4223, "step": 12338 }, { "epoch": 40.455737704918036, "grad_norm": 4.163812160491943, "learning_rate": 1.3501175857606313e-05, "loss": 0.4902, "step": 12339 }, { "epoch": 40.459016393442624, "grad_norm": 3.917267084121704, "learning_rate": 1.3500181162213135e-05, "loss": 0.5034, "step": 12340 }, { "epoch": 40.46229508196721, "grad_norm": 3.9997735023498535, "learning_rate": 1.3499186427351776e-05, "loss": 0.3883, "step": 12341 }, { "epoch": 40.4655737704918, "grad_norm": 4.466873645782471, "learning_rate": 1.3498191653033448e-05, "loss": 0.6587, "step": 12342 }, { "epoch": 40.4688524590164, "grad_norm": 4.002163410186768, "learning_rate": 1.3497196839269374e-05, "loss": 0.4415, "step": 12343 }, { "epoch": 40.472131147540985, "grad_norm": 4.540243148803711, "learning_rate": 1.3496201986070769e-05, "loss": 0.458, "step": 12344 }, { "epoch": 40.47540983606557, "grad_norm": 4.6497907638549805, "learning_rate": 1.3495207093448854e-05, "loss": 0.4572, "step": 12345 }, { "epoch": 40.47868852459016, "grad_norm": 4.121675968170166, "learning_rate": 1.3494212161414844e-05, "loss": 0.4423, "step": 12346 }, { "epoch": 40.48196721311476, "grad_norm": 4.061494827270508, "learning_rate": 1.3493217189979954e-05, "loss": 0.3148, "step": 12347 }, { "epoch": 40.485245901639345, "grad_norm": 3.827979803085327, "learning_rate": 1.3492222179155415e-05, "loss": 0.3238, "step": 12348 }, { "epoch": 40.488524590163934, "grad_norm": 4.533478260040283, "learning_rate": 1.3491227128952436e-05, "loss": 0.5571, "step": 12349 }, { "epoch": 40.49180327868852, "grad_norm": 4.385289192199707, "learning_rate": 1.349023203938224e-05, "loss": 0.4087, "step": 12350 }, { "epoch": 40.49508196721312, "grad_norm": 4.569397926330566, "learning_rate": 1.3489236910456052e-05, "loss": 0.4573, "step": 12351 }, { "epoch": 40.498360655737706, "grad_norm": 3.9647467136383057, "learning_rate": 1.3488241742185086e-05, "loss": 0.2839, "step": 12352 }, { "epoch": 40.501639344262294, "grad_norm": 3.898111581802368, "learning_rate": 1.3487246534580573e-05, "loss": 0.4255, "step": 12353 }, { "epoch": 40.50491803278688, "grad_norm": 3.7144763469696045, "learning_rate": 1.3486251287653728e-05, "loss": 0.1815, "step": 12354 }, { "epoch": 40.50819672131148, "grad_norm": 3.579397201538086, "learning_rate": 1.3485256001415772e-05, "loss": 0.2836, "step": 12355 }, { "epoch": 40.511475409836066, "grad_norm": 3.727255344390869, "learning_rate": 1.3484260675877934e-05, "loss": 0.4808, "step": 12356 }, { "epoch": 40.514754098360655, "grad_norm": 5.198446750640869, "learning_rate": 1.3483265311051433e-05, "loss": 0.4207, "step": 12357 }, { "epoch": 40.51803278688524, "grad_norm": 3.5832173824310303, "learning_rate": 1.3482269906947494e-05, "loss": 0.551, "step": 12358 }, { "epoch": 40.52131147540984, "grad_norm": 4.47123908996582, "learning_rate": 1.3481274463577339e-05, "loss": 0.3105, "step": 12359 }, { "epoch": 40.52459016393443, "grad_norm": 4.045948028564453, "learning_rate": 1.3480278980952197e-05, "loss": 0.3393, "step": 12360 }, { "epoch": 40.527868852459015, "grad_norm": 4.321852684020996, "learning_rate": 1.347928345908329e-05, "loss": 0.3275, "step": 12361 }, { "epoch": 40.5311475409836, "grad_norm": 4.141613483428955, "learning_rate": 1.3478287897981846e-05, "loss": 0.3986, "step": 12362 }, { "epoch": 40.5344262295082, "grad_norm": 4.563596248626709, "learning_rate": 1.3477292297659087e-05, "loss": 0.6046, "step": 12363 }, { "epoch": 40.53770491803279, "grad_norm": 3.929297924041748, "learning_rate": 1.3476296658126244e-05, "loss": 0.4623, "step": 12364 }, { "epoch": 40.540983606557376, "grad_norm": 4.464880466461182, "learning_rate": 1.3475300979394542e-05, "loss": 0.4142, "step": 12365 }, { "epoch": 40.544262295081964, "grad_norm": 3.6273539066314697, "learning_rate": 1.3474305261475206e-05, "loss": 0.2381, "step": 12366 }, { "epoch": 40.54754098360656, "grad_norm": 4.163436412811279, "learning_rate": 1.347330950437947e-05, "loss": 0.4073, "step": 12367 }, { "epoch": 40.55081967213115, "grad_norm": 3.5963857173919678, "learning_rate": 1.3472313708118553e-05, "loss": 0.2425, "step": 12368 }, { "epoch": 40.554098360655736, "grad_norm": 5.245442867279053, "learning_rate": 1.3471317872703691e-05, "loss": 0.373, "step": 12369 }, { "epoch": 40.557377049180324, "grad_norm": 4.58428955078125, "learning_rate": 1.347032199814611e-05, "loss": 0.3983, "step": 12370 }, { "epoch": 40.56065573770492, "grad_norm": 3.9146924018859863, "learning_rate": 1.3469326084457041e-05, "loss": 0.3386, "step": 12371 }, { "epoch": 40.56393442622951, "grad_norm": 3.9045143127441406, "learning_rate": 1.346833013164771e-05, "loss": 0.3341, "step": 12372 }, { "epoch": 40.5672131147541, "grad_norm": 4.1049394607543945, "learning_rate": 1.3467334139729354e-05, "loss": 0.5302, "step": 12373 }, { "epoch": 40.570491803278685, "grad_norm": 4.581599712371826, "learning_rate": 1.3466338108713202e-05, "loss": 0.4216, "step": 12374 }, { "epoch": 40.57377049180328, "grad_norm": 3.8150794506073, "learning_rate": 1.3465342038610479e-05, "loss": 0.4261, "step": 12375 }, { "epoch": 40.57704918032787, "grad_norm": 3.892056941986084, "learning_rate": 1.3464345929432425e-05, "loss": 0.2391, "step": 12376 }, { "epoch": 40.58032786885246, "grad_norm": 4.188498497009277, "learning_rate": 1.3463349781190267e-05, "loss": 0.4537, "step": 12377 }, { "epoch": 40.58360655737705, "grad_norm": 3.7573482990264893, "learning_rate": 1.3462353593895238e-05, "loss": 0.6114, "step": 12378 }, { "epoch": 40.58688524590164, "grad_norm": 3.843773603439331, "learning_rate": 1.3461357367558575e-05, "loss": 0.313, "step": 12379 }, { "epoch": 40.59016393442623, "grad_norm": 4.536468505859375, "learning_rate": 1.346036110219151e-05, "loss": 0.4656, "step": 12380 }, { "epoch": 40.59344262295082, "grad_norm": 3.759021043777466, "learning_rate": 1.3459364797805273e-05, "loss": 0.3686, "step": 12381 }, { "epoch": 40.59672131147541, "grad_norm": 5.0485076904296875, "learning_rate": 1.3458368454411101e-05, "loss": 0.352, "step": 12382 }, { "epoch": 40.6, "grad_norm": 3.938260555267334, "learning_rate": 1.345737207202023e-05, "loss": 0.6304, "step": 12383 }, { "epoch": 40.60327868852459, "grad_norm": 3.70564603805542, "learning_rate": 1.3456375650643893e-05, "loss": 0.4715, "step": 12384 }, { "epoch": 40.60655737704918, "grad_norm": 3.943794012069702, "learning_rate": 1.3455379190293327e-05, "loss": 0.3041, "step": 12385 }, { "epoch": 40.609836065573774, "grad_norm": 4.194395542144775, "learning_rate": 1.345438269097977e-05, "loss": 0.5843, "step": 12386 }, { "epoch": 40.61311475409836, "grad_norm": 6.4570159912109375, "learning_rate": 1.3453386152714454e-05, "loss": 0.5202, "step": 12387 }, { "epoch": 40.61639344262295, "grad_norm": 3.661588668823242, "learning_rate": 1.345238957550862e-05, "loss": 0.4667, "step": 12388 }, { "epoch": 40.61967213114754, "grad_norm": 3.814228057861328, "learning_rate": 1.3451392959373502e-05, "loss": 0.3531, "step": 12389 }, { "epoch": 40.622950819672134, "grad_norm": 4.673928260803223, "learning_rate": 1.345039630432034e-05, "loss": 0.3809, "step": 12390 }, { "epoch": 40.62622950819672, "grad_norm": 3.185197591781616, "learning_rate": 1.3449399610360376e-05, "loss": 0.4675, "step": 12391 }, { "epoch": 40.62950819672131, "grad_norm": 3.441490650177002, "learning_rate": 1.3448402877504841e-05, "loss": 0.2092, "step": 12392 }, { "epoch": 40.6327868852459, "grad_norm": 4.144153118133545, "learning_rate": 1.3447406105764982e-05, "loss": 0.3411, "step": 12393 }, { "epoch": 40.636065573770495, "grad_norm": 3.983280658721924, "learning_rate": 1.3446409295152029e-05, "loss": 0.2062, "step": 12394 }, { "epoch": 40.63934426229508, "grad_norm": 3.92160964012146, "learning_rate": 1.3445412445677234e-05, "loss": 0.3212, "step": 12395 }, { "epoch": 40.64262295081967, "grad_norm": 4.613183498382568, "learning_rate": 1.3444415557351827e-05, "loss": 0.2911, "step": 12396 }, { "epoch": 40.64590163934426, "grad_norm": 4.0421576499938965, "learning_rate": 1.3443418630187054e-05, "loss": 0.3514, "step": 12397 }, { "epoch": 40.649180327868855, "grad_norm": 4.480586051940918, "learning_rate": 1.3442421664194156e-05, "loss": 0.6272, "step": 12398 }, { "epoch": 40.65245901639344, "grad_norm": 3.5556132793426514, "learning_rate": 1.3441424659384374e-05, "loss": 0.4135, "step": 12399 }, { "epoch": 40.65573770491803, "grad_norm": 4.2538275718688965, "learning_rate": 1.3440427615768951e-05, "loss": 0.5394, "step": 12400 }, { "epoch": 40.65901639344262, "grad_norm": 4.055660247802734, "learning_rate": 1.343943053335913e-05, "loss": 0.3696, "step": 12401 }, { "epoch": 40.662295081967216, "grad_norm": 4.504842758178711, "learning_rate": 1.3438433412166154e-05, "loss": 0.3526, "step": 12402 }, { "epoch": 40.665573770491804, "grad_norm": 3.5301296710968018, "learning_rate": 1.3437436252201266e-05, "loss": 0.3477, "step": 12403 }, { "epoch": 40.66885245901639, "grad_norm": 3.4813780784606934, "learning_rate": 1.343643905347571e-05, "loss": 0.2948, "step": 12404 }, { "epoch": 40.67213114754098, "grad_norm": 3.5720481872558594, "learning_rate": 1.3435441816000729e-05, "loss": 0.5042, "step": 12405 }, { "epoch": 40.675409836065576, "grad_norm": 5.093912601470947, "learning_rate": 1.3434444539787575e-05, "loss": 0.3324, "step": 12406 }, { "epoch": 40.678688524590164, "grad_norm": 5.042726516723633, "learning_rate": 1.3433447224847482e-05, "loss": 0.4111, "step": 12407 }, { "epoch": 40.68196721311475, "grad_norm": 3.7541043758392334, "learning_rate": 1.3432449871191704e-05, "loss": 0.4195, "step": 12408 }, { "epoch": 40.68524590163934, "grad_norm": 4.390138626098633, "learning_rate": 1.3431452478831483e-05, "loss": 0.3183, "step": 12409 }, { "epoch": 40.68852459016394, "grad_norm": 3.999441146850586, "learning_rate": 1.3430455047778072e-05, "loss": 0.2745, "step": 12410 }, { "epoch": 40.691803278688525, "grad_norm": 4.081667423248291, "learning_rate": 1.3429457578042708e-05, "loss": 0.3965, "step": 12411 }, { "epoch": 40.69508196721311, "grad_norm": 4.53081750869751, "learning_rate": 1.3428460069636648e-05, "loss": 0.3549, "step": 12412 }, { "epoch": 40.6983606557377, "grad_norm": 5.166459560394287, "learning_rate": 1.3427462522571135e-05, "loss": 0.4506, "step": 12413 }, { "epoch": 40.7016393442623, "grad_norm": 4.683684349060059, "learning_rate": 1.342646493685742e-05, "loss": 0.3434, "step": 12414 }, { "epoch": 40.704918032786885, "grad_norm": 4.222109317779541, "learning_rate": 1.3425467312506746e-05, "loss": 0.3649, "step": 12415 }, { "epoch": 40.708196721311474, "grad_norm": 4.937870502471924, "learning_rate": 1.342446964953037e-05, "loss": 0.5453, "step": 12416 }, { "epoch": 40.71147540983607, "grad_norm": 3.9647955894470215, "learning_rate": 1.3423471947939539e-05, "loss": 0.3848, "step": 12417 }, { "epoch": 40.71475409836066, "grad_norm": 4.271966457366943, "learning_rate": 1.3422474207745499e-05, "loss": 0.362, "step": 12418 }, { "epoch": 40.718032786885246, "grad_norm": 4.506864547729492, "learning_rate": 1.3421476428959505e-05, "loss": 0.5547, "step": 12419 }, { "epoch": 40.721311475409834, "grad_norm": 4.781219482421875, "learning_rate": 1.3420478611592807e-05, "loss": 0.2344, "step": 12420 }, { "epoch": 40.72459016393443, "grad_norm": 4.412373065948486, "learning_rate": 1.3419480755656657e-05, "loss": 0.5248, "step": 12421 }, { "epoch": 40.72786885245902, "grad_norm": 3.8060333728790283, "learning_rate": 1.3418482861162305e-05, "loss": 0.3453, "step": 12422 }, { "epoch": 40.731147540983606, "grad_norm": 3.3071129322052, "learning_rate": 1.3417484928121005e-05, "loss": 0.5757, "step": 12423 }, { "epoch": 40.734426229508195, "grad_norm": 3.7920475006103516, "learning_rate": 1.3416486956544009e-05, "loss": 0.2709, "step": 12424 }, { "epoch": 40.73770491803279, "grad_norm": 4.60983419418335, "learning_rate": 1.341548894644257e-05, "loss": 0.3644, "step": 12425 }, { "epoch": 40.74098360655738, "grad_norm": 4.618677616119385, "learning_rate": 1.341449089782794e-05, "loss": 0.408, "step": 12426 }, { "epoch": 40.74426229508197, "grad_norm": 3.118184804916382, "learning_rate": 1.341349281071138e-05, "loss": 0.4123, "step": 12427 }, { "epoch": 40.747540983606555, "grad_norm": 3.8736345767974854, "learning_rate": 1.3412494685104133e-05, "loss": 0.5601, "step": 12428 }, { "epoch": 40.75081967213115, "grad_norm": 4.405992031097412, "learning_rate": 1.3411496521017465e-05, "loss": 0.4346, "step": 12429 }, { "epoch": 40.75409836065574, "grad_norm": 3.6959424018859863, "learning_rate": 1.3410498318462625e-05, "loss": 0.4084, "step": 12430 }, { "epoch": 40.75737704918033, "grad_norm": 4.7308197021484375, "learning_rate": 1.3409500077450869e-05, "loss": 0.4409, "step": 12431 }, { "epoch": 40.760655737704916, "grad_norm": 4.871934413909912, "learning_rate": 1.3408501797993458e-05, "loss": 0.3272, "step": 12432 }, { "epoch": 40.76393442622951, "grad_norm": 4.215899467468262, "learning_rate": 1.3407503480101642e-05, "loss": 0.5177, "step": 12433 }, { "epoch": 40.7672131147541, "grad_norm": 4.4629669189453125, "learning_rate": 1.3406505123786686e-05, "loss": 0.5273, "step": 12434 }, { "epoch": 40.77049180327869, "grad_norm": 3.835843801498413, "learning_rate": 1.3405506729059839e-05, "loss": 0.4048, "step": 12435 }, { "epoch": 40.773770491803276, "grad_norm": 4.322947978973389, "learning_rate": 1.3404508295932363e-05, "loss": 0.3293, "step": 12436 }, { "epoch": 40.77704918032787, "grad_norm": 4.025792121887207, "learning_rate": 1.3403509824415517e-05, "loss": 0.5914, "step": 12437 }, { "epoch": 40.78032786885246, "grad_norm": 3.4174931049346924, "learning_rate": 1.340251131452056e-05, "loss": 0.3384, "step": 12438 }, { "epoch": 40.78360655737705, "grad_norm": 3.6384458541870117, "learning_rate": 1.3401512766258749e-05, "loss": 0.4102, "step": 12439 }, { "epoch": 40.78688524590164, "grad_norm": 4.128764629364014, "learning_rate": 1.3400514179641344e-05, "loss": 0.4894, "step": 12440 }, { "epoch": 40.79016393442623, "grad_norm": 4.379978179931641, "learning_rate": 1.3399515554679607e-05, "loss": 0.2762, "step": 12441 }, { "epoch": 40.79344262295082, "grad_norm": 4.060337066650391, "learning_rate": 1.3398516891384798e-05, "loss": 0.225, "step": 12442 }, { "epoch": 40.79672131147541, "grad_norm": 3.9989640712738037, "learning_rate": 1.3397518189768177e-05, "loss": 0.445, "step": 12443 }, { "epoch": 40.8, "grad_norm": 3.693716526031494, "learning_rate": 1.3396519449841006e-05, "loss": 0.6043, "step": 12444 }, { "epoch": 40.80327868852459, "grad_norm": 3.359375, "learning_rate": 1.3395520671614549e-05, "loss": 0.4514, "step": 12445 }, { "epoch": 40.80655737704918, "grad_norm": 3.864642858505249, "learning_rate": 1.339452185510006e-05, "loss": 0.3636, "step": 12446 }, { "epoch": 40.80983606557377, "grad_norm": 3.644995927810669, "learning_rate": 1.3393523000308812e-05, "loss": 0.2496, "step": 12447 }, { "epoch": 40.81311475409836, "grad_norm": 3.662132501602173, "learning_rate": 1.3392524107252062e-05, "loss": 0.3408, "step": 12448 }, { "epoch": 40.81639344262295, "grad_norm": 3.9123921394348145, "learning_rate": 1.3391525175941078e-05, "loss": 0.4942, "step": 12449 }, { "epoch": 40.81967213114754, "grad_norm": 3.949275255203247, "learning_rate": 1.3390526206387117e-05, "loss": 0.3731, "step": 12450 }, { "epoch": 40.82295081967213, "grad_norm": 3.36335825920105, "learning_rate": 1.3389527198601454e-05, "loss": 0.4532, "step": 12451 }, { "epoch": 40.82622950819672, "grad_norm": 4.275416851043701, "learning_rate": 1.338852815259534e-05, "loss": 0.6035, "step": 12452 }, { "epoch": 40.829508196721314, "grad_norm": 3.111210346221924, "learning_rate": 1.3387529068380056e-05, "loss": 0.5176, "step": 12453 }, { "epoch": 40.8327868852459, "grad_norm": 4.2219343185424805, "learning_rate": 1.3386529945966854e-05, "loss": 0.4493, "step": 12454 }, { "epoch": 40.83606557377049, "grad_norm": 7.183006763458252, "learning_rate": 1.3385530785367005e-05, "loss": 0.5464, "step": 12455 }, { "epoch": 40.83934426229508, "grad_norm": 4.6552042961120605, "learning_rate": 1.338453158659178e-05, "loss": 0.2555, "step": 12456 }, { "epoch": 40.842622950819674, "grad_norm": 3.366503953933716, "learning_rate": 1.338353234965244e-05, "loss": 0.334, "step": 12457 }, { "epoch": 40.84590163934426, "grad_norm": 4.020139217376709, "learning_rate": 1.3382533074560256e-05, "loss": 0.3572, "step": 12458 }, { "epoch": 40.84918032786885, "grad_norm": 3.6088995933532715, "learning_rate": 1.3381533761326494e-05, "loss": 0.3064, "step": 12459 }, { "epoch": 40.85245901639344, "grad_norm": 3.720799446105957, "learning_rate": 1.3380534409962425e-05, "loss": 0.4791, "step": 12460 }, { "epoch": 40.855737704918035, "grad_norm": 4.610383033752441, "learning_rate": 1.337953502047931e-05, "loss": 0.4244, "step": 12461 }, { "epoch": 40.85901639344262, "grad_norm": 3.3898167610168457, "learning_rate": 1.3378535592888431e-05, "loss": 0.3327, "step": 12462 }, { "epoch": 40.86229508196721, "grad_norm": 3.436976909637451, "learning_rate": 1.3377536127201045e-05, "loss": 0.3004, "step": 12463 }, { "epoch": 40.86557377049181, "grad_norm": 4.569047451019287, "learning_rate": 1.3376536623428431e-05, "loss": 0.4145, "step": 12464 }, { "epoch": 40.868852459016395, "grad_norm": 3.85998797416687, "learning_rate": 1.3375537081581853e-05, "loss": 0.4481, "step": 12465 }, { "epoch": 40.87213114754098, "grad_norm": 3.934364080429077, "learning_rate": 1.3374537501672587e-05, "loss": 0.4562, "step": 12466 }, { "epoch": 40.87540983606557, "grad_norm": 4.160168170928955, "learning_rate": 1.33735378837119e-05, "loss": 0.4447, "step": 12467 }, { "epoch": 40.87868852459017, "grad_norm": 4.484077453613281, "learning_rate": 1.3372538227711069e-05, "loss": 0.3177, "step": 12468 }, { "epoch": 40.881967213114756, "grad_norm": 4.105336666107178, "learning_rate": 1.3371538533681361e-05, "loss": 0.4338, "step": 12469 }, { "epoch": 40.885245901639344, "grad_norm": 4.317821979522705, "learning_rate": 1.337053880163405e-05, "loss": 0.298, "step": 12470 }, { "epoch": 40.88852459016393, "grad_norm": 3.685326099395752, "learning_rate": 1.336953903158041e-05, "loss": 0.4711, "step": 12471 }, { "epoch": 40.89180327868853, "grad_norm": 4.8741841316223145, "learning_rate": 1.3368539223531713e-05, "loss": 0.34, "step": 12472 }, { "epoch": 40.895081967213116, "grad_norm": 4.874592304229736, "learning_rate": 1.3367539377499234e-05, "loss": 0.2575, "step": 12473 }, { "epoch": 40.898360655737704, "grad_norm": 5.343216896057129, "learning_rate": 1.3366539493494248e-05, "loss": 0.5812, "step": 12474 }, { "epoch": 40.90163934426229, "grad_norm": 4.858867645263672, "learning_rate": 1.336553957152803e-05, "loss": 0.3065, "step": 12475 }, { "epoch": 40.90491803278689, "grad_norm": 3.8184258937835693, "learning_rate": 1.3364539611611851e-05, "loss": 0.5153, "step": 12476 }, { "epoch": 40.90819672131148, "grad_norm": 3.4209649562835693, "learning_rate": 1.3363539613756992e-05, "loss": 0.5022, "step": 12477 }, { "epoch": 40.911475409836065, "grad_norm": 4.1640801429748535, "learning_rate": 1.3362539577974727e-05, "loss": 0.3354, "step": 12478 }, { "epoch": 40.91475409836065, "grad_norm": 8.223488807678223, "learning_rate": 1.3361539504276331e-05, "loss": 0.2524, "step": 12479 }, { "epoch": 40.91803278688525, "grad_norm": 3.753965377807617, "learning_rate": 1.3360539392673082e-05, "loss": 0.2477, "step": 12480 }, { "epoch": 40.92131147540984, "grad_norm": 3.611189365386963, "learning_rate": 1.3359539243176259e-05, "loss": 0.5176, "step": 12481 }, { "epoch": 40.924590163934425, "grad_norm": 4.203989028930664, "learning_rate": 1.3358539055797133e-05, "loss": 0.328, "step": 12482 }, { "epoch": 40.927868852459014, "grad_norm": 4.291794776916504, "learning_rate": 1.335753883054699e-05, "loss": 0.4299, "step": 12483 }, { "epoch": 40.93114754098361, "grad_norm": 3.4065358638763428, "learning_rate": 1.3356538567437108e-05, "loss": 0.7133, "step": 12484 }, { "epoch": 40.9344262295082, "grad_norm": 7.98599910736084, "learning_rate": 1.335553826647876e-05, "loss": 0.4732, "step": 12485 }, { "epoch": 40.937704918032786, "grad_norm": 4.636048793792725, "learning_rate": 1.3354537927683234e-05, "loss": 0.2518, "step": 12486 }, { "epoch": 40.940983606557374, "grad_norm": 3.847707509994507, "learning_rate": 1.33535375510618e-05, "loss": 0.5834, "step": 12487 }, { "epoch": 40.94426229508197, "grad_norm": 4.743293762207031, "learning_rate": 1.3352537136625748e-05, "loss": 0.7269, "step": 12488 }, { "epoch": 40.94754098360656, "grad_norm": 3.582176446914673, "learning_rate": 1.3351536684386347e-05, "loss": 0.306, "step": 12489 }, { "epoch": 40.950819672131146, "grad_norm": 4.592571258544922, "learning_rate": 1.3350536194354893e-05, "loss": 0.3825, "step": 12490 }, { "epoch": 40.954098360655735, "grad_norm": 3.9667768478393555, "learning_rate": 1.3349535666542654e-05, "loss": 0.3609, "step": 12491 }, { "epoch": 40.95737704918033, "grad_norm": 3.948324680328369, "learning_rate": 1.3348535100960922e-05, "loss": 0.2091, "step": 12492 }, { "epoch": 40.96065573770492, "grad_norm": 4.464352130889893, "learning_rate": 1.334753449762097e-05, "loss": 0.403, "step": 12493 }, { "epoch": 40.96393442622951, "grad_norm": 4.829479217529297, "learning_rate": 1.3346533856534092e-05, "loss": 0.4919, "step": 12494 }, { "epoch": 40.967213114754095, "grad_norm": 3.803429126739502, "learning_rate": 1.3345533177711563e-05, "loss": 0.5715, "step": 12495 }, { "epoch": 40.97049180327869, "grad_norm": 4.801233291625977, "learning_rate": 1.3344532461164667e-05, "loss": 0.2913, "step": 12496 }, { "epoch": 40.97377049180328, "grad_norm": 4.197816371917725, "learning_rate": 1.3343531706904694e-05, "loss": 0.3683, "step": 12497 }, { "epoch": 40.97704918032787, "grad_norm": 4.167715549468994, "learning_rate": 1.334253091494292e-05, "loss": 0.3361, "step": 12498 }, { "epoch": 40.980327868852456, "grad_norm": 4.410492897033691, "learning_rate": 1.3341530085290637e-05, "loss": 0.3554, "step": 12499 }, { "epoch": 40.98360655737705, "grad_norm": 4.672237873077393, "learning_rate": 1.3340529217959126e-05, "loss": 0.5553, "step": 12500 }, { "epoch": 40.98688524590164, "grad_norm": 3.693617820739746, "learning_rate": 1.3339528312959678e-05, "loss": 0.3141, "step": 12501 }, { "epoch": 40.99016393442623, "grad_norm": 4.040022850036621, "learning_rate": 1.3338527370303573e-05, "loss": 0.4929, "step": 12502 }, { "epoch": 40.993442622950816, "grad_norm": 3.521062135696411, "learning_rate": 1.3337526390002102e-05, "loss": 0.2564, "step": 12503 }, { "epoch": 40.99672131147541, "grad_norm": 3.138136386871338, "learning_rate": 1.333652537206655e-05, "loss": 0.2951, "step": 12504 }, { "epoch": 41.0, "grad_norm": 4.471245765686035, "learning_rate": 1.3335524316508208e-05, "loss": 0.4166, "step": 12505 }, { "epoch": 41.00327868852459, "grad_norm": 4.209595680236816, "learning_rate": 1.3334523223338358e-05, "loss": 0.4864, "step": 12506 }, { "epoch": 41.006557377049184, "grad_norm": 3.769059658050537, "learning_rate": 1.3333522092568294e-05, "loss": 0.4225, "step": 12507 }, { "epoch": 41.00983606557377, "grad_norm": 3.7338430881500244, "learning_rate": 1.33325209242093e-05, "loss": 0.2095, "step": 12508 }, { "epoch": 41.01311475409836, "grad_norm": 5.725956916809082, "learning_rate": 1.3331519718272672e-05, "loss": 0.4158, "step": 12509 }, { "epoch": 41.01639344262295, "grad_norm": 5.09309720993042, "learning_rate": 1.333051847476969e-05, "loss": 0.2988, "step": 12510 }, { "epoch": 41.019672131147544, "grad_norm": 3.4782304763793945, "learning_rate": 1.3329517193711653e-05, "loss": 0.354, "step": 12511 }, { "epoch": 41.02295081967213, "grad_norm": 3.983886241912842, "learning_rate": 1.3328515875109847e-05, "loss": 0.2666, "step": 12512 }, { "epoch": 41.02622950819672, "grad_norm": 3.5153048038482666, "learning_rate": 1.332751451897556e-05, "loss": 0.3142, "step": 12513 }, { "epoch": 41.02950819672131, "grad_norm": 4.320437908172607, "learning_rate": 1.332651312532009e-05, "loss": 0.397, "step": 12514 }, { "epoch": 41.032786885245905, "grad_norm": 4.636473178863525, "learning_rate": 1.3325511694154727e-05, "loss": 0.4913, "step": 12515 }, { "epoch": 41.03606557377049, "grad_norm": 4.7457804679870605, "learning_rate": 1.3324510225490763e-05, "loss": 0.3535, "step": 12516 }, { "epoch": 41.03934426229508, "grad_norm": 3.3240509033203125, "learning_rate": 1.3323508719339487e-05, "loss": 0.2835, "step": 12517 }, { "epoch": 41.04262295081967, "grad_norm": 4.204760551452637, "learning_rate": 1.3322507175712197e-05, "loss": 0.4731, "step": 12518 }, { "epoch": 41.045901639344265, "grad_norm": 3.7645158767700195, "learning_rate": 1.3321505594620178e-05, "loss": 0.4052, "step": 12519 }, { "epoch": 41.049180327868854, "grad_norm": 3.9548282623291016, "learning_rate": 1.3320503976074736e-05, "loss": 0.5092, "step": 12520 }, { "epoch": 41.05245901639344, "grad_norm": 3.10738468170166, "learning_rate": 1.3319502320087158e-05, "loss": 0.1582, "step": 12521 }, { "epoch": 41.05573770491803, "grad_norm": 3.2203657627105713, "learning_rate": 1.3318500626668738e-05, "loss": 0.2959, "step": 12522 }, { "epoch": 41.059016393442626, "grad_norm": 3.7967071533203125, "learning_rate": 1.3317498895830777e-05, "loss": 0.2974, "step": 12523 }, { "epoch": 41.062295081967214, "grad_norm": 3.7352375984191895, "learning_rate": 1.3316497127584562e-05, "loss": 0.232, "step": 12524 }, { "epoch": 41.0655737704918, "grad_norm": 3.777853012084961, "learning_rate": 1.3315495321941397e-05, "loss": 0.4414, "step": 12525 }, { "epoch": 41.06885245901639, "grad_norm": 4.088205337524414, "learning_rate": 1.3314493478912574e-05, "loss": 0.393, "step": 12526 }, { "epoch": 41.072131147540986, "grad_norm": 3.8213162422180176, "learning_rate": 1.3313491598509389e-05, "loss": 0.2898, "step": 12527 }, { "epoch": 41.075409836065575, "grad_norm": 4.084128379821777, "learning_rate": 1.331248968074314e-05, "loss": 0.4429, "step": 12528 }, { "epoch": 41.07868852459016, "grad_norm": 3.77994441986084, "learning_rate": 1.3311487725625132e-05, "loss": 0.2871, "step": 12529 }, { "epoch": 41.08196721311475, "grad_norm": 5.135851860046387, "learning_rate": 1.3310485733166652e-05, "loss": 0.4783, "step": 12530 }, { "epoch": 41.08524590163935, "grad_norm": 13.452367782592773, "learning_rate": 1.3309483703379004e-05, "loss": 0.4884, "step": 12531 }, { "epoch": 41.088524590163935, "grad_norm": 4.205394268035889, "learning_rate": 1.3308481636273487e-05, "loss": 0.302, "step": 12532 }, { "epoch": 41.09180327868852, "grad_norm": 4.414432048797607, "learning_rate": 1.33074795318614e-05, "loss": 0.3745, "step": 12533 }, { "epoch": 41.09508196721311, "grad_norm": 4.864893913269043, "learning_rate": 1.330647739015404e-05, "loss": 0.5592, "step": 12534 }, { "epoch": 41.09836065573771, "grad_norm": 3.737480401992798, "learning_rate": 1.3305475211162713e-05, "loss": 0.4955, "step": 12535 }, { "epoch": 41.101639344262296, "grad_norm": 4.079229354858398, "learning_rate": 1.3304472994898712e-05, "loss": 0.3108, "step": 12536 }, { "epoch": 41.104918032786884, "grad_norm": 3.926445484161377, "learning_rate": 1.3303470741373343e-05, "loss": 0.4137, "step": 12537 }, { "epoch": 41.10819672131147, "grad_norm": 3.476771831512451, "learning_rate": 1.330246845059791e-05, "loss": 0.2909, "step": 12538 }, { "epoch": 41.11147540983607, "grad_norm": 3.8902266025543213, "learning_rate": 1.3301466122583708e-05, "loss": 0.5199, "step": 12539 }, { "epoch": 41.114754098360656, "grad_norm": 4.58561897277832, "learning_rate": 1.3300463757342046e-05, "loss": 0.4865, "step": 12540 }, { "epoch": 41.118032786885244, "grad_norm": 3.487104892730713, "learning_rate": 1.329946135488422e-05, "loss": 0.3957, "step": 12541 }, { "epoch": 41.12131147540983, "grad_norm": 5.677691459655762, "learning_rate": 1.3298458915221539e-05, "loss": 0.3305, "step": 12542 }, { "epoch": 41.12459016393443, "grad_norm": 4.632540702819824, "learning_rate": 1.3297456438365304e-05, "loss": 0.5554, "step": 12543 }, { "epoch": 41.12786885245902, "grad_norm": 4.727853298187256, "learning_rate": 1.329645392432682e-05, "loss": 0.3091, "step": 12544 }, { "epoch": 41.131147540983605, "grad_norm": 7.9524407386779785, "learning_rate": 1.3295451373117387e-05, "loss": 0.5311, "step": 12545 }, { "epoch": 41.13442622950819, "grad_norm": 4.396746635437012, "learning_rate": 1.3294448784748316e-05, "loss": 0.3875, "step": 12546 }, { "epoch": 41.13770491803279, "grad_norm": 3.7908077239990234, "learning_rate": 1.3293446159230911e-05, "loss": 0.341, "step": 12547 }, { "epoch": 41.14098360655738, "grad_norm": 4.351805210113525, "learning_rate": 1.3292443496576475e-05, "loss": 0.5279, "step": 12548 }, { "epoch": 41.144262295081965, "grad_norm": 4.800939559936523, "learning_rate": 1.3291440796796315e-05, "loss": 0.3937, "step": 12549 }, { "epoch": 41.14754098360656, "grad_norm": 4.465767860412598, "learning_rate": 1.3290438059901738e-05, "loss": 0.5788, "step": 12550 }, { "epoch": 41.15081967213115, "grad_norm": 4.265585422515869, "learning_rate": 1.3289435285904051e-05, "loss": 0.4383, "step": 12551 }, { "epoch": 41.15409836065574, "grad_norm": 4.48323917388916, "learning_rate": 1.3288432474814558e-05, "loss": 0.5474, "step": 12552 }, { "epoch": 41.157377049180326, "grad_norm": 3.881984233856201, "learning_rate": 1.3287429626644575e-05, "loss": 0.3733, "step": 12553 }, { "epoch": 41.16065573770492, "grad_norm": 3.8101966381073, "learning_rate": 1.3286426741405401e-05, "loss": 0.3763, "step": 12554 }, { "epoch": 41.16393442622951, "grad_norm": 9.057748794555664, "learning_rate": 1.3285423819108349e-05, "loss": 0.4548, "step": 12555 }, { "epoch": 41.1672131147541, "grad_norm": 3.9567785263061523, "learning_rate": 1.3284420859764726e-05, "loss": 0.4014, "step": 12556 }, { "epoch": 41.170491803278686, "grad_norm": 4.021728515625, "learning_rate": 1.3283417863385849e-05, "loss": 0.4341, "step": 12557 }, { "epoch": 41.17377049180328, "grad_norm": 3.9327173233032227, "learning_rate": 1.3282414829983014e-05, "loss": 0.3203, "step": 12558 }, { "epoch": 41.17704918032787, "grad_norm": 3.8342955112457275, "learning_rate": 1.3281411759567544e-05, "loss": 0.2184, "step": 12559 }, { "epoch": 41.18032786885246, "grad_norm": 4.578671455383301, "learning_rate": 1.3280408652150745e-05, "loss": 0.4377, "step": 12560 }, { "epoch": 41.18360655737705, "grad_norm": 3.506788730621338, "learning_rate": 1.3279405507743923e-05, "loss": 0.4465, "step": 12561 }, { "epoch": 41.18688524590164, "grad_norm": 3.602165460586548, "learning_rate": 1.3278402326358397e-05, "loss": 0.3309, "step": 12562 }, { "epoch": 41.19016393442623, "grad_norm": 4.054706573486328, "learning_rate": 1.3277399108005478e-05, "loss": 0.3331, "step": 12563 }, { "epoch": 41.19344262295082, "grad_norm": 4.202820301055908, "learning_rate": 1.327639585269647e-05, "loss": 0.4108, "step": 12564 }, { "epoch": 41.19672131147541, "grad_norm": 3.93407940864563, "learning_rate": 1.3275392560442697e-05, "loss": 0.2458, "step": 12565 }, { "epoch": 41.2, "grad_norm": 6.491087436676025, "learning_rate": 1.3274389231255466e-05, "loss": 0.3593, "step": 12566 }, { "epoch": 41.20327868852459, "grad_norm": 3.38313627243042, "learning_rate": 1.3273385865146093e-05, "loss": 0.3926, "step": 12567 }, { "epoch": 41.20655737704918, "grad_norm": 3.787262439727783, "learning_rate": 1.3272382462125893e-05, "loss": 0.2315, "step": 12568 }, { "epoch": 41.20983606557377, "grad_norm": 3.404144048690796, "learning_rate": 1.3271379022206174e-05, "loss": 0.3912, "step": 12569 }, { "epoch": 41.21311475409836, "grad_norm": 3.742060661315918, "learning_rate": 1.327037554539826e-05, "loss": 0.6216, "step": 12570 }, { "epoch": 41.21639344262295, "grad_norm": 3.5832419395446777, "learning_rate": 1.3269372031713456e-05, "loss": 0.4938, "step": 12571 }, { "epoch": 41.21967213114754, "grad_norm": 3.3964450359344482, "learning_rate": 1.3268368481163085e-05, "loss": 0.5644, "step": 12572 }, { "epoch": 41.22295081967213, "grad_norm": 3.551880121231079, "learning_rate": 1.3267364893758465e-05, "loss": 0.3797, "step": 12573 }, { "epoch": 41.226229508196724, "grad_norm": 3.563000440597534, "learning_rate": 1.3266361269510904e-05, "loss": 0.4294, "step": 12574 }, { "epoch": 41.22950819672131, "grad_norm": 4.410012245178223, "learning_rate": 1.3265357608431726e-05, "loss": 0.2576, "step": 12575 }, { "epoch": 41.2327868852459, "grad_norm": 4.415119647979736, "learning_rate": 1.3264353910532242e-05, "loss": 0.3501, "step": 12576 }, { "epoch": 41.23606557377049, "grad_norm": 4.02602481842041, "learning_rate": 1.3263350175823778e-05, "loss": 0.3736, "step": 12577 }, { "epoch": 41.239344262295084, "grad_norm": 4.703250408172607, "learning_rate": 1.3262346404317646e-05, "loss": 0.4272, "step": 12578 }, { "epoch": 41.24262295081967, "grad_norm": 4.263700008392334, "learning_rate": 1.3261342596025167e-05, "loss": 0.3752, "step": 12579 }, { "epoch": 41.24590163934426, "grad_norm": 4.293125629425049, "learning_rate": 1.3260338750957656e-05, "loss": 0.394, "step": 12580 }, { "epoch": 41.24918032786885, "grad_norm": 2.830791711807251, "learning_rate": 1.3259334869126441e-05, "loss": 0.2011, "step": 12581 }, { "epoch": 41.252459016393445, "grad_norm": 3.650759696960449, "learning_rate": 1.325833095054283e-05, "loss": 0.3182, "step": 12582 }, { "epoch": 41.25573770491803, "grad_norm": 3.297412633895874, "learning_rate": 1.3257326995218157e-05, "loss": 0.4035, "step": 12583 }, { "epoch": 41.25901639344262, "grad_norm": 3.89298677444458, "learning_rate": 1.3256323003163729e-05, "loss": 0.2782, "step": 12584 }, { "epoch": 41.26229508196721, "grad_norm": 4.1343889236450195, "learning_rate": 1.3255318974390879e-05, "loss": 0.6409, "step": 12585 }, { "epoch": 41.265573770491805, "grad_norm": 3.8704919815063477, "learning_rate": 1.325431490891092e-05, "loss": 0.2356, "step": 12586 }, { "epoch": 41.268852459016394, "grad_norm": 4.094243049621582, "learning_rate": 1.3253310806735175e-05, "loss": 0.4522, "step": 12587 }, { "epoch": 41.27213114754098, "grad_norm": 3.5788607597351074, "learning_rate": 1.325230666787497e-05, "loss": 0.2479, "step": 12588 }, { "epoch": 41.27540983606557, "grad_norm": 3.6659162044525146, "learning_rate": 1.3251302492341626e-05, "loss": 0.7874, "step": 12589 }, { "epoch": 41.278688524590166, "grad_norm": 2.895998239517212, "learning_rate": 1.3250298280146464e-05, "loss": 0.4047, "step": 12590 }, { "epoch": 41.281967213114754, "grad_norm": 4.620650768280029, "learning_rate": 1.3249294031300812e-05, "loss": 0.3234, "step": 12591 }, { "epoch": 41.28524590163934, "grad_norm": 4.346780776977539, "learning_rate": 1.324828974581599e-05, "loss": 0.4103, "step": 12592 }, { "epoch": 41.28852459016394, "grad_norm": 4.287540912628174, "learning_rate": 1.3247285423703322e-05, "loss": 0.3202, "step": 12593 }, { "epoch": 41.291803278688526, "grad_norm": 3.7153124809265137, "learning_rate": 1.3246281064974137e-05, "loss": 0.3049, "step": 12594 }, { "epoch": 41.295081967213115, "grad_norm": 4.98324728012085, "learning_rate": 1.324527666963976e-05, "loss": 0.5186, "step": 12595 }, { "epoch": 41.2983606557377, "grad_norm": 3.2676658630371094, "learning_rate": 1.324427223771151e-05, "loss": 0.4183, "step": 12596 }, { "epoch": 41.3016393442623, "grad_norm": 3.8470263481140137, "learning_rate": 1.3243267769200718e-05, "loss": 0.4335, "step": 12597 }, { "epoch": 41.30491803278689, "grad_norm": 5.01577615737915, "learning_rate": 1.3242263264118712e-05, "loss": 0.4808, "step": 12598 }, { "epoch": 41.308196721311475, "grad_norm": 4.372014045715332, "learning_rate": 1.3241258722476817e-05, "loss": 0.3678, "step": 12599 }, { "epoch": 41.31147540983606, "grad_norm": 3.7848424911499023, "learning_rate": 1.3240254144286357e-05, "loss": 0.3928, "step": 12600 }, { "epoch": 41.31475409836066, "grad_norm": 3.93343186378479, "learning_rate": 1.3239249529558664e-05, "loss": 0.4233, "step": 12601 }, { "epoch": 41.31803278688525, "grad_norm": 4.315226078033447, "learning_rate": 1.3238244878305065e-05, "loss": 0.3564, "step": 12602 }, { "epoch": 41.321311475409836, "grad_norm": 4.262903213500977, "learning_rate": 1.3237240190536887e-05, "loss": 0.2994, "step": 12603 }, { "epoch": 41.324590163934424, "grad_norm": 4.36416482925415, "learning_rate": 1.3236235466265459e-05, "loss": 0.4853, "step": 12604 }, { "epoch": 41.32786885245902, "grad_norm": 3.677884340286255, "learning_rate": 1.3235230705502114e-05, "loss": 0.5148, "step": 12605 }, { "epoch": 41.33114754098361, "grad_norm": 3.6383514404296875, "learning_rate": 1.3234225908258175e-05, "loss": 0.2968, "step": 12606 }, { "epoch": 41.334426229508196, "grad_norm": 3.5089964866638184, "learning_rate": 1.3233221074544982e-05, "loss": 0.4026, "step": 12607 }, { "epoch": 41.337704918032784, "grad_norm": 3.7858288288116455, "learning_rate": 1.3232216204373853e-05, "loss": 0.6063, "step": 12608 }, { "epoch": 41.34098360655738, "grad_norm": 3.984118700027466, "learning_rate": 1.323121129775613e-05, "loss": 0.4258, "step": 12609 }, { "epoch": 41.34426229508197, "grad_norm": 3.4880716800689697, "learning_rate": 1.3230206354703141e-05, "loss": 0.5303, "step": 12610 }, { "epoch": 41.34754098360656, "grad_norm": 3.3759615421295166, "learning_rate": 1.3229201375226212e-05, "loss": 0.2142, "step": 12611 }, { "epoch": 41.350819672131145, "grad_norm": 5.241086006164551, "learning_rate": 1.3228196359336684e-05, "loss": 0.3957, "step": 12612 }, { "epoch": 41.35409836065574, "grad_norm": 4.0271382331848145, "learning_rate": 1.3227191307045886e-05, "loss": 0.3137, "step": 12613 }, { "epoch": 41.35737704918033, "grad_norm": 4.352691650390625, "learning_rate": 1.3226186218365149e-05, "loss": 0.2545, "step": 12614 }, { "epoch": 41.36065573770492, "grad_norm": 3.540358304977417, "learning_rate": 1.3225181093305808e-05, "loss": 0.3026, "step": 12615 }, { "epoch": 41.363934426229505, "grad_norm": 3.509295701980591, "learning_rate": 1.3224175931879195e-05, "loss": 0.3448, "step": 12616 }, { "epoch": 41.3672131147541, "grad_norm": 4.189301490783691, "learning_rate": 1.3223170734096649e-05, "loss": 0.3268, "step": 12617 }, { "epoch": 41.37049180327869, "grad_norm": 3.51947021484375, "learning_rate": 1.32221654999695e-05, "loss": 0.3485, "step": 12618 }, { "epoch": 41.37377049180328, "grad_norm": 5.172952651977539, "learning_rate": 1.3221160229509083e-05, "loss": 0.502, "step": 12619 }, { "epoch": 41.377049180327866, "grad_norm": 4.077917575836182, "learning_rate": 1.3220154922726737e-05, "loss": 0.5182, "step": 12620 }, { "epoch": 41.38032786885246, "grad_norm": 3.9887747764587402, "learning_rate": 1.3219149579633796e-05, "loss": 0.3211, "step": 12621 }, { "epoch": 41.38360655737705, "grad_norm": 4.430909633636475, "learning_rate": 1.3218144200241597e-05, "loss": 0.52, "step": 12622 }, { "epoch": 41.38688524590164, "grad_norm": 3.9469974040985107, "learning_rate": 1.3217138784561479e-05, "loss": 0.3765, "step": 12623 }, { "epoch": 41.390163934426226, "grad_norm": 3.7117648124694824, "learning_rate": 1.3216133332604772e-05, "loss": 0.1708, "step": 12624 }, { "epoch": 41.39344262295082, "grad_norm": 4.358267307281494, "learning_rate": 1.321512784438282e-05, "loss": 0.546, "step": 12625 }, { "epoch": 41.39672131147541, "grad_norm": 4.150107383728027, "learning_rate": 1.3214122319906958e-05, "loss": 0.3742, "step": 12626 }, { "epoch": 41.4, "grad_norm": 4.324945449829102, "learning_rate": 1.3213116759188525e-05, "loss": 0.3401, "step": 12627 }, { "epoch": 41.40327868852459, "grad_norm": 3.622171640396118, "learning_rate": 1.3212111162238858e-05, "loss": 0.4043, "step": 12628 }, { "epoch": 41.40655737704918, "grad_norm": 3.815516233444214, "learning_rate": 1.3211105529069301e-05, "loss": 0.398, "step": 12629 }, { "epoch": 41.40983606557377, "grad_norm": 4.174256324768066, "learning_rate": 1.3210099859691189e-05, "loss": 0.6245, "step": 12630 }, { "epoch": 41.41311475409836, "grad_norm": 4.1725172996521, "learning_rate": 1.3209094154115863e-05, "loss": 0.4356, "step": 12631 }, { "epoch": 41.41639344262295, "grad_norm": 3.467686414718628, "learning_rate": 1.3208088412354663e-05, "loss": 0.4276, "step": 12632 }, { "epoch": 41.41967213114754, "grad_norm": 3.4838476181030273, "learning_rate": 1.3207082634418933e-05, "loss": 0.4337, "step": 12633 }, { "epoch": 41.42295081967213, "grad_norm": 4.139491081237793, "learning_rate": 1.3206076820320008e-05, "loss": 0.3685, "step": 12634 }, { "epoch": 41.42622950819672, "grad_norm": 3.8482062816619873, "learning_rate": 1.3205070970069238e-05, "loss": 0.2957, "step": 12635 }, { "epoch": 41.429508196721315, "grad_norm": 4.100925445556641, "learning_rate": 1.320406508367796e-05, "loss": 0.4136, "step": 12636 }, { "epoch": 41.4327868852459, "grad_norm": 3.5940961837768555, "learning_rate": 1.3203059161157512e-05, "loss": 0.2634, "step": 12637 }, { "epoch": 41.43606557377049, "grad_norm": 4.566371917724609, "learning_rate": 1.3202053202519247e-05, "loss": 0.3753, "step": 12638 }, { "epoch": 41.43934426229508, "grad_norm": 3.1797964572906494, "learning_rate": 1.3201047207774498e-05, "loss": 0.3785, "step": 12639 }, { "epoch": 41.442622950819676, "grad_norm": 3.4351305961608887, "learning_rate": 1.3200041176934616e-05, "loss": 0.2824, "step": 12640 }, { "epoch": 41.445901639344264, "grad_norm": 3.266541004180908, "learning_rate": 1.3199035110010943e-05, "loss": 0.3743, "step": 12641 }, { "epoch": 41.44918032786885, "grad_norm": 3.8605659008026123, "learning_rate": 1.3198029007014823e-05, "loss": 0.2947, "step": 12642 }, { "epoch": 41.45245901639344, "grad_norm": 3.925663948059082, "learning_rate": 1.3197022867957598e-05, "loss": 0.4997, "step": 12643 }, { "epoch": 41.455737704918036, "grad_norm": 3.4194767475128174, "learning_rate": 1.3196016692850617e-05, "loss": 0.2207, "step": 12644 }, { "epoch": 41.459016393442624, "grad_norm": 3.4266016483306885, "learning_rate": 1.3195010481705229e-05, "loss": 0.3561, "step": 12645 }, { "epoch": 41.46229508196721, "grad_norm": 4.162174224853516, "learning_rate": 1.3194004234532771e-05, "loss": 0.2957, "step": 12646 }, { "epoch": 41.4655737704918, "grad_norm": 3.592519760131836, "learning_rate": 1.3192997951344595e-05, "loss": 0.5141, "step": 12647 }, { "epoch": 41.4688524590164, "grad_norm": 3.433959722518921, "learning_rate": 1.3191991632152048e-05, "loss": 0.517, "step": 12648 }, { "epoch": 41.472131147540985, "grad_norm": 3.527275800704956, "learning_rate": 1.3190985276966479e-05, "loss": 0.4929, "step": 12649 }, { "epoch": 41.47540983606557, "grad_norm": 5.750242710113525, "learning_rate": 1.318997888579923e-05, "loss": 0.3491, "step": 12650 }, { "epoch": 41.47868852459016, "grad_norm": 3.4783389568328857, "learning_rate": 1.3188972458661655e-05, "loss": 0.3501, "step": 12651 }, { "epoch": 41.48196721311476, "grad_norm": 3.418292999267578, "learning_rate": 1.3187965995565098e-05, "loss": 0.3953, "step": 12652 }, { "epoch": 41.485245901639345, "grad_norm": 3.5939443111419678, "learning_rate": 1.318695949652091e-05, "loss": 0.3077, "step": 12653 }, { "epoch": 41.488524590163934, "grad_norm": 3.9915711879730225, "learning_rate": 1.3185952961540439e-05, "loss": 0.487, "step": 12654 }, { "epoch": 41.49180327868852, "grad_norm": 3.9640603065490723, "learning_rate": 1.3184946390635038e-05, "loss": 0.4619, "step": 12655 }, { "epoch": 41.49508196721312, "grad_norm": 5.179245948791504, "learning_rate": 1.3183939783816054e-05, "loss": 0.343, "step": 12656 }, { "epoch": 41.498360655737706, "grad_norm": 3.8353402614593506, "learning_rate": 1.3182933141094837e-05, "loss": 0.4565, "step": 12657 }, { "epoch": 41.501639344262294, "grad_norm": 4.027927398681641, "learning_rate": 1.318192646248274e-05, "loss": 0.2335, "step": 12658 }, { "epoch": 41.50491803278688, "grad_norm": 3.73388409614563, "learning_rate": 1.3180919747991116e-05, "loss": 0.2056, "step": 12659 }, { "epoch": 41.50819672131148, "grad_norm": 3.157909870147705, "learning_rate": 1.3179912997631313e-05, "loss": 0.3789, "step": 12660 }, { "epoch": 41.511475409836066, "grad_norm": 3.239386558532715, "learning_rate": 1.3178906211414684e-05, "loss": 0.244, "step": 12661 }, { "epoch": 41.514754098360655, "grad_norm": 3.9858169555664062, "learning_rate": 1.3177899389352584e-05, "loss": 0.1635, "step": 12662 }, { "epoch": 41.51803278688524, "grad_norm": 3.798576831817627, "learning_rate": 1.3176892531456363e-05, "loss": 0.2274, "step": 12663 }, { "epoch": 41.52131147540984, "grad_norm": 4.233630180358887, "learning_rate": 1.3175885637737375e-05, "loss": 0.451, "step": 12664 }, { "epoch": 41.52459016393443, "grad_norm": 3.657557249069214, "learning_rate": 1.3174878708206974e-05, "loss": 0.4584, "step": 12665 }, { "epoch": 41.527868852459015, "grad_norm": 4.114789962768555, "learning_rate": 1.3173871742876516e-05, "loss": 0.3075, "step": 12666 }, { "epoch": 41.5311475409836, "grad_norm": 5.1719183921813965, "learning_rate": 1.3172864741757354e-05, "loss": 0.3483, "step": 12667 }, { "epoch": 41.5344262295082, "grad_norm": 4.277644157409668, "learning_rate": 1.3171857704860845e-05, "loss": 0.4043, "step": 12668 }, { "epoch": 41.53770491803279, "grad_norm": 3.602529764175415, "learning_rate": 1.3170850632198337e-05, "loss": 0.5029, "step": 12669 }, { "epoch": 41.540983606557376, "grad_norm": 3.5799620151519775, "learning_rate": 1.3169843523781193e-05, "loss": 0.3161, "step": 12670 }, { "epoch": 41.544262295081964, "grad_norm": 4.209743022918701, "learning_rate": 1.316883637962077e-05, "loss": 0.4168, "step": 12671 }, { "epoch": 41.54754098360656, "grad_norm": 3.1455719470977783, "learning_rate": 1.3167829199728421e-05, "loss": 0.2402, "step": 12672 }, { "epoch": 41.55081967213115, "grad_norm": 4.060178279876709, "learning_rate": 1.3166821984115506e-05, "loss": 0.3601, "step": 12673 }, { "epoch": 41.554098360655736, "grad_norm": 4.1389031410217285, "learning_rate": 1.3165814732793377e-05, "loss": 0.4968, "step": 12674 }, { "epoch": 41.557377049180324, "grad_norm": 3.8854281902313232, "learning_rate": 1.3164807445773398e-05, "loss": 0.3991, "step": 12675 }, { "epoch": 41.56065573770492, "grad_norm": 4.182579517364502, "learning_rate": 1.316380012306692e-05, "loss": 0.578, "step": 12676 }, { "epoch": 41.56393442622951, "grad_norm": 3.5093343257904053, "learning_rate": 1.316279276468531e-05, "loss": 0.3984, "step": 12677 }, { "epoch": 41.5672131147541, "grad_norm": 4.646522521972656, "learning_rate": 1.3161785370639924e-05, "loss": 0.337, "step": 12678 }, { "epoch": 41.570491803278685, "grad_norm": 3.740460157394409, "learning_rate": 1.3160777940942118e-05, "loss": 0.2536, "step": 12679 }, { "epoch": 41.57377049180328, "grad_norm": 4.1060471534729, "learning_rate": 1.3159770475603256e-05, "loss": 0.3544, "step": 12680 }, { "epoch": 41.57704918032787, "grad_norm": 3.6475822925567627, "learning_rate": 1.3158762974634696e-05, "loss": 0.3233, "step": 12681 }, { "epoch": 41.58032786885246, "grad_norm": 3.512077569961548, "learning_rate": 1.3157755438047796e-05, "loss": 0.363, "step": 12682 }, { "epoch": 41.58360655737705, "grad_norm": 4.001697063446045, "learning_rate": 1.3156747865853925e-05, "loss": 0.2697, "step": 12683 }, { "epoch": 41.58688524590164, "grad_norm": 4.072268009185791, "learning_rate": 1.3155740258064438e-05, "loss": 0.4572, "step": 12684 }, { "epoch": 41.59016393442623, "grad_norm": 4.354911804199219, "learning_rate": 1.3154732614690699e-05, "loss": 0.2722, "step": 12685 }, { "epoch": 41.59344262295082, "grad_norm": 4.555116176605225, "learning_rate": 1.3153724935744068e-05, "loss": 0.463, "step": 12686 }, { "epoch": 41.59672131147541, "grad_norm": 3.507995367050171, "learning_rate": 1.3152717221235909e-05, "loss": 0.3217, "step": 12687 }, { "epoch": 41.6, "grad_norm": 4.099178314208984, "learning_rate": 1.3151709471177589e-05, "loss": 0.5784, "step": 12688 }, { "epoch": 41.60327868852459, "grad_norm": 3.89180588722229, "learning_rate": 1.3150701685580462e-05, "loss": 0.3126, "step": 12689 }, { "epoch": 41.60655737704918, "grad_norm": 4.177458763122559, "learning_rate": 1.3149693864455903e-05, "loss": 0.5791, "step": 12690 }, { "epoch": 41.609836065573774, "grad_norm": 3.8960282802581787, "learning_rate": 1.3148686007815268e-05, "loss": 0.2139, "step": 12691 }, { "epoch": 41.61311475409836, "grad_norm": 4.323160171508789, "learning_rate": 1.3147678115669926e-05, "loss": 0.3996, "step": 12692 }, { "epoch": 41.61639344262295, "grad_norm": 3.8601012229919434, "learning_rate": 1.3146670188031238e-05, "loss": 0.263, "step": 12693 }, { "epoch": 41.61967213114754, "grad_norm": 3.956151008605957, "learning_rate": 1.3145662224910576e-05, "loss": 0.2695, "step": 12694 }, { "epoch": 41.622950819672134, "grad_norm": 3.9753851890563965, "learning_rate": 1.3144654226319298e-05, "loss": 0.3084, "step": 12695 }, { "epoch": 41.62622950819672, "grad_norm": 3.7652523517608643, "learning_rate": 1.3143646192268776e-05, "loss": 0.3021, "step": 12696 }, { "epoch": 41.62950819672131, "grad_norm": 3.4609344005584717, "learning_rate": 1.3142638122770372e-05, "loss": 0.2772, "step": 12697 }, { "epoch": 41.6327868852459, "grad_norm": 4.222886085510254, "learning_rate": 1.3141630017835456e-05, "loss": 0.3959, "step": 12698 }, { "epoch": 41.636065573770495, "grad_norm": 3.197385311126709, "learning_rate": 1.31406218774754e-05, "loss": 0.3797, "step": 12699 }, { "epoch": 41.63934426229508, "grad_norm": 3.1233699321746826, "learning_rate": 1.3139613701701561e-05, "loss": 0.2759, "step": 12700 }, { "epoch": 41.64262295081967, "grad_norm": 3.74941086769104, "learning_rate": 1.3138605490525316e-05, "loss": 0.2678, "step": 12701 }, { "epoch": 41.64590163934426, "grad_norm": 3.2427682876586914, "learning_rate": 1.313759724395803e-05, "loss": 0.1264, "step": 12702 }, { "epoch": 41.649180327868855, "grad_norm": 4.490331172943115, "learning_rate": 1.3136588962011074e-05, "loss": 0.5454, "step": 12703 }, { "epoch": 41.65245901639344, "grad_norm": 3.915050506591797, "learning_rate": 1.3135580644695813e-05, "loss": 0.4593, "step": 12704 }, { "epoch": 41.65573770491803, "grad_norm": 3.8795464038848877, "learning_rate": 1.3134572292023624e-05, "loss": 0.4715, "step": 12705 }, { "epoch": 41.65901639344262, "grad_norm": 3.5515284538269043, "learning_rate": 1.313356390400587e-05, "loss": 0.3978, "step": 12706 }, { "epoch": 41.662295081967216, "grad_norm": 3.9393186569213867, "learning_rate": 1.3132555480653929e-05, "loss": 0.4044, "step": 12707 }, { "epoch": 41.665573770491804, "grad_norm": 4.066215515136719, "learning_rate": 1.3131547021979163e-05, "loss": 0.4493, "step": 12708 }, { "epoch": 41.66885245901639, "grad_norm": 3.791837453842163, "learning_rate": 1.3130538527992953e-05, "loss": 0.3487, "step": 12709 }, { "epoch": 41.67213114754098, "grad_norm": 4.575831413269043, "learning_rate": 1.3129529998706663e-05, "loss": 0.6107, "step": 12710 }, { "epoch": 41.675409836065576, "grad_norm": 3.3925747871398926, "learning_rate": 1.312852143413167e-05, "loss": 0.3145, "step": 12711 }, { "epoch": 41.678688524590164, "grad_norm": 4.3060736656188965, "learning_rate": 1.3127512834279344e-05, "loss": 0.605, "step": 12712 }, { "epoch": 41.68196721311475, "grad_norm": 2.8971517086029053, "learning_rate": 1.3126504199161061e-05, "loss": 0.1589, "step": 12713 }, { "epoch": 41.68524590163934, "grad_norm": 5.1218438148498535, "learning_rate": 1.312549552878819e-05, "loss": 0.6279, "step": 12714 }, { "epoch": 41.68852459016394, "grad_norm": 3.411909580230713, "learning_rate": 1.3124486823172107e-05, "loss": 0.3387, "step": 12715 }, { "epoch": 41.691803278688525, "grad_norm": 4.538538932800293, "learning_rate": 1.3123478082324188e-05, "loss": 0.3246, "step": 12716 }, { "epoch": 41.69508196721311, "grad_norm": 3.742938280105591, "learning_rate": 1.3122469306255804e-05, "loss": 0.5566, "step": 12717 }, { "epoch": 41.6983606557377, "grad_norm": 3.742202043533325, "learning_rate": 1.3121460494978335e-05, "loss": 0.4626, "step": 12718 }, { "epoch": 41.7016393442623, "grad_norm": 3.459533452987671, "learning_rate": 1.3120451648503151e-05, "loss": 0.4532, "step": 12719 }, { "epoch": 41.704918032786885, "grad_norm": 4.048403739929199, "learning_rate": 1.3119442766841633e-05, "loss": 0.4772, "step": 12720 }, { "epoch": 41.708196721311474, "grad_norm": 3.8501381874084473, "learning_rate": 1.311843385000515e-05, "loss": 0.3016, "step": 12721 }, { "epoch": 41.71147540983607, "grad_norm": 3.496077060699463, "learning_rate": 1.3117424898005086e-05, "loss": 0.3243, "step": 12722 }, { "epoch": 41.71475409836066, "grad_norm": 3.7023000717163086, "learning_rate": 1.3116415910852814e-05, "loss": 0.3672, "step": 12723 }, { "epoch": 41.718032786885246, "grad_norm": 5.22397518157959, "learning_rate": 1.3115406888559715e-05, "loss": 0.3966, "step": 12724 }, { "epoch": 41.721311475409834, "grad_norm": 3.9541354179382324, "learning_rate": 1.311439783113716e-05, "loss": 0.42, "step": 12725 }, { "epoch": 41.72459016393443, "grad_norm": 3.7723498344421387, "learning_rate": 1.311338873859653e-05, "loss": 0.3978, "step": 12726 }, { "epoch": 41.72786885245902, "grad_norm": 4.931521892547607, "learning_rate": 1.3112379610949211e-05, "loss": 0.43, "step": 12727 }, { "epoch": 41.731147540983606, "grad_norm": 4.397665500640869, "learning_rate": 1.311137044820657e-05, "loss": 0.4364, "step": 12728 }, { "epoch": 41.734426229508195, "grad_norm": 3.840791702270508, "learning_rate": 1.3110361250379997e-05, "loss": 0.2965, "step": 12729 }, { "epoch": 41.73770491803279, "grad_norm": 3.335463523864746, "learning_rate": 1.3109352017480863e-05, "loss": 0.3944, "step": 12730 }, { "epoch": 41.74098360655738, "grad_norm": 3.663343667984009, "learning_rate": 1.3108342749520555e-05, "loss": 0.4514, "step": 12731 }, { "epoch": 41.74426229508197, "grad_norm": 4.105724334716797, "learning_rate": 1.3107333446510448e-05, "loss": 0.3037, "step": 12732 }, { "epoch": 41.747540983606555, "grad_norm": 3.9000771045684814, "learning_rate": 1.310632410846193e-05, "loss": 0.3874, "step": 12733 }, { "epoch": 41.75081967213115, "grad_norm": 3.9889068603515625, "learning_rate": 1.3105314735386374e-05, "loss": 0.2737, "step": 12734 }, { "epoch": 41.75409836065574, "grad_norm": 4.1026611328125, "learning_rate": 1.3104305327295169e-05, "loss": 0.201, "step": 12735 }, { "epoch": 41.75737704918033, "grad_norm": 4.5787529945373535, "learning_rate": 1.3103295884199689e-05, "loss": 0.4673, "step": 12736 }, { "epoch": 41.760655737704916, "grad_norm": 3.7961411476135254, "learning_rate": 1.3102286406111324e-05, "loss": 0.3962, "step": 12737 }, { "epoch": 41.76393442622951, "grad_norm": 3.869682550430298, "learning_rate": 1.3101276893041455e-05, "loss": 0.2877, "step": 12738 }, { "epoch": 41.7672131147541, "grad_norm": 4.138937473297119, "learning_rate": 1.3100267345001463e-05, "loss": 0.4986, "step": 12739 }, { "epoch": 41.77049180327869, "grad_norm": 3.5913307666778564, "learning_rate": 1.3099257762002734e-05, "loss": 0.2762, "step": 12740 }, { "epoch": 41.773770491803276, "grad_norm": 5.336772441864014, "learning_rate": 1.309824814405665e-05, "loss": 0.304, "step": 12741 }, { "epoch": 41.77704918032787, "grad_norm": 4.841658592224121, "learning_rate": 1.30972384911746e-05, "loss": 0.6734, "step": 12742 }, { "epoch": 41.78032786885246, "grad_norm": 3.529489040374756, "learning_rate": 1.309622880336796e-05, "loss": 0.2401, "step": 12743 }, { "epoch": 41.78360655737705, "grad_norm": 3.8358607292175293, "learning_rate": 1.3095219080648128e-05, "loss": 0.2692, "step": 12744 }, { "epoch": 41.78688524590164, "grad_norm": 3.9687840938568115, "learning_rate": 1.3094209323026479e-05, "loss": 0.516, "step": 12745 }, { "epoch": 41.79016393442623, "grad_norm": 4.490931034088135, "learning_rate": 1.3093199530514401e-05, "loss": 0.6763, "step": 12746 }, { "epoch": 41.79344262295082, "grad_norm": 5.207051753997803, "learning_rate": 1.3092189703123284e-05, "loss": 0.4552, "step": 12747 }, { "epoch": 41.79672131147541, "grad_norm": 4.776829719543457, "learning_rate": 1.3091179840864515e-05, "loss": 0.297, "step": 12748 }, { "epoch": 41.8, "grad_norm": 4.675386905670166, "learning_rate": 1.3090169943749475e-05, "loss": 0.4159, "step": 12749 }, { "epoch": 41.80327868852459, "grad_norm": 4.2635016441345215, "learning_rate": 1.308916001178956e-05, "loss": 0.5724, "step": 12750 }, { "epoch": 41.80655737704918, "grad_norm": 4.94313907623291, "learning_rate": 1.3088150044996151e-05, "loss": 0.3328, "step": 12751 }, { "epoch": 41.80983606557377, "grad_norm": 4.564122200012207, "learning_rate": 1.308714004338064e-05, "loss": 0.4962, "step": 12752 }, { "epoch": 41.81311475409836, "grad_norm": 6.150789737701416, "learning_rate": 1.3086130006954417e-05, "loss": 0.3166, "step": 12753 }, { "epoch": 41.81639344262295, "grad_norm": 3.0625972747802734, "learning_rate": 1.3085119935728867e-05, "loss": 0.3996, "step": 12754 }, { "epoch": 41.81967213114754, "grad_norm": 4.008495330810547, "learning_rate": 1.3084109829715383e-05, "loss": 0.3021, "step": 12755 }, { "epoch": 41.82295081967213, "grad_norm": 3.7227835655212402, "learning_rate": 1.3083099688925353e-05, "loss": 0.4224, "step": 12756 }, { "epoch": 41.82622950819672, "grad_norm": 3.8714776039123535, "learning_rate": 1.308208951337017e-05, "loss": 0.2825, "step": 12757 }, { "epoch": 41.829508196721314, "grad_norm": 3.81547474861145, "learning_rate": 1.3081079303061224e-05, "loss": 0.3075, "step": 12758 }, { "epoch": 41.8327868852459, "grad_norm": 3.8885750770568848, "learning_rate": 1.3080069058009904e-05, "loss": 0.2902, "step": 12759 }, { "epoch": 41.83606557377049, "grad_norm": 4.699297904968262, "learning_rate": 1.3079058778227602e-05, "loss": 0.5019, "step": 12760 }, { "epoch": 41.83934426229508, "grad_norm": 3.874681234359741, "learning_rate": 1.3078048463725713e-05, "loss": 0.4006, "step": 12761 }, { "epoch": 41.842622950819674, "grad_norm": 4.001757621765137, "learning_rate": 1.3077038114515625e-05, "loss": 0.4897, "step": 12762 }, { "epoch": 41.84590163934426, "grad_norm": 4.80385684967041, "learning_rate": 1.3076027730608733e-05, "loss": 0.4253, "step": 12763 }, { "epoch": 41.84918032786885, "grad_norm": 3.8775434494018555, "learning_rate": 1.3075017312016432e-05, "loss": 0.31, "step": 12764 }, { "epoch": 41.85245901639344, "grad_norm": 4.031838417053223, "learning_rate": 1.3074006858750115e-05, "loss": 0.2389, "step": 12765 }, { "epoch": 41.855737704918035, "grad_norm": 4.4937357902526855, "learning_rate": 1.3072996370821172e-05, "loss": 0.4162, "step": 12766 }, { "epoch": 41.85901639344262, "grad_norm": 14.64975643157959, "learning_rate": 1.3071985848240998e-05, "loss": 0.4221, "step": 12767 }, { "epoch": 41.86229508196721, "grad_norm": 5.257238388061523, "learning_rate": 1.3070975291020994e-05, "loss": 0.4832, "step": 12768 }, { "epoch": 41.86557377049181, "grad_norm": 3.338228702545166, "learning_rate": 1.3069964699172547e-05, "loss": 0.312, "step": 12769 }, { "epoch": 41.868852459016395, "grad_norm": 4.242710590362549, "learning_rate": 1.3068954072707056e-05, "loss": 0.3041, "step": 12770 }, { "epoch": 41.87213114754098, "grad_norm": 4.67343282699585, "learning_rate": 1.3067943411635917e-05, "loss": 0.3719, "step": 12771 }, { "epoch": 41.87540983606557, "grad_norm": 4.149789333343506, "learning_rate": 1.3066932715970529e-05, "loss": 0.4275, "step": 12772 }, { "epoch": 41.87868852459017, "grad_norm": 4.118353366851807, "learning_rate": 1.3065921985722284e-05, "loss": 0.2818, "step": 12773 }, { "epoch": 41.881967213114756, "grad_norm": 3.9679617881774902, "learning_rate": 1.306491122090258e-05, "loss": 0.3783, "step": 12774 }, { "epoch": 41.885245901639344, "grad_norm": 4.20456075668335, "learning_rate": 1.3063900421522814e-05, "loss": 0.4633, "step": 12775 }, { "epoch": 41.88852459016393, "grad_norm": 3.748655080795288, "learning_rate": 1.3062889587594389e-05, "loss": 0.3773, "step": 12776 }, { "epoch": 41.89180327868853, "grad_norm": 4.4087138175964355, "learning_rate": 1.3061878719128698e-05, "loss": 0.5079, "step": 12777 }, { "epoch": 41.895081967213116, "grad_norm": 4.430253505706787, "learning_rate": 1.306086781613714e-05, "loss": 0.3109, "step": 12778 }, { "epoch": 41.898360655737704, "grad_norm": 3.4306085109710693, "learning_rate": 1.3059856878631115e-05, "loss": 0.3232, "step": 12779 }, { "epoch": 41.90163934426229, "grad_norm": 4.747580051422119, "learning_rate": 1.3058845906622023e-05, "loss": 0.3193, "step": 12780 }, { "epoch": 41.90491803278689, "grad_norm": 3.9673779010772705, "learning_rate": 1.3057834900121261e-05, "loss": 0.3251, "step": 12781 }, { "epoch": 41.90819672131148, "grad_norm": 4.651188850402832, "learning_rate": 1.3056823859140231e-05, "loss": 0.2863, "step": 12782 }, { "epoch": 41.911475409836065, "grad_norm": 4.265894889831543, "learning_rate": 1.3055812783690339e-05, "loss": 0.3607, "step": 12783 }, { "epoch": 41.91475409836065, "grad_norm": 3.8545007705688477, "learning_rate": 1.3054801673782973e-05, "loss": 0.3784, "step": 12784 }, { "epoch": 41.91803278688525, "grad_norm": 4.245827674865723, "learning_rate": 1.305379052942955e-05, "loss": 0.3626, "step": 12785 }, { "epoch": 41.92131147540984, "grad_norm": 4.568040370941162, "learning_rate": 1.3052779350641456e-05, "loss": 0.3898, "step": 12786 }, { "epoch": 41.924590163934425, "grad_norm": 3.792283773422241, "learning_rate": 1.3051768137430105e-05, "loss": 0.3279, "step": 12787 }, { "epoch": 41.927868852459014, "grad_norm": 4.084782123565674, "learning_rate": 1.3050756889806895e-05, "loss": 0.4041, "step": 12788 }, { "epoch": 41.93114754098361, "grad_norm": 3.96832275390625, "learning_rate": 1.3049745607783229e-05, "loss": 0.3044, "step": 12789 }, { "epoch": 41.9344262295082, "grad_norm": 6.044750690460205, "learning_rate": 1.3048734291370508e-05, "loss": 0.4214, "step": 12790 }, { "epoch": 41.937704918032786, "grad_norm": 4.03395938873291, "learning_rate": 1.304772294058014e-05, "loss": 0.2843, "step": 12791 }, { "epoch": 41.940983606557374, "grad_norm": 4.186558723449707, "learning_rate": 1.3046711555423529e-05, "loss": 0.4561, "step": 12792 }, { "epoch": 41.94426229508197, "grad_norm": 3.120413303375244, "learning_rate": 1.3045700135912074e-05, "loss": 0.1449, "step": 12793 }, { "epoch": 41.94754098360656, "grad_norm": 3.8979599475860596, "learning_rate": 1.3044688682057185e-05, "loss": 0.3539, "step": 12794 }, { "epoch": 41.950819672131146, "grad_norm": 3.941432476043701, "learning_rate": 1.3043677193870265e-05, "loss": 0.4733, "step": 12795 }, { "epoch": 41.954098360655735, "grad_norm": 3.8602142333984375, "learning_rate": 1.304266567136272e-05, "loss": 0.4149, "step": 12796 }, { "epoch": 41.95737704918033, "grad_norm": 3.598196268081665, "learning_rate": 1.3041654114545956e-05, "loss": 0.2532, "step": 12797 }, { "epoch": 41.96065573770492, "grad_norm": 3.4633052349090576, "learning_rate": 1.3040642523431379e-05, "loss": 0.5225, "step": 12798 }, { "epoch": 41.96393442622951, "grad_norm": 6.9096198081970215, "learning_rate": 1.3039630898030395e-05, "loss": 0.3067, "step": 12799 }, { "epoch": 41.967213114754095, "grad_norm": 4.538464546203613, "learning_rate": 1.3038619238354415e-05, "loss": 0.3532, "step": 12800 }, { "epoch": 41.97049180327869, "grad_norm": 4.793227672576904, "learning_rate": 1.3037607544414841e-05, "loss": 0.5905, "step": 12801 }, { "epoch": 41.97377049180328, "grad_norm": 4.87512731552124, "learning_rate": 1.3036595816223086e-05, "loss": 0.4051, "step": 12802 }, { "epoch": 41.97704918032787, "grad_norm": 3.6470935344696045, "learning_rate": 1.3035584053790553e-05, "loss": 0.2665, "step": 12803 }, { "epoch": 41.980327868852456, "grad_norm": 3.6702208518981934, "learning_rate": 1.3034572257128658e-05, "loss": 0.3906, "step": 12804 }, { "epoch": 41.98360655737705, "grad_norm": 4.416677951812744, "learning_rate": 1.3033560426248801e-05, "loss": 0.4921, "step": 12805 }, { "epoch": 41.98688524590164, "grad_norm": 3.996227264404297, "learning_rate": 1.30325485611624e-05, "loss": 0.226, "step": 12806 }, { "epoch": 41.99016393442623, "grad_norm": 4.312361717224121, "learning_rate": 1.3031536661880856e-05, "loss": 0.3564, "step": 12807 }, { "epoch": 41.993442622950816, "grad_norm": 3.738535165786743, "learning_rate": 1.3030524728415586e-05, "loss": 0.3535, "step": 12808 }, { "epoch": 41.99672131147541, "grad_norm": 4.205253601074219, "learning_rate": 1.3029512760778e-05, "loss": 0.4377, "step": 12809 }, { "epoch": 42.0, "grad_norm": 4.417876243591309, "learning_rate": 1.3028500758979507e-05, "loss": 0.3922, "step": 12810 }, { "epoch": 42.00327868852459, "grad_norm": 4.354646682739258, "learning_rate": 1.3027488723031522e-05, "loss": 0.2865, "step": 12811 }, { "epoch": 42.006557377049184, "grad_norm": 4.001320838928223, "learning_rate": 1.302647665294545e-05, "loss": 0.3179, "step": 12812 }, { "epoch": 42.00983606557377, "grad_norm": 4.081614017486572, "learning_rate": 1.302546454873271e-05, "loss": 0.3885, "step": 12813 }, { "epoch": 42.01311475409836, "grad_norm": 3.734816789627075, "learning_rate": 1.302445241040471e-05, "loss": 0.2883, "step": 12814 }, { "epoch": 42.01639344262295, "grad_norm": 4.248630046844482, "learning_rate": 1.3023440237972864e-05, "loss": 0.429, "step": 12815 }, { "epoch": 42.019672131147544, "grad_norm": 4.064298152923584, "learning_rate": 1.3022428031448586e-05, "loss": 0.5587, "step": 12816 }, { "epoch": 42.02295081967213, "grad_norm": 4.6266913414001465, "learning_rate": 1.3021415790843288e-05, "loss": 0.4458, "step": 12817 }, { "epoch": 42.02622950819672, "grad_norm": 3.5824034214019775, "learning_rate": 1.302040351616839e-05, "loss": 0.4125, "step": 12818 }, { "epoch": 42.02950819672131, "grad_norm": 4.062141418457031, "learning_rate": 1.3019391207435297e-05, "loss": 0.4412, "step": 12819 }, { "epoch": 42.032786885245905, "grad_norm": 3.741809129714966, "learning_rate": 1.3018378864655433e-05, "loss": 0.2728, "step": 12820 }, { "epoch": 42.03606557377049, "grad_norm": 3.766035556793213, "learning_rate": 1.3017366487840203e-05, "loss": 0.2569, "step": 12821 }, { "epoch": 42.03934426229508, "grad_norm": 3.4825971126556396, "learning_rate": 1.3016354077001035e-05, "loss": 0.3409, "step": 12822 }, { "epoch": 42.04262295081967, "grad_norm": 3.5337281227111816, "learning_rate": 1.3015341632149334e-05, "loss": 0.258, "step": 12823 }, { "epoch": 42.045901639344265, "grad_norm": 4.931499481201172, "learning_rate": 1.3014329153296524e-05, "loss": 0.487, "step": 12824 }, { "epoch": 42.049180327868854, "grad_norm": 3.8614840507507324, "learning_rate": 1.3013316640454017e-05, "loss": 0.1945, "step": 12825 }, { "epoch": 42.05245901639344, "grad_norm": 4.874279022216797, "learning_rate": 1.3012304093633233e-05, "loss": 0.3485, "step": 12826 }, { "epoch": 42.05573770491803, "grad_norm": 4.0119218826293945, "learning_rate": 1.3011291512845587e-05, "loss": 0.5555, "step": 12827 }, { "epoch": 42.059016393442626, "grad_norm": 3.967972755432129, "learning_rate": 1.30102788981025e-05, "loss": 0.5576, "step": 12828 }, { "epoch": 42.062295081967214, "grad_norm": 5.264907360076904, "learning_rate": 1.3009266249415387e-05, "loss": 0.44, "step": 12829 }, { "epoch": 42.0655737704918, "grad_norm": 5.818606853485107, "learning_rate": 1.3008253566795668e-05, "loss": 0.2959, "step": 12830 }, { "epoch": 42.06885245901639, "grad_norm": 3.5998716354370117, "learning_rate": 1.3007240850254762e-05, "loss": 0.3925, "step": 12831 }, { "epoch": 42.072131147540986, "grad_norm": 4.081370830535889, "learning_rate": 1.300622809980409e-05, "loss": 0.4311, "step": 12832 }, { "epoch": 42.075409836065575, "grad_norm": 6.17828369140625, "learning_rate": 1.300521531545507e-05, "loss": 0.4369, "step": 12833 }, { "epoch": 42.07868852459016, "grad_norm": 3.69644832611084, "learning_rate": 1.300420249721912e-05, "loss": 0.2273, "step": 12834 }, { "epoch": 42.08196721311475, "grad_norm": 3.87632155418396, "learning_rate": 1.3003189645107668e-05, "loss": 0.447, "step": 12835 }, { "epoch": 42.08524590163935, "grad_norm": 4.977766990661621, "learning_rate": 1.3002176759132125e-05, "loss": 0.2642, "step": 12836 }, { "epoch": 42.088524590163935, "grad_norm": 4.273942470550537, "learning_rate": 1.3001163839303923e-05, "loss": 0.4631, "step": 12837 }, { "epoch": 42.09180327868852, "grad_norm": 3.7492005825042725, "learning_rate": 1.3000150885634474e-05, "loss": 0.2195, "step": 12838 }, { "epoch": 42.09508196721311, "grad_norm": 3.485304594039917, "learning_rate": 1.2999137898135207e-05, "loss": 0.2339, "step": 12839 }, { "epoch": 42.09836065573771, "grad_norm": 4.4683380126953125, "learning_rate": 1.299812487681754e-05, "loss": 0.4414, "step": 12840 }, { "epoch": 42.101639344262296, "grad_norm": 3.111454486846924, "learning_rate": 1.2997111821692901e-05, "loss": 0.2205, "step": 12841 }, { "epoch": 42.104918032786884, "grad_norm": 3.9034039974212646, "learning_rate": 1.299609873277271e-05, "loss": 0.2446, "step": 12842 }, { "epoch": 42.10819672131147, "grad_norm": 3.814872980117798, "learning_rate": 1.2995085610068385e-05, "loss": 0.3275, "step": 12843 }, { "epoch": 42.11147540983607, "grad_norm": 4.460371494293213, "learning_rate": 1.2994072453591363e-05, "loss": 0.4167, "step": 12844 }, { "epoch": 42.114754098360656, "grad_norm": 3.6008527278900146, "learning_rate": 1.2993059263353054e-05, "loss": 0.24, "step": 12845 }, { "epoch": 42.118032786885244, "grad_norm": 3.8115506172180176, "learning_rate": 1.2992046039364893e-05, "loss": 0.2276, "step": 12846 }, { "epoch": 42.12131147540983, "grad_norm": 4.468141555786133, "learning_rate": 1.2991032781638301e-05, "loss": 0.4623, "step": 12847 }, { "epoch": 42.12459016393443, "grad_norm": 4.418057441711426, "learning_rate": 1.2990019490184707e-05, "loss": 0.5618, "step": 12848 }, { "epoch": 42.12786885245902, "grad_norm": 4.111147403717041, "learning_rate": 1.2989006165015532e-05, "loss": 0.5029, "step": 12849 }, { "epoch": 42.131147540983605, "grad_norm": 4.859105110168457, "learning_rate": 1.2987992806142206e-05, "loss": 0.5091, "step": 12850 }, { "epoch": 42.13442622950819, "grad_norm": 3.2762763500213623, "learning_rate": 1.2986979413576153e-05, "loss": 0.2526, "step": 12851 }, { "epoch": 42.13770491803279, "grad_norm": 3.611483335494995, "learning_rate": 1.2985965987328803e-05, "loss": 0.3451, "step": 12852 }, { "epoch": 42.14098360655738, "grad_norm": 3.701042890548706, "learning_rate": 1.298495252741158e-05, "loss": 0.5154, "step": 12853 }, { "epoch": 42.144262295081965, "grad_norm": 3.709442377090454, "learning_rate": 1.2983939033835916e-05, "loss": 0.4345, "step": 12854 }, { "epoch": 42.14754098360656, "grad_norm": 4.437018871307373, "learning_rate": 1.2982925506613237e-05, "loss": 0.3328, "step": 12855 }, { "epoch": 42.15081967213115, "grad_norm": 4.150411605834961, "learning_rate": 1.298191194575497e-05, "loss": 0.2609, "step": 12856 }, { "epoch": 42.15409836065574, "grad_norm": 4.5409255027771, "learning_rate": 1.2980898351272547e-05, "loss": 0.3689, "step": 12857 }, { "epoch": 42.157377049180326, "grad_norm": 3.7506496906280518, "learning_rate": 1.2979884723177391e-05, "loss": 0.4085, "step": 12858 }, { "epoch": 42.16065573770492, "grad_norm": 3.8587262630462646, "learning_rate": 1.297887106148094e-05, "loss": 0.3582, "step": 12859 }, { "epoch": 42.16393442622951, "grad_norm": 4.376630783081055, "learning_rate": 1.2977857366194622e-05, "loss": 0.3823, "step": 12860 }, { "epoch": 42.1672131147541, "grad_norm": 3.7781293392181396, "learning_rate": 1.2976843637329863e-05, "loss": 0.2433, "step": 12861 }, { "epoch": 42.170491803278686, "grad_norm": 3.9507195949554443, "learning_rate": 1.2975829874898096e-05, "loss": 0.5037, "step": 12862 }, { "epoch": 42.17377049180328, "grad_norm": 3.9194600582122803, "learning_rate": 1.2974816078910757e-05, "loss": 0.4255, "step": 12863 }, { "epoch": 42.17704918032787, "grad_norm": 3.716665029525757, "learning_rate": 1.2973802249379271e-05, "loss": 0.248, "step": 12864 }, { "epoch": 42.18032786885246, "grad_norm": 3.9750101566314697, "learning_rate": 1.2972788386315074e-05, "loss": 0.3649, "step": 12865 }, { "epoch": 42.18360655737705, "grad_norm": 3.6923274993896484, "learning_rate": 1.2971774489729595e-05, "loss": 0.2903, "step": 12866 }, { "epoch": 42.18688524590164, "grad_norm": 4.510326862335205, "learning_rate": 1.2970760559634272e-05, "loss": 0.4109, "step": 12867 }, { "epoch": 42.19016393442623, "grad_norm": 3.156078577041626, "learning_rate": 1.2969746596040534e-05, "loss": 0.4767, "step": 12868 }, { "epoch": 42.19344262295082, "grad_norm": 4.980188369750977, "learning_rate": 1.2968732598959815e-05, "loss": 0.2985, "step": 12869 }, { "epoch": 42.19672131147541, "grad_norm": 3.376190185546875, "learning_rate": 1.2967718568403548e-05, "loss": 0.6354, "step": 12870 }, { "epoch": 42.2, "grad_norm": 3.9797911643981934, "learning_rate": 1.296670450438317e-05, "loss": 0.4252, "step": 12871 }, { "epoch": 42.20327868852459, "grad_norm": 3.338543176651001, "learning_rate": 1.2965690406910114e-05, "loss": 0.1916, "step": 12872 }, { "epoch": 42.20655737704918, "grad_norm": 3.8605992794036865, "learning_rate": 1.2964676275995814e-05, "loss": 0.4486, "step": 12873 }, { "epoch": 42.20983606557377, "grad_norm": 3.8244435787200928, "learning_rate": 1.2963662111651708e-05, "loss": 0.3878, "step": 12874 }, { "epoch": 42.21311475409836, "grad_norm": 5.011865139007568, "learning_rate": 1.2962647913889228e-05, "loss": 0.3766, "step": 12875 }, { "epoch": 42.21639344262295, "grad_norm": 3.3233890533447266, "learning_rate": 1.2961633682719814e-05, "loss": 0.1781, "step": 12876 }, { "epoch": 42.21967213114754, "grad_norm": 4.115860939025879, "learning_rate": 1.29606194181549e-05, "loss": 0.2069, "step": 12877 }, { "epoch": 42.22295081967213, "grad_norm": 4.1310343742370605, "learning_rate": 1.2959605120205924e-05, "loss": 0.501, "step": 12878 }, { "epoch": 42.226229508196724, "grad_norm": 5.378129482269287, "learning_rate": 1.2958590788884324e-05, "loss": 0.3043, "step": 12879 }, { "epoch": 42.22950819672131, "grad_norm": 5.679165840148926, "learning_rate": 1.2957576424201537e-05, "loss": 0.5496, "step": 12880 }, { "epoch": 42.2327868852459, "grad_norm": 3.258758544921875, "learning_rate": 1.2956562026169e-05, "loss": 0.1676, "step": 12881 }, { "epoch": 42.23606557377049, "grad_norm": 4.449280738830566, "learning_rate": 1.2955547594798152e-05, "loss": 0.4671, "step": 12882 }, { "epoch": 42.239344262295084, "grad_norm": 4.03128719329834, "learning_rate": 1.2954533130100433e-05, "loss": 0.2919, "step": 12883 }, { "epoch": 42.24262295081967, "grad_norm": 2.892543077468872, "learning_rate": 1.2953518632087278e-05, "loss": 0.3165, "step": 12884 }, { "epoch": 42.24590163934426, "grad_norm": 3.575312852859497, "learning_rate": 1.2952504100770132e-05, "loss": 0.3192, "step": 12885 }, { "epoch": 42.24918032786885, "grad_norm": 3.791416883468628, "learning_rate": 1.295148953616043e-05, "loss": 0.234, "step": 12886 }, { "epoch": 42.252459016393445, "grad_norm": 4.5081963539123535, "learning_rate": 1.2950474938269615e-05, "loss": 0.3715, "step": 12887 }, { "epoch": 42.25573770491803, "grad_norm": 4.188794136047363, "learning_rate": 1.2949460307109131e-05, "loss": 0.3903, "step": 12888 }, { "epoch": 42.25901639344262, "grad_norm": 2.990811586380005, "learning_rate": 1.2948445642690412e-05, "loss": 0.3292, "step": 12889 }, { "epoch": 42.26229508196721, "grad_norm": 4.146227836608887, "learning_rate": 1.2947430945024904e-05, "loss": 0.2724, "step": 12890 }, { "epoch": 42.265573770491805, "grad_norm": 4.970131874084473, "learning_rate": 1.2946416214124046e-05, "loss": 0.5053, "step": 12891 }, { "epoch": 42.268852459016394, "grad_norm": 3.899850368499756, "learning_rate": 1.2945401449999285e-05, "loss": 0.4765, "step": 12892 }, { "epoch": 42.27213114754098, "grad_norm": 4.135365962982178, "learning_rate": 1.2944386652662058e-05, "loss": 0.4924, "step": 12893 }, { "epoch": 42.27540983606557, "grad_norm": 4.049765586853027, "learning_rate": 1.2943371822123812e-05, "loss": 0.4571, "step": 12894 }, { "epoch": 42.278688524590166, "grad_norm": 4.405578136444092, "learning_rate": 1.2942356958395985e-05, "loss": 0.4069, "step": 12895 }, { "epoch": 42.281967213114754, "grad_norm": 4.022401332855225, "learning_rate": 1.2941342061490027e-05, "loss": 0.4673, "step": 12896 }, { "epoch": 42.28524590163934, "grad_norm": 3.2365176677703857, "learning_rate": 1.2940327131417378e-05, "loss": 0.3611, "step": 12897 }, { "epoch": 42.28852459016394, "grad_norm": 3.875764846801758, "learning_rate": 1.2939312168189484e-05, "loss": 0.3157, "step": 12898 }, { "epoch": 42.291803278688526, "grad_norm": 3.9220900535583496, "learning_rate": 1.2938297171817787e-05, "loss": 0.2516, "step": 12899 }, { "epoch": 42.295081967213115, "grad_norm": 3.80879282951355, "learning_rate": 1.2937282142313736e-05, "loss": 0.3411, "step": 12900 }, { "epoch": 42.2983606557377, "grad_norm": 3.737436532974243, "learning_rate": 1.2936267079688773e-05, "loss": 0.2796, "step": 12901 }, { "epoch": 42.3016393442623, "grad_norm": 3.7378623485565186, "learning_rate": 1.2935251983954348e-05, "loss": 0.7437, "step": 12902 }, { "epoch": 42.30491803278689, "grad_norm": 3.6918535232543945, "learning_rate": 1.2934236855121904e-05, "loss": 0.1736, "step": 12903 }, { "epoch": 42.308196721311475, "grad_norm": 3.501218557357788, "learning_rate": 1.2933221693202891e-05, "loss": 0.2828, "step": 12904 }, { "epoch": 42.31147540983606, "grad_norm": 3.899730682373047, "learning_rate": 1.293220649820875e-05, "loss": 0.3482, "step": 12905 }, { "epoch": 42.31475409836066, "grad_norm": 3.75028920173645, "learning_rate": 1.2931191270150936e-05, "loss": 0.4492, "step": 12906 }, { "epoch": 42.31803278688525, "grad_norm": 4.103923797607422, "learning_rate": 1.293017600904089e-05, "loss": 0.3708, "step": 12907 }, { "epoch": 42.321311475409836, "grad_norm": 3.6723248958587646, "learning_rate": 1.2929160714890063e-05, "loss": 0.2745, "step": 12908 }, { "epoch": 42.324590163934424, "grad_norm": 3.5356557369232178, "learning_rate": 1.2928145387709905e-05, "loss": 0.2877, "step": 12909 }, { "epoch": 42.32786885245902, "grad_norm": 3.823214292526245, "learning_rate": 1.2927130027511861e-05, "loss": 0.3195, "step": 12910 }, { "epoch": 42.33114754098361, "grad_norm": 3.9348061084747314, "learning_rate": 1.2926114634307383e-05, "loss": 0.2671, "step": 12911 }, { "epoch": 42.334426229508196, "grad_norm": 4.323928356170654, "learning_rate": 1.2925099208107921e-05, "loss": 0.2937, "step": 12912 }, { "epoch": 42.337704918032784, "grad_norm": 3.258118152618408, "learning_rate": 1.292408374892492e-05, "loss": 0.1758, "step": 12913 }, { "epoch": 42.34098360655738, "grad_norm": 3.856006145477295, "learning_rate": 1.292306825676984e-05, "loss": 0.4631, "step": 12914 }, { "epoch": 42.34426229508197, "grad_norm": 3.2220940589904785, "learning_rate": 1.2922052731654122e-05, "loss": 0.2109, "step": 12915 }, { "epoch": 42.34754098360656, "grad_norm": 4.33012580871582, "learning_rate": 1.2921037173589223e-05, "loss": 0.3527, "step": 12916 }, { "epoch": 42.350819672131145, "grad_norm": 3.5788283348083496, "learning_rate": 1.2920021582586595e-05, "loss": 0.3279, "step": 12917 }, { "epoch": 42.35409836065574, "grad_norm": 3.643644094467163, "learning_rate": 1.2919005958657685e-05, "loss": 0.329, "step": 12918 }, { "epoch": 42.35737704918033, "grad_norm": 3.8538172245025635, "learning_rate": 1.291799030181395e-05, "loss": 0.3717, "step": 12919 }, { "epoch": 42.36065573770492, "grad_norm": 3.4426870346069336, "learning_rate": 1.2916974612066838e-05, "loss": 0.571, "step": 12920 }, { "epoch": 42.363934426229505, "grad_norm": 3.164005994796753, "learning_rate": 1.2915958889427805e-05, "loss": 0.4242, "step": 12921 }, { "epoch": 42.3672131147541, "grad_norm": 3.794079303741455, "learning_rate": 1.2914943133908304e-05, "loss": 0.4104, "step": 12922 }, { "epoch": 42.37049180327869, "grad_norm": 3.6134731769561768, "learning_rate": 1.2913927345519788e-05, "loss": 0.2353, "step": 12923 }, { "epoch": 42.37377049180328, "grad_norm": 4.183126449584961, "learning_rate": 1.2912911524273714e-05, "loss": 0.3433, "step": 12924 }, { "epoch": 42.377049180327866, "grad_norm": 4.38346004486084, "learning_rate": 1.2911895670181532e-05, "loss": 0.2658, "step": 12925 }, { "epoch": 42.38032786885246, "grad_norm": 3.7332887649536133, "learning_rate": 1.2910879783254698e-05, "loss": 0.3615, "step": 12926 }, { "epoch": 42.38360655737705, "grad_norm": 4.175471305847168, "learning_rate": 1.2909863863504665e-05, "loss": 0.2991, "step": 12927 }, { "epoch": 42.38688524590164, "grad_norm": 4.31395959854126, "learning_rate": 1.2908847910942899e-05, "loss": 0.5012, "step": 12928 }, { "epoch": 42.390163934426226, "grad_norm": 5.166927337646484, "learning_rate": 1.290783192558084e-05, "loss": 0.2389, "step": 12929 }, { "epoch": 42.39344262295082, "grad_norm": 5.195766448974609, "learning_rate": 1.290681590742996e-05, "loss": 0.4649, "step": 12930 }, { "epoch": 42.39672131147541, "grad_norm": 4.718575954437256, "learning_rate": 1.2905799856501703e-05, "loss": 0.2983, "step": 12931 }, { "epoch": 42.4, "grad_norm": 3.708782196044922, "learning_rate": 1.2904783772807534e-05, "loss": 0.4405, "step": 12932 }, { "epoch": 42.40327868852459, "grad_norm": 3.130518674850464, "learning_rate": 1.290376765635891e-05, "loss": 0.1496, "step": 12933 }, { "epoch": 42.40655737704918, "grad_norm": 3.591167449951172, "learning_rate": 1.2902751507167281e-05, "loss": 0.2499, "step": 12934 }, { "epoch": 42.40983606557377, "grad_norm": 4.097297668457031, "learning_rate": 1.2901735325244116e-05, "loss": 0.285, "step": 12935 }, { "epoch": 42.41311475409836, "grad_norm": 3.874591827392578, "learning_rate": 1.2900719110600863e-05, "loss": 0.3196, "step": 12936 }, { "epoch": 42.41639344262295, "grad_norm": 4.040876865386963, "learning_rate": 1.2899702863248992e-05, "loss": 0.3529, "step": 12937 }, { "epoch": 42.41967213114754, "grad_norm": 4.066978454589844, "learning_rate": 1.289868658319995e-05, "loss": 0.2942, "step": 12938 }, { "epoch": 42.42295081967213, "grad_norm": 4.242700576782227, "learning_rate": 1.2897670270465206e-05, "loss": 0.4222, "step": 12939 }, { "epoch": 42.42622950819672, "grad_norm": 3.993197202682495, "learning_rate": 1.2896653925056218e-05, "loss": 0.326, "step": 12940 }, { "epoch": 42.429508196721315, "grad_norm": 3.4286930561065674, "learning_rate": 1.2895637546984441e-05, "loss": 0.3215, "step": 12941 }, { "epoch": 42.4327868852459, "grad_norm": 3.744795322418213, "learning_rate": 1.2894621136261342e-05, "loss": 0.3265, "step": 12942 }, { "epoch": 42.43606557377049, "grad_norm": 4.6797308921813965, "learning_rate": 1.2893604692898381e-05, "loss": 0.4177, "step": 12943 }, { "epoch": 42.43934426229508, "grad_norm": 4.1529316902160645, "learning_rate": 1.2892588216907018e-05, "loss": 0.2336, "step": 12944 }, { "epoch": 42.442622950819676, "grad_norm": 3.954451322555542, "learning_rate": 1.2891571708298716e-05, "loss": 0.4163, "step": 12945 }, { "epoch": 42.445901639344264, "grad_norm": 3.3449044227600098, "learning_rate": 1.2890555167084937e-05, "loss": 0.3321, "step": 12946 }, { "epoch": 42.44918032786885, "grad_norm": 3.5232925415039062, "learning_rate": 1.2889538593277143e-05, "loss": 0.4694, "step": 12947 }, { "epoch": 42.45245901639344, "grad_norm": 3.568291664123535, "learning_rate": 1.2888521986886797e-05, "loss": 0.3932, "step": 12948 }, { "epoch": 42.455737704918036, "grad_norm": 3.2712934017181396, "learning_rate": 1.288750534792536e-05, "loss": 0.4331, "step": 12949 }, { "epoch": 42.459016393442624, "grad_norm": 3.911719560623169, "learning_rate": 1.2886488676404301e-05, "loss": 0.2829, "step": 12950 }, { "epoch": 42.46229508196721, "grad_norm": 3.7252135276794434, "learning_rate": 1.2885471972335078e-05, "loss": 0.2671, "step": 12951 }, { "epoch": 42.4655737704918, "grad_norm": 4.095282077789307, "learning_rate": 1.2884455235729161e-05, "loss": 0.4132, "step": 12952 }, { "epoch": 42.4688524590164, "grad_norm": 3.7755701541900635, "learning_rate": 1.2883438466598008e-05, "loss": 0.4101, "step": 12953 }, { "epoch": 42.472131147540985, "grad_norm": 4.337081432342529, "learning_rate": 1.2882421664953093e-05, "loss": 0.551, "step": 12954 }, { "epoch": 42.47540983606557, "grad_norm": 3.3596510887145996, "learning_rate": 1.2881404830805876e-05, "loss": 0.5283, "step": 12955 }, { "epoch": 42.47868852459016, "grad_norm": 3.169924259185791, "learning_rate": 1.2880387964167821e-05, "loss": 0.4014, "step": 12956 }, { "epoch": 42.48196721311476, "grad_norm": 3.4883265495300293, "learning_rate": 1.2879371065050399e-05, "loss": 0.3963, "step": 12957 }, { "epoch": 42.485245901639345, "grad_norm": 4.576744556427002, "learning_rate": 1.2878354133465073e-05, "loss": 0.4457, "step": 12958 }, { "epoch": 42.488524590163934, "grad_norm": 4.166755199432373, "learning_rate": 1.2877337169423314e-05, "loss": 0.4501, "step": 12959 }, { "epoch": 42.49180327868852, "grad_norm": 3.9365949630737305, "learning_rate": 1.2876320172936584e-05, "loss": 0.5625, "step": 12960 }, { "epoch": 42.49508196721312, "grad_norm": 3.8010199069976807, "learning_rate": 1.2875303144016355e-05, "loss": 0.4171, "step": 12961 }, { "epoch": 42.498360655737706, "grad_norm": 3.000433921813965, "learning_rate": 1.2874286082674092e-05, "loss": 0.1983, "step": 12962 }, { "epoch": 42.501639344262294, "grad_norm": 4.060914039611816, "learning_rate": 1.2873268988921268e-05, "loss": 0.3359, "step": 12963 }, { "epoch": 42.50491803278688, "grad_norm": 4.60826301574707, "learning_rate": 1.2872251862769345e-05, "loss": 0.3271, "step": 12964 }, { "epoch": 42.50819672131148, "grad_norm": 3.697441577911377, "learning_rate": 1.2871234704229799e-05, "loss": 0.32, "step": 12965 }, { "epoch": 42.511475409836066, "grad_norm": 4.191160202026367, "learning_rate": 1.2870217513314095e-05, "loss": 0.4021, "step": 12966 }, { "epoch": 42.514754098360655, "grad_norm": 4.199862480163574, "learning_rate": 1.28692002900337e-05, "loss": 0.5078, "step": 12967 }, { "epoch": 42.51803278688524, "grad_norm": 3.8430094718933105, "learning_rate": 1.2868183034400095e-05, "loss": 0.3539, "step": 12968 }, { "epoch": 42.52131147540984, "grad_norm": 3.816648244857788, "learning_rate": 1.2867165746424739e-05, "loss": 0.4061, "step": 12969 }, { "epoch": 42.52459016393443, "grad_norm": 3.8711752891540527, "learning_rate": 1.2866148426119114e-05, "loss": 0.2849, "step": 12970 }, { "epoch": 42.527868852459015, "grad_norm": 3.685636520385742, "learning_rate": 1.2865131073494678e-05, "loss": 0.373, "step": 12971 }, { "epoch": 42.5311475409836, "grad_norm": 4.470579624176025, "learning_rate": 1.2864113688562919e-05, "loss": 0.4474, "step": 12972 }, { "epoch": 42.5344262295082, "grad_norm": 4.267484664916992, "learning_rate": 1.2863096271335293e-05, "loss": 0.4276, "step": 12973 }, { "epoch": 42.53770491803279, "grad_norm": 3.9544436931610107, "learning_rate": 1.2862078821823283e-05, "loss": 0.4186, "step": 12974 }, { "epoch": 42.540983606557376, "grad_norm": 3.3847811222076416, "learning_rate": 1.2861061340038357e-05, "loss": 0.1469, "step": 12975 }, { "epoch": 42.544262295081964, "grad_norm": 3.755383014678955, "learning_rate": 1.2860043825991993e-05, "loss": 0.2924, "step": 12976 }, { "epoch": 42.54754098360656, "grad_norm": 4.72253942489624, "learning_rate": 1.2859026279695657e-05, "loss": 0.5409, "step": 12977 }, { "epoch": 42.55081967213115, "grad_norm": 6.390723705291748, "learning_rate": 1.285800870116083e-05, "loss": 0.4608, "step": 12978 }, { "epoch": 42.554098360655736, "grad_norm": 4.046102523803711, "learning_rate": 1.2856991090398977e-05, "loss": 0.5054, "step": 12979 }, { "epoch": 42.557377049180324, "grad_norm": 4.342524528503418, "learning_rate": 1.2855973447421588e-05, "loss": 0.4203, "step": 12980 }, { "epoch": 42.56065573770492, "grad_norm": 6.043628215789795, "learning_rate": 1.2854955772240123e-05, "loss": 0.3335, "step": 12981 }, { "epoch": 42.56393442622951, "grad_norm": 4.243535041809082, "learning_rate": 1.2853938064866066e-05, "loss": 0.3801, "step": 12982 }, { "epoch": 42.5672131147541, "grad_norm": 4.405940055847168, "learning_rate": 1.2852920325310887e-05, "loss": 0.4153, "step": 12983 }, { "epoch": 42.570491803278685, "grad_norm": 4.486873149871826, "learning_rate": 1.2851902553586069e-05, "loss": 0.3885, "step": 12984 }, { "epoch": 42.57377049180328, "grad_norm": 3.8749122619628906, "learning_rate": 1.2850884749703084e-05, "loss": 0.4124, "step": 12985 }, { "epoch": 42.57704918032787, "grad_norm": 4.047428131103516, "learning_rate": 1.2849866913673406e-05, "loss": 0.5642, "step": 12986 }, { "epoch": 42.58032786885246, "grad_norm": 4.256092548370361, "learning_rate": 1.2848849045508518e-05, "loss": 0.3131, "step": 12987 }, { "epoch": 42.58360655737705, "grad_norm": 4.635601997375488, "learning_rate": 1.2847831145219893e-05, "loss": 0.3957, "step": 12988 }, { "epoch": 42.58688524590164, "grad_norm": 3.7556204795837402, "learning_rate": 1.2846813212819014e-05, "loss": 0.4557, "step": 12989 }, { "epoch": 42.59016393442623, "grad_norm": 3.3981595039367676, "learning_rate": 1.2845795248317352e-05, "loss": 0.1493, "step": 12990 }, { "epoch": 42.59344262295082, "grad_norm": 5.084708213806152, "learning_rate": 1.2844777251726393e-05, "loss": 0.4664, "step": 12991 }, { "epoch": 42.59672131147541, "grad_norm": 3.893784523010254, "learning_rate": 1.284375922305761e-05, "loss": 0.5137, "step": 12992 }, { "epoch": 42.6, "grad_norm": 3.7692313194274902, "learning_rate": 1.2842741162322487e-05, "loss": 0.2805, "step": 12993 }, { "epoch": 42.60327868852459, "grad_norm": 4.859746932983398, "learning_rate": 1.2841723069532502e-05, "loss": 0.316, "step": 12994 }, { "epoch": 42.60655737704918, "grad_norm": 4.396942138671875, "learning_rate": 1.2840704944699132e-05, "loss": 0.2508, "step": 12995 }, { "epoch": 42.609836065573774, "grad_norm": 4.629777908325195, "learning_rate": 1.2839686787833862e-05, "loss": 0.4937, "step": 12996 }, { "epoch": 42.61311475409836, "grad_norm": 3.879882335662842, "learning_rate": 1.283866859894817e-05, "loss": 0.3112, "step": 12997 }, { "epoch": 42.61639344262295, "grad_norm": 4.443684101104736, "learning_rate": 1.2837650378053541e-05, "loss": 0.3012, "step": 12998 }, { "epoch": 42.61967213114754, "grad_norm": 4.117745876312256, "learning_rate": 1.283663212516145e-05, "loss": 0.495, "step": 12999 }, { "epoch": 42.622950819672134, "grad_norm": 3.242302656173706, "learning_rate": 1.2835613840283386e-05, "loss": 0.4715, "step": 13000 }, { "epoch": 42.62622950819672, "grad_norm": 3.74247670173645, "learning_rate": 1.2834595523430824e-05, "loss": 0.2561, "step": 13001 }, { "epoch": 42.62950819672131, "grad_norm": 3.8068504333496094, "learning_rate": 1.2833577174615253e-05, "loss": 0.1579, "step": 13002 }, { "epoch": 42.6327868852459, "grad_norm": 3.843331813812256, "learning_rate": 1.2832558793848148e-05, "loss": 0.456, "step": 13003 }, { "epoch": 42.636065573770495, "grad_norm": 4.388825416564941, "learning_rate": 1.2831540381141006e-05, "loss": 0.4899, "step": 13004 }, { "epoch": 42.63934426229508, "grad_norm": 3.7644784450531006, "learning_rate": 1.2830521936505296e-05, "loss": 0.4493, "step": 13005 }, { "epoch": 42.64262295081967, "grad_norm": 4.265587329864502, "learning_rate": 1.282950345995251e-05, "loss": 0.2621, "step": 13006 }, { "epoch": 42.64590163934426, "grad_norm": 3.332674264907837, "learning_rate": 1.282848495149413e-05, "loss": 0.3938, "step": 13007 }, { "epoch": 42.649180327868855, "grad_norm": 3.7264928817749023, "learning_rate": 1.2827466411141643e-05, "loss": 0.3205, "step": 13008 }, { "epoch": 42.65245901639344, "grad_norm": 3.7543094158172607, "learning_rate": 1.2826447838906528e-05, "loss": 0.2189, "step": 13009 }, { "epoch": 42.65573770491803, "grad_norm": 4.408766746520996, "learning_rate": 1.282542923480028e-05, "loss": 0.5257, "step": 13010 }, { "epoch": 42.65901639344262, "grad_norm": 5.862399101257324, "learning_rate": 1.2824410598834378e-05, "loss": 0.437, "step": 13011 }, { "epoch": 42.662295081967216, "grad_norm": 3.727262258529663, "learning_rate": 1.2823391931020308e-05, "loss": 0.4796, "step": 13012 }, { "epoch": 42.665573770491804, "grad_norm": 3.468205213546753, "learning_rate": 1.2822373231369562e-05, "loss": 0.2846, "step": 13013 }, { "epoch": 42.66885245901639, "grad_norm": 3.335998058319092, "learning_rate": 1.2821354499893619e-05, "loss": 0.4395, "step": 13014 }, { "epoch": 42.67213114754098, "grad_norm": 4.560472011566162, "learning_rate": 1.2820335736603975e-05, "loss": 0.466, "step": 13015 }, { "epoch": 42.675409836065576, "grad_norm": 4.4017744064331055, "learning_rate": 1.281931694151211e-05, "loss": 0.3084, "step": 13016 }, { "epoch": 42.678688524590164, "grad_norm": 3.7742090225219727, "learning_rate": 1.2818298114629518e-05, "loss": 0.3618, "step": 13017 }, { "epoch": 42.68196721311475, "grad_norm": 3.7760391235351562, "learning_rate": 1.2817279255967679e-05, "loss": 0.3534, "step": 13018 }, { "epoch": 42.68524590163934, "grad_norm": 4.284022331237793, "learning_rate": 1.2816260365538092e-05, "loss": 0.41, "step": 13019 }, { "epoch": 42.68852459016394, "grad_norm": 4.002999782562256, "learning_rate": 1.2815241443352238e-05, "loss": 0.4381, "step": 13020 }, { "epoch": 42.691803278688525, "grad_norm": 5.077850341796875, "learning_rate": 1.2814222489421612e-05, "loss": 0.4866, "step": 13021 }, { "epoch": 42.69508196721311, "grad_norm": 3.398364305496216, "learning_rate": 1.2813203503757702e-05, "loss": 0.3735, "step": 13022 }, { "epoch": 42.6983606557377, "grad_norm": 3.0487496852874756, "learning_rate": 1.2812184486371995e-05, "loss": 0.2959, "step": 13023 }, { "epoch": 42.7016393442623, "grad_norm": 4.3256754875183105, "learning_rate": 1.2811165437275985e-05, "loss": 0.3219, "step": 13024 }, { "epoch": 42.704918032786885, "grad_norm": 3.804276704788208, "learning_rate": 1.2810146356481158e-05, "loss": 0.2899, "step": 13025 }, { "epoch": 42.708196721311474, "grad_norm": 3.7193286418914795, "learning_rate": 1.2809127243999017e-05, "loss": 0.299, "step": 13026 }, { "epoch": 42.71147540983607, "grad_norm": 4.043930530548096, "learning_rate": 1.2808108099841041e-05, "loss": 0.4448, "step": 13027 }, { "epoch": 42.71475409836066, "grad_norm": 3.646193742752075, "learning_rate": 1.2807088924018727e-05, "loss": 0.3601, "step": 13028 }, { "epoch": 42.718032786885246, "grad_norm": 4.04069709777832, "learning_rate": 1.2806069716543566e-05, "loss": 0.4825, "step": 13029 }, { "epoch": 42.721311475409834, "grad_norm": 3.9155781269073486, "learning_rate": 1.2805050477427053e-05, "loss": 0.3955, "step": 13030 }, { "epoch": 42.72459016393443, "grad_norm": 4.134999752044678, "learning_rate": 1.2804031206680679e-05, "loss": 0.4487, "step": 13031 }, { "epoch": 42.72786885245902, "grad_norm": 3.8225138187408447, "learning_rate": 1.2803011904315937e-05, "loss": 0.4632, "step": 13032 }, { "epoch": 42.731147540983606, "grad_norm": 4.603382110595703, "learning_rate": 1.280199257034432e-05, "loss": 0.2496, "step": 13033 }, { "epoch": 42.734426229508195, "grad_norm": 3.8808248043060303, "learning_rate": 1.2800973204777324e-05, "loss": 0.432, "step": 13034 }, { "epoch": 42.73770491803279, "grad_norm": 4.42114782333374, "learning_rate": 1.2799953807626446e-05, "loss": 0.5559, "step": 13035 }, { "epoch": 42.74098360655738, "grad_norm": 3.9626920223236084, "learning_rate": 1.2798934378903174e-05, "loss": 0.317, "step": 13036 }, { "epoch": 42.74426229508197, "grad_norm": 3.0480825901031494, "learning_rate": 1.279791491861901e-05, "loss": 0.3916, "step": 13037 }, { "epoch": 42.747540983606555, "grad_norm": 4.397038459777832, "learning_rate": 1.2796895426785442e-05, "loss": 0.2249, "step": 13038 }, { "epoch": 42.75081967213115, "grad_norm": 3.8886306285858154, "learning_rate": 1.2795875903413975e-05, "loss": 0.218, "step": 13039 }, { "epoch": 42.75409836065574, "grad_norm": 3.2501020431518555, "learning_rate": 1.2794856348516095e-05, "loss": 0.3711, "step": 13040 }, { "epoch": 42.75737704918033, "grad_norm": 3.5453500747680664, "learning_rate": 1.279383676210331e-05, "loss": 0.3439, "step": 13041 }, { "epoch": 42.760655737704916, "grad_norm": 4.122382164001465, "learning_rate": 1.2792817144187104e-05, "loss": 0.4401, "step": 13042 }, { "epoch": 42.76393442622951, "grad_norm": 3.422109842300415, "learning_rate": 1.2791797494778985e-05, "loss": 0.3114, "step": 13043 }, { "epoch": 42.7672131147541, "grad_norm": 3.7601683139801025, "learning_rate": 1.2790777813890445e-05, "loss": 0.287, "step": 13044 }, { "epoch": 42.77049180327869, "grad_norm": 3.670044183731079, "learning_rate": 1.2789758101532983e-05, "loss": 0.4015, "step": 13045 }, { "epoch": 42.773770491803276, "grad_norm": 4.237138271331787, "learning_rate": 1.2788738357718098e-05, "loss": 0.2565, "step": 13046 }, { "epoch": 42.77704918032787, "grad_norm": 3.4692418575286865, "learning_rate": 1.2787718582457292e-05, "loss": 0.2068, "step": 13047 }, { "epoch": 42.78032786885246, "grad_norm": 4.269145488739014, "learning_rate": 1.2786698775762054e-05, "loss": 0.286, "step": 13048 }, { "epoch": 42.78360655737705, "grad_norm": 4.006418228149414, "learning_rate": 1.2785678937643892e-05, "loss": 0.3372, "step": 13049 }, { "epoch": 42.78688524590164, "grad_norm": 3.9033596515655518, "learning_rate": 1.2784659068114306e-05, "loss": 0.3242, "step": 13050 }, { "epoch": 42.79016393442623, "grad_norm": 3.728156566619873, "learning_rate": 1.2783639167184794e-05, "loss": 0.3071, "step": 13051 }, { "epoch": 42.79344262295082, "grad_norm": 3.707439422607422, "learning_rate": 1.2782619234866855e-05, "loss": 0.373, "step": 13052 }, { "epoch": 42.79672131147541, "grad_norm": 4.805273056030273, "learning_rate": 1.278159927117199e-05, "loss": 0.4194, "step": 13053 }, { "epoch": 42.8, "grad_norm": 3.709604263305664, "learning_rate": 1.2780579276111702e-05, "loss": 0.4571, "step": 13054 }, { "epoch": 42.80327868852459, "grad_norm": 4.155536651611328, "learning_rate": 1.2779559249697491e-05, "loss": 0.2869, "step": 13055 }, { "epoch": 42.80655737704918, "grad_norm": 3.567317247390747, "learning_rate": 1.2778539191940859e-05, "loss": 0.5854, "step": 13056 }, { "epoch": 42.80983606557377, "grad_norm": 3.7982423305511475, "learning_rate": 1.2777519102853311e-05, "loss": 0.359, "step": 13057 }, { "epoch": 42.81311475409836, "grad_norm": 3.6004624366760254, "learning_rate": 1.2776498982446346e-05, "loss": 0.5017, "step": 13058 }, { "epoch": 42.81639344262295, "grad_norm": 3.7453091144561768, "learning_rate": 1.2775478830731468e-05, "loss": 0.6302, "step": 13059 }, { "epoch": 42.81967213114754, "grad_norm": 4.114837169647217, "learning_rate": 1.277445864772018e-05, "loss": 0.3729, "step": 13060 }, { "epoch": 42.82295081967213, "grad_norm": 3.9403414726257324, "learning_rate": 1.2773438433423989e-05, "loss": 0.2798, "step": 13061 }, { "epoch": 42.82622950819672, "grad_norm": 4.254940986633301, "learning_rate": 1.277241818785439e-05, "loss": 0.521, "step": 13062 }, { "epoch": 42.829508196721314, "grad_norm": 3.552502155303955, "learning_rate": 1.2771397911022898e-05, "loss": 0.3264, "step": 13063 }, { "epoch": 42.8327868852459, "grad_norm": 4.063418388366699, "learning_rate": 1.277037760294101e-05, "loss": 0.403, "step": 13064 }, { "epoch": 42.83606557377049, "grad_norm": 3.930589437484741, "learning_rate": 1.2769357263620237e-05, "loss": 0.4071, "step": 13065 }, { "epoch": 42.83934426229508, "grad_norm": 4.495701313018799, "learning_rate": 1.276833689307208e-05, "loss": 0.3387, "step": 13066 }, { "epoch": 42.842622950819674, "grad_norm": 3.840291738510132, "learning_rate": 1.2767316491308047e-05, "loss": 0.6175, "step": 13067 }, { "epoch": 42.84590163934426, "grad_norm": 4.82574987411499, "learning_rate": 1.2766296058339642e-05, "loss": 0.4333, "step": 13068 }, { "epoch": 42.84918032786885, "grad_norm": 3.6476974487304688, "learning_rate": 1.2765275594178372e-05, "loss": 0.3213, "step": 13069 }, { "epoch": 42.85245901639344, "grad_norm": 3.870673894882202, "learning_rate": 1.2764255098835747e-05, "loss": 0.4619, "step": 13070 }, { "epoch": 42.855737704918035, "grad_norm": 3.864065170288086, "learning_rate": 1.276323457232327e-05, "loss": 0.318, "step": 13071 }, { "epoch": 42.85901639344262, "grad_norm": 4.089432239532471, "learning_rate": 1.276221401465245e-05, "loss": 0.4191, "step": 13072 }, { "epoch": 42.86229508196721, "grad_norm": 4.008776664733887, "learning_rate": 1.2761193425834798e-05, "loss": 0.4213, "step": 13073 }, { "epoch": 42.86557377049181, "grad_norm": 3.787788152694702, "learning_rate": 1.2760172805881814e-05, "loss": 0.521, "step": 13074 }, { "epoch": 42.868852459016395, "grad_norm": 3.608678102493286, "learning_rate": 1.2759152154805017e-05, "loss": 0.3382, "step": 13075 }, { "epoch": 42.87213114754098, "grad_norm": 3.661062240600586, "learning_rate": 1.2758131472615906e-05, "loss": 0.3305, "step": 13076 }, { "epoch": 42.87540983606557, "grad_norm": 3.5018630027770996, "learning_rate": 1.2757110759325997e-05, "loss": 0.2908, "step": 13077 }, { "epoch": 42.87868852459017, "grad_norm": 3.8716464042663574, "learning_rate": 1.2756090014946798e-05, "loss": 0.4162, "step": 13078 }, { "epoch": 42.881967213114756, "grad_norm": 3.9287447929382324, "learning_rate": 1.2755069239489817e-05, "loss": 0.4352, "step": 13079 }, { "epoch": 42.885245901639344, "grad_norm": 3.8251795768737793, "learning_rate": 1.2754048432966568e-05, "loss": 0.3422, "step": 13080 }, { "epoch": 42.88852459016393, "grad_norm": 3.826036214828491, "learning_rate": 1.2753027595388558e-05, "loss": 0.2839, "step": 13081 }, { "epoch": 42.89180327868853, "grad_norm": 5.532003879547119, "learning_rate": 1.27520067267673e-05, "loss": 0.4073, "step": 13082 }, { "epoch": 42.895081967213116, "grad_norm": 3.299776077270508, "learning_rate": 1.2750985827114304e-05, "loss": 0.1571, "step": 13083 }, { "epoch": 42.898360655737704, "grad_norm": 3.907512664794922, "learning_rate": 1.2749964896441084e-05, "loss": 0.4236, "step": 13084 }, { "epoch": 42.90163934426229, "grad_norm": 3.72714900970459, "learning_rate": 1.2748943934759149e-05, "loss": 0.2556, "step": 13085 }, { "epoch": 42.90491803278689, "grad_norm": 3.9422521591186523, "learning_rate": 1.2747922942080014e-05, "loss": 0.3288, "step": 13086 }, { "epoch": 42.90819672131148, "grad_norm": 3.7846548557281494, "learning_rate": 1.2746901918415191e-05, "loss": 0.3361, "step": 13087 }, { "epoch": 42.911475409836065, "grad_norm": 6.917174339294434, "learning_rate": 1.274588086377619e-05, "loss": 0.3722, "step": 13088 }, { "epoch": 42.91475409836065, "grad_norm": 3.849656105041504, "learning_rate": 1.2744859778174532e-05, "loss": 0.4432, "step": 13089 }, { "epoch": 42.91803278688525, "grad_norm": 4.261634349822998, "learning_rate": 1.2743838661621722e-05, "loss": 0.4112, "step": 13090 }, { "epoch": 42.92131147540984, "grad_norm": 3.9733636379241943, "learning_rate": 1.274281751412928e-05, "loss": 0.4634, "step": 13091 }, { "epoch": 42.924590163934425, "grad_norm": 3.8079941272735596, "learning_rate": 1.2741796335708718e-05, "loss": 0.5505, "step": 13092 }, { "epoch": 42.927868852459014, "grad_norm": 4.277539253234863, "learning_rate": 1.2740775126371553e-05, "loss": 0.3478, "step": 13093 }, { "epoch": 42.93114754098361, "grad_norm": 3.2421915531158447, "learning_rate": 1.2739753886129296e-05, "loss": 0.2837, "step": 13094 }, { "epoch": 42.9344262295082, "grad_norm": 4.503123760223389, "learning_rate": 1.2738732614993467e-05, "loss": 0.3177, "step": 13095 }, { "epoch": 42.937704918032786, "grad_norm": 3.4485135078430176, "learning_rate": 1.273771131297558e-05, "loss": 0.3093, "step": 13096 }, { "epoch": 42.940983606557374, "grad_norm": 4.490245819091797, "learning_rate": 1.2736689980087154e-05, "loss": 0.4349, "step": 13097 }, { "epoch": 42.94426229508197, "grad_norm": 4.100901126861572, "learning_rate": 1.2735668616339699e-05, "loss": 0.4539, "step": 13098 }, { "epoch": 42.94754098360656, "grad_norm": 4.1411333084106445, "learning_rate": 1.2734647221744739e-05, "loss": 0.4367, "step": 13099 }, { "epoch": 42.950819672131146, "grad_norm": 3.2383484840393066, "learning_rate": 1.2733625796313785e-05, "loss": 0.4883, "step": 13100 }, { "epoch": 42.954098360655735, "grad_norm": 3.417019844055176, "learning_rate": 1.2732604340058361e-05, "loss": 0.2695, "step": 13101 }, { "epoch": 42.95737704918033, "grad_norm": 5.689648151397705, "learning_rate": 1.2731582852989978e-05, "loss": 0.2641, "step": 13102 }, { "epoch": 42.96065573770492, "grad_norm": 4.262693405151367, "learning_rate": 1.2730561335120162e-05, "loss": 0.4833, "step": 13103 }, { "epoch": 42.96393442622951, "grad_norm": 4.31086540222168, "learning_rate": 1.2729539786460428e-05, "loss": 0.3118, "step": 13104 }, { "epoch": 42.967213114754095, "grad_norm": 4.19070291519165, "learning_rate": 1.2728518207022292e-05, "loss": 0.3657, "step": 13105 }, { "epoch": 42.97049180327869, "grad_norm": 4.19040584564209, "learning_rate": 1.2727496596817277e-05, "loss": 0.4075, "step": 13106 }, { "epoch": 42.97377049180328, "grad_norm": 5.409989356994629, "learning_rate": 1.2726474955856903e-05, "loss": 0.4315, "step": 13107 }, { "epoch": 42.97704918032787, "grad_norm": 3.7316458225250244, "learning_rate": 1.272545328415269e-05, "loss": 0.3775, "step": 13108 }, { "epoch": 42.980327868852456, "grad_norm": 4.297422885894775, "learning_rate": 1.2724431581716154e-05, "loss": 0.6859, "step": 13109 }, { "epoch": 42.98360655737705, "grad_norm": 4.462640285491943, "learning_rate": 1.2723409848558823e-05, "loss": 0.357, "step": 13110 }, { "epoch": 42.98688524590164, "grad_norm": 3.405599355697632, "learning_rate": 1.2722388084692211e-05, "loss": 0.1733, "step": 13111 }, { "epoch": 42.99016393442623, "grad_norm": 3.419253349304199, "learning_rate": 1.2721366290127848e-05, "loss": 0.2162, "step": 13112 }, { "epoch": 42.993442622950816, "grad_norm": 3.6337056159973145, "learning_rate": 1.2720344464877248e-05, "loss": 0.2814, "step": 13113 }, { "epoch": 42.99672131147541, "grad_norm": 4.092878818511963, "learning_rate": 1.2719322608951932e-05, "loss": 0.4112, "step": 13114 }, { "epoch": 43.0, "grad_norm": 4.460109233856201, "learning_rate": 1.2718300722363431e-05, "loss": 0.2304, "step": 13115 }, { "epoch": 43.00327868852459, "grad_norm": 3.389848232269287, "learning_rate": 1.271727880512326e-05, "loss": 0.3273, "step": 13116 }, { "epoch": 43.006557377049184, "grad_norm": 4.696307182312012, "learning_rate": 1.2716256857242947e-05, "loss": 0.5152, "step": 13117 }, { "epoch": 43.00983606557377, "grad_norm": 4.082668781280518, "learning_rate": 1.271523487873401e-05, "loss": 0.2849, "step": 13118 }, { "epoch": 43.01311475409836, "grad_norm": 3.6882381439208984, "learning_rate": 1.2714212869607982e-05, "loss": 0.218, "step": 13119 }, { "epoch": 43.01639344262295, "grad_norm": 3.452244520187378, "learning_rate": 1.2713190829876378e-05, "loss": 0.5113, "step": 13120 }, { "epoch": 43.019672131147544, "grad_norm": 4.730750560760498, "learning_rate": 1.271216875955073e-05, "loss": 0.3536, "step": 13121 }, { "epoch": 43.02295081967213, "grad_norm": 3.4190986156463623, "learning_rate": 1.2711146658642557e-05, "loss": 0.338, "step": 13122 }, { "epoch": 43.02622950819672, "grad_norm": 3.9701619148254395, "learning_rate": 1.2710124527163387e-05, "loss": 0.3041, "step": 13123 }, { "epoch": 43.02950819672131, "grad_norm": 3.8311121463775635, "learning_rate": 1.2709102365124743e-05, "loss": 0.2222, "step": 13124 }, { "epoch": 43.032786885245905, "grad_norm": 3.7527248859405518, "learning_rate": 1.2708080172538158e-05, "loss": 0.3617, "step": 13125 }, { "epoch": 43.03606557377049, "grad_norm": 3.948014974594116, "learning_rate": 1.270705794941515e-05, "loss": 0.5021, "step": 13126 }, { "epoch": 43.03934426229508, "grad_norm": 3.815378189086914, "learning_rate": 1.270603569576725e-05, "loss": 0.3089, "step": 13127 }, { "epoch": 43.04262295081967, "grad_norm": 3.544692039489746, "learning_rate": 1.270501341160598e-05, "loss": 0.3279, "step": 13128 }, { "epoch": 43.045901639344265, "grad_norm": 3.7612783908843994, "learning_rate": 1.2703991096942876e-05, "loss": 0.4665, "step": 13129 }, { "epoch": 43.049180327868854, "grad_norm": 3.9431960582733154, "learning_rate": 1.270296875178946e-05, "loss": 0.4004, "step": 13130 }, { "epoch": 43.05245901639344, "grad_norm": 3.608452558517456, "learning_rate": 1.2701946376157258e-05, "loss": 0.4721, "step": 13131 }, { "epoch": 43.05573770491803, "grad_norm": 3.8983497619628906, "learning_rate": 1.2700923970057803e-05, "loss": 0.431, "step": 13132 }, { "epoch": 43.059016393442626, "grad_norm": 4.166207790374756, "learning_rate": 1.2699901533502624e-05, "loss": 0.5128, "step": 13133 }, { "epoch": 43.062295081967214, "grad_norm": 4.496685981750488, "learning_rate": 1.2698879066503247e-05, "loss": 0.4645, "step": 13134 }, { "epoch": 43.0655737704918, "grad_norm": 3.299207925796509, "learning_rate": 1.26978565690712e-05, "loss": 0.1528, "step": 13135 }, { "epoch": 43.06885245901639, "grad_norm": 3.466592788696289, "learning_rate": 1.2696834041218017e-05, "loss": 0.2045, "step": 13136 }, { "epoch": 43.072131147540986, "grad_norm": 3.913759231567383, "learning_rate": 1.2695811482955227e-05, "loss": 0.3208, "step": 13137 }, { "epoch": 43.075409836065575, "grad_norm": 4.277550220489502, "learning_rate": 1.2694788894294358e-05, "loss": 0.2772, "step": 13138 }, { "epoch": 43.07868852459016, "grad_norm": 4.566910743713379, "learning_rate": 1.2693766275246947e-05, "loss": 0.2505, "step": 13139 }, { "epoch": 43.08196721311475, "grad_norm": 3.2718446254730225, "learning_rate": 1.2692743625824515e-05, "loss": 0.3146, "step": 13140 }, { "epoch": 43.08524590163935, "grad_norm": 5.724396228790283, "learning_rate": 1.2691720946038602e-05, "loss": 0.2167, "step": 13141 }, { "epoch": 43.088524590163935, "grad_norm": 3.1323680877685547, "learning_rate": 1.2690698235900734e-05, "loss": 0.3856, "step": 13142 }, { "epoch": 43.09180327868852, "grad_norm": 4.29943323135376, "learning_rate": 1.2689675495422447e-05, "loss": 0.2074, "step": 13143 }, { "epoch": 43.09508196721311, "grad_norm": 3.429302453994751, "learning_rate": 1.2688652724615271e-05, "loss": 0.3166, "step": 13144 }, { "epoch": 43.09836065573771, "grad_norm": 3.83225417137146, "learning_rate": 1.2687629923490743e-05, "loss": 0.4055, "step": 13145 }, { "epoch": 43.101639344262296, "grad_norm": 3.438979148864746, "learning_rate": 1.268660709206039e-05, "loss": 0.4559, "step": 13146 }, { "epoch": 43.104918032786884, "grad_norm": 3.530182123184204, "learning_rate": 1.268558423033575e-05, "loss": 0.4167, "step": 13147 }, { "epoch": 43.10819672131147, "grad_norm": 4.362511157989502, "learning_rate": 1.2684561338328356e-05, "loss": 0.2091, "step": 13148 }, { "epoch": 43.11147540983607, "grad_norm": 4.04167366027832, "learning_rate": 1.2683538416049741e-05, "loss": 0.2821, "step": 13149 }, { "epoch": 43.114754098360656, "grad_norm": 4.066095352172852, "learning_rate": 1.268251546351144e-05, "loss": 0.415, "step": 13150 }, { "epoch": 43.118032786885244, "grad_norm": 3.5647130012512207, "learning_rate": 1.2681492480724991e-05, "loss": 0.2129, "step": 13151 }, { "epoch": 43.12131147540983, "grad_norm": 4.201309680938721, "learning_rate": 1.2680469467701924e-05, "loss": 0.4173, "step": 13152 }, { "epoch": 43.12459016393443, "grad_norm": 3.9038257598876953, "learning_rate": 1.2679446424453773e-05, "loss": 0.3207, "step": 13153 }, { "epoch": 43.12786885245902, "grad_norm": 4.215177059173584, "learning_rate": 1.2678423350992085e-05, "loss": 0.4845, "step": 13154 }, { "epoch": 43.131147540983605, "grad_norm": 3.7930097579956055, "learning_rate": 1.2677400247328384e-05, "loss": 0.3591, "step": 13155 }, { "epoch": 43.13442622950819, "grad_norm": 4.8546319007873535, "learning_rate": 1.267637711347421e-05, "loss": 0.4048, "step": 13156 }, { "epoch": 43.13770491803279, "grad_norm": 3.5129847526550293, "learning_rate": 1.2675353949441105e-05, "loss": 0.4406, "step": 13157 }, { "epoch": 43.14098360655738, "grad_norm": 3.9188849925994873, "learning_rate": 1.26743307552406e-05, "loss": 0.1664, "step": 13158 }, { "epoch": 43.144262295081965, "grad_norm": 3.853081464767456, "learning_rate": 1.2673307530884236e-05, "loss": 0.4638, "step": 13159 }, { "epoch": 43.14754098360656, "grad_norm": 3.181420087814331, "learning_rate": 1.267228427638355e-05, "loss": 0.2741, "step": 13160 }, { "epoch": 43.15081967213115, "grad_norm": 3.5551209449768066, "learning_rate": 1.267126099175008e-05, "loss": 0.4529, "step": 13161 }, { "epoch": 43.15409836065574, "grad_norm": 4.304577350616455, "learning_rate": 1.2670237676995368e-05, "loss": 0.4445, "step": 13162 }, { "epoch": 43.157377049180326, "grad_norm": 3.940901041030884, "learning_rate": 1.2669214332130945e-05, "loss": 0.4741, "step": 13163 }, { "epoch": 43.16065573770492, "grad_norm": 3.6780591011047363, "learning_rate": 1.2668190957168358e-05, "loss": 0.3923, "step": 13164 }, { "epoch": 43.16393442622951, "grad_norm": 4.170677661895752, "learning_rate": 1.2667167552119146e-05, "loss": 0.4426, "step": 13165 }, { "epoch": 43.1672131147541, "grad_norm": 3.5573151111602783, "learning_rate": 1.2666144116994843e-05, "loss": 0.6822, "step": 13166 }, { "epoch": 43.170491803278686, "grad_norm": 3.3588128089904785, "learning_rate": 1.2665120651806994e-05, "loss": 0.274, "step": 13167 }, { "epoch": 43.17377049180328, "grad_norm": 3.388162136077881, "learning_rate": 1.2664097156567138e-05, "loss": 0.4178, "step": 13168 }, { "epoch": 43.17704918032787, "grad_norm": 3.957310199737549, "learning_rate": 1.2663073631286817e-05, "loss": 0.2873, "step": 13169 }, { "epoch": 43.18032786885246, "grad_norm": 3.7895190715789795, "learning_rate": 1.2662050075977571e-05, "loss": 0.2867, "step": 13170 }, { "epoch": 43.18360655737705, "grad_norm": 4.374332904815674, "learning_rate": 1.2661026490650945e-05, "loss": 0.6105, "step": 13171 }, { "epoch": 43.18688524590164, "grad_norm": 4.412469387054443, "learning_rate": 1.2660002875318477e-05, "loss": 0.2141, "step": 13172 }, { "epoch": 43.19016393442623, "grad_norm": 3.564971446990967, "learning_rate": 1.2658979229991713e-05, "loss": 0.5148, "step": 13173 }, { "epoch": 43.19344262295082, "grad_norm": 3.6796953678131104, "learning_rate": 1.2657955554682189e-05, "loss": 0.241, "step": 13174 }, { "epoch": 43.19672131147541, "grad_norm": 3.875814199447632, "learning_rate": 1.2656931849401457e-05, "loss": 0.2547, "step": 13175 }, { "epoch": 43.2, "grad_norm": 4.046323299407959, "learning_rate": 1.2655908114161053e-05, "loss": 0.3084, "step": 13176 }, { "epoch": 43.20327868852459, "grad_norm": 7.876180648803711, "learning_rate": 1.2654884348972525e-05, "loss": 0.4426, "step": 13177 }, { "epoch": 43.20655737704918, "grad_norm": 4.100177764892578, "learning_rate": 1.2653860553847417e-05, "loss": 0.2299, "step": 13178 }, { "epoch": 43.20983606557377, "grad_norm": 2.9224865436553955, "learning_rate": 1.2652836728797269e-05, "loss": 0.2509, "step": 13179 }, { "epoch": 43.21311475409836, "grad_norm": 3.189832925796509, "learning_rate": 1.2651812873833631e-05, "loss": 0.3102, "step": 13180 }, { "epoch": 43.21639344262295, "grad_norm": 3.923206090927124, "learning_rate": 1.2650788988968042e-05, "loss": 0.4421, "step": 13181 }, { "epoch": 43.21967213114754, "grad_norm": 4.624400615692139, "learning_rate": 1.2649765074212053e-05, "loss": 0.5753, "step": 13182 }, { "epoch": 43.22295081967213, "grad_norm": 4.280642509460449, "learning_rate": 1.2648741129577208e-05, "loss": 0.4844, "step": 13183 }, { "epoch": 43.226229508196724, "grad_norm": 4.471286296844482, "learning_rate": 1.2647717155075052e-05, "loss": 0.4375, "step": 13184 }, { "epoch": 43.22950819672131, "grad_norm": 4.270950794219971, "learning_rate": 1.2646693150717136e-05, "loss": 0.3324, "step": 13185 }, { "epoch": 43.2327868852459, "grad_norm": 4.375448226928711, "learning_rate": 1.2645669116514998e-05, "loss": 0.286, "step": 13186 }, { "epoch": 43.23606557377049, "grad_norm": 4.283138751983643, "learning_rate": 1.2644645052480188e-05, "loss": 0.3072, "step": 13187 }, { "epoch": 43.239344262295084, "grad_norm": 3.575518846511841, "learning_rate": 1.2643620958624263e-05, "loss": 0.235, "step": 13188 }, { "epoch": 43.24262295081967, "grad_norm": 3.6109468936920166, "learning_rate": 1.2642596834958757e-05, "loss": 0.1726, "step": 13189 }, { "epoch": 43.24590163934426, "grad_norm": 4.000763893127441, "learning_rate": 1.2641572681495226e-05, "loss": 0.6908, "step": 13190 }, { "epoch": 43.24918032786885, "grad_norm": 3.3065950870513916, "learning_rate": 1.264054849824522e-05, "loss": 0.3021, "step": 13191 }, { "epoch": 43.252459016393445, "grad_norm": 4.119235038757324, "learning_rate": 1.263952428522028e-05, "loss": 0.1726, "step": 13192 }, { "epoch": 43.25573770491803, "grad_norm": 4.3521728515625, "learning_rate": 1.263850004243196e-05, "loss": 0.2183, "step": 13193 }, { "epoch": 43.25901639344262, "grad_norm": 3.7911107540130615, "learning_rate": 1.2637475769891807e-05, "loss": 0.4521, "step": 13194 }, { "epoch": 43.26229508196721, "grad_norm": 4.062541961669922, "learning_rate": 1.2636451467611375e-05, "loss": 0.3823, "step": 13195 }, { "epoch": 43.265573770491805, "grad_norm": 4.349847316741943, "learning_rate": 1.2635427135602209e-05, "loss": 0.3281, "step": 13196 }, { "epoch": 43.268852459016394, "grad_norm": 3.425302028656006, "learning_rate": 1.2634402773875866e-05, "loss": 0.2514, "step": 13197 }, { "epoch": 43.27213114754098, "grad_norm": 5.232324600219727, "learning_rate": 1.2633378382443888e-05, "loss": 0.2177, "step": 13198 }, { "epoch": 43.27540983606557, "grad_norm": 3.730238676071167, "learning_rate": 1.2632353961317834e-05, "loss": 0.3054, "step": 13199 }, { "epoch": 43.278688524590166, "grad_norm": 4.166654109954834, "learning_rate": 1.263132951050925e-05, "loss": 0.3198, "step": 13200 }, { "epoch": 43.281967213114754, "grad_norm": 3.4776766300201416, "learning_rate": 1.2630305030029692e-05, "loss": 0.408, "step": 13201 }, { "epoch": 43.28524590163934, "grad_norm": 3.8818559646606445, "learning_rate": 1.262928051989071e-05, "loss": 0.2791, "step": 13202 }, { "epoch": 43.28852459016394, "grad_norm": 3.639613628387451, "learning_rate": 1.2628255980103854e-05, "loss": 0.4179, "step": 13203 }, { "epoch": 43.291803278688526, "grad_norm": 4.67409610748291, "learning_rate": 1.2627231410680682e-05, "loss": 0.3976, "step": 13204 }, { "epoch": 43.295081967213115, "grad_norm": 3.8820695877075195, "learning_rate": 1.2626206811632743e-05, "loss": 0.1686, "step": 13205 }, { "epoch": 43.2983606557377, "grad_norm": 5.140128135681152, "learning_rate": 1.2625182182971591e-05, "loss": 0.3703, "step": 13206 }, { "epoch": 43.3016393442623, "grad_norm": 3.3394579887390137, "learning_rate": 1.262415752470878e-05, "loss": 0.2581, "step": 13207 }, { "epoch": 43.30491803278689, "grad_norm": 3.5586278438568115, "learning_rate": 1.2623132836855865e-05, "loss": 0.2287, "step": 13208 }, { "epoch": 43.308196721311475, "grad_norm": 4.033349990844727, "learning_rate": 1.26221081194244e-05, "loss": 0.4564, "step": 13209 }, { "epoch": 43.31147540983606, "grad_norm": 3.735924243927002, "learning_rate": 1.2621083372425937e-05, "loss": 0.5237, "step": 13210 }, { "epoch": 43.31475409836066, "grad_norm": 4.348464488983154, "learning_rate": 1.2620058595872039e-05, "loss": 0.57, "step": 13211 }, { "epoch": 43.31803278688525, "grad_norm": 3.9178171157836914, "learning_rate": 1.2619033789774251e-05, "loss": 0.5245, "step": 13212 }, { "epoch": 43.321311475409836, "grad_norm": 4.211673259735107, "learning_rate": 1.2618008954144135e-05, "loss": 0.4592, "step": 13213 }, { "epoch": 43.324590163934424, "grad_norm": 4.230254173278809, "learning_rate": 1.261698408899325e-05, "loss": 0.3573, "step": 13214 }, { "epoch": 43.32786885245902, "grad_norm": 3.7078683376312256, "learning_rate": 1.2615959194333143e-05, "loss": 0.2361, "step": 13215 }, { "epoch": 43.33114754098361, "grad_norm": 4.479892253875732, "learning_rate": 1.261493427017538e-05, "loss": 0.4394, "step": 13216 }, { "epoch": 43.334426229508196, "grad_norm": 3.406156539916992, "learning_rate": 1.2613909316531514e-05, "loss": 0.278, "step": 13217 }, { "epoch": 43.337704918032784, "grad_norm": 3.7552144527435303, "learning_rate": 1.2612884333413102e-05, "loss": 0.279, "step": 13218 }, { "epoch": 43.34098360655738, "grad_norm": 3.968719005584717, "learning_rate": 1.2611859320831703e-05, "loss": 0.4592, "step": 13219 }, { "epoch": 43.34426229508197, "grad_norm": 3.618993043899536, "learning_rate": 1.2610834278798873e-05, "loss": 0.3741, "step": 13220 }, { "epoch": 43.34754098360656, "grad_norm": 3.5428221225738525, "learning_rate": 1.2609809207326173e-05, "loss": 0.3265, "step": 13221 }, { "epoch": 43.350819672131145, "grad_norm": 4.542218208312988, "learning_rate": 1.260878410642516e-05, "loss": 0.356, "step": 13222 }, { "epoch": 43.35409836065574, "grad_norm": 3.234273910522461, "learning_rate": 1.2607758976107394e-05, "loss": 0.494, "step": 13223 }, { "epoch": 43.35737704918033, "grad_norm": 3.9505324363708496, "learning_rate": 1.2606733816384433e-05, "loss": 0.4, "step": 13224 }, { "epoch": 43.36065573770492, "grad_norm": 4.2634758949279785, "learning_rate": 1.260570862726784e-05, "loss": 0.3207, "step": 13225 }, { "epoch": 43.363934426229505, "grad_norm": 3.3938939571380615, "learning_rate": 1.2604683408769171e-05, "loss": 0.2561, "step": 13226 }, { "epoch": 43.3672131147541, "grad_norm": 3.683271884918213, "learning_rate": 1.2603658160899991e-05, "loss": 0.2162, "step": 13227 }, { "epoch": 43.37049180327869, "grad_norm": 3.5375192165374756, "learning_rate": 1.2602632883671855e-05, "loss": 0.4054, "step": 13228 }, { "epoch": 43.37377049180328, "grad_norm": 3.330075263977051, "learning_rate": 1.2601607577096331e-05, "loss": 0.312, "step": 13229 }, { "epoch": 43.377049180327866, "grad_norm": 4.364121913909912, "learning_rate": 1.2600582241184975e-05, "loss": 0.4727, "step": 13230 }, { "epoch": 43.38032786885246, "grad_norm": 4.010890007019043, "learning_rate": 1.2599556875949351e-05, "loss": 0.5264, "step": 13231 }, { "epoch": 43.38360655737705, "grad_norm": 3.7422311305999756, "learning_rate": 1.2598531481401017e-05, "loss": 0.2668, "step": 13232 }, { "epoch": 43.38688524590164, "grad_norm": 4.981970310211182, "learning_rate": 1.259750605755154e-05, "loss": 0.3748, "step": 13233 }, { "epoch": 43.390163934426226, "grad_norm": 3.808847665786743, "learning_rate": 1.2596480604412485e-05, "loss": 0.4258, "step": 13234 }, { "epoch": 43.39344262295082, "grad_norm": 3.542510747909546, "learning_rate": 1.2595455121995408e-05, "loss": 0.4093, "step": 13235 }, { "epoch": 43.39672131147541, "grad_norm": 3.9215927124023438, "learning_rate": 1.2594429610311876e-05, "loss": 0.3144, "step": 13236 }, { "epoch": 43.4, "grad_norm": 3.6059317588806152, "learning_rate": 1.2593404069373452e-05, "loss": 0.2343, "step": 13237 }, { "epoch": 43.40327868852459, "grad_norm": 3.778873920440674, "learning_rate": 1.2592378499191701e-05, "loss": 0.266, "step": 13238 }, { "epoch": 43.40655737704918, "grad_norm": 3.8938486576080322, "learning_rate": 1.2591352899778188e-05, "loss": 0.4377, "step": 13239 }, { "epoch": 43.40983606557377, "grad_norm": 3.729830265045166, "learning_rate": 1.2590327271144478e-05, "loss": 0.2793, "step": 13240 }, { "epoch": 43.41311475409836, "grad_norm": 3.277150869369507, "learning_rate": 1.2589301613302131e-05, "loss": 0.317, "step": 13241 }, { "epoch": 43.41639344262295, "grad_norm": 4.480164527893066, "learning_rate": 1.2588275926262721e-05, "loss": 0.4814, "step": 13242 }, { "epoch": 43.41967213114754, "grad_norm": 3.4796106815338135, "learning_rate": 1.2587250210037807e-05, "loss": 0.1544, "step": 13243 }, { "epoch": 43.42295081967213, "grad_norm": 3.5137128829956055, "learning_rate": 1.2586224464638955e-05, "loss": 0.2103, "step": 13244 }, { "epoch": 43.42622950819672, "grad_norm": 3.344038724899292, "learning_rate": 1.2585198690077736e-05, "loss": 0.4113, "step": 13245 }, { "epoch": 43.429508196721315, "grad_norm": 3.356964588165283, "learning_rate": 1.2584172886365709e-05, "loss": 0.389, "step": 13246 }, { "epoch": 43.4327868852459, "grad_norm": 3.597317934036255, "learning_rate": 1.258314705351445e-05, "loss": 0.3405, "step": 13247 }, { "epoch": 43.43606557377049, "grad_norm": 3.6946961879730225, "learning_rate": 1.2582121191535522e-05, "loss": 0.3754, "step": 13248 }, { "epoch": 43.43934426229508, "grad_norm": 3.9763855934143066, "learning_rate": 1.2581095300440493e-05, "loss": 0.5378, "step": 13249 }, { "epoch": 43.442622950819676, "grad_norm": 4.253145217895508, "learning_rate": 1.2580069380240927e-05, "loss": 0.3118, "step": 13250 }, { "epoch": 43.445901639344264, "grad_norm": 4.053602695465088, "learning_rate": 1.25790434309484e-05, "loss": 0.535, "step": 13251 }, { "epoch": 43.44918032786885, "grad_norm": 2.877422332763672, "learning_rate": 1.2578017452574476e-05, "loss": 0.1288, "step": 13252 }, { "epoch": 43.45245901639344, "grad_norm": 4.208886623382568, "learning_rate": 1.2576991445130724e-05, "loss": 0.5303, "step": 13253 }, { "epoch": 43.455737704918036, "grad_norm": 3.556950807571411, "learning_rate": 1.2575965408628716e-05, "loss": 0.4099, "step": 13254 }, { "epoch": 43.459016393442624, "grad_norm": 3.7211036682128906, "learning_rate": 1.257493934308002e-05, "loss": 0.3814, "step": 13255 }, { "epoch": 43.46229508196721, "grad_norm": 8.504340171813965, "learning_rate": 1.2573913248496203e-05, "loss": 0.286, "step": 13256 }, { "epoch": 43.4655737704918, "grad_norm": 5.35249137878418, "learning_rate": 1.2572887124888837e-05, "loss": 0.4748, "step": 13257 }, { "epoch": 43.4688524590164, "grad_norm": 2.916398048400879, "learning_rate": 1.2571860972269496e-05, "loss": 0.24, "step": 13258 }, { "epoch": 43.472131147540985, "grad_norm": 3.507817506790161, "learning_rate": 1.2570834790649748e-05, "loss": 0.4735, "step": 13259 }, { "epoch": 43.47540983606557, "grad_norm": 3.688765287399292, "learning_rate": 1.2569808580041165e-05, "loss": 0.3201, "step": 13260 }, { "epoch": 43.47868852459016, "grad_norm": 3.747056245803833, "learning_rate": 1.2568782340455316e-05, "loss": 0.3123, "step": 13261 }, { "epoch": 43.48196721311476, "grad_norm": 3.9381561279296875, "learning_rate": 1.2567756071903778e-05, "loss": 0.5277, "step": 13262 }, { "epoch": 43.485245901639345, "grad_norm": 4.497677326202393, "learning_rate": 1.2566729774398119e-05, "loss": 0.3872, "step": 13263 }, { "epoch": 43.488524590163934, "grad_norm": 4.310725212097168, "learning_rate": 1.2565703447949914e-05, "loss": 0.2731, "step": 13264 }, { "epoch": 43.49180327868852, "grad_norm": 3.885021686553955, "learning_rate": 1.2564677092570734e-05, "loss": 0.2264, "step": 13265 }, { "epoch": 43.49508196721312, "grad_norm": 3.8131308555603027, "learning_rate": 1.2563650708272155e-05, "loss": 0.4097, "step": 13266 }, { "epoch": 43.498360655737706, "grad_norm": 3.5060043334960938, "learning_rate": 1.2562624295065748e-05, "loss": 0.2452, "step": 13267 }, { "epoch": 43.501639344262294, "grad_norm": 3.7175447940826416, "learning_rate": 1.2561597852963086e-05, "loss": 0.2762, "step": 13268 }, { "epoch": 43.50491803278688, "grad_norm": 3.5710723400115967, "learning_rate": 1.2560571381975745e-05, "loss": 0.4879, "step": 13269 }, { "epoch": 43.50819672131148, "grad_norm": 3.971151828765869, "learning_rate": 1.25595448821153e-05, "loss": 0.1985, "step": 13270 }, { "epoch": 43.511475409836066, "grad_norm": 4.036985397338867, "learning_rate": 1.2558518353393327e-05, "loss": 0.2664, "step": 13271 }, { "epoch": 43.514754098360655, "grad_norm": 3.41949725151062, "learning_rate": 1.2557491795821396e-05, "loss": 0.2052, "step": 13272 }, { "epoch": 43.51803278688524, "grad_norm": 3.7734413146972656, "learning_rate": 1.255646520941109e-05, "loss": 0.3047, "step": 13273 }, { "epoch": 43.52131147540984, "grad_norm": 3.818751573562622, "learning_rate": 1.2555438594173977e-05, "loss": 0.3254, "step": 13274 }, { "epoch": 43.52459016393443, "grad_norm": 3.8219919204711914, "learning_rate": 1.255441195012164e-05, "loss": 0.2459, "step": 13275 }, { "epoch": 43.527868852459015, "grad_norm": 5.157814979553223, "learning_rate": 1.2553385277265649e-05, "loss": 0.3676, "step": 13276 }, { "epoch": 43.5311475409836, "grad_norm": 4.06883430480957, "learning_rate": 1.2552358575617587e-05, "loss": 0.2838, "step": 13277 }, { "epoch": 43.5344262295082, "grad_norm": 4.004101276397705, "learning_rate": 1.2551331845189027e-05, "loss": 0.3871, "step": 13278 }, { "epoch": 43.53770491803279, "grad_norm": 3.8273534774780273, "learning_rate": 1.2550305085991548e-05, "loss": 0.324, "step": 13279 }, { "epoch": 43.540983606557376, "grad_norm": 3.5058491230010986, "learning_rate": 1.2549278298036728e-05, "loss": 0.2994, "step": 13280 }, { "epoch": 43.544262295081964, "grad_norm": 3.886085033416748, "learning_rate": 1.2548251481336144e-05, "loss": 0.2477, "step": 13281 }, { "epoch": 43.54754098360656, "grad_norm": 4.046631336212158, "learning_rate": 1.2547224635901376e-05, "loss": 0.2582, "step": 13282 }, { "epoch": 43.55081967213115, "grad_norm": 4.085160732269287, "learning_rate": 1.2546197761743999e-05, "loss": 0.2301, "step": 13283 }, { "epoch": 43.554098360655736, "grad_norm": 3.9344258308410645, "learning_rate": 1.2545170858875597e-05, "loss": 0.3965, "step": 13284 }, { "epoch": 43.557377049180324, "grad_norm": 3.468946933746338, "learning_rate": 1.2544143927307749e-05, "loss": 0.3776, "step": 13285 }, { "epoch": 43.56065573770492, "grad_norm": 3.7380359172821045, "learning_rate": 1.254311696705203e-05, "loss": 0.2383, "step": 13286 }, { "epoch": 43.56393442622951, "grad_norm": 3.584474563598633, "learning_rate": 1.2542089978120022e-05, "loss": 0.336, "step": 13287 }, { "epoch": 43.5672131147541, "grad_norm": 4.516984939575195, "learning_rate": 1.254106296052331e-05, "loss": 0.3087, "step": 13288 }, { "epoch": 43.570491803278685, "grad_norm": 3.5675132274627686, "learning_rate": 1.2540035914273468e-05, "loss": 0.3995, "step": 13289 }, { "epoch": 43.57377049180328, "grad_norm": 3.291653871536255, "learning_rate": 1.2539008839382083e-05, "loss": 0.3171, "step": 13290 }, { "epoch": 43.57704918032787, "grad_norm": 4.5429301261901855, "learning_rate": 1.2537981735860729e-05, "loss": 0.3342, "step": 13291 }, { "epoch": 43.58032786885246, "grad_norm": 3.6494498252868652, "learning_rate": 1.2536954603720995e-05, "loss": 0.382, "step": 13292 }, { "epoch": 43.58360655737705, "grad_norm": 3.486381769180298, "learning_rate": 1.253592744297446e-05, "loss": 0.26, "step": 13293 }, { "epoch": 43.58688524590164, "grad_norm": 3.7910385131835938, "learning_rate": 1.2534900253632707e-05, "loss": 0.4483, "step": 13294 }, { "epoch": 43.59016393442623, "grad_norm": 3.9382786750793457, "learning_rate": 1.2533873035707317e-05, "loss": 0.3626, "step": 13295 }, { "epoch": 43.59344262295082, "grad_norm": 3.735729455947876, "learning_rate": 1.2532845789209875e-05, "loss": 0.2331, "step": 13296 }, { "epoch": 43.59672131147541, "grad_norm": 4.143410682678223, "learning_rate": 1.2531818514151964e-05, "loss": 0.2889, "step": 13297 }, { "epoch": 43.6, "grad_norm": 4.197372913360596, "learning_rate": 1.2530791210545163e-05, "loss": 0.2452, "step": 13298 }, { "epoch": 43.60327868852459, "grad_norm": 4.269754409790039, "learning_rate": 1.2529763878401063e-05, "loss": 0.3461, "step": 13299 }, { "epoch": 43.60655737704918, "grad_norm": 3.8204522132873535, "learning_rate": 1.2528736517731243e-05, "loss": 0.4098, "step": 13300 }, { "epoch": 43.609836065573774, "grad_norm": 4.3346147537231445, "learning_rate": 1.2527709128547292e-05, "loss": 0.4693, "step": 13301 }, { "epoch": 43.61311475409836, "grad_norm": 3.666055917739868, "learning_rate": 1.2526681710860791e-05, "loss": 0.3629, "step": 13302 }, { "epoch": 43.61639344262295, "grad_norm": 3.89670467376709, "learning_rate": 1.2525654264683327e-05, "loss": 0.2933, "step": 13303 }, { "epoch": 43.61967213114754, "grad_norm": 3.9596307277679443, "learning_rate": 1.2524626790026484e-05, "loss": 0.3774, "step": 13304 }, { "epoch": 43.622950819672134, "grad_norm": 4.283357620239258, "learning_rate": 1.252359928690185e-05, "loss": 0.3674, "step": 13305 }, { "epoch": 43.62622950819672, "grad_norm": 3.0022735595703125, "learning_rate": 1.252257175532101e-05, "loss": 0.3744, "step": 13306 }, { "epoch": 43.62950819672131, "grad_norm": 3.516813278198242, "learning_rate": 1.2521544195295552e-05, "loss": 0.4299, "step": 13307 }, { "epoch": 43.6327868852459, "grad_norm": 3.4194176197052, "learning_rate": 1.2520516606837058e-05, "loss": 0.365, "step": 13308 }, { "epoch": 43.636065573770495, "grad_norm": 4.10664176940918, "learning_rate": 1.2519488989957123e-05, "loss": 0.4226, "step": 13309 }, { "epoch": 43.63934426229508, "grad_norm": 5.008970737457275, "learning_rate": 1.2518461344667327e-05, "loss": 0.3429, "step": 13310 }, { "epoch": 43.64262295081967, "grad_norm": 3.597412347793579, "learning_rate": 1.251743367097926e-05, "loss": 0.2012, "step": 13311 }, { "epoch": 43.64590163934426, "grad_norm": 3.8862006664276123, "learning_rate": 1.2516405968904515e-05, "loss": 0.2059, "step": 13312 }, { "epoch": 43.649180327868855, "grad_norm": 3.9447739124298096, "learning_rate": 1.2515378238454673e-05, "loss": 0.2638, "step": 13313 }, { "epoch": 43.65245901639344, "grad_norm": 3.571579694747925, "learning_rate": 1.2514350479641326e-05, "loss": 0.18, "step": 13314 }, { "epoch": 43.65573770491803, "grad_norm": 3.4818615913391113, "learning_rate": 1.2513322692476063e-05, "loss": 0.3505, "step": 13315 }, { "epoch": 43.65901639344262, "grad_norm": 4.4806108474731445, "learning_rate": 1.2512294876970474e-05, "loss": 0.5728, "step": 13316 }, { "epoch": 43.662295081967216, "grad_norm": 4.200630187988281, "learning_rate": 1.2511267033136147e-05, "loss": 0.37, "step": 13317 }, { "epoch": 43.665573770491804, "grad_norm": 4.446040630340576, "learning_rate": 1.2510239160984676e-05, "loss": 0.3354, "step": 13318 }, { "epoch": 43.66885245901639, "grad_norm": 4.004273891448975, "learning_rate": 1.2509211260527647e-05, "loss": 0.4283, "step": 13319 }, { "epoch": 43.67213114754098, "grad_norm": 3.431509494781494, "learning_rate": 1.2508183331776651e-05, "loss": 0.268, "step": 13320 }, { "epoch": 43.675409836065576, "grad_norm": 3.535379409790039, "learning_rate": 1.2507155374743281e-05, "loss": 0.2203, "step": 13321 }, { "epoch": 43.678688524590164, "grad_norm": 3.653167724609375, "learning_rate": 1.2506127389439126e-05, "loss": 0.4607, "step": 13322 }, { "epoch": 43.68196721311475, "grad_norm": 3.8225483894348145, "learning_rate": 1.2505099375875782e-05, "loss": 0.3795, "step": 13323 }, { "epoch": 43.68524590163934, "grad_norm": 4.092508316040039, "learning_rate": 1.2504071334064836e-05, "loss": 0.3794, "step": 13324 }, { "epoch": 43.68852459016394, "grad_norm": 3.791522264480591, "learning_rate": 1.2503043264017882e-05, "loss": 0.3251, "step": 13325 }, { "epoch": 43.691803278688525, "grad_norm": 4.16071081161499, "learning_rate": 1.2502015165746512e-05, "loss": 0.2723, "step": 13326 }, { "epoch": 43.69508196721311, "grad_norm": 4.175308704376221, "learning_rate": 1.2500987039262322e-05, "loss": 0.2742, "step": 13327 }, { "epoch": 43.6983606557377, "grad_norm": 3.522768259048462, "learning_rate": 1.2499958884576902e-05, "loss": 0.3826, "step": 13328 }, { "epoch": 43.7016393442623, "grad_norm": 3.392014265060425, "learning_rate": 1.2498930701701845e-05, "loss": 0.5, "step": 13329 }, { "epoch": 43.704918032786885, "grad_norm": 3.3336591720581055, "learning_rate": 1.2497902490648746e-05, "loss": 0.3499, "step": 13330 }, { "epoch": 43.708196721311474, "grad_norm": 3.4788553714752197, "learning_rate": 1.24968742514292e-05, "loss": 0.5523, "step": 13331 }, { "epoch": 43.71147540983607, "grad_norm": 3.636225938796997, "learning_rate": 1.2495845984054804e-05, "loss": 0.3054, "step": 13332 }, { "epoch": 43.71475409836066, "grad_norm": 3.868576765060425, "learning_rate": 1.2494817688537144e-05, "loss": 0.2671, "step": 13333 }, { "epoch": 43.718032786885246, "grad_norm": 3.8379292488098145, "learning_rate": 1.2493789364887825e-05, "loss": 0.3641, "step": 13334 }, { "epoch": 43.721311475409834, "grad_norm": 3.8846354484558105, "learning_rate": 1.2492761013118435e-05, "loss": 0.326, "step": 13335 }, { "epoch": 43.72459016393443, "grad_norm": 3.312441825866699, "learning_rate": 1.2491732633240575e-05, "loss": 0.2945, "step": 13336 }, { "epoch": 43.72786885245902, "grad_norm": 3.476123809814453, "learning_rate": 1.2490704225265835e-05, "loss": 0.3566, "step": 13337 }, { "epoch": 43.731147540983606, "grad_norm": 4.035243034362793, "learning_rate": 1.248967578920582e-05, "loss": 0.597, "step": 13338 }, { "epoch": 43.734426229508195, "grad_norm": 4.215327262878418, "learning_rate": 1.2488647325072117e-05, "loss": 0.3487, "step": 13339 }, { "epoch": 43.73770491803279, "grad_norm": 3.99157977104187, "learning_rate": 1.2487618832876331e-05, "loss": 0.3474, "step": 13340 }, { "epoch": 43.74098360655738, "grad_norm": 4.184606075286865, "learning_rate": 1.2486590312630057e-05, "loss": 0.1998, "step": 13341 }, { "epoch": 43.74426229508197, "grad_norm": 3.8417325019836426, "learning_rate": 1.2485561764344889e-05, "loss": 0.3075, "step": 13342 }, { "epoch": 43.747540983606555, "grad_norm": 3.4995741844177246, "learning_rate": 1.2484533188032428e-05, "loss": 0.4413, "step": 13343 }, { "epoch": 43.75081967213115, "grad_norm": 3.4853105545043945, "learning_rate": 1.2483504583704276e-05, "loss": 0.2176, "step": 13344 }, { "epoch": 43.75409836065574, "grad_norm": 3.5620601177215576, "learning_rate": 1.2482475951372024e-05, "loss": 0.4807, "step": 13345 }, { "epoch": 43.75737704918033, "grad_norm": 3.7280702590942383, "learning_rate": 1.2481447291047272e-05, "loss": 0.3394, "step": 13346 }, { "epoch": 43.760655737704916, "grad_norm": 5.049814701080322, "learning_rate": 1.2480418602741626e-05, "loss": 0.3024, "step": 13347 }, { "epoch": 43.76393442622951, "grad_norm": 3.0736021995544434, "learning_rate": 1.2479389886466679e-05, "loss": 0.4005, "step": 13348 }, { "epoch": 43.7672131147541, "grad_norm": 4.264799118041992, "learning_rate": 1.2478361142234037e-05, "loss": 0.4342, "step": 13349 }, { "epoch": 43.77049180327869, "grad_norm": 3.2289016246795654, "learning_rate": 1.2477332370055292e-05, "loss": 0.1926, "step": 13350 }, { "epoch": 43.773770491803276, "grad_norm": 3.621093273162842, "learning_rate": 1.2476303569942052e-05, "loss": 0.3321, "step": 13351 }, { "epoch": 43.77704918032787, "grad_norm": 3.1365609169006348, "learning_rate": 1.2475274741905912e-05, "loss": 0.2521, "step": 13352 }, { "epoch": 43.78032786885246, "grad_norm": 3.842578887939453, "learning_rate": 1.2474245885958478e-05, "loss": 0.4466, "step": 13353 }, { "epoch": 43.78360655737705, "grad_norm": 3.7519984245300293, "learning_rate": 1.2473217002111346e-05, "loss": 0.4974, "step": 13354 }, { "epoch": 43.78688524590164, "grad_norm": 4.0145416259765625, "learning_rate": 1.2472188090376123e-05, "loss": 0.4097, "step": 13355 }, { "epoch": 43.79016393442623, "grad_norm": 2.9683990478515625, "learning_rate": 1.2471159150764409e-05, "loss": 0.2297, "step": 13356 }, { "epoch": 43.79344262295082, "grad_norm": 3.522104024887085, "learning_rate": 1.2470130183287806e-05, "loss": 0.4417, "step": 13357 }, { "epoch": 43.79672131147541, "grad_norm": 3.938495635986328, "learning_rate": 1.2469101187957917e-05, "loss": 0.5583, "step": 13358 }, { "epoch": 43.8, "grad_norm": 3.924777030944824, "learning_rate": 1.2468072164786342e-05, "loss": 0.2637, "step": 13359 }, { "epoch": 43.80327868852459, "grad_norm": 3.789708375930786, "learning_rate": 1.246704311378469e-05, "loss": 0.4991, "step": 13360 }, { "epoch": 43.80655737704918, "grad_norm": 3.825174570083618, "learning_rate": 1.2466014034964562e-05, "loss": 0.5486, "step": 13361 }, { "epoch": 43.80983606557377, "grad_norm": 3.7772881984710693, "learning_rate": 1.2464984928337563e-05, "loss": 0.4114, "step": 13362 }, { "epoch": 43.81311475409836, "grad_norm": 4.434324264526367, "learning_rate": 1.2463955793915292e-05, "loss": 0.3307, "step": 13363 }, { "epoch": 43.81639344262295, "grad_norm": 3.2914907932281494, "learning_rate": 1.246292663170936e-05, "loss": 0.4702, "step": 13364 }, { "epoch": 43.81967213114754, "grad_norm": 5.897953987121582, "learning_rate": 1.246189744173137e-05, "loss": 0.4744, "step": 13365 }, { "epoch": 43.82295081967213, "grad_norm": 3.6247236728668213, "learning_rate": 1.2460868223992925e-05, "loss": 0.3749, "step": 13366 }, { "epoch": 43.82622950819672, "grad_norm": 3.097177028656006, "learning_rate": 1.2459838978505632e-05, "loss": 0.2897, "step": 13367 }, { "epoch": 43.829508196721314, "grad_norm": 3.189012050628662, "learning_rate": 1.2458809705281099e-05, "loss": 0.2693, "step": 13368 }, { "epoch": 43.8327868852459, "grad_norm": 4.659281253814697, "learning_rate": 1.2457780404330928e-05, "loss": 0.4257, "step": 13369 }, { "epoch": 43.83606557377049, "grad_norm": 4.158472537994385, "learning_rate": 1.2456751075666729e-05, "loss": 0.2758, "step": 13370 }, { "epoch": 43.83934426229508, "grad_norm": 4.327736854553223, "learning_rate": 1.2455721719300105e-05, "loss": 0.4381, "step": 13371 }, { "epoch": 43.842622950819674, "grad_norm": 4.132397651672363, "learning_rate": 1.2454692335242668e-05, "loss": 0.388, "step": 13372 }, { "epoch": 43.84590163934426, "grad_norm": 4.443509101867676, "learning_rate": 1.2453662923506021e-05, "loss": 0.2164, "step": 13373 }, { "epoch": 43.84918032786885, "grad_norm": 4.76340913772583, "learning_rate": 1.2452633484101773e-05, "loss": 0.446, "step": 13374 }, { "epoch": 43.85245901639344, "grad_norm": 4.519674301147461, "learning_rate": 1.2451604017041534e-05, "loss": 0.4188, "step": 13375 }, { "epoch": 43.855737704918035, "grad_norm": 4.716965675354004, "learning_rate": 1.2450574522336909e-05, "loss": 0.4051, "step": 13376 }, { "epoch": 43.85901639344262, "grad_norm": 3.5503714084625244, "learning_rate": 1.244954499999951e-05, "loss": 0.3358, "step": 13377 }, { "epoch": 43.86229508196721, "grad_norm": 3.7942054271698, "learning_rate": 1.2448515450040942e-05, "loss": 0.3522, "step": 13378 }, { "epoch": 43.86557377049181, "grad_norm": 3.47589373588562, "learning_rate": 1.2447485872472819e-05, "loss": 0.3118, "step": 13379 }, { "epoch": 43.868852459016395, "grad_norm": 3.699320077896118, "learning_rate": 1.2446456267306745e-05, "loss": 0.2619, "step": 13380 }, { "epoch": 43.87213114754098, "grad_norm": 4.5538530349731445, "learning_rate": 1.2445426634554337e-05, "loss": 0.4138, "step": 13381 }, { "epoch": 43.87540983606557, "grad_norm": 3.752798557281494, "learning_rate": 1.2444396974227197e-05, "loss": 0.3265, "step": 13382 }, { "epoch": 43.87868852459017, "grad_norm": 4.232438087463379, "learning_rate": 1.2443367286336943e-05, "loss": 0.3785, "step": 13383 }, { "epoch": 43.881967213114756, "grad_norm": 4.688269138336182, "learning_rate": 1.2442337570895183e-05, "loss": 0.5456, "step": 13384 }, { "epoch": 43.885245901639344, "grad_norm": 3.4365344047546387, "learning_rate": 1.2441307827913525e-05, "loss": 0.2006, "step": 13385 }, { "epoch": 43.88852459016393, "grad_norm": 4.004470348358154, "learning_rate": 1.2440278057403584e-05, "loss": 0.4343, "step": 13386 }, { "epoch": 43.89180327868853, "grad_norm": 3.3112990856170654, "learning_rate": 1.2439248259376968e-05, "loss": 0.3981, "step": 13387 }, { "epoch": 43.895081967213116, "grad_norm": 5.005695819854736, "learning_rate": 1.2438218433845295e-05, "loss": 0.4497, "step": 13388 }, { "epoch": 43.898360655737704, "grad_norm": 3.85979962348938, "learning_rate": 1.2437188580820172e-05, "loss": 0.374, "step": 13389 }, { "epoch": 43.90163934426229, "grad_norm": 4.420117378234863, "learning_rate": 1.2436158700313215e-05, "loss": 0.3224, "step": 13390 }, { "epoch": 43.90491803278689, "grad_norm": 3.6845319271087646, "learning_rate": 1.2435128792336032e-05, "loss": 0.3472, "step": 13391 }, { "epoch": 43.90819672131148, "grad_norm": 4.694893836975098, "learning_rate": 1.2434098856900245e-05, "loss": 0.3091, "step": 13392 }, { "epoch": 43.911475409836065, "grad_norm": 3.761979579925537, "learning_rate": 1.2433068894017462e-05, "loss": 0.2443, "step": 13393 }, { "epoch": 43.91475409836065, "grad_norm": 3.225066900253296, "learning_rate": 1.2432038903699295e-05, "loss": 0.3306, "step": 13394 }, { "epoch": 43.91803278688525, "grad_norm": 3.353384017944336, "learning_rate": 1.243100888595736e-05, "loss": 0.2081, "step": 13395 }, { "epoch": 43.92131147540984, "grad_norm": 3.715862989425659, "learning_rate": 1.2429978840803277e-05, "loss": 0.318, "step": 13396 }, { "epoch": 43.924590163934425, "grad_norm": 3.497724771499634, "learning_rate": 1.2428948768248653e-05, "loss": 0.3554, "step": 13397 }, { "epoch": 43.927868852459014, "grad_norm": 3.1472890377044678, "learning_rate": 1.2427918668305105e-05, "loss": 0.2278, "step": 13398 }, { "epoch": 43.93114754098361, "grad_norm": 4.943460941314697, "learning_rate": 1.242688854098425e-05, "loss": 0.456, "step": 13399 }, { "epoch": 43.9344262295082, "grad_norm": 3.861464262008667, "learning_rate": 1.2425858386297704e-05, "loss": 0.5551, "step": 13400 }, { "epoch": 43.937704918032786, "grad_norm": 3.716557025909424, "learning_rate": 1.2424828204257082e-05, "loss": 0.4441, "step": 13401 }, { "epoch": 43.940983606557374, "grad_norm": 3.925837755203247, "learning_rate": 1.2423797994874001e-05, "loss": 0.5287, "step": 13402 }, { "epoch": 43.94426229508197, "grad_norm": 4.2804484367370605, "learning_rate": 1.2422767758160079e-05, "loss": 0.4715, "step": 13403 }, { "epoch": 43.94754098360656, "grad_norm": 4.355434894561768, "learning_rate": 1.2421737494126929e-05, "loss": 0.3901, "step": 13404 }, { "epoch": 43.950819672131146, "grad_norm": 4.138524055480957, "learning_rate": 1.2420707202786173e-05, "loss": 0.3647, "step": 13405 }, { "epoch": 43.954098360655735, "grad_norm": 3.235914945602417, "learning_rate": 1.2419676884149425e-05, "loss": 0.4608, "step": 13406 }, { "epoch": 43.95737704918033, "grad_norm": 4.9024977684021, "learning_rate": 1.2418646538228305e-05, "loss": 0.3673, "step": 13407 }, { "epoch": 43.96065573770492, "grad_norm": 3.973416328430176, "learning_rate": 1.2417616165034429e-05, "loss": 0.2688, "step": 13408 }, { "epoch": 43.96393442622951, "grad_norm": 3.8993301391601562, "learning_rate": 1.241658576457942e-05, "loss": 0.4983, "step": 13409 }, { "epoch": 43.967213114754095, "grad_norm": 3.185584783554077, "learning_rate": 1.2415555336874894e-05, "loss": 0.3695, "step": 13410 }, { "epoch": 43.97049180327869, "grad_norm": 6.641196250915527, "learning_rate": 1.2414524881932467e-05, "loss": 0.5347, "step": 13411 }, { "epoch": 43.97377049180328, "grad_norm": 2.9111602306365967, "learning_rate": 1.2413494399763763e-05, "loss": 0.3092, "step": 13412 }, { "epoch": 43.97704918032787, "grad_norm": 3.3453712463378906, "learning_rate": 1.24124638903804e-05, "loss": 0.4563, "step": 13413 }, { "epoch": 43.980327868852456, "grad_norm": 3.967302083969116, "learning_rate": 1.2411433353793998e-05, "loss": 0.2988, "step": 13414 }, { "epoch": 43.98360655737705, "grad_norm": 4.049261093139648, "learning_rate": 1.2410402790016179e-05, "loss": 0.3135, "step": 13415 }, { "epoch": 43.98688524590164, "grad_norm": 3.5920321941375732, "learning_rate": 1.240937219905856e-05, "loss": 0.4763, "step": 13416 }, { "epoch": 43.99016393442623, "grad_norm": 3.4290802478790283, "learning_rate": 1.2408341580932766e-05, "loss": 0.2168, "step": 13417 }, { "epoch": 43.993442622950816, "grad_norm": 3.8582706451416016, "learning_rate": 1.2407310935650416e-05, "loss": 0.3111, "step": 13418 }, { "epoch": 43.99672131147541, "grad_norm": 3.4978630542755127, "learning_rate": 1.2406280263223132e-05, "loss": 0.3838, "step": 13419 }, { "epoch": 44.0, "grad_norm": 3.412646532058716, "learning_rate": 1.2405249563662539e-05, "loss": 0.2892, "step": 13420 }, { "epoch": 44.00327868852459, "grad_norm": 3.2973482608795166, "learning_rate": 1.2404218836980253e-05, "loss": 0.2683, "step": 13421 }, { "epoch": 44.006557377049184, "grad_norm": 3.5476150512695312, "learning_rate": 1.2403188083187901e-05, "loss": 0.3856, "step": 13422 }, { "epoch": 44.00983606557377, "grad_norm": 4.024277210235596, "learning_rate": 1.2402157302297106e-05, "loss": 0.3855, "step": 13423 }, { "epoch": 44.01311475409836, "grad_norm": 3.198195219039917, "learning_rate": 1.2401126494319487e-05, "loss": 0.1463, "step": 13424 }, { "epoch": 44.01639344262295, "grad_norm": 3.2100841999053955, "learning_rate": 1.2400095659266671e-05, "loss": 0.2804, "step": 13425 }, { "epoch": 44.019672131147544, "grad_norm": 7.447854518890381, "learning_rate": 1.2399064797150282e-05, "loss": 0.2879, "step": 13426 }, { "epoch": 44.02295081967213, "grad_norm": 3.881078004837036, "learning_rate": 1.239803390798194e-05, "loss": 0.3193, "step": 13427 }, { "epoch": 44.02622950819672, "grad_norm": 3.7891604900360107, "learning_rate": 1.2397002991773277e-05, "loss": 0.3244, "step": 13428 }, { "epoch": 44.02950819672131, "grad_norm": 3.471741199493408, "learning_rate": 1.2395972048535909e-05, "loss": 0.2541, "step": 13429 }, { "epoch": 44.032786885245905, "grad_norm": 3.1498849391937256, "learning_rate": 1.2394941078281466e-05, "loss": 0.4091, "step": 13430 }, { "epoch": 44.03606557377049, "grad_norm": 3.7183897495269775, "learning_rate": 1.2393910081021574e-05, "loss": 0.3083, "step": 13431 }, { "epoch": 44.03934426229508, "grad_norm": 3.7587833404541016, "learning_rate": 1.2392879056767855e-05, "loss": 0.2466, "step": 13432 }, { "epoch": 44.04262295081967, "grad_norm": 4.21248197555542, "learning_rate": 1.2391848005531938e-05, "loss": 0.2389, "step": 13433 }, { "epoch": 44.045901639344265, "grad_norm": 5.440874099731445, "learning_rate": 1.2390816927325449e-05, "loss": 0.4709, "step": 13434 }, { "epoch": 44.049180327868854, "grad_norm": 3.4730186462402344, "learning_rate": 1.2389785822160011e-05, "loss": 0.4283, "step": 13435 }, { "epoch": 44.05245901639344, "grad_norm": 3.504347562789917, "learning_rate": 1.2388754690047256e-05, "loss": 0.3492, "step": 13436 }, { "epoch": 44.05573770491803, "grad_norm": 3.381333112716675, "learning_rate": 1.2387723530998805e-05, "loss": 0.3125, "step": 13437 }, { "epoch": 44.059016393442626, "grad_norm": 4.816098690032959, "learning_rate": 1.238669234502629e-05, "loss": 0.2914, "step": 13438 }, { "epoch": 44.062295081967214, "grad_norm": 3.934370756149292, "learning_rate": 1.2385661132141335e-05, "loss": 0.2227, "step": 13439 }, { "epoch": 44.0655737704918, "grad_norm": 3.187607765197754, "learning_rate": 1.2384629892355574e-05, "loss": 0.217, "step": 13440 }, { "epoch": 44.06885245901639, "grad_norm": 4.336148262023926, "learning_rate": 1.2383598625680628e-05, "loss": 0.5494, "step": 13441 }, { "epoch": 44.072131147540986, "grad_norm": 3.1339328289031982, "learning_rate": 1.2382567332128133e-05, "loss": 0.1799, "step": 13442 }, { "epoch": 44.075409836065575, "grad_norm": 3.0967628955841064, "learning_rate": 1.2381536011709709e-05, "loss": 0.2795, "step": 13443 }, { "epoch": 44.07868852459016, "grad_norm": 3.506817102432251, "learning_rate": 1.2380504664436996e-05, "loss": 0.2441, "step": 13444 }, { "epoch": 44.08196721311475, "grad_norm": 3.554283857345581, "learning_rate": 1.2379473290321613e-05, "loss": 0.4924, "step": 13445 }, { "epoch": 44.08524590163935, "grad_norm": 3.5498709678649902, "learning_rate": 1.23784418893752e-05, "loss": 0.3364, "step": 13446 }, { "epoch": 44.088524590163935, "grad_norm": 3.9509990215301514, "learning_rate": 1.2377410461609377e-05, "loss": 0.2973, "step": 13447 }, { "epoch": 44.09180327868852, "grad_norm": 3.5097813606262207, "learning_rate": 1.2376379007035779e-05, "loss": 0.3107, "step": 13448 }, { "epoch": 44.09508196721311, "grad_norm": 3.9630587100982666, "learning_rate": 1.237534752566604e-05, "loss": 0.3634, "step": 13449 }, { "epoch": 44.09836065573771, "grad_norm": 4.519533634185791, "learning_rate": 1.2374316017511784e-05, "loss": 0.4596, "step": 13450 }, { "epoch": 44.101639344262296, "grad_norm": 3.155550956726074, "learning_rate": 1.2373284482584652e-05, "loss": 0.2959, "step": 13451 }, { "epoch": 44.104918032786884, "grad_norm": 4.068918228149414, "learning_rate": 1.2372252920896264e-05, "loss": 0.1918, "step": 13452 }, { "epoch": 44.10819672131147, "grad_norm": 3.5533759593963623, "learning_rate": 1.2371221332458258e-05, "loss": 0.2124, "step": 13453 }, { "epoch": 44.11147540983607, "grad_norm": 4.357263565063477, "learning_rate": 1.237018971728227e-05, "loss": 0.4419, "step": 13454 }, { "epoch": 44.114754098360656, "grad_norm": 3.665391445159912, "learning_rate": 1.2369158075379925e-05, "loss": 0.2428, "step": 13455 }, { "epoch": 44.118032786885244, "grad_norm": 3.354931354522705, "learning_rate": 1.2368126406762862e-05, "loss": 0.3934, "step": 13456 }, { "epoch": 44.12131147540983, "grad_norm": 4.573808193206787, "learning_rate": 1.236709471144271e-05, "loss": 0.3955, "step": 13457 }, { "epoch": 44.12459016393443, "grad_norm": 3.273118495941162, "learning_rate": 1.2366062989431105e-05, "loss": 0.2944, "step": 13458 }, { "epoch": 44.12786885245902, "grad_norm": 4.007266521453857, "learning_rate": 1.236503124073968e-05, "loss": 0.3238, "step": 13459 }, { "epoch": 44.131147540983605, "grad_norm": 4.336513042449951, "learning_rate": 1.236399946538007e-05, "loss": 0.2415, "step": 13460 }, { "epoch": 44.13442622950819, "grad_norm": 3.7231242656707764, "learning_rate": 1.2362967663363905e-05, "loss": 0.3996, "step": 13461 }, { "epoch": 44.13770491803279, "grad_norm": 4.356267929077148, "learning_rate": 1.2361935834702826e-05, "loss": 0.335, "step": 13462 }, { "epoch": 44.14098360655738, "grad_norm": 3.900848388671875, "learning_rate": 1.2360903979408461e-05, "loss": 0.5231, "step": 13463 }, { "epoch": 44.144262295081965, "grad_norm": 3.3136513233184814, "learning_rate": 1.235987209749245e-05, "loss": 0.1795, "step": 13464 }, { "epoch": 44.14754098360656, "grad_norm": 3.7112646102905273, "learning_rate": 1.2358840188966427e-05, "loss": 0.3662, "step": 13465 }, { "epoch": 44.15081967213115, "grad_norm": 3.588071823120117, "learning_rate": 1.2357808253842031e-05, "loss": 0.33, "step": 13466 }, { "epoch": 44.15409836065574, "grad_norm": 3.4108810424804688, "learning_rate": 1.2356776292130892e-05, "loss": 0.1952, "step": 13467 }, { "epoch": 44.157377049180326, "grad_norm": 3.8296279907226562, "learning_rate": 1.2355744303844652e-05, "loss": 0.2285, "step": 13468 }, { "epoch": 44.16065573770492, "grad_norm": 3.878258466720581, "learning_rate": 1.2354712288994946e-05, "loss": 0.2643, "step": 13469 }, { "epoch": 44.16393442622951, "grad_norm": 3.5603485107421875, "learning_rate": 1.2353680247593411e-05, "loss": 0.1943, "step": 13470 }, { "epoch": 44.1672131147541, "grad_norm": 3.945617914199829, "learning_rate": 1.235264817965168e-05, "loss": 0.3662, "step": 13471 }, { "epoch": 44.170491803278686, "grad_norm": 5.28598690032959, "learning_rate": 1.23516160851814e-05, "loss": 0.3297, "step": 13472 }, { "epoch": 44.17377049180328, "grad_norm": 4.172948837280273, "learning_rate": 1.2350583964194202e-05, "loss": 0.1745, "step": 13473 }, { "epoch": 44.17704918032787, "grad_norm": 3.4491560459136963, "learning_rate": 1.2349551816701724e-05, "loss": 0.6317, "step": 13474 }, { "epoch": 44.18032786885246, "grad_norm": 3.507674217224121, "learning_rate": 1.2348519642715608e-05, "loss": 0.3078, "step": 13475 }, { "epoch": 44.18360655737705, "grad_norm": 4.40958309173584, "learning_rate": 1.234748744224749e-05, "loss": 0.3778, "step": 13476 }, { "epoch": 44.18688524590164, "grad_norm": 3.9184722900390625, "learning_rate": 1.234645521530901e-05, "loss": 0.473, "step": 13477 }, { "epoch": 44.19016393442623, "grad_norm": 4.2563300132751465, "learning_rate": 1.2345422961911808e-05, "loss": 0.3088, "step": 13478 }, { "epoch": 44.19344262295082, "grad_norm": 4.0145368576049805, "learning_rate": 1.2344390682067524e-05, "loss": 0.3966, "step": 13479 }, { "epoch": 44.19672131147541, "grad_norm": 3.7456729412078857, "learning_rate": 1.2343358375787798e-05, "loss": 0.2588, "step": 13480 }, { "epoch": 44.2, "grad_norm": 4.300199508666992, "learning_rate": 1.2342326043084268e-05, "loss": 0.425, "step": 13481 }, { "epoch": 44.20327868852459, "grad_norm": 3.688549280166626, "learning_rate": 1.2341293683968579e-05, "loss": 0.3181, "step": 13482 }, { "epoch": 44.20655737704918, "grad_norm": 3.788891077041626, "learning_rate": 1.2340261298452365e-05, "loss": 0.291, "step": 13483 }, { "epoch": 44.20983606557377, "grad_norm": 4.234010696411133, "learning_rate": 1.2339228886547273e-05, "loss": 0.5058, "step": 13484 }, { "epoch": 44.21311475409836, "grad_norm": 3.3896729946136475, "learning_rate": 1.2338196448264947e-05, "loss": 0.3586, "step": 13485 }, { "epoch": 44.21639344262295, "grad_norm": 4.1158576011657715, "learning_rate": 1.2337163983617025e-05, "loss": 0.4276, "step": 13486 }, { "epoch": 44.21967213114754, "grad_norm": 3.0715177059173584, "learning_rate": 1.2336131492615145e-05, "loss": 0.3129, "step": 13487 }, { "epoch": 44.22295081967213, "grad_norm": 3.143629550933838, "learning_rate": 1.2335098975270957e-05, "loss": 0.3319, "step": 13488 }, { "epoch": 44.226229508196724, "grad_norm": 3.8389222621917725, "learning_rate": 1.2334066431596097e-05, "loss": 0.252, "step": 13489 }, { "epoch": 44.22950819672131, "grad_norm": 4.814233779907227, "learning_rate": 1.2333033861602213e-05, "loss": 0.4291, "step": 13490 }, { "epoch": 44.2327868852459, "grad_norm": 3.594179153442383, "learning_rate": 1.2332001265300945e-05, "loss": 0.3293, "step": 13491 }, { "epoch": 44.23606557377049, "grad_norm": 4.115472793579102, "learning_rate": 1.233096864270394e-05, "loss": 0.359, "step": 13492 }, { "epoch": 44.239344262295084, "grad_norm": 4.079064846038818, "learning_rate": 1.232993599382284e-05, "loss": 0.4005, "step": 13493 }, { "epoch": 44.24262295081967, "grad_norm": 4.477382183074951, "learning_rate": 1.2328903318669287e-05, "loss": 0.3077, "step": 13494 }, { "epoch": 44.24590163934426, "grad_norm": 3.6457719802856445, "learning_rate": 1.2327870617254929e-05, "loss": 0.155, "step": 13495 }, { "epoch": 44.24918032786885, "grad_norm": 3.9757823944091797, "learning_rate": 1.232683788959141e-05, "loss": 0.2183, "step": 13496 }, { "epoch": 44.252459016393445, "grad_norm": 4.083536624908447, "learning_rate": 1.2325805135690372e-05, "loss": 0.3223, "step": 13497 }, { "epoch": 44.25573770491803, "grad_norm": 4.595238208770752, "learning_rate": 1.2324772355563467e-05, "loss": 0.2622, "step": 13498 }, { "epoch": 44.25901639344262, "grad_norm": 4.520643711090088, "learning_rate": 1.2323739549222333e-05, "loss": 0.4946, "step": 13499 }, { "epoch": 44.26229508196721, "grad_norm": 4.033722877502441, "learning_rate": 1.232270671667862e-05, "loss": 0.5149, "step": 13500 }, { "epoch": 44.265573770491805, "grad_norm": 4.240159511566162, "learning_rate": 1.2321673857943977e-05, "loss": 0.3927, "step": 13501 }, { "epoch": 44.268852459016394, "grad_norm": 3.641681671142578, "learning_rate": 1.2320640973030043e-05, "loss": 0.3243, "step": 13502 }, { "epoch": 44.27213114754098, "grad_norm": 4.318421363830566, "learning_rate": 1.2319608061948471e-05, "loss": 0.4843, "step": 13503 }, { "epoch": 44.27540983606557, "grad_norm": 4.204751968383789, "learning_rate": 1.2318575124710905e-05, "loss": 0.3924, "step": 13504 }, { "epoch": 44.278688524590166, "grad_norm": 3.6102559566497803, "learning_rate": 1.2317542161328997e-05, "loss": 0.1735, "step": 13505 }, { "epoch": 44.281967213114754, "grad_norm": 3.134617805480957, "learning_rate": 1.2316509171814388e-05, "loss": 0.3352, "step": 13506 }, { "epoch": 44.28524590163934, "grad_norm": 3.7029311656951904, "learning_rate": 1.231547615617873e-05, "loss": 0.4476, "step": 13507 }, { "epoch": 44.28852459016394, "grad_norm": 3.6706790924072266, "learning_rate": 1.2314443114433671e-05, "loss": 0.3339, "step": 13508 }, { "epoch": 44.291803278688526, "grad_norm": 6.30447244644165, "learning_rate": 1.2313410046590861e-05, "loss": 0.4426, "step": 13509 }, { "epoch": 44.295081967213115, "grad_norm": 4.727381229400635, "learning_rate": 1.2312376952661946e-05, "loss": 0.3479, "step": 13510 }, { "epoch": 44.2983606557377, "grad_norm": 3.8470618724823, "learning_rate": 1.2311343832658577e-05, "loss": 0.4403, "step": 13511 }, { "epoch": 44.3016393442623, "grad_norm": 3.919487476348877, "learning_rate": 1.2310310686592404e-05, "loss": 0.4413, "step": 13512 }, { "epoch": 44.30491803278689, "grad_norm": 3.976745128631592, "learning_rate": 1.2309277514475076e-05, "loss": 0.2675, "step": 13513 }, { "epoch": 44.308196721311475, "grad_norm": 4.42888069152832, "learning_rate": 1.2308244316318243e-05, "loss": 0.4236, "step": 13514 }, { "epoch": 44.31147540983606, "grad_norm": 3.6385867595672607, "learning_rate": 1.2307211092133552e-05, "loss": 0.4159, "step": 13515 }, { "epoch": 44.31475409836066, "grad_norm": 3.4895522594451904, "learning_rate": 1.230617784193266e-05, "loss": 0.2137, "step": 13516 }, { "epoch": 44.31803278688525, "grad_norm": 3.2491934299468994, "learning_rate": 1.2305144565727214e-05, "loss": 0.4531, "step": 13517 }, { "epoch": 44.321311475409836, "grad_norm": 3.9145185947418213, "learning_rate": 1.2304111263528868e-05, "loss": 0.5171, "step": 13518 }, { "epoch": 44.324590163934424, "grad_norm": 3.840139389038086, "learning_rate": 1.230307793534927e-05, "loss": 0.205, "step": 13519 }, { "epoch": 44.32786885245902, "grad_norm": 3.518173933029175, "learning_rate": 1.2302044581200075e-05, "loss": 0.399, "step": 13520 }, { "epoch": 44.33114754098361, "grad_norm": 3.0742454528808594, "learning_rate": 1.2301011201092931e-05, "loss": 0.51, "step": 13521 }, { "epoch": 44.334426229508196, "grad_norm": 4.101376056671143, "learning_rate": 1.2299977795039498e-05, "loss": 0.4623, "step": 13522 }, { "epoch": 44.337704918032784, "grad_norm": 5.216400623321533, "learning_rate": 1.229894436305142e-05, "loss": 0.2736, "step": 13523 }, { "epoch": 44.34098360655738, "grad_norm": 3.3859617710113525, "learning_rate": 1.2297910905140356e-05, "loss": 0.345, "step": 13524 }, { "epoch": 44.34426229508197, "grad_norm": 4.148850440979004, "learning_rate": 1.2296877421317958e-05, "loss": 0.3637, "step": 13525 }, { "epoch": 44.34754098360656, "grad_norm": 4.097609996795654, "learning_rate": 1.2295843911595876e-05, "loss": 0.2912, "step": 13526 }, { "epoch": 44.350819672131145, "grad_norm": 2.8435535430908203, "learning_rate": 1.229481037598577e-05, "loss": 0.1295, "step": 13527 }, { "epoch": 44.35409836065574, "grad_norm": 4.624363899230957, "learning_rate": 1.229377681449929e-05, "loss": 0.2072, "step": 13528 }, { "epoch": 44.35737704918033, "grad_norm": 3.4592981338500977, "learning_rate": 1.2292743227148091e-05, "loss": 0.2599, "step": 13529 }, { "epoch": 44.36065573770492, "grad_norm": 3.702151298522949, "learning_rate": 1.2291709613943828e-05, "loss": 0.2582, "step": 13530 }, { "epoch": 44.363934426229505, "grad_norm": 4.087280750274658, "learning_rate": 1.2290675974898157e-05, "loss": 0.3506, "step": 13531 }, { "epoch": 44.3672131147541, "grad_norm": 3.3735039234161377, "learning_rate": 1.228964231002273e-05, "loss": 0.3516, "step": 13532 }, { "epoch": 44.37049180327869, "grad_norm": 4.340680122375488, "learning_rate": 1.228860861932921e-05, "loss": 0.3569, "step": 13533 }, { "epoch": 44.37377049180328, "grad_norm": 3.8222060203552246, "learning_rate": 1.2287574902829245e-05, "loss": 0.3336, "step": 13534 }, { "epoch": 44.377049180327866, "grad_norm": 4.259131908416748, "learning_rate": 1.2286541160534492e-05, "loss": 0.3306, "step": 13535 }, { "epoch": 44.38032786885246, "grad_norm": 3.138212203979492, "learning_rate": 1.2285507392456614e-05, "loss": 0.382, "step": 13536 }, { "epoch": 44.38360655737705, "grad_norm": 4.060981750488281, "learning_rate": 1.2284473598607263e-05, "loss": 0.2918, "step": 13537 }, { "epoch": 44.38688524590164, "grad_norm": 3.4109678268432617, "learning_rate": 1.2283439778998099e-05, "loss": 0.1343, "step": 13538 }, { "epoch": 44.390163934426226, "grad_norm": 3.256566047668457, "learning_rate": 1.2282405933640773e-05, "loss": 0.1834, "step": 13539 }, { "epoch": 44.39344262295082, "grad_norm": 4.004889965057373, "learning_rate": 1.228137206254695e-05, "loss": 0.2889, "step": 13540 }, { "epoch": 44.39672131147541, "grad_norm": 4.0404791831970215, "learning_rate": 1.2280338165728285e-05, "loss": 0.352, "step": 13541 }, { "epoch": 44.4, "grad_norm": 3.2636005878448486, "learning_rate": 1.2279304243196438e-05, "loss": 0.2521, "step": 13542 }, { "epoch": 44.40327868852459, "grad_norm": 3.8538944721221924, "learning_rate": 1.227827029496306e-05, "loss": 0.382, "step": 13543 }, { "epoch": 44.40655737704918, "grad_norm": 3.8041956424713135, "learning_rate": 1.227723632103982e-05, "loss": 0.2323, "step": 13544 }, { "epoch": 44.40983606557377, "grad_norm": 3.9822115898132324, "learning_rate": 1.2276202321438371e-05, "loss": 0.4878, "step": 13545 }, { "epoch": 44.41311475409836, "grad_norm": 6.0390214920043945, "learning_rate": 1.2275168296170377e-05, "loss": 0.2597, "step": 13546 }, { "epoch": 44.41639344262295, "grad_norm": 3.685539960861206, "learning_rate": 1.2274134245247492e-05, "loss": 0.4247, "step": 13547 }, { "epoch": 44.41967213114754, "grad_norm": 3.867821455001831, "learning_rate": 1.2273100168681378e-05, "loss": 0.3148, "step": 13548 }, { "epoch": 44.42295081967213, "grad_norm": 4.7427449226379395, "learning_rate": 1.2272066066483697e-05, "loss": 0.2638, "step": 13549 }, { "epoch": 44.42622950819672, "grad_norm": 4.159964084625244, "learning_rate": 1.2271031938666111e-05, "loss": 0.4961, "step": 13550 }, { "epoch": 44.429508196721315, "grad_norm": 2.742579460144043, "learning_rate": 1.2269997785240277e-05, "loss": 0.3538, "step": 13551 }, { "epoch": 44.4327868852459, "grad_norm": 3.6897125244140625, "learning_rate": 1.2268963606217858e-05, "loss": 0.2561, "step": 13552 }, { "epoch": 44.43606557377049, "grad_norm": 4.131546974182129, "learning_rate": 1.2267929401610517e-05, "loss": 0.2671, "step": 13553 }, { "epoch": 44.43934426229508, "grad_norm": 4.537215709686279, "learning_rate": 1.2266895171429911e-05, "loss": 0.5723, "step": 13554 }, { "epoch": 44.442622950819676, "grad_norm": 4.663138389587402, "learning_rate": 1.2265860915687706e-05, "loss": 0.197, "step": 13555 }, { "epoch": 44.445901639344264, "grad_norm": 3.7630717754364014, "learning_rate": 1.226482663439556e-05, "loss": 0.341, "step": 13556 }, { "epoch": 44.44918032786885, "grad_norm": 4.648554801940918, "learning_rate": 1.2263792327565145e-05, "loss": 0.52, "step": 13557 }, { "epoch": 44.45245901639344, "grad_norm": 4.664074897766113, "learning_rate": 1.2262757995208115e-05, "loss": 0.353, "step": 13558 }, { "epoch": 44.455737704918036, "grad_norm": 3.3088390827178955, "learning_rate": 1.2261723637336136e-05, "loss": 0.3459, "step": 13559 }, { "epoch": 44.459016393442624, "grad_norm": 4.508343696594238, "learning_rate": 1.2260689253960872e-05, "loss": 0.3014, "step": 13560 }, { "epoch": 44.46229508196721, "grad_norm": 3.8242290019989014, "learning_rate": 1.2259654845093985e-05, "loss": 0.3808, "step": 13561 }, { "epoch": 44.4655737704918, "grad_norm": 3.7504489421844482, "learning_rate": 1.225862041074714e-05, "loss": 0.2889, "step": 13562 }, { "epoch": 44.4688524590164, "grad_norm": 3.637132167816162, "learning_rate": 1.2257585950932e-05, "loss": 0.3199, "step": 13563 }, { "epoch": 44.472131147540985, "grad_norm": 3.6856822967529297, "learning_rate": 1.2256551465660234e-05, "loss": 0.2976, "step": 13564 }, { "epoch": 44.47540983606557, "grad_norm": 4.406907081604004, "learning_rate": 1.2255516954943503e-05, "loss": 0.3872, "step": 13565 }, { "epoch": 44.47868852459016, "grad_norm": 5.305886745452881, "learning_rate": 1.2254482418793474e-05, "loss": 0.4036, "step": 13566 }, { "epoch": 44.48196721311476, "grad_norm": 3.2307913303375244, "learning_rate": 1.2253447857221809e-05, "loss": 0.3002, "step": 13567 }, { "epoch": 44.485245901639345, "grad_norm": 3.888270854949951, "learning_rate": 1.225241327024018e-05, "loss": 0.215, "step": 13568 }, { "epoch": 44.488524590163934, "grad_norm": 3.7655344009399414, "learning_rate": 1.2251378657860247e-05, "loss": 0.3944, "step": 13569 }, { "epoch": 44.49180327868852, "grad_norm": 4.394033908843994, "learning_rate": 1.225034402009368e-05, "loss": 0.4664, "step": 13570 }, { "epoch": 44.49508196721312, "grad_norm": 4.1717095375061035, "learning_rate": 1.2249309356952143e-05, "loss": 0.2156, "step": 13571 }, { "epoch": 44.498360655737706, "grad_norm": 4.228240013122559, "learning_rate": 1.2248274668447307e-05, "loss": 0.1802, "step": 13572 }, { "epoch": 44.501639344262294, "grad_norm": 4.223969459533691, "learning_rate": 1.2247239954590833e-05, "loss": 0.2727, "step": 13573 }, { "epoch": 44.50491803278688, "grad_norm": 3.8246634006500244, "learning_rate": 1.2246205215394395e-05, "loss": 0.4634, "step": 13574 }, { "epoch": 44.50819672131148, "grad_norm": 3.823885679244995, "learning_rate": 1.2245170450869655e-05, "loss": 0.3158, "step": 13575 }, { "epoch": 44.511475409836066, "grad_norm": 3.427994966506958, "learning_rate": 1.2244135661028287e-05, "loss": 0.246, "step": 13576 }, { "epoch": 44.514754098360655, "grad_norm": 4.606436729431152, "learning_rate": 1.2243100845881955e-05, "loss": 0.4228, "step": 13577 }, { "epoch": 44.51803278688524, "grad_norm": 3.9614391326904297, "learning_rate": 1.2242066005442329e-05, "loss": 0.2968, "step": 13578 }, { "epoch": 44.52131147540984, "grad_norm": 3.3558971881866455, "learning_rate": 1.2241031139721076e-05, "loss": 0.3418, "step": 13579 }, { "epoch": 44.52459016393443, "grad_norm": 4.585712909698486, "learning_rate": 1.2239996248729867e-05, "loss": 0.3462, "step": 13580 }, { "epoch": 44.527868852459015, "grad_norm": 4.103896141052246, "learning_rate": 1.2238961332480372e-05, "loss": 0.252, "step": 13581 }, { "epoch": 44.5311475409836, "grad_norm": 3.46924090385437, "learning_rate": 1.2237926390984259e-05, "loss": 0.2963, "step": 13582 }, { "epoch": 44.5344262295082, "grad_norm": 3.4553606510162354, "learning_rate": 1.2236891424253201e-05, "loss": 0.4161, "step": 13583 }, { "epoch": 44.53770491803279, "grad_norm": 3.442744255065918, "learning_rate": 1.2235856432298864e-05, "loss": 0.2469, "step": 13584 }, { "epoch": 44.540983606557376, "grad_norm": 3.919447422027588, "learning_rate": 1.2234821415132922e-05, "loss": 0.366, "step": 13585 }, { "epoch": 44.544262295081964, "grad_norm": 3.8533153533935547, "learning_rate": 1.2233786372767045e-05, "loss": 0.2457, "step": 13586 }, { "epoch": 44.54754098360656, "grad_norm": 3.2557168006896973, "learning_rate": 1.2232751305212907e-05, "loss": 0.163, "step": 13587 }, { "epoch": 44.55081967213115, "grad_norm": 3.5913946628570557, "learning_rate": 1.2231716212482173e-05, "loss": 0.5438, "step": 13588 }, { "epoch": 44.554098360655736, "grad_norm": 3.3023288249969482, "learning_rate": 1.2230681094586517e-05, "loss": 0.4176, "step": 13589 }, { "epoch": 44.557377049180324, "grad_norm": 3.2598459720611572, "learning_rate": 1.2229645951537616e-05, "loss": 0.3734, "step": 13590 }, { "epoch": 44.56065573770492, "grad_norm": 3.6488709449768066, "learning_rate": 1.2228610783347134e-05, "loss": 0.3545, "step": 13591 }, { "epoch": 44.56393442622951, "grad_norm": 3.2808213233947754, "learning_rate": 1.2227575590026754e-05, "loss": 0.3521, "step": 13592 }, { "epoch": 44.5672131147541, "grad_norm": 4.5158281326293945, "learning_rate": 1.222654037158814e-05, "loss": 0.3876, "step": 13593 }, { "epoch": 44.570491803278685, "grad_norm": 3.7618021965026855, "learning_rate": 1.222550512804297e-05, "loss": 0.264, "step": 13594 }, { "epoch": 44.57377049180328, "grad_norm": 3.6708528995513916, "learning_rate": 1.2224469859402913e-05, "loss": 0.4003, "step": 13595 }, { "epoch": 44.57704918032787, "grad_norm": 4.262595176696777, "learning_rate": 1.2223434565679648e-05, "loss": 0.3678, "step": 13596 }, { "epoch": 44.58032786885246, "grad_norm": 3.891272783279419, "learning_rate": 1.2222399246884845e-05, "loss": 0.3252, "step": 13597 }, { "epoch": 44.58360655737705, "grad_norm": 4.0773797035217285, "learning_rate": 1.2221363903030182e-05, "loss": 0.3582, "step": 13598 }, { "epoch": 44.58688524590164, "grad_norm": 3.6397995948791504, "learning_rate": 1.2220328534127329e-05, "loss": 0.2428, "step": 13599 }, { "epoch": 44.59016393442623, "grad_norm": 4.253413677215576, "learning_rate": 1.2219293140187962e-05, "loss": 0.5979, "step": 13600 }, { "epoch": 44.59344262295082, "grad_norm": 4.359462261199951, "learning_rate": 1.2218257721223759e-05, "loss": 0.4379, "step": 13601 }, { "epoch": 44.59672131147541, "grad_norm": 3.8493034839630127, "learning_rate": 1.2217222277246395e-05, "loss": 0.4255, "step": 13602 }, { "epoch": 44.6, "grad_norm": 3.5061933994293213, "learning_rate": 1.2216186808267544e-05, "loss": 0.3155, "step": 13603 }, { "epoch": 44.60327868852459, "grad_norm": 4.6213812828063965, "learning_rate": 1.221515131429888e-05, "loss": 0.3732, "step": 13604 }, { "epoch": 44.60655737704918, "grad_norm": 3.220581293106079, "learning_rate": 1.2214115795352086e-05, "loss": 0.1544, "step": 13605 }, { "epoch": 44.609836065573774, "grad_norm": 4.851435661315918, "learning_rate": 1.2213080251438832e-05, "loss": 0.4739, "step": 13606 }, { "epoch": 44.61311475409836, "grad_norm": 3.537302017211914, "learning_rate": 1.2212044682570799e-05, "loss": 0.4721, "step": 13607 }, { "epoch": 44.61639344262295, "grad_norm": 4.29525089263916, "learning_rate": 1.2211009088759658e-05, "loss": 0.2758, "step": 13608 }, { "epoch": 44.61967213114754, "grad_norm": 4.243772029876709, "learning_rate": 1.2209973470017093e-05, "loss": 0.2039, "step": 13609 }, { "epoch": 44.622950819672134, "grad_norm": 6.94224739074707, "learning_rate": 1.2208937826354781e-05, "loss": 0.3156, "step": 13610 }, { "epoch": 44.62622950819672, "grad_norm": 3.410198450088501, "learning_rate": 1.2207902157784397e-05, "loss": 0.4217, "step": 13611 }, { "epoch": 44.62950819672131, "grad_norm": 4.001366138458252, "learning_rate": 1.2206866464317619e-05, "loss": 0.4238, "step": 13612 }, { "epoch": 44.6327868852459, "grad_norm": 3.898470401763916, "learning_rate": 1.220583074596613e-05, "loss": 0.4362, "step": 13613 }, { "epoch": 44.636065573770495, "grad_norm": 6.483917713165283, "learning_rate": 1.2204795002741603e-05, "loss": 0.3404, "step": 13614 }, { "epoch": 44.63934426229508, "grad_norm": 3.4682626724243164, "learning_rate": 1.220375923465572e-05, "loss": 0.2335, "step": 13615 }, { "epoch": 44.64262295081967, "grad_norm": 3.4276955127716064, "learning_rate": 1.220272344172016e-05, "loss": 0.2219, "step": 13616 }, { "epoch": 44.64590163934426, "grad_norm": 3.441453218460083, "learning_rate": 1.2201687623946605e-05, "loss": 0.3844, "step": 13617 }, { "epoch": 44.649180327868855, "grad_norm": 3.9192440509796143, "learning_rate": 1.2200651781346732e-05, "loss": 0.1739, "step": 13618 }, { "epoch": 44.65245901639344, "grad_norm": 5.111422538757324, "learning_rate": 1.2199615913932221e-05, "loss": 0.4406, "step": 13619 }, { "epoch": 44.65573770491803, "grad_norm": 3.5252864360809326, "learning_rate": 1.2198580021714756e-05, "loss": 0.4294, "step": 13620 }, { "epoch": 44.65901639344262, "grad_norm": 3.785764455795288, "learning_rate": 1.2197544104706013e-05, "loss": 0.2943, "step": 13621 }, { "epoch": 44.662295081967216, "grad_norm": 3.4640250205993652, "learning_rate": 1.2196508162917678e-05, "loss": 0.3365, "step": 13622 }, { "epoch": 44.665573770491804, "grad_norm": 3.626797914505005, "learning_rate": 1.2195472196361426e-05, "loss": 0.3249, "step": 13623 }, { "epoch": 44.66885245901639, "grad_norm": 3.849114179611206, "learning_rate": 1.2194436205048948e-05, "loss": 0.4715, "step": 13624 }, { "epoch": 44.67213114754098, "grad_norm": 3.6525890827178955, "learning_rate": 1.2193400188991913e-05, "loss": 0.2447, "step": 13625 }, { "epoch": 44.675409836065576, "grad_norm": 3.809091806411743, "learning_rate": 1.2192364148202016e-05, "loss": 0.2558, "step": 13626 }, { "epoch": 44.678688524590164, "grad_norm": 4.911802768707275, "learning_rate": 1.219132808269093e-05, "loss": 0.4256, "step": 13627 }, { "epoch": 44.68196721311475, "grad_norm": 3.7690651416778564, "learning_rate": 1.2190291992470345e-05, "loss": 0.5099, "step": 13628 }, { "epoch": 44.68524590163934, "grad_norm": 4.165862083435059, "learning_rate": 1.218925587755194e-05, "loss": 0.3375, "step": 13629 }, { "epoch": 44.68852459016394, "grad_norm": 3.942725896835327, "learning_rate": 1.2188219737947396e-05, "loss": 0.3099, "step": 13630 }, { "epoch": 44.691803278688525, "grad_norm": 3.753572940826416, "learning_rate": 1.21871835736684e-05, "loss": 0.5665, "step": 13631 }, { "epoch": 44.69508196721311, "grad_norm": 3.0632643699645996, "learning_rate": 1.2186147384726634e-05, "loss": 0.1491, "step": 13632 }, { "epoch": 44.6983606557377, "grad_norm": 3.9595370292663574, "learning_rate": 1.2185111171133786e-05, "loss": 0.4522, "step": 13633 }, { "epoch": 44.7016393442623, "grad_norm": 3.911818265914917, "learning_rate": 1.2184074932901535e-05, "loss": 0.4267, "step": 13634 }, { "epoch": 44.704918032786885, "grad_norm": 4.028921127319336, "learning_rate": 1.2183038670041571e-05, "loss": 0.3646, "step": 13635 }, { "epoch": 44.708196721311474, "grad_norm": 3.786879062652588, "learning_rate": 1.2182002382565575e-05, "loss": 0.5844, "step": 13636 }, { "epoch": 44.71147540983607, "grad_norm": 4.229370594024658, "learning_rate": 1.2180966070485235e-05, "loss": 0.3469, "step": 13637 }, { "epoch": 44.71475409836066, "grad_norm": 72.26377868652344, "learning_rate": 1.217992973381223e-05, "loss": 0.4155, "step": 13638 }, { "epoch": 44.718032786885246, "grad_norm": 3.484924077987671, "learning_rate": 1.2178893372558255e-05, "loss": 0.4553, "step": 13639 }, { "epoch": 44.721311475409834, "grad_norm": 4.272474765777588, "learning_rate": 1.2177856986734991e-05, "loss": 0.3251, "step": 13640 }, { "epoch": 44.72459016393443, "grad_norm": 4.178958415985107, "learning_rate": 1.2176820576354124e-05, "loss": 0.3675, "step": 13641 }, { "epoch": 44.72786885245902, "grad_norm": 4.928297996520996, "learning_rate": 1.2175784141427342e-05, "loss": 0.3391, "step": 13642 }, { "epoch": 44.731147540983606, "grad_norm": 8.289406776428223, "learning_rate": 1.2174747681966332e-05, "loss": 0.1871, "step": 13643 }, { "epoch": 44.734426229508195, "grad_norm": 4.47235107421875, "learning_rate": 1.2173711197982781e-05, "loss": 0.3042, "step": 13644 }, { "epoch": 44.73770491803279, "grad_norm": 4.993602752685547, "learning_rate": 1.2172674689488375e-05, "loss": 0.2978, "step": 13645 }, { "epoch": 44.74098360655738, "grad_norm": 9.303808212280273, "learning_rate": 1.2171638156494803e-05, "loss": 0.29, "step": 13646 }, { "epoch": 44.74426229508197, "grad_norm": 10.903051376342773, "learning_rate": 1.2170601599013755e-05, "loss": 0.5683, "step": 13647 }, { "epoch": 44.747540983606555, "grad_norm": 5.654363632202148, "learning_rate": 1.2169565017056915e-05, "loss": 0.3216, "step": 13648 }, { "epoch": 44.75081967213115, "grad_norm": 6.031821250915527, "learning_rate": 1.2168528410635974e-05, "loss": 0.4559, "step": 13649 }, { "epoch": 44.75409836065574, "grad_norm": 5.980687618255615, "learning_rate": 1.2167491779762621e-05, "loss": 0.3181, "step": 13650 }, { "epoch": 44.75737704918033, "grad_norm": 4.44155740737915, "learning_rate": 1.2166455124448543e-05, "loss": 0.4777, "step": 13651 }, { "epoch": 44.760655737704916, "grad_norm": 4.778518199920654, "learning_rate": 1.2165418444705433e-05, "loss": 0.2598, "step": 13652 }, { "epoch": 44.76393442622951, "grad_norm": 6.963659286499023, "learning_rate": 1.2164381740544978e-05, "loss": 0.4591, "step": 13653 }, { "epoch": 44.7672131147541, "grad_norm": 4.990766525268555, "learning_rate": 1.216334501197887e-05, "loss": 0.3239, "step": 13654 }, { "epoch": 44.77049180327869, "grad_norm": 5.3033552169799805, "learning_rate": 1.2162308259018797e-05, "loss": 0.2701, "step": 13655 }, { "epoch": 44.773770491803276, "grad_norm": 4.346296787261963, "learning_rate": 1.2161271481676447e-05, "loss": 0.3287, "step": 13656 }, { "epoch": 44.77704918032787, "grad_norm": 5.717703819274902, "learning_rate": 1.2160234679963517e-05, "loss": 0.5776, "step": 13657 }, { "epoch": 44.78032786885246, "grad_norm": 6.227569103240967, "learning_rate": 1.215919785389169e-05, "loss": 0.6517, "step": 13658 }, { "epoch": 44.78360655737705, "grad_norm": 4.726812839508057, "learning_rate": 1.2158161003472669e-05, "loss": 0.3673, "step": 13659 }, { "epoch": 44.78688524590164, "grad_norm": 5.714807987213135, "learning_rate": 1.2157124128718133e-05, "loss": 0.3483, "step": 13660 }, { "epoch": 44.79016393442623, "grad_norm": 4.802510738372803, "learning_rate": 1.2156087229639787e-05, "loss": 0.3616, "step": 13661 }, { "epoch": 44.79344262295082, "grad_norm": 4.735146999359131, "learning_rate": 1.2155050306249307e-05, "loss": 0.4544, "step": 13662 }, { "epoch": 44.79672131147541, "grad_norm": 4.88649320602417, "learning_rate": 1.2154013358558401e-05, "loss": 0.3876, "step": 13663 }, { "epoch": 44.8, "grad_norm": 5.158868312835693, "learning_rate": 1.215297638657875e-05, "loss": 0.5577, "step": 13664 }, { "epoch": 44.80327868852459, "grad_norm": 4.225595951080322, "learning_rate": 1.2151939390322057e-05, "loss": 0.5997, "step": 13665 }, { "epoch": 44.80655737704918, "grad_norm": 6.553916931152344, "learning_rate": 1.2150902369800004e-05, "loss": 0.3868, "step": 13666 }, { "epoch": 44.80983606557377, "grad_norm": 5.306924343109131, "learning_rate": 1.2149865325024295e-05, "loss": 0.2938, "step": 13667 }, { "epoch": 44.81311475409836, "grad_norm": 4.210887432098389, "learning_rate": 1.2148828256006616e-05, "loss": 0.3631, "step": 13668 }, { "epoch": 44.81639344262295, "grad_norm": 4.315727233886719, "learning_rate": 1.2147791162758664e-05, "loss": 0.3497, "step": 13669 }, { "epoch": 44.81967213114754, "grad_norm": 4.270236015319824, "learning_rate": 1.2146754045292135e-05, "loss": 0.2746, "step": 13670 }, { "epoch": 44.82295081967213, "grad_norm": 3.829362392425537, "learning_rate": 1.214571690361872e-05, "loss": 0.4076, "step": 13671 }, { "epoch": 44.82622950819672, "grad_norm": 4.858158588409424, "learning_rate": 1.214467973775012e-05, "loss": 0.2921, "step": 13672 }, { "epoch": 44.829508196721314, "grad_norm": 3.860288619995117, "learning_rate": 1.2143642547698022e-05, "loss": 0.3766, "step": 13673 }, { "epoch": 44.8327868852459, "grad_norm": 4.035828113555908, "learning_rate": 1.2142605333474129e-05, "loss": 0.4936, "step": 13674 }, { "epoch": 44.83606557377049, "grad_norm": 4.563320159912109, "learning_rate": 1.2141568095090131e-05, "loss": 0.4381, "step": 13675 }, { "epoch": 44.83934426229508, "grad_norm": 4.121478080749512, "learning_rate": 1.2140530832557727e-05, "loss": 0.3185, "step": 13676 }, { "epoch": 44.842622950819674, "grad_norm": 4.670581817626953, "learning_rate": 1.213949354588861e-05, "loss": 0.5841, "step": 13677 }, { "epoch": 44.84590163934426, "grad_norm": 7.406636714935303, "learning_rate": 1.2138456235094483e-05, "loss": 0.3823, "step": 13678 }, { "epoch": 44.84918032786885, "grad_norm": 4.900954246520996, "learning_rate": 1.2137418900187037e-05, "loss": 0.3899, "step": 13679 }, { "epoch": 44.85245901639344, "grad_norm": 4.380836486816406, "learning_rate": 1.2136381541177969e-05, "loss": 0.3218, "step": 13680 }, { "epoch": 44.855737704918035, "grad_norm": 4.29086971282959, "learning_rate": 1.2135344158078978e-05, "loss": 0.2727, "step": 13681 }, { "epoch": 44.85901639344262, "grad_norm": 3.630814790725708, "learning_rate": 1.2134306750901762e-05, "loss": 0.4316, "step": 13682 }, { "epoch": 44.86229508196721, "grad_norm": 3.3454487323760986, "learning_rate": 1.2133269319658018e-05, "loss": 0.4977, "step": 13683 }, { "epoch": 44.86557377049181, "grad_norm": 3.859881639480591, "learning_rate": 1.2132231864359445e-05, "loss": 0.3633, "step": 13684 }, { "epoch": 44.868852459016395, "grad_norm": 4.165937900543213, "learning_rate": 1.2131194385017739e-05, "loss": 0.2486, "step": 13685 }, { "epoch": 44.87213114754098, "grad_norm": 3.442960262298584, "learning_rate": 1.2130156881644601e-05, "loss": 0.3257, "step": 13686 }, { "epoch": 44.87540983606557, "grad_norm": 4.457896709442139, "learning_rate": 1.2129119354251732e-05, "loss": 0.2998, "step": 13687 }, { "epoch": 44.87868852459017, "grad_norm": 4.431092739105225, "learning_rate": 1.2128081802850826e-05, "loss": 0.404, "step": 13688 }, { "epoch": 44.881967213114756, "grad_norm": 4.46329402923584, "learning_rate": 1.2127044227453586e-05, "loss": 0.3278, "step": 13689 }, { "epoch": 44.885245901639344, "grad_norm": 3.86175799369812, "learning_rate": 1.2126006628071708e-05, "loss": 0.2675, "step": 13690 }, { "epoch": 44.88852459016393, "grad_norm": 3.725813388824463, "learning_rate": 1.21249690047169e-05, "loss": 0.3781, "step": 13691 }, { "epoch": 44.89180327868853, "grad_norm": 4.506524562835693, "learning_rate": 1.2123931357400855e-05, "loss": 0.3782, "step": 13692 }, { "epoch": 44.895081967213116, "grad_norm": 4.433441638946533, "learning_rate": 1.2122893686135273e-05, "loss": 0.4277, "step": 13693 }, { "epoch": 44.898360655737704, "grad_norm": 3.813054323196411, "learning_rate": 1.2121855990931861e-05, "loss": 0.3122, "step": 13694 }, { "epoch": 44.90163934426229, "grad_norm": 3.8555703163146973, "learning_rate": 1.2120818271802315e-05, "loss": 0.4252, "step": 13695 }, { "epoch": 44.90491803278689, "grad_norm": 4.086254596710205, "learning_rate": 1.2119780528758336e-05, "loss": 0.4251, "step": 13696 }, { "epoch": 44.90819672131148, "grad_norm": 4.100091934204102, "learning_rate": 1.211874276181163e-05, "loss": 0.353, "step": 13697 }, { "epoch": 44.911475409836065, "grad_norm": 3.416412353515625, "learning_rate": 1.2117704970973893e-05, "loss": 0.3313, "step": 13698 }, { "epoch": 44.91475409836065, "grad_norm": 4.615980625152588, "learning_rate": 1.2116667156256833e-05, "loss": 0.3088, "step": 13699 }, { "epoch": 44.91803278688525, "grad_norm": 4.000738620758057, "learning_rate": 1.2115629317672154e-05, "loss": 0.3353, "step": 13700 }, { "epoch": 44.92131147540984, "grad_norm": 4.018453598022461, "learning_rate": 1.2114591455231548e-05, "loss": 0.3907, "step": 13701 }, { "epoch": 44.924590163934425, "grad_norm": 4.132658004760742, "learning_rate": 1.2113553568946728e-05, "loss": 0.3686, "step": 13702 }, { "epoch": 44.927868852459014, "grad_norm": 3.738632917404175, "learning_rate": 1.2112515658829393e-05, "loss": 0.1939, "step": 13703 }, { "epoch": 44.93114754098361, "grad_norm": 3.8855793476104736, "learning_rate": 1.211147772489125e-05, "loss": 0.3273, "step": 13704 }, { "epoch": 44.9344262295082, "grad_norm": 4.851585865020752, "learning_rate": 1.2110439767143998e-05, "loss": 0.2651, "step": 13705 }, { "epoch": 44.937704918032786, "grad_norm": 4.074639320373535, "learning_rate": 1.2109401785599344e-05, "loss": 0.3625, "step": 13706 }, { "epoch": 44.940983606557374, "grad_norm": 4.006568908691406, "learning_rate": 1.2108363780268992e-05, "loss": 0.2953, "step": 13707 }, { "epoch": 44.94426229508197, "grad_norm": 3.6024763584136963, "learning_rate": 1.2107325751164645e-05, "loss": 0.4225, "step": 13708 }, { "epoch": 44.94754098360656, "grad_norm": 4.183891296386719, "learning_rate": 1.210628769829801e-05, "loss": 0.3654, "step": 13709 }, { "epoch": 44.950819672131146, "grad_norm": 3.8603296279907227, "learning_rate": 1.210524962168079e-05, "loss": 0.517, "step": 13710 }, { "epoch": 44.954098360655735, "grad_norm": 4.706448078155518, "learning_rate": 1.2104211521324693e-05, "loss": 0.5679, "step": 13711 }, { "epoch": 44.95737704918033, "grad_norm": 4.312280178070068, "learning_rate": 1.2103173397241421e-05, "loss": 0.3391, "step": 13712 }, { "epoch": 44.96065573770492, "grad_norm": 5.012081146240234, "learning_rate": 1.2102135249442687e-05, "loss": 0.3608, "step": 13713 }, { "epoch": 44.96393442622951, "grad_norm": 5.352398872375488, "learning_rate": 1.2101097077940187e-05, "loss": 0.2723, "step": 13714 }, { "epoch": 44.967213114754095, "grad_norm": 4.616207122802734, "learning_rate": 1.2100058882745635e-05, "loss": 0.2958, "step": 13715 }, { "epoch": 44.97049180327869, "grad_norm": 4.059220314025879, "learning_rate": 1.2099020663870734e-05, "loss": 0.4653, "step": 13716 }, { "epoch": 44.97377049180328, "grad_norm": 5.2736430168151855, "learning_rate": 1.2097982421327198e-05, "loss": 0.3466, "step": 13717 }, { "epoch": 44.97704918032787, "grad_norm": 3.812863349914551, "learning_rate": 1.2096944155126727e-05, "loss": 0.2006, "step": 13718 }, { "epoch": 44.980327868852456, "grad_norm": 3.7255587577819824, "learning_rate": 1.2095905865281026e-05, "loss": 0.4838, "step": 13719 }, { "epoch": 44.98360655737705, "grad_norm": 4.097769260406494, "learning_rate": 1.2094867551801812e-05, "loss": 0.2994, "step": 13720 }, { "epoch": 44.98688524590164, "grad_norm": 4.706798076629639, "learning_rate": 1.2093829214700782e-05, "loss": 0.3105, "step": 13721 }, { "epoch": 44.99016393442623, "grad_norm": 3.852385997772217, "learning_rate": 1.2092790853989656e-05, "loss": 0.3153, "step": 13722 }, { "epoch": 44.993442622950816, "grad_norm": 5.571681499481201, "learning_rate": 1.2091752469680136e-05, "loss": 0.6113, "step": 13723 }, { "epoch": 44.99672131147541, "grad_norm": 4.293287754058838, "learning_rate": 1.2090714061783931e-05, "loss": 0.6166, "step": 13724 }, { "epoch": 45.0, "grad_norm": 3.4737210273742676, "learning_rate": 1.2089675630312755e-05, "loss": 0.1996, "step": 13725 }, { "epoch": 45.00327868852459, "grad_norm": 3.5093016624450684, "learning_rate": 1.2088637175278308e-05, "loss": 0.5261, "step": 13726 }, { "epoch": 45.006557377049184, "grad_norm": 3.7320449352264404, "learning_rate": 1.2087598696692306e-05, "loss": 0.3751, "step": 13727 }, { "epoch": 45.00983606557377, "grad_norm": 3.559767961502075, "learning_rate": 1.208656019456646e-05, "loss": 0.2297, "step": 13728 }, { "epoch": 45.01311475409836, "grad_norm": 4.038066864013672, "learning_rate": 1.2085521668912476e-05, "loss": 0.1721, "step": 13729 }, { "epoch": 45.01639344262295, "grad_norm": 4.663552761077881, "learning_rate": 1.2084483119742072e-05, "loss": 0.4133, "step": 13730 }, { "epoch": 45.019672131147544, "grad_norm": 4.306192874908447, "learning_rate": 1.2083444547066951e-05, "loss": 0.3315, "step": 13731 }, { "epoch": 45.02295081967213, "grad_norm": 4.7575507164001465, "learning_rate": 1.2082405950898826e-05, "loss": 0.5465, "step": 13732 }, { "epoch": 45.02622950819672, "grad_norm": 3.855884552001953, "learning_rate": 1.2081367331249409e-05, "loss": 0.4898, "step": 13733 }, { "epoch": 45.02950819672131, "grad_norm": 3.678612232208252, "learning_rate": 1.208032868813041e-05, "loss": 0.3408, "step": 13734 }, { "epoch": 45.032786885245905, "grad_norm": 4.040615081787109, "learning_rate": 1.2079290021553542e-05, "loss": 0.2741, "step": 13735 }, { "epoch": 45.03606557377049, "grad_norm": 3.7438039779663086, "learning_rate": 1.207825133153052e-05, "loss": 0.1767, "step": 13736 }, { "epoch": 45.03934426229508, "grad_norm": 4.3084716796875, "learning_rate": 1.2077212618073052e-05, "loss": 0.2242, "step": 13737 }, { "epoch": 45.04262295081967, "grad_norm": 3.5093441009521484, "learning_rate": 1.207617388119285e-05, "loss": 0.7329, "step": 13738 }, { "epoch": 45.045901639344265, "grad_norm": 3.73980975151062, "learning_rate": 1.207513512090163e-05, "loss": 0.3139, "step": 13739 }, { "epoch": 45.049180327868854, "grad_norm": 4.294327735900879, "learning_rate": 1.2074096337211103e-05, "loss": 0.4079, "step": 13740 }, { "epoch": 45.05245901639344, "grad_norm": 3.408813953399658, "learning_rate": 1.2073057530132985e-05, "loss": 0.1597, "step": 13741 }, { "epoch": 45.05573770491803, "grad_norm": 4.169961452484131, "learning_rate": 1.2072018699678987e-05, "loss": 0.3327, "step": 13742 }, { "epoch": 45.059016393442626, "grad_norm": 4.610719680786133, "learning_rate": 1.2070979845860826e-05, "loss": 0.3706, "step": 13743 }, { "epoch": 45.062295081967214, "grad_norm": 3.9425907135009766, "learning_rate": 1.2069940968690213e-05, "loss": 0.3171, "step": 13744 }, { "epoch": 45.0655737704918, "grad_norm": 3.933640241622925, "learning_rate": 1.206890206817886e-05, "loss": 0.3167, "step": 13745 }, { "epoch": 45.06885245901639, "grad_norm": 3.535904884338379, "learning_rate": 1.2067863144338489e-05, "loss": 0.2397, "step": 13746 }, { "epoch": 45.072131147540986, "grad_norm": 3.509917974472046, "learning_rate": 1.2066824197180808e-05, "loss": 0.3686, "step": 13747 }, { "epoch": 45.075409836065575, "grad_norm": 3.758958101272583, "learning_rate": 1.206578522671754e-05, "loss": 0.3349, "step": 13748 }, { "epoch": 45.07868852459016, "grad_norm": 3.8378753662109375, "learning_rate": 1.2064746232960392e-05, "loss": 0.4271, "step": 13749 }, { "epoch": 45.08196721311475, "grad_norm": 4.243933200836182, "learning_rate": 1.206370721592108e-05, "loss": 0.2466, "step": 13750 }, { "epoch": 45.08524590163935, "grad_norm": 3.976679801940918, "learning_rate": 1.2062668175611329e-05, "loss": 0.257, "step": 13751 }, { "epoch": 45.088524590163935, "grad_norm": 5.14174222946167, "learning_rate": 1.2061629112042848e-05, "loss": 0.2403, "step": 13752 }, { "epoch": 45.09180327868852, "grad_norm": 3.5369253158569336, "learning_rate": 1.2060590025227354e-05, "loss": 0.5695, "step": 13753 }, { "epoch": 45.09508196721311, "grad_norm": 3.54463791847229, "learning_rate": 1.2059550915176567e-05, "loss": 0.2308, "step": 13754 }, { "epoch": 45.09836065573771, "grad_norm": 3.9988136291503906, "learning_rate": 1.20585117819022e-05, "loss": 0.2127, "step": 13755 }, { "epoch": 45.101639344262296, "grad_norm": 4.380591869354248, "learning_rate": 1.2057472625415976e-05, "loss": 0.2528, "step": 13756 }, { "epoch": 45.104918032786884, "grad_norm": 4.357818603515625, "learning_rate": 1.2056433445729607e-05, "loss": 0.3133, "step": 13757 }, { "epoch": 45.10819672131147, "grad_norm": 3.62182879447937, "learning_rate": 1.205539424285481e-05, "loss": 0.379, "step": 13758 }, { "epoch": 45.11147540983607, "grad_norm": 4.606773376464844, "learning_rate": 1.205435501680331e-05, "loss": 0.4778, "step": 13759 }, { "epoch": 45.114754098360656, "grad_norm": 3.2954893112182617, "learning_rate": 1.2053315767586818e-05, "loss": 0.1763, "step": 13760 }, { "epoch": 45.118032786885244, "grad_norm": 6.684093475341797, "learning_rate": 1.2052276495217056e-05, "loss": 0.3824, "step": 13761 }, { "epoch": 45.12131147540983, "grad_norm": 3.8962061405181885, "learning_rate": 1.2051237199705743e-05, "loss": 0.4036, "step": 13762 }, { "epoch": 45.12459016393443, "grad_norm": 4.338061332702637, "learning_rate": 1.20501978810646e-05, "loss": 0.498, "step": 13763 }, { "epoch": 45.12786885245902, "grad_norm": 3.450773000717163, "learning_rate": 1.2049158539305344e-05, "loss": 0.3238, "step": 13764 }, { "epoch": 45.131147540983605, "grad_norm": 3.4952151775360107, "learning_rate": 1.2048119174439694e-05, "loss": 0.3176, "step": 13765 }, { "epoch": 45.13442622950819, "grad_norm": 3.6025633811950684, "learning_rate": 1.204707978647937e-05, "loss": 0.2235, "step": 13766 }, { "epoch": 45.13770491803279, "grad_norm": 3.40250825881958, "learning_rate": 1.2046040375436094e-05, "loss": 0.4443, "step": 13767 }, { "epoch": 45.14098360655738, "grad_norm": 4.275689125061035, "learning_rate": 1.2045000941321583e-05, "loss": 0.3576, "step": 13768 }, { "epoch": 45.144262295081965, "grad_norm": 3.3081185817718506, "learning_rate": 1.2043961484147563e-05, "loss": 0.4037, "step": 13769 }, { "epoch": 45.14754098360656, "grad_norm": 4.263315200805664, "learning_rate": 1.2042922003925752e-05, "loss": 0.1746, "step": 13770 }, { "epoch": 45.15081967213115, "grad_norm": 4.092914581298828, "learning_rate": 1.204188250066787e-05, "loss": 0.3735, "step": 13771 }, { "epoch": 45.15409836065574, "grad_norm": 3.9423725605010986, "learning_rate": 1.204084297438564e-05, "loss": 0.3012, "step": 13772 }, { "epoch": 45.157377049180326, "grad_norm": 4.661608695983887, "learning_rate": 1.2039803425090784e-05, "loss": 0.4341, "step": 13773 }, { "epoch": 45.16065573770492, "grad_norm": 4.147989273071289, "learning_rate": 1.2038763852795024e-05, "loss": 0.541, "step": 13774 }, { "epoch": 45.16393442622951, "grad_norm": 4.2164483070373535, "learning_rate": 1.203772425751008e-05, "loss": 0.4314, "step": 13775 }, { "epoch": 45.1672131147541, "grad_norm": 4.96809196472168, "learning_rate": 1.2036684639247679e-05, "loss": 0.3098, "step": 13776 }, { "epoch": 45.170491803278686, "grad_norm": 4.059662342071533, "learning_rate": 1.2035644998019537e-05, "loss": 0.3842, "step": 13777 }, { "epoch": 45.17377049180328, "grad_norm": 3.773344039916992, "learning_rate": 1.203460533383738e-05, "loss": 0.3026, "step": 13778 }, { "epoch": 45.17704918032787, "grad_norm": 3.821631669998169, "learning_rate": 1.2033565646712939e-05, "loss": 0.274, "step": 13779 }, { "epoch": 45.18032786885246, "grad_norm": 5.132142066955566, "learning_rate": 1.2032525936657926e-05, "loss": 0.2059, "step": 13780 }, { "epoch": 45.18360655737705, "grad_norm": 4.0598344802856445, "learning_rate": 1.2031486203684071e-05, "loss": 0.3756, "step": 13781 }, { "epoch": 45.18688524590164, "grad_norm": 3.3171582221984863, "learning_rate": 1.2030446447803099e-05, "loss": 0.3022, "step": 13782 }, { "epoch": 45.19016393442623, "grad_norm": 4.193132400512695, "learning_rate": 1.202940666902673e-05, "loss": 0.3848, "step": 13783 }, { "epoch": 45.19344262295082, "grad_norm": 3.176982879638672, "learning_rate": 1.2028366867366692e-05, "loss": 0.1196, "step": 13784 }, { "epoch": 45.19672131147541, "grad_norm": 3.7995784282684326, "learning_rate": 1.2027327042834708e-05, "loss": 0.1611, "step": 13785 }, { "epoch": 45.2, "grad_norm": 3.9556736946105957, "learning_rate": 1.2026287195442503e-05, "loss": 0.3268, "step": 13786 }, { "epoch": 45.20327868852459, "grad_norm": 3.506775140762329, "learning_rate": 1.2025247325201805e-05, "loss": 0.1329, "step": 13787 }, { "epoch": 45.20655737704918, "grad_norm": 3.754429578781128, "learning_rate": 1.2024207432124336e-05, "loss": 0.4145, "step": 13788 }, { "epoch": 45.20983606557377, "grad_norm": 3.9277045726776123, "learning_rate": 1.2023167516221823e-05, "loss": 0.4534, "step": 13789 }, { "epoch": 45.21311475409836, "grad_norm": 3.7368221282958984, "learning_rate": 1.2022127577505993e-05, "loss": 0.2067, "step": 13790 }, { "epoch": 45.21639344262295, "grad_norm": 3.5410385131835938, "learning_rate": 1.2021087615988573e-05, "loss": 0.3504, "step": 13791 }, { "epoch": 45.21967213114754, "grad_norm": 3.3950955867767334, "learning_rate": 1.2020047631681289e-05, "loss": 0.3316, "step": 13792 }, { "epoch": 45.22295081967213, "grad_norm": 4.203537464141846, "learning_rate": 1.2019007624595867e-05, "loss": 0.4802, "step": 13793 }, { "epoch": 45.226229508196724, "grad_norm": 3.9810545444488525, "learning_rate": 1.2017967594744034e-05, "loss": 0.4795, "step": 13794 }, { "epoch": 45.22950819672131, "grad_norm": 3.4266185760498047, "learning_rate": 1.201692754213752e-05, "loss": 0.2668, "step": 13795 }, { "epoch": 45.2327868852459, "grad_norm": 3.6753737926483154, "learning_rate": 1.201588746678805e-05, "loss": 0.2114, "step": 13796 }, { "epoch": 45.23606557377049, "grad_norm": 4.506143569946289, "learning_rate": 1.2014847368707353e-05, "loss": 0.339, "step": 13797 }, { "epoch": 45.239344262295084, "grad_norm": 4.3515214920043945, "learning_rate": 1.2013807247907156e-05, "loss": 0.454, "step": 13798 }, { "epoch": 45.24262295081967, "grad_norm": 3.5000243186950684, "learning_rate": 1.201276710439919e-05, "loss": 0.3534, "step": 13799 }, { "epoch": 45.24590163934426, "grad_norm": 5.723353385925293, "learning_rate": 1.201172693819518e-05, "loss": 0.3421, "step": 13800 }, { "epoch": 45.24918032786885, "grad_norm": 3.811333179473877, "learning_rate": 1.201068674930686e-05, "loss": 0.3002, "step": 13801 }, { "epoch": 45.252459016393445, "grad_norm": 4.513136386871338, "learning_rate": 1.2009646537745953e-05, "loss": 0.4843, "step": 13802 }, { "epoch": 45.25573770491803, "grad_norm": 3.9485490322113037, "learning_rate": 1.2008606303524192e-05, "loss": 0.3195, "step": 13803 }, { "epoch": 45.25901639344262, "grad_norm": 3.7803144454956055, "learning_rate": 1.200756604665331e-05, "loss": 0.279, "step": 13804 }, { "epoch": 45.26229508196721, "grad_norm": 4.312528610229492, "learning_rate": 1.2006525767145033e-05, "loss": 0.446, "step": 13805 }, { "epoch": 45.265573770491805, "grad_norm": 3.186232566833496, "learning_rate": 1.2005485465011089e-05, "loss": 0.3386, "step": 13806 }, { "epoch": 45.268852459016394, "grad_norm": 4.181183338165283, "learning_rate": 1.2004445140263213e-05, "loss": 0.4674, "step": 13807 }, { "epoch": 45.27213114754098, "grad_norm": 3.258084774017334, "learning_rate": 1.2003404792913131e-05, "loss": 0.5734, "step": 13808 }, { "epoch": 45.27540983606557, "grad_norm": 3.795536518096924, "learning_rate": 1.200236442297258e-05, "loss": 0.5113, "step": 13809 }, { "epoch": 45.278688524590166, "grad_norm": 3.5802783966064453, "learning_rate": 1.2001324030453288e-05, "loss": 0.2914, "step": 13810 }, { "epoch": 45.281967213114754, "grad_norm": 3.6744930744171143, "learning_rate": 1.2000283615366986e-05, "loss": 0.3115, "step": 13811 }, { "epoch": 45.28524590163934, "grad_norm": 3.5596492290496826, "learning_rate": 1.1999243177725406e-05, "loss": 0.2672, "step": 13812 }, { "epoch": 45.28852459016394, "grad_norm": 3.9981496334075928, "learning_rate": 1.1998202717540282e-05, "loss": 0.4143, "step": 13813 }, { "epoch": 45.291803278688526, "grad_norm": 4.04522180557251, "learning_rate": 1.1997162234823342e-05, "loss": 0.1546, "step": 13814 }, { "epoch": 45.295081967213115, "grad_norm": 4.0632219314575195, "learning_rate": 1.1996121729586324e-05, "loss": 0.293, "step": 13815 }, { "epoch": 45.2983606557377, "grad_norm": 3.2399566173553467, "learning_rate": 1.1995081201840958e-05, "loss": 0.2766, "step": 13816 }, { "epoch": 45.3016393442623, "grad_norm": 4.216673374176025, "learning_rate": 1.1994040651598977e-05, "loss": 0.3071, "step": 13817 }, { "epoch": 45.30491803278689, "grad_norm": 3.3435609340667725, "learning_rate": 1.1993000078872112e-05, "loss": 0.3187, "step": 13818 }, { "epoch": 45.308196721311475, "grad_norm": 3.861579179763794, "learning_rate": 1.1991959483672102e-05, "loss": 0.2037, "step": 13819 }, { "epoch": 45.31147540983606, "grad_norm": 4.536150932312012, "learning_rate": 1.1990918866010675e-05, "loss": 0.3376, "step": 13820 }, { "epoch": 45.31475409836066, "grad_norm": 4.262032985687256, "learning_rate": 1.1989878225899569e-05, "loss": 0.3254, "step": 13821 }, { "epoch": 45.31803278688525, "grad_norm": 3.8374128341674805, "learning_rate": 1.198883756335052e-05, "loss": 0.3589, "step": 13822 }, { "epoch": 45.321311475409836, "grad_norm": 3.7563705444335938, "learning_rate": 1.1987796878375253e-05, "loss": 0.4065, "step": 13823 }, { "epoch": 45.324590163934424, "grad_norm": 4.146636962890625, "learning_rate": 1.1986756170985516e-05, "loss": 0.5113, "step": 13824 }, { "epoch": 45.32786885245902, "grad_norm": 2.8420660495758057, "learning_rate": 1.1985715441193034e-05, "loss": 0.2441, "step": 13825 }, { "epoch": 45.33114754098361, "grad_norm": 4.245473861694336, "learning_rate": 1.1984674689009545e-05, "loss": 0.3431, "step": 13826 }, { "epoch": 45.334426229508196, "grad_norm": 3.367594003677368, "learning_rate": 1.1983633914446787e-05, "loss": 0.246, "step": 13827 }, { "epoch": 45.337704918032784, "grad_norm": 3.7448670864105225, "learning_rate": 1.1982593117516494e-05, "loss": 0.2937, "step": 13828 }, { "epoch": 45.34098360655738, "grad_norm": 3.4018194675445557, "learning_rate": 1.1981552298230402e-05, "loss": 0.3281, "step": 13829 }, { "epoch": 45.34426229508197, "grad_norm": 3.5736351013183594, "learning_rate": 1.1980511456600247e-05, "loss": 0.1811, "step": 13830 }, { "epoch": 45.34754098360656, "grad_norm": 4.115665912628174, "learning_rate": 1.1979470592637768e-05, "loss": 0.4178, "step": 13831 }, { "epoch": 45.350819672131145, "grad_norm": 3.6350228786468506, "learning_rate": 1.1978429706354695e-05, "loss": 0.3794, "step": 13832 }, { "epoch": 45.35409836065574, "grad_norm": 4.226097106933594, "learning_rate": 1.1977388797762774e-05, "loss": 0.2419, "step": 13833 }, { "epoch": 45.35737704918033, "grad_norm": 3.701251268386841, "learning_rate": 1.1976347866873738e-05, "loss": 0.4213, "step": 13834 }, { "epoch": 45.36065573770492, "grad_norm": 3.9471421241760254, "learning_rate": 1.1975306913699328e-05, "loss": 0.3792, "step": 13835 }, { "epoch": 45.363934426229505, "grad_norm": 3.8228249549865723, "learning_rate": 1.1974265938251273e-05, "loss": 0.2562, "step": 13836 }, { "epoch": 45.3672131147541, "grad_norm": 3.510338068008423, "learning_rate": 1.1973224940541322e-05, "loss": 0.3016, "step": 13837 }, { "epoch": 45.37049180327869, "grad_norm": 4.413166046142578, "learning_rate": 1.1972183920581201e-05, "loss": 0.2795, "step": 13838 }, { "epoch": 45.37377049180328, "grad_norm": 3.8213298320770264, "learning_rate": 1.1971142878382663e-05, "loss": 0.3614, "step": 13839 }, { "epoch": 45.377049180327866, "grad_norm": 3.457908868789673, "learning_rate": 1.1970101813957436e-05, "loss": 0.244, "step": 13840 }, { "epoch": 45.38032786885246, "grad_norm": 10.656323432922363, "learning_rate": 1.1969060727317265e-05, "loss": 0.3469, "step": 13841 }, { "epoch": 45.38360655737705, "grad_norm": 2.987670660018921, "learning_rate": 1.1968019618473883e-05, "loss": 0.3669, "step": 13842 }, { "epoch": 45.38688524590164, "grad_norm": 3.952500343322754, "learning_rate": 1.1966978487439039e-05, "loss": 0.3314, "step": 13843 }, { "epoch": 45.390163934426226, "grad_norm": 3.9645867347717285, "learning_rate": 1.1965937334224462e-05, "loss": 0.3246, "step": 13844 }, { "epoch": 45.39344262295082, "grad_norm": 3.54929256439209, "learning_rate": 1.1964896158841902e-05, "loss": 0.2671, "step": 13845 }, { "epoch": 45.39672131147541, "grad_norm": 3.5740694999694824, "learning_rate": 1.1963854961303092e-05, "loss": 0.4038, "step": 13846 }, { "epoch": 45.4, "grad_norm": 4.012089729309082, "learning_rate": 1.1962813741619777e-05, "loss": 0.4841, "step": 13847 }, { "epoch": 45.40327868852459, "grad_norm": 3.95989990234375, "learning_rate": 1.1961772499803696e-05, "loss": 0.4251, "step": 13848 }, { "epoch": 45.40655737704918, "grad_norm": 3.498666524887085, "learning_rate": 1.1960731235866589e-05, "loss": 0.3724, "step": 13849 }, { "epoch": 45.40983606557377, "grad_norm": 3.642131805419922, "learning_rate": 1.19596899498202e-05, "loss": 0.2431, "step": 13850 }, { "epoch": 45.41311475409836, "grad_norm": 4.277458190917969, "learning_rate": 1.1958648641676268e-05, "loss": 0.3673, "step": 13851 }, { "epoch": 45.41639344262295, "grad_norm": 3.6589415073394775, "learning_rate": 1.1957607311446537e-05, "loss": 0.3638, "step": 13852 }, { "epoch": 45.41967213114754, "grad_norm": 4.736719608306885, "learning_rate": 1.1956565959142748e-05, "loss": 0.6877, "step": 13853 }, { "epoch": 45.42295081967213, "grad_norm": 3.960946798324585, "learning_rate": 1.1955524584776646e-05, "loss": 0.3393, "step": 13854 }, { "epoch": 45.42622950819672, "grad_norm": 2.905949831008911, "learning_rate": 1.1954483188359967e-05, "loss": 0.1292, "step": 13855 }, { "epoch": 45.429508196721315, "grad_norm": 4.027907371520996, "learning_rate": 1.1953441769904458e-05, "loss": 0.4749, "step": 13856 }, { "epoch": 45.4327868852459, "grad_norm": 3.3334696292877197, "learning_rate": 1.1952400329421865e-05, "loss": 0.2638, "step": 13857 }, { "epoch": 45.43606557377049, "grad_norm": 3.2033960819244385, "learning_rate": 1.1951358866923926e-05, "loss": 0.2628, "step": 13858 }, { "epoch": 45.43934426229508, "grad_norm": 2.5223312377929688, "learning_rate": 1.1950317382422387e-05, "loss": 0.1971, "step": 13859 }, { "epoch": 45.442622950819676, "grad_norm": 3.3828577995300293, "learning_rate": 1.1949275875928991e-05, "loss": 0.2393, "step": 13860 }, { "epoch": 45.445901639344264, "grad_norm": 3.736626148223877, "learning_rate": 1.1948234347455485e-05, "loss": 0.4986, "step": 13861 }, { "epoch": 45.44918032786885, "grad_norm": 3.286741256713867, "learning_rate": 1.194719279701361e-05, "loss": 0.4206, "step": 13862 }, { "epoch": 45.45245901639344, "grad_norm": 4.0140838623046875, "learning_rate": 1.1946151224615113e-05, "loss": 0.2653, "step": 13863 }, { "epoch": 45.455737704918036, "grad_norm": 3.636913299560547, "learning_rate": 1.1945109630271734e-05, "loss": 0.518, "step": 13864 }, { "epoch": 45.459016393442624, "grad_norm": 3.6857750415802, "learning_rate": 1.1944068013995224e-05, "loss": 0.3783, "step": 13865 }, { "epoch": 45.46229508196721, "grad_norm": 3.0086681842803955, "learning_rate": 1.1943026375797326e-05, "loss": 0.2917, "step": 13866 }, { "epoch": 45.4655737704918, "grad_norm": 3.68981671333313, "learning_rate": 1.1941984715689785e-05, "loss": 0.2726, "step": 13867 }, { "epoch": 45.4688524590164, "grad_norm": 3.123852252960205, "learning_rate": 1.1940943033684348e-05, "loss": 0.2266, "step": 13868 }, { "epoch": 45.472131147540985, "grad_norm": 3.6072463989257812, "learning_rate": 1.1939901329792757e-05, "loss": 0.2996, "step": 13869 }, { "epoch": 45.47540983606557, "grad_norm": 4.85664176940918, "learning_rate": 1.1938859604026765e-05, "loss": 0.3974, "step": 13870 }, { "epoch": 45.47868852459016, "grad_norm": 3.4805498123168945, "learning_rate": 1.1937817856398115e-05, "loss": 0.5423, "step": 13871 }, { "epoch": 45.48196721311476, "grad_norm": 4.207072734832764, "learning_rate": 1.1936776086918552e-05, "loss": 0.2716, "step": 13872 }, { "epoch": 45.485245901639345, "grad_norm": 2.958211660385132, "learning_rate": 1.1935734295599826e-05, "loss": 0.2885, "step": 13873 }, { "epoch": 45.488524590163934, "grad_norm": 3.2087666988372803, "learning_rate": 1.1934692482453683e-05, "loss": 0.2375, "step": 13874 }, { "epoch": 45.49180327868852, "grad_norm": 3.282083511352539, "learning_rate": 1.193365064749187e-05, "loss": 0.2084, "step": 13875 }, { "epoch": 45.49508196721312, "grad_norm": 3.616215229034424, "learning_rate": 1.1932608790726137e-05, "loss": 0.2081, "step": 13876 }, { "epoch": 45.498360655737706, "grad_norm": 3.945272207260132, "learning_rate": 1.1931566912168229e-05, "loss": 0.274, "step": 13877 }, { "epoch": 45.501639344262294, "grad_norm": 4.933427333831787, "learning_rate": 1.1930525011829896e-05, "loss": 0.4535, "step": 13878 }, { "epoch": 45.50491803278688, "grad_norm": 4.007692337036133, "learning_rate": 1.1929483089722887e-05, "loss": 0.3778, "step": 13879 }, { "epoch": 45.50819672131148, "grad_norm": 3.076028823852539, "learning_rate": 1.192844114585895e-05, "loss": 0.3587, "step": 13880 }, { "epoch": 45.511475409836066, "grad_norm": 3.5218026638031006, "learning_rate": 1.1927399180249832e-05, "loss": 0.1626, "step": 13881 }, { "epoch": 45.514754098360655, "grad_norm": 3.5380661487579346, "learning_rate": 1.1926357192907285e-05, "loss": 0.3253, "step": 13882 }, { "epoch": 45.51803278688524, "grad_norm": 3.8001651763916016, "learning_rate": 1.1925315183843058e-05, "loss": 0.4627, "step": 13883 }, { "epoch": 45.52131147540984, "grad_norm": 3.22194766998291, "learning_rate": 1.1924273153068901e-05, "loss": 0.3553, "step": 13884 }, { "epoch": 45.52459016393443, "grad_norm": 6.878493309020996, "learning_rate": 1.1923231100596565e-05, "loss": 0.5099, "step": 13885 }, { "epoch": 45.527868852459015, "grad_norm": 2.9522292613983154, "learning_rate": 1.1922189026437793e-05, "loss": 0.3117, "step": 13886 }, { "epoch": 45.5311475409836, "grad_norm": 2.8600099086761475, "learning_rate": 1.1921146930604348e-05, "loss": 0.3718, "step": 13887 }, { "epoch": 45.5344262295082, "grad_norm": 3.9362897872924805, "learning_rate": 1.1920104813107969e-05, "loss": 0.1578, "step": 13888 }, { "epoch": 45.53770491803279, "grad_norm": 4.134222507476807, "learning_rate": 1.1919062673960415e-05, "loss": 0.4526, "step": 13889 }, { "epoch": 45.540983606557376, "grad_norm": 3.2829980850219727, "learning_rate": 1.1918020513173432e-05, "loss": 0.2566, "step": 13890 }, { "epoch": 45.544262295081964, "grad_norm": 3.336812973022461, "learning_rate": 1.1916978330758774e-05, "loss": 0.3085, "step": 13891 }, { "epoch": 45.54754098360656, "grad_norm": 4.340031147003174, "learning_rate": 1.1915936126728193e-05, "loss": 0.5325, "step": 13892 }, { "epoch": 45.55081967213115, "grad_norm": 3.966737747192383, "learning_rate": 1.191489390109344e-05, "loss": 0.4198, "step": 13893 }, { "epoch": 45.554098360655736, "grad_norm": 12.921137809753418, "learning_rate": 1.1913851653866266e-05, "loss": 0.4389, "step": 13894 }, { "epoch": 45.557377049180324, "grad_norm": 3.7387659549713135, "learning_rate": 1.1912809385058425e-05, "loss": 0.3602, "step": 13895 }, { "epoch": 45.56065573770492, "grad_norm": 3.2750916481018066, "learning_rate": 1.191176709468167e-05, "loss": 0.2652, "step": 13896 }, { "epoch": 45.56393442622951, "grad_norm": 3.961591958999634, "learning_rate": 1.1910724782747756e-05, "loss": 0.2766, "step": 13897 }, { "epoch": 45.5672131147541, "grad_norm": 4.241215705871582, "learning_rate": 1.1909682449268431e-05, "loss": 0.2223, "step": 13898 }, { "epoch": 45.570491803278685, "grad_norm": 3.9319934844970703, "learning_rate": 1.1908640094255449e-05, "loss": 0.3691, "step": 13899 }, { "epoch": 45.57377049180328, "grad_norm": 5.219188213348389, "learning_rate": 1.1907597717720567e-05, "loss": 0.4273, "step": 13900 }, { "epoch": 45.57704918032787, "grad_norm": 3.784759998321533, "learning_rate": 1.1906555319675537e-05, "loss": 0.3573, "step": 13901 }, { "epoch": 45.58032786885246, "grad_norm": 3.8298349380493164, "learning_rate": 1.1905512900132114e-05, "loss": 0.3506, "step": 13902 }, { "epoch": 45.58360655737705, "grad_norm": 4.520282745361328, "learning_rate": 1.1904470459102051e-05, "loss": 0.3545, "step": 13903 }, { "epoch": 45.58688524590164, "grad_norm": 3.7577528953552246, "learning_rate": 1.1903427996597106e-05, "loss": 0.2043, "step": 13904 }, { "epoch": 45.59016393442623, "grad_norm": 3.955923557281494, "learning_rate": 1.1902385512629026e-05, "loss": 0.2221, "step": 13905 }, { "epoch": 45.59344262295082, "grad_norm": 4.571115016937256, "learning_rate": 1.1901343007209575e-05, "loss": 0.3151, "step": 13906 }, { "epoch": 45.59672131147541, "grad_norm": 3.2381937503814697, "learning_rate": 1.1900300480350504e-05, "loss": 0.3035, "step": 13907 }, { "epoch": 45.6, "grad_norm": 3.348783254623413, "learning_rate": 1.189925793206357e-05, "loss": 0.3437, "step": 13908 }, { "epoch": 45.60327868852459, "grad_norm": 4.9810004234313965, "learning_rate": 1.1898215362360527e-05, "loss": 0.407, "step": 13909 }, { "epoch": 45.60655737704918, "grad_norm": 3.956423044204712, "learning_rate": 1.1897172771253133e-05, "loss": 0.3779, "step": 13910 }, { "epoch": 45.609836065573774, "grad_norm": 3.0310463905334473, "learning_rate": 1.1896130158753143e-05, "loss": 0.2799, "step": 13911 }, { "epoch": 45.61311475409836, "grad_norm": 4.027238845825195, "learning_rate": 1.1895087524872316e-05, "loss": 0.5545, "step": 13912 }, { "epoch": 45.61639344262295, "grad_norm": 3.6643569469451904, "learning_rate": 1.1894044869622403e-05, "loss": 0.3896, "step": 13913 }, { "epoch": 45.61967213114754, "grad_norm": 3.3326468467712402, "learning_rate": 1.1893002193015166e-05, "loss": 0.2443, "step": 13914 }, { "epoch": 45.622950819672134, "grad_norm": 3.2942676544189453, "learning_rate": 1.1891959495062362e-05, "loss": 0.1547, "step": 13915 }, { "epoch": 45.62622950819672, "grad_norm": 3.1498019695281982, "learning_rate": 1.1890916775775747e-05, "loss": 0.3585, "step": 13916 }, { "epoch": 45.62950819672131, "grad_norm": 3.5668723583221436, "learning_rate": 1.188987403516708e-05, "loss": 0.3257, "step": 13917 }, { "epoch": 45.6327868852459, "grad_norm": 3.6666266918182373, "learning_rate": 1.1888831273248116e-05, "loss": 0.308, "step": 13918 }, { "epoch": 45.636065573770495, "grad_norm": 5.008322238922119, "learning_rate": 1.1887788490030619e-05, "loss": 0.618, "step": 13919 }, { "epoch": 45.63934426229508, "grad_norm": 3.0809242725372314, "learning_rate": 1.1886745685526342e-05, "loss": 0.3367, "step": 13920 }, { "epoch": 45.64262295081967, "grad_norm": 3.498671531677246, "learning_rate": 1.1885702859747047e-05, "loss": 0.3753, "step": 13921 }, { "epoch": 45.64590163934426, "grad_norm": 3.5437326431274414, "learning_rate": 1.188466001270449e-05, "loss": 0.334, "step": 13922 }, { "epoch": 45.649180327868855, "grad_norm": 3.6653614044189453, "learning_rate": 1.1883617144410433e-05, "loss": 0.2718, "step": 13923 }, { "epoch": 45.65245901639344, "grad_norm": 4.053265571594238, "learning_rate": 1.1882574254876633e-05, "loss": 0.2558, "step": 13924 }, { "epoch": 45.65573770491803, "grad_norm": 3.934551239013672, "learning_rate": 1.1881531344114852e-05, "loss": 0.3072, "step": 13925 }, { "epoch": 45.65901639344262, "grad_norm": 3.8501198291778564, "learning_rate": 1.1880488412136848e-05, "loss": 0.2302, "step": 13926 }, { "epoch": 45.662295081967216, "grad_norm": 3.4125609397888184, "learning_rate": 1.1879445458954382e-05, "loss": 0.3347, "step": 13927 }, { "epoch": 45.665573770491804, "grad_norm": 3.407162666320801, "learning_rate": 1.1878402484579214e-05, "loss": 0.1775, "step": 13928 }, { "epoch": 45.66885245901639, "grad_norm": 3.320037364959717, "learning_rate": 1.1877359489023105e-05, "loss": 0.3553, "step": 13929 }, { "epoch": 45.67213114754098, "grad_norm": 3.5673418045043945, "learning_rate": 1.1876316472297818e-05, "loss": 0.3377, "step": 13930 }, { "epoch": 45.675409836065576, "grad_norm": 4.054677486419678, "learning_rate": 1.1875273434415108e-05, "loss": 0.2995, "step": 13931 }, { "epoch": 45.678688524590164, "grad_norm": 3.7537119388580322, "learning_rate": 1.1874230375386745e-05, "loss": 0.3558, "step": 13932 }, { "epoch": 45.68196721311475, "grad_norm": 3.3593907356262207, "learning_rate": 1.1873187295224482e-05, "loss": 0.3599, "step": 13933 }, { "epoch": 45.68524590163934, "grad_norm": 3.636230945587158, "learning_rate": 1.1872144193940087e-05, "loss": 0.2482, "step": 13934 }, { "epoch": 45.68852459016394, "grad_norm": 3.950423240661621, "learning_rate": 1.1871101071545317e-05, "loss": 0.4174, "step": 13935 }, { "epoch": 45.691803278688525, "grad_norm": 3.581759214401245, "learning_rate": 1.1870057928051939e-05, "loss": 0.1713, "step": 13936 }, { "epoch": 45.69508196721311, "grad_norm": 3.253523826599121, "learning_rate": 1.1869014763471713e-05, "loss": 0.2965, "step": 13937 }, { "epoch": 45.6983606557377, "grad_norm": 3.2357399463653564, "learning_rate": 1.1867971577816401e-05, "loss": 0.2971, "step": 13938 }, { "epoch": 45.7016393442623, "grad_norm": 4.188558101654053, "learning_rate": 1.1866928371097768e-05, "loss": 0.4591, "step": 13939 }, { "epoch": 45.704918032786885, "grad_norm": 4.167577266693115, "learning_rate": 1.1865885143327578e-05, "loss": 0.3222, "step": 13940 }, { "epoch": 45.708196721311474, "grad_norm": 3.613736391067505, "learning_rate": 1.1864841894517591e-05, "loss": 0.489, "step": 13941 }, { "epoch": 45.71147540983607, "grad_norm": 3.5116419792175293, "learning_rate": 1.1863798624679573e-05, "loss": 0.2689, "step": 13942 }, { "epoch": 45.71475409836066, "grad_norm": 3.4150049686431885, "learning_rate": 1.186275533382529e-05, "loss": 0.3487, "step": 13943 }, { "epoch": 45.718032786885246, "grad_norm": 3.6743850708007812, "learning_rate": 1.18617120219665e-05, "loss": 0.2271, "step": 13944 }, { "epoch": 45.721311475409834, "grad_norm": 3.6515045166015625, "learning_rate": 1.1860668689114973e-05, "loss": 0.4024, "step": 13945 }, { "epoch": 45.72459016393443, "grad_norm": 3.40564227104187, "learning_rate": 1.1859625335282474e-05, "loss": 0.3115, "step": 13946 }, { "epoch": 45.72786885245902, "grad_norm": 3.8971879482269287, "learning_rate": 1.1858581960480764e-05, "loss": 0.5453, "step": 13947 }, { "epoch": 45.731147540983606, "grad_norm": 2.92708683013916, "learning_rate": 1.185753856472161e-05, "loss": 0.2921, "step": 13948 }, { "epoch": 45.734426229508195, "grad_norm": 4.05570650100708, "learning_rate": 1.1856495148016779e-05, "loss": 0.3174, "step": 13949 }, { "epoch": 45.73770491803279, "grad_norm": 4.998588562011719, "learning_rate": 1.1855451710378031e-05, "loss": 0.3375, "step": 13950 }, { "epoch": 45.74098360655738, "grad_norm": 4.0792622566223145, "learning_rate": 1.185440825181714e-05, "loss": 0.3358, "step": 13951 }, { "epoch": 45.74426229508197, "grad_norm": 3.798332929611206, "learning_rate": 1.1853364772345865e-05, "loss": 0.3153, "step": 13952 }, { "epoch": 45.747540983606555, "grad_norm": 4.165009021759033, "learning_rate": 1.1852321271975975e-05, "loss": 0.2658, "step": 13953 }, { "epoch": 45.75081967213115, "grad_norm": 3.9307308197021484, "learning_rate": 1.185127775071924e-05, "loss": 0.2709, "step": 13954 }, { "epoch": 45.75409836065574, "grad_norm": 3.7506556510925293, "learning_rate": 1.1850234208587418e-05, "loss": 0.4191, "step": 13955 }, { "epoch": 45.75737704918033, "grad_norm": 3.517585039138794, "learning_rate": 1.1849190645592288e-05, "loss": 0.1658, "step": 13956 }, { "epoch": 45.760655737704916, "grad_norm": 3.3976407051086426, "learning_rate": 1.1848147061745608e-05, "loss": 0.1567, "step": 13957 }, { "epoch": 45.76393442622951, "grad_norm": 3.9066312313079834, "learning_rate": 1.1847103457059148e-05, "loss": 0.2761, "step": 13958 }, { "epoch": 45.7672131147541, "grad_norm": 3.333261251449585, "learning_rate": 1.1846059831544673e-05, "loss": 0.3858, "step": 13959 }, { "epoch": 45.77049180327869, "grad_norm": 3.9616856575012207, "learning_rate": 1.184501618521396e-05, "loss": 0.3389, "step": 13960 }, { "epoch": 45.773770491803276, "grad_norm": 3.8427066802978516, "learning_rate": 1.1843972518078767e-05, "loss": 0.3536, "step": 13961 }, { "epoch": 45.77704918032787, "grad_norm": 3.7550220489501953, "learning_rate": 1.1842928830150869e-05, "loss": 0.3074, "step": 13962 }, { "epoch": 45.78032786885246, "grad_norm": 3.309323787689209, "learning_rate": 1.1841885121442032e-05, "loss": 0.401, "step": 13963 }, { "epoch": 45.78360655737705, "grad_norm": 4.236833095550537, "learning_rate": 1.1840841391964023e-05, "loss": 0.4559, "step": 13964 }, { "epoch": 45.78688524590164, "grad_norm": 4.176485061645508, "learning_rate": 1.1839797641728612e-05, "loss": 0.2238, "step": 13965 }, { "epoch": 45.79016393442623, "grad_norm": 4.111189365386963, "learning_rate": 1.1838753870747575e-05, "loss": 0.2023, "step": 13966 }, { "epoch": 45.79344262295082, "grad_norm": 3.562333822250366, "learning_rate": 1.1837710079032671e-05, "loss": 0.3098, "step": 13967 }, { "epoch": 45.79672131147541, "grad_norm": 4.144506454467773, "learning_rate": 1.1836666266595678e-05, "loss": 0.3816, "step": 13968 }, { "epoch": 45.8, "grad_norm": 3.5559616088867188, "learning_rate": 1.1835622433448361e-05, "loss": 0.2795, "step": 13969 }, { "epoch": 45.80327868852459, "grad_norm": 3.3974623680114746, "learning_rate": 1.1834578579602496e-05, "loss": 0.2845, "step": 13970 }, { "epoch": 45.80655737704918, "grad_norm": 3.4181883335113525, "learning_rate": 1.1833534705069848e-05, "loss": 0.1819, "step": 13971 }, { "epoch": 45.80983606557377, "grad_norm": 9.637622833251953, "learning_rate": 1.1832490809862189e-05, "loss": 0.2412, "step": 13972 }, { "epoch": 45.81311475409836, "grad_norm": 3.7170369625091553, "learning_rate": 1.1831446893991294e-05, "loss": 0.3585, "step": 13973 }, { "epoch": 45.81639344262295, "grad_norm": 4.3719964027404785, "learning_rate": 1.1830402957468927e-05, "loss": 0.4243, "step": 13974 }, { "epoch": 45.81967213114754, "grad_norm": 3.8787283897399902, "learning_rate": 1.1829359000306867e-05, "loss": 0.52, "step": 13975 }, { "epoch": 45.82295081967213, "grad_norm": 3.4905006885528564, "learning_rate": 1.1828315022516881e-05, "loss": 0.3547, "step": 13976 }, { "epoch": 45.82622950819672, "grad_norm": 3.7444658279418945, "learning_rate": 1.182727102411074e-05, "loss": 0.33, "step": 13977 }, { "epoch": 45.829508196721314, "grad_norm": 3.773170232772827, "learning_rate": 1.1826227005100222e-05, "loss": 0.2837, "step": 13978 }, { "epoch": 45.8327868852459, "grad_norm": 3.8387060165405273, "learning_rate": 1.1825182965497092e-05, "loss": 0.4165, "step": 13979 }, { "epoch": 45.83606557377049, "grad_norm": 3.8021318912506104, "learning_rate": 1.182413890531313e-05, "loss": 0.2427, "step": 13980 }, { "epoch": 45.83934426229508, "grad_norm": 3.4707436561584473, "learning_rate": 1.1823094824560102e-05, "loss": 0.4032, "step": 13981 }, { "epoch": 45.842622950819674, "grad_norm": 3.737292766571045, "learning_rate": 1.1822050723249787e-05, "loss": 0.5611, "step": 13982 }, { "epoch": 45.84590163934426, "grad_norm": 3.6172432899475098, "learning_rate": 1.1821006601393954e-05, "loss": 0.3196, "step": 13983 }, { "epoch": 45.84918032786885, "grad_norm": 4.168332576751709, "learning_rate": 1.1819962459004379e-05, "loss": 0.346, "step": 13984 }, { "epoch": 45.85245901639344, "grad_norm": 3.3383922576904297, "learning_rate": 1.1818918296092834e-05, "loss": 0.3215, "step": 13985 }, { "epoch": 45.855737704918035, "grad_norm": 5.243644714355469, "learning_rate": 1.1817874112671096e-05, "loss": 0.2338, "step": 13986 }, { "epoch": 45.85901639344262, "grad_norm": 4.7668328285217285, "learning_rate": 1.1816829908750935e-05, "loss": 0.385, "step": 13987 }, { "epoch": 45.86229508196721, "grad_norm": 3.7110915184020996, "learning_rate": 1.181578568434413e-05, "loss": 0.2103, "step": 13988 }, { "epoch": 45.86557377049181, "grad_norm": 3.864548921585083, "learning_rate": 1.1814741439462451e-05, "loss": 0.304, "step": 13989 }, { "epoch": 45.868852459016395, "grad_norm": 3.775444269180298, "learning_rate": 1.1813697174117675e-05, "loss": 0.5744, "step": 13990 }, { "epoch": 45.87213114754098, "grad_norm": 3.190855026245117, "learning_rate": 1.181265288832158e-05, "loss": 0.2917, "step": 13991 }, { "epoch": 45.87540983606557, "grad_norm": 3.6621804237365723, "learning_rate": 1.1811608582085938e-05, "loss": 0.2337, "step": 13992 }, { "epoch": 45.87868852459017, "grad_norm": 3.419952154159546, "learning_rate": 1.1810564255422526e-05, "loss": 0.3282, "step": 13993 }, { "epoch": 45.881967213114756, "grad_norm": 3.3109827041625977, "learning_rate": 1.180951990834312e-05, "loss": 0.421, "step": 13994 }, { "epoch": 45.885245901639344, "grad_norm": 3.7663040161132812, "learning_rate": 1.1808475540859492e-05, "loss": 0.2413, "step": 13995 }, { "epoch": 45.88852459016393, "grad_norm": 4.777586936950684, "learning_rate": 1.1807431152983423e-05, "loss": 0.3323, "step": 13996 }, { "epoch": 45.89180327868853, "grad_norm": 4.70054292678833, "learning_rate": 1.1806386744726693e-05, "loss": 0.3655, "step": 13997 }, { "epoch": 45.895081967213116, "grad_norm": 3.8708505630493164, "learning_rate": 1.180534231610107e-05, "loss": 0.2272, "step": 13998 }, { "epoch": 45.898360655737704, "grad_norm": 2.9090769290924072, "learning_rate": 1.1804297867118338e-05, "loss": 0.2346, "step": 13999 }, { "epoch": 45.90163934426229, "grad_norm": 3.874022960662842, "learning_rate": 1.180325339779027e-05, "loss": 0.2834, "step": 14000 }, { "epoch": 45.90491803278689, "grad_norm": 3.336747646331787, "learning_rate": 1.1802208908128645e-05, "loss": 0.3273, "step": 14001 }, { "epoch": 45.90819672131148, "grad_norm": 3.9518070220947266, "learning_rate": 1.1801164398145245e-05, "loss": 0.2751, "step": 14002 }, { "epoch": 45.911475409836065, "grad_norm": 3.818480968475342, "learning_rate": 1.1800119867851837e-05, "loss": 0.1401, "step": 14003 }, { "epoch": 45.91475409836065, "grad_norm": 4.094423294067383, "learning_rate": 1.1799075317260209e-05, "loss": 0.1803, "step": 14004 }, { "epoch": 45.91803278688525, "grad_norm": 3.748777389526367, "learning_rate": 1.1798030746382136e-05, "loss": 0.254, "step": 14005 }, { "epoch": 45.92131147540984, "grad_norm": 3.479058265686035, "learning_rate": 1.1796986155229397e-05, "loss": 0.3197, "step": 14006 }, { "epoch": 45.924590163934425, "grad_norm": 3.2212867736816406, "learning_rate": 1.1795941543813769e-05, "loss": 0.1573, "step": 14007 }, { "epoch": 45.927868852459014, "grad_norm": 5.091885566711426, "learning_rate": 1.1794896912147037e-05, "loss": 0.1416, "step": 14008 }, { "epoch": 45.93114754098361, "grad_norm": 3.2106902599334717, "learning_rate": 1.179385226024097e-05, "loss": 0.3151, "step": 14009 }, { "epoch": 45.9344262295082, "grad_norm": 3.852968692779541, "learning_rate": 1.1792807588107358e-05, "loss": 0.2175, "step": 14010 }, { "epoch": 45.937704918032786, "grad_norm": 3.4383203983306885, "learning_rate": 1.1791762895757974e-05, "loss": 0.4603, "step": 14011 }, { "epoch": 45.940983606557374, "grad_norm": 4.3738932609558105, "learning_rate": 1.1790718183204603e-05, "loss": 0.1886, "step": 14012 }, { "epoch": 45.94426229508197, "grad_norm": 3.7672653198242188, "learning_rate": 1.178967345045902e-05, "loss": 0.6196, "step": 14013 }, { "epoch": 45.94754098360656, "grad_norm": 3.55277156829834, "learning_rate": 1.1788628697533012e-05, "loss": 0.3426, "step": 14014 }, { "epoch": 45.950819672131146, "grad_norm": 4.2225236892700195, "learning_rate": 1.1787583924438352e-05, "loss": 0.2696, "step": 14015 }, { "epoch": 45.954098360655735, "grad_norm": 4.305542945861816, "learning_rate": 1.1786539131186828e-05, "loss": 0.4579, "step": 14016 }, { "epoch": 45.95737704918033, "grad_norm": 3.994072675704956, "learning_rate": 1.1785494317790214e-05, "loss": 0.306, "step": 14017 }, { "epoch": 45.96065573770492, "grad_norm": 3.5915658473968506, "learning_rate": 1.1784449484260297e-05, "loss": 0.3587, "step": 14018 }, { "epoch": 45.96393442622951, "grad_norm": 3.40000057220459, "learning_rate": 1.1783404630608854e-05, "loss": 0.4355, "step": 14019 }, { "epoch": 45.967213114754095, "grad_norm": 3.3157639503479004, "learning_rate": 1.1782359756847673e-05, "loss": 0.3465, "step": 14020 }, { "epoch": 45.97049180327869, "grad_norm": 3.872349500656128, "learning_rate": 1.178131486298853e-05, "loss": 0.3578, "step": 14021 }, { "epoch": 45.97377049180328, "grad_norm": 3.42374324798584, "learning_rate": 1.1780269949043212e-05, "loss": 0.3662, "step": 14022 }, { "epoch": 45.97704918032787, "grad_norm": 3.5987770557403564, "learning_rate": 1.1779225015023497e-05, "loss": 0.3111, "step": 14023 }, { "epoch": 45.980327868852456, "grad_norm": 3.5781912803649902, "learning_rate": 1.1778180060941172e-05, "loss": 0.3417, "step": 14024 }, { "epoch": 45.98360655737705, "grad_norm": 3.9426445960998535, "learning_rate": 1.1777135086808018e-05, "loss": 0.3664, "step": 14025 }, { "epoch": 45.98688524590164, "grad_norm": 4.55621337890625, "learning_rate": 1.1776090092635818e-05, "loss": 0.5289, "step": 14026 }, { "epoch": 45.99016393442623, "grad_norm": 3.6863832473754883, "learning_rate": 1.1775045078436353e-05, "loss": 0.3662, "step": 14027 }, { "epoch": 45.993442622950816, "grad_norm": 3.869645833969116, "learning_rate": 1.1774000044221414e-05, "loss": 0.3898, "step": 14028 }, { "epoch": 45.99672131147541, "grad_norm": 4.101218223571777, "learning_rate": 1.1772954990002776e-05, "loss": 0.3526, "step": 14029 }, { "epoch": 46.0, "grad_norm": 4.180586338043213, "learning_rate": 1.177190991579223e-05, "loss": 0.4762, "step": 14030 }, { "epoch": 46.00327868852459, "grad_norm": 3.6785032749176025, "learning_rate": 1.1770864821601553e-05, "loss": 0.3246, "step": 14031 }, { "epoch": 46.006557377049184, "grad_norm": 3.6108691692352295, "learning_rate": 1.1769819707442538e-05, "loss": 0.5731, "step": 14032 }, { "epoch": 46.00983606557377, "grad_norm": 3.3707377910614014, "learning_rate": 1.1768774573326965e-05, "loss": 0.18, "step": 14033 }, { "epoch": 46.01311475409836, "grad_norm": 3.371610641479492, "learning_rate": 1.1767729419266618e-05, "loss": 0.2065, "step": 14034 }, { "epoch": 46.01639344262295, "grad_norm": 3.3653817176818848, "learning_rate": 1.1766684245273286e-05, "loss": 0.2608, "step": 14035 }, { "epoch": 46.019672131147544, "grad_norm": 3.7559151649475098, "learning_rate": 1.176563905135875e-05, "loss": 0.2883, "step": 14036 }, { "epoch": 46.02295081967213, "grad_norm": 3.1997342109680176, "learning_rate": 1.17645938375348e-05, "loss": 0.1375, "step": 14037 }, { "epoch": 46.02622950819672, "grad_norm": 3.9399819374084473, "learning_rate": 1.1763548603813218e-05, "loss": 0.3057, "step": 14038 }, { "epoch": 46.02950819672131, "grad_norm": 3.7757210731506348, "learning_rate": 1.1762503350205792e-05, "loss": 0.325, "step": 14039 }, { "epoch": 46.032786885245905, "grad_norm": 3.570295572280884, "learning_rate": 1.1761458076724308e-05, "loss": 0.2986, "step": 14040 }, { "epoch": 46.03606557377049, "grad_norm": 3.9299919605255127, "learning_rate": 1.1760412783380555e-05, "loss": 0.2679, "step": 14041 }, { "epoch": 46.03934426229508, "grad_norm": 3.7039787769317627, "learning_rate": 1.1759367470186316e-05, "loss": 0.481, "step": 14042 }, { "epoch": 46.04262295081967, "grad_norm": 3.882223606109619, "learning_rate": 1.175832213715338e-05, "loss": 0.2589, "step": 14043 }, { "epoch": 46.045901639344265, "grad_norm": 3.94256854057312, "learning_rate": 1.1757276784293531e-05, "loss": 0.3809, "step": 14044 }, { "epoch": 46.049180327868854, "grad_norm": 3.5784571170806885, "learning_rate": 1.1756231411618563e-05, "loss": 0.2467, "step": 14045 }, { "epoch": 46.05245901639344, "grad_norm": 3.6086606979370117, "learning_rate": 1.1755186019140257e-05, "loss": 0.2042, "step": 14046 }, { "epoch": 46.05573770491803, "grad_norm": 3.3815271854400635, "learning_rate": 1.1754140606870403e-05, "loss": 0.2749, "step": 14047 }, { "epoch": 46.059016393442626, "grad_norm": 4.3854193687438965, "learning_rate": 1.1753095174820794e-05, "loss": 0.2746, "step": 14048 }, { "epoch": 46.062295081967214, "grad_norm": 3.650999069213867, "learning_rate": 1.175204972300321e-05, "loss": 0.1681, "step": 14049 }, { "epoch": 46.0655737704918, "grad_norm": 2.8069188594818115, "learning_rate": 1.1751004251429446e-05, "loss": 0.2859, "step": 14050 }, { "epoch": 46.06885245901639, "grad_norm": 3.2433817386627197, "learning_rate": 1.1749958760111286e-05, "loss": 0.32, "step": 14051 }, { "epoch": 46.072131147540986, "grad_norm": 3.9745242595672607, "learning_rate": 1.1748913249060523e-05, "loss": 0.4031, "step": 14052 }, { "epoch": 46.075409836065575, "grad_norm": 3.269395112991333, "learning_rate": 1.1747867718288948e-05, "loss": 0.282, "step": 14053 }, { "epoch": 46.07868852459016, "grad_norm": 2.653477668762207, "learning_rate": 1.1746822167808344e-05, "loss": 0.1126, "step": 14054 }, { "epoch": 46.08196721311475, "grad_norm": 4.043759346008301, "learning_rate": 1.1745776597630502e-05, "loss": 0.179, "step": 14055 }, { "epoch": 46.08524590163935, "grad_norm": 3.0145442485809326, "learning_rate": 1.1744731007767219e-05, "loss": 0.3605, "step": 14056 }, { "epoch": 46.088524590163935, "grad_norm": 3.4682414531707764, "learning_rate": 1.1743685398230273e-05, "loss": 0.5036, "step": 14057 }, { "epoch": 46.09180327868852, "grad_norm": 4.291137218475342, "learning_rate": 1.1742639769031467e-05, "loss": 0.3874, "step": 14058 }, { "epoch": 46.09508196721311, "grad_norm": 2.7859513759613037, "learning_rate": 1.1741594120182582e-05, "loss": 0.4705, "step": 14059 }, { "epoch": 46.09836065573771, "grad_norm": 3.8620455265045166, "learning_rate": 1.1740548451695415e-05, "loss": 0.2853, "step": 14060 }, { "epoch": 46.101639344262296, "grad_norm": 3.0743863582611084, "learning_rate": 1.1739502763581752e-05, "loss": 0.1083, "step": 14061 }, { "epoch": 46.104918032786884, "grad_norm": 3.4761550426483154, "learning_rate": 1.173845705585339e-05, "loss": 0.2416, "step": 14062 }, { "epoch": 46.10819672131147, "grad_norm": 3.4208667278289795, "learning_rate": 1.1737411328522115e-05, "loss": 0.3643, "step": 14063 }, { "epoch": 46.11147540983607, "grad_norm": 3.8718883991241455, "learning_rate": 1.1736365581599721e-05, "loss": 0.2234, "step": 14064 }, { "epoch": 46.114754098360656, "grad_norm": 3.0776352882385254, "learning_rate": 1.1735319815097999e-05, "loss": 0.289, "step": 14065 }, { "epoch": 46.118032786885244, "grad_norm": 3.333662986755371, "learning_rate": 1.1734274029028743e-05, "loss": 0.2285, "step": 14066 }, { "epoch": 46.12131147540983, "grad_norm": 3.6877877712249756, "learning_rate": 1.1733228223403745e-05, "loss": 0.2107, "step": 14067 }, { "epoch": 46.12459016393443, "grad_norm": 3.0993175506591797, "learning_rate": 1.1732182398234795e-05, "loss": 0.3694, "step": 14068 }, { "epoch": 46.12786885245902, "grad_norm": 3.0500106811523438, "learning_rate": 1.1731136553533689e-05, "loss": 0.3089, "step": 14069 }, { "epoch": 46.131147540983605, "grad_norm": 3.679641008377075, "learning_rate": 1.1730090689312216e-05, "loss": 0.2198, "step": 14070 }, { "epoch": 46.13442622950819, "grad_norm": 3.504638433456421, "learning_rate": 1.1729044805582173e-05, "loss": 0.3057, "step": 14071 }, { "epoch": 46.13770491803279, "grad_norm": 3.4070920944213867, "learning_rate": 1.172799890235535e-05, "loss": 0.2083, "step": 14072 }, { "epoch": 46.14098360655738, "grad_norm": 3.5530219078063965, "learning_rate": 1.1726952979643546e-05, "loss": 0.3721, "step": 14073 }, { "epoch": 46.144262295081965, "grad_norm": 3.1220102310180664, "learning_rate": 1.172590703745855e-05, "loss": 0.2452, "step": 14074 }, { "epoch": 46.14754098360656, "grad_norm": 2.491697072982788, "learning_rate": 1.1724861075812158e-05, "loss": 0.3517, "step": 14075 }, { "epoch": 46.15081967213115, "grad_norm": 2.9368655681610107, "learning_rate": 1.1723815094716165e-05, "loss": 0.2038, "step": 14076 }, { "epoch": 46.15409836065574, "grad_norm": 2.726184606552124, "learning_rate": 1.172276909418236e-05, "loss": 0.3137, "step": 14077 }, { "epoch": 46.157377049180326, "grad_norm": 3.226644515991211, "learning_rate": 1.1721723074222546e-05, "loss": 0.2634, "step": 14078 }, { "epoch": 46.16065573770492, "grad_norm": 3.108423948287964, "learning_rate": 1.1720677034848516e-05, "loss": 0.4149, "step": 14079 }, { "epoch": 46.16393442622951, "grad_norm": 3.384622812271118, "learning_rate": 1.171963097607206e-05, "loss": 0.2642, "step": 14080 }, { "epoch": 46.1672131147541, "grad_norm": 3.835660219192505, "learning_rate": 1.1718584897904978e-05, "loss": 0.3005, "step": 14081 }, { "epoch": 46.170491803278686, "grad_norm": 4.002286911010742, "learning_rate": 1.1717538800359065e-05, "loss": 0.3255, "step": 14082 }, { "epoch": 46.17377049180328, "grad_norm": 4.136146068572998, "learning_rate": 1.1716492683446115e-05, "loss": 0.2433, "step": 14083 }, { "epoch": 46.17704918032787, "grad_norm": 3.807586193084717, "learning_rate": 1.1715446547177926e-05, "loss": 0.2287, "step": 14084 }, { "epoch": 46.18032786885246, "grad_norm": 3.878127098083496, "learning_rate": 1.1714400391566292e-05, "loss": 0.3781, "step": 14085 }, { "epoch": 46.18360655737705, "grad_norm": 3.226945638656616, "learning_rate": 1.1713354216623014e-05, "loss": 0.3783, "step": 14086 }, { "epoch": 46.18688524590164, "grad_norm": 3.297830581665039, "learning_rate": 1.1712308022359884e-05, "loss": 0.2971, "step": 14087 }, { "epoch": 46.19016393442623, "grad_norm": 3.873197555541992, "learning_rate": 1.17112618087887e-05, "loss": 0.2528, "step": 14088 }, { "epoch": 46.19344262295082, "grad_norm": 3.5079448223114014, "learning_rate": 1.1710215575921261e-05, "loss": 0.3569, "step": 14089 }, { "epoch": 46.19672131147541, "grad_norm": 3.5611772537231445, "learning_rate": 1.1709169323769365e-05, "loss": 0.4187, "step": 14090 }, { "epoch": 46.2, "grad_norm": 5.152637004852295, "learning_rate": 1.1708123052344803e-05, "loss": 0.31, "step": 14091 }, { "epoch": 46.20327868852459, "grad_norm": 3.77290678024292, "learning_rate": 1.1707076761659381e-05, "loss": 0.3506, "step": 14092 }, { "epoch": 46.20655737704918, "grad_norm": 3.809739112854004, "learning_rate": 1.1706030451724895e-05, "loss": 0.3936, "step": 14093 }, { "epoch": 46.20983606557377, "grad_norm": 3.9081056118011475, "learning_rate": 1.1704984122553138e-05, "loss": 0.2277, "step": 14094 }, { "epoch": 46.21311475409836, "grad_norm": 3.3939287662506104, "learning_rate": 1.1703937774155916e-05, "loss": 0.3095, "step": 14095 }, { "epoch": 46.21639344262295, "grad_norm": 3.7032291889190674, "learning_rate": 1.170289140654502e-05, "loss": 0.3829, "step": 14096 }, { "epoch": 46.21967213114754, "grad_norm": 3.735853672027588, "learning_rate": 1.1701845019732256e-05, "loss": 0.5554, "step": 14097 }, { "epoch": 46.22295081967213, "grad_norm": 3.716423273086548, "learning_rate": 1.1700798613729419e-05, "loss": 0.3679, "step": 14098 }, { "epoch": 46.226229508196724, "grad_norm": 2.695211172103882, "learning_rate": 1.169975218854831e-05, "loss": 0.3847, "step": 14099 }, { "epoch": 46.22950819672131, "grad_norm": 3.965636730194092, "learning_rate": 1.1698705744200724e-05, "loss": 0.4348, "step": 14100 }, { "epoch": 46.2327868852459, "grad_norm": 3.8060076236724854, "learning_rate": 1.1697659280698468e-05, "loss": 0.1102, "step": 14101 }, { "epoch": 46.23606557377049, "grad_norm": 2.937852621078491, "learning_rate": 1.1696612798053337e-05, "loss": 0.2892, "step": 14102 }, { "epoch": 46.239344262295084, "grad_norm": 3.9036262035369873, "learning_rate": 1.1695566296277135e-05, "loss": 0.2988, "step": 14103 }, { "epoch": 46.24262295081967, "grad_norm": 3.564173460006714, "learning_rate": 1.169451977538166e-05, "loss": 0.2135, "step": 14104 }, { "epoch": 46.24590163934426, "grad_norm": 3.7697670459747314, "learning_rate": 1.169347323537871e-05, "loss": 0.4452, "step": 14105 }, { "epoch": 46.24918032786885, "grad_norm": 3.6043429374694824, "learning_rate": 1.1692426676280088e-05, "loss": 0.2969, "step": 14106 }, { "epoch": 46.252459016393445, "grad_norm": 3.722182273864746, "learning_rate": 1.1691380098097598e-05, "loss": 0.3925, "step": 14107 }, { "epoch": 46.25573770491803, "grad_norm": 3.6777400970458984, "learning_rate": 1.1690333500843039e-05, "loss": 0.3429, "step": 14108 }, { "epoch": 46.25901639344262, "grad_norm": 3.525374174118042, "learning_rate": 1.1689286884528211e-05, "loss": 0.3977, "step": 14109 }, { "epoch": 46.26229508196721, "grad_norm": 3.485778570175171, "learning_rate": 1.1688240249164916e-05, "loss": 0.3321, "step": 14110 }, { "epoch": 46.265573770491805, "grad_norm": 3.2997817993164062, "learning_rate": 1.1687193594764958e-05, "loss": 0.3398, "step": 14111 }, { "epoch": 46.268852459016394, "grad_norm": 3.48642635345459, "learning_rate": 1.168614692134014e-05, "loss": 0.4053, "step": 14112 }, { "epoch": 46.27213114754098, "grad_norm": 3.9965736865997314, "learning_rate": 1.1685100228902258e-05, "loss": 0.287, "step": 14113 }, { "epoch": 46.27540983606557, "grad_norm": 2.6203296184539795, "learning_rate": 1.1684053517463122e-05, "loss": 0.4436, "step": 14114 }, { "epoch": 46.278688524590166, "grad_norm": 3.38659930229187, "learning_rate": 1.168300678703453e-05, "loss": 0.3326, "step": 14115 }, { "epoch": 46.281967213114754, "grad_norm": 4.0664963722229, "learning_rate": 1.1681960037628288e-05, "loss": 0.198, "step": 14116 }, { "epoch": 46.28524590163934, "grad_norm": 3.7216835021972656, "learning_rate": 1.16809132692562e-05, "loss": 0.2004, "step": 14117 }, { "epoch": 46.28852459016394, "grad_norm": 4.929318428039551, "learning_rate": 1.1679866481930063e-05, "loss": 0.3177, "step": 14118 }, { "epoch": 46.291803278688526, "grad_norm": 4.3881049156188965, "learning_rate": 1.1678819675661688e-05, "loss": 0.3735, "step": 14119 }, { "epoch": 46.295081967213115, "grad_norm": 3.604818820953369, "learning_rate": 1.1677772850462872e-05, "loss": 0.1633, "step": 14120 }, { "epoch": 46.2983606557377, "grad_norm": 4.048197269439697, "learning_rate": 1.1676726006345424e-05, "loss": 0.2316, "step": 14121 }, { "epoch": 46.3016393442623, "grad_norm": 6.5880656242370605, "learning_rate": 1.1675679143321148e-05, "loss": 0.2761, "step": 14122 }, { "epoch": 46.30491803278689, "grad_norm": 3.8683435916900635, "learning_rate": 1.1674632261401848e-05, "loss": 0.4715, "step": 14123 }, { "epoch": 46.308196721311475, "grad_norm": 3.926685094833374, "learning_rate": 1.1673585360599324e-05, "loss": 0.1723, "step": 14124 }, { "epoch": 46.31147540983606, "grad_norm": 4.635865688323975, "learning_rate": 1.1672538440925389e-05, "loss": 0.3807, "step": 14125 }, { "epoch": 46.31475409836066, "grad_norm": 4.650418281555176, "learning_rate": 1.1671491502391843e-05, "loss": 0.2982, "step": 14126 }, { "epoch": 46.31803278688525, "grad_norm": 3.301698923110962, "learning_rate": 1.1670444545010493e-05, "loss": 0.2192, "step": 14127 }, { "epoch": 46.321311475409836, "grad_norm": 3.7651515007019043, "learning_rate": 1.1669397568793141e-05, "loss": 0.1942, "step": 14128 }, { "epoch": 46.324590163934424, "grad_norm": 4.980019569396973, "learning_rate": 1.1668350573751599e-05, "loss": 0.4023, "step": 14129 }, { "epoch": 46.32786885245902, "grad_norm": 3.8818695545196533, "learning_rate": 1.166730355989767e-05, "loss": 0.2407, "step": 14130 }, { "epoch": 46.33114754098361, "grad_norm": 3.57830548286438, "learning_rate": 1.1666256527243156e-05, "loss": 0.3727, "step": 14131 }, { "epoch": 46.334426229508196, "grad_norm": 5.872374057769775, "learning_rate": 1.166520947579987e-05, "loss": 0.5175, "step": 14132 }, { "epoch": 46.337704918032784, "grad_norm": 3.446148157119751, "learning_rate": 1.1664162405579616e-05, "loss": 0.325, "step": 14133 }, { "epoch": 46.34098360655738, "grad_norm": 3.345547676086426, "learning_rate": 1.16631153165942e-05, "loss": 0.4513, "step": 14134 }, { "epoch": 46.34426229508197, "grad_norm": 3.730912685394287, "learning_rate": 1.1662068208855428e-05, "loss": 0.4704, "step": 14135 }, { "epoch": 46.34754098360656, "grad_norm": 3.3799424171447754, "learning_rate": 1.1661021082375113e-05, "loss": 0.3385, "step": 14136 }, { "epoch": 46.350819672131145, "grad_norm": 3.1305172443389893, "learning_rate": 1.1659973937165052e-05, "loss": 0.1584, "step": 14137 }, { "epoch": 46.35409836065574, "grad_norm": 3.3780555725097656, "learning_rate": 1.1658926773237064e-05, "loss": 0.3928, "step": 14138 }, { "epoch": 46.35737704918033, "grad_norm": 3.462526321411133, "learning_rate": 1.165787959060295e-05, "loss": 0.3025, "step": 14139 }, { "epoch": 46.36065573770492, "grad_norm": 3.712441921234131, "learning_rate": 1.1656832389274522e-05, "loss": 0.2229, "step": 14140 }, { "epoch": 46.363934426229505, "grad_norm": 3.462639331817627, "learning_rate": 1.1655785169263583e-05, "loss": 0.2895, "step": 14141 }, { "epoch": 46.3672131147541, "grad_norm": 4.163216590881348, "learning_rate": 1.1654737930581948e-05, "loss": 0.245, "step": 14142 }, { "epoch": 46.37049180327869, "grad_norm": 3.8102502822875977, "learning_rate": 1.165369067324142e-05, "loss": 0.216, "step": 14143 }, { "epoch": 46.37377049180328, "grad_norm": 3.849838972091675, "learning_rate": 1.1652643397253809e-05, "loss": 0.3397, "step": 14144 }, { "epoch": 46.377049180327866, "grad_norm": 3.2969484329223633, "learning_rate": 1.165159610263093e-05, "loss": 0.3138, "step": 14145 }, { "epoch": 46.38032786885246, "grad_norm": 3.450188159942627, "learning_rate": 1.1650548789384582e-05, "loss": 0.3007, "step": 14146 }, { "epoch": 46.38360655737705, "grad_norm": 3.478084087371826, "learning_rate": 1.1649501457526585e-05, "loss": 0.2759, "step": 14147 }, { "epoch": 46.38688524590164, "grad_norm": 3.8061177730560303, "learning_rate": 1.1648454107068738e-05, "loss": 0.3495, "step": 14148 }, { "epoch": 46.390163934426226, "grad_norm": 3.4531452655792236, "learning_rate": 1.1647406738022864e-05, "loss": 0.2975, "step": 14149 }, { "epoch": 46.39344262295082, "grad_norm": 3.787102460861206, "learning_rate": 1.164635935040076e-05, "loss": 0.3818, "step": 14150 }, { "epoch": 46.39672131147541, "grad_norm": 3.571660041809082, "learning_rate": 1.1645311944214249e-05, "loss": 0.4806, "step": 14151 }, { "epoch": 46.4, "grad_norm": 4.512670993804932, "learning_rate": 1.164426451947513e-05, "loss": 0.304, "step": 14152 }, { "epoch": 46.40327868852459, "grad_norm": 3.046760320663452, "learning_rate": 1.1643217076195222e-05, "loss": 0.2281, "step": 14153 }, { "epoch": 46.40655737704918, "grad_norm": 3.438521385192871, "learning_rate": 1.164216961438633e-05, "loss": 0.2313, "step": 14154 }, { "epoch": 46.40983606557377, "grad_norm": 3.3468005657196045, "learning_rate": 1.164112213406027e-05, "loss": 0.4202, "step": 14155 }, { "epoch": 46.41311475409836, "grad_norm": 3.432486057281494, "learning_rate": 1.1640074635228852e-05, "loss": 0.4592, "step": 14156 }, { "epoch": 46.41639344262295, "grad_norm": 3.4887688159942627, "learning_rate": 1.1639027117903883e-05, "loss": 0.2557, "step": 14157 }, { "epoch": 46.41967213114754, "grad_norm": 4.442651748657227, "learning_rate": 1.1637979582097186e-05, "loss": 0.2838, "step": 14158 }, { "epoch": 46.42295081967213, "grad_norm": 3.527660846710205, "learning_rate": 1.163693202782056e-05, "loss": 0.3341, "step": 14159 }, { "epoch": 46.42622950819672, "grad_norm": 3.8295981884002686, "learning_rate": 1.1635884455085828e-05, "loss": 0.2271, "step": 14160 }, { "epoch": 46.429508196721315, "grad_norm": 3.868920087814331, "learning_rate": 1.1634836863904794e-05, "loss": 0.295, "step": 14161 }, { "epoch": 46.4327868852459, "grad_norm": 3.6228630542755127, "learning_rate": 1.1633789254289278e-05, "loss": 0.5433, "step": 14162 }, { "epoch": 46.43606557377049, "grad_norm": 3.343602180480957, "learning_rate": 1.1632741626251087e-05, "loss": 0.1838, "step": 14163 }, { "epoch": 46.43934426229508, "grad_norm": 3.7023508548736572, "learning_rate": 1.163169397980204e-05, "loss": 0.182, "step": 14164 }, { "epoch": 46.442622950819676, "grad_norm": 3.082125425338745, "learning_rate": 1.1630646314953942e-05, "loss": 0.2612, "step": 14165 }, { "epoch": 46.445901639344264, "grad_norm": 4.178702354431152, "learning_rate": 1.1629598631718615e-05, "loss": 0.2677, "step": 14166 }, { "epoch": 46.44918032786885, "grad_norm": 4.008130073547363, "learning_rate": 1.1628550930107868e-05, "loss": 0.3886, "step": 14167 }, { "epoch": 46.45245901639344, "grad_norm": 3.9289755821228027, "learning_rate": 1.1627503210133516e-05, "loss": 0.5571, "step": 14168 }, { "epoch": 46.455737704918036, "grad_norm": 3.7592670917510986, "learning_rate": 1.1626455471807374e-05, "loss": 0.3374, "step": 14169 }, { "epoch": 46.459016393442624, "grad_norm": 4.072647571563721, "learning_rate": 1.1625407715141252e-05, "loss": 0.2601, "step": 14170 }, { "epoch": 46.46229508196721, "grad_norm": 3.233690023422241, "learning_rate": 1.1624359940146972e-05, "loss": 0.3342, "step": 14171 }, { "epoch": 46.4655737704918, "grad_norm": 3.9165313243865967, "learning_rate": 1.1623312146836343e-05, "loss": 0.4625, "step": 14172 }, { "epoch": 46.4688524590164, "grad_norm": 4.243398189544678, "learning_rate": 1.1622264335221184e-05, "loss": 0.3496, "step": 14173 }, { "epoch": 46.472131147540985, "grad_norm": 3.621563196182251, "learning_rate": 1.1621216505313304e-05, "loss": 0.4088, "step": 14174 }, { "epoch": 46.47540983606557, "grad_norm": 3.7648990154266357, "learning_rate": 1.1620168657124523e-05, "loss": 0.2919, "step": 14175 }, { "epoch": 46.47868852459016, "grad_norm": 3.5392839908599854, "learning_rate": 1.1619120790666659e-05, "loss": 0.3344, "step": 14176 }, { "epoch": 46.48196721311476, "grad_norm": 4.104025363922119, "learning_rate": 1.161807290595152e-05, "loss": 0.2843, "step": 14177 }, { "epoch": 46.485245901639345, "grad_norm": 3.8068108558654785, "learning_rate": 1.161702500299093e-05, "loss": 0.2311, "step": 14178 }, { "epoch": 46.488524590163934, "grad_norm": 3.1899943351745605, "learning_rate": 1.1615977081796702e-05, "loss": 0.2388, "step": 14179 }, { "epoch": 46.49180327868852, "grad_norm": 5.816915988922119, "learning_rate": 1.161492914238065e-05, "loss": 0.3953, "step": 14180 }, { "epoch": 46.49508196721312, "grad_norm": 3.7999658584594727, "learning_rate": 1.1613881184754595e-05, "loss": 0.1909, "step": 14181 }, { "epoch": 46.498360655737706, "grad_norm": 3.7523930072784424, "learning_rate": 1.161283320893035e-05, "loss": 0.2358, "step": 14182 }, { "epoch": 46.501639344262294, "grad_norm": 3.40736722946167, "learning_rate": 1.1611785214919733e-05, "loss": 0.3474, "step": 14183 }, { "epoch": 46.50491803278688, "grad_norm": 3.4190402030944824, "learning_rate": 1.1610737202734563e-05, "loss": 0.2458, "step": 14184 }, { "epoch": 46.50819672131148, "grad_norm": 3.4046363830566406, "learning_rate": 1.1609689172386656e-05, "loss": 0.1299, "step": 14185 }, { "epoch": 46.511475409836066, "grad_norm": 3.972226858139038, "learning_rate": 1.1608641123887831e-05, "loss": 0.3515, "step": 14186 }, { "epoch": 46.514754098360655, "grad_norm": 3.921759843826294, "learning_rate": 1.1607593057249905e-05, "loss": 0.2661, "step": 14187 }, { "epoch": 46.51803278688524, "grad_norm": 4.121214389801025, "learning_rate": 1.1606544972484695e-05, "loss": 0.2652, "step": 14188 }, { "epoch": 46.52131147540984, "grad_norm": 3.056251287460327, "learning_rate": 1.1605496869604019e-05, "loss": 0.1749, "step": 14189 }, { "epoch": 46.52459016393443, "grad_norm": 3.277520179748535, "learning_rate": 1.1604448748619699e-05, "loss": 0.237, "step": 14190 }, { "epoch": 46.527868852459015, "grad_norm": 3.361961603164673, "learning_rate": 1.1603400609543547e-05, "loss": 0.3888, "step": 14191 }, { "epoch": 46.5311475409836, "grad_norm": 4.691189765930176, "learning_rate": 1.1602352452387391e-05, "loss": 0.6727, "step": 14192 }, { "epoch": 46.5344262295082, "grad_norm": 3.30179500579834, "learning_rate": 1.160130427716304e-05, "loss": 0.1154, "step": 14193 }, { "epoch": 46.53770491803279, "grad_norm": 3.756547689437866, "learning_rate": 1.1600256083882324e-05, "loss": 0.3028, "step": 14194 }, { "epoch": 46.540983606557376, "grad_norm": 3.799316883087158, "learning_rate": 1.1599207872557055e-05, "loss": 0.2516, "step": 14195 }, { "epoch": 46.544262295081964, "grad_norm": 3.4444570541381836, "learning_rate": 1.1598159643199052e-05, "loss": 0.2489, "step": 14196 }, { "epoch": 46.54754098360656, "grad_norm": 4.3840508460998535, "learning_rate": 1.1597111395820137e-05, "loss": 0.3458, "step": 14197 }, { "epoch": 46.55081967213115, "grad_norm": 3.8004989624023438, "learning_rate": 1.1596063130432133e-05, "loss": 0.2815, "step": 14198 }, { "epoch": 46.554098360655736, "grad_norm": 4.26017951965332, "learning_rate": 1.1595014847046857e-05, "loss": 0.352, "step": 14199 }, { "epoch": 46.557377049180324, "grad_norm": 4.081991672515869, "learning_rate": 1.159396654567613e-05, "loss": 0.3618, "step": 14200 }, { "epoch": 46.56065573770492, "grad_norm": 3.7136390209198, "learning_rate": 1.1592918226331774e-05, "loss": 0.3794, "step": 14201 }, { "epoch": 46.56393442622951, "grad_norm": 3.899242639541626, "learning_rate": 1.1591869889025607e-05, "loss": 0.3706, "step": 14202 }, { "epoch": 46.5672131147541, "grad_norm": 3.6977791786193848, "learning_rate": 1.1590821533769452e-05, "loss": 0.2681, "step": 14203 }, { "epoch": 46.570491803278685, "grad_norm": 3.184941053390503, "learning_rate": 1.158977316057513e-05, "loss": 0.2513, "step": 14204 }, { "epoch": 46.57377049180328, "grad_norm": 3.4319095611572266, "learning_rate": 1.1588724769454465e-05, "loss": 0.2616, "step": 14205 }, { "epoch": 46.57704918032787, "grad_norm": 4.339248180389404, "learning_rate": 1.1587676360419274e-05, "loss": 0.3358, "step": 14206 }, { "epoch": 46.58032786885246, "grad_norm": 3.537842273712158, "learning_rate": 1.1586627933481383e-05, "loss": 0.2549, "step": 14207 }, { "epoch": 46.58360655737705, "grad_norm": 3.488679885864258, "learning_rate": 1.158557948865261e-05, "loss": 0.3473, "step": 14208 }, { "epoch": 46.58688524590164, "grad_norm": 4.134126663208008, "learning_rate": 1.158453102594478e-05, "loss": 0.4096, "step": 14209 }, { "epoch": 46.59016393442623, "grad_norm": 4.077078342437744, "learning_rate": 1.1583482545369718e-05, "loss": 0.3104, "step": 14210 }, { "epoch": 46.59344262295082, "grad_norm": 3.493490695953369, "learning_rate": 1.158243404693924e-05, "loss": 0.4299, "step": 14211 }, { "epoch": 46.59672131147541, "grad_norm": 3.4157958030700684, "learning_rate": 1.1581385530665177e-05, "loss": 0.3271, "step": 14212 }, { "epoch": 46.6, "grad_norm": 8.544785499572754, "learning_rate": 1.1580336996559343e-05, "loss": 0.3696, "step": 14213 }, { "epoch": 46.60327868852459, "grad_norm": 3.9563262462615967, "learning_rate": 1.1579288444633572e-05, "loss": 0.3743, "step": 14214 }, { "epoch": 46.60655737704918, "grad_norm": 3.276669979095459, "learning_rate": 1.1578239874899678e-05, "loss": 0.3692, "step": 14215 }, { "epoch": 46.609836065573774, "grad_norm": 4.322892189025879, "learning_rate": 1.1577191287369489e-05, "loss": 0.3958, "step": 14216 }, { "epoch": 46.61311475409836, "grad_norm": 3.9699249267578125, "learning_rate": 1.1576142682054828e-05, "loss": 0.2748, "step": 14217 }, { "epoch": 46.61639344262295, "grad_norm": 3.617157220840454, "learning_rate": 1.157509405896752e-05, "loss": 0.371, "step": 14218 }, { "epoch": 46.61967213114754, "grad_norm": 3.6267359256744385, "learning_rate": 1.1574045418119389e-05, "loss": 0.3055, "step": 14219 }, { "epoch": 46.622950819672134, "grad_norm": 3.6057446002960205, "learning_rate": 1.157299675952226e-05, "loss": 0.2526, "step": 14220 }, { "epoch": 46.62622950819672, "grad_norm": 5.164617538452148, "learning_rate": 1.1571948083187956e-05, "loss": 0.3343, "step": 14221 }, { "epoch": 46.62950819672131, "grad_norm": 3.449683666229248, "learning_rate": 1.1570899389128303e-05, "loss": 0.1487, "step": 14222 }, { "epoch": 46.6327868852459, "grad_norm": 3.9456958770751953, "learning_rate": 1.1569850677355128e-05, "loss": 0.3598, "step": 14223 }, { "epoch": 46.636065573770495, "grad_norm": 3.7425336837768555, "learning_rate": 1.156880194788025e-05, "loss": 0.338, "step": 14224 }, { "epoch": 46.63934426229508, "grad_norm": 3.941279411315918, "learning_rate": 1.1567753200715503e-05, "loss": 0.3978, "step": 14225 }, { "epoch": 46.64262295081967, "grad_norm": 4.156621932983398, "learning_rate": 1.1566704435872707e-05, "loss": 0.4003, "step": 14226 }, { "epoch": 46.64590163934426, "grad_norm": 3.847329616546631, "learning_rate": 1.156565565336369e-05, "loss": 0.3157, "step": 14227 }, { "epoch": 46.649180327868855, "grad_norm": 3.5766546726226807, "learning_rate": 1.1564606853200275e-05, "loss": 0.2968, "step": 14228 }, { "epoch": 46.65245901639344, "grad_norm": 4.047664165496826, "learning_rate": 1.1563558035394296e-05, "loss": 0.32, "step": 14229 }, { "epoch": 46.65573770491803, "grad_norm": 3.2165565490722656, "learning_rate": 1.156250919995757e-05, "loss": 0.2927, "step": 14230 }, { "epoch": 46.65901639344262, "grad_norm": 3.7288014888763428, "learning_rate": 1.1561460346901932e-05, "loss": 0.3011, "step": 14231 }, { "epoch": 46.662295081967216, "grad_norm": 3.748032808303833, "learning_rate": 1.1560411476239201e-05, "loss": 0.3848, "step": 14232 }, { "epoch": 46.665573770491804, "grad_norm": 9.209321022033691, "learning_rate": 1.1559362587981211e-05, "loss": 0.3223, "step": 14233 }, { "epoch": 46.66885245901639, "grad_norm": 3.6034061908721924, "learning_rate": 1.1558313682139786e-05, "loss": 0.1863, "step": 14234 }, { "epoch": 46.67213114754098, "grad_norm": 4.199377059936523, "learning_rate": 1.1557264758726754e-05, "loss": 0.2632, "step": 14235 }, { "epoch": 46.675409836065576, "grad_norm": 3.705528974533081, "learning_rate": 1.1556215817753941e-05, "loss": 0.5593, "step": 14236 }, { "epoch": 46.678688524590164, "grad_norm": 3.5549893379211426, "learning_rate": 1.1555166859233177e-05, "loss": 0.1963, "step": 14237 }, { "epoch": 46.68196721311475, "grad_norm": 3.340362071990967, "learning_rate": 1.155411788317629e-05, "loss": 0.3962, "step": 14238 }, { "epoch": 46.68524590163934, "grad_norm": 4.528700351715088, "learning_rate": 1.155306888959511e-05, "loss": 0.486, "step": 14239 }, { "epoch": 46.68852459016394, "grad_norm": 6.0256667137146, "learning_rate": 1.1552019878501462e-05, "loss": 0.3767, "step": 14240 }, { "epoch": 46.691803278688525, "grad_norm": 4.48356819152832, "learning_rate": 1.1550970849907175e-05, "loss": 0.2859, "step": 14241 }, { "epoch": 46.69508196721311, "grad_norm": 3.4552738666534424, "learning_rate": 1.1549921803824082e-05, "loss": 0.2632, "step": 14242 }, { "epoch": 46.6983606557377, "grad_norm": 3.3594319820404053, "learning_rate": 1.1548872740264006e-05, "loss": 0.237, "step": 14243 }, { "epoch": 46.7016393442623, "grad_norm": 4.56114387512207, "learning_rate": 1.1547823659238781e-05, "loss": 0.3827, "step": 14244 }, { "epoch": 46.704918032786885, "grad_norm": 5.142261981964111, "learning_rate": 1.1546774560760234e-05, "loss": 0.2485, "step": 14245 }, { "epoch": 46.708196721311474, "grad_norm": 3.7523722648620605, "learning_rate": 1.1545725444840196e-05, "loss": 0.2175, "step": 14246 }, { "epoch": 46.71147540983607, "grad_norm": 2.9986681938171387, "learning_rate": 1.1544676311490499e-05, "loss": 0.3133, "step": 14247 }, { "epoch": 46.71475409836066, "grad_norm": 5.317178249359131, "learning_rate": 1.1543627160722968e-05, "loss": 0.3833, "step": 14248 }, { "epoch": 46.718032786885246, "grad_norm": 3.9924612045288086, "learning_rate": 1.1542577992549437e-05, "loss": 0.4395, "step": 14249 }, { "epoch": 46.721311475409834, "grad_norm": 3.192646026611328, "learning_rate": 1.1541528806981734e-05, "loss": 0.3721, "step": 14250 }, { "epoch": 46.72459016393443, "grad_norm": 3.7590267658233643, "learning_rate": 1.1540479604031692e-05, "loss": 0.3769, "step": 14251 }, { "epoch": 46.72786885245902, "grad_norm": 4.139975070953369, "learning_rate": 1.1539430383711138e-05, "loss": 0.3044, "step": 14252 }, { "epoch": 46.731147540983606, "grad_norm": 3.8019516468048096, "learning_rate": 1.153838114603191e-05, "loss": 0.2707, "step": 14253 }, { "epoch": 46.734426229508195, "grad_norm": 10.249335289001465, "learning_rate": 1.1537331891005831e-05, "loss": 0.2794, "step": 14254 }, { "epoch": 46.73770491803279, "grad_norm": 3.57611346244812, "learning_rate": 1.153628261864474e-05, "loss": 0.1429, "step": 14255 }, { "epoch": 46.74098360655738, "grad_norm": 6.3391804695129395, "learning_rate": 1.1535233328960462e-05, "loss": 0.3088, "step": 14256 }, { "epoch": 46.74426229508197, "grad_norm": 3.718912124633789, "learning_rate": 1.1534184021964837e-05, "loss": 0.1864, "step": 14257 }, { "epoch": 46.747540983606555, "grad_norm": 3.3112399578094482, "learning_rate": 1.1533134697669686e-05, "loss": 0.1933, "step": 14258 }, { "epoch": 46.75081967213115, "grad_norm": 4.233333587646484, "learning_rate": 1.153208535608685e-05, "loss": 0.2711, "step": 14259 }, { "epoch": 46.75409836065574, "grad_norm": 3.1765480041503906, "learning_rate": 1.153103599722816e-05, "loss": 0.4089, "step": 14260 }, { "epoch": 46.75737704918033, "grad_norm": 7.939652919769287, "learning_rate": 1.1529986621105445e-05, "loss": 0.4599, "step": 14261 }, { "epoch": 46.760655737704916, "grad_norm": 3.5704281330108643, "learning_rate": 1.1528937227730539e-05, "loss": 0.1573, "step": 14262 }, { "epoch": 46.76393442622951, "grad_norm": 4.793995380401611, "learning_rate": 1.1527887817115279e-05, "loss": 0.3295, "step": 14263 }, { "epoch": 46.7672131147541, "grad_norm": 4.674735069274902, "learning_rate": 1.1526838389271492e-05, "loss": 0.2809, "step": 14264 }, { "epoch": 46.77049180327869, "grad_norm": 4.066988468170166, "learning_rate": 1.1525788944211016e-05, "loss": 0.2985, "step": 14265 }, { "epoch": 46.773770491803276, "grad_norm": 3.259445905685425, "learning_rate": 1.1524739481945686e-05, "loss": 0.2999, "step": 14266 }, { "epoch": 46.77704918032787, "grad_norm": 11.940849304199219, "learning_rate": 1.152369000248733e-05, "loss": 0.2998, "step": 14267 }, { "epoch": 46.78032786885246, "grad_norm": 3.8758418560028076, "learning_rate": 1.1522640505847786e-05, "loss": 0.3652, "step": 14268 }, { "epoch": 46.78360655737705, "grad_norm": 4.495919227600098, "learning_rate": 1.1521590992038887e-05, "loss": 0.4995, "step": 14269 }, { "epoch": 46.78688524590164, "grad_norm": 4.443151473999023, "learning_rate": 1.152054146107247e-05, "loss": 0.4247, "step": 14270 }, { "epoch": 46.79016393442623, "grad_norm": 3.9469470977783203, "learning_rate": 1.1519491912960362e-05, "loss": 0.2506, "step": 14271 }, { "epoch": 46.79344262295082, "grad_norm": 4.1580328941345215, "learning_rate": 1.1518442347714407e-05, "loss": 0.4043, "step": 14272 }, { "epoch": 46.79672131147541, "grad_norm": 4.318272590637207, "learning_rate": 1.1517392765346436e-05, "loss": 0.4525, "step": 14273 }, { "epoch": 46.8, "grad_norm": 3.811674118041992, "learning_rate": 1.151634316586828e-05, "loss": 0.1472, "step": 14274 }, { "epoch": 46.80327868852459, "grad_norm": 3.54122257232666, "learning_rate": 1.1515293549291782e-05, "loss": 0.3168, "step": 14275 }, { "epoch": 46.80655737704918, "grad_norm": 4.953461647033691, "learning_rate": 1.151424391562877e-05, "loss": 0.3078, "step": 14276 }, { "epoch": 46.80983606557377, "grad_norm": 4.506906986236572, "learning_rate": 1.1513194264891088e-05, "loss": 0.3087, "step": 14277 }, { "epoch": 46.81311475409836, "grad_norm": 3.7635178565979004, "learning_rate": 1.1512144597090562e-05, "loss": 0.2866, "step": 14278 }, { "epoch": 46.81639344262295, "grad_norm": 3.344517946243286, "learning_rate": 1.1511094912239039e-05, "loss": 0.3713, "step": 14279 }, { "epoch": 46.81967213114754, "grad_norm": 4.523041725158691, "learning_rate": 1.1510045210348343e-05, "loss": 0.5663, "step": 14280 }, { "epoch": 46.82295081967213, "grad_norm": 3.7170262336730957, "learning_rate": 1.1508995491430324e-05, "loss": 0.2579, "step": 14281 }, { "epoch": 46.82622950819672, "grad_norm": 4.168990135192871, "learning_rate": 1.1507945755496807e-05, "loss": 0.2531, "step": 14282 }, { "epoch": 46.829508196721314, "grad_norm": 4.186839580535889, "learning_rate": 1.1506896002559638e-05, "loss": 0.1958, "step": 14283 }, { "epoch": 46.8327868852459, "grad_norm": 3.096393346786499, "learning_rate": 1.1505846232630647e-05, "loss": 0.3031, "step": 14284 }, { "epoch": 46.83606557377049, "grad_norm": 2.631350040435791, "learning_rate": 1.1504796445721676e-05, "loss": 0.2901, "step": 14285 }, { "epoch": 46.83934426229508, "grad_norm": 5.158679008483887, "learning_rate": 1.1503746641844558e-05, "loss": 0.3022, "step": 14286 }, { "epoch": 46.842622950819674, "grad_norm": 3.618222713470459, "learning_rate": 1.1502696821011134e-05, "loss": 0.3081, "step": 14287 }, { "epoch": 46.84590163934426, "grad_norm": 4.117516994476318, "learning_rate": 1.150164698323324e-05, "loss": 0.2543, "step": 14288 }, { "epoch": 46.84918032786885, "grad_norm": 4.6088032722473145, "learning_rate": 1.1500597128522716e-05, "loss": 0.3392, "step": 14289 }, { "epoch": 46.85245901639344, "grad_norm": 3.8323659896850586, "learning_rate": 1.1499547256891399e-05, "loss": 0.1619, "step": 14290 }, { "epoch": 46.855737704918035, "grad_norm": 3.0827994346618652, "learning_rate": 1.1498497368351129e-05, "loss": 0.3205, "step": 14291 }, { "epoch": 46.85901639344262, "grad_norm": 3.626030445098877, "learning_rate": 1.1497447462913741e-05, "loss": 0.4198, "step": 14292 }, { "epoch": 46.86229508196721, "grad_norm": 3.000884771347046, "learning_rate": 1.1496397540591076e-05, "loss": 0.1862, "step": 14293 }, { "epoch": 46.86557377049181, "grad_norm": 3.5676324367523193, "learning_rate": 1.1495347601394973e-05, "loss": 0.3997, "step": 14294 }, { "epoch": 46.868852459016395, "grad_norm": 3.6105458736419678, "learning_rate": 1.1494297645337272e-05, "loss": 0.2689, "step": 14295 }, { "epoch": 46.87213114754098, "grad_norm": 3.5387449264526367, "learning_rate": 1.1493247672429813e-05, "loss": 0.3093, "step": 14296 }, { "epoch": 46.87540983606557, "grad_norm": 3.3303637504577637, "learning_rate": 1.149219768268443e-05, "loss": 0.3197, "step": 14297 }, { "epoch": 46.87868852459017, "grad_norm": 3.788125991821289, "learning_rate": 1.1491147676112968e-05, "loss": 0.3092, "step": 14298 }, { "epoch": 46.881967213114756, "grad_norm": 3.0232670307159424, "learning_rate": 1.1490097652727267e-05, "loss": 0.2732, "step": 14299 }, { "epoch": 46.885245901639344, "grad_norm": 3.390185832977295, "learning_rate": 1.1489047612539164e-05, "loss": 0.3673, "step": 14300 }, { "epoch": 46.88852459016393, "grad_norm": 4.2266526222229, "learning_rate": 1.1487997555560503e-05, "loss": 0.2522, "step": 14301 }, { "epoch": 46.89180327868853, "grad_norm": 4.203536510467529, "learning_rate": 1.1486947481803122e-05, "loss": 0.3127, "step": 14302 }, { "epoch": 46.895081967213116, "grad_norm": 3.1542162895202637, "learning_rate": 1.148589739127886e-05, "loss": 0.211, "step": 14303 }, { "epoch": 46.898360655737704, "grad_norm": 6.353271007537842, "learning_rate": 1.148484728399956e-05, "loss": 0.3318, "step": 14304 }, { "epoch": 46.90163934426229, "grad_norm": 4.308670997619629, "learning_rate": 1.1483797159977067e-05, "loss": 0.2435, "step": 14305 }, { "epoch": 46.90491803278689, "grad_norm": 3.4707558155059814, "learning_rate": 1.1482747019223212e-05, "loss": 0.2254, "step": 14306 }, { "epoch": 46.90819672131148, "grad_norm": 3.567203998565674, "learning_rate": 1.148169686174985e-05, "loss": 0.4944, "step": 14307 }, { "epoch": 46.911475409836065, "grad_norm": 3.6977288722991943, "learning_rate": 1.1480646687568807e-05, "loss": 0.1751, "step": 14308 }, { "epoch": 46.91475409836065, "grad_norm": 4.348832607269287, "learning_rate": 1.147959649669194e-05, "loss": 0.3935, "step": 14309 }, { "epoch": 46.91803278688525, "grad_norm": 4.699751853942871, "learning_rate": 1.1478546289131081e-05, "loss": 0.4705, "step": 14310 }, { "epoch": 46.92131147540984, "grad_norm": 3.079089879989624, "learning_rate": 1.1477496064898078e-05, "loss": 0.1312, "step": 14311 }, { "epoch": 46.924590163934425, "grad_norm": 3.4895591735839844, "learning_rate": 1.147644582400477e-05, "loss": 0.2366, "step": 14312 }, { "epoch": 46.927868852459014, "grad_norm": 3.2573301792144775, "learning_rate": 1.1475395566462997e-05, "loss": 0.3723, "step": 14313 }, { "epoch": 46.93114754098361, "grad_norm": 4.100666522979736, "learning_rate": 1.1474345292284608e-05, "loss": 0.318, "step": 14314 }, { "epoch": 46.9344262295082, "grad_norm": 3.0272536277770996, "learning_rate": 1.1473295001481439e-05, "loss": 0.1778, "step": 14315 }, { "epoch": 46.937704918032786, "grad_norm": 3.616466999053955, "learning_rate": 1.147224469406534e-05, "loss": 0.26, "step": 14316 }, { "epoch": 46.940983606557374, "grad_norm": 3.684157133102417, "learning_rate": 1.147119437004815e-05, "loss": 0.2764, "step": 14317 }, { "epoch": 46.94426229508197, "grad_norm": 3.2107808589935303, "learning_rate": 1.1470144029441716e-05, "loss": 0.2851, "step": 14318 }, { "epoch": 46.94754098360656, "grad_norm": 3.6507515907287598, "learning_rate": 1.1469093672257879e-05, "loss": 0.2628, "step": 14319 }, { "epoch": 46.950819672131146, "grad_norm": 3.975911855697632, "learning_rate": 1.1468043298508482e-05, "loss": 0.3846, "step": 14320 }, { "epoch": 46.954098360655735, "grad_norm": 3.059886932373047, "learning_rate": 1.146699290820537e-05, "loss": 0.3272, "step": 14321 }, { "epoch": 46.95737704918033, "grad_norm": 4.1162848472595215, "learning_rate": 1.146594250136039e-05, "loss": 0.2784, "step": 14322 }, { "epoch": 46.96065573770492, "grad_norm": 3.959608554840088, "learning_rate": 1.1464892077985384e-05, "loss": 0.2196, "step": 14323 }, { "epoch": 46.96393442622951, "grad_norm": 3.712143659591675, "learning_rate": 1.1463841638092195e-05, "loss": 0.3707, "step": 14324 }, { "epoch": 46.967213114754095, "grad_norm": 3.762622356414795, "learning_rate": 1.1462791181692672e-05, "loss": 0.3016, "step": 14325 }, { "epoch": 46.97049180327869, "grad_norm": 3.954200267791748, "learning_rate": 1.1461740708798654e-05, "loss": 0.4147, "step": 14326 }, { "epoch": 46.97377049180328, "grad_norm": 3.5465595722198486, "learning_rate": 1.146069021942199e-05, "loss": 0.2799, "step": 14327 }, { "epoch": 46.97704918032787, "grad_norm": 3.9705238342285156, "learning_rate": 1.1459639713574527e-05, "loss": 0.3441, "step": 14328 }, { "epoch": 46.980327868852456, "grad_norm": 3.6372876167297363, "learning_rate": 1.1458589191268107e-05, "loss": 0.1783, "step": 14329 }, { "epoch": 46.98360655737705, "grad_norm": 3.8223302364349365, "learning_rate": 1.145753865251458e-05, "loss": 0.4544, "step": 14330 }, { "epoch": 46.98688524590164, "grad_norm": 5.330766201019287, "learning_rate": 1.1456488097325788e-05, "loss": 0.2232, "step": 14331 }, { "epoch": 46.99016393442623, "grad_norm": 3.753432273864746, "learning_rate": 1.1455437525713577e-05, "loss": 0.4538, "step": 14332 }, { "epoch": 46.993442622950816, "grad_norm": 3.77030086517334, "learning_rate": 1.1454386937689796e-05, "loss": 0.3822, "step": 14333 }, { "epoch": 46.99672131147541, "grad_norm": 3.5260674953460693, "learning_rate": 1.145333633326629e-05, "loss": 0.3299, "step": 14334 }, { "epoch": 47.0, "grad_norm": 3.4978291988372803, "learning_rate": 1.1452285712454905e-05, "loss": 0.2876, "step": 14335 }, { "epoch": 47.00327868852459, "grad_norm": 4.897652626037598, "learning_rate": 1.145123507526749e-05, "loss": 0.2382, "step": 14336 }, { "epoch": 47.006557377049184, "grad_norm": 4.856258869171143, "learning_rate": 1.1450184421715889e-05, "loss": 0.4092, "step": 14337 }, { "epoch": 47.00983606557377, "grad_norm": 4.00584602355957, "learning_rate": 1.1449133751811952e-05, "loss": 0.2549, "step": 14338 }, { "epoch": 47.01311475409836, "grad_norm": 3.3710567951202393, "learning_rate": 1.1448083065567523e-05, "loss": 0.2481, "step": 14339 }, { "epoch": 47.01639344262295, "grad_norm": 3.865356922149658, "learning_rate": 1.1447032362994455e-05, "loss": 0.3515, "step": 14340 }, { "epoch": 47.019672131147544, "grad_norm": 3.4871628284454346, "learning_rate": 1.1445981644104588e-05, "loss": 0.1972, "step": 14341 }, { "epoch": 47.02295081967213, "grad_norm": 5.594217777252197, "learning_rate": 1.144493090890978e-05, "loss": 0.1707, "step": 14342 }, { "epoch": 47.02622950819672, "grad_norm": 4.126256942749023, "learning_rate": 1.1443880157421869e-05, "loss": 0.3792, "step": 14343 }, { "epoch": 47.02950819672131, "grad_norm": 3.6739275455474854, "learning_rate": 1.144282938965271e-05, "loss": 0.1677, "step": 14344 }, { "epoch": 47.032786885245905, "grad_norm": 3.2935097217559814, "learning_rate": 1.1441778605614152e-05, "loss": 0.3255, "step": 14345 }, { "epoch": 47.03606557377049, "grad_norm": 3.518930196762085, "learning_rate": 1.1440727805318038e-05, "loss": 0.3026, "step": 14346 }, { "epoch": 47.03934426229508, "grad_norm": 4.003467559814453, "learning_rate": 1.1439676988776217e-05, "loss": 0.1917, "step": 14347 }, { "epoch": 47.04262295081967, "grad_norm": 4.0206451416015625, "learning_rate": 1.1438626156000547e-05, "loss": 0.3487, "step": 14348 }, { "epoch": 47.045901639344265, "grad_norm": 4.122798442840576, "learning_rate": 1.143757530700287e-05, "loss": 0.181, "step": 14349 }, { "epoch": 47.049180327868854, "grad_norm": 3.574904680252075, "learning_rate": 1.1436524441795033e-05, "loss": 0.2272, "step": 14350 }, { "epoch": 47.05245901639344, "grad_norm": 4.128668785095215, "learning_rate": 1.1435473560388894e-05, "loss": 0.2355, "step": 14351 }, { "epoch": 47.05573770491803, "grad_norm": 4.366612911224365, "learning_rate": 1.1434422662796297e-05, "loss": 0.3345, "step": 14352 }, { "epoch": 47.059016393442626, "grad_norm": 3.3758625984191895, "learning_rate": 1.1433371749029093e-05, "loss": 0.2426, "step": 14353 }, { "epoch": 47.062295081967214, "grad_norm": 3.4792630672454834, "learning_rate": 1.1432320819099129e-05, "loss": 0.2818, "step": 14354 }, { "epoch": 47.0655737704918, "grad_norm": 4.118124961853027, "learning_rate": 1.1431269873018263e-05, "loss": 0.4044, "step": 14355 }, { "epoch": 47.06885245901639, "grad_norm": 3.8576431274414062, "learning_rate": 1.1430218910798337e-05, "loss": 0.2603, "step": 14356 }, { "epoch": 47.072131147540986, "grad_norm": 4.085903167724609, "learning_rate": 1.1429167932451208e-05, "loss": 0.2773, "step": 14357 }, { "epoch": 47.075409836065575, "grad_norm": 3.7111518383026123, "learning_rate": 1.1428116937988724e-05, "loss": 0.3299, "step": 14358 }, { "epoch": 47.07868852459016, "grad_norm": 3.12813401222229, "learning_rate": 1.1427065927422737e-05, "loss": 0.2649, "step": 14359 }, { "epoch": 47.08196721311475, "grad_norm": 3.210925579071045, "learning_rate": 1.14260149007651e-05, "loss": 0.4678, "step": 14360 }, { "epoch": 47.08524590163935, "grad_norm": 4.082379341125488, "learning_rate": 1.142496385802766e-05, "loss": 0.2787, "step": 14361 }, { "epoch": 47.088524590163935, "grad_norm": 2.905953884124756, "learning_rate": 1.1423912799222273e-05, "loss": 0.4003, "step": 14362 }, { "epoch": 47.09180327868852, "grad_norm": 3.718243360519409, "learning_rate": 1.1422861724360786e-05, "loss": 0.5329, "step": 14363 }, { "epoch": 47.09508196721311, "grad_norm": 3.7492709159851074, "learning_rate": 1.1421810633455052e-05, "loss": 0.5145, "step": 14364 }, { "epoch": 47.09836065573771, "grad_norm": 3.384666681289673, "learning_rate": 1.1420759526516929e-05, "loss": 0.2335, "step": 14365 }, { "epoch": 47.101639344262296, "grad_norm": 3.6297097206115723, "learning_rate": 1.1419708403558264e-05, "loss": 0.3061, "step": 14366 }, { "epoch": 47.104918032786884, "grad_norm": 5.160374164581299, "learning_rate": 1.141865726459091e-05, "loss": 0.4081, "step": 14367 }, { "epoch": 47.10819672131147, "grad_norm": 4.023669719696045, "learning_rate": 1.141760610962672e-05, "loss": 0.2677, "step": 14368 }, { "epoch": 47.11147540983607, "grad_norm": 3.773432970046997, "learning_rate": 1.1416554938677546e-05, "loss": 0.5025, "step": 14369 }, { "epoch": 47.114754098360656, "grad_norm": 3.901660680770874, "learning_rate": 1.1415503751755245e-05, "loss": 0.374, "step": 14370 }, { "epoch": 47.118032786885244, "grad_norm": 3.0462474822998047, "learning_rate": 1.1414452548871664e-05, "loss": 0.3297, "step": 14371 }, { "epoch": 47.12131147540983, "grad_norm": 4.063731670379639, "learning_rate": 1.1413401330038662e-05, "loss": 0.2282, "step": 14372 }, { "epoch": 47.12459016393443, "grad_norm": 4.136938571929932, "learning_rate": 1.1412350095268092e-05, "loss": 0.4227, "step": 14373 }, { "epoch": 47.12786885245902, "grad_norm": 5.021276950836182, "learning_rate": 1.1411298844571803e-05, "loss": 0.4211, "step": 14374 }, { "epoch": 47.131147540983605, "grad_norm": 3.3278534412384033, "learning_rate": 1.1410247577961656e-05, "loss": 0.3737, "step": 14375 }, { "epoch": 47.13442622950819, "grad_norm": 13.190178871154785, "learning_rate": 1.1409196295449496e-05, "loss": 0.3146, "step": 14376 }, { "epoch": 47.13770491803279, "grad_norm": 4.7743754386901855, "learning_rate": 1.1408144997047189e-05, "loss": 0.2683, "step": 14377 }, { "epoch": 47.14098360655738, "grad_norm": 3.3702199459075928, "learning_rate": 1.1407093682766576e-05, "loss": 0.2435, "step": 14378 }, { "epoch": 47.144262295081965, "grad_norm": 3.413426637649536, "learning_rate": 1.1406042352619523e-05, "loss": 0.2772, "step": 14379 }, { "epoch": 47.14754098360656, "grad_norm": 3.637511968612671, "learning_rate": 1.140499100661788e-05, "loss": 0.3342, "step": 14380 }, { "epoch": 47.15081967213115, "grad_norm": 3.37160325050354, "learning_rate": 1.1403939644773503e-05, "loss": 0.1809, "step": 14381 }, { "epoch": 47.15409836065574, "grad_norm": 3.8601348400115967, "learning_rate": 1.1402888267098246e-05, "loss": 0.333, "step": 14382 }, { "epoch": 47.157377049180326, "grad_norm": 3.934605121612549, "learning_rate": 1.1401836873603966e-05, "loss": 0.3714, "step": 14383 }, { "epoch": 47.16065573770492, "grad_norm": 3.697350025177002, "learning_rate": 1.1400785464302514e-05, "loss": 0.2979, "step": 14384 }, { "epoch": 47.16393442622951, "grad_norm": 3.6795194149017334, "learning_rate": 1.1399734039205757e-05, "loss": 0.257, "step": 14385 }, { "epoch": 47.1672131147541, "grad_norm": 3.940551280975342, "learning_rate": 1.1398682598325536e-05, "loss": 0.3668, "step": 14386 }, { "epoch": 47.170491803278686, "grad_norm": 5.054495811462402, "learning_rate": 1.139763114167372e-05, "loss": 0.4598, "step": 14387 }, { "epoch": 47.17377049180328, "grad_norm": 3.7397196292877197, "learning_rate": 1.1396579669262158e-05, "loss": 0.3428, "step": 14388 }, { "epoch": 47.17704918032787, "grad_norm": 3.614823341369629, "learning_rate": 1.1395528181102704e-05, "loss": 0.2328, "step": 14389 }, { "epoch": 47.18032786885246, "grad_norm": 4.28765869140625, "learning_rate": 1.1394476677207224e-05, "loss": 0.1527, "step": 14390 }, { "epoch": 47.18360655737705, "grad_norm": 4.078264236450195, "learning_rate": 1.1393425157587568e-05, "loss": 0.1623, "step": 14391 }, { "epoch": 47.18688524590164, "grad_norm": 3.5465383529663086, "learning_rate": 1.1392373622255595e-05, "loss": 0.2828, "step": 14392 }, { "epoch": 47.19016393442623, "grad_norm": 5.534337997436523, "learning_rate": 1.1391322071223159e-05, "loss": 0.3355, "step": 14393 }, { "epoch": 47.19344262295082, "grad_norm": 3.2916390895843506, "learning_rate": 1.1390270504502122e-05, "loss": 0.4285, "step": 14394 }, { "epoch": 47.19672131147541, "grad_norm": 3.4302942752838135, "learning_rate": 1.138921892210434e-05, "loss": 0.1873, "step": 14395 }, { "epoch": 47.2, "grad_norm": 3.6046833992004395, "learning_rate": 1.138816732404167e-05, "loss": 0.2958, "step": 14396 }, { "epoch": 47.20327868852459, "grad_norm": 3.0354981422424316, "learning_rate": 1.1387115710325967e-05, "loss": 0.3262, "step": 14397 }, { "epoch": 47.20655737704918, "grad_norm": 3.909759759902954, "learning_rate": 1.1386064080969095e-05, "loss": 0.3582, "step": 14398 }, { "epoch": 47.20983606557377, "grad_norm": 3.4645752906799316, "learning_rate": 1.138501243598291e-05, "loss": 0.5174, "step": 14399 }, { "epoch": 47.21311475409836, "grad_norm": 3.4264650344848633, "learning_rate": 1.1383960775379268e-05, "loss": 0.2019, "step": 14400 }, { "epoch": 47.21639344262295, "grad_norm": 3.8534984588623047, "learning_rate": 1.1382909099170032e-05, "loss": 0.497, "step": 14401 }, { "epoch": 47.21967213114754, "grad_norm": 4.1260199546813965, "learning_rate": 1.1381857407367055e-05, "loss": 0.3374, "step": 14402 }, { "epoch": 47.22295081967213, "grad_norm": 4.186275482177734, "learning_rate": 1.13808056999822e-05, "loss": 0.4863, "step": 14403 }, { "epoch": 47.226229508196724, "grad_norm": 3.6168816089630127, "learning_rate": 1.1379753977027323e-05, "loss": 0.4243, "step": 14404 }, { "epoch": 47.22950819672131, "grad_norm": 3.324418783187866, "learning_rate": 1.137870223851429e-05, "loss": 0.1471, "step": 14405 }, { "epoch": 47.2327868852459, "grad_norm": 3.358013391494751, "learning_rate": 1.1377650484454952e-05, "loss": 0.2512, "step": 14406 }, { "epoch": 47.23606557377049, "grad_norm": 3.54775333404541, "learning_rate": 1.1376598714861176e-05, "loss": 0.3277, "step": 14407 }, { "epoch": 47.239344262295084, "grad_norm": 3.7573423385620117, "learning_rate": 1.1375546929744814e-05, "loss": 0.281, "step": 14408 }, { "epoch": 47.24262295081967, "grad_norm": 3.6884210109710693, "learning_rate": 1.1374495129117732e-05, "loss": 0.2569, "step": 14409 }, { "epoch": 47.24590163934426, "grad_norm": 3.58616304397583, "learning_rate": 1.1373443312991787e-05, "loss": 0.3144, "step": 14410 }, { "epoch": 47.24918032786885, "grad_norm": 4.057646751403809, "learning_rate": 1.1372391481378844e-05, "loss": 0.3201, "step": 14411 }, { "epoch": 47.252459016393445, "grad_norm": 2.5946311950683594, "learning_rate": 1.1371339634290757e-05, "loss": 0.1173, "step": 14412 }, { "epoch": 47.25573770491803, "grad_norm": 3.7183613777160645, "learning_rate": 1.1370287771739392e-05, "loss": 0.3605, "step": 14413 }, { "epoch": 47.25901639344262, "grad_norm": 3.8383891582489014, "learning_rate": 1.1369235893736608e-05, "loss": 0.2248, "step": 14414 }, { "epoch": 47.26229508196721, "grad_norm": 3.344346761703491, "learning_rate": 1.1368184000294263e-05, "loss": 0.2496, "step": 14415 }, { "epoch": 47.265573770491805, "grad_norm": 4.839418411254883, "learning_rate": 1.1367132091424223e-05, "loss": 0.4291, "step": 14416 }, { "epoch": 47.268852459016394, "grad_norm": 3.859142541885376, "learning_rate": 1.1366080167138345e-05, "loss": 0.3484, "step": 14417 }, { "epoch": 47.27213114754098, "grad_norm": 3.972003936767578, "learning_rate": 1.1365028227448496e-05, "loss": 0.3405, "step": 14418 }, { "epoch": 47.27540983606557, "grad_norm": 5.134539604187012, "learning_rate": 1.136397627236653e-05, "loss": 0.3526, "step": 14419 }, { "epoch": 47.278688524590166, "grad_norm": 3.4832558631896973, "learning_rate": 1.1362924301904316e-05, "loss": 0.263, "step": 14420 }, { "epoch": 47.281967213114754, "grad_norm": 3.3933582305908203, "learning_rate": 1.1361872316073714e-05, "loss": 0.2178, "step": 14421 }, { "epoch": 47.28524590163934, "grad_norm": 3.539616823196411, "learning_rate": 1.1360820314886585e-05, "loss": 0.286, "step": 14422 }, { "epoch": 47.28852459016394, "grad_norm": 3.013688087463379, "learning_rate": 1.1359768298354793e-05, "loss": 0.4817, "step": 14423 }, { "epoch": 47.291803278688526, "grad_norm": 4.666942596435547, "learning_rate": 1.1358716266490198e-05, "loss": 0.3659, "step": 14424 }, { "epoch": 47.295081967213115, "grad_norm": 3.967176914215088, "learning_rate": 1.1357664219304665e-05, "loss": 0.3326, "step": 14425 }, { "epoch": 47.2983606557377, "grad_norm": 3.356743574142456, "learning_rate": 1.1356612156810054e-05, "loss": 0.3839, "step": 14426 }, { "epoch": 47.3016393442623, "grad_norm": 3.1793651580810547, "learning_rate": 1.1355560079018232e-05, "loss": 0.2654, "step": 14427 }, { "epoch": 47.30491803278689, "grad_norm": 4.22822904586792, "learning_rate": 1.1354507985941062e-05, "loss": 0.4077, "step": 14428 }, { "epoch": 47.308196721311475, "grad_norm": 3.370938777923584, "learning_rate": 1.1353455877590407e-05, "loss": 0.113, "step": 14429 }, { "epoch": 47.31147540983606, "grad_norm": 3.751357078552246, "learning_rate": 1.1352403753978128e-05, "loss": 0.2558, "step": 14430 }, { "epoch": 47.31475409836066, "grad_norm": 3.6612908840179443, "learning_rate": 1.135135161511609e-05, "loss": 0.4634, "step": 14431 }, { "epoch": 47.31803278688525, "grad_norm": 4.19252872467041, "learning_rate": 1.1350299461016156e-05, "loss": 0.3145, "step": 14432 }, { "epoch": 47.321311475409836, "grad_norm": 3.6248362064361572, "learning_rate": 1.1349247291690198e-05, "loss": 0.4916, "step": 14433 }, { "epoch": 47.324590163934424, "grad_norm": 3.578784704208374, "learning_rate": 1.1348195107150066e-05, "loss": 0.3546, "step": 14434 }, { "epoch": 47.32786885245902, "grad_norm": 4.007938385009766, "learning_rate": 1.1347142907407638e-05, "loss": 0.2198, "step": 14435 }, { "epoch": 47.33114754098361, "grad_norm": 3.775075674057007, "learning_rate": 1.134609069247477e-05, "loss": 0.4339, "step": 14436 }, { "epoch": 47.334426229508196, "grad_norm": 3.6269636154174805, "learning_rate": 1.1345038462363332e-05, "loss": 0.3554, "step": 14437 }, { "epoch": 47.337704918032784, "grad_norm": 3.8967013359069824, "learning_rate": 1.1343986217085185e-05, "loss": 0.3573, "step": 14438 }, { "epoch": 47.34098360655738, "grad_norm": 3.3383126258850098, "learning_rate": 1.1342933956652199e-05, "loss": 0.3214, "step": 14439 }, { "epoch": 47.34426229508197, "grad_norm": 4.036860466003418, "learning_rate": 1.1341881681076235e-05, "loss": 0.2719, "step": 14440 }, { "epoch": 47.34754098360656, "grad_norm": 2.717510223388672, "learning_rate": 1.1340829390369156e-05, "loss": 0.2331, "step": 14441 }, { "epoch": 47.350819672131145, "grad_norm": 3.8455588817596436, "learning_rate": 1.1339777084542836e-05, "loss": 0.2959, "step": 14442 }, { "epoch": 47.35409836065574, "grad_norm": 3.9385597705841064, "learning_rate": 1.1338724763609134e-05, "loss": 0.4556, "step": 14443 }, { "epoch": 47.35737704918033, "grad_norm": 4.143555164337158, "learning_rate": 1.1337672427579923e-05, "loss": 0.3358, "step": 14444 }, { "epoch": 47.36065573770492, "grad_norm": 3.871290445327759, "learning_rate": 1.1336620076467057e-05, "loss": 0.4107, "step": 14445 }, { "epoch": 47.363934426229505, "grad_norm": 4.28833532333374, "learning_rate": 1.1335567710282416e-05, "loss": 0.2538, "step": 14446 }, { "epoch": 47.3672131147541, "grad_norm": 3.275641679763794, "learning_rate": 1.1334515329037857e-05, "loss": 0.2778, "step": 14447 }, { "epoch": 47.37049180327869, "grad_norm": 2.8890748023986816, "learning_rate": 1.1333462932745252e-05, "loss": 0.2981, "step": 14448 }, { "epoch": 47.37377049180328, "grad_norm": 3.7034223079681396, "learning_rate": 1.1332410521416463e-05, "loss": 0.3306, "step": 14449 }, { "epoch": 47.377049180327866, "grad_norm": 3.705742597579956, "learning_rate": 1.1331358095063365e-05, "loss": 0.4071, "step": 14450 }, { "epoch": 47.38032786885246, "grad_norm": 13.06883430480957, "learning_rate": 1.1330305653697815e-05, "loss": 0.3443, "step": 14451 }, { "epoch": 47.38360655737705, "grad_norm": 3.855097770690918, "learning_rate": 1.1329253197331692e-05, "loss": 0.3767, "step": 14452 }, { "epoch": 47.38688524590164, "grad_norm": 4.133005142211914, "learning_rate": 1.132820072597685e-05, "loss": 0.3535, "step": 14453 }, { "epoch": 47.390163934426226, "grad_norm": 4.607109546661377, "learning_rate": 1.1327148239645167e-05, "loss": 0.1926, "step": 14454 }, { "epoch": 47.39344262295082, "grad_norm": 2.576131582260132, "learning_rate": 1.1326095738348508e-05, "loss": 0.1233, "step": 14455 }, { "epoch": 47.39672131147541, "grad_norm": 3.301082134246826, "learning_rate": 1.1325043222098739e-05, "loss": 0.2793, "step": 14456 }, { "epoch": 47.4, "grad_norm": 3.0447165966033936, "learning_rate": 1.1323990690907734e-05, "loss": 0.2701, "step": 14457 }, { "epoch": 47.40327868852459, "grad_norm": 3.603600025177002, "learning_rate": 1.1322938144787352e-05, "loss": 0.2384, "step": 14458 }, { "epoch": 47.40655737704918, "grad_norm": 6.229879856109619, "learning_rate": 1.132188558374947e-05, "loss": 0.4702, "step": 14459 }, { "epoch": 47.40983606557377, "grad_norm": 3.34100604057312, "learning_rate": 1.1320833007805953e-05, "loss": 0.5579, "step": 14460 }, { "epoch": 47.41311475409836, "grad_norm": 3.86468243598938, "learning_rate": 1.1319780416968673e-05, "loss": 0.2651, "step": 14461 }, { "epoch": 47.41639344262295, "grad_norm": 2.927722215652466, "learning_rate": 1.1318727811249493e-05, "loss": 0.2661, "step": 14462 }, { "epoch": 47.41967213114754, "grad_norm": 4.13702392578125, "learning_rate": 1.131767519066029e-05, "loss": 0.3065, "step": 14463 }, { "epoch": 47.42295081967213, "grad_norm": 3.1090989112854004, "learning_rate": 1.1316622555212924e-05, "loss": 0.2503, "step": 14464 }, { "epoch": 47.42622950819672, "grad_norm": 3.8647239208221436, "learning_rate": 1.1315569904919273e-05, "loss": 0.3249, "step": 14465 }, { "epoch": 47.429508196721315, "grad_norm": 3.654921531677246, "learning_rate": 1.1314517239791204e-05, "loss": 0.4509, "step": 14466 }, { "epoch": 47.4327868852459, "grad_norm": 4.061793327331543, "learning_rate": 1.1313464559840583e-05, "loss": 0.356, "step": 14467 }, { "epoch": 47.43606557377049, "grad_norm": 4.218336582183838, "learning_rate": 1.1312411865079286e-05, "loss": 0.2523, "step": 14468 }, { "epoch": 47.43934426229508, "grad_norm": 3.3716583251953125, "learning_rate": 1.1311359155519179e-05, "loss": 0.2711, "step": 14469 }, { "epoch": 47.442622950819676, "grad_norm": 3.8131165504455566, "learning_rate": 1.1310306431172138e-05, "loss": 0.3147, "step": 14470 }, { "epoch": 47.445901639344264, "grad_norm": 3.349820137023926, "learning_rate": 1.1309253692050025e-05, "loss": 0.2926, "step": 14471 }, { "epoch": 47.44918032786885, "grad_norm": 2.938316822052002, "learning_rate": 1.1308200938164717e-05, "loss": 0.2884, "step": 14472 }, { "epoch": 47.45245901639344, "grad_norm": 4.354965686798096, "learning_rate": 1.1307148169528085e-05, "loss": 0.3647, "step": 14473 }, { "epoch": 47.455737704918036, "grad_norm": 4.118947982788086, "learning_rate": 1.1306095386151997e-05, "loss": 0.1703, "step": 14474 }, { "epoch": 47.459016393442624, "grad_norm": 3.5152997970581055, "learning_rate": 1.1305042588048323e-05, "loss": 0.1846, "step": 14475 }, { "epoch": 47.46229508196721, "grad_norm": 3.5532755851745605, "learning_rate": 1.1303989775228942e-05, "loss": 0.2671, "step": 14476 }, { "epoch": 47.4655737704918, "grad_norm": 3.5174388885498047, "learning_rate": 1.1302936947705713e-05, "loss": 0.2234, "step": 14477 }, { "epoch": 47.4688524590164, "grad_norm": 3.1574900150299072, "learning_rate": 1.1301884105490522e-05, "loss": 0.3346, "step": 14478 }, { "epoch": 47.472131147540985, "grad_norm": 3.40940523147583, "learning_rate": 1.130083124859523e-05, "loss": 0.2708, "step": 14479 }, { "epoch": 47.47540983606557, "grad_norm": 3.874940872192383, "learning_rate": 1.1299778377031713e-05, "loss": 0.2592, "step": 14480 }, { "epoch": 47.47868852459016, "grad_norm": 3.5963683128356934, "learning_rate": 1.1298725490811848e-05, "loss": 0.4695, "step": 14481 }, { "epoch": 47.48196721311476, "grad_norm": 2.9858896732330322, "learning_rate": 1.12976725899475e-05, "loss": 0.2785, "step": 14482 }, { "epoch": 47.485245901639345, "grad_norm": 2.829051971435547, "learning_rate": 1.1296619674450545e-05, "loss": 0.1128, "step": 14483 }, { "epoch": 47.488524590163934, "grad_norm": 3.57623291015625, "learning_rate": 1.1295566744332853e-05, "loss": 0.2313, "step": 14484 }, { "epoch": 47.49180327868852, "grad_norm": 3.666390895843506, "learning_rate": 1.1294513799606301e-05, "loss": 0.3538, "step": 14485 }, { "epoch": 47.49508196721312, "grad_norm": 3.4607901573181152, "learning_rate": 1.1293460840282759e-05, "loss": 0.2804, "step": 14486 }, { "epoch": 47.498360655737706, "grad_norm": 3.615403652191162, "learning_rate": 1.1292407866374102e-05, "loss": 0.3607, "step": 14487 }, { "epoch": 47.501639344262294, "grad_norm": 5.512786388397217, "learning_rate": 1.12913548778922e-05, "loss": 0.3184, "step": 14488 }, { "epoch": 47.50491803278688, "grad_norm": 3.373375654220581, "learning_rate": 1.1290301874848932e-05, "loss": 0.2454, "step": 14489 }, { "epoch": 47.50819672131148, "grad_norm": 3.2738726139068604, "learning_rate": 1.1289248857256167e-05, "loss": 0.2494, "step": 14490 }, { "epoch": 47.511475409836066, "grad_norm": 3.594860792160034, "learning_rate": 1.1288195825125783e-05, "loss": 0.271, "step": 14491 }, { "epoch": 47.514754098360655, "grad_norm": 3.7160260677337646, "learning_rate": 1.128714277846965e-05, "loss": 0.3757, "step": 14492 }, { "epoch": 47.51803278688524, "grad_norm": 3.6089117527008057, "learning_rate": 1.1286089717299645e-05, "loss": 0.1964, "step": 14493 }, { "epoch": 47.52131147540984, "grad_norm": 4.083809852600098, "learning_rate": 1.128503664162764e-05, "loss": 0.25, "step": 14494 }, { "epoch": 47.52459016393443, "grad_norm": 2.6762590408325195, "learning_rate": 1.1283983551465512e-05, "loss": 0.2272, "step": 14495 }, { "epoch": 47.527868852459015, "grad_norm": 3.6613192558288574, "learning_rate": 1.1282930446825133e-05, "loss": 0.2924, "step": 14496 }, { "epoch": 47.5311475409836, "grad_norm": 3.2509751319885254, "learning_rate": 1.128187732771838e-05, "loss": 0.2678, "step": 14497 }, { "epoch": 47.5344262295082, "grad_norm": 4.055208683013916, "learning_rate": 1.1280824194157127e-05, "loss": 0.2385, "step": 14498 }, { "epoch": 47.53770491803279, "grad_norm": 3.2546873092651367, "learning_rate": 1.1279771046153249e-05, "loss": 0.2647, "step": 14499 }, { "epoch": 47.540983606557376, "grad_norm": 4.384807586669922, "learning_rate": 1.1278717883718624e-05, "loss": 0.2014, "step": 14500 }, { "epoch": 47.544262295081964, "grad_norm": 3.347050905227661, "learning_rate": 1.127766470686512e-05, "loss": 0.1138, "step": 14501 }, { "epoch": 47.54754098360656, "grad_norm": 4.607539176940918, "learning_rate": 1.1276611515604625e-05, "loss": 0.2774, "step": 14502 }, { "epoch": 47.55081967213115, "grad_norm": 3.8798410892486572, "learning_rate": 1.1275558309949004e-05, "loss": 0.4324, "step": 14503 }, { "epoch": 47.554098360655736, "grad_norm": 4.5339813232421875, "learning_rate": 1.1274505089910137e-05, "loss": 0.5398, "step": 14504 }, { "epoch": 47.557377049180324, "grad_norm": 3.4928958415985107, "learning_rate": 1.12734518554999e-05, "loss": 0.2735, "step": 14505 }, { "epoch": 47.56065573770492, "grad_norm": 3.4595253467559814, "learning_rate": 1.1272398606730171e-05, "loss": 0.1999, "step": 14506 }, { "epoch": 47.56393442622951, "grad_norm": 4.291775226593018, "learning_rate": 1.1271345343612823e-05, "loss": 0.6071, "step": 14507 }, { "epoch": 47.5672131147541, "grad_norm": 3.2786762714385986, "learning_rate": 1.1270292066159732e-05, "loss": 0.2271, "step": 14508 }, { "epoch": 47.570491803278685, "grad_norm": 4.803415298461914, "learning_rate": 1.1269238774382782e-05, "loss": 0.1776, "step": 14509 }, { "epoch": 47.57377049180328, "grad_norm": 3.6973423957824707, "learning_rate": 1.1268185468293843e-05, "loss": 0.359, "step": 14510 }, { "epoch": 47.57704918032787, "grad_norm": 2.8203532695770264, "learning_rate": 1.1267132147904794e-05, "loss": 0.2535, "step": 14511 }, { "epoch": 47.58032786885246, "grad_norm": 3.2355616092681885, "learning_rate": 1.1266078813227512e-05, "loss": 0.1773, "step": 14512 }, { "epoch": 47.58360655737705, "grad_norm": 3.0278422832489014, "learning_rate": 1.1265025464273878e-05, "loss": 0.4844, "step": 14513 }, { "epoch": 47.58688524590164, "grad_norm": 2.6715760231018066, "learning_rate": 1.1263972101055763e-05, "loss": 0.223, "step": 14514 }, { "epoch": 47.59016393442623, "grad_norm": 3.828136682510376, "learning_rate": 1.126291872358505e-05, "loss": 0.3193, "step": 14515 }, { "epoch": 47.59344262295082, "grad_norm": 3.358553647994995, "learning_rate": 1.1261865331873613e-05, "loss": 0.2787, "step": 14516 }, { "epoch": 47.59672131147541, "grad_norm": 2.9345695972442627, "learning_rate": 1.1260811925933337e-05, "loss": 0.2695, "step": 14517 }, { "epoch": 47.6, "grad_norm": 3.6852643489837646, "learning_rate": 1.1259758505776092e-05, "loss": 0.4373, "step": 14518 }, { "epoch": 47.60327868852459, "grad_norm": 3.4632163047790527, "learning_rate": 1.1258705071413761e-05, "loss": 0.2177, "step": 14519 }, { "epoch": 47.60655737704918, "grad_norm": 3.1813039779663086, "learning_rate": 1.1257651622858224e-05, "loss": 0.1743, "step": 14520 }, { "epoch": 47.609836065573774, "grad_norm": 4.30763053894043, "learning_rate": 1.1256598160121353e-05, "loss": 0.2161, "step": 14521 }, { "epoch": 47.61311475409836, "grad_norm": 2.9814162254333496, "learning_rate": 1.1255544683215033e-05, "loss": 0.4264, "step": 14522 }, { "epoch": 47.61639344262295, "grad_norm": 3.9112963676452637, "learning_rate": 1.1254491192151143e-05, "loss": 0.4177, "step": 14523 }, { "epoch": 47.61967213114754, "grad_norm": 4.028597354888916, "learning_rate": 1.125343768694156e-05, "loss": 0.4409, "step": 14524 }, { "epoch": 47.622950819672134, "grad_norm": 3.0392048358917236, "learning_rate": 1.1252384167598161e-05, "loss": 0.1668, "step": 14525 }, { "epoch": 47.62622950819672, "grad_norm": 3.1830341815948486, "learning_rate": 1.1251330634132831e-05, "loss": 0.1211, "step": 14526 }, { "epoch": 47.62950819672131, "grad_norm": 3.4360663890838623, "learning_rate": 1.1250277086557443e-05, "loss": 0.3936, "step": 14527 }, { "epoch": 47.6327868852459, "grad_norm": 3.493281841278076, "learning_rate": 1.1249223524883888e-05, "loss": 0.2471, "step": 14528 }, { "epoch": 47.636065573770495, "grad_norm": 3.3186938762664795, "learning_rate": 1.1248169949124035e-05, "loss": 0.2012, "step": 14529 }, { "epoch": 47.63934426229508, "grad_norm": 3.7901055812835693, "learning_rate": 1.1247116359289767e-05, "loss": 0.2227, "step": 14530 }, { "epoch": 47.64262295081967, "grad_norm": 4.651076316833496, "learning_rate": 1.1246062755392966e-05, "loss": 0.4221, "step": 14531 }, { "epoch": 47.64590163934426, "grad_norm": 3.4786527156829834, "learning_rate": 1.1245009137445513e-05, "loss": 0.4149, "step": 14532 }, { "epoch": 47.649180327868855, "grad_norm": 3.8362765312194824, "learning_rate": 1.1243955505459286e-05, "loss": 0.4779, "step": 14533 }, { "epoch": 47.65245901639344, "grad_norm": 4.207703590393066, "learning_rate": 1.1242901859446169e-05, "loss": 0.1877, "step": 14534 }, { "epoch": 47.65573770491803, "grad_norm": 4.003615856170654, "learning_rate": 1.124184819941804e-05, "loss": 0.3381, "step": 14535 }, { "epoch": 47.65901639344262, "grad_norm": 3.6060357093811035, "learning_rate": 1.1240794525386778e-05, "loss": 0.2766, "step": 14536 }, { "epoch": 47.662295081967216, "grad_norm": 3.368607997894287, "learning_rate": 1.1239740837364272e-05, "loss": 0.2316, "step": 14537 }, { "epoch": 47.665573770491804, "grad_norm": 3.2305424213409424, "learning_rate": 1.1238687135362398e-05, "loss": 0.1286, "step": 14538 }, { "epoch": 47.66885245901639, "grad_norm": 3.7294836044311523, "learning_rate": 1.1237633419393042e-05, "loss": 0.299, "step": 14539 }, { "epoch": 47.67213114754098, "grad_norm": 3.290039300918579, "learning_rate": 1.1236579689468076e-05, "loss": 0.2191, "step": 14540 }, { "epoch": 47.675409836065576, "grad_norm": 3.3644583225250244, "learning_rate": 1.1235525945599393e-05, "loss": 0.2353, "step": 14541 }, { "epoch": 47.678688524590164, "grad_norm": 4.085793495178223, "learning_rate": 1.123447218779887e-05, "loss": 0.4342, "step": 14542 }, { "epoch": 47.68196721311475, "grad_norm": 4.2035017013549805, "learning_rate": 1.1233418416078388e-05, "loss": 0.3437, "step": 14543 }, { "epoch": 47.68524590163934, "grad_norm": 3.5035011768341064, "learning_rate": 1.1232364630449832e-05, "loss": 0.2435, "step": 14544 }, { "epoch": 47.68852459016394, "grad_norm": 3.6414976119995117, "learning_rate": 1.1231310830925082e-05, "loss": 0.2079, "step": 14545 }, { "epoch": 47.691803278688525, "grad_norm": 4.356961250305176, "learning_rate": 1.1230257017516024e-05, "loss": 0.3304, "step": 14546 }, { "epoch": 47.69508196721311, "grad_norm": 3.130074977874756, "learning_rate": 1.1229203190234537e-05, "loss": 0.2062, "step": 14547 }, { "epoch": 47.6983606557377, "grad_norm": 3.845027208328247, "learning_rate": 1.1228149349092507e-05, "loss": 0.2332, "step": 14548 }, { "epoch": 47.7016393442623, "grad_norm": 4.179721832275391, "learning_rate": 1.1227095494101815e-05, "loss": 0.4131, "step": 14549 }, { "epoch": 47.704918032786885, "grad_norm": 2.7071011066436768, "learning_rate": 1.122604162527435e-05, "loss": 0.2806, "step": 14550 }, { "epoch": 47.708196721311474, "grad_norm": 3.167740821838379, "learning_rate": 1.1224987742621984e-05, "loss": 0.246, "step": 14551 }, { "epoch": 47.71147540983607, "grad_norm": 3.5779871940612793, "learning_rate": 1.1223933846156613e-05, "loss": 0.4337, "step": 14552 }, { "epoch": 47.71475409836066, "grad_norm": 4.452710151672363, "learning_rate": 1.1222879935890112e-05, "loss": 0.4149, "step": 14553 }, { "epoch": 47.718032786885246, "grad_norm": 3.652921199798584, "learning_rate": 1.1221826011834371e-05, "loss": 0.3446, "step": 14554 }, { "epoch": 47.721311475409834, "grad_norm": 3.3754971027374268, "learning_rate": 1.1220772074001272e-05, "loss": 0.202, "step": 14555 }, { "epoch": 47.72459016393443, "grad_norm": 3.170759439468384, "learning_rate": 1.1219718122402695e-05, "loss": 0.1558, "step": 14556 }, { "epoch": 47.72786885245902, "grad_norm": 4.079977035522461, "learning_rate": 1.121866415705053e-05, "loss": 0.339, "step": 14557 }, { "epoch": 47.731147540983606, "grad_norm": 3.6193604469299316, "learning_rate": 1.1217610177956657e-05, "loss": 0.3137, "step": 14558 }, { "epoch": 47.734426229508195, "grad_norm": 4.136690616607666, "learning_rate": 1.1216556185132966e-05, "loss": 0.2816, "step": 14559 }, { "epoch": 47.73770491803279, "grad_norm": 3.3444559574127197, "learning_rate": 1.1215502178591337e-05, "loss": 0.402, "step": 14560 }, { "epoch": 47.74098360655738, "grad_norm": 3.5867347717285156, "learning_rate": 1.1214448158343658e-05, "loss": 0.3573, "step": 14561 }, { "epoch": 47.74426229508197, "grad_norm": 3.777838706970215, "learning_rate": 1.1213394124401813e-05, "loss": 0.2613, "step": 14562 }, { "epoch": 47.747540983606555, "grad_norm": 4.741560459136963, "learning_rate": 1.1212340076777691e-05, "loss": 0.4971, "step": 14563 }, { "epoch": 47.75081967213115, "grad_norm": 3.861307382583618, "learning_rate": 1.121128601548317e-05, "loss": 0.4311, "step": 14564 }, { "epoch": 47.75409836065574, "grad_norm": 3.518986940383911, "learning_rate": 1.1210231940530141e-05, "loss": 0.2564, "step": 14565 }, { "epoch": 47.75737704918033, "grad_norm": 2.8717916011810303, "learning_rate": 1.1209177851930488e-05, "loss": 0.2779, "step": 14566 }, { "epoch": 47.760655737704916, "grad_norm": 3.3153421878814697, "learning_rate": 1.12081237496961e-05, "loss": 0.2853, "step": 14567 }, { "epoch": 47.76393442622951, "grad_norm": 3.3950648307800293, "learning_rate": 1.120706963383886e-05, "loss": 0.1543, "step": 14568 }, { "epoch": 47.7672131147541, "grad_norm": 3.9134271144866943, "learning_rate": 1.1206015504370653e-05, "loss": 0.3044, "step": 14569 }, { "epoch": 47.77049180327869, "grad_norm": 3.828561305999756, "learning_rate": 1.1204961361303368e-05, "loss": 0.393, "step": 14570 }, { "epoch": 47.773770491803276, "grad_norm": 3.4126439094543457, "learning_rate": 1.120390720464889e-05, "loss": 0.2888, "step": 14571 }, { "epoch": 47.77704918032787, "grad_norm": 3.7304351329803467, "learning_rate": 1.1202853034419108e-05, "loss": 0.4267, "step": 14572 }, { "epoch": 47.78032786885246, "grad_norm": 3.2692832946777344, "learning_rate": 1.1201798850625906e-05, "loss": 0.3966, "step": 14573 }, { "epoch": 47.78360655737705, "grad_norm": 4.123445510864258, "learning_rate": 1.1200744653281175e-05, "loss": 0.3395, "step": 14574 }, { "epoch": 47.78688524590164, "grad_norm": 3.878368854522705, "learning_rate": 1.1199690442396795e-05, "loss": 0.254, "step": 14575 }, { "epoch": 47.79016393442623, "grad_norm": 3.1216773986816406, "learning_rate": 1.1198636217984662e-05, "loss": 0.2453, "step": 14576 }, { "epoch": 47.79344262295082, "grad_norm": 3.4062063694000244, "learning_rate": 1.1197581980056658e-05, "loss": 0.1964, "step": 14577 }, { "epoch": 47.79672131147541, "grad_norm": 3.7168657779693604, "learning_rate": 1.1196527728624672e-05, "loss": 0.2596, "step": 14578 }, { "epoch": 47.8, "grad_norm": 3.0970871448516846, "learning_rate": 1.119547346370059e-05, "loss": 0.1399, "step": 14579 }, { "epoch": 47.80327868852459, "grad_norm": 3.406313180923462, "learning_rate": 1.1194419185296305e-05, "loss": 0.2519, "step": 14580 }, { "epoch": 47.80655737704918, "grad_norm": 3.6018693447113037, "learning_rate": 1.1193364893423702e-05, "loss": 0.182, "step": 14581 }, { "epoch": 47.80983606557377, "grad_norm": 3.230203151702881, "learning_rate": 1.1192310588094666e-05, "loss": 0.2484, "step": 14582 }, { "epoch": 47.81311475409836, "grad_norm": 4.1977410316467285, "learning_rate": 1.1191256269321092e-05, "loss": 0.3799, "step": 14583 }, { "epoch": 47.81639344262295, "grad_norm": 3.0124475955963135, "learning_rate": 1.119020193711486e-05, "loss": 0.3639, "step": 14584 }, { "epoch": 47.81967213114754, "grad_norm": 3.3389410972595215, "learning_rate": 1.1189147591487867e-05, "loss": 0.2485, "step": 14585 }, { "epoch": 47.82295081967213, "grad_norm": 4.373469352722168, "learning_rate": 1.1188093232451997e-05, "loss": 0.1923, "step": 14586 }, { "epoch": 47.82622950819672, "grad_norm": 3.4244067668914795, "learning_rate": 1.1187038860019142e-05, "loss": 0.2791, "step": 14587 }, { "epoch": 47.829508196721314, "grad_norm": 3.920905828475952, "learning_rate": 1.1185984474201188e-05, "loss": 0.2304, "step": 14588 }, { "epoch": 47.8327868852459, "grad_norm": 3.7930095195770264, "learning_rate": 1.1184930075010025e-05, "loss": 0.2881, "step": 14589 }, { "epoch": 47.83606557377049, "grad_norm": 2.9791717529296875, "learning_rate": 1.1183875662457546e-05, "loss": 0.2373, "step": 14590 }, { "epoch": 47.83934426229508, "grad_norm": 3.9881253242492676, "learning_rate": 1.118282123655564e-05, "loss": 0.3484, "step": 14591 }, { "epoch": 47.842622950819674, "grad_norm": 3.534177780151367, "learning_rate": 1.118176679731619e-05, "loss": 0.3019, "step": 14592 }, { "epoch": 47.84590163934426, "grad_norm": 3.223620891571045, "learning_rate": 1.1180712344751092e-05, "loss": 0.2, "step": 14593 }, { "epoch": 47.84918032786885, "grad_norm": 3.537386417388916, "learning_rate": 1.1179657878872236e-05, "loss": 0.2423, "step": 14594 }, { "epoch": 47.85245901639344, "grad_norm": 3.5303683280944824, "learning_rate": 1.117860339969151e-05, "loss": 0.2931, "step": 14595 }, { "epoch": 47.855737704918035, "grad_norm": 3.204582929611206, "learning_rate": 1.1177548907220805e-05, "loss": 0.1965, "step": 14596 }, { "epoch": 47.85901639344262, "grad_norm": 3.7594571113586426, "learning_rate": 1.1176494401472013e-05, "loss": 0.235, "step": 14597 }, { "epoch": 47.86229508196721, "grad_norm": 3.549560546875, "learning_rate": 1.1175439882457022e-05, "loss": 0.3248, "step": 14598 }, { "epoch": 47.86557377049181, "grad_norm": 3.746074676513672, "learning_rate": 1.1174385350187723e-05, "loss": 0.4607, "step": 14599 }, { "epoch": 47.868852459016395, "grad_norm": 3.143900156021118, "learning_rate": 1.1173330804676012e-05, "loss": 0.2645, "step": 14600 }, { "epoch": 47.87213114754098, "grad_norm": 2.8059568405151367, "learning_rate": 1.1172276245933772e-05, "loss": 0.4406, "step": 14601 }, { "epoch": 47.87540983606557, "grad_norm": 3.896127700805664, "learning_rate": 1.1171221673972903e-05, "loss": 0.2355, "step": 14602 }, { "epoch": 47.87868852459017, "grad_norm": 4.476715564727783, "learning_rate": 1.1170167088805289e-05, "loss": 0.4931, "step": 14603 }, { "epoch": 47.881967213114756, "grad_norm": 3.7044501304626465, "learning_rate": 1.1169112490442826e-05, "loss": 0.1547, "step": 14604 }, { "epoch": 47.885245901639344, "grad_norm": 3.483109474182129, "learning_rate": 1.11680578788974e-05, "loss": 0.3539, "step": 14605 }, { "epoch": 47.88852459016393, "grad_norm": 3.7999930381774902, "learning_rate": 1.1167003254180913e-05, "loss": 0.3727, "step": 14606 }, { "epoch": 47.89180327868853, "grad_norm": 3.783015489578247, "learning_rate": 1.1165948616305249e-05, "loss": 0.3869, "step": 14607 }, { "epoch": 47.895081967213116, "grad_norm": 3.355680227279663, "learning_rate": 1.11648939652823e-05, "loss": 0.2834, "step": 14608 }, { "epoch": 47.898360655737704, "grad_norm": 3.8434500694274902, "learning_rate": 1.116383930112396e-05, "loss": 0.2941, "step": 14609 }, { "epoch": 47.90163934426229, "grad_norm": 3.8591439723968506, "learning_rate": 1.1162784623842123e-05, "loss": 0.3325, "step": 14610 }, { "epoch": 47.90491803278689, "grad_norm": 3.0992236137390137, "learning_rate": 1.1161729933448681e-05, "loss": 0.2825, "step": 14611 }, { "epoch": 47.90819672131148, "grad_norm": 3.5580837726593018, "learning_rate": 1.1160675229955525e-05, "loss": 0.1897, "step": 14612 }, { "epoch": 47.911475409836065, "grad_norm": 3.094849109649658, "learning_rate": 1.1159620513374547e-05, "loss": 0.2135, "step": 14613 }, { "epoch": 47.91475409836065, "grad_norm": 3.7739808559417725, "learning_rate": 1.1158565783717646e-05, "loss": 0.2449, "step": 14614 }, { "epoch": 47.91803278688525, "grad_norm": 9.562455177307129, "learning_rate": 1.115751104099671e-05, "loss": 0.1673, "step": 14615 }, { "epoch": 47.92131147540984, "grad_norm": 3.4106249809265137, "learning_rate": 1.1156456285223634e-05, "loss": 0.1775, "step": 14616 }, { "epoch": 47.924590163934425, "grad_norm": 4.0351433753967285, "learning_rate": 1.1155401516410307e-05, "loss": 0.2694, "step": 14617 }, { "epoch": 47.927868852459014, "grad_norm": 3.947737455368042, "learning_rate": 1.1154346734568629e-05, "loss": 0.2719, "step": 14618 }, { "epoch": 47.93114754098361, "grad_norm": 4.1688642501831055, "learning_rate": 1.1153291939710494e-05, "loss": 0.426, "step": 14619 }, { "epoch": 47.9344262295082, "grad_norm": 4.014446258544922, "learning_rate": 1.1152237131847793e-05, "loss": 0.3431, "step": 14620 }, { "epoch": 47.937704918032786, "grad_norm": 3.280695676803589, "learning_rate": 1.1151182310992417e-05, "loss": 0.1716, "step": 14621 }, { "epoch": 47.940983606557374, "grad_norm": 3.3256897926330566, "learning_rate": 1.1150127477156266e-05, "loss": 0.3634, "step": 14622 }, { "epoch": 47.94426229508197, "grad_norm": 3.6256604194641113, "learning_rate": 1.114907263035123e-05, "loss": 0.2787, "step": 14623 }, { "epoch": 47.94754098360656, "grad_norm": 3.520066738128662, "learning_rate": 1.1148017770589209e-05, "loss": 0.3002, "step": 14624 }, { "epoch": 47.950819672131146, "grad_norm": 3.929309844970703, "learning_rate": 1.1146962897882088e-05, "loss": 0.2646, "step": 14625 }, { "epoch": 47.954098360655735, "grad_norm": 4.82392692565918, "learning_rate": 1.1145908012241772e-05, "loss": 0.3832, "step": 14626 }, { "epoch": 47.95737704918033, "grad_norm": 3.4516773223876953, "learning_rate": 1.114485311368015e-05, "loss": 0.3846, "step": 14627 }, { "epoch": 47.96065573770492, "grad_norm": 3.2994143962860107, "learning_rate": 1.1143798202209122e-05, "loss": 0.3204, "step": 14628 }, { "epoch": 47.96393442622951, "grad_norm": 4.110720634460449, "learning_rate": 1.1142743277840575e-05, "loss": 0.2105, "step": 14629 }, { "epoch": 47.967213114754095, "grad_norm": 3.203122854232788, "learning_rate": 1.1141688340586415e-05, "loss": 0.3393, "step": 14630 }, { "epoch": 47.97049180327869, "grad_norm": 4.034682273864746, "learning_rate": 1.1140633390458526e-05, "loss": 0.251, "step": 14631 }, { "epoch": 47.97377049180328, "grad_norm": 4.352898597717285, "learning_rate": 1.1139578427468813e-05, "loss": 0.5331, "step": 14632 }, { "epoch": 47.97704918032787, "grad_norm": 4.060491561889648, "learning_rate": 1.113852345162917e-05, "loss": 0.3157, "step": 14633 }, { "epoch": 47.980327868852456, "grad_norm": 3.6410505771636963, "learning_rate": 1.1137468462951489e-05, "loss": 0.4768, "step": 14634 }, { "epoch": 47.98360655737705, "grad_norm": 3.702552318572998, "learning_rate": 1.1136413461447669e-05, "loss": 0.252, "step": 14635 }, { "epoch": 47.98688524590164, "grad_norm": 3.7591593265533447, "learning_rate": 1.1135358447129603e-05, "loss": 0.3493, "step": 14636 }, { "epoch": 47.99016393442623, "grad_norm": 3.1950087547302246, "learning_rate": 1.1134303420009194e-05, "loss": 0.1852, "step": 14637 }, { "epoch": 47.993442622950816, "grad_norm": 4.1650919914245605, "learning_rate": 1.1133248380098332e-05, "loss": 0.317, "step": 14638 }, { "epoch": 47.99672131147541, "grad_norm": 3.3772895336151123, "learning_rate": 1.1132193327408918e-05, "loss": 0.1852, "step": 14639 }, { "epoch": 48.0, "grad_norm": 3.7330732345581055, "learning_rate": 1.1131138261952845e-05, "loss": 0.2285, "step": 14640 }, { "epoch": 48.00327868852459, "grad_norm": 4.609606742858887, "learning_rate": 1.1130083183742011e-05, "loss": 0.5652, "step": 14641 }, { "epoch": 48.006557377049184, "grad_norm": 3.0412142276763916, "learning_rate": 1.1129028092788319e-05, "loss": 0.2163, "step": 14642 }, { "epoch": 48.00983606557377, "grad_norm": 3.3639349937438965, "learning_rate": 1.1127972989103657e-05, "loss": 0.4316, "step": 14643 }, { "epoch": 48.01311475409836, "grad_norm": 3.3623921871185303, "learning_rate": 1.1126917872699928e-05, "loss": 0.1488, "step": 14644 }, { "epoch": 48.01639344262295, "grad_norm": 4.354724884033203, "learning_rate": 1.1125862743589029e-05, "loss": 0.4242, "step": 14645 }, { "epoch": 48.019672131147544, "grad_norm": 4.451069355010986, "learning_rate": 1.1124807601782856e-05, "loss": 0.1983, "step": 14646 }, { "epoch": 48.02295081967213, "grad_norm": 5.506342887878418, "learning_rate": 1.1123752447293307e-05, "loss": 0.4187, "step": 14647 }, { "epoch": 48.02622950819672, "grad_norm": 3.634218454360962, "learning_rate": 1.1122697280132284e-05, "loss": 0.3539, "step": 14648 }, { "epoch": 48.02950819672131, "grad_norm": 2.402979850769043, "learning_rate": 1.1121642100311678e-05, "loss": 0.3146, "step": 14649 }, { "epoch": 48.032786885245905, "grad_norm": 4.055130958557129, "learning_rate": 1.1120586907843396e-05, "loss": 0.3343, "step": 14650 }, { "epoch": 48.03606557377049, "grad_norm": 3.7928762435913086, "learning_rate": 1.1119531702739325e-05, "loss": 0.3133, "step": 14651 }, { "epoch": 48.03934426229508, "grad_norm": 3.0586111545562744, "learning_rate": 1.1118476485011375e-05, "loss": 0.1726, "step": 14652 }, { "epoch": 48.04262295081967, "grad_norm": 3.408010721206665, "learning_rate": 1.1117421254671439e-05, "loss": 0.3593, "step": 14653 }, { "epoch": 48.045901639344265, "grad_norm": 3.8930740356445312, "learning_rate": 1.1116366011731416e-05, "loss": 0.3767, "step": 14654 }, { "epoch": 48.049180327868854, "grad_norm": 3.373460292816162, "learning_rate": 1.1115310756203203e-05, "loss": 0.1547, "step": 14655 }, { "epoch": 48.05245901639344, "grad_norm": 3.8415091037750244, "learning_rate": 1.1114255488098706e-05, "loss": 0.3058, "step": 14656 }, { "epoch": 48.05573770491803, "grad_norm": 3.7257769107818604, "learning_rate": 1.1113200207429818e-05, "loss": 0.4157, "step": 14657 }, { "epoch": 48.059016393442626, "grad_norm": 3.1282899379730225, "learning_rate": 1.111214491420844e-05, "loss": 0.2197, "step": 14658 }, { "epoch": 48.062295081967214, "grad_norm": 4.0577616691589355, "learning_rate": 1.1111089608446473e-05, "loss": 0.4061, "step": 14659 }, { "epoch": 48.0655737704918, "grad_norm": 4.053954601287842, "learning_rate": 1.1110034290155813e-05, "loss": 0.2775, "step": 14660 }, { "epoch": 48.06885245901639, "grad_norm": 3.176764488220215, "learning_rate": 1.1108978959348364e-05, "loss": 0.2903, "step": 14661 }, { "epoch": 48.072131147540986, "grad_norm": 3.9713022708892822, "learning_rate": 1.1107923616036023e-05, "loss": 0.2557, "step": 14662 }, { "epoch": 48.075409836065575, "grad_norm": 3.6772348880767822, "learning_rate": 1.1106868260230693e-05, "loss": 0.2271, "step": 14663 }, { "epoch": 48.07868852459016, "grad_norm": 3.2707021236419678, "learning_rate": 1.110581289194427e-05, "loss": 0.1938, "step": 14664 }, { "epoch": 48.08196721311475, "grad_norm": 3.4377291202545166, "learning_rate": 1.110475751118866e-05, "loss": 0.2945, "step": 14665 }, { "epoch": 48.08524590163935, "grad_norm": 3.645556688308716, "learning_rate": 1.1103702117975756e-05, "loss": 0.3766, "step": 14666 }, { "epoch": 48.088524590163935, "grad_norm": 2.3749001026153564, "learning_rate": 1.1102646712317468e-05, "loss": 0.1779, "step": 14667 }, { "epoch": 48.09180327868852, "grad_norm": 3.6593873500823975, "learning_rate": 1.1101591294225689e-05, "loss": 0.2844, "step": 14668 }, { "epoch": 48.09508196721311, "grad_norm": 3.311680555343628, "learning_rate": 1.1100535863712321e-05, "loss": 0.3067, "step": 14669 }, { "epoch": 48.09836065573771, "grad_norm": 3.9012198448181152, "learning_rate": 1.1099480420789271e-05, "loss": 0.2772, "step": 14670 }, { "epoch": 48.101639344262296, "grad_norm": 3.749870538711548, "learning_rate": 1.1098424965468432e-05, "loss": 0.2614, "step": 14671 }, { "epoch": 48.104918032786884, "grad_norm": 2.974362373352051, "learning_rate": 1.1097369497761713e-05, "loss": 0.2537, "step": 14672 }, { "epoch": 48.10819672131147, "grad_norm": 3.595614433288574, "learning_rate": 1.1096314017681009e-05, "loss": 0.3341, "step": 14673 }, { "epoch": 48.11147540983607, "grad_norm": 4.2902607917785645, "learning_rate": 1.1095258525238228e-05, "loss": 0.1917, "step": 14674 }, { "epoch": 48.114754098360656, "grad_norm": 3.600860834121704, "learning_rate": 1.1094203020445267e-05, "loss": 0.2635, "step": 14675 }, { "epoch": 48.118032786885244, "grad_norm": 3.7682955265045166, "learning_rate": 1.1093147503314027e-05, "loss": 0.3906, "step": 14676 }, { "epoch": 48.12131147540983, "grad_norm": 3.6151742935180664, "learning_rate": 1.1092091973856416e-05, "loss": 0.256, "step": 14677 }, { "epoch": 48.12459016393443, "grad_norm": 3.4206438064575195, "learning_rate": 1.109103643208433e-05, "loss": 0.3172, "step": 14678 }, { "epoch": 48.12786885245902, "grad_norm": 2.880352258682251, "learning_rate": 1.1089980878009675e-05, "loss": 0.268, "step": 14679 }, { "epoch": 48.131147540983605, "grad_norm": 4.168034553527832, "learning_rate": 1.1088925311644351e-05, "loss": 0.3416, "step": 14680 }, { "epoch": 48.13442622950819, "grad_norm": 2.959402561187744, "learning_rate": 1.1087869733000262e-05, "loss": 0.1535, "step": 14681 }, { "epoch": 48.13770491803279, "grad_norm": 3.453502655029297, "learning_rate": 1.1086814142089313e-05, "loss": 0.2523, "step": 14682 }, { "epoch": 48.14098360655738, "grad_norm": 3.7377212047576904, "learning_rate": 1.1085758538923401e-05, "loss": 0.5347, "step": 14683 }, { "epoch": 48.144262295081965, "grad_norm": 3.859266996383667, "learning_rate": 1.1084702923514437e-05, "loss": 0.3939, "step": 14684 }, { "epoch": 48.14754098360656, "grad_norm": 3.2841134071350098, "learning_rate": 1.1083647295874318e-05, "loss": 0.2573, "step": 14685 }, { "epoch": 48.15081967213115, "grad_norm": 3.7888665199279785, "learning_rate": 1.1082591656014947e-05, "loss": 0.5571, "step": 14686 }, { "epoch": 48.15409836065574, "grad_norm": 2.8287556171417236, "learning_rate": 1.108153600394823e-05, "loss": 0.3016, "step": 14687 }, { "epoch": 48.157377049180326, "grad_norm": 3.3683228492736816, "learning_rate": 1.108048033968607e-05, "loss": 0.5113, "step": 14688 }, { "epoch": 48.16065573770492, "grad_norm": 3.277700424194336, "learning_rate": 1.1079424663240372e-05, "loss": 0.1431, "step": 14689 }, { "epoch": 48.16393442622951, "grad_norm": 3.9261465072631836, "learning_rate": 1.107836897462304e-05, "loss": 0.2244, "step": 14690 }, { "epoch": 48.1672131147541, "grad_norm": 3.0808475017547607, "learning_rate": 1.1077313273845972e-05, "loss": 0.3447, "step": 14691 }, { "epoch": 48.170491803278686, "grad_norm": 3.123466968536377, "learning_rate": 1.107625756092108e-05, "loss": 0.2616, "step": 14692 }, { "epoch": 48.17377049180328, "grad_norm": 3.2073113918304443, "learning_rate": 1.1075201835860266e-05, "loss": 0.2265, "step": 14693 }, { "epoch": 48.17704918032787, "grad_norm": 3.8133182525634766, "learning_rate": 1.107414609867543e-05, "loss": 0.4797, "step": 14694 }, { "epoch": 48.18032786885246, "grad_norm": 3.7700507640838623, "learning_rate": 1.1073090349378482e-05, "loss": 0.2264, "step": 14695 }, { "epoch": 48.18360655737705, "grad_norm": 2.972363233566284, "learning_rate": 1.1072034587981323e-05, "loss": 0.1339, "step": 14696 }, { "epoch": 48.18688524590164, "grad_norm": 4.266655445098877, "learning_rate": 1.1070978814495862e-05, "loss": 0.3638, "step": 14697 }, { "epoch": 48.19016393442623, "grad_norm": 3.9638428688049316, "learning_rate": 1.1069923028934e-05, "loss": 0.5314, "step": 14698 }, { "epoch": 48.19344262295082, "grad_norm": 3.0630929470062256, "learning_rate": 1.1068867231307643e-05, "loss": 0.3978, "step": 14699 }, { "epoch": 48.19672131147541, "grad_norm": 3.2863073348999023, "learning_rate": 1.1067811421628696e-05, "loss": 0.1318, "step": 14700 }, { "epoch": 48.2, "grad_norm": 3.4738824367523193, "learning_rate": 1.1066755599909065e-05, "loss": 0.3163, "step": 14701 }, { "epoch": 48.20327868852459, "grad_norm": 3.5916500091552734, "learning_rate": 1.1065699766160658e-05, "loss": 0.2673, "step": 14702 }, { "epoch": 48.20655737704918, "grad_norm": 3.7215352058410645, "learning_rate": 1.1064643920395375e-05, "loss": 0.1919, "step": 14703 }, { "epoch": 48.20983606557377, "grad_norm": 3.7257795333862305, "learning_rate": 1.1063588062625128e-05, "loss": 0.3414, "step": 14704 }, { "epoch": 48.21311475409836, "grad_norm": 3.4319286346435547, "learning_rate": 1.1062532192861816e-05, "loss": 0.3866, "step": 14705 }, { "epoch": 48.21639344262295, "grad_norm": 3.8890013694763184, "learning_rate": 1.1061476311117351e-05, "loss": 0.3694, "step": 14706 }, { "epoch": 48.21967213114754, "grad_norm": 3.621619462966919, "learning_rate": 1.1060420417403636e-05, "loss": 0.4996, "step": 14707 }, { "epoch": 48.22295081967213, "grad_norm": 3.13468337059021, "learning_rate": 1.1059364511732583e-05, "loss": 0.1823, "step": 14708 }, { "epoch": 48.226229508196724, "grad_norm": 3.311410665512085, "learning_rate": 1.1058308594116088e-05, "loss": 0.2657, "step": 14709 }, { "epoch": 48.22950819672131, "grad_norm": 4.234737873077393, "learning_rate": 1.1057252664566066e-05, "loss": 0.3936, "step": 14710 }, { "epoch": 48.2327868852459, "grad_norm": 5.5829854011535645, "learning_rate": 1.1056196723094419e-05, "loss": 0.4614, "step": 14711 }, { "epoch": 48.23606557377049, "grad_norm": 3.617504835128784, "learning_rate": 1.1055140769713057e-05, "loss": 0.4039, "step": 14712 }, { "epoch": 48.239344262295084, "grad_norm": 3.6727309226989746, "learning_rate": 1.1054084804433886e-05, "loss": 0.2804, "step": 14713 }, { "epoch": 48.24262295081967, "grad_norm": 3.852212905883789, "learning_rate": 1.105302882726881e-05, "loss": 0.2015, "step": 14714 }, { "epoch": 48.24590163934426, "grad_norm": 3.527318000793457, "learning_rate": 1.1051972838229743e-05, "loss": 0.1973, "step": 14715 }, { "epoch": 48.24918032786885, "grad_norm": 3.77529239654541, "learning_rate": 1.1050916837328587e-05, "loss": 0.4531, "step": 14716 }, { "epoch": 48.252459016393445, "grad_norm": 3.2069246768951416, "learning_rate": 1.104986082457725e-05, "loss": 0.2263, "step": 14717 }, { "epoch": 48.25573770491803, "grad_norm": 3.313750982284546, "learning_rate": 1.104880479998764e-05, "loss": 0.3279, "step": 14718 }, { "epoch": 48.25901639344262, "grad_norm": 3.499159097671509, "learning_rate": 1.1047748763571668e-05, "loss": 0.2301, "step": 14719 }, { "epoch": 48.26229508196721, "grad_norm": 3.576624870300293, "learning_rate": 1.1046692715341239e-05, "loss": 0.1678, "step": 14720 }, { "epoch": 48.265573770491805, "grad_norm": 3.616851806640625, "learning_rate": 1.1045636655308261e-05, "loss": 0.2139, "step": 14721 }, { "epoch": 48.268852459016394, "grad_norm": 3.6712276935577393, "learning_rate": 1.104458058348464e-05, "loss": 0.3749, "step": 14722 }, { "epoch": 48.27213114754098, "grad_norm": 3.4011635780334473, "learning_rate": 1.1043524499882288e-05, "loss": 0.2664, "step": 14723 }, { "epoch": 48.27540983606557, "grad_norm": 3.696596622467041, "learning_rate": 1.1042468404513114e-05, "loss": 0.2805, "step": 14724 }, { "epoch": 48.278688524590166, "grad_norm": 3.8623294830322266, "learning_rate": 1.1041412297389023e-05, "loss": 0.2878, "step": 14725 }, { "epoch": 48.281967213114754, "grad_norm": 4.141693592071533, "learning_rate": 1.1040356178521928e-05, "loss": 0.2199, "step": 14726 }, { "epoch": 48.28524590163934, "grad_norm": 3.73075795173645, "learning_rate": 1.1039300047923733e-05, "loss": 0.4369, "step": 14727 }, { "epoch": 48.28852459016394, "grad_norm": 3.0877058506011963, "learning_rate": 1.103824390560635e-05, "loss": 0.1774, "step": 14728 }, { "epoch": 48.291803278688526, "grad_norm": 3.266275644302368, "learning_rate": 1.1037187751581686e-05, "loss": 0.444, "step": 14729 }, { "epoch": 48.295081967213115, "grad_norm": 2.9498894214630127, "learning_rate": 1.1036131585861655e-05, "loss": 0.1844, "step": 14730 }, { "epoch": 48.2983606557377, "grad_norm": 3.4912612438201904, "learning_rate": 1.103507540845816e-05, "loss": 0.2077, "step": 14731 }, { "epoch": 48.3016393442623, "grad_norm": 3.7401585578918457, "learning_rate": 1.1034019219383116e-05, "loss": 0.2408, "step": 14732 }, { "epoch": 48.30491803278689, "grad_norm": 3.978224039077759, "learning_rate": 1.1032963018648428e-05, "loss": 0.3103, "step": 14733 }, { "epoch": 48.308196721311475, "grad_norm": 3.4689700603485107, "learning_rate": 1.103190680626601e-05, "loss": 0.285, "step": 14734 }, { "epoch": 48.31147540983606, "grad_norm": 3.4042179584503174, "learning_rate": 1.103085058224777e-05, "loss": 0.2271, "step": 14735 }, { "epoch": 48.31475409836066, "grad_norm": 3.275588274002075, "learning_rate": 1.1029794346605619e-05, "loss": 0.3858, "step": 14736 }, { "epoch": 48.31803278688525, "grad_norm": 3.3619232177734375, "learning_rate": 1.1028738099351463e-05, "loss": 0.2721, "step": 14737 }, { "epoch": 48.321311475409836, "grad_norm": 3.929647207260132, "learning_rate": 1.1027681840497215e-05, "loss": 0.2617, "step": 14738 }, { "epoch": 48.324590163934424, "grad_norm": 3.8261373043060303, "learning_rate": 1.1026625570054787e-05, "loss": 0.5127, "step": 14739 }, { "epoch": 48.32786885245902, "grad_norm": 2.8964366912841797, "learning_rate": 1.1025569288036088e-05, "loss": 0.1828, "step": 14740 }, { "epoch": 48.33114754098361, "grad_norm": 4.037951469421387, "learning_rate": 1.102451299445303e-05, "loss": 0.2603, "step": 14741 }, { "epoch": 48.334426229508196, "grad_norm": 3.2988100051879883, "learning_rate": 1.102345668931752e-05, "loss": 0.2247, "step": 14742 }, { "epoch": 48.337704918032784, "grad_norm": 3.587336301803589, "learning_rate": 1.1022400372641476e-05, "loss": 0.3248, "step": 14743 }, { "epoch": 48.34098360655738, "grad_norm": 3.373096466064453, "learning_rate": 1.1021344044436801e-05, "loss": 0.2511, "step": 14744 }, { "epoch": 48.34426229508197, "grad_norm": 3.056901216506958, "learning_rate": 1.1020287704715413e-05, "loss": 0.2486, "step": 14745 }, { "epoch": 48.34754098360656, "grad_norm": 2.987833023071289, "learning_rate": 1.1019231353489217e-05, "loss": 0.2651, "step": 14746 }, { "epoch": 48.350819672131145, "grad_norm": 2.978196620941162, "learning_rate": 1.101817499077013e-05, "loss": 0.2668, "step": 14747 }, { "epoch": 48.35409836065574, "grad_norm": 3.5547752380371094, "learning_rate": 1.1017118616570059e-05, "loss": 0.1781, "step": 14748 }, { "epoch": 48.35737704918033, "grad_norm": 3.356990337371826, "learning_rate": 1.1016062230900921e-05, "loss": 0.1757, "step": 14749 }, { "epoch": 48.36065573770492, "grad_norm": 4.080318927764893, "learning_rate": 1.1015005833774624e-05, "loss": 0.3647, "step": 14750 }, { "epoch": 48.363934426229505, "grad_norm": 3.0027318000793457, "learning_rate": 1.101394942520308e-05, "loss": 0.2441, "step": 14751 }, { "epoch": 48.3672131147541, "grad_norm": 3.161269426345825, "learning_rate": 1.1012893005198202e-05, "loss": 0.3897, "step": 14752 }, { "epoch": 48.37049180327869, "grad_norm": 3.3880648612976074, "learning_rate": 1.1011836573771901e-05, "loss": 0.2174, "step": 14753 }, { "epoch": 48.37377049180328, "grad_norm": 4.073993682861328, "learning_rate": 1.1010780130936094e-05, "loss": 0.4229, "step": 14754 }, { "epoch": 48.377049180327866, "grad_norm": 3.3897500038146973, "learning_rate": 1.1009723676702689e-05, "loss": 0.5147, "step": 14755 }, { "epoch": 48.38032786885246, "grad_norm": 4.329742908477783, "learning_rate": 1.1008667211083599e-05, "loss": 0.1767, "step": 14756 }, { "epoch": 48.38360655737705, "grad_norm": 2.9333815574645996, "learning_rate": 1.1007610734090736e-05, "loss": 0.2558, "step": 14757 }, { "epoch": 48.38688524590164, "grad_norm": 3.4310860633850098, "learning_rate": 1.1006554245736018e-05, "loss": 0.3184, "step": 14758 }, { "epoch": 48.390163934426226, "grad_norm": 3.4046998023986816, "learning_rate": 1.1005497746031351e-05, "loss": 0.3653, "step": 14759 }, { "epoch": 48.39344262295082, "grad_norm": 3.3170430660247803, "learning_rate": 1.1004441234988654e-05, "loss": 0.2919, "step": 14760 }, { "epoch": 48.39672131147541, "grad_norm": 3.7996609210968018, "learning_rate": 1.1003384712619838e-05, "loss": 0.2266, "step": 14761 }, { "epoch": 48.4, "grad_norm": 2.840439796447754, "learning_rate": 1.1002328178936813e-05, "loss": 0.2483, "step": 14762 }, { "epoch": 48.40327868852459, "grad_norm": 3.532445192337036, "learning_rate": 1.1001271633951497e-05, "loss": 0.2253, "step": 14763 }, { "epoch": 48.40655737704918, "grad_norm": 3.388481855392456, "learning_rate": 1.1000215077675802e-05, "loss": 0.1667, "step": 14764 }, { "epoch": 48.40983606557377, "grad_norm": 3.600677490234375, "learning_rate": 1.0999158510121643e-05, "loss": 0.1787, "step": 14765 }, { "epoch": 48.41311475409836, "grad_norm": 3.749093770980835, "learning_rate": 1.0998101931300931e-05, "loss": 0.2771, "step": 14766 }, { "epoch": 48.41639344262295, "grad_norm": 3.823268175125122, "learning_rate": 1.0997045341225584e-05, "loss": 0.316, "step": 14767 }, { "epoch": 48.41967213114754, "grad_norm": 3.667705774307251, "learning_rate": 1.0995988739907513e-05, "loss": 0.2143, "step": 14768 }, { "epoch": 48.42295081967213, "grad_norm": 3.439440965652466, "learning_rate": 1.0994932127358635e-05, "loss": 0.158, "step": 14769 }, { "epoch": 48.42622950819672, "grad_norm": 3.638829469680786, "learning_rate": 1.099387550359086e-05, "loss": 0.3676, "step": 14770 }, { "epoch": 48.429508196721315, "grad_norm": 3.6548497676849365, "learning_rate": 1.099281886861611e-05, "loss": 0.2987, "step": 14771 }, { "epoch": 48.4327868852459, "grad_norm": 4.437811374664307, "learning_rate": 1.099176222244629e-05, "loss": 0.3191, "step": 14772 }, { "epoch": 48.43606557377049, "grad_norm": 3.2826218605041504, "learning_rate": 1.0990705565093323e-05, "loss": 0.2999, "step": 14773 }, { "epoch": 48.43934426229508, "grad_norm": 3.4475460052490234, "learning_rate": 1.0989648896569118e-05, "loss": 0.3504, "step": 14774 }, { "epoch": 48.442622950819676, "grad_norm": 2.8254506587982178, "learning_rate": 1.0988592216885595e-05, "loss": 0.0726, "step": 14775 }, { "epoch": 48.445901639344264, "grad_norm": 4.144393444061279, "learning_rate": 1.0987535526054664e-05, "loss": 0.2966, "step": 14776 }, { "epoch": 48.44918032786885, "grad_norm": 3.6786868572235107, "learning_rate": 1.0986478824088245e-05, "loss": 0.2175, "step": 14777 }, { "epoch": 48.45245901639344, "grad_norm": 3.788146495819092, "learning_rate": 1.0985422110998252e-05, "loss": 0.2783, "step": 14778 }, { "epoch": 48.455737704918036, "grad_norm": 3.0542056560516357, "learning_rate": 1.0984365386796598e-05, "loss": 0.2548, "step": 14779 }, { "epoch": 48.459016393442624, "grad_norm": 3.1140332221984863, "learning_rate": 1.0983308651495204e-05, "loss": 0.2699, "step": 14780 }, { "epoch": 48.46229508196721, "grad_norm": 4.011653423309326, "learning_rate": 1.0982251905105981e-05, "loss": 0.3391, "step": 14781 }, { "epoch": 48.4655737704918, "grad_norm": 4.021981716156006, "learning_rate": 1.0981195147640848e-05, "loss": 0.2845, "step": 14782 }, { "epoch": 48.4688524590164, "grad_norm": 3.3484127521514893, "learning_rate": 1.0980138379111716e-05, "loss": 0.2974, "step": 14783 }, { "epoch": 48.472131147540985, "grad_norm": 3.356487274169922, "learning_rate": 1.0979081599530507e-05, "loss": 0.4361, "step": 14784 }, { "epoch": 48.47540983606557, "grad_norm": 3.8928701877593994, "learning_rate": 1.0978024808909136e-05, "loss": 0.22, "step": 14785 }, { "epoch": 48.47868852459016, "grad_norm": 3.3761210441589355, "learning_rate": 1.0976968007259519e-05, "loss": 0.3688, "step": 14786 }, { "epoch": 48.48196721311476, "grad_norm": 3.0727202892303467, "learning_rate": 1.097591119459357e-05, "loss": 0.2965, "step": 14787 }, { "epoch": 48.485245901639345, "grad_norm": 3.4020655155181885, "learning_rate": 1.0974854370923207e-05, "loss": 0.1692, "step": 14788 }, { "epoch": 48.488524590163934, "grad_norm": 4.336202621459961, "learning_rate": 1.0973797536260349e-05, "loss": 0.3144, "step": 14789 }, { "epoch": 48.49180327868852, "grad_norm": 3.979442834854126, "learning_rate": 1.097274069061691e-05, "loss": 0.2293, "step": 14790 }, { "epoch": 48.49508196721312, "grad_norm": 3.2200872898101807, "learning_rate": 1.0971683834004809e-05, "loss": 0.2754, "step": 14791 }, { "epoch": 48.498360655737706, "grad_norm": 4.682220458984375, "learning_rate": 1.097062696643596e-05, "loss": 0.4511, "step": 14792 }, { "epoch": 48.501639344262294, "grad_norm": 3.4235785007476807, "learning_rate": 1.0969570087922288e-05, "loss": 0.279, "step": 14793 }, { "epoch": 48.50491803278688, "grad_norm": 3.9259939193725586, "learning_rate": 1.0968513198475699e-05, "loss": 0.333, "step": 14794 }, { "epoch": 48.50819672131148, "grad_norm": 3.0511577129364014, "learning_rate": 1.096745629810812e-05, "loss": 0.111, "step": 14795 }, { "epoch": 48.511475409836066, "grad_norm": 3.871661901473999, "learning_rate": 1.0966399386831466e-05, "loss": 0.3285, "step": 14796 }, { "epoch": 48.514754098360655, "grad_norm": 3.2997934818267822, "learning_rate": 1.0965342464657653e-05, "loss": 0.243, "step": 14797 }, { "epoch": 48.51803278688524, "grad_norm": 3.4474146366119385, "learning_rate": 1.09642855315986e-05, "loss": 0.1188, "step": 14798 }, { "epoch": 48.52131147540984, "grad_norm": 3.9540634155273438, "learning_rate": 1.0963228587666226e-05, "loss": 0.1733, "step": 14799 }, { "epoch": 48.52459016393443, "grad_norm": 3.6957461833953857, "learning_rate": 1.096217163287245e-05, "loss": 0.2477, "step": 14800 }, { "epoch": 48.527868852459015, "grad_norm": 3.842667579650879, "learning_rate": 1.0961114667229185e-05, "loss": 0.3054, "step": 14801 }, { "epoch": 48.5311475409836, "grad_norm": 3.824855327606201, "learning_rate": 1.0960057690748351e-05, "loss": 0.5299, "step": 14802 }, { "epoch": 48.5344262295082, "grad_norm": 3.401651382446289, "learning_rate": 1.0959000703441874e-05, "loss": 0.4267, "step": 14803 }, { "epoch": 48.53770491803279, "grad_norm": 3.798037528991699, "learning_rate": 1.095794370532166e-05, "loss": 0.363, "step": 14804 }, { "epoch": 48.540983606557376, "grad_norm": 3.7182066440582275, "learning_rate": 1.095688669639964e-05, "loss": 0.303, "step": 14805 }, { "epoch": 48.544262295081964, "grad_norm": 3.1489880084991455, "learning_rate": 1.0955829676687725e-05, "loss": 0.2449, "step": 14806 }, { "epoch": 48.54754098360656, "grad_norm": 3.341674327850342, "learning_rate": 1.0954772646197838e-05, "loss": 0.3246, "step": 14807 }, { "epoch": 48.55081967213115, "grad_norm": 3.6939990520477295, "learning_rate": 1.0953715604941897e-05, "loss": 0.3414, "step": 14808 }, { "epoch": 48.554098360655736, "grad_norm": 3.0815911293029785, "learning_rate": 1.0952658552931822e-05, "loss": 0.2444, "step": 14809 }, { "epoch": 48.557377049180324, "grad_norm": 2.5497207641601562, "learning_rate": 1.0951601490179528e-05, "loss": 0.0809, "step": 14810 }, { "epoch": 48.56065573770492, "grad_norm": 3.658667802810669, "learning_rate": 1.095054441669694e-05, "loss": 0.3771, "step": 14811 }, { "epoch": 48.56393442622951, "grad_norm": 2.7792537212371826, "learning_rate": 1.0949487332495977e-05, "loss": 0.2341, "step": 14812 }, { "epoch": 48.5672131147541, "grad_norm": 2.8942716121673584, "learning_rate": 1.0948430237588554e-05, "loss": 0.2146, "step": 14813 }, { "epoch": 48.570491803278685, "grad_norm": 3.556361436843872, "learning_rate": 1.0947373131986594e-05, "loss": 0.1377, "step": 14814 }, { "epoch": 48.57377049180328, "grad_norm": 3.8223752975463867, "learning_rate": 1.0946316015702018e-05, "loss": 0.4381, "step": 14815 }, { "epoch": 48.57704918032787, "grad_norm": 3.5773885250091553, "learning_rate": 1.0945258888746745e-05, "loss": 0.2832, "step": 14816 }, { "epoch": 48.58032786885246, "grad_norm": 2.98926043510437, "learning_rate": 1.0944201751132697e-05, "loss": 0.1446, "step": 14817 }, { "epoch": 48.58360655737705, "grad_norm": 3.088284730911255, "learning_rate": 1.094314460287179e-05, "loss": 0.332, "step": 14818 }, { "epoch": 48.58688524590164, "grad_norm": 3.141519546508789, "learning_rate": 1.0942087443975949e-05, "loss": 0.1894, "step": 14819 }, { "epoch": 48.59016393442623, "grad_norm": 4.306888580322266, "learning_rate": 1.0941030274457089e-05, "loss": 0.2697, "step": 14820 }, { "epoch": 48.59344262295082, "grad_norm": 3.2086122035980225, "learning_rate": 1.0939973094327137e-05, "loss": 0.2004, "step": 14821 }, { "epoch": 48.59672131147541, "grad_norm": 4.313432216644287, "learning_rate": 1.0938915903598008e-05, "loss": 0.3485, "step": 14822 }, { "epoch": 48.6, "grad_norm": 4.062161445617676, "learning_rate": 1.0937858702281631e-05, "loss": 0.3246, "step": 14823 }, { "epoch": 48.60327868852459, "grad_norm": 3.299473524093628, "learning_rate": 1.0936801490389919e-05, "loss": 0.3476, "step": 14824 }, { "epoch": 48.60655737704918, "grad_norm": 2.837022542953491, "learning_rate": 1.0935744267934798e-05, "loss": 0.2059, "step": 14825 }, { "epoch": 48.609836065573774, "grad_norm": 3.1437647342681885, "learning_rate": 1.0934687034928186e-05, "loss": 0.314, "step": 14826 }, { "epoch": 48.61311475409836, "grad_norm": 3.2017745971679688, "learning_rate": 1.0933629791382006e-05, "loss": 0.3328, "step": 14827 }, { "epoch": 48.61639344262295, "grad_norm": 3.1418917179107666, "learning_rate": 1.0932572537308177e-05, "loss": 0.2686, "step": 14828 }, { "epoch": 48.61967213114754, "grad_norm": 3.366265058517456, "learning_rate": 1.0931515272718627e-05, "loss": 0.3017, "step": 14829 }, { "epoch": 48.622950819672134, "grad_norm": 2.8110973834991455, "learning_rate": 1.093045799762527e-05, "loss": 0.2403, "step": 14830 }, { "epoch": 48.62622950819672, "grad_norm": 3.5637075901031494, "learning_rate": 1.0929400712040035e-05, "loss": 0.5733, "step": 14831 }, { "epoch": 48.62950819672131, "grad_norm": 3.2368760108947754, "learning_rate": 1.0928343415974839e-05, "loss": 0.2994, "step": 14832 }, { "epoch": 48.6327868852459, "grad_norm": 3.2097103595733643, "learning_rate": 1.0927286109441603e-05, "loss": 0.1748, "step": 14833 }, { "epoch": 48.636065573770495, "grad_norm": 4.25282621383667, "learning_rate": 1.0926228792452259e-05, "loss": 0.4066, "step": 14834 }, { "epoch": 48.63934426229508, "grad_norm": 3.5865354537963867, "learning_rate": 1.0925171465018715e-05, "loss": 0.3416, "step": 14835 }, { "epoch": 48.64262295081967, "grad_norm": 3.7058026790618896, "learning_rate": 1.0924114127152906e-05, "loss": 0.3331, "step": 14836 }, { "epoch": 48.64590163934426, "grad_norm": 2.8221547603607178, "learning_rate": 1.0923056778866744e-05, "loss": 0.2378, "step": 14837 }, { "epoch": 48.649180327868855, "grad_norm": 3.948164463043213, "learning_rate": 1.0921999420172164e-05, "loss": 0.193, "step": 14838 }, { "epoch": 48.65245901639344, "grad_norm": 3.3384721279144287, "learning_rate": 1.092094205108108e-05, "loss": 0.5411, "step": 14839 }, { "epoch": 48.65573770491803, "grad_norm": 3.031398296356201, "learning_rate": 1.0919884671605414e-05, "loss": 0.1539, "step": 14840 }, { "epoch": 48.65901639344262, "grad_norm": 5.534943580627441, "learning_rate": 1.0918827281757094e-05, "loss": 0.2673, "step": 14841 }, { "epoch": 48.662295081967216, "grad_norm": 3.501580238342285, "learning_rate": 1.091776988154804e-05, "loss": 0.2393, "step": 14842 }, { "epoch": 48.665573770491804, "grad_norm": 2.727065086364746, "learning_rate": 1.0916712470990176e-05, "loss": 0.1888, "step": 14843 }, { "epoch": 48.66885245901639, "grad_norm": 4.187394142150879, "learning_rate": 1.0915655050095426e-05, "loss": 0.1991, "step": 14844 }, { "epoch": 48.67213114754098, "grad_norm": 3.133451461791992, "learning_rate": 1.0914597618875714e-05, "loss": 0.3303, "step": 14845 }, { "epoch": 48.675409836065576, "grad_norm": 2.813904285430908, "learning_rate": 1.0913540177342962e-05, "loss": 0.2258, "step": 14846 }, { "epoch": 48.678688524590164, "grad_norm": 3.368889093399048, "learning_rate": 1.0912482725509096e-05, "loss": 0.2523, "step": 14847 }, { "epoch": 48.68196721311475, "grad_norm": 3.256239175796509, "learning_rate": 1.0911425263386037e-05, "loss": 0.2685, "step": 14848 }, { "epoch": 48.68524590163934, "grad_norm": 2.999687433242798, "learning_rate": 1.0910367790985712e-05, "loss": 0.1073, "step": 14849 }, { "epoch": 48.68852459016394, "grad_norm": 3.5899670124053955, "learning_rate": 1.0909310308320042e-05, "loss": 0.3188, "step": 14850 }, { "epoch": 48.691803278688525, "grad_norm": 3.272210121154785, "learning_rate": 1.0908252815400955e-05, "loss": 0.1721, "step": 14851 }, { "epoch": 48.69508196721311, "grad_norm": 4.152721405029297, "learning_rate": 1.0907195312240372e-05, "loss": 0.2218, "step": 14852 }, { "epoch": 48.6983606557377, "grad_norm": 3.1870710849761963, "learning_rate": 1.0906137798850218e-05, "loss": 0.3111, "step": 14853 }, { "epoch": 48.7016393442623, "grad_norm": 3.7961905002593994, "learning_rate": 1.0905080275242421e-05, "loss": 0.1954, "step": 14854 }, { "epoch": 48.704918032786885, "grad_norm": 3.755669355392456, "learning_rate": 1.0904022741428899e-05, "loss": 0.2878, "step": 14855 }, { "epoch": 48.708196721311474, "grad_norm": 3.3471522331237793, "learning_rate": 1.090296519742158e-05, "loss": 0.5836, "step": 14856 }, { "epoch": 48.71147540983607, "grad_norm": 3.4074134826660156, "learning_rate": 1.0901907643232392e-05, "loss": 0.431, "step": 14857 }, { "epoch": 48.71475409836066, "grad_norm": 3.411520481109619, "learning_rate": 1.0900850078873258e-05, "loss": 0.2193, "step": 14858 }, { "epoch": 48.718032786885246, "grad_norm": 3.3238959312438965, "learning_rate": 1.0899792504356102e-05, "loss": 0.251, "step": 14859 }, { "epoch": 48.721311475409834, "grad_norm": 3.226102590560913, "learning_rate": 1.0898734919692849e-05, "loss": 0.2902, "step": 14860 }, { "epoch": 48.72459016393443, "grad_norm": 4.088665008544922, "learning_rate": 1.0897677324895426e-05, "loss": 0.1996, "step": 14861 }, { "epoch": 48.72786885245902, "grad_norm": 3.791637897491455, "learning_rate": 1.0896619719975759e-05, "loss": 0.3341, "step": 14862 }, { "epoch": 48.731147540983606, "grad_norm": 3.6338558197021484, "learning_rate": 1.089556210494577e-05, "loss": 0.2834, "step": 14863 }, { "epoch": 48.734426229508195, "grad_norm": 4.094105243682861, "learning_rate": 1.089450447981739e-05, "loss": 0.2848, "step": 14864 }, { "epoch": 48.73770491803279, "grad_norm": 3.8653175830841064, "learning_rate": 1.0893446844602543e-05, "loss": 0.2285, "step": 14865 }, { "epoch": 48.74098360655738, "grad_norm": 3.542086124420166, "learning_rate": 1.0892389199313148e-05, "loss": 0.2342, "step": 14866 }, { "epoch": 48.74426229508197, "grad_norm": 3.8211894035339355, "learning_rate": 1.0891331543961142e-05, "loss": 0.2545, "step": 14867 }, { "epoch": 48.747540983606555, "grad_norm": 3.562241554260254, "learning_rate": 1.0890273878558444e-05, "loss": 0.303, "step": 14868 }, { "epoch": 48.75081967213115, "grad_norm": 3.165472984313965, "learning_rate": 1.0889216203116984e-05, "loss": 0.2848, "step": 14869 }, { "epoch": 48.75409836065574, "grad_norm": 2.985137701034546, "learning_rate": 1.0888158517648684e-05, "loss": 0.2605, "step": 14870 }, { "epoch": 48.75737704918033, "grad_norm": 5.798984527587891, "learning_rate": 1.0887100822165476e-05, "loss": 0.182, "step": 14871 }, { "epoch": 48.760655737704916, "grad_norm": 3.507720947265625, "learning_rate": 1.0886043116679281e-05, "loss": 0.2744, "step": 14872 }, { "epoch": 48.76393442622951, "grad_norm": 3.1335349082946777, "learning_rate": 1.088498540120203e-05, "loss": 0.4875, "step": 14873 }, { "epoch": 48.7672131147541, "grad_norm": 2.9197428226470947, "learning_rate": 1.088392767574565e-05, "loss": 0.1529, "step": 14874 }, { "epoch": 48.77049180327869, "grad_norm": 3.676163673400879, "learning_rate": 1.0882869940322065e-05, "loss": 0.2962, "step": 14875 }, { "epoch": 48.773770491803276, "grad_norm": 12.14933967590332, "learning_rate": 1.0881812194943202e-05, "loss": 0.291, "step": 14876 }, { "epoch": 48.77704918032787, "grad_norm": 3.8851027488708496, "learning_rate": 1.0880754439620993e-05, "loss": 0.3531, "step": 14877 }, { "epoch": 48.78032786885246, "grad_norm": 3.2743031978607178, "learning_rate": 1.0879696674367362e-05, "loss": 0.3304, "step": 14878 }, { "epoch": 48.78360655737705, "grad_norm": 4.377575874328613, "learning_rate": 1.0878638899194232e-05, "loss": 0.2664, "step": 14879 }, { "epoch": 48.78688524590164, "grad_norm": 3.018202304840088, "learning_rate": 1.087758111411354e-05, "loss": 0.1746, "step": 14880 }, { "epoch": 48.79016393442623, "grad_norm": 4.810310363769531, "learning_rate": 1.0876523319137204e-05, "loss": 0.4026, "step": 14881 }, { "epoch": 48.79344262295082, "grad_norm": 3.209542989730835, "learning_rate": 1.0875465514277159e-05, "loss": 0.3382, "step": 14882 }, { "epoch": 48.79672131147541, "grad_norm": 4.246074676513672, "learning_rate": 1.0874407699545329e-05, "loss": 0.2463, "step": 14883 }, { "epoch": 48.8, "grad_norm": 3.6235010623931885, "learning_rate": 1.087334987495364e-05, "loss": 0.4754, "step": 14884 }, { "epoch": 48.80327868852459, "grad_norm": 3.8426010608673096, "learning_rate": 1.0872292040514027e-05, "loss": 0.292, "step": 14885 }, { "epoch": 48.80655737704918, "grad_norm": 3.7657907009124756, "learning_rate": 1.0871234196238415e-05, "loss": 0.2328, "step": 14886 }, { "epoch": 48.80983606557377, "grad_norm": 3.8123738765716553, "learning_rate": 1.0870176342138728e-05, "loss": 0.2752, "step": 14887 }, { "epoch": 48.81311475409836, "grad_norm": 3.418037176132202, "learning_rate": 1.08691184782269e-05, "loss": 0.2191, "step": 14888 }, { "epoch": 48.81639344262295, "grad_norm": 3.8755905628204346, "learning_rate": 1.0868060604514859e-05, "loss": 0.231, "step": 14889 }, { "epoch": 48.81967213114754, "grad_norm": 3.3904056549072266, "learning_rate": 1.086700272101453e-05, "loss": 0.2529, "step": 14890 }, { "epoch": 48.82295081967213, "grad_norm": 3.598325490951538, "learning_rate": 1.0865944827737846e-05, "loss": 0.1288, "step": 14891 }, { "epoch": 48.82622950819672, "grad_norm": 3.2913873195648193, "learning_rate": 1.086488692469673e-05, "loss": 0.2369, "step": 14892 }, { "epoch": 48.829508196721314, "grad_norm": 2.8896679878234863, "learning_rate": 1.086382901190312e-05, "loss": 0.1189, "step": 14893 }, { "epoch": 48.8327868852459, "grad_norm": 3.596196174621582, "learning_rate": 1.0862771089368935e-05, "loss": 0.1914, "step": 14894 }, { "epoch": 48.83606557377049, "grad_norm": 3.1902177333831787, "learning_rate": 1.0861713157106113e-05, "loss": 0.2476, "step": 14895 }, { "epoch": 48.83934426229508, "grad_norm": 3.883382558822632, "learning_rate": 1.0860655215126575e-05, "loss": 0.4392, "step": 14896 }, { "epoch": 48.842622950819674, "grad_norm": 4.064551830291748, "learning_rate": 1.0859597263442256e-05, "loss": 0.5251, "step": 14897 }, { "epoch": 48.84590163934426, "grad_norm": 3.7526133060455322, "learning_rate": 1.0858539302065086e-05, "loss": 0.2302, "step": 14898 }, { "epoch": 48.84918032786885, "grad_norm": 4.01898717880249, "learning_rate": 1.0857481331006991e-05, "loss": 0.283, "step": 14899 }, { "epoch": 48.85245901639344, "grad_norm": 3.3686513900756836, "learning_rate": 1.08564233502799e-05, "loss": 0.2337, "step": 14900 }, { "epoch": 48.855737704918035, "grad_norm": 3.3606789112091064, "learning_rate": 1.0855365359895751e-05, "loss": 0.4489, "step": 14901 }, { "epoch": 48.85901639344262, "grad_norm": 3.1213576793670654, "learning_rate": 1.0854307359866464e-05, "loss": 0.1518, "step": 14902 }, { "epoch": 48.86229508196721, "grad_norm": 3.281975507736206, "learning_rate": 1.0853249350203977e-05, "loss": 0.252, "step": 14903 }, { "epoch": 48.86557377049181, "grad_norm": 3.7608370780944824, "learning_rate": 1.0852191330920214e-05, "loss": 0.2564, "step": 14904 }, { "epoch": 48.868852459016395, "grad_norm": 3.489192247390747, "learning_rate": 1.0851133302027106e-05, "loss": 0.1384, "step": 14905 }, { "epoch": 48.87213114754098, "grad_norm": 3.170158863067627, "learning_rate": 1.0850075263536588e-05, "loss": 0.2299, "step": 14906 }, { "epoch": 48.87540983606557, "grad_norm": 4.1134114265441895, "learning_rate": 1.0849017215460584e-05, "loss": 0.3003, "step": 14907 }, { "epoch": 48.87868852459017, "grad_norm": 3.1270647048950195, "learning_rate": 1.0847959157811032e-05, "loss": 0.2429, "step": 14908 }, { "epoch": 48.881967213114756, "grad_norm": 3.6419475078582764, "learning_rate": 1.0846901090599856e-05, "loss": 0.1876, "step": 14909 }, { "epoch": 48.885245901639344, "grad_norm": 3.149592399597168, "learning_rate": 1.084584301383899e-05, "loss": 0.2159, "step": 14910 }, { "epoch": 48.88852459016393, "grad_norm": 3.895092725753784, "learning_rate": 1.0844784927540365e-05, "loss": 0.3726, "step": 14911 }, { "epoch": 48.89180327868853, "grad_norm": 3.8938708305358887, "learning_rate": 1.084372683171591e-05, "loss": 0.2744, "step": 14912 }, { "epoch": 48.895081967213116, "grad_norm": 3.5328147411346436, "learning_rate": 1.0842668726377564e-05, "loss": 0.284, "step": 14913 }, { "epoch": 48.898360655737704, "grad_norm": 3.852393865585327, "learning_rate": 1.0841610611537246e-05, "loss": 0.3309, "step": 14914 }, { "epoch": 48.90163934426229, "grad_norm": 4.5284013748168945, "learning_rate": 1.0840552487206893e-05, "loss": 0.2478, "step": 14915 }, { "epoch": 48.90491803278689, "grad_norm": 3.184591293334961, "learning_rate": 1.083949435339844e-05, "loss": 0.2523, "step": 14916 }, { "epoch": 48.90819672131148, "grad_norm": 3.4646475315093994, "learning_rate": 1.0838436210123815e-05, "loss": 0.2162, "step": 14917 }, { "epoch": 48.911475409836065, "grad_norm": 3.4627532958984375, "learning_rate": 1.0837378057394948e-05, "loss": 0.2746, "step": 14918 }, { "epoch": 48.91475409836065, "grad_norm": 3.9220242500305176, "learning_rate": 1.0836319895223774e-05, "loss": 0.2999, "step": 14919 }, { "epoch": 48.91803278688525, "grad_norm": 4.091355323791504, "learning_rate": 1.0835261723622222e-05, "loss": 0.221, "step": 14920 }, { "epoch": 48.92131147540984, "grad_norm": 5.899065971374512, "learning_rate": 1.0834203542602228e-05, "loss": 0.4166, "step": 14921 }, { "epoch": 48.924590163934425, "grad_norm": 3.287881851196289, "learning_rate": 1.083314535217572e-05, "loss": 0.3317, "step": 14922 }, { "epoch": 48.927868852459014, "grad_norm": 3.3492000102996826, "learning_rate": 1.0832087152354634e-05, "loss": 0.2068, "step": 14923 }, { "epoch": 48.93114754098361, "grad_norm": 3.467344284057617, "learning_rate": 1.0831028943150897e-05, "loss": 0.1716, "step": 14924 }, { "epoch": 48.9344262295082, "grad_norm": 3.675718307495117, "learning_rate": 1.0829970724576446e-05, "loss": 0.4531, "step": 14925 }, { "epoch": 48.937704918032786, "grad_norm": 3.799344539642334, "learning_rate": 1.0828912496643213e-05, "loss": 0.3196, "step": 14926 }, { "epoch": 48.940983606557374, "grad_norm": 3.1089632511138916, "learning_rate": 1.0827854259363131e-05, "loss": 0.3103, "step": 14927 }, { "epoch": 48.94426229508197, "grad_norm": 3.7627272605895996, "learning_rate": 1.0826796012748127e-05, "loss": 0.2575, "step": 14928 }, { "epoch": 48.94754098360656, "grad_norm": 4.573881149291992, "learning_rate": 1.0825737756810142e-05, "loss": 0.3099, "step": 14929 }, { "epoch": 48.950819672131146, "grad_norm": 3.566329002380371, "learning_rate": 1.0824679491561106e-05, "loss": 0.4078, "step": 14930 }, { "epoch": 48.954098360655735, "grad_norm": 3.2441303730010986, "learning_rate": 1.0823621217012948e-05, "loss": 0.2361, "step": 14931 }, { "epoch": 48.95737704918033, "grad_norm": 3.93794584274292, "learning_rate": 1.0822562933177607e-05, "loss": 0.3376, "step": 14932 }, { "epoch": 48.96065573770492, "grad_norm": 4.016870975494385, "learning_rate": 1.0821504640067012e-05, "loss": 0.2603, "step": 14933 }, { "epoch": 48.96393442622951, "grad_norm": 3.409284830093384, "learning_rate": 1.08204463376931e-05, "loss": 0.2519, "step": 14934 }, { "epoch": 48.967213114754095, "grad_norm": 2.968881130218506, "learning_rate": 1.0819388026067799e-05, "loss": 0.3454, "step": 14935 }, { "epoch": 48.97049180327869, "grad_norm": 4.050252437591553, "learning_rate": 1.081832970520305e-05, "loss": 0.3452, "step": 14936 }, { "epoch": 48.97377049180328, "grad_norm": 3.580097198486328, "learning_rate": 1.081727137511078e-05, "loss": 0.2882, "step": 14937 }, { "epoch": 48.97704918032787, "grad_norm": 3.7891476154327393, "learning_rate": 1.0816213035802924e-05, "loss": 0.2544, "step": 14938 }, { "epoch": 48.980327868852456, "grad_norm": 2.9945194721221924, "learning_rate": 1.0815154687291422e-05, "loss": 0.2812, "step": 14939 }, { "epoch": 48.98360655737705, "grad_norm": 3.0377249717712402, "learning_rate": 1.08140963295882e-05, "loss": 0.3615, "step": 14940 }, { "epoch": 48.98688524590164, "grad_norm": 3.355872869491577, "learning_rate": 1.0813037962705197e-05, "loss": 0.2215, "step": 14941 }, { "epoch": 48.99016393442623, "grad_norm": 2.918261766433716, "learning_rate": 1.0811979586654343e-05, "loss": 0.1915, "step": 14942 }, { "epoch": 48.993442622950816, "grad_norm": 3.7467453479766846, "learning_rate": 1.081092120144758e-05, "loss": 0.2499, "step": 14943 }, { "epoch": 48.99672131147541, "grad_norm": 3.6326863765716553, "learning_rate": 1.0809862807096834e-05, "loss": 0.434, "step": 14944 }, { "epoch": 49.0, "grad_norm": 3.504875898361206, "learning_rate": 1.0808804403614044e-05, "loss": 0.5291, "step": 14945 }, { "epoch": 49.00327868852459, "grad_norm": 3.0954182147979736, "learning_rate": 1.080774599101114e-05, "loss": 0.1444, "step": 14946 }, { "epoch": 49.006557377049184, "grad_norm": 3.000607967376709, "learning_rate": 1.0806687569300065e-05, "loss": 0.2481, "step": 14947 }, { "epoch": 49.00983606557377, "grad_norm": 3.8500332832336426, "learning_rate": 1.0805629138492746e-05, "loss": 0.1651, "step": 14948 }, { "epoch": 49.01311475409836, "grad_norm": 3.754152774810791, "learning_rate": 1.0804570698601122e-05, "loss": 0.3117, "step": 14949 }, { "epoch": 49.01639344262295, "grad_norm": 3.386084794998169, "learning_rate": 1.0803512249637125e-05, "loss": 0.2739, "step": 14950 }, { "epoch": 49.019672131147544, "grad_norm": 3.9043946266174316, "learning_rate": 1.0802453791612695e-05, "loss": 0.284, "step": 14951 }, { "epoch": 49.02295081967213, "grad_norm": 3.073136568069458, "learning_rate": 1.080139532453976e-05, "loss": 0.3425, "step": 14952 }, { "epoch": 49.02622950819672, "grad_norm": 2.724865198135376, "learning_rate": 1.0800336848430264e-05, "loss": 0.391, "step": 14953 }, { "epoch": 49.02950819672131, "grad_norm": 3.6130101680755615, "learning_rate": 1.0799278363296133e-05, "loss": 0.1958, "step": 14954 }, { "epoch": 49.032786885245905, "grad_norm": 2.9775781631469727, "learning_rate": 1.0798219869149311e-05, "loss": 0.2105, "step": 14955 }, { "epoch": 49.03606557377049, "grad_norm": 3.7633490562438965, "learning_rate": 1.0797161366001731e-05, "loss": 0.1876, "step": 14956 }, { "epoch": 49.03934426229508, "grad_norm": 3.729083299636841, "learning_rate": 1.0796102853865324e-05, "loss": 0.2127, "step": 14957 }, { "epoch": 49.04262295081967, "grad_norm": 3.171567916870117, "learning_rate": 1.0795044332752034e-05, "loss": 0.1588, "step": 14958 }, { "epoch": 49.045901639344265, "grad_norm": 4.284056186676025, "learning_rate": 1.079398580267379e-05, "loss": 0.4068, "step": 14959 }, { "epoch": 49.049180327868854, "grad_norm": 3.594139337539673, "learning_rate": 1.079292726364253e-05, "loss": 0.2507, "step": 14960 }, { "epoch": 49.05245901639344, "grad_norm": 3.692965030670166, "learning_rate": 1.079186871567019e-05, "loss": 0.385, "step": 14961 }, { "epoch": 49.05573770491803, "grad_norm": 3.5684070587158203, "learning_rate": 1.079081015876871e-05, "loss": 0.1727, "step": 14962 }, { "epoch": 49.059016393442626, "grad_norm": 3.1264264583587646, "learning_rate": 1.078975159295002e-05, "loss": 0.2344, "step": 14963 }, { "epoch": 49.062295081967214, "grad_norm": 3.3670902252197266, "learning_rate": 1.0788693018226062e-05, "loss": 0.1521, "step": 14964 }, { "epoch": 49.0655737704918, "grad_norm": 4.0331573486328125, "learning_rate": 1.078763443460877e-05, "loss": 0.2699, "step": 14965 }, { "epoch": 49.06885245901639, "grad_norm": 3.3312668800354004, "learning_rate": 1.0786575842110076e-05, "loss": 0.1725, "step": 14966 }, { "epoch": 49.072131147540986, "grad_norm": 3.3116674423217773, "learning_rate": 1.0785517240741927e-05, "loss": 0.3463, "step": 14967 }, { "epoch": 49.075409836065575, "grad_norm": 3.762136697769165, "learning_rate": 1.0784458630516252e-05, "loss": 0.2386, "step": 14968 }, { "epoch": 49.07868852459016, "grad_norm": 4.364776134490967, "learning_rate": 1.0783400011444993e-05, "loss": 0.3446, "step": 14969 }, { "epoch": 49.08196721311475, "grad_norm": 3.184999465942383, "learning_rate": 1.0782341383540082e-05, "loss": 0.3306, "step": 14970 }, { "epoch": 49.08524590163935, "grad_norm": 3.3517372608184814, "learning_rate": 1.078128274681346e-05, "loss": 0.154, "step": 14971 }, { "epoch": 49.088524590163935, "grad_norm": 3.344411611557007, "learning_rate": 1.0780224101277061e-05, "loss": 0.3553, "step": 14972 }, { "epoch": 49.09180327868852, "grad_norm": 3.306410312652588, "learning_rate": 1.0779165446942826e-05, "loss": 0.3892, "step": 14973 }, { "epoch": 49.09508196721311, "grad_norm": 2.7553107738494873, "learning_rate": 1.0778106783822687e-05, "loss": 0.1162, "step": 14974 }, { "epoch": 49.09836065573771, "grad_norm": 3.525282382965088, "learning_rate": 1.077704811192859e-05, "loss": 0.3785, "step": 14975 }, { "epoch": 49.101639344262296, "grad_norm": 2.792113780975342, "learning_rate": 1.0775989431272463e-05, "loss": 0.2544, "step": 14976 }, { "epoch": 49.104918032786884, "grad_norm": 3.6334171295166016, "learning_rate": 1.0774930741866249e-05, "loss": 0.3928, "step": 14977 }, { "epoch": 49.10819672131147, "grad_norm": 3.3306984901428223, "learning_rate": 1.0773872043721886e-05, "loss": 0.2349, "step": 14978 }, { "epoch": 49.11147540983607, "grad_norm": 4.682725429534912, "learning_rate": 1.0772813336851314e-05, "loss": 0.2498, "step": 14979 }, { "epoch": 49.114754098360656, "grad_norm": 3.9689769744873047, "learning_rate": 1.0771754621266466e-05, "loss": 0.1552, "step": 14980 }, { "epoch": 49.118032786885244, "grad_norm": 3.8578739166259766, "learning_rate": 1.0770695896979281e-05, "loss": 0.1745, "step": 14981 }, { "epoch": 49.12131147540983, "grad_norm": 3.7052721977233887, "learning_rate": 1.07696371640017e-05, "loss": 0.2283, "step": 14982 }, { "epoch": 49.12459016393443, "grad_norm": 3.4010603427886963, "learning_rate": 1.0768578422345658e-05, "loss": 0.2225, "step": 14983 }, { "epoch": 49.12786885245902, "grad_norm": 3.2259132862091064, "learning_rate": 1.0767519672023096e-05, "loss": 0.2843, "step": 14984 }, { "epoch": 49.131147540983605, "grad_norm": 2.9965851306915283, "learning_rate": 1.076646091304595e-05, "loss": 0.1929, "step": 14985 }, { "epoch": 49.13442622950819, "grad_norm": 3.600222587585449, "learning_rate": 1.0765402145426164e-05, "loss": 0.2478, "step": 14986 }, { "epoch": 49.13770491803279, "grad_norm": 3.540766716003418, "learning_rate": 1.0764343369175672e-05, "loss": 0.2367, "step": 14987 }, { "epoch": 49.14098360655738, "grad_norm": 3.674654483795166, "learning_rate": 1.0763284584306412e-05, "loss": 0.2904, "step": 14988 }, { "epoch": 49.144262295081965, "grad_norm": 3.4948339462280273, "learning_rate": 1.0762225790830326e-05, "loss": 0.2578, "step": 14989 }, { "epoch": 49.14754098360656, "grad_norm": 3.5007805824279785, "learning_rate": 1.076116698875935e-05, "loss": 0.2613, "step": 14990 }, { "epoch": 49.15081967213115, "grad_norm": 3.7311246395111084, "learning_rate": 1.0760108178105425e-05, "loss": 0.1887, "step": 14991 }, { "epoch": 49.15409836065574, "grad_norm": 3.8742380142211914, "learning_rate": 1.0759049358880493e-05, "loss": 0.2213, "step": 14992 }, { "epoch": 49.157377049180326, "grad_norm": 3.7204017639160156, "learning_rate": 1.0757990531096487e-05, "loss": 0.2038, "step": 14993 }, { "epoch": 49.16065573770492, "grad_norm": 4.020979881286621, "learning_rate": 1.0756931694765351e-05, "loss": 0.2831, "step": 14994 }, { "epoch": 49.16393442622951, "grad_norm": 3.349137544631958, "learning_rate": 1.0755872849899022e-05, "loss": 0.2512, "step": 14995 }, { "epoch": 49.1672131147541, "grad_norm": 4.018969535827637, "learning_rate": 1.0754813996509444e-05, "loss": 0.3455, "step": 14996 }, { "epoch": 49.170491803278686, "grad_norm": 3.57291841506958, "learning_rate": 1.075375513460855e-05, "loss": 0.3193, "step": 14997 }, { "epoch": 49.17377049180328, "grad_norm": 3.05680775642395, "learning_rate": 1.0752696264208284e-05, "loss": 0.1947, "step": 14998 }, { "epoch": 49.17704918032787, "grad_norm": 3.9346959590911865, "learning_rate": 1.0751637385320585e-05, "loss": 0.4241, "step": 14999 }, { "epoch": 49.18032786885246, "grad_norm": 3.4423916339874268, "learning_rate": 1.0750578497957393e-05, "loss": 0.3076, "step": 15000 }, { "epoch": 49.18360655737705, "grad_norm": 3.0691983699798584, "learning_rate": 1.074951960213065e-05, "loss": 0.2111, "step": 15001 }, { "epoch": 49.18688524590164, "grad_norm": 3.6887502670288086, "learning_rate": 1.0748460697852293e-05, "loss": 0.2785, "step": 15002 }, { "epoch": 49.19016393442623, "grad_norm": 3.465461254119873, "learning_rate": 1.0747401785134262e-05, "loss": 0.2281, "step": 15003 }, { "epoch": 49.19344262295082, "grad_norm": 3.4037251472473145, "learning_rate": 1.07463428639885e-05, "loss": 0.4598, "step": 15004 }, { "epoch": 49.19672131147541, "grad_norm": 3.398646831512451, "learning_rate": 1.0745283934426944e-05, "loss": 0.2509, "step": 15005 }, { "epoch": 49.2, "grad_norm": 3.419752359390259, "learning_rate": 1.0744224996461541e-05, "loss": 0.1956, "step": 15006 }, { "epoch": 49.20327868852459, "grad_norm": 3.6446738243103027, "learning_rate": 1.0743166050104224e-05, "loss": 0.3519, "step": 15007 }, { "epoch": 49.20655737704918, "grad_norm": 2.7000491619110107, "learning_rate": 1.0742107095366939e-05, "loss": 0.1299, "step": 15008 }, { "epoch": 49.20983606557377, "grad_norm": 3.260765552520752, "learning_rate": 1.0741048132261622e-05, "loss": 0.3062, "step": 15009 }, { "epoch": 49.21311475409836, "grad_norm": 4.841734409332275, "learning_rate": 1.0739989160800222e-05, "loss": 0.1756, "step": 15010 }, { "epoch": 49.21639344262295, "grad_norm": 2.8119168281555176, "learning_rate": 1.073893018099467e-05, "loss": 0.2225, "step": 15011 }, { "epoch": 49.21967213114754, "grad_norm": 3.757810592651367, "learning_rate": 1.0737871192856915e-05, "loss": 0.2533, "step": 15012 }, { "epoch": 49.22295081967213, "grad_norm": 3.796281099319458, "learning_rate": 1.0736812196398892e-05, "loss": 0.2601, "step": 15013 }, { "epoch": 49.226229508196724, "grad_norm": 4.095883369445801, "learning_rate": 1.0735753191632547e-05, "loss": 0.3026, "step": 15014 }, { "epoch": 49.22950819672131, "grad_norm": 3.6068520545959473, "learning_rate": 1.073469417856982e-05, "loss": 0.345, "step": 15015 }, { "epoch": 49.2327868852459, "grad_norm": 2.9863805770874023, "learning_rate": 1.0733635157222653e-05, "loss": 0.3279, "step": 15016 }, { "epoch": 49.23606557377049, "grad_norm": 3.065953493118286, "learning_rate": 1.0732576127602985e-05, "loss": 0.1842, "step": 15017 }, { "epoch": 49.239344262295084, "grad_norm": 3.545302391052246, "learning_rate": 1.073151708972276e-05, "loss": 0.5306, "step": 15018 }, { "epoch": 49.24262295081967, "grad_norm": 3.528090238571167, "learning_rate": 1.073045804359392e-05, "loss": 0.4368, "step": 15019 }, { "epoch": 49.24590163934426, "grad_norm": 2.550701141357422, "learning_rate": 1.0729398989228404e-05, "loss": 0.1937, "step": 15020 }, { "epoch": 49.24918032786885, "grad_norm": 3.5460009574890137, "learning_rate": 1.0728339926638158e-05, "loss": 0.1719, "step": 15021 }, { "epoch": 49.252459016393445, "grad_norm": 3.2178938388824463, "learning_rate": 1.072728085583512e-05, "loss": 0.312, "step": 15022 }, { "epoch": 49.25573770491803, "grad_norm": 3.998380184173584, "learning_rate": 1.0726221776831238e-05, "loss": 0.2457, "step": 15023 }, { "epoch": 49.25901639344262, "grad_norm": 3.504709243774414, "learning_rate": 1.0725162689638447e-05, "loss": 0.2483, "step": 15024 }, { "epoch": 49.26229508196721, "grad_norm": 4.428440570831299, "learning_rate": 1.0724103594268696e-05, "loss": 0.2766, "step": 15025 }, { "epoch": 49.265573770491805, "grad_norm": 3.205211877822876, "learning_rate": 1.0723044490733921e-05, "loss": 0.1452, "step": 15026 }, { "epoch": 49.268852459016394, "grad_norm": 4.410665988922119, "learning_rate": 1.0721985379046069e-05, "loss": 0.3693, "step": 15027 }, { "epoch": 49.27213114754098, "grad_norm": 3.5643651485443115, "learning_rate": 1.072092625921708e-05, "loss": 0.2018, "step": 15028 }, { "epoch": 49.27540983606557, "grad_norm": 3.4106879234313965, "learning_rate": 1.0719867131258899e-05, "loss": 0.4178, "step": 15029 }, { "epoch": 49.278688524590166, "grad_norm": 4.321651458740234, "learning_rate": 1.0718807995183467e-05, "loss": 0.4737, "step": 15030 }, { "epoch": 49.281967213114754, "grad_norm": 3.0601630210876465, "learning_rate": 1.0717748851002727e-05, "loss": 0.4255, "step": 15031 }, { "epoch": 49.28524590163934, "grad_norm": 3.716991901397705, "learning_rate": 1.0716689698728627e-05, "loss": 0.2398, "step": 15032 }, { "epoch": 49.28852459016394, "grad_norm": 3.5482497215270996, "learning_rate": 1.07156305383731e-05, "loss": 0.1546, "step": 15033 }, { "epoch": 49.291803278688526, "grad_norm": 3.5906522274017334, "learning_rate": 1.0714571369948097e-05, "loss": 0.557, "step": 15034 }, { "epoch": 49.295081967213115, "grad_norm": 3.131843328475952, "learning_rate": 1.0713512193465557e-05, "loss": 0.231, "step": 15035 }, { "epoch": 49.2983606557377, "grad_norm": 3.1034388542175293, "learning_rate": 1.0712453008937427e-05, "loss": 0.4433, "step": 15036 }, { "epoch": 49.3016393442623, "grad_norm": 4.391637325286865, "learning_rate": 1.0711393816375646e-05, "loss": 0.2323, "step": 15037 }, { "epoch": 49.30491803278689, "grad_norm": 2.6138949394226074, "learning_rate": 1.0710334615792164e-05, "loss": 0.3427, "step": 15038 }, { "epoch": 49.308196721311475, "grad_norm": 3.710425615310669, "learning_rate": 1.0709275407198915e-05, "loss": 0.2479, "step": 15039 }, { "epoch": 49.31147540983606, "grad_norm": 4.521950721740723, "learning_rate": 1.0708216190607854e-05, "loss": 0.3613, "step": 15040 }, { "epoch": 49.31475409836066, "grad_norm": 3.0552151203155518, "learning_rate": 1.0707156966030916e-05, "loss": 0.4676, "step": 15041 }, { "epoch": 49.31803278688525, "grad_norm": 2.611701011657715, "learning_rate": 1.070609773348005e-05, "loss": 0.1181, "step": 15042 }, { "epoch": 49.321311475409836, "grad_norm": 2.5651350021362305, "learning_rate": 1.0705038492967198e-05, "loss": 0.336, "step": 15043 }, { "epoch": 49.324590163934424, "grad_norm": 3.3076796531677246, "learning_rate": 1.07039792445043e-05, "loss": 0.3897, "step": 15044 }, { "epoch": 49.32786885245902, "grad_norm": 3.152444839477539, "learning_rate": 1.0702919988103306e-05, "loss": 0.157, "step": 15045 }, { "epoch": 49.33114754098361, "grad_norm": 2.9025471210479736, "learning_rate": 1.070186072377616e-05, "loss": 0.1576, "step": 15046 }, { "epoch": 49.334426229508196, "grad_norm": 4.375521183013916, "learning_rate": 1.0700801451534804e-05, "loss": 0.2604, "step": 15047 }, { "epoch": 49.337704918032784, "grad_norm": 5.288434028625488, "learning_rate": 1.0699742171391181e-05, "loss": 0.3801, "step": 15048 }, { "epoch": 49.34098360655738, "grad_norm": 3.4711415767669678, "learning_rate": 1.069868288335724e-05, "loss": 0.2373, "step": 15049 }, { "epoch": 49.34426229508197, "grad_norm": 3.933764696121216, "learning_rate": 1.069762358744492e-05, "loss": 0.3234, "step": 15050 }, { "epoch": 49.34754098360656, "grad_norm": 3.3240644931793213, "learning_rate": 1.0696564283666174e-05, "loss": 0.227, "step": 15051 }, { "epoch": 49.350819672131145, "grad_norm": 3.4307284355163574, "learning_rate": 1.0695504972032937e-05, "loss": 0.4248, "step": 15052 }, { "epoch": 49.35409836065574, "grad_norm": 3.3111443519592285, "learning_rate": 1.069444565255716e-05, "loss": 0.2755, "step": 15053 }, { "epoch": 49.35737704918033, "grad_norm": 2.781313419342041, "learning_rate": 1.0693386325250786e-05, "loss": 0.1568, "step": 15054 }, { "epoch": 49.36065573770492, "grad_norm": 3.7804243564605713, "learning_rate": 1.069232699012576e-05, "loss": 0.2205, "step": 15055 }, { "epoch": 49.363934426229505, "grad_norm": 3.091848850250244, "learning_rate": 1.0691267647194026e-05, "loss": 0.4561, "step": 15056 }, { "epoch": 49.3672131147541, "grad_norm": 3.3419888019561768, "learning_rate": 1.0690208296467532e-05, "loss": 0.2437, "step": 15057 }, { "epoch": 49.37049180327869, "grad_norm": 3.097423791885376, "learning_rate": 1.0689148937958224e-05, "loss": 0.2203, "step": 15058 }, { "epoch": 49.37377049180328, "grad_norm": 2.8346314430236816, "learning_rate": 1.0688089571678042e-05, "loss": 0.1834, "step": 15059 }, { "epoch": 49.377049180327866, "grad_norm": 3.6698970794677734, "learning_rate": 1.0687030197638936e-05, "loss": 0.4567, "step": 15060 }, { "epoch": 49.38032786885246, "grad_norm": 3.567283868789673, "learning_rate": 1.0685970815852848e-05, "loss": 0.1577, "step": 15061 }, { "epoch": 49.38360655737705, "grad_norm": 3.8949403762817383, "learning_rate": 1.068491142633173e-05, "loss": 0.3091, "step": 15062 }, { "epoch": 49.38688524590164, "grad_norm": 3.4890551567077637, "learning_rate": 1.068385202908752e-05, "loss": 0.4123, "step": 15063 }, { "epoch": 49.390163934426226, "grad_norm": 3.5547611713409424, "learning_rate": 1.068279262413217e-05, "loss": 0.4218, "step": 15064 }, { "epoch": 49.39344262295082, "grad_norm": 3.1913745403289795, "learning_rate": 1.068173321147762e-05, "loss": 0.2946, "step": 15065 }, { "epoch": 49.39672131147541, "grad_norm": 4.149972915649414, "learning_rate": 1.0680673791135822e-05, "loss": 0.268, "step": 15066 }, { "epoch": 49.4, "grad_norm": 4.640255928039551, "learning_rate": 1.0679614363118718e-05, "loss": 0.282, "step": 15067 }, { "epoch": 49.40327868852459, "grad_norm": 3.4571995735168457, "learning_rate": 1.0678554927438257e-05, "loss": 0.2158, "step": 15068 }, { "epoch": 49.40655737704918, "grad_norm": 2.8103511333465576, "learning_rate": 1.0677495484106379e-05, "loss": 0.2321, "step": 15069 }, { "epoch": 49.40983606557377, "grad_norm": 3.9844532012939453, "learning_rate": 1.067643603313504e-05, "loss": 0.3492, "step": 15070 }, { "epoch": 49.41311475409836, "grad_norm": 2.8617911338806152, "learning_rate": 1.067537657453618e-05, "loss": 0.2553, "step": 15071 }, { "epoch": 49.41639344262295, "grad_norm": 3.2998695373535156, "learning_rate": 1.0674317108321748e-05, "loss": 0.2023, "step": 15072 }, { "epoch": 49.41967213114754, "grad_norm": 3.758145332336426, "learning_rate": 1.0673257634503686e-05, "loss": 0.2173, "step": 15073 }, { "epoch": 49.42295081967213, "grad_norm": 4.399382591247559, "learning_rate": 1.0672198153093947e-05, "loss": 0.2798, "step": 15074 }, { "epoch": 49.42622950819672, "grad_norm": 3.4393584728240967, "learning_rate": 1.0671138664104474e-05, "loss": 0.2543, "step": 15075 }, { "epoch": 49.429508196721315, "grad_norm": 4.377772331237793, "learning_rate": 1.0670079167547214e-05, "loss": 0.3432, "step": 15076 }, { "epoch": 49.4327868852459, "grad_norm": 2.907707691192627, "learning_rate": 1.0669019663434117e-05, "loss": 0.213, "step": 15077 }, { "epoch": 49.43606557377049, "grad_norm": 2.77095103263855, "learning_rate": 1.0667960151777124e-05, "loss": 0.2511, "step": 15078 }, { "epoch": 49.43934426229508, "grad_norm": 3.6564924716949463, "learning_rate": 1.066690063258819e-05, "loss": 0.2251, "step": 15079 }, { "epoch": 49.442622950819676, "grad_norm": 3.5207324028015137, "learning_rate": 1.0665841105879255e-05, "loss": 0.3578, "step": 15080 }, { "epoch": 49.445901639344264, "grad_norm": 3.1677215099334717, "learning_rate": 1.066478157166227e-05, "loss": 0.3312, "step": 15081 }, { "epoch": 49.44918032786885, "grad_norm": 2.9915482997894287, "learning_rate": 1.0663722029949182e-05, "loss": 0.4447, "step": 15082 }, { "epoch": 49.45245901639344, "grad_norm": 3.148190498352051, "learning_rate": 1.066266248075194e-05, "loss": 0.167, "step": 15083 }, { "epoch": 49.455737704918036, "grad_norm": 3.4185853004455566, "learning_rate": 1.0661602924082488e-05, "loss": 0.2481, "step": 15084 }, { "epoch": 49.459016393442624, "grad_norm": 3.538527727127075, "learning_rate": 1.0660543359952773e-05, "loss": 0.3727, "step": 15085 }, { "epoch": 49.46229508196721, "grad_norm": 3.449315071105957, "learning_rate": 1.0659483788374747e-05, "loss": 0.3597, "step": 15086 }, { "epoch": 49.4655737704918, "grad_norm": 3.7879765033721924, "learning_rate": 1.0658424209360354e-05, "loss": 0.2409, "step": 15087 }, { "epoch": 49.4688524590164, "grad_norm": 3.3978426456451416, "learning_rate": 1.0657364622921547e-05, "loss": 0.1452, "step": 15088 }, { "epoch": 49.472131147540985, "grad_norm": 3.5390031337738037, "learning_rate": 1.0656305029070264e-05, "loss": 0.205, "step": 15089 }, { "epoch": 49.47540983606557, "grad_norm": 3.258078098297119, "learning_rate": 1.0655245427818467e-05, "loss": 0.4769, "step": 15090 }, { "epoch": 49.47868852459016, "grad_norm": 3.264059066772461, "learning_rate": 1.0654185819178092e-05, "loss": 0.1508, "step": 15091 }, { "epoch": 49.48196721311476, "grad_norm": 2.831033945083618, "learning_rate": 1.0653126203161092e-05, "loss": 0.2774, "step": 15092 }, { "epoch": 49.485245901639345, "grad_norm": 3.4590110778808594, "learning_rate": 1.0652066579779414e-05, "loss": 0.3615, "step": 15093 }, { "epoch": 49.488524590163934, "grad_norm": 3.042419672012329, "learning_rate": 1.0651006949045011e-05, "loss": 0.1832, "step": 15094 }, { "epoch": 49.49180327868852, "grad_norm": 3.670560598373413, "learning_rate": 1.0649947310969823e-05, "loss": 0.2083, "step": 15095 }, { "epoch": 49.49508196721312, "grad_norm": 3.213095188140869, "learning_rate": 1.0648887665565808e-05, "loss": 0.366, "step": 15096 }, { "epoch": 49.498360655737706, "grad_norm": 3.557929754257202, "learning_rate": 1.0647828012844907e-05, "loss": 0.2993, "step": 15097 }, { "epoch": 49.501639344262294, "grad_norm": 2.3249917030334473, "learning_rate": 1.0646768352819072e-05, "loss": 0.1581, "step": 15098 }, { "epoch": 49.50491803278688, "grad_norm": 3.341193914413452, "learning_rate": 1.0645708685500251e-05, "loss": 0.305, "step": 15099 }, { "epoch": 49.50819672131148, "grad_norm": 3.6291418075561523, "learning_rate": 1.0644649010900395e-05, "loss": 0.3865, "step": 15100 }, { "epoch": 49.511475409836066, "grad_norm": 3.0682899951934814, "learning_rate": 1.064358932903145e-05, "loss": 0.27, "step": 15101 }, { "epoch": 49.514754098360655, "grad_norm": 3.0385494232177734, "learning_rate": 1.0642529639905363e-05, "loss": 0.2157, "step": 15102 }, { "epoch": 49.51803278688524, "grad_norm": 9.138275146484375, "learning_rate": 1.0641469943534091e-05, "loss": 0.1831, "step": 15103 }, { "epoch": 49.52131147540984, "grad_norm": 3.09700083732605, "learning_rate": 1.0640410239929575e-05, "loss": 0.2779, "step": 15104 }, { "epoch": 49.52459016393443, "grad_norm": 3.8151769638061523, "learning_rate": 1.063935052910377e-05, "loss": 0.2247, "step": 15105 }, { "epoch": 49.527868852459015, "grad_norm": 2.885819435119629, "learning_rate": 1.0638290811068623e-05, "loss": 0.1181, "step": 15106 }, { "epoch": 49.5311475409836, "grad_norm": 2.812082290649414, "learning_rate": 1.0637231085836085e-05, "loss": 0.1888, "step": 15107 }, { "epoch": 49.5344262295082, "grad_norm": 3.166616439819336, "learning_rate": 1.06361713534181e-05, "loss": 0.2593, "step": 15108 }, { "epoch": 49.53770491803279, "grad_norm": 2.626417636871338, "learning_rate": 1.0635111613826626e-05, "loss": 0.1852, "step": 15109 }, { "epoch": 49.540983606557376, "grad_norm": 3.2517056465148926, "learning_rate": 1.0634051867073604e-05, "loss": 0.1681, "step": 15110 }, { "epoch": 49.544262295081964, "grad_norm": 3.12972092628479, "learning_rate": 1.063299211317099e-05, "loss": 0.3053, "step": 15111 }, { "epoch": 49.54754098360656, "grad_norm": 3.822038412094116, "learning_rate": 1.0631932352130732e-05, "loss": 0.4845, "step": 15112 }, { "epoch": 49.55081967213115, "grad_norm": 3.7667200565338135, "learning_rate": 1.0630872583964777e-05, "loss": 0.3347, "step": 15113 }, { "epoch": 49.554098360655736, "grad_norm": 3.4826653003692627, "learning_rate": 1.0629812808685083e-05, "loss": 0.2024, "step": 15114 }, { "epoch": 49.557377049180324, "grad_norm": 3.627534866333008, "learning_rate": 1.0628753026303589e-05, "loss": 0.272, "step": 15115 }, { "epoch": 49.56065573770492, "grad_norm": 3.5905814170837402, "learning_rate": 1.0627693236832254e-05, "loss": 0.3314, "step": 15116 }, { "epoch": 49.56393442622951, "grad_norm": 3.2285006046295166, "learning_rate": 1.0626633440283024e-05, "loss": 0.1899, "step": 15117 }, { "epoch": 49.5672131147541, "grad_norm": 3.4622459411621094, "learning_rate": 1.0625573636667852e-05, "loss": 0.3467, "step": 15118 }, { "epoch": 49.570491803278685, "grad_norm": 3.542264223098755, "learning_rate": 1.0624513825998685e-05, "loss": 0.2689, "step": 15119 }, { "epoch": 49.57377049180328, "grad_norm": 4.67466402053833, "learning_rate": 1.0623454008287477e-05, "loss": 0.2715, "step": 15120 }, { "epoch": 49.57704918032787, "grad_norm": 3.0426037311553955, "learning_rate": 1.0622394183546174e-05, "loss": 0.1674, "step": 15121 }, { "epoch": 49.58032786885246, "grad_norm": 3.188621997833252, "learning_rate": 1.0621334351786734e-05, "loss": 0.3066, "step": 15122 }, { "epoch": 49.58360655737705, "grad_norm": 2.9719159603118896, "learning_rate": 1.0620274513021101e-05, "loss": 0.3186, "step": 15123 }, { "epoch": 49.58688524590164, "grad_norm": 3.465902805328369, "learning_rate": 1.0619214667261226e-05, "loss": 0.287, "step": 15124 }, { "epoch": 49.59016393442623, "grad_norm": 2.9073503017425537, "learning_rate": 1.0618154814519064e-05, "loss": 0.1304, "step": 15125 }, { "epoch": 49.59344262295082, "grad_norm": 2.642136812210083, "learning_rate": 1.0617094954806562e-05, "loss": 0.2121, "step": 15126 }, { "epoch": 49.59672131147541, "grad_norm": 2.8731706142425537, "learning_rate": 1.0616035088135673e-05, "loss": 0.1899, "step": 15127 }, { "epoch": 49.6, "grad_norm": 3.3872108459472656, "learning_rate": 1.061497521451835e-05, "loss": 0.4025, "step": 15128 }, { "epoch": 49.60327868852459, "grad_norm": 3.4207677841186523, "learning_rate": 1.0613915333966537e-05, "loss": 0.2784, "step": 15129 }, { "epoch": 49.60655737704918, "grad_norm": 3.2296931743621826, "learning_rate": 1.0612855446492195e-05, "loss": 0.1777, "step": 15130 }, { "epoch": 49.609836065573774, "grad_norm": 2.9917447566986084, "learning_rate": 1.0611795552107271e-05, "loss": 0.3429, "step": 15131 }, { "epoch": 49.61311475409836, "grad_norm": 3.0269861221313477, "learning_rate": 1.0610735650823714e-05, "loss": 0.4534, "step": 15132 }, { "epoch": 49.61639344262295, "grad_norm": 3.3718039989471436, "learning_rate": 1.0609675742653478e-05, "loss": 0.3634, "step": 15133 }, { "epoch": 49.61967213114754, "grad_norm": 3.9949846267700195, "learning_rate": 1.0608615827608513e-05, "loss": 0.1643, "step": 15134 }, { "epoch": 49.622950819672134, "grad_norm": 3.0574018955230713, "learning_rate": 1.0607555905700775e-05, "loss": 0.2332, "step": 15135 }, { "epoch": 49.62622950819672, "grad_norm": 3.814966917037964, "learning_rate": 1.060649597694221e-05, "loss": 0.52, "step": 15136 }, { "epoch": 49.62950819672131, "grad_norm": 3.1850414276123047, "learning_rate": 1.0605436041344773e-05, "loss": 0.1761, "step": 15137 }, { "epoch": 49.6327868852459, "grad_norm": 3.178166389465332, "learning_rate": 1.0604376098920415e-05, "loss": 0.1895, "step": 15138 }, { "epoch": 49.636065573770495, "grad_norm": 3.7359721660614014, "learning_rate": 1.0603316149681087e-05, "loss": 0.2067, "step": 15139 }, { "epoch": 49.63934426229508, "grad_norm": 2.9091603755950928, "learning_rate": 1.0602256193638745e-05, "loss": 0.2346, "step": 15140 }, { "epoch": 49.64262295081967, "grad_norm": 4.20664644241333, "learning_rate": 1.0601196230805335e-05, "loss": 0.2951, "step": 15141 }, { "epoch": 49.64590163934426, "grad_norm": 3.469586133956909, "learning_rate": 1.0600136261192815e-05, "loss": 0.3843, "step": 15142 }, { "epoch": 49.649180327868855, "grad_norm": 3.349536657333374, "learning_rate": 1.0599076284813132e-05, "loss": 0.3644, "step": 15143 }, { "epoch": 49.65245901639344, "grad_norm": 3.7741971015930176, "learning_rate": 1.0598016301678243e-05, "loss": 0.3658, "step": 15144 }, { "epoch": 49.65573770491803, "grad_norm": 3.2408268451690674, "learning_rate": 1.0596956311800097e-05, "loss": 0.2465, "step": 15145 }, { "epoch": 49.65901639344262, "grad_norm": 5.662971019744873, "learning_rate": 1.059589631519065e-05, "loss": 0.2205, "step": 15146 }, { "epoch": 49.662295081967216, "grad_norm": 3.2179839611053467, "learning_rate": 1.059483631186185e-05, "loss": 0.1963, "step": 15147 }, { "epoch": 49.665573770491804, "grad_norm": 4.054075717926025, "learning_rate": 1.0593776301825653e-05, "loss": 0.3641, "step": 15148 }, { "epoch": 49.66885245901639, "grad_norm": 3.12339186668396, "learning_rate": 1.0592716285094014e-05, "loss": 0.1441, "step": 15149 }, { "epoch": 49.67213114754098, "grad_norm": 3.8999526500701904, "learning_rate": 1.0591656261678878e-05, "loss": 0.3066, "step": 15150 }, { "epoch": 49.675409836065576, "grad_norm": 3.296311140060425, "learning_rate": 1.0590596231592205e-05, "loss": 0.3651, "step": 15151 }, { "epoch": 49.678688524590164, "grad_norm": 3.5975043773651123, "learning_rate": 1.0589536194845944e-05, "loss": 0.3246, "step": 15152 }, { "epoch": 49.68196721311475, "grad_norm": 4.35778284072876, "learning_rate": 1.0588476151452047e-05, "loss": 0.113, "step": 15153 }, { "epoch": 49.68524590163934, "grad_norm": 3.672522783279419, "learning_rate": 1.0587416101422475e-05, "loss": 0.1502, "step": 15154 }, { "epoch": 49.68852459016394, "grad_norm": 3.3453726768493652, "learning_rate": 1.0586356044769173e-05, "loss": 0.2862, "step": 15155 }, { "epoch": 49.691803278688525, "grad_norm": 6.737674236297607, "learning_rate": 1.0585295981504098e-05, "loss": 0.3336, "step": 15156 }, { "epoch": 49.69508196721311, "grad_norm": 3.0688281059265137, "learning_rate": 1.05842359116392e-05, "loss": 0.197, "step": 15157 }, { "epoch": 49.6983606557377, "grad_norm": 3.249499559402466, "learning_rate": 1.0583175835186435e-05, "loss": 0.2478, "step": 15158 }, { "epoch": 49.7016393442623, "grad_norm": 3.4492321014404297, "learning_rate": 1.0582115752157758e-05, "loss": 0.2, "step": 15159 }, { "epoch": 49.704918032786885, "grad_norm": 3.5173497200012207, "learning_rate": 1.0581055662565119e-05, "loss": 0.2998, "step": 15160 }, { "epoch": 49.708196721311474, "grad_norm": 4.5508036613464355, "learning_rate": 1.0579995566420476e-05, "loss": 0.3915, "step": 15161 }, { "epoch": 49.71147540983607, "grad_norm": 3.4887847900390625, "learning_rate": 1.0578935463735777e-05, "loss": 0.3681, "step": 15162 }, { "epoch": 49.71475409836066, "grad_norm": 3.564929246902466, "learning_rate": 1.057787535452298e-05, "loss": 0.2301, "step": 15163 }, { "epoch": 49.718032786885246, "grad_norm": 3.179039239883423, "learning_rate": 1.0576815238794039e-05, "loss": 0.148, "step": 15164 }, { "epoch": 49.721311475409834, "grad_norm": 3.0184922218322754, "learning_rate": 1.0575755116560902e-05, "loss": 0.1774, "step": 15165 }, { "epoch": 49.72459016393443, "grad_norm": 3.2791099548339844, "learning_rate": 1.0574694987835531e-05, "loss": 0.2077, "step": 15166 }, { "epoch": 49.72786885245902, "grad_norm": 3.81689190864563, "learning_rate": 1.0573634852629875e-05, "loss": 0.2671, "step": 15167 }, { "epoch": 49.731147540983606, "grad_norm": 4.068149566650391, "learning_rate": 1.0572574710955891e-05, "loss": 0.4197, "step": 15168 }, { "epoch": 49.734426229508195, "grad_norm": 3.7385032176971436, "learning_rate": 1.0571514562825531e-05, "loss": 0.4687, "step": 15169 }, { "epoch": 49.73770491803279, "grad_norm": 2.954218864440918, "learning_rate": 1.0570454408250752e-05, "loss": 0.1862, "step": 15170 }, { "epoch": 49.74098360655738, "grad_norm": 3.1345620155334473, "learning_rate": 1.0569394247243502e-05, "loss": 0.2556, "step": 15171 }, { "epoch": 49.74426229508197, "grad_norm": 3.6684391498565674, "learning_rate": 1.0568334079815745e-05, "loss": 0.1385, "step": 15172 }, { "epoch": 49.747540983606555, "grad_norm": 4.0273237228393555, "learning_rate": 1.0567273905979428e-05, "loss": 0.2305, "step": 15173 }, { "epoch": 49.75081967213115, "grad_norm": 4.135406970977783, "learning_rate": 1.0566213725746506e-05, "loss": 0.4802, "step": 15174 }, { "epoch": 49.75409836065574, "grad_norm": 3.235696792602539, "learning_rate": 1.0565153539128939e-05, "loss": 0.238, "step": 15175 }, { "epoch": 49.75737704918033, "grad_norm": 2.9086225032806396, "learning_rate": 1.0564093346138676e-05, "loss": 0.276, "step": 15176 }, { "epoch": 49.760655737704916, "grad_norm": 3.175624370574951, "learning_rate": 1.0563033146787673e-05, "loss": 0.2908, "step": 15177 }, { "epoch": 49.76393442622951, "grad_norm": 3.5481033325195312, "learning_rate": 1.0561972941087887e-05, "loss": 0.2914, "step": 15178 }, { "epoch": 49.7672131147541, "grad_norm": 3.174316167831421, "learning_rate": 1.0560912729051273e-05, "loss": 0.2295, "step": 15179 }, { "epoch": 49.77049180327869, "grad_norm": 3.45928692817688, "learning_rate": 1.0559852510689783e-05, "loss": 0.3645, "step": 15180 }, { "epoch": 49.773770491803276, "grad_norm": 2.8246963024139404, "learning_rate": 1.0558792286015373e-05, "loss": 0.5031, "step": 15181 }, { "epoch": 49.77704918032787, "grad_norm": 3.149402141571045, "learning_rate": 1.0557732055040002e-05, "loss": 0.2295, "step": 15182 }, { "epoch": 49.78032786885246, "grad_norm": 3.51059627532959, "learning_rate": 1.0556671817775622e-05, "loss": 0.3675, "step": 15183 }, { "epoch": 49.78360655737705, "grad_norm": 3.540668249130249, "learning_rate": 1.0555611574234185e-05, "loss": 0.3463, "step": 15184 }, { "epoch": 49.78688524590164, "grad_norm": 3.050213575363159, "learning_rate": 1.0554551324427654e-05, "loss": 0.4536, "step": 15185 }, { "epoch": 49.79016393442623, "grad_norm": 3.576192617416382, "learning_rate": 1.0553491068367978e-05, "loss": 0.1881, "step": 15186 }, { "epoch": 49.79344262295082, "grad_norm": 5.136474132537842, "learning_rate": 1.0552430806067113e-05, "loss": 0.2959, "step": 15187 }, { "epoch": 49.79672131147541, "grad_norm": 3.403425455093384, "learning_rate": 1.055137053753702e-05, "loss": 0.2208, "step": 15188 }, { "epoch": 49.8, "grad_norm": 3.376863956451416, "learning_rate": 1.055031026278965e-05, "loss": 0.1827, "step": 15189 }, { "epoch": 49.80327868852459, "grad_norm": 3.221498966217041, "learning_rate": 1.054924998183696e-05, "loss": 0.3306, "step": 15190 }, { "epoch": 49.80655737704918, "grad_norm": 2.885205030441284, "learning_rate": 1.0548189694690902e-05, "loss": 0.1806, "step": 15191 }, { "epoch": 49.80983606557377, "grad_norm": 3.528414011001587, "learning_rate": 1.0547129401363439e-05, "loss": 0.3353, "step": 15192 }, { "epoch": 49.81311475409836, "grad_norm": 3.2613747119903564, "learning_rate": 1.054606910186652e-05, "loss": 0.1633, "step": 15193 }, { "epoch": 49.81639344262295, "grad_norm": 3.5402400493621826, "learning_rate": 1.0545008796212105e-05, "loss": 0.2143, "step": 15194 }, { "epoch": 49.81967213114754, "grad_norm": 3.0857994556427, "learning_rate": 1.054394848441215e-05, "loss": 0.6159, "step": 15195 }, { "epoch": 49.82295081967213, "grad_norm": 3.932952880859375, "learning_rate": 1.054288816647861e-05, "loss": 0.2929, "step": 15196 }, { "epoch": 49.82622950819672, "grad_norm": 3.491194486618042, "learning_rate": 1.0541827842423442e-05, "loss": 0.2383, "step": 15197 }, { "epoch": 49.829508196721314, "grad_norm": 3.6927547454833984, "learning_rate": 1.0540767512258603e-05, "loss": 0.1926, "step": 15198 }, { "epoch": 49.8327868852459, "grad_norm": 3.4645214080810547, "learning_rate": 1.0539707175996048e-05, "loss": 0.2652, "step": 15199 }, { "epoch": 49.83606557377049, "grad_norm": 5.2642951011657715, "learning_rate": 1.053864683364773e-05, "loss": 0.1855, "step": 15200 }, { "epoch": 49.83934426229508, "grad_norm": 2.865208148956299, "learning_rate": 1.0537586485225611e-05, "loss": 0.1979, "step": 15201 }, { "epoch": 49.842622950819674, "grad_norm": 3.925161123275757, "learning_rate": 1.0536526130741644e-05, "loss": 0.4626, "step": 15202 }, { "epoch": 49.84590163934426, "grad_norm": 3.1563961505889893, "learning_rate": 1.053546577020779e-05, "loss": 0.3945, "step": 15203 }, { "epoch": 49.84918032786885, "grad_norm": 4.202101230621338, "learning_rate": 1.0534405403636e-05, "loss": 0.3818, "step": 15204 }, { "epoch": 49.85245901639344, "grad_norm": 3.458134651184082, "learning_rate": 1.0533345031038234e-05, "loss": 0.3188, "step": 15205 }, { "epoch": 49.855737704918035, "grad_norm": 3.9392998218536377, "learning_rate": 1.0532284652426447e-05, "loss": 0.3589, "step": 15206 }, { "epoch": 49.85901639344262, "grad_norm": 3.509610652923584, "learning_rate": 1.0531224267812601e-05, "loss": 0.1697, "step": 15207 }, { "epoch": 49.86229508196721, "grad_norm": 3.61668062210083, "learning_rate": 1.0530163877208645e-05, "loss": 0.3312, "step": 15208 }, { "epoch": 49.86557377049181, "grad_norm": 2.7426698207855225, "learning_rate": 1.052910348062654e-05, "loss": 0.1249, "step": 15209 }, { "epoch": 49.868852459016395, "grad_norm": 4.108050346374512, "learning_rate": 1.0528043078078248e-05, "loss": 0.2598, "step": 15210 }, { "epoch": 49.87213114754098, "grad_norm": 2.8680880069732666, "learning_rate": 1.0526982669575716e-05, "loss": 0.3887, "step": 15211 }, { "epoch": 49.87540983606557, "grad_norm": 4.14526891708374, "learning_rate": 1.052592225513091e-05, "loss": 0.2889, "step": 15212 }, { "epoch": 49.87868852459017, "grad_norm": 3.1992268562316895, "learning_rate": 1.0524861834755779e-05, "loss": 0.2488, "step": 15213 }, { "epoch": 49.881967213114756, "grad_norm": 3.406956672668457, "learning_rate": 1.0523801408462291e-05, "loss": 0.4737, "step": 15214 }, { "epoch": 49.885245901639344, "grad_norm": 2.741725206375122, "learning_rate": 1.0522740976262393e-05, "loss": 0.1982, "step": 15215 }, { "epoch": 49.88852459016393, "grad_norm": 3.221273899078369, "learning_rate": 1.0521680538168051e-05, "loss": 0.1895, "step": 15216 }, { "epoch": 49.89180327868853, "grad_norm": 3.8187828063964844, "learning_rate": 1.0520620094191214e-05, "loss": 0.3917, "step": 15217 }, { "epoch": 49.895081967213116, "grad_norm": 3.9294567108154297, "learning_rate": 1.051955964434385e-05, "loss": 0.3445, "step": 15218 }, { "epoch": 49.898360655737704, "grad_norm": 3.331883192062378, "learning_rate": 1.0518499188637905e-05, "loss": 0.2504, "step": 15219 }, { "epoch": 49.90163934426229, "grad_norm": 3.4619054794311523, "learning_rate": 1.0517438727085344e-05, "loss": 0.2415, "step": 15220 }, { "epoch": 49.90491803278689, "grad_norm": 3.5975866317749023, "learning_rate": 1.0516378259698126e-05, "loss": 0.2296, "step": 15221 }, { "epoch": 49.90819672131148, "grad_norm": 3.132218599319458, "learning_rate": 1.0515317786488203e-05, "loss": 0.2592, "step": 15222 }, { "epoch": 49.911475409836065, "grad_norm": 3.5314383506774902, "learning_rate": 1.0514257307467536e-05, "loss": 0.3359, "step": 15223 }, { "epoch": 49.91475409836065, "grad_norm": 5.564095973968506, "learning_rate": 1.0513196822648089e-05, "loss": 0.2859, "step": 15224 }, { "epoch": 49.91803278688525, "grad_norm": 3.1656911373138428, "learning_rate": 1.0512136332041808e-05, "loss": 0.3735, "step": 15225 }, { "epoch": 49.92131147540984, "grad_norm": 3.7211663722991943, "learning_rate": 1.0511075835660661e-05, "loss": 0.2733, "step": 15226 }, { "epoch": 49.924590163934425, "grad_norm": 3.861015796661377, "learning_rate": 1.0510015333516601e-05, "loss": 0.2973, "step": 15227 }, { "epoch": 49.927868852459014, "grad_norm": 3.664198875427246, "learning_rate": 1.0508954825621587e-05, "loss": 0.1911, "step": 15228 }, { "epoch": 49.93114754098361, "grad_norm": 3.497896432876587, "learning_rate": 1.050789431198758e-05, "loss": 0.3472, "step": 15229 }, { "epoch": 49.9344262295082, "grad_norm": 3.9522547721862793, "learning_rate": 1.0506833792626535e-05, "loss": 0.2809, "step": 15230 }, { "epoch": 49.937704918032786, "grad_norm": 3.283578634262085, "learning_rate": 1.0505773267550411e-05, "loss": 0.4162, "step": 15231 }, { "epoch": 49.940983606557374, "grad_norm": 3.2609314918518066, "learning_rate": 1.050471273677117e-05, "loss": 0.2953, "step": 15232 }, { "epoch": 49.94426229508197, "grad_norm": 2.8674235343933105, "learning_rate": 1.0503652200300767e-05, "loss": 0.2854, "step": 15233 }, { "epoch": 49.94754098360656, "grad_norm": 2.983203649520874, "learning_rate": 1.0502591658151162e-05, "loss": 0.2756, "step": 15234 }, { "epoch": 49.950819672131146, "grad_norm": 3.3760945796966553, "learning_rate": 1.0501531110334311e-05, "loss": 0.3947, "step": 15235 }, { "epoch": 49.954098360655735, "grad_norm": 3.0047943592071533, "learning_rate": 1.0500470556862179e-05, "loss": 0.1434, "step": 15236 }, { "epoch": 49.95737704918033, "grad_norm": 3.472019672393799, "learning_rate": 1.0499409997746718e-05, "loss": 0.2789, "step": 15237 }, { "epoch": 49.96065573770492, "grad_norm": 3.6225688457489014, "learning_rate": 1.0498349432999895e-05, "loss": 0.3068, "step": 15238 }, { "epoch": 49.96393442622951, "grad_norm": 3.728151321411133, "learning_rate": 1.0497288862633658e-05, "loss": 0.2706, "step": 15239 }, { "epoch": 49.967213114754095, "grad_norm": 3.498312473297119, "learning_rate": 1.0496228286659977e-05, "loss": 0.1757, "step": 15240 }, { "epoch": 49.97049180327869, "grad_norm": 2.5622222423553467, "learning_rate": 1.0495167705090804e-05, "loss": 0.1482, "step": 15241 }, { "epoch": 49.97377049180328, "grad_norm": 3.2314505577087402, "learning_rate": 1.0494107117938099e-05, "loss": 0.187, "step": 15242 }, { "epoch": 49.97704918032787, "grad_norm": 3.3640098571777344, "learning_rate": 1.0493046525213824e-05, "loss": 0.4538, "step": 15243 }, { "epoch": 49.980327868852456, "grad_norm": 3.246121883392334, "learning_rate": 1.0491985926929938e-05, "loss": 0.2499, "step": 15244 }, { "epoch": 49.98360655737705, "grad_norm": 5.593100547790527, "learning_rate": 1.0490925323098395e-05, "loss": 0.2356, "step": 15245 }, { "epoch": 49.98688524590164, "grad_norm": 4.275005340576172, "learning_rate": 1.0489864713731163e-05, "loss": 0.2784, "step": 15246 }, { "epoch": 49.99016393442623, "grad_norm": 3.5667731761932373, "learning_rate": 1.0488804098840195e-05, "loss": 0.1359, "step": 15247 }, { "epoch": 49.993442622950816, "grad_norm": 3.8581504821777344, "learning_rate": 1.0487743478437453e-05, "loss": 0.4459, "step": 15248 }, { "epoch": 49.99672131147541, "grad_norm": 4.612417697906494, "learning_rate": 1.0486682852534895e-05, "loss": 0.5275, "step": 15249 }, { "epoch": 50.0, "grad_norm": 2.9492783546447754, "learning_rate": 1.0485622221144485e-05, "loss": 0.1761, "step": 15250 }, { "epoch": 50.00327868852459, "grad_norm": 4.051246643066406, "learning_rate": 1.0484561584278177e-05, "loss": 0.312, "step": 15251 }, { "epoch": 50.006557377049184, "grad_norm": 3.0013175010681152, "learning_rate": 1.0483500941947933e-05, "loss": 0.3335, "step": 15252 }, { "epoch": 50.00983606557377, "grad_norm": 2.907327175140381, "learning_rate": 1.0482440294165714e-05, "loss": 0.2497, "step": 15253 }, { "epoch": 50.01311475409836, "grad_norm": 3.8882079124450684, "learning_rate": 1.0481379640943479e-05, "loss": 0.2864, "step": 15254 }, { "epoch": 50.01639344262295, "grad_norm": 3.0992424488067627, "learning_rate": 1.048031898229319e-05, "loss": 0.2609, "step": 15255 }, { "epoch": 50.019672131147544, "grad_norm": 3.206465482711792, "learning_rate": 1.0479258318226799e-05, "loss": 0.1844, "step": 15256 }, { "epoch": 50.02295081967213, "grad_norm": 4.208011150360107, "learning_rate": 1.0478197648756279e-05, "loss": 0.1923, "step": 15257 }, { "epoch": 50.02622950819672, "grad_norm": 3.6597139835357666, "learning_rate": 1.0477136973893578e-05, "loss": 0.4283, "step": 15258 }, { "epoch": 50.02950819672131, "grad_norm": 3.4494409561157227, "learning_rate": 1.0476076293650665e-05, "loss": 0.3614, "step": 15259 }, { "epoch": 50.032786885245905, "grad_norm": 4.169622898101807, "learning_rate": 1.0475015608039496e-05, "loss": 0.3658, "step": 15260 }, { "epoch": 50.03606557377049, "grad_norm": 3.315599203109741, "learning_rate": 1.0473954917072034e-05, "loss": 0.1813, "step": 15261 }, { "epoch": 50.03934426229508, "grad_norm": 3.6099941730499268, "learning_rate": 1.0472894220760233e-05, "loss": 0.1835, "step": 15262 }, { "epoch": 50.04262295081967, "grad_norm": 3.646200180053711, "learning_rate": 1.0471833519116058e-05, "loss": 0.2113, "step": 15263 }, { "epoch": 50.045901639344265, "grad_norm": 3.4895613193511963, "learning_rate": 1.0470772812151473e-05, "loss": 0.2192, "step": 15264 }, { "epoch": 50.049180327868854, "grad_norm": 3.606731653213501, "learning_rate": 1.0469712099878432e-05, "loss": 0.2591, "step": 15265 }, { "epoch": 50.05245901639344, "grad_norm": 3.1886534690856934, "learning_rate": 1.0468651382308903e-05, "loss": 0.1509, "step": 15266 }, { "epoch": 50.05573770491803, "grad_norm": 4.15523624420166, "learning_rate": 1.0467590659454839e-05, "loss": 0.3224, "step": 15267 }, { "epoch": 50.059016393442626, "grad_norm": 5.755600929260254, "learning_rate": 1.0466529931328206e-05, "loss": 0.3038, "step": 15268 }, { "epoch": 50.062295081967214, "grad_norm": 3.9307823181152344, "learning_rate": 1.0465469197940961e-05, "loss": 0.1825, "step": 15269 }, { "epoch": 50.0655737704918, "grad_norm": 3.5883092880249023, "learning_rate": 1.0464408459305069e-05, "loss": 0.2279, "step": 15270 }, { "epoch": 50.06885245901639, "grad_norm": 2.848111391067505, "learning_rate": 1.0463347715432488e-05, "loss": 0.1635, "step": 15271 }, { "epoch": 50.072131147540986, "grad_norm": 3.0469326972961426, "learning_rate": 1.046228696633518e-05, "loss": 0.3376, "step": 15272 }, { "epoch": 50.075409836065575, "grad_norm": 4.600499629974365, "learning_rate": 1.0461226212025103e-05, "loss": 0.2939, "step": 15273 }, { "epoch": 50.07868852459016, "grad_norm": 3.336146831512451, "learning_rate": 1.0460165452514224e-05, "loss": 0.3442, "step": 15274 }, { "epoch": 50.08196721311475, "grad_norm": 3.504467487335205, "learning_rate": 1.0459104687814501e-05, "loss": 0.149, "step": 15275 }, { "epoch": 50.08524590163935, "grad_norm": 3.4366676807403564, "learning_rate": 1.0458043917937894e-05, "loss": 0.2805, "step": 15276 }, { "epoch": 50.088524590163935, "grad_norm": 2.991173028945923, "learning_rate": 1.045698314289637e-05, "loss": 0.2042, "step": 15277 }, { "epoch": 50.09180327868852, "grad_norm": 3.5727243423461914, "learning_rate": 1.0455922362701877e-05, "loss": 0.4063, "step": 15278 }, { "epoch": 50.09508196721311, "grad_norm": 3.0625245571136475, "learning_rate": 1.0454861577366394e-05, "loss": 0.1749, "step": 15279 }, { "epoch": 50.09836065573771, "grad_norm": 3.3246703147888184, "learning_rate": 1.0453800786901868e-05, "loss": 0.2806, "step": 15280 }, { "epoch": 50.101639344262296, "grad_norm": 3.59824275970459, "learning_rate": 1.0452739991320269e-05, "loss": 0.3339, "step": 15281 }, { "epoch": 50.104918032786884, "grad_norm": 3.3868839740753174, "learning_rate": 1.0451679190633555e-05, "loss": 0.1984, "step": 15282 }, { "epoch": 50.10819672131147, "grad_norm": 3.5528759956359863, "learning_rate": 1.045061838485369e-05, "loss": 0.2701, "step": 15283 }, { "epoch": 50.11147540983607, "grad_norm": 3.4555957317352295, "learning_rate": 1.044955757399263e-05, "loss": 0.2444, "step": 15284 }, { "epoch": 50.114754098360656, "grad_norm": 3.337306261062622, "learning_rate": 1.0448496758062345e-05, "loss": 0.3719, "step": 15285 }, { "epoch": 50.118032786885244, "grad_norm": 3.70756196975708, "learning_rate": 1.0447435937074793e-05, "loss": 0.3253, "step": 15286 }, { "epoch": 50.12131147540983, "grad_norm": 3.5019609928131104, "learning_rate": 1.0446375111041934e-05, "loss": 0.171, "step": 15287 }, { "epoch": 50.12459016393443, "grad_norm": 3.3067641258239746, "learning_rate": 1.044531427997573e-05, "loss": 0.2851, "step": 15288 }, { "epoch": 50.12786885245902, "grad_norm": 3.3511226177215576, "learning_rate": 1.0444253443888149e-05, "loss": 0.4707, "step": 15289 }, { "epoch": 50.131147540983605, "grad_norm": 3.843841791152954, "learning_rate": 1.0443192602791143e-05, "loss": 0.2759, "step": 15290 }, { "epoch": 50.13442622950819, "grad_norm": 3.7859714031219482, "learning_rate": 1.0442131756696681e-05, "loss": 0.2366, "step": 15291 }, { "epoch": 50.13770491803279, "grad_norm": 3.7832813262939453, "learning_rate": 1.0441070905616726e-05, "loss": 0.2417, "step": 15292 }, { "epoch": 50.14098360655738, "grad_norm": 3.6859419345855713, "learning_rate": 1.0440010049563236e-05, "loss": 0.1654, "step": 15293 }, { "epoch": 50.144262295081965, "grad_norm": 3.7019150257110596, "learning_rate": 1.0438949188548177e-05, "loss": 0.3711, "step": 15294 }, { "epoch": 50.14754098360656, "grad_norm": 3.019624948501587, "learning_rate": 1.0437888322583509e-05, "loss": 0.3001, "step": 15295 }, { "epoch": 50.15081967213115, "grad_norm": 3.547945499420166, "learning_rate": 1.0436827451681195e-05, "loss": 0.251, "step": 15296 }, { "epoch": 50.15409836065574, "grad_norm": 3.5539052486419678, "learning_rate": 1.0435766575853197e-05, "loss": 0.3599, "step": 15297 }, { "epoch": 50.157377049180326, "grad_norm": 3.467397689819336, "learning_rate": 1.0434705695111478e-05, "loss": 0.2775, "step": 15298 }, { "epoch": 50.16065573770492, "grad_norm": 3.535198926925659, "learning_rate": 1.0433644809467998e-05, "loss": 0.3763, "step": 15299 }, { "epoch": 50.16393442622951, "grad_norm": 6.725162029266357, "learning_rate": 1.0432583918934724e-05, "loss": 0.4672, "step": 15300 }, { "epoch": 50.1672131147541, "grad_norm": 3.462566375732422, "learning_rate": 1.0431523023523616e-05, "loss": 0.2087, "step": 15301 }, { "epoch": 50.170491803278686, "grad_norm": 3.220684766769409, "learning_rate": 1.0430462123246638e-05, "loss": 0.309, "step": 15302 }, { "epoch": 50.17377049180328, "grad_norm": 2.8142879009246826, "learning_rate": 1.0429401218115753e-05, "loss": 0.1724, "step": 15303 }, { "epoch": 50.17704918032787, "grad_norm": 2.9526045322418213, "learning_rate": 1.0428340308142921e-05, "loss": 0.2699, "step": 15304 }, { "epoch": 50.18032786885246, "grad_norm": 3.516958236694336, "learning_rate": 1.0427279393340108e-05, "loss": 0.2611, "step": 15305 }, { "epoch": 50.18360655737705, "grad_norm": 3.0249130725860596, "learning_rate": 1.0426218473719274e-05, "loss": 0.2624, "step": 15306 }, { "epoch": 50.18688524590164, "grad_norm": 3.702857494354248, "learning_rate": 1.0425157549292385e-05, "loss": 0.1896, "step": 15307 }, { "epoch": 50.19016393442623, "grad_norm": 3.239161729812622, "learning_rate": 1.04240966200714e-05, "loss": 0.1178, "step": 15308 }, { "epoch": 50.19344262295082, "grad_norm": 3.403615713119507, "learning_rate": 1.0423035686068289e-05, "loss": 0.3141, "step": 15309 }, { "epoch": 50.19672131147541, "grad_norm": 4.277962684631348, "learning_rate": 1.0421974747295007e-05, "loss": 0.2108, "step": 15310 }, { "epoch": 50.2, "grad_norm": 3.3854987621307373, "learning_rate": 1.0420913803763522e-05, "loss": 0.2049, "step": 15311 }, { "epoch": 50.20327868852459, "grad_norm": 3.3404576778411865, "learning_rate": 1.0419852855485798e-05, "loss": 0.3329, "step": 15312 }, { "epoch": 50.20655737704918, "grad_norm": 3.6118903160095215, "learning_rate": 1.0418791902473794e-05, "loss": 0.2046, "step": 15313 }, { "epoch": 50.20983606557377, "grad_norm": 8.111376762390137, "learning_rate": 1.0417730944739474e-05, "loss": 0.3212, "step": 15314 }, { "epoch": 50.21311475409836, "grad_norm": 3.5180246829986572, "learning_rate": 1.0416669982294808e-05, "loss": 0.2716, "step": 15315 }, { "epoch": 50.21639344262295, "grad_norm": 3.402535915374756, "learning_rate": 1.0415609015151753e-05, "loss": 0.2369, "step": 15316 }, { "epoch": 50.21967213114754, "grad_norm": 3.4721519947052, "learning_rate": 1.0414548043322272e-05, "loss": 0.2695, "step": 15317 }, { "epoch": 50.22295081967213, "grad_norm": 4.133479595184326, "learning_rate": 1.0413487066818335e-05, "loss": 0.4588, "step": 15318 }, { "epoch": 50.226229508196724, "grad_norm": 3.370398998260498, "learning_rate": 1.0412426085651897e-05, "loss": 0.4448, "step": 15319 }, { "epoch": 50.22950819672131, "grad_norm": 2.9853150844573975, "learning_rate": 1.041136509983493e-05, "loss": 0.1697, "step": 15320 }, { "epoch": 50.2327868852459, "grad_norm": 3.9654972553253174, "learning_rate": 1.041030410937939e-05, "loss": 0.2517, "step": 15321 }, { "epoch": 50.23606557377049, "grad_norm": 2.9594228267669678, "learning_rate": 1.040924311429725e-05, "loss": 0.1518, "step": 15322 }, { "epoch": 50.239344262295084, "grad_norm": 2.425408363342285, "learning_rate": 1.0408182114600463e-05, "loss": 0.2301, "step": 15323 }, { "epoch": 50.24262295081967, "grad_norm": 2.988269805908203, "learning_rate": 1.0407121110301001e-05, "loss": 0.1158, "step": 15324 }, { "epoch": 50.24590163934426, "grad_norm": 4.312186241149902, "learning_rate": 1.0406060101410822e-05, "loss": 0.329, "step": 15325 }, { "epoch": 50.24918032786885, "grad_norm": 3.724138021469116, "learning_rate": 1.0404999087941898e-05, "loss": 0.4368, "step": 15326 }, { "epoch": 50.252459016393445, "grad_norm": 2.9730215072631836, "learning_rate": 1.0403938069906186e-05, "loss": 0.3529, "step": 15327 }, { "epoch": 50.25573770491803, "grad_norm": 4.059629917144775, "learning_rate": 1.0402877047315652e-05, "loss": 0.286, "step": 15328 }, { "epoch": 50.25901639344262, "grad_norm": 3.7202541828155518, "learning_rate": 1.040181602018226e-05, "loss": 0.3074, "step": 15329 }, { "epoch": 50.26229508196721, "grad_norm": 3.4743306636810303, "learning_rate": 1.0400754988517976e-05, "loss": 0.2984, "step": 15330 }, { "epoch": 50.265573770491805, "grad_norm": 3.3198156356811523, "learning_rate": 1.0399693952334762e-05, "loss": 0.2072, "step": 15331 }, { "epoch": 50.268852459016394, "grad_norm": 2.8696746826171875, "learning_rate": 1.0398632911644581e-05, "loss": 0.1615, "step": 15332 }, { "epoch": 50.27213114754098, "grad_norm": 2.979210138320923, "learning_rate": 1.0397571866459403e-05, "loss": 0.334, "step": 15333 }, { "epoch": 50.27540983606557, "grad_norm": 3.242748260498047, "learning_rate": 1.0396510816791188e-05, "loss": 0.2895, "step": 15334 }, { "epoch": 50.278688524590166, "grad_norm": 5.2199811935424805, "learning_rate": 1.03954497626519e-05, "loss": 0.5554, "step": 15335 }, { "epoch": 50.281967213114754, "grad_norm": 3.444500207901001, "learning_rate": 1.0394388704053503e-05, "loss": 0.2931, "step": 15336 }, { "epoch": 50.28524590163934, "grad_norm": 3.298675775527954, "learning_rate": 1.0393327641007968e-05, "loss": 0.2861, "step": 15337 }, { "epoch": 50.28852459016394, "grad_norm": 4.134035587310791, "learning_rate": 1.0392266573527253e-05, "loss": 0.3234, "step": 15338 }, { "epoch": 50.291803278688526, "grad_norm": 5.6619462966918945, "learning_rate": 1.0391205501623323e-05, "loss": 0.2936, "step": 15339 }, { "epoch": 50.295081967213115, "grad_norm": 2.8464698791503906, "learning_rate": 1.0390144425308144e-05, "loss": 0.1308, "step": 15340 }, { "epoch": 50.2983606557377, "grad_norm": 3.48708438873291, "learning_rate": 1.0389083344593684e-05, "loss": 0.177, "step": 15341 }, { "epoch": 50.3016393442623, "grad_norm": 3.6565659046173096, "learning_rate": 1.03880222594919e-05, "loss": 0.2734, "step": 15342 }, { "epoch": 50.30491803278689, "grad_norm": 4.3832526206970215, "learning_rate": 1.0386961170014767e-05, "loss": 0.3071, "step": 15343 }, { "epoch": 50.308196721311475, "grad_norm": 3.6008691787719727, "learning_rate": 1.0385900076174242e-05, "loss": 0.3393, "step": 15344 }, { "epoch": 50.31147540983606, "grad_norm": 3.630409002304077, "learning_rate": 1.038483897798229e-05, "loss": 0.298, "step": 15345 }, { "epoch": 50.31475409836066, "grad_norm": 3.4050400257110596, "learning_rate": 1.0383777875450881e-05, "loss": 0.1132, "step": 15346 }, { "epoch": 50.31803278688525, "grad_norm": 3.423057794570923, "learning_rate": 1.0382716768591979e-05, "loss": 0.2489, "step": 15347 }, { "epoch": 50.321311475409836, "grad_norm": 3.451460599899292, "learning_rate": 1.0381655657417547e-05, "loss": 0.2325, "step": 15348 }, { "epoch": 50.324590163934424, "grad_norm": 5.550518035888672, "learning_rate": 1.0380594541939546e-05, "loss": 0.2701, "step": 15349 }, { "epoch": 50.32786885245902, "grad_norm": 3.422443151473999, "learning_rate": 1.0379533422169952e-05, "loss": 0.1229, "step": 15350 }, { "epoch": 50.33114754098361, "grad_norm": 3.432743549346924, "learning_rate": 1.0378472298120719e-05, "loss": 0.2548, "step": 15351 }, { "epoch": 50.334426229508196, "grad_norm": 3.532938241958618, "learning_rate": 1.037741116980382e-05, "loss": 0.322, "step": 15352 }, { "epoch": 50.337704918032784, "grad_norm": 3.1138274669647217, "learning_rate": 1.0376350037231217e-05, "loss": 0.1831, "step": 15353 }, { "epoch": 50.34098360655738, "grad_norm": 4.324737548828125, "learning_rate": 1.0375288900414877e-05, "loss": 0.2517, "step": 15354 }, { "epoch": 50.34426229508197, "grad_norm": 3.274345874786377, "learning_rate": 1.0374227759366764e-05, "loss": 0.3239, "step": 15355 }, { "epoch": 50.34754098360656, "grad_norm": 3.4632163047790527, "learning_rate": 1.0373166614098844e-05, "loss": 0.2267, "step": 15356 }, { "epoch": 50.350819672131145, "grad_norm": 5.606688022613525, "learning_rate": 1.0372105464623083e-05, "loss": 0.3131, "step": 15357 }, { "epoch": 50.35409836065574, "grad_norm": 3.6416378021240234, "learning_rate": 1.0371044310951443e-05, "loss": 0.2186, "step": 15358 }, { "epoch": 50.35737704918033, "grad_norm": 3.4994354248046875, "learning_rate": 1.0369983153095897e-05, "loss": 0.2323, "step": 15359 }, { "epoch": 50.36065573770492, "grad_norm": 3.068214178085327, "learning_rate": 1.0368921991068402e-05, "loss": 0.2092, "step": 15360 }, { "epoch": 50.363934426229505, "grad_norm": 3.287660598754883, "learning_rate": 1.0367860824880932e-05, "loss": 0.3176, "step": 15361 }, { "epoch": 50.3672131147541, "grad_norm": 3.1850926876068115, "learning_rate": 1.0366799654545445e-05, "loss": 0.2035, "step": 15362 }, { "epoch": 50.37049180327869, "grad_norm": 2.8571016788482666, "learning_rate": 1.0365738480073912e-05, "loss": 0.1465, "step": 15363 }, { "epoch": 50.37377049180328, "grad_norm": 4.501901149749756, "learning_rate": 1.0364677301478297e-05, "loss": 0.2316, "step": 15364 }, { "epoch": 50.377049180327866, "grad_norm": 2.484973192214966, "learning_rate": 1.0363616118770567e-05, "loss": 0.2378, "step": 15365 }, { "epoch": 50.38032786885246, "grad_norm": 3.4163155555725098, "learning_rate": 1.0362554931962687e-05, "loss": 0.2483, "step": 15366 }, { "epoch": 50.38360655737705, "grad_norm": 3.7394955158233643, "learning_rate": 1.0361493741066623e-05, "loss": 0.1921, "step": 15367 }, { "epoch": 50.38688524590164, "grad_norm": 3.2759177684783936, "learning_rate": 1.0360432546094341e-05, "loss": 0.4693, "step": 15368 }, { "epoch": 50.390163934426226, "grad_norm": 3.7801175117492676, "learning_rate": 1.035937134705781e-05, "loss": 0.3549, "step": 15369 }, { "epoch": 50.39344262295082, "grad_norm": 3.3546135425567627, "learning_rate": 1.0358310143968988e-05, "loss": 0.2974, "step": 15370 }, { "epoch": 50.39672131147541, "grad_norm": 3.4771649837493896, "learning_rate": 1.0357248936839852e-05, "loss": 0.273, "step": 15371 }, { "epoch": 50.4, "grad_norm": 3.0945730209350586, "learning_rate": 1.0356187725682359e-05, "loss": 0.1796, "step": 15372 }, { "epoch": 50.40327868852459, "grad_norm": 2.4308319091796875, "learning_rate": 1.035512651050848e-05, "loss": 0.2167, "step": 15373 }, { "epoch": 50.40655737704918, "grad_norm": 3.179659605026245, "learning_rate": 1.035406529133018e-05, "loss": 0.2683, "step": 15374 }, { "epoch": 50.40983606557377, "grad_norm": 4.289332866668701, "learning_rate": 1.0353004068159427e-05, "loss": 0.2293, "step": 15375 }, { "epoch": 50.41311475409836, "grad_norm": 3.0308358669281006, "learning_rate": 1.0351942841008185e-05, "loss": 0.1794, "step": 15376 }, { "epoch": 50.41639344262295, "grad_norm": 3.297410726547241, "learning_rate": 1.035088160988842e-05, "loss": 0.2646, "step": 15377 }, { "epoch": 50.41967213114754, "grad_norm": 4.219479560852051, "learning_rate": 1.0349820374812105e-05, "loss": 0.3092, "step": 15378 }, { "epoch": 50.42295081967213, "grad_norm": 4.535449981689453, "learning_rate": 1.0348759135791197e-05, "loss": 0.3152, "step": 15379 }, { "epoch": 50.42622950819672, "grad_norm": 3.6557013988494873, "learning_rate": 1.0347697892837671e-05, "loss": 0.3281, "step": 15380 }, { "epoch": 50.429508196721315, "grad_norm": 3.7581019401550293, "learning_rate": 1.0346636645963489e-05, "loss": 0.3337, "step": 15381 }, { "epoch": 50.4327868852459, "grad_norm": 3.4881246089935303, "learning_rate": 1.0345575395180614e-05, "loss": 0.2774, "step": 15382 }, { "epoch": 50.43606557377049, "grad_norm": 3.952481508255005, "learning_rate": 1.0344514140501023e-05, "loss": 0.1401, "step": 15383 }, { "epoch": 50.43934426229508, "grad_norm": 2.83703351020813, "learning_rate": 1.034345288193667e-05, "loss": 0.2074, "step": 15384 }, { "epoch": 50.442622950819676, "grad_norm": 2.815481185913086, "learning_rate": 1.0342391619499535e-05, "loss": 0.3622, "step": 15385 }, { "epoch": 50.445901639344264, "grad_norm": 3.222773790359497, "learning_rate": 1.0341330353201576e-05, "loss": 0.3281, "step": 15386 }, { "epoch": 50.44918032786885, "grad_norm": 5.160064697265625, "learning_rate": 1.0340269083054764e-05, "loss": 0.1541, "step": 15387 }, { "epoch": 50.45245901639344, "grad_norm": 3.0434458255767822, "learning_rate": 1.0339207809071062e-05, "loss": 0.2984, "step": 15388 }, { "epoch": 50.455737704918036, "grad_norm": 3.3913309574127197, "learning_rate": 1.033814653126244e-05, "loss": 0.4594, "step": 15389 }, { "epoch": 50.459016393442624, "grad_norm": 3.8071255683898926, "learning_rate": 1.0337085249640864e-05, "loss": 0.2928, "step": 15390 }, { "epoch": 50.46229508196721, "grad_norm": 3.5067245960235596, "learning_rate": 1.0336023964218302e-05, "loss": 0.3284, "step": 15391 }, { "epoch": 50.4655737704918, "grad_norm": 3.507549524307251, "learning_rate": 1.0334962675006722e-05, "loss": 0.2766, "step": 15392 }, { "epoch": 50.4688524590164, "grad_norm": 3.537442207336426, "learning_rate": 1.0333901382018085e-05, "loss": 0.2112, "step": 15393 }, { "epoch": 50.472131147540985, "grad_norm": 3.1671595573425293, "learning_rate": 1.0332840085264366e-05, "loss": 0.3014, "step": 15394 }, { "epoch": 50.47540983606557, "grad_norm": 3.192664623260498, "learning_rate": 1.033177878475753e-05, "loss": 0.0953, "step": 15395 }, { "epoch": 50.47868852459016, "grad_norm": 3.154088020324707, "learning_rate": 1.0330717480509539e-05, "loss": 0.2371, "step": 15396 }, { "epoch": 50.48196721311476, "grad_norm": 4.027585983276367, "learning_rate": 1.0329656172532368e-05, "loss": 0.3132, "step": 15397 }, { "epoch": 50.485245901639345, "grad_norm": 3.3909168243408203, "learning_rate": 1.032859486083798e-05, "loss": 0.227, "step": 15398 }, { "epoch": 50.488524590163934, "grad_norm": 3.7790443897247314, "learning_rate": 1.032753354543834e-05, "loss": 0.3492, "step": 15399 }, { "epoch": 50.49180327868852, "grad_norm": 3.966444492340088, "learning_rate": 1.0326472226345423e-05, "loss": 0.2947, "step": 15400 }, { "epoch": 50.49508196721312, "grad_norm": 3.6902036666870117, "learning_rate": 1.0325410903571192e-05, "loss": 0.2481, "step": 15401 }, { "epoch": 50.498360655737706, "grad_norm": 4.17355489730835, "learning_rate": 1.0324349577127614e-05, "loss": 0.2178, "step": 15402 }, { "epoch": 50.501639344262294, "grad_norm": 3.1300671100616455, "learning_rate": 1.0323288247026656e-05, "loss": 0.2715, "step": 15403 }, { "epoch": 50.50491803278688, "grad_norm": 3.3392210006713867, "learning_rate": 1.0322226913280289e-05, "loss": 0.2718, "step": 15404 }, { "epoch": 50.50819672131148, "grad_norm": 2.683009386062622, "learning_rate": 1.0321165575900479e-05, "loss": 0.0625, "step": 15405 }, { "epoch": 50.511475409836066, "grad_norm": 3.424313545227051, "learning_rate": 1.0320104234899191e-05, "loss": 0.3029, "step": 15406 }, { "epoch": 50.514754098360655, "grad_norm": 4.195333003997803, "learning_rate": 1.0319042890288398e-05, "loss": 0.2331, "step": 15407 }, { "epoch": 50.51803278688524, "grad_norm": 4.110692977905273, "learning_rate": 1.031798154208006e-05, "loss": 0.3028, "step": 15408 }, { "epoch": 50.52131147540984, "grad_norm": 3.0686709880828857, "learning_rate": 1.0316920190286152e-05, "loss": 0.289, "step": 15409 }, { "epoch": 50.52459016393443, "grad_norm": 3.170793294906616, "learning_rate": 1.0315858834918639e-05, "loss": 0.1813, "step": 15410 }, { "epoch": 50.527868852459015, "grad_norm": 4.012045860290527, "learning_rate": 1.031479747598949e-05, "loss": 0.2215, "step": 15411 }, { "epoch": 50.5311475409836, "grad_norm": 3.9302029609680176, "learning_rate": 1.0313736113510671e-05, "loss": 0.3058, "step": 15412 }, { "epoch": 50.5344262295082, "grad_norm": 3.18778395652771, "learning_rate": 1.0312674747494154e-05, "loss": 0.137, "step": 15413 }, { "epoch": 50.53770491803279, "grad_norm": 2.4552600383758545, "learning_rate": 1.0311613377951898e-05, "loss": 0.1234, "step": 15414 }, { "epoch": 50.540983606557376, "grad_norm": 3.4956843852996826, "learning_rate": 1.0310552004895884e-05, "loss": 0.1393, "step": 15415 }, { "epoch": 50.544262295081964, "grad_norm": 2.5793678760528564, "learning_rate": 1.030949062833807e-05, "loss": 0.2057, "step": 15416 }, { "epoch": 50.54754098360656, "grad_norm": 3.5375142097473145, "learning_rate": 1.0308429248290428e-05, "loss": 0.317, "step": 15417 }, { "epoch": 50.55081967213115, "grad_norm": 3.1007308959960938, "learning_rate": 1.0307367864764925e-05, "loss": 0.1416, "step": 15418 }, { "epoch": 50.554098360655736, "grad_norm": 3.07917857170105, "learning_rate": 1.0306306477773531e-05, "loss": 0.3205, "step": 15419 }, { "epoch": 50.557377049180324, "grad_norm": 3.8185977935791016, "learning_rate": 1.0305245087328213e-05, "loss": 0.1923, "step": 15420 }, { "epoch": 50.56065573770492, "grad_norm": 3.144355058670044, "learning_rate": 1.0304183693440936e-05, "loss": 0.3022, "step": 15421 }, { "epoch": 50.56393442622951, "grad_norm": 3.2804083824157715, "learning_rate": 1.0303122296123675e-05, "loss": 0.2497, "step": 15422 }, { "epoch": 50.5672131147541, "grad_norm": 3.494978189468384, "learning_rate": 1.0302060895388393e-05, "loss": 0.2344, "step": 15423 }, { "epoch": 50.570491803278685, "grad_norm": 3.333941698074341, "learning_rate": 1.0300999491247058e-05, "loss": 0.1418, "step": 15424 }, { "epoch": 50.57377049180328, "grad_norm": 3.375847101211548, "learning_rate": 1.0299938083711646e-05, "loss": 0.3384, "step": 15425 }, { "epoch": 50.57704918032787, "grad_norm": 3.3988425731658936, "learning_rate": 1.0298876672794118e-05, "loss": 0.1698, "step": 15426 }, { "epoch": 50.58032786885246, "grad_norm": 3.515885591506958, "learning_rate": 1.0297815258506444e-05, "loss": 0.4257, "step": 15427 }, { "epoch": 50.58360655737705, "grad_norm": 3.718949317932129, "learning_rate": 1.0296753840860595e-05, "loss": 0.341, "step": 15428 }, { "epoch": 50.58688524590164, "grad_norm": 3.7577621936798096, "learning_rate": 1.0295692419868537e-05, "loss": 0.1897, "step": 15429 }, { "epoch": 50.59016393442623, "grad_norm": 3.0915579795837402, "learning_rate": 1.0294630995542241e-05, "loss": 0.2851, "step": 15430 }, { "epoch": 50.59344262295082, "grad_norm": 4.015690326690674, "learning_rate": 1.0293569567893673e-05, "loss": 0.3096, "step": 15431 }, { "epoch": 50.59672131147541, "grad_norm": 3.4227023124694824, "learning_rate": 1.0292508136934803e-05, "loss": 0.3163, "step": 15432 }, { "epoch": 50.6, "grad_norm": 3.842120885848999, "learning_rate": 1.0291446702677598e-05, "loss": 0.5348, "step": 15433 }, { "epoch": 50.60327868852459, "grad_norm": 2.490241050720215, "learning_rate": 1.029038526513403e-05, "loss": 0.1106, "step": 15434 }, { "epoch": 50.60655737704918, "grad_norm": 3.7370359897613525, "learning_rate": 1.0289323824316067e-05, "loss": 0.3125, "step": 15435 }, { "epoch": 50.609836065573774, "grad_norm": 4.38801383972168, "learning_rate": 1.0288262380235674e-05, "loss": 0.2817, "step": 15436 }, { "epoch": 50.61311475409836, "grad_norm": 3.157299518585205, "learning_rate": 1.0287200932904826e-05, "loss": 0.1684, "step": 15437 }, { "epoch": 50.61639344262295, "grad_norm": 2.885873794555664, "learning_rate": 1.0286139482335486e-05, "loss": 0.3019, "step": 15438 }, { "epoch": 50.61967213114754, "grad_norm": 3.498471260070801, "learning_rate": 1.028507802853963e-05, "loss": 0.28, "step": 15439 }, { "epoch": 50.622950819672134, "grad_norm": 3.2744410037994385, "learning_rate": 1.0284016571529219e-05, "loss": 0.1163, "step": 15440 }, { "epoch": 50.62622950819672, "grad_norm": 3.1650705337524414, "learning_rate": 1.0282955111316228e-05, "loss": 0.2571, "step": 15441 }, { "epoch": 50.62950819672131, "grad_norm": 2.4755477905273438, "learning_rate": 1.0281893647912621e-05, "loss": 0.1261, "step": 15442 }, { "epoch": 50.6327868852459, "grad_norm": 3.5434675216674805, "learning_rate": 1.0280832181330376e-05, "loss": 0.3575, "step": 15443 }, { "epoch": 50.636065573770495, "grad_norm": 3.3718340396881104, "learning_rate": 1.0279770711581451e-05, "loss": 0.3635, "step": 15444 }, { "epoch": 50.63934426229508, "grad_norm": 3.693665027618408, "learning_rate": 1.0278709238677821e-05, "loss": 0.2967, "step": 15445 }, { "epoch": 50.64262295081967, "grad_norm": 3.66032338142395, "learning_rate": 1.0277647762631457e-05, "loss": 0.271, "step": 15446 }, { "epoch": 50.64590163934426, "grad_norm": 4.007794380187988, "learning_rate": 1.0276586283454323e-05, "loss": 0.3589, "step": 15447 }, { "epoch": 50.649180327868855, "grad_norm": 3.14707088470459, "learning_rate": 1.0275524801158394e-05, "loss": 0.2295, "step": 15448 }, { "epoch": 50.65245901639344, "grad_norm": 2.443492889404297, "learning_rate": 1.0274463315755634e-05, "loss": 0.0848, "step": 15449 }, { "epoch": 50.65573770491803, "grad_norm": 3.2500405311584473, "learning_rate": 1.0273401827258014e-05, "loss": 0.2239, "step": 15450 }, { "epoch": 50.65901639344262, "grad_norm": 3.354790687561035, "learning_rate": 1.0272340335677506e-05, "loss": 0.2476, "step": 15451 }, { "epoch": 50.662295081967216, "grad_norm": 3.573174238204956, "learning_rate": 1.0271278841026075e-05, "loss": 0.3615, "step": 15452 }, { "epoch": 50.665573770491804, "grad_norm": 4.709749698638916, "learning_rate": 1.0270217343315696e-05, "loss": 0.3397, "step": 15453 }, { "epoch": 50.66885245901639, "grad_norm": 4.685684680938721, "learning_rate": 1.0269155842558333e-05, "loss": 0.269, "step": 15454 }, { "epoch": 50.67213114754098, "grad_norm": 3.1719138622283936, "learning_rate": 1.026809433876596e-05, "loss": 0.3035, "step": 15455 }, { "epoch": 50.675409836065576, "grad_norm": 3.18273663520813, "learning_rate": 1.0267032831950546e-05, "loss": 0.1692, "step": 15456 }, { "epoch": 50.678688524590164, "grad_norm": 3.224485397338867, "learning_rate": 1.0265971322124057e-05, "loss": 0.29, "step": 15457 }, { "epoch": 50.68196721311475, "grad_norm": 3.5185508728027344, "learning_rate": 1.0264909809298465e-05, "loss": 0.2699, "step": 15458 }, { "epoch": 50.68524590163934, "grad_norm": 3.443150758743286, "learning_rate": 1.0263848293485738e-05, "loss": 0.2417, "step": 15459 }, { "epoch": 50.68852459016394, "grad_norm": 3.9962785243988037, "learning_rate": 1.0262786774697847e-05, "loss": 0.2852, "step": 15460 }, { "epoch": 50.691803278688525, "grad_norm": 3.452636241912842, "learning_rate": 1.0261725252946766e-05, "loss": 0.209, "step": 15461 }, { "epoch": 50.69508196721311, "grad_norm": 3.4117050170898438, "learning_rate": 1.0260663728244458e-05, "loss": 0.3988, "step": 15462 }, { "epoch": 50.6983606557377, "grad_norm": 4.1428542137146, "learning_rate": 1.0259602200602896e-05, "loss": 0.2753, "step": 15463 }, { "epoch": 50.7016393442623, "grad_norm": 3.313666582107544, "learning_rate": 1.0258540670034047e-05, "loss": 0.1232, "step": 15464 }, { "epoch": 50.704918032786885, "grad_norm": 5.878079891204834, "learning_rate": 1.0257479136549889e-05, "loss": 0.2021, "step": 15465 }, { "epoch": 50.708196721311474, "grad_norm": 3.398019790649414, "learning_rate": 1.0256417600162378e-05, "loss": 0.2796, "step": 15466 }, { "epoch": 50.71147540983607, "grad_norm": 4.581136703491211, "learning_rate": 1.02553560608835e-05, "loss": 0.3059, "step": 15467 }, { "epoch": 50.71475409836066, "grad_norm": 2.760284900665283, "learning_rate": 1.025429451872521e-05, "loss": 0.1119, "step": 15468 }, { "epoch": 50.718032786885246, "grad_norm": 3.0151658058166504, "learning_rate": 1.0253232973699489e-05, "loss": 0.1908, "step": 15469 }, { "epoch": 50.721311475409834, "grad_norm": 2.9061434268951416, "learning_rate": 1.0252171425818303e-05, "loss": 0.2735, "step": 15470 }, { "epoch": 50.72459016393443, "grad_norm": 3.6186699867248535, "learning_rate": 1.025110987509362e-05, "loss": 0.2746, "step": 15471 }, { "epoch": 50.72786885245902, "grad_norm": 4.460209846496582, "learning_rate": 1.0250048321537413e-05, "loss": 0.4227, "step": 15472 }, { "epoch": 50.731147540983606, "grad_norm": 3.379495859146118, "learning_rate": 1.0248986765161651e-05, "loss": 0.1608, "step": 15473 }, { "epoch": 50.734426229508195, "grad_norm": 6.498770713806152, "learning_rate": 1.0247925205978305e-05, "loss": 0.2583, "step": 15474 }, { "epoch": 50.73770491803279, "grad_norm": 3.482027292251587, "learning_rate": 1.0246863643999343e-05, "loss": 0.4482, "step": 15475 }, { "epoch": 50.74098360655738, "grad_norm": 3.5165295600891113, "learning_rate": 1.0245802079236737e-05, "loss": 0.1688, "step": 15476 }, { "epoch": 50.74426229508197, "grad_norm": 3.419116973876953, "learning_rate": 1.0244740511702458e-05, "loss": 0.2687, "step": 15477 }, { "epoch": 50.747540983606555, "grad_norm": 3.466585159301758, "learning_rate": 1.0243678941408475e-05, "loss": 0.2989, "step": 15478 }, { "epoch": 50.75081967213115, "grad_norm": 3.480137348175049, "learning_rate": 1.0242617368366759e-05, "loss": 0.2438, "step": 15479 }, { "epoch": 50.75409836065574, "grad_norm": 4.281168460845947, "learning_rate": 1.0241555792589277e-05, "loss": 0.2551, "step": 15480 }, { "epoch": 50.75737704918033, "grad_norm": 3.631918430328369, "learning_rate": 1.0240494214088004e-05, "loss": 0.3088, "step": 15481 }, { "epoch": 50.760655737704916, "grad_norm": 3.0994746685028076, "learning_rate": 1.023943263287491e-05, "loss": 0.2055, "step": 15482 }, { "epoch": 50.76393442622951, "grad_norm": 3.1351680755615234, "learning_rate": 1.0238371048961966e-05, "loss": 0.3373, "step": 15483 }, { "epoch": 50.7672131147541, "grad_norm": 3.6137359142303467, "learning_rate": 1.0237309462361136e-05, "loss": 0.3167, "step": 15484 }, { "epoch": 50.77049180327869, "grad_norm": 4.540746688842773, "learning_rate": 1.0236247873084397e-05, "loss": 0.5736, "step": 15485 }, { "epoch": 50.773770491803276, "grad_norm": 4.091405391693115, "learning_rate": 1.0235186281143718e-05, "loss": 0.4775, "step": 15486 }, { "epoch": 50.77704918032787, "grad_norm": 3.515021562576294, "learning_rate": 1.023412468655107e-05, "loss": 0.2758, "step": 15487 }, { "epoch": 50.78032786885246, "grad_norm": 4.5966033935546875, "learning_rate": 1.023306308931842e-05, "loss": 0.3504, "step": 15488 }, { "epoch": 50.78360655737705, "grad_norm": 4.53684663772583, "learning_rate": 1.0232001489457744e-05, "loss": 0.3254, "step": 15489 }, { "epoch": 50.78688524590164, "grad_norm": 3.5273454189300537, "learning_rate": 1.0230939886981009e-05, "loss": 0.2545, "step": 15490 }, { "epoch": 50.79016393442623, "grad_norm": 3.062838077545166, "learning_rate": 1.0229878281900188e-05, "loss": 0.3396, "step": 15491 }, { "epoch": 50.79344262295082, "grad_norm": 4.002569198608398, "learning_rate": 1.0228816674227247e-05, "loss": 0.1446, "step": 15492 }, { "epoch": 50.79672131147541, "grad_norm": 3.656325101852417, "learning_rate": 1.0227755063974163e-05, "loss": 0.214, "step": 15493 }, { "epoch": 50.8, "grad_norm": 4.024080276489258, "learning_rate": 1.02266934511529e-05, "loss": 0.2545, "step": 15494 }, { "epoch": 50.80327868852459, "grad_norm": 4.568101406097412, "learning_rate": 1.0225631835775438e-05, "loss": 0.3793, "step": 15495 }, { "epoch": 50.80655737704918, "grad_norm": 3.3494226932525635, "learning_rate": 1.022457021785374e-05, "loss": 0.3287, "step": 15496 }, { "epoch": 50.80983606557377, "grad_norm": 3.319185256958008, "learning_rate": 1.0223508597399778e-05, "loss": 0.3474, "step": 15497 }, { "epoch": 50.81311475409836, "grad_norm": 3.5694596767425537, "learning_rate": 1.0222446974425528e-05, "loss": 0.4241, "step": 15498 }, { "epoch": 50.81639344262295, "grad_norm": 4.442532539367676, "learning_rate": 1.0221385348942953e-05, "loss": 0.2378, "step": 15499 }, { "epoch": 50.81967213114754, "grad_norm": 3.603926420211792, "learning_rate": 1.022032372096403e-05, "loss": 0.264, "step": 15500 }, { "epoch": 50.82295081967213, "grad_norm": 2.6730387210845947, "learning_rate": 1.0219262090500725e-05, "loss": 0.3629, "step": 15501 }, { "epoch": 50.82622950819672, "grad_norm": 3.454512119293213, "learning_rate": 1.0218200457565018e-05, "loss": 0.2298, "step": 15502 }, { "epoch": 50.829508196721314, "grad_norm": 3.846204996109009, "learning_rate": 1.0217138822168869e-05, "loss": 0.5199, "step": 15503 }, { "epoch": 50.8327868852459, "grad_norm": 3.4645791053771973, "learning_rate": 1.0216077184324255e-05, "loss": 0.157, "step": 15504 }, { "epoch": 50.83606557377049, "grad_norm": 3.573223352432251, "learning_rate": 1.0215015544043145e-05, "loss": 0.193, "step": 15505 }, { "epoch": 50.83934426229508, "grad_norm": 3.699798107147217, "learning_rate": 1.0213953901337512e-05, "loss": 0.1681, "step": 15506 }, { "epoch": 50.842622950819674, "grad_norm": 4.8427863121032715, "learning_rate": 1.0212892256219328e-05, "loss": 0.2754, "step": 15507 }, { "epoch": 50.84590163934426, "grad_norm": 3.73370623588562, "learning_rate": 1.0211830608700561e-05, "loss": 0.3426, "step": 15508 }, { "epoch": 50.84918032786885, "grad_norm": 3.631962537765503, "learning_rate": 1.0210768958793184e-05, "loss": 0.3386, "step": 15509 }, { "epoch": 50.85245901639344, "grad_norm": 3.282344102859497, "learning_rate": 1.020970730650917e-05, "loss": 0.2338, "step": 15510 }, { "epoch": 50.855737704918035, "grad_norm": 3.3940696716308594, "learning_rate": 1.0208645651860485e-05, "loss": 0.2576, "step": 15511 }, { "epoch": 50.85901639344262, "grad_norm": 3.73001766204834, "learning_rate": 1.0207583994859103e-05, "loss": 0.2064, "step": 15512 }, { "epoch": 50.86229508196721, "grad_norm": 3.683917999267578, "learning_rate": 1.0206522335516999e-05, "loss": 0.317, "step": 15513 }, { "epoch": 50.86557377049181, "grad_norm": 3.7491118907928467, "learning_rate": 1.0205460673846138e-05, "loss": 0.3201, "step": 15514 }, { "epoch": 50.868852459016395, "grad_norm": 4.56528902053833, "learning_rate": 1.0204399009858495e-05, "loss": 0.2501, "step": 15515 }, { "epoch": 50.87213114754098, "grad_norm": 2.9439468383789062, "learning_rate": 1.020333734356604e-05, "loss": 0.1665, "step": 15516 }, { "epoch": 50.87540983606557, "grad_norm": 3.362788438796997, "learning_rate": 1.0202275674980746e-05, "loss": 0.2454, "step": 15517 }, { "epoch": 50.87868852459017, "grad_norm": 3.868919610977173, "learning_rate": 1.0201214004114584e-05, "loss": 0.2935, "step": 15518 }, { "epoch": 50.881967213114756, "grad_norm": 3.0987303256988525, "learning_rate": 1.0200152330979525e-05, "loss": 0.2346, "step": 15519 }, { "epoch": 50.885245901639344, "grad_norm": 3.7607381343841553, "learning_rate": 1.0199090655587538e-05, "loss": 0.1713, "step": 15520 }, { "epoch": 50.88852459016393, "grad_norm": 4.021154880523682, "learning_rate": 1.01980289779506e-05, "loss": 0.4546, "step": 15521 }, { "epoch": 50.89180327868853, "grad_norm": 3.7210819721221924, "learning_rate": 1.0196967298080678e-05, "loss": 0.3291, "step": 15522 }, { "epoch": 50.895081967213116, "grad_norm": 2.359043836593628, "learning_rate": 1.0195905615989743e-05, "loss": 0.2858, "step": 15523 }, { "epoch": 50.898360655737704, "grad_norm": 3.1115000247955322, "learning_rate": 1.0194843931689772e-05, "loss": 0.206, "step": 15524 }, { "epoch": 50.90163934426229, "grad_norm": 2.858214855194092, "learning_rate": 1.019378224519273e-05, "loss": 0.2307, "step": 15525 }, { "epoch": 50.90491803278689, "grad_norm": 3.443843126296997, "learning_rate": 1.0192720556510593e-05, "loss": 0.287, "step": 15526 }, { "epoch": 50.90819672131148, "grad_norm": 3.633676052093506, "learning_rate": 1.0191658865655332e-05, "loss": 0.2734, "step": 15527 }, { "epoch": 50.911475409836065, "grad_norm": 3.2077667713165283, "learning_rate": 1.0190597172638918e-05, "loss": 0.2457, "step": 15528 }, { "epoch": 50.91475409836065, "grad_norm": 3.3971474170684814, "learning_rate": 1.018953547747332e-05, "loss": 0.1494, "step": 15529 }, { "epoch": 50.91803278688525, "grad_norm": 3.486420154571533, "learning_rate": 1.0188473780170516e-05, "loss": 0.2085, "step": 15530 }, { "epoch": 50.92131147540984, "grad_norm": 3.818272829055786, "learning_rate": 1.0187412080742472e-05, "loss": 0.2989, "step": 15531 }, { "epoch": 50.924590163934425, "grad_norm": 3.7163567543029785, "learning_rate": 1.0186350379201163e-05, "loss": 0.2966, "step": 15532 }, { "epoch": 50.927868852459014, "grad_norm": 4.003514289855957, "learning_rate": 1.0185288675558558e-05, "loss": 0.2929, "step": 15533 }, { "epoch": 50.93114754098361, "grad_norm": 4.276027202606201, "learning_rate": 1.0184226969826632e-05, "loss": 0.2301, "step": 15534 }, { "epoch": 50.9344262295082, "grad_norm": 3.451744556427002, "learning_rate": 1.0183165262017355e-05, "loss": 0.4008, "step": 15535 }, { "epoch": 50.937704918032786, "grad_norm": 4.516457557678223, "learning_rate": 1.01821035521427e-05, "loss": 0.255, "step": 15536 }, { "epoch": 50.940983606557374, "grad_norm": 3.7482798099517822, "learning_rate": 1.0181041840214639e-05, "loss": 0.292, "step": 15537 }, { "epoch": 50.94426229508197, "grad_norm": 2.7492542266845703, "learning_rate": 1.0179980126245139e-05, "loss": 0.1841, "step": 15538 }, { "epoch": 50.94754098360656, "grad_norm": 3.5773417949676514, "learning_rate": 1.0178918410246179e-05, "loss": 0.1995, "step": 15539 }, { "epoch": 50.950819672131146, "grad_norm": 4.274165630340576, "learning_rate": 1.0177856692229727e-05, "loss": 0.2347, "step": 15540 }, { "epoch": 50.954098360655735, "grad_norm": 2.9688773155212402, "learning_rate": 1.0176794972207758e-05, "loss": 0.1875, "step": 15541 }, { "epoch": 50.95737704918033, "grad_norm": 3.568427324295044, "learning_rate": 1.0175733250192238e-05, "loss": 0.3241, "step": 15542 }, { "epoch": 50.96065573770492, "grad_norm": 4.315120220184326, "learning_rate": 1.0174671526195146e-05, "loss": 0.289, "step": 15543 }, { "epoch": 50.96393442622951, "grad_norm": 5.702018737792969, "learning_rate": 1.017360980022845e-05, "loss": 0.4136, "step": 15544 }, { "epoch": 50.967213114754095, "grad_norm": 3.1608023643493652, "learning_rate": 1.0172548072304122e-05, "loss": 0.4741, "step": 15545 }, { "epoch": 50.97049180327869, "grad_norm": 3.3308770656585693, "learning_rate": 1.0171486342434135e-05, "loss": 0.2152, "step": 15546 }, { "epoch": 50.97377049180328, "grad_norm": 3.2867794036865234, "learning_rate": 1.0170424610630465e-05, "loss": 0.4165, "step": 15547 }, { "epoch": 50.97704918032787, "grad_norm": 3.146864652633667, "learning_rate": 1.0169362876905078e-05, "loss": 0.4321, "step": 15548 }, { "epoch": 50.980327868852456, "grad_norm": 2.815500497817993, "learning_rate": 1.0168301141269947e-05, "loss": 0.2374, "step": 15549 }, { "epoch": 50.98360655737705, "grad_norm": 3.677781581878662, "learning_rate": 1.0167239403737048e-05, "loss": 0.316, "step": 15550 }, { "epoch": 50.98688524590164, "grad_norm": 2.870697021484375, "learning_rate": 1.0166177664318348e-05, "loss": 0.1339, "step": 15551 }, { "epoch": 50.99016393442623, "grad_norm": 3.18167781829834, "learning_rate": 1.0165115923025824e-05, "loss": 0.1814, "step": 15552 }, { "epoch": 50.993442622950816, "grad_norm": 3.2490952014923096, "learning_rate": 1.0164054179871445e-05, "loss": 0.223, "step": 15553 }, { "epoch": 50.99672131147541, "grad_norm": 2.9992928504943848, "learning_rate": 1.0162992434867187e-05, "loss": 0.2238, "step": 15554 }, { "epoch": 51.0, "grad_norm": 3.1157500743865967, "learning_rate": 1.0161930688025018e-05, "loss": 0.2299, "step": 15555 }, { "epoch": 51.00327868852459, "grad_norm": 3.0822906494140625, "learning_rate": 1.0160868939356911e-05, "loss": 0.2641, "step": 15556 }, { "epoch": 51.006557377049184, "grad_norm": 3.3682427406311035, "learning_rate": 1.015980718887484e-05, "loss": 0.2771, "step": 15557 }, { "epoch": 51.00983606557377, "grad_norm": 4.372849464416504, "learning_rate": 1.015874543659078e-05, "loss": 0.2384, "step": 15558 }, { "epoch": 51.01311475409836, "grad_norm": 10.727498054504395, "learning_rate": 1.0157683682516695e-05, "loss": 0.249, "step": 15559 }, { "epoch": 51.01639344262295, "grad_norm": 3.5087435245513916, "learning_rate": 1.0156621926664566e-05, "loss": 0.3546, "step": 15560 }, { "epoch": 51.019672131147544, "grad_norm": 3.1735541820526123, "learning_rate": 1.0155560169046362e-05, "loss": 0.3194, "step": 15561 }, { "epoch": 51.02295081967213, "grad_norm": 3.2322957515716553, "learning_rate": 1.0154498409674051e-05, "loss": 0.1955, "step": 15562 }, { "epoch": 51.02622950819672, "grad_norm": 3.3216068744659424, "learning_rate": 1.0153436648559616e-05, "loss": 0.2848, "step": 15563 }, { "epoch": 51.02950819672131, "grad_norm": 3.300720453262329, "learning_rate": 1.015237488571502e-05, "loss": 0.3468, "step": 15564 }, { "epoch": 51.032786885245905, "grad_norm": 2.9744482040405273, "learning_rate": 1.015131312115224e-05, "loss": 0.3506, "step": 15565 }, { "epoch": 51.03606557377049, "grad_norm": 3.359158992767334, "learning_rate": 1.0150251354883245e-05, "loss": 0.3541, "step": 15566 }, { "epoch": 51.03934426229508, "grad_norm": 2.978828191757202, "learning_rate": 1.0149189586920011e-05, "loss": 0.3525, "step": 15567 }, { "epoch": 51.04262295081967, "grad_norm": 6.715365886688232, "learning_rate": 1.0148127817274509e-05, "loss": 0.1934, "step": 15568 }, { "epoch": 51.045901639344265, "grad_norm": 2.9649624824523926, "learning_rate": 1.0147066045958714e-05, "loss": 0.2468, "step": 15569 }, { "epoch": 51.049180327868854, "grad_norm": 5.391648292541504, "learning_rate": 1.0146004272984594e-05, "loss": 0.1755, "step": 15570 }, { "epoch": 51.05245901639344, "grad_norm": 3.3284378051757812, "learning_rate": 1.0144942498364125e-05, "loss": 0.2103, "step": 15571 }, { "epoch": 51.05573770491803, "grad_norm": 3.183105945587158, "learning_rate": 1.0143880722109279e-05, "loss": 0.1407, "step": 15572 }, { "epoch": 51.059016393442626, "grad_norm": 2.975393772125244, "learning_rate": 1.0142818944232028e-05, "loss": 0.1878, "step": 15573 }, { "epoch": 51.062295081967214, "grad_norm": 3.950899600982666, "learning_rate": 1.0141757164744346e-05, "loss": 0.2078, "step": 15574 }, { "epoch": 51.0655737704918, "grad_norm": 4.0596604347229, "learning_rate": 1.01406953836582e-05, "loss": 0.3167, "step": 15575 }, { "epoch": 51.06885245901639, "grad_norm": 3.1420578956604004, "learning_rate": 1.0139633600985572e-05, "loss": 0.2031, "step": 15576 }, { "epoch": 51.072131147540986, "grad_norm": 3.0398120880126953, "learning_rate": 1.013857181673843e-05, "loss": 0.1709, "step": 15577 }, { "epoch": 51.075409836065575, "grad_norm": 2.835099220275879, "learning_rate": 1.0137510030928745e-05, "loss": 0.1077, "step": 15578 }, { "epoch": 51.07868852459016, "grad_norm": 3.748176336288452, "learning_rate": 1.0136448243568491e-05, "loss": 0.2834, "step": 15579 }, { "epoch": 51.08196721311475, "grad_norm": 3.666748523712158, "learning_rate": 1.0135386454669642e-05, "loss": 0.2957, "step": 15580 }, { "epoch": 51.08524590163935, "grad_norm": 3.67049241065979, "learning_rate": 1.0134324664244168e-05, "loss": 0.1944, "step": 15581 }, { "epoch": 51.088524590163935, "grad_norm": 2.940335512161255, "learning_rate": 1.0133262872304048e-05, "loss": 0.2664, "step": 15582 }, { "epoch": 51.09180327868852, "grad_norm": 4.070346832275391, "learning_rate": 1.0132201078861246e-05, "loss": 0.2991, "step": 15583 }, { "epoch": 51.09508196721311, "grad_norm": 2.752413272857666, "learning_rate": 1.0131139283927743e-05, "loss": 0.274, "step": 15584 }, { "epoch": 51.09836065573771, "grad_norm": 4.577966690063477, "learning_rate": 1.0130077487515503e-05, "loss": 0.2527, "step": 15585 }, { "epoch": 51.101639344262296, "grad_norm": 3.6058013439178467, "learning_rate": 1.012901568963651e-05, "loss": 0.4501, "step": 15586 }, { "epoch": 51.104918032786884, "grad_norm": 5.2638750076293945, "learning_rate": 1.012795389030273e-05, "loss": 0.1518, "step": 15587 }, { "epoch": 51.10819672131147, "grad_norm": 3.582930088043213, "learning_rate": 1.0126892089526132e-05, "loss": 0.2132, "step": 15588 }, { "epoch": 51.11147540983607, "grad_norm": 2.825679302215576, "learning_rate": 1.01258302873187e-05, "loss": 0.2345, "step": 15589 }, { "epoch": 51.114754098360656, "grad_norm": 3.3930158615112305, "learning_rate": 1.0124768483692395e-05, "loss": 0.3176, "step": 15590 }, { "epoch": 51.118032786885244, "grad_norm": 3.2553985118865967, "learning_rate": 1.01237066786592e-05, "loss": 0.1244, "step": 15591 }, { "epoch": 51.12131147540983, "grad_norm": 3.59224009513855, "learning_rate": 1.0122644872231082e-05, "loss": 0.339, "step": 15592 }, { "epoch": 51.12459016393443, "grad_norm": 3.492514133453369, "learning_rate": 1.0121583064420015e-05, "loss": 0.2276, "step": 15593 }, { "epoch": 51.12786885245902, "grad_norm": 3.4029741287231445, "learning_rate": 1.0120521255237974e-05, "loss": 0.2178, "step": 15594 }, { "epoch": 51.131147540983605, "grad_norm": 3.1360812187194824, "learning_rate": 1.011945944469693e-05, "loss": 0.3032, "step": 15595 }, { "epoch": 51.13442622950819, "grad_norm": 2.9006314277648926, "learning_rate": 1.0118397632808857e-05, "loss": 0.2501, "step": 15596 }, { "epoch": 51.13770491803279, "grad_norm": 3.6191909313201904, "learning_rate": 1.0117335819585725e-05, "loss": 0.2373, "step": 15597 }, { "epoch": 51.14098360655738, "grad_norm": 3.3480215072631836, "learning_rate": 1.0116274005039513e-05, "loss": 0.1584, "step": 15598 }, { "epoch": 51.144262295081965, "grad_norm": 2.8252384662628174, "learning_rate": 1.0115212189182189e-05, "loss": 0.1602, "step": 15599 }, { "epoch": 51.14754098360656, "grad_norm": 3.748948335647583, "learning_rate": 1.0114150372025729e-05, "loss": 0.1956, "step": 15600 }, { "epoch": 51.15081967213115, "grad_norm": 4.081267833709717, "learning_rate": 1.0113088553582101e-05, "loss": 0.3108, "step": 15601 }, { "epoch": 51.15409836065574, "grad_norm": 3.5212199687957764, "learning_rate": 1.0112026733863288e-05, "loss": 0.3427, "step": 15602 }, { "epoch": 51.157377049180326, "grad_norm": 3.593971014022827, "learning_rate": 1.0110964912881251e-05, "loss": 0.163, "step": 15603 }, { "epoch": 51.16065573770492, "grad_norm": 3.1260809898376465, "learning_rate": 1.0109903090647975e-05, "loss": 0.1944, "step": 15604 }, { "epoch": 51.16393442622951, "grad_norm": 3.41131854057312, "learning_rate": 1.0108841267175423e-05, "loss": 0.1971, "step": 15605 }, { "epoch": 51.1672131147541, "grad_norm": 3.361309766769409, "learning_rate": 1.0107779442475576e-05, "loss": 0.2538, "step": 15606 }, { "epoch": 51.170491803278686, "grad_norm": 3.824310779571533, "learning_rate": 1.01067176165604e-05, "loss": 0.1451, "step": 15607 }, { "epoch": 51.17377049180328, "grad_norm": 3.140540838241577, "learning_rate": 1.0105655789441875e-05, "loss": 0.3587, "step": 15608 }, { "epoch": 51.17704918032787, "grad_norm": 4.081319808959961, "learning_rate": 1.0104593961131967e-05, "loss": 0.3053, "step": 15609 }, { "epoch": 51.18032786885246, "grad_norm": 3.6502480506896973, "learning_rate": 1.0103532131642659e-05, "loss": 0.1637, "step": 15610 }, { "epoch": 51.18360655737705, "grad_norm": 3.2347171306610107, "learning_rate": 1.0102470300985914e-05, "loss": 0.1335, "step": 15611 }, { "epoch": 51.18688524590164, "grad_norm": 3.225055456161499, "learning_rate": 1.0101408469173713e-05, "loss": 0.4011, "step": 15612 }, { "epoch": 51.19016393442623, "grad_norm": 3.1694154739379883, "learning_rate": 1.0100346636218024e-05, "loss": 0.4052, "step": 15613 }, { "epoch": 51.19344262295082, "grad_norm": 2.8874363899230957, "learning_rate": 1.0099284802130822e-05, "loss": 0.1958, "step": 15614 }, { "epoch": 51.19672131147541, "grad_norm": 2.9138896465301514, "learning_rate": 1.0098222966924082e-05, "loss": 0.3448, "step": 15615 }, { "epoch": 51.2, "grad_norm": 3.6076221466064453, "learning_rate": 1.0097161130609774e-05, "loss": 0.2779, "step": 15616 }, { "epoch": 51.20327868852459, "grad_norm": 3.6118059158325195, "learning_rate": 1.0096099293199877e-05, "loss": 0.4707, "step": 15617 }, { "epoch": 51.20655737704918, "grad_norm": 3.54677677154541, "learning_rate": 1.0095037454706356e-05, "loss": 0.4802, "step": 15618 }, { "epoch": 51.20983606557377, "grad_norm": 3.2968249320983887, "learning_rate": 1.0093975615141193e-05, "loss": 0.1496, "step": 15619 }, { "epoch": 51.21311475409836, "grad_norm": 3.225548267364502, "learning_rate": 1.0092913774516354e-05, "loss": 0.1686, "step": 15620 }, { "epoch": 51.21639344262295, "grad_norm": 3.8616316318511963, "learning_rate": 1.0091851932843819e-05, "loss": 0.3712, "step": 15621 }, { "epoch": 51.21967213114754, "grad_norm": 3.4758191108703613, "learning_rate": 1.0090790090135554e-05, "loss": 0.2141, "step": 15622 }, { "epoch": 51.22295081967213, "grad_norm": 3.7741858959198, "learning_rate": 1.0089728246403539e-05, "loss": 0.2497, "step": 15623 }, { "epoch": 51.226229508196724, "grad_norm": 3.389068365097046, "learning_rate": 1.0088666401659746e-05, "loss": 0.4729, "step": 15624 }, { "epoch": 51.22950819672131, "grad_norm": 3.6837451457977295, "learning_rate": 1.0087604555916144e-05, "loss": 0.2872, "step": 15625 }, { "epoch": 51.2327868852459, "grad_norm": 3.320563554763794, "learning_rate": 1.0086542709184712e-05, "loss": 0.3161, "step": 15626 }, { "epoch": 51.23606557377049, "grad_norm": 4.214709281921387, "learning_rate": 1.0085480861477418e-05, "loss": 0.3516, "step": 15627 }, { "epoch": 51.239344262295084, "grad_norm": 4.101557731628418, "learning_rate": 1.0084419012806242e-05, "loss": 0.3483, "step": 15628 }, { "epoch": 51.24262295081967, "grad_norm": 2.913712501525879, "learning_rate": 1.008335716318315e-05, "loss": 0.12, "step": 15629 }, { "epoch": 51.24590163934426, "grad_norm": 3.8492002487182617, "learning_rate": 1.0082295312620123e-05, "loss": 0.2845, "step": 15630 }, { "epoch": 51.24918032786885, "grad_norm": 3.4822473526000977, "learning_rate": 1.0081233461129127e-05, "loss": 0.3072, "step": 15631 }, { "epoch": 51.252459016393445, "grad_norm": 13.962657928466797, "learning_rate": 1.0080171608722143e-05, "loss": 0.1551, "step": 15632 }, { "epoch": 51.25573770491803, "grad_norm": 2.9292898178100586, "learning_rate": 1.0079109755411138e-05, "loss": 0.2314, "step": 15633 }, { "epoch": 51.25901639344262, "grad_norm": 3.3821234703063965, "learning_rate": 1.007804790120809e-05, "loss": 0.3243, "step": 15634 }, { "epoch": 51.26229508196721, "grad_norm": 4.772841930389404, "learning_rate": 1.0076986046124968e-05, "loss": 0.3822, "step": 15635 }, { "epoch": 51.265573770491805, "grad_norm": 3.504462480545044, "learning_rate": 1.0075924190173752e-05, "loss": 0.2451, "step": 15636 }, { "epoch": 51.268852459016394, "grad_norm": 3.20656156539917, "learning_rate": 1.0074862333366412e-05, "loss": 0.2485, "step": 15637 }, { "epoch": 51.27213114754098, "grad_norm": 3.462449789047241, "learning_rate": 1.0073800475714918e-05, "loss": 0.2477, "step": 15638 }, { "epoch": 51.27540983606557, "grad_norm": 3.2585256099700928, "learning_rate": 1.0072738617231248e-05, "loss": 0.2058, "step": 15639 }, { "epoch": 51.278688524590166, "grad_norm": 5.035539150238037, "learning_rate": 1.0071676757927375e-05, "loss": 0.3907, "step": 15640 }, { "epoch": 51.281967213114754, "grad_norm": 3.693685531616211, "learning_rate": 1.0070614897815273e-05, "loss": 0.4323, "step": 15641 }, { "epoch": 51.28524590163934, "grad_norm": 2.8339834213256836, "learning_rate": 1.006955303690691e-05, "loss": 0.1049, "step": 15642 }, { "epoch": 51.28852459016394, "grad_norm": 3.746713399887085, "learning_rate": 1.006849117521427e-05, "loss": 0.2291, "step": 15643 }, { "epoch": 51.291803278688526, "grad_norm": 3.4959986209869385, "learning_rate": 1.0067429312749317e-05, "loss": 0.4393, "step": 15644 }, { "epoch": 51.295081967213115, "grad_norm": 3.8507301807403564, "learning_rate": 1.0066367449524031e-05, "loss": 0.2933, "step": 15645 }, { "epoch": 51.2983606557377, "grad_norm": 3.559170961380005, "learning_rate": 1.006530558555038e-05, "loss": 0.3209, "step": 15646 }, { "epoch": 51.3016393442623, "grad_norm": 7.465240478515625, "learning_rate": 1.0064243720840343e-05, "loss": 0.1209, "step": 15647 }, { "epoch": 51.30491803278689, "grad_norm": 3.6206753253936768, "learning_rate": 1.006318185540589e-05, "loss": 0.2809, "step": 15648 }, { "epoch": 51.308196721311475, "grad_norm": 3.1708261966705322, "learning_rate": 1.0062119989258998e-05, "loss": 0.276, "step": 15649 }, { "epoch": 51.31147540983606, "grad_norm": 22.901275634765625, "learning_rate": 1.006105812241164e-05, "loss": 0.3018, "step": 15650 }, { "epoch": 51.31475409836066, "grad_norm": 3.646820068359375, "learning_rate": 1.0059996254875784e-05, "loss": 0.2077, "step": 15651 }, { "epoch": 51.31803278688525, "grad_norm": 3.4630236625671387, "learning_rate": 1.0058934386663411e-05, "loss": 0.2123, "step": 15652 }, { "epoch": 51.321311475409836, "grad_norm": 3.0399832725524902, "learning_rate": 1.0057872517786486e-05, "loss": 0.2507, "step": 15653 }, { "epoch": 51.324590163934424, "grad_norm": 3.0156309604644775, "learning_rate": 1.0056810648256994e-05, "loss": 0.251, "step": 15654 }, { "epoch": 51.32786885245902, "grad_norm": 4.997231960296631, "learning_rate": 1.00557487780869e-05, "loss": 0.4172, "step": 15655 }, { "epoch": 51.33114754098361, "grad_norm": 2.7958312034606934, "learning_rate": 1.0054686907288183e-05, "loss": 0.1728, "step": 15656 }, { "epoch": 51.334426229508196, "grad_norm": 3.6544461250305176, "learning_rate": 1.005362503587281e-05, "loss": 0.2039, "step": 15657 }, { "epoch": 51.337704918032784, "grad_norm": 4.520071029663086, "learning_rate": 1.0052563163852764e-05, "loss": 0.355, "step": 15658 }, { "epoch": 51.34098360655738, "grad_norm": 3.893949031829834, "learning_rate": 1.0051501291240008e-05, "loss": 0.1359, "step": 15659 }, { "epoch": 51.34426229508197, "grad_norm": 3.6035702228546143, "learning_rate": 1.0050439418046528e-05, "loss": 0.1621, "step": 15660 }, { "epoch": 51.34754098360656, "grad_norm": 4.594381809234619, "learning_rate": 1.0049377544284286e-05, "loss": 0.2363, "step": 15661 }, { "epoch": 51.350819672131145, "grad_norm": 4.083475112915039, "learning_rate": 1.0048315669965263e-05, "loss": 0.3364, "step": 15662 }, { "epoch": 51.35409836065574, "grad_norm": 4.108656406402588, "learning_rate": 1.0047253795101433e-05, "loss": 0.1957, "step": 15663 }, { "epoch": 51.35737704918033, "grad_norm": 3.56634783744812, "learning_rate": 1.0046191919704762e-05, "loss": 0.1273, "step": 15664 }, { "epoch": 51.36065573770492, "grad_norm": 4.242283344268799, "learning_rate": 1.004513004378723e-05, "loss": 0.1652, "step": 15665 }, { "epoch": 51.363934426229505, "grad_norm": 3.039358615875244, "learning_rate": 1.0044068167360814e-05, "loss": 0.354, "step": 15666 }, { "epoch": 51.3672131147541, "grad_norm": 4.048415660858154, "learning_rate": 1.004300629043748e-05, "loss": 0.3545, "step": 15667 }, { "epoch": 51.37049180327869, "grad_norm": 3.9621095657348633, "learning_rate": 1.0041944413029208e-05, "loss": 0.3691, "step": 15668 }, { "epoch": 51.37377049180328, "grad_norm": 3.695983409881592, "learning_rate": 1.0040882535147964e-05, "loss": 0.2231, "step": 15669 }, { "epoch": 51.377049180327866, "grad_norm": 6.581607818603516, "learning_rate": 1.003982065680573e-05, "loss": 0.1889, "step": 15670 }, { "epoch": 51.38032786885246, "grad_norm": 3.4724435806274414, "learning_rate": 1.003875877801448e-05, "loss": 0.1334, "step": 15671 }, { "epoch": 51.38360655737705, "grad_norm": 3.0508272647857666, "learning_rate": 1.003769689878618e-05, "loss": 0.1616, "step": 15672 }, { "epoch": 51.38688524590164, "grad_norm": 5.488921165466309, "learning_rate": 1.003663501913281e-05, "loss": 0.3116, "step": 15673 }, { "epoch": 51.390163934426226, "grad_norm": 4.236602783203125, "learning_rate": 1.003557313906634e-05, "loss": 0.2341, "step": 15674 }, { "epoch": 51.39344262295082, "grad_norm": 4.230939865112305, "learning_rate": 1.003451125859875e-05, "loss": 0.243, "step": 15675 }, { "epoch": 51.39672131147541, "grad_norm": 3.7248854637145996, "learning_rate": 1.0033449377742009e-05, "loss": 0.2328, "step": 15676 }, { "epoch": 51.4, "grad_norm": 5.107142925262451, "learning_rate": 1.003238749650809e-05, "loss": 0.4093, "step": 15677 }, { "epoch": 51.40327868852459, "grad_norm": 3.5792815685272217, "learning_rate": 1.0031325614908969e-05, "loss": 0.3667, "step": 15678 }, { "epoch": 51.40655737704918, "grad_norm": 27.93998908996582, "learning_rate": 1.0030263732956615e-05, "loss": 0.2646, "step": 15679 }, { "epoch": 51.40983606557377, "grad_norm": 2.786508798599243, "learning_rate": 1.002920185066301e-05, "loss": 0.1566, "step": 15680 }, { "epoch": 51.41311475409836, "grad_norm": 3.951746702194214, "learning_rate": 1.0028139968040123e-05, "loss": 0.2851, "step": 15681 }, { "epoch": 51.41639344262295, "grad_norm": 3.8777825832366943, "learning_rate": 1.002707808509993e-05, "loss": 0.3663, "step": 15682 }, { "epoch": 51.41967213114754, "grad_norm": 3.5283279418945312, "learning_rate": 1.0026016201854401e-05, "loss": 0.221, "step": 15683 }, { "epoch": 51.42295081967213, "grad_norm": 4.093225479125977, "learning_rate": 1.0024954318315514e-05, "loss": 0.1388, "step": 15684 }, { "epoch": 51.42622950819672, "grad_norm": 5.7744293212890625, "learning_rate": 1.0023892434495239e-05, "loss": 0.2313, "step": 15685 }, { "epoch": 51.429508196721315, "grad_norm": 4.177285671234131, "learning_rate": 1.0022830550405555e-05, "loss": 0.4199, "step": 15686 }, { "epoch": 51.4327868852459, "grad_norm": 3.728010892868042, "learning_rate": 1.002176866605843e-05, "loss": 0.2282, "step": 15687 }, { "epoch": 51.43606557377049, "grad_norm": 3.8484582901000977, "learning_rate": 1.0020706781465843e-05, "loss": 0.2971, "step": 15688 }, { "epoch": 51.43934426229508, "grad_norm": 3.6306920051574707, "learning_rate": 1.0019644896639766e-05, "loss": 0.1619, "step": 15689 }, { "epoch": 51.442622950819676, "grad_norm": 4.2639288902282715, "learning_rate": 1.001858301159217e-05, "loss": 0.2339, "step": 15690 }, { "epoch": 51.445901639344264, "grad_norm": 4.126516819000244, "learning_rate": 1.0017521126335035e-05, "loss": 0.1718, "step": 15691 }, { "epoch": 51.44918032786885, "grad_norm": 3.8207390308380127, "learning_rate": 1.0016459240880325e-05, "loss": 0.2535, "step": 15692 }, { "epoch": 51.45245901639344, "grad_norm": 4.061809062957764, "learning_rate": 1.0015397355240022e-05, "loss": 0.2398, "step": 15693 }, { "epoch": 51.455737704918036, "grad_norm": 4.12227725982666, "learning_rate": 1.0014335469426099e-05, "loss": 0.3028, "step": 15694 }, { "epoch": 51.459016393442624, "grad_norm": 3.772937536239624, "learning_rate": 1.0013273583450528e-05, "loss": 0.3288, "step": 15695 }, { "epoch": 51.46229508196721, "grad_norm": 3.622623920440674, "learning_rate": 1.0012211697325285e-05, "loss": 0.1171, "step": 15696 }, { "epoch": 51.4655737704918, "grad_norm": 3.3998916149139404, "learning_rate": 1.0011149811062342e-05, "loss": 0.2074, "step": 15697 }, { "epoch": 51.4688524590164, "grad_norm": 9.409416198730469, "learning_rate": 1.0010087924673673e-05, "loss": 0.255, "step": 15698 }, { "epoch": 51.472131147540985, "grad_norm": 9.865656852722168, "learning_rate": 1.0009026038171251e-05, "loss": 0.2241, "step": 15699 }, { "epoch": 51.47540983606557, "grad_norm": 3.9452085494995117, "learning_rate": 1.0007964151567052e-05, "loss": 0.4702, "step": 15700 }, { "epoch": 51.47868852459016, "grad_norm": 5.170980453491211, "learning_rate": 1.0006902264873048e-05, "loss": 0.1781, "step": 15701 }, { "epoch": 51.48196721311476, "grad_norm": 3.904540777206421, "learning_rate": 1.0005840378101218e-05, "loss": 0.412, "step": 15702 }, { "epoch": 51.485245901639345, "grad_norm": 3.9050111770629883, "learning_rate": 1.0004778491263527e-05, "loss": 0.2022, "step": 15703 }, { "epoch": 51.488524590163934, "grad_norm": 4.0930986404418945, "learning_rate": 1.0003716604371956e-05, "loss": 0.3611, "step": 15704 }, { "epoch": 51.49180327868852, "grad_norm": 3.44093918800354, "learning_rate": 1.0002654717438474e-05, "loss": 0.2463, "step": 15705 }, { "epoch": 51.49508196721312, "grad_norm": 4.588039398193359, "learning_rate": 1.000159283047506e-05, "loss": 0.2632, "step": 15706 }, { "epoch": 51.498360655737706, "grad_norm": 5.479030609130859, "learning_rate": 1.0000530943493683e-05, "loss": 0.1381, "step": 15707 }, { "epoch": 51.501639344262294, "grad_norm": 3.545149564743042, "learning_rate": 9.99946905650632e-06, "loss": 0.1736, "step": 15708 }, { "epoch": 51.50491803278688, "grad_norm": 3.487431764602661, "learning_rate": 9.998407169524945e-06, "loss": 0.2247, "step": 15709 }, { "epoch": 51.50819672131148, "grad_norm": 4.733730792999268, "learning_rate": 9.997345282561527e-06, "loss": 0.2628, "step": 15710 }, { "epoch": 51.511475409836066, "grad_norm": 5.708304405212402, "learning_rate": 9.996283395628047e-06, "loss": 0.2281, "step": 15711 }, { "epoch": 51.514754098360655, "grad_norm": 4.117678165435791, "learning_rate": 9.995221508736475e-06, "loss": 0.3731, "step": 15712 }, { "epoch": 51.51803278688524, "grad_norm": 3.3286139965057373, "learning_rate": 9.994159621898787e-06, "loss": 0.26, "step": 15713 }, { "epoch": 51.52131147540984, "grad_norm": 3.7986884117126465, "learning_rate": 9.993097735126955e-06, "loss": 0.328, "step": 15714 }, { "epoch": 51.52459016393443, "grad_norm": 3.099316120147705, "learning_rate": 9.99203584843295e-06, "loss": 0.1489, "step": 15715 }, { "epoch": 51.527868852459015, "grad_norm": 3.320192337036133, "learning_rate": 9.990973961828752e-06, "loss": 0.1858, "step": 15716 }, { "epoch": 51.5311475409836, "grad_norm": 4.317476272583008, "learning_rate": 9.989912075326332e-06, "loss": 0.1736, "step": 15717 }, { "epoch": 51.5344262295082, "grad_norm": 3.880053997039795, "learning_rate": 9.988850188937662e-06, "loss": 0.2385, "step": 15718 }, { "epoch": 51.53770491803279, "grad_norm": 3.0691888332366943, "learning_rate": 9.987788302674716e-06, "loss": 0.2298, "step": 15719 }, { "epoch": 51.540983606557376, "grad_norm": 5.387851238250732, "learning_rate": 9.986726416549473e-06, "loss": 0.3224, "step": 15720 }, { "epoch": 51.544262295081964, "grad_norm": 4.119016170501709, "learning_rate": 9.985664530573903e-06, "loss": 0.4124, "step": 15721 }, { "epoch": 51.54754098360656, "grad_norm": 3.675049066543579, "learning_rate": 9.98460264475998e-06, "loss": 0.2691, "step": 15722 }, { "epoch": 51.55081967213115, "grad_norm": 3.568808078765869, "learning_rate": 9.983540759119677e-06, "loss": 0.1522, "step": 15723 }, { "epoch": 51.554098360655736, "grad_norm": 4.135784149169922, "learning_rate": 9.982478873664969e-06, "loss": 0.2953, "step": 15724 }, { "epoch": 51.557377049180324, "grad_norm": 4.260570049285889, "learning_rate": 9.981416988407833e-06, "loss": 0.3332, "step": 15725 }, { "epoch": 51.56065573770492, "grad_norm": 3.560868978500366, "learning_rate": 9.98035510336024e-06, "loss": 0.3232, "step": 15726 }, { "epoch": 51.56393442622951, "grad_norm": 2.648895502090454, "learning_rate": 9.97929321853416e-06, "loss": 0.1271, "step": 15727 }, { "epoch": 51.5672131147541, "grad_norm": 2.991135835647583, "learning_rate": 9.978231333941569e-06, "loss": 0.1412, "step": 15728 }, { "epoch": 51.570491803278685, "grad_norm": 4.659811019897461, "learning_rate": 9.977169449594446e-06, "loss": 0.2592, "step": 15729 }, { "epoch": 51.57377049180328, "grad_norm": 3.439314126968384, "learning_rate": 9.976107565504762e-06, "loss": 0.413, "step": 15730 }, { "epoch": 51.57704918032787, "grad_norm": 3.6910033226013184, "learning_rate": 9.97504568168449e-06, "loss": 0.0999, "step": 15731 }, { "epoch": 51.58032786885246, "grad_norm": 3.319054126739502, "learning_rate": 9.973983798145599e-06, "loss": 0.2687, "step": 15732 }, { "epoch": 51.58360655737705, "grad_norm": 3.615539312362671, "learning_rate": 9.972921914900073e-06, "loss": 0.1824, "step": 15733 }, { "epoch": 51.58688524590164, "grad_norm": 4.229484558105469, "learning_rate": 9.971860031959879e-06, "loss": 0.388, "step": 15734 }, { "epoch": 51.59016393442623, "grad_norm": 4.71688175201416, "learning_rate": 9.970798149336993e-06, "loss": 0.2513, "step": 15735 }, { "epoch": 51.59344262295082, "grad_norm": 3.1483566761016846, "learning_rate": 9.969736267043385e-06, "loss": 0.3232, "step": 15736 }, { "epoch": 51.59672131147541, "grad_norm": 3.2483019828796387, "learning_rate": 9.968674385091035e-06, "loss": 0.2773, "step": 15737 }, { "epoch": 51.6, "grad_norm": 5.070910930633545, "learning_rate": 9.967612503491915e-06, "loss": 0.2137, "step": 15738 }, { "epoch": 51.60327868852459, "grad_norm": 4.388722896575928, "learning_rate": 9.966550622257996e-06, "loss": 0.372, "step": 15739 }, { "epoch": 51.60655737704918, "grad_norm": 3.725507974624634, "learning_rate": 9.965488741401254e-06, "loss": 0.2392, "step": 15740 }, { "epoch": 51.609836065573774, "grad_norm": 4.1924967765808105, "learning_rate": 9.96442686093366e-06, "loss": 0.3209, "step": 15741 }, { "epoch": 51.61311475409836, "grad_norm": 3.5441465377807617, "learning_rate": 9.963364980867192e-06, "loss": 0.2852, "step": 15742 }, { "epoch": 51.61639344262295, "grad_norm": 3.6108386516571045, "learning_rate": 9.962303101213825e-06, "loss": 0.3087, "step": 15743 }, { "epoch": 51.61967213114754, "grad_norm": 3.943211317062378, "learning_rate": 9.961241221985524e-06, "loss": 0.3262, "step": 15744 }, { "epoch": 51.622950819672134, "grad_norm": 4.13606595993042, "learning_rate": 9.960179343194271e-06, "loss": 0.3003, "step": 15745 }, { "epoch": 51.62622950819672, "grad_norm": 3.669145107269287, "learning_rate": 9.959117464852038e-06, "loss": 0.3691, "step": 15746 }, { "epoch": 51.62950819672131, "grad_norm": 5.214151382446289, "learning_rate": 9.958055586970796e-06, "loss": 0.353, "step": 15747 }, { "epoch": 51.6327868852459, "grad_norm": 3.730707883834839, "learning_rate": 9.956993709562523e-06, "loss": 0.2245, "step": 15748 }, { "epoch": 51.636065573770495, "grad_norm": 4.002718448638916, "learning_rate": 9.95593183263919e-06, "loss": 0.35, "step": 15749 }, { "epoch": 51.63934426229508, "grad_norm": 3.9585981369018555, "learning_rate": 9.954869956212772e-06, "loss": 0.2287, "step": 15750 }, { "epoch": 51.64262295081967, "grad_norm": 3.4021618366241455, "learning_rate": 9.95380808029524e-06, "loss": 0.2886, "step": 15751 }, { "epoch": 51.64590163934426, "grad_norm": 3.806061029434204, "learning_rate": 9.952746204898574e-06, "loss": 0.2451, "step": 15752 }, { "epoch": 51.649180327868855, "grad_norm": 3.525073289871216, "learning_rate": 9.95168433003474e-06, "loss": 0.298, "step": 15753 }, { "epoch": 51.65245901639344, "grad_norm": 3.4423792362213135, "learning_rate": 9.950622455715716e-06, "loss": 0.2895, "step": 15754 }, { "epoch": 51.65573770491803, "grad_norm": 3.7052884101867676, "learning_rate": 9.949560581953476e-06, "loss": 0.2235, "step": 15755 }, { "epoch": 51.65901639344262, "grad_norm": 2.849827527999878, "learning_rate": 9.948498708759993e-06, "loss": 0.0901, "step": 15756 }, { "epoch": 51.662295081967216, "grad_norm": 6.321245193481445, "learning_rate": 9.947436836147241e-06, "loss": 0.252, "step": 15757 }, { "epoch": 51.665573770491804, "grad_norm": 4.170851230621338, "learning_rate": 9.946374964127191e-06, "loss": 0.2834, "step": 15758 }, { "epoch": 51.66885245901639, "grad_norm": 3.603139877319336, "learning_rate": 9.94531309271182e-06, "loss": 0.2259, "step": 15759 }, { "epoch": 51.67213114754098, "grad_norm": 3.852351427078247, "learning_rate": 9.944251221913103e-06, "loss": 0.4005, "step": 15760 }, { "epoch": 51.675409836065576, "grad_norm": 3.057570219039917, "learning_rate": 9.943189351743011e-06, "loss": 0.3247, "step": 15761 }, { "epoch": 51.678688524590164, "grad_norm": 3.7473316192626953, "learning_rate": 9.942127482213514e-06, "loss": 0.4053, "step": 15762 }, { "epoch": 51.68196721311475, "grad_norm": 2.684030055999756, "learning_rate": 9.941065613336594e-06, "loss": 0.1088, "step": 15763 }, { "epoch": 51.68524590163934, "grad_norm": 3.4751336574554443, "learning_rate": 9.940003745124219e-06, "loss": 0.1886, "step": 15764 }, { "epoch": 51.68852459016394, "grad_norm": 3.4768176078796387, "learning_rate": 9.938941877588366e-06, "loss": 0.1808, "step": 15765 }, { "epoch": 51.691803278688525, "grad_norm": 3.8125345706939697, "learning_rate": 9.937880010741007e-06, "loss": 0.2251, "step": 15766 }, { "epoch": 51.69508196721311, "grad_norm": 3.5971062183380127, "learning_rate": 9.93681814459411e-06, "loss": 0.3274, "step": 15767 }, { "epoch": 51.6983606557377, "grad_norm": 3.5146610736846924, "learning_rate": 9.935756279159659e-06, "loss": 0.194, "step": 15768 }, { "epoch": 51.7016393442623, "grad_norm": 4.193072319030762, "learning_rate": 9.934694414449623e-06, "loss": 0.2239, "step": 15769 }, { "epoch": 51.704918032786885, "grad_norm": 3.353271722793579, "learning_rate": 9.933632550475974e-06, "loss": 0.3016, "step": 15770 }, { "epoch": 51.708196721311474, "grad_norm": 3.858804941177368, "learning_rate": 9.932570687250685e-06, "loss": 0.2869, "step": 15771 }, { "epoch": 51.71147540983607, "grad_norm": 4.0209126472473145, "learning_rate": 9.931508824785734e-06, "loss": 0.3096, "step": 15772 }, { "epoch": 51.71475409836066, "grad_norm": 3.3508284091949463, "learning_rate": 9.930446963093091e-06, "loss": 0.2001, "step": 15773 }, { "epoch": 51.718032786885246, "grad_norm": 4.850462436676025, "learning_rate": 9.929385102184733e-06, "loss": 0.2475, "step": 15774 }, { "epoch": 51.721311475409834, "grad_norm": 3.540576934814453, "learning_rate": 9.928323242072627e-06, "loss": 0.3888, "step": 15775 }, { "epoch": 51.72459016393443, "grad_norm": 2.805582284927368, "learning_rate": 9.927261382768757e-06, "loss": 0.293, "step": 15776 }, { "epoch": 51.72786885245902, "grad_norm": 2.7373721599578857, "learning_rate": 9.926199524285085e-06, "loss": 0.1784, "step": 15777 }, { "epoch": 51.731147540983606, "grad_norm": 3.520982265472412, "learning_rate": 9.925137666633593e-06, "loss": 0.223, "step": 15778 }, { "epoch": 51.734426229508195, "grad_norm": 3.4629592895507812, "learning_rate": 9.924075809826253e-06, "loss": 0.3759, "step": 15779 }, { "epoch": 51.73770491803279, "grad_norm": 3.4188427925109863, "learning_rate": 9.92301395387503e-06, "loss": 0.1634, "step": 15780 }, { "epoch": 51.74098360655738, "grad_norm": 3.7988522052764893, "learning_rate": 9.921952098791912e-06, "loss": 0.1781, "step": 15781 }, { "epoch": 51.74426229508197, "grad_norm": 4.351721286773682, "learning_rate": 9.920890244588866e-06, "loss": 0.2783, "step": 15782 }, { "epoch": 51.747540983606555, "grad_norm": 2.747324228286743, "learning_rate": 9.919828391277862e-06, "loss": 0.0769, "step": 15783 }, { "epoch": 51.75081967213115, "grad_norm": 3.6507999897003174, "learning_rate": 9.918766538870873e-06, "loss": 0.1893, "step": 15784 }, { "epoch": 51.75409836065574, "grad_norm": 3.6987669467926025, "learning_rate": 9.91770468737988e-06, "loss": 0.2534, "step": 15785 }, { "epoch": 51.75737704918033, "grad_norm": 3.5395641326904297, "learning_rate": 9.916642836816853e-06, "loss": 0.4961, "step": 15786 }, { "epoch": 51.760655737704916, "grad_norm": 4.375020503997803, "learning_rate": 9.915580987193763e-06, "loss": 0.3306, "step": 15787 }, { "epoch": 51.76393442622951, "grad_norm": 3.824312448501587, "learning_rate": 9.914519138522582e-06, "loss": 0.4414, "step": 15788 }, { "epoch": 51.7672131147541, "grad_norm": 3.469128370285034, "learning_rate": 9.913457290815291e-06, "loss": 0.2299, "step": 15789 }, { "epoch": 51.77049180327869, "grad_norm": 2.9654881954193115, "learning_rate": 9.912395444083858e-06, "loss": 0.2359, "step": 15790 }, { "epoch": 51.773770491803276, "grad_norm": 3.393200397491455, "learning_rate": 9.911333598340259e-06, "loss": 0.3866, "step": 15791 }, { "epoch": 51.77704918032787, "grad_norm": 3.909503698348999, "learning_rate": 9.910271753596465e-06, "loss": 0.3977, "step": 15792 }, { "epoch": 51.78032786885246, "grad_norm": 3.324089765548706, "learning_rate": 9.909209909864446e-06, "loss": 0.1894, "step": 15793 }, { "epoch": 51.78360655737705, "grad_norm": 18.586408615112305, "learning_rate": 9.908148067156184e-06, "loss": 0.3259, "step": 15794 }, { "epoch": 51.78688524590164, "grad_norm": 3.823735475540161, "learning_rate": 9.907086225483649e-06, "loss": 0.4631, "step": 15795 }, { "epoch": 51.79016393442623, "grad_norm": 2.968069314956665, "learning_rate": 9.906024384858812e-06, "loss": 0.1196, "step": 15796 }, { "epoch": 51.79344262295082, "grad_norm": 3.5356860160827637, "learning_rate": 9.904962545293643e-06, "loss": 0.3051, "step": 15797 }, { "epoch": 51.79672131147541, "grad_norm": 3.800093650817871, "learning_rate": 9.903900706800128e-06, "loss": 0.2578, "step": 15798 }, { "epoch": 51.8, "grad_norm": 5.154825687408447, "learning_rate": 9.90283886939023e-06, "loss": 0.2684, "step": 15799 }, { "epoch": 51.80327868852459, "grad_norm": 4.022181510925293, "learning_rate": 9.901777033075921e-06, "loss": 0.2629, "step": 15800 }, { "epoch": 51.80655737704918, "grad_norm": 5.027851581573486, "learning_rate": 9.90071519786918e-06, "loss": 0.3622, "step": 15801 }, { "epoch": 51.80983606557377, "grad_norm": 4.066430568695068, "learning_rate": 9.89965336378198e-06, "loss": 0.2889, "step": 15802 }, { "epoch": 51.81311475409836, "grad_norm": 4.474597930908203, "learning_rate": 9.89859153082629e-06, "loss": 0.1678, "step": 15803 }, { "epoch": 51.81639344262295, "grad_norm": 4.620327472686768, "learning_rate": 9.89752969901409e-06, "loss": 0.1656, "step": 15804 }, { "epoch": 51.81967213114754, "grad_norm": 4.676177024841309, "learning_rate": 9.896467868357346e-06, "loss": 0.3345, "step": 15805 }, { "epoch": 51.82295081967213, "grad_norm": 2.891261577606201, "learning_rate": 9.895406038868031e-06, "loss": 0.1287, "step": 15806 }, { "epoch": 51.82622950819672, "grad_norm": 3.2576212882995605, "learning_rate": 9.894344210558128e-06, "loss": 0.1444, "step": 15807 }, { "epoch": 51.829508196721314, "grad_norm": 5.121691703796387, "learning_rate": 9.893282383439602e-06, "loss": 0.394, "step": 15808 }, { "epoch": 51.8327868852459, "grad_norm": 4.120327949523926, "learning_rate": 9.89222055752443e-06, "loss": 0.2458, "step": 15809 }, { "epoch": 51.83606557377049, "grad_norm": 3.0151469707489014, "learning_rate": 9.891158732824577e-06, "loss": 0.3599, "step": 15810 }, { "epoch": 51.83934426229508, "grad_norm": 2.7824912071228027, "learning_rate": 9.890096909352029e-06, "loss": 0.0731, "step": 15811 }, { "epoch": 51.842622950819674, "grad_norm": 3.3149824142456055, "learning_rate": 9.88903508711875e-06, "loss": 0.2626, "step": 15812 }, { "epoch": 51.84590163934426, "grad_norm": 4.652745246887207, "learning_rate": 9.887973266136717e-06, "loss": 0.3174, "step": 15813 }, { "epoch": 51.84918032786885, "grad_norm": 3.3851301670074463, "learning_rate": 9.886911446417898e-06, "loss": 0.2725, "step": 15814 }, { "epoch": 51.85245901639344, "grad_norm": 4.3441691398620605, "learning_rate": 9.885849627974274e-06, "loss": 0.4779, "step": 15815 }, { "epoch": 51.855737704918035, "grad_norm": 5.650387287139893, "learning_rate": 9.884787810817815e-06, "loss": 0.1705, "step": 15816 }, { "epoch": 51.85901639344262, "grad_norm": 5.699553966522217, "learning_rate": 9.883725994960492e-06, "loss": 0.1722, "step": 15817 }, { "epoch": 51.86229508196721, "grad_norm": 2.809037208557129, "learning_rate": 9.882664180414278e-06, "loss": 0.2003, "step": 15818 }, { "epoch": 51.86557377049181, "grad_norm": 3.9869306087493896, "learning_rate": 9.881602367191145e-06, "loss": 0.3732, "step": 15819 }, { "epoch": 51.868852459016395, "grad_norm": 3.7429094314575195, "learning_rate": 9.880540555303073e-06, "loss": 0.1707, "step": 15820 }, { "epoch": 51.87213114754098, "grad_norm": 2.6868090629577637, "learning_rate": 9.87947874476203e-06, "loss": 0.1399, "step": 15821 }, { "epoch": 51.87540983606557, "grad_norm": 3.3985419273376465, "learning_rate": 9.878416935579988e-06, "loss": 0.3746, "step": 15822 }, { "epoch": 51.87868852459017, "grad_norm": 4.097774028778076, "learning_rate": 9.87735512776892e-06, "loss": 0.2875, "step": 15823 }, { "epoch": 51.881967213114756, "grad_norm": 2.9139511585235596, "learning_rate": 9.876293321340803e-06, "loss": 0.2728, "step": 15824 }, { "epoch": 51.885245901639344, "grad_norm": 3.1756091117858887, "learning_rate": 9.875231516307608e-06, "loss": 0.5002, "step": 15825 }, { "epoch": 51.88852459016393, "grad_norm": 3.0955100059509277, "learning_rate": 9.874169712681304e-06, "loss": 0.2953, "step": 15826 }, { "epoch": 51.89180327868853, "grad_norm": 4.443362712860107, "learning_rate": 9.873107910473868e-06, "loss": 0.338, "step": 15827 }, { "epoch": 51.895081967213116, "grad_norm": 6.015239715576172, "learning_rate": 9.872046109697274e-06, "loss": 0.1612, "step": 15828 }, { "epoch": 51.898360655737704, "grad_norm": 3.730435848236084, "learning_rate": 9.870984310363492e-06, "loss": 0.2603, "step": 15829 }, { "epoch": 51.90163934426229, "grad_norm": 3.8660402297973633, "learning_rate": 9.869922512484498e-06, "loss": 0.2637, "step": 15830 }, { "epoch": 51.90491803278689, "grad_norm": 3.529475688934326, "learning_rate": 9.868860716072262e-06, "loss": 0.3051, "step": 15831 }, { "epoch": 51.90819672131148, "grad_norm": 6.392104148864746, "learning_rate": 9.867798921138757e-06, "loss": 0.5068, "step": 15832 }, { "epoch": 51.911475409836065, "grad_norm": 5.314442157745361, "learning_rate": 9.866737127695956e-06, "loss": 0.2914, "step": 15833 }, { "epoch": 51.91475409836065, "grad_norm": 3.5017242431640625, "learning_rate": 9.865675335755835e-06, "loss": 0.2936, "step": 15834 }, { "epoch": 51.91803278688525, "grad_norm": 3.3768393993377686, "learning_rate": 9.864613545330363e-06, "loss": 0.1867, "step": 15835 }, { "epoch": 51.92131147540984, "grad_norm": 3.631756544113159, "learning_rate": 9.86355175643151e-06, "loss": 0.2913, "step": 15836 }, { "epoch": 51.924590163934425, "grad_norm": 2.8333005905151367, "learning_rate": 9.862489969071258e-06, "loss": 0.2072, "step": 15837 }, { "epoch": 51.927868852459014, "grad_norm": 3.158176898956299, "learning_rate": 9.861428183261575e-06, "loss": 0.3111, "step": 15838 }, { "epoch": 51.93114754098361, "grad_norm": 5.343063831329346, "learning_rate": 9.860366399014431e-06, "loss": 0.3896, "step": 15839 }, { "epoch": 51.9344262295082, "grad_norm": 3.098417282104492, "learning_rate": 9.8593046163418e-06, "loss": 0.2137, "step": 15840 }, { "epoch": 51.937704918032786, "grad_norm": 3.2541730403900146, "learning_rate": 9.858242835255657e-06, "loss": 0.1155, "step": 15841 }, { "epoch": 51.940983606557374, "grad_norm": 3.297118902206421, "learning_rate": 9.857181055767974e-06, "loss": 0.3255, "step": 15842 }, { "epoch": 51.94426229508197, "grad_norm": 3.7266640663146973, "learning_rate": 9.856119277890725e-06, "loss": 0.3499, "step": 15843 }, { "epoch": 51.94754098360656, "grad_norm": 3.7002298831939697, "learning_rate": 9.85505750163588e-06, "loss": 0.1287, "step": 15844 }, { "epoch": 51.950819672131146, "grad_norm": 3.646303415298462, "learning_rate": 9.853995727015406e-06, "loss": 0.3231, "step": 15845 }, { "epoch": 51.954098360655735, "grad_norm": 3.368365526199341, "learning_rate": 9.852933954041288e-06, "loss": 0.1783, "step": 15846 }, { "epoch": 51.95737704918033, "grad_norm": 4.380765914916992, "learning_rate": 9.851872182725493e-06, "loss": 0.1686, "step": 15847 }, { "epoch": 51.96065573770492, "grad_norm": 3.356886386871338, "learning_rate": 9.850810413079992e-06, "loss": 0.3417, "step": 15848 }, { "epoch": 51.96393442622951, "grad_norm": 3.7060890197753906, "learning_rate": 9.849748645116755e-06, "loss": 0.3933, "step": 15849 }, { "epoch": 51.967213114754095, "grad_norm": 3.017383098602295, "learning_rate": 9.848686878847763e-06, "loss": 0.2932, "step": 15850 }, { "epoch": 51.97049180327869, "grad_norm": 3.122798442840576, "learning_rate": 9.847625114284984e-06, "loss": 0.257, "step": 15851 }, { "epoch": 51.97377049180328, "grad_norm": 2.9066975116729736, "learning_rate": 9.846563351440389e-06, "loss": 0.1899, "step": 15852 }, { "epoch": 51.97704918032787, "grad_norm": 3.4200453758239746, "learning_rate": 9.845501590325949e-06, "loss": 0.2296, "step": 15853 }, { "epoch": 51.980327868852456, "grad_norm": 3.4037094116210938, "learning_rate": 9.844439830953641e-06, "loss": 0.2775, "step": 15854 }, { "epoch": 51.98360655737705, "grad_norm": 3.4544355869293213, "learning_rate": 9.843378073335438e-06, "loss": 0.1098, "step": 15855 }, { "epoch": 51.98688524590164, "grad_norm": 4.20072603225708, "learning_rate": 9.842316317483306e-06, "loss": 0.4089, "step": 15856 }, { "epoch": 51.99016393442623, "grad_norm": 3.9739489555358887, "learning_rate": 9.841254563409226e-06, "loss": 0.18, "step": 15857 }, { "epoch": 51.993442622950816, "grad_norm": 4.325475215911865, "learning_rate": 9.840192811125162e-06, "loss": 0.2596, "step": 15858 }, { "epoch": 51.99672131147541, "grad_norm": 4.0964789390563965, "learning_rate": 9.839131060643092e-06, "loss": 0.2294, "step": 15859 }, { "epoch": 52.0, "grad_norm": 3.6114211082458496, "learning_rate": 9.838069311974986e-06, "loss": 0.1462, "step": 15860 }, { "epoch": 52.00327868852459, "grad_norm": 3.2807960510253906, "learning_rate": 9.837007565132818e-06, "loss": 0.3315, "step": 15861 }, { "epoch": 52.006557377049184, "grad_norm": 3.0001018047332764, "learning_rate": 9.835945820128555e-06, "loss": 0.1342, "step": 15862 }, { "epoch": 52.00983606557377, "grad_norm": 2.950531244277954, "learning_rate": 9.834884076974178e-06, "loss": 0.2952, "step": 15863 }, { "epoch": 52.01311475409836, "grad_norm": 3.0736920833587646, "learning_rate": 9.833822335681655e-06, "loss": 0.2702, "step": 15864 }, { "epoch": 52.01639344262295, "grad_norm": 2.673281669616699, "learning_rate": 9.832760596262959e-06, "loss": 0.1975, "step": 15865 }, { "epoch": 52.019672131147544, "grad_norm": 3.74249529838562, "learning_rate": 9.831698858730054e-06, "loss": 0.1853, "step": 15866 }, { "epoch": 52.02295081967213, "grad_norm": 3.224409580230713, "learning_rate": 9.830637123094924e-06, "loss": 0.1371, "step": 15867 }, { "epoch": 52.02622950819672, "grad_norm": 3.2100718021392822, "learning_rate": 9.82957538936954e-06, "loss": 0.19, "step": 15868 }, { "epoch": 52.02950819672131, "grad_norm": 3.2213563919067383, "learning_rate": 9.828513657565866e-06, "loss": 0.2879, "step": 15869 }, { "epoch": 52.032786885245905, "grad_norm": 4.141549587249756, "learning_rate": 9.827451927695883e-06, "loss": 0.2312, "step": 15870 }, { "epoch": 52.03606557377049, "grad_norm": 3.6773104667663574, "learning_rate": 9.826390199771552e-06, "loss": 0.2884, "step": 15871 }, { "epoch": 52.03934426229508, "grad_norm": 3.187377691268921, "learning_rate": 9.825328473804856e-06, "loss": 0.4033, "step": 15872 }, { "epoch": 52.04262295081967, "grad_norm": 3.1736531257629395, "learning_rate": 9.824266749807765e-06, "loss": 0.2574, "step": 15873 }, { "epoch": 52.045901639344265, "grad_norm": 4.168671131134033, "learning_rate": 9.823205027792247e-06, "loss": 0.29, "step": 15874 }, { "epoch": 52.049180327868854, "grad_norm": 3.5720927715301514, "learning_rate": 9.822143307770273e-06, "loss": 0.2229, "step": 15875 }, { "epoch": 52.05245901639344, "grad_norm": 3.6320910453796387, "learning_rate": 9.821081589753823e-06, "loss": 0.2118, "step": 15876 }, { "epoch": 52.05573770491803, "grad_norm": 3.4727962017059326, "learning_rate": 9.820019873754864e-06, "loss": 0.278, "step": 15877 }, { "epoch": 52.059016393442626, "grad_norm": 3.467416763305664, "learning_rate": 9.818958159785368e-06, "loss": 0.2514, "step": 15878 }, { "epoch": 52.062295081967214, "grad_norm": 3.9968440532684326, "learning_rate": 9.8178964478573e-06, "loss": 0.343, "step": 15879 }, { "epoch": 52.0655737704918, "grad_norm": 3.4719932079315186, "learning_rate": 9.816834737982646e-06, "loss": 0.2155, "step": 15880 }, { "epoch": 52.06885245901639, "grad_norm": 4.370271682739258, "learning_rate": 9.815773030173371e-06, "loss": 0.1948, "step": 15881 }, { "epoch": 52.072131147540986, "grad_norm": 3.179292917251587, "learning_rate": 9.814711324441443e-06, "loss": 0.206, "step": 15882 }, { "epoch": 52.075409836065575, "grad_norm": 3.4355251789093018, "learning_rate": 9.813649620798842e-06, "loss": 0.1929, "step": 15883 }, { "epoch": 52.07868852459016, "grad_norm": 2.88132381439209, "learning_rate": 9.812587919257532e-06, "loss": 0.2298, "step": 15884 }, { "epoch": 52.08196721311475, "grad_norm": 3.8380391597747803, "learning_rate": 9.811526219829485e-06, "loss": 0.2413, "step": 15885 }, { "epoch": 52.08524590163935, "grad_norm": 4.611218452453613, "learning_rate": 9.810464522526682e-06, "loss": 0.2432, "step": 15886 }, { "epoch": 52.088524590163935, "grad_norm": 3.72668719291687, "learning_rate": 9.809402827361088e-06, "loss": 0.326, "step": 15887 }, { "epoch": 52.09180327868852, "grad_norm": 3.239889621734619, "learning_rate": 9.80834113434467e-06, "loss": 0.1544, "step": 15888 }, { "epoch": 52.09508196721311, "grad_norm": 2.75927734375, "learning_rate": 9.807279443489408e-06, "loss": 0.1825, "step": 15889 }, { "epoch": 52.09836065573771, "grad_norm": 3.2491602897644043, "learning_rate": 9.806217754807272e-06, "loss": 0.3865, "step": 15890 }, { "epoch": 52.101639344262296, "grad_norm": 4.307433128356934, "learning_rate": 9.805156068310233e-06, "loss": 0.282, "step": 15891 }, { "epoch": 52.104918032786884, "grad_norm": 9.318511009216309, "learning_rate": 9.804094384010256e-06, "loss": 0.3869, "step": 15892 }, { "epoch": 52.10819672131147, "grad_norm": 4.634873867034912, "learning_rate": 9.803032701919325e-06, "loss": 0.1925, "step": 15893 }, { "epoch": 52.11147540983607, "grad_norm": 3.3351526260375977, "learning_rate": 9.801971022049404e-06, "loss": 0.3526, "step": 15894 }, { "epoch": 52.114754098360656, "grad_norm": 4.870035648345947, "learning_rate": 9.800909344412464e-06, "loss": 0.3533, "step": 15895 }, { "epoch": 52.118032786885244, "grad_norm": 3.4746830463409424, "learning_rate": 9.79984766902048e-06, "loss": 0.2018, "step": 15896 }, { "epoch": 52.12131147540983, "grad_norm": 3.389873504638672, "learning_rate": 9.798785995885417e-06, "loss": 0.3651, "step": 15897 }, { "epoch": 52.12459016393443, "grad_norm": 5.1482110023498535, "learning_rate": 9.797724325019256e-06, "loss": 0.1687, "step": 15898 }, { "epoch": 52.12786885245902, "grad_norm": 3.261596918106079, "learning_rate": 9.796662656433963e-06, "loss": 0.1635, "step": 15899 }, { "epoch": 52.131147540983605, "grad_norm": 3.443758487701416, "learning_rate": 9.79560099014151e-06, "loss": 0.1971, "step": 15900 }, { "epoch": 52.13442622950819, "grad_norm": 3.5349276065826416, "learning_rate": 9.794539326153864e-06, "loss": 0.2199, "step": 15901 }, { "epoch": 52.13770491803279, "grad_norm": 3.4689416885375977, "learning_rate": 9.793477664483004e-06, "loss": 0.1914, "step": 15902 }, { "epoch": 52.14098360655738, "grad_norm": 3.5517048835754395, "learning_rate": 9.792416005140899e-06, "loss": 0.2393, "step": 15903 }, { "epoch": 52.144262295081965, "grad_norm": 3.6076653003692627, "learning_rate": 9.79135434813952e-06, "loss": 0.2958, "step": 15904 }, { "epoch": 52.14754098360656, "grad_norm": 2.8648204803466797, "learning_rate": 9.790292693490832e-06, "loss": 0.3598, "step": 15905 }, { "epoch": 52.15081967213115, "grad_norm": 3.448197841644287, "learning_rate": 9.789231041206817e-06, "loss": 0.1379, "step": 15906 }, { "epoch": 52.15409836065574, "grad_norm": 3.726325273513794, "learning_rate": 9.78816939129944e-06, "loss": 0.1077, "step": 15907 }, { "epoch": 52.157377049180326, "grad_norm": 3.6876256465911865, "learning_rate": 9.787107743780674e-06, "loss": 0.1549, "step": 15908 }, { "epoch": 52.16065573770492, "grad_norm": 2.9209628105163574, "learning_rate": 9.786046098662491e-06, "loss": 0.1482, "step": 15909 }, { "epoch": 52.16393442622951, "grad_norm": 3.646714925765991, "learning_rate": 9.784984455956856e-06, "loss": 0.2926, "step": 15910 }, { "epoch": 52.1672131147541, "grad_norm": 4.703893661499023, "learning_rate": 9.783922815675747e-06, "loss": 0.1438, "step": 15911 }, { "epoch": 52.170491803278686, "grad_norm": 2.9668478965759277, "learning_rate": 9.782861177831134e-06, "loss": 0.4394, "step": 15912 }, { "epoch": 52.17377049180328, "grad_norm": 3.617276906967163, "learning_rate": 9.781799542434987e-06, "loss": 0.1322, "step": 15913 }, { "epoch": 52.17704918032787, "grad_norm": 3.84553599357605, "learning_rate": 9.780737909499276e-06, "loss": 0.184, "step": 15914 }, { "epoch": 52.18032786885246, "grad_norm": 4.175805568695068, "learning_rate": 9.779676279035972e-06, "loss": 0.1828, "step": 15915 }, { "epoch": 52.18360655737705, "grad_norm": 4.070304870605469, "learning_rate": 9.77861465105705e-06, "loss": 0.1682, "step": 15916 }, { "epoch": 52.18688524590164, "grad_norm": 3.719575881958008, "learning_rate": 9.777553025574478e-06, "loss": 0.3394, "step": 15917 }, { "epoch": 52.19016393442623, "grad_norm": 3.919989824295044, "learning_rate": 9.776491402600222e-06, "loss": 0.2799, "step": 15918 }, { "epoch": 52.19344262295082, "grad_norm": 3.1473746299743652, "learning_rate": 9.775429782146262e-06, "loss": 0.1548, "step": 15919 }, { "epoch": 52.19672131147541, "grad_norm": 3.541620969772339, "learning_rate": 9.774368164224565e-06, "loss": 0.208, "step": 15920 }, { "epoch": 52.2, "grad_norm": 2.863269567489624, "learning_rate": 9.773306548847102e-06, "loss": 0.184, "step": 15921 }, { "epoch": 52.20327868852459, "grad_norm": 3.1808104515075684, "learning_rate": 9.772244936025844e-06, "loss": 0.2624, "step": 15922 }, { "epoch": 52.20655737704918, "grad_norm": 3.333026647567749, "learning_rate": 9.771183325772753e-06, "loss": 0.1183, "step": 15923 }, { "epoch": 52.20983606557377, "grad_norm": 3.5310707092285156, "learning_rate": 9.770121718099817e-06, "loss": 0.1036, "step": 15924 }, { "epoch": 52.21311475409836, "grad_norm": 3.2577266693115234, "learning_rate": 9.769060113018996e-06, "loss": 0.2885, "step": 15925 }, { "epoch": 52.21639344262295, "grad_norm": 3.1145713329315186, "learning_rate": 9.767998510542261e-06, "loss": 0.2459, "step": 15926 }, { "epoch": 52.21967213114754, "grad_norm": 4.099707126617432, "learning_rate": 9.766936910681581e-06, "loss": 0.2483, "step": 15927 }, { "epoch": 52.22295081967213, "grad_norm": 4.971923828125, "learning_rate": 9.765875313448934e-06, "loss": 0.2744, "step": 15928 }, { "epoch": 52.226229508196724, "grad_norm": 4.370499134063721, "learning_rate": 9.764813718856285e-06, "loss": 0.339, "step": 15929 }, { "epoch": 52.22950819672131, "grad_norm": 2.5944881439208984, "learning_rate": 9.763752126915607e-06, "loss": 0.333, "step": 15930 }, { "epoch": 52.2327868852459, "grad_norm": 4.226301193237305, "learning_rate": 9.762690537638864e-06, "loss": 0.1785, "step": 15931 }, { "epoch": 52.23606557377049, "grad_norm": 4.340850830078125, "learning_rate": 9.761628951038037e-06, "loss": 0.2399, "step": 15932 }, { "epoch": 52.239344262295084, "grad_norm": 10.663779258728027, "learning_rate": 9.760567367125092e-06, "loss": 0.1577, "step": 15933 }, { "epoch": 52.24262295081967, "grad_norm": 3.185242176055908, "learning_rate": 9.759505785911999e-06, "loss": 0.2437, "step": 15934 }, { "epoch": 52.24590163934426, "grad_norm": 3.1961557865142822, "learning_rate": 9.758444207410725e-06, "loss": 0.2015, "step": 15935 }, { "epoch": 52.24918032786885, "grad_norm": 3.135948896408081, "learning_rate": 9.757382631633245e-06, "loss": 0.1045, "step": 15936 }, { "epoch": 52.252459016393445, "grad_norm": 3.7590420246124268, "learning_rate": 9.75632105859153e-06, "loss": 0.1665, "step": 15937 }, { "epoch": 52.25573770491803, "grad_norm": 6.217926502227783, "learning_rate": 9.755259488297544e-06, "loss": 0.3538, "step": 15938 }, { "epoch": 52.25901639344262, "grad_norm": 3.5045552253723145, "learning_rate": 9.754197920763266e-06, "loss": 0.2744, "step": 15939 }, { "epoch": 52.26229508196721, "grad_norm": 12.267749786376953, "learning_rate": 9.75313635600066e-06, "loss": 0.392, "step": 15940 }, { "epoch": 52.265573770491805, "grad_norm": 5.208003997802734, "learning_rate": 9.752074794021697e-06, "loss": 0.3879, "step": 15941 }, { "epoch": 52.268852459016394, "grad_norm": 3.711853504180908, "learning_rate": 9.751013234838352e-06, "loss": 0.1569, "step": 15942 }, { "epoch": 52.27213114754098, "grad_norm": 6.030428409576416, "learning_rate": 9.749951678462592e-06, "loss": 0.3092, "step": 15943 }, { "epoch": 52.27540983606557, "grad_norm": 4.122350692749023, "learning_rate": 9.74889012490638e-06, "loss": 0.1917, "step": 15944 }, { "epoch": 52.278688524590166, "grad_norm": 3.5477726459503174, "learning_rate": 9.747828574181698e-06, "loss": 0.2725, "step": 15945 }, { "epoch": 52.281967213114754, "grad_norm": 4.911773204803467, "learning_rate": 9.746767026300513e-06, "loss": 0.1568, "step": 15946 }, { "epoch": 52.28524590163934, "grad_norm": 4.564178466796875, "learning_rate": 9.745705481274792e-06, "loss": 0.2382, "step": 15947 }, { "epoch": 52.28852459016394, "grad_norm": 4.560072422027588, "learning_rate": 9.744643939116507e-06, "loss": 0.2476, "step": 15948 }, { "epoch": 52.291803278688526, "grad_norm": 3.4171178340911865, "learning_rate": 9.74358239983762e-06, "loss": 0.2617, "step": 15949 }, { "epoch": 52.295081967213115, "grad_norm": 4.005777835845947, "learning_rate": 9.742520863450116e-06, "loss": 0.2146, "step": 15950 }, { "epoch": 52.2983606557377, "grad_norm": 4.379319667816162, "learning_rate": 9.741459329965955e-06, "loss": 0.1904, "step": 15951 }, { "epoch": 52.3016393442623, "grad_norm": 2.9349045753479004, "learning_rate": 9.74039779939711e-06, "loss": 0.0958, "step": 15952 }, { "epoch": 52.30491803278689, "grad_norm": 3.194734811782837, "learning_rate": 9.739336271755542e-06, "loss": 0.3047, "step": 15953 }, { "epoch": 52.308196721311475, "grad_norm": 3.7737300395965576, "learning_rate": 9.738274747053236e-06, "loss": 0.1603, "step": 15954 }, { "epoch": 52.31147540983606, "grad_norm": 3.533830404281616, "learning_rate": 9.737213225302154e-06, "loss": 0.226, "step": 15955 }, { "epoch": 52.31475409836066, "grad_norm": 4.331325054168701, "learning_rate": 9.736151706514265e-06, "loss": 0.3512, "step": 15956 }, { "epoch": 52.31803278688525, "grad_norm": 3.1094601154327393, "learning_rate": 9.735090190701537e-06, "loss": 0.1847, "step": 15957 }, { "epoch": 52.321311475409836, "grad_norm": 4.72053337097168, "learning_rate": 9.734028677875946e-06, "loss": 0.3202, "step": 15958 }, { "epoch": 52.324590163934424, "grad_norm": 3.1627938747406006, "learning_rate": 9.73296716804946e-06, "loss": 0.3233, "step": 15959 }, { "epoch": 52.32786885245902, "grad_norm": 3.270375967025757, "learning_rate": 9.731905661234044e-06, "loss": 0.2567, "step": 15960 }, { "epoch": 52.33114754098361, "grad_norm": 3.739253044128418, "learning_rate": 9.730844157441668e-06, "loss": 0.268, "step": 15961 }, { "epoch": 52.334426229508196, "grad_norm": 2.8172683715820312, "learning_rate": 9.729782656684307e-06, "loss": 0.1583, "step": 15962 }, { "epoch": 52.337704918032784, "grad_norm": 3.2118325233459473, "learning_rate": 9.728721158973927e-06, "loss": 0.2289, "step": 15963 }, { "epoch": 52.34098360655738, "grad_norm": 4.680635929107666, "learning_rate": 9.727659664322497e-06, "loss": 0.3162, "step": 15964 }, { "epoch": 52.34426229508197, "grad_norm": 3.7231903076171875, "learning_rate": 9.72659817274199e-06, "loss": 0.2733, "step": 15965 }, { "epoch": 52.34754098360656, "grad_norm": 4.123644828796387, "learning_rate": 9.72553668424437e-06, "loss": 0.3944, "step": 15966 }, { "epoch": 52.350819672131145, "grad_norm": 3.705808639526367, "learning_rate": 9.72447519884161e-06, "loss": 0.4199, "step": 15967 }, { "epoch": 52.35409836065574, "grad_norm": 7.344147205352783, "learning_rate": 9.72341371654568e-06, "loss": 0.2557, "step": 15968 }, { "epoch": 52.35737704918033, "grad_norm": 3.0059189796447754, "learning_rate": 9.722352237368548e-06, "loss": 0.2399, "step": 15969 }, { "epoch": 52.36065573770492, "grad_norm": 4.639474391937256, "learning_rate": 9.721290761322179e-06, "loss": 0.2218, "step": 15970 }, { "epoch": 52.363934426229505, "grad_norm": 5.716215133666992, "learning_rate": 9.72022928841855e-06, "loss": 0.2832, "step": 15971 }, { "epoch": 52.3672131147541, "grad_norm": 2.8314130306243896, "learning_rate": 9.719167818669629e-06, "loss": 0.1619, "step": 15972 }, { "epoch": 52.37049180327869, "grad_norm": 4.07832670211792, "learning_rate": 9.71810635208738e-06, "loss": 0.1911, "step": 15973 }, { "epoch": 52.37377049180328, "grad_norm": 3.3299829959869385, "learning_rate": 9.717044888683777e-06, "loss": 0.1981, "step": 15974 }, { "epoch": 52.377049180327866, "grad_norm": 3.7657220363616943, "learning_rate": 9.715983428470783e-06, "loss": 0.2714, "step": 15975 }, { "epoch": 52.38032786885246, "grad_norm": 4.002143859863281, "learning_rate": 9.714921971460374e-06, "loss": 0.2528, "step": 15976 }, { "epoch": 52.38360655737705, "grad_norm": 3.3443350791931152, "learning_rate": 9.713860517664517e-06, "loss": 0.255, "step": 15977 }, { "epoch": 52.38688524590164, "grad_norm": 3.3627445697784424, "learning_rate": 9.712799067095179e-06, "loss": 0.1926, "step": 15978 }, { "epoch": 52.390163934426226, "grad_norm": 3.51300311088562, "learning_rate": 9.711737619764326e-06, "loss": 0.2182, "step": 15979 }, { "epoch": 52.39344262295082, "grad_norm": 5.006710529327393, "learning_rate": 9.710676175683936e-06, "loss": 0.2165, "step": 15980 }, { "epoch": 52.39672131147541, "grad_norm": 3.365765333175659, "learning_rate": 9.709614734865972e-06, "loss": 0.1186, "step": 15981 }, { "epoch": 52.4, "grad_norm": 4.843128204345703, "learning_rate": 9.708553297322407e-06, "loss": 0.2203, "step": 15982 }, { "epoch": 52.40327868852459, "grad_norm": 5.457653999328613, "learning_rate": 9.707491863065199e-06, "loss": 0.2956, "step": 15983 }, { "epoch": 52.40655737704918, "grad_norm": 3.6120758056640625, "learning_rate": 9.706430432106329e-06, "loss": 0.1375, "step": 15984 }, { "epoch": 52.40983606557377, "grad_norm": 4.165579795837402, "learning_rate": 9.705369004457764e-06, "loss": 0.3311, "step": 15985 }, { "epoch": 52.41311475409836, "grad_norm": 3.5308573246002197, "learning_rate": 9.704307580131467e-06, "loss": 0.1597, "step": 15986 }, { "epoch": 52.41639344262295, "grad_norm": 3.256923198699951, "learning_rate": 9.703246159139408e-06, "loss": 0.1175, "step": 15987 }, { "epoch": 52.41967213114754, "grad_norm": 4.085236549377441, "learning_rate": 9.702184741493556e-06, "loss": 0.3153, "step": 15988 }, { "epoch": 52.42295081967213, "grad_norm": 3.45904278755188, "learning_rate": 9.701123327205884e-06, "loss": 0.2058, "step": 15989 }, { "epoch": 52.42622950819672, "grad_norm": 3.6821999549865723, "learning_rate": 9.700061916288355e-06, "loss": 0.2809, "step": 15990 }, { "epoch": 52.429508196721315, "grad_norm": 3.3372769355773926, "learning_rate": 9.699000508752943e-06, "loss": 0.2548, "step": 15991 }, { "epoch": 52.4327868852459, "grad_norm": 3.8317060470581055, "learning_rate": 9.69793910461161e-06, "loss": 0.138, "step": 15992 }, { "epoch": 52.43606557377049, "grad_norm": 4.998716831207275, "learning_rate": 9.696877703876328e-06, "loss": 0.3142, "step": 15993 }, { "epoch": 52.43934426229508, "grad_norm": 4.500116348266602, "learning_rate": 9.695816306559066e-06, "loss": 0.2504, "step": 15994 }, { "epoch": 52.442622950819676, "grad_norm": 5.034282207489014, "learning_rate": 9.694754912671792e-06, "loss": 0.2502, "step": 15995 }, { "epoch": 52.445901639344264, "grad_norm": 3.495863199234009, "learning_rate": 9.693693522226472e-06, "loss": 0.3178, "step": 15996 }, { "epoch": 52.44918032786885, "grad_norm": 3.627800941467285, "learning_rate": 9.692632135235077e-06, "loss": 0.28, "step": 15997 }, { "epoch": 52.45245901639344, "grad_norm": 5.451280117034912, "learning_rate": 9.691570751709576e-06, "loss": 0.2654, "step": 15998 }, { "epoch": 52.455737704918036, "grad_norm": 3.732374668121338, "learning_rate": 9.690509371661934e-06, "loss": 0.2297, "step": 15999 }, { "epoch": 52.459016393442624, "grad_norm": 3.478928804397583, "learning_rate": 9.689447995104121e-06, "loss": 0.3284, "step": 16000 }, { "epoch": 52.46229508196721, "grad_norm": 3.888768196105957, "learning_rate": 9.6883866220481e-06, "loss": 0.2433, "step": 16001 }, { "epoch": 52.4655737704918, "grad_norm": 3.169766902923584, "learning_rate": 9.687325252505849e-06, "loss": 0.3056, "step": 16002 }, { "epoch": 52.4688524590164, "grad_norm": 3.4248623847961426, "learning_rate": 9.68626388648933e-06, "loss": 0.1966, "step": 16003 }, { "epoch": 52.472131147540985, "grad_norm": 4.643673896789551, "learning_rate": 9.685202524010515e-06, "loss": 0.3476, "step": 16004 }, { "epoch": 52.47540983606557, "grad_norm": 3.3071465492248535, "learning_rate": 9.684141165081361e-06, "loss": 0.26, "step": 16005 }, { "epoch": 52.47868852459016, "grad_norm": 3.2699007987976074, "learning_rate": 9.68307980971385e-06, "loss": 0.1926, "step": 16006 }, { "epoch": 52.48196721311476, "grad_norm": 3.208961009979248, "learning_rate": 9.682018457919942e-06, "loss": 0.1077, "step": 16007 }, { "epoch": 52.485245901639345, "grad_norm": 3.272925615310669, "learning_rate": 9.680957109711609e-06, "loss": 0.3919, "step": 16008 }, { "epoch": 52.488524590163934, "grad_norm": 4.279740810394287, "learning_rate": 9.679895765100809e-06, "loss": 0.2459, "step": 16009 }, { "epoch": 52.49180327868852, "grad_norm": 5.484840393066406, "learning_rate": 9.678834424099523e-06, "loss": 0.3727, "step": 16010 }, { "epoch": 52.49508196721312, "grad_norm": 3.2940168380737305, "learning_rate": 9.677773086719714e-06, "loss": 0.1712, "step": 16011 }, { "epoch": 52.498360655737706, "grad_norm": 3.2059783935546875, "learning_rate": 9.676711752973347e-06, "loss": 0.1905, "step": 16012 }, { "epoch": 52.501639344262294, "grad_norm": 3.1545753479003906, "learning_rate": 9.67565042287239e-06, "loss": 0.1758, "step": 16013 }, { "epoch": 52.50491803278688, "grad_norm": 3.75474214553833, "learning_rate": 9.67458909642881e-06, "loss": 0.392, "step": 16014 }, { "epoch": 52.50819672131148, "grad_norm": 3.974418878555298, "learning_rate": 9.673527773654578e-06, "loss": 0.1444, "step": 16015 }, { "epoch": 52.511475409836066, "grad_norm": 4.466473579406738, "learning_rate": 9.672466454561662e-06, "loss": 0.2962, "step": 16016 }, { "epoch": 52.514754098360655, "grad_norm": 4.693678379058838, "learning_rate": 9.671405139162025e-06, "loss": 0.3219, "step": 16017 }, { "epoch": 52.51803278688524, "grad_norm": 3.4134554862976074, "learning_rate": 9.670343827467635e-06, "loss": 0.2384, "step": 16018 }, { "epoch": 52.52131147540984, "grad_norm": 7.050143241882324, "learning_rate": 9.669282519490465e-06, "loss": 0.4288, "step": 16019 }, { "epoch": 52.52459016393443, "grad_norm": 3.532329797744751, "learning_rate": 9.668221215242475e-06, "loss": 0.2096, "step": 16020 }, { "epoch": 52.527868852459015, "grad_norm": 2.558384895324707, "learning_rate": 9.66715991473564e-06, "loss": 0.2711, "step": 16021 }, { "epoch": 52.5311475409836, "grad_norm": 3.348125457763672, "learning_rate": 9.666098617981918e-06, "loss": 0.2494, "step": 16022 }, { "epoch": 52.5344262295082, "grad_norm": 4.629035472869873, "learning_rate": 9.665037324993282e-06, "loss": 0.293, "step": 16023 }, { "epoch": 52.53770491803279, "grad_norm": 4.388081073760986, "learning_rate": 9.663976035781701e-06, "loss": 0.2454, "step": 16024 }, { "epoch": 52.540983606557376, "grad_norm": 3.6413614749908447, "learning_rate": 9.662914750359141e-06, "loss": 0.3903, "step": 16025 }, { "epoch": 52.544262295081964, "grad_norm": 3.8499741554260254, "learning_rate": 9.661853468737565e-06, "loss": 0.4254, "step": 16026 }, { "epoch": 52.54754098360656, "grad_norm": 3.3391494750976562, "learning_rate": 9.66079219092894e-06, "loss": 0.2963, "step": 16027 }, { "epoch": 52.55081967213115, "grad_norm": 4.056152820587158, "learning_rate": 9.65973091694524e-06, "loss": 0.3264, "step": 16028 }, { "epoch": 52.554098360655736, "grad_norm": 3.897216796875, "learning_rate": 9.658669646798427e-06, "loss": 0.1991, "step": 16029 }, { "epoch": 52.557377049180324, "grad_norm": 3.8001370429992676, "learning_rate": 9.65760838050047e-06, "loss": 0.2299, "step": 16030 }, { "epoch": 52.56065573770492, "grad_norm": 3.5615921020507812, "learning_rate": 9.656547118063328e-06, "loss": 0.3183, "step": 16031 }, { "epoch": 52.56393442622951, "grad_norm": 3.464437484741211, "learning_rate": 9.65548585949898e-06, "loss": 0.2515, "step": 16032 }, { "epoch": 52.5672131147541, "grad_norm": 5.358741283416748, "learning_rate": 9.654424604819388e-06, "loss": 0.16, "step": 16033 }, { "epoch": 52.570491803278685, "grad_norm": 4.309765815734863, "learning_rate": 9.653363354036516e-06, "loss": 0.2228, "step": 16034 }, { "epoch": 52.57377049180328, "grad_norm": 3.4243576526641846, "learning_rate": 9.65230210716233e-06, "loss": 0.2605, "step": 16035 }, { "epoch": 52.57704918032787, "grad_norm": 3.0473361015319824, "learning_rate": 9.651240864208803e-06, "loss": 0.179, "step": 16036 }, { "epoch": 52.58032786885246, "grad_norm": 8.432808876037598, "learning_rate": 9.650179625187897e-06, "loss": 0.425, "step": 16037 }, { "epoch": 52.58360655737705, "grad_norm": 3.9363327026367188, "learning_rate": 9.649118390111581e-06, "loss": 0.3446, "step": 16038 }, { "epoch": 52.58688524590164, "grad_norm": 4.117015361785889, "learning_rate": 9.648057158991819e-06, "loss": 0.2536, "step": 16039 }, { "epoch": 52.59016393442623, "grad_norm": 3.253476619720459, "learning_rate": 9.646995931840575e-06, "loss": 0.2117, "step": 16040 }, { "epoch": 52.59344262295082, "grad_norm": 2.827648878097534, "learning_rate": 9.645934708669822e-06, "loss": 0.2103, "step": 16041 }, { "epoch": 52.59672131147541, "grad_norm": 4.683154106140137, "learning_rate": 9.644873489491524e-06, "loss": 0.1873, "step": 16042 }, { "epoch": 52.6, "grad_norm": 4.954174041748047, "learning_rate": 9.643812274317644e-06, "loss": 0.2696, "step": 16043 }, { "epoch": 52.60327868852459, "grad_norm": 3.5245308876037598, "learning_rate": 9.642751063160151e-06, "loss": 0.2684, "step": 16044 }, { "epoch": 52.60655737704918, "grad_norm": 4.594735145568848, "learning_rate": 9.641689856031015e-06, "loss": 0.2684, "step": 16045 }, { "epoch": 52.609836065573774, "grad_norm": 2.677494525909424, "learning_rate": 9.640628652942195e-06, "loss": 0.1575, "step": 16046 }, { "epoch": 52.61311475409836, "grad_norm": 3.0774996280670166, "learning_rate": 9.639567453905662e-06, "loss": 0.0909, "step": 16047 }, { "epoch": 52.61639344262295, "grad_norm": 3.9736435413360596, "learning_rate": 9.638506258933378e-06, "loss": 0.3856, "step": 16048 }, { "epoch": 52.61967213114754, "grad_norm": 7.339333534240723, "learning_rate": 9.637445068037315e-06, "loss": 0.4187, "step": 16049 }, { "epoch": 52.622950819672134, "grad_norm": 3.7937769889831543, "learning_rate": 9.636383881229436e-06, "loss": 0.1384, "step": 16050 }, { "epoch": 52.62622950819672, "grad_norm": 3.9900882244110107, "learning_rate": 9.635322698521706e-06, "loss": 0.2114, "step": 16051 }, { "epoch": 52.62950819672131, "grad_norm": 3.912696123123169, "learning_rate": 9.634261519926093e-06, "loss": 0.376, "step": 16052 }, { "epoch": 52.6327868852459, "grad_norm": 4.627433776855469, "learning_rate": 9.633200345454557e-06, "loss": 0.1643, "step": 16053 }, { "epoch": 52.636065573770495, "grad_norm": 5.849637508392334, "learning_rate": 9.632139175119072e-06, "loss": 0.3281, "step": 16054 }, { "epoch": 52.63934426229508, "grad_norm": 3.211549758911133, "learning_rate": 9.6310780089316e-06, "loss": 0.1445, "step": 16055 }, { "epoch": 52.64262295081967, "grad_norm": 3.5262393951416016, "learning_rate": 9.630016846904108e-06, "loss": 0.1211, "step": 16056 }, { "epoch": 52.64590163934426, "grad_norm": 3.5736539363861084, "learning_rate": 9.628955689048557e-06, "loss": 0.284, "step": 16057 }, { "epoch": 52.649180327868855, "grad_norm": 3.670912981033325, "learning_rate": 9.62789453537692e-06, "loss": 0.3152, "step": 16058 }, { "epoch": 52.65245901639344, "grad_norm": 3.7155723571777344, "learning_rate": 9.62683338590116e-06, "loss": 0.2692, "step": 16059 }, { "epoch": 52.65573770491803, "grad_norm": 3.5285284519195557, "learning_rate": 9.62577224063324e-06, "loss": 0.22, "step": 16060 }, { "epoch": 52.65901639344262, "grad_norm": 3.2584187984466553, "learning_rate": 9.624711099585123e-06, "loss": 0.3461, "step": 16061 }, { "epoch": 52.662295081967216, "grad_norm": 2.843433380126953, "learning_rate": 9.623649962768784e-06, "loss": 0.2628, "step": 16062 }, { "epoch": 52.665573770491804, "grad_norm": 3.223116159439087, "learning_rate": 9.622588830196182e-06, "loss": 0.2536, "step": 16063 }, { "epoch": 52.66885245901639, "grad_norm": 3.965259075164795, "learning_rate": 9.621527701879284e-06, "loss": 0.2546, "step": 16064 }, { "epoch": 52.67213114754098, "grad_norm": 4.076274871826172, "learning_rate": 9.620466577830055e-06, "loss": 0.1471, "step": 16065 }, { "epoch": 52.675409836065576, "grad_norm": 3.528351068496704, "learning_rate": 9.619405458060454e-06, "loss": 0.1145, "step": 16066 }, { "epoch": 52.678688524590164, "grad_norm": 3.3024189472198486, "learning_rate": 9.618344342582458e-06, "loss": 0.1586, "step": 16067 }, { "epoch": 52.68196721311475, "grad_norm": 3.4866251945495605, "learning_rate": 9.617283231408026e-06, "loss": 0.1392, "step": 16068 }, { "epoch": 52.68524590163934, "grad_norm": 6.0750732421875, "learning_rate": 9.61622212454912e-06, "loss": 0.3323, "step": 16069 }, { "epoch": 52.68852459016394, "grad_norm": 9.410841941833496, "learning_rate": 9.615161022017709e-06, "loss": 0.2882, "step": 16070 }, { "epoch": 52.691803278688525, "grad_norm": 2.635244131088257, "learning_rate": 9.614099923825761e-06, "loss": 0.076, "step": 16071 }, { "epoch": 52.69508196721311, "grad_norm": 4.013600826263428, "learning_rate": 9.613038829985235e-06, "loss": 0.3171, "step": 16072 }, { "epoch": 52.6983606557377, "grad_norm": 5.330180644989014, "learning_rate": 9.611977740508101e-06, "loss": 0.5005, "step": 16073 }, { "epoch": 52.7016393442623, "grad_norm": 3.7875659465789795, "learning_rate": 9.610916655406319e-06, "loss": 0.2202, "step": 16074 }, { "epoch": 52.704918032786885, "grad_norm": 4.988965034484863, "learning_rate": 9.609855574691856e-06, "loss": 0.2602, "step": 16075 }, { "epoch": 52.708196721311474, "grad_norm": 3.3009464740753174, "learning_rate": 9.608794498376678e-06, "loss": 0.1843, "step": 16076 }, { "epoch": 52.71147540983607, "grad_norm": 3.1653380393981934, "learning_rate": 9.607733426472752e-06, "loss": 0.1906, "step": 16077 }, { "epoch": 52.71475409836066, "grad_norm": 3.629894733428955, "learning_rate": 9.606672358992037e-06, "loss": 0.2872, "step": 16078 }, { "epoch": 52.718032786885246, "grad_norm": 4.2837677001953125, "learning_rate": 9.605611295946497e-06, "loss": 0.2889, "step": 16079 }, { "epoch": 52.721311475409834, "grad_norm": 3.931702136993408, "learning_rate": 9.604550237348103e-06, "loss": 0.2302, "step": 16080 }, { "epoch": 52.72459016393443, "grad_norm": 3.9181060791015625, "learning_rate": 9.603489183208816e-06, "loss": 0.3146, "step": 16081 }, { "epoch": 52.72786885245902, "grad_norm": 4.344219207763672, "learning_rate": 9.602428133540602e-06, "loss": 0.2391, "step": 16082 }, { "epoch": 52.731147540983606, "grad_norm": 3.3224234580993652, "learning_rate": 9.601367088355419e-06, "loss": 0.2153, "step": 16083 }, { "epoch": 52.734426229508195, "grad_norm": 3.958986759185791, "learning_rate": 9.600306047665241e-06, "loss": 0.3791, "step": 16084 }, { "epoch": 52.73770491803279, "grad_norm": 3.9283885955810547, "learning_rate": 9.599245011482027e-06, "loss": 0.2257, "step": 16085 }, { "epoch": 52.74098360655738, "grad_norm": 3.6004655361175537, "learning_rate": 9.598183979817743e-06, "loss": 0.1492, "step": 16086 }, { "epoch": 52.74426229508197, "grad_norm": 3.184109926223755, "learning_rate": 9.59712295268435e-06, "loss": 0.3661, "step": 16087 }, { "epoch": 52.747540983606555, "grad_norm": 3.96317195892334, "learning_rate": 9.596061930093816e-06, "loss": 0.1836, "step": 16088 }, { "epoch": 52.75081967213115, "grad_norm": 4.080379962921143, "learning_rate": 9.595000912058105e-06, "loss": 0.4874, "step": 16089 }, { "epoch": 52.75409836065574, "grad_norm": 4.0552144050598145, "learning_rate": 9.59393989858918e-06, "loss": 0.2456, "step": 16090 }, { "epoch": 52.75737704918033, "grad_norm": 3.369290351867676, "learning_rate": 9.592878889699004e-06, "loss": 0.1947, "step": 16091 }, { "epoch": 52.760655737704916, "grad_norm": 4.556679725646973, "learning_rate": 9.591817885399538e-06, "loss": 0.3259, "step": 16092 }, { "epoch": 52.76393442622951, "grad_norm": 4.338824272155762, "learning_rate": 9.590756885702755e-06, "loss": 0.3433, "step": 16093 }, { "epoch": 52.7672131147541, "grad_norm": 3.8103597164154053, "learning_rate": 9.589695890620611e-06, "loss": 0.152, "step": 16094 }, { "epoch": 52.77049180327869, "grad_norm": 3.106926918029785, "learning_rate": 9.588634900165074e-06, "loss": 0.3651, "step": 16095 }, { "epoch": 52.773770491803276, "grad_norm": 3.9082441329956055, "learning_rate": 9.587573914348103e-06, "loss": 0.2603, "step": 16096 }, { "epoch": 52.77704918032787, "grad_norm": 2.825505018234253, "learning_rate": 9.586512933181668e-06, "loss": 0.1367, "step": 16097 }, { "epoch": 52.78032786885246, "grad_norm": 3.836930751800537, "learning_rate": 9.585451956677731e-06, "loss": 0.4243, "step": 16098 }, { "epoch": 52.78360655737705, "grad_norm": 7.843451976776123, "learning_rate": 9.58439098484825e-06, "loss": 0.3231, "step": 16099 }, { "epoch": 52.78688524590164, "grad_norm": 3.4215173721313477, "learning_rate": 9.583330017705193e-06, "loss": 0.2712, "step": 16100 }, { "epoch": 52.79016393442623, "grad_norm": 3.3386480808258057, "learning_rate": 9.582269055260528e-06, "loss": 0.3138, "step": 16101 }, { "epoch": 52.79344262295082, "grad_norm": 6.754486083984375, "learning_rate": 9.581208097526209e-06, "loss": 0.271, "step": 16102 }, { "epoch": 52.79672131147541, "grad_norm": 3.1528351306915283, "learning_rate": 9.580147144514207e-06, "loss": 0.128, "step": 16103 }, { "epoch": 52.8, "grad_norm": 2.939861297607422, "learning_rate": 9.579086196236483e-06, "loss": 0.1168, "step": 16104 }, { "epoch": 52.80327868852459, "grad_norm": 4.863544940948486, "learning_rate": 9.578025252704994e-06, "loss": 0.3217, "step": 16105 }, { "epoch": 52.80655737704918, "grad_norm": 4.276000022888184, "learning_rate": 9.576964313931715e-06, "loss": 0.145, "step": 16106 }, { "epoch": 52.80983606557377, "grad_norm": 4.003058910369873, "learning_rate": 9.575903379928601e-06, "loss": 0.2927, "step": 16107 }, { "epoch": 52.81311475409836, "grad_norm": 3.658997058868408, "learning_rate": 9.57484245070762e-06, "loss": 0.2924, "step": 16108 }, { "epoch": 52.81639344262295, "grad_norm": 3.2624495029449463, "learning_rate": 9.573781526280726e-06, "loss": 0.3469, "step": 16109 }, { "epoch": 52.81967213114754, "grad_norm": 3.346064567565918, "learning_rate": 9.572720606659895e-06, "loss": 0.2321, "step": 16110 }, { "epoch": 52.82295081967213, "grad_norm": 3.812675952911377, "learning_rate": 9.571659691857082e-06, "loss": 0.3056, "step": 16111 }, { "epoch": 52.82622950819672, "grad_norm": 4.886624336242676, "learning_rate": 9.570598781884252e-06, "loss": 0.1922, "step": 16112 }, { "epoch": 52.829508196721314, "grad_norm": 3.4764785766601562, "learning_rate": 9.569537876753361e-06, "loss": 0.3166, "step": 16113 }, { "epoch": 52.8327868852459, "grad_norm": 3.8351447582244873, "learning_rate": 9.568476976476384e-06, "loss": 0.1961, "step": 16114 }, { "epoch": 52.83606557377049, "grad_norm": 4.276727676391602, "learning_rate": 9.567416081065278e-06, "loss": 0.2333, "step": 16115 }, { "epoch": 52.83934426229508, "grad_norm": 3.629021644592285, "learning_rate": 9.566355190532003e-06, "loss": 0.3041, "step": 16116 }, { "epoch": 52.842622950819674, "grad_norm": 3.272533416748047, "learning_rate": 9.565294304888527e-06, "loss": 0.0833, "step": 16117 }, { "epoch": 52.84590163934426, "grad_norm": 3.5183370113372803, "learning_rate": 9.564233424146804e-06, "loss": 0.2623, "step": 16118 }, { "epoch": 52.84918032786885, "grad_norm": 3.4432260990142822, "learning_rate": 9.563172548318808e-06, "loss": 0.3708, "step": 16119 }, { "epoch": 52.85245901639344, "grad_norm": 3.1728315353393555, "learning_rate": 9.562111677416495e-06, "loss": 0.1763, "step": 16120 }, { "epoch": 52.855737704918035, "grad_norm": 3.209085464477539, "learning_rate": 9.561050811451828e-06, "loss": 0.2631, "step": 16121 }, { "epoch": 52.85901639344262, "grad_norm": 2.659846782684326, "learning_rate": 9.559989950436764e-06, "loss": 0.0862, "step": 16122 }, { "epoch": 52.86229508196721, "grad_norm": 3.21225643157959, "learning_rate": 9.558929094383276e-06, "loss": 0.2536, "step": 16123 }, { "epoch": 52.86557377049181, "grad_norm": 3.3989181518554688, "learning_rate": 9.55786824330332e-06, "loss": 0.258, "step": 16124 }, { "epoch": 52.868852459016395, "grad_norm": 3.674537420272827, "learning_rate": 9.556807397208859e-06, "loss": 0.4641, "step": 16125 }, { "epoch": 52.87213114754098, "grad_norm": 4.363002777099609, "learning_rate": 9.555746556111855e-06, "loss": 0.2958, "step": 16126 }, { "epoch": 52.87540983606557, "grad_norm": 5.529879093170166, "learning_rate": 9.554685720024273e-06, "loss": 0.2361, "step": 16127 }, { "epoch": 52.87868852459017, "grad_norm": 3.186518907546997, "learning_rate": 9.553624888958068e-06, "loss": 0.2225, "step": 16128 }, { "epoch": 52.881967213114756, "grad_norm": 3.3053479194641113, "learning_rate": 9.55256406292521e-06, "loss": 0.1521, "step": 16129 }, { "epoch": 52.885245901639344, "grad_norm": 20.289451599121094, "learning_rate": 9.551503241937658e-06, "loss": 0.1898, "step": 16130 }, { "epoch": 52.88852459016393, "grad_norm": 3.515428066253662, "learning_rate": 9.55044242600737e-06, "loss": 0.2591, "step": 16131 }, { "epoch": 52.89180327868853, "grad_norm": 3.6649413108825684, "learning_rate": 9.549381615146314e-06, "loss": 0.2348, "step": 16132 }, { "epoch": 52.895081967213116, "grad_norm": 3.185183048248291, "learning_rate": 9.548320809366449e-06, "loss": 0.1857, "step": 16133 }, { "epoch": 52.898360655737704, "grad_norm": 3.59698224067688, "learning_rate": 9.547260008679734e-06, "loss": 0.2508, "step": 16134 }, { "epoch": 52.90163934426229, "grad_norm": 4.841800212860107, "learning_rate": 9.546199213098134e-06, "loss": 0.2536, "step": 16135 }, { "epoch": 52.90491803278689, "grad_norm": 4.44107723236084, "learning_rate": 9.54513842263361e-06, "loss": 0.2695, "step": 16136 }, { "epoch": 52.90819672131148, "grad_norm": 3.719782590866089, "learning_rate": 9.544077637298124e-06, "loss": 0.4989, "step": 16137 }, { "epoch": 52.911475409836065, "grad_norm": 5.350657939910889, "learning_rate": 9.543016857103637e-06, "loss": 0.372, "step": 16138 }, { "epoch": 52.91475409836065, "grad_norm": 3.9698166847229004, "learning_rate": 9.541956082062111e-06, "loss": 0.3623, "step": 16139 }, { "epoch": 52.91803278688525, "grad_norm": 4.1265549659729, "learning_rate": 9.5408953121855e-06, "loss": 0.2071, "step": 16140 }, { "epoch": 52.92131147540984, "grad_norm": 10.45885181427002, "learning_rate": 9.539834547485777e-06, "loss": 0.2383, "step": 16141 }, { "epoch": 52.924590163934425, "grad_norm": 4.208532810211182, "learning_rate": 9.538773787974898e-06, "loss": 0.2969, "step": 16142 }, { "epoch": 52.927868852459014, "grad_norm": 4.946936130523682, "learning_rate": 9.537713033664825e-06, "loss": 0.4332, "step": 16143 }, { "epoch": 52.93114754098361, "grad_norm": 4.741243362426758, "learning_rate": 9.536652284567514e-06, "loss": 0.301, "step": 16144 }, { "epoch": 52.9344262295082, "grad_norm": 4.486670017242432, "learning_rate": 9.535591540694933e-06, "loss": 0.1801, "step": 16145 }, { "epoch": 52.937704918032786, "grad_norm": 5.092488765716553, "learning_rate": 9.53453080205904e-06, "loss": 0.2689, "step": 16146 }, { "epoch": 52.940983606557374, "grad_norm": 3.1836211681365967, "learning_rate": 9.533470068671798e-06, "loss": 0.4488, "step": 16147 }, { "epoch": 52.94426229508197, "grad_norm": 3.434481620788574, "learning_rate": 9.532409340545161e-06, "loss": 0.209, "step": 16148 }, { "epoch": 52.94754098360656, "grad_norm": 3.6296939849853516, "learning_rate": 9.5313486176911e-06, "loss": 0.3886, "step": 16149 }, { "epoch": 52.950819672131146, "grad_norm": 7.980249404907227, "learning_rate": 9.53028790012157e-06, "loss": 0.3573, "step": 16150 }, { "epoch": 52.954098360655735, "grad_norm": 4.920903205871582, "learning_rate": 9.529227187848529e-06, "loss": 0.2374, "step": 16151 }, { "epoch": 52.95737704918033, "grad_norm": 3.592914342880249, "learning_rate": 9.528166480883943e-06, "loss": 0.2457, "step": 16152 }, { "epoch": 52.96065573770492, "grad_norm": 5.509382724761963, "learning_rate": 9.52710577923977e-06, "loss": 0.2526, "step": 16153 }, { "epoch": 52.96393442622951, "grad_norm": 3.5753962993621826, "learning_rate": 9.526045082927971e-06, "loss": 0.2153, "step": 16154 }, { "epoch": 52.967213114754095, "grad_norm": 12.196894645690918, "learning_rate": 9.524984391960508e-06, "loss": 0.444, "step": 16155 }, { "epoch": 52.97049180327869, "grad_norm": 4.557772159576416, "learning_rate": 9.52392370634934e-06, "loss": 0.3922, "step": 16156 }, { "epoch": 52.97377049180328, "grad_norm": 5.930198669433594, "learning_rate": 9.522863026106421e-06, "loss": 0.3539, "step": 16157 }, { "epoch": 52.97704918032787, "grad_norm": 3.945798873901367, "learning_rate": 9.521802351243724e-06, "loss": 0.397, "step": 16158 }, { "epoch": 52.980327868852456, "grad_norm": 4.633065700531006, "learning_rate": 9.520741681773203e-06, "loss": 0.2618, "step": 16159 }, { "epoch": 52.98360655737705, "grad_norm": 3.3477888107299805, "learning_rate": 9.519681017706817e-06, "loss": 0.2779, "step": 16160 }, { "epoch": 52.98688524590164, "grad_norm": 3.7798898220062256, "learning_rate": 9.518620359056521e-06, "loss": 0.182, "step": 16161 }, { "epoch": 52.99016393442623, "grad_norm": 4.426928520202637, "learning_rate": 9.517559705834288e-06, "loss": 0.207, "step": 16162 }, { "epoch": 52.993442622950816, "grad_norm": 3.6219911575317383, "learning_rate": 9.51649905805207e-06, "loss": 0.1588, "step": 16163 }, { "epoch": 52.99672131147541, "grad_norm": 3.9528024196624756, "learning_rate": 9.515438415721828e-06, "loss": 0.3285, "step": 16164 }, { "epoch": 53.0, "grad_norm": 3.6527137756347656, "learning_rate": 9.514377778855521e-06, "loss": 0.3443, "step": 16165 }, { "epoch": 53.00327868852459, "grad_norm": 3.2406458854675293, "learning_rate": 9.513317147465105e-06, "loss": 0.2097, "step": 16166 }, { "epoch": 53.006557377049184, "grad_norm": 3.1748600006103516, "learning_rate": 9.51225652156255e-06, "loss": 0.1649, "step": 16167 }, { "epoch": 53.00983606557377, "grad_norm": 3.7317147254943848, "learning_rate": 9.511195901159809e-06, "loss": 0.3217, "step": 16168 }, { "epoch": 53.01311475409836, "grad_norm": 3.4354991912841797, "learning_rate": 9.510135286268842e-06, "loss": 0.2369, "step": 16169 }, { "epoch": 53.01639344262295, "grad_norm": 3.118443489074707, "learning_rate": 9.509074676901605e-06, "loss": 0.1313, "step": 16170 }, { "epoch": 53.019672131147544, "grad_norm": 3.3872363567352295, "learning_rate": 9.508014073070066e-06, "loss": 0.0973, "step": 16171 }, { "epoch": 53.02295081967213, "grad_norm": 4.005355358123779, "learning_rate": 9.506953474786179e-06, "loss": 0.2965, "step": 16172 }, { "epoch": 53.02622950819672, "grad_norm": 3.907156467437744, "learning_rate": 9.505892882061905e-06, "loss": 0.1686, "step": 16173 }, { "epoch": 53.02950819672131, "grad_norm": 4.356093883514404, "learning_rate": 9.504832294909198e-06, "loss": 0.4195, "step": 16174 }, { "epoch": 53.032786885245905, "grad_norm": 5.839663028717041, "learning_rate": 9.503771713340026e-06, "loss": 0.2822, "step": 16175 }, { "epoch": 53.03606557377049, "grad_norm": 5.971975803375244, "learning_rate": 9.502711137366343e-06, "loss": 0.2171, "step": 16176 }, { "epoch": 53.03934426229508, "grad_norm": 3.6284937858581543, "learning_rate": 9.501650567000108e-06, "loss": 0.2, "step": 16177 }, { "epoch": 53.04262295081967, "grad_norm": 4.1691484451293945, "learning_rate": 9.500590002253283e-06, "loss": 0.2048, "step": 16178 }, { "epoch": 53.045901639344265, "grad_norm": 5.4437642097473145, "learning_rate": 9.499529443137823e-06, "loss": 0.1555, "step": 16179 }, { "epoch": 53.049180327868854, "grad_norm": 4.235743522644043, "learning_rate": 9.49846888966569e-06, "loss": 0.4521, "step": 16180 }, { "epoch": 53.05245901639344, "grad_norm": 4.077156066894531, "learning_rate": 9.497408341848842e-06, "loss": 0.3216, "step": 16181 }, { "epoch": 53.05573770491803, "grad_norm": 5.181499004364014, "learning_rate": 9.496347799699238e-06, "loss": 0.2975, "step": 16182 }, { "epoch": 53.059016393442626, "grad_norm": 3.029238224029541, "learning_rate": 9.495287263228834e-06, "loss": 0.2918, "step": 16183 }, { "epoch": 53.062295081967214, "grad_norm": 3.6376137733459473, "learning_rate": 9.49422673244959e-06, "loss": 0.164, "step": 16184 }, { "epoch": 53.0655737704918, "grad_norm": 3.2476420402526855, "learning_rate": 9.493166207373469e-06, "loss": 0.3393, "step": 16185 }, { "epoch": 53.06885245901639, "grad_norm": 3.9559338092803955, "learning_rate": 9.492105688012426e-06, "loss": 0.1709, "step": 16186 }, { "epoch": 53.072131147540986, "grad_norm": 3.4519381523132324, "learning_rate": 9.491045174378415e-06, "loss": 0.153, "step": 16187 }, { "epoch": 53.075409836065575, "grad_norm": 3.5621843338012695, "learning_rate": 9.489984666483402e-06, "loss": 0.1795, "step": 16188 }, { "epoch": 53.07868852459016, "grad_norm": 3.3058767318725586, "learning_rate": 9.488924164339342e-06, "loss": 0.0972, "step": 16189 }, { "epoch": 53.08196721311475, "grad_norm": 4.6478962898254395, "learning_rate": 9.487863667958197e-06, "loss": 0.2623, "step": 16190 }, { "epoch": 53.08524590163935, "grad_norm": 3.944702625274658, "learning_rate": 9.486803177351918e-06, "loss": 0.2292, "step": 16191 }, { "epoch": 53.088524590163935, "grad_norm": 4.405656337738037, "learning_rate": 9.485742692532462e-06, "loss": 0.1445, "step": 16192 }, { "epoch": 53.09180327868852, "grad_norm": 3.8370089530944824, "learning_rate": 9.484682213511798e-06, "loss": 0.2111, "step": 16193 }, { "epoch": 53.09508196721311, "grad_norm": 8.308416366577148, "learning_rate": 9.483621740301879e-06, "loss": 0.3131, "step": 16194 }, { "epoch": 53.09836065573771, "grad_norm": 2.6805360317230225, "learning_rate": 9.48256127291466e-06, "loss": 0.0873, "step": 16195 }, { "epoch": 53.101639344262296, "grad_norm": 3.39774489402771, "learning_rate": 9.481500811362097e-06, "loss": 0.2222, "step": 16196 }, { "epoch": 53.104918032786884, "grad_norm": 2.7501473426818848, "learning_rate": 9.480440355656154e-06, "loss": 0.1747, "step": 16197 }, { "epoch": 53.10819672131147, "grad_norm": 3.73091197013855, "learning_rate": 9.479379905808787e-06, "loss": 0.264, "step": 16198 }, { "epoch": 53.11147540983607, "grad_norm": 4.131209373474121, "learning_rate": 9.478319461831955e-06, "loss": 0.4196, "step": 16199 }, { "epoch": 53.114754098360656, "grad_norm": 3.373115301132202, "learning_rate": 9.477259023737606e-06, "loss": 0.3353, "step": 16200 }, { "epoch": 53.118032786885244, "grad_norm": 3.2677645683288574, "learning_rate": 9.476198591537712e-06, "loss": 0.1817, "step": 16201 }, { "epoch": 53.12131147540983, "grad_norm": 5.177981376647949, "learning_rate": 9.475138165244223e-06, "loss": 0.1551, "step": 16202 }, { "epoch": 53.12459016393443, "grad_norm": 5.882718086242676, "learning_rate": 9.474077744869095e-06, "loss": 0.2116, "step": 16203 }, { "epoch": 53.12786885245902, "grad_norm": 2.654219150543213, "learning_rate": 9.473017330424287e-06, "loss": 0.0982, "step": 16204 }, { "epoch": 53.131147540983605, "grad_norm": 3.879037857055664, "learning_rate": 9.471956921921757e-06, "loss": 0.2333, "step": 16205 }, { "epoch": 53.13442622950819, "grad_norm": 2.9914042949676514, "learning_rate": 9.470896519373463e-06, "loss": 0.2074, "step": 16206 }, { "epoch": 53.13770491803279, "grad_norm": 3.5196940898895264, "learning_rate": 9.469836122791358e-06, "loss": 0.2058, "step": 16207 }, { "epoch": 53.14098360655738, "grad_norm": 3.7504003047943115, "learning_rate": 9.468775732187406e-06, "loss": 0.2484, "step": 16208 }, { "epoch": 53.144262295081965, "grad_norm": 4.0567708015441895, "learning_rate": 9.467715347573555e-06, "loss": 0.458, "step": 16209 }, { "epoch": 53.14754098360656, "grad_norm": 3.4771783351898193, "learning_rate": 9.466654968961767e-06, "loss": 0.2862, "step": 16210 }, { "epoch": 53.15081967213115, "grad_norm": 3.889979124069214, "learning_rate": 9.465594596364004e-06, "loss": 0.2486, "step": 16211 }, { "epoch": 53.15409836065574, "grad_norm": 4.837762832641602, "learning_rate": 9.464534229792216e-06, "loss": 0.3483, "step": 16212 }, { "epoch": 53.157377049180326, "grad_norm": 3.5935637950897217, "learning_rate": 9.463473869258356e-06, "loss": 0.3294, "step": 16213 }, { "epoch": 53.16065573770492, "grad_norm": 4.455319881439209, "learning_rate": 9.46241351477439e-06, "loss": 0.3291, "step": 16214 }, { "epoch": 53.16393442622951, "grad_norm": 3.789787530899048, "learning_rate": 9.461353166352274e-06, "loss": 0.186, "step": 16215 }, { "epoch": 53.1672131147541, "grad_norm": 6.040771007537842, "learning_rate": 9.460292824003957e-06, "loss": 0.2472, "step": 16216 }, { "epoch": 53.170491803278686, "grad_norm": 3.3065435886383057, "learning_rate": 9.459232487741403e-06, "loss": 0.2849, "step": 16217 }, { "epoch": 53.17377049180328, "grad_norm": 3.930438995361328, "learning_rate": 9.458172157576558e-06, "loss": 0.2293, "step": 16218 }, { "epoch": 53.17704918032787, "grad_norm": 3.27518630027771, "learning_rate": 9.457111833521392e-06, "loss": 0.1802, "step": 16219 }, { "epoch": 53.18032786885246, "grad_norm": 4.33458948135376, "learning_rate": 9.456051515587852e-06, "loss": 0.3699, "step": 16220 }, { "epoch": 53.18360655737705, "grad_norm": 3.2731213569641113, "learning_rate": 9.4549912037879e-06, "loss": 0.198, "step": 16221 }, { "epoch": 53.18688524590164, "grad_norm": 3.2963976860046387, "learning_rate": 9.45393089813348e-06, "loss": 0.3112, "step": 16222 }, { "epoch": 53.19016393442623, "grad_norm": 3.9431040287017822, "learning_rate": 9.452870598636565e-06, "loss": 0.3509, "step": 16223 }, { "epoch": 53.19344262295082, "grad_norm": 3.861945152282715, "learning_rate": 9.451810305309101e-06, "loss": 0.2534, "step": 16224 }, { "epoch": 53.19672131147541, "grad_norm": 4.1115593910217285, "learning_rate": 9.450750018163047e-06, "loss": 0.253, "step": 16225 }, { "epoch": 53.2, "grad_norm": 3.432300567626953, "learning_rate": 9.449689737210352e-06, "loss": 0.3457, "step": 16226 }, { "epoch": 53.20327868852459, "grad_norm": 3.7275562286376953, "learning_rate": 9.448629462462983e-06, "loss": 0.3529, "step": 16227 }, { "epoch": 53.20655737704918, "grad_norm": 3.5720531940460205, "learning_rate": 9.447569193932889e-06, "loss": 0.4081, "step": 16228 }, { "epoch": 53.20983606557377, "grad_norm": 4.437370300292969, "learning_rate": 9.446508931632027e-06, "loss": 0.4284, "step": 16229 }, { "epoch": 53.21311475409836, "grad_norm": 2.921027660369873, "learning_rate": 9.44544867557235e-06, "loss": 0.2872, "step": 16230 }, { "epoch": 53.21639344262295, "grad_norm": 4.364586353302002, "learning_rate": 9.444388425765816e-06, "loss": 0.2102, "step": 16231 }, { "epoch": 53.21967213114754, "grad_norm": 3.3253283500671387, "learning_rate": 9.443328182224383e-06, "loss": 0.1809, "step": 16232 }, { "epoch": 53.22295081967213, "grad_norm": 3.718445062637329, "learning_rate": 9.44226794496e-06, "loss": 0.2119, "step": 16233 }, { "epoch": 53.226229508196724, "grad_norm": 3.1810364723205566, "learning_rate": 9.44120771398463e-06, "loss": 0.1099, "step": 16234 }, { "epoch": 53.22950819672131, "grad_norm": 4.0458879470825195, "learning_rate": 9.44014748931022e-06, "loss": 0.3151, "step": 16235 }, { "epoch": 53.2327868852459, "grad_norm": 3.939479112625122, "learning_rate": 9.439087270948728e-06, "loss": 0.2674, "step": 16236 }, { "epoch": 53.23606557377049, "grad_norm": 4.717464923858643, "learning_rate": 9.438027058912115e-06, "loss": 0.1915, "step": 16237 }, { "epoch": 53.239344262295084, "grad_norm": 3.7711148262023926, "learning_rate": 9.43696685321233e-06, "loss": 0.3513, "step": 16238 }, { "epoch": 53.24262295081967, "grad_norm": 2.834531545639038, "learning_rate": 9.435906653861326e-06, "loss": 0.1816, "step": 16239 }, { "epoch": 53.24590163934426, "grad_norm": 3.373596668243408, "learning_rate": 9.434846460871064e-06, "loss": 0.1076, "step": 16240 }, { "epoch": 53.24918032786885, "grad_norm": 3.351435899734497, "learning_rate": 9.433786274253496e-06, "loss": 0.3629, "step": 16241 }, { "epoch": 53.252459016393445, "grad_norm": 3.0677547454833984, "learning_rate": 9.432726094020577e-06, "loss": 0.1014, "step": 16242 }, { "epoch": 53.25573770491803, "grad_norm": 10.94925594329834, "learning_rate": 9.43166592018426e-06, "loss": 0.2958, "step": 16243 }, { "epoch": 53.25901639344262, "grad_norm": 4.042627811431885, "learning_rate": 9.430605752756497e-06, "loss": 0.25, "step": 16244 }, { "epoch": 53.26229508196721, "grad_norm": 3.422342538833618, "learning_rate": 9.429545591749251e-06, "loss": 0.1903, "step": 16245 }, { "epoch": 53.265573770491805, "grad_norm": 3.4071156978607178, "learning_rate": 9.428485437174472e-06, "loss": 0.2045, "step": 16246 }, { "epoch": 53.268852459016394, "grad_norm": 4.511597633361816, "learning_rate": 9.427425289044114e-06, "loss": 0.2598, "step": 16247 }, { "epoch": 53.27213114754098, "grad_norm": 3.81744384765625, "learning_rate": 9.426365147370124e-06, "loss": 0.3407, "step": 16248 }, { "epoch": 53.27540983606557, "grad_norm": 3.829073190689087, "learning_rate": 9.42530501216447e-06, "loss": 0.1801, "step": 16249 }, { "epoch": 53.278688524590166, "grad_norm": 3.5282256603240967, "learning_rate": 9.4242448834391e-06, "loss": 0.2275, "step": 16250 }, { "epoch": 53.281967213114754, "grad_norm": 4.359623908996582, "learning_rate": 9.423184761205966e-06, "loss": 0.2708, "step": 16251 }, { "epoch": 53.28524590163934, "grad_norm": 8.648709297180176, "learning_rate": 9.42212464547702e-06, "loss": 0.2596, "step": 16252 }, { "epoch": 53.28852459016394, "grad_norm": 3.344635248184204, "learning_rate": 9.421064536264225e-06, "loss": 0.158, "step": 16253 }, { "epoch": 53.291803278688526, "grad_norm": 4.087608814239502, "learning_rate": 9.420004433579529e-06, "loss": 0.3029, "step": 16254 }, { "epoch": 53.295081967213115, "grad_norm": 3.298572301864624, "learning_rate": 9.418944337434884e-06, "loss": 0.1128, "step": 16255 }, { "epoch": 53.2983606557377, "grad_norm": 3.2717249393463135, "learning_rate": 9.417884247842245e-06, "loss": 0.1909, "step": 16256 }, { "epoch": 53.3016393442623, "grad_norm": 3.2130274772644043, "learning_rate": 9.416824164813567e-06, "loss": 0.2669, "step": 16257 }, { "epoch": 53.30491803278689, "grad_norm": 3.979104995727539, "learning_rate": 9.415764088360804e-06, "loss": 0.3735, "step": 16258 }, { "epoch": 53.308196721311475, "grad_norm": 3.4936699867248535, "learning_rate": 9.414704018495905e-06, "loss": 0.4892, "step": 16259 }, { "epoch": 53.31147540983606, "grad_norm": 3.225377082824707, "learning_rate": 9.413643955230832e-06, "loss": 0.3006, "step": 16260 }, { "epoch": 53.31475409836066, "grad_norm": 3.2198057174682617, "learning_rate": 9.412583898577527e-06, "loss": 0.3572, "step": 16261 }, { "epoch": 53.31803278688525, "grad_norm": 3.1236579418182373, "learning_rate": 9.411523848547955e-06, "loss": 0.3484, "step": 16262 }, { "epoch": 53.321311475409836, "grad_norm": 3.36741042137146, "learning_rate": 9.410463805154059e-06, "loss": 0.1107, "step": 16263 }, { "epoch": 53.324590163934424, "grad_norm": 3.5169308185577393, "learning_rate": 9.4094037684078e-06, "loss": 0.1907, "step": 16264 }, { "epoch": 53.32786885245902, "grad_norm": 7.160231590270996, "learning_rate": 9.408343738321126e-06, "loss": 0.3154, "step": 16265 }, { "epoch": 53.33114754098361, "grad_norm": 3.8776113986968994, "learning_rate": 9.40728371490599e-06, "loss": 0.2751, "step": 16266 }, { "epoch": 53.334426229508196, "grad_norm": 3.276057004928589, "learning_rate": 9.40622369817435e-06, "loss": 0.2393, "step": 16267 }, { "epoch": 53.337704918032784, "grad_norm": 3.2072293758392334, "learning_rate": 9.405163688138153e-06, "loss": 0.2334, "step": 16268 }, { "epoch": 53.34098360655738, "grad_norm": 4.4437642097473145, "learning_rate": 9.404103684809357e-06, "loss": 0.2849, "step": 16269 }, { "epoch": 53.34426229508197, "grad_norm": 3.1257457733154297, "learning_rate": 9.403043688199905e-06, "loss": 0.2902, "step": 16270 }, { "epoch": 53.34754098360656, "grad_norm": 2.8992230892181396, "learning_rate": 9.401983698321759e-06, "loss": 0.1067, "step": 16271 }, { "epoch": 53.350819672131145, "grad_norm": 3.752232789993286, "learning_rate": 9.400923715186871e-06, "loss": 0.2873, "step": 16272 }, { "epoch": 53.35409836065574, "grad_norm": 5.588192462921143, "learning_rate": 9.399863738807192e-06, "loss": 0.2513, "step": 16273 }, { "epoch": 53.35737704918033, "grad_norm": 3.3034627437591553, "learning_rate": 9.398803769194667e-06, "loss": 0.183, "step": 16274 }, { "epoch": 53.36065573770492, "grad_norm": 4.36881685256958, "learning_rate": 9.397743806361258e-06, "loss": 0.2228, "step": 16275 }, { "epoch": 53.363934426229505, "grad_norm": 4.408754825592041, "learning_rate": 9.396683850318916e-06, "loss": 0.2205, "step": 16276 }, { "epoch": 53.3672131147541, "grad_norm": 5.648752689361572, "learning_rate": 9.39562390107959e-06, "loss": 0.2189, "step": 16277 }, { "epoch": 53.37049180327869, "grad_norm": 2.8617162704467773, "learning_rate": 9.394563958655229e-06, "loss": 0.3464, "step": 16278 }, { "epoch": 53.37377049180328, "grad_norm": 3.193624973297119, "learning_rate": 9.393504023057792e-06, "loss": 0.1995, "step": 16279 }, { "epoch": 53.377049180327866, "grad_norm": 3.9645166397094727, "learning_rate": 9.39244409429923e-06, "loss": 0.3068, "step": 16280 }, { "epoch": 53.38032786885246, "grad_norm": 3.5606017112731934, "learning_rate": 9.39138417239149e-06, "loss": 0.235, "step": 16281 }, { "epoch": 53.38360655737705, "grad_norm": 3.9829201698303223, "learning_rate": 9.390324257346527e-06, "loss": 0.3786, "step": 16282 }, { "epoch": 53.38688524590164, "grad_norm": 3.5354833602905273, "learning_rate": 9.389264349176288e-06, "loss": 0.2525, "step": 16283 }, { "epoch": 53.390163934426226, "grad_norm": 3.8937172889709473, "learning_rate": 9.388204447892732e-06, "loss": 0.2529, "step": 16284 }, { "epoch": 53.39344262295082, "grad_norm": 3.5416219234466553, "learning_rate": 9.387144553507807e-06, "loss": 0.1816, "step": 16285 }, { "epoch": 53.39672131147541, "grad_norm": 4.0137619972229, "learning_rate": 9.386084666033464e-06, "loss": 0.1485, "step": 16286 }, { "epoch": 53.4, "grad_norm": 3.5276641845703125, "learning_rate": 9.385024785481653e-06, "loss": 0.265, "step": 16287 }, { "epoch": 53.40327868852459, "grad_norm": 2.7470240592956543, "learning_rate": 9.38396491186433e-06, "loss": 0.2306, "step": 16288 }, { "epoch": 53.40655737704918, "grad_norm": 2.6990909576416016, "learning_rate": 9.382905045193441e-06, "loss": 0.1897, "step": 16289 }, { "epoch": 53.40983606557377, "grad_norm": 4.353717803955078, "learning_rate": 9.38184518548094e-06, "loss": 0.29, "step": 16290 }, { "epoch": 53.41311475409836, "grad_norm": 3.925577402114868, "learning_rate": 9.380785332738776e-06, "loss": 0.2518, "step": 16291 }, { "epoch": 53.41639344262295, "grad_norm": 3.3515563011169434, "learning_rate": 9.379725486978902e-06, "loss": 0.3913, "step": 16292 }, { "epoch": 53.41967213114754, "grad_norm": 3.5316526889801025, "learning_rate": 9.37866564821327e-06, "loss": 0.1439, "step": 16293 }, { "epoch": 53.42295081967213, "grad_norm": 4.033603668212891, "learning_rate": 9.377605816453828e-06, "loss": 0.1084, "step": 16294 }, { "epoch": 53.42622950819672, "grad_norm": 3.3360095024108887, "learning_rate": 9.376545991712528e-06, "loss": 0.1035, "step": 16295 }, { "epoch": 53.429508196721315, "grad_norm": 2.8587234020233154, "learning_rate": 9.375486174001317e-06, "loss": 0.1194, "step": 16296 }, { "epoch": 53.4327868852459, "grad_norm": 4.281040191650391, "learning_rate": 9.37442636333215e-06, "loss": 0.1633, "step": 16297 }, { "epoch": 53.43606557377049, "grad_norm": 3.638120174407959, "learning_rate": 9.373366559716979e-06, "loss": 0.1765, "step": 16298 }, { "epoch": 53.43934426229508, "grad_norm": 8.15425968170166, "learning_rate": 9.37230676316775e-06, "loss": 0.2305, "step": 16299 }, { "epoch": 53.442622950819676, "grad_norm": 4.321598052978516, "learning_rate": 9.371246973696411e-06, "loss": 0.2297, "step": 16300 }, { "epoch": 53.445901639344264, "grad_norm": 3.66552734375, "learning_rate": 9.37018719131492e-06, "loss": 0.1377, "step": 16301 }, { "epoch": 53.44918032786885, "grad_norm": 3.5000431537628174, "learning_rate": 9.369127416035225e-06, "loss": 0.0918, "step": 16302 }, { "epoch": 53.45245901639344, "grad_norm": 5.853639602661133, "learning_rate": 9.368067647869273e-06, "loss": 0.3267, "step": 16303 }, { "epoch": 53.455737704918036, "grad_norm": 4.1147565841674805, "learning_rate": 9.367007886829011e-06, "loss": 0.3148, "step": 16304 }, { "epoch": 53.459016393442624, "grad_norm": 3.795542001724243, "learning_rate": 9.365948132926397e-06, "loss": 0.1953, "step": 16305 }, { "epoch": 53.46229508196721, "grad_norm": 3.6038897037506104, "learning_rate": 9.364888386173379e-06, "loss": 0.2444, "step": 16306 }, { "epoch": 53.4655737704918, "grad_norm": 4.42197322845459, "learning_rate": 9.363828646581902e-06, "loss": 0.2722, "step": 16307 }, { "epoch": 53.4688524590164, "grad_norm": 4.657376766204834, "learning_rate": 9.36276891416392e-06, "loss": 0.3395, "step": 16308 }, { "epoch": 53.472131147540985, "grad_norm": 3.7107183933258057, "learning_rate": 9.361709188931378e-06, "loss": 0.269, "step": 16309 }, { "epoch": 53.47540983606557, "grad_norm": 4.972513198852539, "learning_rate": 9.360649470896231e-06, "loss": 0.2889, "step": 16310 }, { "epoch": 53.47868852459016, "grad_norm": 3.7799713611602783, "learning_rate": 9.359589760070427e-06, "loss": 0.2251, "step": 16311 }, { "epoch": 53.48196721311476, "grad_norm": 3.630795955657959, "learning_rate": 9.358530056465912e-06, "loss": 0.2505, "step": 16312 }, { "epoch": 53.485245901639345, "grad_norm": 3.8341593742370605, "learning_rate": 9.357470360094637e-06, "loss": 0.1142, "step": 16313 }, { "epoch": 53.488524590163934, "grad_norm": 3.7324326038360596, "learning_rate": 9.356410670968555e-06, "loss": 0.174, "step": 16314 }, { "epoch": 53.49180327868852, "grad_norm": 15.901379585266113, "learning_rate": 9.355350989099607e-06, "loss": 0.1707, "step": 16315 }, { "epoch": 53.49508196721312, "grad_norm": 4.207321643829346, "learning_rate": 9.354291314499752e-06, "loss": 0.194, "step": 16316 }, { "epoch": 53.498360655737706, "grad_norm": 17.95994758605957, "learning_rate": 9.353231647180931e-06, "loss": 0.3476, "step": 16317 }, { "epoch": 53.501639344262294, "grad_norm": 3.405010938644409, "learning_rate": 9.352171987155094e-06, "loss": 0.2001, "step": 16318 }, { "epoch": 53.50491803278688, "grad_norm": 3.7276253700256348, "learning_rate": 9.351112334434195e-06, "loss": 0.389, "step": 16319 }, { "epoch": 53.50819672131148, "grad_norm": 4.715378284454346, "learning_rate": 9.350052689030178e-06, "loss": 0.2551, "step": 16320 }, { "epoch": 53.511475409836066, "grad_norm": 3.8394744396209717, "learning_rate": 9.348993050954996e-06, "loss": 0.1494, "step": 16321 }, { "epoch": 53.514754098360655, "grad_norm": 3.2526562213897705, "learning_rate": 9.347933420220586e-06, "loss": 0.3087, "step": 16322 }, { "epoch": 53.51803278688524, "grad_norm": 3.3828885555267334, "learning_rate": 9.346873796838911e-06, "loss": 0.3117, "step": 16323 }, { "epoch": 53.52131147540984, "grad_norm": 3.9645965099334717, "learning_rate": 9.345814180821913e-06, "loss": 0.1777, "step": 16324 }, { "epoch": 53.52459016393443, "grad_norm": 3.488105535507202, "learning_rate": 9.344754572181538e-06, "loss": 0.2512, "step": 16325 }, { "epoch": 53.527868852459015, "grad_norm": 4.18314790725708, "learning_rate": 9.343694970929736e-06, "loss": 0.3129, "step": 16326 }, { "epoch": 53.5311475409836, "grad_norm": 3.480180263519287, "learning_rate": 9.342635377078456e-06, "loss": 0.4542, "step": 16327 }, { "epoch": 53.5344262295082, "grad_norm": 3.100168466567993, "learning_rate": 9.341575790639649e-06, "loss": 0.2475, "step": 16328 }, { "epoch": 53.53770491803279, "grad_norm": 4.118932723999023, "learning_rate": 9.340516211625258e-06, "loss": 0.1484, "step": 16329 }, { "epoch": 53.540983606557376, "grad_norm": 4.156982421875, "learning_rate": 9.339456640047227e-06, "loss": 0.356, "step": 16330 }, { "epoch": 53.544262295081964, "grad_norm": 3.121371269226074, "learning_rate": 9.338397075917515e-06, "loss": 0.0685, "step": 16331 }, { "epoch": 53.54754098360656, "grad_norm": 3.3199543952941895, "learning_rate": 9.337337519248064e-06, "loss": 0.1385, "step": 16332 }, { "epoch": 53.55081967213115, "grad_norm": 4.128216743469238, "learning_rate": 9.336277970050821e-06, "loss": 0.2007, "step": 16333 }, { "epoch": 53.554098360655736, "grad_norm": 3.3124451637268066, "learning_rate": 9.335218428337735e-06, "loss": 0.1877, "step": 16334 }, { "epoch": 53.557377049180324, "grad_norm": 3.6103098392486572, "learning_rate": 9.334158894120747e-06, "loss": 0.1709, "step": 16335 }, { "epoch": 53.56065573770492, "grad_norm": 5.670359134674072, "learning_rate": 9.333099367411813e-06, "loss": 0.3, "step": 16336 }, { "epoch": 53.56393442622951, "grad_norm": 3.855757474899292, "learning_rate": 9.332039848222878e-06, "loss": 0.4009, "step": 16337 }, { "epoch": 53.5672131147541, "grad_norm": 3.0409109592437744, "learning_rate": 9.330980336565887e-06, "loss": 0.307, "step": 16338 }, { "epoch": 53.570491803278685, "grad_norm": 3.422471046447754, "learning_rate": 9.329920832452786e-06, "loss": 0.2345, "step": 16339 }, { "epoch": 53.57377049180328, "grad_norm": 9.294195175170898, "learning_rate": 9.32886133589553e-06, "loss": 0.247, "step": 16340 }, { "epoch": 53.57704918032787, "grad_norm": 3.2944159507751465, "learning_rate": 9.327801846906055e-06, "loss": 0.2224, "step": 16341 }, { "epoch": 53.58032786885246, "grad_norm": 6.167364597320557, "learning_rate": 9.326742365496316e-06, "loss": 0.2284, "step": 16342 }, { "epoch": 53.58360655737705, "grad_norm": 3.7273638248443604, "learning_rate": 9.325682891678257e-06, "loss": 0.2712, "step": 16343 }, { "epoch": 53.58688524590164, "grad_norm": 4.358894348144531, "learning_rate": 9.324623425463823e-06, "loss": 0.302, "step": 16344 }, { "epoch": 53.59016393442623, "grad_norm": 3.9260494709014893, "learning_rate": 9.323563966864962e-06, "loss": 0.4443, "step": 16345 }, { "epoch": 53.59344262295082, "grad_norm": 3.320971965789795, "learning_rate": 9.322504515893623e-06, "loss": 0.2917, "step": 16346 }, { "epoch": 53.59672131147541, "grad_norm": 2.785994529724121, "learning_rate": 9.321445072561748e-06, "loss": 0.124, "step": 16347 }, { "epoch": 53.6, "grad_norm": 2.950839042663574, "learning_rate": 9.320385636881283e-06, "loss": 0.4286, "step": 16348 }, { "epoch": 53.60327868852459, "grad_norm": 2.904811143875122, "learning_rate": 9.31932620886418e-06, "loss": 0.2666, "step": 16349 }, { "epoch": 53.60655737704918, "grad_norm": 10.5525484085083, "learning_rate": 9.318266788522382e-06, "loss": 0.4147, "step": 16350 }, { "epoch": 53.609836065573774, "grad_norm": 3.391770362854004, "learning_rate": 9.317207375867835e-06, "loss": 0.2121, "step": 16351 }, { "epoch": 53.61311475409836, "grad_norm": 3.593822479248047, "learning_rate": 9.31614797091248e-06, "loss": 0.2489, "step": 16352 }, { "epoch": 53.61639344262295, "grad_norm": 5.867161273956299, "learning_rate": 9.315088573668273e-06, "loss": 0.239, "step": 16353 }, { "epoch": 53.61967213114754, "grad_norm": 43.677154541015625, "learning_rate": 9.314029184147153e-06, "loss": 0.1349, "step": 16354 }, { "epoch": 53.622950819672134, "grad_norm": 2.989356756210327, "learning_rate": 9.312969802361069e-06, "loss": 0.3004, "step": 16355 }, { "epoch": 53.62622950819672, "grad_norm": 3.984917402267456, "learning_rate": 9.311910428321958e-06, "loss": 0.3452, "step": 16356 }, { "epoch": 53.62950819672131, "grad_norm": 4.569619655609131, "learning_rate": 9.310851062041779e-06, "loss": 0.2888, "step": 16357 }, { "epoch": 53.6327868852459, "grad_norm": 11.409542083740234, "learning_rate": 9.30979170353247e-06, "loss": 0.514, "step": 16358 }, { "epoch": 53.636065573770495, "grad_norm": 4.710594654083252, "learning_rate": 9.308732352805976e-06, "loss": 0.2266, "step": 16359 }, { "epoch": 53.63934426229508, "grad_norm": 3.4766998291015625, "learning_rate": 9.307673009874244e-06, "loss": 0.1424, "step": 16360 }, { "epoch": 53.64262295081967, "grad_norm": 8.08178997039795, "learning_rate": 9.306613674749216e-06, "loss": 0.2443, "step": 16361 }, { "epoch": 53.64590163934426, "grad_norm": 4.406111240386963, "learning_rate": 9.305554347442842e-06, "loss": 0.4223, "step": 16362 }, { "epoch": 53.649180327868855, "grad_norm": 3.661447763442993, "learning_rate": 9.304495027967066e-06, "loss": 0.2036, "step": 16363 }, { "epoch": 53.65245901639344, "grad_norm": 4.448866367340088, "learning_rate": 9.303435716333831e-06, "loss": 0.228, "step": 16364 }, { "epoch": 53.65573770491803, "grad_norm": 4.724950313568115, "learning_rate": 9.302376412555078e-06, "loss": 0.2135, "step": 16365 }, { "epoch": 53.65901639344262, "grad_norm": 3.2712883949279785, "learning_rate": 9.301317116642763e-06, "loss": 0.1835, "step": 16366 }, { "epoch": 53.662295081967216, "grad_norm": 3.4363808631896973, "learning_rate": 9.300257828608822e-06, "loss": 0.1912, "step": 16367 }, { "epoch": 53.665573770491804, "grad_norm": 4.508499622344971, "learning_rate": 9.299198548465199e-06, "loss": 0.2819, "step": 16368 }, { "epoch": 53.66885245901639, "grad_norm": 3.4545466899871826, "learning_rate": 9.298139276223841e-06, "loss": 0.4112, "step": 16369 }, { "epoch": 53.67213114754098, "grad_norm": 4.172181129455566, "learning_rate": 9.297080011896696e-06, "loss": 0.1999, "step": 16370 }, { "epoch": 53.675409836065576, "grad_norm": 4.3768744468688965, "learning_rate": 9.296020755495701e-06, "loss": 0.1832, "step": 16371 }, { "epoch": 53.678688524590164, "grad_norm": 16.606708526611328, "learning_rate": 9.294961507032807e-06, "loss": 0.1725, "step": 16372 }, { "epoch": 53.68196721311475, "grad_norm": 4.2345380783081055, "learning_rate": 9.293902266519955e-06, "loss": 0.1608, "step": 16373 }, { "epoch": 53.68524590163934, "grad_norm": 3.4717519283294678, "learning_rate": 9.292843033969085e-06, "loss": 0.2479, "step": 16374 }, { "epoch": 53.68852459016394, "grad_norm": 3.79537296295166, "learning_rate": 9.291783809392148e-06, "loss": 0.2435, "step": 16375 }, { "epoch": 53.691803278688525, "grad_norm": 4.238920211791992, "learning_rate": 9.290724592801087e-06, "loss": 0.365, "step": 16376 }, { "epoch": 53.69508196721311, "grad_norm": 3.2757771015167236, "learning_rate": 9.289665384207842e-06, "loss": 0.1033, "step": 16377 }, { "epoch": 53.6983606557377, "grad_norm": 4.8001017570495605, "learning_rate": 9.288606183624355e-06, "loss": 0.4126, "step": 16378 }, { "epoch": 53.7016393442623, "grad_norm": 3.603264808654785, "learning_rate": 9.287546991062577e-06, "loss": 0.1665, "step": 16379 }, { "epoch": 53.704918032786885, "grad_norm": 3.484314441680908, "learning_rate": 9.286487806534446e-06, "loss": 0.1703, "step": 16380 }, { "epoch": 53.708196721311474, "grad_norm": 3.560469150543213, "learning_rate": 9.28542863005191e-06, "loss": 0.2757, "step": 16381 }, { "epoch": 53.71147540983607, "grad_norm": 3.840235948562622, "learning_rate": 9.284369461626902e-06, "loss": 0.2725, "step": 16382 }, { "epoch": 53.71475409836066, "grad_norm": 3.703516721725464, "learning_rate": 9.283310301271378e-06, "loss": 0.2484, "step": 16383 }, { "epoch": 53.718032786885246, "grad_norm": 4.990357398986816, "learning_rate": 9.282251148997275e-06, "loss": 0.4025, "step": 16384 }, { "epoch": 53.721311475409834, "grad_norm": 3.4754109382629395, "learning_rate": 9.281192004816538e-06, "loss": 0.1411, "step": 16385 }, { "epoch": 53.72459016393443, "grad_norm": 4.688261032104492, "learning_rate": 9.280132868741106e-06, "loss": 0.5266, "step": 16386 }, { "epoch": 53.72786885245902, "grad_norm": 4.398918151855469, "learning_rate": 9.279073740782922e-06, "loss": 0.2402, "step": 16387 }, { "epoch": 53.731147540983606, "grad_norm": 3.6799445152282715, "learning_rate": 9.278014620953934e-06, "loss": 0.2845, "step": 16388 }, { "epoch": 53.734426229508195, "grad_norm": 4.122399806976318, "learning_rate": 9.276955509266084e-06, "loss": 0.3312, "step": 16389 }, { "epoch": 53.73770491803279, "grad_norm": 3.503833055496216, "learning_rate": 9.27589640573131e-06, "loss": 0.2512, "step": 16390 }, { "epoch": 53.74098360655738, "grad_norm": 3.1785318851470947, "learning_rate": 9.274837310361555e-06, "loss": 0.2045, "step": 16391 }, { "epoch": 53.74426229508197, "grad_norm": 4.375088691711426, "learning_rate": 9.273778223168766e-06, "loss": 0.2634, "step": 16392 }, { "epoch": 53.747540983606555, "grad_norm": 2.956294059753418, "learning_rate": 9.272719144164883e-06, "loss": 0.2102, "step": 16393 }, { "epoch": 53.75081967213115, "grad_norm": 3.2465951442718506, "learning_rate": 9.271660073361844e-06, "loss": 0.1991, "step": 16394 }, { "epoch": 53.75409836065574, "grad_norm": 3.383422613143921, "learning_rate": 9.270601010771598e-06, "loss": 0.2721, "step": 16395 }, { "epoch": 53.75737704918033, "grad_norm": 7.2414326667785645, "learning_rate": 9.269541956406084e-06, "loss": 0.28, "step": 16396 }, { "epoch": 53.760655737704916, "grad_norm": 3.5499284267425537, "learning_rate": 9.268482910277242e-06, "loss": 0.1479, "step": 16397 }, { "epoch": 53.76393442622951, "grad_norm": 4.024782180786133, "learning_rate": 9.267423872397019e-06, "loss": 0.1603, "step": 16398 }, { "epoch": 53.7672131147541, "grad_norm": 3.443713665008545, "learning_rate": 9.266364842777352e-06, "loss": 0.1399, "step": 16399 }, { "epoch": 53.77049180327869, "grad_norm": 3.4670684337615967, "learning_rate": 9.265305821430182e-06, "loss": 0.1936, "step": 16400 }, { "epoch": 53.773770491803276, "grad_norm": 3.7226643562316895, "learning_rate": 9.264246808367454e-06, "loss": 0.134, "step": 16401 }, { "epoch": 53.77704918032787, "grad_norm": 4.5816330909729, "learning_rate": 9.263187803601112e-06, "loss": 0.264, "step": 16402 }, { "epoch": 53.78032786885246, "grad_norm": 2.9978628158569336, "learning_rate": 9.262128807143092e-06, "loss": 0.1635, "step": 16403 }, { "epoch": 53.78360655737705, "grad_norm": 3.6021223068237305, "learning_rate": 9.26106981900533e-06, "loss": 0.3577, "step": 16404 }, { "epoch": 53.78688524590164, "grad_norm": 4.334378242492676, "learning_rate": 9.260010839199782e-06, "loss": 0.4466, "step": 16405 }, { "epoch": 53.79016393442623, "grad_norm": 2.8554420471191406, "learning_rate": 9.25895186773838e-06, "loss": 0.1896, "step": 16406 }, { "epoch": 53.79344262295082, "grad_norm": 3.3459274768829346, "learning_rate": 9.257892904633066e-06, "loss": 0.3464, "step": 16407 }, { "epoch": 53.79672131147541, "grad_norm": 3.5978238582611084, "learning_rate": 9.256833949895776e-06, "loss": 0.3528, "step": 16408 }, { "epoch": 53.8, "grad_norm": 2.9463961124420166, "learning_rate": 9.255775003538462e-06, "loss": 0.1199, "step": 16409 }, { "epoch": 53.80327868852459, "grad_norm": 3.720158100128174, "learning_rate": 9.254716065573057e-06, "loss": 0.2361, "step": 16410 }, { "epoch": 53.80655737704918, "grad_norm": 4.086204528808594, "learning_rate": 9.253657136011504e-06, "loss": 0.3343, "step": 16411 }, { "epoch": 53.80983606557377, "grad_norm": 3.316251754760742, "learning_rate": 9.252598214865743e-06, "loss": 0.2668, "step": 16412 }, { "epoch": 53.81311475409836, "grad_norm": 7.132806777954102, "learning_rate": 9.251539302147709e-06, "loss": 0.306, "step": 16413 }, { "epoch": 53.81639344262295, "grad_norm": 3.5377719402313232, "learning_rate": 9.250480397869354e-06, "loss": 0.253, "step": 16414 }, { "epoch": 53.81967213114754, "grad_norm": 3.096147298812866, "learning_rate": 9.249421502042608e-06, "loss": 0.3137, "step": 16415 }, { "epoch": 53.82295081967213, "grad_norm": 4.249504566192627, "learning_rate": 9.24836261467942e-06, "loss": 0.2547, "step": 16416 }, { "epoch": 53.82622950819672, "grad_norm": 3.8399596214294434, "learning_rate": 9.247303735791718e-06, "loss": 0.2316, "step": 16417 }, { "epoch": 53.829508196721314, "grad_norm": 3.48786997795105, "learning_rate": 9.246244865391453e-06, "loss": 0.1885, "step": 16418 }, { "epoch": 53.8327868852459, "grad_norm": 2.484726667404175, "learning_rate": 9.245186003490561e-06, "loss": 0.155, "step": 16419 }, { "epoch": 53.83606557377049, "grad_norm": 4.712283611297607, "learning_rate": 9.24412715010098e-06, "loss": 0.1931, "step": 16420 }, { "epoch": 53.83934426229508, "grad_norm": 3.86505126953125, "learning_rate": 9.24306830523465e-06, "loss": 0.1994, "step": 16421 }, { "epoch": 53.842622950819674, "grad_norm": 3.2502028942108154, "learning_rate": 9.242009468903516e-06, "loss": 0.1756, "step": 16422 }, { "epoch": 53.84590163934426, "grad_norm": 3.638915777206421, "learning_rate": 9.240950641119509e-06, "loss": 0.1798, "step": 16423 }, { "epoch": 53.84918032786885, "grad_norm": 3.009955644607544, "learning_rate": 9.239891821894576e-06, "loss": 0.1094, "step": 16424 }, { "epoch": 53.85245901639344, "grad_norm": 3.7086634635925293, "learning_rate": 9.238833011240653e-06, "loss": 0.2278, "step": 16425 }, { "epoch": 53.855737704918035, "grad_norm": 3.404484510421753, "learning_rate": 9.237774209169677e-06, "loss": 0.2004, "step": 16426 }, { "epoch": 53.85901639344262, "grad_norm": 4.432992935180664, "learning_rate": 9.23671541569359e-06, "loss": 0.3521, "step": 16427 }, { "epoch": 53.86229508196721, "grad_norm": 3.3963463306427, "learning_rate": 9.235656630824332e-06, "loss": 0.2101, "step": 16428 }, { "epoch": 53.86557377049181, "grad_norm": 3.7551372051239014, "learning_rate": 9.23459785457384e-06, "loss": 0.1395, "step": 16429 }, { "epoch": 53.868852459016395, "grad_norm": 3.4900970458984375, "learning_rate": 9.233539086954048e-06, "loss": 0.1865, "step": 16430 }, { "epoch": 53.87213114754098, "grad_norm": 4.853312015533447, "learning_rate": 9.232480327976906e-06, "loss": 0.161, "step": 16431 }, { "epoch": 53.87540983606557, "grad_norm": 3.348407030105591, "learning_rate": 9.231421577654344e-06, "loss": 0.173, "step": 16432 }, { "epoch": 53.87868852459017, "grad_norm": 3.638002634048462, "learning_rate": 9.230362835998305e-06, "loss": 0.2861, "step": 16433 }, { "epoch": 53.881967213114756, "grad_norm": 3.2237517833709717, "learning_rate": 9.22930410302072e-06, "loss": 0.1518, "step": 16434 }, { "epoch": 53.885245901639344, "grad_norm": 3.8647537231445312, "learning_rate": 9.228245378733537e-06, "loss": 0.2536, "step": 16435 }, { "epoch": 53.88852459016393, "grad_norm": 4.474782943725586, "learning_rate": 9.22718666314869e-06, "loss": 0.2041, "step": 16436 }, { "epoch": 53.89180327868853, "grad_norm": 5.936005592346191, "learning_rate": 9.226127956278115e-06, "loss": 0.2728, "step": 16437 }, { "epoch": 53.895081967213116, "grad_norm": 3.5431461334228516, "learning_rate": 9.225069258133754e-06, "loss": 0.1716, "step": 16438 }, { "epoch": 53.898360655737704, "grad_norm": 3.350890636444092, "learning_rate": 9.224010568727539e-06, "loss": 0.3653, "step": 16439 }, { "epoch": 53.90163934426229, "grad_norm": 3.513228416442871, "learning_rate": 9.222951888071415e-06, "loss": 0.1573, "step": 16440 }, { "epoch": 53.90491803278689, "grad_norm": 3.3115410804748535, "learning_rate": 9.221893216177316e-06, "loss": 0.1517, "step": 16441 }, { "epoch": 53.90819672131148, "grad_norm": 3.726893186569214, "learning_rate": 9.220834553057179e-06, "loss": 0.3228, "step": 16442 }, { "epoch": 53.911475409836065, "grad_norm": 3.5292091369628906, "learning_rate": 9.21977589872294e-06, "loss": 0.3944, "step": 16443 }, { "epoch": 53.91475409836065, "grad_norm": 3.7005012035369873, "learning_rate": 9.218717253186544e-06, "loss": 0.291, "step": 16444 }, { "epoch": 53.91803278688525, "grad_norm": 3.149836778640747, "learning_rate": 9.217658616459922e-06, "loss": 0.1736, "step": 16445 }, { "epoch": 53.92131147540984, "grad_norm": 3.2893688678741455, "learning_rate": 9.216599988555012e-06, "loss": 0.3129, "step": 16446 }, { "epoch": 53.924590163934425, "grad_norm": 3.597913980484009, "learning_rate": 9.215541369483748e-06, "loss": 0.283, "step": 16447 }, { "epoch": 53.927868852459014, "grad_norm": 3.9726438522338867, "learning_rate": 9.214482759258074e-06, "loss": 0.2359, "step": 16448 }, { "epoch": 53.93114754098361, "grad_norm": 3.6155846118927, "learning_rate": 9.213424157889926e-06, "loss": 0.2795, "step": 16449 }, { "epoch": 53.9344262295082, "grad_norm": 7.355647087097168, "learning_rate": 9.212365565391234e-06, "loss": 0.1833, "step": 16450 }, { "epoch": 53.937704918032786, "grad_norm": 2.973353385925293, "learning_rate": 9.211306981773943e-06, "loss": 0.2129, "step": 16451 }, { "epoch": 53.940983606557374, "grad_norm": 6.997860908508301, "learning_rate": 9.210248407049982e-06, "loss": 0.3147, "step": 16452 }, { "epoch": 53.94426229508197, "grad_norm": 3.6127703189849854, "learning_rate": 9.209189841231293e-06, "loss": 0.3167, "step": 16453 }, { "epoch": 53.94754098360656, "grad_norm": 2.894300699234009, "learning_rate": 9.208131284329811e-06, "loss": 0.1748, "step": 16454 }, { "epoch": 53.950819672131146, "grad_norm": 3.692368268966675, "learning_rate": 9.207072736357475e-06, "loss": 0.2491, "step": 16455 }, { "epoch": 53.954098360655735, "grad_norm": 3.8931849002838135, "learning_rate": 9.206014197326211e-06, "loss": 0.3459, "step": 16456 }, { "epoch": 53.95737704918033, "grad_norm": 3.6365795135498047, "learning_rate": 9.204955667247969e-06, "loss": 0.2577, "step": 16457 }, { "epoch": 53.96065573770492, "grad_norm": 3.8624684810638428, "learning_rate": 9.203897146134678e-06, "loss": 0.1072, "step": 16458 }, { "epoch": 53.96393442622951, "grad_norm": 3.5212368965148926, "learning_rate": 9.202838633998274e-06, "loss": 0.2246, "step": 16459 }, { "epoch": 53.967213114754095, "grad_norm": 3.5747146606445312, "learning_rate": 9.201780130850689e-06, "loss": 0.2909, "step": 16460 }, { "epoch": 53.97049180327869, "grad_norm": 3.240210771560669, "learning_rate": 9.200721636703866e-06, "loss": 0.2638, "step": 16461 }, { "epoch": 53.97377049180328, "grad_norm": 4.12313175201416, "learning_rate": 9.19966315156974e-06, "loss": 0.1656, "step": 16462 }, { "epoch": 53.97704918032787, "grad_norm": 3.0641701221466064, "learning_rate": 9.198604675460242e-06, "loss": 0.3165, "step": 16463 }, { "epoch": 53.980327868852456, "grad_norm": 3.544201374053955, "learning_rate": 9.197546208387312e-06, "loss": 0.138, "step": 16464 }, { "epoch": 53.98360655737705, "grad_norm": 4.822286605834961, "learning_rate": 9.196487750362876e-06, "loss": 0.4621, "step": 16465 }, { "epoch": 53.98688524590164, "grad_norm": 3.7762863636016846, "learning_rate": 9.195429301398881e-06, "loss": 0.3349, "step": 16466 }, { "epoch": 53.99016393442623, "grad_norm": 3.499203681945801, "learning_rate": 9.194370861507257e-06, "loss": 0.1532, "step": 16467 }, { "epoch": 53.993442622950816, "grad_norm": 3.833775281906128, "learning_rate": 9.193312430699942e-06, "loss": 0.3726, "step": 16468 }, { "epoch": 53.99672131147541, "grad_norm": 5.0508646965026855, "learning_rate": 9.19225400898886e-06, "loss": 0.2439, "step": 16469 }, { "epoch": 54.0, "grad_norm": 4.207407474517822, "learning_rate": 9.19119559638596e-06, "loss": 0.2246, "step": 16470 }, { "epoch": 54.00327868852459, "grad_norm": 3.616417646408081, "learning_rate": 9.19013719290317e-06, "loss": 0.185, "step": 16471 }, { "epoch": 54.006557377049184, "grad_norm": 3.7610318660736084, "learning_rate": 9.189078798552425e-06, "loss": 0.2674, "step": 16472 }, { "epoch": 54.00983606557377, "grad_norm": 3.6859424114227295, "learning_rate": 9.188020413345657e-06, "loss": 0.295, "step": 16473 }, { "epoch": 54.01311475409836, "grad_norm": 2.8853464126586914, "learning_rate": 9.186962037294806e-06, "loss": 0.1414, "step": 16474 }, { "epoch": 54.01639344262295, "grad_norm": 3.796638011932373, "learning_rate": 9.185903670411803e-06, "loss": 0.1099, "step": 16475 }, { "epoch": 54.019672131147544, "grad_norm": 3.594651699066162, "learning_rate": 9.184845312708581e-06, "loss": 0.1352, "step": 16476 }, { "epoch": 54.02295081967213, "grad_norm": 4.2764506340026855, "learning_rate": 9.183786964197077e-06, "loss": 0.2688, "step": 16477 }, { "epoch": 54.02622950819672, "grad_norm": 3.151763439178467, "learning_rate": 9.182728624889223e-06, "loss": 0.1017, "step": 16478 }, { "epoch": 54.02950819672131, "grad_norm": 3.4409987926483154, "learning_rate": 9.181670294796953e-06, "loss": 0.2163, "step": 16479 }, { "epoch": 54.032786885245905, "grad_norm": 3.29072904586792, "learning_rate": 9.180611973932203e-06, "loss": 0.1498, "step": 16480 }, { "epoch": 54.03606557377049, "grad_norm": 3.9876515865325928, "learning_rate": 9.179553662306905e-06, "loss": 0.2803, "step": 16481 }, { "epoch": 54.03934426229508, "grad_norm": 2.945664644241333, "learning_rate": 9.178495359932988e-06, "loss": 0.2302, "step": 16482 }, { "epoch": 54.04262295081967, "grad_norm": 4.832344055175781, "learning_rate": 9.177437066822396e-06, "loss": 0.2403, "step": 16483 }, { "epoch": 54.045901639344265, "grad_norm": 7.081879138946533, "learning_rate": 9.176378782987054e-06, "loss": 0.1645, "step": 16484 }, { "epoch": 54.049180327868854, "grad_norm": 3.6158230304718018, "learning_rate": 9.175320508438899e-06, "loss": 0.13, "step": 16485 }, { "epoch": 54.05245901639344, "grad_norm": 2.898620367050171, "learning_rate": 9.174262243189858e-06, "loss": 0.1784, "step": 16486 }, { "epoch": 54.05573770491803, "grad_norm": 3.624976873397827, "learning_rate": 9.173203987251873e-06, "loss": 0.2555, "step": 16487 }, { "epoch": 54.059016393442626, "grad_norm": 3.723395824432373, "learning_rate": 9.172145740636872e-06, "loss": 0.2296, "step": 16488 }, { "epoch": 54.062295081967214, "grad_norm": 3.0963449478149414, "learning_rate": 9.17108750335679e-06, "loss": 0.189, "step": 16489 }, { "epoch": 54.0655737704918, "grad_norm": 3.4947707653045654, "learning_rate": 9.170029275423557e-06, "loss": 0.332, "step": 16490 }, { "epoch": 54.06885245901639, "grad_norm": 3.5787036418914795, "learning_rate": 9.168971056849105e-06, "loss": 0.1773, "step": 16491 }, { "epoch": 54.072131147540986, "grad_norm": 3.6709327697753906, "learning_rate": 9.16791284764537e-06, "loss": 0.38, "step": 16492 }, { "epoch": 54.075409836065575, "grad_norm": 2.51914644241333, "learning_rate": 9.166854647824284e-06, "loss": 0.0554, "step": 16493 }, { "epoch": 54.07868852459016, "grad_norm": 3.3562655448913574, "learning_rate": 9.165796457397778e-06, "loss": 0.2464, "step": 16494 }, { "epoch": 54.08196721311475, "grad_norm": 3.9157183170318604, "learning_rate": 9.164738276377778e-06, "loss": 0.2669, "step": 16495 }, { "epoch": 54.08524590163935, "grad_norm": 3.0046823024749756, "learning_rate": 9.16368010477623e-06, "loss": 0.2289, "step": 16496 }, { "epoch": 54.088524590163935, "grad_norm": 3.069216251373291, "learning_rate": 9.162621942605055e-06, "loss": 0.3313, "step": 16497 }, { "epoch": 54.09180327868852, "grad_norm": 3.4603750705718994, "learning_rate": 9.161563789876192e-06, "loss": 0.193, "step": 16498 }, { "epoch": 54.09508196721311, "grad_norm": 3.2845771312713623, "learning_rate": 9.160505646601562e-06, "loss": 0.1227, "step": 16499 }, { "epoch": 54.09836065573771, "grad_norm": 6.40873908996582, "learning_rate": 9.159447512793109e-06, "loss": 0.3945, "step": 16500 }, { "epoch": 54.101639344262296, "grad_norm": 3.552110433578491, "learning_rate": 9.158389388462759e-06, "loss": 0.3366, "step": 16501 }, { "epoch": 54.104918032786884, "grad_norm": 3.8552443981170654, "learning_rate": 9.157331273622441e-06, "loss": 0.2852, "step": 16502 }, { "epoch": 54.10819672131147, "grad_norm": 3.5960845947265625, "learning_rate": 9.156273168284091e-06, "loss": 0.1865, "step": 16503 }, { "epoch": 54.11147540983607, "grad_norm": 4.2773213386535645, "learning_rate": 9.155215072459636e-06, "loss": 0.4336, "step": 16504 }, { "epoch": 54.114754098360656, "grad_norm": 2.82144832611084, "learning_rate": 9.154156986161013e-06, "loss": 0.2097, "step": 16505 }, { "epoch": 54.118032786885244, "grad_norm": 2.070955753326416, "learning_rate": 9.153098909400146e-06, "loss": 0.0712, "step": 16506 }, { "epoch": 54.12131147540983, "grad_norm": 2.9778127670288086, "learning_rate": 9.152040842188973e-06, "loss": 0.1694, "step": 16507 }, { "epoch": 54.12459016393443, "grad_norm": 3.1432788372039795, "learning_rate": 9.150982784539419e-06, "loss": 0.1737, "step": 16508 }, { "epoch": 54.12786885245902, "grad_norm": 3.074878215789795, "learning_rate": 9.149924736463415e-06, "loss": 0.1897, "step": 16509 }, { "epoch": 54.131147540983605, "grad_norm": 3.3035197257995605, "learning_rate": 9.148866697972897e-06, "loss": 0.2213, "step": 16510 }, { "epoch": 54.13442622950819, "grad_norm": 3.0366971492767334, "learning_rate": 9.147808669079791e-06, "loss": 0.1749, "step": 16511 }, { "epoch": 54.13770491803279, "grad_norm": 2.872715711593628, "learning_rate": 9.146750649796025e-06, "loss": 0.1756, "step": 16512 }, { "epoch": 54.14098360655738, "grad_norm": 3.9078733921051025, "learning_rate": 9.145692640133536e-06, "loss": 0.2231, "step": 16513 }, { "epoch": 54.144262295081965, "grad_norm": 4.335103511810303, "learning_rate": 9.144634640104252e-06, "loss": 0.3763, "step": 16514 }, { "epoch": 54.14754098360656, "grad_norm": 14.225798606872559, "learning_rate": 9.143576649720101e-06, "loss": 0.2648, "step": 16515 }, { "epoch": 54.15081967213115, "grad_norm": 3.23476243019104, "learning_rate": 9.142518668993015e-06, "loss": 0.2406, "step": 16516 }, { "epoch": 54.15409836065574, "grad_norm": 3.6728644371032715, "learning_rate": 9.141460697934916e-06, "loss": 0.2964, "step": 16517 }, { "epoch": 54.157377049180326, "grad_norm": 3.3451693058013916, "learning_rate": 9.140402736557745e-06, "loss": 0.1138, "step": 16518 }, { "epoch": 54.16065573770492, "grad_norm": 2.7855539321899414, "learning_rate": 9.139344784873429e-06, "loss": 0.2371, "step": 16519 }, { "epoch": 54.16393442622951, "grad_norm": 2.65773606300354, "learning_rate": 9.138286842893894e-06, "loss": 0.1444, "step": 16520 }, { "epoch": 54.1672131147541, "grad_norm": 3.195035696029663, "learning_rate": 9.137228910631065e-06, "loss": 0.2751, "step": 16521 }, { "epoch": 54.170491803278686, "grad_norm": 4.404113292694092, "learning_rate": 9.136170988096883e-06, "loss": 0.2274, "step": 16522 }, { "epoch": 54.17377049180328, "grad_norm": 2.659449577331543, "learning_rate": 9.135113075303271e-06, "loss": 0.1928, "step": 16523 }, { "epoch": 54.17704918032787, "grad_norm": 4.061832427978516, "learning_rate": 9.13405517226216e-06, "loss": 0.4227, "step": 16524 }, { "epoch": 54.18032786885246, "grad_norm": 3.4612603187561035, "learning_rate": 9.13299727898547e-06, "loss": 0.1569, "step": 16525 }, { "epoch": 54.18360655737705, "grad_norm": 3.7747390270233154, "learning_rate": 9.131939395485143e-06, "loss": 0.2091, "step": 16526 }, { "epoch": 54.18688524590164, "grad_norm": 3.382725477218628, "learning_rate": 9.130881521773103e-06, "loss": 0.1646, "step": 16527 }, { "epoch": 54.19016393442623, "grad_norm": 3.5002198219299316, "learning_rate": 9.129823657861276e-06, "loss": 0.2952, "step": 16528 }, { "epoch": 54.19344262295082, "grad_norm": 3.0006630420684814, "learning_rate": 9.128765803761589e-06, "loss": 0.2171, "step": 16529 }, { "epoch": 54.19672131147541, "grad_norm": 3.4447765350341797, "learning_rate": 9.127707959485975e-06, "loss": 0.1468, "step": 16530 }, { "epoch": 54.2, "grad_norm": 3.3147084712982178, "learning_rate": 9.126650125046361e-06, "loss": 0.2768, "step": 16531 }, { "epoch": 54.20327868852459, "grad_norm": 3.465575933456421, "learning_rate": 9.125592300454675e-06, "loss": 0.2333, "step": 16532 }, { "epoch": 54.20655737704918, "grad_norm": 4.079708576202393, "learning_rate": 9.124534485722846e-06, "loss": 0.2625, "step": 16533 }, { "epoch": 54.20983606557377, "grad_norm": 3.501699686050415, "learning_rate": 9.123476680862799e-06, "loss": 0.2638, "step": 16534 }, { "epoch": 54.21311475409836, "grad_norm": 3.6130282878875732, "learning_rate": 9.122418885886464e-06, "loss": 0.1998, "step": 16535 }, { "epoch": 54.21639344262295, "grad_norm": 3.1597774028778076, "learning_rate": 9.12136110080577e-06, "loss": 0.2676, "step": 16536 }, { "epoch": 54.21967213114754, "grad_norm": 3.5852062702178955, "learning_rate": 9.120303325632643e-06, "loss": 0.2298, "step": 16537 }, { "epoch": 54.22295081967213, "grad_norm": 3.2549614906311035, "learning_rate": 9.119245560379007e-06, "loss": 0.1729, "step": 16538 }, { "epoch": 54.226229508196724, "grad_norm": 3.4007434844970703, "learning_rate": 9.118187805056798e-06, "loss": 0.1917, "step": 16539 }, { "epoch": 54.22950819672131, "grad_norm": 3.528413772583008, "learning_rate": 9.117130059677938e-06, "loss": 0.2605, "step": 16540 }, { "epoch": 54.2327868852459, "grad_norm": 7.791841983795166, "learning_rate": 9.116072324254354e-06, "loss": 0.1535, "step": 16541 }, { "epoch": 54.23606557377049, "grad_norm": 3.938824415206909, "learning_rate": 9.115014598797973e-06, "loss": 0.2765, "step": 16542 }, { "epoch": 54.239344262295084, "grad_norm": 4.6134772300720215, "learning_rate": 9.113956883320719e-06, "loss": 0.2717, "step": 16543 }, { "epoch": 54.24262295081967, "grad_norm": 3.2044482231140137, "learning_rate": 9.112899177834528e-06, "loss": 0.351, "step": 16544 }, { "epoch": 54.24590163934426, "grad_norm": 2.5616390705108643, "learning_rate": 9.11184148235132e-06, "loss": 0.2603, "step": 16545 }, { "epoch": 54.24918032786885, "grad_norm": 2.9356415271759033, "learning_rate": 9.110783796883021e-06, "loss": 0.1142, "step": 16546 }, { "epoch": 54.252459016393445, "grad_norm": 3.348026752471924, "learning_rate": 9.109726121441558e-06, "loss": 0.1542, "step": 16547 }, { "epoch": 54.25573770491803, "grad_norm": 3.1238086223602295, "learning_rate": 9.10866845603886e-06, "loss": 0.0986, "step": 16548 }, { "epoch": 54.25901639344262, "grad_norm": 3.3257927894592285, "learning_rate": 9.107610800686855e-06, "loss": 0.1452, "step": 16549 }, { "epoch": 54.26229508196721, "grad_norm": 3.0894436836242676, "learning_rate": 9.106553155397464e-06, "loss": 0.1995, "step": 16550 }, { "epoch": 54.265573770491805, "grad_norm": 3.306401014328003, "learning_rate": 9.105495520182612e-06, "loss": 0.2743, "step": 16551 }, { "epoch": 54.268852459016394, "grad_norm": 2.5007338523864746, "learning_rate": 9.104437895054232e-06, "loss": 0.1055, "step": 16552 }, { "epoch": 54.27213114754098, "grad_norm": 3.5954172611236572, "learning_rate": 9.103380280024244e-06, "loss": 0.1141, "step": 16553 }, { "epoch": 54.27540983606557, "grad_norm": 3.910754919052124, "learning_rate": 9.102322675104578e-06, "loss": 0.1867, "step": 16554 }, { "epoch": 54.278688524590166, "grad_norm": 3.0630955696105957, "learning_rate": 9.101265080307153e-06, "loss": 0.295, "step": 16555 }, { "epoch": 54.281967213114754, "grad_norm": 3.179884195327759, "learning_rate": 9.1002074956439e-06, "loss": 0.1832, "step": 16556 }, { "epoch": 54.28524590163934, "grad_norm": 2.761730432510376, "learning_rate": 9.099149921126746e-06, "loss": 0.1919, "step": 16557 }, { "epoch": 54.28852459016394, "grad_norm": 3.651247024536133, "learning_rate": 9.09809235676761e-06, "loss": 0.1753, "step": 16558 }, { "epoch": 54.291803278688526, "grad_norm": 4.695888042449951, "learning_rate": 9.097034802578421e-06, "loss": 0.2741, "step": 16559 }, { "epoch": 54.295081967213115, "grad_norm": 3.6469268798828125, "learning_rate": 9.095977258571104e-06, "loss": 0.1692, "step": 16560 }, { "epoch": 54.2983606557377, "grad_norm": 3.8468174934387207, "learning_rate": 9.094919724757582e-06, "loss": 0.3301, "step": 16561 }, { "epoch": 54.3016393442623, "grad_norm": 2.8284130096435547, "learning_rate": 9.093862201149785e-06, "loss": 0.2727, "step": 16562 }, { "epoch": 54.30491803278689, "grad_norm": 3.2282660007476807, "learning_rate": 9.092804687759633e-06, "loss": 0.3506, "step": 16563 }, { "epoch": 54.308196721311475, "grad_norm": 2.6607635021209717, "learning_rate": 9.091747184599045e-06, "loss": 0.0962, "step": 16564 }, { "epoch": 54.31147540983606, "grad_norm": 3.062631368637085, "learning_rate": 9.090689691679958e-06, "loss": 0.2987, "step": 16565 }, { "epoch": 54.31475409836066, "grad_norm": 3.947049379348755, "learning_rate": 9.08963220901429e-06, "loss": 0.3898, "step": 16566 }, { "epoch": 54.31803278688525, "grad_norm": 2.633019208908081, "learning_rate": 9.088574736613965e-06, "loss": 0.1097, "step": 16567 }, { "epoch": 54.321311475409836, "grad_norm": 2.6142518520355225, "learning_rate": 9.087517274490909e-06, "loss": 0.1394, "step": 16568 }, { "epoch": 54.324590163934424, "grad_norm": 3.413349151611328, "learning_rate": 9.086459822657038e-06, "loss": 0.2863, "step": 16569 }, { "epoch": 54.32786885245902, "grad_norm": 3.166287660598755, "learning_rate": 9.085402381124287e-06, "loss": 0.3223, "step": 16570 }, { "epoch": 54.33114754098361, "grad_norm": 3.434445858001709, "learning_rate": 9.084344949904576e-06, "loss": 0.2885, "step": 16571 }, { "epoch": 54.334426229508196, "grad_norm": 3.2850942611694336, "learning_rate": 9.083287529009827e-06, "loss": 0.3952, "step": 16572 }, { "epoch": 54.337704918032784, "grad_norm": 3.3466832637786865, "learning_rate": 9.082230118451962e-06, "loss": 0.2659, "step": 16573 }, { "epoch": 54.34098360655738, "grad_norm": 4.344258785247803, "learning_rate": 9.08117271824291e-06, "loss": 0.2568, "step": 16574 }, { "epoch": 54.34426229508197, "grad_norm": 9.660650253295898, "learning_rate": 9.080115328394588e-06, "loss": 0.2001, "step": 16575 }, { "epoch": 54.34754098360656, "grad_norm": 4.252429485321045, "learning_rate": 9.079057948918925e-06, "loss": 0.3041, "step": 16576 }, { "epoch": 54.350819672131145, "grad_norm": 3.709157943725586, "learning_rate": 9.07800057982784e-06, "loss": 0.3162, "step": 16577 }, { "epoch": 54.35409836065574, "grad_norm": 2.9199042320251465, "learning_rate": 9.076943221133254e-06, "loss": 0.2161, "step": 16578 }, { "epoch": 54.35737704918033, "grad_norm": 3.9428915977478027, "learning_rate": 9.075885872847096e-06, "loss": 0.3268, "step": 16579 }, { "epoch": 54.36065573770492, "grad_norm": 3.321777582168579, "learning_rate": 9.074828534981286e-06, "loss": 0.3109, "step": 16580 }, { "epoch": 54.363934426229505, "grad_norm": 4.384671211242676, "learning_rate": 9.073771207547746e-06, "loss": 0.1305, "step": 16581 }, { "epoch": 54.3672131147541, "grad_norm": 4.416423320770264, "learning_rate": 9.072713890558397e-06, "loss": 0.1899, "step": 16582 }, { "epoch": 54.37049180327869, "grad_norm": 2.8685553073883057, "learning_rate": 9.071656584025164e-06, "loss": 0.332, "step": 16583 }, { "epoch": 54.37377049180328, "grad_norm": 3.14935040473938, "learning_rate": 9.070599287959968e-06, "loss": 0.1617, "step": 16584 }, { "epoch": 54.377049180327866, "grad_norm": 3.540154457092285, "learning_rate": 9.069542002374733e-06, "loss": 0.156, "step": 16585 }, { "epoch": 54.38032786885246, "grad_norm": 3.9744880199432373, "learning_rate": 9.068484727281377e-06, "loss": 0.1726, "step": 16586 }, { "epoch": 54.38360655737705, "grad_norm": 3.648494243621826, "learning_rate": 9.067427462691827e-06, "loss": 0.2455, "step": 16587 }, { "epoch": 54.38688524590164, "grad_norm": 3.403608560562134, "learning_rate": 9.066370208617999e-06, "loss": 0.1366, "step": 16588 }, { "epoch": 54.390163934426226, "grad_norm": 3.012516498565674, "learning_rate": 9.065312965071819e-06, "loss": 0.2209, "step": 16589 }, { "epoch": 54.39344262295082, "grad_norm": 3.233736515045166, "learning_rate": 9.064255732065209e-06, "loss": 0.3068, "step": 16590 }, { "epoch": 54.39672131147541, "grad_norm": 3.809691905975342, "learning_rate": 9.063198509610083e-06, "loss": 0.2186, "step": 16591 }, { "epoch": 54.4, "grad_norm": 3.2573986053466797, "learning_rate": 9.062141297718372e-06, "loss": 0.2669, "step": 16592 }, { "epoch": 54.40327868852459, "grad_norm": 3.193289279937744, "learning_rate": 9.061084096401994e-06, "loss": 0.226, "step": 16593 }, { "epoch": 54.40655737704918, "grad_norm": 3.7839417457580566, "learning_rate": 9.060026905672868e-06, "loss": 0.2972, "step": 16594 }, { "epoch": 54.40983606557377, "grad_norm": 2.9267985820770264, "learning_rate": 9.058969725542913e-06, "loss": 0.1281, "step": 16595 }, { "epoch": 54.41311475409836, "grad_norm": 3.864112615585327, "learning_rate": 9.057912556024056e-06, "loss": 0.1587, "step": 16596 }, { "epoch": 54.41639344262295, "grad_norm": 3.5672030448913574, "learning_rate": 9.056855397128214e-06, "loss": 0.1514, "step": 16597 }, { "epoch": 54.41967213114754, "grad_norm": 3.2043542861938477, "learning_rate": 9.05579824886731e-06, "loss": 0.2272, "step": 16598 }, { "epoch": 54.42295081967213, "grad_norm": 3.5231573581695557, "learning_rate": 9.054741111253257e-06, "loss": 0.2019, "step": 16599 }, { "epoch": 54.42622950819672, "grad_norm": 2.931323766708374, "learning_rate": 9.053683984297983e-06, "loss": 0.1282, "step": 16600 }, { "epoch": 54.429508196721315, "grad_norm": 4.061577796936035, "learning_rate": 9.05262686801341e-06, "loss": 0.3676, "step": 16601 }, { "epoch": 54.4327868852459, "grad_norm": 3.25905704498291, "learning_rate": 9.05156976241145e-06, "loss": 0.1648, "step": 16602 }, { "epoch": 54.43606557377049, "grad_norm": 3.6913018226623535, "learning_rate": 9.05051266750403e-06, "loss": 0.2227, "step": 16603 }, { "epoch": 54.43934426229508, "grad_norm": 4.045039176940918, "learning_rate": 9.049455583303061e-06, "loss": 0.2484, "step": 16604 }, { "epoch": 54.442622950819676, "grad_norm": 3.6069626808166504, "learning_rate": 9.048398509820473e-06, "loss": 0.3242, "step": 16605 }, { "epoch": 54.445901639344264, "grad_norm": 2.7640562057495117, "learning_rate": 9.047341447068183e-06, "loss": 0.3833, "step": 16606 }, { "epoch": 54.44918032786885, "grad_norm": 2.9435386657714844, "learning_rate": 9.046284395058104e-06, "loss": 0.2083, "step": 16607 }, { "epoch": 54.45245901639344, "grad_norm": 3.2653191089630127, "learning_rate": 9.045227353802162e-06, "loss": 0.1773, "step": 16608 }, { "epoch": 54.455737704918036, "grad_norm": 3.2790915966033936, "learning_rate": 9.044170323312276e-06, "loss": 0.1459, "step": 16609 }, { "epoch": 54.459016393442624, "grad_norm": 3.0133230686187744, "learning_rate": 9.043113303600363e-06, "loss": 0.3334, "step": 16610 }, { "epoch": 54.46229508196721, "grad_norm": 3.2506561279296875, "learning_rate": 9.042056294678342e-06, "loss": 0.1547, "step": 16611 }, { "epoch": 54.4655737704918, "grad_norm": 2.601689100265503, "learning_rate": 9.04099929655813e-06, "loss": 0.1984, "step": 16612 }, { "epoch": 54.4688524590164, "grad_norm": 3.4459030628204346, "learning_rate": 9.03994230925165e-06, "loss": 0.278, "step": 16613 }, { "epoch": 54.472131147540985, "grad_norm": 3.378790855407715, "learning_rate": 9.03888533277082e-06, "loss": 0.1531, "step": 16614 }, { "epoch": 54.47540983606557, "grad_norm": 4.142580509185791, "learning_rate": 9.037828367127556e-06, "loss": 0.2391, "step": 16615 }, { "epoch": 54.47868852459016, "grad_norm": 4.697633743286133, "learning_rate": 9.036771412333777e-06, "loss": 0.1876, "step": 16616 }, { "epoch": 54.48196721311476, "grad_norm": 3.7637572288513184, "learning_rate": 9.0357144684014e-06, "loss": 0.2066, "step": 16617 }, { "epoch": 54.485245901639345, "grad_norm": 2.6176304817199707, "learning_rate": 9.034657535342349e-06, "loss": 0.1401, "step": 16618 }, { "epoch": 54.488524590163934, "grad_norm": 2.8659541606903076, "learning_rate": 9.033600613168537e-06, "loss": 0.1578, "step": 16619 }, { "epoch": 54.49180327868852, "grad_norm": 3.776916265487671, "learning_rate": 9.032543701891885e-06, "loss": 0.2663, "step": 16620 }, { "epoch": 54.49508196721312, "grad_norm": 3.449979543685913, "learning_rate": 9.031486801524301e-06, "loss": 0.1544, "step": 16621 }, { "epoch": 54.498360655737706, "grad_norm": 3.30027437210083, "learning_rate": 9.030429912077715e-06, "loss": 0.2718, "step": 16622 }, { "epoch": 54.501639344262294, "grad_norm": 3.7992665767669678, "learning_rate": 9.029373033564041e-06, "loss": 0.2231, "step": 16623 }, { "epoch": 54.50491803278688, "grad_norm": 3.7803902626037598, "learning_rate": 9.028316165995196e-06, "loss": 0.1463, "step": 16624 }, { "epoch": 54.50819672131148, "grad_norm": 3.4878671169281006, "learning_rate": 9.027259309383092e-06, "loss": 0.1857, "step": 16625 }, { "epoch": 54.511475409836066, "grad_norm": 3.3171465396881104, "learning_rate": 9.026202463739653e-06, "loss": 0.2655, "step": 16626 }, { "epoch": 54.514754098360655, "grad_norm": 3.667792797088623, "learning_rate": 9.025145629076797e-06, "loss": 0.2544, "step": 16627 }, { "epoch": 54.51803278688524, "grad_norm": 3.647017478942871, "learning_rate": 9.024088805406434e-06, "loss": 0.161, "step": 16628 }, { "epoch": 54.52131147540984, "grad_norm": 3.219815254211426, "learning_rate": 9.023031992740488e-06, "loss": 0.2076, "step": 16629 }, { "epoch": 54.52459016393443, "grad_norm": 3.8757107257843018, "learning_rate": 9.021975191090866e-06, "loss": 0.2429, "step": 16630 }, { "epoch": 54.527868852459015, "grad_norm": 2.7398784160614014, "learning_rate": 9.020918400469494e-06, "loss": 0.3163, "step": 16631 }, { "epoch": 54.5311475409836, "grad_norm": 3.892225742340088, "learning_rate": 9.019861620888286e-06, "loss": 0.1788, "step": 16632 }, { "epoch": 54.5344262295082, "grad_norm": 2.9687647819519043, "learning_rate": 9.018804852359158e-06, "loss": 0.2102, "step": 16633 }, { "epoch": 54.53770491803279, "grad_norm": 3.0032052993774414, "learning_rate": 9.01774809489402e-06, "loss": 0.2141, "step": 16634 }, { "epoch": 54.540983606557376, "grad_norm": 3.100245237350464, "learning_rate": 9.016691348504798e-06, "loss": 0.5187, "step": 16635 }, { "epoch": 54.544262295081964, "grad_norm": 2.653470993041992, "learning_rate": 9.015634613203404e-06, "loss": 0.1774, "step": 16636 }, { "epoch": 54.54754098360656, "grad_norm": 7.029660224914551, "learning_rate": 9.01457788900175e-06, "loss": 0.1979, "step": 16637 }, { "epoch": 54.55081967213115, "grad_norm": 3.1760146617889404, "learning_rate": 9.013521175911755e-06, "loss": 0.1044, "step": 16638 }, { "epoch": 54.554098360655736, "grad_norm": 2.9523065090179443, "learning_rate": 9.012464473945338e-06, "loss": 0.283, "step": 16639 }, { "epoch": 54.557377049180324, "grad_norm": 3.215435266494751, "learning_rate": 9.011407783114407e-06, "loss": 0.2363, "step": 16640 }, { "epoch": 54.56065573770492, "grad_norm": 3.4637999534606934, "learning_rate": 9.010351103430885e-06, "loss": 0.1641, "step": 16641 }, { "epoch": 54.56393442622951, "grad_norm": 3.6959071159362793, "learning_rate": 9.009294434906682e-06, "loss": 0.244, "step": 16642 }, { "epoch": 54.5672131147541, "grad_norm": 3.490715742111206, "learning_rate": 9.008237777553712e-06, "loss": 0.5004, "step": 16643 }, { "epoch": 54.570491803278685, "grad_norm": 3.483339548110962, "learning_rate": 9.007181131383894e-06, "loss": 0.2417, "step": 16644 }, { "epoch": 54.57377049180328, "grad_norm": 3.662885904312134, "learning_rate": 9.006124496409141e-06, "loss": 0.3114, "step": 16645 }, { "epoch": 54.57704918032787, "grad_norm": 3.2343733310699463, "learning_rate": 9.00506787264137e-06, "loss": 0.228, "step": 16646 }, { "epoch": 54.58032786885246, "grad_norm": 3.20839786529541, "learning_rate": 9.004011260092489e-06, "loss": 0.4326, "step": 16647 }, { "epoch": 54.58360655737705, "grad_norm": 3.470496416091919, "learning_rate": 9.002954658774417e-06, "loss": 0.4874, "step": 16648 }, { "epoch": 54.58688524590164, "grad_norm": 3.44856858253479, "learning_rate": 9.00189806869907e-06, "loss": 0.1301, "step": 16649 }, { "epoch": 54.59016393442623, "grad_norm": 3.1312060356140137, "learning_rate": 9.000841489878362e-06, "loss": 0.1882, "step": 16650 }, { "epoch": 54.59344262295082, "grad_norm": 2.793962240219116, "learning_rate": 8.9997849223242e-06, "loss": 0.2139, "step": 16651 }, { "epoch": 54.59672131147541, "grad_norm": 4.046955585479736, "learning_rate": 8.998728366048506e-06, "loss": 0.3041, "step": 16652 }, { "epoch": 54.6, "grad_norm": 2.5377166271209717, "learning_rate": 8.99767182106319e-06, "loss": 0.2798, "step": 16653 }, { "epoch": 54.60327868852459, "grad_norm": 3.0560920238494873, "learning_rate": 8.996615287380168e-06, "loss": 0.2287, "step": 16654 }, { "epoch": 54.60655737704918, "grad_norm": 3.031691789627075, "learning_rate": 8.995558765011351e-06, "loss": 0.1982, "step": 16655 }, { "epoch": 54.609836065573774, "grad_norm": 2.906792402267456, "learning_rate": 8.99450225396865e-06, "loss": 0.1656, "step": 16656 }, { "epoch": 54.61311475409836, "grad_norm": 8.442597389221191, "learning_rate": 8.993445754263985e-06, "loss": 0.2809, "step": 16657 }, { "epoch": 54.61639344262295, "grad_norm": 3.786055326461792, "learning_rate": 8.992389265909265e-06, "loss": 0.3014, "step": 16658 }, { "epoch": 54.61967213114754, "grad_norm": 4.651122093200684, "learning_rate": 8.991332788916406e-06, "loss": 0.4254, "step": 16659 }, { "epoch": 54.622950819672134, "grad_norm": 3.0890536308288574, "learning_rate": 8.990276323297313e-06, "loss": 0.3107, "step": 16660 }, { "epoch": 54.62622950819672, "grad_norm": 5.313288688659668, "learning_rate": 8.989219869063909e-06, "loss": 0.2084, "step": 16661 }, { "epoch": 54.62950819672131, "grad_norm": 3.433551073074341, "learning_rate": 8.9881634262281e-06, "loss": 0.1156, "step": 16662 }, { "epoch": 54.6327868852459, "grad_norm": 3.0915942192077637, "learning_rate": 8.987106994801801e-06, "loss": 0.1842, "step": 16663 }, { "epoch": 54.636065573770495, "grad_norm": 3.6754722595214844, "learning_rate": 8.986050574796922e-06, "loss": 0.2122, "step": 16664 }, { "epoch": 54.63934426229508, "grad_norm": 3.080763578414917, "learning_rate": 8.984994166225379e-06, "loss": 0.1576, "step": 16665 }, { "epoch": 54.64262295081967, "grad_norm": 3.532273530960083, "learning_rate": 8.983937769099082e-06, "loss": 0.2418, "step": 16666 }, { "epoch": 54.64590163934426, "grad_norm": 5.087853908538818, "learning_rate": 8.982881383429943e-06, "loss": 0.4471, "step": 16667 }, { "epoch": 54.649180327868855, "grad_norm": 4.753314971923828, "learning_rate": 8.981825009229873e-06, "loss": 0.2294, "step": 16668 }, { "epoch": 54.65245901639344, "grad_norm": 3.1302027702331543, "learning_rate": 8.980768646510785e-06, "loss": 0.1682, "step": 16669 }, { "epoch": 54.65573770491803, "grad_norm": 2.9333584308624268, "learning_rate": 8.97971229528459e-06, "loss": 0.1542, "step": 16670 }, { "epoch": 54.65901639344262, "grad_norm": 4.5752644538879395, "learning_rate": 8.978655955563202e-06, "loss": 0.268, "step": 16671 }, { "epoch": 54.662295081967216, "grad_norm": 3.490506410598755, "learning_rate": 8.97759962735853e-06, "loss": 0.2323, "step": 16672 }, { "epoch": 54.665573770491804, "grad_norm": 3.093724250793457, "learning_rate": 8.97654331068248e-06, "loss": 0.3493, "step": 16673 }, { "epoch": 54.66885245901639, "grad_norm": 3.5276949405670166, "learning_rate": 8.975487005546972e-06, "loss": 0.2662, "step": 16674 }, { "epoch": 54.67213114754098, "grad_norm": 5.4316630363464355, "learning_rate": 8.974430711963915e-06, "loss": 0.3235, "step": 16675 }, { "epoch": 54.675409836065576, "grad_norm": 3.6968064308166504, "learning_rate": 8.973374429945218e-06, "loss": 0.3053, "step": 16676 }, { "epoch": 54.678688524590164, "grad_norm": 5.121200084686279, "learning_rate": 8.972318159502785e-06, "loss": 0.3506, "step": 16677 }, { "epoch": 54.68196721311475, "grad_norm": 4.471567153930664, "learning_rate": 8.97126190064854e-06, "loss": 0.2169, "step": 16678 }, { "epoch": 54.68524590163934, "grad_norm": 4.299535751342773, "learning_rate": 8.970205653394386e-06, "loss": 0.2703, "step": 16679 }, { "epoch": 54.68852459016394, "grad_norm": 3.52201771736145, "learning_rate": 8.969149417752234e-06, "loss": 0.2839, "step": 16680 }, { "epoch": 54.691803278688525, "grad_norm": 3.006744384765625, "learning_rate": 8.968093193733995e-06, "loss": 0.3188, "step": 16681 }, { "epoch": 54.69508196721311, "grad_norm": 3.6565005779266357, "learning_rate": 8.96703698135157e-06, "loss": 0.3639, "step": 16682 }, { "epoch": 54.6983606557377, "grad_norm": 3.618950605392456, "learning_rate": 8.965980780616886e-06, "loss": 0.1908, "step": 16683 }, { "epoch": 54.7016393442623, "grad_norm": 3.599693536758423, "learning_rate": 8.964924591541842e-06, "loss": 0.2806, "step": 16684 }, { "epoch": 54.704918032786885, "grad_norm": 5.37738037109375, "learning_rate": 8.96386841413835e-06, "loss": 0.2255, "step": 16685 }, { "epoch": 54.708196721311474, "grad_norm": 3.178475856781006, "learning_rate": 8.962812248418314e-06, "loss": 0.3198, "step": 16686 }, { "epoch": 54.71147540983607, "grad_norm": 3.306406021118164, "learning_rate": 8.961756094393652e-06, "loss": 0.2398, "step": 16687 }, { "epoch": 54.71475409836066, "grad_norm": 3.5369906425476074, "learning_rate": 8.96069995207627e-06, "loss": 0.2823, "step": 16688 }, { "epoch": 54.718032786885246, "grad_norm": 3.120108127593994, "learning_rate": 8.959643821478077e-06, "loss": 0.2662, "step": 16689 }, { "epoch": 54.721311475409834, "grad_norm": 3.749992847442627, "learning_rate": 8.958587702610977e-06, "loss": 0.2605, "step": 16690 }, { "epoch": 54.72459016393443, "grad_norm": 2.9858057498931885, "learning_rate": 8.95753159548689e-06, "loss": 0.1879, "step": 16691 }, { "epoch": 54.72786885245902, "grad_norm": 3.733802080154419, "learning_rate": 8.956475500117715e-06, "loss": 0.2029, "step": 16692 }, { "epoch": 54.731147540983606, "grad_norm": 16.348806381225586, "learning_rate": 8.955419416515363e-06, "loss": 0.3354, "step": 16693 }, { "epoch": 54.734426229508195, "grad_norm": 3.5552220344543457, "learning_rate": 8.954363344691744e-06, "loss": 0.3404, "step": 16694 }, { "epoch": 54.73770491803279, "grad_norm": 4.1837477684021, "learning_rate": 8.953307284658765e-06, "loss": 0.3529, "step": 16695 }, { "epoch": 54.74098360655738, "grad_norm": 4.733603000640869, "learning_rate": 8.952251236428334e-06, "loss": 0.2722, "step": 16696 }, { "epoch": 54.74426229508197, "grad_norm": 2.9637858867645264, "learning_rate": 8.951195200012361e-06, "loss": 0.1194, "step": 16697 }, { "epoch": 54.747540983606555, "grad_norm": 4.35210657119751, "learning_rate": 8.950139175422754e-06, "loss": 0.2349, "step": 16698 }, { "epoch": 54.75081967213115, "grad_norm": 4.6314802169799805, "learning_rate": 8.949083162671414e-06, "loss": 0.3135, "step": 16699 }, { "epoch": 54.75409836065574, "grad_norm": 3.7132983207702637, "learning_rate": 8.948027161770259e-06, "loss": 0.3255, "step": 16700 }, { "epoch": 54.75737704918033, "grad_norm": 3.2982819080352783, "learning_rate": 8.946971172731192e-06, "loss": 0.1688, "step": 16701 }, { "epoch": 54.760655737704916, "grad_norm": 3.479788303375244, "learning_rate": 8.945915195566119e-06, "loss": 0.1873, "step": 16702 }, { "epoch": 54.76393442622951, "grad_norm": 3.7000174522399902, "learning_rate": 8.944859230286945e-06, "loss": 0.3131, "step": 16703 }, { "epoch": 54.7672131147541, "grad_norm": 2.986436128616333, "learning_rate": 8.943803276905583e-06, "loss": 0.2434, "step": 16704 }, { "epoch": 54.77049180327869, "grad_norm": 2.8029589653015137, "learning_rate": 8.942747335433938e-06, "loss": 0.4426, "step": 16705 }, { "epoch": 54.773770491803276, "grad_norm": 3.6958253383636475, "learning_rate": 8.941691405883916e-06, "loss": 0.4546, "step": 16706 }, { "epoch": 54.77704918032787, "grad_norm": 2.9468936920166016, "learning_rate": 8.940635488267424e-06, "loss": 0.1544, "step": 16707 }, { "epoch": 54.78032786885246, "grad_norm": 3.454738140106201, "learning_rate": 8.939579582596363e-06, "loss": 0.1327, "step": 16708 }, { "epoch": 54.78360655737705, "grad_norm": 3.9959700107574463, "learning_rate": 8.93852368888265e-06, "loss": 0.2794, "step": 16709 }, { "epoch": 54.78688524590164, "grad_norm": 3.677889347076416, "learning_rate": 8.937467807138185e-06, "loss": 0.3882, "step": 16710 }, { "epoch": 54.79016393442623, "grad_norm": 2.9708309173583984, "learning_rate": 8.936411937374877e-06, "loss": 0.2378, "step": 16711 }, { "epoch": 54.79344262295082, "grad_norm": 3.22383451461792, "learning_rate": 8.935356079604624e-06, "loss": 0.161, "step": 16712 }, { "epoch": 54.79672131147541, "grad_norm": 3.768388032913208, "learning_rate": 8.934300233839344e-06, "loss": 0.1129, "step": 16713 }, { "epoch": 54.8, "grad_norm": 4.8604302406311035, "learning_rate": 8.933244400090937e-06, "loss": 0.2346, "step": 16714 }, { "epoch": 54.80327868852459, "grad_norm": 3.5444552898406982, "learning_rate": 8.932188578371308e-06, "loss": 0.1385, "step": 16715 }, { "epoch": 54.80655737704918, "grad_norm": 3.9270524978637695, "learning_rate": 8.931132768692358e-06, "loss": 0.1777, "step": 16716 }, { "epoch": 54.80983606557377, "grad_norm": 3.4337596893310547, "learning_rate": 8.930076971066003e-06, "loss": 0.2848, "step": 16717 }, { "epoch": 54.81311475409836, "grad_norm": 7.308755874633789, "learning_rate": 8.929021185504142e-06, "loss": 0.3046, "step": 16718 }, { "epoch": 54.81639344262295, "grad_norm": 3.863314390182495, "learning_rate": 8.927965412018678e-06, "loss": 0.1534, "step": 16719 }, { "epoch": 54.81967213114754, "grad_norm": 4.214991092681885, "learning_rate": 8.926909650621523e-06, "loss": 0.1746, "step": 16720 }, { "epoch": 54.82295081967213, "grad_norm": 4.206216812133789, "learning_rate": 8.925853901324573e-06, "loss": 0.3878, "step": 16721 }, { "epoch": 54.82622950819672, "grad_norm": 4.712467670440674, "learning_rate": 8.924798164139738e-06, "loss": 0.3613, "step": 16722 }, { "epoch": 54.829508196721314, "grad_norm": 3.4692673683166504, "learning_rate": 8.923742439078922e-06, "loss": 0.1873, "step": 16723 }, { "epoch": 54.8327868852459, "grad_norm": 2.7898788452148438, "learning_rate": 8.922686726154031e-06, "loss": 0.3248, "step": 16724 }, { "epoch": 54.83606557377049, "grad_norm": 3.9168570041656494, "learning_rate": 8.921631025376962e-06, "loss": 0.3053, "step": 16725 }, { "epoch": 54.83934426229508, "grad_norm": 3.6414437294006348, "learning_rate": 8.92057533675963e-06, "loss": 0.252, "step": 16726 }, { "epoch": 54.842622950819674, "grad_norm": 2.8386034965515137, "learning_rate": 8.919519660313933e-06, "loss": 0.2386, "step": 16727 }, { "epoch": 54.84590163934426, "grad_norm": 3.605541944503784, "learning_rate": 8.918463996051774e-06, "loss": 0.331, "step": 16728 }, { "epoch": 54.84918032786885, "grad_norm": 4.062867164611816, "learning_rate": 8.917408343985054e-06, "loss": 0.1781, "step": 16729 }, { "epoch": 54.85245901639344, "grad_norm": 3.3579819202423096, "learning_rate": 8.916352704125686e-06, "loss": 0.1912, "step": 16730 }, { "epoch": 54.855737704918035, "grad_norm": 2.900601625442505, "learning_rate": 8.915297076485567e-06, "loss": 0.229, "step": 16731 }, { "epoch": 54.85901639344262, "grad_norm": 3.1871771812438965, "learning_rate": 8.914241461076602e-06, "loss": 0.1517, "step": 16732 }, { "epoch": 54.86229508196721, "grad_norm": 3.452265739440918, "learning_rate": 8.913185857910692e-06, "loss": 0.2132, "step": 16733 }, { "epoch": 54.86557377049181, "grad_norm": 2.855649709701538, "learning_rate": 8.91213026699974e-06, "loss": 0.3011, "step": 16734 }, { "epoch": 54.868852459016395, "grad_norm": 3.2684710025787354, "learning_rate": 8.91107468835565e-06, "loss": 0.1399, "step": 16735 }, { "epoch": 54.87213114754098, "grad_norm": 3.3956377506256104, "learning_rate": 8.910019121990329e-06, "loss": 0.2554, "step": 16736 }, { "epoch": 54.87540983606557, "grad_norm": 3.327066421508789, "learning_rate": 8.908963567915675e-06, "loss": 0.1276, "step": 16737 }, { "epoch": 54.87868852459017, "grad_norm": 5.933239459991455, "learning_rate": 8.907908026143586e-06, "loss": 0.2921, "step": 16738 }, { "epoch": 54.881967213114756, "grad_norm": 2.9067137241363525, "learning_rate": 8.906852496685975e-06, "loss": 0.1697, "step": 16739 }, { "epoch": 54.885245901639344, "grad_norm": 3.6771538257598877, "learning_rate": 8.905796979554738e-06, "loss": 0.2591, "step": 16740 }, { "epoch": 54.88852459016393, "grad_norm": 3.53389835357666, "learning_rate": 8.904741474761777e-06, "loss": 0.2311, "step": 16741 }, { "epoch": 54.89180327868853, "grad_norm": 3.440075159072876, "learning_rate": 8.903685982318991e-06, "loss": 0.1828, "step": 16742 }, { "epoch": 54.895081967213116, "grad_norm": 2.916426420211792, "learning_rate": 8.90263050223829e-06, "loss": 0.1665, "step": 16743 }, { "epoch": 54.898360655737704, "grad_norm": 3.177983522415161, "learning_rate": 8.90157503453157e-06, "loss": 0.1456, "step": 16744 }, { "epoch": 54.90163934426229, "grad_norm": 3.1946840286254883, "learning_rate": 8.900519579210732e-06, "loss": 0.2774, "step": 16745 }, { "epoch": 54.90491803278689, "grad_norm": 3.9170544147491455, "learning_rate": 8.89946413628768e-06, "loss": 0.216, "step": 16746 }, { "epoch": 54.90819672131148, "grad_norm": 3.093991756439209, "learning_rate": 8.898408705774316e-06, "loss": 0.1204, "step": 16747 }, { "epoch": 54.911475409836065, "grad_norm": 3.363408088684082, "learning_rate": 8.897353287682535e-06, "loss": 0.3012, "step": 16748 }, { "epoch": 54.91475409836065, "grad_norm": 3.2500364780426025, "learning_rate": 8.896297882024246e-06, "loss": 0.1573, "step": 16749 }, { "epoch": 54.91803278688525, "grad_norm": 3.5296881198883057, "learning_rate": 8.895242488811346e-06, "loss": 0.1311, "step": 16750 }, { "epoch": 54.92131147540984, "grad_norm": 3.7675952911376953, "learning_rate": 8.894187108055734e-06, "loss": 0.2064, "step": 16751 }, { "epoch": 54.924590163934425, "grad_norm": 3.889918088912964, "learning_rate": 8.893131739769309e-06, "loss": 0.224, "step": 16752 }, { "epoch": 54.927868852459014, "grad_norm": 3.4123313426971436, "learning_rate": 8.89207638396398e-06, "loss": 0.1724, "step": 16753 }, { "epoch": 54.93114754098361, "grad_norm": 3.153578519821167, "learning_rate": 8.891021040651641e-06, "loss": 0.212, "step": 16754 }, { "epoch": 54.9344262295082, "grad_norm": 4.017493724822998, "learning_rate": 8.889965709844187e-06, "loss": 0.3265, "step": 16755 }, { "epoch": 54.937704918032786, "grad_norm": 5.047935962677002, "learning_rate": 8.88891039155353e-06, "loss": 0.3616, "step": 16756 }, { "epoch": 54.940983606557374, "grad_norm": 3.2061705589294434, "learning_rate": 8.887855085791563e-06, "loss": 0.2316, "step": 16757 }, { "epoch": 54.94426229508197, "grad_norm": 3.0086193084716797, "learning_rate": 8.886799792570186e-06, "loss": 0.1895, "step": 16758 }, { "epoch": 54.94754098360656, "grad_norm": 3.7383108139038086, "learning_rate": 8.885744511901298e-06, "loss": 0.1481, "step": 16759 }, { "epoch": 54.950819672131146, "grad_norm": 3.7741012573242188, "learning_rate": 8.884689243796795e-06, "loss": 0.1874, "step": 16760 }, { "epoch": 54.954098360655735, "grad_norm": 4.864717483520508, "learning_rate": 8.883633988268586e-06, "loss": 0.2178, "step": 16761 }, { "epoch": 54.95737704918033, "grad_norm": 4.1082682609558105, "learning_rate": 8.882578745328565e-06, "loss": 0.3445, "step": 16762 }, { "epoch": 54.96065573770492, "grad_norm": 3.5438363552093506, "learning_rate": 8.881523514988628e-06, "loss": 0.2392, "step": 16763 }, { "epoch": 54.96393442622951, "grad_norm": 3.473043203353882, "learning_rate": 8.880468297260673e-06, "loss": 0.2696, "step": 16764 }, { "epoch": 54.967213114754095, "grad_norm": 2.9872677326202393, "learning_rate": 8.879413092156608e-06, "loss": 0.1118, "step": 16765 }, { "epoch": 54.97049180327869, "grad_norm": 2.994248867034912, "learning_rate": 8.878357899688324e-06, "loss": 0.0985, "step": 16766 }, { "epoch": 54.97377049180328, "grad_norm": 3.6272711753845215, "learning_rate": 8.87730271986772e-06, "loss": 0.2555, "step": 16767 }, { "epoch": 54.97704918032787, "grad_norm": 3.802340030670166, "learning_rate": 8.876247552706693e-06, "loss": 0.2601, "step": 16768 }, { "epoch": 54.980327868852456, "grad_norm": 3.5028605461120605, "learning_rate": 8.875192398217147e-06, "loss": 0.095, "step": 16769 }, { "epoch": 54.98360655737705, "grad_norm": 3.1986212730407715, "learning_rate": 8.874137256410974e-06, "loss": 0.2972, "step": 16770 }, { "epoch": 54.98688524590164, "grad_norm": 3.129821300506592, "learning_rate": 8.873082127300077e-06, "loss": 0.1936, "step": 16771 }, { "epoch": 54.99016393442623, "grad_norm": 3.3777265548706055, "learning_rate": 8.872027010896347e-06, "loss": 0.2638, "step": 16772 }, { "epoch": 54.993442622950816, "grad_norm": 4.456169605255127, "learning_rate": 8.870971907211685e-06, "loss": 0.1219, "step": 16773 }, { "epoch": 54.99672131147541, "grad_norm": 3.551220417022705, "learning_rate": 8.86991681625799e-06, "loss": 0.2775, "step": 16774 }, { "epoch": 55.0, "grad_norm": 3.7144083976745605, "learning_rate": 8.868861738047158e-06, "loss": 0.1742, "step": 16775 }, { "epoch": 55.00327868852459, "grad_norm": 3.1177501678466797, "learning_rate": 8.867806672591087e-06, "loss": 0.4398, "step": 16776 }, { "epoch": 55.006557377049184, "grad_norm": 4.2260355949401855, "learning_rate": 8.866751619901671e-06, "loss": 0.205, "step": 16777 }, { "epoch": 55.00983606557377, "grad_norm": 3.640766143798828, "learning_rate": 8.86569657999081e-06, "loss": 0.2574, "step": 16778 }, { "epoch": 55.01311475409836, "grad_norm": 3.5007386207580566, "learning_rate": 8.864641552870399e-06, "loss": 0.2651, "step": 16779 }, { "epoch": 55.01639344262295, "grad_norm": 4.13362979888916, "learning_rate": 8.863586538552336e-06, "loss": 0.3783, "step": 16780 }, { "epoch": 55.019672131147544, "grad_norm": 2.3069992065429688, "learning_rate": 8.862531537048513e-06, "loss": 0.0611, "step": 16781 }, { "epoch": 55.02295081967213, "grad_norm": 5.271568775177002, "learning_rate": 8.861476548370833e-06, "loss": 0.3166, "step": 16782 }, { "epoch": 55.02622950819672, "grad_norm": 3.3054935932159424, "learning_rate": 8.860421572531189e-06, "loss": 0.3905, "step": 16783 }, { "epoch": 55.02950819672131, "grad_norm": 3.6482338905334473, "learning_rate": 8.859366609541476e-06, "loss": 0.1871, "step": 16784 }, { "epoch": 55.032786885245905, "grad_norm": 3.667259931564331, "learning_rate": 8.858311659413592e-06, "loss": 0.2839, "step": 16785 }, { "epoch": 55.03606557377049, "grad_norm": 3.3172788619995117, "learning_rate": 8.857256722159425e-06, "loss": 0.2098, "step": 16786 }, { "epoch": 55.03934426229508, "grad_norm": 2.6399433612823486, "learning_rate": 8.856201797790883e-06, "loss": 0.0726, "step": 16787 }, { "epoch": 55.04262295081967, "grad_norm": 4.259372234344482, "learning_rate": 8.855146886319853e-06, "loss": 0.2251, "step": 16788 }, { "epoch": 55.045901639344265, "grad_norm": 3.480438709259033, "learning_rate": 8.854091987758233e-06, "loss": 0.2629, "step": 16789 }, { "epoch": 55.049180327868854, "grad_norm": 3.493720054626465, "learning_rate": 8.853037102117914e-06, "loss": 0.1511, "step": 16790 }, { "epoch": 55.05245901639344, "grad_norm": 3.7790677547454834, "learning_rate": 8.851982229410797e-06, "loss": 0.5058, "step": 16791 }, { "epoch": 55.05573770491803, "grad_norm": 4.057210445404053, "learning_rate": 8.850927369648774e-06, "loss": 0.1253, "step": 16792 }, { "epoch": 55.059016393442626, "grad_norm": 4.0873212814331055, "learning_rate": 8.84987252284374e-06, "loss": 0.2228, "step": 16793 }, { "epoch": 55.062295081967214, "grad_norm": 2.7389984130859375, "learning_rate": 8.848817689007584e-06, "loss": 0.2916, "step": 16794 }, { "epoch": 55.0655737704918, "grad_norm": 3.427494764328003, "learning_rate": 8.84776286815221e-06, "loss": 0.1373, "step": 16795 }, { "epoch": 55.06885245901639, "grad_norm": 2.4790732860565186, "learning_rate": 8.84670806028951e-06, "loss": 0.0933, "step": 16796 }, { "epoch": 55.072131147540986, "grad_norm": 3.3383190631866455, "learning_rate": 8.845653265431373e-06, "loss": 0.1947, "step": 16797 }, { "epoch": 55.075409836065575, "grad_norm": 2.892744302749634, "learning_rate": 8.844598483589695e-06, "loss": 0.183, "step": 16798 }, { "epoch": 55.07868852459016, "grad_norm": 2.982980728149414, "learning_rate": 8.843543714776371e-06, "loss": 0.2678, "step": 16799 }, { "epoch": 55.08196721311475, "grad_norm": 3.195762872695923, "learning_rate": 8.842488959003294e-06, "loss": 0.1212, "step": 16800 }, { "epoch": 55.08524590163935, "grad_norm": 2.6735494136810303, "learning_rate": 8.841434216282356e-06, "loss": 0.1307, "step": 16801 }, { "epoch": 55.088524590163935, "grad_norm": 3.489600896835327, "learning_rate": 8.840379486625456e-06, "loss": 0.2053, "step": 16802 }, { "epoch": 55.09180327868852, "grad_norm": 3.9777262210845947, "learning_rate": 8.839324770044479e-06, "loss": 0.3352, "step": 16803 }, { "epoch": 55.09508196721311, "grad_norm": 3.6968865394592285, "learning_rate": 8.838270066551322e-06, "loss": 0.2466, "step": 16804 }, { "epoch": 55.09836065573771, "grad_norm": 3.6180787086486816, "learning_rate": 8.83721537615788e-06, "loss": 0.1949, "step": 16805 }, { "epoch": 55.101639344262296, "grad_norm": 5.403941631317139, "learning_rate": 8.836160698876044e-06, "loss": 0.2557, "step": 16806 }, { "epoch": 55.104918032786884, "grad_norm": 3.980212450027466, "learning_rate": 8.835106034717701e-06, "loss": 0.3143, "step": 16807 }, { "epoch": 55.10819672131147, "grad_norm": 5.859011173248291, "learning_rate": 8.834051383694754e-06, "loss": 0.1728, "step": 16808 }, { "epoch": 55.11147540983607, "grad_norm": 3.0707812309265137, "learning_rate": 8.83299674581909e-06, "loss": 0.1733, "step": 16809 }, { "epoch": 55.114754098360656, "grad_norm": 2.623532295227051, "learning_rate": 8.831942121102602e-06, "loss": 0.1017, "step": 16810 }, { "epoch": 55.118032786885244, "grad_norm": 3.755352735519409, "learning_rate": 8.83088750955718e-06, "loss": 0.2729, "step": 16811 }, { "epoch": 55.12131147540983, "grad_norm": 4.341593265533447, "learning_rate": 8.829832911194713e-06, "loss": 0.2566, "step": 16812 }, { "epoch": 55.12459016393443, "grad_norm": 3.590892791748047, "learning_rate": 8.8287783260271e-06, "loss": 0.1956, "step": 16813 }, { "epoch": 55.12786885245902, "grad_norm": 5.996687412261963, "learning_rate": 8.82772375406623e-06, "loss": 0.2297, "step": 16814 }, { "epoch": 55.131147540983605, "grad_norm": 3.698437213897705, "learning_rate": 8.826669195323992e-06, "loss": 0.1763, "step": 16815 }, { "epoch": 55.13442622950819, "grad_norm": 3.4355783462524414, "learning_rate": 8.825614649812277e-06, "loss": 0.335, "step": 16816 }, { "epoch": 55.13770491803279, "grad_norm": 3.460862636566162, "learning_rate": 8.82456011754298e-06, "loss": 0.1466, "step": 16817 }, { "epoch": 55.14098360655738, "grad_norm": 3.8283045291900635, "learning_rate": 8.82350559852799e-06, "loss": 0.1905, "step": 16818 }, { "epoch": 55.144262295081965, "grad_norm": 3.21232533454895, "learning_rate": 8.822451092779198e-06, "loss": 0.2988, "step": 16819 }, { "epoch": 55.14754098360656, "grad_norm": 4.340421199798584, "learning_rate": 8.82139660030849e-06, "loss": 0.2743, "step": 16820 }, { "epoch": 55.15081967213115, "grad_norm": 2.964731216430664, "learning_rate": 8.820342121127765e-06, "loss": 0.2486, "step": 16821 }, { "epoch": 55.15409836065574, "grad_norm": 3.370703935623169, "learning_rate": 8.819287655248911e-06, "loss": 0.2852, "step": 16822 }, { "epoch": 55.157377049180326, "grad_norm": 3.3616397380828857, "learning_rate": 8.818233202683815e-06, "loss": 0.2378, "step": 16823 }, { "epoch": 55.16065573770492, "grad_norm": 3.446586847305298, "learning_rate": 8.817178763444366e-06, "loss": 0.1657, "step": 16824 }, { "epoch": 55.16393442622951, "grad_norm": 7.262675762176514, "learning_rate": 8.816124337542456e-06, "loss": 0.3155, "step": 16825 }, { "epoch": 55.1672131147541, "grad_norm": 2.2230987548828125, "learning_rate": 8.815069924989977e-06, "loss": 0.1424, "step": 16826 }, { "epoch": 55.170491803278686, "grad_norm": 4.335635185241699, "learning_rate": 8.814015525798814e-06, "loss": 0.1783, "step": 16827 }, { "epoch": 55.17377049180328, "grad_norm": 4.039190769195557, "learning_rate": 8.812961139980862e-06, "loss": 0.3315, "step": 16828 }, { "epoch": 55.17704918032787, "grad_norm": 3.466386079788208, "learning_rate": 8.811906767548005e-06, "loss": 0.2434, "step": 16829 }, { "epoch": 55.18032786885246, "grad_norm": 3.1955013275146484, "learning_rate": 8.810852408512135e-06, "loss": 0.0941, "step": 16830 }, { "epoch": 55.18360655737705, "grad_norm": 3.5267984867095947, "learning_rate": 8.809798062885143e-06, "loss": 0.2178, "step": 16831 }, { "epoch": 55.18688524590164, "grad_norm": 3.4871647357940674, "learning_rate": 8.808743730678915e-06, "loss": 0.1982, "step": 16832 }, { "epoch": 55.19016393442623, "grad_norm": 3.346027374267578, "learning_rate": 8.807689411905336e-06, "loss": 0.1184, "step": 16833 }, { "epoch": 55.19344262295082, "grad_norm": 3.501612663269043, "learning_rate": 8.806635106576301e-06, "loss": 0.2411, "step": 16834 }, { "epoch": 55.19672131147541, "grad_norm": 2.920684576034546, "learning_rate": 8.805580814703698e-06, "loss": 0.1942, "step": 16835 }, { "epoch": 55.2, "grad_norm": 3.9359819889068604, "learning_rate": 8.804526536299413e-06, "loss": 0.2083, "step": 16836 }, { "epoch": 55.20327868852459, "grad_norm": 2.3063318729400635, "learning_rate": 8.803472271375333e-06, "loss": 0.0436, "step": 16837 }, { "epoch": 55.20655737704918, "grad_norm": 3.248063325881958, "learning_rate": 8.802418019943343e-06, "loss": 0.2751, "step": 16838 }, { "epoch": 55.20983606557377, "grad_norm": 3.0596299171447754, "learning_rate": 8.801363782015341e-06, "loss": 0.1056, "step": 16839 }, { "epoch": 55.21311475409836, "grad_norm": 4.501770496368408, "learning_rate": 8.800309557603208e-06, "loss": 0.158, "step": 16840 }, { "epoch": 55.21639344262295, "grad_norm": 3.8528189659118652, "learning_rate": 8.799255346718831e-06, "loss": 0.2509, "step": 16841 }, { "epoch": 55.21967213114754, "grad_norm": 2.652104377746582, "learning_rate": 8.798201149374095e-06, "loss": 0.2011, "step": 16842 }, { "epoch": 55.22295081967213, "grad_norm": 3.62673020362854, "learning_rate": 8.797146965580895e-06, "loss": 0.1206, "step": 16843 }, { "epoch": 55.226229508196724, "grad_norm": 2.7624621391296387, "learning_rate": 8.796092795351114e-06, "loss": 0.1841, "step": 16844 }, { "epoch": 55.22950819672131, "grad_norm": 7.855294704437256, "learning_rate": 8.795038638696637e-06, "loss": 0.2415, "step": 16845 }, { "epoch": 55.2327868852459, "grad_norm": 8.276254653930664, "learning_rate": 8.793984495629349e-06, "loss": 0.2287, "step": 16846 }, { "epoch": 55.23606557377049, "grad_norm": 3.470167875289917, "learning_rate": 8.792930366161142e-06, "loss": 0.1911, "step": 16847 }, { "epoch": 55.239344262295084, "grad_norm": 4.0716352462768555, "learning_rate": 8.791876250303903e-06, "loss": 0.1805, "step": 16848 }, { "epoch": 55.24262295081967, "grad_norm": 3.4208834171295166, "learning_rate": 8.790822148069515e-06, "loss": 0.3025, "step": 16849 }, { "epoch": 55.24590163934426, "grad_norm": 3.442640542984009, "learning_rate": 8.78976805946986e-06, "loss": 0.1862, "step": 16850 }, { "epoch": 55.24918032786885, "grad_norm": 3.672041177749634, "learning_rate": 8.788713984516832e-06, "loss": 0.159, "step": 16851 }, { "epoch": 55.252459016393445, "grad_norm": 3.4454574584960938, "learning_rate": 8.787659923222314e-06, "loss": 0.2139, "step": 16852 }, { "epoch": 55.25573770491803, "grad_norm": 2.738511085510254, "learning_rate": 8.78660587559819e-06, "loss": 0.1662, "step": 16853 }, { "epoch": 55.25901639344262, "grad_norm": 3.1387619972229004, "learning_rate": 8.785551841656345e-06, "loss": 0.1555, "step": 16854 }, { "epoch": 55.26229508196721, "grad_norm": 3.1295855045318604, "learning_rate": 8.784497821408665e-06, "loss": 0.2678, "step": 16855 }, { "epoch": 55.265573770491805, "grad_norm": 4.4232072830200195, "learning_rate": 8.78344381486704e-06, "loss": 0.2948, "step": 16856 }, { "epoch": 55.268852459016394, "grad_norm": 3.0645930767059326, "learning_rate": 8.782389822043345e-06, "loss": 0.2057, "step": 16857 }, { "epoch": 55.27213114754098, "grad_norm": 3.7082698345184326, "learning_rate": 8.781335842949475e-06, "loss": 0.2322, "step": 16858 }, { "epoch": 55.27540983606557, "grad_norm": 2.615664482116699, "learning_rate": 8.780281877597309e-06, "loss": 0.3048, "step": 16859 }, { "epoch": 55.278688524590166, "grad_norm": 3.254086494445801, "learning_rate": 8.779227925998732e-06, "loss": 0.2977, "step": 16860 }, { "epoch": 55.281967213114754, "grad_norm": 3.4335973262786865, "learning_rate": 8.778173988165632e-06, "loss": 0.1783, "step": 16861 }, { "epoch": 55.28524590163934, "grad_norm": 3.9604077339172363, "learning_rate": 8.77712006410989e-06, "loss": 0.1958, "step": 16862 }, { "epoch": 55.28852459016394, "grad_norm": 3.685371160507202, "learning_rate": 8.776066153843392e-06, "loss": 0.2497, "step": 16863 }, { "epoch": 55.291803278688526, "grad_norm": 3.1032674312591553, "learning_rate": 8.775012257378016e-06, "loss": 0.2617, "step": 16864 }, { "epoch": 55.295081967213115, "grad_norm": 3.052748441696167, "learning_rate": 8.773958374725654e-06, "loss": 0.1486, "step": 16865 }, { "epoch": 55.2983606557377, "grad_norm": 3.4476511478424072, "learning_rate": 8.772904505898186e-06, "loss": 0.1364, "step": 16866 }, { "epoch": 55.3016393442623, "grad_norm": 3.47529673576355, "learning_rate": 8.771850650907498e-06, "loss": 0.2214, "step": 16867 }, { "epoch": 55.30491803278689, "grad_norm": 3.527414560317993, "learning_rate": 8.770796809765464e-06, "loss": 0.2605, "step": 16868 }, { "epoch": 55.308196721311475, "grad_norm": 3.0119810104370117, "learning_rate": 8.769742982483978e-06, "loss": 0.2663, "step": 16869 }, { "epoch": 55.31147540983606, "grad_norm": 3.7605185508728027, "learning_rate": 8.768689169074921e-06, "loss": 0.2864, "step": 16870 }, { "epoch": 55.31475409836066, "grad_norm": 3.673555850982666, "learning_rate": 8.767635369550173e-06, "loss": 0.2299, "step": 16871 }, { "epoch": 55.31803278688525, "grad_norm": 3.5367753505706787, "learning_rate": 8.766581583921613e-06, "loss": 0.3053, "step": 16872 }, { "epoch": 55.321311475409836, "grad_norm": 5.07546329498291, "learning_rate": 8.765527812201133e-06, "loss": 0.2085, "step": 16873 }, { "epoch": 55.324590163934424, "grad_norm": 3.022423028945923, "learning_rate": 8.764474054400609e-06, "loss": 0.2494, "step": 16874 }, { "epoch": 55.32786885245902, "grad_norm": 4.814253330230713, "learning_rate": 8.763420310531926e-06, "loss": 0.2402, "step": 16875 }, { "epoch": 55.33114754098361, "grad_norm": 5.9998626708984375, "learning_rate": 8.762366580606965e-06, "loss": 0.1323, "step": 16876 }, { "epoch": 55.334426229508196, "grad_norm": 3.146329402923584, "learning_rate": 8.761312864637602e-06, "loss": 0.0893, "step": 16877 }, { "epoch": 55.337704918032784, "grad_norm": 4.635434150695801, "learning_rate": 8.76025916263573e-06, "loss": 0.3749, "step": 16878 }, { "epoch": 55.34098360655738, "grad_norm": 4.052896022796631, "learning_rate": 8.759205474613224e-06, "loss": 0.1615, "step": 16879 }, { "epoch": 55.34426229508197, "grad_norm": 3.5514252185821533, "learning_rate": 8.758151800581965e-06, "loss": 0.1729, "step": 16880 }, { "epoch": 55.34754098360656, "grad_norm": 3.002418041229248, "learning_rate": 8.757098140553834e-06, "loss": 0.1477, "step": 16881 }, { "epoch": 55.350819672131145, "grad_norm": 3.4857711791992188, "learning_rate": 8.756044494540717e-06, "loss": 0.2481, "step": 16882 }, { "epoch": 55.35409836065574, "grad_norm": 3.1981019973754883, "learning_rate": 8.75499086255449e-06, "loss": 0.1054, "step": 16883 }, { "epoch": 55.35737704918033, "grad_norm": 3.964284658432007, "learning_rate": 8.753937244607037e-06, "loss": 0.206, "step": 16884 }, { "epoch": 55.36065573770492, "grad_norm": 3.4271492958068848, "learning_rate": 8.752883640710235e-06, "loss": 0.2593, "step": 16885 }, { "epoch": 55.363934426229505, "grad_norm": 2.8362433910369873, "learning_rate": 8.751830050875969e-06, "loss": 0.1758, "step": 16886 }, { "epoch": 55.3672131147541, "grad_norm": 3.01212477684021, "learning_rate": 8.750776475116117e-06, "loss": 0.1774, "step": 16887 }, { "epoch": 55.37049180327869, "grad_norm": 3.794602155685425, "learning_rate": 8.749722913442558e-06, "loss": 0.1631, "step": 16888 }, { "epoch": 55.37377049180328, "grad_norm": 4.1772141456604, "learning_rate": 8.748669365867174e-06, "loss": 0.2608, "step": 16889 }, { "epoch": 55.377049180327866, "grad_norm": 3.2676775455474854, "learning_rate": 8.74761583240184e-06, "loss": 0.2037, "step": 16890 }, { "epoch": 55.38032786885246, "grad_norm": 2.172941207885742, "learning_rate": 8.746562313058444e-06, "loss": 0.1003, "step": 16891 }, { "epoch": 55.38360655737705, "grad_norm": 3.7835984230041504, "learning_rate": 8.74550880784886e-06, "loss": 0.1611, "step": 16892 }, { "epoch": 55.38688524590164, "grad_norm": 3.5380048751831055, "learning_rate": 8.74445531678497e-06, "loss": 0.4063, "step": 16893 }, { "epoch": 55.390163934426226, "grad_norm": 3.8426897525787354, "learning_rate": 8.743401839878647e-06, "loss": 0.2982, "step": 16894 }, { "epoch": 55.39344262295082, "grad_norm": 3.4710352420806885, "learning_rate": 8.74234837714178e-06, "loss": 0.2084, "step": 16895 }, { "epoch": 55.39672131147541, "grad_norm": 3.2410144805908203, "learning_rate": 8.74129492858624e-06, "loss": 0.1921, "step": 16896 }, { "epoch": 55.4, "grad_norm": 4.116991996765137, "learning_rate": 8.740241494223911e-06, "loss": 0.3393, "step": 16897 }, { "epoch": 55.40327868852459, "grad_norm": 2.2998387813568115, "learning_rate": 8.739188074066665e-06, "loss": 0.0845, "step": 16898 }, { "epoch": 55.40655737704918, "grad_norm": 3.646758556365967, "learning_rate": 8.738134668126387e-06, "loss": 0.2486, "step": 16899 }, { "epoch": 55.40983606557377, "grad_norm": 3.6156907081604004, "learning_rate": 8.737081276414953e-06, "loss": 0.1368, "step": 16900 }, { "epoch": 55.41311475409836, "grad_norm": 3.460404872894287, "learning_rate": 8.736027898944242e-06, "loss": 0.2094, "step": 16901 }, { "epoch": 55.41639344262295, "grad_norm": 3.3044562339782715, "learning_rate": 8.734974535726129e-06, "loss": 0.1794, "step": 16902 }, { "epoch": 55.41967213114754, "grad_norm": 4.053457260131836, "learning_rate": 8.73392118677249e-06, "loss": 0.19, "step": 16903 }, { "epoch": 55.42295081967213, "grad_norm": 3.3454232215881348, "learning_rate": 8.73286785209521e-06, "loss": 0.1972, "step": 16904 }, { "epoch": 55.42622950819672, "grad_norm": 3.0455756187438965, "learning_rate": 8.731814531706162e-06, "loss": 0.2731, "step": 16905 }, { "epoch": 55.429508196721315, "grad_norm": 3.226078987121582, "learning_rate": 8.730761225617222e-06, "loss": 0.1978, "step": 16906 }, { "epoch": 55.4327868852459, "grad_norm": 3.3991663455963135, "learning_rate": 8.729707933840268e-06, "loss": 0.2558, "step": 16907 }, { "epoch": 55.43606557377049, "grad_norm": 3.127816677093506, "learning_rate": 8.72865465638718e-06, "loss": 0.2526, "step": 16908 }, { "epoch": 55.43934426229508, "grad_norm": 3.3841114044189453, "learning_rate": 8.727601393269832e-06, "loss": 0.2623, "step": 16909 }, { "epoch": 55.442622950819676, "grad_norm": 3.3155159950256348, "learning_rate": 8.726548144500104e-06, "loss": 0.2082, "step": 16910 }, { "epoch": 55.445901639344264, "grad_norm": 2.9677600860595703, "learning_rate": 8.725494910089866e-06, "loss": 0.1582, "step": 16911 }, { "epoch": 55.44918032786885, "grad_norm": 4.6036787033081055, "learning_rate": 8.724441690050997e-06, "loss": 0.2492, "step": 16912 }, { "epoch": 55.45245901639344, "grad_norm": 3.5103862285614014, "learning_rate": 8.723388484395378e-06, "loss": 0.1936, "step": 16913 }, { "epoch": 55.455737704918036, "grad_norm": 2.76846981048584, "learning_rate": 8.722335293134881e-06, "loss": 0.1692, "step": 16914 }, { "epoch": 55.459016393442624, "grad_norm": 3.7767553329467773, "learning_rate": 8.721282116281382e-06, "loss": 0.2693, "step": 16915 }, { "epoch": 55.46229508196721, "grad_norm": 3.9382388591766357, "learning_rate": 8.720228953846753e-06, "loss": 0.5197, "step": 16916 }, { "epoch": 55.4655737704918, "grad_norm": 5.060024261474609, "learning_rate": 8.719175805842876e-06, "loss": 0.1813, "step": 16917 }, { "epoch": 55.4688524590164, "grad_norm": 3.1947672367095947, "learning_rate": 8.718122672281623e-06, "loss": 0.214, "step": 16918 }, { "epoch": 55.472131147540985, "grad_norm": 2.904474973678589, "learning_rate": 8.717069553174872e-06, "loss": 0.0965, "step": 16919 }, { "epoch": 55.47540983606557, "grad_norm": 3.740619421005249, "learning_rate": 8.71601644853449e-06, "loss": 0.1885, "step": 16920 }, { "epoch": 55.47868852459016, "grad_norm": 3.126613140106201, "learning_rate": 8.714963358372361e-06, "loss": 0.1884, "step": 16921 }, { "epoch": 55.48196721311476, "grad_norm": 3.2767810821533203, "learning_rate": 8.713910282700359e-06, "loss": 0.2557, "step": 16922 }, { "epoch": 55.485245901639345, "grad_norm": 4.098552703857422, "learning_rate": 8.712857221530353e-06, "loss": 0.2561, "step": 16923 }, { "epoch": 55.488524590163934, "grad_norm": 3.5456643104553223, "learning_rate": 8.711804174874217e-06, "loss": 0.2842, "step": 16924 }, { "epoch": 55.49180327868852, "grad_norm": 3.552686929702759, "learning_rate": 8.710751142743833e-06, "loss": 0.2992, "step": 16925 }, { "epoch": 55.49508196721312, "grad_norm": 2.9888291358947754, "learning_rate": 8.70969812515107e-06, "loss": 0.2294, "step": 16926 }, { "epoch": 55.498360655737706, "grad_norm": 3.7812082767486572, "learning_rate": 8.708645122107802e-06, "loss": 0.3558, "step": 16927 }, { "epoch": 55.501639344262294, "grad_norm": 3.7299818992614746, "learning_rate": 8.707592133625903e-06, "loss": 0.1208, "step": 16928 }, { "epoch": 55.50491803278688, "grad_norm": 3.2945523262023926, "learning_rate": 8.706539159717243e-06, "loss": 0.1318, "step": 16929 }, { "epoch": 55.50819672131148, "grad_norm": 3.2952260971069336, "learning_rate": 8.7054862003937e-06, "loss": 0.3391, "step": 16930 }, { "epoch": 55.511475409836066, "grad_norm": 3.4266109466552734, "learning_rate": 8.70443325566715e-06, "loss": 0.1038, "step": 16931 }, { "epoch": 55.514754098360655, "grad_norm": 4.044075012207031, "learning_rate": 8.703380325549458e-06, "loss": 0.2573, "step": 16932 }, { "epoch": 55.51803278688524, "grad_norm": 10.441251754760742, "learning_rate": 8.7023274100525e-06, "loss": 0.4633, "step": 16933 }, { "epoch": 55.52131147540984, "grad_norm": 3.8434276580810547, "learning_rate": 8.701274509188154e-06, "loss": 0.199, "step": 16934 }, { "epoch": 55.52459016393443, "grad_norm": 4.1767168045043945, "learning_rate": 8.700221622968288e-06, "loss": 0.1849, "step": 16935 }, { "epoch": 55.527868852459015, "grad_norm": 3.7418761253356934, "learning_rate": 8.699168751404771e-06, "loss": 0.2081, "step": 16936 }, { "epoch": 55.5311475409836, "grad_norm": 3.9440348148345947, "learning_rate": 8.69811589450948e-06, "loss": 0.1931, "step": 16937 }, { "epoch": 55.5344262295082, "grad_norm": 3.5734288692474365, "learning_rate": 8.697063052294288e-06, "loss": 0.1246, "step": 16938 }, { "epoch": 55.53770491803279, "grad_norm": 4.425153732299805, "learning_rate": 8.696010224771063e-06, "loss": 0.229, "step": 16939 }, { "epoch": 55.540983606557376, "grad_norm": 4.288201332092285, "learning_rate": 8.69495741195168e-06, "loss": 0.1794, "step": 16940 }, { "epoch": 55.544262295081964, "grad_norm": 4.115938186645508, "learning_rate": 8.69390461384801e-06, "loss": 0.2417, "step": 16941 }, { "epoch": 55.54754098360656, "grad_norm": 3.2077460289001465, "learning_rate": 8.692851830471917e-06, "loss": 0.1425, "step": 16942 }, { "epoch": 55.55081967213115, "grad_norm": 3.099200963973999, "learning_rate": 8.691799061835285e-06, "loss": 0.1945, "step": 16943 }, { "epoch": 55.554098360655736, "grad_norm": 3.0184693336486816, "learning_rate": 8.690746307949977e-06, "loss": 0.1277, "step": 16944 }, { "epoch": 55.557377049180324, "grad_norm": 3.1928415298461914, "learning_rate": 8.689693568827868e-06, "loss": 0.0977, "step": 16945 }, { "epoch": 55.56065573770492, "grad_norm": 3.023740530014038, "learning_rate": 8.688640844480821e-06, "loss": 0.1725, "step": 16946 }, { "epoch": 55.56393442622951, "grad_norm": 3.88031005859375, "learning_rate": 8.687588134920715e-06, "loss": 0.2678, "step": 16947 }, { "epoch": 55.5672131147541, "grad_norm": 3.7029120922088623, "learning_rate": 8.686535440159419e-06, "loss": 0.2, "step": 16948 }, { "epoch": 55.570491803278685, "grad_norm": 3.4579827785491943, "learning_rate": 8.685482760208801e-06, "loss": 0.1379, "step": 16949 }, { "epoch": 55.57377049180328, "grad_norm": 4.244884014129639, "learning_rate": 8.684430095080729e-06, "loss": 0.255, "step": 16950 }, { "epoch": 55.57704918032787, "grad_norm": 3.37475848197937, "learning_rate": 8.683377444787078e-06, "loss": 0.1703, "step": 16951 }, { "epoch": 55.58032786885246, "grad_norm": 3.247619390487671, "learning_rate": 8.682324809339716e-06, "loss": 0.2783, "step": 16952 }, { "epoch": 55.58360655737705, "grad_norm": 3.4296836853027344, "learning_rate": 8.68127218875051e-06, "loss": 0.2338, "step": 16953 }, { "epoch": 55.58688524590164, "grad_norm": 3.1058285236358643, "learning_rate": 8.680219583031333e-06, "loss": 0.389, "step": 16954 }, { "epoch": 55.59016393442623, "grad_norm": 3.9727489948272705, "learning_rate": 8.679166992194047e-06, "loss": 0.1721, "step": 16955 }, { "epoch": 55.59344262295082, "grad_norm": 2.7573294639587402, "learning_rate": 8.678114416250531e-06, "loss": 0.1459, "step": 16956 }, { "epoch": 55.59672131147541, "grad_norm": 3.5763559341430664, "learning_rate": 8.67706185521265e-06, "loss": 0.1785, "step": 16957 }, { "epoch": 55.6, "grad_norm": 3.800227165222168, "learning_rate": 8.676009309092273e-06, "loss": 0.2095, "step": 16958 }, { "epoch": 55.60327868852459, "grad_norm": 3.372084140777588, "learning_rate": 8.674956777901261e-06, "loss": 0.2597, "step": 16959 }, { "epoch": 55.60655737704918, "grad_norm": 3.9350180625915527, "learning_rate": 8.673904261651494e-06, "loss": 0.258, "step": 16960 }, { "epoch": 55.609836065573774, "grad_norm": 4.519474029541016, "learning_rate": 8.672851760354836e-06, "loss": 0.2685, "step": 16961 }, { "epoch": 55.61311475409836, "grad_norm": 3.2796671390533447, "learning_rate": 8.671799274023152e-06, "loss": 0.3665, "step": 16962 }, { "epoch": 55.61639344262295, "grad_norm": 3.7217657566070557, "learning_rate": 8.670746802668313e-06, "loss": 0.2151, "step": 16963 }, { "epoch": 55.61967213114754, "grad_norm": 2.5975520610809326, "learning_rate": 8.669694346302186e-06, "loss": 0.1606, "step": 16964 }, { "epoch": 55.622950819672134, "grad_norm": 3.108388662338257, "learning_rate": 8.668641904936639e-06, "loss": 0.2126, "step": 16965 }, { "epoch": 55.62622950819672, "grad_norm": 3.369899034500122, "learning_rate": 8.667589478583539e-06, "loss": 0.2242, "step": 16966 }, { "epoch": 55.62950819672131, "grad_norm": 4.120299339294434, "learning_rate": 8.666537067254753e-06, "loss": 0.2859, "step": 16967 }, { "epoch": 55.6327868852459, "grad_norm": 2.8928356170654297, "learning_rate": 8.665484670962145e-06, "loss": 0.128, "step": 16968 }, { "epoch": 55.636065573770495, "grad_norm": 3.0882580280303955, "learning_rate": 8.664432289717588e-06, "loss": 0.3106, "step": 16969 }, { "epoch": 55.63934426229508, "grad_norm": 2.752141237258911, "learning_rate": 8.663379923532945e-06, "loss": 0.0871, "step": 16970 }, { "epoch": 55.64262295081967, "grad_norm": 3.9438772201538086, "learning_rate": 8.662327572420084e-06, "loss": 0.2596, "step": 16971 }, { "epoch": 55.64590163934426, "grad_norm": 3.2435877323150635, "learning_rate": 8.661275236390866e-06, "loss": 0.2043, "step": 16972 }, { "epoch": 55.649180327868855, "grad_norm": 3.7441916465759277, "learning_rate": 8.660222915457166e-06, "loss": 0.269, "step": 16973 }, { "epoch": 55.65245901639344, "grad_norm": 2.7917871475219727, "learning_rate": 8.659170609630845e-06, "loss": 0.1411, "step": 16974 }, { "epoch": 55.65573770491803, "grad_norm": 3.6583971977233887, "learning_rate": 8.65811831892377e-06, "loss": 0.2322, "step": 16975 }, { "epoch": 55.65901639344262, "grad_norm": 4.154490947723389, "learning_rate": 8.657066043347803e-06, "loss": 0.366, "step": 16976 }, { "epoch": 55.662295081967216, "grad_norm": 2.493849515914917, "learning_rate": 8.656013782914817e-06, "loss": 0.1407, "step": 16977 }, { "epoch": 55.665573770491804, "grad_norm": 3.491544485092163, "learning_rate": 8.65496153763667e-06, "loss": 0.1651, "step": 16978 }, { "epoch": 55.66885245901639, "grad_norm": 8.109122276306152, "learning_rate": 8.653909307525232e-06, "loss": 0.3307, "step": 16979 }, { "epoch": 55.67213114754098, "grad_norm": 3.136368989944458, "learning_rate": 8.652857092592367e-06, "loss": 0.1754, "step": 16980 }, { "epoch": 55.675409836065576, "grad_norm": 4.164614200592041, "learning_rate": 8.651804892849933e-06, "loss": 0.3224, "step": 16981 }, { "epoch": 55.678688524590164, "grad_norm": 2.955458879470825, "learning_rate": 8.650752708309807e-06, "loss": 0.3048, "step": 16982 }, { "epoch": 55.68196721311475, "grad_norm": 2.887162208557129, "learning_rate": 8.649700538983845e-06, "loss": 0.2096, "step": 16983 }, { "epoch": 55.68524590163934, "grad_norm": 4.517714977264404, "learning_rate": 8.648648384883916e-06, "loss": 0.2765, "step": 16984 }, { "epoch": 55.68852459016394, "grad_norm": 3.2439334392547607, "learning_rate": 8.647596246021874e-06, "loss": 0.4286, "step": 16985 }, { "epoch": 55.691803278688525, "grad_norm": 3.147953987121582, "learning_rate": 8.646544122409596e-06, "loss": 0.0988, "step": 16986 }, { "epoch": 55.69508196721311, "grad_norm": 2.7331619262695312, "learning_rate": 8.64549201405894e-06, "loss": 0.1146, "step": 16987 }, { "epoch": 55.6983606557377, "grad_norm": 2.7362611293792725, "learning_rate": 8.64443992098177e-06, "loss": 0.1782, "step": 16988 }, { "epoch": 55.7016393442623, "grad_norm": 2.7321085929870605, "learning_rate": 8.643387843189947e-06, "loss": 0.2148, "step": 16989 }, { "epoch": 55.704918032786885, "grad_norm": 3.585340976715088, "learning_rate": 8.642335780695339e-06, "loss": 0.2966, "step": 16990 }, { "epoch": 55.708196721311474, "grad_norm": 3.0077061653137207, "learning_rate": 8.641283733509804e-06, "loss": 0.2438, "step": 16991 }, { "epoch": 55.71147540983607, "grad_norm": 4.243002891540527, "learning_rate": 8.640231701645213e-06, "loss": 0.1518, "step": 16992 }, { "epoch": 55.71475409836066, "grad_norm": 3.738676071166992, "learning_rate": 8.639179685113419e-06, "loss": 0.3608, "step": 16993 }, { "epoch": 55.718032786885246, "grad_norm": 3.917940616607666, "learning_rate": 8.638127683926287e-06, "loss": 0.3028, "step": 16994 }, { "epoch": 55.721311475409834, "grad_norm": 3.279355525970459, "learning_rate": 8.637075698095686e-06, "loss": 0.1947, "step": 16995 }, { "epoch": 55.72459016393443, "grad_norm": 3.4845399856567383, "learning_rate": 8.636023727633472e-06, "loss": 0.1509, "step": 16996 }, { "epoch": 55.72786885245902, "grad_norm": 4.380299091339111, "learning_rate": 8.63497177255151e-06, "loss": 0.2655, "step": 16997 }, { "epoch": 55.731147540983606, "grad_norm": 2.8212966918945312, "learning_rate": 8.633919832861655e-06, "loss": 0.1461, "step": 16998 }, { "epoch": 55.734426229508195, "grad_norm": 3.625936508178711, "learning_rate": 8.632867908575779e-06, "loss": 0.3728, "step": 16999 }, { "epoch": 55.73770491803279, "grad_norm": 3.7250094413757324, "learning_rate": 8.631815999705739e-06, "loss": 0.2159, "step": 17000 }, { "epoch": 55.74098360655738, "grad_norm": 3.112560272216797, "learning_rate": 8.630764106263397e-06, "loss": 0.1638, "step": 17001 }, { "epoch": 55.74426229508197, "grad_norm": 2.7432124614715576, "learning_rate": 8.629712228260613e-06, "loss": 0.2059, "step": 17002 }, { "epoch": 55.747540983606555, "grad_norm": 2.4366135597229004, "learning_rate": 8.628660365709243e-06, "loss": 0.1824, "step": 17003 }, { "epoch": 55.75081967213115, "grad_norm": 2.8861632347106934, "learning_rate": 8.627608518621159e-06, "loss": 0.1207, "step": 17004 }, { "epoch": 55.75409836065574, "grad_norm": 3.053264856338501, "learning_rate": 8.626556687008214e-06, "loss": 0.2031, "step": 17005 }, { "epoch": 55.75737704918033, "grad_norm": 3.710254430770874, "learning_rate": 8.625504870882271e-06, "loss": 0.3055, "step": 17006 }, { "epoch": 55.760655737704916, "grad_norm": 2.5439019203186035, "learning_rate": 8.624453070255186e-06, "loss": 0.1368, "step": 17007 }, { "epoch": 55.76393442622951, "grad_norm": 3.138190984725952, "learning_rate": 8.623401285138828e-06, "loss": 0.2102, "step": 17008 }, { "epoch": 55.7672131147541, "grad_norm": 3.75766658782959, "learning_rate": 8.622349515545051e-06, "loss": 0.3089, "step": 17009 }, { "epoch": 55.77049180327869, "grad_norm": 3.3359313011169434, "learning_rate": 8.621297761485715e-06, "loss": 0.1729, "step": 17010 }, { "epoch": 55.773770491803276, "grad_norm": 3.2854180335998535, "learning_rate": 8.620246022972675e-06, "loss": 0.4356, "step": 17011 }, { "epoch": 55.77704918032787, "grad_norm": 3.3122613430023193, "learning_rate": 8.619194300017802e-06, "loss": 0.2604, "step": 17012 }, { "epoch": 55.78032786885246, "grad_norm": 3.2603759765625, "learning_rate": 8.618142592632949e-06, "loss": 0.2539, "step": 17013 }, { "epoch": 55.78360655737705, "grad_norm": 3.6694602966308594, "learning_rate": 8.617090900829972e-06, "loss": 0.1748, "step": 17014 }, { "epoch": 55.78688524590164, "grad_norm": 3.713165760040283, "learning_rate": 8.616039224620736e-06, "loss": 0.2195, "step": 17015 }, { "epoch": 55.79016393442623, "grad_norm": 3.378695487976074, "learning_rate": 8.614987564017094e-06, "loss": 0.2092, "step": 17016 }, { "epoch": 55.79344262295082, "grad_norm": 3.580554962158203, "learning_rate": 8.613935919030908e-06, "loss": 0.424, "step": 17017 }, { "epoch": 55.79672131147541, "grad_norm": 3.191828966140747, "learning_rate": 8.612884289674034e-06, "loss": 0.1643, "step": 17018 }, { "epoch": 55.8, "grad_norm": 3.4590632915496826, "learning_rate": 8.611832675958335e-06, "loss": 0.2706, "step": 17019 }, { "epoch": 55.80327868852459, "grad_norm": 4.728450298309326, "learning_rate": 8.610781077895664e-06, "loss": 0.2204, "step": 17020 }, { "epoch": 55.80655737704918, "grad_norm": 2.799079656600952, "learning_rate": 8.60972949549788e-06, "loss": 0.1274, "step": 17021 }, { "epoch": 55.80983606557377, "grad_norm": 3.1188652515411377, "learning_rate": 8.608677928776846e-06, "loss": 0.3361, "step": 17022 }, { "epoch": 55.81311475409836, "grad_norm": 2.846527576446533, "learning_rate": 8.60762637774441e-06, "loss": 0.1322, "step": 17023 }, { "epoch": 55.81639344262295, "grad_norm": 3.4488372802734375, "learning_rate": 8.606574842412434e-06, "loss": 0.1395, "step": 17024 }, { "epoch": 55.81967213114754, "grad_norm": 3.077723979949951, "learning_rate": 8.60552332279278e-06, "loss": 0.1835, "step": 17025 }, { "epoch": 55.82295081967213, "grad_norm": 3.242337942123413, "learning_rate": 8.604471818897297e-06, "loss": 0.1616, "step": 17026 }, { "epoch": 55.82622950819672, "grad_norm": 3.6477346420288086, "learning_rate": 8.603420330737849e-06, "loss": 0.2718, "step": 17027 }, { "epoch": 55.829508196721314, "grad_norm": 3.522629737854004, "learning_rate": 8.602368858326287e-06, "loss": 0.2321, "step": 17028 }, { "epoch": 55.8327868852459, "grad_norm": 3.2161269187927246, "learning_rate": 8.601317401674465e-06, "loss": 0.2786, "step": 17029 }, { "epoch": 55.83606557377049, "grad_norm": 2.468167781829834, "learning_rate": 8.600265960794247e-06, "loss": 0.0773, "step": 17030 }, { "epoch": 55.83934426229508, "grad_norm": 2.630392551422119, "learning_rate": 8.599214535697487e-06, "loss": 0.1173, "step": 17031 }, { "epoch": 55.842622950819674, "grad_norm": 3.220677137374878, "learning_rate": 8.598163126396039e-06, "loss": 0.4066, "step": 17032 }, { "epoch": 55.84590163934426, "grad_norm": 3.2692697048187256, "learning_rate": 8.597111732901756e-06, "loss": 0.2643, "step": 17033 }, { "epoch": 55.84918032786885, "grad_norm": 3.8316471576690674, "learning_rate": 8.5960603552265e-06, "loss": 0.1912, "step": 17034 }, { "epoch": 55.85245901639344, "grad_norm": 3.7656068801879883, "learning_rate": 8.595008993382124e-06, "loss": 0.2416, "step": 17035 }, { "epoch": 55.855737704918035, "grad_norm": 3.1198325157165527, "learning_rate": 8.593957647380482e-06, "loss": 0.1939, "step": 17036 }, { "epoch": 55.85901639344262, "grad_norm": 3.6278457641601562, "learning_rate": 8.592906317233426e-06, "loss": 0.2634, "step": 17037 }, { "epoch": 55.86229508196721, "grad_norm": 4.071266174316406, "learning_rate": 8.591855002952816e-06, "loss": 0.1858, "step": 17038 }, { "epoch": 55.86557377049181, "grad_norm": 4.049790382385254, "learning_rate": 8.590803704550507e-06, "loss": 0.3197, "step": 17039 }, { "epoch": 55.868852459016395, "grad_norm": 3.5697686672210693, "learning_rate": 8.589752422038351e-06, "loss": 0.1985, "step": 17040 }, { "epoch": 55.87213114754098, "grad_norm": 3.434607744216919, "learning_rate": 8.5887011554282e-06, "loss": 0.1882, "step": 17041 }, { "epoch": 55.87540983606557, "grad_norm": 2.8170652389526367, "learning_rate": 8.587649904731911e-06, "loss": 0.2018, "step": 17042 }, { "epoch": 55.87868852459017, "grad_norm": 3.2213783264160156, "learning_rate": 8.586598669961341e-06, "loss": 0.2643, "step": 17043 }, { "epoch": 55.881967213114756, "grad_norm": 2.6631827354431152, "learning_rate": 8.585547451128338e-06, "loss": 0.1463, "step": 17044 }, { "epoch": 55.885245901639344, "grad_norm": 3.032414197921753, "learning_rate": 8.58449624824476e-06, "loss": 0.109, "step": 17045 }, { "epoch": 55.88852459016393, "grad_norm": 3.0600545406341553, "learning_rate": 8.583445061322458e-06, "loss": 0.1958, "step": 17046 }, { "epoch": 55.89180327868853, "grad_norm": 3.289884090423584, "learning_rate": 8.582393890373282e-06, "loss": 0.2125, "step": 17047 }, { "epoch": 55.895081967213116, "grad_norm": 3.6457698345184326, "learning_rate": 8.581342735409096e-06, "loss": 0.1542, "step": 17048 }, { "epoch": 55.898360655737704, "grad_norm": 3.3713138103485107, "learning_rate": 8.580291596441741e-06, "loss": 0.1641, "step": 17049 }, { "epoch": 55.90163934426229, "grad_norm": 3.1138041019439697, "learning_rate": 8.579240473483073e-06, "loss": 0.2468, "step": 17050 }, { "epoch": 55.90491803278689, "grad_norm": 3.171912908554077, "learning_rate": 8.57818936654495e-06, "loss": 0.2528, "step": 17051 }, { "epoch": 55.90819672131148, "grad_norm": 4.479034423828125, "learning_rate": 8.577138275639219e-06, "loss": 0.2502, "step": 17052 }, { "epoch": 55.911475409836065, "grad_norm": 2.4477169513702393, "learning_rate": 8.576087200777732e-06, "loss": 0.2568, "step": 17053 }, { "epoch": 55.91475409836065, "grad_norm": 3.675905227661133, "learning_rate": 8.575036141972346e-06, "loss": 0.4154, "step": 17054 }, { "epoch": 55.91803278688525, "grad_norm": 9.42292594909668, "learning_rate": 8.573985099234902e-06, "loss": 0.2762, "step": 17055 }, { "epoch": 55.92131147540984, "grad_norm": 3.2127525806427, "learning_rate": 8.572934072577264e-06, "loss": 0.1484, "step": 17056 }, { "epoch": 55.924590163934425, "grad_norm": 3.286137819290161, "learning_rate": 8.571883062011279e-06, "loss": 0.1075, "step": 17057 }, { "epoch": 55.927868852459014, "grad_norm": 2.9574224948883057, "learning_rate": 8.570832067548796e-06, "loss": 0.3427, "step": 17058 }, { "epoch": 55.93114754098361, "grad_norm": 4.142163276672363, "learning_rate": 8.569781089201663e-06, "loss": 0.1761, "step": 17059 }, { "epoch": 55.9344262295082, "grad_norm": 3.5640177726745605, "learning_rate": 8.56873012698174e-06, "loss": 0.2824, "step": 17060 }, { "epoch": 55.937704918032786, "grad_norm": 2.6207222938537598, "learning_rate": 8.567679180900872e-06, "loss": 0.433, "step": 17061 }, { "epoch": 55.940983606557374, "grad_norm": 4.02644157409668, "learning_rate": 8.566628250970912e-06, "loss": 0.1151, "step": 17062 }, { "epoch": 55.94426229508197, "grad_norm": 3.330759286880493, "learning_rate": 8.565577337203705e-06, "loss": 0.0995, "step": 17063 }, { "epoch": 55.94754098360656, "grad_norm": 17.98466682434082, "learning_rate": 8.564526439611108e-06, "loss": 0.1912, "step": 17064 }, { "epoch": 55.950819672131146, "grad_norm": 2.821296453475952, "learning_rate": 8.563475558204968e-06, "loss": 0.3232, "step": 17065 }, { "epoch": 55.954098360655735, "grad_norm": 2.837923288345337, "learning_rate": 8.562424692997136e-06, "loss": 0.1093, "step": 17066 }, { "epoch": 55.95737704918033, "grad_norm": 3.151238441467285, "learning_rate": 8.561373843999457e-06, "loss": 0.257, "step": 17067 }, { "epoch": 55.96065573770492, "grad_norm": 3.4990234375, "learning_rate": 8.560323011223784e-06, "loss": 0.4956, "step": 17068 }, { "epoch": 55.96393442622951, "grad_norm": 3.18654203414917, "learning_rate": 8.559272194681967e-06, "loss": 0.1246, "step": 17069 }, { "epoch": 55.967213114754095, "grad_norm": 3.644745349884033, "learning_rate": 8.558221394385853e-06, "loss": 0.507, "step": 17070 }, { "epoch": 55.97049180327869, "grad_norm": 3.1780993938446045, "learning_rate": 8.557170610347293e-06, "loss": 0.2748, "step": 17071 }, { "epoch": 55.97377049180328, "grad_norm": 4.548701286315918, "learning_rate": 8.556119842578133e-06, "loss": 0.2415, "step": 17072 }, { "epoch": 55.97704918032787, "grad_norm": 3.630800247192383, "learning_rate": 8.555069091090222e-06, "loss": 0.2642, "step": 17073 }, { "epoch": 55.980327868852456, "grad_norm": 3.244123935699463, "learning_rate": 8.554018355895413e-06, "loss": 0.1543, "step": 17074 }, { "epoch": 55.98360655737705, "grad_norm": 3.0155038833618164, "learning_rate": 8.55296763700555e-06, "loss": 0.3885, "step": 17075 }, { "epoch": 55.98688524590164, "grad_norm": 3.8525571823120117, "learning_rate": 8.551916934432479e-06, "loss": 0.2039, "step": 17076 }, { "epoch": 55.99016393442623, "grad_norm": 3.4782683849334717, "learning_rate": 8.550866248188052e-06, "loss": 0.4071, "step": 17077 }, { "epoch": 55.993442622950816, "grad_norm": 3.0450892448425293, "learning_rate": 8.549815578284115e-06, "loss": 0.1785, "step": 17078 }, { "epoch": 55.99672131147541, "grad_norm": 6.429236888885498, "learning_rate": 8.548764924732516e-06, "loss": 0.3813, "step": 17079 }, { "epoch": 56.0, "grad_norm": 3.414504051208496, "learning_rate": 8.5477142875451e-06, "loss": 0.3779, "step": 17080 }, { "epoch": 56.00327868852459, "grad_norm": 3.3723301887512207, "learning_rate": 8.546663666733712e-06, "loss": 0.2163, "step": 17081 }, { "epoch": 56.006557377049184, "grad_norm": 3.923114061355591, "learning_rate": 8.545613062310207e-06, "loss": 0.1657, "step": 17082 }, { "epoch": 56.00983606557377, "grad_norm": 3.4950883388519287, "learning_rate": 8.544562474286426e-06, "loss": 0.198, "step": 17083 }, { "epoch": 56.01311475409836, "grad_norm": 3.7266838550567627, "learning_rate": 8.543511902674217e-06, "loss": 0.2527, "step": 17084 }, { "epoch": 56.01639344262295, "grad_norm": 2.9509315490722656, "learning_rate": 8.54246134748542e-06, "loss": 0.2877, "step": 17085 }, { "epoch": 56.019672131147544, "grad_norm": 3.414599657058716, "learning_rate": 8.541410808731894e-06, "loss": 0.2738, "step": 17086 }, { "epoch": 56.02295081967213, "grad_norm": 3.39207124710083, "learning_rate": 8.540360286425476e-06, "loss": 0.1389, "step": 17087 }, { "epoch": 56.02622950819672, "grad_norm": 5.910371780395508, "learning_rate": 8.539309780578013e-06, "loss": 0.3143, "step": 17088 }, { "epoch": 56.02950819672131, "grad_norm": 3.4250433444976807, "learning_rate": 8.538259291201347e-06, "loss": 0.1679, "step": 17089 }, { "epoch": 56.032786885245905, "grad_norm": 2.6123032569885254, "learning_rate": 8.537208818307331e-06, "loss": 0.0668, "step": 17090 }, { "epoch": 56.03606557377049, "grad_norm": 3.007122039794922, "learning_rate": 8.536158361907808e-06, "loss": 0.3986, "step": 17091 }, { "epoch": 56.03934426229508, "grad_norm": 2.8830840587615967, "learning_rate": 8.535107922014621e-06, "loss": 0.1707, "step": 17092 }, { "epoch": 56.04262295081967, "grad_norm": 4.051987171173096, "learning_rate": 8.534057498639613e-06, "loss": 0.2451, "step": 17093 }, { "epoch": 56.045901639344265, "grad_norm": 2.7299463748931885, "learning_rate": 8.533007091794631e-06, "loss": 0.2008, "step": 17094 }, { "epoch": 56.049180327868854, "grad_norm": 3.7872273921966553, "learning_rate": 8.531956701491522e-06, "loss": 0.3078, "step": 17095 }, { "epoch": 56.05245901639344, "grad_norm": 3.045733690261841, "learning_rate": 8.530906327742123e-06, "loss": 0.1356, "step": 17096 }, { "epoch": 56.05573770491803, "grad_norm": 3.420271635055542, "learning_rate": 8.529855970558287e-06, "loss": 0.2584, "step": 17097 }, { "epoch": 56.059016393442626, "grad_norm": 3.361109495162964, "learning_rate": 8.528805629951851e-06, "loss": 0.2447, "step": 17098 }, { "epoch": 56.062295081967214, "grad_norm": 4.0985636711120605, "learning_rate": 8.527755305934663e-06, "loss": 0.2208, "step": 17099 }, { "epoch": 56.0655737704918, "grad_norm": 2.756425142288208, "learning_rate": 8.526704998518563e-06, "loss": 0.2512, "step": 17100 }, { "epoch": 56.06885245901639, "grad_norm": 3.3786778450012207, "learning_rate": 8.525654707715397e-06, "loss": 0.1499, "step": 17101 }, { "epoch": 56.072131147540986, "grad_norm": 2.9773852825164795, "learning_rate": 8.524604433537006e-06, "loss": 0.118, "step": 17102 }, { "epoch": 56.075409836065575, "grad_norm": 3.9824862480163574, "learning_rate": 8.523554175995234e-06, "loss": 0.1091, "step": 17103 }, { "epoch": 56.07868852459016, "grad_norm": 4.195897102355957, "learning_rate": 8.522503935101926e-06, "loss": 0.2738, "step": 17104 }, { "epoch": 56.08196721311475, "grad_norm": 3.2197957038879395, "learning_rate": 8.52145371086892e-06, "loss": 0.1223, "step": 17105 }, { "epoch": 56.08524590163935, "grad_norm": 3.4666244983673096, "learning_rate": 8.520403503308065e-06, "loss": 0.2151, "step": 17106 }, { "epoch": 56.088524590163935, "grad_norm": 3.1455588340759277, "learning_rate": 8.51935331243119e-06, "loss": 0.2543, "step": 17107 }, { "epoch": 56.09180327868852, "grad_norm": 3.780181646347046, "learning_rate": 8.518303138250154e-06, "loss": 0.1553, "step": 17108 }, { "epoch": 56.09508196721311, "grad_norm": 3.421456813812256, "learning_rate": 8.51725298077679e-06, "loss": 0.4194, "step": 17109 }, { "epoch": 56.09836065573771, "grad_norm": 7.060202598571777, "learning_rate": 8.516202840022939e-06, "loss": 0.2088, "step": 17110 }, { "epoch": 56.101639344262296, "grad_norm": 11.283127784729004, "learning_rate": 8.51515271600044e-06, "loss": 0.1361, "step": 17111 }, { "epoch": 56.104918032786884, "grad_norm": 3.705305576324463, "learning_rate": 8.514102608721141e-06, "loss": 0.3201, "step": 17112 }, { "epoch": 56.10819672131147, "grad_norm": 3.8577804565429688, "learning_rate": 8.513052518196883e-06, "loss": 0.2097, "step": 17113 }, { "epoch": 56.11147540983607, "grad_norm": 2.6741623878479004, "learning_rate": 8.512002444439502e-06, "loss": 0.1647, "step": 17114 }, { "epoch": 56.114754098360656, "grad_norm": 3.2906713485717773, "learning_rate": 8.510952387460836e-06, "loss": 0.2588, "step": 17115 }, { "epoch": 56.118032786885244, "grad_norm": 3.6072778701782227, "learning_rate": 8.509902347272734e-06, "loss": 0.1656, "step": 17116 }, { "epoch": 56.12131147540983, "grad_norm": 3.9248197078704834, "learning_rate": 8.508852323887033e-06, "loss": 0.3204, "step": 17117 }, { "epoch": 56.12459016393443, "grad_norm": 3.672560453414917, "learning_rate": 8.507802317315573e-06, "loss": 0.2144, "step": 17118 }, { "epoch": 56.12786885245902, "grad_norm": 3.148778200149536, "learning_rate": 8.506752327570194e-06, "loss": 0.2514, "step": 17119 }, { "epoch": 56.131147540983605, "grad_norm": 3.3766849040985107, "learning_rate": 8.50570235466273e-06, "loss": 0.2045, "step": 17120 }, { "epoch": 56.13442622950819, "grad_norm": 7.546934127807617, "learning_rate": 8.504652398605028e-06, "loss": 0.1965, "step": 17121 }, { "epoch": 56.13770491803279, "grad_norm": 2.7117698192596436, "learning_rate": 8.503602459408929e-06, "loss": 0.0997, "step": 17122 }, { "epoch": 56.14098360655738, "grad_norm": 2.927602529525757, "learning_rate": 8.502552537086262e-06, "loss": 0.204, "step": 17123 }, { "epoch": 56.144262295081965, "grad_norm": 3.4000587463378906, "learning_rate": 8.501502631648874e-06, "loss": 0.3732, "step": 17124 }, { "epoch": 56.14754098360656, "grad_norm": 2.8919317722320557, "learning_rate": 8.500452743108604e-06, "loss": 0.1507, "step": 17125 }, { "epoch": 56.15081967213115, "grad_norm": 2.9899981021881104, "learning_rate": 8.499402871477286e-06, "loss": 0.1685, "step": 17126 }, { "epoch": 56.15409836065574, "grad_norm": 2.973515272140503, "learning_rate": 8.498353016766763e-06, "loss": 0.2091, "step": 17127 }, { "epoch": 56.157377049180326, "grad_norm": 3.546967029571533, "learning_rate": 8.497303178988869e-06, "loss": 0.2544, "step": 17128 }, { "epoch": 56.16065573770492, "grad_norm": 5.147096633911133, "learning_rate": 8.496253358155444e-06, "loss": 0.1399, "step": 17129 }, { "epoch": 56.16393442622951, "grad_norm": 3.3342103958129883, "learning_rate": 8.495203554278328e-06, "loss": 0.2139, "step": 17130 }, { "epoch": 56.1672131147541, "grad_norm": 2.951972484588623, "learning_rate": 8.494153767369357e-06, "loss": 0.0739, "step": 17131 }, { "epoch": 56.170491803278686, "grad_norm": 3.829023599624634, "learning_rate": 8.493103997440367e-06, "loss": 0.2338, "step": 17132 }, { "epoch": 56.17377049180328, "grad_norm": 2.9839203357696533, "learning_rate": 8.492054244503193e-06, "loss": 0.1104, "step": 17133 }, { "epoch": 56.17704918032787, "grad_norm": 3.1299142837524414, "learning_rate": 8.491004508569679e-06, "loss": 0.2342, "step": 17134 }, { "epoch": 56.18032786885246, "grad_norm": 2.5892961025238037, "learning_rate": 8.489954789651658e-06, "loss": 0.2302, "step": 17135 }, { "epoch": 56.18360655737705, "grad_norm": 103.98053741455078, "learning_rate": 8.488905087760966e-06, "loss": 0.2689, "step": 17136 }, { "epoch": 56.18688524590164, "grad_norm": 3.5892343521118164, "learning_rate": 8.487855402909438e-06, "loss": 0.1893, "step": 17137 }, { "epoch": 56.19016393442623, "grad_norm": 3.4014506340026855, "learning_rate": 8.486805735108916e-06, "loss": 0.2584, "step": 17138 }, { "epoch": 56.19344262295082, "grad_norm": 4.4889817237854, "learning_rate": 8.485756084371233e-06, "loss": 0.2443, "step": 17139 }, { "epoch": 56.19672131147541, "grad_norm": 4.017736434936523, "learning_rate": 8.484706450708223e-06, "loss": 0.2794, "step": 17140 }, { "epoch": 56.2, "grad_norm": 3.56931471824646, "learning_rate": 8.48365683413172e-06, "loss": 0.1224, "step": 17141 }, { "epoch": 56.20327868852459, "grad_norm": 3.629803419113159, "learning_rate": 8.482607234653568e-06, "loss": 0.1303, "step": 17142 }, { "epoch": 56.20655737704918, "grad_norm": 4.290371894836426, "learning_rate": 8.481557652285596e-06, "loss": 0.2545, "step": 17143 }, { "epoch": 56.20983606557377, "grad_norm": 5.412858963012695, "learning_rate": 8.48050808703964e-06, "loss": 0.2359, "step": 17144 }, { "epoch": 56.21311475409836, "grad_norm": 14.058627128601074, "learning_rate": 8.479458538927536e-06, "loss": 0.189, "step": 17145 }, { "epoch": 56.21639344262295, "grad_norm": 4.179566860198975, "learning_rate": 8.478409007961113e-06, "loss": 0.2265, "step": 17146 }, { "epoch": 56.21967213114754, "grad_norm": 5.644433975219727, "learning_rate": 8.477359494152215e-06, "loss": 0.2323, "step": 17147 }, { "epoch": 56.22295081967213, "grad_norm": 6.062505722045898, "learning_rate": 8.476309997512672e-06, "loss": 0.2829, "step": 17148 }, { "epoch": 56.226229508196724, "grad_norm": 5.150073051452637, "learning_rate": 8.475260518054316e-06, "loss": 0.1484, "step": 17149 }, { "epoch": 56.22950819672131, "grad_norm": 13.954510688781738, "learning_rate": 8.474211055788984e-06, "loss": 0.193, "step": 17150 }, { "epoch": 56.2327868852459, "grad_norm": 3.4014689922332764, "learning_rate": 8.47316161072851e-06, "loss": 0.2669, "step": 17151 }, { "epoch": 56.23606557377049, "grad_norm": 4.072993278503418, "learning_rate": 8.472112182884724e-06, "loss": 0.3043, "step": 17152 }, { "epoch": 56.239344262295084, "grad_norm": 4.071061611175537, "learning_rate": 8.471062772269463e-06, "loss": 0.2622, "step": 17153 }, { "epoch": 56.24262295081967, "grad_norm": 3.7158660888671875, "learning_rate": 8.470013378894559e-06, "loss": 0.1717, "step": 17154 }, { "epoch": 56.24590163934426, "grad_norm": 4.4445881843566895, "learning_rate": 8.468964002771842e-06, "loss": 0.3481, "step": 17155 }, { "epoch": 56.24918032786885, "grad_norm": 3.979431390762329, "learning_rate": 8.467914643913153e-06, "loss": 0.217, "step": 17156 }, { "epoch": 56.252459016393445, "grad_norm": 3.2112250328063965, "learning_rate": 8.466865302330317e-06, "loss": 0.4634, "step": 17157 }, { "epoch": 56.25573770491803, "grad_norm": 2.965057849884033, "learning_rate": 8.46581597803517e-06, "loss": 0.2624, "step": 17158 }, { "epoch": 56.25901639344262, "grad_norm": 5.1134257316589355, "learning_rate": 8.464766671039538e-06, "loss": 0.3779, "step": 17159 }, { "epoch": 56.26229508196721, "grad_norm": 4.639023303985596, "learning_rate": 8.463717381355263e-06, "loss": 0.2264, "step": 17160 }, { "epoch": 56.265573770491805, "grad_norm": 3.215714454650879, "learning_rate": 8.46266810899417e-06, "loss": 0.0901, "step": 17161 }, { "epoch": 56.268852459016394, "grad_norm": 3.222219944000244, "learning_rate": 8.461618853968095e-06, "loss": 0.3333, "step": 17162 }, { "epoch": 56.27213114754098, "grad_norm": 3.291646957397461, "learning_rate": 8.460569616288862e-06, "loss": 0.2909, "step": 17163 }, { "epoch": 56.27540983606557, "grad_norm": 6.039596080780029, "learning_rate": 8.459520395968312e-06, "loss": 0.1827, "step": 17164 }, { "epoch": 56.278688524590166, "grad_norm": 2.943514108657837, "learning_rate": 8.45847119301827e-06, "loss": 0.2193, "step": 17165 }, { "epoch": 56.281967213114754, "grad_norm": 3.0675745010375977, "learning_rate": 8.457422007450568e-06, "loss": 0.254, "step": 17166 }, { "epoch": 56.28524590163934, "grad_norm": 3.271700859069824, "learning_rate": 8.456372839277033e-06, "loss": 0.1215, "step": 17167 }, { "epoch": 56.28852459016394, "grad_norm": 3.679840326309204, "learning_rate": 8.455323688509504e-06, "loss": 0.2946, "step": 17168 }, { "epoch": 56.291803278688526, "grad_norm": 3.7886457443237305, "learning_rate": 8.454274555159805e-06, "loss": 0.2133, "step": 17169 }, { "epoch": 56.295081967213115, "grad_norm": 3.6796460151672363, "learning_rate": 8.453225439239769e-06, "loss": 0.1576, "step": 17170 }, { "epoch": 56.2983606557377, "grad_norm": 2.862895965576172, "learning_rate": 8.452176340761224e-06, "loss": 0.1304, "step": 17171 }, { "epoch": 56.3016393442623, "grad_norm": 7.173003196716309, "learning_rate": 8.451127259735996e-06, "loss": 0.2877, "step": 17172 }, { "epoch": 56.30491803278689, "grad_norm": 3.3260841369628906, "learning_rate": 8.450078196175921e-06, "loss": 0.2109, "step": 17173 }, { "epoch": 56.308196721311475, "grad_norm": 3.3411166667938232, "learning_rate": 8.449029150092828e-06, "loss": 0.1253, "step": 17174 }, { "epoch": 56.31147540983606, "grad_norm": 3.277657985687256, "learning_rate": 8.447980121498541e-06, "loss": 0.3305, "step": 17175 }, { "epoch": 56.31475409836066, "grad_norm": 3.463884115219116, "learning_rate": 8.446931110404892e-06, "loss": 0.1235, "step": 17176 }, { "epoch": 56.31803278688525, "grad_norm": 3.130929708480835, "learning_rate": 8.445882116823711e-06, "loss": 0.2242, "step": 17177 }, { "epoch": 56.321311475409836, "grad_norm": 4.309383392333984, "learning_rate": 8.444833140766824e-06, "loss": 0.112, "step": 17178 }, { "epoch": 56.324590163934424, "grad_norm": 3.036515474319458, "learning_rate": 8.443784182246062e-06, "loss": 0.1689, "step": 17179 }, { "epoch": 56.32786885245902, "grad_norm": 3.6716036796569824, "learning_rate": 8.44273524127325e-06, "loss": 0.2123, "step": 17180 }, { "epoch": 56.33114754098361, "grad_norm": 3.400843620300293, "learning_rate": 8.441686317860219e-06, "loss": 0.1646, "step": 17181 }, { "epoch": 56.334426229508196, "grad_norm": 3.6107046604156494, "learning_rate": 8.440637412018792e-06, "loss": 0.5194, "step": 17182 }, { "epoch": 56.337704918032784, "grad_norm": 4.10673189163208, "learning_rate": 8.439588523760802e-06, "loss": 0.1189, "step": 17183 }, { "epoch": 56.34098360655738, "grad_norm": 3.2566468715667725, "learning_rate": 8.438539653098073e-06, "loss": 0.3121, "step": 17184 }, { "epoch": 56.34426229508197, "grad_norm": 4.058874130249023, "learning_rate": 8.43749080004243e-06, "loss": 0.3749, "step": 17185 }, { "epoch": 56.34754098360656, "grad_norm": 3.564401388168335, "learning_rate": 8.436441964605708e-06, "loss": 0.1927, "step": 17186 }, { "epoch": 56.350819672131145, "grad_norm": 3.1528069972991943, "learning_rate": 8.435393146799727e-06, "loss": 0.2129, "step": 17187 }, { "epoch": 56.35409836065574, "grad_norm": 3.3408989906311035, "learning_rate": 8.434344346636314e-06, "loss": 0.1471, "step": 17188 }, { "epoch": 56.35737704918033, "grad_norm": 3.529883623123169, "learning_rate": 8.433295564127294e-06, "loss": 0.1896, "step": 17189 }, { "epoch": 56.36065573770492, "grad_norm": 2.945847749710083, "learning_rate": 8.432246799284498e-06, "loss": 0.1476, "step": 17190 }, { "epoch": 56.363934426229505, "grad_norm": 3.1203677654266357, "learning_rate": 8.431198052119753e-06, "loss": 0.1066, "step": 17191 }, { "epoch": 56.3672131147541, "grad_norm": 3.7577743530273438, "learning_rate": 8.430149322644878e-06, "loss": 0.3655, "step": 17192 }, { "epoch": 56.37049180327869, "grad_norm": 3.2715234756469727, "learning_rate": 8.429100610871698e-06, "loss": 0.1306, "step": 17193 }, { "epoch": 56.37377049180328, "grad_norm": 2.7253482341766357, "learning_rate": 8.428051916812046e-06, "loss": 0.1532, "step": 17194 }, { "epoch": 56.377049180327866, "grad_norm": 4.73569917678833, "learning_rate": 8.427003240477743e-06, "loss": 0.1241, "step": 17195 }, { "epoch": 56.38032786885246, "grad_norm": 3.4078145027160645, "learning_rate": 8.425954581880614e-06, "loss": 0.1851, "step": 17196 }, { "epoch": 56.38360655737705, "grad_norm": 2.9026880264282227, "learning_rate": 8.424905941032484e-06, "loss": 0.2132, "step": 17197 }, { "epoch": 56.38688524590164, "grad_norm": 2.9171581268310547, "learning_rate": 8.423857317945174e-06, "loss": 0.126, "step": 17198 }, { "epoch": 56.390163934426226, "grad_norm": 3.5534794330596924, "learning_rate": 8.422808712630513e-06, "loss": 0.3266, "step": 17199 }, { "epoch": 56.39344262295082, "grad_norm": 3.4830853939056396, "learning_rate": 8.421760125100327e-06, "loss": 0.1408, "step": 17200 }, { "epoch": 56.39672131147541, "grad_norm": 3.5619983673095703, "learning_rate": 8.420711555366434e-06, "loss": 0.2205, "step": 17201 }, { "epoch": 56.4, "grad_norm": 3.215183973312378, "learning_rate": 8.419663003440657e-06, "loss": 0.1943, "step": 17202 }, { "epoch": 56.40327868852459, "grad_norm": 3.0289461612701416, "learning_rate": 8.418614469334826e-06, "loss": 0.1849, "step": 17203 }, { "epoch": 56.40655737704918, "grad_norm": 2.668567657470703, "learning_rate": 8.417565953060762e-06, "loss": 0.1531, "step": 17204 }, { "epoch": 56.40983606557377, "grad_norm": 3.0582611560821533, "learning_rate": 8.416517454630287e-06, "loss": 0.2309, "step": 17205 }, { "epoch": 56.41311475409836, "grad_norm": 3.371915340423584, "learning_rate": 8.415468974055221e-06, "loss": 0.2904, "step": 17206 }, { "epoch": 56.41639344262295, "grad_norm": 3.748262405395508, "learning_rate": 8.414420511347393e-06, "loss": 0.2549, "step": 17207 }, { "epoch": 56.41967213114754, "grad_norm": 4.083702564239502, "learning_rate": 8.41337206651862e-06, "loss": 0.2575, "step": 17208 }, { "epoch": 56.42295081967213, "grad_norm": 3.4931082725524902, "learning_rate": 8.41232363958073e-06, "loss": 0.2133, "step": 17209 }, { "epoch": 56.42622950819672, "grad_norm": 3.4723763465881348, "learning_rate": 8.41127523054554e-06, "loss": 0.173, "step": 17210 }, { "epoch": 56.429508196721315, "grad_norm": 3.566664695739746, "learning_rate": 8.410226839424871e-06, "loss": 0.1574, "step": 17211 }, { "epoch": 56.4327868852459, "grad_norm": 3.8509280681610107, "learning_rate": 8.40917846623055e-06, "loss": 0.4715, "step": 17212 }, { "epoch": 56.43606557377049, "grad_norm": 3.3298697471618652, "learning_rate": 8.408130110974398e-06, "loss": 0.0879, "step": 17213 }, { "epoch": 56.43934426229508, "grad_norm": 2.0136566162109375, "learning_rate": 8.407081773668231e-06, "loss": 0.033, "step": 17214 }, { "epoch": 56.442622950819676, "grad_norm": 2.9429988861083984, "learning_rate": 8.40603345432387e-06, "loss": 0.1589, "step": 17215 }, { "epoch": 56.445901639344264, "grad_norm": 4.53541374206543, "learning_rate": 8.404985152953144e-06, "loss": 0.2176, "step": 17216 }, { "epoch": 56.44918032786885, "grad_norm": 3.79144549369812, "learning_rate": 8.40393686956787e-06, "loss": 0.2587, "step": 17217 }, { "epoch": 56.45245901639344, "grad_norm": 2.7933177947998047, "learning_rate": 8.402888604179866e-06, "loss": 0.2293, "step": 17218 }, { "epoch": 56.455737704918036, "grad_norm": 3.0284359455108643, "learning_rate": 8.40184035680095e-06, "loss": 0.1829, "step": 17219 }, { "epoch": 56.459016393442624, "grad_norm": 4.05276346206665, "learning_rate": 8.40079212744295e-06, "loss": 0.2409, "step": 17220 }, { "epoch": 56.46229508196721, "grad_norm": 3.2129180431365967, "learning_rate": 8.399743916117679e-06, "loss": 0.1248, "step": 17221 }, { "epoch": 56.4655737704918, "grad_norm": 3.493567943572998, "learning_rate": 8.398695722836963e-06, "loss": 0.1535, "step": 17222 }, { "epoch": 56.4688524590164, "grad_norm": 3.152797222137451, "learning_rate": 8.397647547612615e-06, "loss": 0.2142, "step": 17223 }, { "epoch": 56.472131147540985, "grad_norm": 3.308015823364258, "learning_rate": 8.396599390456453e-06, "loss": 0.1547, "step": 17224 }, { "epoch": 56.47540983606557, "grad_norm": 3.233689069747925, "learning_rate": 8.395551251380304e-06, "loss": 0.288, "step": 17225 }, { "epoch": 56.47868852459016, "grad_norm": 2.6090078353881836, "learning_rate": 8.394503130395984e-06, "loss": 0.2743, "step": 17226 }, { "epoch": 56.48196721311476, "grad_norm": 3.430016040802002, "learning_rate": 8.39345502751531e-06, "loss": 0.2435, "step": 17227 }, { "epoch": 56.485245901639345, "grad_norm": 3.193247079849243, "learning_rate": 8.392406942750097e-06, "loss": 0.1488, "step": 17228 }, { "epoch": 56.488524590163934, "grad_norm": 3.726442337036133, "learning_rate": 8.391358876112172e-06, "loss": 0.2838, "step": 17229 }, { "epoch": 56.49180327868852, "grad_norm": 3.1535377502441406, "learning_rate": 8.390310827613345e-06, "loss": 0.1799, "step": 17230 }, { "epoch": 56.49508196721312, "grad_norm": 3.630133867263794, "learning_rate": 8.389262797265439e-06, "loss": 0.2438, "step": 17231 }, { "epoch": 56.498360655737706, "grad_norm": 3.1718454360961914, "learning_rate": 8.388214785080267e-06, "loss": 0.1842, "step": 17232 }, { "epoch": 56.501639344262294, "grad_norm": 2.606109380722046, "learning_rate": 8.387166791069653e-06, "loss": 0.1875, "step": 17233 }, { "epoch": 56.50491803278688, "grad_norm": 3.095771551132202, "learning_rate": 8.386118815245407e-06, "loss": 0.3459, "step": 17234 }, { "epoch": 56.50819672131148, "grad_norm": 3.4464685916900635, "learning_rate": 8.385070857619353e-06, "loss": 0.2778, "step": 17235 }, { "epoch": 56.511475409836066, "grad_norm": 3.7852938175201416, "learning_rate": 8.384022918203303e-06, "loss": 0.2678, "step": 17236 }, { "epoch": 56.514754098360655, "grad_norm": 3.0186338424682617, "learning_rate": 8.38297499700907e-06, "loss": 0.1701, "step": 17237 }, { "epoch": 56.51803278688524, "grad_norm": 6.282354354858398, "learning_rate": 8.381927094048481e-06, "loss": 0.1441, "step": 17238 }, { "epoch": 56.52131147540984, "grad_norm": 2.592054605484009, "learning_rate": 8.380879209333346e-06, "loss": 0.1493, "step": 17239 }, { "epoch": 56.52459016393443, "grad_norm": 4.357283115386963, "learning_rate": 8.37983134287548e-06, "loss": 0.3015, "step": 17240 }, { "epoch": 56.527868852459015, "grad_norm": 3.371263265609741, "learning_rate": 8.378783494686698e-06, "loss": 0.1494, "step": 17241 }, { "epoch": 56.5311475409836, "grad_norm": 2.9838531017303467, "learning_rate": 8.37773566477882e-06, "loss": 0.1397, "step": 17242 }, { "epoch": 56.5344262295082, "grad_norm": 3.239696502685547, "learning_rate": 8.376687853163662e-06, "loss": 0.1553, "step": 17243 }, { "epoch": 56.53770491803279, "grad_norm": 3.0752837657928467, "learning_rate": 8.375640059853033e-06, "loss": 0.2191, "step": 17244 }, { "epoch": 56.540983606557376, "grad_norm": 3.697377920150757, "learning_rate": 8.374592284858748e-06, "loss": 0.4601, "step": 17245 }, { "epoch": 56.544262295081964, "grad_norm": 2.6763741970062256, "learning_rate": 8.37354452819263e-06, "loss": 0.1534, "step": 17246 }, { "epoch": 56.54754098360656, "grad_norm": 3.3227038383483887, "learning_rate": 8.372496789866488e-06, "loss": 0.2416, "step": 17247 }, { "epoch": 56.55081967213115, "grad_norm": 3.5908236503601074, "learning_rate": 8.371449069892137e-06, "loss": 0.215, "step": 17248 }, { "epoch": 56.554098360655736, "grad_norm": 3.1143879890441895, "learning_rate": 8.37040136828139e-06, "loss": 0.2526, "step": 17249 }, { "epoch": 56.557377049180324, "grad_norm": 2.95748233795166, "learning_rate": 8.36935368504606e-06, "loss": 0.1867, "step": 17250 }, { "epoch": 56.56065573770492, "grad_norm": 4.412542819976807, "learning_rate": 8.368306020197965e-06, "loss": 0.1724, "step": 17251 }, { "epoch": 56.56393442622951, "grad_norm": 2.6491966247558594, "learning_rate": 8.367258373748916e-06, "loss": 0.1399, "step": 17252 }, { "epoch": 56.5672131147541, "grad_norm": 3.4687399864196777, "learning_rate": 8.366210745710728e-06, "loss": 0.1158, "step": 17253 }, { "epoch": 56.570491803278685, "grad_norm": 2.7295265197753906, "learning_rate": 8.365163136095207e-06, "loss": 0.1752, "step": 17254 }, { "epoch": 56.57377049180328, "grad_norm": 4.301130294799805, "learning_rate": 8.364115544914175e-06, "loss": 0.4182, "step": 17255 }, { "epoch": 56.57704918032787, "grad_norm": 3.178342819213867, "learning_rate": 8.363067972179442e-06, "loss": 0.1906, "step": 17256 }, { "epoch": 56.58032786885246, "grad_norm": 3.2167346477508545, "learning_rate": 8.362020417902819e-06, "loss": 0.2756, "step": 17257 }, { "epoch": 56.58360655737705, "grad_norm": 3.7418415546417236, "learning_rate": 8.360972882096117e-06, "loss": 0.1128, "step": 17258 }, { "epoch": 56.58688524590164, "grad_norm": 4.096615314483643, "learning_rate": 8.359925364771154e-06, "loss": 0.2772, "step": 17259 }, { "epoch": 56.59016393442623, "grad_norm": 2.3559200763702393, "learning_rate": 8.358877865939733e-06, "loss": 0.0687, "step": 17260 }, { "epoch": 56.59344262295082, "grad_norm": 3.485100507736206, "learning_rate": 8.357830385613674e-06, "loss": 0.2412, "step": 17261 }, { "epoch": 56.59672131147541, "grad_norm": 3.5797016620635986, "learning_rate": 8.356782923804785e-06, "loss": 0.427, "step": 17262 }, { "epoch": 56.6, "grad_norm": 2.9456677436828613, "learning_rate": 8.355735480524874e-06, "loss": 0.1582, "step": 17263 }, { "epoch": 56.60327868852459, "grad_norm": 3.648271322250366, "learning_rate": 8.354688055785756e-06, "loss": 0.3503, "step": 17264 }, { "epoch": 56.60655737704918, "grad_norm": 2.7994771003723145, "learning_rate": 8.353640649599242e-06, "loss": 0.2989, "step": 17265 }, { "epoch": 56.609836065573774, "grad_norm": 3.1539366245269775, "learning_rate": 8.352593261977143e-06, "loss": 0.3218, "step": 17266 }, { "epoch": 56.61311475409836, "grad_norm": 3.4601612091064453, "learning_rate": 8.351545892931262e-06, "loss": 0.1336, "step": 17267 }, { "epoch": 56.61639344262295, "grad_norm": 4.639293670654297, "learning_rate": 8.35049854247342e-06, "loss": 0.2779, "step": 17268 }, { "epoch": 56.61967213114754, "grad_norm": 3.190124273300171, "learning_rate": 8.349451210615421e-06, "loss": 0.2879, "step": 17269 }, { "epoch": 56.622950819672134, "grad_norm": 3.734623908996582, "learning_rate": 8.348403897369077e-06, "loss": 0.1838, "step": 17270 }, { "epoch": 56.62622950819672, "grad_norm": 3.659069538116455, "learning_rate": 8.347356602746191e-06, "loss": 0.3545, "step": 17271 }, { "epoch": 56.62950819672131, "grad_norm": 3.1034953594207764, "learning_rate": 8.346309326758583e-06, "loss": 0.298, "step": 17272 }, { "epoch": 56.6327868852459, "grad_norm": 3.568138837814331, "learning_rate": 8.345262069418056e-06, "loss": 0.2001, "step": 17273 }, { "epoch": 56.636065573770495, "grad_norm": 3.2563488483428955, "learning_rate": 8.34421483073642e-06, "loss": 0.2127, "step": 17274 }, { "epoch": 56.63934426229508, "grad_norm": 3.912273406982422, "learning_rate": 8.343167610725485e-06, "loss": 0.2901, "step": 17275 }, { "epoch": 56.64262295081967, "grad_norm": 3.54879093170166, "learning_rate": 8.342120409397052e-06, "loss": 0.1978, "step": 17276 }, { "epoch": 56.64590163934426, "grad_norm": 5.748198986053467, "learning_rate": 8.341073226762938e-06, "loss": 0.2977, "step": 17277 }, { "epoch": 56.649180327868855, "grad_norm": 3.4884378910064697, "learning_rate": 8.34002606283495e-06, "loss": 0.5888, "step": 17278 }, { "epoch": 56.65245901639344, "grad_norm": 2.8900179862976074, "learning_rate": 8.338978917624894e-06, "loss": 0.1119, "step": 17279 }, { "epoch": 56.65573770491803, "grad_norm": 4.120030403137207, "learning_rate": 8.337931791144572e-06, "loss": 0.3644, "step": 17280 }, { "epoch": 56.65901639344262, "grad_norm": 3.051635503768921, "learning_rate": 8.336884683405802e-06, "loss": 0.1482, "step": 17281 }, { "epoch": 56.662295081967216, "grad_norm": 3.084629774093628, "learning_rate": 8.335837594420389e-06, "loss": 0.2058, "step": 17282 }, { "epoch": 56.665573770491804, "grad_norm": 2.9034719467163086, "learning_rate": 8.334790524200134e-06, "loss": 0.3593, "step": 17283 }, { "epoch": 56.66885245901639, "grad_norm": 3.6272785663604736, "learning_rate": 8.333743472756844e-06, "loss": 0.2671, "step": 17284 }, { "epoch": 56.67213114754098, "grad_norm": 3.459285020828247, "learning_rate": 8.332696440102334e-06, "loss": 0.3086, "step": 17285 }, { "epoch": 56.675409836065576, "grad_norm": 3.6102471351623535, "learning_rate": 8.331649426248405e-06, "loss": 0.321, "step": 17286 }, { "epoch": 56.678688524590164, "grad_norm": 2.84982967376709, "learning_rate": 8.33060243120686e-06, "loss": 0.1382, "step": 17287 }, { "epoch": 56.68196721311475, "grad_norm": 3.7709243297576904, "learning_rate": 8.329555454989512e-06, "loss": 0.4457, "step": 17288 }, { "epoch": 56.68524590163934, "grad_norm": 3.496521234512329, "learning_rate": 8.32850849760816e-06, "loss": 0.2258, "step": 17289 }, { "epoch": 56.68852459016394, "grad_norm": 3.369525909423828, "learning_rate": 8.327461559074613e-06, "loss": 0.2695, "step": 17290 }, { "epoch": 56.691803278688525, "grad_norm": 3.883646249771118, "learning_rate": 8.326414639400678e-06, "loss": 0.1697, "step": 17291 }, { "epoch": 56.69508196721311, "grad_norm": 3.622497797012329, "learning_rate": 8.325367738598157e-06, "loss": 0.1981, "step": 17292 }, { "epoch": 56.6983606557377, "grad_norm": 3.5362071990966797, "learning_rate": 8.324320856678854e-06, "loss": 0.232, "step": 17293 }, { "epoch": 56.7016393442623, "grad_norm": 3.4007186889648438, "learning_rate": 8.323273993654577e-06, "loss": 0.1606, "step": 17294 }, { "epoch": 56.704918032786885, "grad_norm": 5.03034782409668, "learning_rate": 8.322227149537131e-06, "loss": 0.2552, "step": 17295 }, { "epoch": 56.708196721311474, "grad_norm": 3.9405622482299805, "learning_rate": 8.321180324338319e-06, "loss": 0.2067, "step": 17296 }, { "epoch": 56.71147540983607, "grad_norm": 3.1538686752319336, "learning_rate": 8.320133518069939e-06, "loss": 0.1208, "step": 17297 }, { "epoch": 56.71475409836066, "grad_norm": 3.3516604900360107, "learning_rate": 8.319086730743804e-06, "loss": 0.2264, "step": 17298 }, { "epoch": 56.718032786885246, "grad_norm": 3.2276418209075928, "learning_rate": 8.318039962371713e-06, "loss": 0.1884, "step": 17299 }, { "epoch": 56.721311475409834, "grad_norm": 3.997743844985962, "learning_rate": 8.316993212965472e-06, "loss": 0.1693, "step": 17300 }, { "epoch": 56.72459016393443, "grad_norm": 2.5543553829193115, "learning_rate": 8.315946482536883e-06, "loss": 0.2598, "step": 17301 }, { "epoch": 56.72786885245902, "grad_norm": 5.494677543640137, "learning_rate": 8.314899771097742e-06, "loss": 0.2277, "step": 17302 }, { "epoch": 56.731147540983606, "grad_norm": 3.00712251663208, "learning_rate": 8.313853078659864e-06, "loss": 0.2634, "step": 17303 }, { "epoch": 56.734426229508195, "grad_norm": 3.626122236251831, "learning_rate": 8.312806405235045e-06, "loss": 0.1687, "step": 17304 }, { "epoch": 56.73770491803279, "grad_norm": 2.8607215881347656, "learning_rate": 8.311759750835089e-06, "loss": 0.092, "step": 17305 }, { "epoch": 56.74098360655738, "grad_norm": 4.407384395599365, "learning_rate": 8.31071311547179e-06, "loss": 0.1617, "step": 17306 }, { "epoch": 56.74426229508197, "grad_norm": 3.6277847290039062, "learning_rate": 8.309666499156964e-06, "loss": 0.1801, "step": 17307 }, { "epoch": 56.747540983606555, "grad_norm": 2.9987587928771973, "learning_rate": 8.308619901902406e-06, "loss": 0.2171, "step": 17308 }, { "epoch": 56.75081967213115, "grad_norm": 3.9366698265075684, "learning_rate": 8.307573323719915e-06, "loss": 0.4136, "step": 17309 }, { "epoch": 56.75409836065574, "grad_norm": 2.9932174682617188, "learning_rate": 8.306526764621292e-06, "loss": 0.2585, "step": 17310 }, { "epoch": 56.75737704918033, "grad_norm": 3.2694573402404785, "learning_rate": 8.305480224618344e-06, "loss": 0.2166, "step": 17311 }, { "epoch": 56.760655737704916, "grad_norm": 3.6822867393493652, "learning_rate": 8.30443370372287e-06, "loss": 0.2096, "step": 17312 }, { "epoch": 56.76393442622951, "grad_norm": 3.1203091144561768, "learning_rate": 8.303387201946665e-06, "loss": 0.1578, "step": 17313 }, { "epoch": 56.7672131147541, "grad_norm": 3.7019283771514893, "learning_rate": 8.302340719301535e-06, "loss": 0.4893, "step": 17314 }, { "epoch": 56.77049180327869, "grad_norm": 2.9283905029296875, "learning_rate": 8.301294255799278e-06, "loss": 0.2172, "step": 17315 }, { "epoch": 56.773770491803276, "grad_norm": 3.809338331222534, "learning_rate": 8.300247811451693e-06, "loss": 0.2318, "step": 17316 }, { "epoch": 56.77704918032787, "grad_norm": 2.2687790393829346, "learning_rate": 8.299201386270585e-06, "loss": 0.2919, "step": 17317 }, { "epoch": 56.78032786885246, "grad_norm": 3.665172576904297, "learning_rate": 8.298154980267747e-06, "loss": 0.3932, "step": 17318 }, { "epoch": 56.78360655737705, "grad_norm": 3.222018003463745, "learning_rate": 8.297108593454979e-06, "loss": 0.1727, "step": 17319 }, { "epoch": 56.78688524590164, "grad_norm": 3.013070583343506, "learning_rate": 8.296062225844086e-06, "loss": 0.2147, "step": 17320 }, { "epoch": 56.79016393442623, "grad_norm": 3.5052168369293213, "learning_rate": 8.295015877446863e-06, "loss": 0.2414, "step": 17321 }, { "epoch": 56.79344262295082, "grad_norm": 2.699131965637207, "learning_rate": 8.293969548275108e-06, "loss": 0.2135, "step": 17322 }, { "epoch": 56.79672131147541, "grad_norm": 4.704293251037598, "learning_rate": 8.292923238340617e-06, "loss": 0.2159, "step": 17323 }, { "epoch": 56.8, "grad_norm": 3.416585683822632, "learning_rate": 8.291876947655197e-06, "loss": 0.3084, "step": 17324 }, { "epoch": 56.80327868852459, "grad_norm": 3.5328986644744873, "learning_rate": 8.29083067623064e-06, "loss": 0.2263, "step": 17325 }, { "epoch": 56.80655737704918, "grad_norm": 3.6141586303710938, "learning_rate": 8.289784424078742e-06, "loss": 0.1775, "step": 17326 }, { "epoch": 56.80983606557377, "grad_norm": 3.549346923828125, "learning_rate": 8.288738191211303e-06, "loss": 0.2395, "step": 17327 }, { "epoch": 56.81311475409836, "grad_norm": 3.8438403606414795, "learning_rate": 8.287691977640117e-06, "loss": 0.201, "step": 17328 }, { "epoch": 56.81639344262295, "grad_norm": 3.187870502471924, "learning_rate": 8.286645783376989e-06, "loss": 0.4174, "step": 17329 }, { "epoch": 56.81967213114754, "grad_norm": 3.2213902473449707, "learning_rate": 8.28559960843371e-06, "loss": 0.1159, "step": 17330 }, { "epoch": 56.82295081967213, "grad_norm": 3.406364679336548, "learning_rate": 8.284553452822079e-06, "loss": 0.1656, "step": 17331 }, { "epoch": 56.82622950819672, "grad_norm": 2.978010654449463, "learning_rate": 8.283507316553885e-06, "loss": 0.1143, "step": 17332 }, { "epoch": 56.829508196721314, "grad_norm": 3.505201816558838, "learning_rate": 8.282461199640938e-06, "loss": 0.3037, "step": 17333 }, { "epoch": 56.8327868852459, "grad_norm": 2.874131917953491, "learning_rate": 8.281415102095025e-06, "loss": 0.1768, "step": 17334 }, { "epoch": 56.83606557377049, "grad_norm": 3.7666969299316406, "learning_rate": 8.280369023927944e-06, "loss": 0.2954, "step": 17335 }, { "epoch": 56.83934426229508, "grad_norm": 3.6948585510253906, "learning_rate": 8.279322965151486e-06, "loss": 0.1856, "step": 17336 }, { "epoch": 56.842622950819674, "grad_norm": 3.428203582763672, "learning_rate": 8.278276925777456e-06, "loss": 0.2361, "step": 17337 }, { "epoch": 56.84590163934426, "grad_norm": 3.3595645427703857, "learning_rate": 8.277230905817642e-06, "loss": 0.1465, "step": 17338 }, { "epoch": 56.84918032786885, "grad_norm": 2.7109527587890625, "learning_rate": 8.276184905283838e-06, "loss": 0.247, "step": 17339 }, { "epoch": 56.85245901639344, "grad_norm": 3.0283219814300537, "learning_rate": 8.275138924187846e-06, "loss": 0.2686, "step": 17340 }, { "epoch": 56.855737704918035, "grad_norm": 4.287640571594238, "learning_rate": 8.274092962541452e-06, "loss": 0.1661, "step": 17341 }, { "epoch": 56.85901639344262, "grad_norm": 2.4355616569519043, "learning_rate": 8.273047020356455e-06, "loss": 0.1862, "step": 17342 }, { "epoch": 56.86229508196721, "grad_norm": 3.0122058391571045, "learning_rate": 8.272001097644651e-06, "loss": 0.1486, "step": 17343 }, { "epoch": 56.86557377049181, "grad_norm": 3.276092290878296, "learning_rate": 8.270955194417832e-06, "loss": 0.1663, "step": 17344 }, { "epoch": 56.868852459016395, "grad_norm": 2.969839334487915, "learning_rate": 8.269909310687787e-06, "loss": 0.2302, "step": 17345 }, { "epoch": 56.87213114754098, "grad_norm": 4.807858467102051, "learning_rate": 8.268863446466315e-06, "loss": 0.2118, "step": 17346 }, { "epoch": 56.87540983606557, "grad_norm": 3.2380499839782715, "learning_rate": 8.267817601765208e-06, "loss": 0.2777, "step": 17347 }, { "epoch": 56.87868852459017, "grad_norm": 3.458282470703125, "learning_rate": 8.26677177659626e-06, "loss": 0.1976, "step": 17348 }, { "epoch": 56.881967213114756, "grad_norm": 3.834739923477173, "learning_rate": 8.265725970971257e-06, "loss": 0.2114, "step": 17349 }, { "epoch": 56.885245901639344, "grad_norm": 3.117227077484131, "learning_rate": 8.264680184902003e-06, "loss": 0.3625, "step": 17350 }, { "epoch": 56.88852459016393, "grad_norm": 3.27622652053833, "learning_rate": 8.26363441840028e-06, "loss": 0.0952, "step": 17351 }, { "epoch": 56.89180327868853, "grad_norm": 3.914781093597412, "learning_rate": 8.26258867147789e-06, "loss": 0.157, "step": 17352 }, { "epoch": 56.895081967213116, "grad_norm": 2.6456778049468994, "learning_rate": 8.261542944146615e-06, "loss": 0.1982, "step": 17353 }, { "epoch": 56.898360655737704, "grad_norm": 2.979494333267212, "learning_rate": 8.260497236418248e-06, "loss": 0.1554, "step": 17354 }, { "epoch": 56.90163934426229, "grad_norm": 3.3316352367401123, "learning_rate": 8.259451548304588e-06, "loss": 0.2199, "step": 17355 }, { "epoch": 56.90491803278689, "grad_norm": 3.3764026165008545, "learning_rate": 8.258405879817421e-06, "loss": 0.3066, "step": 17356 }, { "epoch": 56.90819672131148, "grad_norm": 3.0677363872528076, "learning_rate": 8.257360230968538e-06, "loss": 0.2432, "step": 17357 }, { "epoch": 56.911475409836065, "grad_norm": 3.0754446983337402, "learning_rate": 8.256314601769727e-06, "loss": 0.0778, "step": 17358 }, { "epoch": 56.91475409836065, "grad_norm": 2.7770466804504395, "learning_rate": 8.255268992232785e-06, "loss": 0.2493, "step": 17359 }, { "epoch": 56.91803278688525, "grad_norm": 3.5652992725372314, "learning_rate": 8.2542234023695e-06, "loss": 0.3402, "step": 17360 }, { "epoch": 56.92131147540984, "grad_norm": 4.641302108764648, "learning_rate": 8.25317783219166e-06, "loss": 0.1135, "step": 17361 }, { "epoch": 56.924590163934425, "grad_norm": 3.9398419857025146, "learning_rate": 8.252132281711054e-06, "loss": 0.2177, "step": 17362 }, { "epoch": 56.927868852459014, "grad_norm": 3.327529191970825, "learning_rate": 8.251086750939477e-06, "loss": 0.2109, "step": 17363 }, { "epoch": 56.93114754098361, "grad_norm": 3.257777452468872, "learning_rate": 8.250041239888715e-06, "loss": 0.291, "step": 17364 }, { "epoch": 56.9344262295082, "grad_norm": 3.2721707820892334, "learning_rate": 8.24899574857056e-06, "loss": 0.1673, "step": 17365 }, { "epoch": 56.937704918032786, "grad_norm": 3.940248727798462, "learning_rate": 8.247950276996792e-06, "loss": 0.3664, "step": 17366 }, { "epoch": 56.940983606557374, "grad_norm": 2.924823522567749, "learning_rate": 8.246904825179209e-06, "loss": 0.1965, "step": 17367 }, { "epoch": 56.94426229508197, "grad_norm": 3.456371784210205, "learning_rate": 8.2458593931296e-06, "loss": 0.2636, "step": 17368 }, { "epoch": 56.94754098360656, "grad_norm": 2.9822938442230225, "learning_rate": 8.244813980859747e-06, "loss": 0.1428, "step": 17369 }, { "epoch": 56.950819672131146, "grad_norm": 3.5176026821136475, "learning_rate": 8.243768588381442e-06, "loss": 0.2705, "step": 17370 }, { "epoch": 56.954098360655735, "grad_norm": 3.0527126789093018, "learning_rate": 8.24272321570647e-06, "loss": 0.1311, "step": 17371 }, { "epoch": 56.95737704918033, "grad_norm": 2.2991559505462646, "learning_rate": 8.241677862846624e-06, "loss": 0.2081, "step": 17372 }, { "epoch": 56.96065573770492, "grad_norm": 3.492917776107788, "learning_rate": 8.240632529813689e-06, "loss": 0.2656, "step": 17373 }, { "epoch": 56.96393442622951, "grad_norm": 2.884428024291992, "learning_rate": 8.23958721661945e-06, "loss": 0.097, "step": 17374 }, { "epoch": 56.967213114754095, "grad_norm": 2.5536139011383057, "learning_rate": 8.238541923275692e-06, "loss": 0.0935, "step": 17375 }, { "epoch": 56.97049180327869, "grad_norm": 2.962571859359741, "learning_rate": 8.237496649794208e-06, "loss": 0.1367, "step": 17376 }, { "epoch": 56.97377049180328, "grad_norm": 3.2824928760528564, "learning_rate": 8.236451396186784e-06, "loss": 0.3265, "step": 17377 }, { "epoch": 56.97704918032787, "grad_norm": 3.330009937286377, "learning_rate": 8.235406162465204e-06, "loss": 0.2081, "step": 17378 }, { "epoch": 56.980327868852456, "grad_norm": 3.549506902694702, "learning_rate": 8.234360948641255e-06, "loss": 0.2842, "step": 17379 }, { "epoch": 56.98360655737705, "grad_norm": 2.978121757507324, "learning_rate": 8.233315754726716e-06, "loss": 0.3733, "step": 17380 }, { "epoch": 56.98688524590164, "grad_norm": 3.0511372089385986, "learning_rate": 8.232270580733384e-06, "loss": 0.2095, "step": 17381 }, { "epoch": 56.99016393442623, "grad_norm": 3.694108724594116, "learning_rate": 8.23122542667304e-06, "loss": 0.1703, "step": 17382 }, { "epoch": 56.993442622950816, "grad_norm": 3.5634500980377197, "learning_rate": 8.230180292557465e-06, "loss": 0.2105, "step": 17383 }, { "epoch": 56.99672131147541, "grad_norm": 4.776215076446533, "learning_rate": 8.229135178398447e-06, "loss": 0.403, "step": 17384 }, { "epoch": 57.0, "grad_norm": 3.3902759552001953, "learning_rate": 8.228090084207773e-06, "loss": 0.2293, "step": 17385 }, { "epoch": 57.00327868852459, "grad_norm": 2.710387945175171, "learning_rate": 8.227045009997226e-06, "loss": 0.1651, "step": 17386 }, { "epoch": 57.006557377049184, "grad_norm": 4.1294965744018555, "learning_rate": 8.225999955778592e-06, "loss": 0.187, "step": 17387 }, { "epoch": 57.00983606557377, "grad_norm": 3.461618661880493, "learning_rate": 8.224954921563647e-06, "loss": 0.1868, "step": 17388 }, { "epoch": 57.01311475409836, "grad_norm": 2.6197714805603027, "learning_rate": 8.223909907364184e-06, "loss": 0.091, "step": 17389 }, { "epoch": 57.01639344262295, "grad_norm": 3.1828677654266357, "learning_rate": 8.222864913191986e-06, "loss": 0.197, "step": 17390 }, { "epoch": 57.019672131147544, "grad_norm": 3.198756456375122, "learning_rate": 8.221819939058832e-06, "loss": 0.2518, "step": 17391 }, { "epoch": 57.02295081967213, "grad_norm": 3.0623056888580322, "learning_rate": 8.220774984976504e-06, "loss": 0.2244, "step": 17392 }, { "epoch": 57.02622950819672, "grad_norm": 4.066153049468994, "learning_rate": 8.21973005095679e-06, "loss": 0.148, "step": 17393 }, { "epoch": 57.02950819672131, "grad_norm": 4.171480655670166, "learning_rate": 8.218685137011473e-06, "loss": 0.4858, "step": 17394 }, { "epoch": 57.032786885245905, "grad_norm": 2.602781057357788, "learning_rate": 8.217640243152329e-06, "loss": 0.1815, "step": 17395 }, { "epoch": 57.03606557377049, "grad_norm": 4.056413173675537, "learning_rate": 8.21659536939115e-06, "loss": 0.1148, "step": 17396 }, { "epoch": 57.03934426229508, "grad_norm": 3.0267038345336914, "learning_rate": 8.215550515739708e-06, "loss": 0.3732, "step": 17397 }, { "epoch": 57.04262295081967, "grad_norm": 4.388103485107422, "learning_rate": 8.214505682209788e-06, "loss": 0.2678, "step": 17398 }, { "epoch": 57.045901639344265, "grad_norm": 3.377140760421753, "learning_rate": 8.213460868813177e-06, "loss": 0.2556, "step": 17399 }, { "epoch": 57.049180327868854, "grad_norm": 2.726337432861328, "learning_rate": 8.212416075561651e-06, "loss": 0.1264, "step": 17400 }, { "epoch": 57.05245901639344, "grad_norm": 3.0562727451324463, "learning_rate": 8.21137130246699e-06, "loss": 0.1962, "step": 17401 }, { "epoch": 57.05573770491803, "grad_norm": 4.091536998748779, "learning_rate": 8.21032654954098e-06, "loss": 0.261, "step": 17402 }, { "epoch": 57.059016393442626, "grad_norm": 3.183805465698242, "learning_rate": 8.2092818167954e-06, "loss": 0.1906, "step": 17403 }, { "epoch": 57.062295081967214, "grad_norm": 3.090404748916626, "learning_rate": 8.208237104242029e-06, "loss": 0.0713, "step": 17404 }, { "epoch": 57.0655737704918, "grad_norm": 2.571192741394043, "learning_rate": 8.207192411892645e-06, "loss": 0.2028, "step": 17405 }, { "epoch": 57.06885245901639, "grad_norm": 3.439743757247925, "learning_rate": 8.20614773975903e-06, "loss": 0.2363, "step": 17406 }, { "epoch": 57.072131147540986, "grad_norm": 2.873394727706909, "learning_rate": 8.205103087852967e-06, "loss": 0.2291, "step": 17407 }, { "epoch": 57.075409836065575, "grad_norm": 3.2115416526794434, "learning_rate": 8.204058456186233e-06, "loss": 0.184, "step": 17408 }, { "epoch": 57.07868852459016, "grad_norm": 3.6207003593444824, "learning_rate": 8.203013844770608e-06, "loss": 0.377, "step": 17409 }, { "epoch": 57.08196721311475, "grad_norm": 3.9313254356384277, "learning_rate": 8.201969253617865e-06, "loss": 0.2776, "step": 17410 }, { "epoch": 57.08524590163935, "grad_norm": 2.7079553604125977, "learning_rate": 8.200924682739794e-06, "loss": 0.1783, "step": 17411 }, { "epoch": 57.088524590163935, "grad_norm": 3.1850733757019043, "learning_rate": 8.199880132148166e-06, "loss": 0.2667, "step": 17412 }, { "epoch": 57.09180327868852, "grad_norm": 2.745206832885742, "learning_rate": 8.198835601854762e-06, "loss": 0.183, "step": 17413 }, { "epoch": 57.09508196721311, "grad_norm": 3.236124277114868, "learning_rate": 8.197791091871355e-06, "loss": 0.1016, "step": 17414 }, { "epoch": 57.09836065573771, "grad_norm": 3.057243585586548, "learning_rate": 8.19674660220973e-06, "loss": 0.2216, "step": 17415 }, { "epoch": 57.101639344262296, "grad_norm": 3.2448415756225586, "learning_rate": 8.195702132881664e-06, "loss": 0.1504, "step": 17416 }, { "epoch": 57.104918032786884, "grad_norm": 3.017242431640625, "learning_rate": 8.194657683898932e-06, "loss": 0.1324, "step": 17417 }, { "epoch": 57.10819672131147, "grad_norm": 2.3371787071228027, "learning_rate": 8.193613255273309e-06, "loss": 0.0884, "step": 17418 }, { "epoch": 57.11147540983607, "grad_norm": 11.392614364624023, "learning_rate": 8.192568847016575e-06, "loss": 0.1853, "step": 17419 }, { "epoch": 57.114754098360656, "grad_norm": 3.6493875980377197, "learning_rate": 8.19152445914051e-06, "loss": 0.2704, "step": 17420 }, { "epoch": 57.118032786885244, "grad_norm": 3.458937168121338, "learning_rate": 8.190480091656884e-06, "loss": 0.2383, "step": 17421 }, { "epoch": 57.12131147540983, "grad_norm": 3.1551902294158936, "learning_rate": 8.189435744577477e-06, "loss": 0.139, "step": 17422 }, { "epoch": 57.12459016393443, "grad_norm": 2.935641050338745, "learning_rate": 8.188391417914064e-06, "loss": 0.1482, "step": 17423 }, { "epoch": 57.12786885245902, "grad_norm": 4.1658854484558105, "learning_rate": 8.187347111678422e-06, "loss": 0.2327, "step": 17424 }, { "epoch": 57.131147540983605, "grad_norm": 3.38727068901062, "learning_rate": 8.186302825882327e-06, "loss": 0.1961, "step": 17425 }, { "epoch": 57.13442622950819, "grad_norm": 3.5264430046081543, "learning_rate": 8.185258560537552e-06, "loss": 0.2839, "step": 17426 }, { "epoch": 57.13770491803279, "grad_norm": 2.9565720558166504, "learning_rate": 8.184214315655876e-06, "loss": 0.129, "step": 17427 }, { "epoch": 57.14098360655738, "grad_norm": 3.119154453277588, "learning_rate": 8.183170091249067e-06, "loss": 0.1985, "step": 17428 }, { "epoch": 57.144262295081965, "grad_norm": 3.44523024559021, "learning_rate": 8.182125887328906e-06, "loss": 0.2208, "step": 17429 }, { "epoch": 57.14754098360656, "grad_norm": 2.3538818359375, "learning_rate": 8.18108170390717e-06, "loss": 0.235, "step": 17430 }, { "epoch": 57.15081967213115, "grad_norm": 2.851468563079834, "learning_rate": 8.180037540995626e-06, "loss": 0.1105, "step": 17431 }, { "epoch": 57.15409836065574, "grad_norm": 3.6159965991973877, "learning_rate": 8.178993398606046e-06, "loss": 0.3866, "step": 17432 }, { "epoch": 57.157377049180326, "grad_norm": 3.8899002075195312, "learning_rate": 8.177949276750215e-06, "loss": 0.33, "step": 17433 }, { "epoch": 57.16065573770492, "grad_norm": 3.1410951614379883, "learning_rate": 8.1769051754399e-06, "loss": 0.2456, "step": 17434 }, { "epoch": 57.16393442622951, "grad_norm": 2.973612070083618, "learning_rate": 8.175861094686875e-06, "loss": 0.3382, "step": 17435 }, { "epoch": 57.1672131147541, "grad_norm": 2.9994378089904785, "learning_rate": 8.174817034502908e-06, "loss": 0.101, "step": 17436 }, { "epoch": 57.170491803278686, "grad_norm": 3.012341022491455, "learning_rate": 8.17377299489978e-06, "loss": 0.2589, "step": 17437 }, { "epoch": 57.17377049180328, "grad_norm": 3.1231188774108887, "learning_rate": 8.172728975889261e-06, "loss": 0.4225, "step": 17438 }, { "epoch": 57.17704918032787, "grad_norm": 3.753572940826416, "learning_rate": 8.171684977483122e-06, "loss": 0.1206, "step": 17439 }, { "epoch": 57.18032786885246, "grad_norm": 2.6041927337646484, "learning_rate": 8.170640999693138e-06, "loss": 0.0844, "step": 17440 }, { "epoch": 57.18360655737705, "grad_norm": 4.407557487487793, "learning_rate": 8.169597042531073e-06, "loss": 0.2028, "step": 17441 }, { "epoch": 57.18688524590164, "grad_norm": 3.678056240081787, "learning_rate": 8.168553106008709e-06, "loss": 0.2665, "step": 17442 }, { "epoch": 57.19016393442623, "grad_norm": 2.509450674057007, "learning_rate": 8.167509190137813e-06, "loss": 0.0672, "step": 17443 }, { "epoch": 57.19344262295082, "grad_norm": 2.8020541667938232, "learning_rate": 8.166465294930155e-06, "loss": 0.2576, "step": 17444 }, { "epoch": 57.19672131147541, "grad_norm": 3.4622132778167725, "learning_rate": 8.165421420397506e-06, "loss": 0.2175, "step": 17445 }, { "epoch": 57.2, "grad_norm": 3.3200106620788574, "learning_rate": 8.16437756655164e-06, "loss": 0.2269, "step": 17446 }, { "epoch": 57.20327868852459, "grad_norm": 4.126676082611084, "learning_rate": 8.163333733404327e-06, "loss": 0.3312, "step": 17447 }, { "epoch": 57.20655737704918, "grad_norm": 2.9545581340789795, "learning_rate": 8.16228992096733e-06, "loss": 0.1163, "step": 17448 }, { "epoch": 57.20983606557377, "grad_norm": 3.521817207336426, "learning_rate": 8.16124612925243e-06, "loss": 0.2169, "step": 17449 }, { "epoch": 57.21311475409836, "grad_norm": 2.906060218811035, "learning_rate": 8.16020235827139e-06, "loss": 0.2081, "step": 17450 }, { "epoch": 57.21639344262295, "grad_norm": 3.1752877235412598, "learning_rate": 8.15915860803598e-06, "loss": 0.232, "step": 17451 }, { "epoch": 57.21967213114754, "grad_norm": 3.999302625656128, "learning_rate": 8.158114878557973e-06, "loss": 0.3489, "step": 17452 }, { "epoch": 57.22295081967213, "grad_norm": 3.489021062850952, "learning_rate": 8.157071169849136e-06, "loss": 0.1289, "step": 17453 }, { "epoch": 57.226229508196724, "grad_norm": 3.320695161819458, "learning_rate": 8.156027481921233e-06, "loss": 0.1974, "step": 17454 }, { "epoch": 57.22950819672131, "grad_norm": 3.2412233352661133, "learning_rate": 8.154983814786045e-06, "loss": 0.1491, "step": 17455 }, { "epoch": 57.2327868852459, "grad_norm": 4.080201148986816, "learning_rate": 8.153940168455328e-06, "loss": 0.2278, "step": 17456 }, { "epoch": 57.23606557377049, "grad_norm": 4.542573928833008, "learning_rate": 8.152896542940859e-06, "loss": 0.317, "step": 17457 }, { "epoch": 57.239344262295084, "grad_norm": 2.8866140842437744, "learning_rate": 8.151852938254394e-06, "loss": 0.2679, "step": 17458 }, { "epoch": 57.24262295081967, "grad_norm": 3.8283894062042236, "learning_rate": 8.150809354407716e-06, "loss": 0.2089, "step": 17459 }, { "epoch": 57.24590163934426, "grad_norm": 3.925940752029419, "learning_rate": 8.149765791412583e-06, "loss": 0.2601, "step": 17460 }, { "epoch": 57.24918032786885, "grad_norm": 4.288877487182617, "learning_rate": 8.148722249280765e-06, "loss": 0.2521, "step": 17461 }, { "epoch": 57.252459016393445, "grad_norm": 2.9991109371185303, "learning_rate": 8.147678728024025e-06, "loss": 0.1972, "step": 17462 }, { "epoch": 57.25573770491803, "grad_norm": 3.0435714721679688, "learning_rate": 8.146635227654136e-06, "loss": 0.1332, "step": 17463 }, { "epoch": 57.25901639344262, "grad_norm": 2.0352509021759033, "learning_rate": 8.145591748182863e-06, "loss": 0.0361, "step": 17464 }, { "epoch": 57.26229508196721, "grad_norm": 3.818389654159546, "learning_rate": 8.144548289621972e-06, "loss": 0.2187, "step": 17465 }, { "epoch": 57.265573770491805, "grad_norm": 3.721456527709961, "learning_rate": 8.143504851983226e-06, "loss": 0.2566, "step": 17466 }, { "epoch": 57.268852459016394, "grad_norm": 2.840423345565796, "learning_rate": 8.142461435278392e-06, "loss": 0.1651, "step": 17467 }, { "epoch": 57.27213114754098, "grad_norm": 2.8330419063568115, "learning_rate": 8.14141803951924e-06, "loss": 0.1594, "step": 17468 }, { "epoch": 57.27540983606557, "grad_norm": 3.319235324859619, "learning_rate": 8.14037466471753e-06, "loss": 0.3973, "step": 17469 }, { "epoch": 57.278688524590166, "grad_norm": 3.210279703140259, "learning_rate": 8.13933131088503e-06, "loss": 0.1926, "step": 17470 }, { "epoch": 57.281967213114754, "grad_norm": 3.603121280670166, "learning_rate": 8.1382879780335e-06, "loss": 0.3351, "step": 17471 }, { "epoch": 57.28524590163934, "grad_norm": 2.839926242828369, "learning_rate": 8.137244666174712e-06, "loss": 0.3058, "step": 17472 }, { "epoch": 57.28852459016394, "grad_norm": 3.8482635021209717, "learning_rate": 8.136201375320429e-06, "loss": 0.3108, "step": 17473 }, { "epoch": 57.291803278688526, "grad_norm": 3.1450018882751465, "learning_rate": 8.135158105482412e-06, "loss": 0.1818, "step": 17474 }, { "epoch": 57.295081967213115, "grad_norm": 3.8010716438293457, "learning_rate": 8.134114856672423e-06, "loss": 0.163, "step": 17475 }, { "epoch": 57.2983606557377, "grad_norm": 3.413296937942505, "learning_rate": 8.133071628902233e-06, "loss": 0.1824, "step": 17476 }, { "epoch": 57.3016393442623, "grad_norm": 2.8434016704559326, "learning_rate": 8.1320284221836e-06, "loss": 0.187, "step": 17477 }, { "epoch": 57.30491803278689, "grad_norm": 2.9469335079193115, "learning_rate": 8.13098523652829e-06, "loss": 0.1451, "step": 17478 }, { "epoch": 57.308196721311475, "grad_norm": 2.714461326599121, "learning_rate": 8.129942071948066e-06, "loss": 0.202, "step": 17479 }, { "epoch": 57.31147540983606, "grad_norm": 2.7634127140045166, "learning_rate": 8.128898928454684e-06, "loss": 0.1113, "step": 17480 }, { "epoch": 57.31475409836066, "grad_norm": 3.265119791030884, "learning_rate": 8.127855806059916e-06, "loss": 0.2886, "step": 17481 }, { "epoch": 57.31803278688525, "grad_norm": 3.365267753601074, "learning_rate": 8.126812704775522e-06, "loss": 0.316, "step": 17482 }, { "epoch": 57.321311475409836, "grad_norm": 3.290661334991455, "learning_rate": 8.12576962461326e-06, "loss": 0.2777, "step": 17483 }, { "epoch": 57.324590163934424, "grad_norm": 3.1646533012390137, "learning_rate": 8.124726565584892e-06, "loss": 0.1833, "step": 17484 }, { "epoch": 57.32786885245902, "grad_norm": 2.8827152252197266, "learning_rate": 8.123683527702183e-06, "loss": 0.1746, "step": 17485 }, { "epoch": 57.33114754098361, "grad_norm": 2.725161552429199, "learning_rate": 8.122640510976896e-06, "loss": 0.1362, "step": 17486 }, { "epoch": 57.334426229508196, "grad_norm": 5.134603500366211, "learning_rate": 8.121597515420789e-06, "loss": 0.1498, "step": 17487 }, { "epoch": 57.337704918032784, "grad_norm": 3.6914355754852295, "learning_rate": 8.12055454104562e-06, "loss": 0.202, "step": 17488 }, { "epoch": 57.34098360655738, "grad_norm": 3.0210671424865723, "learning_rate": 8.119511587863153e-06, "loss": 0.1336, "step": 17489 }, { "epoch": 57.34426229508197, "grad_norm": 3.002361536026001, "learning_rate": 8.118468655885153e-06, "loss": 0.1743, "step": 17490 }, { "epoch": 57.34754098360656, "grad_norm": 3.2208776473999023, "learning_rate": 8.11742574512337e-06, "loss": 0.1876, "step": 17491 }, { "epoch": 57.350819672131145, "grad_norm": 2.715977907180786, "learning_rate": 8.116382855589572e-06, "loss": 0.1089, "step": 17492 }, { "epoch": 57.35409836065574, "grad_norm": 6.594885349273682, "learning_rate": 8.115339987295512e-06, "loss": 0.2591, "step": 17493 }, { "epoch": 57.35737704918033, "grad_norm": 31.67826271057129, "learning_rate": 8.114297140252955e-06, "loss": 0.1941, "step": 17494 }, { "epoch": 57.36065573770492, "grad_norm": 3.2398853302001953, "learning_rate": 8.113254314473662e-06, "loss": 0.2874, "step": 17495 }, { "epoch": 57.363934426229505, "grad_norm": 3.594907283782959, "learning_rate": 8.112211509969386e-06, "loss": 0.2271, "step": 17496 }, { "epoch": 57.3672131147541, "grad_norm": 2.7223317623138428, "learning_rate": 8.111168726751884e-06, "loss": 0.2234, "step": 17497 }, { "epoch": 57.37049180327869, "grad_norm": 3.52215576171875, "learning_rate": 8.110125964832922e-06, "loss": 0.3589, "step": 17498 }, { "epoch": 57.37377049180328, "grad_norm": 3.325867176055908, "learning_rate": 8.109083224224256e-06, "loss": 0.1787, "step": 17499 }, { "epoch": 57.377049180327866, "grad_norm": 2.893242120742798, "learning_rate": 8.10804050493764e-06, "loss": 0.1178, "step": 17500 }, { "epoch": 57.38032786885246, "grad_norm": 2.8620712757110596, "learning_rate": 8.106997806984835e-06, "loss": 0.1588, "step": 17501 }, { "epoch": 57.38360655737705, "grad_norm": 3.7750954627990723, "learning_rate": 8.1059551303776e-06, "loss": 0.3541, "step": 17502 }, { "epoch": 57.38688524590164, "grad_norm": 2.382636547088623, "learning_rate": 8.104912475127687e-06, "loss": 0.1065, "step": 17503 }, { "epoch": 57.390163934426226, "grad_norm": 3.481701135635376, "learning_rate": 8.103869841246859e-06, "loss": 0.1964, "step": 17504 }, { "epoch": 57.39344262295082, "grad_norm": 3.071244239807129, "learning_rate": 8.102827228746872e-06, "loss": 0.1746, "step": 17505 }, { "epoch": 57.39672131147541, "grad_norm": 3.8187718391418457, "learning_rate": 8.101784637639474e-06, "loss": 0.2411, "step": 17506 }, { "epoch": 57.4, "grad_norm": 3.062908172607422, "learning_rate": 8.100742067936432e-06, "loss": 0.0689, "step": 17507 }, { "epoch": 57.40327868852459, "grad_norm": 3.379574775695801, "learning_rate": 8.099699519649499e-06, "loss": 0.138, "step": 17508 }, { "epoch": 57.40655737704918, "grad_norm": 2.678542375564575, "learning_rate": 8.09865699279043e-06, "loss": 0.1051, "step": 17509 }, { "epoch": 57.40983606557377, "grad_norm": 4.008720874786377, "learning_rate": 8.097614487370974e-06, "loss": 0.2698, "step": 17510 }, { "epoch": 57.41311475409836, "grad_norm": 3.390608310699463, "learning_rate": 8.096572003402899e-06, "loss": 0.2944, "step": 17511 }, { "epoch": 57.41639344262295, "grad_norm": 3.335700035095215, "learning_rate": 8.095529540897952e-06, "loss": 0.179, "step": 17512 }, { "epoch": 57.41967213114754, "grad_norm": 3.1023969650268555, "learning_rate": 8.094487099867891e-06, "loss": 0.1912, "step": 17513 }, { "epoch": 57.42295081967213, "grad_norm": 2.3373115062713623, "learning_rate": 8.093444680324464e-06, "loss": 0.3673, "step": 17514 }, { "epoch": 57.42622950819672, "grad_norm": 3.432701349258423, "learning_rate": 8.092402282279435e-06, "loss": 0.3886, "step": 17515 }, { "epoch": 57.429508196721315, "grad_norm": 3.8763742446899414, "learning_rate": 8.091359905744553e-06, "loss": 0.1984, "step": 17516 }, { "epoch": 57.4327868852459, "grad_norm": 3.5315358638763428, "learning_rate": 8.090317550731575e-06, "loss": 0.3013, "step": 17517 }, { "epoch": 57.43606557377049, "grad_norm": 2.901777982711792, "learning_rate": 8.08927521725225e-06, "loss": 0.1591, "step": 17518 }, { "epoch": 57.43934426229508, "grad_norm": 3.275902509689331, "learning_rate": 8.088232905318329e-06, "loss": 0.2072, "step": 17519 }, { "epoch": 57.442622950819676, "grad_norm": 3.2192578315734863, "learning_rate": 8.087190614941577e-06, "loss": 0.2489, "step": 17520 }, { "epoch": 57.445901639344264, "grad_norm": 2.4966092109680176, "learning_rate": 8.086148346133736e-06, "loss": 0.1156, "step": 17521 }, { "epoch": 57.44918032786885, "grad_norm": 4.628643035888672, "learning_rate": 8.085106098906565e-06, "loss": 0.4619, "step": 17522 }, { "epoch": 57.45245901639344, "grad_norm": 3.6882739067077637, "learning_rate": 8.084063873271809e-06, "loss": 0.3474, "step": 17523 }, { "epoch": 57.455737704918036, "grad_norm": 2.7838521003723145, "learning_rate": 8.083021669241227e-06, "loss": 0.2725, "step": 17524 }, { "epoch": 57.459016393442624, "grad_norm": 2.84049391746521, "learning_rate": 8.081979486826571e-06, "loss": 0.2252, "step": 17525 }, { "epoch": 57.46229508196721, "grad_norm": 3.15378999710083, "learning_rate": 8.080937326039587e-06, "loss": 0.2468, "step": 17526 }, { "epoch": 57.4655737704918, "grad_norm": 2.501293897628784, "learning_rate": 8.079895186892031e-06, "loss": 0.1705, "step": 17527 }, { "epoch": 57.4688524590164, "grad_norm": 3.3125009536743164, "learning_rate": 8.078853069395656e-06, "loss": 0.1049, "step": 17528 }, { "epoch": 57.472131147540985, "grad_norm": 3.41113018989563, "learning_rate": 8.077810973562209e-06, "loss": 0.2354, "step": 17529 }, { "epoch": 57.47540983606557, "grad_norm": 3.2635271549224854, "learning_rate": 8.07676889940344e-06, "loss": 0.3154, "step": 17530 }, { "epoch": 57.47868852459016, "grad_norm": 2.9643678665161133, "learning_rate": 8.075726846931102e-06, "loss": 0.0765, "step": 17531 }, { "epoch": 57.48196721311476, "grad_norm": 3.933040142059326, "learning_rate": 8.074684816156945e-06, "loss": 0.2591, "step": 17532 }, { "epoch": 57.485245901639345, "grad_norm": 3.435771942138672, "learning_rate": 8.073642807092716e-06, "loss": 0.1499, "step": 17533 }, { "epoch": 57.488524590163934, "grad_norm": 3.1895992755889893, "learning_rate": 8.072600819750171e-06, "loss": 0.2043, "step": 17534 }, { "epoch": 57.49180327868852, "grad_norm": 3.183331251144409, "learning_rate": 8.071558854141056e-06, "loss": 0.1132, "step": 17535 }, { "epoch": 57.49508196721312, "grad_norm": 4.043905735015869, "learning_rate": 8.070516910277115e-06, "loss": 0.2992, "step": 17536 }, { "epoch": 57.498360655737706, "grad_norm": 3.2305006980895996, "learning_rate": 8.069474988170107e-06, "loss": 0.2043, "step": 17537 }, { "epoch": 57.501639344262294, "grad_norm": 3.010917901992798, "learning_rate": 8.068433087831774e-06, "loss": 0.2061, "step": 17538 }, { "epoch": 57.50491803278688, "grad_norm": 2.340022087097168, "learning_rate": 8.067391209273868e-06, "loss": 0.0749, "step": 17539 }, { "epoch": 57.50819672131148, "grad_norm": 3.829141139984131, "learning_rate": 8.06634935250813e-06, "loss": 0.1783, "step": 17540 }, { "epoch": 57.511475409836066, "grad_norm": 2.780649423599243, "learning_rate": 8.065307517546319e-06, "loss": 0.1181, "step": 17541 }, { "epoch": 57.514754098360655, "grad_norm": 2.582801342010498, "learning_rate": 8.064265704400177e-06, "loss": 0.1545, "step": 17542 }, { "epoch": 57.51803278688524, "grad_norm": 3.215296745300293, "learning_rate": 8.063223913081452e-06, "loss": 0.1476, "step": 17543 }, { "epoch": 57.52131147540984, "grad_norm": 3.4862594604492188, "learning_rate": 8.062182143601891e-06, "loss": 0.3246, "step": 17544 }, { "epoch": 57.52459016393443, "grad_norm": 2.9644346237182617, "learning_rate": 8.061140395973237e-06, "loss": 0.1268, "step": 17545 }, { "epoch": 57.527868852459015, "grad_norm": 31.899253845214844, "learning_rate": 8.060098670207244e-06, "loss": 0.2235, "step": 17546 }, { "epoch": 57.5311475409836, "grad_norm": 3.7716214656829834, "learning_rate": 8.059056966315657e-06, "loss": 0.1144, "step": 17547 }, { "epoch": 57.5344262295082, "grad_norm": 2.7019264698028564, "learning_rate": 8.05801528431022e-06, "loss": 0.1672, "step": 17548 }, { "epoch": 57.53770491803279, "grad_norm": 3.3616673946380615, "learning_rate": 8.056973624202676e-06, "loss": 0.1374, "step": 17549 }, { "epoch": 57.540983606557376, "grad_norm": 3.1172351837158203, "learning_rate": 8.055931986004777e-06, "loss": 0.1459, "step": 17550 }, { "epoch": 57.544262295081964, "grad_norm": 5.694077968597412, "learning_rate": 8.05489036972827e-06, "loss": 0.1858, "step": 17551 }, { "epoch": 57.54754098360656, "grad_norm": 3.066429853439331, "learning_rate": 8.053848775384892e-06, "loss": 0.1928, "step": 17552 }, { "epoch": 57.55081967213115, "grad_norm": 4.127686500549316, "learning_rate": 8.052807202986392e-06, "loss": 0.2905, "step": 17553 }, { "epoch": 57.554098360655736, "grad_norm": 3.5632805824279785, "learning_rate": 8.051765652544517e-06, "loss": 0.2844, "step": 17554 }, { "epoch": 57.557377049180324, "grad_norm": 2.8457396030426025, "learning_rate": 8.050724124071012e-06, "loss": 0.2223, "step": 17555 }, { "epoch": 57.56065573770492, "grad_norm": 3.4592978954315186, "learning_rate": 8.049682617577615e-06, "loss": 0.1463, "step": 17556 }, { "epoch": 57.56393442622951, "grad_norm": 3.823115825653076, "learning_rate": 8.048641133076077e-06, "loss": 0.2853, "step": 17557 }, { "epoch": 57.5672131147541, "grad_norm": 3.1197543144226074, "learning_rate": 8.047599670578139e-06, "loss": 0.2813, "step": 17558 }, { "epoch": 57.570491803278685, "grad_norm": 3.6041653156280518, "learning_rate": 8.046558230095543e-06, "loss": 0.1685, "step": 17559 }, { "epoch": 57.57377049180328, "grad_norm": 3.3393514156341553, "learning_rate": 8.045516811640038e-06, "loss": 0.2051, "step": 17560 }, { "epoch": 57.57704918032787, "grad_norm": 3.17458438873291, "learning_rate": 8.044475415223361e-06, "loss": 0.2157, "step": 17561 }, { "epoch": 57.58032786885246, "grad_norm": 3.2664966583251953, "learning_rate": 8.043434040857254e-06, "loss": 0.1546, "step": 17562 }, { "epoch": 57.58360655737705, "grad_norm": 3.353119134902954, "learning_rate": 8.042392688553465e-06, "loss": 0.269, "step": 17563 }, { "epoch": 57.58688524590164, "grad_norm": 3.583974599838257, "learning_rate": 8.041351358323734e-06, "loss": 0.2632, "step": 17564 }, { "epoch": 57.59016393442623, "grad_norm": 5.652653217315674, "learning_rate": 8.040310050179805e-06, "loss": 0.2782, "step": 17565 }, { "epoch": 57.59344262295082, "grad_norm": 2.891862630844116, "learning_rate": 8.039268764133413e-06, "loss": 0.2027, "step": 17566 }, { "epoch": 57.59672131147541, "grad_norm": 2.8454911708831787, "learning_rate": 8.038227500196306e-06, "loss": 0.1189, "step": 17567 }, { "epoch": 57.6, "grad_norm": 3.9786553382873535, "learning_rate": 8.037186258380226e-06, "loss": 0.2707, "step": 17568 }, { "epoch": 57.60327868852459, "grad_norm": 3.1736061573028564, "learning_rate": 8.036145038696913e-06, "loss": 0.2179, "step": 17569 }, { "epoch": 57.60655737704918, "grad_norm": 3.4670467376708984, "learning_rate": 8.035103841158103e-06, "loss": 0.251, "step": 17570 }, { "epoch": 57.609836065573774, "grad_norm": 3.509944200515747, "learning_rate": 8.034062665775538e-06, "loss": 0.2964, "step": 17571 }, { "epoch": 57.61311475409836, "grad_norm": 2.6103038787841797, "learning_rate": 8.033021512560965e-06, "loss": 0.1707, "step": 17572 }, { "epoch": 57.61639344262295, "grad_norm": 3.1898484230041504, "learning_rate": 8.031980381526119e-06, "loss": 0.0692, "step": 17573 }, { "epoch": 57.61967213114754, "grad_norm": 3.007171154022217, "learning_rate": 8.030939272682741e-06, "loss": 0.1834, "step": 17574 }, { "epoch": 57.622950819672134, "grad_norm": 2.8886451721191406, "learning_rate": 8.029898186042564e-06, "loss": 0.1013, "step": 17575 }, { "epoch": 57.62622950819672, "grad_norm": 3.4605977535247803, "learning_rate": 8.028857121617339e-06, "loss": 0.1508, "step": 17576 }, { "epoch": 57.62950819672131, "grad_norm": 2.4846982955932617, "learning_rate": 8.0278160794188e-06, "loss": 0.2685, "step": 17577 }, { "epoch": 57.6327868852459, "grad_norm": 4.311188697814941, "learning_rate": 8.026775059458685e-06, "loss": 0.236, "step": 17578 }, { "epoch": 57.636065573770495, "grad_norm": 3.4565324783325195, "learning_rate": 8.025734061748727e-06, "loss": 0.197, "step": 17579 }, { "epoch": 57.63934426229508, "grad_norm": 3.599942922592163, "learning_rate": 8.024693086300677e-06, "loss": 0.2422, "step": 17580 }, { "epoch": 57.64262295081967, "grad_norm": 3.5150272846221924, "learning_rate": 8.023652133126264e-06, "loss": 0.3168, "step": 17581 }, { "epoch": 57.64590163934426, "grad_norm": 3.2546043395996094, "learning_rate": 8.022611202237228e-06, "loss": 0.3632, "step": 17582 }, { "epoch": 57.649180327868855, "grad_norm": 4.7494683265686035, "learning_rate": 8.021570293645307e-06, "loss": 0.27, "step": 17583 }, { "epoch": 57.65245901639344, "grad_norm": 2.8817837238311768, "learning_rate": 8.020529407362237e-06, "loss": 0.1675, "step": 17584 }, { "epoch": 57.65573770491803, "grad_norm": 2.6794347763061523, "learning_rate": 8.019488543399754e-06, "loss": 0.1761, "step": 17585 }, { "epoch": 57.65901639344262, "grad_norm": 4.284834384918213, "learning_rate": 8.018447701769602e-06, "loss": 0.1626, "step": 17586 }, { "epoch": 57.662295081967216, "grad_norm": 11.944489479064941, "learning_rate": 8.01740688248351e-06, "loss": 0.1203, "step": 17587 }, { "epoch": 57.665573770491804, "grad_norm": 3.2126142978668213, "learning_rate": 8.016366085553215e-06, "loss": 0.1598, "step": 17588 }, { "epoch": 57.66885245901639, "grad_norm": 4.452249050140381, "learning_rate": 8.015325310990457e-06, "loss": 0.1213, "step": 17589 }, { "epoch": 57.67213114754098, "grad_norm": 3.498941421508789, "learning_rate": 8.014284558806971e-06, "loss": 0.1541, "step": 17590 }, { "epoch": 57.675409836065576, "grad_norm": 2.8419175148010254, "learning_rate": 8.01324382901449e-06, "loss": 0.1117, "step": 17591 }, { "epoch": 57.678688524590164, "grad_norm": 3.1706900596618652, "learning_rate": 8.012203121624747e-06, "loss": 0.3841, "step": 17592 }, { "epoch": 57.68196721311475, "grad_norm": 3.715505599975586, "learning_rate": 8.011162436649484e-06, "loss": 0.1742, "step": 17593 }, { "epoch": 57.68524590163934, "grad_norm": 3.4471683502197266, "learning_rate": 8.010121774100433e-06, "loss": 0.174, "step": 17594 }, { "epoch": 57.68852459016394, "grad_norm": 4.23895263671875, "learning_rate": 8.009081133989329e-06, "loss": 0.261, "step": 17595 }, { "epoch": 57.691803278688525, "grad_norm": 3.850900650024414, "learning_rate": 8.008040516327904e-06, "loss": 0.2085, "step": 17596 }, { "epoch": 57.69508196721311, "grad_norm": 2.8764028549194336, "learning_rate": 8.00699992112789e-06, "loss": 0.1376, "step": 17597 }, { "epoch": 57.6983606557377, "grad_norm": 3.0155816078186035, "learning_rate": 8.005959348401026e-06, "loss": 0.1596, "step": 17598 }, { "epoch": 57.7016393442623, "grad_norm": 3.0059125423431396, "learning_rate": 8.004918798159046e-06, "loss": 0.1944, "step": 17599 }, { "epoch": 57.704918032786885, "grad_norm": 3.379138708114624, "learning_rate": 8.00387827041368e-06, "loss": 0.3624, "step": 17600 }, { "epoch": 57.708196721311474, "grad_norm": 3.3914477825164795, "learning_rate": 8.00283776517666e-06, "loss": 0.0942, "step": 17601 }, { "epoch": 57.71147540983607, "grad_norm": 4.566068172454834, "learning_rate": 8.001797282459721e-06, "loss": 0.2457, "step": 17602 }, { "epoch": 57.71475409836066, "grad_norm": 3.796497106552124, "learning_rate": 8.000756822274597e-06, "loss": 0.2968, "step": 17603 }, { "epoch": 57.718032786885246, "grad_norm": 4.220440864562988, "learning_rate": 7.999716384633019e-06, "loss": 0.1348, "step": 17604 }, { "epoch": 57.721311475409834, "grad_norm": 3.159236192703247, "learning_rate": 7.998675969546714e-06, "loss": 0.1283, "step": 17605 }, { "epoch": 57.72459016393443, "grad_norm": 3.3678722381591797, "learning_rate": 7.997635577027423e-06, "loss": 0.3345, "step": 17606 }, { "epoch": 57.72786885245902, "grad_norm": 2.718348264694214, "learning_rate": 7.99659520708687e-06, "loss": 0.132, "step": 17607 }, { "epoch": 57.731147540983606, "grad_norm": 3.1782400608062744, "learning_rate": 7.99555485973679e-06, "loss": 0.2516, "step": 17608 }, { "epoch": 57.734426229508195, "grad_norm": 7.424276828765869, "learning_rate": 7.994514534988916e-06, "loss": 0.3259, "step": 17609 }, { "epoch": 57.73770491803279, "grad_norm": 3.6234967708587646, "learning_rate": 7.993474232854973e-06, "loss": 0.1067, "step": 17610 }, { "epoch": 57.74098360655738, "grad_norm": 3.083832263946533, "learning_rate": 7.992433953346694e-06, "loss": 0.3432, "step": 17611 }, { "epoch": 57.74426229508197, "grad_norm": 3.137346029281616, "learning_rate": 7.99139369647581e-06, "loss": 0.1068, "step": 17612 }, { "epoch": 57.747540983606555, "grad_norm": 3.190749406814575, "learning_rate": 7.99035346225405e-06, "loss": 0.1593, "step": 17613 }, { "epoch": 57.75081967213115, "grad_norm": 2.8868136405944824, "learning_rate": 7.989313250693143e-06, "loss": 0.1351, "step": 17614 }, { "epoch": 57.75409836065574, "grad_norm": 6.657105445861816, "learning_rate": 7.988273061804822e-06, "loss": 0.2735, "step": 17615 }, { "epoch": 57.75737704918033, "grad_norm": 3.304511308670044, "learning_rate": 7.987232895600813e-06, "loss": 0.2031, "step": 17616 }, { "epoch": 57.760655737704916, "grad_norm": 2.7573201656341553, "learning_rate": 7.986192752092847e-06, "loss": 0.2035, "step": 17617 }, { "epoch": 57.76393442622951, "grad_norm": 2.7993693351745605, "learning_rate": 7.985152631292649e-06, "loss": 0.1309, "step": 17618 }, { "epoch": 57.7672131147541, "grad_norm": 3.1594812870025635, "learning_rate": 7.984112533211951e-06, "loss": 0.1895, "step": 17619 }, { "epoch": 57.77049180327869, "grad_norm": 3.4807889461517334, "learning_rate": 7.983072457862482e-06, "loss": 0.2296, "step": 17620 }, { "epoch": 57.773770491803276, "grad_norm": 3.6884541511535645, "learning_rate": 7.98203240525597e-06, "loss": 0.2613, "step": 17621 }, { "epoch": 57.77704918032787, "grad_norm": 3.093647003173828, "learning_rate": 7.980992375404137e-06, "loss": 0.3114, "step": 17622 }, { "epoch": 57.78032786885246, "grad_norm": 3.536699056625366, "learning_rate": 7.979952368318713e-06, "loss": 0.2249, "step": 17623 }, { "epoch": 57.78360655737705, "grad_norm": 3.4486541748046875, "learning_rate": 7.97891238401143e-06, "loss": 0.2498, "step": 17624 }, { "epoch": 57.78688524590164, "grad_norm": 3.1871304512023926, "learning_rate": 7.97787242249401e-06, "loss": 0.1846, "step": 17625 }, { "epoch": 57.79016393442623, "grad_norm": 3.48160982131958, "learning_rate": 7.976832483778183e-06, "loss": 0.2545, "step": 17626 }, { "epoch": 57.79344262295082, "grad_norm": 2.895087242126465, "learning_rate": 7.975792567875666e-06, "loss": 0.2495, "step": 17627 }, { "epoch": 57.79672131147541, "grad_norm": 3.8126957416534424, "learning_rate": 7.974752674798198e-06, "loss": 0.2254, "step": 17628 }, { "epoch": 57.8, "grad_norm": 2.3891026973724365, "learning_rate": 7.9737128045575e-06, "loss": 0.145, "step": 17629 }, { "epoch": 57.80327868852459, "grad_norm": 3.5208661556243896, "learning_rate": 7.972672957165297e-06, "loss": 0.2167, "step": 17630 }, { "epoch": 57.80655737704918, "grad_norm": 3.736130714416504, "learning_rate": 7.97163313263331e-06, "loss": 0.1305, "step": 17631 }, { "epoch": 57.80983606557377, "grad_norm": 3.846830129623413, "learning_rate": 7.970593330973273e-06, "loss": 0.4153, "step": 17632 }, { "epoch": 57.81311475409836, "grad_norm": 3.4317626953125, "learning_rate": 7.969553552196905e-06, "loss": 0.3285, "step": 17633 }, { "epoch": 57.81639344262295, "grad_norm": 3.8426856994628906, "learning_rate": 7.968513796315932e-06, "loss": 0.3603, "step": 17634 }, { "epoch": 57.81967213114754, "grad_norm": 3.717552423477173, "learning_rate": 7.967474063342076e-06, "loss": 0.25, "step": 17635 }, { "epoch": 57.82295081967213, "grad_norm": 3.191178798675537, "learning_rate": 7.966434353287063e-06, "loss": 0.318, "step": 17636 }, { "epoch": 57.82622950819672, "grad_norm": 3.3422610759735107, "learning_rate": 7.965394666162621e-06, "loss": 0.3944, "step": 17637 }, { "epoch": 57.829508196721314, "grad_norm": 3.1178998947143555, "learning_rate": 7.964355001980466e-06, "loss": 0.1793, "step": 17638 }, { "epoch": 57.8327868852459, "grad_norm": 3.622002363204956, "learning_rate": 7.963315360752326e-06, "loss": 0.1949, "step": 17639 }, { "epoch": 57.83606557377049, "grad_norm": 5.320981979370117, "learning_rate": 7.962275742489925e-06, "loss": 0.2589, "step": 17640 }, { "epoch": 57.83934426229508, "grad_norm": 3.0734071731567383, "learning_rate": 7.96123614720498e-06, "loss": 0.2153, "step": 17641 }, { "epoch": 57.842622950819674, "grad_norm": 4.672945022583008, "learning_rate": 7.96019657490922e-06, "loss": 0.1555, "step": 17642 }, { "epoch": 57.84590163934426, "grad_norm": 2.3471882343292236, "learning_rate": 7.959157025614365e-06, "loss": 0.064, "step": 17643 }, { "epoch": 57.84918032786885, "grad_norm": 4.201703071594238, "learning_rate": 7.958117499332132e-06, "loss": 0.1939, "step": 17644 }, { "epoch": 57.85245901639344, "grad_norm": 3.0304863452911377, "learning_rate": 7.95707799607425e-06, "loss": 0.0691, "step": 17645 }, { "epoch": 57.855737704918035, "grad_norm": 2.532230854034424, "learning_rate": 7.95603851585244e-06, "loss": 0.097, "step": 17646 }, { "epoch": 57.85901639344262, "grad_norm": 3.3436007499694824, "learning_rate": 7.954999058678419e-06, "loss": 0.1319, "step": 17647 }, { "epoch": 57.86229508196721, "grad_norm": 3.610626459121704, "learning_rate": 7.953959624563911e-06, "loss": 0.2911, "step": 17648 }, { "epoch": 57.86557377049181, "grad_norm": 2.852417230606079, "learning_rate": 7.952920213520632e-06, "loss": 0.1414, "step": 17649 }, { "epoch": 57.868852459016395, "grad_norm": 3.191049814224243, "learning_rate": 7.95188082556031e-06, "loss": 0.0965, "step": 17650 }, { "epoch": 57.87213114754098, "grad_norm": 3.2626094818115234, "learning_rate": 7.950841460694661e-06, "loss": 0.231, "step": 17651 }, { "epoch": 57.87540983606557, "grad_norm": 3.139103651046753, "learning_rate": 7.949802118935403e-06, "loss": 0.1222, "step": 17652 }, { "epoch": 57.87868852459017, "grad_norm": 5.4706034660339355, "learning_rate": 7.948762800294256e-06, "loss": 0.2658, "step": 17653 }, { "epoch": 57.881967213114756, "grad_norm": 4.094654560089111, "learning_rate": 7.947723504782945e-06, "loss": 0.2726, "step": 17654 }, { "epoch": 57.885245901639344, "grad_norm": 12.156776428222656, "learning_rate": 7.946684232413185e-06, "loss": 0.1703, "step": 17655 }, { "epoch": 57.88852459016393, "grad_norm": 3.2609875202178955, "learning_rate": 7.945644983196695e-06, "loss": 0.1754, "step": 17656 }, { "epoch": 57.89180327868853, "grad_norm": 3.4584498405456543, "learning_rate": 7.944605757145191e-06, "loss": 0.1428, "step": 17657 }, { "epoch": 57.895081967213116, "grad_norm": 3.2410049438476562, "learning_rate": 7.943566554270397e-06, "loss": 0.1541, "step": 17658 }, { "epoch": 57.898360655737704, "grad_norm": 3.027221441268921, "learning_rate": 7.942527374584029e-06, "loss": 0.1464, "step": 17659 }, { "epoch": 57.90163934426229, "grad_norm": 3.0717570781707764, "learning_rate": 7.941488218097803e-06, "loss": 0.2525, "step": 17660 }, { "epoch": 57.90491803278689, "grad_norm": 3.0221235752105713, "learning_rate": 7.940449084823436e-06, "loss": 0.0917, "step": 17661 }, { "epoch": 57.90819672131148, "grad_norm": 3.361006021499634, "learning_rate": 7.939409974772648e-06, "loss": 0.1967, "step": 17662 }, { "epoch": 57.911475409836065, "grad_norm": 2.797074317932129, "learning_rate": 7.938370887957156e-06, "loss": 0.1336, "step": 17663 }, { "epoch": 57.91475409836065, "grad_norm": 3.2120044231414795, "learning_rate": 7.937331824388673e-06, "loss": 0.1841, "step": 17664 }, { "epoch": 57.91803278688525, "grad_norm": 4.004178524017334, "learning_rate": 7.936292784078921e-06, "loss": 0.3138, "step": 17665 }, { "epoch": 57.92131147540984, "grad_norm": 4.369551181793213, "learning_rate": 7.935253767039613e-06, "loss": 0.2206, "step": 17666 }, { "epoch": 57.924590163934425, "grad_norm": 3.1555092334747314, "learning_rate": 7.934214773282464e-06, "loss": 0.2912, "step": 17667 }, { "epoch": 57.927868852459014, "grad_norm": 2.7654378414154053, "learning_rate": 7.933175802819193e-06, "loss": 0.3799, "step": 17668 }, { "epoch": 57.93114754098361, "grad_norm": 3.0986568927764893, "learning_rate": 7.932136855661516e-06, "loss": 0.273, "step": 17669 }, { "epoch": 57.9344262295082, "grad_norm": 3.2432994842529297, "learning_rate": 7.93109793182114e-06, "loss": 0.141, "step": 17670 }, { "epoch": 57.937704918032786, "grad_norm": 3.0774757862091064, "learning_rate": 7.93005903130979e-06, "loss": 0.249, "step": 17671 }, { "epoch": 57.940983606557374, "grad_norm": 2.5594773292541504, "learning_rate": 7.929020154139178e-06, "loss": 0.145, "step": 17672 }, { "epoch": 57.94426229508197, "grad_norm": 3.5812549591064453, "learning_rate": 7.927981300321014e-06, "loss": 0.1562, "step": 17673 }, { "epoch": 57.94754098360656, "grad_norm": 3.354177713394165, "learning_rate": 7.926942469867018e-06, "loss": 0.1651, "step": 17674 }, { "epoch": 57.950819672131146, "grad_norm": 2.7772884368896484, "learning_rate": 7.925903662788897e-06, "loss": 0.1843, "step": 17675 }, { "epoch": 57.954098360655735, "grad_norm": 2.9700937271118164, "learning_rate": 7.924864879098371e-06, "loss": 0.1495, "step": 17676 }, { "epoch": 57.95737704918033, "grad_norm": 3.811811685562134, "learning_rate": 7.923826118807153e-06, "loss": 0.2991, "step": 17677 }, { "epoch": 57.96065573770492, "grad_norm": 4.150457859039307, "learning_rate": 7.922787381926954e-06, "loss": 0.2377, "step": 17678 }, { "epoch": 57.96393442622951, "grad_norm": 3.2949774265289307, "learning_rate": 7.921748668469481e-06, "loss": 0.1122, "step": 17679 }, { "epoch": 57.967213114754095, "grad_norm": 3.1848909854888916, "learning_rate": 7.92070997844646e-06, "loss": 0.264, "step": 17680 }, { "epoch": 57.97049180327869, "grad_norm": 3.554417133331299, "learning_rate": 7.919671311869593e-06, "loss": 0.1243, "step": 17681 }, { "epoch": 57.97377049180328, "grad_norm": 3.6228621006011963, "learning_rate": 7.918632668750596e-06, "loss": 0.2778, "step": 17682 }, { "epoch": 57.97704918032787, "grad_norm": 4.860867500305176, "learning_rate": 7.917594049101176e-06, "loss": 0.1676, "step": 17683 }, { "epoch": 57.980327868852456, "grad_norm": 2.8943185806274414, "learning_rate": 7.916555452933052e-06, "loss": 0.2254, "step": 17684 }, { "epoch": 57.98360655737705, "grad_norm": 3.6125199794769287, "learning_rate": 7.915516880257931e-06, "loss": 0.3206, "step": 17685 }, { "epoch": 57.98688524590164, "grad_norm": 3.3392794132232666, "learning_rate": 7.914478331087525e-06, "loss": 0.1958, "step": 17686 }, { "epoch": 57.99016393442623, "grad_norm": 2.4425995349884033, "learning_rate": 7.913439805433543e-06, "loss": 0.1637, "step": 17687 }, { "epoch": 57.993442622950816, "grad_norm": 3.5123977661132812, "learning_rate": 7.912401303307696e-06, "loss": 0.2529, "step": 17688 }, { "epoch": 57.99672131147541, "grad_norm": 2.852245330810547, "learning_rate": 7.911362824721696e-06, "loss": 0.3207, "step": 17689 }, { "epoch": 58.0, "grad_norm": 2.7110419273376465, "learning_rate": 7.91032436968725e-06, "loss": 0.101, "step": 17690 }, { "epoch": 58.00327868852459, "grad_norm": 3.5873801708221436, "learning_rate": 7.90928593821607e-06, "loss": 0.276, "step": 17691 }, { "epoch": 58.006557377049184, "grad_norm": 3.1981608867645264, "learning_rate": 7.908247530319866e-06, "loss": 0.2477, "step": 17692 }, { "epoch": 58.00983606557377, "grad_norm": 3.2615983486175537, "learning_rate": 7.907209146010348e-06, "loss": 0.2593, "step": 17693 }, { "epoch": 58.01311475409836, "grad_norm": 3.551462411880493, "learning_rate": 7.90617078529922e-06, "loss": 0.1639, "step": 17694 }, { "epoch": 58.01639344262295, "grad_norm": 3.351952314376831, "learning_rate": 7.905132448198195e-06, "loss": 0.341, "step": 17695 }, { "epoch": 58.019672131147544, "grad_norm": 3.004757881164551, "learning_rate": 7.904094134718975e-06, "loss": 0.2882, "step": 17696 }, { "epoch": 58.02295081967213, "grad_norm": 3.424903154373169, "learning_rate": 7.903055844873277e-06, "loss": 0.1714, "step": 17697 }, { "epoch": 58.02622950819672, "grad_norm": 2.5861780643463135, "learning_rate": 7.902017578672804e-06, "loss": 0.0823, "step": 17698 }, { "epoch": 58.02950819672131, "grad_norm": 3.4337120056152344, "learning_rate": 7.900979336129267e-06, "loss": 0.1407, "step": 17699 }, { "epoch": 58.032786885245905, "grad_norm": 3.2217624187469482, "learning_rate": 7.899941117254369e-06, "loss": 0.1189, "step": 17700 }, { "epoch": 58.03606557377049, "grad_norm": 3.37646746635437, "learning_rate": 7.898902922059814e-06, "loss": 0.2552, "step": 17701 }, { "epoch": 58.03934426229508, "grad_norm": 3.3645241260528564, "learning_rate": 7.897864750557317e-06, "loss": 0.2178, "step": 17702 }, { "epoch": 58.04262295081967, "grad_norm": 3.492408514022827, "learning_rate": 7.89682660275858e-06, "loss": 0.2368, "step": 17703 }, { "epoch": 58.045901639344265, "grad_norm": 3.012005090713501, "learning_rate": 7.895788478675312e-06, "loss": 0.2024, "step": 17704 }, { "epoch": 58.049180327868854, "grad_norm": 4.186577320098877, "learning_rate": 7.894750378319212e-06, "loss": 0.234, "step": 17705 }, { "epoch": 58.05245901639344, "grad_norm": 3.038428783416748, "learning_rate": 7.893712301701992e-06, "loss": 0.1211, "step": 17706 }, { "epoch": 58.05573770491803, "grad_norm": 2.946012258529663, "learning_rate": 7.89267424883536e-06, "loss": 0.2709, "step": 17707 }, { "epoch": 58.059016393442626, "grad_norm": 2.3075661659240723, "learning_rate": 7.891636219731013e-06, "loss": 0.0555, "step": 17708 }, { "epoch": 58.062295081967214, "grad_norm": 2.8127918243408203, "learning_rate": 7.890598214400658e-06, "loss": 0.1299, "step": 17709 }, { "epoch": 58.0655737704918, "grad_norm": 3.2097861766815186, "learning_rate": 7.889560232856003e-06, "loss": 0.3357, "step": 17710 }, { "epoch": 58.06885245901639, "grad_norm": 2.7448370456695557, "learning_rate": 7.888522275108753e-06, "loss": 0.0989, "step": 17711 }, { "epoch": 58.072131147540986, "grad_norm": 3.6706836223602295, "learning_rate": 7.88748434117061e-06, "loss": 0.235, "step": 17712 }, { "epoch": 58.075409836065575, "grad_norm": 3.71612286567688, "learning_rate": 7.886446431053277e-06, "loss": 0.2079, "step": 17713 }, { "epoch": 58.07868852459016, "grad_norm": 3.4005610942840576, "learning_rate": 7.885408544768453e-06, "loss": 0.1151, "step": 17714 }, { "epoch": 58.08196721311475, "grad_norm": 3.2659716606140137, "learning_rate": 7.884370682327851e-06, "loss": 0.2261, "step": 17715 }, { "epoch": 58.08524590163935, "grad_norm": 3.2764358520507812, "learning_rate": 7.88333284374317e-06, "loss": 0.3733, "step": 17716 }, { "epoch": 58.088524590163935, "grad_norm": 3.1837410926818848, "learning_rate": 7.882295029026108e-06, "loss": 0.5016, "step": 17717 }, { "epoch": 58.09180327868852, "grad_norm": 3.0231823921203613, "learning_rate": 7.881257238188373e-06, "loss": 0.2513, "step": 17718 }, { "epoch": 58.09508196721311, "grad_norm": 3.59637451171875, "learning_rate": 7.880219471241667e-06, "loss": 0.2417, "step": 17719 }, { "epoch": 58.09836065573771, "grad_norm": 3.464928150177002, "learning_rate": 7.87918172819769e-06, "loss": 0.2512, "step": 17720 }, { "epoch": 58.101639344262296, "grad_norm": 3.6769816875457764, "learning_rate": 7.878144009068144e-06, "loss": 0.3378, "step": 17721 }, { "epoch": 58.104918032786884, "grad_norm": 3.2111377716064453, "learning_rate": 7.877106313864729e-06, "loss": 0.1705, "step": 17722 }, { "epoch": 58.10819672131147, "grad_norm": 3.147364854812622, "learning_rate": 7.876068642599148e-06, "loss": 0.2447, "step": 17723 }, { "epoch": 58.11147540983607, "grad_norm": 5.138496398925781, "learning_rate": 7.875030995283102e-06, "loss": 0.2545, "step": 17724 }, { "epoch": 58.114754098360656, "grad_norm": 3.306262969970703, "learning_rate": 7.873993371928293e-06, "loss": 0.1995, "step": 17725 }, { "epoch": 58.118032786885244, "grad_norm": 12.009943008422852, "learning_rate": 7.87295577254642e-06, "loss": 0.1756, "step": 17726 }, { "epoch": 58.12131147540983, "grad_norm": 3.6324357986450195, "learning_rate": 7.871918197149176e-06, "loss": 0.1324, "step": 17727 }, { "epoch": 58.12459016393443, "grad_norm": 2.9330270290374756, "learning_rate": 7.870880645748271e-06, "loss": 0.2009, "step": 17728 }, { "epoch": 58.12786885245902, "grad_norm": 3.0306203365325928, "learning_rate": 7.8698431183554e-06, "loss": 0.3228, "step": 17729 }, { "epoch": 58.131147540983605, "grad_norm": 7.430261611938477, "learning_rate": 7.868805614982264e-06, "loss": 0.1794, "step": 17730 }, { "epoch": 58.13442622950819, "grad_norm": 3.462306261062622, "learning_rate": 7.867768135640556e-06, "loss": 0.3603, "step": 17731 }, { "epoch": 58.13770491803279, "grad_norm": 3.6769766807556152, "learning_rate": 7.866730680341984e-06, "loss": 0.1708, "step": 17732 }, { "epoch": 58.14098360655738, "grad_norm": 5.42893648147583, "learning_rate": 7.86569324909824e-06, "loss": 0.3278, "step": 17733 }, { "epoch": 58.144262295081965, "grad_norm": 3.203467607498169, "learning_rate": 7.864655841921027e-06, "loss": 0.2714, "step": 17734 }, { "epoch": 58.14754098360656, "grad_norm": 2.954550266265869, "learning_rate": 7.863618458822031e-06, "loss": 0.2471, "step": 17735 }, { "epoch": 58.15081967213115, "grad_norm": 3.344945192337036, "learning_rate": 7.862581099812966e-06, "loss": 0.1867, "step": 17736 }, { "epoch": 58.15409836065574, "grad_norm": 3.039592742919922, "learning_rate": 7.86154376490552e-06, "loss": 0.1266, "step": 17737 }, { "epoch": 58.157377049180326, "grad_norm": 3.064788818359375, "learning_rate": 7.860506454111392e-06, "loss": 0.1486, "step": 17738 }, { "epoch": 58.16065573770492, "grad_norm": 3.2984914779663086, "learning_rate": 7.859469167442278e-06, "loss": 0.0961, "step": 17739 }, { "epoch": 58.16393442622951, "grad_norm": 3.406377077102661, "learning_rate": 7.85843190490987e-06, "loss": 0.2785, "step": 17740 }, { "epoch": 58.1672131147541, "grad_norm": 4.368869304656982, "learning_rate": 7.857394666525873e-06, "loss": 0.3269, "step": 17741 }, { "epoch": 58.170491803278686, "grad_norm": 2.9688198566436768, "learning_rate": 7.856357452301981e-06, "loss": 0.1623, "step": 17742 }, { "epoch": 58.17377049180328, "grad_norm": 5.978414058685303, "learning_rate": 7.855320262249883e-06, "loss": 0.4245, "step": 17743 }, { "epoch": 58.17704918032787, "grad_norm": 3.106160879135132, "learning_rate": 7.85428309638128e-06, "loss": 0.1533, "step": 17744 }, { "epoch": 58.18032786885246, "grad_norm": 3.2338461875915527, "learning_rate": 7.853245954707868e-06, "loss": 0.2831, "step": 17745 }, { "epoch": 58.18360655737705, "grad_norm": 3.1490976810455322, "learning_rate": 7.852208837241337e-06, "loss": 0.2809, "step": 17746 }, { "epoch": 58.18688524590164, "grad_norm": 2.9243552684783936, "learning_rate": 7.851171743993388e-06, "loss": 0.2009, "step": 17747 }, { "epoch": 58.19016393442623, "grad_norm": 3.0097131729125977, "learning_rate": 7.85013467497571e-06, "loss": 0.2064, "step": 17748 }, { "epoch": 58.19344262295082, "grad_norm": 3.144428014755249, "learning_rate": 7.849097630199996e-06, "loss": 0.2386, "step": 17749 }, { "epoch": 58.19672131147541, "grad_norm": 2.5993599891662598, "learning_rate": 7.848060609677948e-06, "loss": 0.1952, "step": 17750 }, { "epoch": 58.2, "grad_norm": 2.9560041427612305, "learning_rate": 7.847023613421251e-06, "loss": 0.1576, "step": 17751 }, { "epoch": 58.20327868852459, "grad_norm": 3.0985193252563477, "learning_rate": 7.845986641441604e-06, "loss": 0.3076, "step": 17752 }, { "epoch": 58.20655737704918, "grad_norm": 3.3923845291137695, "learning_rate": 7.844949693750691e-06, "loss": 0.35, "step": 17753 }, { "epoch": 58.20983606557377, "grad_norm": 2.4670517444610596, "learning_rate": 7.843912770360218e-06, "loss": 0.1582, "step": 17754 }, { "epoch": 58.21311475409836, "grad_norm": 3.2621119022369385, "learning_rate": 7.842875871281868e-06, "loss": 0.1067, "step": 17755 }, { "epoch": 58.21639344262295, "grad_norm": 3.622824192047119, "learning_rate": 7.841838996527336e-06, "loss": 0.0976, "step": 17756 }, { "epoch": 58.21967213114754, "grad_norm": 2.9082398414611816, "learning_rate": 7.840802146108308e-06, "loss": 0.2435, "step": 17757 }, { "epoch": 58.22295081967213, "grad_norm": 3.4839015007019043, "learning_rate": 7.839765320036486e-06, "loss": 0.5027, "step": 17758 }, { "epoch": 58.226229508196724, "grad_norm": 3.419144630432129, "learning_rate": 7.838728518323557e-06, "loss": 0.2028, "step": 17759 }, { "epoch": 58.22950819672131, "grad_norm": 3.3135290145874023, "learning_rate": 7.83769174098121e-06, "loss": 0.332, "step": 17760 }, { "epoch": 58.2327868852459, "grad_norm": 3.098195791244507, "learning_rate": 7.836654988021132e-06, "loss": 0.3054, "step": 17761 }, { "epoch": 58.23606557377049, "grad_norm": 3.117161989212036, "learning_rate": 7.835618259455024e-06, "loss": 0.1704, "step": 17762 }, { "epoch": 58.239344262295084, "grad_norm": 3.064884662628174, "learning_rate": 7.834581555294569e-06, "loss": 0.1081, "step": 17763 }, { "epoch": 58.24262295081967, "grad_norm": 3.2115278244018555, "learning_rate": 7.83354487555146e-06, "loss": 0.2117, "step": 17764 }, { "epoch": 58.24590163934426, "grad_norm": 3.16023588180542, "learning_rate": 7.832508220237384e-06, "loss": 0.2482, "step": 17765 }, { "epoch": 58.24918032786885, "grad_norm": 3.2808218002319336, "learning_rate": 7.831471589364027e-06, "loss": 0.3362, "step": 17766 }, { "epoch": 58.252459016393445, "grad_norm": 3.9110589027404785, "learning_rate": 7.830434982943089e-06, "loss": 0.2455, "step": 17767 }, { "epoch": 58.25573770491803, "grad_norm": 2.769927978515625, "learning_rate": 7.82939840098625e-06, "loss": 0.1432, "step": 17768 }, { "epoch": 58.25901639344262, "grad_norm": 3.3192930221557617, "learning_rate": 7.828361843505198e-06, "loss": 0.181, "step": 17769 }, { "epoch": 58.26229508196721, "grad_norm": 3.3536312580108643, "learning_rate": 7.827325310511627e-06, "loss": 0.1311, "step": 17770 }, { "epoch": 58.265573770491805, "grad_norm": 2.6990885734558105, "learning_rate": 7.826288802017222e-06, "loss": 0.1468, "step": 17771 }, { "epoch": 58.268852459016394, "grad_norm": 2.7390944957733154, "learning_rate": 7.82525231803367e-06, "loss": 0.2071, "step": 17772 }, { "epoch": 58.27213114754098, "grad_norm": 3.987229824066162, "learning_rate": 7.824215858572661e-06, "loss": 0.2655, "step": 17773 }, { "epoch": 58.27540983606557, "grad_norm": 2.747349500656128, "learning_rate": 7.823179423645877e-06, "loss": 0.0999, "step": 17774 }, { "epoch": 58.278688524590166, "grad_norm": 3.5309481620788574, "learning_rate": 7.822143013265014e-06, "loss": 0.1998, "step": 17775 }, { "epoch": 58.281967213114754, "grad_norm": 9.091068267822266, "learning_rate": 7.821106627441748e-06, "loss": 0.2026, "step": 17776 }, { "epoch": 58.28524590163934, "grad_norm": 2.9238855838775635, "learning_rate": 7.820070266187772e-06, "loss": 0.222, "step": 17777 }, { "epoch": 58.28852459016394, "grad_norm": 3.5402469635009766, "learning_rate": 7.819033929514772e-06, "loss": 0.2528, "step": 17778 }, { "epoch": 58.291803278688526, "grad_norm": 2.5046324729919434, "learning_rate": 7.817997617434427e-06, "loss": 0.1415, "step": 17779 }, { "epoch": 58.295081967213115, "grad_norm": 2.6288816928863525, "learning_rate": 7.816961329958432e-06, "loss": 0.1191, "step": 17780 }, { "epoch": 58.2983606557377, "grad_norm": 2.855059862136841, "learning_rate": 7.815925067098466e-06, "loss": 0.1388, "step": 17781 }, { "epoch": 58.3016393442623, "grad_norm": 4.405233860015869, "learning_rate": 7.814888828866219e-06, "loss": 0.3059, "step": 17782 }, { "epoch": 58.30491803278689, "grad_norm": 2.868034601211548, "learning_rate": 7.813852615273366e-06, "loss": 0.1777, "step": 17783 }, { "epoch": 58.308196721311475, "grad_norm": 4.13394832611084, "learning_rate": 7.812816426331602e-06, "loss": 0.1356, "step": 17784 }, { "epoch": 58.31147540983606, "grad_norm": 3.0642106533050537, "learning_rate": 7.811780262052608e-06, "loss": 0.2133, "step": 17785 }, { "epoch": 58.31475409836066, "grad_norm": 2.6817917823791504, "learning_rate": 7.810744122448067e-06, "loss": 0.2062, "step": 17786 }, { "epoch": 58.31803278688525, "grad_norm": 3.568554162979126, "learning_rate": 7.809708007529656e-06, "loss": 0.2248, "step": 17787 }, { "epoch": 58.321311475409836, "grad_norm": 2.563185691833496, "learning_rate": 7.808671917309071e-06, "loss": 0.2147, "step": 17788 }, { "epoch": 58.324590163934424, "grad_norm": 2.976378917694092, "learning_rate": 7.807635851797987e-06, "loss": 0.1828, "step": 17789 }, { "epoch": 58.32786885245902, "grad_norm": 3.157813787460327, "learning_rate": 7.806599811008089e-06, "loss": 0.3898, "step": 17790 }, { "epoch": 58.33114754098361, "grad_norm": 3.221712589263916, "learning_rate": 7.805563794951059e-06, "loss": 0.2461, "step": 17791 }, { "epoch": 58.334426229508196, "grad_norm": 2.5282397270202637, "learning_rate": 7.804527803638574e-06, "loss": 0.2162, "step": 17792 }, { "epoch": 58.337704918032784, "grad_norm": 2.743398427963257, "learning_rate": 7.803491837082324e-06, "loss": 0.2238, "step": 17793 }, { "epoch": 58.34098360655738, "grad_norm": 2.912062168121338, "learning_rate": 7.802455895293988e-06, "loss": 0.1845, "step": 17794 }, { "epoch": 58.34426229508197, "grad_norm": 2.9192256927490234, "learning_rate": 7.80141997828525e-06, "loss": 0.1389, "step": 17795 }, { "epoch": 58.34754098360656, "grad_norm": 2.328416585922241, "learning_rate": 7.800384086067779e-06, "loss": 0.1085, "step": 17796 }, { "epoch": 58.350819672131145, "grad_norm": 2.3369786739349365, "learning_rate": 7.79934821865327e-06, "loss": 0.0852, "step": 17797 }, { "epoch": 58.35409836065574, "grad_norm": 3.1925108432769775, "learning_rate": 7.798312376053398e-06, "loss": 0.2062, "step": 17798 }, { "epoch": 58.35737704918033, "grad_norm": 2.7928717136383057, "learning_rate": 7.79727655827984e-06, "loss": 0.2162, "step": 17799 }, { "epoch": 58.36065573770492, "grad_norm": 2.878239393234253, "learning_rate": 7.796240765344281e-06, "loss": 0.1508, "step": 17800 }, { "epoch": 58.363934426229505, "grad_norm": 3.097058057785034, "learning_rate": 7.795204997258402e-06, "loss": 0.0971, "step": 17801 }, { "epoch": 58.3672131147541, "grad_norm": 3.276439666748047, "learning_rate": 7.794169254033874e-06, "loss": 0.2121, "step": 17802 }, { "epoch": 58.37049180327869, "grad_norm": 3.336782932281494, "learning_rate": 7.793133535682384e-06, "loss": 0.2835, "step": 17803 }, { "epoch": 58.37377049180328, "grad_norm": 2.7242605686187744, "learning_rate": 7.79209784221561e-06, "loss": 0.2912, "step": 17804 }, { "epoch": 58.377049180327866, "grad_norm": 2.4501900672912598, "learning_rate": 7.79106217364522e-06, "loss": 0.2038, "step": 17805 }, { "epoch": 58.38032786885246, "grad_norm": 4.071418762207031, "learning_rate": 7.790026529982909e-06, "loss": 0.2827, "step": 17806 }, { "epoch": 58.38360655737705, "grad_norm": 2.872889757156372, "learning_rate": 7.788990911240344e-06, "loss": 0.3552, "step": 17807 }, { "epoch": 58.38688524590164, "grad_norm": 2.944488048553467, "learning_rate": 7.787955317429208e-06, "loss": 0.1194, "step": 17808 }, { "epoch": 58.390163934426226, "grad_norm": 2.2331085205078125, "learning_rate": 7.78691974856117e-06, "loss": 0.0404, "step": 17809 }, { "epoch": 58.39344262295082, "grad_norm": 2.644595146179199, "learning_rate": 7.785884204647916e-06, "loss": 0.0864, "step": 17810 }, { "epoch": 58.39672131147541, "grad_norm": 3.5175914764404297, "learning_rate": 7.784848685701121e-06, "loss": 0.2132, "step": 17811 }, { "epoch": 58.4, "grad_norm": 4.0090131759643555, "learning_rate": 7.78381319173246e-06, "loss": 0.2009, "step": 17812 }, { "epoch": 58.40327868852459, "grad_norm": 2.5529379844665527, "learning_rate": 7.782777722753605e-06, "loss": 0.1583, "step": 17813 }, { "epoch": 58.40655737704918, "grad_norm": 2.5264673233032227, "learning_rate": 7.781742278776241e-06, "loss": 0.1185, "step": 17814 }, { "epoch": 58.40983606557377, "grad_norm": 3.783153533935547, "learning_rate": 7.78070685981204e-06, "loss": 0.1796, "step": 17815 }, { "epoch": 58.41311475409836, "grad_norm": 2.809274435043335, "learning_rate": 7.779671465872676e-06, "loss": 0.2222, "step": 17816 }, { "epoch": 58.41639344262295, "grad_norm": 2.882859230041504, "learning_rate": 7.778636096969823e-06, "loss": 0.1041, "step": 17817 }, { "epoch": 58.41967213114754, "grad_norm": 3.6577324867248535, "learning_rate": 7.777600753115157e-06, "loss": 0.1066, "step": 17818 }, { "epoch": 58.42295081967213, "grad_norm": 3.0411651134490967, "learning_rate": 7.776565434320354e-06, "loss": 0.1776, "step": 17819 }, { "epoch": 58.42622950819672, "grad_norm": 3.1995115280151367, "learning_rate": 7.775530140597089e-06, "loss": 0.2908, "step": 17820 }, { "epoch": 58.429508196721315, "grad_norm": 2.838834762573242, "learning_rate": 7.774494871957036e-06, "loss": 0.2328, "step": 17821 }, { "epoch": 58.4327868852459, "grad_norm": 2.3621129989624023, "learning_rate": 7.773459628411862e-06, "loss": 0.104, "step": 17822 }, { "epoch": 58.43606557377049, "grad_norm": 3.414196729660034, "learning_rate": 7.77242440997325e-06, "loss": 0.1371, "step": 17823 }, { "epoch": 58.43934426229508, "grad_norm": 4.0699310302734375, "learning_rate": 7.771389216652867e-06, "loss": 0.1286, "step": 17824 }, { "epoch": 58.442622950819676, "grad_norm": 2.5875823497772217, "learning_rate": 7.770354048462387e-06, "loss": 0.1002, "step": 17825 }, { "epoch": 58.445901639344264, "grad_norm": 3.303004026412964, "learning_rate": 7.769318905413483e-06, "loss": 0.1386, "step": 17826 }, { "epoch": 58.44918032786885, "grad_norm": 3.3981306552886963, "learning_rate": 7.76828378751783e-06, "loss": 0.1776, "step": 17827 }, { "epoch": 58.45245901639344, "grad_norm": 3.470942497253418, "learning_rate": 7.767248694787097e-06, "loss": 0.1429, "step": 17828 }, { "epoch": 58.455737704918036, "grad_norm": 3.027153968811035, "learning_rate": 7.766213627232957e-06, "loss": 0.1606, "step": 17829 }, { "epoch": 58.459016393442624, "grad_norm": 3.106494665145874, "learning_rate": 7.765178584867081e-06, "loss": 0.2444, "step": 17830 }, { "epoch": 58.46229508196721, "grad_norm": 2.954425811767578, "learning_rate": 7.764143567701138e-06, "loss": 0.1808, "step": 17831 }, { "epoch": 58.4655737704918, "grad_norm": 3.0467653274536133, "learning_rate": 7.763108575746802e-06, "loss": 0.2642, "step": 17832 }, { "epoch": 58.4688524590164, "grad_norm": 3.080272674560547, "learning_rate": 7.762073609015745e-06, "loss": 0.1103, "step": 17833 }, { "epoch": 58.472131147540985, "grad_norm": 3.5837247371673584, "learning_rate": 7.761038667519633e-06, "loss": 0.2618, "step": 17834 }, { "epoch": 58.47540983606557, "grad_norm": 3.9288229942321777, "learning_rate": 7.760003751270135e-06, "loss": 0.1597, "step": 17835 }, { "epoch": 58.47868852459016, "grad_norm": 2.924738645553589, "learning_rate": 7.758968860278927e-06, "loss": 0.0798, "step": 17836 }, { "epoch": 58.48196721311476, "grad_norm": 2.556980609893799, "learning_rate": 7.757933994557676e-06, "loss": 0.2073, "step": 17837 }, { "epoch": 58.485245901639345, "grad_norm": 2.9047179222106934, "learning_rate": 7.756899154118049e-06, "loss": 0.1342, "step": 17838 }, { "epoch": 58.488524590163934, "grad_norm": 3.03889799118042, "learning_rate": 7.755864338971714e-06, "loss": 0.1718, "step": 17839 }, { "epoch": 58.49180327868852, "grad_norm": 3.239441156387329, "learning_rate": 7.754829549130345e-06, "loss": 0.2163, "step": 17840 }, { "epoch": 58.49508196721312, "grad_norm": 3.097515344619751, "learning_rate": 7.753794784605608e-06, "loss": 0.1164, "step": 17841 }, { "epoch": 58.498360655737706, "grad_norm": 4.689516544342041, "learning_rate": 7.75276004540917e-06, "loss": 0.1557, "step": 17842 }, { "epoch": 58.501639344262294, "grad_norm": 3.0861313343048096, "learning_rate": 7.751725331552698e-06, "loss": 0.2246, "step": 17843 }, { "epoch": 58.50491803278688, "grad_norm": 3.540801763534546, "learning_rate": 7.750690643047858e-06, "loss": 0.1294, "step": 17844 }, { "epoch": 58.50819672131148, "grad_norm": 3.1990911960601807, "learning_rate": 7.749655979906323e-06, "loss": 0.1014, "step": 17845 }, { "epoch": 58.511475409836066, "grad_norm": 3.1517884731292725, "learning_rate": 7.748621342139757e-06, "loss": 0.348, "step": 17846 }, { "epoch": 58.514754098360655, "grad_norm": 3.0821917057037354, "learning_rate": 7.747586729759825e-06, "loss": 0.2411, "step": 17847 }, { "epoch": 58.51803278688524, "grad_norm": 3.670903444290161, "learning_rate": 7.746552142778191e-06, "loss": 0.2426, "step": 17848 }, { "epoch": 58.52131147540984, "grad_norm": 2.9142954349517822, "learning_rate": 7.74551758120653e-06, "loss": 0.2314, "step": 17849 }, { "epoch": 58.52459016393443, "grad_norm": 3.8876028060913086, "learning_rate": 7.744483045056502e-06, "loss": 0.2377, "step": 17850 }, { "epoch": 58.527868852459015, "grad_norm": 3.2095611095428467, "learning_rate": 7.743448534339768e-06, "loss": 0.259, "step": 17851 }, { "epoch": 58.5311475409836, "grad_norm": 3.7590408325195312, "learning_rate": 7.742414049068003e-06, "loss": 0.4039, "step": 17852 }, { "epoch": 58.5344262295082, "grad_norm": 3.3253262042999268, "learning_rate": 7.741379589252864e-06, "loss": 0.1216, "step": 17853 }, { "epoch": 58.53770491803279, "grad_norm": 3.0744831562042236, "learning_rate": 7.740345154906018e-06, "loss": 0.1295, "step": 17854 }, { "epoch": 58.540983606557376, "grad_norm": 2.758981227874756, "learning_rate": 7.739310746039133e-06, "loss": 0.2062, "step": 17855 }, { "epoch": 58.544262295081964, "grad_norm": 3.48150372505188, "learning_rate": 7.73827636266387e-06, "loss": 0.2166, "step": 17856 }, { "epoch": 58.54754098360656, "grad_norm": 3.3501803874969482, "learning_rate": 7.737242004791888e-06, "loss": 0.26, "step": 17857 }, { "epoch": 58.55081967213115, "grad_norm": 3.1945712566375732, "learning_rate": 7.736207672434857e-06, "loss": 0.1993, "step": 17858 }, { "epoch": 58.554098360655736, "grad_norm": 3.120760917663574, "learning_rate": 7.735173365604441e-06, "loss": 0.1238, "step": 17859 }, { "epoch": 58.557377049180324, "grad_norm": 2.5869693756103516, "learning_rate": 7.734139084312299e-06, "loss": 0.151, "step": 17860 }, { "epoch": 58.56065573770492, "grad_norm": 3.5695245265960693, "learning_rate": 7.73310482857009e-06, "loss": 0.2232, "step": 17861 }, { "epoch": 58.56393442622951, "grad_norm": 4.241247653961182, "learning_rate": 7.732070598389486e-06, "loss": 0.1992, "step": 17862 }, { "epoch": 58.5672131147541, "grad_norm": 2.8218297958374023, "learning_rate": 7.731036393782146e-06, "loss": 0.3161, "step": 17863 }, { "epoch": 58.570491803278685, "grad_norm": 2.5810043811798096, "learning_rate": 7.730002214759726e-06, "loss": 0.1872, "step": 17864 }, { "epoch": 58.57377049180328, "grad_norm": 3.226442813873291, "learning_rate": 7.728968061333894e-06, "loss": 0.2085, "step": 17865 }, { "epoch": 58.57704918032787, "grad_norm": 3.471841812133789, "learning_rate": 7.727933933516303e-06, "loss": 0.1656, "step": 17866 }, { "epoch": 58.58032786885246, "grad_norm": 3.1766154766082764, "learning_rate": 7.726899831318624e-06, "loss": 0.2131, "step": 17867 }, { "epoch": 58.58360655737705, "grad_norm": 3.4521303176879883, "learning_rate": 7.725865754752513e-06, "loss": 0.1576, "step": 17868 }, { "epoch": 58.58688524590164, "grad_norm": 3.8381686210632324, "learning_rate": 7.72483170382963e-06, "loss": 0.2843, "step": 17869 }, { "epoch": 58.59016393442623, "grad_norm": 2.979151725769043, "learning_rate": 7.72379767856163e-06, "loss": 0.271, "step": 17870 }, { "epoch": 58.59344262295082, "grad_norm": 2.8480751514434814, "learning_rate": 7.722763678960183e-06, "loss": 0.2074, "step": 17871 }, { "epoch": 58.59672131147541, "grad_norm": 2.668015480041504, "learning_rate": 7.721729705036942e-06, "loss": 0.144, "step": 17872 }, { "epoch": 58.6, "grad_norm": 2.6666407585144043, "learning_rate": 7.720695756803569e-06, "loss": 0.0595, "step": 17873 }, { "epoch": 58.60327868852459, "grad_norm": 4.068458557128906, "learning_rate": 7.719661834271717e-06, "loss": 0.2232, "step": 17874 }, { "epoch": 58.60655737704918, "grad_norm": 3.4062092304229736, "learning_rate": 7.718627937453052e-06, "loss": 0.1741, "step": 17875 }, { "epoch": 58.609836065573774, "grad_norm": 2.600450277328491, "learning_rate": 7.717594066359228e-06, "loss": 0.1702, "step": 17876 }, { "epoch": 58.61311475409836, "grad_norm": 3.447063684463501, "learning_rate": 7.716560221001906e-06, "loss": 0.1305, "step": 17877 }, { "epoch": 58.61639344262295, "grad_norm": 3.365882635116577, "learning_rate": 7.715526401392739e-06, "loss": 0.2816, "step": 17878 }, { "epoch": 58.61967213114754, "grad_norm": 4.483087539672852, "learning_rate": 7.714492607543387e-06, "loss": 0.2065, "step": 17879 }, { "epoch": 58.622950819672134, "grad_norm": 2.641953706741333, "learning_rate": 7.71345883946551e-06, "loss": 0.1912, "step": 17880 }, { "epoch": 58.62622950819672, "grad_norm": 3.4861936569213867, "learning_rate": 7.71242509717076e-06, "loss": 0.209, "step": 17881 }, { "epoch": 58.62950819672131, "grad_norm": 3.4536802768707275, "learning_rate": 7.711391380670797e-06, "loss": 0.2581, "step": 17882 }, { "epoch": 58.6327868852459, "grad_norm": 2.3980963230133057, "learning_rate": 7.710357689977273e-06, "loss": 0.1465, "step": 17883 }, { "epoch": 58.636065573770495, "grad_norm": 2.7086288928985596, "learning_rate": 7.709324025101847e-06, "loss": 0.1455, "step": 17884 }, { "epoch": 58.63934426229508, "grad_norm": 3.651885747909546, "learning_rate": 7.708290386056177e-06, "loss": 0.4215, "step": 17885 }, { "epoch": 58.64262295081967, "grad_norm": 3.680079698562622, "learning_rate": 7.707256772851914e-06, "loss": 0.1799, "step": 17886 }, { "epoch": 58.64590163934426, "grad_norm": 2.9321577548980713, "learning_rate": 7.706223185500712e-06, "loss": 0.1635, "step": 17887 }, { "epoch": 58.649180327868855, "grad_norm": 3.2583436965942383, "learning_rate": 7.705189624014233e-06, "loss": 0.1199, "step": 17888 }, { "epoch": 58.65245901639344, "grad_norm": 2.612875461578369, "learning_rate": 7.704156088404125e-06, "loss": 0.1187, "step": 17889 }, { "epoch": 58.65573770491803, "grad_norm": 2.6479811668395996, "learning_rate": 7.703122578682047e-06, "loss": 0.1169, "step": 17890 }, { "epoch": 58.65901639344262, "grad_norm": 3.3113207817077637, "learning_rate": 7.702089094859649e-06, "loss": 0.3847, "step": 17891 }, { "epoch": 58.662295081967216, "grad_norm": 3.402895212173462, "learning_rate": 7.70105563694858e-06, "loss": 0.439, "step": 17892 }, { "epoch": 58.665573770491804, "grad_norm": 3.2898168563842773, "learning_rate": 7.700022204960504e-06, "loss": 0.2675, "step": 17893 }, { "epoch": 58.66885245901639, "grad_norm": 2.771721601486206, "learning_rate": 7.69898879890707e-06, "loss": 0.1973, "step": 17894 }, { "epoch": 58.67213114754098, "grad_norm": 2.655526876449585, "learning_rate": 7.69795541879993e-06, "loss": 0.1548, "step": 17895 }, { "epoch": 58.675409836065576, "grad_norm": 3.1589231491088867, "learning_rate": 7.696922064650731e-06, "loss": 0.2138, "step": 17896 }, { "epoch": 58.678688524590164, "grad_norm": 3.04742693901062, "learning_rate": 7.695888736471135e-06, "loss": 0.1443, "step": 17897 }, { "epoch": 58.68196721311475, "grad_norm": 3.3283519744873047, "learning_rate": 7.69485543427279e-06, "loss": 0.3079, "step": 17898 }, { "epoch": 58.68524590163934, "grad_norm": 3.7316253185272217, "learning_rate": 7.693822158067345e-06, "loss": 0.135, "step": 17899 }, { "epoch": 58.68852459016394, "grad_norm": 2.7433366775512695, "learning_rate": 7.69278890786645e-06, "loss": 0.0688, "step": 17900 }, { "epoch": 58.691803278688525, "grad_norm": 5.177394866943359, "learning_rate": 7.69175568368176e-06, "loss": 0.2666, "step": 17901 }, { "epoch": 58.69508196721311, "grad_norm": 2.204953908920288, "learning_rate": 7.69072248552493e-06, "loss": 0.2467, "step": 17902 }, { "epoch": 58.6983606557377, "grad_norm": 3.0270938873291016, "learning_rate": 7.6896893134076e-06, "loss": 0.2831, "step": 17903 }, { "epoch": 58.7016393442623, "grad_norm": 2.956165075302124, "learning_rate": 7.688656167341426e-06, "loss": 0.1352, "step": 17904 }, { "epoch": 58.704918032786885, "grad_norm": 3.514404535293579, "learning_rate": 7.687623047338056e-06, "loss": 0.2273, "step": 17905 }, { "epoch": 58.708196721311474, "grad_norm": 3.781730890274048, "learning_rate": 7.686589953409142e-06, "loss": 0.2329, "step": 17906 }, { "epoch": 58.71147540983607, "grad_norm": 7.409000873565674, "learning_rate": 7.68555688556633e-06, "loss": 0.1919, "step": 17907 }, { "epoch": 58.71475409836066, "grad_norm": 2.957592248916626, "learning_rate": 7.684523843821273e-06, "loss": 0.1787, "step": 17908 }, { "epoch": 58.718032786885246, "grad_norm": 3.0513415336608887, "learning_rate": 7.683490828185615e-06, "loss": 0.1296, "step": 17909 }, { "epoch": 58.721311475409834, "grad_norm": 3.6832592487335205, "learning_rate": 7.682457838671006e-06, "loss": 0.4622, "step": 17910 }, { "epoch": 58.72459016393443, "grad_norm": 2.231808662414551, "learning_rate": 7.681424875289097e-06, "loss": 0.1444, "step": 17911 }, { "epoch": 58.72786885245902, "grad_norm": 10.676423072814941, "learning_rate": 7.680391938051534e-06, "loss": 0.1499, "step": 17912 }, { "epoch": 58.731147540983606, "grad_norm": 2.9272446632385254, "learning_rate": 7.679359026969959e-06, "loss": 0.183, "step": 17913 }, { "epoch": 58.734426229508195, "grad_norm": 3.277113676071167, "learning_rate": 7.678326142056028e-06, "loss": 0.1802, "step": 17914 }, { "epoch": 58.73770491803279, "grad_norm": 2.607841730117798, "learning_rate": 7.677293283321383e-06, "loss": 0.2437, "step": 17915 }, { "epoch": 58.74098360655738, "grad_norm": 3.8299500942230225, "learning_rate": 7.67626045077767e-06, "loss": 0.3449, "step": 17916 }, { "epoch": 58.74426229508197, "grad_norm": 3.036003351211548, "learning_rate": 7.675227644436538e-06, "loss": 0.1903, "step": 17917 }, { "epoch": 58.747540983606555, "grad_norm": 2.733506441116333, "learning_rate": 7.674194864309628e-06, "loss": 0.1498, "step": 17918 }, { "epoch": 58.75081967213115, "grad_norm": 3.88236141204834, "learning_rate": 7.673162110408592e-06, "loss": 0.3471, "step": 17919 }, { "epoch": 58.75409836065574, "grad_norm": 2.7783613204956055, "learning_rate": 7.672129382745075e-06, "loss": 0.2546, "step": 17920 }, { "epoch": 58.75737704918033, "grad_norm": 3.112161636352539, "learning_rate": 7.671096681330717e-06, "loss": 0.3109, "step": 17921 }, { "epoch": 58.760655737704916, "grad_norm": 2.784174919128418, "learning_rate": 7.670064006177162e-06, "loss": 0.2121, "step": 17922 }, { "epoch": 58.76393442622951, "grad_norm": 2.844036340713501, "learning_rate": 7.669031357296062e-06, "loss": 0.1254, "step": 17923 }, { "epoch": 58.7672131147541, "grad_norm": 3.1627604961395264, "learning_rate": 7.667998734699058e-06, "loss": 0.2153, "step": 17924 }, { "epoch": 58.77049180327869, "grad_norm": 2.588021993637085, "learning_rate": 7.66696613839779e-06, "loss": 0.072, "step": 17925 }, { "epoch": 58.773770491803276, "grad_norm": 3.16867733001709, "learning_rate": 7.665933568403903e-06, "loss": 0.2015, "step": 17926 }, { "epoch": 58.77704918032787, "grad_norm": 2.9652915000915527, "learning_rate": 7.664901024729047e-06, "loss": 0.1283, "step": 17927 }, { "epoch": 58.78032786885246, "grad_norm": 3.3324782848358154, "learning_rate": 7.663868507384857e-06, "loss": 0.2439, "step": 17928 }, { "epoch": 58.78360655737705, "grad_norm": 3.35351824760437, "learning_rate": 7.66283601638298e-06, "loss": 0.3014, "step": 17929 }, { "epoch": 58.78688524590164, "grad_norm": 3.4833133220672607, "learning_rate": 7.661803551735056e-06, "loss": 0.2717, "step": 17930 }, { "epoch": 58.79016393442623, "grad_norm": 4.1394476890563965, "learning_rate": 7.660771113452725e-06, "loss": 0.1287, "step": 17931 }, { "epoch": 58.79344262295082, "grad_norm": 3.764256238937378, "learning_rate": 7.659738701547637e-06, "loss": 0.1474, "step": 17932 }, { "epoch": 58.79672131147541, "grad_norm": 3.1044061183929443, "learning_rate": 7.658706316031425e-06, "loss": 0.1124, "step": 17933 }, { "epoch": 58.8, "grad_norm": 3.450129508972168, "learning_rate": 7.657673956915735e-06, "loss": 0.3114, "step": 17934 }, { "epoch": 58.80327868852459, "grad_norm": 2.6387176513671875, "learning_rate": 7.656641624212205e-06, "loss": 0.1102, "step": 17935 }, { "epoch": 58.80655737704918, "grad_norm": 2.8200697898864746, "learning_rate": 7.655609317932478e-06, "loss": 0.141, "step": 17936 }, { "epoch": 58.80983606557377, "grad_norm": 3.187333822250366, "learning_rate": 7.654577038088195e-06, "loss": 0.2901, "step": 17937 }, { "epoch": 58.81311475409836, "grad_norm": 3.131171464920044, "learning_rate": 7.653544784690995e-06, "loss": 0.1887, "step": 17938 }, { "epoch": 58.81639344262295, "grad_norm": 3.1028177738189697, "learning_rate": 7.652512557752513e-06, "loss": 0.2421, "step": 17939 }, { "epoch": 58.81967213114754, "grad_norm": 2.736994743347168, "learning_rate": 7.651480357284396e-06, "loss": 0.2422, "step": 17940 }, { "epoch": 58.82295081967213, "grad_norm": 3.393477201461792, "learning_rate": 7.650448183298279e-06, "loss": 0.2823, "step": 17941 }, { "epoch": 58.82622950819672, "grad_norm": 2.947114944458008, "learning_rate": 7.649416035805803e-06, "loss": 0.1242, "step": 17942 }, { "epoch": 58.829508196721314, "grad_norm": 3.7120790481567383, "learning_rate": 7.648383914818605e-06, "loss": 0.1932, "step": 17943 }, { "epoch": 58.8327868852459, "grad_norm": 2.8929219245910645, "learning_rate": 7.64735182034832e-06, "loss": 0.0897, "step": 17944 }, { "epoch": 58.83606557377049, "grad_norm": 2.8984177112579346, "learning_rate": 7.646319752406592e-06, "loss": 0.1289, "step": 17945 }, { "epoch": 58.83934426229508, "grad_norm": 5.1167521476745605, "learning_rate": 7.645287711005057e-06, "loss": 0.2279, "step": 17946 }, { "epoch": 58.842622950819674, "grad_norm": 3.245753765106201, "learning_rate": 7.644255696155352e-06, "loss": 0.2326, "step": 17947 }, { "epoch": 58.84590163934426, "grad_norm": 3.4627020359039307, "learning_rate": 7.643223707869108e-06, "loss": 0.2565, "step": 17948 }, { "epoch": 58.84918032786885, "grad_norm": 2.525602102279663, "learning_rate": 7.642191746157972e-06, "loss": 0.1509, "step": 17949 }, { "epoch": 58.85245901639344, "grad_norm": 2.8483026027679443, "learning_rate": 7.641159811033574e-06, "loss": 0.2694, "step": 17950 }, { "epoch": 58.855737704918035, "grad_norm": 2.509216547012329, "learning_rate": 7.640127902507553e-06, "loss": 0.1703, "step": 17951 }, { "epoch": 58.85901639344262, "grad_norm": 2.56479811668396, "learning_rate": 7.63909602059154e-06, "loss": 0.162, "step": 17952 }, { "epoch": 58.86229508196721, "grad_norm": 3.8338890075683594, "learning_rate": 7.638064165297177e-06, "loss": 0.0897, "step": 17953 }, { "epoch": 58.86557377049181, "grad_norm": 3.138481616973877, "learning_rate": 7.637032336636098e-06, "loss": 0.1987, "step": 17954 }, { "epoch": 58.868852459016395, "grad_norm": 3.4259908199310303, "learning_rate": 7.636000534619935e-06, "loss": 0.1535, "step": 17955 }, { "epoch": 58.87213114754098, "grad_norm": 3.6474928855895996, "learning_rate": 7.634968759260322e-06, "loss": 0.1874, "step": 17956 }, { "epoch": 58.87540983606557, "grad_norm": 3.1723077297210693, "learning_rate": 7.633937010568895e-06, "loss": 0.1252, "step": 17957 }, { "epoch": 58.87868852459017, "grad_norm": 2.865196704864502, "learning_rate": 7.632905288557291e-06, "loss": 0.1936, "step": 17958 }, { "epoch": 58.881967213114756, "grad_norm": 3.7544243335723877, "learning_rate": 7.631873593237141e-06, "loss": 0.1494, "step": 17959 }, { "epoch": 58.885245901639344, "grad_norm": 3.731995105743408, "learning_rate": 7.630841924620076e-06, "loss": 0.2951, "step": 17960 }, { "epoch": 58.88852459016393, "grad_norm": 3.2155020236968994, "learning_rate": 7.629810282717733e-06, "loss": 0.2721, "step": 17961 }, { "epoch": 58.89180327868853, "grad_norm": 3.9501585960388184, "learning_rate": 7.628778667541743e-06, "loss": 0.217, "step": 17962 }, { "epoch": 58.895081967213116, "grad_norm": 2.416469097137451, "learning_rate": 7.627747079103738e-06, "loss": 0.0499, "step": 17963 }, { "epoch": 58.898360655737704, "grad_norm": 2.8217639923095703, "learning_rate": 7.626715517415354e-06, "loss": 0.1551, "step": 17964 }, { "epoch": 58.90163934426229, "grad_norm": 3.460939407348633, "learning_rate": 7.6256839824882165e-06, "loss": 0.3067, "step": 17965 }, { "epoch": 58.90491803278689, "grad_norm": 3.473062753677368, "learning_rate": 7.6246524743339624e-06, "loss": 0.2434, "step": 17966 }, { "epoch": 58.90819672131148, "grad_norm": 2.5023818016052246, "learning_rate": 7.6236209929642226e-06, "loss": 0.2012, "step": 17967 }, { "epoch": 58.911475409836065, "grad_norm": 3.6361844539642334, "learning_rate": 7.6225895383906275e-06, "loss": 0.2212, "step": 17968 }, { "epoch": 58.91475409836065, "grad_norm": 2.5973665714263916, "learning_rate": 7.621558110624807e-06, "loss": 0.1511, "step": 17969 }, { "epoch": 58.91803278688525, "grad_norm": 3.4697325229644775, "learning_rate": 7.620526709678387e-06, "loss": 0.4289, "step": 17970 }, { "epoch": 58.92131147540984, "grad_norm": 3.33880352973938, "learning_rate": 7.619495335563008e-06, "loss": 0.0999, "step": 17971 }, { "epoch": 58.924590163934425, "grad_norm": 8.255590438842773, "learning_rate": 7.618463988290292e-06, "loss": 0.2683, "step": 17972 }, { "epoch": 58.927868852459014, "grad_norm": 2.515292167663574, "learning_rate": 7.617432667871873e-06, "loss": 0.2163, "step": 17973 }, { "epoch": 58.93114754098361, "grad_norm": 3.1595606803894043, "learning_rate": 7.616401374319372e-06, "loss": 0.1671, "step": 17974 }, { "epoch": 58.9344262295082, "grad_norm": 2.038079023361206, "learning_rate": 7.615370107644429e-06, "loss": 0.05, "step": 17975 }, { "epoch": 58.937704918032786, "grad_norm": 2.721902370452881, "learning_rate": 7.6143388678586675e-06, "loss": 0.2818, "step": 17976 }, { "epoch": 58.940983606557374, "grad_norm": 3.357088327407837, "learning_rate": 7.613307654973715e-06, "loss": 0.2271, "step": 17977 }, { "epoch": 58.94426229508197, "grad_norm": 4.081668853759766, "learning_rate": 7.612276469001196e-06, "loss": 0.2986, "step": 17978 }, { "epoch": 58.94754098360656, "grad_norm": 2.958043098449707, "learning_rate": 7.611245309952747e-06, "loss": 0.2565, "step": 17979 }, { "epoch": 58.950819672131146, "grad_norm": 2.870473623275757, "learning_rate": 7.610214177839992e-06, "loss": 0.1852, "step": 17980 }, { "epoch": 58.954098360655735, "grad_norm": 2.982203722000122, "learning_rate": 7.609183072674555e-06, "loss": 0.136, "step": 17981 }, { "epoch": 58.95737704918033, "grad_norm": 3.1839041709899902, "learning_rate": 7.608151994468066e-06, "loss": 0.1692, "step": 17982 }, { "epoch": 58.96065573770492, "grad_norm": 3.042084217071533, "learning_rate": 7.6071209432321444e-06, "loss": 0.1805, "step": 17983 }, { "epoch": 58.96393442622951, "grad_norm": 3.692568302154541, "learning_rate": 7.606089918978428e-06, "loss": 0.2028, "step": 17984 }, { "epoch": 58.967213114754095, "grad_norm": 3.6294572353363037, "learning_rate": 7.605058921718535e-06, "loss": 0.3255, "step": 17985 }, { "epoch": 58.97049180327869, "grad_norm": 3.054056406021118, "learning_rate": 7.6040279514640926e-06, "loss": 0.2331, "step": 17986 }, { "epoch": 58.97377049180328, "grad_norm": 3.2687885761260986, "learning_rate": 7.602997008226725e-06, "loss": 0.1803, "step": 17987 }, { "epoch": 58.97704918032787, "grad_norm": 2.7884702682495117, "learning_rate": 7.601966092018062e-06, "loss": 0.1169, "step": 17988 }, { "epoch": 58.980327868852456, "grad_norm": 3.3252246379852295, "learning_rate": 7.6009352028497205e-06, "loss": 0.1458, "step": 17989 }, { "epoch": 58.98360655737705, "grad_norm": 3.6499369144439697, "learning_rate": 7.599904340733333e-06, "loss": 0.1801, "step": 17990 }, { "epoch": 58.98688524590164, "grad_norm": 2.8077175617218018, "learning_rate": 7.5988735056805166e-06, "loss": 0.2432, "step": 17991 }, { "epoch": 58.99016393442623, "grad_norm": 2.590855360031128, "learning_rate": 7.597842697702897e-06, "loss": 0.0626, "step": 17992 }, { "epoch": 58.993442622950816, "grad_norm": 2.730454683303833, "learning_rate": 7.596811916812102e-06, "loss": 0.1189, "step": 17993 }, { "epoch": 58.99672131147541, "grad_norm": 3.8202617168426514, "learning_rate": 7.595781163019751e-06, "loss": 0.1394, "step": 17994 }, { "epoch": 59.0, "grad_norm": 3.0263760089874268, "learning_rate": 7.594750436337467e-06, "loss": 0.1847, "step": 17995 }, { "epoch": 59.00327868852459, "grad_norm": 3.8867502212524414, "learning_rate": 7.5937197367768675e-06, "loss": 0.1644, "step": 17996 }, { "epoch": 59.006557377049184, "grad_norm": 3.0759451389312744, "learning_rate": 7.592689064349585e-06, "loss": 0.1417, "step": 17997 }, { "epoch": 59.00983606557377, "grad_norm": 2.9549572467803955, "learning_rate": 7.591658419067237e-06, "loss": 0.2896, "step": 17998 }, { "epoch": 59.01311475409836, "grad_norm": 3.218093156814575, "learning_rate": 7.590627800941444e-06, "loss": 0.2524, "step": 17999 }, { "epoch": 59.01639344262295, "grad_norm": 2.616147518157959, "learning_rate": 7.589597209983823e-06, "loss": 0.1801, "step": 18000 }, { "epoch": 59.019672131147544, "grad_norm": 4.302707195281982, "learning_rate": 7.5885666462060035e-06, "loss": 0.2623, "step": 18001 }, { "epoch": 59.02295081967213, "grad_norm": 2.638270378112793, "learning_rate": 7.587536109619603e-06, "loss": 0.1187, "step": 18002 }, { "epoch": 59.02622950819672, "grad_norm": 2.4291152954101562, "learning_rate": 7.586505600236241e-06, "loss": 0.0821, "step": 18003 }, { "epoch": 59.02950819672131, "grad_norm": 2.8557660579681396, "learning_rate": 7.585475118067533e-06, "loss": 0.1862, "step": 18004 }, { "epoch": 59.032786885245905, "grad_norm": 2.873187780380249, "learning_rate": 7.584444663125109e-06, "loss": 0.356, "step": 18005 }, { "epoch": 59.03606557377049, "grad_norm": 3.616309642791748, "learning_rate": 7.583414235420583e-06, "loss": 0.2101, "step": 18006 }, { "epoch": 59.03934426229508, "grad_norm": 3.54854154586792, "learning_rate": 7.5823838349655736e-06, "loss": 0.0758, "step": 18007 }, { "epoch": 59.04262295081967, "grad_norm": 4.213241100311279, "learning_rate": 7.581353461771699e-06, "loss": 0.3243, "step": 18008 }, { "epoch": 59.045901639344265, "grad_norm": 2.030163049697876, "learning_rate": 7.580323115850576e-06, "loss": 0.0725, "step": 18009 }, { "epoch": 59.049180327868854, "grad_norm": 3.220432996749878, "learning_rate": 7.57929279721383e-06, "loss": 0.2918, "step": 18010 }, { "epoch": 59.05245901639344, "grad_norm": 3.739154577255249, "learning_rate": 7.578262505873074e-06, "loss": 0.1693, "step": 18011 }, { "epoch": 59.05573770491803, "grad_norm": 3.404796600341797, "learning_rate": 7.577232241839923e-06, "loss": 0.1655, "step": 18012 }, { "epoch": 59.059016393442626, "grad_norm": 2.1993067264556885, "learning_rate": 7.5762020051259995e-06, "loss": 0.2094, "step": 18013 }, { "epoch": 59.062295081967214, "grad_norm": 3.2191107273101807, "learning_rate": 7.57517179574292e-06, "loss": 0.1634, "step": 18014 }, { "epoch": 59.0655737704918, "grad_norm": 3.9131288528442383, "learning_rate": 7.574141613702297e-06, "loss": 0.081, "step": 18015 }, { "epoch": 59.06885245901639, "grad_norm": 3.199190378189087, "learning_rate": 7.573111459015753e-06, "loss": 0.2186, "step": 18016 }, { "epoch": 59.072131147540986, "grad_norm": 3.292691230773926, "learning_rate": 7.572081331694897e-06, "loss": 0.1951, "step": 18017 }, { "epoch": 59.075409836065575, "grad_norm": 4.773227214813232, "learning_rate": 7.571051231751352e-06, "loss": 0.2206, "step": 18018 }, { "epoch": 59.07868852459016, "grad_norm": 2.6457462310791016, "learning_rate": 7.5700211591967265e-06, "loss": 0.0775, "step": 18019 }, { "epoch": 59.08196721311475, "grad_norm": 3.6949472427368164, "learning_rate": 7.568991114042642e-06, "loss": 0.2278, "step": 18020 }, { "epoch": 59.08524590163935, "grad_norm": 2.7560348510742188, "learning_rate": 7.567961096300709e-06, "loss": 0.1065, "step": 18021 }, { "epoch": 59.088524590163935, "grad_norm": 2.9995553493499756, "learning_rate": 7.5669311059825404e-06, "loss": 0.1255, "step": 18022 }, { "epoch": 59.09180327868852, "grad_norm": 3.173858404159546, "learning_rate": 7.565901143099756e-06, "loss": 0.1484, "step": 18023 }, { "epoch": 59.09508196721311, "grad_norm": 2.595365285873413, "learning_rate": 7.564871207663968e-06, "loss": 0.2322, "step": 18024 }, { "epoch": 59.09836065573771, "grad_norm": 3.1385645866394043, "learning_rate": 7.563841299686791e-06, "loss": 0.1791, "step": 18025 }, { "epoch": 59.101639344262296, "grad_norm": 3.484689235687256, "learning_rate": 7.56281141917983e-06, "loss": 0.1553, "step": 18026 }, { "epoch": 59.104918032786884, "grad_norm": 2.642570734024048, "learning_rate": 7.561781566154709e-06, "loss": 0.1556, "step": 18027 }, { "epoch": 59.10819672131147, "grad_norm": 2.8244941234588623, "learning_rate": 7.560751740623035e-06, "loss": 0.1467, "step": 18028 }, { "epoch": 59.11147540983607, "grad_norm": 3.927572250366211, "learning_rate": 7.559721942596422e-06, "loss": 0.3833, "step": 18029 }, { "epoch": 59.114754098360656, "grad_norm": 4.007821559906006, "learning_rate": 7.558692172086477e-06, "loss": 0.186, "step": 18030 }, { "epoch": 59.118032786885244, "grad_norm": 2.8099992275238037, "learning_rate": 7.557662429104821e-06, "loss": 0.1763, "step": 18031 }, { "epoch": 59.12131147540983, "grad_norm": 4.3370232582092285, "learning_rate": 7.556632713663059e-06, "loss": 0.3492, "step": 18032 }, { "epoch": 59.12459016393443, "grad_norm": 2.9843392372131348, "learning_rate": 7.555603025772805e-06, "loss": 0.2159, "step": 18033 }, { "epoch": 59.12786885245902, "grad_norm": 3.1200876235961914, "learning_rate": 7.5545733654456676e-06, "loss": 0.1741, "step": 18034 }, { "epoch": 59.131147540983605, "grad_norm": 3.930351972579956, "learning_rate": 7.5535437326932545e-06, "loss": 0.2299, "step": 18035 }, { "epoch": 59.13442622950819, "grad_norm": 3.112123727798462, "learning_rate": 7.552514127527184e-06, "loss": 0.1494, "step": 18036 }, { "epoch": 59.13770491803279, "grad_norm": 2.956310749053955, "learning_rate": 7.551484549959061e-06, "loss": 0.3589, "step": 18037 }, { "epoch": 59.14098360655738, "grad_norm": 2.3360161781311035, "learning_rate": 7.550455000000493e-06, "loss": 0.102, "step": 18038 }, { "epoch": 59.144262295081965, "grad_norm": 3.723721504211426, "learning_rate": 7.549425477663093e-06, "loss": 0.1593, "step": 18039 }, { "epoch": 59.14754098360656, "grad_norm": 3.9522299766540527, "learning_rate": 7.54839598295847e-06, "loss": 0.2103, "step": 18040 }, { "epoch": 59.15081967213115, "grad_norm": 2.6099438667297363, "learning_rate": 7.54736651589823e-06, "loss": 0.223, "step": 18041 }, { "epoch": 59.15409836065574, "grad_norm": 26.62393569946289, "learning_rate": 7.5463370764939815e-06, "loss": 0.256, "step": 18042 }, { "epoch": 59.157377049180326, "grad_norm": 3.393932580947876, "learning_rate": 7.545307664757334e-06, "loss": 0.1974, "step": 18043 }, { "epoch": 59.16065573770492, "grad_norm": 3.4850120544433594, "learning_rate": 7.544278280699897e-06, "loss": 0.0973, "step": 18044 }, { "epoch": 59.16393442622951, "grad_norm": 3.4043147563934326, "learning_rate": 7.543248924333274e-06, "loss": 0.1727, "step": 18045 }, { "epoch": 59.1672131147541, "grad_norm": 3.0749244689941406, "learning_rate": 7.542219595669074e-06, "loss": 0.2217, "step": 18046 }, { "epoch": 59.170491803278686, "grad_norm": 3.144247055053711, "learning_rate": 7.541190294718905e-06, "loss": 0.2066, "step": 18047 }, { "epoch": 59.17377049180328, "grad_norm": 2.6411073207855225, "learning_rate": 7.540161021494368e-06, "loss": 0.1518, "step": 18048 }, { "epoch": 59.17704918032787, "grad_norm": 2.3663527965545654, "learning_rate": 7.539131776007077e-06, "loss": 0.1087, "step": 18049 }, { "epoch": 59.18032786885246, "grad_norm": 3.4091787338256836, "learning_rate": 7.538102558268634e-06, "loss": 0.1912, "step": 18050 }, { "epoch": 59.18360655737705, "grad_norm": 3.3611843585968018, "learning_rate": 7.5370733682906434e-06, "loss": 0.0742, "step": 18051 }, { "epoch": 59.18688524590164, "grad_norm": 4.4303507804870605, "learning_rate": 7.536044206084708e-06, "loss": 0.2655, "step": 18052 }, { "epoch": 59.19016393442623, "grad_norm": 3.199314832687378, "learning_rate": 7.535015071662441e-06, "loss": 0.3201, "step": 18053 }, { "epoch": 59.19344262295082, "grad_norm": 2.9224088191986084, "learning_rate": 7.533985965035441e-06, "loss": 0.084, "step": 18054 }, { "epoch": 59.19672131147541, "grad_norm": 14.786588668823242, "learning_rate": 7.532956886215313e-06, "loss": 0.2627, "step": 18055 }, { "epoch": 59.2, "grad_norm": 3.4160101413726807, "learning_rate": 7.531927835213657e-06, "loss": 0.1464, "step": 18056 }, { "epoch": 59.20327868852459, "grad_norm": 3.804828405380249, "learning_rate": 7.530898812042085e-06, "loss": 0.0781, "step": 18057 }, { "epoch": 59.20655737704918, "grad_norm": 5.158442974090576, "learning_rate": 7.5298698167121975e-06, "loss": 0.208, "step": 18058 }, { "epoch": 59.20983606557377, "grad_norm": 2.9757893085479736, "learning_rate": 7.528840849235595e-06, "loss": 0.307, "step": 18059 }, { "epoch": 59.21311475409836, "grad_norm": 2.1733977794647217, "learning_rate": 7.527811909623881e-06, "loss": 0.1124, "step": 18060 }, { "epoch": 59.21639344262295, "grad_norm": 3.1033623218536377, "learning_rate": 7.526782997888654e-06, "loss": 0.2439, "step": 18061 }, { "epoch": 59.21967213114754, "grad_norm": 3.437764883041382, "learning_rate": 7.525754114041526e-06, "loss": 0.1322, "step": 18062 }, { "epoch": 59.22295081967213, "grad_norm": 2.438985824584961, "learning_rate": 7.524725258094091e-06, "loss": 0.1887, "step": 18063 }, { "epoch": 59.226229508196724, "grad_norm": 2.9621880054473877, "learning_rate": 7.523696430057953e-06, "loss": 0.1049, "step": 18064 }, { "epoch": 59.22950819672131, "grad_norm": 2.5552594661712646, "learning_rate": 7.522667629944709e-06, "loss": 0.1115, "step": 18065 }, { "epoch": 59.2327868852459, "grad_norm": 3.7862071990966797, "learning_rate": 7.521638857765966e-06, "loss": 0.2528, "step": 18066 }, { "epoch": 59.23606557377049, "grad_norm": 2.834022283554077, "learning_rate": 7.520610113533322e-06, "loss": 0.1021, "step": 18067 }, { "epoch": 59.239344262295084, "grad_norm": 3.7101104259490967, "learning_rate": 7.519581397258376e-06, "loss": 0.501, "step": 18068 }, { "epoch": 59.24262295081967, "grad_norm": 2.684093475341797, "learning_rate": 7.518552708952728e-06, "loss": 0.1019, "step": 18069 }, { "epoch": 59.24590163934426, "grad_norm": 4.2229814529418945, "learning_rate": 7.51752404862798e-06, "loss": 0.1426, "step": 18070 }, { "epoch": 59.24918032786885, "grad_norm": 2.687842607498169, "learning_rate": 7.516495416295728e-06, "loss": 0.0883, "step": 18071 }, { "epoch": 59.252459016393445, "grad_norm": 3.8203182220458984, "learning_rate": 7.515466811967574e-06, "loss": 0.2983, "step": 18072 }, { "epoch": 59.25573770491803, "grad_norm": 3.0698599815368652, "learning_rate": 7.514438235655115e-06, "loss": 0.2171, "step": 18073 }, { "epoch": 59.25901639344262, "grad_norm": 3.290067195892334, "learning_rate": 7.513409687369946e-06, "loss": 0.2105, "step": 18074 }, { "epoch": 59.26229508196721, "grad_norm": 2.958547353744507, "learning_rate": 7.512381167123671e-06, "loss": 0.2732, "step": 18075 }, { "epoch": 59.265573770491805, "grad_norm": 3.2953407764434814, "learning_rate": 7.5113526749278855e-06, "loss": 0.1392, "step": 18076 }, { "epoch": 59.268852459016394, "grad_norm": 2.9036712646484375, "learning_rate": 7.510324210794187e-06, "loss": 0.2573, "step": 18077 }, { "epoch": 59.27213114754098, "grad_norm": 2.9868786334991455, "learning_rate": 7.509295774734165e-06, "loss": 0.3051, "step": 18078 }, { "epoch": 59.27540983606557, "grad_norm": 2.601895332336426, "learning_rate": 7.508267366759429e-06, "loss": 0.0891, "step": 18079 }, { "epoch": 59.278688524590166, "grad_norm": 2.299548387527466, "learning_rate": 7.507238986881569e-06, "loss": 0.0604, "step": 18080 }, { "epoch": 59.281967213114754, "grad_norm": 2.935929775238037, "learning_rate": 7.506210635112181e-06, "loss": 0.2455, "step": 18081 }, { "epoch": 59.28524590163934, "grad_norm": 4.1158037185668945, "learning_rate": 7.5051823114628565e-06, "loss": 0.1771, "step": 18082 }, { "epoch": 59.28852459016394, "grad_norm": 2.935455799102783, "learning_rate": 7.5041540159452e-06, "loss": 0.1326, "step": 18083 }, { "epoch": 59.291803278688526, "grad_norm": 2.4755969047546387, "learning_rate": 7.503125748570801e-06, "loss": 0.1263, "step": 18084 }, { "epoch": 59.295081967213115, "grad_norm": 2.597785472869873, "learning_rate": 7.502097509351256e-06, "loss": 0.1703, "step": 18085 }, { "epoch": 59.2983606557377, "grad_norm": 3.801647663116455, "learning_rate": 7.501069298298159e-06, "loss": 0.3644, "step": 18086 }, { "epoch": 59.3016393442623, "grad_norm": 3.6855297088623047, "learning_rate": 7.5000411154231e-06, "loss": 0.3087, "step": 18087 }, { "epoch": 59.30491803278689, "grad_norm": 3.6054763793945312, "learning_rate": 7.499012960737679e-06, "loss": 0.205, "step": 18088 }, { "epoch": 59.308196721311475, "grad_norm": 3.890439033508301, "learning_rate": 7.49798483425349e-06, "loss": 0.1485, "step": 18089 }, { "epoch": 59.31147540983606, "grad_norm": 2.7938895225524902, "learning_rate": 7.496956735982122e-06, "loss": 0.1749, "step": 18090 }, { "epoch": 59.31475409836066, "grad_norm": 2.784924268722534, "learning_rate": 7.495928665935166e-06, "loss": 0.3411, "step": 18091 }, { "epoch": 59.31803278688525, "grad_norm": 3.1948108673095703, "learning_rate": 7.4949006241242205e-06, "loss": 0.3794, "step": 18092 }, { "epoch": 59.321311475409836, "grad_norm": 2.825314521789551, "learning_rate": 7.493872610560876e-06, "loss": 0.2035, "step": 18093 }, { "epoch": 59.324590163934424, "grad_norm": 6.886545181274414, "learning_rate": 7.492844625256721e-06, "loss": 0.2046, "step": 18094 }, { "epoch": 59.32786885245902, "grad_norm": 2.9292075634002686, "learning_rate": 7.491816668223351e-06, "loss": 0.1268, "step": 18095 }, { "epoch": 59.33114754098361, "grad_norm": 3.061070203781128, "learning_rate": 7.490788739472357e-06, "loss": 0.1619, "step": 18096 }, { "epoch": 59.334426229508196, "grad_norm": 2.940744161605835, "learning_rate": 7.489760839015327e-06, "loss": 0.2068, "step": 18097 }, { "epoch": 59.337704918032784, "grad_norm": 2.286731719970703, "learning_rate": 7.4887329668638545e-06, "loss": 0.0554, "step": 18098 }, { "epoch": 59.34098360655738, "grad_norm": 3.0019452571868896, "learning_rate": 7.48770512302953e-06, "loss": 0.2414, "step": 18099 }, { "epoch": 59.34426229508197, "grad_norm": 2.303548574447632, "learning_rate": 7.48667730752394e-06, "loss": 0.0853, "step": 18100 }, { "epoch": 59.34754098360656, "grad_norm": 3.4592373371124268, "learning_rate": 7.485649520358678e-06, "loss": 0.2735, "step": 18101 }, { "epoch": 59.350819672131145, "grad_norm": 2.590127944946289, "learning_rate": 7.4846217615453325e-06, "loss": 0.0688, "step": 18102 }, { "epoch": 59.35409836065574, "grad_norm": 3.480999708175659, "learning_rate": 7.483594031095491e-06, "loss": 0.2676, "step": 18103 }, { "epoch": 59.35737704918033, "grad_norm": 2.906700611114502, "learning_rate": 7.482566329020741e-06, "loss": 0.1755, "step": 18104 }, { "epoch": 59.36065573770492, "grad_norm": 3.528960943222046, "learning_rate": 7.481538655332676e-06, "loss": 0.1879, "step": 18105 }, { "epoch": 59.363934426229505, "grad_norm": 3.080333948135376, "learning_rate": 7.480511010042882e-06, "loss": 0.3182, "step": 18106 }, { "epoch": 59.3672131147541, "grad_norm": 2.9977869987487793, "learning_rate": 7.479483393162945e-06, "loss": 0.2032, "step": 18107 }, { "epoch": 59.37049180327869, "grad_norm": 3.021911144256592, "learning_rate": 7.47845580470445e-06, "loss": 0.1742, "step": 18108 }, { "epoch": 59.37377049180328, "grad_norm": 2.227602243423462, "learning_rate": 7.477428244678993e-06, "loss": 0.0713, "step": 18109 }, { "epoch": 59.377049180327866, "grad_norm": 2.9826548099517822, "learning_rate": 7.476400713098153e-06, "loss": 0.0951, "step": 18110 }, { "epoch": 59.38032786885246, "grad_norm": 3.6260178089141846, "learning_rate": 7.47537320997352e-06, "loss": 0.0958, "step": 18111 }, { "epoch": 59.38360655737705, "grad_norm": 2.3591299057006836, "learning_rate": 7.4743457353166795e-06, "loss": 0.1586, "step": 18112 }, { "epoch": 59.38688524590164, "grad_norm": 2.9150609970092773, "learning_rate": 7.4733182891392105e-06, "loss": 0.1606, "step": 18113 }, { "epoch": 59.390163934426226, "grad_norm": 2.8250396251678467, "learning_rate": 7.472290871452711e-06, "loss": 0.191, "step": 18114 }, { "epoch": 59.39344262295082, "grad_norm": 3.8722169399261475, "learning_rate": 7.47126348226876e-06, "loss": 0.1132, "step": 18115 }, { "epoch": 59.39672131147541, "grad_norm": 2.5519444942474365, "learning_rate": 7.470236121598941e-06, "loss": 0.2384, "step": 18116 }, { "epoch": 59.4, "grad_norm": 4.453433036804199, "learning_rate": 7.469208789454838e-06, "loss": 0.155, "step": 18117 }, { "epoch": 59.40327868852459, "grad_norm": 3.478416681289673, "learning_rate": 7.4681814858480405e-06, "loss": 0.2399, "step": 18118 }, { "epoch": 59.40655737704918, "grad_norm": 2.7941980361938477, "learning_rate": 7.467154210790128e-06, "loss": 0.1179, "step": 18119 }, { "epoch": 59.40983606557377, "grad_norm": 2.307640552520752, "learning_rate": 7.466126964292685e-06, "loss": 0.0939, "step": 18120 }, { "epoch": 59.41311475409836, "grad_norm": 2.942476987838745, "learning_rate": 7.465099746367295e-06, "loss": 0.1356, "step": 18121 }, { "epoch": 59.41639344262295, "grad_norm": 3.147712230682373, "learning_rate": 7.464072557025541e-06, "loss": 0.2556, "step": 18122 }, { "epoch": 59.41967213114754, "grad_norm": 2.5024616718292236, "learning_rate": 7.463045396279007e-06, "loss": 0.1548, "step": 18123 }, { "epoch": 59.42295081967213, "grad_norm": 2.1393799781799316, "learning_rate": 7.462018264139273e-06, "loss": 0.1152, "step": 18124 }, { "epoch": 59.42622950819672, "grad_norm": 2.4963490962982178, "learning_rate": 7.460991160617923e-06, "loss": 0.0623, "step": 18125 }, { "epoch": 59.429508196721315, "grad_norm": 2.7643961906433105, "learning_rate": 7.459964085726535e-06, "loss": 0.2098, "step": 18126 }, { "epoch": 59.4327868852459, "grad_norm": 3.720634937286377, "learning_rate": 7.458937039476692e-06, "loss": 0.1864, "step": 18127 }, { "epoch": 59.43606557377049, "grad_norm": 3.185234546661377, "learning_rate": 7.457910021879981e-06, "loss": 0.156, "step": 18128 }, { "epoch": 59.43934426229508, "grad_norm": 2.799881935119629, "learning_rate": 7.456883032947975e-06, "loss": 0.1537, "step": 18129 }, { "epoch": 59.442622950819676, "grad_norm": 3.110211133956909, "learning_rate": 7.455856072692254e-06, "loss": 0.2092, "step": 18130 }, { "epoch": 59.445901639344264, "grad_norm": 2.272962808609009, "learning_rate": 7.4548291411244045e-06, "loss": 0.1626, "step": 18131 }, { "epoch": 59.44918032786885, "grad_norm": 2.922903537750244, "learning_rate": 7.4538022382560025e-06, "loss": 0.1242, "step": 18132 }, { "epoch": 59.45245901639344, "grad_norm": 2.3375940322875977, "learning_rate": 7.45277536409863e-06, "loss": 0.2372, "step": 18133 }, { "epoch": 59.455737704918036, "grad_norm": 3.3510708808898926, "learning_rate": 7.451748518663857e-06, "loss": 0.2053, "step": 18134 }, { "epoch": 59.459016393442624, "grad_norm": 3.1881988048553467, "learning_rate": 7.450721701963274e-06, "loss": 0.1066, "step": 18135 }, { "epoch": 59.46229508196721, "grad_norm": 3.1034934520721436, "learning_rate": 7.449694914008454e-06, "loss": 0.2203, "step": 18136 }, { "epoch": 59.4655737704918, "grad_norm": 3.1374495029449463, "learning_rate": 7.448668154810976e-06, "loss": 0.2274, "step": 18137 }, { "epoch": 59.4688524590164, "grad_norm": 3.541663885116577, "learning_rate": 7.447641424382417e-06, "loss": 0.2136, "step": 18138 }, { "epoch": 59.472131147540985, "grad_norm": 3.4139771461486816, "learning_rate": 7.446614722734351e-06, "loss": 0.1964, "step": 18139 }, { "epoch": 59.47540983606557, "grad_norm": 2.273130416870117, "learning_rate": 7.445588049878363e-06, "loss": 0.0396, "step": 18140 }, { "epoch": 59.47868852459016, "grad_norm": 2.9963700771331787, "learning_rate": 7.444561405826025e-06, "loss": 0.1589, "step": 18141 }, { "epoch": 59.48196721311476, "grad_norm": 2.8299100399017334, "learning_rate": 7.443534790588915e-06, "loss": 0.1277, "step": 18142 }, { "epoch": 59.485245901639345, "grad_norm": 3.238452434539795, "learning_rate": 7.442508204178604e-06, "loss": 0.2431, "step": 18143 }, { "epoch": 59.488524590163934, "grad_norm": 2.8576512336730957, "learning_rate": 7.441481646606675e-06, "loss": 0.2502, "step": 18144 }, { "epoch": 59.49180327868852, "grad_norm": 6.461804389953613, "learning_rate": 7.440455117884702e-06, "loss": 0.1911, "step": 18145 }, { "epoch": 59.49508196721312, "grad_norm": 3.354034423828125, "learning_rate": 7.439428618024258e-06, "loss": 0.3359, "step": 18146 }, { "epoch": 59.498360655737706, "grad_norm": 3.062209367752075, "learning_rate": 7.438402147036915e-06, "loss": 0.1634, "step": 18147 }, { "epoch": 59.501639344262294, "grad_norm": 3.6156952381134033, "learning_rate": 7.437375704934255e-06, "loss": 0.1535, "step": 18148 }, { "epoch": 59.50491803278688, "grad_norm": 3.111630916595459, "learning_rate": 7.436349291727849e-06, "loss": 0.2526, "step": 18149 }, { "epoch": 59.50819672131148, "grad_norm": 3.641510486602783, "learning_rate": 7.435322907429268e-06, "loss": 0.3079, "step": 18150 }, { "epoch": 59.511475409836066, "grad_norm": 3.4039499759674072, "learning_rate": 7.434296552050089e-06, "loss": 0.2743, "step": 18151 }, { "epoch": 59.514754098360655, "grad_norm": 3.8512563705444336, "learning_rate": 7.433270225601883e-06, "loss": 0.2973, "step": 18152 }, { "epoch": 59.51803278688524, "grad_norm": 10.303505897521973, "learning_rate": 7.432243928096224e-06, "loss": 0.2605, "step": 18153 }, { "epoch": 59.52131147540984, "grad_norm": 3.2324726581573486, "learning_rate": 7.431217659544685e-06, "loss": 0.1693, "step": 18154 }, { "epoch": 59.52459016393443, "grad_norm": 2.950946569442749, "learning_rate": 7.4301914199588395e-06, "loss": 0.1925, "step": 18155 }, { "epoch": 59.527868852459015, "grad_norm": 5.3912506103515625, "learning_rate": 7.4291652093502535e-06, "loss": 0.2141, "step": 18156 }, { "epoch": 59.5311475409836, "grad_norm": 3.223128080368042, "learning_rate": 7.428139027730505e-06, "loss": 0.2262, "step": 18157 }, { "epoch": 59.5344262295082, "grad_norm": 2.951373815536499, "learning_rate": 7.427112875111165e-06, "loss": 0.1937, "step": 18158 }, { "epoch": 59.53770491803279, "grad_norm": 3.1285970211029053, "learning_rate": 7.426086751503802e-06, "loss": 0.1397, "step": 18159 }, { "epoch": 59.540983606557376, "grad_norm": 3.1574296951293945, "learning_rate": 7.425060656919983e-06, "loss": 0.1855, "step": 18160 }, { "epoch": 59.544262295081964, "grad_norm": 2.507723808288574, "learning_rate": 7.424034591371285e-06, "loss": 0.1381, "step": 18161 }, { "epoch": 59.54754098360656, "grad_norm": 2.7517337799072266, "learning_rate": 7.423008554869278e-06, "loss": 0.2354, "step": 18162 }, { "epoch": 59.55081967213115, "grad_norm": 3.5081076622009277, "learning_rate": 7.421982547425528e-06, "loss": 0.2994, "step": 18163 }, { "epoch": 59.554098360655736, "grad_norm": 3.4401609897613525, "learning_rate": 7.420956569051604e-06, "loss": 0.2709, "step": 18164 }, { "epoch": 59.557377049180324, "grad_norm": 2.8302001953125, "learning_rate": 7.419930619759073e-06, "loss": 0.2082, "step": 18165 }, { "epoch": 59.56065573770492, "grad_norm": 2.871877670288086, "learning_rate": 7.418904699559511e-06, "loss": 0.2944, "step": 18166 }, { "epoch": 59.56393442622951, "grad_norm": 2.749887704849243, "learning_rate": 7.4178788084644815e-06, "loss": 0.1543, "step": 18167 }, { "epoch": 59.5672131147541, "grad_norm": 3.2145822048187256, "learning_rate": 7.4168529464855535e-06, "loss": 0.1497, "step": 18168 }, { "epoch": 59.570491803278685, "grad_norm": 2.9547526836395264, "learning_rate": 7.415827113634291e-06, "loss": 0.1449, "step": 18169 }, { "epoch": 59.57377049180328, "grad_norm": 3.783562183380127, "learning_rate": 7.414801309922268e-06, "loss": 0.2316, "step": 18170 }, { "epoch": 59.57704918032787, "grad_norm": 3.371858835220337, "learning_rate": 7.413775535361049e-06, "loss": 0.1906, "step": 18171 }, { "epoch": 59.58032786885246, "grad_norm": 3.2398245334625244, "learning_rate": 7.412749789962199e-06, "loss": 0.1325, "step": 18172 }, { "epoch": 59.58360655737705, "grad_norm": 3.24466872215271, "learning_rate": 7.411724073737281e-06, "loss": 0.233, "step": 18173 }, { "epoch": 59.58688524590164, "grad_norm": 2.561969757080078, "learning_rate": 7.4106983866978695e-06, "loss": 0.294, "step": 18174 }, { "epoch": 59.59016393442623, "grad_norm": 2.83296275138855, "learning_rate": 7.409672728855526e-06, "loss": 0.1068, "step": 18175 }, { "epoch": 59.59344262295082, "grad_norm": 3.3461196422576904, "learning_rate": 7.408647100221813e-06, "loss": 0.2323, "step": 18176 }, { "epoch": 59.59672131147541, "grad_norm": 3.5002119541168213, "learning_rate": 7.407621500808302e-06, "loss": 0.2509, "step": 18177 }, { "epoch": 59.6, "grad_norm": 2.6003763675689697, "learning_rate": 7.40659593062655e-06, "loss": 0.2978, "step": 18178 }, { "epoch": 59.60327868852459, "grad_norm": 2.9724724292755127, "learning_rate": 7.405570389688126e-06, "loss": 0.1955, "step": 18179 }, { "epoch": 59.60655737704918, "grad_norm": 3.056929111480713, "learning_rate": 7.4045448780045955e-06, "loss": 0.1148, "step": 18180 }, { "epoch": 59.609836065573774, "grad_norm": 2.9922358989715576, "learning_rate": 7.403519395587522e-06, "loss": 0.2338, "step": 18181 }, { "epoch": 59.61311475409836, "grad_norm": 3.112917423248291, "learning_rate": 7.402493942448462e-06, "loss": 0.1846, "step": 18182 }, { "epoch": 59.61639344262295, "grad_norm": 3.9840986728668213, "learning_rate": 7.401468518598984e-06, "loss": 0.3119, "step": 18183 }, { "epoch": 59.61967213114754, "grad_norm": 3.978456974029541, "learning_rate": 7.400443124050654e-06, "loss": 0.3648, "step": 18184 }, { "epoch": 59.622950819672134, "grad_norm": 2.9624128341674805, "learning_rate": 7.39941775881503e-06, "loss": 0.2592, "step": 18185 }, { "epoch": 59.62622950819672, "grad_norm": 3.1899971961975098, "learning_rate": 7.39839242290367e-06, "loss": 0.2683, "step": 18186 }, { "epoch": 59.62950819672131, "grad_norm": 3.9943573474884033, "learning_rate": 7.397367116328145e-06, "loss": 0.2181, "step": 18187 }, { "epoch": 59.6327868852459, "grad_norm": 2.865496873855591, "learning_rate": 7.396341839100012e-06, "loss": 0.184, "step": 18188 }, { "epoch": 59.636065573770495, "grad_norm": 3.1566905975341797, "learning_rate": 7.39531659123083e-06, "loss": 0.1728, "step": 18189 }, { "epoch": 59.63934426229508, "grad_norm": 2.998201370239258, "learning_rate": 7.394291372732164e-06, "loss": 0.2634, "step": 18190 }, { "epoch": 59.64262295081967, "grad_norm": 3.02146315574646, "learning_rate": 7.393266183615566e-06, "loss": 0.2725, "step": 18191 }, { "epoch": 59.64590163934426, "grad_norm": 3.964228630065918, "learning_rate": 7.392241023892608e-06, "loss": 0.1419, "step": 18192 }, { "epoch": 59.649180327868855, "grad_norm": 3.0748376846313477, "learning_rate": 7.391215893574844e-06, "loss": 0.084, "step": 18193 }, { "epoch": 59.65245901639344, "grad_norm": 2.741074323654175, "learning_rate": 7.390190792673831e-06, "loss": 0.1881, "step": 18194 }, { "epoch": 59.65573770491803, "grad_norm": 3.6476800441741943, "learning_rate": 7.389165721201128e-06, "loss": 0.1607, "step": 18195 }, { "epoch": 59.65901639344262, "grad_norm": 3.4287381172180176, "learning_rate": 7.3881406791683e-06, "loss": 0.2381, "step": 18196 }, { "epoch": 59.662295081967216, "grad_norm": 3.0803966522216797, "learning_rate": 7.387115666586901e-06, "loss": 0.2411, "step": 18197 }, { "epoch": 59.665573770491804, "grad_norm": 3.7839133739471436, "learning_rate": 7.38609068346849e-06, "loss": 0.1042, "step": 18198 }, { "epoch": 59.66885245901639, "grad_norm": 3.2787883281707764, "learning_rate": 7.385065729824621e-06, "loss": 0.1959, "step": 18199 }, { "epoch": 59.67213114754098, "grad_norm": 3.075437307357788, "learning_rate": 7.384040805666857e-06, "loss": 0.2125, "step": 18200 }, { "epoch": 59.675409836065576, "grad_norm": 3.0206732749938965, "learning_rate": 7.383015911006754e-06, "loss": 0.177, "step": 18201 }, { "epoch": 59.678688524590164, "grad_norm": 3.4283299446105957, "learning_rate": 7.381991045855868e-06, "loss": 0.1817, "step": 18202 }, { "epoch": 59.68196721311475, "grad_norm": 2.214350461959839, "learning_rate": 7.380966210225751e-06, "loss": 0.1762, "step": 18203 }, { "epoch": 59.68524590163934, "grad_norm": 3.349149465560913, "learning_rate": 7.379941404127965e-06, "loss": 0.1805, "step": 18204 }, { "epoch": 59.68852459016394, "grad_norm": 2.9221251010894775, "learning_rate": 7.378916627574066e-06, "loss": 0.1283, "step": 18205 }, { "epoch": 59.691803278688525, "grad_norm": 2.86348819732666, "learning_rate": 7.377891880575604e-06, "loss": 0.1079, "step": 18206 }, { "epoch": 59.69508196721311, "grad_norm": 3.106041669845581, "learning_rate": 7.376867163144139e-06, "loss": 0.317, "step": 18207 }, { "epoch": 59.6983606557377, "grad_norm": 2.719897747039795, "learning_rate": 7.375842475291224e-06, "loss": 0.1731, "step": 18208 }, { "epoch": 59.7016393442623, "grad_norm": 3.0668435096740723, "learning_rate": 7.374817817028413e-06, "loss": 0.1055, "step": 18209 }, { "epoch": 59.704918032786885, "grad_norm": 3.0876832008361816, "learning_rate": 7.373793188367262e-06, "loss": 0.1155, "step": 18210 }, { "epoch": 59.708196721311474, "grad_norm": 3.0666286945343018, "learning_rate": 7.372768589319323e-06, "loss": 0.1594, "step": 18211 }, { "epoch": 59.71147540983607, "grad_norm": 3.8244404792785645, "learning_rate": 7.371744019896145e-06, "loss": 0.3072, "step": 18212 }, { "epoch": 59.71475409836066, "grad_norm": 3.192124128341675, "learning_rate": 7.370719480109292e-06, "loss": 0.2062, "step": 18213 }, { "epoch": 59.718032786885246, "grad_norm": 3.448925733566284, "learning_rate": 7.36969496997031e-06, "loss": 0.1304, "step": 18214 }, { "epoch": 59.721311475409834, "grad_norm": 3.4154129028320312, "learning_rate": 7.3686704894907525e-06, "loss": 0.2513, "step": 18215 }, { "epoch": 59.72459016393443, "grad_norm": 2.93845796585083, "learning_rate": 7.367646038682171e-06, "loss": 0.3103, "step": 18216 }, { "epoch": 59.72786885245902, "grad_norm": 2.7018001079559326, "learning_rate": 7.366621617556111e-06, "loss": 0.1135, "step": 18217 }, { "epoch": 59.731147540983606, "grad_norm": 2.7970328330993652, "learning_rate": 7.365597226124137e-06, "loss": 0.0826, "step": 18218 }, { "epoch": 59.734426229508195, "grad_norm": 4.017421245574951, "learning_rate": 7.364572864397792e-06, "loss": 0.246, "step": 18219 }, { "epoch": 59.73770491803279, "grad_norm": 3.2066781520843506, "learning_rate": 7.363548532388629e-06, "loss": 0.2067, "step": 18220 }, { "epoch": 59.74098360655738, "grad_norm": 2.4761271476745605, "learning_rate": 7.362524230108193e-06, "loss": 0.1518, "step": 18221 }, { "epoch": 59.74426229508197, "grad_norm": 2.9157252311706543, "learning_rate": 7.3614999575680435e-06, "loss": 0.2224, "step": 18222 }, { "epoch": 59.747540983606555, "grad_norm": 3.133432149887085, "learning_rate": 7.360475714779724e-06, "loss": 0.1207, "step": 18223 }, { "epoch": 59.75081967213115, "grad_norm": 3.1682515144348145, "learning_rate": 7.3594515017547866e-06, "loss": 0.4697, "step": 18224 }, { "epoch": 59.75409836065574, "grad_norm": 3.0027012825012207, "learning_rate": 7.358427318504773e-06, "loss": 0.124, "step": 18225 }, { "epoch": 59.75737704918033, "grad_norm": 3.4474713802337646, "learning_rate": 7.357403165041243e-06, "loss": 0.2395, "step": 18226 }, { "epoch": 59.760655737704916, "grad_norm": 4.080785274505615, "learning_rate": 7.3563790413757405e-06, "loss": 0.1721, "step": 18227 }, { "epoch": 59.76393442622951, "grad_norm": 3.3300621509552, "learning_rate": 7.355354947519812e-06, "loss": 0.186, "step": 18228 }, { "epoch": 59.7672131147541, "grad_norm": 2.962205648422241, "learning_rate": 7.3543308834850055e-06, "loss": 0.0991, "step": 18229 }, { "epoch": 59.77049180327869, "grad_norm": 3.249675989151001, "learning_rate": 7.3533068492828685e-06, "loss": 0.1944, "step": 18230 }, { "epoch": 59.773770491803276, "grad_norm": 3.1740779876708984, "learning_rate": 7.35228284492495e-06, "loss": 0.1341, "step": 18231 }, { "epoch": 59.77704918032787, "grad_norm": 3.540889024734497, "learning_rate": 7.3512588704227934e-06, "loss": 0.2212, "step": 18232 }, { "epoch": 59.78032786885246, "grad_norm": 3.5156848430633545, "learning_rate": 7.35023492578795e-06, "loss": 0.1238, "step": 18233 }, { "epoch": 59.78360655737705, "grad_norm": 3.6452090740203857, "learning_rate": 7.34921101103196e-06, "loss": 0.2408, "step": 18234 }, { "epoch": 59.78688524590164, "grad_norm": 4.696605682373047, "learning_rate": 7.348187126166373e-06, "loss": 0.3125, "step": 18235 }, { "epoch": 59.79016393442623, "grad_norm": 2.850261688232422, "learning_rate": 7.347163271202735e-06, "loss": 0.2076, "step": 18236 }, { "epoch": 59.79344262295082, "grad_norm": 3.0082173347473145, "learning_rate": 7.34613944615259e-06, "loss": 0.2399, "step": 18237 }, { "epoch": 59.79672131147541, "grad_norm": 3.400505542755127, "learning_rate": 7.3451156510274755e-06, "loss": 0.2217, "step": 18238 }, { "epoch": 59.8, "grad_norm": 2.755391836166382, "learning_rate": 7.344091885838949e-06, "loss": 0.276, "step": 18239 }, { "epoch": 59.80327868852459, "grad_norm": 3.0067548751831055, "learning_rate": 7.3430681505985464e-06, "loss": 0.233, "step": 18240 }, { "epoch": 59.80655737704918, "grad_norm": 3.129876136779785, "learning_rate": 7.342044445317813e-06, "loss": 0.2354, "step": 18241 }, { "epoch": 59.80983606557377, "grad_norm": 3.2992618083953857, "learning_rate": 7.3410207700082936e-06, "loss": 0.2176, "step": 18242 }, { "epoch": 59.81311475409836, "grad_norm": 2.253399133682251, "learning_rate": 7.339997124681524e-06, "loss": 0.0543, "step": 18243 }, { "epoch": 59.81639344262295, "grad_norm": 3.3097739219665527, "learning_rate": 7.338973509349056e-06, "loss": 0.1597, "step": 18244 }, { "epoch": 59.81967213114754, "grad_norm": 2.820405960083008, "learning_rate": 7.33794992402243e-06, "loss": 0.0613, "step": 18245 }, { "epoch": 59.82295081967213, "grad_norm": 3.449974536895752, "learning_rate": 7.3369263687131864e-06, "loss": 0.2975, "step": 18246 }, { "epoch": 59.82622950819672, "grad_norm": 4.057549476623535, "learning_rate": 7.335902843432862e-06, "loss": 0.3361, "step": 18247 }, { "epoch": 59.829508196721314, "grad_norm": 3.361576795578003, "learning_rate": 7.334879348193009e-06, "loss": 0.3207, "step": 18248 }, { "epoch": 59.8327868852459, "grad_norm": 3.086526870727539, "learning_rate": 7.333855883005161e-06, "loss": 0.2013, "step": 18249 }, { "epoch": 59.83606557377049, "grad_norm": 3.497082233428955, "learning_rate": 7.33283244788086e-06, "loss": 0.1405, "step": 18250 }, { "epoch": 59.83934426229508, "grad_norm": 3.073776960372925, "learning_rate": 7.331809042831641e-06, "loss": 0.1519, "step": 18251 }, { "epoch": 59.842622950819674, "grad_norm": 3.0421533584594727, "learning_rate": 7.330785667869056e-06, "loss": 0.2314, "step": 18252 }, { "epoch": 59.84590163934426, "grad_norm": 3.887565851211548, "learning_rate": 7.329762323004635e-06, "loss": 0.1644, "step": 18253 }, { "epoch": 59.84918032786885, "grad_norm": 2.934189796447754, "learning_rate": 7.328739008249922e-06, "loss": 0.2766, "step": 18254 }, { "epoch": 59.85245901639344, "grad_norm": 3.029930591583252, "learning_rate": 7.327715723616452e-06, "loss": 0.233, "step": 18255 }, { "epoch": 59.855737704918035, "grad_norm": 3.2228055000305176, "learning_rate": 7.326692469115766e-06, "loss": 0.329, "step": 18256 }, { "epoch": 59.85901639344262, "grad_norm": 2.615213394165039, "learning_rate": 7.325669244759402e-06, "loss": 0.1837, "step": 18257 }, { "epoch": 59.86229508196721, "grad_norm": 2.993481159210205, "learning_rate": 7.324646050558898e-06, "loss": 0.1366, "step": 18258 }, { "epoch": 59.86557377049181, "grad_norm": 3.37431263923645, "learning_rate": 7.323622886525792e-06, "loss": 0.2548, "step": 18259 }, { "epoch": 59.868852459016395, "grad_norm": 3.3188719749450684, "learning_rate": 7.32259975267162e-06, "loss": 0.1389, "step": 18260 }, { "epoch": 59.87213114754098, "grad_norm": 3.011406898498535, "learning_rate": 7.321576649007919e-06, "loss": 0.0965, "step": 18261 }, { "epoch": 59.87540983606557, "grad_norm": 3.24261212348938, "learning_rate": 7.3205535755462275e-06, "loss": 0.1035, "step": 18262 }, { "epoch": 59.87868852459017, "grad_norm": 6.984122276306152, "learning_rate": 7.319530532298081e-06, "loss": 0.1169, "step": 18263 }, { "epoch": 59.881967213114756, "grad_norm": 2.734605073928833, "learning_rate": 7.318507519275013e-06, "loss": 0.165, "step": 18264 }, { "epoch": 59.885245901639344, "grad_norm": 2.3834547996520996, "learning_rate": 7.31748453648856e-06, "loss": 0.1769, "step": 18265 }, { "epoch": 59.88852459016393, "grad_norm": 3.932537317276001, "learning_rate": 7.31646158395026e-06, "loss": 0.1823, "step": 18266 }, { "epoch": 59.89180327868853, "grad_norm": 2.1776676177978516, "learning_rate": 7.315438661671648e-06, "loss": 0.0536, "step": 18267 }, { "epoch": 59.895081967213116, "grad_norm": 2.7365052700042725, "learning_rate": 7.314415769664254e-06, "loss": 0.3192, "step": 18268 }, { "epoch": 59.898360655737704, "grad_norm": 3.326380729675293, "learning_rate": 7.31339290793961e-06, "loss": 0.2725, "step": 18269 }, { "epoch": 59.90163934426229, "grad_norm": 3.223243474960327, "learning_rate": 7.31237007650926e-06, "loss": 0.2127, "step": 18270 }, { "epoch": 59.90491803278689, "grad_norm": 3.105206251144409, "learning_rate": 7.311347275384731e-06, "loss": 0.1108, "step": 18271 }, { "epoch": 59.90819672131148, "grad_norm": 3.209942579269409, "learning_rate": 7.310324504577557e-06, "loss": 0.2751, "step": 18272 }, { "epoch": 59.911475409836065, "grad_norm": 5.043338775634766, "learning_rate": 7.309301764099267e-06, "loss": 0.2486, "step": 18273 }, { "epoch": 59.91475409836065, "grad_norm": 3.7384603023529053, "learning_rate": 7.308279053961401e-06, "loss": 0.1397, "step": 18274 }, { "epoch": 59.91803278688525, "grad_norm": 3.360123872756958, "learning_rate": 7.307256374175489e-06, "loss": 0.2213, "step": 18275 }, { "epoch": 59.92131147540984, "grad_norm": 3.092648983001709, "learning_rate": 7.306233724753059e-06, "loss": 0.2113, "step": 18276 }, { "epoch": 59.924590163934425, "grad_norm": 3.9510316848754883, "learning_rate": 7.305211105705641e-06, "loss": 0.3672, "step": 18277 }, { "epoch": 59.927868852459014, "grad_norm": 3.169689178466797, "learning_rate": 7.304188517044774e-06, "loss": 0.1586, "step": 18278 }, { "epoch": 59.93114754098361, "grad_norm": 3.283869981765747, "learning_rate": 7.3031659587819836e-06, "loss": 0.2378, "step": 18279 }, { "epoch": 59.9344262295082, "grad_norm": 3.689448595046997, "learning_rate": 7.302143430928803e-06, "loss": 0.2739, "step": 18280 }, { "epoch": 59.937704918032786, "grad_norm": 3.9949002265930176, "learning_rate": 7.301120933496757e-06, "loss": 0.2243, "step": 18281 }, { "epoch": 59.940983606557374, "grad_norm": 3.6857762336730957, "learning_rate": 7.3000984664973785e-06, "loss": 0.3154, "step": 18282 }, { "epoch": 59.94426229508197, "grad_norm": 2.995488166809082, "learning_rate": 7.299076029942198e-06, "loss": 0.2689, "step": 18283 }, { "epoch": 59.94754098360656, "grad_norm": 5.678066730499268, "learning_rate": 7.298053623842745e-06, "loss": 0.1912, "step": 18284 }, { "epoch": 59.950819672131146, "grad_norm": 3.060171127319336, "learning_rate": 7.297031248210544e-06, "loss": 0.4098, "step": 18285 }, { "epoch": 59.954098360655735, "grad_norm": 3.169612407684326, "learning_rate": 7.296008903057127e-06, "loss": 0.2443, "step": 18286 }, { "epoch": 59.95737704918033, "grad_norm": 3.2492809295654297, "learning_rate": 7.294986588394022e-06, "loss": 0.322, "step": 18287 }, { "epoch": 59.96065573770492, "grad_norm": 3.3229362964630127, "learning_rate": 7.2939643042327546e-06, "loss": 0.2308, "step": 18288 }, { "epoch": 59.96393442622951, "grad_norm": 2.6019628047943115, "learning_rate": 7.292942050584855e-06, "loss": 0.1373, "step": 18289 }, { "epoch": 59.967213114754095, "grad_norm": 2.851223945617676, "learning_rate": 7.2919198274618486e-06, "loss": 0.1947, "step": 18290 }, { "epoch": 59.97049180327869, "grad_norm": 3.5981216430664062, "learning_rate": 7.2908976348752565e-06, "loss": 0.2921, "step": 18291 }, { "epoch": 59.97377049180328, "grad_norm": 2.7729098796844482, "learning_rate": 7.289875472836616e-06, "loss": 0.1572, "step": 18292 }, { "epoch": 59.97704918032787, "grad_norm": 2.92503023147583, "learning_rate": 7.288853341357447e-06, "loss": 0.2154, "step": 18293 }, { "epoch": 59.980327868852456, "grad_norm": 3.03645920753479, "learning_rate": 7.2878312404492745e-06, "loss": 0.1912, "step": 18294 }, { "epoch": 59.98360655737705, "grad_norm": 3.007051706314087, "learning_rate": 7.286809170123621e-06, "loss": 0.2173, "step": 18295 }, { "epoch": 59.98688524590164, "grad_norm": 2.9426774978637695, "learning_rate": 7.28578713039202e-06, "loss": 0.1883, "step": 18296 }, { "epoch": 59.99016393442623, "grad_norm": 3.8227484226226807, "learning_rate": 7.2847651212659905e-06, "loss": 0.2721, "step": 18297 }, { "epoch": 59.993442622950816, "grad_norm": 5.440494537353516, "learning_rate": 7.283743142757058e-06, "loss": 0.1809, "step": 18298 }, { "epoch": 59.99672131147541, "grad_norm": 4.010074138641357, "learning_rate": 7.28272119487674e-06, "loss": 0.2965, "step": 18299 }, { "epoch": 60.0, "grad_norm": 3.07370662689209, "learning_rate": 7.2816992776365714e-06, "loss": 0.2813, "step": 18300 }, { "epoch": 60.00327868852459, "grad_norm": 3.411325216293335, "learning_rate": 7.28067739104807e-06, "loss": 0.1998, "step": 18301 }, { "epoch": 60.006557377049184, "grad_norm": 2.738551616668701, "learning_rate": 7.279655535122758e-06, "loss": 0.1645, "step": 18302 }, { "epoch": 60.00983606557377, "grad_norm": 3.103262186050415, "learning_rate": 7.278633709872158e-06, "loss": 0.1801, "step": 18303 }, { "epoch": 60.01311475409836, "grad_norm": 2.98704195022583, "learning_rate": 7.2776119153077895e-06, "loss": 0.1227, "step": 18304 }, { "epoch": 60.01639344262295, "grad_norm": 4.117308616638184, "learning_rate": 7.27659015144118e-06, "loss": 0.1494, "step": 18305 }, { "epoch": 60.019672131147544, "grad_norm": 2.0615341663360596, "learning_rate": 7.275568418283848e-06, "loss": 0.1137, "step": 18306 }, { "epoch": 60.02295081967213, "grad_norm": 3.3153061866760254, "learning_rate": 7.274546715847315e-06, "loss": 0.3128, "step": 18307 }, { "epoch": 60.02622950819672, "grad_norm": 2.849461555480957, "learning_rate": 7.273525044143098e-06, "loss": 0.1289, "step": 18308 }, { "epoch": 60.02950819672131, "grad_norm": 2.7227795124053955, "learning_rate": 7.2725034031827245e-06, "loss": 0.1188, "step": 18309 }, { "epoch": 60.032786885245905, "grad_norm": 3.3500945568084717, "learning_rate": 7.271481792977713e-06, "loss": 0.1999, "step": 18310 }, { "epoch": 60.03606557377049, "grad_norm": 3.289243459701538, "learning_rate": 7.270460213539577e-06, "loss": 0.1795, "step": 18311 }, { "epoch": 60.03934426229508, "grad_norm": 3.4248619079589844, "learning_rate": 7.26943866487984e-06, "loss": 0.2471, "step": 18312 }, { "epoch": 60.04262295081967, "grad_norm": 3.187037229537964, "learning_rate": 7.268417147010024e-06, "loss": 0.1855, "step": 18313 }, { "epoch": 60.045901639344265, "grad_norm": 2.905874013900757, "learning_rate": 7.267395659941643e-06, "loss": 0.199, "step": 18314 }, { "epoch": 60.049180327868854, "grad_norm": 2.960096836090088, "learning_rate": 7.266374203686217e-06, "loss": 0.1248, "step": 18315 }, { "epoch": 60.05245901639344, "grad_norm": 2.6972897052764893, "learning_rate": 7.2653527782552665e-06, "loss": 0.2111, "step": 18316 }, { "epoch": 60.05573770491803, "grad_norm": 3.5142641067504883, "learning_rate": 7.264331383660302e-06, "loss": 0.2749, "step": 18317 }, { "epoch": 60.059016393442626, "grad_norm": 2.684629201889038, "learning_rate": 7.26331001991285e-06, "loss": 0.2222, "step": 18318 }, { "epoch": 60.062295081967214, "grad_norm": 3.118605852127075, "learning_rate": 7.2622886870244226e-06, "loss": 0.3249, "step": 18319 }, { "epoch": 60.0655737704918, "grad_norm": 2.6485483646392822, "learning_rate": 7.2612673850065366e-06, "loss": 0.1797, "step": 18320 }, { "epoch": 60.06885245901639, "grad_norm": 2.9011096954345703, "learning_rate": 7.2602461138707035e-06, "loss": 0.2384, "step": 18321 }, { "epoch": 60.072131147540986, "grad_norm": 3.3646833896636963, "learning_rate": 7.25922487362845e-06, "loss": 0.1293, "step": 18322 }, { "epoch": 60.075409836065575, "grad_norm": 2.8601434230804443, "learning_rate": 7.258203664291285e-06, "loss": 0.1404, "step": 18323 }, { "epoch": 60.07868852459016, "grad_norm": 2.3732638359069824, "learning_rate": 7.257182485870724e-06, "loss": 0.1029, "step": 18324 }, { "epoch": 60.08196721311475, "grad_norm": 2.1479618549346924, "learning_rate": 7.256161338378278e-06, "loss": 0.0951, "step": 18325 }, { "epoch": 60.08524590163935, "grad_norm": 3.1569526195526123, "learning_rate": 7.255140221825472e-06, "loss": 0.1384, "step": 18326 }, { "epoch": 60.088524590163935, "grad_norm": 2.696180820465088, "learning_rate": 7.254119136223812e-06, "loss": 0.0978, "step": 18327 }, { "epoch": 60.09180327868852, "grad_norm": 2.6807825565338135, "learning_rate": 7.253098081584813e-06, "loss": 0.1632, "step": 18328 }, { "epoch": 60.09508196721311, "grad_norm": 2.4727485179901123, "learning_rate": 7.252077057919991e-06, "loss": 0.2087, "step": 18329 }, { "epoch": 60.09836065573771, "grad_norm": 3.070220470428467, "learning_rate": 7.251056065240852e-06, "loss": 0.1938, "step": 18330 }, { "epoch": 60.101639344262296, "grad_norm": 2.760315179824829, "learning_rate": 7.250035103558919e-06, "loss": 0.1784, "step": 18331 }, { "epoch": 60.104918032786884, "grad_norm": 2.318352699279785, "learning_rate": 7.249014172885699e-06, "loss": 0.0951, "step": 18332 }, { "epoch": 60.10819672131147, "grad_norm": 3.23425555229187, "learning_rate": 7.247993273232705e-06, "loss": 0.1247, "step": 18333 }, { "epoch": 60.11147540983607, "grad_norm": 2.519104480743408, "learning_rate": 7.246972404611443e-06, "loss": 0.1729, "step": 18334 }, { "epoch": 60.114754098360656, "grad_norm": 2.8384203910827637, "learning_rate": 7.245951567033435e-06, "loss": 0.2775, "step": 18335 }, { "epoch": 60.118032786885244, "grad_norm": 5.5962748527526855, "learning_rate": 7.2449307605101845e-06, "loss": 0.1432, "step": 18336 }, { "epoch": 60.12131147540983, "grad_norm": 2.9907822608947754, "learning_rate": 7.243909985053205e-06, "loss": 0.2033, "step": 18337 }, { "epoch": 60.12459016393443, "grad_norm": 4.4422688484191895, "learning_rate": 7.242889240674005e-06, "loss": 0.1267, "step": 18338 }, { "epoch": 60.12786885245902, "grad_norm": 2.3013947010040283, "learning_rate": 7.241868527384096e-06, "loss": 0.0927, "step": 18339 }, { "epoch": 60.131147540983605, "grad_norm": 2.813824415206909, "learning_rate": 7.240847845194987e-06, "loss": 0.21, "step": 18340 }, { "epoch": 60.13442622950819, "grad_norm": 3.423313856124878, "learning_rate": 7.239827194118187e-06, "loss": 0.397, "step": 18341 }, { "epoch": 60.13770491803279, "grad_norm": 2.564894676208496, "learning_rate": 7.238806574165208e-06, "loss": 0.1753, "step": 18342 }, { "epoch": 60.14098360655738, "grad_norm": 2.7108230590820312, "learning_rate": 7.237785985347549e-06, "loss": 0.266, "step": 18343 }, { "epoch": 60.144262295081965, "grad_norm": 3.8551838397979736, "learning_rate": 7.236765427676732e-06, "loss": 0.1074, "step": 18344 }, { "epoch": 60.14754098360656, "grad_norm": 2.5757648944854736, "learning_rate": 7.235744901164257e-06, "loss": 0.2237, "step": 18345 }, { "epoch": 60.15081967213115, "grad_norm": 3.2620227336883545, "learning_rate": 7.234724405821631e-06, "loss": 0.27, "step": 18346 }, { "epoch": 60.15409836065574, "grad_norm": 15.629199028015137, "learning_rate": 7.233703941660359e-06, "loss": 0.1981, "step": 18347 }, { "epoch": 60.157377049180326, "grad_norm": 3.2870235443115234, "learning_rate": 7.232683508691956e-06, "loss": 0.2603, "step": 18348 }, { "epoch": 60.16065573770492, "grad_norm": 3.1508641242980957, "learning_rate": 7.231663106927924e-06, "loss": 0.1953, "step": 18349 }, { "epoch": 60.16393442622951, "grad_norm": 2.4740777015686035, "learning_rate": 7.230642736379767e-06, "loss": 0.1029, "step": 18350 }, { "epoch": 60.1672131147541, "grad_norm": 2.823713541030884, "learning_rate": 7.229622397058991e-06, "loss": 0.1083, "step": 18351 }, { "epoch": 60.170491803278686, "grad_norm": 2.5708601474761963, "learning_rate": 7.2286020889771055e-06, "loss": 0.3572, "step": 18352 }, { "epoch": 60.17377049180328, "grad_norm": 2.7507100105285645, "learning_rate": 7.227581812145612e-06, "loss": 0.1122, "step": 18353 }, { "epoch": 60.17704918032787, "grad_norm": 3.0606350898742676, "learning_rate": 7.226561566576018e-06, "loss": 0.2861, "step": 18354 }, { "epoch": 60.18032786885246, "grad_norm": 2.563204288482666, "learning_rate": 7.2255413522798256e-06, "loss": 0.4065, "step": 18355 }, { "epoch": 60.18360655737705, "grad_norm": 3.459210157394409, "learning_rate": 7.224521169268535e-06, "loss": 0.1408, "step": 18356 }, { "epoch": 60.18688524590164, "grad_norm": 3.4595730304718018, "learning_rate": 7.223501017553658e-06, "loss": 0.151, "step": 18357 }, { "epoch": 60.19016393442623, "grad_norm": 3.4959897994995117, "learning_rate": 7.222480897146693e-06, "loss": 0.0987, "step": 18358 }, { "epoch": 60.19344262295082, "grad_norm": 2.8736603260040283, "learning_rate": 7.2214608080591444e-06, "loss": 0.2458, "step": 18359 }, { "epoch": 60.19672131147541, "grad_norm": 2.7755260467529297, "learning_rate": 7.220440750302511e-06, "loss": 0.2334, "step": 18360 }, { "epoch": 60.2, "grad_norm": 2.668987274169922, "learning_rate": 7.219420723888301e-06, "loss": 0.085, "step": 18361 }, { "epoch": 60.20327868852459, "grad_norm": 2.764632225036621, "learning_rate": 7.218400728828013e-06, "loss": 0.153, "step": 18362 }, { "epoch": 60.20655737704918, "grad_norm": 3.524822235107422, "learning_rate": 7.217380765133149e-06, "loss": 0.394, "step": 18363 }, { "epoch": 60.20983606557377, "grad_norm": 3.5042734146118164, "learning_rate": 7.216360832815208e-06, "loss": 0.1862, "step": 18364 }, { "epoch": 60.21311475409836, "grad_norm": 3.4598848819732666, "learning_rate": 7.215340931885695e-06, "loss": 0.1745, "step": 18365 }, { "epoch": 60.21639344262295, "grad_norm": 3.506882429122925, "learning_rate": 7.214321062356109e-06, "loss": 0.2899, "step": 18366 }, { "epoch": 60.21967213114754, "grad_norm": 3.6227753162384033, "learning_rate": 7.213301224237947e-06, "loss": 0.1293, "step": 18367 }, { "epoch": 60.22295081967213, "grad_norm": 3.3871045112609863, "learning_rate": 7.2122814175427145e-06, "loss": 0.2579, "step": 18368 }, { "epoch": 60.226229508196724, "grad_norm": 2.933152914047241, "learning_rate": 7.211261642281904e-06, "loss": 0.1928, "step": 18369 }, { "epoch": 60.22950819672131, "grad_norm": 3.1729087829589844, "learning_rate": 7.2102418984670186e-06, "loss": 0.2105, "step": 18370 }, { "epoch": 60.2327868852459, "grad_norm": 3.449392318725586, "learning_rate": 7.209222186109559e-06, "loss": 0.2181, "step": 18371 }, { "epoch": 60.23606557377049, "grad_norm": 3.4637320041656494, "learning_rate": 7.2082025052210205e-06, "loss": 0.1659, "step": 18372 }, { "epoch": 60.239344262295084, "grad_norm": 2.6036648750305176, "learning_rate": 7.207182855812896e-06, "loss": 0.0749, "step": 18373 }, { "epoch": 60.24262295081967, "grad_norm": 8.698868751525879, "learning_rate": 7.206163237896695e-06, "loss": 0.1855, "step": 18374 }, { "epoch": 60.24590163934426, "grad_norm": 2.7206778526306152, "learning_rate": 7.2051436514839064e-06, "loss": 0.1158, "step": 18375 }, { "epoch": 60.24918032786885, "grad_norm": 3.026495933532715, "learning_rate": 7.20412409658603e-06, "loss": 0.2341, "step": 18376 }, { "epoch": 60.252459016393445, "grad_norm": 3.082598924636841, "learning_rate": 7.203104573214557e-06, "loss": 0.1308, "step": 18377 }, { "epoch": 60.25573770491803, "grad_norm": 3.1533875465393066, "learning_rate": 7.2020850813809925e-06, "loss": 0.2793, "step": 18378 }, { "epoch": 60.25901639344262, "grad_norm": 2.4897687435150146, "learning_rate": 7.201065621096828e-06, "loss": 0.212, "step": 18379 }, { "epoch": 60.26229508196721, "grad_norm": 3.4102039337158203, "learning_rate": 7.200046192373558e-06, "loss": 0.178, "step": 18380 }, { "epoch": 60.265573770491805, "grad_norm": 3.0847463607788086, "learning_rate": 7.199026795222679e-06, "loss": 0.1505, "step": 18381 }, { "epoch": 60.268852459016394, "grad_norm": 3.3029587268829346, "learning_rate": 7.19800742965568e-06, "loss": 0.2997, "step": 18382 }, { "epoch": 60.27213114754098, "grad_norm": 3.304630994796753, "learning_rate": 7.196988095684067e-06, "loss": 0.3269, "step": 18383 }, { "epoch": 60.27540983606557, "grad_norm": 3.656507730484009, "learning_rate": 7.1959687933193255e-06, "loss": 0.2922, "step": 18384 }, { "epoch": 60.278688524590166, "grad_norm": 2.5463504791259766, "learning_rate": 7.194949522572952e-06, "loss": 0.1016, "step": 18385 }, { "epoch": 60.281967213114754, "grad_norm": 2.7547974586486816, "learning_rate": 7.193930283456435e-06, "loss": 0.224, "step": 18386 }, { "epoch": 60.28524590163934, "grad_norm": 2.5245683193206787, "learning_rate": 7.192911075981276e-06, "loss": 0.1528, "step": 18387 }, { "epoch": 60.28852459016394, "grad_norm": 3.1745681762695312, "learning_rate": 7.191891900158963e-06, "loss": 0.0884, "step": 18388 }, { "epoch": 60.291803278688526, "grad_norm": 2.4156739711761475, "learning_rate": 7.190872756000988e-06, "loss": 0.1489, "step": 18389 }, { "epoch": 60.295081967213115, "grad_norm": 2.6121175289154053, "learning_rate": 7.189853643518841e-06, "loss": 0.1112, "step": 18390 }, { "epoch": 60.2983606557377, "grad_norm": 2.6268317699432373, "learning_rate": 7.188834562724019e-06, "loss": 0.1362, "step": 18391 }, { "epoch": 60.3016393442623, "grad_norm": 3.124661445617676, "learning_rate": 7.18781551362801e-06, "loss": 0.3644, "step": 18392 }, { "epoch": 60.30491803278689, "grad_norm": 3.1193692684173584, "learning_rate": 7.1867964962423035e-06, "loss": 0.1124, "step": 18393 }, { "epoch": 60.308196721311475, "grad_norm": 2.9557931423187256, "learning_rate": 7.185777510578392e-06, "loss": 0.2806, "step": 18394 }, { "epoch": 60.31147540983606, "grad_norm": 3.234722852706909, "learning_rate": 7.184758556647764e-06, "loss": 0.156, "step": 18395 }, { "epoch": 60.31475409836066, "grad_norm": 3.244422197341919, "learning_rate": 7.1837396344619116e-06, "loss": 0.2881, "step": 18396 }, { "epoch": 60.31803278688525, "grad_norm": 3.446878671646118, "learning_rate": 7.182720744032323e-06, "loss": 0.3718, "step": 18397 }, { "epoch": 60.321311475409836, "grad_norm": 3.062668561935425, "learning_rate": 7.181701885370489e-06, "loss": 0.2033, "step": 18398 }, { "epoch": 60.324590163934424, "grad_norm": 3.7329916954040527, "learning_rate": 7.180683058487892e-06, "loss": 0.2102, "step": 18399 }, { "epoch": 60.32786885245902, "grad_norm": 2.703434467315674, "learning_rate": 7.179664263396029e-06, "loss": 0.1373, "step": 18400 }, { "epoch": 60.33114754098361, "grad_norm": 4.752232074737549, "learning_rate": 7.178645500106383e-06, "loss": 0.2607, "step": 18401 }, { "epoch": 60.334426229508196, "grad_norm": 2.757582187652588, "learning_rate": 7.1776267686304435e-06, "loss": 0.1995, "step": 18402 }, { "epoch": 60.337704918032784, "grad_norm": 3.8718276023864746, "learning_rate": 7.176608068979691e-06, "loss": 0.3453, "step": 18403 }, { "epoch": 60.34098360655738, "grad_norm": 3.7697055339813232, "learning_rate": 7.175589401165623e-06, "loss": 0.384, "step": 18404 }, { "epoch": 60.34426229508197, "grad_norm": 2.961808443069458, "learning_rate": 7.174570765199722e-06, "loss": 0.1994, "step": 18405 }, { "epoch": 60.34754098360656, "grad_norm": 3.2334675788879395, "learning_rate": 7.173552161093473e-06, "loss": 0.1901, "step": 18406 }, { "epoch": 60.350819672131145, "grad_norm": 3.1829843521118164, "learning_rate": 7.1725335888583635e-06, "loss": 0.1694, "step": 18407 }, { "epoch": 60.35409836065574, "grad_norm": 3.4066922664642334, "learning_rate": 7.171515048505871e-06, "loss": 0.1526, "step": 18408 }, { "epoch": 60.35737704918033, "grad_norm": 3.757276773452759, "learning_rate": 7.170496540047492e-06, "loss": 0.3329, "step": 18409 }, { "epoch": 60.36065573770492, "grad_norm": 2.8194057941436768, "learning_rate": 7.169478063494708e-06, "loss": 0.1598, "step": 18410 }, { "epoch": 60.363934426229505, "grad_norm": 2.3331246376037598, "learning_rate": 7.1684596188590006e-06, "loss": 0.075, "step": 18411 }, { "epoch": 60.3672131147541, "grad_norm": 2.4397168159484863, "learning_rate": 7.167441206151851e-06, "loss": 0.2205, "step": 18412 }, { "epoch": 60.37049180327869, "grad_norm": 3.2902374267578125, "learning_rate": 7.166422825384751e-06, "loss": 0.1983, "step": 18413 }, { "epoch": 60.37377049180328, "grad_norm": 2.11983060836792, "learning_rate": 7.16540447656918e-06, "loss": 0.2192, "step": 18414 }, { "epoch": 60.377049180327866, "grad_norm": 3.973558187484741, "learning_rate": 7.164386159716621e-06, "loss": 0.3708, "step": 18415 }, { "epoch": 60.38032786885246, "grad_norm": 2.8694612979888916, "learning_rate": 7.163367874838552e-06, "loss": 0.1711, "step": 18416 }, { "epoch": 60.38360655737705, "grad_norm": 81.62614440917969, "learning_rate": 7.162349621946463e-06, "loss": 0.1041, "step": 18417 }, { "epoch": 60.38688524590164, "grad_norm": 2.695542812347412, "learning_rate": 7.161331401051832e-06, "loss": 0.1572, "step": 18418 }, { "epoch": 60.390163934426226, "grad_norm": 2.844071388244629, "learning_rate": 7.16031321216614e-06, "loss": 0.1381, "step": 18419 }, { "epoch": 60.39344262295082, "grad_norm": 3.383007049560547, "learning_rate": 7.159295055300871e-06, "loss": 0.2284, "step": 18420 }, { "epoch": 60.39672131147541, "grad_norm": 2.8206984996795654, "learning_rate": 7.1582769304675026e-06, "loss": 0.1527, "step": 18421 }, { "epoch": 60.4, "grad_norm": 4.0821990966796875, "learning_rate": 7.157258837677514e-06, "loss": 0.1056, "step": 18422 }, { "epoch": 60.40327868852459, "grad_norm": 3.5543336868286133, "learning_rate": 7.156240776942393e-06, "loss": 0.2948, "step": 18423 }, { "epoch": 60.40655737704918, "grad_norm": 9.977886199951172, "learning_rate": 7.155222748273612e-06, "loss": 0.1616, "step": 18424 }, { "epoch": 60.40983606557377, "grad_norm": 3.2003254890441895, "learning_rate": 7.1542047516826484e-06, "loss": 0.2105, "step": 18425 }, { "epoch": 60.41311475409836, "grad_norm": 5.366950511932373, "learning_rate": 7.15318678718099e-06, "loss": 0.2737, "step": 18426 }, { "epoch": 60.41639344262295, "grad_norm": 4.699554443359375, "learning_rate": 7.1521688547801095e-06, "loss": 0.2541, "step": 18427 }, { "epoch": 60.41967213114754, "grad_norm": 3.217013359069824, "learning_rate": 7.151150954491486e-06, "loss": 0.1491, "step": 18428 }, { "epoch": 60.42295081967213, "grad_norm": 3.8408007621765137, "learning_rate": 7.150133086326594e-06, "loss": 0.2208, "step": 18429 }, { "epoch": 60.42622950819672, "grad_norm": 4.446542263031006, "learning_rate": 7.14911525029692e-06, "loss": 0.2812, "step": 18430 }, { "epoch": 60.429508196721315, "grad_norm": 6.00831937789917, "learning_rate": 7.1480974464139345e-06, "loss": 0.1825, "step": 18431 }, { "epoch": 60.4327868852459, "grad_norm": 4.24591064453125, "learning_rate": 7.147079674689115e-06, "loss": 0.2379, "step": 18432 }, { "epoch": 60.43606557377049, "grad_norm": 3.590291738510132, "learning_rate": 7.1460619351339385e-06, "loss": 0.2016, "step": 18433 }, { "epoch": 60.43934426229508, "grad_norm": 3.3039493560791016, "learning_rate": 7.145044227759879e-06, "loss": 0.2438, "step": 18434 }, { "epoch": 60.442622950819676, "grad_norm": 3.574857473373413, "learning_rate": 7.144026552578416e-06, "loss": 0.1424, "step": 18435 }, { "epoch": 60.445901639344264, "grad_norm": 4.008566379547119, "learning_rate": 7.143008909601023e-06, "loss": 0.1878, "step": 18436 }, { "epoch": 60.44918032786885, "grad_norm": 3.716242790222168, "learning_rate": 7.141991298839177e-06, "loss": 0.1578, "step": 18437 }, { "epoch": 60.45245901639344, "grad_norm": 3.088059186935425, "learning_rate": 7.140973720304345e-06, "loss": 0.1701, "step": 18438 }, { "epoch": 60.455737704918036, "grad_norm": 3.6006522178649902, "learning_rate": 7.139956174008011e-06, "loss": 0.2961, "step": 18439 }, { "epoch": 60.459016393442624, "grad_norm": 3.9141252040863037, "learning_rate": 7.138938659961645e-06, "loss": 0.3064, "step": 18440 }, { "epoch": 60.46229508196721, "grad_norm": 3.1359333992004395, "learning_rate": 7.137921178176721e-06, "loss": 0.2967, "step": 18441 }, { "epoch": 60.4655737704918, "grad_norm": 3.128274917602539, "learning_rate": 7.1369037286647085e-06, "loss": 0.1357, "step": 18442 }, { "epoch": 60.4688524590164, "grad_norm": 3.6467506885528564, "learning_rate": 7.135886311437086e-06, "loss": 0.162, "step": 18443 }, { "epoch": 60.472131147540985, "grad_norm": 4.072432041168213, "learning_rate": 7.134868926505323e-06, "loss": 0.2153, "step": 18444 }, { "epoch": 60.47540983606557, "grad_norm": 2.7802181243896484, "learning_rate": 7.13385157388089e-06, "loss": 0.1408, "step": 18445 }, { "epoch": 60.47868852459016, "grad_norm": 3.8280208110809326, "learning_rate": 7.132834253575263e-06, "loss": 0.1964, "step": 18446 }, { "epoch": 60.48196721311476, "grad_norm": 3.082106113433838, "learning_rate": 7.131816965599908e-06, "loss": 0.2192, "step": 18447 }, { "epoch": 60.485245901639345, "grad_norm": 3.528779983520508, "learning_rate": 7.130799709966302e-06, "loss": 0.2736, "step": 18448 }, { "epoch": 60.488524590163934, "grad_norm": 4.220820426940918, "learning_rate": 7.12978248668591e-06, "loss": 0.3487, "step": 18449 }, { "epoch": 60.49180327868852, "grad_norm": 2.7604291439056396, "learning_rate": 7.128765295770206e-06, "loss": 0.1318, "step": 18450 }, { "epoch": 60.49508196721312, "grad_norm": 3.0746121406555176, "learning_rate": 7.127748137230658e-06, "loss": 0.1791, "step": 18451 }, { "epoch": 60.498360655737706, "grad_norm": 3.570173978805542, "learning_rate": 7.126731011078736e-06, "loss": 0.2999, "step": 18452 }, { "epoch": 60.501639344262294, "grad_norm": 2.451707363128662, "learning_rate": 7.125713917325909e-06, "loss": 0.1014, "step": 18453 }, { "epoch": 60.50491803278688, "grad_norm": 4.413168430328369, "learning_rate": 7.124696855983648e-06, "loss": 0.2604, "step": 18454 }, { "epoch": 60.50819672131148, "grad_norm": 3.493246555328369, "learning_rate": 7.123679827063416e-06, "loss": 0.2743, "step": 18455 }, { "epoch": 60.511475409836066, "grad_norm": 3.350736618041992, "learning_rate": 7.122662830576688e-06, "loss": 0.214, "step": 18456 }, { "epoch": 60.514754098360655, "grad_norm": 3.494886875152588, "learning_rate": 7.121645866534928e-06, "loss": 0.0848, "step": 18457 }, { "epoch": 60.51803278688524, "grad_norm": 3.3484113216400146, "learning_rate": 7.120628934949605e-06, "loss": 0.2471, "step": 18458 }, { "epoch": 60.52131147540984, "grad_norm": 4.020778656005859, "learning_rate": 7.119612035832183e-06, "loss": 0.1311, "step": 18459 }, { "epoch": 60.52459016393443, "grad_norm": 3.980074644088745, "learning_rate": 7.118595169194127e-06, "loss": 0.1648, "step": 18460 }, { "epoch": 60.527868852459015, "grad_norm": 3.152947425842285, "learning_rate": 7.11757833504691e-06, "loss": 0.0777, "step": 18461 }, { "epoch": 60.5311475409836, "grad_norm": 2.830859899520874, "learning_rate": 7.116561533401993e-06, "loss": 0.0992, "step": 18462 }, { "epoch": 60.5344262295082, "grad_norm": 2.8534491062164307, "learning_rate": 7.115544764270846e-06, "loss": 0.1753, "step": 18463 }, { "epoch": 60.53770491803279, "grad_norm": 3.327526569366455, "learning_rate": 7.114528027664923e-06, "loss": 0.2011, "step": 18464 }, { "epoch": 60.540983606557376, "grad_norm": 3.6827144622802734, "learning_rate": 7.113511323595703e-06, "loss": 0.3121, "step": 18465 }, { "epoch": 60.544262295081964, "grad_norm": 2.9782917499542236, "learning_rate": 7.112494652074643e-06, "loss": 0.1911, "step": 18466 }, { "epoch": 60.54754098360656, "grad_norm": 11.586743354797363, "learning_rate": 7.111478013113209e-06, "loss": 0.2793, "step": 18467 }, { "epoch": 60.55081967213115, "grad_norm": 3.4060497283935547, "learning_rate": 7.1104614067228595e-06, "loss": 0.1883, "step": 18468 }, { "epoch": 60.554098360655736, "grad_norm": 3.4090576171875, "learning_rate": 7.109444832915064e-06, "loss": 0.3237, "step": 18469 }, { "epoch": 60.557377049180324, "grad_norm": 2.7728066444396973, "learning_rate": 7.1084282917012855e-06, "loss": 0.1288, "step": 18470 }, { "epoch": 60.56065573770492, "grad_norm": 3.0651631355285645, "learning_rate": 7.1074117830929856e-06, "loss": 0.1093, "step": 18471 }, { "epoch": 60.56393442622951, "grad_norm": 2.865938186645508, "learning_rate": 7.106395307101621e-06, "loss": 0.1802, "step": 18472 }, { "epoch": 60.5672131147541, "grad_norm": 3.213452100753784, "learning_rate": 7.105378863738659e-06, "loss": 0.2633, "step": 18473 }, { "epoch": 60.570491803278685, "grad_norm": 4.044861793518066, "learning_rate": 7.104362453015561e-06, "loss": 0.2656, "step": 18474 }, { "epoch": 60.57377049180328, "grad_norm": 3.5976040363311768, "learning_rate": 7.1033460749437864e-06, "loss": 0.1616, "step": 18475 }, { "epoch": 60.57704918032787, "grad_norm": 4.218357086181641, "learning_rate": 7.102329729534798e-06, "loss": 0.4125, "step": 18476 }, { "epoch": 60.58032786885246, "grad_norm": 3.692416191101074, "learning_rate": 7.101313416800053e-06, "loss": 0.1587, "step": 18477 }, { "epoch": 60.58360655737705, "grad_norm": 3.0779738426208496, "learning_rate": 7.1002971367510135e-06, "loss": 0.2268, "step": 18478 }, { "epoch": 60.58688524590164, "grad_norm": 7.485226154327393, "learning_rate": 7.099280889399138e-06, "loss": 0.1371, "step": 18479 }, { "epoch": 60.59016393442623, "grad_norm": 3.6302011013031006, "learning_rate": 7.09826467475589e-06, "loss": 0.2003, "step": 18480 }, { "epoch": 60.59344262295082, "grad_norm": 2.833481550216675, "learning_rate": 7.097248492832719e-06, "loss": 0.1345, "step": 18481 }, { "epoch": 60.59672131147541, "grad_norm": 2.3475301265716553, "learning_rate": 7.096232343641094e-06, "loss": 0.1214, "step": 18482 }, { "epoch": 60.6, "grad_norm": 3.183192729949951, "learning_rate": 7.095216227192467e-06, "loss": 0.2499, "step": 18483 }, { "epoch": 60.60327868852459, "grad_norm": 3.030555009841919, "learning_rate": 7.0942001434983e-06, "loss": 0.1306, "step": 18484 }, { "epoch": 60.60655737704918, "grad_norm": 2.9397196769714355, "learning_rate": 7.093184092570044e-06, "loss": 0.2076, "step": 18485 }, { "epoch": 60.609836065573774, "grad_norm": 3.516840934753418, "learning_rate": 7.092168074419159e-06, "loss": 0.2922, "step": 18486 }, { "epoch": 60.61311475409836, "grad_norm": 4.765232086181641, "learning_rate": 7.091152089057105e-06, "loss": 0.2927, "step": 18487 }, { "epoch": 60.61639344262295, "grad_norm": 4.60148811340332, "learning_rate": 7.0901361364953354e-06, "loss": 0.1179, "step": 18488 }, { "epoch": 60.61967213114754, "grad_norm": 3.349843740463257, "learning_rate": 7.089120216745307e-06, "loss": 0.3228, "step": 18489 }, { "epoch": 60.622950819672134, "grad_norm": 3.2471694946289062, "learning_rate": 7.08810432981847e-06, "loss": 0.176, "step": 18490 }, { "epoch": 60.62622950819672, "grad_norm": 3.2597696781158447, "learning_rate": 7.087088475726289e-06, "loss": 0.1705, "step": 18491 }, { "epoch": 60.62950819672131, "grad_norm": 2.7587029933929443, "learning_rate": 7.086072654480214e-06, "loss": 0.136, "step": 18492 }, { "epoch": 60.6327868852459, "grad_norm": 3.491217851638794, "learning_rate": 7.0850568660917e-06, "loss": 0.3123, "step": 18493 }, { "epoch": 60.636065573770495, "grad_norm": 3.1066770553588867, "learning_rate": 7.084041110572195e-06, "loss": 0.3145, "step": 18494 }, { "epoch": 60.63934426229508, "grad_norm": 3.813183069229126, "learning_rate": 7.083025387933165e-06, "loss": 0.1539, "step": 18495 }, { "epoch": 60.64262295081967, "grad_norm": 3.5181519985198975, "learning_rate": 7.0820096981860545e-06, "loss": 0.1415, "step": 18496 }, { "epoch": 60.64590163934426, "grad_norm": 3.4575653076171875, "learning_rate": 7.0809940413423186e-06, "loss": 0.1547, "step": 18497 }, { "epoch": 60.649180327868855, "grad_norm": 2.4493765830993652, "learning_rate": 7.079978417413409e-06, "loss": 0.2149, "step": 18498 }, { "epoch": 60.65245901639344, "grad_norm": 3.136056900024414, "learning_rate": 7.078962826410778e-06, "loss": 0.1777, "step": 18499 }, { "epoch": 60.65573770491803, "grad_norm": 3.118624687194824, "learning_rate": 7.0779472683458795e-06, "loss": 0.1352, "step": 18500 }, { "epoch": 60.65901639344262, "grad_norm": 3.688203811645508, "learning_rate": 7.076931743230162e-06, "loss": 0.2356, "step": 18501 }, { "epoch": 60.662295081967216, "grad_norm": 3.043349266052246, "learning_rate": 7.075916251075081e-06, "loss": 0.2293, "step": 18502 }, { "epoch": 60.665573770491804, "grad_norm": 2.686143159866333, "learning_rate": 7.074900791892083e-06, "loss": 0.0894, "step": 18503 }, { "epoch": 60.66885245901639, "grad_norm": 2.7452120780944824, "learning_rate": 7.07388536569262e-06, "loss": 0.1025, "step": 18504 }, { "epoch": 60.67213114754098, "grad_norm": 2.5315160751342773, "learning_rate": 7.072869972488143e-06, "loss": 0.1211, "step": 18505 }, { "epoch": 60.675409836065576, "grad_norm": 3.3499772548675537, "learning_rate": 7.071854612290101e-06, "loss": 0.1293, "step": 18506 }, { "epoch": 60.678688524590164, "grad_norm": 2.8200087547302246, "learning_rate": 7.070839285109938e-06, "loss": 0.224, "step": 18507 }, { "epoch": 60.68196721311475, "grad_norm": 3.0708043575286865, "learning_rate": 7.069823990959113e-06, "loss": 0.1091, "step": 18508 }, { "epoch": 60.68524590163934, "grad_norm": 2.887392282485962, "learning_rate": 7.068808729849068e-06, "loss": 0.1189, "step": 18509 }, { "epoch": 60.68852459016394, "grad_norm": 2.2405176162719727, "learning_rate": 7.067793501791253e-06, "loss": 0.149, "step": 18510 }, { "epoch": 60.691803278688525, "grad_norm": 3.2487218379974365, "learning_rate": 7.0667783067971145e-06, "loss": 0.2521, "step": 18511 }, { "epoch": 60.69508196721311, "grad_norm": 3.15364933013916, "learning_rate": 7.0657631448780965e-06, "loss": 0.2825, "step": 18512 }, { "epoch": 60.6983606557377, "grad_norm": 2.5504517555236816, "learning_rate": 7.0647480160456525e-06, "loss": 0.0967, "step": 18513 }, { "epoch": 60.7016393442623, "grad_norm": 2.4794163703918457, "learning_rate": 7.0637329203112285e-06, "loss": 0.138, "step": 18514 }, { "epoch": 60.704918032786885, "grad_norm": 3.4070816040039062, "learning_rate": 7.062717857686268e-06, "loss": 0.2248, "step": 18515 }, { "epoch": 60.708196721311474, "grad_norm": 2.2993390560150146, "learning_rate": 7.061702828182214e-06, "loss": 0.2947, "step": 18516 }, { "epoch": 60.71147540983607, "grad_norm": 3.5190250873565674, "learning_rate": 7.0606878318105195e-06, "loss": 0.2636, "step": 18517 }, { "epoch": 60.71475409836066, "grad_norm": 2.5234720706939697, "learning_rate": 7.0596728685826255e-06, "loss": 0.1116, "step": 18518 }, { "epoch": 60.718032786885246, "grad_norm": 2.958451509475708, "learning_rate": 7.058657938509979e-06, "loss": 0.2237, "step": 18519 }, { "epoch": 60.721311475409834, "grad_norm": 3.0754940509796143, "learning_rate": 7.0576430416040155e-06, "loss": 0.4093, "step": 18520 }, { "epoch": 60.72459016393443, "grad_norm": 2.6238958835601807, "learning_rate": 7.056628177876192e-06, "loss": 0.078, "step": 18521 }, { "epoch": 60.72786885245902, "grad_norm": 2.8102760314941406, "learning_rate": 7.055613347337944e-06, "loss": 0.1632, "step": 18522 }, { "epoch": 60.731147540983606, "grad_norm": 3.1318187713623047, "learning_rate": 7.054598550000719e-06, "loss": 0.1493, "step": 18523 }, { "epoch": 60.734426229508195, "grad_norm": 2.492413282394409, "learning_rate": 7.0535837858759545e-06, "loss": 0.1894, "step": 18524 }, { "epoch": 60.73770491803279, "grad_norm": 2.623142957687378, "learning_rate": 7.052569054975098e-06, "loss": 0.0833, "step": 18525 }, { "epoch": 60.74098360655738, "grad_norm": 4.471042156219482, "learning_rate": 7.051554357309591e-06, "loss": 0.1634, "step": 18526 }, { "epoch": 60.74426229508197, "grad_norm": 3.4945881366729736, "learning_rate": 7.050539692890872e-06, "loss": 0.2849, "step": 18527 }, { "epoch": 60.747540983606555, "grad_norm": 3.300816059112549, "learning_rate": 7.0495250617303865e-06, "loss": 0.161, "step": 18528 }, { "epoch": 60.75081967213115, "grad_norm": 2.2084527015686035, "learning_rate": 7.048510463839572e-06, "loss": 0.0568, "step": 18529 }, { "epoch": 60.75409836065574, "grad_norm": 3.273139238357544, "learning_rate": 7.047495899229872e-06, "loss": 0.243, "step": 18530 }, { "epoch": 60.75737704918033, "grad_norm": 4.007822036743164, "learning_rate": 7.0464813679127255e-06, "loss": 0.1265, "step": 18531 }, { "epoch": 60.760655737704916, "grad_norm": 3.2589635848999023, "learning_rate": 7.045466869899573e-06, "loss": 0.1235, "step": 18532 }, { "epoch": 60.76393442622951, "grad_norm": 2.7021443843841553, "learning_rate": 7.0444524052018514e-06, "loss": 0.1763, "step": 18533 }, { "epoch": 60.7672131147541, "grad_norm": 3.124302864074707, "learning_rate": 7.043437973831002e-06, "loss": 0.0955, "step": 18534 }, { "epoch": 60.77049180327869, "grad_norm": 4.413702487945557, "learning_rate": 7.042423575798466e-06, "loss": 0.1379, "step": 18535 }, { "epoch": 60.773770491803276, "grad_norm": 2.7130908966064453, "learning_rate": 7.04140921111568e-06, "loss": 0.1094, "step": 18536 }, { "epoch": 60.77704918032787, "grad_norm": 4.200957775115967, "learning_rate": 7.040394879794079e-06, "loss": 0.2336, "step": 18537 }, { "epoch": 60.78032786885246, "grad_norm": 2.8817214965820312, "learning_rate": 7.039380581845101e-06, "loss": 0.1168, "step": 18538 }, { "epoch": 60.78360655737705, "grad_norm": 3.333467483520508, "learning_rate": 7.038366317280188e-06, "loss": 0.1455, "step": 18539 }, { "epoch": 60.78688524590164, "grad_norm": 3.2607433795928955, "learning_rate": 7.037352086110776e-06, "loss": 0.1548, "step": 18540 }, { "epoch": 60.79016393442623, "grad_norm": 3.0959744453430176, "learning_rate": 7.036337888348298e-06, "loss": 0.1827, "step": 18541 }, { "epoch": 60.79344262295082, "grad_norm": 2.8224802017211914, "learning_rate": 7.035323724004188e-06, "loss": 0.1562, "step": 18542 }, { "epoch": 60.79672131147541, "grad_norm": 2.655324697494507, "learning_rate": 7.03430959308989e-06, "loss": 0.1547, "step": 18543 }, { "epoch": 60.8, "grad_norm": 2.7393126487731934, "learning_rate": 7.033295495616834e-06, "loss": 0.2555, "step": 18544 }, { "epoch": 60.80327868852459, "grad_norm": 2.823601722717285, "learning_rate": 7.032281431596456e-06, "loss": 0.2132, "step": 18545 }, { "epoch": 60.80655737704918, "grad_norm": 2.878856897354126, "learning_rate": 7.031267401040187e-06, "loss": 0.1847, "step": 18546 }, { "epoch": 60.80983606557377, "grad_norm": 2.654636859893799, "learning_rate": 7.030253403959468e-06, "loss": 0.1533, "step": 18547 }, { "epoch": 60.81311475409836, "grad_norm": 4.0925374031066895, "learning_rate": 7.0292394403657305e-06, "loss": 0.1174, "step": 18548 }, { "epoch": 60.81639344262295, "grad_norm": 2.576934576034546, "learning_rate": 7.028225510270406e-06, "loss": 0.1458, "step": 18549 }, { "epoch": 60.81967213114754, "grad_norm": 3.1607258319854736, "learning_rate": 7.02721161368493e-06, "loss": 0.2085, "step": 18550 }, { "epoch": 60.82295081967213, "grad_norm": 3.879100799560547, "learning_rate": 7.026197750620729e-06, "loss": 0.2251, "step": 18551 }, { "epoch": 60.82622950819672, "grad_norm": 3.081003189086914, "learning_rate": 7.025183921089246e-06, "loss": 0.1254, "step": 18552 }, { "epoch": 60.829508196721314, "grad_norm": 3.584246873855591, "learning_rate": 7.024170125101906e-06, "loss": 0.1372, "step": 18553 }, { "epoch": 60.8327868852459, "grad_norm": 3.5318350791931152, "learning_rate": 7.02315636267014e-06, "loss": 0.102, "step": 18554 }, { "epoch": 60.83606557377049, "grad_norm": 3.132359743118286, "learning_rate": 7.022142633805382e-06, "loss": 0.2015, "step": 18555 }, { "epoch": 60.83934426229508, "grad_norm": 3.5377376079559326, "learning_rate": 7.021128938519063e-06, "loss": 0.2051, "step": 18556 }, { "epoch": 60.842622950819674, "grad_norm": 5.478004455566406, "learning_rate": 7.02011527682261e-06, "loss": 0.1504, "step": 18557 }, { "epoch": 60.84590163934426, "grad_norm": 2.917360305786133, "learning_rate": 7.0191016487274585e-06, "loss": 0.1617, "step": 18558 }, { "epoch": 60.84918032786885, "grad_norm": 2.7787771224975586, "learning_rate": 7.018088054245034e-06, "loss": 0.0703, "step": 18559 }, { "epoch": 60.85245901639344, "grad_norm": 3.6918156147003174, "learning_rate": 7.017074493386765e-06, "loss": 0.4403, "step": 18560 }, { "epoch": 60.855737704918035, "grad_norm": 3.0485832691192627, "learning_rate": 7.016060966164087e-06, "loss": 0.1218, "step": 18561 }, { "epoch": 60.85901639344262, "grad_norm": 3.2077345848083496, "learning_rate": 7.015047472588422e-06, "loss": 0.2837, "step": 18562 }, { "epoch": 60.86229508196721, "grad_norm": 3.0037994384765625, "learning_rate": 7.014034012671202e-06, "loss": 0.2715, "step": 18563 }, { "epoch": 60.86557377049181, "grad_norm": 2.878723621368408, "learning_rate": 7.013020586423848e-06, "loss": 0.224, "step": 18564 }, { "epoch": 60.868852459016395, "grad_norm": 3.8527133464813232, "learning_rate": 7.012007193857797e-06, "loss": 0.2288, "step": 18565 }, { "epoch": 60.87213114754098, "grad_norm": 3.009273052215576, "learning_rate": 7.0109938349844706e-06, "loss": 0.1608, "step": 18566 }, { "epoch": 60.87540983606557, "grad_norm": 2.4830055236816406, "learning_rate": 7.009980509815297e-06, "loss": 0.1099, "step": 18567 }, { "epoch": 60.87868852459017, "grad_norm": 3.4282326698303223, "learning_rate": 7.0089672183617e-06, "loss": 0.2551, "step": 18568 }, { "epoch": 60.881967213114756, "grad_norm": 3.713698625564575, "learning_rate": 7.007953960635109e-06, "loss": 0.2689, "step": 18569 }, { "epoch": 60.885245901639344, "grad_norm": 2.316450357437134, "learning_rate": 7.006940736646949e-06, "loss": 0.1432, "step": 18570 }, { "epoch": 60.88852459016393, "grad_norm": 2.8450710773468018, "learning_rate": 7.005927546408644e-06, "loss": 0.2133, "step": 18571 }, { "epoch": 60.89180327868853, "grad_norm": 3.091019868850708, "learning_rate": 7.004914389931615e-06, "loss": 0.2078, "step": 18572 }, { "epoch": 60.895081967213116, "grad_norm": 3.238914966583252, "learning_rate": 7.003901267227296e-06, "loss": 0.185, "step": 18573 }, { "epoch": 60.898360655737704, "grad_norm": 3.8975586891174316, "learning_rate": 7.002888178307102e-06, "loss": 0.2702, "step": 18574 }, { "epoch": 60.90163934426229, "grad_norm": 3.5554802417755127, "learning_rate": 7.001875123182462e-06, "loss": 0.2375, "step": 18575 }, { "epoch": 60.90491803278689, "grad_norm": 2.8748137950897217, "learning_rate": 7.0008621018647984e-06, "loss": 0.291, "step": 18576 }, { "epoch": 60.90819672131148, "grad_norm": 3.307260036468506, "learning_rate": 6.9998491143655264e-06, "loss": 0.2186, "step": 18577 }, { "epoch": 60.911475409836065, "grad_norm": 2.71657395362854, "learning_rate": 6.998836160696081e-06, "loss": 0.1099, "step": 18578 }, { "epoch": 60.91475409836065, "grad_norm": 3.6497151851654053, "learning_rate": 6.997823240867877e-06, "loss": 0.2299, "step": 18579 }, { "epoch": 60.91803278688525, "grad_norm": 3.715430974960327, "learning_rate": 6.996810354892336e-06, "loss": 0.365, "step": 18580 }, { "epoch": 60.92131147540984, "grad_norm": 3.1732523441314697, "learning_rate": 6.995797502780881e-06, "loss": 0.3264, "step": 18581 }, { "epoch": 60.924590163934425, "grad_norm": 3.4685134887695312, "learning_rate": 6.9947846845449355e-06, "loss": 0.1652, "step": 18582 }, { "epoch": 60.927868852459014, "grad_norm": 2.871173858642578, "learning_rate": 6.993771900195914e-06, "loss": 0.1116, "step": 18583 }, { "epoch": 60.93114754098361, "grad_norm": 2.7881252765655518, "learning_rate": 6.992759149745242e-06, "loss": 0.2781, "step": 18584 }, { "epoch": 60.9344262295082, "grad_norm": 2.357799768447876, "learning_rate": 6.991746433204335e-06, "loss": 0.0695, "step": 18585 }, { "epoch": 60.937704918032786, "grad_norm": 3.18117094039917, "learning_rate": 6.990733750584615e-06, "loss": 0.2571, "step": 18586 }, { "epoch": 60.940983606557374, "grad_norm": 2.980293035507202, "learning_rate": 6.989721101897504e-06, "loss": 0.2342, "step": 18587 }, { "epoch": 60.94426229508197, "grad_norm": 3.6169660091400146, "learning_rate": 6.988708487154416e-06, "loss": 0.2561, "step": 18588 }, { "epoch": 60.94754098360656, "grad_norm": 2.732632875442505, "learning_rate": 6.987695906366771e-06, "loss": 0.1375, "step": 18589 }, { "epoch": 60.950819672131146, "grad_norm": 4.235204696655273, "learning_rate": 6.9866833595459846e-06, "loss": 0.2008, "step": 18590 }, { "epoch": 60.954098360655735, "grad_norm": 7.727505683898926, "learning_rate": 6.985670846703478e-06, "loss": 0.1562, "step": 18591 }, { "epoch": 60.95737704918033, "grad_norm": 2.7800326347351074, "learning_rate": 6.984658367850669e-06, "loss": 0.1111, "step": 18592 }, { "epoch": 60.96065573770492, "grad_norm": 3.432133674621582, "learning_rate": 6.983645922998969e-06, "loss": 0.2659, "step": 18593 }, { "epoch": 60.96393442622951, "grad_norm": 3.2018699645996094, "learning_rate": 6.9826335121597965e-06, "loss": 0.2443, "step": 18594 }, { "epoch": 60.967213114754095, "grad_norm": 3.272113084793091, "learning_rate": 6.981621135344572e-06, "loss": 0.1898, "step": 18595 }, { "epoch": 60.97049180327869, "grad_norm": 3.9400908946990967, "learning_rate": 6.980608792564706e-06, "loss": 0.3549, "step": 18596 }, { "epoch": 60.97377049180328, "grad_norm": 2.3956093788146973, "learning_rate": 6.979596483831615e-06, "loss": 0.1612, "step": 18597 }, { "epoch": 60.97704918032787, "grad_norm": 3.232755184173584, "learning_rate": 6.9785842091567115e-06, "loss": 0.104, "step": 18598 }, { "epoch": 60.980327868852456, "grad_norm": 3.4193825721740723, "learning_rate": 6.977571968551416e-06, "loss": 0.291, "step": 18599 }, { "epoch": 60.98360655737705, "grad_norm": 3.0127406120300293, "learning_rate": 6.9765597620271396e-06, "loss": 0.243, "step": 18600 }, { "epoch": 60.98688524590164, "grad_norm": 3.642679214477539, "learning_rate": 6.975547589595295e-06, "loss": 0.3326, "step": 18601 }, { "epoch": 60.99016393442623, "grad_norm": 3.1575090885162354, "learning_rate": 6.9745354512672955e-06, "loss": 0.3949, "step": 18602 }, { "epoch": 60.993442622950816, "grad_norm": 5.0627217292785645, "learning_rate": 6.973523347054552e-06, "loss": 0.173, "step": 18603 }, { "epoch": 60.99672131147541, "grad_norm": 3.492526054382324, "learning_rate": 6.972511276968481e-06, "loss": 0.1015, "step": 18604 }, { "epoch": 61.0, "grad_norm": 3.1277077198028564, "learning_rate": 6.971499241020495e-06, "loss": 0.1853, "step": 18605 }, { "epoch": 61.00327868852459, "grad_norm": 2.5806896686553955, "learning_rate": 6.970487239222001e-06, "loss": 0.1173, "step": 18606 }, { "epoch": 61.006557377049184, "grad_norm": 3.079413890838623, "learning_rate": 6.9694752715844135e-06, "loss": 0.1703, "step": 18607 }, { "epoch": 61.00983606557377, "grad_norm": 3.1316444873809814, "learning_rate": 6.968463338119147e-06, "loss": 0.24, "step": 18608 }, { "epoch": 61.01311475409836, "grad_norm": 2.804429769515991, "learning_rate": 6.967451438837605e-06, "loss": 0.1948, "step": 18609 }, { "epoch": 61.01639344262295, "grad_norm": 2.363525867462158, "learning_rate": 6.966439573751202e-06, "loss": 0.1617, "step": 18610 }, { "epoch": 61.019672131147544, "grad_norm": 3.114610195159912, "learning_rate": 6.965427742871346e-06, "loss": 0.2343, "step": 18611 }, { "epoch": 61.02295081967213, "grad_norm": 2.6231191158294678, "learning_rate": 6.964415946209448e-06, "loss": 0.0988, "step": 18612 }, { "epoch": 61.02622950819672, "grad_norm": 2.615035057067871, "learning_rate": 6.963404183776916e-06, "loss": 0.0842, "step": 18613 }, { "epoch": 61.02950819672131, "grad_norm": 3.3948545455932617, "learning_rate": 6.9623924555851616e-06, "loss": 0.0999, "step": 18614 }, { "epoch": 61.032786885245905, "grad_norm": 3.086771249771118, "learning_rate": 6.96138076164559e-06, "loss": 0.1652, "step": 18615 }, { "epoch": 61.03606557377049, "grad_norm": 4.023641586303711, "learning_rate": 6.960369101969605e-06, "loss": 0.177, "step": 18616 }, { "epoch": 61.03934426229508, "grad_norm": 2.5813040733337402, "learning_rate": 6.9593574765686235e-06, "loss": 0.0652, "step": 18617 }, { "epoch": 61.04262295081967, "grad_norm": 2.8707051277160645, "learning_rate": 6.958345885454047e-06, "loss": 0.1786, "step": 18618 }, { "epoch": 61.045901639344265, "grad_norm": 2.5649166107177734, "learning_rate": 6.957334328637284e-06, "loss": 0.0994, "step": 18619 }, { "epoch": 61.049180327868854, "grad_norm": 2.7043871879577637, "learning_rate": 6.956322806129736e-06, "loss": 0.3213, "step": 18620 }, { "epoch": 61.05245901639344, "grad_norm": 3.3643798828125, "learning_rate": 6.955311317942817e-06, "loss": 0.1695, "step": 18621 }, { "epoch": 61.05573770491803, "grad_norm": 2.8648486137390137, "learning_rate": 6.954299864087929e-06, "loss": 0.1721, "step": 18622 }, { "epoch": 61.059016393442626, "grad_norm": 3.1923716068267822, "learning_rate": 6.953288444576476e-06, "loss": 0.2131, "step": 18623 }, { "epoch": 61.062295081967214, "grad_norm": 3.3588759899139404, "learning_rate": 6.95227705941986e-06, "loss": 0.154, "step": 18624 }, { "epoch": 61.0655737704918, "grad_norm": 2.2250239849090576, "learning_rate": 6.951265708629493e-06, "loss": 0.2039, "step": 18625 }, { "epoch": 61.06885245901639, "grad_norm": 2.981397867202759, "learning_rate": 6.950254392216774e-06, "loss": 0.1759, "step": 18626 }, { "epoch": 61.072131147540986, "grad_norm": 3.19999361038208, "learning_rate": 6.949243110193109e-06, "loss": 0.0979, "step": 18627 }, { "epoch": 61.075409836065575, "grad_norm": 3.271493434906006, "learning_rate": 6.9482318625698986e-06, "loss": 0.2574, "step": 18628 }, { "epoch": 61.07868852459016, "grad_norm": 3.6369574069976807, "learning_rate": 6.947220649358544e-06, "loss": 0.16, "step": 18629 }, { "epoch": 61.08196721311475, "grad_norm": 4.894690036773682, "learning_rate": 6.946209470570455e-06, "loss": 0.159, "step": 18630 }, { "epoch": 61.08524590163935, "grad_norm": 3.1823513507843018, "learning_rate": 6.9451983262170286e-06, "loss": 0.2478, "step": 18631 }, { "epoch": 61.088524590163935, "grad_norm": 2.9586873054504395, "learning_rate": 6.944187216309668e-06, "loss": 0.1085, "step": 18632 }, { "epoch": 61.09180327868852, "grad_norm": 3.342071056365967, "learning_rate": 6.943176140859769e-06, "loss": 0.2605, "step": 18633 }, { "epoch": 61.09508196721311, "grad_norm": 2.586270570755005, "learning_rate": 6.9421650998787415e-06, "loss": 0.2347, "step": 18634 }, { "epoch": 61.09836065573771, "grad_norm": 2.726191997528076, "learning_rate": 6.941154093377982e-06, "loss": 0.148, "step": 18635 }, { "epoch": 61.101639344262296, "grad_norm": 3.308838367462158, "learning_rate": 6.940143121368888e-06, "loss": 0.3724, "step": 18636 }, { "epoch": 61.104918032786884, "grad_norm": 2.7318451404571533, "learning_rate": 6.939132183862862e-06, "loss": 0.2977, "step": 18637 }, { "epoch": 61.10819672131147, "grad_norm": 2.987431287765503, "learning_rate": 6.938121280871306e-06, "loss": 0.1994, "step": 18638 }, { "epoch": 61.11147540983607, "grad_norm": 2.8136940002441406, "learning_rate": 6.937110412405613e-06, "loss": 0.1255, "step": 18639 }, { "epoch": 61.114754098360656, "grad_norm": 3.1406681537628174, "learning_rate": 6.9360995784771865e-06, "loss": 0.2051, "step": 18640 }, { "epoch": 61.118032786885244, "grad_norm": 3.732191324234009, "learning_rate": 6.935088779097426e-06, "loss": 0.3647, "step": 18641 }, { "epoch": 61.12131147540983, "grad_norm": 3.237196445465088, "learning_rate": 6.934078014277719e-06, "loss": 0.3461, "step": 18642 }, { "epoch": 61.12459016393443, "grad_norm": 1.9858341217041016, "learning_rate": 6.933067284029474e-06, "loss": 0.1104, "step": 18643 }, { "epoch": 61.12786885245902, "grad_norm": 3.6205227375030518, "learning_rate": 6.932056588364084e-06, "loss": 0.2228, "step": 18644 }, { "epoch": 61.131147540983605, "grad_norm": 2.7988317012786865, "learning_rate": 6.931045927292948e-06, "loss": 0.1606, "step": 18645 }, { "epoch": 61.13442622950819, "grad_norm": 2.2817881107330322, "learning_rate": 6.930035300827456e-06, "loss": 0.1451, "step": 18646 }, { "epoch": 61.13770491803279, "grad_norm": 2.0324416160583496, "learning_rate": 6.929024708979011e-06, "loss": 0.0816, "step": 18647 }, { "epoch": 61.14098360655738, "grad_norm": 4.118922233581543, "learning_rate": 6.928014151759004e-06, "loss": 0.1745, "step": 18648 }, { "epoch": 61.144262295081965, "grad_norm": 2.964477777481079, "learning_rate": 6.9270036291788335e-06, "loss": 0.1391, "step": 18649 }, { "epoch": 61.14754098360656, "grad_norm": 2.5519042015075684, "learning_rate": 6.925993141249887e-06, "loss": 0.2103, "step": 18650 }, { "epoch": 61.15081967213115, "grad_norm": 2.650439739227295, "learning_rate": 6.924982687983569e-06, "loss": 0.1918, "step": 18651 }, { "epoch": 61.15409836065574, "grad_norm": 3.9893128871917725, "learning_rate": 6.9239722693912684e-06, "loss": 0.1144, "step": 18652 }, { "epoch": 61.157377049180326, "grad_norm": 3.145059585571289, "learning_rate": 6.9229618854843785e-06, "loss": 0.1338, "step": 18653 }, { "epoch": 61.16065573770492, "grad_norm": 2.4856162071228027, "learning_rate": 6.921951536274292e-06, "loss": 0.2678, "step": 18654 }, { "epoch": 61.16393442622951, "grad_norm": 2.9366352558135986, "learning_rate": 6.9209412217724e-06, "loss": 0.079, "step": 18655 }, { "epoch": 61.1672131147541, "grad_norm": 3.237841844558716, "learning_rate": 6.9199309419900984e-06, "loss": 0.3597, "step": 18656 }, { "epoch": 61.170491803278686, "grad_norm": 3.1543755531311035, "learning_rate": 6.91892069693878e-06, "loss": 0.0965, "step": 18657 }, { "epoch": 61.17377049180328, "grad_norm": 3.035673141479492, "learning_rate": 6.917910486629833e-06, "loss": 0.1695, "step": 18658 }, { "epoch": 61.17704918032787, "grad_norm": 3.0334672927856445, "learning_rate": 6.916900311074647e-06, "loss": 0.1523, "step": 18659 }, { "epoch": 61.18032786885246, "grad_norm": 2.70695424079895, "learning_rate": 6.915890170284619e-06, "loss": 0.1114, "step": 18660 }, { "epoch": 61.18360655737705, "grad_norm": 2.3209192752838135, "learning_rate": 6.914880064271136e-06, "loss": 0.1104, "step": 18661 }, { "epoch": 61.18688524590164, "grad_norm": 3.399247169494629, "learning_rate": 6.913869993045586e-06, "loss": 0.1633, "step": 18662 }, { "epoch": 61.19016393442623, "grad_norm": 2.5577762126922607, "learning_rate": 6.912859956619361e-06, "loss": 0.2071, "step": 18663 }, { "epoch": 61.19344262295082, "grad_norm": 4.030406951904297, "learning_rate": 6.911849955003851e-06, "loss": 0.2719, "step": 18664 }, { "epoch": 61.19672131147541, "grad_norm": 3.00996994972229, "learning_rate": 6.9108399882104426e-06, "loss": 0.249, "step": 18665 }, { "epoch": 61.2, "grad_norm": 3.512485980987549, "learning_rate": 6.909830056250527e-06, "loss": 0.2469, "step": 18666 }, { "epoch": 61.20327868852459, "grad_norm": 2.8875224590301514, "learning_rate": 6.9088201591354895e-06, "loss": 0.1823, "step": 18667 }, { "epoch": 61.20655737704918, "grad_norm": 2.9545750617980957, "learning_rate": 6.907810296876716e-06, "loss": 0.1747, "step": 18668 }, { "epoch": 61.20983606557377, "grad_norm": 2.3998310565948486, "learning_rate": 6.9068004694856e-06, "loss": 0.1409, "step": 18669 }, { "epoch": 61.21311475409836, "grad_norm": 3.4552600383758545, "learning_rate": 6.905790676973525e-06, "loss": 0.2, "step": 18670 }, { "epoch": 61.21639344262295, "grad_norm": 3.330547332763672, "learning_rate": 6.904780919351876e-06, "loss": 0.1857, "step": 18671 }, { "epoch": 61.21967213114754, "grad_norm": 2.797762632369995, "learning_rate": 6.903771196632039e-06, "loss": 0.116, "step": 18672 }, { "epoch": 61.22295081967213, "grad_norm": 2.895975351333618, "learning_rate": 6.902761508825403e-06, "loss": 0.1791, "step": 18673 }, { "epoch": 61.226229508196724, "grad_norm": 2.374492883682251, "learning_rate": 6.9017518559433525e-06, "loss": 0.1575, "step": 18674 }, { "epoch": 61.22950819672131, "grad_norm": 2.797431707382202, "learning_rate": 6.90074223799727e-06, "loss": 0.1022, "step": 18675 }, { "epoch": 61.2327868852459, "grad_norm": 3.134230136871338, "learning_rate": 6.899732654998538e-06, "loss": 0.2955, "step": 18676 }, { "epoch": 61.23606557377049, "grad_norm": 2.7997148036956787, "learning_rate": 6.898723106958547e-06, "loss": 0.2683, "step": 18677 }, { "epoch": 61.239344262295084, "grad_norm": 3.056913137435913, "learning_rate": 6.897713593888678e-06, "loss": 0.1767, "step": 18678 }, { "epoch": 61.24262295081967, "grad_norm": 3.004685163497925, "learning_rate": 6.896704115800314e-06, "loss": 0.2224, "step": 18679 }, { "epoch": 61.24590163934426, "grad_norm": 3.5691473484039307, "learning_rate": 6.895694672704838e-06, "loss": 0.1794, "step": 18680 }, { "epoch": 61.24918032786885, "grad_norm": 3.714111089706421, "learning_rate": 6.894685264613627e-06, "loss": 0.2035, "step": 18681 }, { "epoch": 61.252459016393445, "grad_norm": 3.3336706161499023, "learning_rate": 6.893675891538073e-06, "loss": 0.3132, "step": 18682 }, { "epoch": 61.25573770491803, "grad_norm": 2.716874122619629, "learning_rate": 6.892666553489553e-06, "loss": 0.105, "step": 18683 }, { "epoch": 61.25901639344262, "grad_norm": 2.7961931228637695, "learning_rate": 6.891657250479449e-06, "loss": 0.1461, "step": 18684 }, { "epoch": 61.26229508196721, "grad_norm": 2.69477915763855, "learning_rate": 6.890647982519137e-06, "loss": 0.159, "step": 18685 }, { "epoch": 61.265573770491805, "grad_norm": 2.906182289123535, "learning_rate": 6.889638749620006e-06, "loss": 0.2237, "step": 18686 }, { "epoch": 61.268852459016394, "grad_norm": 3.2476179599761963, "learning_rate": 6.8886295517934324e-06, "loss": 0.1608, "step": 18687 }, { "epoch": 61.27213114754098, "grad_norm": 2.7445967197418213, "learning_rate": 6.887620389050793e-06, "loss": 0.1303, "step": 18688 }, { "epoch": 61.27540983606557, "grad_norm": 3.4895029067993164, "learning_rate": 6.886611261403468e-06, "loss": 0.1543, "step": 18689 }, { "epoch": 61.278688524590166, "grad_norm": 2.7257606983184814, "learning_rate": 6.885602168862844e-06, "loss": 0.1309, "step": 18690 }, { "epoch": 61.281967213114754, "grad_norm": 2.7466814517974854, "learning_rate": 6.88459311144029e-06, "loss": 0.1507, "step": 18691 }, { "epoch": 61.28524590163934, "grad_norm": 2.480830192565918, "learning_rate": 6.883584089147189e-06, "loss": 0.158, "step": 18692 }, { "epoch": 61.28852459016394, "grad_norm": 2.4845831394195557, "learning_rate": 6.882575101994918e-06, "loss": 0.1563, "step": 18693 }, { "epoch": 61.291803278688526, "grad_norm": 2.109104871749878, "learning_rate": 6.881566149994853e-06, "loss": 0.0536, "step": 18694 }, { "epoch": 61.295081967213115, "grad_norm": 2.8383021354675293, "learning_rate": 6.8805572331583715e-06, "loss": 0.1369, "step": 18695 }, { "epoch": 61.2983606557377, "grad_norm": 3.1261661052703857, "learning_rate": 6.879548351496852e-06, "loss": 0.2484, "step": 18696 }, { "epoch": 61.3016393442623, "grad_norm": 2.311771869659424, "learning_rate": 6.8785395050216696e-06, "loss": 0.1813, "step": 18697 }, { "epoch": 61.30491803278689, "grad_norm": 3.3228516578674316, "learning_rate": 6.8775306937441964e-06, "loss": 0.2326, "step": 18698 }, { "epoch": 61.308196721311475, "grad_norm": 2.313042402267456, "learning_rate": 6.876521917675814e-06, "loss": 0.2417, "step": 18699 }, { "epoch": 61.31147540983606, "grad_norm": 3.0793747901916504, "learning_rate": 6.875513176827895e-06, "loss": 0.3505, "step": 18700 }, { "epoch": 61.31475409836066, "grad_norm": 3.0358262062072754, "learning_rate": 6.874504471211815e-06, "loss": 0.2564, "step": 18701 }, { "epoch": 61.31803278688525, "grad_norm": 3.2281222343444824, "learning_rate": 6.873495800838941e-06, "loss": 0.2343, "step": 18702 }, { "epoch": 61.321311475409836, "grad_norm": 3.441620349884033, "learning_rate": 6.872487165720658e-06, "loss": 0.1742, "step": 18703 }, { "epoch": 61.324590163934424, "grad_norm": 2.999140739440918, "learning_rate": 6.871478565868333e-06, "loss": 0.1253, "step": 18704 }, { "epoch": 61.32786885245902, "grad_norm": 2.4342355728149414, "learning_rate": 6.87047000129334e-06, "loss": 0.2478, "step": 18705 }, { "epoch": 61.33114754098361, "grad_norm": 2.9353859424591064, "learning_rate": 6.869461472007052e-06, "loss": 0.1765, "step": 18706 }, { "epoch": 61.334426229508196, "grad_norm": 3.765395164489746, "learning_rate": 6.868452978020836e-06, "loss": 0.1766, "step": 18707 }, { "epoch": 61.337704918032784, "grad_norm": 2.8260140419006348, "learning_rate": 6.867444519346074e-06, "loss": 0.242, "step": 18708 }, { "epoch": 61.34098360655738, "grad_norm": 2.341334104537964, "learning_rate": 6.8664360959941314e-06, "loss": 0.1405, "step": 18709 }, { "epoch": 61.34426229508197, "grad_norm": 2.794286012649536, "learning_rate": 6.86542770797638e-06, "loss": 0.0891, "step": 18710 }, { "epoch": 61.34754098360656, "grad_norm": 3.3224146366119385, "learning_rate": 6.864419355304186e-06, "loss": 0.2871, "step": 18711 }, { "epoch": 61.350819672131145, "grad_norm": 3.032362937927246, "learning_rate": 6.86341103798893e-06, "loss": 0.0754, "step": 18712 }, { "epoch": 61.35409836065574, "grad_norm": 2.9656496047973633, "learning_rate": 6.862402756041973e-06, "loss": 0.1448, "step": 18713 }, { "epoch": 61.35737704918033, "grad_norm": 2.7508020401000977, "learning_rate": 6.861394509474688e-06, "loss": 0.1698, "step": 18714 }, { "epoch": 61.36065573770492, "grad_norm": 3.16280198097229, "learning_rate": 6.860386298298442e-06, "loss": 0.1906, "step": 18715 }, { "epoch": 61.363934426229505, "grad_norm": 2.7132985591888428, "learning_rate": 6.859378122524605e-06, "loss": 0.2292, "step": 18716 }, { "epoch": 61.3672131147541, "grad_norm": 4.986085414886475, "learning_rate": 6.8583699821645455e-06, "loss": 0.2183, "step": 18717 }, { "epoch": 61.37049180327869, "grad_norm": 3.2447354793548584, "learning_rate": 6.85736187722963e-06, "loss": 0.132, "step": 18718 }, { "epoch": 61.37377049180328, "grad_norm": 2.4748377799987793, "learning_rate": 6.856353807731228e-06, "loss": 0.1535, "step": 18719 }, { "epoch": 61.377049180327866, "grad_norm": 3.2803988456726074, "learning_rate": 6.855345773680705e-06, "loss": 0.159, "step": 18720 }, { "epoch": 61.38032786885246, "grad_norm": 2.826343059539795, "learning_rate": 6.854337775089428e-06, "loss": 0.1124, "step": 18721 }, { "epoch": 61.38360655737705, "grad_norm": 3.769829511642456, "learning_rate": 6.853329811968764e-06, "loss": 0.3011, "step": 18722 }, { "epoch": 61.38688524590164, "grad_norm": 2.5247933864593506, "learning_rate": 6.852321884330078e-06, "loss": 0.0434, "step": 18723 }, { "epoch": 61.390163934426226, "grad_norm": 4.146163463592529, "learning_rate": 6.851313992184732e-06, "loss": 0.1735, "step": 18724 }, { "epoch": 61.39344262295082, "grad_norm": 3.0760293006896973, "learning_rate": 6.8503061355440994e-06, "loss": 0.2734, "step": 18725 }, { "epoch": 61.39672131147541, "grad_norm": 2.9229440689086914, "learning_rate": 6.849298314419539e-06, "loss": 0.1743, "step": 18726 }, { "epoch": 61.4, "grad_norm": 3.308199167251587, "learning_rate": 6.848290528822417e-06, "loss": 0.1932, "step": 18727 }, { "epoch": 61.40327868852459, "grad_norm": 2.6259167194366455, "learning_rate": 6.847282778764095e-06, "loss": 0.1368, "step": 18728 }, { "epoch": 61.40655737704918, "grad_norm": 3.1349833011627197, "learning_rate": 6.846275064255934e-06, "loss": 0.0789, "step": 18729 }, { "epoch": 61.40983606557377, "grad_norm": 2.3687407970428467, "learning_rate": 6.845267385309304e-06, "loss": 0.0871, "step": 18730 }, { "epoch": 61.41311475409836, "grad_norm": 3.149247884750366, "learning_rate": 6.844259741935566e-06, "loss": 0.1622, "step": 18731 }, { "epoch": 61.41639344262295, "grad_norm": 2.829604148864746, "learning_rate": 6.843252134146079e-06, "loss": 0.2213, "step": 18732 }, { "epoch": 61.41967213114754, "grad_norm": 3.2614798545837402, "learning_rate": 6.842244561952203e-06, "loss": 0.3651, "step": 18733 }, { "epoch": 61.42295081967213, "grad_norm": 2.974092721939087, "learning_rate": 6.841237025365308e-06, "loss": 0.2261, "step": 18734 }, { "epoch": 61.42622950819672, "grad_norm": 2.529778003692627, "learning_rate": 6.840229524396749e-06, "loss": 0.1088, "step": 18735 }, { "epoch": 61.429508196721315, "grad_norm": 3.5348730087280273, "learning_rate": 6.839222059057886e-06, "loss": 0.3256, "step": 18736 }, { "epoch": 61.4327868852459, "grad_norm": 2.387031316757202, "learning_rate": 6.838214629360078e-06, "loss": 0.1383, "step": 18737 }, { "epoch": 61.43606557377049, "grad_norm": 2.655228614807129, "learning_rate": 6.837207235314691e-06, "loss": 0.1859, "step": 18738 }, { "epoch": 61.43934426229508, "grad_norm": 2.539978504180908, "learning_rate": 6.836199876933082e-06, "loss": 0.1628, "step": 18739 }, { "epoch": 61.442622950819676, "grad_norm": 2.5850229263305664, "learning_rate": 6.8351925542266085e-06, "loss": 0.1958, "step": 18740 }, { "epoch": 61.445901639344264, "grad_norm": 3.5789101123809814, "learning_rate": 6.834185267206627e-06, "loss": 0.2155, "step": 18741 }, { "epoch": 61.44918032786885, "grad_norm": 2.802182674407959, "learning_rate": 6.8331780158844985e-06, "loss": 0.2704, "step": 18742 }, { "epoch": 61.45245901639344, "grad_norm": 2.298236846923828, "learning_rate": 6.832170800271583e-06, "loss": 0.1723, "step": 18743 }, { "epoch": 61.455737704918036, "grad_norm": 3.330787420272827, "learning_rate": 6.8311636203792316e-06, "loss": 0.0884, "step": 18744 }, { "epoch": 61.459016393442624, "grad_norm": 4.161545276641846, "learning_rate": 6.8301564762188084e-06, "loss": 0.2439, "step": 18745 }, { "epoch": 61.46229508196721, "grad_norm": 3.242905378341675, "learning_rate": 6.8291493678016654e-06, "loss": 0.1278, "step": 18746 }, { "epoch": 61.4655737704918, "grad_norm": 3.871626377105713, "learning_rate": 6.82814229513916e-06, "loss": 0.2758, "step": 18747 }, { "epoch": 61.4688524590164, "grad_norm": 3.443582773208618, "learning_rate": 6.827135258242649e-06, "loss": 0.2084, "step": 18748 }, { "epoch": 61.472131147540985, "grad_norm": 3.0430235862731934, "learning_rate": 6.826128257123487e-06, "loss": 0.1934, "step": 18749 }, { "epoch": 61.47540983606557, "grad_norm": 3.464542865753174, "learning_rate": 6.8251212917930265e-06, "loss": 0.2554, "step": 18750 }, { "epoch": 61.47868852459016, "grad_norm": 3.010281801223755, "learning_rate": 6.824114362262626e-06, "loss": 0.1519, "step": 18751 }, { "epoch": 61.48196721311476, "grad_norm": 2.8415310382843018, "learning_rate": 6.8231074685436405e-06, "loss": 0.1289, "step": 18752 }, { "epoch": 61.485245901639345, "grad_norm": 2.59590482711792, "learning_rate": 6.822100610647421e-06, "loss": 0.1967, "step": 18753 }, { "epoch": 61.488524590163934, "grad_norm": 5.141946792602539, "learning_rate": 6.82109378858532e-06, "loss": 0.2481, "step": 18754 }, { "epoch": 61.49180327868852, "grad_norm": 2.8570199012756348, "learning_rate": 6.820087002368689e-06, "loss": 0.2984, "step": 18755 }, { "epoch": 61.49508196721312, "grad_norm": 3.2936625480651855, "learning_rate": 6.8190802520088875e-06, "loss": 0.1569, "step": 18756 }, { "epoch": 61.498360655737706, "grad_norm": 3.0369646549224854, "learning_rate": 6.818073537517263e-06, "loss": 0.143, "step": 18757 }, { "epoch": 61.501639344262294, "grad_norm": 2.915179967880249, "learning_rate": 6.817066858905167e-06, "loss": 0.1705, "step": 18758 }, { "epoch": 61.50491803278688, "grad_norm": 2.647350549697876, "learning_rate": 6.816060216183949e-06, "loss": 0.1799, "step": 18759 }, { "epoch": 61.50819672131148, "grad_norm": 3.628262519836426, "learning_rate": 6.815053609364965e-06, "loss": 0.1981, "step": 18760 }, { "epoch": 61.511475409836066, "grad_norm": 7.2025837898254395, "learning_rate": 6.814047038459564e-06, "loss": 0.1114, "step": 18761 }, { "epoch": 61.514754098360655, "grad_norm": 2.5771124362945557, "learning_rate": 6.8130405034790944e-06, "loss": 0.0962, "step": 18762 }, { "epoch": 61.51803278688524, "grad_norm": 2.4244911670684814, "learning_rate": 6.812034004434904e-06, "loss": 0.1681, "step": 18763 }, { "epoch": 61.52131147540984, "grad_norm": 3.096585750579834, "learning_rate": 6.8110275413383485e-06, "loss": 0.0834, "step": 18764 }, { "epoch": 61.52459016393443, "grad_norm": 2.648419141769409, "learning_rate": 6.810021114200773e-06, "loss": 0.1008, "step": 18765 }, { "epoch": 61.527868852459015, "grad_norm": 3.0326123237609863, "learning_rate": 6.809014723033526e-06, "loss": 0.1344, "step": 18766 }, { "epoch": 61.5311475409836, "grad_norm": 2.8069965839385986, "learning_rate": 6.808008367847954e-06, "loss": 0.175, "step": 18767 }, { "epoch": 61.5344262295082, "grad_norm": 3.6988165378570557, "learning_rate": 6.807002048655405e-06, "loss": 0.2551, "step": 18768 }, { "epoch": 61.53770491803279, "grad_norm": 2.801236629486084, "learning_rate": 6.805995765467233e-06, "loss": 0.1471, "step": 18769 }, { "epoch": 61.540983606557376, "grad_norm": 2.6109957695007324, "learning_rate": 6.804989518294775e-06, "loss": 0.2858, "step": 18770 }, { "epoch": 61.544262295081964, "grad_norm": 2.633460760116577, "learning_rate": 6.8039833071493845e-06, "loss": 0.1713, "step": 18771 }, { "epoch": 61.54754098360656, "grad_norm": 2.3589272499084473, "learning_rate": 6.802977132042403e-06, "loss": 0.0986, "step": 18772 }, { "epoch": 61.55081967213115, "grad_norm": 3.1546337604522705, "learning_rate": 6.801970992985181e-06, "loss": 0.1841, "step": 18773 }, { "epoch": 61.554098360655736, "grad_norm": 2.7698004245758057, "learning_rate": 6.8009648899890615e-06, "loss": 0.1707, "step": 18774 }, { "epoch": 61.557377049180324, "grad_norm": 3.1087939739227295, "learning_rate": 6.799958823065388e-06, "loss": 0.1881, "step": 18775 }, { "epoch": 61.56065573770492, "grad_norm": 3.639096260070801, "learning_rate": 6.798952792225504e-06, "loss": 0.2427, "step": 18776 }, { "epoch": 61.56393442622951, "grad_norm": 3.110358715057373, "learning_rate": 6.797946797480756e-06, "loss": 0.1496, "step": 18777 }, { "epoch": 61.5672131147541, "grad_norm": 2.7007954120635986, "learning_rate": 6.79694083884249e-06, "loss": 0.1643, "step": 18778 }, { "epoch": 61.570491803278685, "grad_norm": 2.93274188041687, "learning_rate": 6.795934916322047e-06, "loss": 0.2804, "step": 18779 }, { "epoch": 61.57377049180328, "grad_norm": 2.602830171585083, "learning_rate": 6.794929029930768e-06, "loss": 0.3222, "step": 18780 }, { "epoch": 61.57704918032787, "grad_norm": 2.9227852821350098, "learning_rate": 6.793923179679992e-06, "loss": 0.2161, "step": 18781 }, { "epoch": 61.58032786885246, "grad_norm": 2.850734233856201, "learning_rate": 6.792917365581071e-06, "loss": 0.1627, "step": 18782 }, { "epoch": 61.58360655737705, "grad_norm": 2.1125104427337646, "learning_rate": 6.79191158764534e-06, "loss": 0.1244, "step": 18783 }, { "epoch": 61.58688524590164, "grad_norm": 2.1302053928375244, "learning_rate": 6.790905845884142e-06, "loss": 0.1562, "step": 18784 }, { "epoch": 61.59016393442623, "grad_norm": 2.6532204151153564, "learning_rate": 6.789900140308814e-06, "loss": 0.0893, "step": 18785 }, { "epoch": 61.59344262295082, "grad_norm": 3.923574686050415, "learning_rate": 6.788894470930703e-06, "loss": 0.3278, "step": 18786 }, { "epoch": 61.59672131147541, "grad_norm": 3.366037130355835, "learning_rate": 6.787888837761145e-06, "loss": 0.2715, "step": 18787 }, { "epoch": 61.6, "grad_norm": 4.862688064575195, "learning_rate": 6.786883240811479e-06, "loss": 0.1943, "step": 18788 }, { "epoch": 61.60327868852459, "grad_norm": 3.551988124847412, "learning_rate": 6.785877680093044e-06, "loss": 0.3527, "step": 18789 }, { "epoch": 61.60655737704918, "grad_norm": 2.3661575317382812, "learning_rate": 6.784872155617181e-06, "loss": 0.1657, "step": 18790 }, { "epoch": 61.609836065573774, "grad_norm": 2.499330520629883, "learning_rate": 6.7838666673952305e-06, "loss": 0.1893, "step": 18791 }, { "epoch": 61.61311475409836, "grad_norm": 2.599729537963867, "learning_rate": 6.782861215438525e-06, "loss": 0.2237, "step": 18792 }, { "epoch": 61.61639344262295, "grad_norm": 3.726203203201294, "learning_rate": 6.781855799758403e-06, "loss": 0.2474, "step": 18793 }, { "epoch": 61.61967213114754, "grad_norm": 2.627321720123291, "learning_rate": 6.780850420366204e-06, "loss": 0.3185, "step": 18794 }, { "epoch": 61.622950819672134, "grad_norm": 3.127866268157959, "learning_rate": 6.779845077273264e-06, "loss": 0.112, "step": 18795 }, { "epoch": 61.62622950819672, "grad_norm": 2.638387441635132, "learning_rate": 6.778839770490919e-06, "loss": 0.2061, "step": 18796 }, { "epoch": 61.62950819672131, "grad_norm": 2.86275577545166, "learning_rate": 6.777834500030505e-06, "loss": 0.1992, "step": 18797 }, { "epoch": 61.6327868852459, "grad_norm": 2.678241729736328, "learning_rate": 6.776829265903355e-06, "loss": 0.1982, "step": 18798 }, { "epoch": 61.636065573770495, "grad_norm": 2.659050941467285, "learning_rate": 6.775824068120809e-06, "loss": 0.072, "step": 18799 }, { "epoch": 61.63934426229508, "grad_norm": 2.790036678314209, "learning_rate": 6.774818906694196e-06, "loss": 0.2391, "step": 18800 }, { "epoch": 61.64262295081967, "grad_norm": 2.685922145843506, "learning_rate": 6.773813781634856e-06, "loss": 0.2421, "step": 18801 }, { "epoch": 61.64590163934426, "grad_norm": 3.24375319480896, "learning_rate": 6.772808692954118e-06, "loss": 0.0986, "step": 18802 }, { "epoch": 61.649180327868855, "grad_norm": 3.330148220062256, "learning_rate": 6.771803640663317e-06, "loss": 0.2929, "step": 18803 }, { "epoch": 61.65245901639344, "grad_norm": 3.173661470413208, "learning_rate": 6.770798624773789e-06, "loss": 0.1987, "step": 18804 }, { "epoch": 61.65573770491803, "grad_norm": 2.387756109237671, "learning_rate": 6.769793645296863e-06, "loss": 0.1045, "step": 18805 }, { "epoch": 61.65901639344262, "grad_norm": 3.5240113735198975, "learning_rate": 6.768788702243874e-06, "loss": 0.1005, "step": 18806 }, { "epoch": 61.662295081967216, "grad_norm": 3.4118359088897705, "learning_rate": 6.767783795626146e-06, "loss": 0.2053, "step": 18807 }, { "epoch": 61.665573770491804, "grad_norm": 2.2595529556274414, "learning_rate": 6.766778925455022e-06, "loss": 0.0545, "step": 18808 }, { "epoch": 61.66885245901639, "grad_norm": 2.8141653537750244, "learning_rate": 6.765774091741826e-06, "loss": 0.076, "step": 18809 }, { "epoch": 61.67213114754098, "grad_norm": 4.347270965576172, "learning_rate": 6.764769294497891e-06, "loss": 0.2078, "step": 18810 }, { "epoch": 61.675409836065576, "grad_norm": 3.3403708934783936, "learning_rate": 6.763764533734541e-06, "loss": 0.3169, "step": 18811 }, { "epoch": 61.678688524590164, "grad_norm": 3.0512940883636475, "learning_rate": 6.762759809463115e-06, "loss": 0.1759, "step": 18812 }, { "epoch": 61.68196721311475, "grad_norm": 2.9873223304748535, "learning_rate": 6.761755121694939e-06, "loss": 0.1902, "step": 18813 }, { "epoch": 61.68524590163934, "grad_norm": 3.058460235595703, "learning_rate": 6.7607504704413396e-06, "loss": 0.1242, "step": 18814 }, { "epoch": 61.68852459016394, "grad_norm": 2.2568647861480713, "learning_rate": 6.759745855713645e-06, "loss": 0.2184, "step": 18815 }, { "epoch": 61.691803278688525, "grad_norm": 3.2096683979034424, "learning_rate": 6.758741277523187e-06, "loss": 0.2856, "step": 18816 }, { "epoch": 61.69508196721311, "grad_norm": 2.5862643718719482, "learning_rate": 6.757736735881291e-06, "loss": 0.2103, "step": 18817 }, { "epoch": 61.6983606557377, "grad_norm": 3.018529176712036, "learning_rate": 6.7567322307992834e-06, "loss": 0.4499, "step": 18818 }, { "epoch": 61.7016393442623, "grad_norm": 2.0951292514801025, "learning_rate": 6.7557277622884944e-06, "loss": 0.2601, "step": 18819 }, { "epoch": 61.704918032786885, "grad_norm": 2.8528106212615967, "learning_rate": 6.754723330360242e-06, "loss": 0.3326, "step": 18820 }, { "epoch": 61.708196721311474, "grad_norm": 3.1849374771118164, "learning_rate": 6.753718935025863e-06, "loss": 0.2764, "step": 18821 }, { "epoch": 61.71147540983607, "grad_norm": 3.3012702465057373, "learning_rate": 6.752714576296679e-06, "loss": 0.2472, "step": 18822 }, { "epoch": 61.71475409836066, "grad_norm": 2.459031105041504, "learning_rate": 6.751710254184012e-06, "loss": 0.1338, "step": 18823 }, { "epoch": 61.718032786885246, "grad_norm": 3.275407552719116, "learning_rate": 6.750705968699189e-06, "loss": 0.2987, "step": 18824 }, { "epoch": 61.721311475409834, "grad_norm": 3.538182020187378, "learning_rate": 6.749701719853539e-06, "loss": 0.3248, "step": 18825 }, { "epoch": 61.72459016393443, "grad_norm": 2.8343899250030518, "learning_rate": 6.748697507658377e-06, "loss": 0.1926, "step": 18826 }, { "epoch": 61.72786885245902, "grad_norm": 4.048752784729004, "learning_rate": 6.747693332125034e-06, "loss": 0.1977, "step": 18827 }, { "epoch": 61.731147540983606, "grad_norm": 3.2202610969543457, "learning_rate": 6.746689193264827e-06, "loss": 0.0913, "step": 18828 }, { "epoch": 61.734426229508195, "grad_norm": 3.6721532344818115, "learning_rate": 6.745685091089082e-06, "loss": 0.1836, "step": 18829 }, { "epoch": 61.73770491803279, "grad_norm": 3.3913493156433105, "learning_rate": 6.744681025609125e-06, "loss": 0.1327, "step": 18830 }, { "epoch": 61.74098360655738, "grad_norm": 2.719369411468506, "learning_rate": 6.7436769968362726e-06, "loss": 0.2387, "step": 18831 }, { "epoch": 61.74426229508197, "grad_norm": 2.767998695373535, "learning_rate": 6.742673004781849e-06, "loss": 0.2327, "step": 18832 }, { "epoch": 61.747540983606555, "grad_norm": 2.771430015563965, "learning_rate": 6.741669049457169e-06, "loss": 0.0669, "step": 18833 }, { "epoch": 61.75081967213115, "grad_norm": 2.641857147216797, "learning_rate": 6.740665130873563e-06, "loss": 0.2361, "step": 18834 }, { "epoch": 61.75409836065574, "grad_norm": 3.5034420490264893, "learning_rate": 6.739661249042346e-06, "loss": 0.2504, "step": 18835 }, { "epoch": 61.75737704918033, "grad_norm": 1.7047199010849, "learning_rate": 6.7386574039748385e-06, "loss": 0.0983, "step": 18836 }, { "epoch": 61.760655737704916, "grad_norm": 2.5639994144439697, "learning_rate": 6.737653595682356e-06, "loss": 0.0869, "step": 18837 }, { "epoch": 61.76393442622951, "grad_norm": 3.320797920227051, "learning_rate": 6.736649824176225e-06, "loss": 0.285, "step": 18838 }, { "epoch": 61.7672131147541, "grad_norm": 2.859287977218628, "learning_rate": 6.73564608946776e-06, "loss": 0.1357, "step": 18839 }, { "epoch": 61.77049180327869, "grad_norm": 2.747494697570801, "learning_rate": 6.73464239156828e-06, "loss": 0.0678, "step": 18840 }, { "epoch": 61.773770491803276, "grad_norm": 2.531426429748535, "learning_rate": 6.733638730489098e-06, "loss": 0.1285, "step": 18841 }, { "epoch": 61.77704918032787, "grad_norm": 3.4524457454681396, "learning_rate": 6.732635106241539e-06, "loss": 0.1176, "step": 18842 }, { "epoch": 61.78032786885246, "grad_norm": 3.225522041320801, "learning_rate": 6.731631518836916e-06, "loss": 0.2146, "step": 18843 }, { "epoch": 61.78360655737705, "grad_norm": 2.7647361755371094, "learning_rate": 6.730627968286547e-06, "loss": 0.2091, "step": 18844 }, { "epoch": 61.78688524590164, "grad_norm": 4.076035022735596, "learning_rate": 6.7296244546017465e-06, "loss": 0.0869, "step": 18845 }, { "epoch": 61.79016393442623, "grad_norm": 2.3795366287231445, "learning_rate": 6.728620977793827e-06, "loss": 0.2101, "step": 18846 }, { "epoch": 61.79344262295082, "grad_norm": 2.555881977081299, "learning_rate": 6.72761753787411e-06, "loss": 0.2214, "step": 18847 }, { "epoch": 61.79672131147541, "grad_norm": 3.0679733753204346, "learning_rate": 6.726614134853909e-06, "loss": 0.1637, "step": 18848 }, { "epoch": 61.8, "grad_norm": 2.9478652477264404, "learning_rate": 6.725610768744535e-06, "loss": 0.2592, "step": 18849 }, { "epoch": 61.80327868852459, "grad_norm": 3.526177167892456, "learning_rate": 6.724607439557303e-06, "loss": 0.1172, "step": 18850 }, { "epoch": 61.80655737704918, "grad_norm": 2.828512191772461, "learning_rate": 6.723604147303531e-06, "loss": 0.1305, "step": 18851 }, { "epoch": 61.80983606557377, "grad_norm": 2.73298716545105, "learning_rate": 6.7226008919945266e-06, "loss": 0.0848, "step": 18852 }, { "epoch": 61.81311475409836, "grad_norm": 2.9343984127044678, "learning_rate": 6.721597673641607e-06, "loss": 0.1346, "step": 18853 }, { "epoch": 61.81639344262295, "grad_norm": 2.3209803104400635, "learning_rate": 6.720594492256079e-06, "loss": 0.2161, "step": 18854 }, { "epoch": 61.81967213114754, "grad_norm": 3.1515440940856934, "learning_rate": 6.719591347849259e-06, "loss": 0.1489, "step": 18855 }, { "epoch": 61.82295081967213, "grad_norm": 2.5525381565093994, "learning_rate": 6.718588240432459e-06, "loss": 0.0974, "step": 18856 }, { "epoch": 61.82622950819672, "grad_norm": 3.154719591140747, "learning_rate": 6.717585170016988e-06, "loss": 0.3298, "step": 18857 }, { "epoch": 61.829508196721314, "grad_norm": 3.0051615238189697, "learning_rate": 6.716582136614158e-06, "loss": 0.2304, "step": 18858 }, { "epoch": 61.8327868852459, "grad_norm": 2.4570438861846924, "learning_rate": 6.715579140235273e-06, "loss": 0.2594, "step": 18859 }, { "epoch": 61.83606557377049, "grad_norm": 2.8682656288146973, "learning_rate": 6.714576180891653e-06, "loss": 0.2121, "step": 18860 }, { "epoch": 61.83934426229508, "grad_norm": 3.987511157989502, "learning_rate": 6.713573258594603e-06, "loss": 0.2373, "step": 18861 }, { "epoch": 61.842622950819674, "grad_norm": 5.225222587585449, "learning_rate": 6.712570373355432e-06, "loss": 0.1492, "step": 18862 }, { "epoch": 61.84590163934426, "grad_norm": 4.057270050048828, "learning_rate": 6.711567525185442e-06, "loss": 0.1443, "step": 18863 }, { "epoch": 61.84918032786885, "grad_norm": 3.3312604427337646, "learning_rate": 6.710564714095952e-06, "loss": 0.3349, "step": 18864 }, { "epoch": 61.85245901639344, "grad_norm": 2.5554113388061523, "learning_rate": 6.709561940098266e-06, "loss": 0.1454, "step": 18865 }, { "epoch": 61.855737704918035, "grad_norm": 2.7175095081329346, "learning_rate": 6.7085592032036885e-06, "loss": 0.1139, "step": 18866 }, { "epoch": 61.85901639344262, "grad_norm": 3.5688118934631348, "learning_rate": 6.707556503423526e-06, "loss": 0.2752, "step": 18867 }, { "epoch": 61.86229508196721, "grad_norm": 3.1279284954071045, "learning_rate": 6.70655384076909e-06, "loss": 0.2179, "step": 18868 }, { "epoch": 61.86557377049181, "grad_norm": 3.5733861923217773, "learning_rate": 6.705551215251684e-06, "loss": 0.3121, "step": 18869 }, { "epoch": 61.868852459016395, "grad_norm": 3.0434954166412354, "learning_rate": 6.704548626882614e-06, "loss": 0.215, "step": 18870 }, { "epoch": 61.87213114754098, "grad_norm": 2.464855670928955, "learning_rate": 6.703546075673185e-06, "loss": 0.113, "step": 18871 }, { "epoch": 61.87540983606557, "grad_norm": 3.1849780082702637, "learning_rate": 6.702543561634697e-06, "loss": 0.2667, "step": 18872 }, { "epoch": 61.87868852459017, "grad_norm": 2.964832067489624, "learning_rate": 6.701541084778463e-06, "loss": 0.1307, "step": 18873 }, { "epoch": 61.881967213114756, "grad_norm": 3.063326835632324, "learning_rate": 6.700538645115782e-06, "loss": 0.2373, "step": 18874 }, { "epoch": 61.885245901639344, "grad_norm": 3.598330497741699, "learning_rate": 6.6995362426579565e-06, "loss": 0.1774, "step": 18875 }, { "epoch": 61.88852459016393, "grad_norm": 3.937830924987793, "learning_rate": 6.698533877416292e-06, "loss": 0.1485, "step": 18876 }, { "epoch": 61.89180327868853, "grad_norm": 2.707359552383423, "learning_rate": 6.697531549402093e-06, "loss": 0.1275, "step": 18877 }, { "epoch": 61.895081967213116, "grad_norm": 2.6236634254455566, "learning_rate": 6.696529258626658e-06, "loss": 0.2424, "step": 18878 }, { "epoch": 61.898360655737704, "grad_norm": 2.1828010082244873, "learning_rate": 6.69552700510129e-06, "loss": 0.0649, "step": 18879 }, { "epoch": 61.90163934426229, "grad_norm": 3.2250609397888184, "learning_rate": 6.694524788837291e-06, "loss": 0.161, "step": 18880 }, { "epoch": 61.90491803278689, "grad_norm": 3.0846951007843018, "learning_rate": 6.693522609845963e-06, "loss": 0.2604, "step": 18881 }, { "epoch": 61.90819672131148, "grad_norm": 2.841076135635376, "learning_rate": 6.692520468138603e-06, "loss": 0.1407, "step": 18882 }, { "epoch": 61.911475409836065, "grad_norm": 3.29282546043396, "learning_rate": 6.691518363726517e-06, "loss": 0.1228, "step": 18883 }, { "epoch": 61.91475409836065, "grad_norm": 3.1181790828704834, "learning_rate": 6.690516296621e-06, "loss": 0.1458, "step": 18884 }, { "epoch": 61.91803278688525, "grad_norm": 2.419294595718384, "learning_rate": 6.689514266833349e-06, "loss": 0.1611, "step": 18885 }, { "epoch": 61.92131147540984, "grad_norm": 2.6145381927490234, "learning_rate": 6.688512274374871e-06, "loss": 0.1219, "step": 18886 }, { "epoch": 61.924590163934425, "grad_norm": 3.2769112586975098, "learning_rate": 6.68751031925686e-06, "loss": 0.0845, "step": 18887 }, { "epoch": 61.927868852459014, "grad_norm": 4.610646724700928, "learning_rate": 6.686508401490615e-06, "loss": 0.1321, "step": 18888 }, { "epoch": 61.93114754098361, "grad_norm": 3.1422078609466553, "learning_rate": 6.685506521087428e-06, "loss": 0.0975, "step": 18889 }, { "epoch": 61.9344262295082, "grad_norm": 2.8212528228759766, "learning_rate": 6.684504678058606e-06, "loss": 0.2132, "step": 18890 }, { "epoch": 61.937704918032786, "grad_norm": 2.7270925045013428, "learning_rate": 6.683502872415441e-06, "loss": 0.1306, "step": 18891 }, { "epoch": 61.940983606557374, "grad_norm": 2.8678677082061768, "learning_rate": 6.682501104169228e-06, "loss": 0.2514, "step": 18892 }, { "epoch": 61.94426229508197, "grad_norm": 2.700350522994995, "learning_rate": 6.6814993733312615e-06, "loss": 0.1701, "step": 18893 }, { "epoch": 61.94754098360656, "grad_norm": 3.2792325019836426, "learning_rate": 6.680497679912844e-06, "loss": 0.1221, "step": 18894 }, { "epoch": 61.950819672131146, "grad_norm": 3.0453262329101562, "learning_rate": 6.679496023925267e-06, "loss": 0.2286, "step": 18895 }, { "epoch": 61.954098360655735, "grad_norm": 2.7634265422821045, "learning_rate": 6.6784944053798236e-06, "loss": 0.1591, "step": 18896 }, { "epoch": 61.95737704918033, "grad_norm": 3.2025647163391113, "learning_rate": 6.67749282428781e-06, "loss": 0.0982, "step": 18897 }, { "epoch": 61.96065573770492, "grad_norm": 2.757397413253784, "learning_rate": 6.6764912806605155e-06, "loss": 0.1301, "step": 18898 }, { "epoch": 61.96393442622951, "grad_norm": 2.491454601287842, "learning_rate": 6.675489774509241e-06, "loss": 0.2048, "step": 18899 }, { "epoch": 61.967213114754095, "grad_norm": 3.978811264038086, "learning_rate": 6.674488305845276e-06, "loss": 0.1632, "step": 18900 }, { "epoch": 61.97049180327869, "grad_norm": 2.7317450046539307, "learning_rate": 6.673486874679913e-06, "loss": 0.1584, "step": 18901 }, { "epoch": 61.97377049180328, "grad_norm": 2.5819594860076904, "learning_rate": 6.67248548102444e-06, "loss": 0.0983, "step": 18902 }, { "epoch": 61.97704918032787, "grad_norm": 3.493913173675537, "learning_rate": 6.671484124890157e-06, "loss": 0.1919, "step": 18903 }, { "epoch": 61.980327868852456, "grad_norm": 3.468536615371704, "learning_rate": 6.670482806288352e-06, "loss": 0.1962, "step": 18904 }, { "epoch": 61.98360655737705, "grad_norm": 2.4848203659057617, "learning_rate": 6.6694815252303125e-06, "loss": 0.1212, "step": 18905 }, { "epoch": 61.98688524590164, "grad_norm": 3.240398645401001, "learning_rate": 6.668480281727332e-06, "loss": 0.2586, "step": 18906 }, { "epoch": 61.99016393442623, "grad_norm": 2.911311388015747, "learning_rate": 6.6674790757907016e-06, "loss": 0.2585, "step": 18907 }, { "epoch": 61.993442622950816, "grad_norm": 2.961418628692627, "learning_rate": 6.666477907431707e-06, "loss": 0.1661, "step": 18908 }, { "epoch": 61.99672131147541, "grad_norm": 2.753732204437256, "learning_rate": 6.665476776661643e-06, "loss": 0.0952, "step": 18909 }, { "epoch": 62.0, "grad_norm": 2.5521674156188965, "learning_rate": 6.664475683491797e-06, "loss": 0.1663, "step": 18910 }, { "epoch": 62.00327868852459, "grad_norm": 2.1860008239746094, "learning_rate": 6.663474627933449e-06, "loss": 0.131, "step": 18911 }, { "epoch": 62.006557377049184, "grad_norm": 3.326800584793091, "learning_rate": 6.662473609997899e-06, "loss": 0.0881, "step": 18912 }, { "epoch": 62.00983606557377, "grad_norm": 2.809649705886841, "learning_rate": 6.661472629696429e-06, "loss": 0.2605, "step": 18913 }, { "epoch": 62.01311475409836, "grad_norm": 3.300687551498413, "learning_rate": 6.660471687040327e-06, "loss": 0.1412, "step": 18914 }, { "epoch": 62.01639344262295, "grad_norm": 3.4410955905914307, "learning_rate": 6.659470782040873e-06, "loss": 0.1681, "step": 18915 }, { "epoch": 62.019672131147544, "grad_norm": 2.95929217338562, "learning_rate": 6.658469914709365e-06, "loss": 0.1214, "step": 18916 }, { "epoch": 62.02295081967213, "grad_norm": 2.8921589851379395, "learning_rate": 6.657469085057083e-06, "loss": 0.1444, "step": 18917 }, { "epoch": 62.02622950819672, "grad_norm": 2.82053804397583, "learning_rate": 6.656468293095313e-06, "loss": 0.1788, "step": 18918 }, { "epoch": 62.02950819672131, "grad_norm": 2.7031097412109375, "learning_rate": 6.655467538835334e-06, "loss": 0.1984, "step": 18919 }, { "epoch": 62.032786885245905, "grad_norm": 3.0025522708892822, "learning_rate": 6.654466822288439e-06, "loss": 0.1164, "step": 18920 }, { "epoch": 62.03606557377049, "grad_norm": 2.9962611198425293, "learning_rate": 6.653466143465911e-06, "loss": 0.0912, "step": 18921 }, { "epoch": 62.03934426229508, "grad_norm": 3.177544355392456, "learning_rate": 6.652465502379031e-06, "loss": 0.1412, "step": 18922 }, { "epoch": 62.04262295081967, "grad_norm": 2.519726514816284, "learning_rate": 6.651464899039084e-06, "loss": 0.1493, "step": 18923 }, { "epoch": 62.045901639344265, "grad_norm": 3.2688825130462646, "learning_rate": 6.650464333457347e-06, "loss": 0.161, "step": 18924 }, { "epoch": 62.049180327868854, "grad_norm": 3.327749729156494, "learning_rate": 6.649463805645111e-06, "loss": 0.1867, "step": 18925 }, { "epoch": 62.05245901639344, "grad_norm": 3.1212213039398193, "learning_rate": 6.648463315613653e-06, "loss": 0.1535, "step": 18926 }, { "epoch": 62.05573770491803, "grad_norm": 3.0581624507904053, "learning_rate": 6.647462863374259e-06, "loss": 0.1243, "step": 18927 }, { "epoch": 62.059016393442626, "grad_norm": 3.2112767696380615, "learning_rate": 6.646462448938202e-06, "loss": 0.3047, "step": 18928 }, { "epoch": 62.062295081967214, "grad_norm": 3.034691095352173, "learning_rate": 6.645462072316771e-06, "loss": 0.1239, "step": 18929 }, { "epoch": 62.0655737704918, "grad_norm": 3.492888927459717, "learning_rate": 6.6444617335212426e-06, "loss": 0.1023, "step": 18930 }, { "epoch": 62.06885245901639, "grad_norm": 2.7087831497192383, "learning_rate": 6.643461432562894e-06, "loss": 0.1039, "step": 18931 }, { "epoch": 62.072131147540986, "grad_norm": 2.639620542526245, "learning_rate": 6.64246116945301e-06, "loss": 0.2021, "step": 18932 }, { "epoch": 62.075409836065575, "grad_norm": 3.913109302520752, "learning_rate": 6.641460944202869e-06, "loss": 0.3765, "step": 18933 }, { "epoch": 62.07868852459016, "grad_norm": 2.315786838531494, "learning_rate": 6.640460756823745e-06, "loss": 0.1228, "step": 18934 }, { "epoch": 62.08196721311475, "grad_norm": 3.0159263610839844, "learning_rate": 6.639460607326922e-06, "loss": 0.1138, "step": 18935 }, { "epoch": 62.08524590163935, "grad_norm": 2.6576120853424072, "learning_rate": 6.638460495723673e-06, "loss": 0.1296, "step": 18936 }, { "epoch": 62.088524590163935, "grad_norm": 3.8112714290618896, "learning_rate": 6.637460422025275e-06, "loss": 0.1991, "step": 18937 }, { "epoch": 62.09180327868852, "grad_norm": 2.6568267345428467, "learning_rate": 6.636460386243009e-06, "loss": 0.2145, "step": 18938 }, { "epoch": 62.09508196721311, "grad_norm": 3.492492914199829, "learning_rate": 6.6354603883881504e-06, "loss": 0.1607, "step": 18939 }, { "epoch": 62.09836065573771, "grad_norm": 3.1274969577789307, "learning_rate": 6.634460428471975e-06, "loss": 0.1934, "step": 18940 }, { "epoch": 62.101639344262296, "grad_norm": 3.2115931510925293, "learning_rate": 6.633460506505752e-06, "loss": 0.3433, "step": 18941 }, { "epoch": 62.104918032786884, "grad_norm": 2.289686441421509, "learning_rate": 6.632460622500767e-06, "loss": 0.077, "step": 18942 }, { "epoch": 62.10819672131147, "grad_norm": 2.5765063762664795, "learning_rate": 6.631460776468291e-06, "loss": 0.2693, "step": 18943 }, { "epoch": 62.11147540983607, "grad_norm": 2.4816160202026367, "learning_rate": 6.630460968419594e-06, "loss": 0.1525, "step": 18944 }, { "epoch": 62.114754098360656, "grad_norm": 2.204737424850464, "learning_rate": 6.629461198365952e-06, "loss": 0.0564, "step": 18945 }, { "epoch": 62.118032786885244, "grad_norm": 3.1872482299804688, "learning_rate": 6.628461466318641e-06, "loss": 0.2532, "step": 18946 }, { "epoch": 62.12131147540983, "grad_norm": 2.9091222286224365, "learning_rate": 6.627461772288934e-06, "loss": 0.1992, "step": 18947 }, { "epoch": 62.12459016393443, "grad_norm": 3.0794522762298584, "learning_rate": 6.6264621162881015e-06, "loss": 0.1517, "step": 18948 }, { "epoch": 62.12786885245902, "grad_norm": 2.9693784713745117, "learning_rate": 6.625462498327418e-06, "loss": 0.1876, "step": 18949 }, { "epoch": 62.131147540983605, "grad_norm": 1.8302556276321411, "learning_rate": 6.624462918418147e-06, "loss": 0.0885, "step": 18950 }, { "epoch": 62.13442622950819, "grad_norm": 2.352848768234253, "learning_rate": 6.623463376571571e-06, "loss": 0.1167, "step": 18951 }, { "epoch": 62.13770491803279, "grad_norm": 2.440845251083374, "learning_rate": 6.622463872798956e-06, "loss": 0.0752, "step": 18952 }, { "epoch": 62.14098360655738, "grad_norm": 2.5281455516815186, "learning_rate": 6.621464407111574e-06, "loss": 0.1027, "step": 18953 }, { "epoch": 62.144262295081965, "grad_norm": 2.6543128490448, "learning_rate": 6.620464979520689e-06, "loss": 0.237, "step": 18954 }, { "epoch": 62.14754098360656, "grad_norm": 6.356672763824463, "learning_rate": 6.619465590037579e-06, "loss": 0.2558, "step": 18955 }, { "epoch": 62.15081967213115, "grad_norm": 2.755221128463745, "learning_rate": 6.618466238673509e-06, "loss": 0.2712, "step": 18956 }, { "epoch": 62.15409836065574, "grad_norm": 2.9034149646759033, "learning_rate": 6.617466925439746e-06, "loss": 0.3102, "step": 18957 }, { "epoch": 62.157377049180326, "grad_norm": 3.44541597366333, "learning_rate": 6.61646765034756e-06, "loss": 0.1158, "step": 18958 }, { "epoch": 62.16065573770492, "grad_norm": 3.565429449081421, "learning_rate": 6.6154684134082226e-06, "loss": 0.1391, "step": 18959 }, { "epoch": 62.16393442622951, "grad_norm": 2.9626035690307617, "learning_rate": 6.614469214632997e-06, "loss": 0.1536, "step": 18960 }, { "epoch": 62.1672131147541, "grad_norm": 3.197737216949463, "learning_rate": 6.613470054033149e-06, "loss": 0.243, "step": 18961 }, { "epoch": 62.170491803278686, "grad_norm": 2.6085171699523926, "learning_rate": 6.61247093161995e-06, "loss": 0.1514, "step": 18962 }, { "epoch": 62.17377049180328, "grad_norm": 3.1286964416503906, "learning_rate": 6.61147184740466e-06, "loss": 0.2011, "step": 18963 }, { "epoch": 62.17704918032787, "grad_norm": 3.2555270195007324, "learning_rate": 6.61047280139855e-06, "loss": 0.0915, "step": 18964 }, { "epoch": 62.18032786885246, "grad_norm": 3.6565346717834473, "learning_rate": 6.6094737936128835e-06, "loss": 0.2786, "step": 18965 }, { "epoch": 62.18360655737705, "grad_norm": 2.8066442012786865, "learning_rate": 6.608474824058927e-06, "loss": 0.1498, "step": 18966 }, { "epoch": 62.18688524590164, "grad_norm": 3.2024853229522705, "learning_rate": 6.607475892747938e-06, "loss": 0.1435, "step": 18967 }, { "epoch": 62.19016393442623, "grad_norm": 2.896998167037964, "learning_rate": 6.606476999691189e-06, "loss": 0.2285, "step": 18968 }, { "epoch": 62.19344262295082, "grad_norm": 3.41806697845459, "learning_rate": 6.605478144899942e-06, "loss": 0.2938, "step": 18969 }, { "epoch": 62.19672131147541, "grad_norm": 2.5389480590820312, "learning_rate": 6.604479328385458e-06, "loss": 0.0447, "step": 18970 }, { "epoch": 62.2, "grad_norm": 2.610415458679199, "learning_rate": 6.603480550158995e-06, "loss": 0.1103, "step": 18971 }, { "epoch": 62.20327868852459, "grad_norm": 3.7217512130737305, "learning_rate": 6.602481810231824e-06, "loss": 0.3015, "step": 18972 }, { "epoch": 62.20655737704918, "grad_norm": 2.2623350620269775, "learning_rate": 6.6014831086152055e-06, "loss": 0.0781, "step": 18973 }, { "epoch": 62.20983606557377, "grad_norm": 2.379270553588867, "learning_rate": 6.600484445320396e-06, "loss": 0.0838, "step": 18974 }, { "epoch": 62.21311475409836, "grad_norm": 3.3105287551879883, "learning_rate": 6.59948582035866e-06, "loss": 0.0786, "step": 18975 }, { "epoch": 62.21639344262295, "grad_norm": 2.3900222778320312, "learning_rate": 6.598487233741253e-06, "loss": 0.1467, "step": 18976 }, { "epoch": 62.21967213114754, "grad_norm": 2.6255972385406494, "learning_rate": 6.597488685479443e-06, "loss": 0.0765, "step": 18977 }, { "epoch": 62.22295081967213, "grad_norm": 2.3786063194274902, "learning_rate": 6.596490175584486e-06, "loss": 0.0837, "step": 18978 }, { "epoch": 62.226229508196724, "grad_norm": 3.0093042850494385, "learning_rate": 6.59549170406764e-06, "loss": 0.1164, "step": 18979 }, { "epoch": 62.22950819672131, "grad_norm": 3.9288415908813477, "learning_rate": 6.594493270940162e-06, "loss": 0.2803, "step": 18980 }, { "epoch": 62.2327868852459, "grad_norm": 2.985138416290283, "learning_rate": 6.593494876213318e-06, "loss": 0.1683, "step": 18981 }, { "epoch": 62.23606557377049, "grad_norm": 2.617064952850342, "learning_rate": 6.5924965198983595e-06, "loss": 0.125, "step": 18982 }, { "epoch": 62.239344262295084, "grad_norm": 2.906019687652588, "learning_rate": 6.591498202006545e-06, "loss": 0.0937, "step": 18983 }, { "epoch": 62.24262295081967, "grad_norm": 2.898977279663086, "learning_rate": 6.590499922549131e-06, "loss": 0.1669, "step": 18984 }, { "epoch": 62.24590163934426, "grad_norm": 2.720005512237549, "learning_rate": 6.589501681537378e-06, "loss": 0.2176, "step": 18985 }, { "epoch": 62.24918032786885, "grad_norm": 3.5760269165039062, "learning_rate": 6.5885034789825385e-06, "loss": 0.1763, "step": 18986 }, { "epoch": 62.252459016393445, "grad_norm": 3.316567897796631, "learning_rate": 6.587505314895868e-06, "loss": 0.144, "step": 18987 }, { "epoch": 62.25573770491803, "grad_norm": 3.8369367122650146, "learning_rate": 6.5865071892886266e-06, "loss": 0.3435, "step": 18988 }, { "epoch": 62.25901639344262, "grad_norm": 2.4853827953338623, "learning_rate": 6.585509102172062e-06, "loss": 0.0905, "step": 18989 }, { "epoch": 62.26229508196721, "grad_norm": 2.894462823867798, "learning_rate": 6.584511053557433e-06, "loss": 0.063, "step": 18990 }, { "epoch": 62.265573770491805, "grad_norm": 2.1995022296905518, "learning_rate": 6.583513043455996e-06, "loss": 0.0742, "step": 18991 }, { "epoch": 62.268852459016394, "grad_norm": 3.308260440826416, "learning_rate": 6.582515071879e-06, "loss": 0.1161, "step": 18992 }, { "epoch": 62.27213114754098, "grad_norm": 3.574995279312134, "learning_rate": 6.581517138837697e-06, "loss": 0.1217, "step": 18993 }, { "epoch": 62.27540983606557, "grad_norm": 3.0611400604248047, "learning_rate": 6.580519244343346e-06, "loss": 0.4438, "step": 18994 }, { "epoch": 62.278688524590166, "grad_norm": 3.008396863937378, "learning_rate": 6.579521388407196e-06, "loss": 0.2288, "step": 18995 }, { "epoch": 62.281967213114754, "grad_norm": 3.931272268295288, "learning_rate": 6.578523571040499e-06, "loss": 0.2847, "step": 18996 }, { "epoch": 62.28524590163934, "grad_norm": 2.5326502323150635, "learning_rate": 6.5775257922545026e-06, "loss": 0.1781, "step": 18997 }, { "epoch": 62.28852459016394, "grad_norm": 2.487267017364502, "learning_rate": 6.576528052060465e-06, "loss": 0.0637, "step": 18998 }, { "epoch": 62.291803278688526, "grad_norm": 2.3239071369171143, "learning_rate": 6.5755303504696325e-06, "loss": 0.1229, "step": 18999 }, { "epoch": 62.295081967213115, "grad_norm": 2.9015557765960693, "learning_rate": 6.574532687493256e-06, "loss": 0.1413, "step": 19000 }, { "epoch": 62.2983606557377, "grad_norm": 2.670037031173706, "learning_rate": 6.573535063142587e-06, "loss": 0.1473, "step": 19001 }, { "epoch": 62.3016393442623, "grad_norm": 3.3433420658111572, "learning_rate": 6.572537477428866e-06, "loss": 0.1593, "step": 19002 }, { "epoch": 62.30491803278689, "grad_norm": 2.4732139110565186, "learning_rate": 6.571539930363353e-06, "loss": 0.3095, "step": 19003 }, { "epoch": 62.308196721311475, "grad_norm": 2.9870195388793945, "learning_rate": 6.570542421957293e-06, "loss": 0.2251, "step": 19004 }, { "epoch": 62.31147540983606, "grad_norm": 2.82470440864563, "learning_rate": 6.569544952221934e-06, "loss": 0.1855, "step": 19005 }, { "epoch": 62.31475409836066, "grad_norm": 3.162712574005127, "learning_rate": 6.568547521168515e-06, "loss": 0.2235, "step": 19006 }, { "epoch": 62.31803278688525, "grad_norm": 2.9655678272247314, "learning_rate": 6.567550128808298e-06, "loss": 0.1361, "step": 19007 }, { "epoch": 62.321311475409836, "grad_norm": 3.6327903270721436, "learning_rate": 6.566552775152521e-06, "loss": 0.1096, "step": 19008 }, { "epoch": 62.324590163934424, "grad_norm": 3.2827348709106445, "learning_rate": 6.565555460212432e-06, "loss": 0.1595, "step": 19009 }, { "epoch": 62.32786885245902, "grad_norm": 3.556122303009033, "learning_rate": 6.5645581839992704e-06, "loss": 0.2153, "step": 19010 }, { "epoch": 62.33114754098361, "grad_norm": 3.0395500659942627, "learning_rate": 6.563560946524292e-06, "loss": 0.1557, "step": 19011 }, { "epoch": 62.334426229508196, "grad_norm": 2.467895746231079, "learning_rate": 6.562563747798737e-06, "loss": 0.1767, "step": 19012 }, { "epoch": 62.337704918032784, "grad_norm": 2.037693500518799, "learning_rate": 6.561566587833847e-06, "loss": 0.0544, "step": 19013 }, { "epoch": 62.34098360655738, "grad_norm": 3.255341053009033, "learning_rate": 6.560569466640872e-06, "loss": 0.1793, "step": 19014 }, { "epoch": 62.34426229508197, "grad_norm": 3.287095785140991, "learning_rate": 6.55957238423105e-06, "loss": 0.1735, "step": 19015 }, { "epoch": 62.34754098360656, "grad_norm": 2.4536871910095215, "learning_rate": 6.558575340615627e-06, "loss": 0.3, "step": 19016 }, { "epoch": 62.350819672131145, "grad_norm": 2.8290321826934814, "learning_rate": 6.557578335805847e-06, "loss": 0.0964, "step": 19017 }, { "epoch": 62.35409836065574, "grad_norm": 3.382962465286255, "learning_rate": 6.556581369812951e-06, "loss": 0.222, "step": 19018 }, { "epoch": 62.35737704918033, "grad_norm": 2.6356945037841797, "learning_rate": 6.5555844426481755e-06, "loss": 0.2036, "step": 19019 }, { "epoch": 62.36065573770492, "grad_norm": 3.0358288288116455, "learning_rate": 6.55458755432277e-06, "loss": 0.1402, "step": 19020 }, { "epoch": 62.363934426229505, "grad_norm": 3.9414303302764893, "learning_rate": 6.553590704847973e-06, "loss": 0.1982, "step": 19021 }, { "epoch": 62.3672131147541, "grad_norm": 3.020134925842285, "learning_rate": 6.552593894235024e-06, "loss": 0.099, "step": 19022 }, { "epoch": 62.37049180327869, "grad_norm": 2.668778419494629, "learning_rate": 6.551597122495159e-06, "loss": 0.2391, "step": 19023 }, { "epoch": 62.37377049180328, "grad_norm": 2.575413227081299, "learning_rate": 6.550600389639627e-06, "loss": 0.2007, "step": 19024 }, { "epoch": 62.377049180327866, "grad_norm": 3.796326160430908, "learning_rate": 6.54960369567966e-06, "loss": 0.2787, "step": 19025 }, { "epoch": 62.38032786885246, "grad_norm": 2.439763307571411, "learning_rate": 6.5486070406265016e-06, "loss": 0.0776, "step": 19026 }, { "epoch": 62.38360655737705, "grad_norm": 2.314918279647827, "learning_rate": 6.547610424491385e-06, "loss": 0.2378, "step": 19027 }, { "epoch": 62.38688524590164, "grad_norm": 3.5641462802886963, "learning_rate": 6.546613847285548e-06, "loss": 0.2917, "step": 19028 }, { "epoch": 62.390163934426226, "grad_norm": 3.2639029026031494, "learning_rate": 6.5456173090202336e-06, "loss": 0.1765, "step": 19029 }, { "epoch": 62.39344262295082, "grad_norm": 3.789095878601074, "learning_rate": 6.544620809706677e-06, "loss": 0.193, "step": 19030 }, { "epoch": 62.39672131147541, "grad_norm": 3.4752204418182373, "learning_rate": 6.543624349356112e-06, "loss": 0.2422, "step": 19031 }, { "epoch": 62.4, "grad_norm": 3.077324867248535, "learning_rate": 6.542627927979772e-06, "loss": 0.1562, "step": 19032 }, { "epoch": 62.40327868852459, "grad_norm": 3.1597137451171875, "learning_rate": 6.541631545588901e-06, "loss": 0.1408, "step": 19033 }, { "epoch": 62.40655737704918, "grad_norm": 2.6616928577423096, "learning_rate": 6.540635202194731e-06, "loss": 0.185, "step": 19034 }, { "epoch": 62.40983606557377, "grad_norm": 3.10188364982605, "learning_rate": 6.5396388978084955e-06, "loss": 0.1974, "step": 19035 }, { "epoch": 62.41311475409836, "grad_norm": 2.666203260421753, "learning_rate": 6.538642632441425e-06, "loss": 0.1521, "step": 19036 }, { "epoch": 62.41639344262295, "grad_norm": 3.065488815307617, "learning_rate": 6.537646406104763e-06, "loss": 0.1173, "step": 19037 }, { "epoch": 62.41967213114754, "grad_norm": 2.7983734607696533, "learning_rate": 6.536650218809737e-06, "loss": 0.3345, "step": 19038 }, { "epoch": 62.42295081967213, "grad_norm": 2.7222490310668945, "learning_rate": 6.535654070567578e-06, "loss": 0.1821, "step": 19039 }, { "epoch": 62.42622950819672, "grad_norm": 2.6837055683135986, "learning_rate": 6.534657961389523e-06, "loss": 0.141, "step": 19040 }, { "epoch": 62.429508196721315, "grad_norm": 2.6011743545532227, "learning_rate": 6.5336618912868025e-06, "loss": 0.1322, "step": 19041 }, { "epoch": 62.4327868852459, "grad_norm": 3.397714614868164, "learning_rate": 6.532665860270649e-06, "loss": 0.2825, "step": 19042 }, { "epoch": 62.43606557377049, "grad_norm": 2.8896021842956543, "learning_rate": 6.53166986835229e-06, "loss": 0.1761, "step": 19043 }, { "epoch": 62.43934426229508, "grad_norm": 2.891366481781006, "learning_rate": 6.5306739155429646e-06, "loss": 0.1728, "step": 19044 }, { "epoch": 62.442622950819676, "grad_norm": 3.4567818641662598, "learning_rate": 6.529678001853893e-06, "loss": 0.2179, "step": 19045 }, { "epoch": 62.445901639344264, "grad_norm": 2.3439855575561523, "learning_rate": 6.528682127296311e-06, "loss": 0.0954, "step": 19046 }, { "epoch": 62.44918032786885, "grad_norm": 2.656754732131958, "learning_rate": 6.52768629188145e-06, "loss": 0.109, "step": 19047 }, { "epoch": 62.45245901639344, "grad_norm": 2.7729477882385254, "learning_rate": 6.526690495620536e-06, "loss": 0.1261, "step": 19048 }, { "epoch": 62.455737704918036, "grad_norm": 2.478849172592163, "learning_rate": 6.525694738524794e-06, "loss": 0.1401, "step": 19049 }, { "epoch": 62.459016393442624, "grad_norm": 3.650240421295166, "learning_rate": 6.52469902060546e-06, "loss": 0.311, "step": 19050 }, { "epoch": 62.46229508196721, "grad_norm": 2.1508257389068604, "learning_rate": 6.523703341873757e-06, "loss": 0.1517, "step": 19051 }, { "epoch": 62.4655737704918, "grad_norm": 3.663571357727051, "learning_rate": 6.522707702340916e-06, "loss": 0.1449, "step": 19052 }, { "epoch": 62.4688524590164, "grad_norm": 2.833279609680176, "learning_rate": 6.5217121020181585e-06, "loss": 0.0996, "step": 19053 }, { "epoch": 62.472131147540985, "grad_norm": 2.864567995071411, "learning_rate": 6.520716540916709e-06, "loss": 0.2209, "step": 19054 }, { "epoch": 62.47540983606557, "grad_norm": 2.883647918701172, "learning_rate": 6.5197210190478046e-06, "loss": 0.2088, "step": 19055 }, { "epoch": 62.47868852459016, "grad_norm": 2.4530673027038574, "learning_rate": 6.518725536422663e-06, "loss": 0.1784, "step": 19056 }, { "epoch": 62.48196721311476, "grad_norm": 3.5424954891204834, "learning_rate": 6.517730093052511e-06, "loss": 0.2629, "step": 19057 }, { "epoch": 62.485245901639345, "grad_norm": 2.30552339553833, "learning_rate": 6.51673468894857e-06, "loss": 0.0974, "step": 19058 }, { "epoch": 62.488524590163934, "grad_norm": 2.938239574432373, "learning_rate": 6.515739324122068e-06, "loss": 0.1646, "step": 19059 }, { "epoch": 62.49180327868852, "grad_norm": 2.7555949687957764, "learning_rate": 6.51474399858423e-06, "loss": 0.2921, "step": 19060 }, { "epoch": 62.49508196721312, "grad_norm": 2.967909574508667, "learning_rate": 6.513748712346277e-06, "loss": 0.1295, "step": 19061 }, { "epoch": 62.498360655737706, "grad_norm": 2.779381513595581, "learning_rate": 6.512753465419428e-06, "loss": 0.1644, "step": 19062 }, { "epoch": 62.501639344262294, "grad_norm": 3.2679221630096436, "learning_rate": 6.511758257814913e-06, "loss": 0.1403, "step": 19063 }, { "epoch": 62.50491803278688, "grad_norm": 2.2599079608917236, "learning_rate": 6.510763089543951e-06, "loss": 0.0942, "step": 19064 }, { "epoch": 62.50819672131148, "grad_norm": 2.1773176193237305, "learning_rate": 6.509767960617763e-06, "loss": 0.186, "step": 19065 }, { "epoch": 62.511475409836066, "grad_norm": 2.494758367538452, "learning_rate": 6.508772871047568e-06, "loss": 0.1279, "step": 19066 }, { "epoch": 62.514754098360655, "grad_norm": 2.0107474327087402, "learning_rate": 6.507777820844589e-06, "loss": 0.1832, "step": 19067 }, { "epoch": 62.51803278688524, "grad_norm": 2.943741798400879, "learning_rate": 6.506782810020047e-06, "loss": 0.0854, "step": 19068 }, { "epoch": 62.52131147540984, "grad_norm": 2.8130435943603516, "learning_rate": 6.505787838585161e-06, "loss": 0.1806, "step": 19069 }, { "epoch": 62.52459016393443, "grad_norm": 2.6565780639648438, "learning_rate": 6.50479290655115e-06, "loss": 0.1334, "step": 19070 }, { "epoch": 62.527868852459015, "grad_norm": 2.9080915451049805, "learning_rate": 6.503798013929232e-06, "loss": 0.1838, "step": 19071 }, { "epoch": 62.5311475409836, "grad_norm": 2.3245227336883545, "learning_rate": 6.502803160730626e-06, "loss": 0.1786, "step": 19072 }, { "epoch": 62.5344262295082, "grad_norm": 3.3383419513702393, "learning_rate": 6.501808346966554e-06, "loss": 0.2364, "step": 19073 }, { "epoch": 62.53770491803279, "grad_norm": 2.0735280513763428, "learning_rate": 6.50081357264823e-06, "loss": 0.0441, "step": 19074 }, { "epoch": 62.540983606557376, "grad_norm": 2.959829330444336, "learning_rate": 6.499818837786866e-06, "loss": 0.2694, "step": 19075 }, { "epoch": 62.544262295081964, "grad_norm": 2.1985135078430176, "learning_rate": 6.498824142393688e-06, "loss": 0.0527, "step": 19076 }, { "epoch": 62.54754098360656, "grad_norm": 2.917588472366333, "learning_rate": 6.497829486479909e-06, "loss": 0.2067, "step": 19077 }, { "epoch": 62.55081967213115, "grad_norm": 3.4469964504241943, "learning_rate": 6.496834870056743e-06, "loss": 0.2006, "step": 19078 }, { "epoch": 62.554098360655736, "grad_norm": 3.2734134197235107, "learning_rate": 6.495840293135407e-06, "loss": 0.3189, "step": 19079 }, { "epoch": 62.557377049180324, "grad_norm": 2.9893085956573486, "learning_rate": 6.49484575572711e-06, "loss": 0.2968, "step": 19080 }, { "epoch": 62.56065573770492, "grad_norm": 2.6199796199798584, "learning_rate": 6.493851257843076e-06, "loss": 0.1667, "step": 19081 }, { "epoch": 62.56393442622951, "grad_norm": 3.0002858638763428, "learning_rate": 6.492856799494515e-06, "loss": 0.0995, "step": 19082 }, { "epoch": 62.5672131147541, "grad_norm": 3.007533550262451, "learning_rate": 6.491862380692639e-06, "loss": 0.1976, "step": 19083 }, { "epoch": 62.570491803278685, "grad_norm": 3.0414319038391113, "learning_rate": 6.49086800144866e-06, "loss": 0.1578, "step": 19084 }, { "epoch": 62.57377049180328, "grad_norm": 3.252173662185669, "learning_rate": 6.4898736617737955e-06, "loss": 0.1435, "step": 19085 }, { "epoch": 62.57704918032787, "grad_norm": 2.8384478092193604, "learning_rate": 6.488879361679257e-06, "loss": 0.1636, "step": 19086 }, { "epoch": 62.58032786885246, "grad_norm": 3.5955939292907715, "learning_rate": 6.4878851011762525e-06, "loss": 0.2571, "step": 19087 }, { "epoch": 62.58360655737705, "grad_norm": 2.9006388187408447, "learning_rate": 6.486890880275991e-06, "loss": 0.1068, "step": 19088 }, { "epoch": 62.58688524590164, "grad_norm": 3.729673385620117, "learning_rate": 6.4858966989896925e-06, "loss": 0.1835, "step": 19089 }, { "epoch": 62.59016393442623, "grad_norm": 2.3715226650238037, "learning_rate": 6.484902557328562e-06, "loss": 0.1745, "step": 19090 }, { "epoch": 62.59344262295082, "grad_norm": 2.813908338546753, "learning_rate": 6.483908455303809e-06, "loss": 0.0951, "step": 19091 }, { "epoch": 62.59672131147541, "grad_norm": 2.8812615871429443, "learning_rate": 6.482914392926644e-06, "loss": 0.1203, "step": 19092 }, { "epoch": 62.6, "grad_norm": 2.477752685546875, "learning_rate": 6.481920370208274e-06, "loss": 0.0514, "step": 19093 }, { "epoch": 62.60327868852459, "grad_norm": 2.9855527877807617, "learning_rate": 6.480926387159914e-06, "loss": 0.1514, "step": 19094 }, { "epoch": 62.60655737704918, "grad_norm": 2.7307915687561035, "learning_rate": 6.479932443792765e-06, "loss": 0.1957, "step": 19095 }, { "epoch": 62.609836065573774, "grad_norm": 2.3027262687683105, "learning_rate": 6.478938540118039e-06, "loss": 0.117, "step": 19096 }, { "epoch": 62.61311475409836, "grad_norm": 2.9006173610687256, "learning_rate": 6.477944676146939e-06, "loss": 0.1605, "step": 19097 }, { "epoch": 62.61639344262295, "grad_norm": 3.015761375427246, "learning_rate": 6.476950851890677e-06, "loss": 0.1062, "step": 19098 }, { "epoch": 62.61967213114754, "grad_norm": 2.341855764389038, "learning_rate": 6.475957067360458e-06, "loss": 0.0755, "step": 19099 }, { "epoch": 62.622950819672134, "grad_norm": 10.186031341552734, "learning_rate": 6.474963322567487e-06, "loss": 0.2611, "step": 19100 }, { "epoch": 62.62622950819672, "grad_norm": 5.979412078857422, "learning_rate": 6.473969617522966e-06, "loss": 0.1903, "step": 19101 }, { "epoch": 62.62950819672131, "grad_norm": 2.710921287536621, "learning_rate": 6.4729759522381074e-06, "loss": 0.2159, "step": 19102 }, { "epoch": 62.6327868852459, "grad_norm": 2.837444543838501, "learning_rate": 6.471982326724113e-06, "loss": 0.1108, "step": 19103 }, { "epoch": 62.636065573770495, "grad_norm": 2.3230016231536865, "learning_rate": 6.470988740992185e-06, "loss": 0.203, "step": 19104 }, { "epoch": 62.63934426229508, "grad_norm": 3.2106971740722656, "learning_rate": 6.469995195053529e-06, "loss": 0.1907, "step": 19105 }, { "epoch": 62.64262295081967, "grad_norm": 3.727283239364624, "learning_rate": 6.469001688919343e-06, "loss": 0.1472, "step": 19106 }, { "epoch": 62.64590163934426, "grad_norm": 3.4677422046661377, "learning_rate": 6.468008222600839e-06, "loss": 0.1838, "step": 19107 }, { "epoch": 62.649180327868855, "grad_norm": 3.138373851776123, "learning_rate": 6.467014796109215e-06, "loss": 0.1809, "step": 19108 }, { "epoch": 62.65245901639344, "grad_norm": 2.6043145656585693, "learning_rate": 6.466021409455672e-06, "loss": 0.2577, "step": 19109 }, { "epoch": 62.65573770491803, "grad_norm": 3.0062055587768555, "learning_rate": 6.465028062651406e-06, "loss": 0.2496, "step": 19110 }, { "epoch": 62.65901639344262, "grad_norm": 3.1861093044281006, "learning_rate": 6.464034755707631e-06, "loss": 0.321, "step": 19111 }, { "epoch": 62.662295081967216, "grad_norm": 3.3387110233306885, "learning_rate": 6.463041488635539e-06, "loss": 0.1635, "step": 19112 }, { "epoch": 62.665573770491804, "grad_norm": 3.2093138694763184, "learning_rate": 6.462048261446332e-06, "loss": 0.248, "step": 19113 }, { "epoch": 62.66885245901639, "grad_norm": 3.1364922523498535, "learning_rate": 6.461055074151206e-06, "loss": 0.1532, "step": 19114 }, { "epoch": 62.67213114754098, "grad_norm": 4.326088905334473, "learning_rate": 6.460061926761367e-06, "loss": 0.1519, "step": 19115 }, { "epoch": 62.675409836065576, "grad_norm": 1.9651687145233154, "learning_rate": 6.45906881928801e-06, "loss": 0.1307, "step": 19116 }, { "epoch": 62.678688524590164, "grad_norm": 2.7846152782440186, "learning_rate": 6.4580757517423345e-06, "loss": 0.1891, "step": 19117 }, { "epoch": 62.68196721311475, "grad_norm": 2.9956321716308594, "learning_rate": 6.457082724135535e-06, "loss": 0.216, "step": 19118 }, { "epoch": 62.68524590163934, "grad_norm": 2.604539632797241, "learning_rate": 6.456089736478812e-06, "loss": 0.0713, "step": 19119 }, { "epoch": 62.68852459016394, "grad_norm": 2.4207544326782227, "learning_rate": 6.455096788783364e-06, "loss": 0.0748, "step": 19120 }, { "epoch": 62.691803278688525, "grad_norm": 3.5830681324005127, "learning_rate": 6.454103881060383e-06, "loss": 0.1234, "step": 19121 }, { "epoch": 62.69508196721311, "grad_norm": 2.276381254196167, "learning_rate": 6.45311101332107e-06, "loss": 0.1755, "step": 19122 }, { "epoch": 62.6983606557377, "grad_norm": 4.115350246429443, "learning_rate": 6.452118185576616e-06, "loss": 0.2473, "step": 19123 }, { "epoch": 62.7016393442623, "grad_norm": 2.7753872871398926, "learning_rate": 6.45112539783822e-06, "loss": 0.2416, "step": 19124 }, { "epoch": 62.704918032786885, "grad_norm": 3.0568759441375732, "learning_rate": 6.4501326501170734e-06, "loss": 0.2024, "step": 19125 }, { "epoch": 62.708196721311474, "grad_norm": 2.838881492614746, "learning_rate": 6.449139942424374e-06, "loss": 0.2273, "step": 19126 }, { "epoch": 62.71147540983607, "grad_norm": 3.1452982425689697, "learning_rate": 6.448147274771313e-06, "loss": 0.1863, "step": 19127 }, { "epoch": 62.71475409836066, "grad_norm": 2.8307063579559326, "learning_rate": 6.447154647169082e-06, "loss": 0.2375, "step": 19128 }, { "epoch": 62.718032786885246, "grad_norm": 2.9205527305603027, "learning_rate": 6.4461620596288795e-06, "loss": 0.1259, "step": 19129 }, { "epoch": 62.721311475409834, "grad_norm": 3.5128073692321777, "learning_rate": 6.445169512161895e-06, "loss": 0.0977, "step": 19130 }, { "epoch": 62.72459016393443, "grad_norm": 3.1431000232696533, "learning_rate": 6.44417700477932e-06, "loss": 0.2734, "step": 19131 }, { "epoch": 62.72786885245902, "grad_norm": 2.7986130714416504, "learning_rate": 6.443184537492344e-06, "loss": 0.2025, "step": 19132 }, { "epoch": 62.731147540983606, "grad_norm": 3.158952474594116, "learning_rate": 6.4421921103121624e-06, "loss": 0.282, "step": 19133 }, { "epoch": 62.734426229508195, "grad_norm": 2.828032970428467, "learning_rate": 6.441199723249965e-06, "loss": 0.1284, "step": 19134 }, { "epoch": 62.73770491803279, "grad_norm": 2.894273042678833, "learning_rate": 6.4402073763169405e-06, "loss": 0.35, "step": 19135 }, { "epoch": 62.74098360655738, "grad_norm": 3.275216817855835, "learning_rate": 6.439215069524276e-06, "loss": 0.2309, "step": 19136 }, { "epoch": 62.74426229508197, "grad_norm": 3.2284345626831055, "learning_rate": 6.438222802883167e-06, "loss": 0.0944, "step": 19137 }, { "epoch": 62.747540983606555, "grad_norm": 2.3394253253936768, "learning_rate": 6.4372305764047995e-06, "loss": 0.0614, "step": 19138 }, { "epoch": 62.75081967213115, "grad_norm": 3.4612345695495605, "learning_rate": 6.436238390100361e-06, "loss": 0.2423, "step": 19139 }, { "epoch": 62.75409836065574, "grad_norm": 2.849135637283325, "learning_rate": 6.435246243981041e-06, "loss": 0.2789, "step": 19140 }, { "epoch": 62.75737704918033, "grad_norm": 3.5051820278167725, "learning_rate": 6.434254138058021e-06, "loss": 0.1537, "step": 19141 }, { "epoch": 62.760655737704916, "grad_norm": 3.3478589057922363, "learning_rate": 6.433262072342497e-06, "loss": 0.2909, "step": 19142 }, { "epoch": 62.76393442622951, "grad_norm": 2.970611095428467, "learning_rate": 6.4322700468456525e-06, "loss": 0.1909, "step": 19143 }, { "epoch": 62.7672131147541, "grad_norm": 2.9461216926574707, "learning_rate": 6.431278061578673e-06, "loss": 0.1141, "step": 19144 }, { "epoch": 62.77049180327869, "grad_norm": 3.2650070190429688, "learning_rate": 6.43028611655274e-06, "loss": 0.2656, "step": 19145 }, { "epoch": 62.773770491803276, "grad_norm": 3.1581618785858154, "learning_rate": 6.429294211779045e-06, "loss": 0.1493, "step": 19146 }, { "epoch": 62.77704918032787, "grad_norm": 2.563762903213501, "learning_rate": 6.428302347268771e-06, "loss": 0.265, "step": 19147 }, { "epoch": 62.78032786885246, "grad_norm": 3.448714256286621, "learning_rate": 6.4273105230331e-06, "loss": 0.1414, "step": 19148 }, { "epoch": 62.78360655737705, "grad_norm": 2.5960891246795654, "learning_rate": 6.426318739083217e-06, "loss": 0.1251, "step": 19149 }, { "epoch": 62.78688524590164, "grad_norm": 2.7198846340179443, "learning_rate": 6.42532699543031e-06, "loss": 0.3054, "step": 19150 }, { "epoch": 62.79016393442623, "grad_norm": 3.5483100414276123, "learning_rate": 6.424335292085553e-06, "loss": 0.1167, "step": 19151 }, { "epoch": 62.79344262295082, "grad_norm": 2.738035202026367, "learning_rate": 6.423343629060137e-06, "loss": 0.1651, "step": 19152 }, { "epoch": 62.79672131147541, "grad_norm": 2.780747890472412, "learning_rate": 6.42235200636524e-06, "loss": 0.1808, "step": 19153 }, { "epoch": 62.8, "grad_norm": 2.7591423988342285, "learning_rate": 6.421360424012039e-06, "loss": 0.127, "step": 19154 }, { "epoch": 62.80327868852459, "grad_norm": 3.0113627910614014, "learning_rate": 6.420368882011726e-06, "loss": 0.3334, "step": 19155 }, { "epoch": 62.80655737704918, "grad_norm": 2.5726287364959717, "learning_rate": 6.419377380375476e-06, "loss": 0.1593, "step": 19156 }, { "epoch": 62.80983606557377, "grad_norm": 2.5549864768981934, "learning_rate": 6.418385919114467e-06, "loss": 0.2096, "step": 19157 }, { "epoch": 62.81311475409836, "grad_norm": 3.2649805545806885, "learning_rate": 6.417394498239877e-06, "loss": 0.2878, "step": 19158 }, { "epoch": 62.81639344262295, "grad_norm": 2.0429484844207764, "learning_rate": 6.416403117762892e-06, "loss": 0.1198, "step": 19159 }, { "epoch": 62.81967213114754, "grad_norm": 3.241497755050659, "learning_rate": 6.415411777694691e-06, "loss": 0.3343, "step": 19160 }, { "epoch": 62.82295081967213, "grad_norm": 1.934990406036377, "learning_rate": 6.414420478046447e-06, "loss": 0.0832, "step": 19161 }, { "epoch": 62.82622950819672, "grad_norm": 4.5479607582092285, "learning_rate": 6.413429218829337e-06, "loss": 0.2944, "step": 19162 }, { "epoch": 62.829508196721314, "grad_norm": 2.8659403324127197, "learning_rate": 6.412438000054545e-06, "loss": 0.312, "step": 19163 }, { "epoch": 62.8327868852459, "grad_norm": 3.0226898193359375, "learning_rate": 6.411446821733245e-06, "loss": 0.092, "step": 19164 }, { "epoch": 62.83606557377049, "grad_norm": 3.645958662033081, "learning_rate": 6.410455683876613e-06, "loss": 0.2614, "step": 19165 }, { "epoch": 62.83934426229508, "grad_norm": 2.726735830307007, "learning_rate": 6.409464586495826e-06, "loss": 0.178, "step": 19166 }, { "epoch": 62.842622950819674, "grad_norm": 2.5921425819396973, "learning_rate": 6.408473529602055e-06, "loss": 0.0761, "step": 19167 }, { "epoch": 62.84590163934426, "grad_norm": 3.0103211402893066, "learning_rate": 6.4074825132064825e-06, "loss": 0.1937, "step": 19168 }, { "epoch": 62.84918032786885, "grad_norm": 3.6340019702911377, "learning_rate": 6.406491537320281e-06, "loss": 0.1859, "step": 19169 }, { "epoch": 62.85245901639344, "grad_norm": 2.9963505268096924, "learning_rate": 6.405500601954622e-06, "loss": 0.131, "step": 19170 }, { "epoch": 62.855737704918035, "grad_norm": 2.7910776138305664, "learning_rate": 6.4045097071206785e-06, "loss": 0.0907, "step": 19171 }, { "epoch": 62.85901639344262, "grad_norm": 2.4881598949432373, "learning_rate": 6.403518852829629e-06, "loss": 0.1269, "step": 19172 }, { "epoch": 62.86229508196721, "grad_norm": 3.0381157398223877, "learning_rate": 6.402528039092646e-06, "loss": 0.1378, "step": 19173 }, { "epoch": 62.86557377049181, "grad_norm": 3.4042975902557373, "learning_rate": 6.401537265920896e-06, "loss": 0.1805, "step": 19174 }, { "epoch": 62.868852459016395, "grad_norm": 3.5438594818115234, "learning_rate": 6.4005465333255555e-06, "loss": 0.1429, "step": 19175 }, { "epoch": 62.87213114754098, "grad_norm": 3.858443260192871, "learning_rate": 6.399555841317797e-06, "loss": 0.1657, "step": 19176 }, { "epoch": 62.87540983606557, "grad_norm": 3.1427254676818848, "learning_rate": 6.3985651899087874e-06, "loss": 0.2036, "step": 19177 }, { "epoch": 62.87868852459017, "grad_norm": 3.358703374862671, "learning_rate": 6.397574579109703e-06, "loss": 0.2582, "step": 19178 }, { "epoch": 62.881967213114756, "grad_norm": 2.9142282009124756, "learning_rate": 6.39658400893171e-06, "loss": 0.2633, "step": 19179 }, { "epoch": 62.885245901639344, "grad_norm": 2.4086127281188965, "learning_rate": 6.395593479385975e-06, "loss": 0.1941, "step": 19180 }, { "epoch": 62.88852459016393, "grad_norm": 2.9288580417633057, "learning_rate": 6.394602990483676e-06, "loss": 0.1986, "step": 19181 }, { "epoch": 62.89180327868853, "grad_norm": 2.9163568019866943, "learning_rate": 6.393612542235977e-06, "loss": 0.1324, "step": 19182 }, { "epoch": 62.895081967213116, "grad_norm": 1.9732444286346436, "learning_rate": 6.392622134654045e-06, "loss": 0.0788, "step": 19183 }, { "epoch": 62.898360655737704, "grad_norm": 3.2870688438415527, "learning_rate": 6.391631767749047e-06, "loss": 0.3309, "step": 19184 }, { "epoch": 62.90163934426229, "grad_norm": 6.934967041015625, "learning_rate": 6.3906414415321545e-06, "loss": 0.2056, "step": 19185 }, { "epoch": 62.90491803278689, "grad_norm": 2.6722183227539062, "learning_rate": 6.389651156014534e-06, "loss": 0.1804, "step": 19186 }, { "epoch": 62.90819672131148, "grad_norm": 2.846661329269409, "learning_rate": 6.388660911207349e-06, "loss": 0.3067, "step": 19187 }, { "epoch": 62.911475409836065, "grad_norm": 2.922203779220581, "learning_rate": 6.387670707121765e-06, "loss": 0.1979, "step": 19188 }, { "epoch": 62.91475409836065, "grad_norm": 3.6995043754577637, "learning_rate": 6.386680543768953e-06, "loss": 0.1937, "step": 19189 }, { "epoch": 62.91803278688525, "grad_norm": 3.456953287124634, "learning_rate": 6.3856904211600735e-06, "loss": 0.1549, "step": 19190 }, { "epoch": 62.92131147540984, "grad_norm": 3.102006673812866, "learning_rate": 6.3847003393062936e-06, "loss": 0.3629, "step": 19191 }, { "epoch": 62.924590163934425, "grad_norm": 3.262866258621216, "learning_rate": 6.383710298218777e-06, "loss": 0.2428, "step": 19192 }, { "epoch": 62.927868852459014, "grad_norm": 2.5831212997436523, "learning_rate": 6.382720297908682e-06, "loss": 0.1591, "step": 19193 }, { "epoch": 62.93114754098361, "grad_norm": 3.0395820140838623, "learning_rate": 6.3817303383871796e-06, "loss": 0.0908, "step": 19194 }, { "epoch": 62.9344262295082, "grad_norm": 2.7653450965881348, "learning_rate": 6.380740419665432e-06, "loss": 0.1163, "step": 19195 }, { "epoch": 62.937704918032786, "grad_norm": 2.5761070251464844, "learning_rate": 6.379750541754598e-06, "loss": 0.1197, "step": 19196 }, { "epoch": 62.940983606557374, "grad_norm": 2.425055742263794, "learning_rate": 6.378760704665836e-06, "loss": 0.1034, "step": 19197 }, { "epoch": 62.94426229508197, "grad_norm": 2.8830456733703613, "learning_rate": 6.377770908410316e-06, "loss": 0.2346, "step": 19198 }, { "epoch": 62.94754098360656, "grad_norm": 3.1144425868988037, "learning_rate": 6.376781152999197e-06, "loss": 0.113, "step": 19199 }, { "epoch": 62.950819672131146, "grad_norm": 3.2513506412506104, "learning_rate": 6.375791438443635e-06, "loss": 0.1711, "step": 19200 }, { "epoch": 62.954098360655735, "grad_norm": 3.2198238372802734, "learning_rate": 6.374801764754792e-06, "loss": 0.2424, "step": 19201 }, { "epoch": 62.95737704918033, "grad_norm": 2.8319685459136963, "learning_rate": 6.373812131943832e-06, "loss": 0.1834, "step": 19202 }, { "epoch": 62.96065573770492, "grad_norm": 2.8231611251831055, "learning_rate": 6.372822540021908e-06, "loss": 0.2276, "step": 19203 }, { "epoch": 62.96393442622951, "grad_norm": 3.016049861907959, "learning_rate": 6.371832989000182e-06, "loss": 0.2512, "step": 19204 }, { "epoch": 62.967213114754095, "grad_norm": 3.2624330520629883, "learning_rate": 6.370843478889812e-06, "loss": 0.3102, "step": 19205 }, { "epoch": 62.97049180327869, "grad_norm": 3.6019041538238525, "learning_rate": 6.369854009701953e-06, "loss": 0.1662, "step": 19206 }, { "epoch": 62.97377049180328, "grad_norm": 2.874742031097412, "learning_rate": 6.368864581447765e-06, "loss": 0.107, "step": 19207 }, { "epoch": 62.97704918032787, "grad_norm": 3.0867693424224854, "learning_rate": 6.367875194138406e-06, "loss": 0.1943, "step": 19208 }, { "epoch": 62.980327868852456, "grad_norm": 3.1079537868499756, "learning_rate": 6.36688584778503e-06, "loss": 0.2067, "step": 19209 }, { "epoch": 62.98360655737705, "grad_norm": 3.261683225631714, "learning_rate": 6.365896542398791e-06, "loss": 0.2098, "step": 19210 }, { "epoch": 62.98688524590164, "grad_norm": 3.6050639152526855, "learning_rate": 6.3649072779908505e-06, "loss": 0.2372, "step": 19211 }, { "epoch": 62.99016393442623, "grad_norm": 3.2948808670043945, "learning_rate": 6.363918054572359e-06, "loss": 0.2621, "step": 19212 }, { "epoch": 62.993442622950816, "grad_norm": 2.662094831466675, "learning_rate": 6.362928872154473e-06, "loss": 0.0791, "step": 19213 }, { "epoch": 62.99672131147541, "grad_norm": 3.4866087436676025, "learning_rate": 6.361939730748341e-06, "loss": 0.2788, "step": 19214 }, { "epoch": 63.0, "grad_norm": 2.1172149181365967, "learning_rate": 6.360950630365126e-06, "loss": 0.1157, "step": 19215 }, { "epoch": 63.00327868852459, "grad_norm": 2.714163064956665, "learning_rate": 6.359961571015976e-06, "loss": 0.1959, "step": 19216 }, { "epoch": 63.006557377049184, "grad_norm": 3.0651824474334717, "learning_rate": 6.358972552712043e-06, "loss": 0.3057, "step": 19217 }, { "epoch": 63.00983606557377, "grad_norm": 3.2648465633392334, "learning_rate": 6.357983575464482e-06, "loss": 0.2183, "step": 19218 }, { "epoch": 63.01311475409836, "grad_norm": 2.5873124599456787, "learning_rate": 6.356994639284436e-06, "loss": 0.1264, "step": 19219 }, { "epoch": 63.01639344262295, "grad_norm": 2.7833638191223145, "learning_rate": 6.3560057441830695e-06, "loss": 0.1731, "step": 19220 }, { "epoch": 63.019672131147544, "grad_norm": 3.594325542449951, "learning_rate": 6.355016890171526e-06, "loss": 0.2486, "step": 19221 }, { "epoch": 63.02295081967213, "grad_norm": 8.308038711547852, "learning_rate": 6.354028077260959e-06, "loss": 0.12, "step": 19222 }, { "epoch": 63.02622950819672, "grad_norm": 2.6535685062408447, "learning_rate": 6.353039305462509e-06, "loss": 0.2365, "step": 19223 }, { "epoch": 63.02950819672131, "grad_norm": 2.681112766265869, "learning_rate": 6.3520505747873385e-06, "loss": 0.1992, "step": 19224 }, { "epoch": 63.032786885245905, "grad_norm": 2.846149206161499, "learning_rate": 6.351061885246591e-06, "loss": 0.2572, "step": 19225 }, { "epoch": 63.03606557377049, "grad_norm": 2.6166372299194336, "learning_rate": 6.350073236851415e-06, "loss": 0.1286, "step": 19226 }, { "epoch": 63.03934426229508, "grad_norm": 3.2329037189483643, "learning_rate": 6.349084629612954e-06, "loss": 0.2052, "step": 19227 }, { "epoch": 63.04262295081967, "grad_norm": 2.8090977668762207, "learning_rate": 6.348096063542363e-06, "loss": 0.1257, "step": 19228 }, { "epoch": 63.045901639344265, "grad_norm": 3.307821035385132, "learning_rate": 6.347107538650785e-06, "loss": 0.1926, "step": 19229 }, { "epoch": 63.049180327868854, "grad_norm": 2.217966318130493, "learning_rate": 6.346119054949368e-06, "loss": 0.1073, "step": 19230 }, { "epoch": 63.05245901639344, "grad_norm": 2.7101128101348877, "learning_rate": 6.345130612449259e-06, "loss": 0.0999, "step": 19231 }, { "epoch": 63.05573770491803, "grad_norm": 4.090651512145996, "learning_rate": 6.344142211161599e-06, "loss": 0.2226, "step": 19232 }, { "epoch": 63.059016393442626, "grad_norm": 2.766641139984131, "learning_rate": 6.3431538510975385e-06, "loss": 0.1002, "step": 19233 }, { "epoch": 63.062295081967214, "grad_norm": 2.56227445602417, "learning_rate": 6.3421655322682205e-06, "loss": 0.0817, "step": 19234 }, { "epoch": 63.0655737704918, "grad_norm": 3.2597949504852295, "learning_rate": 6.341177254684792e-06, "loss": 0.2126, "step": 19235 }, { "epoch": 63.06885245901639, "grad_norm": 2.742798328399658, "learning_rate": 6.340189018358388e-06, "loss": 0.1162, "step": 19236 }, { "epoch": 63.072131147540986, "grad_norm": 3.01951003074646, "learning_rate": 6.3392008233001626e-06, "loss": 0.2069, "step": 19237 }, { "epoch": 63.075409836065575, "grad_norm": 3.1189186573028564, "learning_rate": 6.3382126695212554e-06, "loss": 0.2931, "step": 19238 }, { "epoch": 63.07868852459016, "grad_norm": 2.7307028770446777, "learning_rate": 6.3372245570328065e-06, "loss": 0.1632, "step": 19239 }, { "epoch": 63.08196721311475, "grad_norm": 3.1178951263427734, "learning_rate": 6.336236485845954e-06, "loss": 0.1955, "step": 19240 }, { "epoch": 63.08524590163935, "grad_norm": 2.2791950702667236, "learning_rate": 6.33524845597185e-06, "loss": 0.2007, "step": 19241 }, { "epoch": 63.088524590163935, "grad_norm": 3.3496294021606445, "learning_rate": 6.3342604674216305e-06, "loss": 0.2661, "step": 19242 }, { "epoch": 63.09180327868852, "grad_norm": 2.7993099689483643, "learning_rate": 6.333272520206435e-06, "loss": 0.2541, "step": 19243 }, { "epoch": 63.09508196721311, "grad_norm": 3.798424005508423, "learning_rate": 6.332284614337403e-06, "loss": 0.2288, "step": 19244 }, { "epoch": 63.09836065573771, "grad_norm": 4.875602722167969, "learning_rate": 6.331296749825673e-06, "loss": 0.1135, "step": 19245 }, { "epoch": 63.101639344262296, "grad_norm": 2.762010097503662, "learning_rate": 6.330308926682391e-06, "loss": 0.1956, "step": 19246 }, { "epoch": 63.104918032786884, "grad_norm": 3.052147388458252, "learning_rate": 6.3293211449186896e-06, "loss": 0.1586, "step": 19247 }, { "epoch": 63.10819672131147, "grad_norm": 2.681842565536499, "learning_rate": 6.32833340454571e-06, "loss": 0.2843, "step": 19248 }, { "epoch": 63.11147540983607, "grad_norm": 2.636465549468994, "learning_rate": 6.3273457055745844e-06, "loss": 0.1005, "step": 19249 }, { "epoch": 63.114754098360656, "grad_norm": 3.0499515533447266, "learning_rate": 6.326358048016459e-06, "loss": 0.2263, "step": 19250 }, { "epoch": 63.118032786885244, "grad_norm": 2.8900725841522217, "learning_rate": 6.325370431882467e-06, "loss": 0.1355, "step": 19251 }, { "epoch": 63.12131147540983, "grad_norm": 3.1859564781188965, "learning_rate": 6.324382857183742e-06, "loss": 0.2032, "step": 19252 }, { "epoch": 63.12459016393443, "grad_norm": 2.6349332332611084, "learning_rate": 6.323395323931419e-06, "loss": 0.1582, "step": 19253 }, { "epoch": 63.12786885245902, "grad_norm": 2.600966215133667, "learning_rate": 6.322407832136641e-06, "loss": 0.0941, "step": 19254 }, { "epoch": 63.131147540983605, "grad_norm": 2.6215150356292725, "learning_rate": 6.321420381810538e-06, "loss": 0.1953, "step": 19255 }, { "epoch": 63.13442622950819, "grad_norm": 2.5376651287078857, "learning_rate": 6.320432972964243e-06, "loss": 0.1078, "step": 19256 }, { "epoch": 63.13770491803279, "grad_norm": 2.0813193321228027, "learning_rate": 6.319445605608894e-06, "loss": 0.1751, "step": 19257 }, { "epoch": 63.14098360655738, "grad_norm": 2.3577046394348145, "learning_rate": 6.318458279755621e-06, "loss": 0.0995, "step": 19258 }, { "epoch": 63.144262295081965, "grad_norm": 2.5436244010925293, "learning_rate": 6.317470995415558e-06, "loss": 0.0838, "step": 19259 }, { "epoch": 63.14754098360656, "grad_norm": 2.975316286087036, "learning_rate": 6.31648375259984e-06, "loss": 0.1715, "step": 19260 }, { "epoch": 63.15081967213115, "grad_norm": 3.499284267425537, "learning_rate": 6.315496551319599e-06, "loss": 0.1603, "step": 19261 }, { "epoch": 63.15409836065574, "grad_norm": 2.6862378120422363, "learning_rate": 6.31450939158596e-06, "loss": 0.0821, "step": 19262 }, { "epoch": 63.157377049180326, "grad_norm": 3.7801594734191895, "learning_rate": 6.313522273410064e-06, "loss": 0.3435, "step": 19263 }, { "epoch": 63.16065573770492, "grad_norm": 2.679469108581543, "learning_rate": 6.3125351968030375e-06, "loss": 0.1101, "step": 19264 }, { "epoch": 63.16393442622951, "grad_norm": 2.7387261390686035, "learning_rate": 6.311548161776011e-06, "loss": 0.0514, "step": 19265 }, { "epoch": 63.1672131147541, "grad_norm": 2.9307749271392822, "learning_rate": 6.31056116834011e-06, "loss": 0.083, "step": 19266 }, { "epoch": 63.170491803278686, "grad_norm": 3.29872465133667, "learning_rate": 6.309574216506471e-06, "loss": 0.2665, "step": 19267 }, { "epoch": 63.17377049180328, "grad_norm": 2.6627824306488037, "learning_rate": 6.3085873062862204e-06, "loss": 0.1231, "step": 19268 }, { "epoch": 63.17704918032787, "grad_norm": 3.1939327716827393, "learning_rate": 6.307600437690486e-06, "loss": 0.3151, "step": 19269 }, { "epoch": 63.18032786885246, "grad_norm": 2.1085896492004395, "learning_rate": 6.3066136107303964e-06, "loss": 0.0939, "step": 19270 }, { "epoch": 63.18360655737705, "grad_norm": 3.9533345699310303, "learning_rate": 6.305626825417073e-06, "loss": 0.1461, "step": 19271 }, { "epoch": 63.18688524590164, "grad_norm": 2.362607955932617, "learning_rate": 6.304640081761653e-06, "loss": 0.0885, "step": 19272 }, { "epoch": 63.19016393442623, "grad_norm": 2.485163927078247, "learning_rate": 6.303653379775259e-06, "loss": 0.1599, "step": 19273 }, { "epoch": 63.19344262295082, "grad_norm": 2.443244695663452, "learning_rate": 6.302666719469015e-06, "loss": 0.0973, "step": 19274 }, { "epoch": 63.19672131147541, "grad_norm": 3.254688262939453, "learning_rate": 6.301680100854046e-06, "loss": 0.2245, "step": 19275 }, { "epoch": 63.2, "grad_norm": 3.358844518661499, "learning_rate": 6.300693523941481e-06, "loss": 0.2012, "step": 19276 }, { "epoch": 63.20327868852459, "grad_norm": 4.729735374450684, "learning_rate": 6.299706988742444e-06, "loss": 0.2771, "step": 19277 }, { "epoch": 63.20655737704918, "grad_norm": 3.73325514793396, "learning_rate": 6.298720495268058e-06, "loss": 0.2697, "step": 19278 }, { "epoch": 63.20983606557377, "grad_norm": 2.3408186435699463, "learning_rate": 6.297734043529442e-06, "loss": 0.1581, "step": 19279 }, { "epoch": 63.21311475409836, "grad_norm": 3.284729480743408, "learning_rate": 6.29674763353773e-06, "loss": 0.1425, "step": 19280 }, { "epoch": 63.21639344262295, "grad_norm": 2.3854446411132812, "learning_rate": 6.295761265304037e-06, "loss": 0.1034, "step": 19281 }, { "epoch": 63.21967213114754, "grad_norm": 5.794437408447266, "learning_rate": 6.294774938839485e-06, "loss": 0.2154, "step": 19282 }, { "epoch": 63.22295081967213, "grad_norm": 2.0758309364318848, "learning_rate": 6.293788654155202e-06, "loss": 0.1756, "step": 19283 }, { "epoch": 63.226229508196724, "grad_norm": 2.6054606437683105, "learning_rate": 6.292802411262302e-06, "loss": 0.1004, "step": 19284 }, { "epoch": 63.22950819672131, "grad_norm": 2.987598180770874, "learning_rate": 6.291816210171909e-06, "loss": 0.1359, "step": 19285 }, { "epoch": 63.2327868852459, "grad_norm": 3.2297258377075195, "learning_rate": 6.290830050895147e-06, "loss": 0.2065, "step": 19286 }, { "epoch": 63.23606557377049, "grad_norm": 4.004615783691406, "learning_rate": 6.2898439334431316e-06, "loss": 0.1113, "step": 19287 }, { "epoch": 63.239344262295084, "grad_norm": 2.626671314239502, "learning_rate": 6.288857857826982e-06, "loss": 0.2487, "step": 19288 }, { "epoch": 63.24262295081967, "grad_norm": 2.6607666015625, "learning_rate": 6.287871824057818e-06, "loss": 0.1211, "step": 19289 }, { "epoch": 63.24590163934426, "grad_norm": 2.5883431434631348, "learning_rate": 6.286885832146762e-06, "loss": 0.139, "step": 19290 }, { "epoch": 63.24918032786885, "grad_norm": 2.7771289348602295, "learning_rate": 6.2858998821049275e-06, "loss": 0.127, "step": 19291 }, { "epoch": 63.252459016393445, "grad_norm": 2.667320728302002, "learning_rate": 6.284913973943431e-06, "loss": 0.1543, "step": 19292 }, { "epoch": 63.25573770491803, "grad_norm": 3.3941702842712402, "learning_rate": 6.283928107673395e-06, "loss": 0.2914, "step": 19293 }, { "epoch": 63.25901639344262, "grad_norm": 2.695666551589966, "learning_rate": 6.282942283305934e-06, "loss": 0.1102, "step": 19294 }, { "epoch": 63.26229508196721, "grad_norm": 2.264798879623413, "learning_rate": 6.281956500852163e-06, "loss": 0.1274, "step": 19295 }, { "epoch": 63.265573770491805, "grad_norm": 2.3046748638153076, "learning_rate": 6.280970760323199e-06, "loss": 0.3589, "step": 19296 }, { "epoch": 63.268852459016394, "grad_norm": 2.942539691925049, "learning_rate": 6.279985061730152e-06, "loss": 0.2286, "step": 19297 }, { "epoch": 63.27213114754098, "grad_norm": 3.1617517471313477, "learning_rate": 6.278999405084145e-06, "loss": 0.2246, "step": 19298 }, { "epoch": 63.27540983606557, "grad_norm": 3.011949300765991, "learning_rate": 6.278013790396289e-06, "loss": 0.1925, "step": 19299 }, { "epoch": 63.278688524590166, "grad_norm": 2.272474527359009, "learning_rate": 6.277028217677698e-06, "loss": 0.1479, "step": 19300 }, { "epoch": 63.281967213114754, "grad_norm": 2.687396287918091, "learning_rate": 6.27604268693948e-06, "loss": 0.1053, "step": 19301 }, { "epoch": 63.28524590163934, "grad_norm": 2.2940597534179688, "learning_rate": 6.275057198192757e-06, "loss": 0.0724, "step": 19302 }, { "epoch": 63.28852459016394, "grad_norm": 3.218775987625122, "learning_rate": 6.274071751448636e-06, "loss": 0.1437, "step": 19303 }, { "epoch": 63.291803278688526, "grad_norm": 2.120514392852783, "learning_rate": 6.273086346718231e-06, "loss": 0.2467, "step": 19304 }, { "epoch": 63.295081967213115, "grad_norm": 3.1404659748077393, "learning_rate": 6.272100984012648e-06, "loss": 0.4085, "step": 19305 }, { "epoch": 63.2983606557377, "grad_norm": 2.6088778972625732, "learning_rate": 6.271115663343007e-06, "loss": 0.1548, "step": 19306 }, { "epoch": 63.3016393442623, "grad_norm": 2.334611177444458, "learning_rate": 6.270130384720415e-06, "loss": 0.1673, "step": 19307 }, { "epoch": 63.30491803278689, "grad_norm": 2.702016592025757, "learning_rate": 6.269145148155978e-06, "loss": 0.0982, "step": 19308 }, { "epoch": 63.308196721311475, "grad_norm": 2.7739784717559814, "learning_rate": 6.268159953660809e-06, "loss": 0.2359, "step": 19309 }, { "epoch": 63.31147540983606, "grad_norm": 2.5583300590515137, "learning_rate": 6.267174801246015e-06, "loss": 0.0459, "step": 19310 }, { "epoch": 63.31475409836066, "grad_norm": 2.556654453277588, "learning_rate": 6.26618969092271e-06, "loss": 0.2073, "step": 19311 }, { "epoch": 63.31803278688525, "grad_norm": 3.2338180541992188, "learning_rate": 6.2652046227019944e-06, "loss": 0.176, "step": 19312 }, { "epoch": 63.321311475409836, "grad_norm": 3.2664318084716797, "learning_rate": 6.264219596594985e-06, "loss": 0.2216, "step": 19313 }, { "epoch": 63.324590163934424, "grad_norm": 2.5550434589385986, "learning_rate": 6.263234612612779e-06, "loss": 0.2324, "step": 19314 }, { "epoch": 63.32786885245902, "grad_norm": 3.337656259536743, "learning_rate": 6.262249670766489e-06, "loss": 0.247, "step": 19315 }, { "epoch": 63.33114754098361, "grad_norm": 2.8510749340057373, "learning_rate": 6.261264771067222e-06, "loss": 0.1483, "step": 19316 }, { "epoch": 63.334426229508196, "grad_norm": 13.764620780944824, "learning_rate": 6.260279913526083e-06, "loss": 0.1335, "step": 19317 }, { "epoch": 63.337704918032784, "grad_norm": 4.832649230957031, "learning_rate": 6.259295098154171e-06, "loss": 0.4002, "step": 19318 }, { "epoch": 63.34098360655738, "grad_norm": 2.746351957321167, "learning_rate": 6.258310324962601e-06, "loss": 0.1192, "step": 19319 }, { "epoch": 63.34426229508197, "grad_norm": 2.464632272720337, "learning_rate": 6.257325593962472e-06, "loss": 0.1499, "step": 19320 }, { "epoch": 63.34754098360656, "grad_norm": 3.0602428913116455, "learning_rate": 6.256340905164888e-06, "loss": 0.1999, "step": 19321 }, { "epoch": 63.350819672131145, "grad_norm": 2.7562711238861084, "learning_rate": 6.2553562585809534e-06, "loss": 0.1099, "step": 19322 }, { "epoch": 63.35409836065574, "grad_norm": 3.3629872798919678, "learning_rate": 6.254371654221767e-06, "loss": 0.1677, "step": 19323 }, { "epoch": 63.35737704918033, "grad_norm": 2.6559879779815674, "learning_rate": 6.253387092098437e-06, "loss": 0.183, "step": 19324 }, { "epoch": 63.36065573770492, "grad_norm": 3.0806636810302734, "learning_rate": 6.252402572222065e-06, "loss": 0.1807, "step": 19325 }, { "epoch": 63.363934426229505, "grad_norm": 2.9059622287750244, "learning_rate": 6.25141809460375e-06, "loss": 0.1766, "step": 19326 }, { "epoch": 63.3672131147541, "grad_norm": 2.331899404525757, "learning_rate": 6.250433659254589e-06, "loss": 0.088, "step": 19327 }, { "epoch": 63.37049180327869, "grad_norm": 1.9944696426391602, "learning_rate": 6.2494492661856896e-06, "loss": 0.1383, "step": 19328 }, { "epoch": 63.37377049180328, "grad_norm": 2.6170051097869873, "learning_rate": 6.2484649154081524e-06, "loss": 0.0932, "step": 19329 }, { "epoch": 63.377049180327866, "grad_norm": 2.6127359867095947, "learning_rate": 6.247480606933072e-06, "loss": 0.193, "step": 19330 }, { "epoch": 63.38032786885246, "grad_norm": 3.373425245285034, "learning_rate": 6.2464963407715455e-06, "loss": 0.0804, "step": 19331 }, { "epoch": 63.38360655737705, "grad_norm": 4.1419758796691895, "learning_rate": 6.24551211693468e-06, "loss": 0.1887, "step": 19332 }, { "epoch": 63.38688524590164, "grad_norm": 2.8928475379943848, "learning_rate": 6.2445279354335685e-06, "loss": 0.1633, "step": 19333 }, { "epoch": 63.390163934426226, "grad_norm": 2.376345634460449, "learning_rate": 6.24354379627931e-06, "loss": 0.0992, "step": 19334 }, { "epoch": 63.39344262295082, "grad_norm": 4.532782554626465, "learning_rate": 6.2425596994829974e-06, "loss": 0.1802, "step": 19335 }, { "epoch": 63.39672131147541, "grad_norm": 2.966958999633789, "learning_rate": 6.2415756450557315e-06, "loss": 0.1278, "step": 19336 }, { "epoch": 63.4, "grad_norm": 3.3045692443847656, "learning_rate": 6.2405916330086106e-06, "loss": 0.1379, "step": 19337 }, { "epoch": 63.40327868852459, "grad_norm": 2.630415678024292, "learning_rate": 6.239607663352726e-06, "loss": 0.1722, "step": 19338 }, { "epoch": 63.40655737704918, "grad_norm": 3.515165090560913, "learning_rate": 6.238623736099176e-06, "loss": 0.2086, "step": 19339 }, { "epoch": 63.40983606557377, "grad_norm": 3.6792142391204834, "learning_rate": 6.237639851259053e-06, "loss": 0.1854, "step": 19340 }, { "epoch": 63.41311475409836, "grad_norm": 2.8608338832855225, "learning_rate": 6.236656008843452e-06, "loss": 0.227, "step": 19341 }, { "epoch": 63.41639344262295, "grad_norm": 3.2865936756134033, "learning_rate": 6.23567220886347e-06, "loss": 0.3697, "step": 19342 }, { "epoch": 63.41967213114754, "grad_norm": 2.200582504272461, "learning_rate": 6.2346884513301975e-06, "loss": 0.1247, "step": 19343 }, { "epoch": 63.42295081967213, "grad_norm": 2.3333802223205566, "learning_rate": 6.233704736254724e-06, "loss": 0.1973, "step": 19344 }, { "epoch": 63.42622950819672, "grad_norm": 4.911738395690918, "learning_rate": 6.232721063648148e-06, "loss": 0.0654, "step": 19345 }, { "epoch": 63.429508196721315, "grad_norm": 2.599550485610962, "learning_rate": 6.231737433521561e-06, "loss": 0.0877, "step": 19346 }, { "epoch": 63.4327868852459, "grad_norm": 2.8945276737213135, "learning_rate": 6.230753845886051e-06, "loss": 0.1441, "step": 19347 }, { "epoch": 63.43606557377049, "grad_norm": 3.171823024749756, "learning_rate": 6.229770300752711e-06, "loss": 0.2745, "step": 19348 }, { "epoch": 63.43934426229508, "grad_norm": 2.7442684173583984, "learning_rate": 6.228786798132627e-06, "loss": 0.2483, "step": 19349 }, { "epoch": 63.442622950819676, "grad_norm": 2.5741207599639893, "learning_rate": 6.2278033380368975e-06, "loss": 0.1407, "step": 19350 }, { "epoch": 63.445901639344264, "grad_norm": 2.810659646987915, "learning_rate": 6.2268199204766065e-06, "loss": 0.1315, "step": 19351 }, { "epoch": 63.44918032786885, "grad_norm": 3.3470711708068848, "learning_rate": 6.225836545462845e-06, "loss": 0.1551, "step": 19352 }, { "epoch": 63.45245901639344, "grad_norm": 3.5454773902893066, "learning_rate": 6.224853213006695e-06, "loss": 0.3485, "step": 19353 }, { "epoch": 63.455737704918036, "grad_norm": 3.457226037979126, "learning_rate": 6.2238699231192556e-06, "loss": 0.1423, "step": 19354 }, { "epoch": 63.459016393442624, "grad_norm": 2.873809337615967, "learning_rate": 6.222886675811608e-06, "loss": 0.1801, "step": 19355 }, { "epoch": 63.46229508196721, "grad_norm": 3.499218702316284, "learning_rate": 6.2219034710948415e-06, "loss": 0.1176, "step": 19356 }, { "epoch": 63.4655737704918, "grad_norm": 3.0595273971557617, "learning_rate": 6.220920308980036e-06, "loss": 0.2214, "step": 19357 }, { "epoch": 63.4688524590164, "grad_norm": 2.87302303314209, "learning_rate": 6.219937189478289e-06, "loss": 0.0889, "step": 19358 }, { "epoch": 63.472131147540985, "grad_norm": 3.4271411895751953, "learning_rate": 6.218954112600679e-06, "loss": 0.245, "step": 19359 }, { "epoch": 63.47540983606557, "grad_norm": 3.0751609802246094, "learning_rate": 6.217971078358294e-06, "loss": 0.1482, "step": 19360 }, { "epoch": 63.47868852459016, "grad_norm": 2.836243152618408, "learning_rate": 6.216988086762215e-06, "loss": 0.1463, "step": 19361 }, { "epoch": 63.48196721311476, "grad_norm": 2.5553479194641113, "learning_rate": 6.216005137823528e-06, "loss": 0.3403, "step": 19362 }, { "epoch": 63.485245901639345, "grad_norm": 3.2143845558166504, "learning_rate": 6.2150222315533205e-06, "loss": 0.153, "step": 19363 }, { "epoch": 63.488524590163934, "grad_norm": 2.7404589653015137, "learning_rate": 6.21403936796267e-06, "loss": 0.1428, "step": 19364 }, { "epoch": 63.49180327868852, "grad_norm": 3.3431448936462402, "learning_rate": 6.213056547062663e-06, "loss": 0.1918, "step": 19365 }, { "epoch": 63.49508196721312, "grad_norm": 3.4884395599365234, "learning_rate": 6.212073768864382e-06, "loss": 0.1128, "step": 19366 }, { "epoch": 63.498360655737706, "grad_norm": 2.7605767250061035, "learning_rate": 6.211091033378905e-06, "loss": 0.2534, "step": 19367 }, { "epoch": 63.501639344262294, "grad_norm": 6.325965881347656, "learning_rate": 6.210108340617319e-06, "loss": 0.2159, "step": 19368 }, { "epoch": 63.50491803278688, "grad_norm": 3.7754690647125244, "learning_rate": 6.2091256905907015e-06, "loss": 0.2613, "step": 19369 }, { "epoch": 63.50819672131148, "grad_norm": 2.786137342453003, "learning_rate": 6.208143083310132e-06, "loss": 0.2196, "step": 19370 }, { "epoch": 63.511475409836066, "grad_norm": 2.4927122592926025, "learning_rate": 6.207160518786691e-06, "loss": 0.1814, "step": 19371 }, { "epoch": 63.514754098360655, "grad_norm": 3.7010374069213867, "learning_rate": 6.206177997031462e-06, "loss": 0.1837, "step": 19372 }, { "epoch": 63.51803278688524, "grad_norm": 3.5116119384765625, "learning_rate": 6.205195518055519e-06, "loss": 0.1693, "step": 19373 }, { "epoch": 63.52131147540984, "grad_norm": 3.135021686553955, "learning_rate": 6.204213081869943e-06, "loss": 0.2303, "step": 19374 }, { "epoch": 63.52459016393443, "grad_norm": 3.485517978668213, "learning_rate": 6.203230688485807e-06, "loss": 0.139, "step": 19375 }, { "epoch": 63.527868852459015, "grad_norm": 2.493255376815796, "learning_rate": 6.202248337914196e-06, "loss": 0.1425, "step": 19376 }, { "epoch": 63.5311475409836, "grad_norm": 3.9119932651519775, "learning_rate": 6.201266030166185e-06, "loss": 0.2886, "step": 19377 }, { "epoch": 63.5344262295082, "grad_norm": 2.54500150680542, "learning_rate": 6.2002837652528495e-06, "loss": 0.0899, "step": 19378 }, { "epoch": 63.53770491803279, "grad_norm": 2.513774871826172, "learning_rate": 6.19930154318526e-06, "loss": 0.1337, "step": 19379 }, { "epoch": 63.540983606557376, "grad_norm": 3.1172983646392822, "learning_rate": 6.198319363974501e-06, "loss": 0.2859, "step": 19380 }, { "epoch": 63.544262295081964, "grad_norm": 2.836156129837036, "learning_rate": 6.197337227631646e-06, "loss": 0.0999, "step": 19381 }, { "epoch": 63.54754098360656, "grad_norm": 2.7251694202423096, "learning_rate": 6.196355134167766e-06, "loss": 0.1903, "step": 19382 }, { "epoch": 63.55081967213115, "grad_norm": 2.9131367206573486, "learning_rate": 6.195373083593932e-06, "loss": 0.0877, "step": 19383 }, { "epoch": 63.554098360655736, "grad_norm": 3.559007167816162, "learning_rate": 6.1943910759212265e-06, "loss": 0.2503, "step": 19384 }, { "epoch": 63.557377049180324, "grad_norm": 2.864619255065918, "learning_rate": 6.19340911116072e-06, "loss": 0.063, "step": 19385 }, { "epoch": 63.56065573770492, "grad_norm": 2.8116073608398438, "learning_rate": 6.192427189323482e-06, "loss": 0.2045, "step": 19386 }, { "epoch": 63.56393442622951, "grad_norm": 4.350181579589844, "learning_rate": 6.191445310420585e-06, "loss": 0.1048, "step": 19387 }, { "epoch": 63.5672131147541, "grad_norm": 2.8888306617736816, "learning_rate": 6.190463474463102e-06, "loss": 0.1152, "step": 19388 }, { "epoch": 63.570491803278685, "grad_norm": 2.4885294437408447, "learning_rate": 6.189481681462106e-06, "loss": 0.097, "step": 19389 }, { "epoch": 63.57377049180328, "grad_norm": 3.306286573410034, "learning_rate": 6.188499931428666e-06, "loss": 0.2106, "step": 19390 }, { "epoch": 63.57704918032787, "grad_norm": 3.0050249099731445, "learning_rate": 6.187518224373852e-06, "loss": 0.1413, "step": 19391 }, { "epoch": 63.58032786885246, "grad_norm": 2.500175952911377, "learning_rate": 6.186536560308732e-06, "loss": 0.1897, "step": 19392 }, { "epoch": 63.58360655737705, "grad_norm": 2.2790329456329346, "learning_rate": 6.185554939244381e-06, "loss": 0.0788, "step": 19393 }, { "epoch": 63.58688524590164, "grad_norm": 3.976355791091919, "learning_rate": 6.18457336119186e-06, "loss": 0.2643, "step": 19394 }, { "epoch": 63.59016393442623, "grad_norm": 2.7682831287384033, "learning_rate": 6.183591826162245e-06, "loss": 0.1775, "step": 19395 }, { "epoch": 63.59344262295082, "grad_norm": 3.5385541915893555, "learning_rate": 6.1826103341665965e-06, "loss": 0.2222, "step": 19396 }, { "epoch": 63.59672131147541, "grad_norm": 3.2897393703460693, "learning_rate": 6.1816288852159865e-06, "loss": 0.1132, "step": 19397 }, { "epoch": 63.6, "grad_norm": 2.3084909915924072, "learning_rate": 6.180647479321484e-06, "loss": 0.3003, "step": 19398 }, { "epoch": 63.60327868852459, "grad_norm": 2.346724510192871, "learning_rate": 6.179666116494152e-06, "loss": 0.0576, "step": 19399 }, { "epoch": 63.60655737704918, "grad_norm": 2.896641969680786, "learning_rate": 6.178684796745056e-06, "loss": 0.2792, "step": 19400 }, { "epoch": 63.609836065573774, "grad_norm": 3.282154083251953, "learning_rate": 6.177703520085257e-06, "loss": 0.2568, "step": 19401 }, { "epoch": 63.61311475409836, "grad_norm": 2.6227426528930664, "learning_rate": 6.176722286525831e-06, "loss": 0.102, "step": 19402 }, { "epoch": 63.61639344262295, "grad_norm": 3.1459391117095947, "learning_rate": 6.1757410960778356e-06, "loss": 0.2519, "step": 19403 }, { "epoch": 63.61967213114754, "grad_norm": 2.765294313430786, "learning_rate": 6.174759948752336e-06, "loss": 0.1275, "step": 19404 }, { "epoch": 63.622950819672134, "grad_norm": 3.8474550247192383, "learning_rate": 6.173778844560389e-06, "loss": 0.1332, "step": 19405 }, { "epoch": 63.62622950819672, "grad_norm": 3.158514976501465, "learning_rate": 6.17279778351307e-06, "loss": 0.2438, "step": 19406 }, { "epoch": 63.62950819672131, "grad_norm": 3.417398452758789, "learning_rate": 6.171816765621435e-06, "loss": 0.0972, "step": 19407 }, { "epoch": 63.6327868852459, "grad_norm": 2.913947820663452, "learning_rate": 6.170835790896547e-06, "loss": 0.1291, "step": 19408 }, { "epoch": 63.636065573770495, "grad_norm": 3.4753706455230713, "learning_rate": 6.169854859349461e-06, "loss": 0.178, "step": 19409 }, { "epoch": 63.63934426229508, "grad_norm": 3.4145474433898926, "learning_rate": 6.168873970991249e-06, "loss": 0.1432, "step": 19410 }, { "epoch": 63.64262295081967, "grad_norm": 2.865499496459961, "learning_rate": 6.1678931258329665e-06, "loss": 0.1855, "step": 19411 }, { "epoch": 63.64590163934426, "grad_norm": 3.780263662338257, "learning_rate": 6.166912323885672e-06, "loss": 0.1148, "step": 19412 }, { "epoch": 63.649180327868855, "grad_norm": 2.689535140991211, "learning_rate": 6.165931565160428e-06, "loss": 0.1472, "step": 19413 }, { "epoch": 63.65245901639344, "grad_norm": 11.466618537902832, "learning_rate": 6.164950849668287e-06, "loss": 0.1851, "step": 19414 }, { "epoch": 63.65573770491803, "grad_norm": 3.1973824501037598, "learning_rate": 6.1639701774203176e-06, "loss": 0.2438, "step": 19415 }, { "epoch": 63.65901639344262, "grad_norm": 2.6129770278930664, "learning_rate": 6.162989548427573e-06, "loss": 0.1107, "step": 19416 }, { "epoch": 63.662295081967216, "grad_norm": 2.17850661277771, "learning_rate": 6.162008962701107e-06, "loss": 0.3003, "step": 19417 }, { "epoch": 63.665573770491804, "grad_norm": 2.583108425140381, "learning_rate": 6.161028420251982e-06, "loss": 0.1501, "step": 19418 }, { "epoch": 63.66885245901639, "grad_norm": 2.9356372356414795, "learning_rate": 6.160047921091254e-06, "loss": 0.1265, "step": 19419 }, { "epoch": 63.67213114754098, "grad_norm": 2.324193000793457, "learning_rate": 6.159067465229977e-06, "loss": 0.2137, "step": 19420 }, { "epoch": 63.675409836065576, "grad_norm": 3.012275457382202, "learning_rate": 6.158087052679212e-06, "loss": 0.1443, "step": 19421 }, { "epoch": 63.678688524590164, "grad_norm": 2.9128265380859375, "learning_rate": 6.1571066834500046e-06, "loss": 0.2598, "step": 19422 }, { "epoch": 63.68196721311475, "grad_norm": 2.6954972743988037, "learning_rate": 6.156126357553416e-06, "loss": 0.079, "step": 19423 }, { "epoch": 63.68524590163934, "grad_norm": 3.4968113899230957, "learning_rate": 6.155146075000502e-06, "loss": 0.3204, "step": 19424 }, { "epoch": 63.68852459016394, "grad_norm": 2.826904296875, "learning_rate": 6.154165835802312e-06, "loss": 0.2503, "step": 19425 }, { "epoch": 63.691803278688525, "grad_norm": 2.9250075817108154, "learning_rate": 6.1531856399699035e-06, "loss": 0.1795, "step": 19426 }, { "epoch": 63.69508196721311, "grad_norm": 2.4091989994049072, "learning_rate": 6.152205487514321e-06, "loss": 0.387, "step": 19427 }, { "epoch": 63.6983606557377, "grad_norm": 3.0768749713897705, "learning_rate": 6.151225378446626e-06, "loss": 0.2325, "step": 19428 }, { "epoch": 63.7016393442623, "grad_norm": 3.5033202171325684, "learning_rate": 6.150245312777867e-06, "loss": 0.2183, "step": 19429 }, { "epoch": 63.704918032786885, "grad_norm": 2.109302043914795, "learning_rate": 6.1492652905190955e-06, "loss": 0.0529, "step": 19430 }, { "epoch": 63.708196721311474, "grad_norm": 2.734785318374634, "learning_rate": 6.1482853116813565e-06, "loss": 0.0713, "step": 19431 }, { "epoch": 63.71147540983607, "grad_norm": 2.67166805267334, "learning_rate": 6.1473053762757105e-06, "loss": 0.1554, "step": 19432 }, { "epoch": 63.71475409836066, "grad_norm": 2.5195837020874023, "learning_rate": 6.146325484313203e-06, "loss": 0.1226, "step": 19433 }, { "epoch": 63.718032786885246, "grad_norm": 3.540787696838379, "learning_rate": 6.14534563580488e-06, "loss": 0.1232, "step": 19434 }, { "epoch": 63.721311475409834, "grad_norm": 2.903836727142334, "learning_rate": 6.144365830761789e-06, "loss": 0.1264, "step": 19435 }, { "epoch": 63.72459016393443, "grad_norm": 2.6382203102111816, "learning_rate": 6.1433860691949875e-06, "loss": 0.1609, "step": 19436 }, { "epoch": 63.72786885245902, "grad_norm": 2.8641371726989746, "learning_rate": 6.142406351115517e-06, "loss": 0.1581, "step": 19437 }, { "epoch": 63.731147540983606, "grad_norm": 2.732797622680664, "learning_rate": 6.141426676534428e-06, "loss": 0.1884, "step": 19438 }, { "epoch": 63.734426229508195, "grad_norm": 3.6916706562042236, "learning_rate": 6.140447045462763e-06, "loss": 0.1697, "step": 19439 }, { "epoch": 63.73770491803279, "grad_norm": 2.7048838138580322, "learning_rate": 6.139467457911566e-06, "loss": 0.2682, "step": 19440 }, { "epoch": 63.74098360655738, "grad_norm": 2.980854034423828, "learning_rate": 6.138487913891893e-06, "loss": 0.2317, "step": 19441 }, { "epoch": 63.74426229508197, "grad_norm": 2.1578989028930664, "learning_rate": 6.137508413414784e-06, "loss": 0.1522, "step": 19442 }, { "epoch": 63.747540983606555, "grad_norm": 1.9004441499710083, "learning_rate": 6.136528956491279e-06, "loss": 0.0915, "step": 19443 }, { "epoch": 63.75081967213115, "grad_norm": 3.4029245376586914, "learning_rate": 6.135549543132428e-06, "loss": 0.3255, "step": 19444 }, { "epoch": 63.75409836065574, "grad_norm": 2.4863574504852295, "learning_rate": 6.134570173349277e-06, "loss": 0.1025, "step": 19445 }, { "epoch": 63.75737704918033, "grad_norm": 2.701521635055542, "learning_rate": 6.1335908471528636e-06, "loss": 0.3364, "step": 19446 }, { "epoch": 63.760655737704916, "grad_norm": 3.5468037128448486, "learning_rate": 6.132611564554236e-06, "loss": 0.1357, "step": 19447 }, { "epoch": 63.76393442622951, "grad_norm": 2.5991973876953125, "learning_rate": 6.1316323255644316e-06, "loss": 0.0593, "step": 19448 }, { "epoch": 63.7672131147541, "grad_norm": 2.5077688694000244, "learning_rate": 6.130653130194496e-06, "loss": 0.1551, "step": 19449 }, { "epoch": 63.77049180327869, "grad_norm": 2.439528226852417, "learning_rate": 6.12967397845547e-06, "loss": 0.1157, "step": 19450 }, { "epoch": 63.773770491803276, "grad_norm": 2.580012083053589, "learning_rate": 6.1286948703583946e-06, "loss": 0.0805, "step": 19451 }, { "epoch": 63.77704918032787, "grad_norm": 2.9328925609588623, "learning_rate": 6.12771580591431e-06, "loss": 0.169, "step": 19452 }, { "epoch": 63.78032786885246, "grad_norm": 3.544226884841919, "learning_rate": 6.1267367851342506e-06, "loss": 0.1656, "step": 19453 }, { "epoch": 63.78360655737705, "grad_norm": 2.8606438636779785, "learning_rate": 6.125757808029267e-06, "loss": 0.1801, "step": 19454 }, { "epoch": 63.78688524590164, "grad_norm": 2.569754123687744, "learning_rate": 6.124778874610391e-06, "loss": 0.1525, "step": 19455 }, { "epoch": 63.79016393442623, "grad_norm": 2.9815516471862793, "learning_rate": 6.1237999848886634e-06, "loss": 0.0725, "step": 19456 }, { "epoch": 63.79344262295082, "grad_norm": 2.1341912746429443, "learning_rate": 6.122821138875117e-06, "loss": 0.0835, "step": 19457 }, { "epoch": 63.79672131147541, "grad_norm": 2.4172563552856445, "learning_rate": 6.121842336580797e-06, "loss": 0.418, "step": 19458 }, { "epoch": 63.8, "grad_norm": 4.0934038162231445, "learning_rate": 6.120863578016736e-06, "loss": 0.1983, "step": 19459 }, { "epoch": 63.80327868852459, "grad_norm": 3.494361400604248, "learning_rate": 6.1198848631939725e-06, "loss": 0.2148, "step": 19460 }, { "epoch": 63.80655737704918, "grad_norm": 3.171924352645874, "learning_rate": 6.118906192123537e-06, "loss": 0.1683, "step": 19461 }, { "epoch": 63.80983606557377, "grad_norm": 3.107264995574951, "learning_rate": 6.1179275648164745e-06, "loss": 0.1765, "step": 19462 }, { "epoch": 63.81311475409836, "grad_norm": 2.758596181869507, "learning_rate": 6.116948981283815e-06, "loss": 0.1324, "step": 19463 }, { "epoch": 63.81639344262295, "grad_norm": 2.9920473098754883, "learning_rate": 6.115970441536591e-06, "loss": 0.1855, "step": 19464 }, { "epoch": 63.81967213114754, "grad_norm": 2.3844234943389893, "learning_rate": 6.114991945585842e-06, "loss": 0.0628, "step": 19465 }, { "epoch": 63.82295081967213, "grad_norm": 2.4489681720733643, "learning_rate": 6.1140134934425925e-06, "loss": 0.1568, "step": 19466 }, { "epoch": 63.82622950819672, "grad_norm": 2.559753656387329, "learning_rate": 6.113035085117884e-06, "loss": 0.1934, "step": 19467 }, { "epoch": 63.829508196721314, "grad_norm": 3.6285617351531982, "learning_rate": 6.112056720622748e-06, "loss": 0.2545, "step": 19468 }, { "epoch": 63.8327868852459, "grad_norm": 3.3149068355560303, "learning_rate": 6.111078399968212e-06, "loss": 0.2482, "step": 19469 }, { "epoch": 63.83606557377049, "grad_norm": 3.107403039932251, "learning_rate": 6.110100123165311e-06, "loss": 0.2055, "step": 19470 }, { "epoch": 63.83934426229508, "grad_norm": 2.0445616245269775, "learning_rate": 6.109121890225078e-06, "loss": 0.0669, "step": 19471 }, { "epoch": 63.842622950819674, "grad_norm": 2.8536086082458496, "learning_rate": 6.108143701158541e-06, "loss": 0.1038, "step": 19472 }, { "epoch": 63.84590163934426, "grad_norm": 2.5172555446624756, "learning_rate": 6.107165555976728e-06, "loss": 0.1722, "step": 19473 }, { "epoch": 63.84918032786885, "grad_norm": 2.8662755489349365, "learning_rate": 6.10618745469067e-06, "loss": 0.0901, "step": 19474 }, { "epoch": 63.85245901639344, "grad_norm": 3.549619674682617, "learning_rate": 6.1052093973114e-06, "loss": 0.21, "step": 19475 }, { "epoch": 63.855737704918035, "grad_norm": 2.610377788543701, "learning_rate": 6.1042313838499425e-06, "loss": 0.0619, "step": 19476 }, { "epoch": 63.85901639344262, "grad_norm": 2.0094027519226074, "learning_rate": 6.103253414317328e-06, "loss": 0.0705, "step": 19477 }, { "epoch": 63.86229508196721, "grad_norm": 2.7160985469818115, "learning_rate": 6.1022754887245825e-06, "loss": 0.2163, "step": 19478 }, { "epoch": 63.86557377049181, "grad_norm": 2.8070359230041504, "learning_rate": 6.101297607082729e-06, "loss": 0.096, "step": 19479 }, { "epoch": 63.868852459016395, "grad_norm": 2.4912338256835938, "learning_rate": 6.100319769402803e-06, "loss": 0.1695, "step": 19480 }, { "epoch": 63.87213114754098, "grad_norm": 2.62861704826355, "learning_rate": 6.099341975695827e-06, "loss": 0.1366, "step": 19481 }, { "epoch": 63.87540983606557, "grad_norm": 2.751498222351074, "learning_rate": 6.098364225972826e-06, "loss": 0.0913, "step": 19482 }, { "epoch": 63.87868852459017, "grad_norm": 3.121288537979126, "learning_rate": 6.097386520244819e-06, "loss": 0.3265, "step": 19483 }, { "epoch": 63.881967213114756, "grad_norm": 2.4261739253997803, "learning_rate": 6.0964088585228424e-06, "loss": 0.155, "step": 19484 }, { "epoch": 63.885245901639344, "grad_norm": 2.6291775703430176, "learning_rate": 6.095431240817915e-06, "loss": 0.0789, "step": 19485 }, { "epoch": 63.88852459016393, "grad_norm": 2.866149663925171, "learning_rate": 6.094453667141059e-06, "loss": 0.1121, "step": 19486 }, { "epoch": 63.89180327868853, "grad_norm": 2.5904664993286133, "learning_rate": 6.0934761375032955e-06, "loss": 0.1004, "step": 19487 }, { "epoch": 63.895081967213116, "grad_norm": 3.4772796630859375, "learning_rate": 6.092498651915654e-06, "loss": 0.2945, "step": 19488 }, { "epoch": 63.898360655737704, "grad_norm": 2.6048290729522705, "learning_rate": 6.091521210389153e-06, "loss": 0.1518, "step": 19489 }, { "epoch": 63.90163934426229, "grad_norm": 2.84377384185791, "learning_rate": 6.090543812934812e-06, "loss": 0.1786, "step": 19490 }, { "epoch": 63.90491803278689, "grad_norm": 2.8345654010772705, "learning_rate": 6.089566459563659e-06, "loss": 0.2671, "step": 19491 }, { "epoch": 63.90819672131148, "grad_norm": 2.6825640201568604, "learning_rate": 6.088589150286702e-06, "loss": 0.1555, "step": 19492 }, { "epoch": 63.911475409836065, "grad_norm": 2.3053746223449707, "learning_rate": 6.087611885114974e-06, "loss": 0.127, "step": 19493 }, { "epoch": 63.91475409836065, "grad_norm": 2.7089436054229736, "learning_rate": 6.086634664059491e-06, "loss": 0.1476, "step": 19494 }, { "epoch": 63.91803278688525, "grad_norm": 3.131383180618286, "learning_rate": 6.085657487131271e-06, "loss": 0.2115, "step": 19495 }, { "epoch": 63.92131147540984, "grad_norm": 3.2050414085388184, "learning_rate": 6.084680354341328e-06, "loss": 0.2312, "step": 19496 }, { "epoch": 63.924590163934425, "grad_norm": 2.8240668773651123, "learning_rate": 6.083703265700689e-06, "loss": 0.1003, "step": 19497 }, { "epoch": 63.927868852459014, "grad_norm": 2.7530882358551025, "learning_rate": 6.082726221220368e-06, "loss": 0.2159, "step": 19498 }, { "epoch": 63.93114754098361, "grad_norm": 3.1347110271453857, "learning_rate": 6.0817492209113795e-06, "loss": 0.1611, "step": 19499 }, { "epoch": 63.9344262295082, "grad_norm": 3.1594886779785156, "learning_rate": 6.080772264784743e-06, "loss": 0.2784, "step": 19500 }, { "epoch": 63.937704918032786, "grad_norm": 2.953329086303711, "learning_rate": 6.079795352851474e-06, "loss": 0.1927, "step": 19501 }, { "epoch": 63.940983606557374, "grad_norm": 3.400719165802002, "learning_rate": 6.078818485122589e-06, "loss": 0.1488, "step": 19502 }, { "epoch": 63.94426229508197, "grad_norm": 2.266174554824829, "learning_rate": 6.077841661609103e-06, "loss": 0.1459, "step": 19503 }, { "epoch": 63.94754098360656, "grad_norm": 2.690199375152588, "learning_rate": 6.0768648823220314e-06, "loss": 0.1023, "step": 19504 }, { "epoch": 63.950819672131146, "grad_norm": 2.8716397285461426, "learning_rate": 6.075888147272382e-06, "loss": 0.15, "step": 19505 }, { "epoch": 63.954098360655735, "grad_norm": 3.076289415359497, "learning_rate": 6.07491145647118e-06, "loss": 0.2629, "step": 19506 }, { "epoch": 63.95737704918033, "grad_norm": 3.085196018218994, "learning_rate": 6.07393480992943e-06, "loss": 0.2149, "step": 19507 }, { "epoch": 63.96065573770492, "grad_norm": 1.9498063325881958, "learning_rate": 6.072958207658149e-06, "loss": 0.115, "step": 19508 }, { "epoch": 63.96393442622951, "grad_norm": 2.4469728469848633, "learning_rate": 6.0719816496683415e-06, "loss": 0.1361, "step": 19509 }, { "epoch": 63.967213114754095, "grad_norm": 2.7260079383850098, "learning_rate": 6.07100513597103e-06, "loss": 0.3039, "step": 19510 }, { "epoch": 63.97049180327869, "grad_norm": 3.369951009750366, "learning_rate": 6.0700286665772215e-06, "loss": 0.1063, "step": 19511 }, { "epoch": 63.97377049180328, "grad_norm": 2.913228750228882, "learning_rate": 6.069052241497924e-06, "loss": 0.2069, "step": 19512 }, { "epoch": 63.97704918032787, "grad_norm": 3.9358315467834473, "learning_rate": 6.068075860744147e-06, "loss": 0.2129, "step": 19513 }, { "epoch": 63.980327868852456, "grad_norm": 2.3295772075653076, "learning_rate": 6.067099524326907e-06, "loss": 0.1389, "step": 19514 }, { "epoch": 63.98360655737705, "grad_norm": 3.1629014015197754, "learning_rate": 6.066123232257208e-06, "loss": 0.1662, "step": 19515 }, { "epoch": 63.98688524590164, "grad_norm": 3.876988649368286, "learning_rate": 6.06514698454606e-06, "loss": 0.1356, "step": 19516 }, { "epoch": 63.99016393442623, "grad_norm": 2.6200129985809326, "learning_rate": 6.064170781204472e-06, "loss": 0.1711, "step": 19517 }, { "epoch": 63.993442622950816, "grad_norm": 3.09329891204834, "learning_rate": 6.063194622243446e-06, "loss": 0.1509, "step": 19518 }, { "epoch": 63.99672131147541, "grad_norm": 3.4894251823425293, "learning_rate": 6.062218507673998e-06, "loss": 0.1957, "step": 19519 }, { "epoch": 64.0, "grad_norm": 3.3071770668029785, "learning_rate": 6.061242437507131e-06, "loss": 0.1984, "step": 19520 }, { "epoch": 64.00327868852459, "grad_norm": 3.089019298553467, "learning_rate": 6.0602664117538505e-06, "loss": 0.0945, "step": 19521 }, { "epoch": 64.00655737704918, "grad_norm": 2.6409480571746826, "learning_rate": 6.059290430425158e-06, "loss": 0.1158, "step": 19522 }, { "epoch": 64.00983606557377, "grad_norm": 2.699068784713745, "learning_rate": 6.058314493532067e-06, "loss": 0.2383, "step": 19523 }, { "epoch": 64.01311475409837, "grad_norm": 2.474478006362915, "learning_rate": 6.0573386010855784e-06, "loss": 0.1549, "step": 19524 }, { "epoch": 64.01639344262296, "grad_norm": 2.357342481613159, "learning_rate": 6.056362753096695e-06, "loss": 0.1894, "step": 19525 }, { "epoch": 64.01967213114754, "grad_norm": 3.344001054763794, "learning_rate": 6.0553869495764225e-06, "loss": 0.0794, "step": 19526 }, { "epoch": 64.02295081967213, "grad_norm": 2.327359676361084, "learning_rate": 6.054411190535765e-06, "loss": 0.1849, "step": 19527 }, { "epoch": 64.02622950819672, "grad_norm": 3.323065996170044, "learning_rate": 6.053435475985721e-06, "loss": 0.104, "step": 19528 }, { "epoch": 64.02950819672131, "grad_norm": 3.359515428543091, "learning_rate": 6.0524598059372986e-06, "loss": 0.2647, "step": 19529 }, { "epoch": 64.0327868852459, "grad_norm": 2.628192663192749, "learning_rate": 6.051484180401495e-06, "loss": 0.1023, "step": 19530 }, { "epoch": 64.03606557377049, "grad_norm": 2.7603275775909424, "learning_rate": 6.050508599389311e-06, "loss": 0.2211, "step": 19531 }, { "epoch": 64.03934426229509, "grad_norm": 2.561619281768799, "learning_rate": 6.04953306291175e-06, "loss": 0.2121, "step": 19532 }, { "epoch": 64.04262295081968, "grad_norm": 2.913400411605835, "learning_rate": 6.048557570979811e-06, "loss": 0.2365, "step": 19533 }, { "epoch": 64.04590163934427, "grad_norm": 2.7041141986846924, "learning_rate": 6.047582123604496e-06, "loss": 0.3358, "step": 19534 }, { "epoch": 64.04918032786885, "grad_norm": 2.928765296936035, "learning_rate": 6.0466067207967985e-06, "loss": 0.0828, "step": 19535 }, { "epoch": 64.05245901639344, "grad_norm": 2.4961469173431396, "learning_rate": 6.045631362567723e-06, "loss": 0.0737, "step": 19536 }, { "epoch": 64.05573770491803, "grad_norm": 2.79634952545166, "learning_rate": 6.044656048928266e-06, "loss": 0.2034, "step": 19537 }, { "epoch": 64.05901639344262, "grad_norm": 3.682969331741333, "learning_rate": 6.0436807798894245e-06, "loss": 0.1449, "step": 19538 }, { "epoch": 64.0622950819672, "grad_norm": 3.0555012226104736, "learning_rate": 6.042705555462192e-06, "loss": 0.2704, "step": 19539 }, { "epoch": 64.06557377049181, "grad_norm": 2.3067431449890137, "learning_rate": 6.041730375657573e-06, "loss": 0.1175, "step": 19540 }, { "epoch": 64.0688524590164, "grad_norm": 2.602926254272461, "learning_rate": 6.0407552404865595e-06, "loss": 0.1536, "step": 19541 }, { "epoch": 64.07213114754099, "grad_norm": 2.1356985569000244, "learning_rate": 6.0397801499601475e-06, "loss": 0.2119, "step": 19542 }, { "epoch": 64.07540983606557, "grad_norm": 3.369687080383301, "learning_rate": 6.038805104089331e-06, "loss": 0.1739, "step": 19543 }, { "epoch": 64.07868852459016, "grad_norm": 5.918057918548584, "learning_rate": 6.037830102885103e-06, "loss": 0.1791, "step": 19544 }, { "epoch": 64.08196721311475, "grad_norm": 3.347134590148926, "learning_rate": 6.036855146358462e-06, "loss": 0.204, "step": 19545 }, { "epoch": 64.08524590163934, "grad_norm": 2.625312089920044, "learning_rate": 6.035880234520401e-06, "loss": 0.2183, "step": 19546 }, { "epoch": 64.08852459016393, "grad_norm": 3.000408411026001, "learning_rate": 6.034905367381914e-06, "loss": 0.2807, "step": 19547 }, { "epoch": 64.09180327868853, "grad_norm": 2.435990571975708, "learning_rate": 6.033930544953985e-06, "loss": 0.1235, "step": 19548 }, { "epoch": 64.09508196721312, "grad_norm": 3.0429000854492188, "learning_rate": 6.032955767247617e-06, "loss": 0.0956, "step": 19549 }, { "epoch": 64.09836065573771, "grad_norm": 2.7008368968963623, "learning_rate": 6.031981034273798e-06, "loss": 0.096, "step": 19550 }, { "epoch": 64.1016393442623, "grad_norm": 2.989147663116455, "learning_rate": 6.0310063460435175e-06, "loss": 0.1087, "step": 19551 }, { "epoch": 64.10491803278688, "grad_norm": 2.285576343536377, "learning_rate": 6.030031702567766e-06, "loss": 0.1456, "step": 19552 }, { "epoch": 64.10819672131147, "grad_norm": 3.1644251346588135, "learning_rate": 6.0290571038575365e-06, "loss": 0.2631, "step": 19553 }, { "epoch": 64.11147540983606, "grad_norm": 2.6277482509613037, "learning_rate": 6.028082549923817e-06, "loss": 0.1423, "step": 19554 }, { "epoch": 64.11475409836065, "grad_norm": 3.307398796081543, "learning_rate": 6.027108040777596e-06, "loss": 0.2225, "step": 19555 }, { "epoch": 64.11803278688525, "grad_norm": 2.812201499938965, "learning_rate": 6.026133576429862e-06, "loss": 0.1201, "step": 19556 }, { "epoch": 64.12131147540984, "grad_norm": 2.8316619396209717, "learning_rate": 6.025159156891605e-06, "loss": 0.2528, "step": 19557 }, { "epoch": 64.12459016393443, "grad_norm": 2.699507474899292, "learning_rate": 6.0241847821738085e-06, "loss": 0.1222, "step": 19558 }, { "epoch": 64.12786885245902, "grad_norm": 2.1398696899414062, "learning_rate": 6.023210452287466e-06, "loss": 0.0627, "step": 19559 }, { "epoch": 64.1311475409836, "grad_norm": 2.9665095806121826, "learning_rate": 6.02223616724356e-06, "loss": 0.1734, "step": 19560 }, { "epoch": 64.1344262295082, "grad_norm": 3.531057119369507, "learning_rate": 6.0212619270530725e-06, "loss": 0.2657, "step": 19561 }, { "epoch": 64.13770491803278, "grad_norm": 2.8798208236694336, "learning_rate": 6.0202877317269985e-06, "loss": 0.1192, "step": 19562 }, { "epoch": 64.14098360655737, "grad_norm": 4.542744159698486, "learning_rate": 6.019313581276318e-06, "loss": 0.273, "step": 19563 }, { "epoch": 64.14426229508197, "grad_norm": 3.10831356048584, "learning_rate": 6.018339475712015e-06, "loss": 0.2355, "step": 19564 }, { "epoch": 64.14754098360656, "grad_norm": 2.244840145111084, "learning_rate": 6.017365415045071e-06, "loss": 0.0788, "step": 19565 }, { "epoch": 64.15081967213115, "grad_norm": 2.3462109565734863, "learning_rate": 6.016391399286476e-06, "loss": 0.0954, "step": 19566 }, { "epoch": 64.15409836065574, "grad_norm": 3.520063638687134, "learning_rate": 6.0154174284472115e-06, "loss": 0.1898, "step": 19567 }, { "epoch": 64.15737704918033, "grad_norm": 4.692501068115234, "learning_rate": 6.014443502538256e-06, "loss": 0.2797, "step": 19568 }, { "epoch": 64.16065573770491, "grad_norm": 1.674110770225525, "learning_rate": 6.013469621570597e-06, "loss": 0.122, "step": 19569 }, { "epoch": 64.1639344262295, "grad_norm": 3.7318716049194336, "learning_rate": 6.012495785555207e-06, "loss": 0.117, "step": 19570 }, { "epoch": 64.1672131147541, "grad_norm": 3.629443645477295, "learning_rate": 6.011521994503078e-06, "loss": 0.1114, "step": 19571 }, { "epoch": 64.1704918032787, "grad_norm": 3.1337687969207764, "learning_rate": 6.010548248425184e-06, "loss": 0.117, "step": 19572 }, { "epoch": 64.17377049180328, "grad_norm": 2.956638813018799, "learning_rate": 6.009574547332507e-06, "loss": 0.2136, "step": 19573 }, { "epoch": 64.17704918032787, "grad_norm": 2.3383724689483643, "learning_rate": 6.008600891236023e-06, "loss": 0.138, "step": 19574 }, { "epoch": 64.18032786885246, "grad_norm": 3.274022102355957, "learning_rate": 6.007627280146717e-06, "loss": 0.1997, "step": 19575 }, { "epoch": 64.18360655737705, "grad_norm": 3.416609048843384, "learning_rate": 6.0066537140755655e-06, "loss": 0.283, "step": 19576 }, { "epoch": 64.18688524590164, "grad_norm": 4.015628814697266, "learning_rate": 6.005680193033544e-06, "loss": 0.2647, "step": 19577 }, { "epoch": 64.19016393442622, "grad_norm": 8.511327743530273, "learning_rate": 6.00470671703163e-06, "loss": 0.1518, "step": 19578 }, { "epoch": 64.19344262295083, "grad_norm": 2.7417733669281006, "learning_rate": 6.0037332860808015e-06, "loss": 0.2269, "step": 19579 }, { "epoch": 64.19672131147541, "grad_norm": 2.938430070877075, "learning_rate": 6.002759900192039e-06, "loss": 0.0962, "step": 19580 }, { "epoch": 64.2, "grad_norm": 2.4978229999542236, "learning_rate": 6.00178655937631e-06, "loss": 0.0683, "step": 19581 }, { "epoch": 64.20327868852459, "grad_norm": 2.2407708168029785, "learning_rate": 6.0008132636445986e-06, "loss": 0.08, "step": 19582 }, { "epoch": 64.20655737704918, "grad_norm": 2.4705464839935303, "learning_rate": 5.999840013007874e-06, "loss": 0.109, "step": 19583 }, { "epoch": 64.20983606557377, "grad_norm": 2.925861120223999, "learning_rate": 5.998866807477111e-06, "loss": 0.1347, "step": 19584 }, { "epoch": 64.21311475409836, "grad_norm": 2.792227268218994, "learning_rate": 5.997893647063286e-06, "loss": 0.1749, "step": 19585 }, { "epoch": 64.21639344262294, "grad_norm": 27.5219669342041, "learning_rate": 5.996920531777374e-06, "loss": 0.0862, "step": 19586 }, { "epoch": 64.21967213114755, "grad_norm": 2.804593563079834, "learning_rate": 5.99594746163034e-06, "loss": 0.1119, "step": 19587 }, { "epoch": 64.22295081967214, "grad_norm": 2.573207139968872, "learning_rate": 5.994974436633165e-06, "loss": 0.1227, "step": 19588 }, { "epoch": 64.22622950819672, "grad_norm": 3.618044853210449, "learning_rate": 5.994001456796818e-06, "loss": 0.1852, "step": 19589 }, { "epoch": 64.22950819672131, "grad_norm": 3.249833583831787, "learning_rate": 5.993028522132269e-06, "loss": 0.1973, "step": 19590 }, { "epoch": 64.2327868852459, "grad_norm": 7.158759593963623, "learning_rate": 5.9920556326504906e-06, "loss": 0.0837, "step": 19591 }, { "epoch": 64.23606557377049, "grad_norm": 2.525092124938965, "learning_rate": 5.991082788362448e-06, "loss": 0.0669, "step": 19592 }, { "epoch": 64.23934426229508, "grad_norm": 2.568553924560547, "learning_rate": 5.990109989279118e-06, "loss": 0.1532, "step": 19593 }, { "epoch": 64.24262295081967, "grad_norm": 3.475433111190796, "learning_rate": 5.989137235411469e-06, "loss": 0.1408, "step": 19594 }, { "epoch": 64.24590163934427, "grad_norm": 3.5105934143066406, "learning_rate": 5.988164526770467e-06, "loss": 0.1734, "step": 19595 }, { "epoch": 64.24918032786886, "grad_norm": 3.084929943084717, "learning_rate": 5.987191863367077e-06, "loss": 0.315, "step": 19596 }, { "epoch": 64.25245901639344, "grad_norm": 2.7993898391723633, "learning_rate": 5.986219245212275e-06, "loss": 0.2246, "step": 19597 }, { "epoch": 64.25573770491803, "grad_norm": 3.7886931896209717, "learning_rate": 5.985246672317025e-06, "loss": 0.1885, "step": 19598 }, { "epoch": 64.25901639344262, "grad_norm": 3.4998676776885986, "learning_rate": 5.9842741446922926e-06, "loss": 0.235, "step": 19599 }, { "epoch": 64.26229508196721, "grad_norm": 3.164705276489258, "learning_rate": 5.9833016623490405e-06, "loss": 0.0879, "step": 19600 }, { "epoch": 64.2655737704918, "grad_norm": 2.9820940494537354, "learning_rate": 5.982329225298243e-06, "loss": 0.1598, "step": 19601 }, { "epoch": 64.26885245901639, "grad_norm": 3.1645913124084473, "learning_rate": 5.98135683355086e-06, "loss": 0.1112, "step": 19602 }, { "epoch": 64.27213114754099, "grad_norm": 3.164437770843506, "learning_rate": 5.980384487117856e-06, "loss": 0.2889, "step": 19603 }, { "epoch": 64.27540983606558, "grad_norm": 3.084846019744873, "learning_rate": 5.979412186010196e-06, "loss": 0.1992, "step": 19604 }, { "epoch": 64.27868852459017, "grad_norm": 3.6413192749023438, "learning_rate": 5.978439930238844e-06, "loss": 0.1765, "step": 19605 }, { "epoch": 64.28196721311475, "grad_norm": 2.69891357421875, "learning_rate": 5.977467719814765e-06, "loss": 0.1616, "step": 19606 }, { "epoch": 64.28524590163934, "grad_norm": 3.24314546585083, "learning_rate": 5.976495554748918e-06, "loss": 0.1173, "step": 19607 }, { "epoch": 64.28852459016393, "grad_norm": 3.1591920852661133, "learning_rate": 5.975523435052268e-06, "loss": 0.2157, "step": 19608 }, { "epoch": 64.29180327868852, "grad_norm": 3.80507493019104, "learning_rate": 5.9745513607357744e-06, "loss": 0.1029, "step": 19609 }, { "epoch": 64.29508196721312, "grad_norm": 2.5803747177124023, "learning_rate": 5.9735793318104e-06, "loss": 0.116, "step": 19610 }, { "epoch": 64.29836065573771, "grad_norm": 4.072773456573486, "learning_rate": 5.972607348287105e-06, "loss": 0.1516, "step": 19611 }, { "epoch": 64.3016393442623, "grad_norm": 3.4942362308502197, "learning_rate": 5.971635410176853e-06, "loss": 0.1079, "step": 19612 }, { "epoch": 64.30491803278689, "grad_norm": 2.4101221561431885, "learning_rate": 5.9706635174905955e-06, "loss": 0.121, "step": 19613 }, { "epoch": 64.30819672131148, "grad_norm": 3.3126184940338135, "learning_rate": 5.969691670239297e-06, "loss": 0.1691, "step": 19614 }, { "epoch": 64.31147540983606, "grad_norm": 3.597491979598999, "learning_rate": 5.968719868433916e-06, "loss": 0.2121, "step": 19615 }, { "epoch": 64.31475409836065, "grad_norm": 2.7135720252990723, "learning_rate": 5.967748112085413e-06, "loss": 0.1599, "step": 19616 }, { "epoch": 64.31803278688524, "grad_norm": 2.782747507095337, "learning_rate": 5.966776401204741e-06, "loss": 0.0732, "step": 19617 }, { "epoch": 64.32131147540984, "grad_norm": 2.5078861713409424, "learning_rate": 5.965804735802854e-06, "loss": 0.0998, "step": 19618 }, { "epoch": 64.32459016393443, "grad_norm": 3.062633514404297, "learning_rate": 5.964833115890718e-06, "loss": 0.198, "step": 19619 }, { "epoch": 64.32786885245902, "grad_norm": 2.7910385131835938, "learning_rate": 5.963861541479283e-06, "loss": 0.2571, "step": 19620 }, { "epoch": 64.33114754098361, "grad_norm": 2.9810409545898438, "learning_rate": 5.962890012579508e-06, "loss": 0.2242, "step": 19621 }, { "epoch": 64.3344262295082, "grad_norm": 3.0125770568847656, "learning_rate": 5.9619185292023395e-06, "loss": 0.4002, "step": 19622 }, { "epoch": 64.33770491803278, "grad_norm": 2.641789436340332, "learning_rate": 5.960947091358743e-06, "loss": 0.2733, "step": 19623 }, { "epoch": 64.34098360655737, "grad_norm": 2.7840750217437744, "learning_rate": 5.959975699059668e-06, "loss": 0.1182, "step": 19624 }, { "epoch": 64.34426229508196, "grad_norm": 2.8031156063079834, "learning_rate": 5.959004352316067e-06, "loss": 0.1211, "step": 19625 }, { "epoch": 64.34754098360656, "grad_norm": 3.5460052490234375, "learning_rate": 5.958033051138888e-06, "loss": 0.2849, "step": 19626 }, { "epoch": 64.35081967213115, "grad_norm": 3.4421651363372803, "learning_rate": 5.9570617955390955e-06, "loss": 0.2601, "step": 19627 }, { "epoch": 64.35409836065574, "grad_norm": 2.706965923309326, "learning_rate": 5.956090585527634e-06, "loss": 0.0702, "step": 19628 }, { "epoch": 64.35737704918033, "grad_norm": 3.2342371940612793, "learning_rate": 5.955119421115455e-06, "loss": 0.1847, "step": 19629 }, { "epoch": 64.36065573770492, "grad_norm": 2.376574993133545, "learning_rate": 5.954148302313509e-06, "loss": 0.072, "step": 19630 }, { "epoch": 64.3639344262295, "grad_norm": 3.4713919162750244, "learning_rate": 5.953177229132747e-06, "loss": 0.1529, "step": 19631 }, { "epoch": 64.3672131147541, "grad_norm": 2.4379382133483887, "learning_rate": 5.952206201584121e-06, "loss": 0.1599, "step": 19632 }, { "epoch": 64.37049180327868, "grad_norm": 2.8599355220794678, "learning_rate": 5.951235219678577e-06, "loss": 0.0987, "step": 19633 }, { "epoch": 64.37377049180328, "grad_norm": 2.978344678878784, "learning_rate": 5.950264283427069e-06, "loss": 0.0902, "step": 19634 }, { "epoch": 64.37704918032787, "grad_norm": 2.8653652667999268, "learning_rate": 5.9492933928405384e-06, "loss": 0.1217, "step": 19635 }, { "epoch": 64.38032786885246, "grad_norm": 2.172358274459839, "learning_rate": 5.948322547929939e-06, "loss": 0.1193, "step": 19636 }, { "epoch": 64.38360655737705, "grad_norm": 3.572382926940918, "learning_rate": 5.947351748706214e-06, "loss": 0.1818, "step": 19637 }, { "epoch": 64.38688524590164, "grad_norm": 2.4231719970703125, "learning_rate": 5.946380995180312e-06, "loss": 0.2029, "step": 19638 }, { "epoch": 64.39016393442623, "grad_norm": 2.9068334102630615, "learning_rate": 5.945410287363178e-06, "loss": 0.0998, "step": 19639 }, { "epoch": 64.39344262295081, "grad_norm": 3.1377525329589844, "learning_rate": 5.9444396252657586e-06, "loss": 0.1892, "step": 19640 }, { "epoch": 64.3967213114754, "grad_norm": 2.464677095413208, "learning_rate": 5.943469008899001e-06, "loss": 0.1737, "step": 19641 }, { "epoch": 64.4, "grad_norm": 2.899812936782837, "learning_rate": 5.942498438273849e-06, "loss": 0.1512, "step": 19642 }, { "epoch": 64.4032786885246, "grad_norm": 2.238007068634033, "learning_rate": 5.941527913401246e-06, "loss": 0.1137, "step": 19643 }, { "epoch": 64.40655737704918, "grad_norm": 2.231213331222534, "learning_rate": 5.9405574342921305e-06, "loss": 0.0636, "step": 19644 }, { "epoch": 64.40983606557377, "grad_norm": 6.817967414855957, "learning_rate": 5.939587000957454e-06, "loss": 0.137, "step": 19645 }, { "epoch": 64.41311475409836, "grad_norm": 2.5347390174865723, "learning_rate": 5.938616613408157e-06, "loss": 0.1227, "step": 19646 }, { "epoch": 64.41639344262295, "grad_norm": 2.408568859100342, "learning_rate": 5.937646271655181e-06, "loss": 0.0823, "step": 19647 }, { "epoch": 64.41967213114754, "grad_norm": 2.2165708541870117, "learning_rate": 5.936675975709463e-06, "loss": 0.0695, "step": 19648 }, { "epoch": 64.42295081967212, "grad_norm": 3.0106570720672607, "learning_rate": 5.935705725581951e-06, "loss": 0.1949, "step": 19649 }, { "epoch": 64.42622950819673, "grad_norm": 2.7891268730163574, "learning_rate": 5.934735521283583e-06, "loss": 0.2733, "step": 19650 }, { "epoch": 64.42950819672132, "grad_norm": 3.4397242069244385, "learning_rate": 5.933765362825299e-06, "loss": 0.2135, "step": 19651 }, { "epoch": 64.4327868852459, "grad_norm": 2.3942320346832275, "learning_rate": 5.932795250218035e-06, "loss": 0.1471, "step": 19652 }, { "epoch": 64.43606557377049, "grad_norm": 2.292356014251709, "learning_rate": 5.931825183472737e-06, "loss": 0.1378, "step": 19653 }, { "epoch": 64.43934426229508, "grad_norm": 3.067209243774414, "learning_rate": 5.93085516260034e-06, "loss": 0.1116, "step": 19654 }, { "epoch": 64.44262295081967, "grad_norm": 2.2682037353515625, "learning_rate": 5.929885187611782e-06, "loss": 0.0508, "step": 19655 }, { "epoch": 64.44590163934426, "grad_norm": 2.8257970809936523, "learning_rate": 5.928915258517999e-06, "loss": 0.0686, "step": 19656 }, { "epoch": 64.44918032786886, "grad_norm": 2.972702980041504, "learning_rate": 5.9279453753299265e-06, "loss": 0.1496, "step": 19657 }, { "epoch": 64.45245901639345, "grad_norm": 2.8748087882995605, "learning_rate": 5.926975538058508e-06, "loss": 0.1626, "step": 19658 }, { "epoch": 64.45573770491804, "grad_norm": 3.683302640914917, "learning_rate": 5.926005746714674e-06, "loss": 0.2082, "step": 19659 }, { "epoch": 64.45901639344262, "grad_norm": 3.390889883041382, "learning_rate": 5.92503600130936e-06, "loss": 0.4896, "step": 19660 }, { "epoch": 64.46229508196721, "grad_norm": 2.8866958618164062, "learning_rate": 5.924066301853502e-06, "loss": 0.2089, "step": 19661 }, { "epoch": 64.4655737704918, "grad_norm": 3.568044900894165, "learning_rate": 5.9230966483580355e-06, "loss": 0.1445, "step": 19662 }, { "epoch": 64.46885245901639, "grad_norm": 2.2630631923675537, "learning_rate": 5.922127040833892e-06, "loss": 0.1012, "step": 19663 }, { "epoch": 64.47213114754098, "grad_norm": 2.3132095336914062, "learning_rate": 5.921157479292008e-06, "loss": 0.0699, "step": 19664 }, { "epoch": 64.47540983606558, "grad_norm": 2.884925603866577, "learning_rate": 5.920187963743314e-06, "loss": 0.1094, "step": 19665 }, { "epoch": 64.47868852459017, "grad_norm": 2.823615074157715, "learning_rate": 5.91921849419874e-06, "loss": 0.102, "step": 19666 }, { "epoch": 64.48196721311476, "grad_norm": 2.556008815765381, "learning_rate": 5.9182490706692245e-06, "loss": 0.0798, "step": 19667 }, { "epoch": 64.48524590163935, "grad_norm": 3.2083563804626465, "learning_rate": 5.9172796931656915e-06, "loss": 0.1247, "step": 19668 }, { "epoch": 64.48852459016393, "grad_norm": 3.4245059490203857, "learning_rate": 5.916310361699078e-06, "loss": 0.179, "step": 19669 }, { "epoch": 64.49180327868852, "grad_norm": 2.6304733753204346, "learning_rate": 5.9153410762803055e-06, "loss": 0.3806, "step": 19670 }, { "epoch": 64.49508196721311, "grad_norm": 3.341945171356201, "learning_rate": 5.914371836920314e-06, "loss": 0.1981, "step": 19671 }, { "epoch": 64.4983606557377, "grad_norm": 3.3690569400787354, "learning_rate": 5.9134026436300286e-06, "loss": 0.2117, "step": 19672 }, { "epoch": 64.5016393442623, "grad_norm": 2.903005838394165, "learning_rate": 5.912433496420376e-06, "loss": 0.1321, "step": 19673 }, { "epoch": 64.50491803278689, "grad_norm": 3.3443050384521484, "learning_rate": 5.911464395302283e-06, "loss": 0.29, "step": 19674 }, { "epoch": 64.50819672131148, "grad_norm": 3.02756667137146, "learning_rate": 5.910495340286683e-06, "loss": 0.1482, "step": 19675 }, { "epoch": 64.51147540983607, "grad_norm": 3.0652737617492676, "learning_rate": 5.909526331384501e-06, "loss": 0.2555, "step": 19676 }, { "epoch": 64.51475409836065, "grad_norm": 2.2412571907043457, "learning_rate": 5.908557368606662e-06, "loss": 0.2027, "step": 19677 }, { "epoch": 64.51803278688524, "grad_norm": 2.928088665008545, "learning_rate": 5.90758845196409e-06, "loss": 0.3377, "step": 19678 }, { "epoch": 64.52131147540983, "grad_norm": 3.581956624984741, "learning_rate": 5.9066195814677165e-06, "loss": 0.1505, "step": 19679 }, { "epoch": 64.52459016393442, "grad_norm": 4.257174491882324, "learning_rate": 5.905650757128463e-06, "loss": 0.1095, "step": 19680 }, { "epoch": 64.52786885245902, "grad_norm": 3.172816514968872, "learning_rate": 5.904681978957254e-06, "loss": 0.1577, "step": 19681 }, { "epoch": 64.53114754098361, "grad_norm": 2.339653253555298, "learning_rate": 5.903713246965013e-06, "loss": 0.048, "step": 19682 }, { "epoch": 64.5344262295082, "grad_norm": 2.733269691467285, "learning_rate": 5.902744561162661e-06, "loss": 0.1395, "step": 19683 }, { "epoch": 64.53770491803279, "grad_norm": 2.642120361328125, "learning_rate": 5.901775921561128e-06, "loss": 0.2666, "step": 19684 }, { "epoch": 64.54098360655738, "grad_norm": 2.868537425994873, "learning_rate": 5.900807328171334e-06, "loss": 0.1913, "step": 19685 }, { "epoch": 64.54426229508196, "grad_norm": 2.7017459869384766, "learning_rate": 5.899838781004197e-06, "loss": 0.1994, "step": 19686 }, { "epoch": 64.54754098360655, "grad_norm": 3.32820463180542, "learning_rate": 5.8988702800706385e-06, "loss": 0.1532, "step": 19687 }, { "epoch": 64.55081967213114, "grad_norm": 3.531424045562744, "learning_rate": 5.897901825381587e-06, "loss": 0.2609, "step": 19688 }, { "epoch": 64.55409836065574, "grad_norm": 2.4106109142303467, "learning_rate": 5.896933416947953e-06, "loss": 0.0506, "step": 19689 }, { "epoch": 64.55737704918033, "grad_norm": 3.666055679321289, "learning_rate": 5.895965054780663e-06, "loss": 0.3059, "step": 19690 }, { "epoch": 64.56065573770492, "grad_norm": 3.1659770011901855, "learning_rate": 5.894996738890634e-06, "loss": 0.2182, "step": 19691 }, { "epoch": 64.56393442622951, "grad_norm": 2.715691566467285, "learning_rate": 5.894028469288784e-06, "loss": 0.2888, "step": 19692 }, { "epoch": 64.5672131147541, "grad_norm": 2.9181487560272217, "learning_rate": 5.893060245986033e-06, "loss": 0.2069, "step": 19693 }, { "epoch": 64.57049180327868, "grad_norm": 2.8295211791992188, "learning_rate": 5.892092068993299e-06, "loss": 0.1307, "step": 19694 }, { "epoch": 64.57377049180327, "grad_norm": 3.135869264602661, "learning_rate": 5.891123938321498e-06, "loss": 0.2187, "step": 19695 }, { "epoch": 64.57704918032788, "grad_norm": 2.785167694091797, "learning_rate": 5.890155853981543e-06, "loss": 0.1987, "step": 19696 }, { "epoch": 64.58032786885246, "grad_norm": 2.85578989982605, "learning_rate": 5.889187815984357e-06, "loss": 0.3155, "step": 19697 }, { "epoch": 64.58360655737705, "grad_norm": 2.901189088821411, "learning_rate": 5.8882198243408525e-06, "loss": 0.125, "step": 19698 }, { "epoch": 64.58688524590164, "grad_norm": 3.046372413635254, "learning_rate": 5.887251879061946e-06, "loss": 0.2016, "step": 19699 }, { "epoch": 64.59016393442623, "grad_norm": 4.5224714279174805, "learning_rate": 5.8862839801585466e-06, "loss": 0.1584, "step": 19700 }, { "epoch": 64.59344262295082, "grad_norm": 2.7974188327789307, "learning_rate": 5.885316127641576e-06, "loss": 0.2891, "step": 19701 }, { "epoch": 64.5967213114754, "grad_norm": 4.002865791320801, "learning_rate": 5.884348321521944e-06, "loss": 0.3329, "step": 19702 }, { "epoch": 64.6, "grad_norm": 3.137092113494873, "learning_rate": 5.8833805618105635e-06, "loss": 0.1959, "step": 19703 }, { "epoch": 64.6032786885246, "grad_norm": 2.6835737228393555, "learning_rate": 5.882412848518344e-06, "loss": 0.2207, "step": 19704 }, { "epoch": 64.60655737704919, "grad_norm": 3.3278491497039795, "learning_rate": 5.8814451816562045e-06, "loss": 0.249, "step": 19705 }, { "epoch": 64.60983606557377, "grad_norm": 2.993044137954712, "learning_rate": 5.880477561235054e-06, "loss": 0.1198, "step": 19706 }, { "epoch": 64.61311475409836, "grad_norm": 2.91184663772583, "learning_rate": 5.879509987265802e-06, "loss": 0.1519, "step": 19707 }, { "epoch": 64.61639344262295, "grad_norm": 2.386530876159668, "learning_rate": 5.878542459759358e-06, "loss": 0.0653, "step": 19708 }, { "epoch": 64.61967213114754, "grad_norm": 3.29290771484375, "learning_rate": 5.877574978726629e-06, "loss": 0.243, "step": 19709 }, { "epoch": 64.62295081967213, "grad_norm": 2.513864755630493, "learning_rate": 5.876607544178532e-06, "loss": 0.0728, "step": 19710 }, { "epoch": 64.62622950819672, "grad_norm": 2.4385054111480713, "learning_rate": 5.875640156125975e-06, "loss": 0.1643, "step": 19711 }, { "epoch": 64.62950819672132, "grad_norm": 2.6853134632110596, "learning_rate": 5.874672814579858e-06, "loss": 0.0869, "step": 19712 }, { "epoch": 64.6327868852459, "grad_norm": 2.8397645950317383, "learning_rate": 5.873705519551096e-06, "loss": 0.1066, "step": 19713 }, { "epoch": 64.6360655737705, "grad_norm": 2.654224157333374, "learning_rate": 5.872738271050597e-06, "loss": 0.2589, "step": 19714 }, { "epoch": 64.63934426229508, "grad_norm": 2.352498769760132, "learning_rate": 5.871771069089264e-06, "loss": 0.1286, "step": 19715 }, { "epoch": 64.64262295081967, "grad_norm": 2.4016172885894775, "learning_rate": 5.870803913678002e-06, "loss": 0.1816, "step": 19716 }, { "epoch": 64.64590163934426, "grad_norm": 3.612199544906616, "learning_rate": 5.8698368048277225e-06, "loss": 0.1452, "step": 19717 }, { "epoch": 64.64918032786885, "grad_norm": 4.135792255401611, "learning_rate": 5.868869742549326e-06, "loss": 0.2922, "step": 19718 }, { "epoch": 64.65245901639344, "grad_norm": 2.4580798149108887, "learning_rate": 5.867902726853718e-06, "loss": 0.2384, "step": 19719 }, { "epoch": 64.65573770491804, "grad_norm": 2.5275020599365234, "learning_rate": 5.866935757751804e-06, "loss": 0.1371, "step": 19720 }, { "epoch": 64.65901639344263, "grad_norm": 2.7916879653930664, "learning_rate": 5.865968835254488e-06, "loss": 0.1725, "step": 19721 }, { "epoch": 64.66229508196722, "grad_norm": 2.9592888355255127, "learning_rate": 5.865001959372666e-06, "loss": 0.1074, "step": 19722 }, { "epoch": 64.6655737704918, "grad_norm": 2.485323905944824, "learning_rate": 5.864035130117252e-06, "loss": 0.1746, "step": 19723 }, { "epoch": 64.66885245901639, "grad_norm": 4.020582675933838, "learning_rate": 5.8630683474991416e-06, "loss": 0.1987, "step": 19724 }, { "epoch": 64.67213114754098, "grad_norm": 2.641063690185547, "learning_rate": 5.8621016115292365e-06, "loss": 0.2496, "step": 19725 }, { "epoch": 64.67540983606557, "grad_norm": 2.995650053024292, "learning_rate": 5.861134922218434e-06, "loss": 0.3311, "step": 19726 }, { "epoch": 64.67868852459016, "grad_norm": 2.314033031463623, "learning_rate": 5.860168279577643e-06, "loss": 0.3248, "step": 19727 }, { "epoch": 64.68196721311476, "grad_norm": 2.837975263595581, "learning_rate": 5.859201683617758e-06, "loss": 0.2187, "step": 19728 }, { "epoch": 64.68524590163935, "grad_norm": 2.379642963409424, "learning_rate": 5.8582351343496805e-06, "loss": 0.1508, "step": 19729 }, { "epoch": 64.68852459016394, "grad_norm": 2.949791669845581, "learning_rate": 5.8572686317843034e-06, "loss": 0.1928, "step": 19730 }, { "epoch": 64.69180327868852, "grad_norm": 3.247344732284546, "learning_rate": 5.856302175932534e-06, "loss": 0.2663, "step": 19731 }, { "epoch": 64.69508196721311, "grad_norm": 2.392244815826416, "learning_rate": 5.855335766805267e-06, "loss": 0.1246, "step": 19732 }, { "epoch": 64.6983606557377, "grad_norm": 3.565108060836792, "learning_rate": 5.8543694044133984e-06, "loss": 0.1146, "step": 19733 }, { "epoch": 64.70163934426229, "grad_norm": 3.598679780960083, "learning_rate": 5.853403088767824e-06, "loss": 0.242, "step": 19734 }, { "epoch": 64.70491803278688, "grad_norm": 2.8517699241638184, "learning_rate": 5.852436819879438e-06, "loss": 0.1279, "step": 19735 }, { "epoch": 64.70819672131148, "grad_norm": 3.4469363689422607, "learning_rate": 5.851470597759144e-06, "loss": 0.2608, "step": 19736 }, { "epoch": 64.71147540983607, "grad_norm": 3.2015016078948975, "learning_rate": 5.850504422417831e-06, "loss": 0.1954, "step": 19737 }, { "epoch": 64.71475409836066, "grad_norm": 3.0558855533599854, "learning_rate": 5.849538293866395e-06, "loss": 0.2009, "step": 19738 }, { "epoch": 64.71803278688525, "grad_norm": 2.228414535522461, "learning_rate": 5.8485722121157265e-06, "loss": 0.0631, "step": 19739 }, { "epoch": 64.72131147540983, "grad_norm": 2.9498543739318848, "learning_rate": 5.847606177176727e-06, "loss": 0.1122, "step": 19740 }, { "epoch": 64.72459016393442, "grad_norm": 2.3323934078216553, "learning_rate": 5.846640189060284e-06, "loss": 0.074, "step": 19741 }, { "epoch": 64.72786885245901, "grad_norm": 2.3739237785339355, "learning_rate": 5.845674247777291e-06, "loss": 0.085, "step": 19742 }, { "epoch": 64.73114754098361, "grad_norm": 3.2660458087921143, "learning_rate": 5.84470835333864e-06, "loss": 0.1733, "step": 19743 }, { "epoch": 64.7344262295082, "grad_norm": 2.7655081748962402, "learning_rate": 5.843742505755223e-06, "loss": 0.1291, "step": 19744 }, { "epoch": 64.73770491803279, "grad_norm": 2.3482415676116943, "learning_rate": 5.842776705037932e-06, "loss": 0.1478, "step": 19745 }, { "epoch": 64.74098360655738, "grad_norm": 2.625086784362793, "learning_rate": 5.841810951197655e-06, "loss": 0.1768, "step": 19746 }, { "epoch": 64.74426229508197, "grad_norm": 2.389918804168701, "learning_rate": 5.840845244245283e-06, "loss": 0.1432, "step": 19747 }, { "epoch": 64.74754098360656, "grad_norm": 2.918840169906616, "learning_rate": 5.839879584191703e-06, "loss": 0.1643, "step": 19748 }, { "epoch": 64.75081967213114, "grad_norm": 2.9504964351654053, "learning_rate": 5.838913971047811e-06, "loss": 0.2344, "step": 19749 }, { "epoch": 64.75409836065573, "grad_norm": 2.735065460205078, "learning_rate": 5.837948404824487e-06, "loss": 0.0836, "step": 19750 }, { "epoch": 64.75737704918033, "grad_norm": 2.957775592803955, "learning_rate": 5.836982885532625e-06, "loss": 0.228, "step": 19751 }, { "epoch": 64.76065573770492, "grad_norm": 3.1513850688934326, "learning_rate": 5.836017413183104e-06, "loss": 0.1416, "step": 19752 }, { "epoch": 64.76393442622951, "grad_norm": 2.617342472076416, "learning_rate": 5.8350519877868214e-06, "loss": 0.1682, "step": 19753 }, { "epoch": 64.7672131147541, "grad_norm": 3.0489184856414795, "learning_rate": 5.834086609354657e-06, "loss": 0.132, "step": 19754 }, { "epoch": 64.77049180327869, "grad_norm": 3.090688467025757, "learning_rate": 5.833121277897498e-06, "loss": 0.1791, "step": 19755 }, { "epoch": 64.77377049180328, "grad_norm": 2.269174814224243, "learning_rate": 5.832155993426225e-06, "loss": 0.0922, "step": 19756 }, { "epoch": 64.77704918032786, "grad_norm": 2.8110759258270264, "learning_rate": 5.83119075595173e-06, "loss": 0.1595, "step": 19757 }, { "epoch": 64.78032786885245, "grad_norm": 2.9518420696258545, "learning_rate": 5.830225565484895e-06, "loss": 0.1752, "step": 19758 }, { "epoch": 64.78360655737706, "grad_norm": 3.25538969039917, "learning_rate": 5.829260422036601e-06, "loss": 0.1687, "step": 19759 }, { "epoch": 64.78688524590164, "grad_norm": 2.990480899810791, "learning_rate": 5.828295325617733e-06, "loss": 0.1914, "step": 19760 }, { "epoch": 64.79016393442623, "grad_norm": 3.512310266494751, "learning_rate": 5.827330276239169e-06, "loss": 0.188, "step": 19761 }, { "epoch": 64.79344262295082, "grad_norm": 3.2972238063812256, "learning_rate": 5.826365273911798e-06, "loss": 0.178, "step": 19762 }, { "epoch": 64.79672131147541, "grad_norm": 4.612498760223389, "learning_rate": 5.825400318646497e-06, "loss": 0.094, "step": 19763 }, { "epoch": 64.8, "grad_norm": 2.6512293815612793, "learning_rate": 5.82443541045415e-06, "loss": 0.1554, "step": 19764 }, { "epoch": 64.80327868852459, "grad_norm": 2.3929014205932617, "learning_rate": 5.82347054934563e-06, "loss": 0.0742, "step": 19765 }, { "epoch": 64.80655737704917, "grad_norm": 2.552448272705078, "learning_rate": 5.822505735331826e-06, "loss": 0.1849, "step": 19766 }, { "epoch": 64.80983606557378, "grad_norm": 2.686453342437744, "learning_rate": 5.8215409684236135e-06, "loss": 0.108, "step": 19767 }, { "epoch": 64.81311475409836, "grad_norm": 2.6570076942443848, "learning_rate": 5.8205762486318705e-06, "loss": 0.1168, "step": 19768 }, { "epoch": 64.81639344262295, "grad_norm": 2.481937885284424, "learning_rate": 5.819611575967473e-06, "loss": 0.2132, "step": 19769 }, { "epoch": 64.81967213114754, "grad_norm": 3.893122911453247, "learning_rate": 5.818646950441306e-06, "loss": 0.1112, "step": 19770 }, { "epoch": 64.82295081967213, "grad_norm": 3.3658833503723145, "learning_rate": 5.817682372064241e-06, "loss": 0.2293, "step": 19771 }, { "epoch": 64.82622950819672, "grad_norm": 2.9583754539489746, "learning_rate": 5.816717840847156e-06, "loss": 0.2166, "step": 19772 }, { "epoch": 64.8295081967213, "grad_norm": 3.1505625247955322, "learning_rate": 5.8157533568009265e-06, "loss": 0.1591, "step": 19773 }, { "epoch": 64.8327868852459, "grad_norm": 2.9727559089660645, "learning_rate": 5.8147889199364245e-06, "loss": 0.1252, "step": 19774 }, { "epoch": 64.8360655737705, "grad_norm": 2.590339183807373, "learning_rate": 5.8138245302645334e-06, "loss": 0.087, "step": 19775 }, { "epoch": 64.83934426229509, "grad_norm": 3.158637523651123, "learning_rate": 5.812860187796123e-06, "loss": 0.3287, "step": 19776 }, { "epoch": 64.84262295081967, "grad_norm": 2.7492544651031494, "learning_rate": 5.811895892542067e-06, "loss": 0.0882, "step": 19777 }, { "epoch": 64.84590163934426, "grad_norm": 2.461374521255493, "learning_rate": 5.810931644513236e-06, "loss": 0.2111, "step": 19778 }, { "epoch": 64.84918032786885, "grad_norm": 2.8662173748016357, "learning_rate": 5.80996744372051e-06, "loss": 0.2001, "step": 19779 }, { "epoch": 64.85245901639344, "grad_norm": 3.192028045654297, "learning_rate": 5.809003290174759e-06, "loss": 0.1792, "step": 19780 }, { "epoch": 64.85573770491803, "grad_norm": 1.978853464126587, "learning_rate": 5.808039183886847e-06, "loss": 0.1328, "step": 19781 }, { "epoch": 64.85901639344263, "grad_norm": 2.9088735580444336, "learning_rate": 5.807075124867658e-06, "loss": 0.2322, "step": 19782 }, { "epoch": 64.86229508196722, "grad_norm": 2.444445848464966, "learning_rate": 5.806111113128055e-06, "loss": 0.0688, "step": 19783 }, { "epoch": 64.8655737704918, "grad_norm": 3.0536766052246094, "learning_rate": 5.805147148678907e-06, "loss": 0.3141, "step": 19784 }, { "epoch": 64.8688524590164, "grad_norm": 3.011704444885254, "learning_rate": 5.804183231531089e-06, "loss": 0.1903, "step": 19785 }, { "epoch": 64.87213114754098, "grad_norm": 3.3477659225463867, "learning_rate": 5.803219361695469e-06, "loss": 0.2401, "step": 19786 }, { "epoch": 64.87540983606557, "grad_norm": 2.6500356197357178, "learning_rate": 5.802255539182913e-06, "loss": 0.101, "step": 19787 }, { "epoch": 64.87868852459016, "grad_norm": 3.2742815017700195, "learning_rate": 5.801291764004287e-06, "loss": 0.2134, "step": 19788 }, { "epoch": 64.88196721311475, "grad_norm": 3.7832651138305664, "learning_rate": 5.800328036170466e-06, "loss": 0.0883, "step": 19789 }, { "epoch": 64.88524590163935, "grad_norm": 1.9042811393737793, "learning_rate": 5.799364355692312e-06, "loss": 0.0882, "step": 19790 }, { "epoch": 64.88852459016394, "grad_norm": 2.390657663345337, "learning_rate": 5.7984007225806895e-06, "loss": 0.0652, "step": 19791 }, { "epoch": 64.89180327868853, "grad_norm": 2.9940972328186035, "learning_rate": 5.797437136846471e-06, "loss": 0.2806, "step": 19792 }, { "epoch": 64.89508196721312, "grad_norm": 2.9953715801239014, "learning_rate": 5.796473598500518e-06, "loss": 0.3375, "step": 19793 }, { "epoch": 64.8983606557377, "grad_norm": 3.0835092067718506, "learning_rate": 5.795510107553697e-06, "loss": 0.1645, "step": 19794 }, { "epoch": 64.90163934426229, "grad_norm": 2.9382710456848145, "learning_rate": 5.794546664016867e-06, "loss": 0.2086, "step": 19795 }, { "epoch": 64.90491803278688, "grad_norm": 2.834763288497925, "learning_rate": 5.793583267900899e-06, "loss": 0.0908, "step": 19796 }, { "epoch": 64.90819672131147, "grad_norm": 2.6637418270111084, "learning_rate": 5.792619919216653e-06, "loss": 0.1533, "step": 19797 }, { "epoch": 64.91147540983607, "grad_norm": 3.089258909225464, "learning_rate": 5.791656617974992e-06, "loss": 0.147, "step": 19798 }, { "epoch": 64.91475409836066, "grad_norm": 2.071938991546631, "learning_rate": 5.790693364186779e-06, "loss": 0.138, "step": 19799 }, { "epoch": 64.91803278688525, "grad_norm": 2.5620813369750977, "learning_rate": 5.7897301578628714e-06, "loss": 0.0765, "step": 19800 }, { "epoch": 64.92131147540984, "grad_norm": 2.2528982162475586, "learning_rate": 5.788766999014137e-06, "loss": 0.0956, "step": 19801 }, { "epoch": 64.92459016393443, "grad_norm": 5.695097923278809, "learning_rate": 5.787803887651433e-06, "loss": 0.3213, "step": 19802 }, { "epoch": 64.92786885245901, "grad_norm": 4.554962635040283, "learning_rate": 5.78684082378562e-06, "loss": 0.196, "step": 19803 }, { "epoch": 64.9311475409836, "grad_norm": 2.9389090538024902, "learning_rate": 5.785877807427553e-06, "loss": 0.1449, "step": 19804 }, { "epoch": 64.93442622950819, "grad_norm": 2.538353681564331, "learning_rate": 5.7849148385881e-06, "loss": 0.1432, "step": 19805 }, { "epoch": 64.9377049180328, "grad_norm": 2.856750011444092, "learning_rate": 5.783951917278115e-06, "loss": 0.1586, "step": 19806 }, { "epoch": 64.94098360655738, "grad_norm": 2.3097832202911377, "learning_rate": 5.782989043508456e-06, "loss": 0.0605, "step": 19807 }, { "epoch": 64.94426229508197, "grad_norm": 3.2905728816986084, "learning_rate": 5.782026217289975e-06, "loss": 0.0987, "step": 19808 }, { "epoch": 64.94754098360656, "grad_norm": 2.654341220855713, "learning_rate": 5.7810634386335384e-06, "loss": 0.2315, "step": 19809 }, { "epoch": 64.95081967213115, "grad_norm": 2.7784745693206787, "learning_rate": 5.780100707549998e-06, "loss": 0.0655, "step": 19810 }, { "epoch": 64.95409836065573, "grad_norm": 2.6848337650299072, "learning_rate": 5.7791380240502106e-06, "loss": 0.0857, "step": 19811 }, { "epoch": 64.95737704918032, "grad_norm": 2.666901111602783, "learning_rate": 5.778175388145029e-06, "loss": 0.0992, "step": 19812 }, { "epoch": 64.96065573770491, "grad_norm": 1.9707021713256836, "learning_rate": 5.7772127998453066e-06, "loss": 0.0382, "step": 19813 }, { "epoch": 64.96393442622951, "grad_norm": 4.419381141662598, "learning_rate": 5.776250259161904e-06, "loss": 0.1978, "step": 19814 }, { "epoch": 64.9672131147541, "grad_norm": 2.710062265396118, "learning_rate": 5.77528776610567e-06, "loss": 0.1321, "step": 19815 }, { "epoch": 64.97049180327869, "grad_norm": 3.7080726623535156, "learning_rate": 5.7743253206874616e-06, "loss": 0.2796, "step": 19816 }, { "epoch": 64.97377049180328, "grad_norm": 2.8423943519592285, "learning_rate": 5.773362922918124e-06, "loss": 0.2763, "step": 19817 }, { "epoch": 64.97704918032787, "grad_norm": 2.598027467727661, "learning_rate": 5.7724005728085175e-06, "loss": 0.222, "step": 19818 }, { "epoch": 64.98032786885246, "grad_norm": 2.84450101852417, "learning_rate": 5.771438270369491e-06, "loss": 0.1963, "step": 19819 }, { "epoch": 64.98360655737704, "grad_norm": 2.2603724002838135, "learning_rate": 5.770476015611893e-06, "loss": 0.1367, "step": 19820 }, { "epoch": 64.98688524590163, "grad_norm": 2.8784894943237305, "learning_rate": 5.769513808546573e-06, "loss": 0.1763, "step": 19821 }, { "epoch": 64.99016393442623, "grad_norm": 2.1992809772491455, "learning_rate": 5.768551649184386e-06, "loss": 0.1378, "step": 19822 }, { "epoch": 64.99344262295082, "grad_norm": 2.8762900829315186, "learning_rate": 5.7675895375361804e-06, "loss": 0.2106, "step": 19823 }, { "epoch": 64.99672131147541, "grad_norm": 2.5950937271118164, "learning_rate": 5.766627473612802e-06, "loss": 0.1479, "step": 19824 }, { "epoch": 65.0, "grad_norm": 2.6983416080474854, "learning_rate": 5.765665457425102e-06, "loss": 0.1401, "step": 19825 }, { "epoch": 65.00327868852459, "grad_norm": 2.696505546569824, "learning_rate": 5.764703488983923e-06, "loss": 0.1682, "step": 19826 }, { "epoch": 65.00655737704918, "grad_norm": 3.2523927688598633, "learning_rate": 5.763741568300118e-06, "loss": 0.1642, "step": 19827 }, { "epoch": 65.00983606557377, "grad_norm": 2.3765242099761963, "learning_rate": 5.762779695384531e-06, "loss": 0.0643, "step": 19828 }, { "epoch": 65.01311475409837, "grad_norm": 2.6120853424072266, "learning_rate": 5.76181787024801e-06, "loss": 0.0935, "step": 19829 }, { "epoch": 65.01639344262296, "grad_norm": 3.1454408168792725, "learning_rate": 5.760856092901394e-06, "loss": 0.1814, "step": 19830 }, { "epoch": 65.01967213114754, "grad_norm": 3.3424785137176514, "learning_rate": 5.759894363355538e-06, "loss": 0.276, "step": 19831 }, { "epoch": 65.02295081967213, "grad_norm": 2.538809299468994, "learning_rate": 5.758932681621281e-06, "loss": 0.0956, "step": 19832 }, { "epoch": 65.02622950819672, "grad_norm": 2.7813687324523926, "learning_rate": 5.7579710477094675e-06, "loss": 0.157, "step": 19833 }, { "epoch": 65.02950819672131, "grad_norm": 2.0133490562438965, "learning_rate": 5.757009461630938e-06, "loss": 0.1183, "step": 19834 }, { "epoch": 65.0327868852459, "grad_norm": 3.0054965019226074, "learning_rate": 5.756047923396542e-06, "loss": 0.1658, "step": 19835 }, { "epoch": 65.03606557377049, "grad_norm": 4.0520219802856445, "learning_rate": 5.755086433017119e-06, "loss": 0.2201, "step": 19836 }, { "epoch": 65.03934426229509, "grad_norm": 2.3196773529052734, "learning_rate": 5.754124990503504e-06, "loss": 0.1295, "step": 19837 }, { "epoch": 65.04262295081968, "grad_norm": 3.199054002761841, "learning_rate": 5.753163595866551e-06, "loss": 0.1189, "step": 19838 }, { "epoch": 65.04590163934427, "grad_norm": 3.095862865447998, "learning_rate": 5.752202249117091e-06, "loss": 0.1456, "step": 19839 }, { "epoch": 65.04918032786885, "grad_norm": 2.9070260524749756, "learning_rate": 5.7512409502659664e-06, "loss": 0.1919, "step": 19840 }, { "epoch": 65.05245901639344, "grad_norm": 2.5689618587493896, "learning_rate": 5.75027969932402e-06, "loss": 0.1493, "step": 19841 }, { "epoch": 65.05573770491803, "grad_norm": 4.796402454376221, "learning_rate": 5.749318496302088e-06, "loss": 0.1959, "step": 19842 }, { "epoch": 65.05901639344262, "grad_norm": 2.966464042663574, "learning_rate": 5.748357341211006e-06, "loss": 0.2777, "step": 19843 }, { "epoch": 65.0622950819672, "grad_norm": 2.700357437133789, "learning_rate": 5.747396234061621e-06, "loss": 0.26, "step": 19844 }, { "epoch": 65.06557377049181, "grad_norm": 3.5182747840881348, "learning_rate": 5.746435174864764e-06, "loss": 0.1267, "step": 19845 }, { "epoch": 65.0688524590164, "grad_norm": 2.398132085800171, "learning_rate": 5.745474163631272e-06, "loss": 0.1427, "step": 19846 }, { "epoch": 65.07213114754099, "grad_norm": 2.7768588066101074, "learning_rate": 5.74451320037198e-06, "loss": 0.2158, "step": 19847 }, { "epoch": 65.07540983606557, "grad_norm": 3.0498390197753906, "learning_rate": 5.74355228509773e-06, "loss": 0.2647, "step": 19848 }, { "epoch": 65.07868852459016, "grad_norm": 2.369375467300415, "learning_rate": 5.742591417819353e-06, "loss": 0.1172, "step": 19849 }, { "epoch": 65.08196721311475, "grad_norm": 2.786306142807007, "learning_rate": 5.741630598547685e-06, "loss": 0.1868, "step": 19850 }, { "epoch": 65.08524590163934, "grad_norm": 2.864490270614624, "learning_rate": 5.7406698272935595e-06, "loss": 0.1223, "step": 19851 }, { "epoch": 65.08852459016393, "grad_norm": 2.831103563308716, "learning_rate": 5.739709104067805e-06, "loss": 0.1021, "step": 19852 }, { "epoch": 65.09180327868853, "grad_norm": 3.0059967041015625, "learning_rate": 5.738748428881265e-06, "loss": 0.105, "step": 19853 }, { "epoch": 65.09508196721312, "grad_norm": 5.408520698547363, "learning_rate": 5.737787801744768e-06, "loss": 0.1684, "step": 19854 }, { "epoch": 65.09836065573771, "grad_norm": 2.880296468734741, "learning_rate": 5.736827222669144e-06, "loss": 0.1841, "step": 19855 }, { "epoch": 65.1016393442623, "grad_norm": 2.885406970977783, "learning_rate": 5.7358666916652215e-06, "loss": 0.1811, "step": 19856 }, { "epoch": 65.10491803278688, "grad_norm": 2.4655001163482666, "learning_rate": 5.73490620874384e-06, "loss": 0.0836, "step": 19857 }, { "epoch": 65.10819672131147, "grad_norm": 2.7416059970855713, "learning_rate": 5.733945773915826e-06, "loss": 0.2882, "step": 19858 }, { "epoch": 65.11147540983606, "grad_norm": 2.986485004425049, "learning_rate": 5.732985387192007e-06, "loss": 0.1145, "step": 19859 }, { "epoch": 65.11475409836065, "grad_norm": 2.3670928478240967, "learning_rate": 5.7320250485832116e-06, "loss": 0.1644, "step": 19860 }, { "epoch": 65.11803278688525, "grad_norm": 3.955599069595337, "learning_rate": 5.7310647581002755e-06, "loss": 0.283, "step": 19861 }, { "epoch": 65.12131147540984, "grad_norm": 13.898747444152832, "learning_rate": 5.730104515754024e-06, "loss": 0.1946, "step": 19862 }, { "epoch": 65.12459016393443, "grad_norm": 2.866212844848633, "learning_rate": 5.729144321555282e-06, "loss": 0.1992, "step": 19863 }, { "epoch": 65.12786885245902, "grad_norm": 2.934957504272461, "learning_rate": 5.7281841755148795e-06, "loss": 0.1095, "step": 19864 }, { "epoch": 65.1311475409836, "grad_norm": 3.0794224739074707, "learning_rate": 5.727224077643636e-06, "loss": 0.081, "step": 19865 }, { "epoch": 65.1344262295082, "grad_norm": 2.434492588043213, "learning_rate": 5.726264027952391e-06, "loss": 0.1904, "step": 19866 }, { "epoch": 65.13770491803278, "grad_norm": 9.93001937866211, "learning_rate": 5.725304026451959e-06, "loss": 0.2198, "step": 19867 }, { "epoch": 65.14098360655737, "grad_norm": 2.9892263412475586, "learning_rate": 5.724344073153171e-06, "loss": 0.319, "step": 19868 }, { "epoch": 65.14426229508197, "grad_norm": 2.334531784057617, "learning_rate": 5.723384168066845e-06, "loss": 0.1024, "step": 19869 }, { "epoch": 65.14754098360656, "grad_norm": 3.3722188472747803, "learning_rate": 5.722424311203812e-06, "loss": 0.1787, "step": 19870 }, { "epoch": 65.15081967213115, "grad_norm": 2.5752482414245605, "learning_rate": 5.721464502574893e-06, "loss": 0.1159, "step": 19871 }, { "epoch": 65.15409836065574, "grad_norm": 2.887006998062134, "learning_rate": 5.720504742190911e-06, "loss": 0.1431, "step": 19872 }, { "epoch": 65.15737704918033, "grad_norm": 2.5496485233306885, "learning_rate": 5.719545030062682e-06, "loss": 0.1123, "step": 19873 }, { "epoch": 65.16065573770491, "grad_norm": 2.859466791152954, "learning_rate": 5.7185853662010384e-06, "loss": 0.0921, "step": 19874 }, { "epoch": 65.1639344262295, "grad_norm": 2.731098175048828, "learning_rate": 5.7176257506167956e-06, "loss": 0.091, "step": 19875 }, { "epoch": 65.1672131147541, "grad_norm": 2.7020115852355957, "learning_rate": 5.716666183320776e-06, "loss": 0.1364, "step": 19876 }, { "epoch": 65.1704918032787, "grad_norm": 2.7431230545043945, "learning_rate": 5.715706664323799e-06, "loss": 0.1691, "step": 19877 }, { "epoch": 65.17377049180328, "grad_norm": 2.9531617164611816, "learning_rate": 5.714747193636678e-06, "loss": 0.1004, "step": 19878 }, { "epoch": 65.17704918032787, "grad_norm": 2.645275354385376, "learning_rate": 5.713787771270244e-06, "loss": 0.181, "step": 19879 }, { "epoch": 65.18032786885246, "grad_norm": 2.7402806282043457, "learning_rate": 5.7128283972353085e-06, "loss": 0.0913, "step": 19880 }, { "epoch": 65.18360655737705, "grad_norm": 2.8409759998321533, "learning_rate": 5.71186907154269e-06, "loss": 0.134, "step": 19881 }, { "epoch": 65.18688524590164, "grad_norm": 2.8443493843078613, "learning_rate": 5.710909794203204e-06, "loss": 0.1271, "step": 19882 }, { "epoch": 65.19016393442622, "grad_norm": 3.253169536590576, "learning_rate": 5.709950565227671e-06, "loss": 0.1255, "step": 19883 }, { "epoch": 65.19344262295083, "grad_norm": 2.851010322570801, "learning_rate": 5.708991384626908e-06, "loss": 0.1106, "step": 19884 }, { "epoch": 65.19672131147541, "grad_norm": 2.8279807567596436, "learning_rate": 5.708032252411728e-06, "loss": 0.0913, "step": 19885 }, { "epoch": 65.2, "grad_norm": 3.178790807723999, "learning_rate": 5.707073168592943e-06, "loss": 0.2257, "step": 19886 }, { "epoch": 65.20327868852459, "grad_norm": 1.6857434511184692, "learning_rate": 5.706114133181375e-06, "loss": 0.0664, "step": 19887 }, { "epoch": 65.20655737704918, "grad_norm": 2.9263222217559814, "learning_rate": 5.705155146187835e-06, "loss": 0.0797, "step": 19888 }, { "epoch": 65.20983606557377, "grad_norm": 2.957427501678467, "learning_rate": 5.704196207623136e-06, "loss": 0.1695, "step": 19889 }, { "epoch": 65.21311475409836, "grad_norm": 2.340503215789795, "learning_rate": 5.703237317498087e-06, "loss": 0.2042, "step": 19890 }, { "epoch": 65.21639344262294, "grad_norm": 3.561582088470459, "learning_rate": 5.7022784758235095e-06, "loss": 0.1216, "step": 19891 }, { "epoch": 65.21967213114755, "grad_norm": 4.392805576324463, "learning_rate": 5.701319682610211e-06, "loss": 0.1142, "step": 19892 }, { "epoch": 65.22295081967214, "grad_norm": 3.7682063579559326, "learning_rate": 5.700360937868998e-06, "loss": 0.1772, "step": 19893 }, { "epoch": 65.22622950819672, "grad_norm": 2.2103681564331055, "learning_rate": 5.6994022416106896e-06, "loss": 0.1629, "step": 19894 }, { "epoch": 65.22950819672131, "grad_norm": 2.701819658279419, "learning_rate": 5.698443593846092e-06, "loss": 0.211, "step": 19895 }, { "epoch": 65.2327868852459, "grad_norm": 3.5517468452453613, "learning_rate": 5.6974849945860135e-06, "loss": 0.2037, "step": 19896 }, { "epoch": 65.23606557377049, "grad_norm": 2.090914487838745, "learning_rate": 5.696526443841268e-06, "loss": 0.1607, "step": 19897 }, { "epoch": 65.23934426229508, "grad_norm": 2.499875545501709, "learning_rate": 5.6955679416226605e-06, "loss": 0.0904, "step": 19898 }, { "epoch": 65.24262295081967, "grad_norm": 3.3123557567596436, "learning_rate": 5.694609487940997e-06, "loss": 0.2419, "step": 19899 }, { "epoch": 65.24590163934427, "grad_norm": 3.285271167755127, "learning_rate": 5.693651082807092e-06, "loss": 0.2273, "step": 19900 }, { "epoch": 65.24918032786886, "grad_norm": 3.0300281047821045, "learning_rate": 5.692692726231748e-06, "loss": 0.2948, "step": 19901 }, { "epoch": 65.25245901639344, "grad_norm": 2.6661272048950195, "learning_rate": 5.691734418225772e-06, "loss": 0.1052, "step": 19902 }, { "epoch": 65.25573770491803, "grad_norm": 4.813369274139404, "learning_rate": 5.69077615879997e-06, "loss": 0.3992, "step": 19903 }, { "epoch": 65.25901639344262, "grad_norm": 2.355443000793457, "learning_rate": 5.689817947965144e-06, "loss": 0.1618, "step": 19904 }, { "epoch": 65.26229508196721, "grad_norm": 3.114086389541626, "learning_rate": 5.688859785732105e-06, "loss": 0.215, "step": 19905 }, { "epoch": 65.2655737704918, "grad_norm": 2.678779125213623, "learning_rate": 5.687901672111655e-06, "loss": 0.1752, "step": 19906 }, { "epoch": 65.26885245901639, "grad_norm": 2.6429879665374756, "learning_rate": 5.686943607114597e-06, "loss": 0.1855, "step": 19907 }, { "epoch": 65.27213114754099, "grad_norm": 2.9469432830810547, "learning_rate": 5.685985590751731e-06, "loss": 0.2169, "step": 19908 }, { "epoch": 65.27540983606558, "grad_norm": 2.4849677085876465, "learning_rate": 5.685027623033865e-06, "loss": 0.1054, "step": 19909 }, { "epoch": 65.27868852459017, "grad_norm": 2.5053963661193848, "learning_rate": 5.6840697039717994e-06, "loss": 0.2115, "step": 19910 }, { "epoch": 65.28196721311475, "grad_norm": 2.9636335372924805, "learning_rate": 5.683111833576337e-06, "loss": 0.1574, "step": 19911 }, { "epoch": 65.28524590163934, "grad_norm": 2.250232696533203, "learning_rate": 5.682154011858272e-06, "loss": 0.0586, "step": 19912 }, { "epoch": 65.28852459016393, "grad_norm": 2.7222907543182373, "learning_rate": 5.681196238828414e-06, "loss": 0.2736, "step": 19913 }, { "epoch": 65.29180327868852, "grad_norm": 2.7253730297088623, "learning_rate": 5.680238514497559e-06, "loss": 0.087, "step": 19914 }, { "epoch": 65.29508196721312, "grad_norm": 3.098330497741699, "learning_rate": 5.6792808388765065e-06, "loss": 0.2252, "step": 19915 }, { "epoch": 65.29836065573771, "grad_norm": 2.846815586090088, "learning_rate": 5.678323211976055e-06, "loss": 0.0983, "step": 19916 }, { "epoch": 65.3016393442623, "grad_norm": 2.6856322288513184, "learning_rate": 5.677365633806997e-06, "loss": 0.1126, "step": 19917 }, { "epoch": 65.30491803278689, "grad_norm": 2.5303003787994385, "learning_rate": 5.676408104380143e-06, "loss": 0.0991, "step": 19918 }, { "epoch": 65.30819672131148, "grad_norm": 3.723123073577881, "learning_rate": 5.67545062370628e-06, "loss": 0.265, "step": 19919 }, { "epoch": 65.31147540983606, "grad_norm": 3.3931963443756104, "learning_rate": 5.6744931917962084e-06, "loss": 0.1514, "step": 19920 }, { "epoch": 65.31475409836065, "grad_norm": 3.835721015930176, "learning_rate": 5.673535808660721e-06, "loss": 0.1967, "step": 19921 }, { "epoch": 65.31803278688524, "grad_norm": 2.747417449951172, "learning_rate": 5.672578474310618e-06, "loss": 0.1144, "step": 19922 }, { "epoch": 65.32131147540984, "grad_norm": 2.6009621620178223, "learning_rate": 5.671621188756693e-06, "loss": 0.1046, "step": 19923 }, { "epoch": 65.32459016393443, "grad_norm": 2.9853227138519287, "learning_rate": 5.670663952009739e-06, "loss": 0.1588, "step": 19924 }, { "epoch": 65.32786885245902, "grad_norm": 2.5664167404174805, "learning_rate": 5.6697067640805466e-06, "loss": 0.0844, "step": 19925 }, { "epoch": 65.33114754098361, "grad_norm": 2.8906662464141846, "learning_rate": 5.668749624979916e-06, "loss": 0.1747, "step": 19926 }, { "epoch": 65.3344262295082, "grad_norm": 3.0617334842681885, "learning_rate": 5.667792534718639e-06, "loss": 0.2813, "step": 19927 }, { "epoch": 65.33770491803278, "grad_norm": 4.95241641998291, "learning_rate": 5.666835493307503e-06, "loss": 0.1497, "step": 19928 }, { "epoch": 65.34098360655737, "grad_norm": 2.4680745601654053, "learning_rate": 5.665878500757304e-06, "loss": 0.1408, "step": 19929 }, { "epoch": 65.34426229508196, "grad_norm": 2.5070157051086426, "learning_rate": 5.6649215570788265e-06, "loss": 0.0777, "step": 19930 }, { "epoch": 65.34754098360656, "grad_norm": 2.1618635654449463, "learning_rate": 5.6639646622828694e-06, "loss": 0.1903, "step": 19931 }, { "epoch": 65.35081967213115, "grad_norm": 2.6357979774475098, "learning_rate": 5.66300781638022e-06, "loss": 0.2118, "step": 19932 }, { "epoch": 65.35409836065574, "grad_norm": 2.3103158473968506, "learning_rate": 5.662051019381666e-06, "loss": 0.07, "step": 19933 }, { "epoch": 65.35737704918033, "grad_norm": 3.590237855911255, "learning_rate": 5.661094271297993e-06, "loss": 0.2149, "step": 19934 }, { "epoch": 65.36065573770492, "grad_norm": 2.737048864364624, "learning_rate": 5.660137572139999e-06, "loss": 0.0759, "step": 19935 }, { "epoch": 65.3639344262295, "grad_norm": 2.804816246032715, "learning_rate": 5.659180921918464e-06, "loss": 0.1679, "step": 19936 }, { "epoch": 65.3672131147541, "grad_norm": 4.102635860443115, "learning_rate": 5.658224320644179e-06, "loss": 0.0739, "step": 19937 }, { "epoch": 65.37049180327868, "grad_norm": 3.132568120956421, "learning_rate": 5.6572677683279246e-06, "loss": 0.1377, "step": 19938 }, { "epoch": 65.37377049180328, "grad_norm": 3.657830238342285, "learning_rate": 5.656311264980494e-06, "loss": 0.1174, "step": 19939 }, { "epoch": 65.37704918032787, "grad_norm": 2.984569549560547, "learning_rate": 5.655354810612672e-06, "loss": 0.2804, "step": 19940 }, { "epoch": 65.38032786885246, "grad_norm": 3.2371668815612793, "learning_rate": 5.654398405235242e-06, "loss": 0.3221, "step": 19941 }, { "epoch": 65.38360655737705, "grad_norm": 2.9918956756591797, "learning_rate": 5.653442048858984e-06, "loss": 0.1858, "step": 19942 }, { "epoch": 65.38688524590164, "grad_norm": 3.421739101409912, "learning_rate": 5.652485741494689e-06, "loss": 0.1575, "step": 19943 }, { "epoch": 65.39016393442623, "grad_norm": 3.3250210285186768, "learning_rate": 5.651529483153139e-06, "loss": 0.1433, "step": 19944 }, { "epoch": 65.39344262295081, "grad_norm": 2.9587247371673584, "learning_rate": 5.65057327384511e-06, "loss": 0.1423, "step": 19945 }, { "epoch": 65.3967213114754, "grad_norm": 1.971091866493225, "learning_rate": 5.649617113581394e-06, "loss": 0.0419, "step": 19946 }, { "epoch": 65.4, "grad_norm": 3.069420099258423, "learning_rate": 5.648661002372769e-06, "loss": 0.3311, "step": 19947 }, { "epoch": 65.4032786885246, "grad_norm": 3.096315860748291, "learning_rate": 5.647704940230011e-06, "loss": 0.1486, "step": 19948 }, { "epoch": 65.40655737704918, "grad_norm": 2.777064800262451, "learning_rate": 5.64674892716391e-06, "loss": 0.1567, "step": 19949 }, { "epoch": 65.40983606557377, "grad_norm": 3.421159029006958, "learning_rate": 5.6457929631852395e-06, "loss": 0.2261, "step": 19950 }, { "epoch": 65.41311475409836, "grad_norm": 5.025360107421875, "learning_rate": 5.644837048304781e-06, "loss": 0.1851, "step": 19951 }, { "epoch": 65.41639344262295, "grad_norm": 2.9002914428710938, "learning_rate": 5.64388118253331e-06, "loss": 0.2915, "step": 19952 }, { "epoch": 65.41967213114754, "grad_norm": 3.490959882736206, "learning_rate": 5.642925365881611e-06, "loss": 0.1213, "step": 19953 }, { "epoch": 65.42295081967212, "grad_norm": 4.030263423919678, "learning_rate": 5.6419695983604595e-06, "loss": 0.2071, "step": 19954 }, { "epoch": 65.42622950819673, "grad_norm": 3.2400431632995605, "learning_rate": 5.64101387998063e-06, "loss": 0.1828, "step": 19955 }, { "epoch": 65.42950819672132, "grad_norm": 2.7373344898223877, "learning_rate": 5.640058210752899e-06, "loss": 0.0679, "step": 19956 }, { "epoch": 65.4327868852459, "grad_norm": 2.9640889167785645, "learning_rate": 5.6391025906880485e-06, "loss": 0.0577, "step": 19957 }, { "epoch": 65.43606557377049, "grad_norm": 2.5785789489746094, "learning_rate": 5.638147019796851e-06, "loss": 0.0978, "step": 19958 }, { "epoch": 65.43934426229508, "grad_norm": 3.376347541809082, "learning_rate": 5.63719149809008e-06, "loss": 0.2641, "step": 19959 }, { "epoch": 65.44262295081967, "grad_norm": 2.8537840843200684, "learning_rate": 5.6362360255785075e-06, "loss": 0.1491, "step": 19960 }, { "epoch": 65.44590163934426, "grad_norm": 2.793672800064087, "learning_rate": 5.6352806022729155e-06, "loss": 0.1581, "step": 19961 }, { "epoch": 65.44918032786886, "grad_norm": 2.9697890281677246, "learning_rate": 5.634325228184072e-06, "loss": 0.0834, "step": 19962 }, { "epoch": 65.45245901639345, "grad_norm": 2.4730095863342285, "learning_rate": 5.633369903322752e-06, "loss": 0.1756, "step": 19963 }, { "epoch": 65.45573770491804, "grad_norm": 2.8278210163116455, "learning_rate": 5.6324146276997215e-06, "loss": 0.2021, "step": 19964 }, { "epoch": 65.45901639344262, "grad_norm": 2.5360426902770996, "learning_rate": 5.631459401325761e-06, "loss": 0.1815, "step": 19965 }, { "epoch": 65.46229508196721, "grad_norm": 2.8525328636169434, "learning_rate": 5.6305042242116394e-06, "loss": 0.1898, "step": 19966 }, { "epoch": 65.4655737704918, "grad_norm": 2.610992193222046, "learning_rate": 5.629549096368123e-06, "loss": 0.263, "step": 19967 }, { "epoch": 65.46885245901639, "grad_norm": 2.842660903930664, "learning_rate": 5.628594017805987e-06, "loss": 0.1669, "step": 19968 }, { "epoch": 65.47213114754098, "grad_norm": 2.4545881748199463, "learning_rate": 5.627638988535994e-06, "loss": 0.2096, "step": 19969 }, { "epoch": 65.47540983606558, "grad_norm": 2.7634990215301514, "learning_rate": 5.626684008568921e-06, "loss": 0.1464, "step": 19970 }, { "epoch": 65.47868852459017, "grad_norm": 3.1077682971954346, "learning_rate": 5.625729077915534e-06, "loss": 0.308, "step": 19971 }, { "epoch": 65.48196721311476, "grad_norm": 3.820622682571411, "learning_rate": 5.6247741965866e-06, "loss": 0.0915, "step": 19972 }, { "epoch": 65.48524590163935, "grad_norm": 2.0775883197784424, "learning_rate": 5.62381936459288e-06, "loss": 0.0491, "step": 19973 }, { "epoch": 65.48852459016393, "grad_norm": 2.060234785079956, "learning_rate": 5.6228645819451525e-06, "loss": 0.0863, "step": 19974 }, { "epoch": 65.49180327868852, "grad_norm": 3.6395304203033447, "learning_rate": 5.621909848654177e-06, "loss": 0.2611, "step": 19975 }, { "epoch": 65.49508196721311, "grad_norm": 3.1100871562957764, "learning_rate": 5.6209551647307205e-06, "loss": 0.1592, "step": 19976 }, { "epoch": 65.4983606557377, "grad_norm": 2.9085545539855957, "learning_rate": 5.620000530185543e-06, "loss": 0.1089, "step": 19977 }, { "epoch": 65.5016393442623, "grad_norm": 2.7304818630218506, "learning_rate": 5.6190459450294175e-06, "loss": 0.241, "step": 19978 }, { "epoch": 65.50491803278689, "grad_norm": 3.066084384918213, "learning_rate": 5.6180914092731056e-06, "loss": 0.1113, "step": 19979 }, { "epoch": 65.50819672131148, "grad_norm": 3.2030460834503174, "learning_rate": 5.617136922927368e-06, "loss": 0.2084, "step": 19980 }, { "epoch": 65.51147540983607, "grad_norm": 2.515562057495117, "learning_rate": 5.616182486002968e-06, "loss": 0.1544, "step": 19981 }, { "epoch": 65.51475409836065, "grad_norm": 2.9317092895507812, "learning_rate": 5.615228098510665e-06, "loss": 0.1304, "step": 19982 }, { "epoch": 65.51803278688524, "grad_norm": 2.589353084564209, "learning_rate": 5.614273760461228e-06, "loss": 0.0718, "step": 19983 }, { "epoch": 65.52131147540983, "grad_norm": 2.3942689895629883, "learning_rate": 5.6133194718654145e-06, "loss": 0.2238, "step": 19984 }, { "epoch": 65.52459016393442, "grad_norm": 2.976419448852539, "learning_rate": 5.612365232733986e-06, "loss": 0.2092, "step": 19985 }, { "epoch": 65.52786885245902, "grad_norm": 3.5813512802124023, "learning_rate": 5.611411043077697e-06, "loss": 0.0883, "step": 19986 }, { "epoch": 65.53114754098361, "grad_norm": 2.5792105197906494, "learning_rate": 5.610456902907315e-06, "loss": 0.0972, "step": 19987 }, { "epoch": 65.5344262295082, "grad_norm": 2.6868810653686523, "learning_rate": 5.609502812233596e-06, "loss": 0.1698, "step": 19988 }, { "epoch": 65.53770491803279, "grad_norm": 2.810656785964966, "learning_rate": 5.608548771067297e-06, "loss": 0.1841, "step": 19989 }, { "epoch": 65.54098360655738, "grad_norm": 2.358635902404785, "learning_rate": 5.607594779419172e-06, "loss": 0.0948, "step": 19990 }, { "epoch": 65.54426229508196, "grad_norm": 2.7065317630767822, "learning_rate": 5.606640837299988e-06, "loss": 0.1585, "step": 19991 }, { "epoch": 65.54754098360655, "grad_norm": 2.9868204593658447, "learning_rate": 5.605686944720497e-06, "loss": 0.3395, "step": 19992 }, { "epoch": 65.55081967213114, "grad_norm": 3.217226266860962, "learning_rate": 5.604733101691453e-06, "loss": 0.2057, "step": 19993 }, { "epoch": 65.55409836065574, "grad_norm": 2.988466262817383, "learning_rate": 5.6037793082236145e-06, "loss": 0.2212, "step": 19994 }, { "epoch": 65.55737704918033, "grad_norm": 2.2161006927490234, "learning_rate": 5.6028255643277305e-06, "loss": 0.0733, "step": 19995 }, { "epoch": 65.56065573770492, "grad_norm": 5.952373504638672, "learning_rate": 5.601871870014565e-06, "loss": 0.2172, "step": 19996 }, { "epoch": 65.56393442622951, "grad_norm": 2.657348871231079, "learning_rate": 5.600918225294867e-06, "loss": 0.09, "step": 19997 }, { "epoch": 65.5672131147541, "grad_norm": 2.929165840148926, "learning_rate": 5.599964630179384e-06, "loss": 0.1303, "step": 19998 }, { "epoch": 65.57049180327868, "grad_norm": 3.782254934310913, "learning_rate": 5.59901108467888e-06, "loss": 0.1937, "step": 19999 }, { "epoch": 65.57377049180327, "grad_norm": 2.700848340988159, "learning_rate": 5.598057588804103e-06, "loss": 0.1277, "step": 20000 }, { "epoch": 65.57704918032788, "grad_norm": 2.2626962661743164, "learning_rate": 5.597104142565799e-06, "loss": 0.0781, "step": 20001 }, { "epoch": 65.58032786885246, "grad_norm": 2.5191667079925537, "learning_rate": 5.596150745974727e-06, "loss": 0.3177, "step": 20002 }, { "epoch": 65.58360655737705, "grad_norm": 3.3247272968292236, "learning_rate": 5.595197399041634e-06, "loss": 0.2191, "step": 20003 }, { "epoch": 65.58688524590164, "grad_norm": 2.869873046875, "learning_rate": 5.594244101777267e-06, "loss": 0.1752, "step": 20004 }, { "epoch": 65.59016393442623, "grad_norm": 1.8771830797195435, "learning_rate": 5.593290854192383e-06, "loss": 0.1409, "step": 20005 }, { "epoch": 65.59344262295082, "grad_norm": 3.4189529418945312, "learning_rate": 5.592337656297725e-06, "loss": 0.2089, "step": 20006 }, { "epoch": 65.5967213114754, "grad_norm": 2.1014153957366943, "learning_rate": 5.591384508104043e-06, "loss": 0.0726, "step": 20007 }, { "epoch": 65.6, "grad_norm": 3.126394748687744, "learning_rate": 5.590431409622081e-06, "loss": 0.3733, "step": 20008 }, { "epoch": 65.6032786885246, "grad_norm": 2.632877826690674, "learning_rate": 5.589478360862594e-06, "loss": 0.1302, "step": 20009 }, { "epoch": 65.60655737704919, "grad_norm": 3.335303544998169, "learning_rate": 5.588525361836323e-06, "loss": 0.2317, "step": 20010 }, { "epoch": 65.60983606557377, "grad_norm": 2.6316092014312744, "learning_rate": 5.587572412554016e-06, "loss": 0.1209, "step": 20011 }, { "epoch": 65.61311475409836, "grad_norm": 3.0443217754364014, "learning_rate": 5.586619513026415e-06, "loss": 0.1571, "step": 20012 }, { "epoch": 65.61639344262295, "grad_norm": 4.014354705810547, "learning_rate": 5.5856666632642705e-06, "loss": 0.1814, "step": 20013 }, { "epoch": 65.61967213114754, "grad_norm": 2.9616687297821045, "learning_rate": 5.584713863278324e-06, "loss": 0.1204, "step": 20014 }, { "epoch": 65.62295081967213, "grad_norm": 2.295778274536133, "learning_rate": 5.58376111307932e-06, "loss": 0.1347, "step": 20015 }, { "epoch": 65.62622950819672, "grad_norm": 2.5207338333129883, "learning_rate": 5.5828084126780005e-06, "loss": 0.3127, "step": 20016 }, { "epoch": 65.62950819672132, "grad_norm": 2.8592958450317383, "learning_rate": 5.581855762085107e-06, "loss": 0.1292, "step": 20017 }, { "epoch": 65.6327868852459, "grad_norm": 2.544651746749878, "learning_rate": 5.580903161311384e-06, "loss": 0.0856, "step": 20018 }, { "epoch": 65.6360655737705, "grad_norm": 3.296473264694214, "learning_rate": 5.579950610367575e-06, "loss": 0.1485, "step": 20019 }, { "epoch": 65.63934426229508, "grad_norm": 2.18344783782959, "learning_rate": 5.5789981092644175e-06, "loss": 0.0469, "step": 20020 }, { "epoch": 65.64262295081967, "grad_norm": 1.9806926250457764, "learning_rate": 5.57804565801265e-06, "loss": 0.1195, "step": 20021 }, { "epoch": 65.64590163934426, "grad_norm": 3.7002339363098145, "learning_rate": 5.577093256623019e-06, "loss": 0.1957, "step": 20022 }, { "epoch": 65.64918032786885, "grad_norm": 2.8501954078674316, "learning_rate": 5.57614090510626e-06, "loss": 0.1482, "step": 20023 }, { "epoch": 65.65245901639344, "grad_norm": 3.6069560050964355, "learning_rate": 5.575188603473112e-06, "loss": 0.2586, "step": 20024 }, { "epoch": 65.65573770491804, "grad_norm": 3.0956122875213623, "learning_rate": 5.574236351734309e-06, "loss": 0.1774, "step": 20025 }, { "epoch": 65.65901639344263, "grad_norm": 2.913909673690796, "learning_rate": 5.573284149900597e-06, "loss": 0.1973, "step": 20026 }, { "epoch": 65.66229508196722, "grad_norm": 2.722975969314575, "learning_rate": 5.57233199798271e-06, "loss": 0.0797, "step": 20027 }, { "epoch": 65.6655737704918, "grad_norm": 3.662015199661255, "learning_rate": 5.571379895991381e-06, "loss": 0.392, "step": 20028 }, { "epoch": 65.66885245901639, "grad_norm": 2.5427608489990234, "learning_rate": 5.570427843937349e-06, "loss": 0.1073, "step": 20029 }, { "epoch": 65.67213114754098, "grad_norm": 2.8121447563171387, "learning_rate": 5.569475841831346e-06, "loss": 0.3237, "step": 20030 }, { "epoch": 65.67540983606557, "grad_norm": 2.626819610595703, "learning_rate": 5.568523889684112e-06, "loss": 0.2445, "step": 20031 }, { "epoch": 65.67868852459016, "grad_norm": 3.195326805114746, "learning_rate": 5.567571987506379e-06, "loss": 0.1207, "step": 20032 }, { "epoch": 65.68196721311476, "grad_norm": 2.494497537612915, "learning_rate": 5.566620135308881e-06, "loss": 0.1247, "step": 20033 }, { "epoch": 65.68524590163935, "grad_norm": 2.1384472846984863, "learning_rate": 5.565668333102346e-06, "loss": 0.152, "step": 20034 }, { "epoch": 65.68852459016394, "grad_norm": 2.3806397914886475, "learning_rate": 5.564716580897516e-06, "loss": 0.2539, "step": 20035 }, { "epoch": 65.69180327868852, "grad_norm": 2.3191733360290527, "learning_rate": 5.563764878705117e-06, "loss": 0.1009, "step": 20036 }, { "epoch": 65.69508196721311, "grad_norm": 3.6456408500671387, "learning_rate": 5.562813226535881e-06, "loss": 0.1658, "step": 20037 }, { "epoch": 65.6983606557377, "grad_norm": 2.8136138916015625, "learning_rate": 5.561861624400537e-06, "loss": 0.2995, "step": 20038 }, { "epoch": 65.70163934426229, "grad_norm": 3.0293967723846436, "learning_rate": 5.560910072309822e-06, "loss": 0.1455, "step": 20039 }, { "epoch": 65.70491803278688, "grad_norm": 3.529391050338745, "learning_rate": 5.559958570274459e-06, "loss": 0.2337, "step": 20040 }, { "epoch": 65.70819672131148, "grad_norm": 2.252969741821289, "learning_rate": 5.559007118305182e-06, "loss": 0.0521, "step": 20041 }, { "epoch": 65.71147540983607, "grad_norm": 2.7091779708862305, "learning_rate": 5.558055716412716e-06, "loss": 0.1045, "step": 20042 }, { "epoch": 65.71475409836066, "grad_norm": 2.65116024017334, "learning_rate": 5.557104364607786e-06, "loss": 0.1113, "step": 20043 }, { "epoch": 65.71803278688525, "grad_norm": 2.37111496925354, "learning_rate": 5.556153062901128e-06, "loss": 0.0792, "step": 20044 }, { "epoch": 65.72131147540983, "grad_norm": 2.684309244155884, "learning_rate": 5.555201811303465e-06, "loss": 0.2555, "step": 20045 }, { "epoch": 65.72459016393442, "grad_norm": 3.0493690967559814, "learning_rate": 5.5542506098255245e-06, "loss": 0.1136, "step": 20046 }, { "epoch": 65.72786885245901, "grad_norm": 2.6114749908447266, "learning_rate": 5.553299458478024e-06, "loss": 0.1786, "step": 20047 }, { "epoch": 65.73114754098361, "grad_norm": 2.4783525466918945, "learning_rate": 5.552348357271702e-06, "loss": 0.1561, "step": 20048 }, { "epoch": 65.7344262295082, "grad_norm": 2.7532474994659424, "learning_rate": 5.551397306217274e-06, "loss": 0.182, "step": 20049 }, { "epoch": 65.73770491803279, "grad_norm": 2.8502848148345947, "learning_rate": 5.550446305325466e-06, "loss": 0.1609, "step": 20050 }, { "epoch": 65.74098360655738, "grad_norm": 3.0221147537231445, "learning_rate": 5.549495354607003e-06, "loss": 0.3222, "step": 20051 }, { "epoch": 65.74426229508197, "grad_norm": 5.772202491760254, "learning_rate": 5.5485444540726085e-06, "loss": 0.2622, "step": 20052 }, { "epoch": 65.74754098360656, "grad_norm": 3.776735305786133, "learning_rate": 5.5475936037330035e-06, "loss": 0.1195, "step": 20053 }, { "epoch": 65.75081967213114, "grad_norm": 3.4930601119995117, "learning_rate": 5.546642803598907e-06, "loss": 0.2565, "step": 20054 }, { "epoch": 65.75409836065573, "grad_norm": 2.511714220046997, "learning_rate": 5.545692053681047e-06, "loss": 0.3758, "step": 20055 }, { "epoch": 65.75737704918033, "grad_norm": 2.4960086345672607, "learning_rate": 5.544741353990139e-06, "loss": 0.1273, "step": 20056 }, { "epoch": 65.76065573770492, "grad_norm": 2.5420873165130615, "learning_rate": 5.543790704536902e-06, "loss": 0.0897, "step": 20057 }, { "epoch": 65.76393442622951, "grad_norm": 2.8792788982391357, "learning_rate": 5.542840105332061e-06, "loss": 0.0924, "step": 20058 }, { "epoch": 65.7672131147541, "grad_norm": 3.075951337814331, "learning_rate": 5.541889556386333e-06, "loss": 0.1665, "step": 20059 }, { "epoch": 65.77049180327869, "grad_norm": 3.354619026184082, "learning_rate": 5.5409390577104305e-06, "loss": 0.2279, "step": 20060 }, { "epoch": 65.77377049180328, "grad_norm": 3.36678147315979, "learning_rate": 5.539988609315081e-06, "loss": 0.1859, "step": 20061 }, { "epoch": 65.77704918032786, "grad_norm": 2.571826219558716, "learning_rate": 5.539038211210999e-06, "loss": 0.145, "step": 20062 }, { "epoch": 65.78032786885245, "grad_norm": 2.936213493347168, "learning_rate": 5.538087863408897e-06, "loss": 0.0851, "step": 20063 }, { "epoch": 65.78360655737706, "grad_norm": 3.3026633262634277, "learning_rate": 5.53713756591949e-06, "loss": 0.2345, "step": 20064 }, { "epoch": 65.78688524590164, "grad_norm": 2.4913344383239746, "learning_rate": 5.536187318753501e-06, "loss": 0.2042, "step": 20065 }, { "epoch": 65.79016393442623, "grad_norm": 2.2041313648223877, "learning_rate": 5.5352371219216416e-06, "loss": 0.0498, "step": 20066 }, { "epoch": 65.79344262295082, "grad_norm": 2.802267551422119, "learning_rate": 5.5342869754346266e-06, "loss": 0.1096, "step": 20067 }, { "epoch": 65.79672131147541, "grad_norm": 2.913459539413452, "learning_rate": 5.533336879303168e-06, "loss": 0.2386, "step": 20068 }, { "epoch": 65.8, "grad_norm": 2.247213125228882, "learning_rate": 5.5323868335379775e-06, "loss": 0.1407, "step": 20069 }, { "epoch": 65.80327868852459, "grad_norm": 2.864168167114258, "learning_rate": 5.531436838149773e-06, "loss": 0.1858, "step": 20070 }, { "epoch": 65.80655737704917, "grad_norm": 2.457860231399536, "learning_rate": 5.530486893149265e-06, "loss": 0.1529, "step": 20071 }, { "epoch": 65.80983606557378, "grad_norm": 3.3922173976898193, "learning_rate": 5.529536998547164e-06, "loss": 0.2223, "step": 20072 }, { "epoch": 65.81311475409836, "grad_norm": 3.039821147918701, "learning_rate": 5.528587154354177e-06, "loss": 0.1593, "step": 20073 }, { "epoch": 65.81639344262295, "grad_norm": 2.3027169704437256, "learning_rate": 5.527637360581024e-06, "loss": 0.0873, "step": 20074 }, { "epoch": 65.81967213114754, "grad_norm": 2.696167230606079, "learning_rate": 5.526687617238411e-06, "loss": 0.3567, "step": 20075 }, { "epoch": 65.82295081967213, "grad_norm": 2.6198911666870117, "learning_rate": 5.525737924337045e-06, "loss": 0.1084, "step": 20076 }, { "epoch": 65.82622950819672, "grad_norm": 2.8568503856658936, "learning_rate": 5.5247882818876306e-06, "loss": 0.2959, "step": 20077 }, { "epoch": 65.8295081967213, "grad_norm": 3.5161373615264893, "learning_rate": 5.523838689900887e-06, "loss": 0.1865, "step": 20078 }, { "epoch": 65.8327868852459, "grad_norm": 2.3453664779663086, "learning_rate": 5.522889148387516e-06, "loss": 0.1399, "step": 20079 }, { "epoch": 65.8360655737705, "grad_norm": 2.4102985858917236, "learning_rate": 5.521939657358224e-06, "loss": 0.1353, "step": 20080 }, { "epoch": 65.83934426229509, "grad_norm": 2.9921300411224365, "learning_rate": 5.520990216823719e-06, "loss": 0.1251, "step": 20081 }, { "epoch": 65.84262295081967, "grad_norm": 2.807086229324341, "learning_rate": 5.5200408267947026e-06, "loss": 0.0967, "step": 20082 }, { "epoch": 65.84590163934426, "grad_norm": 3.63736629486084, "learning_rate": 5.519091487281887e-06, "loss": 0.1332, "step": 20083 }, { "epoch": 65.84918032786885, "grad_norm": 2.644019603729248, "learning_rate": 5.518142198295975e-06, "loss": 0.2594, "step": 20084 }, { "epoch": 65.85245901639344, "grad_norm": 2.7394697666168213, "learning_rate": 5.517192959847669e-06, "loss": 0.0963, "step": 20085 }, { "epoch": 65.85573770491803, "grad_norm": 3.021771192550659, "learning_rate": 5.516243771947669e-06, "loss": 0.1609, "step": 20086 }, { "epoch": 65.85901639344263, "grad_norm": 2.2194786071777344, "learning_rate": 5.5152946346066875e-06, "loss": 0.0461, "step": 20087 }, { "epoch": 65.86229508196722, "grad_norm": 3.5518972873687744, "learning_rate": 5.514345547835421e-06, "loss": 0.2241, "step": 20088 }, { "epoch": 65.8655737704918, "grad_norm": 3.038994550704956, "learning_rate": 5.513396511644573e-06, "loss": 0.1143, "step": 20089 }, { "epoch": 65.8688524590164, "grad_norm": 2.628453016281128, "learning_rate": 5.51244752604484e-06, "loss": 0.0662, "step": 20090 }, { "epoch": 65.87213114754098, "grad_norm": 2.7996647357940674, "learning_rate": 5.511498591046931e-06, "loss": 0.0993, "step": 20091 }, { "epoch": 65.87540983606557, "grad_norm": 2.8681626319885254, "learning_rate": 5.510549706661542e-06, "loss": 0.1608, "step": 20092 }, { "epoch": 65.87868852459016, "grad_norm": 2.9729413986206055, "learning_rate": 5.509600872899373e-06, "loss": 0.2003, "step": 20093 }, { "epoch": 65.88196721311475, "grad_norm": 2.775662422180176, "learning_rate": 5.508652089771122e-06, "loss": 0.1662, "step": 20094 }, { "epoch": 65.88524590163935, "grad_norm": 2.413813591003418, "learning_rate": 5.507703357287486e-06, "loss": 0.0701, "step": 20095 }, { "epoch": 65.88852459016394, "grad_norm": 2.935084819793701, "learning_rate": 5.506754675459169e-06, "loss": 0.078, "step": 20096 }, { "epoch": 65.89180327868853, "grad_norm": 2.8394112586975098, "learning_rate": 5.505806044296867e-06, "loss": 0.1965, "step": 20097 }, { "epoch": 65.89508196721312, "grad_norm": 3.343266010284424, "learning_rate": 5.50485746381127e-06, "loss": 0.1345, "step": 20098 }, { "epoch": 65.8983606557377, "grad_norm": 3.1517210006713867, "learning_rate": 5.503908934013079e-06, "loss": 0.1622, "step": 20099 }, { "epoch": 65.90163934426229, "grad_norm": 2.605891466140747, "learning_rate": 5.502960454912991e-06, "loss": 0.1561, "step": 20100 }, { "epoch": 65.90491803278688, "grad_norm": 2.5780348777770996, "learning_rate": 5.502012026521701e-06, "loss": 0.1597, "step": 20101 }, { "epoch": 65.90819672131147, "grad_norm": 2.5513734817504883, "learning_rate": 5.501063648849903e-06, "loss": 0.0851, "step": 20102 }, { "epoch": 65.91147540983607, "grad_norm": 2.6612203121185303, "learning_rate": 5.500115321908284e-06, "loss": 0.2315, "step": 20103 }, { "epoch": 65.91475409836066, "grad_norm": 2.439319133758545, "learning_rate": 5.499167045707547e-06, "loss": 0.1133, "step": 20104 }, { "epoch": 65.91803278688525, "grad_norm": 2.550964832305908, "learning_rate": 5.498218820258383e-06, "loss": 0.1759, "step": 20105 }, { "epoch": 65.92131147540984, "grad_norm": 2.666867971420288, "learning_rate": 5.497270645571479e-06, "loss": 0.0936, "step": 20106 }, { "epoch": 65.92459016393443, "grad_norm": 2.9830143451690674, "learning_rate": 5.496322521657533e-06, "loss": 0.2336, "step": 20107 }, { "epoch": 65.92786885245901, "grad_norm": 2.947413682937622, "learning_rate": 5.4953744485272335e-06, "loss": 0.1728, "step": 20108 }, { "epoch": 65.9311475409836, "grad_norm": 2.6011974811553955, "learning_rate": 5.494426426191266e-06, "loss": 0.1152, "step": 20109 }, { "epoch": 65.93442622950819, "grad_norm": 2.5323026180267334, "learning_rate": 5.493478454660331e-06, "loss": 0.2377, "step": 20110 }, { "epoch": 65.9377049180328, "grad_norm": 2.950737953186035, "learning_rate": 5.492530533945109e-06, "loss": 0.0748, "step": 20111 }, { "epoch": 65.94098360655738, "grad_norm": 3.1119472980499268, "learning_rate": 5.4915826640562894e-06, "loss": 0.1031, "step": 20112 }, { "epoch": 65.94426229508197, "grad_norm": 3.5249056816101074, "learning_rate": 5.490634845004568e-06, "loss": 0.2643, "step": 20113 }, { "epoch": 65.94754098360656, "grad_norm": 3.2209694385528564, "learning_rate": 5.489687076800626e-06, "loss": 0.1479, "step": 20114 }, { "epoch": 65.95081967213115, "grad_norm": 2.971724510192871, "learning_rate": 5.488739359455153e-06, "loss": 0.1167, "step": 20115 }, { "epoch": 65.95409836065573, "grad_norm": 2.8257834911346436, "learning_rate": 5.487791692978829e-06, "loss": 0.2522, "step": 20116 }, { "epoch": 65.95737704918032, "grad_norm": 2.426339626312256, "learning_rate": 5.486844077382349e-06, "loss": 0.0812, "step": 20117 }, { "epoch": 65.96065573770491, "grad_norm": 3.2324564456939697, "learning_rate": 5.485896512676395e-06, "loss": 0.1448, "step": 20118 }, { "epoch": 65.96393442622951, "grad_norm": 2.6476919651031494, "learning_rate": 5.484948998871651e-06, "loss": 0.2053, "step": 20119 }, { "epoch": 65.9672131147541, "grad_norm": 3.4400696754455566, "learning_rate": 5.484001535978802e-06, "loss": 0.2515, "step": 20120 }, { "epoch": 65.97049180327869, "grad_norm": 2.9293737411499023, "learning_rate": 5.483054124008528e-06, "loss": 0.1108, "step": 20121 }, { "epoch": 65.97377049180328, "grad_norm": 3.0238168239593506, "learning_rate": 5.482106762971517e-06, "loss": 0.1704, "step": 20122 }, { "epoch": 65.97704918032787, "grad_norm": 2.29508638381958, "learning_rate": 5.481159452878452e-06, "loss": 0.119, "step": 20123 }, { "epoch": 65.98032786885246, "grad_norm": 3.320615291595459, "learning_rate": 5.480212193740011e-06, "loss": 0.1292, "step": 20124 }, { "epoch": 65.98360655737704, "grad_norm": 3.0748424530029297, "learning_rate": 5.479264985566873e-06, "loss": 0.2525, "step": 20125 }, { "epoch": 65.98688524590163, "grad_norm": 3.3170454502105713, "learning_rate": 5.478317828369728e-06, "loss": 0.3195, "step": 20126 }, { "epoch": 65.99016393442623, "grad_norm": 3.676105260848999, "learning_rate": 5.4773707221592496e-06, "loss": 0.0923, "step": 20127 }, { "epoch": 65.99344262295082, "grad_norm": 3.21274995803833, "learning_rate": 5.476423666946119e-06, "loss": 0.1998, "step": 20128 }, { "epoch": 65.99672131147541, "grad_norm": 2.8083913326263428, "learning_rate": 5.4754766627410104e-06, "loss": 0.1093, "step": 20129 }, { "epoch": 66.0, "grad_norm": 2.4739248752593994, "learning_rate": 5.4745297095546125e-06, "loss": 0.1718, "step": 20130 }, { "epoch": 66.00327868852459, "grad_norm": 2.478841543197632, "learning_rate": 5.473582807397595e-06, "loss": 0.1464, "step": 20131 }, { "epoch": 66.00655737704918, "grad_norm": 3.964148998260498, "learning_rate": 5.4726359562806384e-06, "loss": 0.1944, "step": 20132 }, { "epoch": 66.00983606557377, "grad_norm": 2.324424982070923, "learning_rate": 5.471689156214419e-06, "loss": 0.1372, "step": 20133 }, { "epoch": 66.01311475409837, "grad_norm": 2.8033719062805176, "learning_rate": 5.4707424072096095e-06, "loss": 0.2879, "step": 20134 }, { "epoch": 66.01639344262296, "grad_norm": 3.404017210006714, "learning_rate": 5.469795709276892e-06, "loss": 0.1684, "step": 20135 }, { "epoch": 66.01967213114754, "grad_norm": 2.7015254497528076, "learning_rate": 5.468849062426937e-06, "loss": 0.1247, "step": 20136 }, { "epoch": 66.02295081967213, "grad_norm": 3.028798818588257, "learning_rate": 5.4679024666704215e-06, "loss": 0.2344, "step": 20137 }, { "epoch": 66.02622950819672, "grad_norm": 2.5517115592956543, "learning_rate": 5.466955922018013e-06, "loss": 0.1282, "step": 20138 }, { "epoch": 66.02950819672131, "grad_norm": 2.995720624923706, "learning_rate": 5.4660094284803945e-06, "loss": 0.2013, "step": 20139 }, { "epoch": 66.0327868852459, "grad_norm": 2.3036510944366455, "learning_rate": 5.465062986068233e-06, "loss": 0.0629, "step": 20140 }, { "epoch": 66.03606557377049, "grad_norm": 2.877087354660034, "learning_rate": 5.464116594792202e-06, "loss": 0.2785, "step": 20141 }, { "epoch": 66.03934426229509, "grad_norm": 3.3287346363067627, "learning_rate": 5.463170254662968e-06, "loss": 0.1308, "step": 20142 }, { "epoch": 66.04262295081968, "grad_norm": 1.971011757850647, "learning_rate": 5.4622239656912115e-06, "loss": 0.0676, "step": 20143 }, { "epoch": 66.04590163934427, "grad_norm": 2.597943067550659, "learning_rate": 5.461277727887597e-06, "loss": 0.1732, "step": 20144 }, { "epoch": 66.04918032786885, "grad_norm": 3.5353691577911377, "learning_rate": 5.460331541262795e-06, "loss": 0.1358, "step": 20145 }, { "epoch": 66.05245901639344, "grad_norm": 3.2276453971862793, "learning_rate": 5.459385405827477e-06, "loss": 0.2166, "step": 20146 }, { "epoch": 66.05573770491803, "grad_norm": 2.763242483139038, "learning_rate": 5.458439321592304e-06, "loss": 0.2392, "step": 20147 }, { "epoch": 66.05901639344262, "grad_norm": 2.544546604156494, "learning_rate": 5.4574932885679534e-06, "loss": 0.2105, "step": 20148 }, { "epoch": 66.0622950819672, "grad_norm": 2.2049002647399902, "learning_rate": 5.456547306765089e-06, "loss": 0.1292, "step": 20149 }, { "epoch": 66.06557377049181, "grad_norm": 2.4205024242401123, "learning_rate": 5.455601376194377e-06, "loss": 0.1029, "step": 20150 }, { "epoch": 66.0688524590164, "grad_norm": 2.580441951751709, "learning_rate": 5.4546554968664825e-06, "loss": 0.1424, "step": 20151 }, { "epoch": 66.07213114754099, "grad_norm": 2.662191152572632, "learning_rate": 5.453709668792076e-06, "loss": 0.2511, "step": 20152 }, { "epoch": 66.07540983606557, "grad_norm": 3.227083921432495, "learning_rate": 5.4527638919818206e-06, "loss": 0.1206, "step": 20153 }, { "epoch": 66.07868852459016, "grad_norm": 2.600249767303467, "learning_rate": 5.45181816644638e-06, "loss": 0.19, "step": 20154 }, { "epoch": 66.08196721311475, "grad_norm": 2.8049817085266113, "learning_rate": 5.450872492196414e-06, "loss": 0.2755, "step": 20155 }, { "epoch": 66.08524590163934, "grad_norm": 3.4660332202911377, "learning_rate": 5.4499268692425945e-06, "loss": 0.3139, "step": 20156 }, { "epoch": 66.08852459016393, "grad_norm": 2.9614007472991943, "learning_rate": 5.448981297595581e-06, "loss": 0.1299, "step": 20157 }, { "epoch": 66.09180327868853, "grad_norm": 2.186434745788574, "learning_rate": 5.448035777266035e-06, "loss": 0.1322, "step": 20158 }, { "epoch": 66.09508196721312, "grad_norm": 2.7006704807281494, "learning_rate": 5.4470903082646155e-06, "loss": 0.1052, "step": 20159 }, { "epoch": 66.09836065573771, "grad_norm": 2.7191970348358154, "learning_rate": 5.44614489060199e-06, "loss": 0.1501, "step": 20160 }, { "epoch": 66.1016393442623, "grad_norm": 2.615840196609497, "learning_rate": 5.445199524288815e-06, "loss": 0.092, "step": 20161 }, { "epoch": 66.10491803278688, "grad_norm": 3.9671096801757812, "learning_rate": 5.444254209335747e-06, "loss": 0.1681, "step": 20162 }, { "epoch": 66.10819672131147, "grad_norm": 2.6367290019989014, "learning_rate": 5.443308945753454e-06, "loss": 0.0954, "step": 20163 }, { "epoch": 66.11147540983606, "grad_norm": 3.8183512687683105, "learning_rate": 5.442363733552591e-06, "loss": 0.1951, "step": 20164 }, { "epoch": 66.11475409836065, "grad_norm": 2.3278307914733887, "learning_rate": 5.44141857274381e-06, "loss": 0.09, "step": 20165 }, { "epoch": 66.11803278688525, "grad_norm": 3.5524814128875732, "learning_rate": 5.440473463337781e-06, "loss": 0.2897, "step": 20166 }, { "epoch": 66.12131147540984, "grad_norm": 2.6943752765655518, "learning_rate": 5.439528405345152e-06, "loss": 0.0741, "step": 20167 }, { "epoch": 66.12459016393443, "grad_norm": 2.4173319339752197, "learning_rate": 5.438583398776579e-06, "loss": 0.0987, "step": 20168 }, { "epoch": 66.12786885245902, "grad_norm": 2.53080415725708, "learning_rate": 5.437638443642725e-06, "loss": 0.2262, "step": 20169 }, { "epoch": 66.1311475409836, "grad_norm": 3.392648220062256, "learning_rate": 5.4366935399542406e-06, "loss": 0.1442, "step": 20170 }, { "epoch": 66.1344262295082, "grad_norm": 1.8507825136184692, "learning_rate": 5.435748687721781e-06, "loss": 0.1296, "step": 20171 }, { "epoch": 66.13770491803278, "grad_norm": 2.930370569229126, "learning_rate": 5.434803886956002e-06, "loss": 0.1234, "step": 20172 }, { "epoch": 66.14098360655737, "grad_norm": 3.151517868041992, "learning_rate": 5.43385913766755e-06, "loss": 0.1141, "step": 20173 }, { "epoch": 66.14426229508197, "grad_norm": 3.1292245388031006, "learning_rate": 5.4329144398670895e-06, "loss": 0.2173, "step": 20174 }, { "epoch": 66.14754098360656, "grad_norm": 2.2376368045806885, "learning_rate": 5.431969793565266e-06, "loss": 0.1648, "step": 20175 }, { "epoch": 66.15081967213115, "grad_norm": 5.830635070800781, "learning_rate": 5.431025198772732e-06, "loss": 0.1408, "step": 20176 }, { "epoch": 66.15409836065574, "grad_norm": 2.315729856491089, "learning_rate": 5.430080655500139e-06, "loss": 0.146, "step": 20177 }, { "epoch": 66.15737704918033, "grad_norm": 3.1933131217956543, "learning_rate": 5.429136163758139e-06, "loss": 0.1948, "step": 20178 }, { "epoch": 66.16065573770491, "grad_norm": 3.00417423248291, "learning_rate": 5.428191723557381e-06, "loss": 0.2046, "step": 20179 }, { "epoch": 66.1639344262295, "grad_norm": 2.5678069591522217, "learning_rate": 5.427247334908517e-06, "loss": 0.086, "step": 20180 }, { "epoch": 66.1672131147541, "grad_norm": 2.722162961959839, "learning_rate": 5.426302997822188e-06, "loss": 0.1435, "step": 20181 }, { "epoch": 66.1704918032787, "grad_norm": 2.7765486240386963, "learning_rate": 5.425358712309053e-06, "loss": 0.0789, "step": 20182 }, { "epoch": 66.17377049180328, "grad_norm": 10.100286483764648, "learning_rate": 5.424414478379754e-06, "loss": 0.1795, "step": 20183 }, { "epoch": 66.17704918032787, "grad_norm": 3.3557231426239014, "learning_rate": 5.423470296044939e-06, "loss": 0.181, "step": 20184 }, { "epoch": 66.18032786885246, "grad_norm": 2.844120979309082, "learning_rate": 5.4225261653152565e-06, "loss": 0.2929, "step": 20185 }, { "epoch": 66.18360655737705, "grad_norm": 1.9449684619903564, "learning_rate": 5.421582086201346e-06, "loss": 0.0483, "step": 20186 }, { "epoch": 66.18688524590164, "grad_norm": 3.9446609020233154, "learning_rate": 5.420638058713861e-06, "loss": 0.2478, "step": 20187 }, { "epoch": 66.19016393442622, "grad_norm": 2.3724443912506104, "learning_rate": 5.4196940828634445e-06, "loss": 0.1811, "step": 20188 }, { "epoch": 66.19344262295083, "grad_norm": 3.4693992137908936, "learning_rate": 5.418750158660739e-06, "loss": 0.1906, "step": 20189 }, { "epoch": 66.19672131147541, "grad_norm": 2.550342082977295, "learning_rate": 5.417806286116385e-06, "loss": 0.039, "step": 20190 }, { "epoch": 66.2, "grad_norm": 2.537146806716919, "learning_rate": 5.416862465241033e-06, "loss": 0.1546, "step": 20191 }, { "epoch": 66.20327868852459, "grad_norm": 3.475332260131836, "learning_rate": 5.415918696045322e-06, "loss": 0.2673, "step": 20192 }, { "epoch": 66.20655737704918, "grad_norm": 2.9612364768981934, "learning_rate": 5.414974978539895e-06, "loss": 0.2281, "step": 20193 }, { "epoch": 66.20983606557377, "grad_norm": 2.3647940158843994, "learning_rate": 5.4140313127353875e-06, "loss": 0.074, "step": 20194 }, { "epoch": 66.21311475409836, "grad_norm": 3.7502975463867188, "learning_rate": 5.413087698642448e-06, "loss": 0.2253, "step": 20195 }, { "epoch": 66.21639344262294, "grad_norm": 3.175240993499756, "learning_rate": 5.412144136271716e-06, "loss": 0.2059, "step": 20196 }, { "epoch": 66.21967213114755, "grad_norm": 2.82607364654541, "learning_rate": 5.411200625633828e-06, "loss": 0.1116, "step": 20197 }, { "epoch": 66.22295081967214, "grad_norm": 2.30448055267334, "learning_rate": 5.410257166739423e-06, "loss": 0.0991, "step": 20198 }, { "epoch": 66.22622950819672, "grad_norm": 3.1947309970855713, "learning_rate": 5.40931375959914e-06, "loss": 0.1681, "step": 20199 }, { "epoch": 66.22950819672131, "grad_norm": 2.7934165000915527, "learning_rate": 5.408370404223617e-06, "loss": 0.3214, "step": 20200 }, { "epoch": 66.2327868852459, "grad_norm": 3.346249580383301, "learning_rate": 5.407427100623495e-06, "loss": 0.2695, "step": 20201 }, { "epoch": 66.23606557377049, "grad_norm": 39.71656799316406, "learning_rate": 5.406483848809407e-06, "loss": 0.0905, "step": 20202 }, { "epoch": 66.23934426229508, "grad_norm": 2.2972047328948975, "learning_rate": 5.4055406487919845e-06, "loss": 0.0563, "step": 20203 }, { "epoch": 66.24262295081967, "grad_norm": 3.0674564838409424, "learning_rate": 5.404597500581874e-06, "loss": 0.1939, "step": 20204 }, { "epoch": 66.24590163934427, "grad_norm": 3.040273904800415, "learning_rate": 5.403654404189703e-06, "loss": 0.4187, "step": 20205 }, { "epoch": 66.24918032786886, "grad_norm": 2.6792726516723633, "learning_rate": 5.402711359626108e-06, "loss": 0.087, "step": 20206 }, { "epoch": 66.25245901639344, "grad_norm": 2.668107509613037, "learning_rate": 5.401768366901719e-06, "loss": 0.1042, "step": 20207 }, { "epoch": 66.25573770491803, "grad_norm": 3.4979000091552734, "learning_rate": 5.400825426027177e-06, "loss": 0.208, "step": 20208 }, { "epoch": 66.25901639344262, "grad_norm": 2.920867919921875, "learning_rate": 5.399882537013108e-06, "loss": 0.1392, "step": 20209 }, { "epoch": 66.26229508196721, "grad_norm": 2.483459234237671, "learning_rate": 5.3989396998701495e-06, "loss": 0.1962, "step": 20210 }, { "epoch": 66.2655737704918, "grad_norm": 2.4893500804901123, "learning_rate": 5.397996914608923e-06, "loss": 0.0631, "step": 20211 }, { "epoch": 66.26885245901639, "grad_norm": 3.543565273284912, "learning_rate": 5.397054181240071e-06, "loss": 0.2082, "step": 20212 }, { "epoch": 66.27213114754099, "grad_norm": 2.6772773265838623, "learning_rate": 5.396111499774219e-06, "loss": 0.1739, "step": 20213 }, { "epoch": 66.27540983606558, "grad_norm": 3.568715810775757, "learning_rate": 5.395168870221994e-06, "loss": 0.144, "step": 20214 }, { "epoch": 66.27868852459017, "grad_norm": 2.7291259765625, "learning_rate": 5.394226292594031e-06, "loss": 0.1787, "step": 20215 }, { "epoch": 66.28196721311475, "grad_norm": 3.3708765506744385, "learning_rate": 5.3932837669009545e-06, "loss": 0.1642, "step": 20216 }, { "epoch": 66.28524590163934, "grad_norm": 2.7959227561950684, "learning_rate": 5.392341293153393e-06, "loss": 0.1884, "step": 20217 }, { "epoch": 66.28852459016393, "grad_norm": 2.6909782886505127, "learning_rate": 5.391398871361972e-06, "loss": 0.0626, "step": 20218 }, { "epoch": 66.29180327868852, "grad_norm": 2.354995012283325, "learning_rate": 5.390456501537325e-06, "loss": 0.1297, "step": 20219 }, { "epoch": 66.29508196721312, "grad_norm": 2.6640164852142334, "learning_rate": 5.389514183690071e-06, "loss": 0.176, "step": 20220 }, { "epoch": 66.29836065573771, "grad_norm": 3.193758010864258, "learning_rate": 5.388571917830836e-06, "loss": 0.2086, "step": 20221 }, { "epoch": 66.3016393442623, "grad_norm": 1.9528393745422363, "learning_rate": 5.3876297039702506e-06, "loss": 0.0302, "step": 20222 }, { "epoch": 66.30491803278689, "grad_norm": 2.1835832595825195, "learning_rate": 5.386687542118936e-06, "loss": 0.2057, "step": 20223 }, { "epoch": 66.30819672131148, "grad_norm": 2.8126816749572754, "learning_rate": 5.385745432287517e-06, "loss": 0.253, "step": 20224 }, { "epoch": 66.31147540983606, "grad_norm": 2.794290065765381, "learning_rate": 5.384803374486611e-06, "loss": 0.0996, "step": 20225 }, { "epoch": 66.31475409836065, "grad_norm": 2.584242343902588, "learning_rate": 5.383861368726849e-06, "loss": 0.1002, "step": 20226 }, { "epoch": 66.31803278688524, "grad_norm": 4.369365215301514, "learning_rate": 5.38291941501885e-06, "loss": 0.1774, "step": 20227 }, { "epoch": 66.32131147540984, "grad_norm": 2.890338182449341, "learning_rate": 5.381977513373235e-06, "loss": 0.2188, "step": 20228 }, { "epoch": 66.32459016393443, "grad_norm": 2.8487253189086914, "learning_rate": 5.3810356638006224e-06, "loss": 0.0751, "step": 20229 }, { "epoch": 66.32786885245902, "grad_norm": 2.910017728805542, "learning_rate": 5.380093866311639e-06, "loss": 0.1369, "step": 20230 }, { "epoch": 66.33114754098361, "grad_norm": 2.595233678817749, "learning_rate": 5.3791521209169e-06, "loss": 0.0999, "step": 20231 }, { "epoch": 66.3344262295082, "grad_norm": 2.5696349143981934, "learning_rate": 5.378210427627025e-06, "loss": 0.2649, "step": 20232 }, { "epoch": 66.33770491803278, "grad_norm": 2.5202343463897705, "learning_rate": 5.377268786452629e-06, "loss": 0.169, "step": 20233 }, { "epoch": 66.34098360655737, "grad_norm": 3.2703585624694824, "learning_rate": 5.376327197404339e-06, "loss": 0.1184, "step": 20234 }, { "epoch": 66.34426229508196, "grad_norm": 3.1340091228485107, "learning_rate": 5.375385660492767e-06, "loss": 0.2432, "step": 20235 }, { "epoch": 66.34754098360656, "grad_norm": 2.5700042247772217, "learning_rate": 5.374444175728528e-06, "loss": 0.1993, "step": 20236 }, { "epoch": 66.35081967213115, "grad_norm": 3.1116814613342285, "learning_rate": 5.373502743122243e-06, "loss": 0.1719, "step": 20237 }, { "epoch": 66.35409836065574, "grad_norm": 3.3146023750305176, "learning_rate": 5.372561362684519e-06, "loss": 0.1375, "step": 20238 }, { "epoch": 66.35737704918033, "grad_norm": 2.534419298171997, "learning_rate": 5.371620034425981e-06, "loss": 0.1322, "step": 20239 }, { "epoch": 66.36065573770492, "grad_norm": 2.652824878692627, "learning_rate": 5.370678758357241e-06, "loss": 0.0988, "step": 20240 }, { "epoch": 66.3639344262295, "grad_norm": 3.6057403087615967, "learning_rate": 5.36973753448891e-06, "loss": 0.2096, "step": 20241 }, { "epoch": 66.3672131147541, "grad_norm": 2.686525821685791, "learning_rate": 5.368796362831599e-06, "loss": 0.2216, "step": 20242 }, { "epoch": 66.37049180327868, "grad_norm": 3.062767744064331, "learning_rate": 5.367855243395929e-06, "loss": 0.1434, "step": 20243 }, { "epoch": 66.37377049180328, "grad_norm": 2.700817108154297, "learning_rate": 5.366914176192506e-06, "loss": 0.1217, "step": 20244 }, { "epoch": 66.37704918032787, "grad_norm": 2.6618387699127197, "learning_rate": 5.365973161231943e-06, "loss": 0.1791, "step": 20245 }, { "epoch": 66.38032786885246, "grad_norm": 2.4403328895568848, "learning_rate": 5.3650321985248474e-06, "loss": 0.1176, "step": 20246 }, { "epoch": 66.38360655737705, "grad_norm": 2.5991647243499756, "learning_rate": 5.364091288081837e-06, "loss": 0.1849, "step": 20247 }, { "epoch": 66.38688524590164, "grad_norm": 2.3760581016540527, "learning_rate": 5.363150429913518e-06, "loss": 0.0777, "step": 20248 }, { "epoch": 66.39016393442623, "grad_norm": 3.3464245796203613, "learning_rate": 5.362209624030497e-06, "loss": 0.1167, "step": 20249 }, { "epoch": 66.39344262295081, "grad_norm": 2.3914248943328857, "learning_rate": 5.361268870443386e-06, "loss": 0.0503, "step": 20250 }, { "epoch": 66.3967213114754, "grad_norm": 3.1228866577148438, "learning_rate": 5.360328169162787e-06, "loss": 0.0887, "step": 20251 }, { "epoch": 66.4, "grad_norm": 2.3242204189300537, "learning_rate": 5.359387520199317e-06, "loss": 0.1444, "step": 20252 }, { "epoch": 66.4032786885246, "grad_norm": 4.975346088409424, "learning_rate": 5.358446923563576e-06, "loss": 0.1657, "step": 20253 }, { "epoch": 66.40655737704918, "grad_norm": 3.1197147369384766, "learning_rate": 5.357506379266173e-06, "loss": 0.1582, "step": 20254 }, { "epoch": 66.40983606557377, "grad_norm": 3.105564594268799, "learning_rate": 5.356565887317709e-06, "loss": 0.3776, "step": 20255 }, { "epoch": 66.41311475409836, "grad_norm": 2.275404691696167, "learning_rate": 5.355625447728796e-06, "loss": 0.0573, "step": 20256 }, { "epoch": 66.41639344262295, "grad_norm": 2.3991847038269043, "learning_rate": 5.354685060510035e-06, "loss": 0.1655, "step": 20257 }, { "epoch": 66.41967213114754, "grad_norm": 3.1777913570404053, "learning_rate": 5.35374472567203e-06, "loss": 0.2198, "step": 20258 }, { "epoch": 66.42295081967212, "grad_norm": 3.101691246032715, "learning_rate": 5.352804443225381e-06, "loss": 0.2066, "step": 20259 }, { "epoch": 66.42622950819673, "grad_norm": 2.9076485633850098, "learning_rate": 5.351864213180697e-06, "loss": 0.1221, "step": 20260 }, { "epoch": 66.42950819672132, "grad_norm": 3.664661407470703, "learning_rate": 5.350924035548576e-06, "loss": 0.1264, "step": 20261 }, { "epoch": 66.4327868852459, "grad_norm": 3.3555963039398193, "learning_rate": 5.349983910339621e-06, "loss": 0.0848, "step": 20262 }, { "epoch": 66.43606557377049, "grad_norm": 3.1525979042053223, "learning_rate": 5.349043837564432e-06, "loss": 0.2359, "step": 20263 }, { "epoch": 66.43934426229508, "grad_norm": 2.2463643550872803, "learning_rate": 5.348103817233605e-06, "loss": 0.1552, "step": 20264 }, { "epoch": 66.44262295081967, "grad_norm": 3.336580991744995, "learning_rate": 5.34716384935775e-06, "loss": 0.1222, "step": 20265 }, { "epoch": 66.44590163934426, "grad_norm": 3.3795056343078613, "learning_rate": 5.346223933947459e-06, "loss": 0.2176, "step": 20266 }, { "epoch": 66.44918032786886, "grad_norm": 2.495116710662842, "learning_rate": 5.345284071013328e-06, "loss": 0.126, "step": 20267 }, { "epoch": 66.45245901639345, "grad_norm": 2.656869888305664, "learning_rate": 5.3443442605659635e-06, "loss": 0.1717, "step": 20268 }, { "epoch": 66.45573770491804, "grad_norm": 3.261355400085449, "learning_rate": 5.343404502615957e-06, "loss": 0.1883, "step": 20269 }, { "epoch": 66.45901639344262, "grad_norm": 2.28989839553833, "learning_rate": 5.342464797173903e-06, "loss": 0.1546, "step": 20270 }, { "epoch": 66.46229508196721, "grad_norm": 2.387831687927246, "learning_rate": 5.341525144250406e-06, "loss": 0.1444, "step": 20271 }, { "epoch": 66.4655737704918, "grad_norm": 2.5507543087005615, "learning_rate": 5.340585543856055e-06, "loss": 0.0894, "step": 20272 }, { "epoch": 66.46885245901639, "grad_norm": 2.6552796363830566, "learning_rate": 5.339645996001443e-06, "loss": 0.1402, "step": 20273 }, { "epoch": 66.47213114754098, "grad_norm": 2.225954532623291, "learning_rate": 5.338706500697173e-06, "loss": 0.1245, "step": 20274 }, { "epoch": 66.47540983606558, "grad_norm": 4.964498996734619, "learning_rate": 5.337767057953833e-06, "loss": 0.214, "step": 20275 }, { "epoch": 66.47868852459017, "grad_norm": 2.6572296619415283, "learning_rate": 5.336827667782016e-06, "loss": 0.0884, "step": 20276 }, { "epoch": 66.48196721311476, "grad_norm": 3.066329002380371, "learning_rate": 5.335888330192313e-06, "loss": 0.1306, "step": 20277 }, { "epoch": 66.48524590163935, "grad_norm": 2.8540947437286377, "learning_rate": 5.334949045195321e-06, "loss": 0.1745, "step": 20278 }, { "epoch": 66.48852459016393, "grad_norm": 3.1299915313720703, "learning_rate": 5.334009812801631e-06, "loss": 0.2247, "step": 20279 }, { "epoch": 66.49180327868852, "grad_norm": 3.335627794265747, "learning_rate": 5.333070633021829e-06, "loss": 0.1627, "step": 20280 }, { "epoch": 66.49508196721311, "grad_norm": 2.397606611251831, "learning_rate": 5.332131505866507e-06, "loss": 0.123, "step": 20281 }, { "epoch": 66.4983606557377, "grad_norm": 3.892099618911743, "learning_rate": 5.331192431346257e-06, "loss": 0.0738, "step": 20282 }, { "epoch": 66.5016393442623, "grad_norm": 3.4050753116607666, "learning_rate": 5.330253409471668e-06, "loss": 0.2113, "step": 20283 }, { "epoch": 66.50491803278689, "grad_norm": 2.322328805923462, "learning_rate": 5.329314440253328e-06, "loss": 0.1064, "step": 20284 }, { "epoch": 66.50819672131148, "grad_norm": 2.9929890632629395, "learning_rate": 5.328375523701818e-06, "loss": 0.1501, "step": 20285 }, { "epoch": 66.51147540983607, "grad_norm": 2.907073497772217, "learning_rate": 5.327436659827737e-06, "loss": 0.2027, "step": 20286 }, { "epoch": 66.51475409836065, "grad_norm": 2.725067138671875, "learning_rate": 5.326497848641665e-06, "loss": 0.0927, "step": 20287 }, { "epoch": 66.51803278688524, "grad_norm": 3.377974271774292, "learning_rate": 5.325559090154189e-06, "loss": 0.1815, "step": 20288 }, { "epoch": 66.52131147540983, "grad_norm": 3.2560911178588867, "learning_rate": 5.324620384375895e-06, "loss": 0.2221, "step": 20289 }, { "epoch": 66.52459016393442, "grad_norm": 2.67303466796875, "learning_rate": 5.323681731317362e-06, "loss": 0.089, "step": 20290 }, { "epoch": 66.52786885245902, "grad_norm": 3.353978395462036, "learning_rate": 5.322743130989186e-06, "loss": 0.1714, "step": 20291 }, { "epoch": 66.53114754098361, "grad_norm": 5.453184127807617, "learning_rate": 5.3218045834019415e-06, "loss": 0.082, "step": 20292 }, { "epoch": 66.5344262295082, "grad_norm": 2.76955509185791, "learning_rate": 5.320866088566217e-06, "loss": 0.1993, "step": 20293 }, { "epoch": 66.53770491803279, "grad_norm": 2.4880266189575195, "learning_rate": 5.319927646492586e-06, "loss": 0.2011, "step": 20294 }, { "epoch": 66.54098360655738, "grad_norm": 3.402003049850464, "learning_rate": 5.318989257191643e-06, "loss": 0.1559, "step": 20295 }, { "epoch": 66.54426229508196, "grad_norm": 3.167219638824463, "learning_rate": 5.318050920673963e-06, "loss": 0.1133, "step": 20296 }, { "epoch": 66.54754098360655, "grad_norm": 2.789869546890259, "learning_rate": 5.317112636950126e-06, "loss": 0.1603, "step": 20297 }, { "epoch": 66.55081967213114, "grad_norm": 2.8394737243652344, "learning_rate": 5.316174406030709e-06, "loss": 0.1365, "step": 20298 }, { "epoch": 66.55409836065574, "grad_norm": 2.7292122840881348, "learning_rate": 5.315236227926299e-06, "loss": 0.1378, "step": 20299 }, { "epoch": 66.55737704918033, "grad_norm": 2.499211549758911, "learning_rate": 5.314298102647473e-06, "loss": 0.089, "step": 20300 }, { "epoch": 66.56065573770492, "grad_norm": 3.189030170440674, "learning_rate": 5.313360030204806e-06, "loss": 0.3113, "step": 20301 }, { "epoch": 66.56393442622951, "grad_norm": 2.47355318069458, "learning_rate": 5.312422010608879e-06, "loss": 0.1707, "step": 20302 }, { "epoch": 66.5672131147541, "grad_norm": 2.6024136543273926, "learning_rate": 5.311484043870263e-06, "loss": 0.0658, "step": 20303 }, { "epoch": 66.57049180327868, "grad_norm": 2.8409135341644287, "learning_rate": 5.310546129999543e-06, "loss": 0.2759, "step": 20304 }, { "epoch": 66.57377049180327, "grad_norm": 2.2971975803375244, "learning_rate": 5.3096082690072915e-06, "loss": 0.0739, "step": 20305 }, { "epoch": 66.57704918032788, "grad_norm": 1.948040246963501, "learning_rate": 5.308670460904084e-06, "loss": 0.1331, "step": 20306 }, { "epoch": 66.58032786885246, "grad_norm": 2.75398850440979, "learning_rate": 5.30773270570049e-06, "loss": 0.234, "step": 20307 }, { "epoch": 66.58360655737705, "grad_norm": 2.0071699619293213, "learning_rate": 5.306795003407093e-06, "loss": 0.1873, "step": 20308 }, { "epoch": 66.58688524590164, "grad_norm": 2.5458836555480957, "learning_rate": 5.305857354034463e-06, "loss": 0.1248, "step": 20309 }, { "epoch": 66.59016393442623, "grad_norm": 3.1630325317382812, "learning_rate": 5.30491975759317e-06, "loss": 0.2111, "step": 20310 }, { "epoch": 66.59344262295082, "grad_norm": 2.9928579330444336, "learning_rate": 5.303982214093786e-06, "loss": 0.1046, "step": 20311 }, { "epoch": 66.5967213114754, "grad_norm": 2.763162612915039, "learning_rate": 5.303044723546888e-06, "loss": 0.091, "step": 20312 }, { "epoch": 66.6, "grad_norm": 3.0315003395080566, "learning_rate": 5.302107285963045e-06, "loss": 0.077, "step": 20313 }, { "epoch": 66.6032786885246, "grad_norm": 2.1625478267669678, "learning_rate": 5.3011699013528275e-06, "loss": 0.1903, "step": 20314 }, { "epoch": 66.60655737704919, "grad_norm": 1.9408361911773682, "learning_rate": 5.300232569726805e-06, "loss": 0.0601, "step": 20315 }, { "epoch": 66.60983606557377, "grad_norm": 2.1887128353118896, "learning_rate": 5.299295291095541e-06, "loss": 0.1046, "step": 20316 }, { "epoch": 66.61311475409836, "grad_norm": 2.294311046600342, "learning_rate": 5.298358065469615e-06, "loss": 0.0876, "step": 20317 }, { "epoch": 66.61639344262295, "grad_norm": 3.0936264991760254, "learning_rate": 5.2974208928595905e-06, "loss": 0.1175, "step": 20318 }, { "epoch": 66.61967213114754, "grad_norm": 4.309625148773193, "learning_rate": 5.296483773276036e-06, "loss": 0.1082, "step": 20319 }, { "epoch": 66.62295081967213, "grad_norm": 2.480969190597534, "learning_rate": 5.295546706729511e-06, "loss": 0.2533, "step": 20320 }, { "epoch": 66.62622950819672, "grad_norm": 2.7315444946289062, "learning_rate": 5.294609693230593e-06, "loss": 0.1342, "step": 20321 }, { "epoch": 66.62950819672132, "grad_norm": 3.3771023750305176, "learning_rate": 5.293672732789844e-06, "loss": 0.2603, "step": 20322 }, { "epoch": 66.6327868852459, "grad_norm": 2.5510246753692627, "learning_rate": 5.292735825417824e-06, "loss": 0.1282, "step": 20323 }, { "epoch": 66.6360655737705, "grad_norm": 2.793520450592041, "learning_rate": 5.291798971125106e-06, "loss": 0.1473, "step": 20324 }, { "epoch": 66.63934426229508, "grad_norm": 2.4116134643554688, "learning_rate": 5.290862169922249e-06, "loss": 0.0743, "step": 20325 }, { "epoch": 66.64262295081967, "grad_norm": 3.132953405380249, "learning_rate": 5.289925421819815e-06, "loss": 0.1983, "step": 20326 }, { "epoch": 66.64590163934426, "grad_norm": 2.675090789794922, "learning_rate": 5.2889887268283725e-06, "loss": 0.138, "step": 20327 }, { "epoch": 66.64918032786885, "grad_norm": 2.8533973693847656, "learning_rate": 5.288052084958479e-06, "loss": 0.1737, "step": 20328 }, { "epoch": 66.65245901639344, "grad_norm": 2.5670218467712402, "learning_rate": 5.287115496220695e-06, "loss": 0.0435, "step": 20329 }, { "epoch": 66.65573770491804, "grad_norm": 1.9349501132965088, "learning_rate": 5.286178960625587e-06, "loss": 0.1915, "step": 20330 }, { "epoch": 66.65901639344263, "grad_norm": 2.2936174869537354, "learning_rate": 5.285242478183713e-06, "loss": 0.1454, "step": 20331 }, { "epoch": 66.66229508196722, "grad_norm": 2.5907206535339355, "learning_rate": 5.284306048905632e-06, "loss": 0.1254, "step": 20332 }, { "epoch": 66.6655737704918, "grad_norm": 3.421135902404785, "learning_rate": 5.283369672801899e-06, "loss": 0.1592, "step": 20333 }, { "epoch": 66.66885245901639, "grad_norm": 3.019345283508301, "learning_rate": 5.282433349883081e-06, "loss": 0.1692, "step": 20334 }, { "epoch": 66.67213114754098, "grad_norm": 2.5116870403289795, "learning_rate": 5.281497080159732e-06, "loss": 0.1174, "step": 20335 }, { "epoch": 66.67540983606557, "grad_norm": 2.2602267265319824, "learning_rate": 5.28056086364241e-06, "loss": 0.1408, "step": 20336 }, { "epoch": 66.67868852459016, "grad_norm": 3.809494733810425, "learning_rate": 5.279624700341668e-06, "loss": 0.3337, "step": 20337 }, { "epoch": 66.68196721311476, "grad_norm": 3.2499406337738037, "learning_rate": 5.2786885902680684e-06, "loss": 0.177, "step": 20338 }, { "epoch": 66.68524590163935, "grad_norm": 2.0148262977600098, "learning_rate": 5.277752533432163e-06, "loss": 0.0455, "step": 20339 }, { "epoch": 66.68852459016394, "grad_norm": 3.3629329204559326, "learning_rate": 5.276816529844508e-06, "loss": 0.3151, "step": 20340 }, { "epoch": 66.69180327868852, "grad_norm": 2.529010534286499, "learning_rate": 5.27588057951566e-06, "loss": 0.1274, "step": 20341 }, { "epoch": 66.69508196721311, "grad_norm": 2.0620765686035156, "learning_rate": 5.274944682456165e-06, "loss": 0.1267, "step": 20342 }, { "epoch": 66.6983606557377, "grad_norm": 3.3131000995635986, "learning_rate": 5.274008838676583e-06, "loss": 0.0667, "step": 20343 }, { "epoch": 66.70163934426229, "grad_norm": 2.702113151550293, "learning_rate": 5.273073048187468e-06, "loss": 0.1852, "step": 20344 }, { "epoch": 66.70491803278688, "grad_norm": 2.4785919189453125, "learning_rate": 5.2721373109993675e-06, "loss": 0.1358, "step": 20345 }, { "epoch": 66.70819672131148, "grad_norm": 2.6462526321411133, "learning_rate": 5.271201627122833e-06, "loss": 0.0865, "step": 20346 }, { "epoch": 66.71147540983607, "grad_norm": 2.1430459022521973, "learning_rate": 5.270265996568418e-06, "loss": 0.1049, "step": 20347 }, { "epoch": 66.71475409836066, "grad_norm": 3.0050013065338135, "learning_rate": 5.269330419346673e-06, "loss": 0.2889, "step": 20348 }, { "epoch": 66.71803278688525, "grad_norm": 2.425771474838257, "learning_rate": 5.268394895468147e-06, "loss": 0.1925, "step": 20349 }, { "epoch": 66.72131147540983, "grad_norm": 2.5597498416900635, "learning_rate": 5.267459424943382e-06, "loss": 0.1526, "step": 20350 }, { "epoch": 66.72459016393442, "grad_norm": 3.1510541439056396, "learning_rate": 5.266524007782939e-06, "loss": 0.1266, "step": 20351 }, { "epoch": 66.72786885245901, "grad_norm": 3.0897364616394043, "learning_rate": 5.265588643997357e-06, "loss": 0.1425, "step": 20352 }, { "epoch": 66.73114754098361, "grad_norm": 2.657522201538086, "learning_rate": 5.264653333597187e-06, "loss": 0.1273, "step": 20353 }, { "epoch": 66.7344262295082, "grad_norm": 2.992105484008789, "learning_rate": 5.263718076592974e-06, "loss": 0.2732, "step": 20354 }, { "epoch": 66.73770491803279, "grad_norm": 2.8840720653533936, "learning_rate": 5.26278287299526e-06, "loss": 0.0898, "step": 20355 }, { "epoch": 66.74098360655738, "grad_norm": 3.220838785171509, "learning_rate": 5.2618477228145995e-06, "loss": 0.3577, "step": 20356 }, { "epoch": 66.74426229508197, "grad_norm": 2.4193437099456787, "learning_rate": 5.260912626061531e-06, "loss": 0.1972, "step": 20357 }, { "epoch": 66.74754098360656, "grad_norm": 2.8881893157958984, "learning_rate": 5.2599775827466e-06, "loss": 0.3406, "step": 20358 }, { "epoch": 66.75081967213114, "grad_norm": 2.4372236728668213, "learning_rate": 5.259042592880348e-06, "loss": 0.0923, "step": 20359 }, { "epoch": 66.75409836065573, "grad_norm": 2.5568625926971436, "learning_rate": 5.2581076564733215e-06, "loss": 0.0799, "step": 20360 }, { "epoch": 66.75737704918033, "grad_norm": 3.3336920738220215, "learning_rate": 5.257172773536063e-06, "loss": 0.2599, "step": 20361 }, { "epoch": 66.76065573770492, "grad_norm": 3.282693862915039, "learning_rate": 5.256237944079113e-06, "loss": 0.1838, "step": 20362 }, { "epoch": 66.76393442622951, "grad_norm": 4.789698600769043, "learning_rate": 5.2553031681130085e-06, "loss": 0.2044, "step": 20363 }, { "epoch": 66.7672131147541, "grad_norm": 2.258000135421753, "learning_rate": 5.254368445648297e-06, "loss": 0.1309, "step": 20364 }, { "epoch": 66.77049180327869, "grad_norm": 3.1171932220458984, "learning_rate": 5.253433776695517e-06, "loss": 0.1716, "step": 20365 }, { "epoch": 66.77377049180328, "grad_norm": 2.761502981185913, "learning_rate": 5.252499161265205e-06, "loss": 0.2306, "step": 20366 }, { "epoch": 66.77704918032786, "grad_norm": 2.304725170135498, "learning_rate": 5.251564599367902e-06, "loss": 0.0792, "step": 20367 }, { "epoch": 66.78032786885245, "grad_norm": 2.973500967025757, "learning_rate": 5.2506300910141405e-06, "loss": 0.1762, "step": 20368 }, { "epoch": 66.78360655737706, "grad_norm": 2.3147482872009277, "learning_rate": 5.249695636214467e-06, "loss": 0.0945, "step": 20369 }, { "epoch": 66.78688524590164, "grad_norm": 2.4931640625, "learning_rate": 5.248761234979415e-06, "loss": 0.1523, "step": 20370 }, { "epoch": 66.79016393442623, "grad_norm": 3.082632303237915, "learning_rate": 5.24782688731952e-06, "loss": 0.1942, "step": 20371 }, { "epoch": 66.79344262295082, "grad_norm": 2.2510106563568115, "learning_rate": 5.246892593245313e-06, "loss": 0.0668, "step": 20372 }, { "epoch": 66.79672131147541, "grad_norm": 3.94878888130188, "learning_rate": 5.245958352767339e-06, "loss": 0.1423, "step": 20373 }, { "epoch": 66.8, "grad_norm": 2.7731215953826904, "learning_rate": 5.245024165896126e-06, "loss": 0.1647, "step": 20374 }, { "epoch": 66.80327868852459, "grad_norm": 2.6097681522369385, "learning_rate": 5.244090032642207e-06, "loss": 0.2976, "step": 20375 }, { "epoch": 66.80655737704917, "grad_norm": 2.5270068645477295, "learning_rate": 5.243155953016121e-06, "loss": 0.1776, "step": 20376 }, { "epoch": 66.80983606557378, "grad_norm": 1.9465123414993286, "learning_rate": 5.242221927028399e-06, "loss": 0.079, "step": 20377 }, { "epoch": 66.81311475409836, "grad_norm": 3.790313720703125, "learning_rate": 5.241287954689566e-06, "loss": 0.3659, "step": 20378 }, { "epoch": 66.81639344262295, "grad_norm": 2.4225549697875977, "learning_rate": 5.240354036010163e-06, "loss": 0.0904, "step": 20379 }, { "epoch": 66.81967213114754, "grad_norm": 2.7129247188568115, "learning_rate": 5.239420171000717e-06, "loss": 0.1746, "step": 20380 }, { "epoch": 66.82295081967213, "grad_norm": 2.2379860877990723, "learning_rate": 5.238486359671759e-06, "loss": 0.1721, "step": 20381 }, { "epoch": 66.82622950819672, "grad_norm": 2.431877613067627, "learning_rate": 5.2375526020338155e-06, "loss": 0.1203, "step": 20382 }, { "epoch": 66.8295081967213, "grad_norm": 2.8599770069122314, "learning_rate": 5.236618898097421e-06, "loss": 0.1231, "step": 20383 }, { "epoch": 66.8327868852459, "grad_norm": 2.145416736602783, "learning_rate": 5.235685247873099e-06, "loss": 0.0441, "step": 20384 }, { "epoch": 66.8360655737705, "grad_norm": 2.4421796798706055, "learning_rate": 5.234751651371378e-06, "loss": 0.0637, "step": 20385 }, { "epoch": 66.83934426229509, "grad_norm": 2.771434783935547, "learning_rate": 5.233818108602791e-06, "loss": 0.2484, "step": 20386 }, { "epoch": 66.84262295081967, "grad_norm": 2.5516483783721924, "learning_rate": 5.232884619577858e-06, "loss": 0.1759, "step": 20387 }, { "epoch": 66.84590163934426, "grad_norm": 2.850172758102417, "learning_rate": 5.231951184307109e-06, "loss": 0.2235, "step": 20388 }, { "epoch": 66.84918032786885, "grad_norm": 2.447993755340576, "learning_rate": 5.231017802801064e-06, "loss": 0.1651, "step": 20389 }, { "epoch": 66.85245901639344, "grad_norm": 3.3097543716430664, "learning_rate": 5.230084475070255e-06, "loss": 0.1682, "step": 20390 }, { "epoch": 66.85573770491803, "grad_norm": 2.983717203140259, "learning_rate": 5.229151201125204e-06, "loss": 0.1556, "step": 20391 }, { "epoch": 66.85901639344263, "grad_norm": 2.6512832641601562, "learning_rate": 5.228217980976433e-06, "loss": 0.2022, "step": 20392 }, { "epoch": 66.86229508196722, "grad_norm": 3.643329620361328, "learning_rate": 5.227284814634466e-06, "loss": 0.2595, "step": 20393 }, { "epoch": 66.8655737704918, "grad_norm": 3.139592170715332, "learning_rate": 5.226351702109821e-06, "loss": 0.27, "step": 20394 }, { "epoch": 66.8688524590164, "grad_norm": 2.5376484394073486, "learning_rate": 5.225418643413027e-06, "loss": 0.1, "step": 20395 }, { "epoch": 66.87213114754098, "grad_norm": 2.76605486869812, "learning_rate": 5.224485638554604e-06, "loss": 0.1526, "step": 20396 }, { "epoch": 66.87540983606557, "grad_norm": 2.643707752227783, "learning_rate": 5.223552687545069e-06, "loss": 0.0754, "step": 20397 }, { "epoch": 66.87868852459016, "grad_norm": 3.137826681137085, "learning_rate": 5.222619790394941e-06, "loss": 0.1929, "step": 20398 }, { "epoch": 66.88196721311475, "grad_norm": 3.225801944732666, "learning_rate": 5.221686947114745e-06, "loss": 0.1641, "step": 20399 }, { "epoch": 66.88524590163935, "grad_norm": 3.077791213989258, "learning_rate": 5.220754157714998e-06, "loss": 0.246, "step": 20400 }, { "epoch": 66.88852459016394, "grad_norm": 2.770700454711914, "learning_rate": 5.219821422206215e-06, "loss": 0.3073, "step": 20401 }, { "epoch": 66.89180327868853, "grad_norm": 2.1939890384674072, "learning_rate": 5.218888740598913e-06, "loss": 0.1811, "step": 20402 }, { "epoch": 66.89508196721312, "grad_norm": 2.65175199508667, "learning_rate": 5.217956112903616e-06, "loss": 0.1039, "step": 20403 }, { "epoch": 66.8983606557377, "grad_norm": 2.295182466506958, "learning_rate": 5.217023539130835e-06, "loss": 0.0642, "step": 20404 }, { "epoch": 66.90163934426229, "grad_norm": 3.0105440616607666, "learning_rate": 5.216091019291088e-06, "loss": 0.1406, "step": 20405 }, { "epoch": 66.90491803278688, "grad_norm": 2.851717233657837, "learning_rate": 5.215158553394888e-06, "loss": 0.1006, "step": 20406 }, { "epoch": 66.90819672131147, "grad_norm": 2.0917232036590576, "learning_rate": 5.214226141452745e-06, "loss": 0.0542, "step": 20407 }, { "epoch": 66.91147540983607, "grad_norm": 2.6021344661712646, "learning_rate": 5.213293783475184e-06, "loss": 0.0989, "step": 20408 }, { "epoch": 66.91475409836066, "grad_norm": 2.727022886276245, "learning_rate": 5.212361479472712e-06, "loss": 0.0788, "step": 20409 }, { "epoch": 66.91803278688525, "grad_norm": 2.956357479095459, "learning_rate": 5.2114292294558425e-06, "loss": 0.1108, "step": 20410 }, { "epoch": 66.92131147540984, "grad_norm": 2.534585952758789, "learning_rate": 5.210497033435083e-06, "loss": 0.1722, "step": 20411 }, { "epoch": 66.92459016393443, "grad_norm": 1.8514989614486694, "learning_rate": 5.209564891420953e-06, "loss": 0.0749, "step": 20412 }, { "epoch": 66.92786885245901, "grad_norm": 16.727397918701172, "learning_rate": 5.208632803423959e-06, "loss": 0.2502, "step": 20413 }, { "epoch": 66.9311475409836, "grad_norm": 2.9957077503204346, "learning_rate": 5.2077007694546134e-06, "loss": 0.2093, "step": 20414 }, { "epoch": 66.93442622950819, "grad_norm": 3.501835346221924, "learning_rate": 5.20676878952342e-06, "loss": 0.1944, "step": 20415 }, { "epoch": 66.9377049180328, "grad_norm": 2.7561216354370117, "learning_rate": 5.205836863640896e-06, "loss": 0.1826, "step": 20416 }, { "epoch": 66.94098360655738, "grad_norm": 2.123154401779175, "learning_rate": 5.204904991817545e-06, "loss": 0.1265, "step": 20417 }, { "epoch": 66.94426229508197, "grad_norm": 2.618647813796997, "learning_rate": 5.203973174063876e-06, "loss": 0.1129, "step": 20418 }, { "epoch": 66.94754098360656, "grad_norm": 2.419503927230835, "learning_rate": 5.203041410390398e-06, "loss": 0.1367, "step": 20419 }, { "epoch": 66.95081967213115, "grad_norm": 3.035916805267334, "learning_rate": 5.202109700807612e-06, "loss": 0.0928, "step": 20420 }, { "epoch": 66.95409836065573, "grad_norm": 3.360750675201416, "learning_rate": 5.20117804532603e-06, "loss": 0.2078, "step": 20421 }, { "epoch": 66.95737704918032, "grad_norm": 3.4537651538848877, "learning_rate": 5.200246443956154e-06, "loss": 0.2027, "step": 20422 }, { "epoch": 66.96065573770491, "grad_norm": 1.9399921894073486, "learning_rate": 5.1993148967084916e-06, "loss": 0.0352, "step": 20423 }, { "epoch": 66.96393442622951, "grad_norm": 2.489288806915283, "learning_rate": 5.198383403593541e-06, "loss": 0.143, "step": 20424 }, { "epoch": 66.9672131147541, "grad_norm": 2.0360770225524902, "learning_rate": 5.197451964621813e-06, "loss": 0.0527, "step": 20425 }, { "epoch": 66.97049180327869, "grad_norm": 3.07248854637146, "learning_rate": 5.196520579803808e-06, "loss": 0.075, "step": 20426 }, { "epoch": 66.97377049180328, "grad_norm": 2.4006974697113037, "learning_rate": 5.195589249150028e-06, "loss": 0.1469, "step": 20427 }, { "epoch": 66.97704918032787, "grad_norm": 3.4288015365600586, "learning_rate": 5.194657972670971e-06, "loss": 0.3863, "step": 20428 }, { "epoch": 66.98032786885246, "grad_norm": 2.081782817840576, "learning_rate": 5.193726750377146e-06, "loss": 0.0524, "step": 20429 }, { "epoch": 66.98360655737704, "grad_norm": 2.498296022415161, "learning_rate": 5.192795582279049e-06, "loss": 0.1215, "step": 20430 }, { "epoch": 66.98688524590163, "grad_norm": 3.1462972164154053, "learning_rate": 5.191864468387176e-06, "loss": 0.2789, "step": 20431 }, { "epoch": 66.99016393442623, "grad_norm": 2.527411460876465, "learning_rate": 5.190933408712033e-06, "loss": 0.2619, "step": 20432 }, { "epoch": 66.99344262295082, "grad_norm": 2.068854808807373, "learning_rate": 5.190002403264116e-06, "loss": 0.1676, "step": 20433 }, { "epoch": 66.99672131147541, "grad_norm": 2.6929399967193604, "learning_rate": 5.18907145205392e-06, "loss": 0.1146, "step": 20434 }, { "epoch": 67.0, "grad_norm": 3.6288208961486816, "learning_rate": 5.18814055509195e-06, "loss": 0.1779, "step": 20435 }, { "epoch": 67.00327868852459, "grad_norm": 2.4544429779052734, "learning_rate": 5.187209712388696e-06, "loss": 0.1622, "step": 20436 }, { "epoch": 67.00655737704918, "grad_norm": 3.1657118797302246, "learning_rate": 5.186278923954655e-06, "loss": 0.1477, "step": 20437 }, { "epoch": 67.00983606557377, "grad_norm": 3.1966288089752197, "learning_rate": 5.185348189800328e-06, "loss": 0.1826, "step": 20438 }, { "epoch": 67.01311475409837, "grad_norm": 3.2469844818115234, "learning_rate": 5.184417509936206e-06, "loss": 0.124, "step": 20439 }, { "epoch": 67.01639344262296, "grad_norm": 2.4399378299713135, "learning_rate": 5.183486884372782e-06, "loss": 0.0649, "step": 20440 }, { "epoch": 67.01967213114754, "grad_norm": 2.549272060394287, "learning_rate": 5.182556313120553e-06, "loss": 0.1221, "step": 20441 }, { "epoch": 67.02295081967213, "grad_norm": 2.3725218772888184, "learning_rate": 5.181625796190005e-06, "loss": 0.1211, "step": 20442 }, { "epoch": 67.02622950819672, "grad_norm": 2.0914056301116943, "learning_rate": 5.180695333591641e-06, "loss": 0.1002, "step": 20443 }, { "epoch": 67.02950819672131, "grad_norm": 3.391144275665283, "learning_rate": 5.179764925335948e-06, "loss": 0.2456, "step": 20444 }, { "epoch": 67.0327868852459, "grad_norm": 3.0765974521636963, "learning_rate": 5.178834571433416e-06, "loss": 0.2233, "step": 20445 }, { "epoch": 67.03606557377049, "grad_norm": 2.4939401149749756, "learning_rate": 5.177904271894535e-06, "loss": 0.1452, "step": 20446 }, { "epoch": 67.03934426229509, "grad_norm": 2.5459935665130615, "learning_rate": 5.1769740267297995e-06, "loss": 0.0879, "step": 20447 }, { "epoch": 67.04262295081968, "grad_norm": 2.682143211364746, "learning_rate": 5.1760438359496975e-06, "loss": 0.0835, "step": 20448 }, { "epoch": 67.04590163934427, "grad_norm": 2.3348639011383057, "learning_rate": 5.175113699564716e-06, "loss": 0.0877, "step": 20449 }, { "epoch": 67.04918032786885, "grad_norm": 2.5980005264282227, "learning_rate": 5.17418361758534e-06, "loss": 0.0675, "step": 20450 }, { "epoch": 67.05245901639344, "grad_norm": 2.839548349380493, "learning_rate": 5.173253590022067e-06, "loss": 0.1697, "step": 20451 }, { "epoch": 67.05573770491803, "grad_norm": 2.7026731967926025, "learning_rate": 5.172323616885378e-06, "loss": 0.1349, "step": 20452 }, { "epoch": 67.05901639344262, "grad_norm": 3.3540537357330322, "learning_rate": 5.1713936981857585e-06, "loss": 0.1263, "step": 20453 }, { "epoch": 67.0622950819672, "grad_norm": 2.5552682876586914, "learning_rate": 5.170463833933696e-06, "loss": 0.1901, "step": 20454 }, { "epoch": 67.06557377049181, "grad_norm": 3.0636191368103027, "learning_rate": 5.169534024139671e-06, "loss": 0.1986, "step": 20455 }, { "epoch": 67.0688524590164, "grad_norm": 2.5033323764801025, "learning_rate": 5.1686042688141755e-06, "loss": 0.1759, "step": 20456 }, { "epoch": 67.07213114754099, "grad_norm": 2.356401205062866, "learning_rate": 5.167674567967692e-06, "loss": 0.0995, "step": 20457 }, { "epoch": 67.07540983606557, "grad_norm": 2.690376043319702, "learning_rate": 5.166744921610701e-06, "loss": 0.1942, "step": 20458 }, { "epoch": 67.07868852459016, "grad_norm": 2.5358738899230957, "learning_rate": 5.165815329753683e-06, "loss": 0.0869, "step": 20459 }, { "epoch": 67.08196721311475, "grad_norm": 2.9478132724761963, "learning_rate": 5.164885792407127e-06, "loss": 0.1168, "step": 20460 }, { "epoch": 67.08524590163934, "grad_norm": 3.0392003059387207, "learning_rate": 5.163956309581512e-06, "loss": 0.1831, "step": 20461 }, { "epoch": 67.08852459016393, "grad_norm": 4.23804235458374, "learning_rate": 5.163026881287316e-06, "loss": 0.1024, "step": 20462 }, { "epoch": 67.09180327868853, "grad_norm": 3.3961079120635986, "learning_rate": 5.162097507535018e-06, "loss": 0.1972, "step": 20463 }, { "epoch": 67.09508196721312, "grad_norm": 2.519951343536377, "learning_rate": 5.161168188335105e-06, "loss": 0.0868, "step": 20464 }, { "epoch": 67.09836065573771, "grad_norm": 2.363978385925293, "learning_rate": 5.160238923698051e-06, "loss": 0.0627, "step": 20465 }, { "epoch": 67.1016393442623, "grad_norm": 2.285043478012085, "learning_rate": 5.159309713634337e-06, "loss": 0.0664, "step": 20466 }, { "epoch": 67.10491803278688, "grad_norm": 3.511235237121582, "learning_rate": 5.158380558154437e-06, "loss": 0.1181, "step": 20467 }, { "epoch": 67.10819672131147, "grad_norm": 5.193399429321289, "learning_rate": 5.157451457268827e-06, "loss": 0.1378, "step": 20468 }, { "epoch": 67.11147540983606, "grad_norm": 2.22833514213562, "learning_rate": 5.156522410987992e-06, "loss": 0.2245, "step": 20469 }, { "epoch": 67.11475409836065, "grad_norm": 2.7044858932495117, "learning_rate": 5.155593419322401e-06, "loss": 0.1624, "step": 20470 }, { "epoch": 67.11803278688525, "grad_norm": 2.311356544494629, "learning_rate": 5.154664482282532e-06, "loss": 0.0647, "step": 20471 }, { "epoch": 67.12131147540984, "grad_norm": 3.344480514526367, "learning_rate": 5.1537355998788555e-06, "loss": 0.2812, "step": 20472 }, { "epoch": 67.12459016393443, "grad_norm": 64.68514251708984, "learning_rate": 5.152806772121852e-06, "loss": 0.1562, "step": 20473 }, { "epoch": 67.12786885245902, "grad_norm": 3.340257167816162, "learning_rate": 5.151877999021992e-06, "loss": 0.1085, "step": 20474 }, { "epoch": 67.1311475409836, "grad_norm": 2.7931790351867676, "learning_rate": 5.150949280589748e-06, "loss": 0.1615, "step": 20475 }, { "epoch": 67.1344262295082, "grad_norm": 2.645315647125244, "learning_rate": 5.15002061683559e-06, "loss": 0.1562, "step": 20476 }, { "epoch": 67.13770491803278, "grad_norm": 2.9345688819885254, "learning_rate": 5.149092007769994e-06, "loss": 0.1314, "step": 20477 }, { "epoch": 67.14098360655737, "grad_norm": 3.893799304962158, "learning_rate": 5.148163453403431e-06, "loss": 0.1112, "step": 20478 }, { "epoch": 67.14426229508197, "grad_norm": 3.289884328842163, "learning_rate": 5.1472349537463695e-06, "loss": 0.1166, "step": 20479 }, { "epoch": 67.14754098360656, "grad_norm": 3.154632806777954, "learning_rate": 5.146306508809275e-06, "loss": 0.1928, "step": 20480 }, { "epoch": 67.15081967213115, "grad_norm": 3.037086248397827, "learning_rate": 5.145378118602626e-06, "loss": 0.1707, "step": 20481 }, { "epoch": 67.15409836065574, "grad_norm": 3.359102725982666, "learning_rate": 5.144449783136886e-06, "loss": 0.1917, "step": 20482 }, { "epoch": 67.15737704918033, "grad_norm": 3.3122429847717285, "learning_rate": 5.1435215024225215e-06, "loss": 0.178, "step": 20483 }, { "epoch": 67.16065573770491, "grad_norm": 2.693202257156372, "learning_rate": 5.14259327647e-06, "loss": 0.154, "step": 20484 }, { "epoch": 67.1639344262295, "grad_norm": 2.9857308864593506, "learning_rate": 5.141665105289792e-06, "loss": 0.1804, "step": 20485 }, { "epoch": 67.1672131147541, "grad_norm": 2.9854862689971924, "learning_rate": 5.140736988892363e-06, "loss": 0.2517, "step": 20486 }, { "epoch": 67.1704918032787, "grad_norm": 3.866672992706299, "learning_rate": 5.139808927288173e-06, "loss": 0.1811, "step": 20487 }, { "epoch": 67.17377049180328, "grad_norm": 3.0771164894104004, "learning_rate": 5.138880920487692e-06, "loss": 0.0819, "step": 20488 }, { "epoch": 67.17704918032787, "grad_norm": 3.7083845138549805, "learning_rate": 5.137952968501385e-06, "loss": 0.2452, "step": 20489 }, { "epoch": 67.18032786885246, "grad_norm": 2.9056217670440674, "learning_rate": 5.137025071339709e-06, "loss": 0.1067, "step": 20490 }, { "epoch": 67.18360655737705, "grad_norm": 3.714766263961792, "learning_rate": 5.136097229013135e-06, "loss": 0.1873, "step": 20491 }, { "epoch": 67.18688524590164, "grad_norm": 3.4326841831207275, "learning_rate": 5.135169441532123e-06, "loss": 0.1192, "step": 20492 }, { "epoch": 67.19016393442622, "grad_norm": 2.6360485553741455, "learning_rate": 5.1342417089071325e-06, "loss": 0.2112, "step": 20493 }, { "epoch": 67.19344262295083, "grad_norm": 2.628246784210205, "learning_rate": 5.133314031148623e-06, "loss": 0.1795, "step": 20494 }, { "epoch": 67.19672131147541, "grad_norm": 3.1092495918273926, "learning_rate": 5.132386408267062e-06, "loss": 0.2039, "step": 20495 }, { "epoch": 67.2, "grad_norm": 2.3415017127990723, "learning_rate": 5.131458840272905e-06, "loss": 0.1974, "step": 20496 }, { "epoch": 67.20327868852459, "grad_norm": 3.255908966064453, "learning_rate": 5.130531327176611e-06, "loss": 0.1148, "step": 20497 }, { "epoch": 67.20655737704918, "grad_norm": 2.6319260597229004, "learning_rate": 5.129603868988635e-06, "loss": 0.0448, "step": 20498 }, { "epoch": 67.20983606557377, "grad_norm": 3.4326791763305664, "learning_rate": 5.1286764657194446e-06, "loss": 0.0884, "step": 20499 }, { "epoch": 67.21311475409836, "grad_norm": 5.307013511657715, "learning_rate": 5.1277491173794905e-06, "loss": 0.1901, "step": 20500 }, { "epoch": 67.21639344262294, "grad_norm": 3.1156065464019775, "learning_rate": 5.126821823979233e-06, "loss": 0.1223, "step": 20501 }, { "epoch": 67.21967213114755, "grad_norm": 2.9257843494415283, "learning_rate": 5.125894585529121e-06, "loss": 0.1211, "step": 20502 }, { "epoch": 67.22295081967214, "grad_norm": 3.445343494415283, "learning_rate": 5.124967402039618e-06, "loss": 0.2605, "step": 20503 }, { "epoch": 67.22622950819672, "grad_norm": 2.589867115020752, "learning_rate": 5.124040273521178e-06, "loss": 0.1163, "step": 20504 }, { "epoch": 67.22950819672131, "grad_norm": 3.03560733795166, "learning_rate": 5.123113199984253e-06, "loss": 0.1342, "step": 20505 }, { "epoch": 67.2327868852459, "grad_norm": 3.938978910446167, "learning_rate": 5.122186181439298e-06, "loss": 0.1435, "step": 20506 }, { "epoch": 67.23606557377049, "grad_norm": 4.588141441345215, "learning_rate": 5.121259217896762e-06, "loss": 0.0644, "step": 20507 }, { "epoch": 67.23934426229508, "grad_norm": 6.335933685302734, "learning_rate": 5.120332309367103e-06, "loss": 0.2665, "step": 20508 }, { "epoch": 67.24262295081967, "grad_norm": 3.009570837020874, "learning_rate": 5.119405455860772e-06, "loss": 0.092, "step": 20509 }, { "epoch": 67.24590163934427, "grad_norm": 3.7934036254882812, "learning_rate": 5.118478657388219e-06, "loss": 0.169, "step": 20510 }, { "epoch": 67.24918032786886, "grad_norm": 2.891887903213501, "learning_rate": 5.11755191395989e-06, "loss": 0.1169, "step": 20511 }, { "epoch": 67.25245901639344, "grad_norm": 2.0969645977020264, "learning_rate": 5.116625225586245e-06, "loss": 0.1118, "step": 20512 }, { "epoch": 67.25573770491803, "grad_norm": 2.8521125316619873, "learning_rate": 5.115698592277727e-06, "loss": 0.2568, "step": 20513 }, { "epoch": 67.25901639344262, "grad_norm": 2.371119737625122, "learning_rate": 5.114772014044787e-06, "loss": 0.1437, "step": 20514 }, { "epoch": 67.26229508196721, "grad_norm": 2.553058624267578, "learning_rate": 5.1138454908978665e-06, "loss": 0.0796, "step": 20515 }, { "epoch": 67.2655737704918, "grad_norm": 3.2445929050445557, "learning_rate": 5.112919022847422e-06, "loss": 0.2533, "step": 20516 }, { "epoch": 67.26885245901639, "grad_norm": 2.500669002532959, "learning_rate": 5.111992609903898e-06, "loss": 0.0643, "step": 20517 }, { "epoch": 67.27213114754099, "grad_norm": 2.8724122047424316, "learning_rate": 5.111066252077739e-06, "loss": 0.2939, "step": 20518 }, { "epoch": 67.27540983606558, "grad_norm": 2.8837637901306152, "learning_rate": 5.1101399493793915e-06, "loss": 0.1215, "step": 20519 }, { "epoch": 67.27868852459017, "grad_norm": 3.1128852367401123, "learning_rate": 5.109213701819296e-06, "loss": 0.1543, "step": 20520 }, { "epoch": 67.28196721311475, "grad_norm": 2.631808280944824, "learning_rate": 5.108287509407905e-06, "loss": 0.1563, "step": 20521 }, { "epoch": 67.28524590163934, "grad_norm": 3.800947666168213, "learning_rate": 5.107361372155659e-06, "loss": 0.2279, "step": 20522 }, { "epoch": 67.28852459016393, "grad_norm": 3.229388475418091, "learning_rate": 5.106435290073e-06, "loss": 0.1288, "step": 20523 }, { "epoch": 67.29180327868852, "grad_norm": 2.8604931831359863, "learning_rate": 5.105509263170367e-06, "loss": 0.1392, "step": 20524 }, { "epoch": 67.29508196721312, "grad_norm": 3.8888888359069824, "learning_rate": 5.104583291458212e-06, "loss": 0.3307, "step": 20525 }, { "epoch": 67.29836065573771, "grad_norm": 2.9609360694885254, "learning_rate": 5.103657374946969e-06, "loss": 0.2315, "step": 20526 }, { "epoch": 67.3016393442623, "grad_norm": 1.7404805421829224, "learning_rate": 5.102731513647079e-06, "loss": 0.0426, "step": 20527 }, { "epoch": 67.30491803278689, "grad_norm": 2.564338207244873, "learning_rate": 5.1018057075689805e-06, "loss": 0.1508, "step": 20528 }, { "epoch": 67.30819672131148, "grad_norm": 3.5638537406921387, "learning_rate": 5.100879956723119e-06, "loss": 0.1767, "step": 20529 }, { "epoch": 67.31147540983606, "grad_norm": 3.0596911907196045, "learning_rate": 5.099954261119929e-06, "loss": 0.122, "step": 20530 }, { "epoch": 67.31475409836065, "grad_norm": 2.2860448360443115, "learning_rate": 5.09902862076985e-06, "loss": 0.1301, "step": 20531 }, { "epoch": 67.31803278688524, "grad_norm": 3.0906288623809814, "learning_rate": 5.09810303568332e-06, "loss": 0.1741, "step": 20532 }, { "epoch": 67.32131147540984, "grad_norm": 2.578254461288452, "learning_rate": 5.0971775058707695e-06, "loss": 0.1038, "step": 20533 }, { "epoch": 67.32459016393443, "grad_norm": 3.107494592666626, "learning_rate": 5.096252031342646e-06, "loss": 0.1366, "step": 20534 }, { "epoch": 67.32786885245902, "grad_norm": 3.6531617641448975, "learning_rate": 5.095326612109378e-06, "loss": 0.1574, "step": 20535 }, { "epoch": 67.33114754098361, "grad_norm": 2.63893985748291, "learning_rate": 5.094401248181399e-06, "loss": 0.151, "step": 20536 }, { "epoch": 67.3344262295082, "grad_norm": 3.141418933868408, "learning_rate": 5.09347593956915e-06, "loss": 0.1774, "step": 20537 }, { "epoch": 67.33770491803278, "grad_norm": 1.8642842769622803, "learning_rate": 5.092550686283061e-06, "loss": 0.0656, "step": 20538 }, { "epoch": 67.34098360655737, "grad_norm": 3.1129257678985596, "learning_rate": 5.091625488333561e-06, "loss": 0.1123, "step": 20539 }, { "epoch": 67.34426229508196, "grad_norm": 3.6954524517059326, "learning_rate": 5.090700345731092e-06, "loss": 0.1175, "step": 20540 }, { "epoch": 67.34754098360656, "grad_norm": 2.5879576206207275, "learning_rate": 5.089775258486081e-06, "loss": 0.1423, "step": 20541 }, { "epoch": 67.35081967213115, "grad_norm": 3.5184409618377686, "learning_rate": 5.088850226608954e-06, "loss": 0.0818, "step": 20542 }, { "epoch": 67.35409836065574, "grad_norm": 3.308140993118286, "learning_rate": 5.087925250110153e-06, "loss": 0.1791, "step": 20543 }, { "epoch": 67.35737704918033, "grad_norm": 4.070566654205322, "learning_rate": 5.087000329000101e-06, "loss": 0.1399, "step": 20544 }, { "epoch": 67.36065573770492, "grad_norm": 2.330017566680908, "learning_rate": 5.086075463289229e-06, "loss": 0.0941, "step": 20545 }, { "epoch": 67.3639344262295, "grad_norm": 17.019563674926758, "learning_rate": 5.085150652987962e-06, "loss": 0.1854, "step": 20546 }, { "epoch": 67.3672131147541, "grad_norm": 2.787027597427368, "learning_rate": 5.084225898106734e-06, "loss": 0.1, "step": 20547 }, { "epoch": 67.37049180327868, "grad_norm": 3.1060705184936523, "learning_rate": 5.08330119865597e-06, "loss": 0.0909, "step": 20548 }, { "epoch": 67.37377049180328, "grad_norm": 1.4514524936676025, "learning_rate": 5.082376554646098e-06, "loss": 0.1077, "step": 20549 }, { "epoch": 67.37704918032787, "grad_norm": 3.458117961883545, "learning_rate": 5.081451966087539e-06, "loss": 0.1454, "step": 20550 }, { "epoch": 67.38032786885246, "grad_norm": 2.9650144577026367, "learning_rate": 5.080527432990727e-06, "loss": 0.2828, "step": 20551 }, { "epoch": 67.38360655737705, "grad_norm": 2.50671648979187, "learning_rate": 5.0796029553660845e-06, "loss": 0.0568, "step": 20552 }, { "epoch": 67.38688524590164, "grad_norm": 3.114071846008301, "learning_rate": 5.078678533224033e-06, "loss": 0.1917, "step": 20553 }, { "epoch": 67.39016393442623, "grad_norm": 4.649930000305176, "learning_rate": 5.0777541665749955e-06, "loss": 0.2872, "step": 20554 }, { "epoch": 67.39344262295081, "grad_norm": 2.2964327335357666, "learning_rate": 5.076829855429399e-06, "loss": 0.1243, "step": 20555 }, { "epoch": 67.3967213114754, "grad_norm": 3.2869577407836914, "learning_rate": 5.075905599797668e-06, "loss": 0.191, "step": 20556 }, { "epoch": 67.4, "grad_norm": 3.3027801513671875, "learning_rate": 5.074981399690219e-06, "loss": 0.1068, "step": 20557 }, { "epoch": 67.4032786885246, "grad_norm": 1.5332226753234863, "learning_rate": 5.074057255117475e-06, "loss": 0.0225, "step": 20558 }, { "epoch": 67.40655737704918, "grad_norm": 3.5445594787597656, "learning_rate": 5.073133166089854e-06, "loss": 0.4035, "step": 20559 }, { "epoch": 67.40983606557377, "grad_norm": 3.937241792678833, "learning_rate": 5.072209132617784e-06, "loss": 0.1692, "step": 20560 }, { "epoch": 67.41311475409836, "grad_norm": 3.182650566101074, "learning_rate": 5.071285154711678e-06, "loss": 0.0956, "step": 20561 }, { "epoch": 67.41639344262295, "grad_norm": 2.8330538272857666, "learning_rate": 5.070361232381958e-06, "loss": 0.2345, "step": 20562 }, { "epoch": 67.41967213114754, "grad_norm": 3.558290719985962, "learning_rate": 5.069437365639036e-06, "loss": 0.1953, "step": 20563 }, { "epoch": 67.42295081967212, "grad_norm": 2.6500980854034424, "learning_rate": 5.068513554493339e-06, "loss": 0.1985, "step": 20564 }, { "epoch": 67.42622950819673, "grad_norm": 3.065788745880127, "learning_rate": 5.067589798955278e-06, "loss": 0.0875, "step": 20565 }, { "epoch": 67.42950819672132, "grad_norm": 3.095287561416626, "learning_rate": 5.066666099035271e-06, "loss": 0.1897, "step": 20566 }, { "epoch": 67.4327868852459, "grad_norm": 3.231290578842163, "learning_rate": 5.0657424547437285e-06, "loss": 0.0763, "step": 20567 }, { "epoch": 67.43606557377049, "grad_norm": 2.901962995529175, "learning_rate": 5.0648188660910745e-06, "loss": 0.118, "step": 20568 }, { "epoch": 67.43934426229508, "grad_norm": 3.550856590270996, "learning_rate": 5.063895333087719e-06, "loss": 0.3852, "step": 20569 }, { "epoch": 67.44262295081967, "grad_norm": 2.7346489429473877, "learning_rate": 5.062971855744077e-06, "loss": 0.0912, "step": 20570 }, { "epoch": 67.44590163934426, "grad_norm": 9.728199005126953, "learning_rate": 5.062048434070559e-06, "loss": 0.2471, "step": 20571 }, { "epoch": 67.44918032786886, "grad_norm": 3.4005496501922607, "learning_rate": 5.061125068077575e-06, "loss": 0.2893, "step": 20572 }, { "epoch": 67.45245901639345, "grad_norm": 7.314855098724365, "learning_rate": 5.0602017577755465e-06, "loss": 0.2317, "step": 20573 }, { "epoch": 67.45573770491804, "grad_norm": 3.135728597640991, "learning_rate": 5.059278503174878e-06, "loss": 0.1102, "step": 20574 }, { "epoch": 67.45901639344262, "grad_norm": 2.7022924423217773, "learning_rate": 5.058355304285982e-06, "loss": 0.1951, "step": 20575 }, { "epoch": 67.46229508196721, "grad_norm": 2.159862995147705, "learning_rate": 5.057432161119264e-06, "loss": 0.1052, "step": 20576 }, { "epoch": 67.4655737704918, "grad_norm": 2.076549768447876, "learning_rate": 5.05650907368514e-06, "loss": 0.0455, "step": 20577 }, { "epoch": 67.46885245901639, "grad_norm": 20.994699478149414, "learning_rate": 5.055586041994019e-06, "loss": 0.1687, "step": 20578 }, { "epoch": 67.47213114754098, "grad_norm": 2.5757784843444824, "learning_rate": 5.0546630660563045e-06, "loss": 0.2471, "step": 20579 }, { "epoch": 67.47540983606558, "grad_norm": 4.693925857543945, "learning_rate": 5.053740145882402e-06, "loss": 0.1361, "step": 20580 }, { "epoch": 67.47868852459017, "grad_norm": 10.579547882080078, "learning_rate": 5.052817281482726e-06, "loss": 0.1128, "step": 20581 }, { "epoch": 67.48196721311476, "grad_norm": 2.5719642639160156, "learning_rate": 5.051894472867679e-06, "loss": 0.0546, "step": 20582 }, { "epoch": 67.48524590163935, "grad_norm": 2.4698305130004883, "learning_rate": 5.050971720047666e-06, "loss": 0.1393, "step": 20583 }, { "epoch": 67.48852459016393, "grad_norm": 2.721123218536377, "learning_rate": 5.050049023033093e-06, "loss": 0.0775, "step": 20584 }, { "epoch": 67.49180327868852, "grad_norm": 2.582000494003296, "learning_rate": 5.049126381834361e-06, "loss": 0.3165, "step": 20585 }, { "epoch": 67.49508196721311, "grad_norm": 3.038825511932373, "learning_rate": 5.0482037964618795e-06, "loss": 0.1882, "step": 20586 }, { "epoch": 67.4983606557377, "grad_norm": 1.9067811965942383, "learning_rate": 5.047281266926049e-06, "loss": 0.0505, "step": 20587 }, { "epoch": 67.5016393442623, "grad_norm": 2.252448797225952, "learning_rate": 5.046358793237271e-06, "loss": 0.1747, "step": 20588 }, { "epoch": 67.50491803278689, "grad_norm": 4.091323375701904, "learning_rate": 5.045436375405945e-06, "loss": 0.1748, "step": 20589 }, { "epoch": 67.50819672131148, "grad_norm": 5.654068470001221, "learning_rate": 5.0445140134424795e-06, "loss": 0.0765, "step": 20590 }, { "epoch": 67.51147540983607, "grad_norm": 3.1619293689727783, "learning_rate": 5.043591707357271e-06, "loss": 0.1598, "step": 20591 }, { "epoch": 67.51475409836065, "grad_norm": 7.785762786865234, "learning_rate": 5.042669457160715e-06, "loss": 0.1101, "step": 20592 }, { "epoch": 67.51803278688524, "grad_norm": 3.2163448333740234, "learning_rate": 5.041747262863219e-06, "loss": 0.1186, "step": 20593 }, { "epoch": 67.52131147540983, "grad_norm": 4.8012542724609375, "learning_rate": 5.040825124475177e-06, "loss": 0.2373, "step": 20594 }, { "epoch": 67.52459016393442, "grad_norm": 3.284871816635132, "learning_rate": 5.039903042006986e-06, "loss": 0.2514, "step": 20595 }, { "epoch": 67.52786885245902, "grad_norm": 3.0893619060516357, "learning_rate": 5.0389810154690485e-06, "loss": 0.1059, "step": 20596 }, { "epoch": 67.53114754098361, "grad_norm": 3.496253490447998, "learning_rate": 5.038059044871758e-06, "loss": 0.1459, "step": 20597 }, { "epoch": 67.5344262295082, "grad_norm": 3.176926612854004, "learning_rate": 5.037137130225507e-06, "loss": 0.1685, "step": 20598 }, { "epoch": 67.53770491803279, "grad_norm": 2.7295000553131104, "learning_rate": 5.036215271540699e-06, "loss": 0.1208, "step": 20599 }, { "epoch": 67.54098360655738, "grad_norm": 3.0534095764160156, "learning_rate": 5.035293468827724e-06, "loss": 0.1794, "step": 20600 }, { "epoch": 67.54426229508196, "grad_norm": 3.2133679389953613, "learning_rate": 5.034371722096979e-06, "loss": 0.1809, "step": 20601 }, { "epoch": 67.54754098360655, "grad_norm": 3.451420545578003, "learning_rate": 5.0334500313588504e-06, "loss": 0.1519, "step": 20602 }, { "epoch": 67.55081967213114, "grad_norm": 2.9358091354370117, "learning_rate": 5.032528396623741e-06, "loss": 0.0954, "step": 20603 }, { "epoch": 67.55409836065574, "grad_norm": 3.5775504112243652, "learning_rate": 5.031606817902039e-06, "loss": 0.214, "step": 20604 }, { "epoch": 67.55737704918033, "grad_norm": 3.42897891998291, "learning_rate": 5.030685295204136e-06, "loss": 0.1295, "step": 20605 }, { "epoch": 67.56065573770492, "grad_norm": 4.423717975616455, "learning_rate": 5.029763828540419e-06, "loss": 0.2036, "step": 20606 }, { "epoch": 67.56393442622951, "grad_norm": 3.9707257747650146, "learning_rate": 5.028842417921287e-06, "loss": 0.1273, "step": 20607 }, { "epoch": 67.5672131147541, "grad_norm": 3.305737018585205, "learning_rate": 5.027921063357124e-06, "loss": 0.3351, "step": 20608 }, { "epoch": 67.57049180327868, "grad_norm": 4.351077079772949, "learning_rate": 5.026999764858322e-06, "loss": 0.1625, "step": 20609 }, { "epoch": 67.57377049180327, "grad_norm": 3.126649856567383, "learning_rate": 5.026078522435267e-06, "loss": 0.1717, "step": 20610 }, { "epoch": 67.57704918032788, "grad_norm": 2.79327654838562, "learning_rate": 5.025157336098346e-06, "loss": 0.2122, "step": 20611 }, { "epoch": 67.58032786885246, "grad_norm": 3.761254072189331, "learning_rate": 5.02423620585795e-06, "loss": 0.3708, "step": 20612 }, { "epoch": 67.58360655737705, "grad_norm": 3.020775318145752, "learning_rate": 5.023315131724466e-06, "loss": 0.1935, "step": 20613 }, { "epoch": 67.58688524590164, "grad_norm": 2.666330337524414, "learning_rate": 5.02239411370828e-06, "loss": 0.0603, "step": 20614 }, { "epoch": 67.59016393442623, "grad_norm": 2.557269811630249, "learning_rate": 5.021473151819769e-06, "loss": 0.0908, "step": 20615 }, { "epoch": 67.59344262295082, "grad_norm": 3.476137638092041, "learning_rate": 5.02055224606933e-06, "loss": 0.2815, "step": 20616 }, { "epoch": 67.5967213114754, "grad_norm": 2.9717111587524414, "learning_rate": 5.019631396467341e-06, "loss": 0.2147, "step": 20617 }, { "epoch": 67.6, "grad_norm": 2.5988996028900146, "learning_rate": 5.018710603024187e-06, "loss": 0.1384, "step": 20618 }, { "epoch": 67.6032786885246, "grad_norm": 2.4338064193725586, "learning_rate": 5.017789865750246e-06, "loss": 0.142, "step": 20619 }, { "epoch": 67.60655737704919, "grad_norm": 2.706413507461548, "learning_rate": 5.016869184655908e-06, "loss": 0.0935, "step": 20620 }, { "epoch": 67.60983606557377, "grad_norm": 2.193765163421631, "learning_rate": 5.015948559751551e-06, "loss": 0.0723, "step": 20621 }, { "epoch": 67.61311475409836, "grad_norm": 3.7260658740997314, "learning_rate": 5.015027991047557e-06, "loss": 0.146, "step": 20622 }, { "epoch": 67.61639344262295, "grad_norm": 2.920982599258423, "learning_rate": 5.014107478554305e-06, "loss": 0.1163, "step": 20623 }, { "epoch": 67.61967213114754, "grad_norm": 2.2597155570983887, "learning_rate": 5.013187022282171e-06, "loss": 0.1352, "step": 20624 }, { "epoch": 67.62295081967213, "grad_norm": 3.3460328578948975, "learning_rate": 5.012266622241544e-06, "loss": 0.2744, "step": 20625 }, { "epoch": 67.62622950819672, "grad_norm": 3.176717519760132, "learning_rate": 5.011346278442794e-06, "loss": 0.1657, "step": 20626 }, { "epoch": 67.62950819672132, "grad_norm": 4.174103260040283, "learning_rate": 5.010425990896304e-06, "loss": 0.1089, "step": 20627 }, { "epoch": 67.6327868852459, "grad_norm": 3.98862361907959, "learning_rate": 5.009505759612443e-06, "loss": 0.1917, "step": 20628 }, { "epoch": 67.6360655737705, "grad_norm": 1.9213544130325317, "learning_rate": 5.008585584601598e-06, "loss": 0.0367, "step": 20629 }, { "epoch": 67.63934426229508, "grad_norm": 2.4930579662323, "learning_rate": 5.0076654658741405e-06, "loss": 0.0907, "step": 20630 }, { "epoch": 67.64262295081967, "grad_norm": 4.090193271636963, "learning_rate": 5.006745403440445e-06, "loss": 0.2394, "step": 20631 }, { "epoch": 67.64590163934426, "grad_norm": 3.143249034881592, "learning_rate": 5.005825397310884e-06, "loss": 0.1635, "step": 20632 }, { "epoch": 67.64918032786885, "grad_norm": 3.2010557651519775, "learning_rate": 5.004905447495838e-06, "loss": 0.0673, "step": 20633 }, { "epoch": 67.65245901639344, "grad_norm": 8.182212829589844, "learning_rate": 5.003985554005676e-06, "loss": 0.3747, "step": 20634 }, { "epoch": 67.65573770491804, "grad_norm": 3.506253242492676, "learning_rate": 5.003065716850771e-06, "loss": 0.2108, "step": 20635 }, { "epoch": 67.65901639344263, "grad_norm": 3.311934232711792, "learning_rate": 5.002145936041496e-06, "loss": 0.2131, "step": 20636 }, { "epoch": 67.66229508196722, "grad_norm": 3.361760139465332, "learning_rate": 5.0012262115882195e-06, "loss": 0.2424, "step": 20637 }, { "epoch": 67.6655737704918, "grad_norm": 2.546128034591675, "learning_rate": 5.000306543501316e-06, "loss": 0.2092, "step": 20638 }, { "epoch": 67.66885245901639, "grad_norm": 3.3465821743011475, "learning_rate": 4.999386931791157e-06, "loss": 0.1755, "step": 20639 }, { "epoch": 67.67213114754098, "grad_norm": 2.4367573261260986, "learning_rate": 4.998467376468109e-06, "loss": 0.1399, "step": 20640 }, { "epoch": 67.67540983606557, "grad_norm": 2.1978304386138916, "learning_rate": 4.997547877542538e-06, "loss": 0.0765, "step": 20641 }, { "epoch": 67.67868852459016, "grad_norm": 3.0772340297698975, "learning_rate": 4.996628435024819e-06, "loss": 0.1651, "step": 20642 }, { "epoch": 67.68196721311476, "grad_norm": 2.74357271194458, "learning_rate": 4.995709048925317e-06, "loss": 0.1521, "step": 20643 }, { "epoch": 67.68524590163935, "grad_norm": 2.937086820602417, "learning_rate": 4.994789719254395e-06, "loss": 0.1121, "step": 20644 }, { "epoch": 67.68852459016394, "grad_norm": 2.9296934604644775, "learning_rate": 4.993870446022426e-06, "loss": 0.1806, "step": 20645 }, { "epoch": 67.69180327868852, "grad_norm": 2.4870433807373047, "learning_rate": 4.992951229239774e-06, "loss": 0.0572, "step": 20646 }, { "epoch": 67.69508196721311, "grad_norm": 2.437614917755127, "learning_rate": 4.992032068916802e-06, "loss": 0.0559, "step": 20647 }, { "epoch": 67.6983606557377, "grad_norm": 3.6538798809051514, "learning_rate": 4.991112965063872e-06, "loss": 0.2448, "step": 20648 }, { "epoch": 67.70163934426229, "grad_norm": 3.060397148132324, "learning_rate": 4.990193917691355e-06, "loss": 0.1726, "step": 20649 }, { "epoch": 67.70491803278688, "grad_norm": 4.137447834014893, "learning_rate": 4.989274926809611e-06, "loss": 0.2942, "step": 20650 }, { "epoch": 67.70819672131148, "grad_norm": 3.9309020042419434, "learning_rate": 4.988355992428997e-06, "loss": 0.1172, "step": 20651 }, { "epoch": 67.71147540983607, "grad_norm": 5.152975559234619, "learning_rate": 4.987437114559885e-06, "loss": 0.2188, "step": 20652 }, { "epoch": 67.71475409836066, "grad_norm": 2.4509663581848145, "learning_rate": 4.986518293212631e-06, "loss": 0.132, "step": 20653 }, { "epoch": 67.71803278688525, "grad_norm": 2.5515334606170654, "learning_rate": 4.985599528397592e-06, "loss": 0.2827, "step": 20654 }, { "epoch": 67.72131147540983, "grad_norm": 3.806964159011841, "learning_rate": 4.984680820125135e-06, "loss": 0.2961, "step": 20655 }, { "epoch": 67.72459016393442, "grad_norm": 2.187326192855835, "learning_rate": 4.983762168405618e-06, "loss": 0.1708, "step": 20656 }, { "epoch": 67.72786885245901, "grad_norm": 3.358776569366455, "learning_rate": 4.982843573249397e-06, "loss": 0.3557, "step": 20657 }, { "epoch": 67.73114754098361, "grad_norm": 3.4328227043151855, "learning_rate": 4.981925034666828e-06, "loss": 0.2, "step": 20658 }, { "epoch": 67.7344262295082, "grad_norm": 2.9566073417663574, "learning_rate": 4.9810065526682746e-06, "loss": 0.1577, "step": 20659 }, { "epoch": 67.73770491803279, "grad_norm": 2.389045476913452, "learning_rate": 4.980088127264092e-06, "loss": 0.1357, "step": 20660 }, { "epoch": 67.74098360655738, "grad_norm": 2.491313934326172, "learning_rate": 4.979169758464635e-06, "loss": 0.0689, "step": 20661 }, { "epoch": 67.74426229508197, "grad_norm": 3.870708703994751, "learning_rate": 4.9782514462802575e-06, "loss": 0.192, "step": 20662 }, { "epoch": 67.74754098360656, "grad_norm": 6.270437717437744, "learning_rate": 4.9773331907213156e-06, "loss": 0.1462, "step": 20663 }, { "epoch": 67.75081967213114, "grad_norm": 2.9220075607299805, "learning_rate": 4.976414991798165e-06, "loss": 0.1938, "step": 20664 }, { "epoch": 67.75409836065573, "grad_norm": 3.070474624633789, "learning_rate": 4.97549684952116e-06, "loss": 0.1698, "step": 20665 }, { "epoch": 67.75737704918033, "grad_norm": 2.818537473678589, "learning_rate": 4.974578763900653e-06, "loss": 0.1674, "step": 20666 }, { "epoch": 67.76065573770492, "grad_norm": 3.192586660385132, "learning_rate": 4.97366073494699e-06, "loss": 0.1725, "step": 20667 }, { "epoch": 67.76393442622951, "grad_norm": 3.3859150409698486, "learning_rate": 4.972742762670533e-06, "loss": 0.1041, "step": 20668 }, { "epoch": 67.7672131147541, "grad_norm": 2.5614781379699707, "learning_rate": 4.971824847081629e-06, "loss": 0.3557, "step": 20669 }, { "epoch": 67.77049180327869, "grad_norm": 3.301379442214966, "learning_rate": 4.970906988190627e-06, "loss": 0.3039, "step": 20670 }, { "epoch": 67.77377049180328, "grad_norm": 2.771498918533325, "learning_rate": 4.969989186007874e-06, "loss": 0.1501, "step": 20671 }, { "epoch": 67.77704918032786, "grad_norm": 3.490117311477661, "learning_rate": 4.969071440543727e-06, "loss": 0.2197, "step": 20672 }, { "epoch": 67.78032786885245, "grad_norm": 2.536555528640747, "learning_rate": 4.96815375180853e-06, "loss": 0.1426, "step": 20673 }, { "epoch": 67.78360655737706, "grad_norm": 2.927553653717041, "learning_rate": 4.967236119812631e-06, "loss": 0.2259, "step": 20674 }, { "epoch": 67.78688524590164, "grad_norm": 3.098968982696533, "learning_rate": 4.966318544566378e-06, "loss": 0.2142, "step": 20675 }, { "epoch": 67.79016393442623, "grad_norm": 2.269453525543213, "learning_rate": 4.9654010260801124e-06, "loss": 0.034, "step": 20676 }, { "epoch": 67.79344262295082, "grad_norm": 2.252305746078491, "learning_rate": 4.96448356436419e-06, "loss": 0.0926, "step": 20677 }, { "epoch": 67.79672131147541, "grad_norm": 3.079918622970581, "learning_rate": 4.963566159428949e-06, "loss": 0.1946, "step": 20678 }, { "epoch": 67.8, "grad_norm": 2.1986637115478516, "learning_rate": 4.9626488112847384e-06, "loss": 0.1329, "step": 20679 }, { "epoch": 67.80327868852459, "grad_norm": 2.7078163623809814, "learning_rate": 4.9617315199418955e-06, "loss": 0.1373, "step": 20680 }, { "epoch": 67.80655737704917, "grad_norm": 3.7986080646514893, "learning_rate": 4.960814285410772e-06, "loss": 0.1238, "step": 20681 }, { "epoch": 67.80983606557378, "grad_norm": 3.1639444828033447, "learning_rate": 4.959897107701707e-06, "loss": 0.0973, "step": 20682 }, { "epoch": 67.81311475409836, "grad_norm": 2.548588275909424, "learning_rate": 4.958979986825042e-06, "loss": 0.1641, "step": 20683 }, { "epoch": 67.81639344262295, "grad_norm": 3.1497786045074463, "learning_rate": 4.958062922791115e-06, "loss": 0.1835, "step": 20684 }, { "epoch": 67.81967213114754, "grad_norm": 2.188969135284424, "learning_rate": 4.9571459156102755e-06, "loss": 0.0698, "step": 20685 }, { "epoch": 67.82295081967213, "grad_norm": 3.003873586654663, "learning_rate": 4.956228965292858e-06, "loss": 0.2931, "step": 20686 }, { "epoch": 67.82622950819672, "grad_norm": 3.528195381164551, "learning_rate": 4.955312071849204e-06, "loss": 0.2492, "step": 20687 }, { "epoch": 67.8295081967213, "grad_norm": 2.8653674125671387, "learning_rate": 4.9543952352896515e-06, "loss": 0.0605, "step": 20688 }, { "epoch": 67.8327868852459, "grad_norm": 2.685250997543335, "learning_rate": 4.9534784556245356e-06, "loss": 0.2259, "step": 20689 }, { "epoch": 67.8360655737705, "grad_norm": 2.6436984539031982, "learning_rate": 4.952561732864199e-06, "loss": 0.183, "step": 20690 }, { "epoch": 67.83934426229509, "grad_norm": 2.393050193786621, "learning_rate": 4.951645067018979e-06, "loss": 0.1586, "step": 20691 }, { "epoch": 67.84262295081967, "grad_norm": 2.778331995010376, "learning_rate": 4.950728458099209e-06, "loss": 0.0799, "step": 20692 }, { "epoch": 67.84590163934426, "grad_norm": 3.275656223297119, "learning_rate": 4.949811906115221e-06, "loss": 0.2594, "step": 20693 }, { "epoch": 67.84918032786885, "grad_norm": 3.2187154293060303, "learning_rate": 4.948895411077359e-06, "loss": 0.1889, "step": 20694 }, { "epoch": 67.85245901639344, "grad_norm": 3.703556537628174, "learning_rate": 4.9479789729959535e-06, "loss": 0.1532, "step": 20695 }, { "epoch": 67.85573770491803, "grad_norm": 2.0575485229492188, "learning_rate": 4.947062591881338e-06, "loss": 0.2159, "step": 20696 }, { "epoch": 67.85901639344263, "grad_norm": 4.102107048034668, "learning_rate": 4.946146267743841e-06, "loss": 0.255, "step": 20697 }, { "epoch": 67.86229508196722, "grad_norm": 3.5345840454101562, "learning_rate": 4.945230000593804e-06, "loss": 0.2679, "step": 20698 }, { "epoch": 67.8655737704918, "grad_norm": 3.219554901123047, "learning_rate": 4.944313790441554e-06, "loss": 0.1237, "step": 20699 }, { "epoch": 67.8688524590164, "grad_norm": 3.115952253341675, "learning_rate": 4.943397637297418e-06, "loss": 0.1665, "step": 20700 }, { "epoch": 67.87213114754098, "grad_norm": 3.2032673358917236, "learning_rate": 4.942481541171736e-06, "loss": 0.2691, "step": 20701 }, { "epoch": 67.87540983606557, "grad_norm": 2.3356070518493652, "learning_rate": 4.9415655020748335e-06, "loss": 0.063, "step": 20702 }, { "epoch": 67.87868852459016, "grad_norm": 2.891813039779663, "learning_rate": 4.9406495200170345e-06, "loss": 0.1481, "step": 20703 }, { "epoch": 67.88196721311475, "grad_norm": 2.497368812561035, "learning_rate": 4.9397335950086765e-06, "loss": 0.0915, "step": 20704 }, { "epoch": 67.88524590163935, "grad_norm": 2.4292244911193848, "learning_rate": 4.938817727060085e-06, "loss": 0.1072, "step": 20705 }, { "epoch": 67.88852459016394, "grad_norm": 3.3781979084014893, "learning_rate": 4.937901916181581e-06, "loss": 0.2056, "step": 20706 }, { "epoch": 67.89180327868853, "grad_norm": 3.033102512359619, "learning_rate": 4.936986162383499e-06, "loss": 0.2487, "step": 20707 }, { "epoch": 67.89508196721312, "grad_norm": 5.5686492919921875, "learning_rate": 4.9360704656761635e-06, "loss": 0.2483, "step": 20708 }, { "epoch": 67.8983606557377, "grad_norm": 2.7171342372894287, "learning_rate": 4.935154826069899e-06, "loss": 0.203, "step": 20709 }, { "epoch": 67.90163934426229, "grad_norm": 3.4436423778533936, "learning_rate": 4.9342392435750255e-06, "loss": 0.161, "step": 20710 }, { "epoch": 67.90491803278688, "grad_norm": 2.705918073654175, "learning_rate": 4.933323718201876e-06, "loss": 0.1247, "step": 20711 }, { "epoch": 67.90819672131147, "grad_norm": 2.817873239517212, "learning_rate": 4.9324082499607685e-06, "loss": 0.2546, "step": 20712 }, { "epoch": 67.91147540983607, "grad_norm": 3.0420143604278564, "learning_rate": 4.931492838862028e-06, "loss": 0.1668, "step": 20713 }, { "epoch": 67.91475409836066, "grad_norm": 3.384173631668091, "learning_rate": 4.9305774849159746e-06, "loss": 0.2322, "step": 20714 }, { "epoch": 67.91803278688525, "grad_norm": 2.747426986694336, "learning_rate": 4.929662188132928e-06, "loss": 0.1663, "step": 20715 }, { "epoch": 67.92131147540984, "grad_norm": 3.2183141708374023, "learning_rate": 4.928746948523215e-06, "loss": 0.1797, "step": 20716 }, { "epoch": 67.92459016393443, "grad_norm": 3.07582426071167, "learning_rate": 4.9278317660971546e-06, "loss": 0.1085, "step": 20717 }, { "epoch": 67.92786885245901, "grad_norm": 2.2751307487487793, "learning_rate": 4.926916640865063e-06, "loss": 0.0623, "step": 20718 }, { "epoch": 67.9311475409836, "grad_norm": 2.3972034454345703, "learning_rate": 4.926001572837259e-06, "loss": 0.1489, "step": 20719 }, { "epoch": 67.93442622950819, "grad_norm": 2.094205141067505, "learning_rate": 4.925086562024065e-06, "loss": 0.1317, "step": 20720 }, { "epoch": 67.9377049180328, "grad_norm": 3.1136727333068848, "learning_rate": 4.924171608435797e-06, "loss": 0.1149, "step": 20721 }, { "epoch": 67.94098360655738, "grad_norm": 2.149352550506592, "learning_rate": 4.9232567120827725e-06, "loss": 0.0859, "step": 20722 }, { "epoch": 67.94426229508197, "grad_norm": 2.837006092071533, "learning_rate": 4.922341872975302e-06, "loss": 0.0787, "step": 20723 }, { "epoch": 67.94754098360656, "grad_norm": 2.9183199405670166, "learning_rate": 4.92142709112371e-06, "loss": 0.1504, "step": 20724 }, { "epoch": 67.95081967213115, "grad_norm": 3.854382276535034, "learning_rate": 4.9205123665383105e-06, "loss": 0.2273, "step": 20725 }, { "epoch": 67.95409836065573, "grad_norm": 3.480308771133423, "learning_rate": 4.919597699229413e-06, "loss": 0.3821, "step": 20726 }, { "epoch": 67.95737704918032, "grad_norm": 3.3801777362823486, "learning_rate": 4.918683089207334e-06, "loss": 0.1785, "step": 20727 }, { "epoch": 67.96065573770491, "grad_norm": 2.3302934169769287, "learning_rate": 4.9177685364823835e-06, "loss": 0.0927, "step": 20728 }, { "epoch": 67.96393442622951, "grad_norm": 3.002561092376709, "learning_rate": 4.91685404106488e-06, "loss": 0.116, "step": 20729 }, { "epoch": 67.9672131147541, "grad_norm": 2.432497024536133, "learning_rate": 4.9159396029651315e-06, "loss": 0.0842, "step": 20730 }, { "epoch": 67.97049180327869, "grad_norm": 3.1677844524383545, "learning_rate": 4.915025222193453e-06, "loss": 0.1444, "step": 20731 }, { "epoch": 67.97377049180328, "grad_norm": 3.2850465774536133, "learning_rate": 4.914110898760145e-06, "loss": 0.1471, "step": 20732 }, { "epoch": 67.97704918032787, "grad_norm": 2.6913681030273438, "learning_rate": 4.913196632675529e-06, "loss": 0.0897, "step": 20733 }, { "epoch": 67.98032786885246, "grad_norm": 2.518526077270508, "learning_rate": 4.9122824239499126e-06, "loss": 0.0571, "step": 20734 }, { "epoch": 67.98360655737704, "grad_norm": 2.5222008228302, "learning_rate": 4.911368272593599e-06, "loss": 0.1737, "step": 20735 }, { "epoch": 67.98688524590163, "grad_norm": 2.394517421722412, "learning_rate": 4.910454178616897e-06, "loss": 0.0577, "step": 20736 }, { "epoch": 67.99016393442623, "grad_norm": 3.093332290649414, "learning_rate": 4.909540142030118e-06, "loss": 0.1617, "step": 20737 }, { "epoch": 67.99344262295082, "grad_norm": 3.2005631923675537, "learning_rate": 4.908626162843568e-06, "loss": 0.2107, "step": 20738 }, { "epoch": 67.99672131147541, "grad_norm": 2.1880617141723633, "learning_rate": 4.907712241067551e-06, "loss": 0.0496, "step": 20739 }, { "epoch": 68.0, "grad_norm": 2.733402729034424, "learning_rate": 4.9067983767123736e-06, "loss": 0.117, "step": 20740 }, { "epoch": 68.00327868852459, "grad_norm": 3.0508172512054443, "learning_rate": 4.905884569788336e-06, "loss": 0.1745, "step": 20741 }, { "epoch": 68.00655737704918, "grad_norm": 2.8350980281829834, "learning_rate": 4.90497082030575e-06, "loss": 0.1835, "step": 20742 }, { "epoch": 68.00983606557377, "grad_norm": 3.9319090843200684, "learning_rate": 4.904057128274916e-06, "loss": 0.1634, "step": 20743 }, { "epoch": 68.01311475409837, "grad_norm": 2.0741453170776367, "learning_rate": 4.9031434937061364e-06, "loss": 0.0806, "step": 20744 }, { "epoch": 68.01639344262296, "grad_norm": 3.920654296875, "learning_rate": 4.9022299166097095e-06, "loss": 0.1321, "step": 20745 }, { "epoch": 68.01967213114754, "grad_norm": 3.295380115509033, "learning_rate": 4.9013163969959445e-06, "loss": 0.2152, "step": 20746 }, { "epoch": 68.02295081967213, "grad_norm": 2.552438497543335, "learning_rate": 4.900402934875138e-06, "loss": 0.0886, "step": 20747 }, { "epoch": 68.02622950819672, "grad_norm": 2.7083957195281982, "learning_rate": 4.8994895302575905e-06, "loss": 0.2703, "step": 20748 }, { "epoch": 68.02950819672131, "grad_norm": 3.1456522941589355, "learning_rate": 4.898576183153598e-06, "loss": 0.4249, "step": 20749 }, { "epoch": 68.0327868852459, "grad_norm": 2.049774169921875, "learning_rate": 4.897662893573467e-06, "loss": 0.0755, "step": 20750 }, { "epoch": 68.03606557377049, "grad_norm": 2.338376760482788, "learning_rate": 4.896749661527492e-06, "loss": 0.2416, "step": 20751 }, { "epoch": 68.03934426229509, "grad_norm": 4.589575290679932, "learning_rate": 4.895836487025971e-06, "loss": 0.269, "step": 20752 }, { "epoch": 68.04262295081968, "grad_norm": 3.311847686767578, "learning_rate": 4.894923370079198e-06, "loss": 0.2543, "step": 20753 }, { "epoch": 68.04590163934427, "grad_norm": 2.912611484527588, "learning_rate": 4.894010310697474e-06, "loss": 0.1222, "step": 20754 }, { "epoch": 68.04918032786885, "grad_norm": 2.494068145751953, "learning_rate": 4.893097308891093e-06, "loss": 0.2299, "step": 20755 }, { "epoch": 68.05245901639344, "grad_norm": 3.6249189376831055, "learning_rate": 4.892184364670345e-06, "loss": 0.2977, "step": 20756 }, { "epoch": 68.05573770491803, "grad_norm": 2.0615346431732178, "learning_rate": 4.891271478045534e-06, "loss": 0.1281, "step": 20757 }, { "epoch": 68.05901639344262, "grad_norm": 2.276883602142334, "learning_rate": 4.890358649026948e-06, "loss": 0.1692, "step": 20758 }, { "epoch": 68.0622950819672, "grad_norm": 2.4749624729156494, "learning_rate": 4.889445877624879e-06, "loss": 0.0615, "step": 20759 }, { "epoch": 68.06557377049181, "grad_norm": 2.921907901763916, "learning_rate": 4.888533163849624e-06, "loss": 0.1147, "step": 20760 }, { "epoch": 68.0688524590164, "grad_norm": 2.171827793121338, "learning_rate": 4.887620507711472e-06, "loss": 0.1313, "step": 20761 }, { "epoch": 68.07213114754099, "grad_norm": 2.6161372661590576, "learning_rate": 4.8867079092207105e-06, "loss": 0.1144, "step": 20762 }, { "epoch": 68.07540983606557, "grad_norm": 3.1553900241851807, "learning_rate": 4.885795368387637e-06, "loss": 0.1904, "step": 20763 }, { "epoch": 68.07868852459016, "grad_norm": 2.2416372299194336, "learning_rate": 4.884882885222539e-06, "loss": 0.0557, "step": 20764 }, { "epoch": 68.08196721311475, "grad_norm": 2.8688390254974365, "learning_rate": 4.8839704597357055e-06, "loss": 0.1398, "step": 20765 }, { "epoch": 68.08524590163934, "grad_norm": 2.997514009475708, "learning_rate": 4.883058091937424e-06, "loss": 0.1087, "step": 20766 }, { "epoch": 68.08852459016393, "grad_norm": 2.322666883468628, "learning_rate": 4.882145781837978e-06, "loss": 0.0746, "step": 20767 }, { "epoch": 68.09180327868853, "grad_norm": 2.258910655975342, "learning_rate": 4.8812335294476645e-06, "loss": 0.0733, "step": 20768 }, { "epoch": 68.09508196721312, "grad_norm": 2.4388155937194824, "learning_rate": 4.880321334776764e-06, "loss": 0.0897, "step": 20769 }, { "epoch": 68.09836065573771, "grad_norm": 3.135840654373169, "learning_rate": 4.879409197835564e-06, "loss": 0.1232, "step": 20770 }, { "epoch": 68.1016393442623, "grad_norm": 3.1726040840148926, "learning_rate": 4.878497118634345e-06, "loss": 0.0824, "step": 20771 }, { "epoch": 68.10491803278688, "grad_norm": 3.060197114944458, "learning_rate": 4.877585097183401e-06, "loss": 0.1658, "step": 20772 }, { "epoch": 68.10819672131147, "grad_norm": 2.931813955307007, "learning_rate": 4.876673133493011e-06, "loss": 0.0762, "step": 20773 }, { "epoch": 68.11147540983606, "grad_norm": 2.395235776901245, "learning_rate": 4.8757612275734574e-06, "loss": 0.064, "step": 20774 }, { "epoch": 68.11475409836065, "grad_norm": 3.114398717880249, "learning_rate": 4.874849379435022e-06, "loss": 0.0809, "step": 20775 }, { "epoch": 68.11803278688525, "grad_norm": 2.7148921489715576, "learning_rate": 4.8739375890879905e-06, "loss": 0.0518, "step": 20776 }, { "epoch": 68.12131147540984, "grad_norm": 3.397982597351074, "learning_rate": 4.873025856542643e-06, "loss": 0.2269, "step": 20777 }, { "epoch": 68.12459016393443, "grad_norm": 2.962280511856079, "learning_rate": 4.8721141818092595e-06, "loss": 0.2126, "step": 20778 }, { "epoch": 68.12786885245902, "grad_norm": 3.005415916442871, "learning_rate": 4.871202564898121e-06, "loss": 0.1495, "step": 20779 }, { "epoch": 68.1311475409836, "grad_norm": 2.620821237564087, "learning_rate": 4.870291005819502e-06, "loss": 0.2365, "step": 20780 }, { "epoch": 68.1344262295082, "grad_norm": 2.6318812370300293, "learning_rate": 4.869379504583689e-06, "loss": 0.0783, "step": 20781 }, { "epoch": 68.13770491803278, "grad_norm": 3.2325525283813477, "learning_rate": 4.868468061200956e-06, "loss": 0.2063, "step": 20782 }, { "epoch": 68.14098360655737, "grad_norm": 2.9756083488464355, "learning_rate": 4.8675566756815814e-06, "loss": 0.2014, "step": 20783 }, { "epoch": 68.14426229508197, "grad_norm": 2.2906947135925293, "learning_rate": 4.866645348035839e-06, "loss": 0.1016, "step": 20784 }, { "epoch": 68.14754098360656, "grad_norm": 2.52359676361084, "learning_rate": 4.86573407827401e-06, "loss": 0.0975, "step": 20785 }, { "epoch": 68.15081967213115, "grad_norm": 3.4352619647979736, "learning_rate": 4.864822866406369e-06, "loss": 0.1914, "step": 20786 }, { "epoch": 68.15409836065574, "grad_norm": 2.855908155441284, "learning_rate": 4.863911712443189e-06, "loss": 0.1109, "step": 20787 }, { "epoch": 68.15737704918033, "grad_norm": 2.697270154953003, "learning_rate": 4.86300061639474e-06, "loss": 0.1024, "step": 20788 }, { "epoch": 68.16065573770491, "grad_norm": 3.4811315536499023, "learning_rate": 4.862089578271305e-06, "loss": 0.2409, "step": 20789 }, { "epoch": 68.1639344262295, "grad_norm": 2.6880099773406982, "learning_rate": 4.861178598083151e-06, "loss": 0.0714, "step": 20790 }, { "epoch": 68.1672131147541, "grad_norm": 3.336285352706909, "learning_rate": 4.860267675840552e-06, "loss": 0.249, "step": 20791 }, { "epoch": 68.1704918032787, "grad_norm": 2.222937822341919, "learning_rate": 4.859356811553779e-06, "loss": 0.0597, "step": 20792 }, { "epoch": 68.17377049180328, "grad_norm": 2.961735248565674, "learning_rate": 4.8584460052331e-06, "loss": 0.1334, "step": 20793 }, { "epoch": 68.17704918032787, "grad_norm": 2.3680403232574463, "learning_rate": 4.8575352568887905e-06, "loss": 0.067, "step": 20794 }, { "epoch": 68.18032786885246, "grad_norm": 2.468622922897339, "learning_rate": 4.856624566531117e-06, "loss": 0.2237, "step": 20795 }, { "epoch": 68.18360655737705, "grad_norm": 2.753739833831787, "learning_rate": 4.855713934170351e-06, "loss": 0.1156, "step": 20796 }, { "epoch": 68.18688524590164, "grad_norm": 3.1705989837646484, "learning_rate": 4.8548033598167554e-06, "loss": 0.1117, "step": 20797 }, { "epoch": 68.19016393442622, "grad_norm": 2.6825263500213623, "learning_rate": 4.853892843480605e-06, "loss": 0.2186, "step": 20798 }, { "epoch": 68.19344262295083, "grad_norm": 3.089888572692871, "learning_rate": 4.852982385172163e-06, "loss": 0.1625, "step": 20799 }, { "epoch": 68.19672131147541, "grad_norm": 2.7176334857940674, "learning_rate": 4.852071984901696e-06, "loss": 0.1416, "step": 20800 }, { "epoch": 68.2, "grad_norm": 2.087963104248047, "learning_rate": 4.851161642679466e-06, "loss": 0.061, "step": 20801 }, { "epoch": 68.20327868852459, "grad_norm": 2.9290354251861572, "learning_rate": 4.850251358515746e-06, "loss": 0.2921, "step": 20802 }, { "epoch": 68.20655737704918, "grad_norm": 3.3939929008483887, "learning_rate": 4.8493411324207975e-06, "loss": 0.1245, "step": 20803 }, { "epoch": 68.20983606557377, "grad_norm": 2.6818692684173584, "learning_rate": 4.848430964404882e-06, "loss": 0.1037, "step": 20804 }, { "epoch": 68.21311475409836, "grad_norm": 2.5224452018737793, "learning_rate": 4.84752085447826e-06, "loss": 0.1076, "step": 20805 }, { "epoch": 68.21639344262294, "grad_norm": 2.5322372913360596, "learning_rate": 4.846610802651202e-06, "loss": 0.1371, "step": 20806 }, { "epoch": 68.21967213114755, "grad_norm": 2.7946105003356934, "learning_rate": 4.845700808933964e-06, "loss": 0.1093, "step": 20807 }, { "epoch": 68.22295081967214, "grad_norm": 2.6707231998443604, "learning_rate": 4.844790873336806e-06, "loss": 0.1902, "step": 20808 }, { "epoch": 68.22622950819672, "grad_norm": 2.434739828109741, "learning_rate": 4.8438809958699936e-06, "loss": 0.0544, "step": 20809 }, { "epoch": 68.22950819672131, "grad_norm": 3.202714204788208, "learning_rate": 4.842971176543785e-06, "loss": 0.2309, "step": 20810 }, { "epoch": 68.2327868852459, "grad_norm": 2.7936439514160156, "learning_rate": 4.842061415368437e-06, "loss": 0.1079, "step": 20811 }, { "epoch": 68.23606557377049, "grad_norm": 2.7837040424346924, "learning_rate": 4.841151712354208e-06, "loss": 0.075, "step": 20812 }, { "epoch": 68.23934426229508, "grad_norm": 2.169340133666992, "learning_rate": 4.840242067511359e-06, "loss": 0.0583, "step": 20813 }, { "epoch": 68.24262295081967, "grad_norm": 2.4352259635925293, "learning_rate": 4.839332480850146e-06, "loss": 0.3583, "step": 20814 }, { "epoch": 68.24590163934427, "grad_norm": 5.321971416473389, "learning_rate": 4.838422952380821e-06, "loss": 0.149, "step": 20815 }, { "epoch": 68.24918032786886, "grad_norm": 2.3638668060302734, "learning_rate": 4.837513482113648e-06, "loss": 0.1184, "step": 20816 }, { "epoch": 68.25245901639344, "grad_norm": 2.3929977416992188, "learning_rate": 4.836604070058879e-06, "loss": 0.0797, "step": 20817 }, { "epoch": 68.25573770491803, "grad_norm": 5.188054084777832, "learning_rate": 4.835694716226767e-06, "loss": 0.2518, "step": 20818 }, { "epoch": 68.25901639344262, "grad_norm": 2.979398488998413, "learning_rate": 4.834785420627562e-06, "loss": 0.1927, "step": 20819 }, { "epoch": 68.26229508196721, "grad_norm": 2.901658773422241, "learning_rate": 4.8338761832715275e-06, "loss": 0.1461, "step": 20820 }, { "epoch": 68.2655737704918, "grad_norm": 2.526695966720581, "learning_rate": 4.832967004168909e-06, "loss": 0.1527, "step": 20821 }, { "epoch": 68.26885245901639, "grad_norm": 3.808004856109619, "learning_rate": 4.8320578833299605e-06, "loss": 0.1906, "step": 20822 }, { "epoch": 68.27213114754099, "grad_norm": 3.117119789123535, "learning_rate": 4.831148820764928e-06, "loss": 0.1021, "step": 20823 }, { "epoch": 68.27540983606558, "grad_norm": 3.135223627090454, "learning_rate": 4.830239816484071e-06, "loss": 0.1702, "step": 20824 }, { "epoch": 68.27868852459017, "grad_norm": 2.846132278442383, "learning_rate": 4.829330870497636e-06, "loss": 0.1909, "step": 20825 }, { "epoch": 68.28196721311475, "grad_norm": 3.103947401046753, "learning_rate": 4.82842198281587e-06, "loss": 0.2543, "step": 20826 }, { "epoch": 68.28524590163934, "grad_norm": 2.2529680728912354, "learning_rate": 4.827513153449022e-06, "loss": 0.1307, "step": 20827 }, { "epoch": 68.28852459016393, "grad_norm": 4.063246250152588, "learning_rate": 4.826604382407344e-06, "loss": 0.3913, "step": 20828 }, { "epoch": 68.29180327868852, "grad_norm": 2.5515308380126953, "learning_rate": 4.8256956697010795e-06, "loss": 0.1148, "step": 20829 }, { "epoch": 68.29508196721312, "grad_norm": 2.4479548931121826, "learning_rate": 4.8247870153404776e-06, "loss": 0.2378, "step": 20830 }, { "epoch": 68.29836065573771, "grad_norm": 2.9511237144470215, "learning_rate": 4.823878419335781e-06, "loss": 0.1718, "step": 20831 }, { "epoch": 68.3016393442623, "grad_norm": 2.012972116470337, "learning_rate": 4.822969881697235e-06, "loss": 0.1387, "step": 20832 }, { "epoch": 68.30491803278689, "grad_norm": 2.893326759338379, "learning_rate": 4.82206140243509e-06, "loss": 0.131, "step": 20833 }, { "epoch": 68.30819672131148, "grad_norm": 3.4587762355804443, "learning_rate": 4.821152981559586e-06, "loss": 0.1398, "step": 20834 }, { "epoch": 68.31147540983606, "grad_norm": 2.734407424926758, "learning_rate": 4.820244619080967e-06, "loss": 0.2594, "step": 20835 }, { "epoch": 68.31475409836065, "grad_norm": 3.023164987564087, "learning_rate": 4.819336315009471e-06, "loss": 0.2055, "step": 20836 }, { "epoch": 68.31803278688524, "grad_norm": 1.8920550346374512, "learning_rate": 4.81842806935535e-06, "loss": 0.058, "step": 20837 }, { "epoch": 68.32131147540984, "grad_norm": 2.5737600326538086, "learning_rate": 4.817519882128838e-06, "loss": 0.1967, "step": 20838 }, { "epoch": 68.32459016393443, "grad_norm": 3.0915446281433105, "learning_rate": 4.816611753340179e-06, "loss": 0.3046, "step": 20839 }, { "epoch": 68.32786885245902, "grad_norm": 3.3625571727752686, "learning_rate": 4.815703682999607e-06, "loss": 0.0584, "step": 20840 }, { "epoch": 68.33114754098361, "grad_norm": 2.0896267890930176, "learning_rate": 4.814795671117372e-06, "loss": 0.126, "step": 20841 }, { "epoch": 68.3344262295082, "grad_norm": 2.767317533493042, "learning_rate": 4.813887717703706e-06, "loss": 0.1456, "step": 20842 }, { "epoch": 68.33770491803278, "grad_norm": 2.603301525115967, "learning_rate": 4.812979822768847e-06, "loss": 0.075, "step": 20843 }, { "epoch": 68.34098360655737, "grad_norm": 2.983736515045166, "learning_rate": 4.8120719863230345e-06, "loss": 0.262, "step": 20844 }, { "epoch": 68.34426229508196, "grad_norm": 4.381209373474121, "learning_rate": 4.811164208376502e-06, "loss": 0.22, "step": 20845 }, { "epoch": 68.34754098360656, "grad_norm": 2.2609457969665527, "learning_rate": 4.810256488939491e-06, "loss": 0.2004, "step": 20846 }, { "epoch": 68.35081967213115, "grad_norm": 3.2371139526367188, "learning_rate": 4.809348828022233e-06, "loss": 0.1541, "step": 20847 }, { "epoch": 68.35409836065574, "grad_norm": 2.095445156097412, "learning_rate": 4.808441225634966e-06, "loss": 0.0778, "step": 20848 }, { "epoch": 68.35737704918033, "grad_norm": 2.899423360824585, "learning_rate": 4.807533681787916e-06, "loss": 0.1177, "step": 20849 }, { "epoch": 68.36065573770492, "grad_norm": 2.540294647216797, "learning_rate": 4.806626196491328e-06, "loss": 0.1326, "step": 20850 }, { "epoch": 68.3639344262295, "grad_norm": 3.516002655029297, "learning_rate": 4.805718769755428e-06, "loss": 0.1651, "step": 20851 }, { "epoch": 68.3672131147541, "grad_norm": 2.448011636734009, "learning_rate": 4.80481140159045e-06, "loss": 0.114, "step": 20852 }, { "epoch": 68.37049180327868, "grad_norm": 1.7247027158737183, "learning_rate": 4.803904092006626e-06, "loss": 0.0766, "step": 20853 }, { "epoch": 68.37377049180328, "grad_norm": 2.5602142810821533, "learning_rate": 4.802996841014181e-06, "loss": 0.1728, "step": 20854 }, { "epoch": 68.37704918032787, "grad_norm": 2.1976635456085205, "learning_rate": 4.802089648623355e-06, "loss": 0.0717, "step": 20855 }, { "epoch": 68.38032786885246, "grad_norm": 2.562443733215332, "learning_rate": 4.801182514844372e-06, "loss": 0.0814, "step": 20856 }, { "epoch": 68.38360655737705, "grad_norm": 2.2795450687408447, "learning_rate": 4.800275439687462e-06, "loss": 0.1551, "step": 20857 }, { "epoch": 68.38688524590164, "grad_norm": 3.0229249000549316, "learning_rate": 4.799368423162849e-06, "loss": 0.2327, "step": 20858 }, { "epoch": 68.39016393442623, "grad_norm": 3.659290313720703, "learning_rate": 4.798461465280767e-06, "loss": 0.1384, "step": 20859 }, { "epoch": 68.39344262295081, "grad_norm": 2.734588623046875, "learning_rate": 4.797554566051441e-06, "loss": 0.0796, "step": 20860 }, { "epoch": 68.3967213114754, "grad_norm": 2.9810383319854736, "learning_rate": 4.7966477254850926e-06, "loss": 0.1189, "step": 20861 }, { "epoch": 68.4, "grad_norm": 3.7182159423828125, "learning_rate": 4.795740943591955e-06, "loss": 0.2457, "step": 20862 }, { "epoch": 68.4032786885246, "grad_norm": 2.3739466667175293, "learning_rate": 4.794834220382249e-06, "loss": 0.1386, "step": 20863 }, { "epoch": 68.40655737704918, "grad_norm": 2.65386962890625, "learning_rate": 4.793927555866197e-06, "loss": 0.167, "step": 20864 }, { "epoch": 68.40983606557377, "grad_norm": 3.221425771713257, "learning_rate": 4.793020950054027e-06, "loss": 0.287, "step": 20865 }, { "epoch": 68.41311475409836, "grad_norm": 2.8086891174316406, "learning_rate": 4.792114402955961e-06, "loss": 0.1697, "step": 20866 }, { "epoch": 68.41639344262295, "grad_norm": 4.371805191040039, "learning_rate": 4.7912079145822145e-06, "loss": 0.2715, "step": 20867 }, { "epoch": 68.41967213114754, "grad_norm": 3.0297677516937256, "learning_rate": 4.79030148494302e-06, "loss": 0.1331, "step": 20868 }, { "epoch": 68.42295081967212, "grad_norm": 2.79835844039917, "learning_rate": 4.789395114048592e-06, "loss": 0.1561, "step": 20869 }, { "epoch": 68.42622950819673, "grad_norm": 2.638427734375, "learning_rate": 4.788488801909151e-06, "loss": 0.1275, "step": 20870 }, { "epoch": 68.42950819672132, "grad_norm": 2.73089599609375, "learning_rate": 4.787582548534914e-06, "loss": 0.0619, "step": 20871 }, { "epoch": 68.4327868852459, "grad_norm": 3.0834639072418213, "learning_rate": 4.786676353936108e-06, "loss": 0.1327, "step": 20872 }, { "epoch": 68.43606557377049, "grad_norm": 3.222473621368408, "learning_rate": 4.785770218122946e-06, "loss": 0.1253, "step": 20873 }, { "epoch": 68.43934426229508, "grad_norm": 2.577228546142578, "learning_rate": 4.784864141105646e-06, "loss": 0.1858, "step": 20874 }, { "epoch": 68.44262295081967, "grad_norm": 5.440658092498779, "learning_rate": 4.783958122894422e-06, "loss": 0.1899, "step": 20875 }, { "epoch": 68.44590163934426, "grad_norm": 2.2861056327819824, "learning_rate": 4.783052163499497e-06, "loss": 0.126, "step": 20876 }, { "epoch": 68.44918032786886, "grad_norm": 2.621699810028076, "learning_rate": 4.782146262931083e-06, "loss": 0.2752, "step": 20877 }, { "epoch": 68.45245901639345, "grad_norm": 3.070338487625122, "learning_rate": 4.781240421199396e-06, "loss": 0.1808, "step": 20878 }, { "epoch": 68.45573770491804, "grad_norm": 3.743757963180542, "learning_rate": 4.7803346383146485e-06, "loss": 0.1414, "step": 20879 }, { "epoch": 68.45901639344262, "grad_norm": 5.578775882720947, "learning_rate": 4.779428914287052e-06, "loss": 0.1764, "step": 20880 }, { "epoch": 68.46229508196721, "grad_norm": 3.5330302715301514, "learning_rate": 4.778523249126825e-06, "loss": 0.2444, "step": 20881 }, { "epoch": 68.4655737704918, "grad_norm": 2.4084887504577637, "learning_rate": 4.777617642844179e-06, "loss": 0.2149, "step": 20882 }, { "epoch": 68.46885245901639, "grad_norm": 2.4844443798065186, "learning_rate": 4.776712095449323e-06, "loss": 0.2034, "step": 20883 }, { "epoch": 68.47213114754098, "grad_norm": 2.8076884746551514, "learning_rate": 4.7758066069524645e-06, "loss": 0.0856, "step": 20884 }, { "epoch": 68.47540983606558, "grad_norm": 3.2287497520446777, "learning_rate": 4.774901177363823e-06, "loss": 0.3209, "step": 20885 }, { "epoch": 68.47868852459017, "grad_norm": 2.3065364360809326, "learning_rate": 4.773995806693603e-06, "loss": 0.0693, "step": 20886 }, { "epoch": 68.48196721311476, "grad_norm": 2.4340856075286865, "learning_rate": 4.773090494952015e-06, "loss": 0.1548, "step": 20887 }, { "epoch": 68.48524590163935, "grad_norm": 3.650463104248047, "learning_rate": 4.772185242149262e-06, "loss": 0.2406, "step": 20888 }, { "epoch": 68.48852459016393, "grad_norm": 2.734118700027466, "learning_rate": 4.771280048295559e-06, "loss": 0.1065, "step": 20889 }, { "epoch": 68.49180327868852, "grad_norm": 2.3745672702789307, "learning_rate": 4.77037491340111e-06, "loss": 0.1863, "step": 20890 }, { "epoch": 68.49508196721311, "grad_norm": 3.3636465072631836, "learning_rate": 4.769469837476123e-06, "loss": 0.2387, "step": 20891 }, { "epoch": 68.4983606557377, "grad_norm": 3.0049917697906494, "learning_rate": 4.7685648205308e-06, "loss": 0.1108, "step": 20892 }, { "epoch": 68.5016393442623, "grad_norm": 2.897977590560913, "learning_rate": 4.767659862575346e-06, "loss": 0.1183, "step": 20893 }, { "epoch": 68.50491803278689, "grad_norm": 2.775583028793335, "learning_rate": 4.76675496361997e-06, "loss": 0.1505, "step": 20894 }, { "epoch": 68.50819672131148, "grad_norm": 2.5729613304138184, "learning_rate": 4.765850123674872e-06, "loss": 0.1001, "step": 20895 }, { "epoch": 68.51147540983607, "grad_norm": 2.5750136375427246, "learning_rate": 4.764945342750257e-06, "loss": 0.1374, "step": 20896 }, { "epoch": 68.51475409836065, "grad_norm": 3.166084051132202, "learning_rate": 4.764040620856323e-06, "loss": 0.2369, "step": 20897 }, { "epoch": 68.51803278688524, "grad_norm": 2.919004440307617, "learning_rate": 4.763135958003278e-06, "loss": 0.2154, "step": 20898 }, { "epoch": 68.52131147540983, "grad_norm": 2.6657187938690186, "learning_rate": 4.762231354201321e-06, "loss": 0.2299, "step": 20899 }, { "epoch": 68.52459016393442, "grad_norm": 3.6710422039031982, "learning_rate": 4.761326809460651e-06, "loss": 0.1431, "step": 20900 }, { "epoch": 68.52786885245902, "grad_norm": 3.2382476329803467, "learning_rate": 4.760422323791464e-06, "loss": 0.2467, "step": 20901 }, { "epoch": 68.53114754098361, "grad_norm": 3.693206548690796, "learning_rate": 4.759517897203967e-06, "loss": 0.2162, "step": 20902 }, { "epoch": 68.5344262295082, "grad_norm": 2.255239963531494, "learning_rate": 4.758613529708355e-06, "loss": 0.0675, "step": 20903 }, { "epoch": 68.53770491803279, "grad_norm": 2.8761909008026123, "learning_rate": 4.757709221314825e-06, "loss": 0.1046, "step": 20904 }, { "epoch": 68.54098360655738, "grad_norm": 2.772533416748047, "learning_rate": 4.756804972033573e-06, "loss": 0.0724, "step": 20905 }, { "epoch": 68.54426229508196, "grad_norm": 2.7198312282562256, "learning_rate": 4.7559007818747934e-06, "loss": 0.0755, "step": 20906 }, { "epoch": 68.54754098360655, "grad_norm": 2.7155115604400635, "learning_rate": 4.754996650848689e-06, "loss": 0.2703, "step": 20907 }, { "epoch": 68.55081967213114, "grad_norm": 2.534407615661621, "learning_rate": 4.754092578965451e-06, "loss": 0.1588, "step": 20908 }, { "epoch": 68.55409836065574, "grad_norm": 2.7999942302703857, "learning_rate": 4.753188566235273e-06, "loss": 0.1949, "step": 20909 }, { "epoch": 68.55737704918033, "grad_norm": 2.992201089859009, "learning_rate": 4.752284612668345e-06, "loss": 0.179, "step": 20910 }, { "epoch": 68.56065573770492, "grad_norm": 3.7005465030670166, "learning_rate": 4.7513807182748695e-06, "loss": 0.1899, "step": 20911 }, { "epoch": 68.56393442622951, "grad_norm": 2.0203659534454346, "learning_rate": 4.750476883065032e-06, "loss": 0.0888, "step": 20912 }, { "epoch": 68.5672131147541, "grad_norm": 2.6523778438568115, "learning_rate": 4.749573107049027e-06, "loss": 0.0917, "step": 20913 }, { "epoch": 68.57049180327868, "grad_norm": 2.884068250656128, "learning_rate": 4.74866939023704e-06, "loss": 0.1715, "step": 20914 }, { "epoch": 68.57377049180327, "grad_norm": 2.4827613830566406, "learning_rate": 4.7477657326392705e-06, "loss": 0.1815, "step": 20915 }, { "epoch": 68.57704918032788, "grad_norm": 2.4318623542785645, "learning_rate": 4.746862134265902e-06, "loss": 0.0903, "step": 20916 }, { "epoch": 68.58032786885246, "grad_norm": 2.645009994506836, "learning_rate": 4.74595859512712e-06, "loss": 0.1713, "step": 20917 }, { "epoch": 68.58360655737705, "grad_norm": 3.2071492671966553, "learning_rate": 4.745055115233123e-06, "loss": 0.1558, "step": 20918 }, { "epoch": 68.58688524590164, "grad_norm": 2.6697633266448975, "learning_rate": 4.744151694594093e-06, "loss": 0.1532, "step": 20919 }, { "epoch": 68.59016393442623, "grad_norm": 3.8464436531066895, "learning_rate": 4.743248333220214e-06, "loss": 0.1336, "step": 20920 }, { "epoch": 68.59344262295082, "grad_norm": 2.7073006629943848, "learning_rate": 4.74234503112168e-06, "loss": 0.1323, "step": 20921 }, { "epoch": 68.5967213114754, "grad_norm": 2.5797603130340576, "learning_rate": 4.741441788308672e-06, "loss": 0.0697, "step": 20922 }, { "epoch": 68.6, "grad_norm": 4.281400203704834, "learning_rate": 4.740538604791371e-06, "loss": 0.2199, "step": 20923 }, { "epoch": 68.6032786885246, "grad_norm": 5.001072883605957, "learning_rate": 4.73963548057997e-06, "loss": 0.2457, "step": 20924 }, { "epoch": 68.60655737704919, "grad_norm": 4.133576393127441, "learning_rate": 4.738732415684647e-06, "loss": 0.1347, "step": 20925 }, { "epoch": 68.60983606557377, "grad_norm": 3.3973851203918457, "learning_rate": 4.737829410115587e-06, "loss": 0.231, "step": 20926 }, { "epoch": 68.61311475409836, "grad_norm": 3.240590810775757, "learning_rate": 4.7369264638829695e-06, "loss": 0.0879, "step": 20927 }, { "epoch": 68.61639344262295, "grad_norm": 3.037489652633667, "learning_rate": 4.73602357699698e-06, "loss": 0.1677, "step": 20928 }, { "epoch": 68.61967213114754, "grad_norm": 2.9467902183532715, "learning_rate": 4.735120749467799e-06, "loss": 0.1867, "step": 20929 }, { "epoch": 68.62295081967213, "grad_norm": 2.5374529361724854, "learning_rate": 4.7342179813056055e-06, "loss": 0.1483, "step": 20930 }, { "epoch": 68.62622950819672, "grad_norm": 1.8391213417053223, "learning_rate": 4.733315272520579e-06, "loss": 0.1535, "step": 20931 }, { "epoch": 68.62950819672132, "grad_norm": 2.6969833374023438, "learning_rate": 4.732412623122895e-06, "loss": 0.1354, "step": 20932 }, { "epoch": 68.6327868852459, "grad_norm": 2.94309401512146, "learning_rate": 4.731510033122739e-06, "loss": 0.2117, "step": 20933 }, { "epoch": 68.6360655737705, "grad_norm": 5.26589298248291, "learning_rate": 4.730607502530287e-06, "loss": 0.1713, "step": 20934 }, { "epoch": 68.63934426229508, "grad_norm": 2.8002066612243652, "learning_rate": 4.729705031355712e-06, "loss": 0.324, "step": 20935 }, { "epoch": 68.64262295081967, "grad_norm": 2.8897457122802734, "learning_rate": 4.728802619609191e-06, "loss": 0.1596, "step": 20936 }, { "epoch": 68.64590163934426, "grad_norm": 2.5538618564605713, "learning_rate": 4.727900267300904e-06, "loss": 0.1459, "step": 20937 }, { "epoch": 68.64918032786885, "grad_norm": 2.099132537841797, "learning_rate": 4.726997974441022e-06, "loss": 0.0781, "step": 20938 }, { "epoch": 68.65245901639344, "grad_norm": 2.7463746070861816, "learning_rate": 4.726095741039721e-06, "loss": 0.2463, "step": 20939 }, { "epoch": 68.65573770491804, "grad_norm": 2.5095999240875244, "learning_rate": 4.72519356710717e-06, "loss": 0.0475, "step": 20940 }, { "epoch": 68.65901639344263, "grad_norm": 2.597116231918335, "learning_rate": 4.72429145265355e-06, "loss": 0.092, "step": 20941 }, { "epoch": 68.66229508196722, "grad_norm": 2.5424349308013916, "learning_rate": 4.72338939768903e-06, "loss": 0.1433, "step": 20942 }, { "epoch": 68.6655737704918, "grad_norm": 3.301409959793091, "learning_rate": 4.72248740222378e-06, "loss": 0.0574, "step": 20943 }, { "epoch": 68.66885245901639, "grad_norm": 2.8703606128692627, "learning_rate": 4.721585466267972e-06, "loss": 0.1534, "step": 20944 }, { "epoch": 68.67213114754098, "grad_norm": 3.37776517868042, "learning_rate": 4.720683589831771e-06, "loss": 0.0912, "step": 20945 }, { "epoch": 68.67540983606557, "grad_norm": 2.5748119354248047, "learning_rate": 4.719781772925357e-06, "loss": 0.0502, "step": 20946 }, { "epoch": 68.67868852459016, "grad_norm": 3.637864351272583, "learning_rate": 4.718880015558893e-06, "loss": 0.3358, "step": 20947 }, { "epoch": 68.68196721311476, "grad_norm": 2.357503652572632, "learning_rate": 4.7179783177425485e-06, "loss": 0.1038, "step": 20948 }, { "epoch": 68.68524590163935, "grad_norm": 3.1299586296081543, "learning_rate": 4.7170766794864865e-06, "loss": 0.1051, "step": 20949 }, { "epoch": 68.68852459016394, "grad_norm": 2.983577013015747, "learning_rate": 4.71617510080088e-06, "loss": 0.1127, "step": 20950 }, { "epoch": 68.69180327868852, "grad_norm": 3.288961887359619, "learning_rate": 4.715273581695895e-06, "loss": 0.1577, "step": 20951 }, { "epoch": 68.69508196721311, "grad_norm": 3.5381102561950684, "learning_rate": 4.714372122181694e-06, "loss": 0.231, "step": 20952 }, { "epoch": 68.6983606557377, "grad_norm": 2.8519721031188965, "learning_rate": 4.713470722268439e-06, "loss": 0.097, "step": 20953 }, { "epoch": 68.70163934426229, "grad_norm": 2.4575817584991455, "learning_rate": 4.712569381966303e-06, "loss": 0.07, "step": 20954 }, { "epoch": 68.70491803278688, "grad_norm": 3.115286111831665, "learning_rate": 4.7116681012854445e-06, "loss": 0.0972, "step": 20955 }, { "epoch": 68.70819672131148, "grad_norm": 2.9203131198883057, "learning_rate": 4.7107668802360266e-06, "loss": 0.2596, "step": 20956 }, { "epoch": 68.71147540983607, "grad_norm": 2.3207900524139404, "learning_rate": 4.709865718828212e-06, "loss": 0.066, "step": 20957 }, { "epoch": 68.71475409836066, "grad_norm": 3.5256521701812744, "learning_rate": 4.708964617072157e-06, "loss": 0.1733, "step": 20958 }, { "epoch": 68.71803278688525, "grad_norm": 2.655163049697876, "learning_rate": 4.708063574978031e-06, "loss": 0.0628, "step": 20959 }, { "epoch": 68.72131147540983, "grad_norm": 3.004140615463257, "learning_rate": 4.707162592555992e-06, "loss": 0.1102, "step": 20960 }, { "epoch": 68.72459016393442, "grad_norm": 3.1753530502319336, "learning_rate": 4.706261669816196e-06, "loss": 0.2202, "step": 20961 }, { "epoch": 68.72786885245901, "grad_norm": 2.4422342777252197, "learning_rate": 4.7053608067688e-06, "loss": 0.1918, "step": 20962 }, { "epoch": 68.73114754098361, "grad_norm": 3.0464930534362793, "learning_rate": 4.704460003423971e-06, "loss": 0.1024, "step": 20963 }, { "epoch": 68.7344262295082, "grad_norm": 2.9008710384368896, "learning_rate": 4.703559259791861e-06, "loss": 0.325, "step": 20964 }, { "epoch": 68.73770491803279, "grad_norm": 3.64121413230896, "learning_rate": 4.702658575882627e-06, "loss": 0.135, "step": 20965 }, { "epoch": 68.74098360655738, "grad_norm": 2.8873891830444336, "learning_rate": 4.701757951706423e-06, "loss": 0.11, "step": 20966 }, { "epoch": 68.74426229508197, "grad_norm": 3.7913331985473633, "learning_rate": 4.7008573872734085e-06, "loss": 0.2082, "step": 20967 }, { "epoch": 68.74754098360656, "grad_norm": 2.947286605834961, "learning_rate": 4.699956882593738e-06, "loss": 0.1617, "step": 20968 }, { "epoch": 68.75081967213114, "grad_norm": 3.8368334770202637, "learning_rate": 4.69905643767756e-06, "loss": 0.1958, "step": 20969 }, { "epoch": 68.75409836065573, "grad_norm": 4.522919654846191, "learning_rate": 4.698156052535036e-06, "loss": 0.1528, "step": 20970 }, { "epoch": 68.75737704918033, "grad_norm": 2.9812896251678467, "learning_rate": 4.697255727176315e-06, "loss": 0.0906, "step": 20971 }, { "epoch": 68.76065573770492, "grad_norm": 8.805705070495605, "learning_rate": 4.696355461611547e-06, "loss": 0.1189, "step": 20972 }, { "epoch": 68.76393442622951, "grad_norm": 3.31624698638916, "learning_rate": 4.695455255850887e-06, "loss": 0.2745, "step": 20973 }, { "epoch": 68.7672131147541, "grad_norm": 2.537346839904785, "learning_rate": 4.694555109904486e-06, "loss": 0.0697, "step": 20974 }, { "epoch": 68.77049180327869, "grad_norm": 2.7796616554260254, "learning_rate": 4.693655023782492e-06, "loss": 0.0915, "step": 20975 }, { "epoch": 68.77377049180328, "grad_norm": 3.4561924934387207, "learning_rate": 4.69275499749505e-06, "loss": 0.242, "step": 20976 }, { "epoch": 68.77704918032786, "grad_norm": 2.7361629009246826, "learning_rate": 4.6918550310523195e-06, "loss": 0.0837, "step": 20977 }, { "epoch": 68.78032786885245, "grad_norm": 2.879760980606079, "learning_rate": 4.69095512446444e-06, "loss": 0.0883, "step": 20978 }, { "epoch": 68.78360655737706, "grad_norm": 3.181112766265869, "learning_rate": 4.69005527774156e-06, "loss": 0.092, "step": 20979 }, { "epoch": 68.78688524590164, "grad_norm": 2.653231382369995, "learning_rate": 4.68915549089383e-06, "loss": 0.1849, "step": 20980 }, { "epoch": 68.79016393442623, "grad_norm": 3.381277561187744, "learning_rate": 4.688255763931394e-06, "loss": 0.1106, "step": 20981 }, { "epoch": 68.79344262295082, "grad_norm": 2.6219990253448486, "learning_rate": 4.687356096864397e-06, "loss": 0.0866, "step": 20982 }, { "epoch": 68.79672131147541, "grad_norm": 2.9300999641418457, "learning_rate": 4.686456489702984e-06, "loss": 0.1296, "step": 20983 }, { "epoch": 68.8, "grad_norm": 2.8287434577941895, "learning_rate": 4.685556942457296e-06, "loss": 0.2066, "step": 20984 }, { "epoch": 68.80327868852459, "grad_norm": 2.5178115367889404, "learning_rate": 4.684657455137482e-06, "loss": 0.1667, "step": 20985 }, { "epoch": 68.80655737704917, "grad_norm": 2.466543197631836, "learning_rate": 4.683758027753681e-06, "loss": 0.2997, "step": 20986 }, { "epoch": 68.80983606557378, "grad_norm": 2.3490915298461914, "learning_rate": 4.6828586603160365e-06, "loss": 0.1697, "step": 20987 }, { "epoch": 68.81311475409836, "grad_norm": 3.0427777767181396, "learning_rate": 4.681959352834685e-06, "loss": 0.1887, "step": 20988 }, { "epoch": 68.81639344262295, "grad_norm": 3.6254384517669678, "learning_rate": 4.681060105319776e-06, "loss": 0.1087, "step": 20989 }, { "epoch": 68.81967213114754, "grad_norm": 2.4551796913146973, "learning_rate": 4.680160917781443e-06, "loss": 0.0631, "step": 20990 }, { "epoch": 68.82295081967213, "grad_norm": 3.440845251083374, "learning_rate": 4.679261790229829e-06, "loss": 0.322, "step": 20991 }, { "epoch": 68.82622950819672, "grad_norm": 2.117990255355835, "learning_rate": 4.678362722675065e-06, "loss": 0.0652, "step": 20992 }, { "epoch": 68.8295081967213, "grad_norm": 3.021885871887207, "learning_rate": 4.677463715127298e-06, "loss": 0.1147, "step": 20993 }, { "epoch": 68.8327868852459, "grad_norm": 3.316768169403076, "learning_rate": 4.676564767596663e-06, "loss": 0.1419, "step": 20994 }, { "epoch": 68.8360655737705, "grad_norm": 3.0153870582580566, "learning_rate": 4.675665880093294e-06, "loss": 0.2156, "step": 20995 }, { "epoch": 68.83934426229509, "grad_norm": 3.0256247520446777, "learning_rate": 4.6747670526273296e-06, "loss": 0.132, "step": 20996 }, { "epoch": 68.84262295081967, "grad_norm": 3.0654988288879395, "learning_rate": 4.673868285208898e-06, "loss": 0.085, "step": 20997 }, { "epoch": 68.84590163934426, "grad_norm": 2.661463499069214, "learning_rate": 4.672969577848144e-06, "loss": 0.2393, "step": 20998 }, { "epoch": 68.84918032786885, "grad_norm": 3.442643165588379, "learning_rate": 4.672070930555196e-06, "loss": 0.2186, "step": 20999 }, { "epoch": 68.85245901639344, "grad_norm": 2.496331214904785, "learning_rate": 4.671172343340189e-06, "loss": 0.1718, "step": 21000 }, { "epoch": 68.85573770491803, "grad_norm": 2.6019368171691895, "learning_rate": 4.67027381621325e-06, "loss": 0.1216, "step": 21001 }, { "epoch": 68.85901639344263, "grad_norm": 2.2670392990112305, "learning_rate": 4.669375349184519e-06, "loss": 0.0792, "step": 21002 }, { "epoch": 68.86229508196722, "grad_norm": 3.546177625656128, "learning_rate": 4.668476942264124e-06, "loss": 0.1876, "step": 21003 }, { "epoch": 68.8655737704918, "grad_norm": 2.992697238922119, "learning_rate": 4.667578595462194e-06, "loss": 0.06, "step": 21004 }, { "epoch": 68.8688524590164, "grad_norm": 3.572643280029297, "learning_rate": 4.666680308788857e-06, "loss": 0.1982, "step": 21005 }, { "epoch": 68.87213114754098, "grad_norm": 2.607682228088379, "learning_rate": 4.665782082254247e-06, "loss": 0.0953, "step": 21006 }, { "epoch": 68.87540983606557, "grad_norm": 2.346135139465332, "learning_rate": 4.664883915868491e-06, "loss": 0.0972, "step": 21007 }, { "epoch": 68.87868852459016, "grad_norm": 2.587820053100586, "learning_rate": 4.663985809641717e-06, "loss": 0.0978, "step": 21008 }, { "epoch": 68.88196721311475, "grad_norm": 1.982509970664978, "learning_rate": 4.66308776358405e-06, "loss": 0.0376, "step": 21009 }, { "epoch": 68.88524590163935, "grad_norm": 2.955089569091797, "learning_rate": 4.662189777705614e-06, "loss": 0.2011, "step": 21010 }, { "epoch": 68.88852459016394, "grad_norm": 2.5550084114074707, "learning_rate": 4.661291852016542e-06, "loss": 0.0524, "step": 21011 }, { "epoch": 68.89180327868853, "grad_norm": 2.52469801902771, "learning_rate": 4.660393986526954e-06, "loss": 0.1506, "step": 21012 }, { "epoch": 68.89508196721312, "grad_norm": 2.573681592941284, "learning_rate": 4.6594961812469775e-06, "loss": 0.0836, "step": 21013 }, { "epoch": 68.8983606557377, "grad_norm": 2.7719287872314453, "learning_rate": 4.658598436186729e-06, "loss": 0.1543, "step": 21014 }, { "epoch": 68.90163934426229, "grad_norm": 3.268914222717285, "learning_rate": 4.657700751356341e-06, "loss": 0.0679, "step": 21015 }, { "epoch": 68.90491803278688, "grad_norm": 3.7042713165283203, "learning_rate": 4.656803126765932e-06, "loss": 0.1761, "step": 21016 }, { "epoch": 68.90819672131147, "grad_norm": 3.8968403339385986, "learning_rate": 4.655905562425623e-06, "loss": 0.2113, "step": 21017 }, { "epoch": 68.91147540983607, "grad_norm": 2.311347007751465, "learning_rate": 4.6550080583455315e-06, "loss": 0.155, "step": 21018 }, { "epoch": 68.91475409836066, "grad_norm": 2.520219564437866, "learning_rate": 4.654110614535786e-06, "loss": 0.1488, "step": 21019 }, { "epoch": 68.91803278688525, "grad_norm": 2.698307514190674, "learning_rate": 4.6532132310065e-06, "loss": 0.1009, "step": 21020 }, { "epoch": 68.92131147540984, "grad_norm": 2.943711996078491, "learning_rate": 4.652315907767796e-06, "loss": 0.1091, "step": 21021 }, { "epoch": 68.92459016393443, "grad_norm": 2.772831439971924, "learning_rate": 4.651418644829786e-06, "loss": 0.065, "step": 21022 }, { "epoch": 68.92786885245901, "grad_norm": 3.152531147003174, "learning_rate": 4.6505214422025945e-06, "loss": 0.2237, "step": 21023 }, { "epoch": 68.9311475409836, "grad_norm": 2.6132540702819824, "learning_rate": 4.649624299896337e-06, "loss": 0.2297, "step": 21024 }, { "epoch": 68.93442622950819, "grad_norm": 2.5018627643585205, "learning_rate": 4.6487272179211255e-06, "loss": 0.14, "step": 21025 }, { "epoch": 68.9377049180328, "grad_norm": 3.999518632888794, "learning_rate": 4.647830196287081e-06, "loss": 0.2796, "step": 21026 }, { "epoch": 68.94098360655738, "grad_norm": 2.9300596714019775, "learning_rate": 4.646933235004315e-06, "loss": 0.1513, "step": 21027 }, { "epoch": 68.94426229508197, "grad_norm": 2.411163806915283, "learning_rate": 4.6460363340829406e-06, "loss": 0.1211, "step": 21028 }, { "epoch": 68.94754098360656, "grad_norm": 1.6264630556106567, "learning_rate": 4.645139493533077e-06, "loss": 0.0343, "step": 21029 }, { "epoch": 68.95081967213115, "grad_norm": 3.433959484100342, "learning_rate": 4.644242713364831e-06, "loss": 0.1081, "step": 21030 }, { "epoch": 68.95409836065573, "grad_norm": 2.433138847351074, "learning_rate": 4.6433459935883156e-06, "loss": 0.1561, "step": 21031 }, { "epoch": 68.95737704918032, "grad_norm": 2.802272319793701, "learning_rate": 4.6424493342136465e-06, "loss": 0.2584, "step": 21032 }, { "epoch": 68.96065573770491, "grad_norm": 2.5858170986175537, "learning_rate": 4.641552735250933e-06, "loss": 0.2235, "step": 21033 }, { "epoch": 68.96393442622951, "grad_norm": 2.86732816696167, "learning_rate": 4.640656196710281e-06, "loss": 0.0841, "step": 21034 }, { "epoch": 68.9672131147541, "grad_norm": 2.6001169681549072, "learning_rate": 4.639759718601805e-06, "loss": 0.0936, "step": 21035 }, { "epoch": 68.97049180327869, "grad_norm": 5.605099201202393, "learning_rate": 4.638863300935606e-06, "loss": 0.2839, "step": 21036 }, { "epoch": 68.97377049180328, "grad_norm": 2.6494369506835938, "learning_rate": 4.637966943721804e-06, "loss": 0.1895, "step": 21037 }, { "epoch": 68.97704918032787, "grad_norm": 2.682692527770996, "learning_rate": 4.637070646970497e-06, "loss": 0.0632, "step": 21038 }, { "epoch": 68.98032786885246, "grad_norm": 2.838932991027832, "learning_rate": 4.636174410691796e-06, "loss": 0.1411, "step": 21039 }, { "epoch": 68.98360655737704, "grad_norm": 2.844984531402588, "learning_rate": 4.635278234895802e-06, "loss": 0.1276, "step": 21040 }, { "epoch": 68.98688524590163, "grad_norm": 2.855330228805542, "learning_rate": 4.634382119592625e-06, "loss": 0.0846, "step": 21041 }, { "epoch": 68.99016393442623, "grad_norm": 3.3712122440338135, "learning_rate": 4.6334860647923706e-06, "loss": 0.3476, "step": 21042 }, { "epoch": 68.99344262295082, "grad_norm": 2.100480079650879, "learning_rate": 4.632590070505141e-06, "loss": 0.1738, "step": 21043 }, { "epoch": 68.99672131147541, "grad_norm": 2.7368345260620117, "learning_rate": 4.631694136741034e-06, "loss": 0.0839, "step": 21044 }, { "epoch": 69.0, "grad_norm": 2.843302011489868, "learning_rate": 4.630798263510162e-06, "loss": 0.1653, "step": 21045 }, { "epoch": 69.00327868852459, "grad_norm": 2.040804624557495, "learning_rate": 4.629902450822622e-06, "loss": 0.069, "step": 21046 }, { "epoch": 69.00655737704918, "grad_norm": 3.235490322113037, "learning_rate": 4.629006698688515e-06, "loss": 0.2071, "step": 21047 }, { "epoch": 69.00983606557377, "grad_norm": 1.9383829832077026, "learning_rate": 4.628111007117941e-06, "loss": 0.1094, "step": 21048 }, { "epoch": 69.01311475409837, "grad_norm": 2.365769863128662, "learning_rate": 4.627215376120998e-06, "loss": 0.0805, "step": 21049 }, { "epoch": 69.01639344262296, "grad_norm": 2.703184127807617, "learning_rate": 4.6263198057077916e-06, "loss": 0.0824, "step": 21050 }, { "epoch": 69.01967213114754, "grad_norm": 3.225062608718872, "learning_rate": 4.625424295888418e-06, "loss": 0.2623, "step": 21051 }, { "epoch": 69.02295081967213, "grad_norm": 3.086315631866455, "learning_rate": 4.624528846672972e-06, "loss": 0.2336, "step": 21052 }, { "epoch": 69.02622950819672, "grad_norm": 3.4857139587402344, "learning_rate": 4.623633458071549e-06, "loss": 0.2089, "step": 21053 }, { "epoch": 69.02950819672131, "grad_norm": 3.218137264251709, "learning_rate": 4.622738130094252e-06, "loss": 0.1433, "step": 21054 }, { "epoch": 69.0327868852459, "grad_norm": 2.1778924465179443, "learning_rate": 4.6218428627511744e-06, "loss": 0.2185, "step": 21055 }, { "epoch": 69.03606557377049, "grad_norm": 3.567265748977661, "learning_rate": 4.62094765605241e-06, "loss": 0.2073, "step": 21056 }, { "epoch": 69.03934426229509, "grad_norm": 2.998516321182251, "learning_rate": 4.620052510008049e-06, "loss": 0.1084, "step": 21057 }, { "epoch": 69.04262295081968, "grad_norm": 2.8868043422698975, "learning_rate": 4.619157424628195e-06, "loss": 0.1461, "step": 21058 }, { "epoch": 69.04590163934427, "grad_norm": 3.194817304611206, "learning_rate": 4.618262399922935e-06, "loss": 0.2828, "step": 21059 }, { "epoch": 69.04918032786885, "grad_norm": 2.9132792949676514, "learning_rate": 4.617367435902363e-06, "loss": 0.1456, "step": 21060 }, { "epoch": 69.05245901639344, "grad_norm": 3.1576778888702393, "learning_rate": 4.616472532576568e-06, "loss": 0.2243, "step": 21061 }, { "epoch": 69.05573770491803, "grad_norm": 3.1717493534088135, "learning_rate": 4.615577689955639e-06, "loss": 0.1249, "step": 21062 }, { "epoch": 69.05901639344262, "grad_norm": 2.741671323776245, "learning_rate": 4.6146829080496746e-06, "loss": 0.1766, "step": 21063 }, { "epoch": 69.0622950819672, "grad_norm": 4.79697322845459, "learning_rate": 4.613788186868759e-06, "loss": 0.123, "step": 21064 }, { "epoch": 69.06557377049181, "grad_norm": 1.5685955286026, "learning_rate": 4.612893526422983e-06, "loss": 0.0156, "step": 21065 }, { "epoch": 69.0688524590164, "grad_norm": 3.218872308731079, "learning_rate": 4.611998926722428e-06, "loss": 0.1228, "step": 21066 }, { "epoch": 69.07213114754099, "grad_norm": 2.4772655963897705, "learning_rate": 4.611104387777193e-06, "loss": 0.2449, "step": 21067 }, { "epoch": 69.07540983606557, "grad_norm": 2.625659942626953, "learning_rate": 4.610209909597359e-06, "loss": 0.2537, "step": 21068 }, { "epoch": 69.07868852459016, "grad_norm": 1.7845879793167114, "learning_rate": 4.609315492193011e-06, "loss": 0.1646, "step": 21069 }, { "epoch": 69.08196721311475, "grad_norm": 2.680532932281494, "learning_rate": 4.608421135574232e-06, "loss": 0.1099, "step": 21070 }, { "epoch": 69.08524590163934, "grad_norm": 2.576031446456909, "learning_rate": 4.607526839751115e-06, "loss": 0.2005, "step": 21071 }, { "epoch": 69.08852459016393, "grad_norm": 2.4895637035369873, "learning_rate": 4.60663260473374e-06, "loss": 0.1703, "step": 21072 }, { "epoch": 69.09180327868853, "grad_norm": 3.03448224067688, "learning_rate": 4.60573843053219e-06, "loss": 0.1391, "step": 21073 }, { "epoch": 69.09508196721312, "grad_norm": 2.718602418899536, "learning_rate": 4.604844317156543e-06, "loss": 0.1218, "step": 21074 }, { "epoch": 69.09836065573771, "grad_norm": 2.717747211456299, "learning_rate": 4.60395026461689e-06, "loss": 0.1146, "step": 21075 }, { "epoch": 69.1016393442623, "grad_norm": 4.78952169418335, "learning_rate": 4.603056272923309e-06, "loss": 0.0832, "step": 21076 }, { "epoch": 69.10491803278688, "grad_norm": 2.508483648300171, "learning_rate": 4.602162342085879e-06, "loss": 0.3614, "step": 21077 }, { "epoch": 69.10819672131147, "grad_norm": 2.0752124786376953, "learning_rate": 4.6012684721146775e-06, "loss": 0.117, "step": 21078 }, { "epoch": 69.11147540983606, "grad_norm": 2.653359889984131, "learning_rate": 4.600374663019791e-06, "loss": 0.1166, "step": 21079 }, { "epoch": 69.11475409836065, "grad_norm": 3.211397647857666, "learning_rate": 4.599480914811294e-06, "loss": 0.1523, "step": 21080 }, { "epoch": 69.11803278688525, "grad_norm": 2.4882709980010986, "learning_rate": 4.598587227499261e-06, "loss": 0.0905, "step": 21081 }, { "epoch": 69.12131147540984, "grad_norm": 2.486096143722534, "learning_rate": 4.597693601093779e-06, "loss": 0.2124, "step": 21082 }, { "epoch": 69.12459016393443, "grad_norm": 4.031021595001221, "learning_rate": 4.596800035604917e-06, "loss": 0.2071, "step": 21083 }, { "epoch": 69.12786885245902, "grad_norm": 2.767817497253418, "learning_rate": 4.5959065310427485e-06, "loss": 0.0828, "step": 21084 }, { "epoch": 69.1311475409836, "grad_norm": 2.4555745124816895, "learning_rate": 4.595013087417356e-06, "loss": 0.0465, "step": 21085 }, { "epoch": 69.1344262295082, "grad_norm": 3.028736114501953, "learning_rate": 4.594119704738812e-06, "loss": 0.1528, "step": 21086 }, { "epoch": 69.13770491803278, "grad_norm": 2.8447060585021973, "learning_rate": 4.593226383017189e-06, "loss": 0.1704, "step": 21087 }, { "epoch": 69.14098360655737, "grad_norm": 2.4566588401794434, "learning_rate": 4.592333122262555e-06, "loss": 0.1153, "step": 21088 }, { "epoch": 69.14426229508197, "grad_norm": 2.8467891216278076, "learning_rate": 4.591439922484993e-06, "loss": 0.1214, "step": 21089 }, { "epoch": 69.14754098360656, "grad_norm": 2.913653612136841, "learning_rate": 4.590546783694568e-06, "loss": 0.2884, "step": 21090 }, { "epoch": 69.15081967213115, "grad_norm": 4.267098903656006, "learning_rate": 4.5896537059013536e-06, "loss": 0.241, "step": 21091 }, { "epoch": 69.15409836065574, "grad_norm": 2.633254289627075, "learning_rate": 4.588760689115414e-06, "loss": 0.1527, "step": 21092 }, { "epoch": 69.15737704918033, "grad_norm": 3.138097047805786, "learning_rate": 4.587867733346829e-06, "loss": 0.197, "step": 21093 }, { "epoch": 69.16065573770491, "grad_norm": 2.715120553970337, "learning_rate": 4.5869748386056615e-06, "loss": 0.1562, "step": 21094 }, { "epoch": 69.1639344262295, "grad_norm": 2.06113338470459, "learning_rate": 4.586082004901982e-06, "loss": 0.0885, "step": 21095 }, { "epoch": 69.1672131147541, "grad_norm": 3.0507328510284424, "learning_rate": 4.585189232245851e-06, "loss": 0.1139, "step": 21096 }, { "epoch": 69.1704918032787, "grad_norm": 2.9729256629943848, "learning_rate": 4.584296520647348e-06, "loss": 0.1344, "step": 21097 }, { "epoch": 69.17377049180328, "grad_norm": 1.9154902696609497, "learning_rate": 4.5834038701165304e-06, "loss": 0.1521, "step": 21098 }, { "epoch": 69.17704918032787, "grad_norm": 1.9878190755844116, "learning_rate": 4.5825112806634665e-06, "loss": 0.0494, "step": 21099 }, { "epoch": 69.18032786885246, "grad_norm": 2.885728597640991, "learning_rate": 4.581618752298221e-06, "loss": 0.1482, "step": 21100 }, { "epoch": 69.18360655737705, "grad_norm": 2.272531509399414, "learning_rate": 4.5807262850308544e-06, "loss": 0.1173, "step": 21101 }, { "epoch": 69.18688524590164, "grad_norm": 2.1478278636932373, "learning_rate": 4.579833878871437e-06, "loss": 0.0731, "step": 21102 }, { "epoch": 69.19016393442622, "grad_norm": 2.2859480381011963, "learning_rate": 4.578941533830028e-06, "loss": 0.0795, "step": 21103 }, { "epoch": 69.19344262295083, "grad_norm": 3.0654983520507812, "learning_rate": 4.57804924991669e-06, "loss": 0.1086, "step": 21104 }, { "epoch": 69.19672131147541, "grad_norm": 2.5706088542938232, "learning_rate": 4.57715702714148e-06, "loss": 0.2131, "step": 21105 }, { "epoch": 69.2, "grad_norm": 3.440256357192993, "learning_rate": 4.576264865514467e-06, "loss": 0.2495, "step": 21106 }, { "epoch": 69.20327868852459, "grad_norm": 2.082143545150757, "learning_rate": 4.575372765045707e-06, "loss": 0.1289, "step": 21107 }, { "epoch": 69.20655737704918, "grad_norm": 2.959829807281494, "learning_rate": 4.574480725745258e-06, "loss": 0.0833, "step": 21108 }, { "epoch": 69.20983606557377, "grad_norm": 2.7855873107910156, "learning_rate": 4.573588747623178e-06, "loss": 0.1504, "step": 21109 }, { "epoch": 69.21311475409836, "grad_norm": 2.133354663848877, "learning_rate": 4.5726968306895306e-06, "loss": 0.0427, "step": 21110 }, { "epoch": 69.21639344262294, "grad_norm": 2.7001445293426514, "learning_rate": 4.571804974954368e-06, "loss": 0.1542, "step": 21111 }, { "epoch": 69.21967213114755, "grad_norm": 3.074251890182495, "learning_rate": 4.570913180427749e-06, "loss": 0.1692, "step": 21112 }, { "epoch": 69.22295081967214, "grad_norm": 2.9185543060302734, "learning_rate": 4.570021447119729e-06, "loss": 0.0794, "step": 21113 }, { "epoch": 69.22622950819672, "grad_norm": 2.193986654281616, "learning_rate": 4.569129775040359e-06, "loss": 0.1922, "step": 21114 }, { "epoch": 69.22950819672131, "grad_norm": 2.895172119140625, "learning_rate": 4.568238164199701e-06, "loss": 0.0877, "step": 21115 }, { "epoch": 69.2327868852459, "grad_norm": 2.311307430267334, "learning_rate": 4.567346614607805e-06, "loss": 0.0581, "step": 21116 }, { "epoch": 69.23606557377049, "grad_norm": 2.8082988262176514, "learning_rate": 4.5664551262747244e-06, "loss": 0.1032, "step": 21117 }, { "epoch": 69.23934426229508, "grad_norm": 3.0696401596069336, "learning_rate": 4.565563699210509e-06, "loss": 0.1805, "step": 21118 }, { "epoch": 69.24262295081967, "grad_norm": 2.7244997024536133, "learning_rate": 4.5646723334252165e-06, "loss": 0.1939, "step": 21119 }, { "epoch": 69.24590163934427, "grad_norm": 2.8406872749328613, "learning_rate": 4.563781028928894e-06, "loss": 0.0993, "step": 21120 }, { "epoch": 69.24918032786886, "grad_norm": 2.680204153060913, "learning_rate": 4.562889785731594e-06, "loss": 0.0925, "step": 21121 }, { "epoch": 69.25245901639344, "grad_norm": 2.5195415019989014, "learning_rate": 4.561998603843359e-06, "loss": 0.2455, "step": 21122 }, { "epoch": 69.25573770491803, "grad_norm": 3.5643153190612793, "learning_rate": 4.561107483274249e-06, "loss": 0.119, "step": 21123 }, { "epoch": 69.25901639344262, "grad_norm": 3.6960394382476807, "learning_rate": 4.5602164240343085e-06, "loss": 0.2688, "step": 21124 }, { "epoch": 69.26229508196721, "grad_norm": 2.0862667560577393, "learning_rate": 4.5593254261335816e-06, "loss": 0.1823, "step": 21125 }, { "epoch": 69.2655737704918, "grad_norm": 2.8688204288482666, "learning_rate": 4.558434489582119e-06, "loss": 0.3458, "step": 21126 }, { "epoch": 69.26885245901639, "grad_norm": 2.7325592041015625, "learning_rate": 4.557543614389961e-06, "loss": 0.2301, "step": 21127 }, { "epoch": 69.27213114754099, "grad_norm": 2.6368818283081055, "learning_rate": 4.55665280056716e-06, "loss": 0.3081, "step": 21128 }, { "epoch": 69.27540983606558, "grad_norm": 2.370151996612549, "learning_rate": 4.555762048123761e-06, "loss": 0.2132, "step": 21129 }, { "epoch": 69.27868852459017, "grad_norm": 2.2294692993164062, "learning_rate": 4.5548713570698e-06, "loss": 0.1161, "step": 21130 }, { "epoch": 69.28196721311475, "grad_norm": 2.721254825592041, "learning_rate": 4.553980727415329e-06, "loss": 0.1233, "step": 21131 }, { "epoch": 69.28524590163934, "grad_norm": 3.3997669219970703, "learning_rate": 4.553090159170389e-06, "loss": 0.1751, "step": 21132 }, { "epoch": 69.28852459016393, "grad_norm": 2.636094331741333, "learning_rate": 4.552199652345016e-06, "loss": 0.0824, "step": 21133 }, { "epoch": 69.29180327868852, "grad_norm": 2.7628965377807617, "learning_rate": 4.551309206949262e-06, "loss": 0.1485, "step": 21134 }, { "epoch": 69.29508196721312, "grad_norm": 2.1635046005249023, "learning_rate": 4.55041882299316e-06, "loss": 0.1193, "step": 21135 }, { "epoch": 69.29836065573771, "grad_norm": 2.857417583465576, "learning_rate": 4.549528500486753e-06, "loss": 0.1073, "step": 21136 }, { "epoch": 69.3016393442623, "grad_norm": 3.777012586593628, "learning_rate": 4.548638239440074e-06, "loss": 0.1013, "step": 21137 }, { "epoch": 69.30491803278689, "grad_norm": 2.293687343597412, "learning_rate": 4.547748039863172e-06, "loss": 0.1119, "step": 21138 }, { "epoch": 69.30819672131148, "grad_norm": 4.379242897033691, "learning_rate": 4.5468579017660796e-06, "loss": 0.2445, "step": 21139 }, { "epoch": 69.31147540983606, "grad_norm": 2.95583438873291, "learning_rate": 4.54596782515883e-06, "loss": 0.326, "step": 21140 }, { "epoch": 69.31475409836065, "grad_norm": 2.632298231124878, "learning_rate": 4.545077810051468e-06, "loss": 0.1034, "step": 21141 }, { "epoch": 69.31803278688524, "grad_norm": 2.078164577484131, "learning_rate": 4.544187856454025e-06, "loss": 0.1283, "step": 21142 }, { "epoch": 69.32131147540984, "grad_norm": 2.885538339614868, "learning_rate": 4.5432979643765375e-06, "loss": 0.1529, "step": 21143 }, { "epoch": 69.32459016393443, "grad_norm": 2.4104483127593994, "learning_rate": 4.542408133829034e-06, "loss": 0.0887, "step": 21144 }, { "epoch": 69.32786885245902, "grad_norm": 1.9263696670532227, "learning_rate": 4.541518364821557e-06, "loss": 0.1097, "step": 21145 }, { "epoch": 69.33114754098361, "grad_norm": 2.341520071029663, "learning_rate": 4.540628657364136e-06, "loss": 0.0719, "step": 21146 }, { "epoch": 69.3344262295082, "grad_norm": 2.467411994934082, "learning_rate": 4.539739011466805e-06, "loss": 0.1311, "step": 21147 }, { "epoch": 69.33770491803278, "grad_norm": 2.252575159072876, "learning_rate": 4.538849427139588e-06, "loss": 0.0945, "step": 21148 }, { "epoch": 69.34098360655737, "grad_norm": 3.286000967025757, "learning_rate": 4.537959904392527e-06, "loss": 0.1692, "step": 21149 }, { "epoch": 69.34426229508196, "grad_norm": 2.453683614730835, "learning_rate": 4.537070443235646e-06, "loss": 0.1709, "step": 21150 }, { "epoch": 69.34754098360656, "grad_norm": 2.6820085048675537, "learning_rate": 4.536181043678976e-06, "loss": 0.154, "step": 21151 }, { "epoch": 69.35081967213115, "grad_norm": 2.769946336746216, "learning_rate": 4.535291705732547e-06, "loss": 0.202, "step": 21152 }, { "epoch": 69.35409836065574, "grad_norm": 2.41681170463562, "learning_rate": 4.5344024294063805e-06, "loss": 0.208, "step": 21153 }, { "epoch": 69.35737704918033, "grad_norm": 2.5793192386627197, "learning_rate": 4.533513214710515e-06, "loss": 0.088, "step": 21154 }, { "epoch": 69.36065573770492, "grad_norm": 2.5210883617401123, "learning_rate": 4.53262406165497e-06, "loss": 0.0788, "step": 21155 }, { "epoch": 69.3639344262295, "grad_norm": 3.3537042140960693, "learning_rate": 4.531734970249774e-06, "loss": 0.1831, "step": 21156 }, { "epoch": 69.3672131147541, "grad_norm": 2.789363145828247, "learning_rate": 4.530845940504948e-06, "loss": 0.218, "step": 21157 }, { "epoch": 69.37049180327868, "grad_norm": 2.8647310733795166, "learning_rate": 4.529956972430524e-06, "loss": 0.1666, "step": 21158 }, { "epoch": 69.37377049180328, "grad_norm": 2.5169548988342285, "learning_rate": 4.529068066036523e-06, "loss": 0.1066, "step": 21159 }, { "epoch": 69.37704918032787, "grad_norm": 3.885288715362549, "learning_rate": 4.528179221332967e-06, "loss": 0.171, "step": 21160 }, { "epoch": 69.38032786885246, "grad_norm": 2.878718137741089, "learning_rate": 4.527290438329876e-06, "loss": 0.0748, "step": 21161 }, { "epoch": 69.38360655737705, "grad_norm": 2.4177472591400146, "learning_rate": 4.52640171703728e-06, "loss": 0.0958, "step": 21162 }, { "epoch": 69.38688524590164, "grad_norm": 3.7170937061309814, "learning_rate": 4.525513057465195e-06, "loss": 0.1425, "step": 21163 }, { "epoch": 69.39016393442623, "grad_norm": 3.176111936569214, "learning_rate": 4.524624459623643e-06, "loss": 0.145, "step": 21164 }, { "epoch": 69.39344262295081, "grad_norm": 2.057769298553467, "learning_rate": 4.5237359235226434e-06, "loss": 0.0738, "step": 21165 }, { "epoch": 69.3967213114754, "grad_norm": 2.699876070022583, "learning_rate": 4.522847449172211e-06, "loss": 0.1005, "step": 21166 }, { "epoch": 69.4, "grad_norm": 3.232869863510132, "learning_rate": 4.521959036582372e-06, "loss": 0.3586, "step": 21167 }, { "epoch": 69.4032786885246, "grad_norm": 2.535982370376587, "learning_rate": 4.52107068576314e-06, "loss": 0.1046, "step": 21168 }, { "epoch": 69.40655737704918, "grad_norm": 2.918271541595459, "learning_rate": 4.520182396724534e-06, "loss": 0.0456, "step": 21169 }, { "epoch": 69.40983606557377, "grad_norm": 2.8294837474823, "learning_rate": 4.519294169476565e-06, "loss": 0.1017, "step": 21170 }, { "epoch": 69.41311475409836, "grad_norm": 3.0811967849731445, "learning_rate": 4.518406004029256e-06, "loss": 0.1371, "step": 21171 }, { "epoch": 69.41639344262295, "grad_norm": 2.935166835784912, "learning_rate": 4.517517900392618e-06, "loss": 0.0873, "step": 21172 }, { "epoch": 69.41967213114754, "grad_norm": 3.1682212352752686, "learning_rate": 4.516629858576667e-06, "loss": 0.1071, "step": 21173 }, { "epoch": 69.42295081967212, "grad_norm": 2.3903939723968506, "learning_rate": 4.515741878591413e-06, "loss": 0.2884, "step": 21174 }, { "epoch": 69.42622950819673, "grad_norm": 2.4544732570648193, "learning_rate": 4.514853960446873e-06, "loss": 0.098, "step": 21175 }, { "epoch": 69.42950819672132, "grad_norm": 2.594111919403076, "learning_rate": 4.513966104153059e-06, "loss": 0.1128, "step": 21176 }, { "epoch": 69.4327868852459, "grad_norm": 2.4849090576171875, "learning_rate": 4.513078309719981e-06, "loss": 0.0846, "step": 21177 }, { "epoch": 69.43606557377049, "grad_norm": 3.3045945167541504, "learning_rate": 4.51219057715765e-06, "loss": 0.1945, "step": 21178 }, { "epoch": 69.43934426229508, "grad_norm": 2.9421133995056152, "learning_rate": 4.511302906476073e-06, "loss": 0.26, "step": 21179 }, { "epoch": 69.44262295081967, "grad_norm": 2.110388994216919, "learning_rate": 4.510415297685266e-06, "loss": 0.0626, "step": 21180 }, { "epoch": 69.44590163934426, "grad_norm": 2.511258125305176, "learning_rate": 4.509527750795234e-06, "loss": 0.1102, "step": 21181 }, { "epoch": 69.44918032786886, "grad_norm": 2.798309087753296, "learning_rate": 4.508640265815984e-06, "loss": 0.1772, "step": 21182 }, { "epoch": 69.45245901639345, "grad_norm": 3.5223147869110107, "learning_rate": 4.507752842757524e-06, "loss": 0.1668, "step": 21183 }, { "epoch": 69.45573770491804, "grad_norm": 3.127044200897217, "learning_rate": 4.506865481629862e-06, "loss": 0.242, "step": 21184 }, { "epoch": 69.45901639344262, "grad_norm": 3.0458202362060547, "learning_rate": 4.505978182443004e-06, "loss": 0.1391, "step": 21185 }, { "epoch": 69.46229508196721, "grad_norm": 2.740783929824829, "learning_rate": 4.505090945206951e-06, "loss": 0.14, "step": 21186 }, { "epoch": 69.4655737704918, "grad_norm": 1.7720378637313843, "learning_rate": 4.504203769931715e-06, "loss": 0.039, "step": 21187 }, { "epoch": 69.46885245901639, "grad_norm": 1.977089285850525, "learning_rate": 4.503316656627295e-06, "loss": 0.0932, "step": 21188 }, { "epoch": 69.47213114754098, "grad_norm": 2.8155393600463867, "learning_rate": 4.502429605303691e-06, "loss": 0.1315, "step": 21189 }, { "epoch": 69.47540983606558, "grad_norm": 3.084777593612671, "learning_rate": 4.501542615970913e-06, "loss": 0.1948, "step": 21190 }, { "epoch": 69.47868852459017, "grad_norm": 2.4377434253692627, "learning_rate": 4.500655688638959e-06, "loss": 0.1101, "step": 21191 }, { "epoch": 69.48196721311476, "grad_norm": 2.418109893798828, "learning_rate": 4.499768823317826e-06, "loss": 0.3212, "step": 21192 }, { "epoch": 69.48524590163935, "grad_norm": 3.0316717624664307, "learning_rate": 4.498882020017522e-06, "loss": 0.1116, "step": 21193 }, { "epoch": 69.48852459016393, "grad_norm": 2.7906527519226074, "learning_rate": 4.4979952787480444e-06, "loss": 0.0698, "step": 21194 }, { "epoch": 69.49180327868852, "grad_norm": 2.9306821823120117, "learning_rate": 4.497108599519389e-06, "loss": 0.2561, "step": 21195 }, { "epoch": 69.49508196721311, "grad_norm": 2.8526554107666016, "learning_rate": 4.496221982341553e-06, "loss": 0.1785, "step": 21196 }, { "epoch": 69.4983606557377, "grad_norm": 4.314338684082031, "learning_rate": 4.49533542722454e-06, "loss": 0.1453, "step": 21197 }, { "epoch": 69.5016393442623, "grad_norm": 3.130990743637085, "learning_rate": 4.494448934178344e-06, "loss": 0.1116, "step": 21198 }, { "epoch": 69.50491803278689, "grad_norm": 3.0626320838928223, "learning_rate": 4.493562503212959e-06, "loss": 0.2221, "step": 21199 }, { "epoch": 69.50819672131148, "grad_norm": 2.3731019496917725, "learning_rate": 4.49267613433838e-06, "loss": 0.0925, "step": 21200 }, { "epoch": 69.51147540983607, "grad_norm": 2.494107961654663, "learning_rate": 4.491789827564606e-06, "loss": 0.0711, "step": 21201 }, { "epoch": 69.51475409836065, "grad_norm": 2.2143335342407227, "learning_rate": 4.49090358290163e-06, "loss": 0.1749, "step": 21202 }, { "epoch": 69.51803278688524, "grad_norm": 3.3522415161132812, "learning_rate": 4.490017400359444e-06, "loss": 0.1697, "step": 21203 }, { "epoch": 69.52131147540983, "grad_norm": 2.7641987800598145, "learning_rate": 4.48913127994804e-06, "loss": 0.1419, "step": 21204 }, { "epoch": 69.52459016393442, "grad_norm": 2.9235517978668213, "learning_rate": 4.488245221677409e-06, "loss": 0.3091, "step": 21205 }, { "epoch": 69.52786885245902, "grad_norm": 2.214060068130493, "learning_rate": 4.487359225557545e-06, "loss": 0.1033, "step": 21206 }, { "epoch": 69.53114754098361, "grad_norm": 2.6801223754882812, "learning_rate": 4.486473291598439e-06, "loss": 0.0858, "step": 21207 }, { "epoch": 69.5344262295082, "grad_norm": 3.9845573902130127, "learning_rate": 4.485587419810079e-06, "loss": 0.1169, "step": 21208 }, { "epoch": 69.53770491803279, "grad_norm": 3.892359495162964, "learning_rate": 4.4847016102024495e-06, "loss": 0.2021, "step": 21209 }, { "epoch": 69.54098360655738, "grad_norm": 20.493833541870117, "learning_rate": 4.483815862785549e-06, "loss": 0.129, "step": 21210 }, { "epoch": 69.54426229508196, "grad_norm": 3.304053783416748, "learning_rate": 4.48293017756936e-06, "loss": 0.1323, "step": 21211 }, { "epoch": 69.54754098360655, "grad_norm": 2.780855178833008, "learning_rate": 4.482044554563869e-06, "loss": 0.2082, "step": 21212 }, { "epoch": 69.55081967213114, "grad_norm": 2.488563060760498, "learning_rate": 4.481158993779059e-06, "loss": 0.1898, "step": 21213 }, { "epoch": 69.55409836065574, "grad_norm": 2.8991527557373047, "learning_rate": 4.480273495224924e-06, "loss": 0.123, "step": 21214 }, { "epoch": 69.55737704918033, "grad_norm": 2.503105878829956, "learning_rate": 4.479388058911443e-06, "loss": 0.0868, "step": 21215 }, { "epoch": 69.56065573770492, "grad_norm": 3.3138787746429443, "learning_rate": 4.478502684848604e-06, "loss": 0.1857, "step": 21216 }, { "epoch": 69.56393442622951, "grad_norm": 2.680394411087036, "learning_rate": 4.4776173730463864e-06, "loss": 0.1512, "step": 21217 }, { "epoch": 69.5672131147541, "grad_norm": 2.1626763343811035, "learning_rate": 4.476732123514771e-06, "loss": 0.0737, "step": 21218 }, { "epoch": 69.57049180327868, "grad_norm": 2.747079610824585, "learning_rate": 4.475846936263748e-06, "loss": 0.0675, "step": 21219 }, { "epoch": 69.57377049180327, "grad_norm": 2.101644515991211, "learning_rate": 4.4749618113032945e-06, "loss": 0.0496, "step": 21220 }, { "epoch": 69.57704918032788, "grad_norm": 2.7846899032592773, "learning_rate": 4.474076748643391e-06, "loss": 0.0738, "step": 21221 }, { "epoch": 69.58032786885246, "grad_norm": 3.185692548751831, "learning_rate": 4.4731917482940135e-06, "loss": 0.079, "step": 21222 }, { "epoch": 69.58360655737705, "grad_norm": 2.3054442405700684, "learning_rate": 4.47230681026515e-06, "loss": 0.217, "step": 21223 }, { "epoch": 69.58688524590164, "grad_norm": 2.8383214473724365, "learning_rate": 4.4714219345667745e-06, "loss": 0.1397, "step": 21224 }, { "epoch": 69.59016393442623, "grad_norm": 2.8513784408569336, "learning_rate": 4.470537121208864e-06, "loss": 0.1285, "step": 21225 }, { "epoch": 69.59344262295082, "grad_norm": 7.114254951477051, "learning_rate": 4.4696523702013935e-06, "loss": 0.0927, "step": 21226 }, { "epoch": 69.5967213114754, "grad_norm": 2.750359535217285, "learning_rate": 4.468767681554347e-06, "loss": 0.16, "step": 21227 }, { "epoch": 69.6, "grad_norm": 2.77219295501709, "learning_rate": 4.467883055277696e-06, "loss": 0.1092, "step": 21228 }, { "epoch": 69.6032786885246, "grad_norm": 2.583021402359009, "learning_rate": 4.466998491381413e-06, "loss": 0.0584, "step": 21229 }, { "epoch": 69.60655737704919, "grad_norm": 2.440962076187134, "learning_rate": 4.466113989875478e-06, "loss": 0.1805, "step": 21230 }, { "epoch": 69.60983606557377, "grad_norm": 2.5491139888763428, "learning_rate": 4.465229550769856e-06, "loss": 0.0965, "step": 21231 }, { "epoch": 69.61311475409836, "grad_norm": 2.330148935317993, "learning_rate": 4.46434517407453e-06, "loss": 0.0597, "step": 21232 }, { "epoch": 69.61639344262295, "grad_norm": 2.464974880218506, "learning_rate": 4.463460859799468e-06, "loss": 0.0756, "step": 21233 }, { "epoch": 69.61967213114754, "grad_norm": 3.7069807052612305, "learning_rate": 4.462576607954641e-06, "loss": 0.1585, "step": 21234 }, { "epoch": 69.62295081967213, "grad_norm": 2.924229621887207, "learning_rate": 4.461692418550017e-06, "loss": 0.2046, "step": 21235 }, { "epoch": 69.62622950819672, "grad_norm": 3.106898784637451, "learning_rate": 4.460808291595572e-06, "loss": 0.2817, "step": 21236 }, { "epoch": 69.62950819672132, "grad_norm": 2.6059279441833496, "learning_rate": 4.459924227101273e-06, "loss": 0.1121, "step": 21237 }, { "epoch": 69.6327868852459, "grad_norm": 1.9120429754257202, "learning_rate": 4.459040225077086e-06, "loss": 0.0924, "step": 21238 }, { "epoch": 69.6360655737705, "grad_norm": 2.022141695022583, "learning_rate": 4.458156285532984e-06, "loss": 0.2526, "step": 21239 }, { "epoch": 69.63934426229508, "grad_norm": 4.685107231140137, "learning_rate": 4.457272408478933e-06, "loss": 0.1398, "step": 21240 }, { "epoch": 69.64262295081967, "grad_norm": 2.758972644805908, "learning_rate": 4.456388593924897e-06, "loss": 0.1552, "step": 21241 }, { "epoch": 69.64590163934426, "grad_norm": 2.0598745346069336, "learning_rate": 4.455504841880842e-06, "loss": 0.1416, "step": 21242 }, { "epoch": 69.64918032786885, "grad_norm": 2.281113386154175, "learning_rate": 4.454621152356737e-06, "loss": 0.0906, "step": 21243 }, { "epoch": 69.65245901639344, "grad_norm": 2.316019296646118, "learning_rate": 4.453737525362544e-06, "loss": 0.0702, "step": 21244 }, { "epoch": 69.65573770491804, "grad_norm": 3.3874828815460205, "learning_rate": 4.452853960908224e-06, "loss": 0.197, "step": 21245 }, { "epoch": 69.65901639344263, "grad_norm": 3.124506711959839, "learning_rate": 4.4519704590037485e-06, "loss": 0.1265, "step": 21246 }, { "epoch": 69.66229508196722, "grad_norm": 2.1790707111358643, "learning_rate": 4.451087019659073e-06, "loss": 0.174, "step": 21247 }, { "epoch": 69.6655737704918, "grad_norm": 1.966132640838623, "learning_rate": 4.450203642884156e-06, "loss": 0.0488, "step": 21248 }, { "epoch": 69.66885245901639, "grad_norm": 3.4178338050842285, "learning_rate": 4.449320328688969e-06, "loss": 0.2156, "step": 21249 }, { "epoch": 69.67213114754098, "grad_norm": 2.5010969638824463, "learning_rate": 4.448437077083465e-06, "loss": 0.0831, "step": 21250 }, { "epoch": 69.67540983606557, "grad_norm": 2.9574201107025146, "learning_rate": 4.447553888077606e-06, "loss": 0.0877, "step": 21251 }, { "epoch": 69.67868852459016, "grad_norm": 2.7573435306549072, "learning_rate": 4.446670761681345e-06, "loss": 0.1127, "step": 21252 }, { "epoch": 69.68196721311476, "grad_norm": 2.963662624359131, "learning_rate": 4.445787697904651e-06, "loss": 0.2494, "step": 21253 }, { "epoch": 69.68524590163935, "grad_norm": 2.516340732574463, "learning_rate": 4.444904696757474e-06, "loss": 0.0848, "step": 21254 }, { "epoch": 69.68852459016394, "grad_norm": 3.3380508422851562, "learning_rate": 4.444021758249774e-06, "loss": 0.2475, "step": 21255 }, { "epoch": 69.69180327868852, "grad_norm": 3.087597608566284, "learning_rate": 4.443138882391503e-06, "loss": 0.2425, "step": 21256 }, { "epoch": 69.69508196721311, "grad_norm": 3.0660722255706787, "learning_rate": 4.442256069192617e-06, "loss": 0.1853, "step": 21257 }, { "epoch": 69.6983606557377, "grad_norm": 5.698749542236328, "learning_rate": 4.4413733186630755e-06, "loss": 0.2866, "step": 21258 }, { "epoch": 69.70163934426229, "grad_norm": 2.2954671382904053, "learning_rate": 4.440490630812829e-06, "loss": 0.0527, "step": 21259 }, { "epoch": 69.70491803278688, "grad_norm": 2.332080125808716, "learning_rate": 4.439608005651832e-06, "loss": 0.1902, "step": 21260 }, { "epoch": 69.70819672131148, "grad_norm": 3.5163447856903076, "learning_rate": 4.4387254431900314e-06, "loss": 0.2084, "step": 21261 }, { "epoch": 69.71147540983607, "grad_norm": 2.800311803817749, "learning_rate": 4.437842943437389e-06, "loss": 0.1397, "step": 21262 }, { "epoch": 69.71475409836066, "grad_norm": 2.6869313716888428, "learning_rate": 4.436960506403849e-06, "loss": 0.2238, "step": 21263 }, { "epoch": 69.71803278688525, "grad_norm": 3.1000816822052, "learning_rate": 4.436078132099364e-06, "loss": 0.1695, "step": 21264 }, { "epoch": 69.72131147540983, "grad_norm": 2.5858869552612305, "learning_rate": 4.43519582053388e-06, "loss": 0.0861, "step": 21265 }, { "epoch": 69.72459016393442, "grad_norm": 2.444024085998535, "learning_rate": 4.434313571717352e-06, "loss": 0.0765, "step": 21266 }, { "epoch": 69.72786885245901, "grad_norm": 3.058161735534668, "learning_rate": 4.433431385659726e-06, "loss": 0.0768, "step": 21267 }, { "epoch": 69.73114754098361, "grad_norm": 2.8524832725524902, "learning_rate": 4.432549262370948e-06, "loss": 0.2199, "step": 21268 }, { "epoch": 69.7344262295082, "grad_norm": 2.23358154296875, "learning_rate": 4.431667201860965e-06, "loss": 0.1733, "step": 21269 }, { "epoch": 69.73770491803279, "grad_norm": 2.6146838665008545, "learning_rate": 4.430785204139722e-06, "loss": 0.1934, "step": 21270 }, { "epoch": 69.74098360655738, "grad_norm": 2.727254867553711, "learning_rate": 4.4299032692171695e-06, "loss": 0.1442, "step": 21271 }, { "epoch": 69.74426229508197, "grad_norm": 2.9101293087005615, "learning_rate": 4.429021397103249e-06, "loss": 0.1685, "step": 21272 }, { "epoch": 69.74754098360656, "grad_norm": 2.275554656982422, "learning_rate": 4.428139587807905e-06, "loss": 0.1204, "step": 21273 }, { "epoch": 69.75081967213114, "grad_norm": 3.044463872909546, "learning_rate": 4.427257841341076e-06, "loss": 0.1189, "step": 21274 }, { "epoch": 69.75409836065573, "grad_norm": 3.0102455615997314, "learning_rate": 4.426376157712713e-06, "loss": 0.1713, "step": 21275 }, { "epoch": 69.75737704918033, "grad_norm": 2.6914970874786377, "learning_rate": 4.425494536932754e-06, "loss": 0.1485, "step": 21276 }, { "epoch": 69.76065573770492, "grad_norm": 2.54803204536438, "learning_rate": 4.42461297901114e-06, "loss": 0.0971, "step": 21277 }, { "epoch": 69.76393442622951, "grad_norm": 2.070391893386841, "learning_rate": 4.423731483957808e-06, "loss": 0.1717, "step": 21278 }, { "epoch": 69.7672131147541, "grad_norm": 2.580709934234619, "learning_rate": 4.422850051782706e-06, "loss": 0.1231, "step": 21279 }, { "epoch": 69.77049180327869, "grad_norm": 2.197080612182617, "learning_rate": 4.4219686824957666e-06, "loss": 0.1666, "step": 21280 }, { "epoch": 69.77377049180328, "grad_norm": 4.816821575164795, "learning_rate": 4.421087376106931e-06, "loss": 0.1427, "step": 21281 }, { "epoch": 69.77704918032786, "grad_norm": 3.247131109237671, "learning_rate": 4.4202061326261355e-06, "loss": 0.3371, "step": 21282 }, { "epoch": 69.78032786885245, "grad_norm": 3.1042022705078125, "learning_rate": 4.419324952063314e-06, "loss": 0.2443, "step": 21283 }, { "epoch": 69.78360655737706, "grad_norm": 2.7491812705993652, "learning_rate": 4.41844383442841e-06, "loss": 0.0717, "step": 21284 }, { "epoch": 69.78688524590164, "grad_norm": 3.2101128101348877, "learning_rate": 4.417562779731355e-06, "loss": 0.2214, "step": 21285 }, { "epoch": 69.79016393442623, "grad_norm": 2.766878604888916, "learning_rate": 4.416681787982084e-06, "loss": 0.0908, "step": 21286 }, { "epoch": 69.79344262295082, "grad_norm": 2.264232873916626, "learning_rate": 4.415800859190527e-06, "loss": 0.1257, "step": 21287 }, { "epoch": 69.79672131147541, "grad_norm": 4.880668640136719, "learning_rate": 4.414919993366625e-06, "loss": 0.1951, "step": 21288 }, { "epoch": 69.8, "grad_norm": 2.1745073795318604, "learning_rate": 4.414039190520308e-06, "loss": 0.1244, "step": 21289 }, { "epoch": 69.80327868852459, "grad_norm": 2.6812686920166016, "learning_rate": 4.413158450661507e-06, "loss": 0.0786, "step": 21290 }, { "epoch": 69.80655737704917, "grad_norm": 2.3471407890319824, "learning_rate": 4.412277773800149e-06, "loss": 0.0716, "step": 21291 }, { "epoch": 69.80983606557378, "grad_norm": 2.2138123512268066, "learning_rate": 4.411397159946172e-06, "loss": 0.1368, "step": 21292 }, { "epoch": 69.81311475409836, "grad_norm": 2.8303911685943604, "learning_rate": 4.410516609109504e-06, "loss": 0.1871, "step": 21293 }, { "epoch": 69.81639344262295, "grad_norm": 2.897162437438965, "learning_rate": 4.409636121300068e-06, "loss": 0.0968, "step": 21294 }, { "epoch": 69.81967213114754, "grad_norm": 2.497418165206909, "learning_rate": 4.408755696527803e-06, "loss": 0.1667, "step": 21295 }, { "epoch": 69.82295081967213, "grad_norm": 3.2008121013641357, "learning_rate": 4.407875334802628e-06, "loss": 0.0935, "step": 21296 }, { "epoch": 69.82622950819672, "grad_norm": 2.3883323669433594, "learning_rate": 4.406995036134471e-06, "loss": 0.0639, "step": 21297 }, { "epoch": 69.8295081967213, "grad_norm": 3.5892813205718994, "learning_rate": 4.4061148005332635e-06, "loss": 0.1741, "step": 21298 }, { "epoch": 69.8327868852459, "grad_norm": 2.4084103107452393, "learning_rate": 4.405234628008929e-06, "loss": 0.0543, "step": 21299 }, { "epoch": 69.8360655737705, "grad_norm": 2.984314441680908, "learning_rate": 4.40435451857139e-06, "loss": 0.113, "step": 21300 }, { "epoch": 69.83934426229509, "grad_norm": 3.360751152038574, "learning_rate": 4.4034744722305674e-06, "loss": 0.1704, "step": 21301 }, { "epoch": 69.84262295081967, "grad_norm": 2.5809271335601807, "learning_rate": 4.402594488996393e-06, "loss": 0.1361, "step": 21302 }, { "epoch": 69.84590163934426, "grad_norm": 2.2997045516967773, "learning_rate": 4.4017145688787866e-06, "loss": 0.1086, "step": 21303 }, { "epoch": 69.84918032786885, "grad_norm": 3.140622138977051, "learning_rate": 4.400834711887669e-06, "loss": 0.1369, "step": 21304 }, { "epoch": 69.85245901639344, "grad_norm": 3.1067161560058594, "learning_rate": 4.3999549180329584e-06, "loss": 0.1038, "step": 21305 }, { "epoch": 69.85573770491803, "grad_norm": 1.7263561487197876, "learning_rate": 4.399075187324582e-06, "loss": 0.0513, "step": 21306 }, { "epoch": 69.85901639344263, "grad_norm": 2.8710625171661377, "learning_rate": 4.398195519772456e-06, "loss": 0.1578, "step": 21307 }, { "epoch": 69.86229508196722, "grad_norm": 2.9229204654693604, "learning_rate": 4.3973159153865e-06, "loss": 0.0821, "step": 21308 }, { "epoch": 69.8655737704918, "grad_norm": 1.954785943031311, "learning_rate": 4.396436374176628e-06, "loss": 0.1613, "step": 21309 }, { "epoch": 69.8688524590164, "grad_norm": 3.648448944091797, "learning_rate": 4.395556896152767e-06, "loss": 0.1336, "step": 21310 }, { "epoch": 69.87213114754098, "grad_norm": 2.7664308547973633, "learning_rate": 4.394677481324828e-06, "loss": 0.3233, "step": 21311 }, { "epoch": 69.87540983606557, "grad_norm": 9.47138786315918, "learning_rate": 4.3937981297027295e-06, "loss": 0.2054, "step": 21312 }, { "epoch": 69.87868852459016, "grad_norm": 2.5190236568450928, "learning_rate": 4.392918841296382e-06, "loss": 0.1174, "step": 21313 }, { "epoch": 69.88196721311475, "grad_norm": 3.0997531414031982, "learning_rate": 4.392039616115708e-06, "loss": 0.2026, "step": 21314 }, { "epoch": 69.88524590163935, "grad_norm": 3.992520570755005, "learning_rate": 4.391160454170619e-06, "loss": 0.1721, "step": 21315 }, { "epoch": 69.88852459016394, "grad_norm": 3.1400551795959473, "learning_rate": 4.390281355471027e-06, "loss": 0.155, "step": 21316 }, { "epoch": 69.89180327868853, "grad_norm": 2.892745018005371, "learning_rate": 4.389402320026846e-06, "loss": 0.0775, "step": 21317 }, { "epoch": 69.89508196721312, "grad_norm": 2.1479172706604004, "learning_rate": 4.388523347847983e-06, "loss": 0.1114, "step": 21318 }, { "epoch": 69.8983606557377, "grad_norm": 2.8344836235046387, "learning_rate": 4.387644438944358e-06, "loss": 0.1458, "step": 21319 }, { "epoch": 69.90163934426229, "grad_norm": 1.9549338817596436, "learning_rate": 4.386765593325876e-06, "loss": 0.0528, "step": 21320 }, { "epoch": 69.90491803278688, "grad_norm": 2.749349355697632, "learning_rate": 4.385886811002449e-06, "loss": 0.3502, "step": 21321 }, { "epoch": 69.90819672131147, "grad_norm": 2.6619796752929688, "learning_rate": 4.385008091983981e-06, "loss": 0.1785, "step": 21322 }, { "epoch": 69.91147540983607, "grad_norm": 2.8441972732543945, "learning_rate": 4.38412943628039e-06, "loss": 0.1098, "step": 21323 }, { "epoch": 69.91475409836066, "grad_norm": 2.13057804107666, "learning_rate": 4.383250843901577e-06, "loss": 0.1304, "step": 21324 }, { "epoch": 69.91803278688525, "grad_norm": 3.0068323612213135, "learning_rate": 4.382372314857452e-06, "loss": 0.2263, "step": 21325 }, { "epoch": 69.92131147540984, "grad_norm": 2.922659397125244, "learning_rate": 4.381493849157916e-06, "loss": 0.109, "step": 21326 }, { "epoch": 69.92459016393443, "grad_norm": 2.201894760131836, "learning_rate": 4.380615446812883e-06, "loss": 0.0979, "step": 21327 }, { "epoch": 69.92786885245901, "grad_norm": 3.0252456665039062, "learning_rate": 4.379737107832253e-06, "loss": 0.0848, "step": 21328 }, { "epoch": 69.9311475409836, "grad_norm": 2.1814746856689453, "learning_rate": 4.378858832225931e-06, "loss": 0.1835, "step": 21329 }, { "epoch": 69.93442622950819, "grad_norm": 2.6807217597961426, "learning_rate": 4.377980620003819e-06, "loss": 0.1996, "step": 21330 }, { "epoch": 69.9377049180328, "grad_norm": 2.722400188446045, "learning_rate": 4.377102471175818e-06, "loss": 0.3517, "step": 21331 }, { "epoch": 69.94098360655738, "grad_norm": 2.265902042388916, "learning_rate": 4.376224385751837e-06, "loss": 0.053, "step": 21332 }, { "epoch": 69.94426229508197, "grad_norm": 3.408085346221924, "learning_rate": 4.375346363741772e-06, "loss": 0.2243, "step": 21333 }, { "epoch": 69.94754098360656, "grad_norm": 2.7179627418518066, "learning_rate": 4.374468405155528e-06, "loss": 0.2075, "step": 21334 }, { "epoch": 69.95081967213115, "grad_norm": 2.784986972808838, "learning_rate": 4.3735905100029956e-06, "loss": 0.1511, "step": 21335 }, { "epoch": 69.95409836065573, "grad_norm": 2.994673252105713, "learning_rate": 4.372712678294084e-06, "loss": 0.4216, "step": 21336 }, { "epoch": 69.95737704918032, "grad_norm": 2.882436752319336, "learning_rate": 4.371834910038688e-06, "loss": 0.1175, "step": 21337 }, { "epoch": 69.96065573770491, "grad_norm": 2.801042079925537, "learning_rate": 4.370957205246705e-06, "loss": 0.1093, "step": 21338 }, { "epoch": 69.96393442622951, "grad_norm": 2.9272379875183105, "learning_rate": 4.370079563928029e-06, "loss": 0.1866, "step": 21339 }, { "epoch": 69.9672131147541, "grad_norm": 3.9434962272644043, "learning_rate": 4.369201986092564e-06, "loss": 0.0628, "step": 21340 }, { "epoch": 69.97049180327869, "grad_norm": 3.044989824295044, "learning_rate": 4.368324471750201e-06, "loss": 0.1535, "step": 21341 }, { "epoch": 69.97377049180328, "grad_norm": 3.1759228706359863, "learning_rate": 4.367447020910833e-06, "loss": 0.0923, "step": 21342 }, { "epoch": 69.97704918032787, "grad_norm": 2.9621500968933105, "learning_rate": 4.366569633584359e-06, "loss": 0.1864, "step": 21343 }, { "epoch": 69.98032786885246, "grad_norm": 2.2701618671417236, "learning_rate": 4.365692309780664e-06, "loss": 0.088, "step": 21344 }, { "epoch": 69.98360655737704, "grad_norm": 3.453584909439087, "learning_rate": 4.364815049509651e-06, "loss": 0.1052, "step": 21345 }, { "epoch": 69.98688524590163, "grad_norm": 4.559345722198486, "learning_rate": 4.363937852781207e-06, "loss": 0.1101, "step": 21346 }, { "epoch": 69.99016393442623, "grad_norm": 3.247544050216675, "learning_rate": 4.363060719605221e-06, "loss": 0.2316, "step": 21347 }, { "epoch": 69.99344262295082, "grad_norm": 2.04417085647583, "learning_rate": 4.362183649991589e-06, "loss": 0.0973, "step": 21348 }, { "epoch": 69.99672131147541, "grad_norm": 2.6289730072021484, "learning_rate": 4.361306643950199e-06, "loss": 0.121, "step": 21349 }, { "epoch": 70.0, "grad_norm": 2.183363914489746, "learning_rate": 4.360429701490935e-06, "loss": 0.0343, "step": 21350 }, { "epoch": 70.00327868852459, "grad_norm": 3.014752149581909, "learning_rate": 4.359552822623693e-06, "loss": 0.0758, "step": 21351 }, { "epoch": 70.00655737704918, "grad_norm": 2.831887722015381, "learning_rate": 4.358676007358359e-06, "loss": 0.2558, "step": 21352 }, { "epoch": 70.00983606557377, "grad_norm": 3.2116308212280273, "learning_rate": 4.357799255704813e-06, "loss": 0.2664, "step": 21353 }, { "epoch": 70.01311475409837, "grad_norm": 2.6913604736328125, "learning_rate": 4.356922567672952e-06, "loss": 0.1382, "step": 21354 }, { "epoch": 70.01639344262296, "grad_norm": 2.2369446754455566, "learning_rate": 4.356045943272656e-06, "loss": 0.0807, "step": 21355 }, { "epoch": 70.01967213114754, "grad_norm": 3.0837953090667725, "learning_rate": 4.35516938251381e-06, "loss": 0.0944, "step": 21356 }, { "epoch": 70.02295081967213, "grad_norm": 3.013131856918335, "learning_rate": 4.354292885406295e-06, "loss": 0.1081, "step": 21357 }, { "epoch": 70.02622950819672, "grad_norm": 2.097832202911377, "learning_rate": 4.353416451960001e-06, "loss": 0.0712, "step": 21358 }, { "epoch": 70.02950819672131, "grad_norm": 2.7640769481658936, "learning_rate": 4.352540082184809e-06, "loss": 0.169, "step": 21359 }, { "epoch": 70.0327868852459, "grad_norm": 2.1403183937072754, "learning_rate": 4.351663776090598e-06, "loss": 0.2071, "step": 21360 }, { "epoch": 70.03606557377049, "grad_norm": 2.9465432167053223, "learning_rate": 4.3507875336872464e-06, "loss": 0.2838, "step": 21361 }, { "epoch": 70.03934426229509, "grad_norm": 3.286947727203369, "learning_rate": 4.3499113549846436e-06, "loss": 0.1709, "step": 21362 }, { "epoch": 70.04262295081968, "grad_norm": 2.6759769916534424, "learning_rate": 4.349035239992665e-06, "loss": 0.1167, "step": 21363 }, { "epoch": 70.04590163934427, "grad_norm": 2.901517391204834, "learning_rate": 4.348159188721189e-06, "loss": 0.0942, "step": 21364 }, { "epoch": 70.04918032786885, "grad_norm": 2.107624053955078, "learning_rate": 4.347283201180092e-06, "loss": 0.0666, "step": 21365 }, { "epoch": 70.05245901639344, "grad_norm": 2.7104921340942383, "learning_rate": 4.346407277379258e-06, "loss": 0.0886, "step": 21366 }, { "epoch": 70.05573770491803, "grad_norm": 2.54831862449646, "learning_rate": 4.345531417328559e-06, "loss": 0.0918, "step": 21367 }, { "epoch": 70.05901639344262, "grad_norm": 2.0367934703826904, "learning_rate": 4.344655621037873e-06, "loss": 0.1408, "step": 21368 }, { "epoch": 70.0622950819672, "grad_norm": 2.9223687648773193, "learning_rate": 4.343779888517074e-06, "loss": 0.1388, "step": 21369 }, { "epoch": 70.06557377049181, "grad_norm": 2.6587750911712646, "learning_rate": 4.342904219776036e-06, "loss": 0.2041, "step": 21370 }, { "epoch": 70.0688524590164, "grad_norm": 1.9131543636322021, "learning_rate": 4.342028614824637e-06, "loss": 0.0399, "step": 21371 }, { "epoch": 70.07213114754099, "grad_norm": 2.328911304473877, "learning_rate": 4.3411530736727495e-06, "loss": 0.1871, "step": 21372 }, { "epoch": 70.07540983606557, "grad_norm": 2.247328758239746, "learning_rate": 4.340277596330243e-06, "loss": 0.2393, "step": 21373 }, { "epoch": 70.07868852459016, "grad_norm": 2.445976972579956, "learning_rate": 4.33940218280699e-06, "loss": 0.1518, "step": 21374 }, { "epoch": 70.08196721311475, "grad_norm": 3.6159441471099854, "learning_rate": 4.338526833112865e-06, "loss": 0.1393, "step": 21375 }, { "epoch": 70.08524590163934, "grad_norm": 3.1109390258789062, "learning_rate": 4.3376515472577365e-06, "loss": 0.221, "step": 21376 }, { "epoch": 70.08852459016393, "grad_norm": 2.1198246479034424, "learning_rate": 4.336776325251474e-06, "loss": 0.0517, "step": 21377 }, { "epoch": 70.09180327868853, "grad_norm": 2.6259143352508545, "learning_rate": 4.335901167103943e-06, "loss": 0.1385, "step": 21378 }, { "epoch": 70.09508196721312, "grad_norm": 2.502290725708008, "learning_rate": 4.335026072825021e-06, "loss": 0.113, "step": 21379 }, { "epoch": 70.09836065573771, "grad_norm": 2.5677192211151123, "learning_rate": 4.3341510424245685e-06, "loss": 0.1304, "step": 21380 }, { "epoch": 70.1016393442623, "grad_norm": 3.609952211380005, "learning_rate": 4.333276075912454e-06, "loss": 0.1246, "step": 21381 }, { "epoch": 70.10491803278688, "grad_norm": 3.2894370555877686, "learning_rate": 4.332401173298544e-06, "loss": 0.2166, "step": 21382 }, { "epoch": 70.10819672131147, "grad_norm": 2.11557936668396, "learning_rate": 4.331526334592699e-06, "loss": 0.0561, "step": 21383 }, { "epoch": 70.11147540983606, "grad_norm": 2.946786642074585, "learning_rate": 4.330651559804792e-06, "loss": 0.1858, "step": 21384 }, { "epoch": 70.11475409836065, "grad_norm": 3.191237211227417, "learning_rate": 4.329776848944684e-06, "loss": 0.1077, "step": 21385 }, { "epoch": 70.11803278688525, "grad_norm": 2.581291913986206, "learning_rate": 4.328902202022238e-06, "loss": 0.1306, "step": 21386 }, { "epoch": 70.12131147540984, "grad_norm": 2.599182605743408, "learning_rate": 4.328027619047312e-06, "loss": 0.2662, "step": 21387 }, { "epoch": 70.12459016393443, "grad_norm": 2.2644004821777344, "learning_rate": 4.327153100029776e-06, "loss": 0.0799, "step": 21388 }, { "epoch": 70.12786885245902, "grad_norm": 2.4451589584350586, "learning_rate": 4.326278644979487e-06, "loss": 0.1743, "step": 21389 }, { "epoch": 70.1311475409836, "grad_norm": 2.2774674892425537, "learning_rate": 4.325404253906306e-06, "loss": 0.0731, "step": 21390 }, { "epoch": 70.1344262295082, "grad_norm": 3.1963441371917725, "learning_rate": 4.3245299268200865e-06, "loss": 0.0936, "step": 21391 }, { "epoch": 70.13770491803278, "grad_norm": 2.5274431705474854, "learning_rate": 4.323655663730699e-06, "loss": 0.2432, "step": 21392 }, { "epoch": 70.14098360655737, "grad_norm": 2.531853675842285, "learning_rate": 4.322781464647995e-06, "loss": 0.1054, "step": 21393 }, { "epoch": 70.14426229508197, "grad_norm": 2.9017887115478516, "learning_rate": 4.321907329581833e-06, "loss": 0.0627, "step": 21394 }, { "epoch": 70.14754098360656, "grad_norm": 8.655014991760254, "learning_rate": 4.321033258542068e-06, "loss": 0.0697, "step": 21395 }, { "epoch": 70.15081967213115, "grad_norm": 3.124884843826294, "learning_rate": 4.320159251538556e-06, "loss": 0.1565, "step": 21396 }, { "epoch": 70.15409836065574, "grad_norm": 2.518402576446533, "learning_rate": 4.319285308581156e-06, "loss": 0.0682, "step": 21397 }, { "epoch": 70.15737704918033, "grad_norm": 3.527111530303955, "learning_rate": 4.318411429679722e-06, "loss": 0.2443, "step": 21398 }, { "epoch": 70.16065573770491, "grad_norm": 2.6186091899871826, "learning_rate": 4.317537614844101e-06, "loss": 0.2285, "step": 21399 }, { "epoch": 70.1639344262295, "grad_norm": 2.695453643798828, "learning_rate": 4.316663864084158e-06, "loss": 0.2002, "step": 21400 }, { "epoch": 70.1672131147541, "grad_norm": 3.2443220615386963, "learning_rate": 4.3157901774097375e-06, "loss": 0.1865, "step": 21401 }, { "epoch": 70.1704918032787, "grad_norm": 2.7498080730438232, "learning_rate": 4.314916554830693e-06, "loss": 0.1554, "step": 21402 }, { "epoch": 70.17377049180328, "grad_norm": 3.127998113632202, "learning_rate": 4.314042996356872e-06, "loss": 0.19, "step": 21403 }, { "epoch": 70.17704918032787, "grad_norm": 2.354929208755493, "learning_rate": 4.313169501998132e-06, "loss": 0.0643, "step": 21404 }, { "epoch": 70.18032786885246, "grad_norm": 2.2339258193969727, "learning_rate": 4.312296071764318e-06, "loss": 0.049, "step": 21405 }, { "epoch": 70.18360655737705, "grad_norm": 2.834052085876465, "learning_rate": 4.311422705665275e-06, "loss": 0.1617, "step": 21406 }, { "epoch": 70.18688524590164, "grad_norm": 2.8090567588806152, "learning_rate": 4.310549403710862e-06, "loss": 0.0915, "step": 21407 }, { "epoch": 70.19016393442622, "grad_norm": 2.4408228397369385, "learning_rate": 4.309676165910917e-06, "loss": 0.1354, "step": 21408 }, { "epoch": 70.19344262295083, "grad_norm": 3.0957934856414795, "learning_rate": 4.308802992275288e-06, "loss": 0.1614, "step": 21409 }, { "epoch": 70.19672131147541, "grad_norm": 2.43930983543396, "learning_rate": 4.307929882813826e-06, "loss": 0.2579, "step": 21410 }, { "epoch": 70.2, "grad_norm": 2.9808003902435303, "learning_rate": 4.307056837536373e-06, "loss": 0.1772, "step": 21411 }, { "epoch": 70.20327868852459, "grad_norm": 4.11634635925293, "learning_rate": 4.306183856452772e-06, "loss": 0.1292, "step": 21412 }, { "epoch": 70.20655737704918, "grad_norm": 3.1439714431762695, "learning_rate": 4.305310939572866e-06, "loss": 0.2323, "step": 21413 }, { "epoch": 70.20983606557377, "grad_norm": 3.085444688796997, "learning_rate": 4.304438086906502e-06, "loss": 0.2706, "step": 21414 }, { "epoch": 70.21311475409836, "grad_norm": 3.2671494483947754, "learning_rate": 4.303565298463523e-06, "loss": 0.2179, "step": 21415 }, { "epoch": 70.21639344262294, "grad_norm": 2.745706081390381, "learning_rate": 4.302692574253766e-06, "loss": 0.0775, "step": 21416 }, { "epoch": 70.21967213114755, "grad_norm": 2.700897455215454, "learning_rate": 4.301819914287072e-06, "loss": 0.1216, "step": 21417 }, { "epoch": 70.22295081967214, "grad_norm": 2.6646130084991455, "learning_rate": 4.300947318573285e-06, "loss": 0.1956, "step": 21418 }, { "epoch": 70.22622950819672, "grad_norm": 3.182197332382202, "learning_rate": 4.300074787122244e-06, "loss": 0.1316, "step": 21419 }, { "epoch": 70.22950819672131, "grad_norm": 3.4751713275909424, "learning_rate": 4.299202319943785e-06, "loss": 0.1202, "step": 21420 }, { "epoch": 70.2327868852459, "grad_norm": 3.264181137084961, "learning_rate": 4.298329917047748e-06, "loss": 0.2457, "step": 21421 }, { "epoch": 70.23606557377049, "grad_norm": 2.869274616241455, "learning_rate": 4.2974575784439655e-06, "loss": 0.1946, "step": 21422 }, { "epoch": 70.23934426229508, "grad_norm": 2.8767642974853516, "learning_rate": 4.296585304142281e-06, "loss": 0.0866, "step": 21423 }, { "epoch": 70.24262295081967, "grad_norm": 2.828223943710327, "learning_rate": 4.295713094152528e-06, "loss": 0.1484, "step": 21424 }, { "epoch": 70.24590163934427, "grad_norm": 2.735532522201538, "learning_rate": 4.294840948484541e-06, "loss": 0.0515, "step": 21425 }, { "epoch": 70.24918032786886, "grad_norm": 2.483745813369751, "learning_rate": 4.293968867148149e-06, "loss": 0.2212, "step": 21426 }, { "epoch": 70.25245901639344, "grad_norm": 3.103485584259033, "learning_rate": 4.293096850153197e-06, "loss": 0.2022, "step": 21427 }, { "epoch": 70.25573770491803, "grad_norm": 2.7411985397338867, "learning_rate": 4.292224897509511e-06, "loss": 0.1981, "step": 21428 }, { "epoch": 70.25901639344262, "grad_norm": 3.2031283378601074, "learning_rate": 4.291353009226922e-06, "loss": 0.135, "step": 21429 }, { "epoch": 70.26229508196721, "grad_norm": 3.0986452102661133, "learning_rate": 4.29048118531526e-06, "loss": 0.2102, "step": 21430 }, { "epoch": 70.2655737704918, "grad_norm": 2.859337568283081, "learning_rate": 4.2896094257843645e-06, "loss": 0.1802, "step": 21431 }, { "epoch": 70.26885245901639, "grad_norm": 2.9342923164367676, "learning_rate": 4.288737730644059e-06, "loss": 0.0661, "step": 21432 }, { "epoch": 70.27213114754099, "grad_norm": 3.4324629306793213, "learning_rate": 4.287866099904175e-06, "loss": 0.2112, "step": 21433 }, { "epoch": 70.27540983606558, "grad_norm": 2.496206760406494, "learning_rate": 4.2869945335745375e-06, "loss": 0.1309, "step": 21434 }, { "epoch": 70.27868852459017, "grad_norm": 3.4960291385650635, "learning_rate": 4.286123031664975e-06, "loss": 0.0449, "step": 21435 }, { "epoch": 70.28196721311475, "grad_norm": 3.142036199569702, "learning_rate": 4.285251594185319e-06, "loss": 0.2291, "step": 21436 }, { "epoch": 70.28524590163934, "grad_norm": 2.233262062072754, "learning_rate": 4.284380221145393e-06, "loss": 0.0713, "step": 21437 }, { "epoch": 70.28852459016393, "grad_norm": 2.648021936416626, "learning_rate": 4.283508912555023e-06, "loss": 0.2042, "step": 21438 }, { "epoch": 70.29180327868852, "grad_norm": 2.342844247817993, "learning_rate": 4.282637668424031e-06, "loss": 0.0432, "step": 21439 }, { "epoch": 70.29508196721312, "grad_norm": 3.1455204486846924, "learning_rate": 4.2817664887622464e-06, "loss": 0.2953, "step": 21440 }, { "epoch": 70.29836065573771, "grad_norm": 2.518527030944824, "learning_rate": 4.28089537357949e-06, "loss": 0.1567, "step": 21441 }, { "epoch": 70.3016393442623, "grad_norm": 2.1824638843536377, "learning_rate": 4.280024322885585e-06, "loss": 0.2301, "step": 21442 }, { "epoch": 70.30491803278689, "grad_norm": 2.1705105304718018, "learning_rate": 4.279153336690349e-06, "loss": 0.0597, "step": 21443 }, { "epoch": 70.30819672131148, "grad_norm": 2.897765874862671, "learning_rate": 4.278282415003611e-06, "loss": 0.2406, "step": 21444 }, { "epoch": 70.31147540983606, "grad_norm": 2.7591726779937744, "learning_rate": 4.277411557835189e-06, "loss": 0.0632, "step": 21445 }, { "epoch": 70.31475409836065, "grad_norm": 2.4945762157440186, "learning_rate": 4.276540765194901e-06, "loss": 0.1189, "step": 21446 }, { "epoch": 70.31803278688524, "grad_norm": 2.2801764011383057, "learning_rate": 4.275670037092566e-06, "loss": 0.0788, "step": 21447 }, { "epoch": 70.32131147540984, "grad_norm": 2.1995081901550293, "learning_rate": 4.274799373538001e-06, "loss": 0.1373, "step": 21448 }, { "epoch": 70.32459016393443, "grad_norm": 2.3363609313964844, "learning_rate": 4.273928774541028e-06, "loss": 0.0274, "step": 21449 }, { "epoch": 70.32786885245902, "grad_norm": 2.705596923828125, "learning_rate": 4.273058240111463e-06, "loss": 0.0739, "step": 21450 }, { "epoch": 70.33114754098361, "grad_norm": 2.5171146392822266, "learning_rate": 4.27218777025912e-06, "loss": 0.0944, "step": 21451 }, { "epoch": 70.3344262295082, "grad_norm": 1.783471941947937, "learning_rate": 4.271317364993812e-06, "loss": 0.0446, "step": 21452 }, { "epoch": 70.33770491803278, "grad_norm": 3.5454399585723877, "learning_rate": 4.270447024325361e-06, "loss": 0.2489, "step": 21453 }, { "epoch": 70.34098360655737, "grad_norm": 2.289093494415283, "learning_rate": 4.269576748263576e-06, "loss": 0.0542, "step": 21454 }, { "epoch": 70.34426229508196, "grad_norm": 3.4835317134857178, "learning_rate": 4.268706536818268e-06, "loss": 0.0392, "step": 21455 }, { "epoch": 70.34754098360656, "grad_norm": 2.673635244369507, "learning_rate": 4.267836389999257e-06, "loss": 0.078, "step": 21456 }, { "epoch": 70.35081967213115, "grad_norm": 2.715498447418213, "learning_rate": 4.266966307816351e-06, "loss": 0.0985, "step": 21457 }, { "epoch": 70.35409836065574, "grad_norm": 2.1613593101501465, "learning_rate": 4.266096290279355e-06, "loss": 0.1377, "step": 21458 }, { "epoch": 70.35737704918033, "grad_norm": 3.565804958343506, "learning_rate": 4.265226337398091e-06, "loss": 0.1763, "step": 21459 }, { "epoch": 70.36065573770492, "grad_norm": 2.8755733966827393, "learning_rate": 4.264356449182362e-06, "loss": 0.1283, "step": 21460 }, { "epoch": 70.3639344262295, "grad_norm": 2.559767961502075, "learning_rate": 4.263486625641974e-06, "loss": 0.1324, "step": 21461 }, { "epoch": 70.3672131147541, "grad_norm": 3.2467615604400635, "learning_rate": 4.262616866786742e-06, "loss": 0.2496, "step": 21462 }, { "epoch": 70.37049180327868, "grad_norm": 2.4790749549865723, "learning_rate": 4.2617471726264705e-06, "loss": 0.1873, "step": 21463 }, { "epoch": 70.37377049180328, "grad_norm": 2.1485822200775146, "learning_rate": 4.260877543170966e-06, "loss": 0.1322, "step": 21464 }, { "epoch": 70.37704918032787, "grad_norm": 1.999803066253662, "learning_rate": 4.260007978430031e-06, "loss": 0.0421, "step": 21465 }, { "epoch": 70.38032786885246, "grad_norm": 2.3133652210235596, "learning_rate": 4.259138478413477e-06, "loss": 0.1639, "step": 21466 }, { "epoch": 70.38360655737705, "grad_norm": 3.2853050231933594, "learning_rate": 4.258269043131107e-06, "loss": 0.1528, "step": 21467 }, { "epoch": 70.38688524590164, "grad_norm": 3.6390340328216553, "learning_rate": 4.257399672592722e-06, "loss": 0.0419, "step": 21468 }, { "epoch": 70.39016393442623, "grad_norm": 2.9385316371917725, "learning_rate": 4.256530366808124e-06, "loss": 0.0977, "step": 21469 }, { "epoch": 70.39344262295081, "grad_norm": 2.4040684700012207, "learning_rate": 4.255661125787121e-06, "loss": 0.1322, "step": 21470 }, { "epoch": 70.3967213114754, "grad_norm": 1.9845918416976929, "learning_rate": 4.254791949539511e-06, "loss": 0.0968, "step": 21471 }, { "epoch": 70.4, "grad_norm": 2.63877272605896, "learning_rate": 4.2539228380750955e-06, "loss": 0.1384, "step": 21472 }, { "epoch": 70.4032786885246, "grad_norm": 2.63303542137146, "learning_rate": 4.253053791403674e-06, "loss": 0.0953, "step": 21473 }, { "epoch": 70.40655737704918, "grad_norm": 2.9661316871643066, "learning_rate": 4.252184809535044e-06, "loss": 0.0575, "step": 21474 }, { "epoch": 70.40983606557377, "grad_norm": 2.4518542289733887, "learning_rate": 4.251315892479009e-06, "loss": 0.1386, "step": 21475 }, { "epoch": 70.41311475409836, "grad_norm": 3.2587976455688477, "learning_rate": 4.250447040245365e-06, "loss": 0.1659, "step": 21476 }, { "epoch": 70.41639344262295, "grad_norm": 2.5644726753234863, "learning_rate": 4.249578252843908e-06, "loss": 0.1165, "step": 21477 }, { "epoch": 70.41967213114754, "grad_norm": 2.7254185676574707, "learning_rate": 4.248709530284433e-06, "loss": 0.1233, "step": 21478 }, { "epoch": 70.42295081967212, "grad_norm": 2.4930481910705566, "learning_rate": 4.24784087257674e-06, "loss": 0.1552, "step": 21479 }, { "epoch": 70.42622950819673, "grad_norm": 3.3593623638153076, "learning_rate": 4.246972279730622e-06, "loss": 0.1899, "step": 21480 }, { "epoch": 70.42950819672132, "grad_norm": 2.951542615890503, "learning_rate": 4.246103751755873e-06, "loss": 0.1059, "step": 21481 }, { "epoch": 70.4327868852459, "grad_norm": 3.155186653137207, "learning_rate": 4.245235288662285e-06, "loss": 0.1995, "step": 21482 }, { "epoch": 70.43606557377049, "grad_norm": 2.9799561500549316, "learning_rate": 4.244366890459655e-06, "loss": 0.1621, "step": 21483 }, { "epoch": 70.43934426229508, "grad_norm": 2.8848001956939697, "learning_rate": 4.2434985571577724e-06, "loss": 0.0862, "step": 21484 }, { "epoch": 70.44262295081967, "grad_norm": 2.147040843963623, "learning_rate": 4.242630288766428e-06, "loss": 0.0491, "step": 21485 }, { "epoch": 70.44590163934426, "grad_norm": 3.4041128158569336, "learning_rate": 4.241762085295415e-06, "loss": 0.2492, "step": 21486 }, { "epoch": 70.44918032786886, "grad_norm": 2.540954828262329, "learning_rate": 4.240893946754518e-06, "loss": 0.1086, "step": 21487 }, { "epoch": 70.45245901639345, "grad_norm": 2.1529622077941895, "learning_rate": 4.240025873153532e-06, "loss": 0.2402, "step": 21488 }, { "epoch": 70.45573770491804, "grad_norm": 5.468040466308594, "learning_rate": 4.2391578645022436e-06, "loss": 0.1122, "step": 21489 }, { "epoch": 70.45901639344262, "grad_norm": 2.5130064487457275, "learning_rate": 4.23828992081044e-06, "loss": 0.0964, "step": 21490 }, { "epoch": 70.46229508196721, "grad_norm": 2.755072832107544, "learning_rate": 4.237422042087904e-06, "loss": 0.2248, "step": 21491 }, { "epoch": 70.4655737704918, "grad_norm": 2.4746615886688232, "learning_rate": 4.2365542283444305e-06, "loss": 0.1029, "step": 21492 }, { "epoch": 70.46885245901639, "grad_norm": 3.737762212753296, "learning_rate": 4.2356864795898015e-06, "loss": 0.1371, "step": 21493 }, { "epoch": 70.47213114754098, "grad_norm": 2.836787700653076, "learning_rate": 4.234818795833799e-06, "loss": 0.2412, "step": 21494 }, { "epoch": 70.47540983606558, "grad_norm": 3.010512351989746, "learning_rate": 4.233951177086207e-06, "loss": 0.2093, "step": 21495 }, { "epoch": 70.47868852459017, "grad_norm": 3.1509547233581543, "learning_rate": 4.233083623356813e-06, "loss": 0.0854, "step": 21496 }, { "epoch": 70.48196721311476, "grad_norm": 2.8708341121673584, "learning_rate": 4.232216134655397e-06, "loss": 0.119, "step": 21497 }, { "epoch": 70.48524590163935, "grad_norm": 3.219592332839966, "learning_rate": 4.231348710991742e-06, "loss": 0.1911, "step": 21498 }, { "epoch": 70.48852459016393, "grad_norm": 2.212554693222046, "learning_rate": 4.2304813523756274e-06, "loss": 0.1398, "step": 21499 }, { "epoch": 70.49180327868852, "grad_norm": 2.977322816848755, "learning_rate": 4.229614058816831e-06, "loss": 0.1487, "step": 21500 }, { "epoch": 70.49508196721311, "grad_norm": 2.3412394523620605, "learning_rate": 4.22874683032514e-06, "loss": 0.0885, "step": 21501 }, { "epoch": 70.4983606557377, "grad_norm": 8.787252426147461, "learning_rate": 4.227879666910329e-06, "loss": 0.1758, "step": 21502 }, { "epoch": 70.5016393442623, "grad_norm": 2.9731996059417725, "learning_rate": 4.227012568582175e-06, "loss": 0.1361, "step": 21503 }, { "epoch": 70.50491803278689, "grad_norm": 3.5046308040618896, "learning_rate": 4.2261455353504546e-06, "loss": 0.0985, "step": 21504 }, { "epoch": 70.50819672131148, "grad_norm": 2.6458659172058105, "learning_rate": 4.22527856722495e-06, "loss": 0.1252, "step": 21505 }, { "epoch": 70.51147540983607, "grad_norm": 3.2754077911376953, "learning_rate": 4.224411664215433e-06, "loss": 0.1977, "step": 21506 }, { "epoch": 70.51475409836065, "grad_norm": 2.260183572769165, "learning_rate": 4.2235448263316795e-06, "loss": 0.0688, "step": 21507 }, { "epoch": 70.51803278688524, "grad_norm": 2.483811378479004, "learning_rate": 4.222678053583461e-06, "loss": 0.2358, "step": 21508 }, { "epoch": 70.52131147540983, "grad_norm": 2.81372332572937, "learning_rate": 4.2218113459805575e-06, "loss": 0.1253, "step": 21509 }, { "epoch": 70.52459016393442, "grad_norm": 2.8943161964416504, "learning_rate": 4.22094470353274e-06, "loss": 0.096, "step": 21510 }, { "epoch": 70.52786885245902, "grad_norm": 2.3470420837402344, "learning_rate": 4.220078126249775e-06, "loss": 0.0626, "step": 21511 }, { "epoch": 70.53114754098361, "grad_norm": 2.0768556594848633, "learning_rate": 4.219211614141443e-06, "loss": 0.1997, "step": 21512 }, { "epoch": 70.5344262295082, "grad_norm": 2.270606756210327, "learning_rate": 4.21834516721751e-06, "loss": 0.1208, "step": 21513 }, { "epoch": 70.53770491803279, "grad_norm": 4.015796184539795, "learning_rate": 4.217478785487743e-06, "loss": 0.1276, "step": 21514 }, { "epoch": 70.54098360655738, "grad_norm": 2.3053789138793945, "learning_rate": 4.216612468961919e-06, "loss": 0.0754, "step": 21515 }, { "epoch": 70.54426229508196, "grad_norm": 2.5764570236206055, "learning_rate": 4.215746217649803e-06, "loss": 0.1257, "step": 21516 }, { "epoch": 70.54754098360655, "grad_norm": 2.714526653289795, "learning_rate": 4.214880031561158e-06, "loss": 0.058, "step": 21517 }, { "epoch": 70.55081967213114, "grad_norm": 3.0928661823272705, "learning_rate": 4.21401391070576e-06, "loss": 0.1369, "step": 21518 }, { "epoch": 70.55409836065574, "grad_norm": 3.9250147342681885, "learning_rate": 4.21314785509337e-06, "loss": 0.2151, "step": 21519 }, { "epoch": 70.55737704918033, "grad_norm": 2.8005006313323975, "learning_rate": 4.212281864733756e-06, "loss": 0.1775, "step": 21520 }, { "epoch": 70.56065573770492, "grad_norm": 2.6711950302124023, "learning_rate": 4.211415939636677e-06, "loss": 0.1388, "step": 21521 }, { "epoch": 70.56393442622951, "grad_norm": 3.0192770957946777, "learning_rate": 4.210550079811907e-06, "loss": 0.262, "step": 21522 }, { "epoch": 70.5672131147541, "grad_norm": 2.535006523132324, "learning_rate": 4.209684285269203e-06, "loss": 0.1493, "step": 21523 }, { "epoch": 70.57049180327868, "grad_norm": 3.49576997756958, "learning_rate": 4.20881855601833e-06, "loss": 0.0522, "step": 21524 }, { "epoch": 70.57377049180327, "grad_norm": 2.8683691024780273, "learning_rate": 4.2079528920690484e-06, "loss": 0.0872, "step": 21525 }, { "epoch": 70.57704918032788, "grad_norm": 2.7865207195281982, "learning_rate": 4.207087293431117e-06, "loss": 0.099, "step": 21526 }, { "epoch": 70.58032786885246, "grad_norm": 3.6110544204711914, "learning_rate": 4.206221760114303e-06, "loss": 0.3266, "step": 21527 }, { "epoch": 70.58360655737705, "grad_norm": 2.9063286781311035, "learning_rate": 4.2053562921283635e-06, "loss": 0.1327, "step": 21528 }, { "epoch": 70.58688524590164, "grad_norm": 2.1773061752319336, "learning_rate": 4.204490889483055e-06, "loss": 0.0878, "step": 21529 }, { "epoch": 70.59016393442623, "grad_norm": 2.3453149795532227, "learning_rate": 4.203625552188134e-06, "loss": 0.1952, "step": 21530 }, { "epoch": 70.59344262295082, "grad_norm": 2.0442192554473877, "learning_rate": 4.202760280253366e-06, "loss": 0.1012, "step": 21531 }, { "epoch": 70.5967213114754, "grad_norm": 3.329833507537842, "learning_rate": 4.2018950736885046e-06, "loss": 0.1282, "step": 21532 }, { "epoch": 70.6, "grad_norm": 5.8030924797058105, "learning_rate": 4.201029932503303e-06, "loss": 0.1651, "step": 21533 }, { "epoch": 70.6032786885246, "grad_norm": 2.7573659420013428, "learning_rate": 4.200164856707516e-06, "loss": 0.1417, "step": 21534 }, { "epoch": 70.60655737704919, "grad_norm": 2.3757617473602295, "learning_rate": 4.199299846310903e-06, "loss": 0.1331, "step": 21535 }, { "epoch": 70.60983606557377, "grad_norm": 2.610334634780884, "learning_rate": 4.1984349013232165e-06, "loss": 0.1303, "step": 21536 }, { "epoch": 70.61311475409836, "grad_norm": 2.994852066040039, "learning_rate": 4.197570021754207e-06, "loss": 0.0975, "step": 21537 }, { "epoch": 70.61639344262295, "grad_norm": 3.4933245182037354, "learning_rate": 4.19670520761363e-06, "loss": 0.198, "step": 21538 }, { "epoch": 70.61967213114754, "grad_norm": 2.1885688304901123, "learning_rate": 4.195840458911231e-06, "loss": 0.1308, "step": 21539 }, { "epoch": 70.62295081967213, "grad_norm": 2.1935393810272217, "learning_rate": 4.194975775656771e-06, "loss": 0.0833, "step": 21540 }, { "epoch": 70.62622950819672, "grad_norm": 2.5673201084136963, "learning_rate": 4.194111157859993e-06, "loss": 0.1899, "step": 21541 }, { "epoch": 70.62950819672132, "grad_norm": 3.0252296924591064, "learning_rate": 4.1932466055306495e-06, "loss": 0.1552, "step": 21542 }, { "epoch": 70.6327868852459, "grad_norm": 2.532151460647583, "learning_rate": 4.192382118678484e-06, "loss": 0.2287, "step": 21543 }, { "epoch": 70.6360655737705, "grad_norm": 3.773648977279663, "learning_rate": 4.191517697313252e-06, "loss": 0.199, "step": 21544 }, { "epoch": 70.63934426229508, "grad_norm": 2.1520814895629883, "learning_rate": 4.190653341444697e-06, "loss": 0.0576, "step": 21545 }, { "epoch": 70.64262295081967, "grad_norm": 4.162537574768066, "learning_rate": 4.1897890510825665e-06, "loss": 0.1646, "step": 21546 }, { "epoch": 70.64590163934426, "grad_norm": 2.394129753112793, "learning_rate": 4.188924826236601e-06, "loss": 0.1298, "step": 21547 }, { "epoch": 70.64918032786885, "grad_norm": 3.77671217918396, "learning_rate": 4.188060666916555e-06, "loss": 0.1598, "step": 21548 }, { "epoch": 70.65245901639344, "grad_norm": 4.025615215301514, "learning_rate": 4.187196573132167e-06, "loss": 0.2759, "step": 21549 }, { "epoch": 70.65573770491804, "grad_norm": 2.8620829582214355, "learning_rate": 4.186332544893181e-06, "loss": 0.2536, "step": 21550 }, { "epoch": 70.65901639344263, "grad_norm": 3.3230698108673096, "learning_rate": 4.185468582209341e-06, "loss": 0.1333, "step": 21551 }, { "epoch": 70.66229508196722, "grad_norm": 2.2322914600372314, "learning_rate": 4.184604685090386e-06, "loss": 0.0443, "step": 21552 }, { "epoch": 70.6655737704918, "grad_norm": 2.5519161224365234, "learning_rate": 4.183740853546061e-06, "loss": 0.0472, "step": 21553 }, { "epoch": 70.66885245901639, "grad_norm": 2.9854490756988525, "learning_rate": 4.182877087586106e-06, "loss": 0.0984, "step": 21554 }, { "epoch": 70.67213114754098, "grad_norm": 1.9548141956329346, "learning_rate": 4.18201338722026e-06, "loss": 0.0414, "step": 21555 }, { "epoch": 70.67540983606557, "grad_norm": 3.0815541744232178, "learning_rate": 4.181149752458259e-06, "loss": 0.2959, "step": 21556 }, { "epoch": 70.67868852459016, "grad_norm": 2.5643715858459473, "learning_rate": 4.180286183309849e-06, "loss": 0.1165, "step": 21557 }, { "epoch": 70.68196721311476, "grad_norm": 2.305474042892456, "learning_rate": 4.179422679784762e-06, "loss": 0.287, "step": 21558 }, { "epoch": 70.68524590163935, "grad_norm": 2.787973165512085, "learning_rate": 4.178559241892737e-06, "loss": 0.0762, "step": 21559 }, { "epoch": 70.68852459016394, "grad_norm": 2.573624610900879, "learning_rate": 4.1776958696435045e-06, "loss": 0.1307, "step": 21560 }, { "epoch": 70.69180327868852, "grad_norm": 2.5173819065093994, "learning_rate": 4.17683256304681e-06, "loss": 0.1698, "step": 21561 }, { "epoch": 70.69508196721311, "grad_norm": 2.2353546619415283, "learning_rate": 4.1759693221123824e-06, "loss": 0.1234, "step": 21562 }, { "epoch": 70.6983606557377, "grad_norm": 2.7955520153045654, "learning_rate": 4.175106146849951e-06, "loss": 0.1406, "step": 21563 }, { "epoch": 70.70163934426229, "grad_norm": 2.606870412826538, "learning_rate": 4.17424303726926e-06, "loss": 0.1713, "step": 21564 }, { "epoch": 70.70491803278688, "grad_norm": 2.1768696308135986, "learning_rate": 4.173379993380034e-06, "loss": 0.1276, "step": 21565 }, { "epoch": 70.70819672131148, "grad_norm": 2.8417675495147705, "learning_rate": 4.172517015192008e-06, "loss": 0.1597, "step": 21566 }, { "epoch": 70.71147540983607, "grad_norm": 2.1767141819000244, "learning_rate": 4.1716541027149085e-06, "loss": 0.0891, "step": 21567 }, { "epoch": 70.71475409836066, "grad_norm": 2.744866132736206, "learning_rate": 4.170791255958472e-06, "loss": 0.1942, "step": 21568 }, { "epoch": 70.71803278688525, "grad_norm": 2.174452543258667, "learning_rate": 4.169928474932424e-06, "loss": 0.1252, "step": 21569 }, { "epoch": 70.72131147540983, "grad_norm": 2.6868317127227783, "learning_rate": 4.169065759646492e-06, "loss": 0.1326, "step": 21570 }, { "epoch": 70.72459016393442, "grad_norm": 2.820133924484253, "learning_rate": 4.168203110110409e-06, "loss": 0.14, "step": 21571 }, { "epoch": 70.72786885245901, "grad_norm": 2.291799306869507, "learning_rate": 4.167340526333901e-06, "loss": 0.0534, "step": 21572 }, { "epoch": 70.73114754098361, "grad_norm": 2.5575170516967773, "learning_rate": 4.166478008326688e-06, "loss": 0.1497, "step": 21573 }, { "epoch": 70.7344262295082, "grad_norm": 1.8510842323303223, "learning_rate": 4.165615556098504e-06, "loss": 0.042, "step": 21574 }, { "epoch": 70.73770491803279, "grad_norm": 3.0426106452941895, "learning_rate": 4.1647531696590714e-06, "loss": 0.3493, "step": 21575 }, { "epoch": 70.74098360655738, "grad_norm": 2.669283390045166, "learning_rate": 4.163890849018114e-06, "loss": 0.0918, "step": 21576 }, { "epoch": 70.74426229508197, "grad_norm": 2.7366812229156494, "learning_rate": 4.163028594185355e-06, "loss": 0.1734, "step": 21577 }, { "epoch": 70.74754098360656, "grad_norm": 3.1980113983154297, "learning_rate": 4.162166405170514e-06, "loss": 0.1575, "step": 21578 }, { "epoch": 70.75081967213114, "grad_norm": 3.1226577758789062, "learning_rate": 4.161304281983321e-06, "loss": 0.3521, "step": 21579 }, { "epoch": 70.75409836065573, "grad_norm": 2.3567452430725098, "learning_rate": 4.1604422246334916e-06, "loss": 0.1385, "step": 21580 }, { "epoch": 70.75737704918033, "grad_norm": 3.0273900032043457, "learning_rate": 4.1595802331307486e-06, "loss": 0.1897, "step": 21581 }, { "epoch": 70.76065573770492, "grad_norm": 3.2592709064483643, "learning_rate": 4.158718307484808e-06, "loss": 0.1011, "step": 21582 }, { "epoch": 70.76393442622951, "grad_norm": 2.287670135498047, "learning_rate": 4.157856447705395e-06, "loss": 0.1684, "step": 21583 }, { "epoch": 70.7672131147541, "grad_norm": 2.186823844909668, "learning_rate": 4.156994653802224e-06, "loss": 0.0911, "step": 21584 }, { "epoch": 70.77049180327869, "grad_norm": 2.667868137359619, "learning_rate": 4.156132925785015e-06, "loss": 0.2788, "step": 21585 }, { "epoch": 70.77377049180328, "grad_norm": 2.8438560962677, "learning_rate": 4.155271263663479e-06, "loss": 0.2084, "step": 21586 }, { "epoch": 70.77704918032786, "grad_norm": 2.2334673404693604, "learning_rate": 4.154409667447339e-06, "loss": 0.1671, "step": 21587 }, { "epoch": 70.78032786885245, "grad_norm": 2.5116465091705322, "learning_rate": 4.153548137146309e-06, "loss": 0.1962, "step": 21588 }, { "epoch": 70.78360655737706, "grad_norm": 5.549923896789551, "learning_rate": 4.152686672770103e-06, "loss": 0.1231, "step": 21589 }, { "epoch": 70.78688524590164, "grad_norm": 2.833914041519165, "learning_rate": 4.151825274328433e-06, "loss": 0.1144, "step": 21590 }, { "epoch": 70.79016393442623, "grad_norm": 2.6217219829559326, "learning_rate": 4.1509639418310114e-06, "loss": 0.1062, "step": 21591 }, { "epoch": 70.79344262295082, "grad_norm": 2.6251516342163086, "learning_rate": 4.150102675287556e-06, "loss": 0.2003, "step": 21592 }, { "epoch": 70.79672131147541, "grad_norm": 2.314378499984741, "learning_rate": 4.149241474707775e-06, "loss": 0.0788, "step": 21593 }, { "epoch": 70.8, "grad_norm": 2.940241813659668, "learning_rate": 4.14838034010138e-06, "loss": 0.1363, "step": 21594 }, { "epoch": 70.80327868852459, "grad_norm": 3.591264247894287, "learning_rate": 4.147519271478076e-06, "loss": 0.2551, "step": 21595 }, { "epoch": 70.80655737704917, "grad_norm": 5.939738750457764, "learning_rate": 4.146658268847583e-06, "loss": 0.1823, "step": 21596 }, { "epoch": 70.80983606557378, "grad_norm": 2.629042387008667, "learning_rate": 4.145797332219603e-06, "loss": 0.1035, "step": 21597 }, { "epoch": 70.81311475409836, "grad_norm": 3.5161843299865723, "learning_rate": 4.144936461603845e-06, "loss": 0.1767, "step": 21598 }, { "epoch": 70.81639344262295, "grad_norm": 3.4644343852996826, "learning_rate": 4.144075657010012e-06, "loss": 0.1851, "step": 21599 }, { "epoch": 70.81967213114754, "grad_norm": 2.71162748336792, "learning_rate": 4.143214918447818e-06, "loss": 0.2522, "step": 21600 }, { "epoch": 70.82295081967213, "grad_norm": 2.8995048999786377, "learning_rate": 4.142354245926966e-06, "loss": 0.0687, "step": 21601 }, { "epoch": 70.82622950819672, "grad_norm": 2.2433884143829346, "learning_rate": 4.14149363945716e-06, "loss": 0.043, "step": 21602 }, { "epoch": 70.8295081967213, "grad_norm": 2.6182520389556885, "learning_rate": 4.140633099048106e-06, "loss": 0.2463, "step": 21603 }, { "epoch": 70.8327868852459, "grad_norm": 2.327509641647339, "learning_rate": 4.139772624709501e-06, "loss": 0.1007, "step": 21604 }, { "epoch": 70.8360655737705, "grad_norm": 2.8574509620666504, "learning_rate": 4.138912216451057e-06, "loss": 0.2204, "step": 21605 }, { "epoch": 70.83934426229509, "grad_norm": 2.5144248008728027, "learning_rate": 4.13805187428247e-06, "loss": 0.1781, "step": 21606 }, { "epoch": 70.84262295081967, "grad_norm": 2.573545455932617, "learning_rate": 4.137191598213445e-06, "loss": 0.0978, "step": 21607 }, { "epoch": 70.84590163934426, "grad_norm": 3.1459720134735107, "learning_rate": 4.136331388253676e-06, "loss": 0.1914, "step": 21608 }, { "epoch": 70.84918032786885, "grad_norm": 2.6441357135772705, "learning_rate": 4.135471244412871e-06, "loss": 0.108, "step": 21609 }, { "epoch": 70.85245901639344, "grad_norm": 3.3006904125213623, "learning_rate": 4.134611166700725e-06, "loss": 0.1659, "step": 21610 }, { "epoch": 70.85573770491803, "grad_norm": 3.0600311756134033, "learning_rate": 4.133751155126937e-06, "loss": 0.1928, "step": 21611 }, { "epoch": 70.85901639344263, "grad_norm": 2.6472294330596924, "learning_rate": 4.132891209701201e-06, "loss": 0.0976, "step": 21612 }, { "epoch": 70.86229508196722, "grad_norm": 2.162419557571411, "learning_rate": 4.13203133043322e-06, "loss": 0.1025, "step": 21613 }, { "epoch": 70.8655737704918, "grad_norm": 1.8922733068466187, "learning_rate": 4.131171517332687e-06, "loss": 0.1318, "step": 21614 }, { "epoch": 70.8688524590164, "grad_norm": 2.9408416748046875, "learning_rate": 4.130311770409297e-06, "loss": 0.2593, "step": 21615 }, { "epoch": 70.87213114754098, "grad_norm": 3.0839757919311523, "learning_rate": 4.129452089672741e-06, "loss": 0.1107, "step": 21616 }, { "epoch": 70.87540983606557, "grad_norm": 4.1375555992126465, "learning_rate": 4.12859247513272e-06, "loss": 0.1882, "step": 21617 }, { "epoch": 70.87868852459016, "grad_norm": 2.559342861175537, "learning_rate": 4.1277329267989245e-06, "loss": 0.2296, "step": 21618 }, { "epoch": 70.88196721311475, "grad_norm": 2.381873607635498, "learning_rate": 4.126873444681041e-06, "loss": 0.1319, "step": 21619 }, { "epoch": 70.88524590163935, "grad_norm": 2.4481940269470215, "learning_rate": 4.12601402878877e-06, "loss": 0.0943, "step": 21620 }, { "epoch": 70.88852459016394, "grad_norm": 2.485863208770752, "learning_rate": 4.125154679131799e-06, "loss": 0.1469, "step": 21621 }, { "epoch": 70.89180327868853, "grad_norm": 2.2182836532592773, "learning_rate": 4.124295395719813e-06, "loss": 0.0683, "step": 21622 }, { "epoch": 70.89508196721312, "grad_norm": 2.637958526611328, "learning_rate": 4.123436178562509e-06, "loss": 0.0929, "step": 21623 }, { "epoch": 70.8983606557377, "grad_norm": 3.985488176345825, "learning_rate": 4.1225770276695735e-06, "loss": 0.2211, "step": 21624 }, { "epoch": 70.90163934426229, "grad_norm": 1.9347327947616577, "learning_rate": 4.121717943050688e-06, "loss": 0.0291, "step": 21625 }, { "epoch": 70.90491803278688, "grad_norm": 2.389768123626709, "learning_rate": 4.120858924715548e-06, "loss": 0.1978, "step": 21626 }, { "epoch": 70.90819672131147, "grad_norm": 2.548774242401123, "learning_rate": 4.119999972673837e-06, "loss": 0.1045, "step": 21627 }, { "epoch": 70.91147540983607, "grad_norm": 2.280944585800171, "learning_rate": 4.11914108693524e-06, "loss": 0.0522, "step": 21628 }, { "epoch": 70.91475409836066, "grad_norm": 2.6077682971954346, "learning_rate": 4.118282267509442e-06, "loss": 0.0779, "step": 21629 }, { "epoch": 70.91803278688525, "grad_norm": 2.377790927886963, "learning_rate": 4.117423514406124e-06, "loss": 0.1732, "step": 21630 }, { "epoch": 70.92131147540984, "grad_norm": 4.12644624710083, "learning_rate": 4.116564827634975e-06, "loss": 0.1781, "step": 21631 }, { "epoch": 70.92459016393443, "grad_norm": 3.6731624603271484, "learning_rate": 4.1157062072056744e-06, "loss": 0.2206, "step": 21632 }, { "epoch": 70.92786885245901, "grad_norm": 2.7180404663085938, "learning_rate": 4.114847653127904e-06, "loss": 0.1153, "step": 21633 }, { "epoch": 70.9311475409836, "grad_norm": 3.6570167541503906, "learning_rate": 4.113989165411343e-06, "loss": 0.2234, "step": 21634 }, { "epoch": 70.93442622950819, "grad_norm": 2.1065962314605713, "learning_rate": 4.113130744065677e-06, "loss": 0.2229, "step": 21635 }, { "epoch": 70.9377049180328, "grad_norm": 2.8235044479370117, "learning_rate": 4.112272389100582e-06, "loss": 0.0513, "step": 21636 }, { "epoch": 70.94098360655738, "grad_norm": 2.625643253326416, "learning_rate": 4.111414100525739e-06, "loss": 0.1626, "step": 21637 }, { "epoch": 70.94426229508197, "grad_norm": 3.5024771690368652, "learning_rate": 4.11055587835082e-06, "loss": 0.1689, "step": 21638 }, { "epoch": 70.94754098360656, "grad_norm": 2.828244209289551, "learning_rate": 4.1096977225855105e-06, "loss": 0.2084, "step": 21639 }, { "epoch": 70.95081967213115, "grad_norm": 2.40466046333313, "learning_rate": 4.108839633239485e-06, "loss": 0.1019, "step": 21640 }, { "epoch": 70.95409836065573, "grad_norm": 2.74466609954834, "learning_rate": 4.107981610322417e-06, "loss": 0.1489, "step": 21641 }, { "epoch": 70.95737704918032, "grad_norm": 2.9474375247955322, "learning_rate": 4.107123653843982e-06, "loss": 0.2801, "step": 21642 }, { "epoch": 70.96065573770491, "grad_norm": 3.4428677558898926, "learning_rate": 4.106265763813852e-06, "loss": 0.1971, "step": 21643 }, { "epoch": 70.96393442622951, "grad_norm": 3.135354995727539, "learning_rate": 4.105407940241706e-06, "loss": 0.1694, "step": 21644 }, { "epoch": 70.9672131147541, "grad_norm": 2.9386379718780518, "learning_rate": 4.104550183137215e-06, "loss": 0.197, "step": 21645 }, { "epoch": 70.97049180327869, "grad_norm": 2.738084554672241, "learning_rate": 4.103692492510051e-06, "loss": 0.1897, "step": 21646 }, { "epoch": 70.97377049180328, "grad_norm": 2.7932775020599365, "learning_rate": 4.1028348683698804e-06, "loss": 0.1567, "step": 21647 }, { "epoch": 70.97704918032787, "grad_norm": 2.7051596641540527, "learning_rate": 4.1019773107263815e-06, "loss": 0.1098, "step": 21648 }, { "epoch": 70.98032786885246, "grad_norm": 3.005356550216675, "learning_rate": 4.101119819589221e-06, "loss": 0.0955, "step": 21649 }, { "epoch": 70.98360655737704, "grad_norm": 4.022881984710693, "learning_rate": 4.100262394968069e-06, "loss": 0.1324, "step": 21650 }, { "epoch": 70.98688524590163, "grad_norm": 2.2338316440582275, "learning_rate": 4.099405036872588e-06, "loss": 0.1794, "step": 21651 }, { "epoch": 70.99016393442623, "grad_norm": 2.2825310230255127, "learning_rate": 4.098547745312453e-06, "loss": 0.1057, "step": 21652 }, { "epoch": 70.99344262295082, "grad_norm": 2.6154751777648926, "learning_rate": 4.097690520297331e-06, "loss": 0.1191, "step": 21653 }, { "epoch": 70.99672131147541, "grad_norm": 3.2108144760131836, "learning_rate": 4.096833361836883e-06, "loss": 0.242, "step": 21654 }, { "epoch": 71.0, "grad_norm": 3.658215045928955, "learning_rate": 4.095976269940777e-06, "loss": 0.0693, "step": 21655 }, { "epoch": 71.00327868852459, "grad_norm": 2.3167383670806885, "learning_rate": 4.095119244618674e-06, "loss": 0.0489, "step": 21656 }, { "epoch": 71.00655737704918, "grad_norm": 2.7637431621551514, "learning_rate": 4.094262285880244e-06, "loss": 0.1096, "step": 21657 }, { "epoch": 71.00983606557377, "grad_norm": 2.5816080570220947, "learning_rate": 4.093405393735147e-06, "loss": 0.2129, "step": 21658 }, { "epoch": 71.01311475409837, "grad_norm": 2.5754129886627197, "learning_rate": 4.092548568193047e-06, "loss": 0.108, "step": 21659 }, { "epoch": 71.01639344262296, "grad_norm": 1.9300177097320557, "learning_rate": 4.091691809263598e-06, "loss": 0.0497, "step": 21660 }, { "epoch": 71.01967213114754, "grad_norm": 2.195493459701538, "learning_rate": 4.090835116956473e-06, "loss": 0.1084, "step": 21661 }, { "epoch": 71.02295081967213, "grad_norm": 3.037106990814209, "learning_rate": 4.089978491281325e-06, "loss": 0.209, "step": 21662 }, { "epoch": 71.02622950819672, "grad_norm": 2.9797158241271973, "learning_rate": 4.089121932247815e-06, "loss": 0.1412, "step": 21663 }, { "epoch": 71.02950819672131, "grad_norm": 2.9090378284454346, "learning_rate": 4.0882654398655965e-06, "loss": 0.1472, "step": 21664 }, { "epoch": 71.0327868852459, "grad_norm": 3.3885414600372314, "learning_rate": 4.087409014144336e-06, "loss": 0.2105, "step": 21665 }, { "epoch": 71.03606557377049, "grad_norm": 2.243325710296631, "learning_rate": 4.086552655093686e-06, "loss": 0.0746, "step": 21666 }, { "epoch": 71.03934426229509, "grad_norm": 3.458469867706299, "learning_rate": 4.085696362723304e-06, "loss": 0.199, "step": 21667 }, { "epoch": 71.04262295081968, "grad_norm": 2.8588128089904785, "learning_rate": 4.084840137042843e-06, "loss": 0.1099, "step": 21668 }, { "epoch": 71.04590163934427, "grad_norm": 3.6295599937438965, "learning_rate": 4.083983978061958e-06, "loss": 0.1716, "step": 21669 }, { "epoch": 71.04918032786885, "grad_norm": 1.8737070560455322, "learning_rate": 4.083127885790308e-06, "loss": 0.1101, "step": 21670 }, { "epoch": 71.05245901639344, "grad_norm": 3.0051119327545166, "learning_rate": 4.082271860237542e-06, "loss": 0.2455, "step": 21671 }, { "epoch": 71.05573770491803, "grad_norm": 2.5680956840515137, "learning_rate": 4.081415901413312e-06, "loss": 0.0847, "step": 21672 }, { "epoch": 71.05901639344262, "grad_norm": 3.772804021835327, "learning_rate": 4.080560009327274e-06, "loss": 0.1724, "step": 21673 }, { "epoch": 71.0622950819672, "grad_norm": 2.084263324737549, "learning_rate": 4.079704183989076e-06, "loss": 0.0922, "step": 21674 }, { "epoch": 71.06557377049181, "grad_norm": 3.2114291191101074, "learning_rate": 4.078848425408366e-06, "loss": 0.2246, "step": 21675 }, { "epoch": 71.0688524590164, "grad_norm": 2.6765828132629395, "learning_rate": 4.0779927335948e-06, "loss": 0.0932, "step": 21676 }, { "epoch": 71.07213114754099, "grad_norm": 2.670222043991089, "learning_rate": 4.0771371085580234e-06, "loss": 0.0577, "step": 21677 }, { "epoch": 71.07540983606557, "grad_norm": 3.685593843460083, "learning_rate": 4.07628155030768e-06, "loss": 0.1601, "step": 21678 }, { "epoch": 71.07868852459016, "grad_norm": 3.1700375080108643, "learning_rate": 4.075426058853426e-06, "loss": 0.1849, "step": 21679 }, { "epoch": 71.08196721311475, "grad_norm": 3.645719528198242, "learning_rate": 4.074570634204902e-06, "loss": 0.2636, "step": 21680 }, { "epoch": 71.08524590163934, "grad_norm": 2.1177589893341064, "learning_rate": 4.0737152763717556e-06, "loss": 0.0361, "step": 21681 }, { "epoch": 71.08852459016393, "grad_norm": 1.855101227760315, "learning_rate": 4.0728599853636274e-06, "loss": 0.0574, "step": 21682 }, { "epoch": 71.09180327868853, "grad_norm": 2.7541444301605225, "learning_rate": 4.072004761190169e-06, "loss": 0.2365, "step": 21683 }, { "epoch": 71.09508196721312, "grad_norm": 3.413254976272583, "learning_rate": 4.071149603861021e-06, "loss": 0.1427, "step": 21684 }, { "epoch": 71.09836065573771, "grad_norm": 2.226250171661377, "learning_rate": 4.070294513385825e-06, "loss": 0.178, "step": 21685 }, { "epoch": 71.1016393442623, "grad_norm": 2.4793267250061035, "learning_rate": 4.06943948977422e-06, "loss": 0.1395, "step": 21686 }, { "epoch": 71.10491803278688, "grad_norm": 4.562878608703613, "learning_rate": 4.068584533035855e-06, "loss": 0.1332, "step": 21687 }, { "epoch": 71.10819672131147, "grad_norm": 2.8032917976379395, "learning_rate": 4.067729643180367e-06, "loss": 0.3096, "step": 21688 }, { "epoch": 71.11147540983606, "grad_norm": 2.362091064453125, "learning_rate": 4.066874820217395e-06, "loss": 0.1425, "step": 21689 }, { "epoch": 71.11475409836065, "grad_norm": 2.590902090072632, "learning_rate": 4.066020064156574e-06, "loss": 0.1111, "step": 21690 }, { "epoch": 71.11803278688525, "grad_norm": 1.6033098697662354, "learning_rate": 4.065165375007551e-06, "loss": 0.0375, "step": 21691 }, { "epoch": 71.12131147540984, "grad_norm": 2.510011672973633, "learning_rate": 4.064310752779958e-06, "loss": 0.0878, "step": 21692 }, { "epoch": 71.12459016393443, "grad_norm": 2.1198277473449707, "learning_rate": 4.063456197483434e-06, "loss": 0.0972, "step": 21693 }, { "epoch": 71.12786885245902, "grad_norm": 3.452186107635498, "learning_rate": 4.062601709127614e-06, "loss": 0.2402, "step": 21694 }, { "epoch": 71.1311475409836, "grad_norm": 2.473843812942505, "learning_rate": 4.061747287722128e-06, "loss": 0.057, "step": 21695 }, { "epoch": 71.1344262295082, "grad_norm": 2.93042254447937, "learning_rate": 4.0608929332766215e-06, "loss": 0.2282, "step": 21696 }, { "epoch": 71.13770491803278, "grad_norm": 3.68499755859375, "learning_rate": 4.06003864580072e-06, "loss": 0.0905, "step": 21697 }, { "epoch": 71.14098360655737, "grad_norm": 2.20528507232666, "learning_rate": 4.05918442530406e-06, "loss": 0.0722, "step": 21698 }, { "epoch": 71.14426229508197, "grad_norm": 2.5723962783813477, "learning_rate": 4.058330271796269e-06, "loss": 0.161, "step": 21699 }, { "epoch": 71.14754098360656, "grad_norm": 3.0834696292877197, "learning_rate": 4.057476185286985e-06, "loss": 0.1224, "step": 21700 }, { "epoch": 71.15081967213115, "grad_norm": 2.410515069961548, "learning_rate": 4.056622165785837e-06, "loss": 0.0942, "step": 21701 }, { "epoch": 71.15409836065574, "grad_norm": 2.5658273696899414, "learning_rate": 4.055768213302451e-06, "loss": 0.1533, "step": 21702 }, { "epoch": 71.15737704918033, "grad_norm": 2.821932554244995, "learning_rate": 4.054914327846458e-06, "loss": 0.1846, "step": 21703 }, { "epoch": 71.16065573770491, "grad_norm": 2.3148770332336426, "learning_rate": 4.054060509427489e-06, "loss": 0.1469, "step": 21704 }, { "epoch": 71.1639344262295, "grad_norm": 2.697045087814331, "learning_rate": 4.05320675805517e-06, "loss": 0.0807, "step": 21705 }, { "epoch": 71.1672131147541, "grad_norm": 2.4624109268188477, "learning_rate": 4.052353073739129e-06, "loss": 0.0632, "step": 21706 }, { "epoch": 71.1704918032787, "grad_norm": 3.238532304763794, "learning_rate": 4.051499456488991e-06, "loss": 0.1152, "step": 21707 }, { "epoch": 71.17377049180328, "grad_norm": 2.327828884124756, "learning_rate": 4.050645906314378e-06, "loss": 0.1601, "step": 21708 }, { "epoch": 71.17704918032787, "grad_norm": 2.2766225337982178, "learning_rate": 4.049792423224921e-06, "loss": 0.0714, "step": 21709 }, { "epoch": 71.18032786885246, "grad_norm": 2.7620432376861572, "learning_rate": 4.048939007230241e-06, "loss": 0.223, "step": 21710 }, { "epoch": 71.18360655737705, "grad_norm": 2.468743324279785, "learning_rate": 4.048085658339962e-06, "loss": 0.0749, "step": 21711 }, { "epoch": 71.18688524590164, "grad_norm": 2.976395606994629, "learning_rate": 4.047232376563701e-06, "loss": 0.2785, "step": 21712 }, { "epoch": 71.19016393442622, "grad_norm": 1.9545806646347046, "learning_rate": 4.046379161911089e-06, "loss": 0.0891, "step": 21713 }, { "epoch": 71.19344262295083, "grad_norm": 2.0869243144989014, "learning_rate": 4.045526014391742e-06, "loss": 0.0862, "step": 21714 }, { "epoch": 71.19672131147541, "grad_norm": 4.76123571395874, "learning_rate": 4.04467293401528e-06, "loss": 0.1544, "step": 21715 }, { "epoch": 71.2, "grad_norm": 3.263974666595459, "learning_rate": 4.043819920791322e-06, "loss": 0.0898, "step": 21716 }, { "epoch": 71.20327868852459, "grad_norm": 14.930447578430176, "learning_rate": 4.042966974729485e-06, "loss": 0.0604, "step": 21717 }, { "epoch": 71.20655737704918, "grad_norm": 2.70027494430542, "learning_rate": 4.0421140958393925e-06, "loss": 0.1826, "step": 21718 }, { "epoch": 71.20983606557377, "grad_norm": 2.4250195026397705, "learning_rate": 4.041261284130658e-06, "loss": 0.0757, "step": 21719 }, { "epoch": 71.21311475409836, "grad_norm": 3.4095382690429688, "learning_rate": 4.040408539612897e-06, "loss": 0.1187, "step": 21720 }, { "epoch": 71.21639344262294, "grad_norm": 2.8760175704956055, "learning_rate": 4.039555862295723e-06, "loss": 0.0779, "step": 21721 }, { "epoch": 71.21967213114755, "grad_norm": 2.6124210357666016, "learning_rate": 4.038703252188758e-06, "loss": 0.0961, "step": 21722 }, { "epoch": 71.22295081967214, "grad_norm": 2.9759585857391357, "learning_rate": 4.037850709301613e-06, "loss": 0.1304, "step": 21723 }, { "epoch": 71.22622950819672, "grad_norm": 2.071429967880249, "learning_rate": 4.036998233643895e-06, "loss": 0.067, "step": 21724 }, { "epoch": 71.22950819672131, "grad_norm": 3.09014630317688, "learning_rate": 4.036145825225226e-06, "loss": 0.121, "step": 21725 }, { "epoch": 71.2327868852459, "grad_norm": 3.1330678462982178, "learning_rate": 4.035293484055214e-06, "loss": 0.0645, "step": 21726 }, { "epoch": 71.23606557377049, "grad_norm": 3.1312718391418457, "learning_rate": 4.034441210143466e-06, "loss": 0.0752, "step": 21727 }, { "epoch": 71.23934426229508, "grad_norm": 2.425117254257202, "learning_rate": 4.033589003499599e-06, "loss": 0.1677, "step": 21728 }, { "epoch": 71.24262295081967, "grad_norm": 2.94478440284729, "learning_rate": 4.032736864133221e-06, "loss": 0.1281, "step": 21729 }, { "epoch": 71.24590163934427, "grad_norm": 2.5530905723571777, "learning_rate": 4.031884792053938e-06, "loss": 0.1389, "step": 21730 }, { "epoch": 71.24918032786886, "grad_norm": 2.9623148441314697, "learning_rate": 4.031032787271356e-06, "loss": 0.0897, "step": 21731 }, { "epoch": 71.25245901639344, "grad_norm": 2.5146772861480713, "learning_rate": 4.030180849795089e-06, "loss": 0.2265, "step": 21732 }, { "epoch": 71.25573770491803, "grad_norm": 2.628650426864624, "learning_rate": 4.02932897963474e-06, "loss": 0.1047, "step": 21733 }, { "epoch": 71.25901639344262, "grad_norm": 2.255174160003662, "learning_rate": 4.028477176799912e-06, "loss": 0.0784, "step": 21734 }, { "epoch": 71.26229508196721, "grad_norm": 1.9506220817565918, "learning_rate": 4.027625441300214e-06, "loss": 0.0432, "step": 21735 }, { "epoch": 71.2655737704918, "grad_norm": 2.910778760910034, "learning_rate": 4.0267737731452515e-06, "loss": 0.1052, "step": 21736 }, { "epoch": 71.26885245901639, "grad_norm": 2.5344431400299072, "learning_rate": 4.025922172344624e-06, "loss": 0.1689, "step": 21737 }, { "epoch": 71.27213114754099, "grad_norm": 2.3893847465515137, "learning_rate": 4.025070638907932e-06, "loss": 0.1991, "step": 21738 }, { "epoch": 71.27540983606558, "grad_norm": 3.11129093170166, "learning_rate": 4.024219172844784e-06, "loss": 0.0868, "step": 21739 }, { "epoch": 71.27868852459017, "grad_norm": 2.8643603324890137, "learning_rate": 4.023367774164779e-06, "loss": 0.227, "step": 21740 }, { "epoch": 71.28196721311475, "grad_norm": 3.602020740509033, "learning_rate": 4.022516442877515e-06, "loss": 0.17, "step": 21741 }, { "epoch": 71.28524590163934, "grad_norm": 2.9922101497650146, "learning_rate": 4.021665178992595e-06, "loss": 0.1372, "step": 21742 }, { "epoch": 71.28852459016393, "grad_norm": 2.8946259021759033, "learning_rate": 4.020813982519611e-06, "loss": 0.1519, "step": 21743 }, { "epoch": 71.29180327868852, "grad_norm": 2.9481217861175537, "learning_rate": 4.0199628534681715e-06, "loss": 0.1788, "step": 21744 }, { "epoch": 71.29508196721312, "grad_norm": 2.3939738273620605, "learning_rate": 4.0191117918478676e-06, "loss": 0.0777, "step": 21745 }, { "epoch": 71.29836065573771, "grad_norm": 2.568058490753174, "learning_rate": 4.0182607976682956e-06, "loss": 0.1089, "step": 21746 }, { "epoch": 71.3016393442623, "grad_norm": 2.1639134883880615, "learning_rate": 4.01740987093905e-06, "loss": 0.0944, "step": 21747 }, { "epoch": 71.30491803278689, "grad_norm": 2.475407361984253, "learning_rate": 4.0165590116697315e-06, "loss": 0.1945, "step": 21748 }, { "epoch": 71.30819672131148, "grad_norm": 3.242062568664551, "learning_rate": 4.015708219869932e-06, "loss": 0.18, "step": 21749 }, { "epoch": 71.31147540983606, "grad_norm": 2.2911362648010254, "learning_rate": 4.014857495549245e-06, "loss": 0.1652, "step": 21750 }, { "epoch": 71.31475409836065, "grad_norm": 2.638951063156128, "learning_rate": 4.014006838717258e-06, "loss": 0.2777, "step": 21751 }, { "epoch": 71.31803278688524, "grad_norm": 3.3499653339385986, "learning_rate": 4.013156249383572e-06, "loss": 0.105, "step": 21752 }, { "epoch": 71.32131147540984, "grad_norm": 2.146544933319092, "learning_rate": 4.0123057275577735e-06, "loss": 0.1341, "step": 21753 }, { "epoch": 71.32459016393443, "grad_norm": 2.7442612648010254, "learning_rate": 4.011455273249454e-06, "loss": 0.1546, "step": 21754 }, { "epoch": 71.32786885245902, "grad_norm": 2.748926877975464, "learning_rate": 4.010604886468202e-06, "loss": 0.2183, "step": 21755 }, { "epoch": 71.33114754098361, "grad_norm": 3.939195394515991, "learning_rate": 4.009754567223605e-06, "loss": 0.0987, "step": 21756 }, { "epoch": 71.3344262295082, "grad_norm": 2.2062318325042725, "learning_rate": 4.008904315525256e-06, "loss": 0.1831, "step": 21757 }, { "epoch": 71.33770491803278, "grad_norm": 2.9752309322357178, "learning_rate": 4.008054131382741e-06, "loss": 0.291, "step": 21758 }, { "epoch": 71.34098360655737, "grad_norm": 2.5904202461242676, "learning_rate": 4.007204014805644e-06, "loss": 0.1329, "step": 21759 }, { "epoch": 71.34426229508196, "grad_norm": 3.114511489868164, "learning_rate": 4.0063539658035514e-06, "loss": 0.2003, "step": 21760 }, { "epoch": 71.34754098360656, "grad_norm": 3.2133684158325195, "learning_rate": 4.005503984386052e-06, "loss": 0.1844, "step": 21761 }, { "epoch": 71.35081967213115, "grad_norm": 1.788920521736145, "learning_rate": 4.004654070562728e-06, "loss": 0.0969, "step": 21762 }, { "epoch": 71.35409836065574, "grad_norm": 2.5664846897125244, "learning_rate": 4.003804224343163e-06, "loss": 0.2805, "step": 21763 }, { "epoch": 71.35737704918033, "grad_norm": 2.517152786254883, "learning_rate": 4.002954445736936e-06, "loss": 0.1179, "step": 21764 }, { "epoch": 71.36065573770492, "grad_norm": 1.9505021572113037, "learning_rate": 4.002104734753638e-06, "loss": 0.0731, "step": 21765 }, { "epoch": 71.3639344262295, "grad_norm": 2.931079864501953, "learning_rate": 4.001255091402844e-06, "loss": 0.1624, "step": 21766 }, { "epoch": 71.3672131147541, "grad_norm": 3.3888633251190186, "learning_rate": 4.0004055156941355e-06, "loss": 0.2783, "step": 21767 }, { "epoch": 71.37049180327868, "grad_norm": 3.2189741134643555, "learning_rate": 3.999556007637094e-06, "loss": 0.2068, "step": 21768 }, { "epoch": 71.37377049180328, "grad_norm": 2.160296678543091, "learning_rate": 3.9987065672412936e-06, "loss": 0.1237, "step": 21769 }, { "epoch": 71.37704918032787, "grad_norm": 2.057227849960327, "learning_rate": 3.997857194516321e-06, "loss": 0.0634, "step": 21770 }, { "epoch": 71.38032786885246, "grad_norm": 2.2346808910369873, "learning_rate": 3.997007889471747e-06, "loss": 0.204, "step": 21771 }, { "epoch": 71.38360655737705, "grad_norm": 2.452996015548706, "learning_rate": 3.996158652117152e-06, "loss": 0.1162, "step": 21772 }, { "epoch": 71.38688524590164, "grad_norm": 2.218855857849121, "learning_rate": 3.9953094824621064e-06, "loss": 0.0657, "step": 21773 }, { "epoch": 71.39016393442623, "grad_norm": 2.8049404621124268, "learning_rate": 3.994460380516193e-06, "loss": 0.0838, "step": 21774 }, { "epoch": 71.39344262295081, "grad_norm": 2.4719362258911133, "learning_rate": 3.9936113462889836e-06, "loss": 0.1146, "step": 21775 }, { "epoch": 71.3967213114754, "grad_norm": 2.519843578338623, "learning_rate": 3.9927623797900515e-06, "loss": 0.0756, "step": 21776 }, { "epoch": 71.4, "grad_norm": 2.2906014919281006, "learning_rate": 3.991913481028965e-06, "loss": 0.137, "step": 21777 }, { "epoch": 71.4032786885246, "grad_norm": 4.2531328201293945, "learning_rate": 3.991064650015306e-06, "loss": 0.1289, "step": 21778 }, { "epoch": 71.40655737704918, "grad_norm": 2.3710012435913086, "learning_rate": 3.99021588675864e-06, "loss": 0.2012, "step": 21779 }, { "epoch": 71.40983606557377, "grad_norm": 3.381748676300049, "learning_rate": 3.9893671912685336e-06, "loss": 0.1129, "step": 21780 }, { "epoch": 71.41311475409836, "grad_norm": 6.439576148986816, "learning_rate": 3.988518563554567e-06, "loss": 0.0637, "step": 21781 }, { "epoch": 71.41639344262295, "grad_norm": 3.1899349689483643, "learning_rate": 3.9876700036263035e-06, "loss": 0.2046, "step": 21782 }, { "epoch": 71.41967213114754, "grad_norm": 2.309540033340454, "learning_rate": 3.986821511493308e-06, "loss": 0.1577, "step": 21783 }, { "epoch": 71.42295081967212, "grad_norm": 3.1463065147399902, "learning_rate": 3.985973087165156e-06, "loss": 0.2166, "step": 21784 }, { "epoch": 71.42622950819673, "grad_norm": 2.005897283554077, "learning_rate": 3.985124730651411e-06, "loss": 0.0338, "step": 21785 }, { "epoch": 71.42950819672132, "grad_norm": 3.0038437843322754, "learning_rate": 3.9842764419616345e-06, "loss": 0.1822, "step": 21786 }, { "epoch": 71.4327868852459, "grad_norm": 2.6763861179351807, "learning_rate": 3.9834282211053985e-06, "loss": 0.2255, "step": 21787 }, { "epoch": 71.43606557377049, "grad_norm": 2.316455841064453, "learning_rate": 3.982580068092266e-06, "loss": 0.2458, "step": 21788 }, { "epoch": 71.43934426229508, "grad_norm": 2.677558183670044, "learning_rate": 3.9817319829318e-06, "loss": 0.1204, "step": 21789 }, { "epoch": 71.44262295081967, "grad_norm": 2.4875595569610596, "learning_rate": 3.98088396563356e-06, "loss": 0.0975, "step": 21790 }, { "epoch": 71.44590163934426, "grad_norm": 3.479978561401367, "learning_rate": 3.980036016207114e-06, "loss": 0.0969, "step": 21791 }, { "epoch": 71.44918032786886, "grad_norm": 2.8606157302856445, "learning_rate": 3.979188134662022e-06, "loss": 0.2415, "step": 21792 }, { "epoch": 71.45245901639345, "grad_norm": 2.7951061725616455, "learning_rate": 3.978340321007843e-06, "loss": 0.2353, "step": 21793 }, { "epoch": 71.45573770491804, "grad_norm": 2.876311779022217, "learning_rate": 3.977492575254138e-06, "loss": 0.1062, "step": 21794 }, { "epoch": 71.45901639344262, "grad_norm": 2.2526791095733643, "learning_rate": 3.976644897410464e-06, "loss": 0.1971, "step": 21795 }, { "epoch": 71.46229508196721, "grad_norm": 2.6486804485321045, "learning_rate": 3.975797287486383e-06, "loss": 0.0929, "step": 21796 }, { "epoch": 71.4655737704918, "grad_norm": 3.1477301120758057, "learning_rate": 3.974949745491452e-06, "loss": 0.1971, "step": 21797 }, { "epoch": 71.46885245901639, "grad_norm": 2.448690176010132, "learning_rate": 3.974102271435228e-06, "loss": 0.1566, "step": 21798 }, { "epoch": 71.47213114754098, "grad_norm": 2.312459945678711, "learning_rate": 3.973254865327262e-06, "loss": 0.179, "step": 21799 }, { "epoch": 71.47540983606558, "grad_norm": 2.941033363342285, "learning_rate": 3.9724075271771165e-06, "loss": 0.1271, "step": 21800 }, { "epoch": 71.47868852459017, "grad_norm": 2.7684669494628906, "learning_rate": 3.971560256994343e-06, "loss": 0.1769, "step": 21801 }, { "epoch": 71.48196721311476, "grad_norm": 3.0823559761047363, "learning_rate": 3.970713054788498e-06, "loss": 0.1539, "step": 21802 }, { "epoch": 71.48524590163935, "grad_norm": 2.4192311763763428, "learning_rate": 3.969865920569127e-06, "loss": 0.0971, "step": 21803 }, { "epoch": 71.48852459016393, "grad_norm": 2.744314432144165, "learning_rate": 3.969018854345791e-06, "loss": 0.092, "step": 21804 }, { "epoch": 71.49180327868852, "grad_norm": 2.3509864807128906, "learning_rate": 3.968171856128038e-06, "loss": 0.1033, "step": 21805 }, { "epoch": 71.49508196721311, "grad_norm": 2.1280574798583984, "learning_rate": 3.967324925925419e-06, "loss": 0.0606, "step": 21806 }, { "epoch": 71.4983606557377, "grad_norm": 2.0563974380493164, "learning_rate": 3.966478063747484e-06, "loss": 0.2409, "step": 21807 }, { "epoch": 71.5016393442623, "grad_norm": 2.947324752807617, "learning_rate": 3.965631269603778e-06, "loss": 0.1391, "step": 21808 }, { "epoch": 71.50491803278689, "grad_norm": 2.345975399017334, "learning_rate": 3.964784543503858e-06, "loss": 0.0688, "step": 21809 }, { "epoch": 71.50819672131148, "grad_norm": 2.5072925090789795, "learning_rate": 3.963937885457268e-06, "loss": 0.1419, "step": 21810 }, { "epoch": 71.51147540983607, "grad_norm": 3.7854552268981934, "learning_rate": 3.963091295473552e-06, "loss": 0.2069, "step": 21811 }, { "epoch": 71.51475409836065, "grad_norm": 2.6627705097198486, "learning_rate": 3.962244773562256e-06, "loss": 0.1287, "step": 21812 }, { "epoch": 71.51803278688524, "grad_norm": 2.513963460922241, "learning_rate": 3.961398319732932e-06, "loss": 0.0609, "step": 21813 }, { "epoch": 71.52131147540983, "grad_norm": 2.8299036026000977, "learning_rate": 3.96055193399512e-06, "loss": 0.236, "step": 21814 }, { "epoch": 71.52459016393442, "grad_norm": 2.2839231491088867, "learning_rate": 3.959705616358365e-06, "loss": 0.1144, "step": 21815 }, { "epoch": 71.52786885245902, "grad_norm": 3.7150192260742188, "learning_rate": 3.958859366832205e-06, "loss": 0.0966, "step": 21816 }, { "epoch": 71.53114754098361, "grad_norm": 2.1720473766326904, "learning_rate": 3.9580131854261905e-06, "loss": 0.0831, "step": 21817 }, { "epoch": 71.5344262295082, "grad_norm": 2.9882848262786865, "learning_rate": 3.9571670721498604e-06, "loss": 0.1162, "step": 21818 }, { "epoch": 71.53770491803279, "grad_norm": 2.8830606937408447, "learning_rate": 3.956321027012754e-06, "loss": 0.1291, "step": 21819 }, { "epoch": 71.54098360655738, "grad_norm": 2.421618700027466, "learning_rate": 3.955475050024412e-06, "loss": 0.1304, "step": 21820 }, { "epoch": 71.54426229508196, "grad_norm": 3.4092679023742676, "learning_rate": 3.9546291411943694e-06, "loss": 0.2629, "step": 21821 }, { "epoch": 71.54754098360655, "grad_norm": 3.356388568878174, "learning_rate": 3.953783300532172e-06, "loss": 0.0842, "step": 21822 }, { "epoch": 71.55081967213114, "grad_norm": 2.6110639572143555, "learning_rate": 3.9529375280473556e-06, "loss": 0.1532, "step": 21823 }, { "epoch": 71.55409836065574, "grad_norm": 2.4823529720306396, "learning_rate": 3.952091823749455e-06, "loss": 0.1985, "step": 21824 }, { "epoch": 71.55737704918033, "grad_norm": 2.543454885482788, "learning_rate": 3.951246187648004e-06, "loss": 0.0764, "step": 21825 }, { "epoch": 71.56065573770492, "grad_norm": 3.1156325340270996, "learning_rate": 3.950400619752546e-06, "loss": 0.1102, "step": 21826 }, { "epoch": 71.56393442622951, "grad_norm": 2.006049633026123, "learning_rate": 3.94955512007261e-06, "loss": 0.0381, "step": 21827 }, { "epoch": 71.5672131147541, "grad_norm": 3.9392337799072266, "learning_rate": 3.948709688617731e-06, "loss": 0.2147, "step": 21828 }, { "epoch": 71.57049180327868, "grad_norm": 2.581256628036499, "learning_rate": 3.947864325397439e-06, "loss": 0.156, "step": 21829 }, { "epoch": 71.57377049180327, "grad_norm": 2.6918067932128906, "learning_rate": 3.947019030421273e-06, "loss": 0.107, "step": 21830 }, { "epoch": 71.57704918032788, "grad_norm": 1.9648528099060059, "learning_rate": 3.946173803698759e-06, "loss": 0.0787, "step": 21831 }, { "epoch": 71.58032786885246, "grad_norm": 2.3666956424713135, "learning_rate": 3.945328645239432e-06, "loss": 0.1632, "step": 21832 }, { "epoch": 71.58360655737705, "grad_norm": 2.854830503463745, "learning_rate": 3.944483555052816e-06, "loss": 0.2664, "step": 21833 }, { "epoch": 71.58688524590164, "grad_norm": 2.5724709033966064, "learning_rate": 3.943638533148447e-06, "loss": 0.1255, "step": 21834 }, { "epoch": 71.59016393442623, "grad_norm": 2.257307291030884, "learning_rate": 3.942793579535851e-06, "loss": 0.0467, "step": 21835 }, { "epoch": 71.59344262295082, "grad_norm": 2.5736968517303467, "learning_rate": 3.941948694224551e-06, "loss": 0.0729, "step": 21836 }, { "epoch": 71.5967213114754, "grad_norm": 2.3671936988830566, "learning_rate": 3.941103877224083e-06, "loss": 0.0781, "step": 21837 }, { "epoch": 71.6, "grad_norm": 1.4265879392623901, "learning_rate": 3.940259128543967e-06, "loss": 0.0826, "step": 21838 }, { "epoch": 71.6032786885246, "grad_norm": 2.493964433670044, "learning_rate": 3.939414448193727e-06, "loss": 0.1299, "step": 21839 }, { "epoch": 71.60655737704919, "grad_norm": 2.656094789505005, "learning_rate": 3.938569836182894e-06, "loss": 0.1226, "step": 21840 }, { "epoch": 71.60983606557377, "grad_norm": 2.4132978916168213, "learning_rate": 3.937725292520988e-06, "loss": 0.1422, "step": 21841 }, { "epoch": 71.61311475409836, "grad_norm": 2.423510789871216, "learning_rate": 3.93688081721753e-06, "loss": 0.2114, "step": 21842 }, { "epoch": 71.61639344262295, "grad_norm": 3.831645965576172, "learning_rate": 3.936036410282048e-06, "loss": 0.2973, "step": 21843 }, { "epoch": 71.61967213114754, "grad_norm": 2.5762038230895996, "learning_rate": 3.93519207172406e-06, "loss": 0.2242, "step": 21844 }, { "epoch": 71.62295081967213, "grad_norm": 2.5244977474212646, "learning_rate": 3.934347801553088e-06, "loss": 0.0726, "step": 21845 }, { "epoch": 71.62622950819672, "grad_norm": 2.42315411567688, "learning_rate": 3.933503599778651e-06, "loss": 0.0793, "step": 21846 }, { "epoch": 71.62950819672132, "grad_norm": 2.3338000774383545, "learning_rate": 3.932659466410264e-06, "loss": 0.2157, "step": 21847 }, { "epoch": 71.6327868852459, "grad_norm": 1.9066486358642578, "learning_rate": 3.931815401457455e-06, "loss": 0.0533, "step": 21848 }, { "epoch": 71.6360655737705, "grad_norm": 3.040224313735962, "learning_rate": 3.930971404929736e-06, "loss": 0.1634, "step": 21849 }, { "epoch": 71.63934426229508, "grad_norm": 2.515395164489746, "learning_rate": 3.930127476836624e-06, "loss": 0.0782, "step": 21850 }, { "epoch": 71.64262295081967, "grad_norm": 3.6109931468963623, "learning_rate": 3.929283617187632e-06, "loss": 0.1899, "step": 21851 }, { "epoch": 71.64590163934426, "grad_norm": 2.4230117797851562, "learning_rate": 3.928439825992284e-06, "loss": 0.1612, "step": 21852 }, { "epoch": 71.64918032786885, "grad_norm": 2.9186432361602783, "learning_rate": 3.927596103260089e-06, "loss": 0.1099, "step": 21853 }, { "epoch": 71.65245901639344, "grad_norm": 2.727973222732544, "learning_rate": 3.9267524490005625e-06, "loss": 0.1217, "step": 21854 }, { "epoch": 71.65573770491804, "grad_norm": 2.5313384532928467, "learning_rate": 3.925908863223212e-06, "loss": 0.2269, "step": 21855 }, { "epoch": 71.65901639344263, "grad_norm": 3.4027702808380127, "learning_rate": 3.925065345937559e-06, "loss": 0.1025, "step": 21856 }, { "epoch": 71.66229508196722, "grad_norm": 2.300510883331299, "learning_rate": 3.92422189715311e-06, "loss": 0.1323, "step": 21857 }, { "epoch": 71.6655737704918, "grad_norm": 2.6745834350585938, "learning_rate": 3.923378516879377e-06, "loss": 0.1521, "step": 21858 }, { "epoch": 71.66885245901639, "grad_norm": 2.9035611152648926, "learning_rate": 3.922535205125869e-06, "loss": 0.1253, "step": 21859 }, { "epoch": 71.67213114754098, "grad_norm": 2.6892223358154297, "learning_rate": 3.921691961902092e-06, "loss": 0.1278, "step": 21860 }, { "epoch": 71.67540983606557, "grad_norm": 1.6383640766143799, "learning_rate": 3.920848787217562e-06, "loss": 0.0346, "step": 21861 }, { "epoch": 71.67868852459016, "grad_norm": 2.3998987674713135, "learning_rate": 3.920005681081781e-06, "loss": 0.1243, "step": 21862 }, { "epoch": 71.68196721311476, "grad_norm": 2.3615002632141113, "learning_rate": 3.919162643504259e-06, "loss": 0.196, "step": 21863 }, { "epoch": 71.68524590163935, "grad_norm": 2.0964131355285645, "learning_rate": 3.918319674494496e-06, "loss": 0.1435, "step": 21864 }, { "epoch": 71.68852459016394, "grad_norm": 2.7316112518310547, "learning_rate": 3.917476774062007e-06, "loss": 0.2319, "step": 21865 }, { "epoch": 71.69180327868852, "grad_norm": 2.7224366664886475, "learning_rate": 3.916633942216291e-06, "loss": 0.1911, "step": 21866 }, { "epoch": 71.69508196721311, "grad_norm": 2.4012513160705566, "learning_rate": 3.915791178966852e-06, "loss": 0.1432, "step": 21867 }, { "epoch": 71.6983606557377, "grad_norm": 2.1270601749420166, "learning_rate": 3.914948484323191e-06, "loss": 0.2655, "step": 21868 }, { "epoch": 71.70163934426229, "grad_norm": 2.5934901237487793, "learning_rate": 3.914105858294815e-06, "loss": 0.1937, "step": 21869 }, { "epoch": 71.70491803278688, "grad_norm": 2.522369861602783, "learning_rate": 3.913263300891223e-06, "loss": 0.1397, "step": 21870 }, { "epoch": 71.70819672131148, "grad_norm": 2.3061952590942383, "learning_rate": 3.912420812121917e-06, "loss": 0.0886, "step": 21871 }, { "epoch": 71.71147540983607, "grad_norm": 2.8162171840667725, "learning_rate": 3.911578391996395e-06, "loss": 0.1528, "step": 21872 }, { "epoch": 71.71475409836066, "grad_norm": 1.9457519054412842, "learning_rate": 3.910736040524155e-06, "loss": 0.0522, "step": 21873 }, { "epoch": 71.71803278688525, "grad_norm": 2.3574604988098145, "learning_rate": 3.9098937577147e-06, "loss": 0.1703, "step": 21874 }, { "epoch": 71.72131147540983, "grad_norm": 2.216071367263794, "learning_rate": 3.9090515435775245e-06, "loss": 0.0629, "step": 21875 }, { "epoch": 71.72459016393442, "grad_norm": 3.0264079570770264, "learning_rate": 3.908209398122127e-06, "loss": 0.1641, "step": 21876 }, { "epoch": 71.72786885245901, "grad_norm": 2.726741075515747, "learning_rate": 3.907367321357998e-06, "loss": 0.2345, "step": 21877 }, { "epoch": 71.73114754098361, "grad_norm": 3.602954864501953, "learning_rate": 3.90652531329464e-06, "loss": 0.2063, "step": 21878 }, { "epoch": 71.7344262295082, "grad_norm": 2.5740580558776855, "learning_rate": 3.905683373941546e-06, "loss": 0.1367, "step": 21879 }, { "epoch": 71.73770491803279, "grad_norm": 3.071794033050537, "learning_rate": 3.904841503308208e-06, "loss": 0.1191, "step": 21880 }, { "epoch": 71.74098360655738, "grad_norm": 2.572709083557129, "learning_rate": 3.903999701404115e-06, "loss": 0.1211, "step": 21881 }, { "epoch": 71.74426229508197, "grad_norm": 2.2448692321777344, "learning_rate": 3.903157968238769e-06, "loss": 0.0486, "step": 21882 }, { "epoch": 71.74754098360656, "grad_norm": 2.8983266353607178, "learning_rate": 3.902316303821655e-06, "loss": 0.1063, "step": 21883 }, { "epoch": 71.75081967213114, "grad_norm": 1.8226218223571777, "learning_rate": 3.901474708162265e-06, "loss": 0.1182, "step": 21884 }, { "epoch": 71.75409836065573, "grad_norm": 2.199981689453125, "learning_rate": 3.9006331812700845e-06, "loss": 0.2, "step": 21885 }, { "epoch": 71.75737704918033, "grad_norm": 2.6663424968719482, "learning_rate": 3.89979172315461e-06, "loss": 0.185, "step": 21886 }, { "epoch": 71.76065573770492, "grad_norm": 3.155527353286743, "learning_rate": 3.898950333825327e-06, "loss": 0.1285, "step": 21887 }, { "epoch": 71.76393442622951, "grad_norm": 2.6069397926330566, "learning_rate": 3.8981090132917185e-06, "loss": 0.1186, "step": 21888 }, { "epoch": 71.7672131147541, "grad_norm": 3.281980276107788, "learning_rate": 3.89726776156328e-06, "loss": 0.3155, "step": 21889 }, { "epoch": 71.77049180327869, "grad_norm": 2.368722915649414, "learning_rate": 3.8964265786494915e-06, "loss": 0.2114, "step": 21890 }, { "epoch": 71.77377049180328, "grad_norm": 3.2134194374084473, "learning_rate": 3.8955854645598365e-06, "loss": 0.2408, "step": 21891 }, { "epoch": 71.77704918032786, "grad_norm": 2.9096155166625977, "learning_rate": 3.894744419303805e-06, "loss": 0.1019, "step": 21892 }, { "epoch": 71.78032786885245, "grad_norm": 2.5995242595672607, "learning_rate": 3.893903442890879e-06, "loss": 0.142, "step": 21893 }, { "epoch": 71.78360655737706, "grad_norm": 2.065192461013794, "learning_rate": 3.89306253533054e-06, "loss": 0.062, "step": 21894 }, { "epoch": 71.78688524590164, "grad_norm": 3.173309564590454, "learning_rate": 3.892221696632268e-06, "loss": 0.24, "step": 21895 }, { "epoch": 71.79016393442623, "grad_norm": 2.8615710735321045, "learning_rate": 3.891380926805549e-06, "loss": 0.0913, "step": 21896 }, { "epoch": 71.79344262295082, "grad_norm": 2.4197356700897217, "learning_rate": 3.890540225859862e-06, "loss": 0.1885, "step": 21897 }, { "epoch": 71.79672131147541, "grad_norm": 2.398993492126465, "learning_rate": 3.889699593804686e-06, "loss": 0.1788, "step": 21898 }, { "epoch": 71.8, "grad_norm": 3.239614725112915, "learning_rate": 3.888859030649498e-06, "loss": 0.3351, "step": 21899 }, { "epoch": 71.80327868852459, "grad_norm": 2.4680662155151367, "learning_rate": 3.88801853640378e-06, "loss": 0.0924, "step": 21900 }, { "epoch": 71.80655737704917, "grad_norm": 2.2022621631622314, "learning_rate": 3.887178111077009e-06, "loss": 0.1349, "step": 21901 }, { "epoch": 71.80983606557378, "grad_norm": 2.398359775543213, "learning_rate": 3.88633775467866e-06, "loss": 0.0749, "step": 21902 }, { "epoch": 71.81311475409836, "grad_norm": 2.9864883422851562, "learning_rate": 3.885497467218206e-06, "loss": 0.1474, "step": 21903 }, { "epoch": 71.81639344262295, "grad_norm": 3.088510513305664, "learning_rate": 3.884657248705129e-06, "loss": 0.1828, "step": 21904 }, { "epoch": 71.81967213114754, "grad_norm": 2.5710289478302, "learning_rate": 3.8838170991489e-06, "loss": 0.0866, "step": 21905 }, { "epoch": 71.82295081967213, "grad_norm": 3.0044429302215576, "learning_rate": 3.882977018558993e-06, "loss": 0.1563, "step": 21906 }, { "epoch": 71.82622950819672, "grad_norm": 2.956319570541382, "learning_rate": 3.882137006944876e-06, "loss": 0.0665, "step": 21907 }, { "epoch": 71.8295081967213, "grad_norm": 1.6060748100280762, "learning_rate": 3.88129706431603e-06, "loss": 0.0831, "step": 21908 }, { "epoch": 71.8327868852459, "grad_norm": 3.007662057876587, "learning_rate": 3.88045719068192e-06, "loss": 0.1296, "step": 21909 }, { "epoch": 71.8360655737705, "grad_norm": 3.1899783611297607, "learning_rate": 3.879617386052018e-06, "loss": 0.2101, "step": 21910 }, { "epoch": 71.83934426229509, "grad_norm": 2.680682420730591, "learning_rate": 3.878777650435794e-06, "loss": 0.0575, "step": 21911 }, { "epoch": 71.84262295081967, "grad_norm": 2.7144598960876465, "learning_rate": 3.877937983842712e-06, "loss": 0.2063, "step": 21912 }, { "epoch": 71.84590163934426, "grad_norm": 2.628228187561035, "learning_rate": 3.8770983862822496e-06, "loss": 0.1272, "step": 21913 }, { "epoch": 71.84918032786885, "grad_norm": 2.3754115104675293, "learning_rate": 3.8762588577638685e-06, "loss": 0.1283, "step": 21914 }, { "epoch": 71.85245901639344, "grad_norm": 2.5286192893981934, "learning_rate": 3.8754193982970354e-06, "loss": 0.1283, "step": 21915 }, { "epoch": 71.85573770491803, "grad_norm": 2.3067636489868164, "learning_rate": 3.874580007891214e-06, "loss": 0.0505, "step": 21916 }, { "epoch": 71.85901639344263, "grad_norm": 2.8268277645111084, "learning_rate": 3.873740686555875e-06, "loss": 0.1962, "step": 21917 }, { "epoch": 71.86229508196722, "grad_norm": 2.5722343921661377, "learning_rate": 3.872901434300479e-06, "loss": 0.1575, "step": 21918 }, { "epoch": 71.8655737704918, "grad_norm": 2.7643840312957764, "learning_rate": 3.87206225113449e-06, "loss": 0.1343, "step": 21919 }, { "epoch": 71.8688524590164, "grad_norm": 2.4545087814331055, "learning_rate": 3.871223137067368e-06, "loss": 0.1252, "step": 21920 }, { "epoch": 71.87213114754098, "grad_norm": 3.7008683681488037, "learning_rate": 3.87038409210858e-06, "loss": 0.1694, "step": 21921 }, { "epoch": 71.87540983606557, "grad_norm": 2.587130546569824, "learning_rate": 3.869545116267584e-06, "loss": 0.1975, "step": 21922 }, { "epoch": 71.87868852459016, "grad_norm": 1.7700201272964478, "learning_rate": 3.868706209553843e-06, "loss": 0.0342, "step": 21923 }, { "epoch": 71.88196721311475, "grad_norm": 2.6081149578094482, "learning_rate": 3.867867371976812e-06, "loss": 0.1531, "step": 21924 }, { "epoch": 71.88524590163935, "grad_norm": 2.80534029006958, "learning_rate": 3.86702860354595e-06, "loss": 0.1587, "step": 21925 }, { "epoch": 71.88852459016394, "grad_norm": 3.0282986164093018, "learning_rate": 3.86618990427072e-06, "loss": 0.1238, "step": 21926 }, { "epoch": 71.89180327868853, "grad_norm": 1.9742541313171387, "learning_rate": 3.865351274160578e-06, "loss": 0.185, "step": 21927 }, { "epoch": 71.89508196721312, "grad_norm": 2.933506965637207, "learning_rate": 3.864512713224979e-06, "loss": 0.2794, "step": 21928 }, { "epoch": 71.8983606557377, "grad_norm": 2.107485771179199, "learning_rate": 3.863674221473372e-06, "loss": 0.0874, "step": 21929 }, { "epoch": 71.90163934426229, "grad_norm": 2.5443601608276367, "learning_rate": 3.862835798915224e-06, "loss": 0.2733, "step": 21930 }, { "epoch": 71.90491803278688, "grad_norm": 2.5782763957977295, "learning_rate": 3.861997445559983e-06, "loss": 0.0924, "step": 21931 }, { "epoch": 71.90819672131147, "grad_norm": 2.8851919174194336, "learning_rate": 3.861159161417103e-06, "loss": 0.1297, "step": 21932 }, { "epoch": 71.91147540983607, "grad_norm": 2.912522077560425, "learning_rate": 3.860320946496032e-06, "loss": 0.1658, "step": 21933 }, { "epoch": 71.91475409836066, "grad_norm": 2.740318536758423, "learning_rate": 3.85948280080623e-06, "loss": 0.144, "step": 21934 }, { "epoch": 71.91803278688525, "grad_norm": 2.762006998062134, "learning_rate": 3.8586447243571445e-06, "loss": 0.1058, "step": 21935 }, { "epoch": 71.92131147540984, "grad_norm": 2.094958543777466, "learning_rate": 3.857806717158224e-06, "loss": 0.1329, "step": 21936 }, { "epoch": 71.92459016393443, "grad_norm": 2.8617775440216064, "learning_rate": 3.856968779218919e-06, "loss": 0.1944, "step": 21937 }, { "epoch": 71.92786885245901, "grad_norm": 3.5019125938415527, "learning_rate": 3.856130910548676e-06, "loss": 0.1521, "step": 21938 }, { "epoch": 71.9311475409836, "grad_norm": 2.36210036277771, "learning_rate": 3.855293111156948e-06, "loss": 0.0813, "step": 21939 }, { "epoch": 71.93442622950819, "grad_norm": 3.1767239570617676, "learning_rate": 3.854455381053178e-06, "loss": 0.1512, "step": 21940 }, { "epoch": 71.9377049180328, "grad_norm": 2.809703826904297, "learning_rate": 3.853617720246812e-06, "loss": 0.1729, "step": 21941 }, { "epoch": 71.94098360655738, "grad_norm": 2.365063428878784, "learning_rate": 3.852780128747298e-06, "loss": 0.0784, "step": 21942 }, { "epoch": 71.94426229508197, "grad_norm": 3.505021810531616, "learning_rate": 3.851942606564081e-06, "loss": 0.2991, "step": 21943 }, { "epoch": 71.94754098360656, "grad_norm": 1.6869022846221924, "learning_rate": 3.851105153706599e-06, "loss": 0.0703, "step": 21944 }, { "epoch": 71.95081967213115, "grad_norm": 2.560488700866699, "learning_rate": 3.850267770184304e-06, "loss": 0.1589, "step": 21945 }, { "epoch": 71.95409836065573, "grad_norm": 3.2860870361328125, "learning_rate": 3.849430456006633e-06, "loss": 0.1833, "step": 21946 }, { "epoch": 71.95737704918032, "grad_norm": 1.922042727470398, "learning_rate": 3.848593211183026e-06, "loss": 0.1332, "step": 21947 }, { "epoch": 71.96065573770491, "grad_norm": 2.8483145236968994, "learning_rate": 3.8477560357229304e-06, "loss": 0.1716, "step": 21948 }, { "epoch": 71.96393442622951, "grad_norm": 3.1820337772369385, "learning_rate": 3.846918929635781e-06, "loss": 0.1, "step": 21949 }, { "epoch": 71.9672131147541, "grad_norm": 2.238365888595581, "learning_rate": 3.84608189293102e-06, "loss": 0.1446, "step": 21950 }, { "epoch": 71.97049180327869, "grad_norm": 3.0040395259857178, "learning_rate": 3.845244925618078e-06, "loss": 0.2343, "step": 21951 }, { "epoch": 71.97377049180328, "grad_norm": 1.967230200767517, "learning_rate": 3.844408027706405e-06, "loss": 0.116, "step": 21952 }, { "epoch": 71.97704918032787, "grad_norm": 3.2583017349243164, "learning_rate": 3.843571199205429e-06, "loss": 0.0772, "step": 21953 }, { "epoch": 71.98032786885246, "grad_norm": 2.1304192543029785, "learning_rate": 3.842734440124591e-06, "loss": 0.1335, "step": 21954 }, { "epoch": 71.98360655737704, "grad_norm": 3.424386739730835, "learning_rate": 3.8418977504733204e-06, "loss": 0.2112, "step": 21955 }, { "epoch": 71.98688524590163, "grad_norm": 2.78918194770813, "learning_rate": 3.841061130261058e-06, "loss": 0.175, "step": 21956 }, { "epoch": 71.99016393442623, "grad_norm": 3.7667887210845947, "learning_rate": 3.840224579497235e-06, "loss": 0.1243, "step": 21957 }, { "epoch": 71.99344262295082, "grad_norm": 2.0862913131713867, "learning_rate": 3.839388098191285e-06, "loss": 0.1418, "step": 21958 }, { "epoch": 71.99672131147541, "grad_norm": 3.5301239490509033, "learning_rate": 3.838551686352636e-06, "loss": 0.1514, "step": 21959 }, { "epoch": 72.0, "grad_norm": 2.0545692443847656, "learning_rate": 3.837715343990727e-06, "loss": 0.0533, "step": 21960 }, { "epoch": 72.00327868852459, "grad_norm": 2.717574119567871, "learning_rate": 3.8368790711149835e-06, "loss": 0.1528, "step": 21961 }, { "epoch": 72.00655737704918, "grad_norm": 2.6022777557373047, "learning_rate": 3.836042867734838e-06, "loss": 0.148, "step": 21962 }, { "epoch": 72.00983606557377, "grad_norm": 2.4613423347473145, "learning_rate": 3.835206733859718e-06, "loss": 0.1093, "step": 21963 }, { "epoch": 72.01311475409837, "grad_norm": 2.4419212341308594, "learning_rate": 3.834370669499047e-06, "loss": 0.1993, "step": 21964 }, { "epoch": 72.01639344262296, "grad_norm": 2.8313028812408447, "learning_rate": 3.833534674662261e-06, "loss": 0.2436, "step": 21965 }, { "epoch": 72.01967213114754, "grad_norm": 2.4687411785125732, "learning_rate": 3.832698749358784e-06, "loss": 0.0468, "step": 21966 }, { "epoch": 72.02295081967213, "grad_norm": 2.1665189266204834, "learning_rate": 3.8318628935980405e-06, "loss": 0.1068, "step": 21967 }, { "epoch": 72.02622950819672, "grad_norm": 1.7259701490402222, "learning_rate": 3.8310271073894535e-06, "loss": 0.1351, "step": 21968 }, { "epoch": 72.02950819672131, "grad_norm": 2.8274614810943604, "learning_rate": 3.830191390742453e-06, "loss": 0.0986, "step": 21969 }, { "epoch": 72.0327868852459, "grad_norm": 2.6637072563171387, "learning_rate": 3.8293557436664584e-06, "loss": 0.1035, "step": 21970 }, { "epoch": 72.03606557377049, "grad_norm": 3.153128147125244, "learning_rate": 3.828520166170895e-06, "loss": 0.2412, "step": 21971 }, { "epoch": 72.03934426229509, "grad_norm": 7.766210556030273, "learning_rate": 3.82768465826518e-06, "loss": 0.1563, "step": 21972 }, { "epoch": 72.04262295081968, "grad_norm": 3.7919890880584717, "learning_rate": 3.826849219958741e-06, "loss": 0.13, "step": 21973 }, { "epoch": 72.04590163934427, "grad_norm": 1.9888843297958374, "learning_rate": 3.826013851260994e-06, "loss": 0.2614, "step": 21974 }, { "epoch": 72.04918032786885, "grad_norm": 2.6035306453704834, "learning_rate": 3.825178552181362e-06, "loss": 0.1433, "step": 21975 }, { "epoch": 72.05245901639344, "grad_norm": 2.905097723007202, "learning_rate": 3.8243433227292625e-06, "loss": 0.277, "step": 21976 }, { "epoch": 72.05573770491803, "grad_norm": 2.426760196685791, "learning_rate": 3.823508162914108e-06, "loss": 0.3925, "step": 21977 }, { "epoch": 72.05901639344262, "grad_norm": 2.5374107360839844, "learning_rate": 3.822673072745325e-06, "loss": 0.1319, "step": 21978 }, { "epoch": 72.0622950819672, "grad_norm": 2.9629390239715576, "learning_rate": 3.8218380522323275e-06, "loss": 0.0833, "step": 21979 }, { "epoch": 72.06557377049181, "grad_norm": 1.8993918895721436, "learning_rate": 3.821003101384527e-06, "loss": 0.0486, "step": 21980 }, { "epoch": 72.0688524590164, "grad_norm": 2.4694104194641113, "learning_rate": 3.82016822021134e-06, "loss": 0.0939, "step": 21981 }, { "epoch": 72.07213114754099, "grad_norm": 2.6748104095458984, "learning_rate": 3.819333408722184e-06, "loss": 0.1774, "step": 21982 }, { "epoch": 72.07540983606557, "grad_norm": 2.8437390327453613, "learning_rate": 3.81849866692647e-06, "loss": 0.1839, "step": 21983 }, { "epoch": 72.07868852459016, "grad_norm": 1.9634274244308472, "learning_rate": 3.817663994833611e-06, "loss": 0.118, "step": 21984 }, { "epoch": 72.08196721311475, "grad_norm": 2.040841579437256, "learning_rate": 3.816829392453016e-06, "loss": 0.0561, "step": 21985 }, { "epoch": 72.08524590163934, "grad_norm": 3.0542707443237305, "learning_rate": 3.8159948597941e-06, "loss": 0.2182, "step": 21986 }, { "epoch": 72.08852459016393, "grad_norm": 2.2730565071105957, "learning_rate": 3.815160396866272e-06, "loss": 0.1724, "step": 21987 }, { "epoch": 72.09180327868853, "grad_norm": 2.7278871536254883, "learning_rate": 3.814326003678942e-06, "loss": 0.0926, "step": 21988 }, { "epoch": 72.09508196721312, "grad_norm": 2.8707711696624756, "learning_rate": 3.8134916802415178e-06, "loss": 0.1507, "step": 21989 }, { "epoch": 72.09836065573771, "grad_norm": 2.1778757572174072, "learning_rate": 3.812657426563403e-06, "loss": 0.1365, "step": 21990 }, { "epoch": 72.1016393442623, "grad_norm": 3.1437206268310547, "learning_rate": 3.8118232426540135e-06, "loss": 0.0971, "step": 21991 }, { "epoch": 72.10491803278688, "grad_norm": 2.853142261505127, "learning_rate": 3.8109891285227497e-06, "loss": 0.0736, "step": 21992 }, { "epoch": 72.10819672131147, "grad_norm": 2.4025044441223145, "learning_rate": 3.810155084179016e-06, "loss": 0.0598, "step": 21993 }, { "epoch": 72.11147540983606, "grad_norm": 2.951354742050171, "learning_rate": 3.8093211096322223e-06, "loss": 0.0535, "step": 21994 }, { "epoch": 72.11475409836065, "grad_norm": 2.4947707653045654, "learning_rate": 3.808487204891771e-06, "loss": 0.1848, "step": 21995 }, { "epoch": 72.11803278688525, "grad_norm": 3.1351358890533447, "learning_rate": 3.8076533699670627e-06, "loss": 0.2041, "step": 21996 }, { "epoch": 72.12131147540984, "grad_norm": 2.9294626712799072, "learning_rate": 3.8068196048674986e-06, "loss": 0.1098, "step": 21997 }, { "epoch": 72.12459016393443, "grad_norm": 2.9446802139282227, "learning_rate": 3.8059859096024853e-06, "loss": 0.098, "step": 21998 }, { "epoch": 72.12786885245902, "grad_norm": 3.001063108444214, "learning_rate": 3.8051522841814215e-06, "loss": 0.1826, "step": 21999 }, { "epoch": 72.1311475409836, "grad_norm": 3.094437599182129, "learning_rate": 3.804318728613704e-06, "loss": 0.1615, "step": 22000 }, { "epoch": 72.1344262295082, "grad_norm": 2.840015411376953, "learning_rate": 3.8034852429087365e-06, "loss": 0.1522, "step": 22001 }, { "epoch": 72.13770491803278, "grad_norm": 1.8358571529388428, "learning_rate": 3.8026518270759173e-06, "loss": 0.1109, "step": 22002 }, { "epoch": 72.14098360655737, "grad_norm": 2.6257212162017822, "learning_rate": 3.8018184811246386e-06, "loss": 0.1159, "step": 22003 }, { "epoch": 72.14426229508197, "grad_norm": 2.589939594268799, "learning_rate": 3.8009852050643035e-06, "loss": 0.0709, "step": 22004 }, { "epoch": 72.14754098360656, "grad_norm": 2.4490902423858643, "learning_rate": 3.8001519989043057e-06, "loss": 0.2143, "step": 22005 }, { "epoch": 72.15081967213115, "grad_norm": 2.4342498779296875, "learning_rate": 3.799318862654041e-06, "loss": 0.0855, "step": 22006 }, { "epoch": 72.15409836065574, "grad_norm": 2.609682083129883, "learning_rate": 3.7984857963228994e-06, "loss": 0.159, "step": 22007 }, { "epoch": 72.15737704918033, "grad_norm": 3.533459424972534, "learning_rate": 3.7976527999202827e-06, "loss": 0.2839, "step": 22008 }, { "epoch": 72.16065573770491, "grad_norm": 2.332219362258911, "learning_rate": 3.796819873455578e-06, "loss": 0.0427, "step": 22009 }, { "epoch": 72.1639344262295, "grad_norm": 2.2487831115722656, "learning_rate": 3.7959870169381805e-06, "loss": 0.1071, "step": 22010 }, { "epoch": 72.1672131147541, "grad_norm": 2.6828112602233887, "learning_rate": 3.795154230377476e-06, "loss": 0.0891, "step": 22011 }, { "epoch": 72.1704918032787, "grad_norm": 2.57861590385437, "learning_rate": 3.7943215137828616e-06, "loss": 0.0888, "step": 22012 }, { "epoch": 72.17377049180328, "grad_norm": 2.2578015327453613, "learning_rate": 3.793488867163725e-06, "loss": 0.0776, "step": 22013 }, { "epoch": 72.17704918032787, "grad_norm": 2.299452066421509, "learning_rate": 3.792656290529455e-06, "loss": 0.091, "step": 22014 }, { "epoch": 72.18032786885246, "grad_norm": 2.6096975803375244, "learning_rate": 3.791823783889439e-06, "loss": 0.2224, "step": 22015 }, { "epoch": 72.18360655737705, "grad_norm": 3.869004726409912, "learning_rate": 3.7909913472530603e-06, "loss": 0.0531, "step": 22016 }, { "epoch": 72.18688524590164, "grad_norm": 2.931708812713623, "learning_rate": 3.7901589806297144e-06, "loss": 0.2794, "step": 22017 }, { "epoch": 72.19016393442622, "grad_norm": 3.0218281745910645, "learning_rate": 3.7893266840287823e-06, "loss": 0.2403, "step": 22018 }, { "epoch": 72.19344262295083, "grad_norm": 2.4334466457366943, "learning_rate": 3.7884944574596496e-06, "loss": 0.1901, "step": 22019 }, { "epoch": 72.19672131147541, "grad_norm": 2.7413766384124756, "learning_rate": 3.787662300931697e-06, "loss": 0.0811, "step": 22020 }, { "epoch": 72.2, "grad_norm": 2.4699676036834717, "learning_rate": 3.7868302144543146e-06, "loss": 0.2639, "step": 22021 }, { "epoch": 72.20327868852459, "grad_norm": 1.6427769660949707, "learning_rate": 3.785998198036881e-06, "loss": 0.1234, "step": 22022 }, { "epoch": 72.20655737704918, "grad_norm": 2.588467836380005, "learning_rate": 3.7851662516887787e-06, "loss": 0.073, "step": 22023 }, { "epoch": 72.20983606557377, "grad_norm": 3.200655698776245, "learning_rate": 3.7843343754193853e-06, "loss": 0.1038, "step": 22024 }, { "epoch": 72.21311475409836, "grad_norm": 2.9709129333496094, "learning_rate": 3.7835025692380876e-06, "loss": 0.2941, "step": 22025 }, { "epoch": 72.21639344262294, "grad_norm": 2.7985000610351562, "learning_rate": 3.7826708331542627e-06, "loss": 0.1785, "step": 22026 }, { "epoch": 72.21967213114755, "grad_norm": 2.4475367069244385, "learning_rate": 3.7818391671772893e-06, "loss": 0.1266, "step": 22027 }, { "epoch": 72.22295081967214, "grad_norm": 2.7235589027404785, "learning_rate": 3.781007571316543e-06, "loss": 0.1311, "step": 22028 }, { "epoch": 72.22622950819672, "grad_norm": 2.6812024116516113, "learning_rate": 3.7801760455813997e-06, "loss": 0.0967, "step": 22029 }, { "epoch": 72.22950819672131, "grad_norm": 4.051136016845703, "learning_rate": 3.779344589981242e-06, "loss": 0.1989, "step": 22030 }, { "epoch": 72.2327868852459, "grad_norm": 2.542914390563965, "learning_rate": 3.778513204525441e-06, "loss": 0.1485, "step": 22031 }, { "epoch": 72.23606557377049, "grad_norm": 3.443579912185669, "learning_rate": 3.7776818892233737e-06, "loss": 0.1575, "step": 22032 }, { "epoch": 72.23934426229508, "grad_norm": 2.7646117210388184, "learning_rate": 3.776850644084409e-06, "loss": 0.1454, "step": 22033 }, { "epoch": 72.24262295081967, "grad_norm": 2.954698085784912, "learning_rate": 3.776019469117926e-06, "loss": 0.0684, "step": 22034 }, { "epoch": 72.24590163934427, "grad_norm": 3.026092529296875, "learning_rate": 3.7751883643332965e-06, "loss": 0.1878, "step": 22035 }, { "epoch": 72.24918032786886, "grad_norm": 2.3013126850128174, "learning_rate": 3.7743573297398896e-06, "loss": 0.091, "step": 22036 }, { "epoch": 72.25245901639344, "grad_norm": 2.569099187850952, "learning_rate": 3.7735263653470732e-06, "loss": 0.1514, "step": 22037 }, { "epoch": 72.25573770491803, "grad_norm": 2.399895191192627, "learning_rate": 3.7726954711642237e-06, "loss": 0.1384, "step": 22038 }, { "epoch": 72.25901639344262, "grad_norm": 2.747157335281372, "learning_rate": 3.771864647200709e-06, "loss": 0.3015, "step": 22039 }, { "epoch": 72.26229508196721, "grad_norm": 2.360738515853882, "learning_rate": 3.7710338934658952e-06, "loss": 0.0538, "step": 22040 }, { "epoch": 72.2655737704918, "grad_norm": 2.1766486167907715, "learning_rate": 3.770203209969151e-06, "loss": 0.0622, "step": 22041 }, { "epoch": 72.26885245901639, "grad_norm": 2.271669864654541, "learning_rate": 3.769372596719839e-06, "loss": 0.1102, "step": 22042 }, { "epoch": 72.27213114754099, "grad_norm": 1.6488542556762695, "learning_rate": 3.768542053727333e-06, "loss": 0.1085, "step": 22043 }, { "epoch": 72.27540983606558, "grad_norm": 2.5640530586242676, "learning_rate": 3.7677115810009956e-06, "loss": 0.1835, "step": 22044 }, { "epoch": 72.27868852459017, "grad_norm": 2.170665979385376, "learning_rate": 3.766881178550189e-06, "loss": 0.0359, "step": 22045 }, { "epoch": 72.28196721311475, "grad_norm": 3.1663658618927, "learning_rate": 3.766050846384274e-06, "loss": 0.0831, "step": 22046 }, { "epoch": 72.28524590163934, "grad_norm": 1.788360357284546, "learning_rate": 3.765220584512621e-06, "loss": 0.0401, "step": 22047 }, { "epoch": 72.28852459016393, "grad_norm": 2.8376717567443848, "learning_rate": 3.764390392944589e-06, "loss": 0.231, "step": 22048 }, { "epoch": 72.29180327868852, "grad_norm": 2.763669490814209, "learning_rate": 3.763560271689536e-06, "loss": 0.1337, "step": 22049 }, { "epoch": 72.29508196721312, "grad_norm": 2.4028820991516113, "learning_rate": 3.7627302207568272e-06, "loss": 0.0712, "step": 22050 }, { "epoch": 72.29836065573771, "grad_norm": 2.4558115005493164, "learning_rate": 3.76190024015582e-06, "loss": 0.1024, "step": 22051 }, { "epoch": 72.3016393442623, "grad_norm": 2.375364303588867, "learning_rate": 3.7610703298958717e-06, "loss": 0.1628, "step": 22052 }, { "epoch": 72.30491803278689, "grad_norm": 2.1784703731536865, "learning_rate": 3.7602404899863455e-06, "loss": 0.1105, "step": 22053 }, { "epoch": 72.30819672131148, "grad_norm": 3.443652868270874, "learning_rate": 3.759410720436595e-06, "loss": 0.145, "step": 22054 }, { "epoch": 72.31147540983606, "grad_norm": 2.088139295578003, "learning_rate": 3.7585810212559738e-06, "loss": 0.127, "step": 22055 }, { "epoch": 72.31475409836065, "grad_norm": 1.855092167854309, "learning_rate": 3.7577513924538446e-06, "loss": 0.0878, "step": 22056 }, { "epoch": 72.31803278688524, "grad_norm": 2.4862499237060547, "learning_rate": 3.7569218340395575e-06, "loss": 0.1215, "step": 22057 }, { "epoch": 72.32131147540984, "grad_norm": 2.294203519821167, "learning_rate": 3.7560923460224696e-06, "loss": 0.2528, "step": 22058 }, { "epoch": 72.32459016393443, "grad_norm": 2.435420036315918, "learning_rate": 3.755262928411928e-06, "loss": 0.0639, "step": 22059 }, { "epoch": 72.32786885245902, "grad_norm": 2.6149203777313232, "learning_rate": 3.7544335812172938e-06, "loss": 0.1924, "step": 22060 }, { "epoch": 72.33114754098361, "grad_norm": 2.3198866844177246, "learning_rate": 3.753604304447915e-06, "loss": 0.1076, "step": 22061 }, { "epoch": 72.3344262295082, "grad_norm": 2.409633159637451, "learning_rate": 3.7527750981131415e-06, "loss": 0.152, "step": 22062 }, { "epoch": 72.33770491803278, "grad_norm": 2.517064094543457, "learning_rate": 3.751945962222322e-06, "loss": 0.1586, "step": 22063 }, { "epoch": 72.34098360655737, "grad_norm": 3.5607430934906006, "learning_rate": 3.751116896784811e-06, "loss": 0.1468, "step": 22064 }, { "epoch": 72.34426229508196, "grad_norm": 2.5032424926757812, "learning_rate": 3.7502879018099536e-06, "loss": 0.1161, "step": 22065 }, { "epoch": 72.34754098360656, "grad_norm": 2.396329164505005, "learning_rate": 3.749458977307099e-06, "loss": 0.0708, "step": 22066 }, { "epoch": 72.35081967213115, "grad_norm": 3.491452693939209, "learning_rate": 3.7486301232855925e-06, "loss": 0.2756, "step": 22067 }, { "epoch": 72.35409836065574, "grad_norm": 3.182783603668213, "learning_rate": 3.7478013397547786e-06, "loss": 0.2456, "step": 22068 }, { "epoch": 72.35737704918033, "grad_norm": 2.389941453933716, "learning_rate": 3.746972626724008e-06, "loss": 0.0866, "step": 22069 }, { "epoch": 72.36065573770492, "grad_norm": 2.1167852878570557, "learning_rate": 3.7461439842026225e-06, "loss": 0.1068, "step": 22070 }, { "epoch": 72.3639344262295, "grad_norm": 1.8438982963562012, "learning_rate": 3.745315412199967e-06, "loss": 0.0555, "step": 22071 }, { "epoch": 72.3672131147541, "grad_norm": 2.4384500980377197, "learning_rate": 3.7444869107253787e-06, "loss": 0.1606, "step": 22072 }, { "epoch": 72.37049180327868, "grad_norm": 3.4935882091522217, "learning_rate": 3.743658479788209e-06, "loss": 0.1223, "step": 22073 }, { "epoch": 72.37377049180328, "grad_norm": 3.0039336681365967, "learning_rate": 3.7428301193977947e-06, "loss": 0.0855, "step": 22074 }, { "epoch": 72.37704918032787, "grad_norm": 2.6162407398223877, "learning_rate": 3.7420018295634765e-06, "loss": 0.1957, "step": 22075 }, { "epoch": 72.38032786885246, "grad_norm": 2.2296736240386963, "learning_rate": 3.7411736102945905e-06, "loss": 0.1352, "step": 22076 }, { "epoch": 72.38360655737705, "grad_norm": 2.3545260429382324, "learning_rate": 3.740345461600483e-06, "loss": 0.1201, "step": 22077 }, { "epoch": 72.38688524590164, "grad_norm": 2.176194429397583, "learning_rate": 3.7395173834904897e-06, "loss": 0.0725, "step": 22078 }, { "epoch": 72.39016393442623, "grad_norm": 2.98321533203125, "learning_rate": 3.7386893759739464e-06, "loss": 0.1796, "step": 22079 }, { "epoch": 72.39344262295081, "grad_norm": 2.2812161445617676, "learning_rate": 3.737861439060191e-06, "loss": 0.0448, "step": 22080 }, { "epoch": 72.3967213114754, "grad_norm": 2.6670031547546387, "learning_rate": 3.737033572758555e-06, "loss": 0.1705, "step": 22081 }, { "epoch": 72.4, "grad_norm": 2.845797538757324, "learning_rate": 3.736205777078381e-06, "loss": 0.1401, "step": 22082 }, { "epoch": 72.4032786885246, "grad_norm": 2.168024778366089, "learning_rate": 3.7353780520290006e-06, "loss": 0.122, "step": 22083 }, { "epoch": 72.40655737704918, "grad_norm": 2.260808229446411, "learning_rate": 3.734550397619745e-06, "loss": 0.0526, "step": 22084 }, { "epoch": 72.40983606557377, "grad_norm": 2.825439929962158, "learning_rate": 3.7337228138599447e-06, "loss": 0.1801, "step": 22085 }, { "epoch": 72.41311475409836, "grad_norm": 1.8496228456497192, "learning_rate": 3.7328953007589387e-06, "loss": 0.1675, "step": 22086 }, { "epoch": 72.41639344262295, "grad_norm": 2.8481156826019287, "learning_rate": 3.732067858326054e-06, "loss": 0.1337, "step": 22087 }, { "epoch": 72.41967213114754, "grad_norm": 2.816953182220459, "learning_rate": 3.731240486570622e-06, "loss": 0.2113, "step": 22088 }, { "epoch": 72.42295081967212, "grad_norm": 2.002401828765869, "learning_rate": 3.7304131855019663e-06, "loss": 0.1544, "step": 22089 }, { "epoch": 72.42622950819673, "grad_norm": 2.2279248237609863, "learning_rate": 3.7295859551294256e-06, "loss": 0.1482, "step": 22090 }, { "epoch": 72.42950819672132, "grad_norm": 2.06142520904541, "learning_rate": 3.7287587954623228e-06, "loss": 0.1465, "step": 22091 }, { "epoch": 72.4327868852459, "grad_norm": 2.4300572872161865, "learning_rate": 3.7279317065099854e-06, "loss": 0.253, "step": 22092 }, { "epoch": 72.43606557377049, "grad_norm": 2.225508213043213, "learning_rate": 3.7271046882817375e-06, "loss": 0.1627, "step": 22093 }, { "epoch": 72.43934426229508, "grad_norm": 2.6721889972686768, "learning_rate": 3.7262777407869046e-06, "loss": 0.1794, "step": 22094 }, { "epoch": 72.44262295081967, "grad_norm": 2.44823956489563, "learning_rate": 3.7254508640348162e-06, "loss": 0.1057, "step": 22095 }, { "epoch": 72.44590163934426, "grad_norm": 2.177100419998169, "learning_rate": 3.7246240580347924e-06, "loss": 0.0392, "step": 22096 }, { "epoch": 72.44918032786886, "grad_norm": 2.4089815616607666, "learning_rate": 3.723797322796159e-06, "loss": 0.072, "step": 22097 }, { "epoch": 72.45245901639345, "grad_norm": 2.46466326713562, "learning_rate": 3.722970658328231e-06, "loss": 0.1527, "step": 22098 }, { "epoch": 72.45573770491804, "grad_norm": 2.5990538597106934, "learning_rate": 3.7221440646403396e-06, "loss": 0.0975, "step": 22099 }, { "epoch": 72.45901639344262, "grad_norm": 2.31585431098938, "learning_rate": 3.7213175417418012e-06, "loss": 0.1088, "step": 22100 }, { "epoch": 72.46229508196721, "grad_norm": 1.943198561668396, "learning_rate": 3.7204910896419353e-06, "loss": 0.0817, "step": 22101 }, { "epoch": 72.4655737704918, "grad_norm": 4.0949835777282715, "learning_rate": 3.7196647083500593e-06, "loss": 0.1061, "step": 22102 }, { "epoch": 72.46885245901639, "grad_norm": 2.3819689750671387, "learning_rate": 3.718838397875496e-06, "loss": 0.0624, "step": 22103 }, { "epoch": 72.47213114754098, "grad_norm": 3.0483386516571045, "learning_rate": 3.718012158227561e-06, "loss": 0.11, "step": 22104 }, { "epoch": 72.47540983606558, "grad_norm": 3.1062610149383545, "learning_rate": 3.717185989415566e-06, "loss": 0.1699, "step": 22105 }, { "epoch": 72.47868852459017, "grad_norm": 1.919643521308899, "learning_rate": 3.7163598914488364e-06, "loss": 0.0736, "step": 22106 }, { "epoch": 72.48196721311476, "grad_norm": 1.8504451513290405, "learning_rate": 3.715533864336681e-06, "loss": 0.0362, "step": 22107 }, { "epoch": 72.48524590163935, "grad_norm": 2.2480812072753906, "learning_rate": 3.714707908088413e-06, "loss": 0.0541, "step": 22108 }, { "epoch": 72.48852459016393, "grad_norm": 1.4847244024276733, "learning_rate": 3.713882022713351e-06, "loss": 0.0222, "step": 22109 }, { "epoch": 72.49180327868852, "grad_norm": 2.717867612838745, "learning_rate": 3.7130562082208054e-06, "loss": 0.0817, "step": 22110 }, { "epoch": 72.49508196721311, "grad_norm": 2.891793727874756, "learning_rate": 3.7122304646200846e-06, "loss": 0.1383, "step": 22111 }, { "epoch": 72.4983606557377, "grad_norm": 2.379164695739746, "learning_rate": 3.7114047919205066e-06, "loss": 0.0998, "step": 22112 }, { "epoch": 72.5016393442623, "grad_norm": 3.088595151901245, "learning_rate": 3.710579190131378e-06, "loss": 0.1918, "step": 22113 }, { "epoch": 72.50491803278689, "grad_norm": 3.532895088195801, "learning_rate": 3.7097536592620086e-06, "loss": 0.1872, "step": 22114 }, { "epoch": 72.50819672131148, "grad_norm": 2.3270328044891357, "learning_rate": 3.708928199321703e-06, "loss": 0.1408, "step": 22115 }, { "epoch": 72.51147540983607, "grad_norm": 2.8346192836761475, "learning_rate": 3.708102810319777e-06, "loss": 0.0873, "step": 22116 }, { "epoch": 72.51475409836065, "grad_norm": 2.749316930770874, "learning_rate": 3.707277492265533e-06, "loss": 0.0674, "step": 22117 }, { "epoch": 72.51803278688524, "grad_norm": 2.456407070159912, "learning_rate": 3.7064522451682782e-06, "loss": 0.1134, "step": 22118 }, { "epoch": 72.52131147540983, "grad_norm": 3.047264814376831, "learning_rate": 3.7056270690373186e-06, "loss": 0.263, "step": 22119 }, { "epoch": 72.52459016393442, "grad_norm": 2.9906277656555176, "learning_rate": 3.7048019638819545e-06, "loss": 0.1758, "step": 22120 }, { "epoch": 72.52786885245902, "grad_norm": 2.570941686630249, "learning_rate": 3.7039769297114968e-06, "loss": 0.1228, "step": 22121 }, { "epoch": 72.53114754098361, "grad_norm": 2.70226788520813, "learning_rate": 3.7031519665352456e-06, "loss": 0.2002, "step": 22122 }, { "epoch": 72.5344262295082, "grad_norm": 2.468238115310669, "learning_rate": 3.702327074362504e-06, "loss": 0.2036, "step": 22123 }, { "epoch": 72.53770491803279, "grad_norm": 2.8272173404693604, "learning_rate": 3.701502253202568e-06, "loss": 0.0865, "step": 22124 }, { "epoch": 72.54098360655738, "grad_norm": 2.683138608932495, "learning_rate": 3.700677503064747e-06, "loss": 0.1306, "step": 22125 }, { "epoch": 72.54426229508196, "grad_norm": 2.5475761890411377, "learning_rate": 3.699852823958335e-06, "loss": 0.1489, "step": 22126 }, { "epoch": 72.54754098360655, "grad_norm": 2.3128440380096436, "learning_rate": 3.699028215892635e-06, "loss": 0.1369, "step": 22127 }, { "epoch": 72.55081967213114, "grad_norm": 2.6765100955963135, "learning_rate": 3.698203678876939e-06, "loss": 0.1053, "step": 22128 }, { "epoch": 72.55409836065574, "grad_norm": 2.3673951625823975, "learning_rate": 3.697379212920552e-06, "loss": 0.08, "step": 22129 }, { "epoch": 72.55737704918033, "grad_norm": 2.8089122772216797, "learning_rate": 3.696554818032768e-06, "loss": 0.2157, "step": 22130 }, { "epoch": 72.56065573770492, "grad_norm": 2.3722431659698486, "learning_rate": 3.6957304942228822e-06, "loss": 0.1604, "step": 22131 }, { "epoch": 72.56393442622951, "grad_norm": 5.635140895843506, "learning_rate": 3.69490624150019e-06, "loss": 0.0517, "step": 22132 }, { "epoch": 72.5672131147541, "grad_norm": 2.347935676574707, "learning_rate": 3.6940820598739823e-06, "loss": 0.17, "step": 22133 }, { "epoch": 72.57049180327868, "grad_norm": 2.569303512573242, "learning_rate": 3.693257949353558e-06, "loss": 0.0796, "step": 22134 }, { "epoch": 72.57377049180327, "grad_norm": 2.499502182006836, "learning_rate": 3.69243390994821e-06, "loss": 0.1585, "step": 22135 }, { "epoch": 72.57704918032788, "grad_norm": 3.0402309894561768, "learning_rate": 3.6916099416672255e-06, "loss": 0.2011, "step": 22136 }, { "epoch": 72.58032786885246, "grad_norm": 2.568711519241333, "learning_rate": 3.690786044519896e-06, "loss": 0.2131, "step": 22137 }, { "epoch": 72.58360655737705, "grad_norm": 2.4550764560699463, "learning_rate": 3.689962218515517e-06, "loss": 0.1344, "step": 22138 }, { "epoch": 72.58688524590164, "grad_norm": 2.6391913890838623, "learning_rate": 3.6891384636633744e-06, "loss": 0.2265, "step": 22139 }, { "epoch": 72.59016393442623, "grad_norm": 3.2884836196899414, "learning_rate": 3.688314779972757e-06, "loss": 0.0961, "step": 22140 }, { "epoch": 72.59344262295082, "grad_norm": 2.5197436809539795, "learning_rate": 3.6874911674529535e-06, "loss": 0.1017, "step": 22141 }, { "epoch": 72.5967213114754, "grad_norm": 2.1662890911102295, "learning_rate": 3.6866676261132473e-06, "loss": 0.1302, "step": 22142 }, { "epoch": 72.6, "grad_norm": 2.550771951675415, "learning_rate": 3.685844155962931e-06, "loss": 0.1429, "step": 22143 }, { "epoch": 72.6032786885246, "grad_norm": 2.890981674194336, "learning_rate": 3.6850207570112872e-06, "loss": 0.1478, "step": 22144 }, { "epoch": 72.60655737704919, "grad_norm": 2.2195072174072266, "learning_rate": 3.6841974292676e-06, "loss": 0.0776, "step": 22145 }, { "epoch": 72.60983606557377, "grad_norm": 2.8216519355773926, "learning_rate": 3.6833741727411497e-06, "loss": 0.2452, "step": 22146 }, { "epoch": 72.61311475409836, "grad_norm": 2.3827130794525146, "learning_rate": 3.682550987441227e-06, "loss": 0.1479, "step": 22147 }, { "epoch": 72.61639344262295, "grad_norm": 3.0847184658050537, "learning_rate": 3.68172787337711e-06, "loss": 0.2194, "step": 22148 }, { "epoch": 72.61967213114754, "grad_norm": 3.4622087478637695, "learning_rate": 3.6809048305580818e-06, "loss": 0.3754, "step": 22149 }, { "epoch": 72.62295081967213, "grad_norm": 2.5146403312683105, "learning_rate": 3.6800818589934174e-06, "loss": 0.1498, "step": 22150 }, { "epoch": 72.62622950819672, "grad_norm": 2.6907248497009277, "learning_rate": 3.679258958692404e-06, "loss": 0.1093, "step": 22151 }, { "epoch": 72.62950819672132, "grad_norm": 1.9752655029296875, "learning_rate": 3.678436129664319e-06, "loss": 0.0897, "step": 22152 }, { "epoch": 72.6327868852459, "grad_norm": 2.980173349380493, "learning_rate": 3.677613371918439e-06, "loss": 0.0573, "step": 22153 }, { "epoch": 72.6360655737705, "grad_norm": 2.83556866645813, "learning_rate": 3.676790685464039e-06, "loss": 0.1671, "step": 22154 }, { "epoch": 72.63934426229508, "grad_norm": 2.686047077178955, "learning_rate": 3.6759680703104016e-06, "loss": 0.074, "step": 22155 }, { "epoch": 72.64262295081967, "grad_norm": 2.338972330093384, "learning_rate": 3.675145526466799e-06, "loss": 0.0897, "step": 22156 }, { "epoch": 72.64590163934426, "grad_norm": 1.8827065229415894, "learning_rate": 3.6743230539425035e-06, "loss": 0.0436, "step": 22157 }, { "epoch": 72.64918032786885, "grad_norm": 2.959444761276245, "learning_rate": 3.6735006527467967e-06, "loss": 0.1127, "step": 22158 }, { "epoch": 72.65245901639344, "grad_norm": 2.9652719497680664, "learning_rate": 3.6726783228889475e-06, "loss": 0.0962, "step": 22159 }, { "epoch": 72.65573770491804, "grad_norm": 3.2182528972625732, "learning_rate": 3.671856064378229e-06, "loss": 0.1627, "step": 22160 }, { "epoch": 72.65901639344263, "grad_norm": 3.1252939701080322, "learning_rate": 3.6710338772239094e-06, "loss": 0.2204, "step": 22161 }, { "epoch": 72.66229508196722, "grad_norm": 2.392728567123413, "learning_rate": 3.6702117614352663e-06, "loss": 0.1598, "step": 22162 }, { "epoch": 72.6655737704918, "grad_norm": 2.3206851482391357, "learning_rate": 3.6693897170215674e-06, "loss": 0.0806, "step": 22163 }, { "epoch": 72.66885245901639, "grad_norm": 2.022676944732666, "learning_rate": 3.6685677439920788e-06, "loss": 0.0446, "step": 22164 }, { "epoch": 72.67213114754098, "grad_norm": 3.012601375579834, "learning_rate": 3.6677458423560754e-06, "loss": 0.1254, "step": 22165 }, { "epoch": 72.67540983606557, "grad_norm": 2.203561305999756, "learning_rate": 3.66692401212282e-06, "loss": 0.0766, "step": 22166 }, { "epoch": 72.67868852459016, "grad_norm": 2.5727574825286865, "learning_rate": 3.6661022533015822e-06, "loss": 0.0849, "step": 22167 }, { "epoch": 72.68196721311476, "grad_norm": 4.758814334869385, "learning_rate": 3.6652805659016234e-06, "loss": 0.1499, "step": 22168 }, { "epoch": 72.68524590163935, "grad_norm": 2.35746693611145, "learning_rate": 3.664458949932217e-06, "loss": 0.1208, "step": 22169 }, { "epoch": 72.68852459016394, "grad_norm": 3.147785186767578, "learning_rate": 3.6636374054026223e-06, "loss": 0.2139, "step": 22170 }, { "epoch": 72.69180327868852, "grad_norm": 2.801100254058838, "learning_rate": 3.6628159323221034e-06, "loss": 0.1121, "step": 22171 }, { "epoch": 72.69508196721311, "grad_norm": 2.8310635089874268, "learning_rate": 3.6619945306999216e-06, "loss": 0.182, "step": 22172 }, { "epoch": 72.6983606557377, "grad_norm": 2.923370122909546, "learning_rate": 3.6611732005453448e-06, "loss": 0.0783, "step": 22173 }, { "epoch": 72.70163934426229, "grad_norm": 2.715083122253418, "learning_rate": 3.6603519418676304e-06, "loss": 0.1425, "step": 22174 }, { "epoch": 72.70491803278688, "grad_norm": 2.7863876819610596, "learning_rate": 3.6595307546760393e-06, "loss": 0.1069, "step": 22175 }, { "epoch": 72.70819672131148, "grad_norm": 2.6247057914733887, "learning_rate": 3.658709638979828e-06, "loss": 0.0604, "step": 22176 }, { "epoch": 72.71147540983607, "grad_norm": 2.684525966644287, "learning_rate": 3.6578885947882625e-06, "loss": 0.2408, "step": 22177 }, { "epoch": 72.71475409836066, "grad_norm": 3.171175003051758, "learning_rate": 3.6570676221105973e-06, "loss": 0.1939, "step": 22178 }, { "epoch": 72.71803278688525, "grad_norm": 3.1283226013183594, "learning_rate": 3.6562467209560905e-06, "loss": 0.1815, "step": 22179 }, { "epoch": 72.72131147540983, "grad_norm": 2.584561824798584, "learning_rate": 3.655425891333996e-06, "loss": 0.2391, "step": 22180 }, { "epoch": 72.72459016393442, "grad_norm": 2.5864346027374268, "learning_rate": 3.654605133253569e-06, "loss": 0.1095, "step": 22181 }, { "epoch": 72.72786885245901, "grad_norm": 2.7395331859588623, "learning_rate": 3.653784446724069e-06, "loss": 0.1178, "step": 22182 }, { "epoch": 72.73114754098361, "grad_norm": 2.1362524032592773, "learning_rate": 3.652963831754749e-06, "loss": 0.1443, "step": 22183 }, { "epoch": 72.7344262295082, "grad_norm": 2.7423129081726074, "learning_rate": 3.6521432883548603e-06, "loss": 0.1389, "step": 22184 }, { "epoch": 72.73770491803279, "grad_norm": 2.9237797260284424, "learning_rate": 3.6513228165336535e-06, "loss": 0.1217, "step": 22185 }, { "epoch": 72.74098360655738, "grad_norm": 2.6974995136260986, "learning_rate": 3.6505024163003853e-06, "loss": 0.1526, "step": 22186 }, { "epoch": 72.74426229508197, "grad_norm": 2.4185469150543213, "learning_rate": 3.649682087664306e-06, "loss": 0.0841, "step": 22187 }, { "epoch": 72.74754098360656, "grad_norm": 3.6385226249694824, "learning_rate": 3.648861830634661e-06, "loss": 0.144, "step": 22188 }, { "epoch": 72.75081967213114, "grad_norm": 2.539454221725464, "learning_rate": 3.6480416452207015e-06, "loss": 0.0848, "step": 22189 }, { "epoch": 72.75409836065573, "grad_norm": 2.384289503097534, "learning_rate": 3.6472215314316796e-06, "loss": 0.0586, "step": 22190 }, { "epoch": 72.75737704918033, "grad_norm": 2.457540988922119, "learning_rate": 3.6464014892768397e-06, "loss": 0.148, "step": 22191 }, { "epoch": 72.76065573770492, "grad_norm": 3.3029768466949463, "learning_rate": 3.6455815187654285e-06, "loss": 0.1131, "step": 22192 }, { "epoch": 72.76393442622951, "grad_norm": 5.831982612609863, "learning_rate": 3.6447616199066937e-06, "loss": 0.2415, "step": 22193 }, { "epoch": 72.7672131147541, "grad_norm": 2.3462064266204834, "learning_rate": 3.6439417927098754e-06, "loss": 0.1152, "step": 22194 }, { "epoch": 72.77049180327869, "grad_norm": 2.2375073432922363, "learning_rate": 3.6431220371842255e-06, "loss": 0.0951, "step": 22195 }, { "epoch": 72.77377049180328, "grad_norm": 2.9720046520233154, "learning_rate": 3.6423023533389845e-06, "loss": 0.1258, "step": 22196 }, { "epoch": 72.77704918032786, "grad_norm": 1.777548909187317, "learning_rate": 3.641482741183395e-06, "loss": 0.1031, "step": 22197 }, { "epoch": 72.78032786885245, "grad_norm": 3.32816743850708, "learning_rate": 3.640663200726695e-06, "loss": 0.1986, "step": 22198 }, { "epoch": 72.78360655737706, "grad_norm": 1.9238224029541016, "learning_rate": 3.639843731978133e-06, "loss": 0.0835, "step": 22199 }, { "epoch": 72.78688524590164, "grad_norm": 2.6505889892578125, "learning_rate": 3.6390243349469458e-06, "loss": 0.2467, "step": 22200 }, { "epoch": 72.79016393442623, "grad_norm": 2.3296425342559814, "learning_rate": 3.638205009642373e-06, "loss": 0.0819, "step": 22201 }, { "epoch": 72.79344262295082, "grad_norm": 2.737881898880005, "learning_rate": 3.637385756073649e-06, "loss": 0.1786, "step": 22202 }, { "epoch": 72.79672131147541, "grad_norm": 2.6688220500946045, "learning_rate": 3.636566574250021e-06, "loss": 0.0662, "step": 22203 }, { "epoch": 72.8, "grad_norm": 3.120537519454956, "learning_rate": 3.63574746418072e-06, "loss": 0.0848, "step": 22204 }, { "epoch": 72.80327868852459, "grad_norm": 2.4505844116210938, "learning_rate": 3.6349284258749853e-06, "loss": 0.0946, "step": 22205 }, { "epoch": 72.80655737704917, "grad_norm": 2.034337282180786, "learning_rate": 3.634109459342049e-06, "loss": 0.0623, "step": 22206 }, { "epoch": 72.80983606557378, "grad_norm": 2.198528289794922, "learning_rate": 3.6332905645911444e-06, "loss": 0.0916, "step": 22207 }, { "epoch": 72.81311475409836, "grad_norm": 3.0453081130981445, "learning_rate": 3.6324717416315116e-06, "loss": 0.1227, "step": 22208 }, { "epoch": 72.81639344262295, "grad_norm": 2.6543877124786377, "learning_rate": 3.6316529904723795e-06, "loss": 0.4472, "step": 22209 }, { "epoch": 72.81967213114754, "grad_norm": 2.4591612815856934, "learning_rate": 3.6308343111229795e-06, "loss": 0.1553, "step": 22210 }, { "epoch": 72.82295081967213, "grad_norm": 1.8699556589126587, "learning_rate": 3.6300157035925477e-06, "loss": 0.0544, "step": 22211 }, { "epoch": 72.82622950819672, "grad_norm": 3.079286575317383, "learning_rate": 3.6291971678903124e-06, "loss": 0.1783, "step": 22212 }, { "epoch": 72.8295081967213, "grad_norm": 2.278127431869507, "learning_rate": 3.628378704025499e-06, "loss": 0.0714, "step": 22213 }, { "epoch": 72.8327868852459, "grad_norm": 3.5775346755981445, "learning_rate": 3.6275603120073444e-06, "loss": 0.193, "step": 22214 }, { "epoch": 72.8360655737705, "grad_norm": 2.4353981018066406, "learning_rate": 3.6267419918450732e-06, "loss": 0.1186, "step": 22215 }, { "epoch": 72.83934426229509, "grad_norm": 3.0521035194396973, "learning_rate": 3.625923743547909e-06, "loss": 0.1738, "step": 22216 }, { "epoch": 72.84262295081967, "grad_norm": 2.9753618240356445, "learning_rate": 3.6251055671250845e-06, "loss": 0.2013, "step": 22217 }, { "epoch": 72.84590163934426, "grad_norm": 2.903059720993042, "learning_rate": 3.624287462585824e-06, "loss": 0.0663, "step": 22218 }, { "epoch": 72.84918032786885, "grad_norm": 13.037323951721191, "learning_rate": 3.623469429939351e-06, "loss": 0.3104, "step": 22219 }, { "epoch": 72.85245901639344, "grad_norm": 3.184452772140503, "learning_rate": 3.6226514691948867e-06, "loss": 0.1227, "step": 22220 }, { "epoch": 72.85573770491803, "grad_norm": 2.209685802459717, "learning_rate": 3.621833580361661e-06, "loss": 0.1024, "step": 22221 }, { "epoch": 72.85901639344263, "grad_norm": 2.9476237297058105, "learning_rate": 3.6210157634488943e-06, "loss": 0.0932, "step": 22222 }, { "epoch": 72.86229508196722, "grad_norm": 2.326932430267334, "learning_rate": 3.620198018465807e-06, "loss": 0.0986, "step": 22223 }, { "epoch": 72.8655737704918, "grad_norm": 2.778318405151367, "learning_rate": 3.619380345421616e-06, "loss": 0.1019, "step": 22224 }, { "epoch": 72.8688524590164, "grad_norm": 2.660841464996338, "learning_rate": 3.61856274432555e-06, "loss": 0.0749, "step": 22225 }, { "epoch": 72.87213114754098, "grad_norm": 3.020749568939209, "learning_rate": 3.617745215186824e-06, "loss": 0.1764, "step": 22226 }, { "epoch": 72.87540983606557, "grad_norm": 2.7638773918151855, "learning_rate": 3.616927758014657e-06, "loss": 0.2127, "step": 22227 }, { "epoch": 72.87868852459016, "grad_norm": 2.5950329303741455, "learning_rate": 3.616110372818262e-06, "loss": 0.0876, "step": 22228 }, { "epoch": 72.88196721311475, "grad_norm": 3.0753533840179443, "learning_rate": 3.615293059606864e-06, "loss": 0.151, "step": 22229 }, { "epoch": 72.88524590163935, "grad_norm": 3.06520676612854, "learning_rate": 3.6144758183896754e-06, "loss": 0.1432, "step": 22230 }, { "epoch": 72.88852459016394, "grad_norm": 2.023467540740967, "learning_rate": 3.6136586491759106e-06, "loss": 0.2168, "step": 22231 }, { "epoch": 72.89180327868853, "grad_norm": 2.0838892459869385, "learning_rate": 3.612841551974785e-06, "loss": 0.0552, "step": 22232 }, { "epoch": 72.89508196721312, "grad_norm": 3.33172607421875, "learning_rate": 3.612024526795509e-06, "loss": 0.2255, "step": 22233 }, { "epoch": 72.8983606557377, "grad_norm": 4.24757719039917, "learning_rate": 3.6112075736473006e-06, "loss": 0.1374, "step": 22234 }, { "epoch": 72.90163934426229, "grad_norm": 2.562145709991455, "learning_rate": 3.6103906925393706e-06, "loss": 0.127, "step": 22235 }, { "epoch": 72.90491803278688, "grad_norm": 1.966685175895691, "learning_rate": 3.609573883480928e-06, "loss": 0.025, "step": 22236 }, { "epoch": 72.90819672131147, "grad_norm": 2.7442731857299805, "learning_rate": 3.608757146481181e-06, "loss": 0.1595, "step": 22237 }, { "epoch": 72.91147540983607, "grad_norm": 3.064941644668579, "learning_rate": 3.607940481549347e-06, "loss": 0.1626, "step": 22238 }, { "epoch": 72.91475409836066, "grad_norm": 2.262083053588867, "learning_rate": 3.6071238886946293e-06, "loss": 0.1103, "step": 22239 }, { "epoch": 72.91803278688525, "grad_norm": 2.684328556060791, "learning_rate": 3.6063073679262363e-06, "loss": 0.1139, "step": 22240 }, { "epoch": 72.92131147540984, "grad_norm": 2.291767120361328, "learning_rate": 3.6054909192533728e-06, "loss": 0.1279, "step": 22241 }, { "epoch": 72.92459016393443, "grad_norm": 3.104473352432251, "learning_rate": 3.6046745426852502e-06, "loss": 0.1514, "step": 22242 }, { "epoch": 72.92786885245901, "grad_norm": 2.215123176574707, "learning_rate": 3.6038582382310725e-06, "loss": 0.2015, "step": 22243 }, { "epoch": 72.9311475409836, "grad_norm": 3.550163507461548, "learning_rate": 3.6030420059000435e-06, "loss": 0.1502, "step": 22244 }, { "epoch": 72.93442622950819, "grad_norm": 3.272454261779785, "learning_rate": 3.602225845701367e-06, "loss": 0.1555, "step": 22245 }, { "epoch": 72.9377049180328, "grad_norm": 3.210106134414673, "learning_rate": 3.601409757644242e-06, "loss": 0.0781, "step": 22246 }, { "epoch": 72.94098360655738, "grad_norm": 2.2036430835723877, "learning_rate": 3.6005937417378787e-06, "loss": 0.0549, "step": 22247 }, { "epoch": 72.94426229508197, "grad_norm": 3.0489096641540527, "learning_rate": 3.599777797991475e-06, "loss": 0.2233, "step": 22248 }, { "epoch": 72.94754098360656, "grad_norm": 2.4701600074768066, "learning_rate": 3.5989619264142316e-06, "loss": 0.1672, "step": 22249 }, { "epoch": 72.95081967213115, "grad_norm": 2.6268203258514404, "learning_rate": 3.598146127015344e-06, "loss": 0.079, "step": 22250 }, { "epoch": 72.95409836065573, "grad_norm": 2.4229185581207275, "learning_rate": 3.5973303998040178e-06, "loss": 0.0537, "step": 22251 }, { "epoch": 72.95737704918032, "grad_norm": 2.6559500694274902, "learning_rate": 3.596514744789449e-06, "loss": 0.1446, "step": 22252 }, { "epoch": 72.96065573770491, "grad_norm": 2.8457703590393066, "learning_rate": 3.5956991619808345e-06, "loss": 0.2281, "step": 22253 }, { "epoch": 72.96393442622951, "grad_norm": 3.2007758617401123, "learning_rate": 3.5948836513873674e-06, "loss": 0.1711, "step": 22254 }, { "epoch": 72.9672131147541, "grad_norm": 2.253723621368408, "learning_rate": 3.594068213018249e-06, "loss": 0.2037, "step": 22255 }, { "epoch": 72.97049180327869, "grad_norm": 2.3396599292755127, "learning_rate": 3.5932528468826734e-06, "loss": 0.1284, "step": 22256 }, { "epoch": 72.97377049180328, "grad_norm": 2.5697643756866455, "learning_rate": 3.5924375529898338e-06, "loss": 0.1729, "step": 22257 }, { "epoch": 72.97704918032787, "grad_norm": 3.040173053741455, "learning_rate": 3.591622331348922e-06, "loss": 0.2681, "step": 22258 }, { "epoch": 72.98032786885246, "grad_norm": 3.410871982574463, "learning_rate": 3.590807181969128e-06, "loss": 0.0609, "step": 22259 }, { "epoch": 72.98360655737704, "grad_norm": 3.201317548751831, "learning_rate": 3.5899921048596496e-06, "loss": 0.2031, "step": 22260 }, { "epoch": 72.98688524590163, "grad_norm": 2.575425863265991, "learning_rate": 3.589177100029676e-06, "loss": 0.2341, "step": 22261 }, { "epoch": 72.99016393442623, "grad_norm": 2.7876787185668945, "learning_rate": 3.588362167488396e-06, "loss": 0.0878, "step": 22262 }, { "epoch": 72.99344262295082, "grad_norm": 2.654169797897339, "learning_rate": 3.5875473072449964e-06, "loss": 0.0958, "step": 22263 }, { "epoch": 72.99672131147541, "grad_norm": 3.164316177368164, "learning_rate": 3.58673251930867e-06, "loss": 0.1234, "step": 22264 }, { "epoch": 73.0, "grad_norm": 2.6553778648376465, "learning_rate": 3.585917803688603e-06, "loss": 0.0753, "step": 22265 }, { "epoch": 73.00327868852459, "grad_norm": 2.6519672870635986, "learning_rate": 3.585103160393979e-06, "loss": 0.21, "step": 22266 }, { "epoch": 73.00655737704918, "grad_norm": 2.2933273315429688, "learning_rate": 3.5842885894339898e-06, "loss": 0.0435, "step": 22267 }, { "epoch": 73.00983606557377, "grad_norm": 3.130589246749878, "learning_rate": 3.583474090817818e-06, "loss": 0.3182, "step": 22268 }, { "epoch": 73.01311475409837, "grad_norm": 2.4809186458587646, "learning_rate": 3.582659664554643e-06, "loss": 0.1574, "step": 22269 }, { "epoch": 73.01639344262296, "grad_norm": 2.6937203407287598, "learning_rate": 3.5818453106536566e-06, "loss": 0.111, "step": 22270 }, { "epoch": 73.01967213114754, "grad_norm": 3.5906736850738525, "learning_rate": 3.581031029124037e-06, "loss": 0.0963, "step": 22271 }, { "epoch": 73.02295081967213, "grad_norm": 3.2576751708984375, "learning_rate": 3.580216819974963e-06, "loss": 0.1677, "step": 22272 }, { "epoch": 73.02622950819672, "grad_norm": 3.5269970893859863, "learning_rate": 3.5794026832156238e-06, "loss": 0.2236, "step": 22273 }, { "epoch": 73.02950819672131, "grad_norm": 2.976783514022827, "learning_rate": 3.5785886188551945e-06, "loss": 0.2236, "step": 22274 }, { "epoch": 73.0327868852459, "grad_norm": 2.564234972000122, "learning_rate": 3.5777746269028545e-06, "loss": 0.1189, "step": 22275 }, { "epoch": 73.03606557377049, "grad_norm": 3.0351500511169434, "learning_rate": 3.5769607073677805e-06, "loss": 0.1862, "step": 22276 }, { "epoch": 73.03934426229509, "grad_norm": 2.35248064994812, "learning_rate": 3.5761468602591566e-06, "loss": 0.0632, "step": 22277 }, { "epoch": 73.04262295081968, "grad_norm": 2.60200834274292, "learning_rate": 3.5753330855861544e-06, "loss": 0.1876, "step": 22278 }, { "epoch": 73.04590163934427, "grad_norm": 2.698742628097534, "learning_rate": 3.5745193833579527e-06, "loss": 0.1493, "step": 22279 }, { "epoch": 73.04918032786885, "grad_norm": 1.679944634437561, "learning_rate": 3.573705753583723e-06, "loss": 0.0296, "step": 22280 }, { "epoch": 73.05245901639344, "grad_norm": 2.287317991256714, "learning_rate": 3.572892196272645e-06, "loss": 0.2829, "step": 22281 }, { "epoch": 73.05573770491803, "grad_norm": 2.1328773498535156, "learning_rate": 3.5720787114338897e-06, "loss": 0.1106, "step": 22282 }, { "epoch": 73.05901639344262, "grad_norm": 2.439821243286133, "learning_rate": 3.5712652990766307e-06, "loss": 0.1236, "step": 22283 }, { "epoch": 73.0622950819672, "grad_norm": 3.2875330448150635, "learning_rate": 3.5704519592100407e-06, "loss": 0.1357, "step": 22284 }, { "epoch": 73.06557377049181, "grad_norm": 2.756376266479492, "learning_rate": 3.5696386918432848e-06, "loss": 0.1928, "step": 22285 }, { "epoch": 73.0688524590164, "grad_norm": 2.574216604232788, "learning_rate": 3.568825496985543e-06, "loss": 0.0749, "step": 22286 }, { "epoch": 73.07213114754099, "grad_norm": 2.4240942001342773, "learning_rate": 3.5680123746459805e-06, "loss": 0.0718, "step": 22287 }, { "epoch": 73.07540983606557, "grad_norm": 3.0668132305145264, "learning_rate": 3.5671993248337654e-06, "loss": 0.1945, "step": 22288 }, { "epoch": 73.07868852459016, "grad_norm": 2.6093504428863525, "learning_rate": 3.566386347558063e-06, "loss": 0.1608, "step": 22289 }, { "epoch": 73.08196721311475, "grad_norm": 2.5415778160095215, "learning_rate": 3.5655734428280474e-06, "loss": 0.3758, "step": 22290 }, { "epoch": 73.08524590163934, "grad_norm": 2.0567567348480225, "learning_rate": 3.564760610652882e-06, "loss": 0.1232, "step": 22291 }, { "epoch": 73.08852459016393, "grad_norm": 3.0574095249176025, "learning_rate": 3.5639478510417315e-06, "loss": 0.2, "step": 22292 }, { "epoch": 73.09180327868853, "grad_norm": 2.755703926086426, "learning_rate": 3.563135164003757e-06, "loss": 0.1794, "step": 22293 }, { "epoch": 73.09508196721312, "grad_norm": 1.9087849855422974, "learning_rate": 3.5623225495481296e-06, "loss": 0.1108, "step": 22294 }, { "epoch": 73.09836065573771, "grad_norm": 3.0061469078063965, "learning_rate": 3.5615100076840093e-06, "loss": 0.1974, "step": 22295 }, { "epoch": 73.1016393442623, "grad_norm": 2.9174983501434326, "learning_rate": 3.5606975384205568e-06, "loss": 0.24, "step": 22296 }, { "epoch": 73.10491803278688, "grad_norm": 3.8600411415100098, "learning_rate": 3.5598851417669356e-06, "loss": 0.1194, "step": 22297 }, { "epoch": 73.10819672131147, "grad_norm": 2.622842788696289, "learning_rate": 3.559072817732303e-06, "loss": 0.1096, "step": 22298 }, { "epoch": 73.11147540983606, "grad_norm": 2.9740428924560547, "learning_rate": 3.558260566325823e-06, "loss": 0.3873, "step": 22299 }, { "epoch": 73.11475409836065, "grad_norm": 2.142117977142334, "learning_rate": 3.5574483875566547e-06, "loss": 0.1192, "step": 22300 }, { "epoch": 73.11803278688525, "grad_norm": 2.3829519748687744, "learning_rate": 3.556636281433953e-06, "loss": 0.1574, "step": 22301 }, { "epoch": 73.12131147540984, "grad_norm": 2.5583958625793457, "learning_rate": 3.5558242479668736e-06, "loss": 0.1486, "step": 22302 }, { "epoch": 73.12459016393443, "grad_norm": 2.5030789375305176, "learning_rate": 3.5550122871645786e-06, "loss": 0.106, "step": 22303 }, { "epoch": 73.12786885245902, "grad_norm": 2.7132859230041504, "learning_rate": 3.554200399036223e-06, "loss": 0.1666, "step": 22304 }, { "epoch": 73.1311475409836, "grad_norm": 2.350369691848755, "learning_rate": 3.5533885835909587e-06, "loss": 0.0638, "step": 22305 }, { "epoch": 73.1344262295082, "grad_norm": 2.5290021896362305, "learning_rate": 3.552576840837938e-06, "loss": 0.0618, "step": 22306 }, { "epoch": 73.13770491803278, "grad_norm": 2.864715099334717, "learning_rate": 3.55176517078632e-06, "loss": 0.1939, "step": 22307 }, { "epoch": 73.14098360655737, "grad_norm": 2.9463417530059814, "learning_rate": 3.550953573445254e-06, "loss": 0.2044, "step": 22308 }, { "epoch": 73.14426229508197, "grad_norm": 2.80861234664917, "learning_rate": 3.5501420488238926e-06, "loss": 0.2137, "step": 22309 }, { "epoch": 73.14754098360656, "grad_norm": 2.241656541824341, "learning_rate": 3.5493305969313852e-06, "loss": 0.2275, "step": 22310 }, { "epoch": 73.15081967213115, "grad_norm": 3.28060245513916, "learning_rate": 3.548519217776879e-06, "loss": 0.1543, "step": 22311 }, { "epoch": 73.15409836065574, "grad_norm": 2.505793809890747, "learning_rate": 3.54770791136953e-06, "loss": 0.1234, "step": 22312 }, { "epoch": 73.15737704918033, "grad_norm": 2.652766704559326, "learning_rate": 3.546896677718482e-06, "loss": 0.0862, "step": 22313 }, { "epoch": 73.16065573770491, "grad_norm": 2.908704996109009, "learning_rate": 3.5460855168328843e-06, "loss": 0.1205, "step": 22314 }, { "epoch": 73.1639344262295, "grad_norm": 2.1149861812591553, "learning_rate": 3.5452744287218798e-06, "loss": 0.0435, "step": 22315 }, { "epoch": 73.1672131147541, "grad_norm": 3.182356595993042, "learning_rate": 3.544463413394621e-06, "loss": 0.0655, "step": 22316 }, { "epoch": 73.1704918032787, "grad_norm": 2.070908784866333, "learning_rate": 3.543652470860248e-06, "loss": 0.0453, "step": 22317 }, { "epoch": 73.17377049180328, "grad_norm": 2.32987117767334, "learning_rate": 3.5428416011279043e-06, "loss": 0.1749, "step": 22318 }, { "epoch": 73.17704918032787, "grad_norm": 2.2807867527008057, "learning_rate": 3.5420308042067375e-06, "loss": 0.0913, "step": 22319 }, { "epoch": 73.18032786885246, "grad_norm": 2.4449586868286133, "learning_rate": 3.5412200801058894e-06, "loss": 0.1376, "step": 22320 }, { "epoch": 73.18360655737705, "grad_norm": 2.1032981872558594, "learning_rate": 3.540409428834496e-06, "loss": 0.1543, "step": 22321 }, { "epoch": 73.18688524590164, "grad_norm": 2.727471351623535, "learning_rate": 3.539598850401705e-06, "loss": 0.1198, "step": 22322 }, { "epoch": 73.19016393442622, "grad_norm": 2.782524347305298, "learning_rate": 3.538788344816656e-06, "loss": 0.2092, "step": 22323 }, { "epoch": 73.19344262295083, "grad_norm": 3.2170426845550537, "learning_rate": 3.537977912088486e-06, "loss": 0.1787, "step": 22324 }, { "epoch": 73.19672131147541, "grad_norm": 2.1906182765960693, "learning_rate": 3.5371675522263296e-06, "loss": 0.1074, "step": 22325 }, { "epoch": 73.2, "grad_norm": 3.1068177223205566, "learning_rate": 3.536357265239333e-06, "loss": 0.2502, "step": 22326 }, { "epoch": 73.20327868852459, "grad_norm": 2.631755828857422, "learning_rate": 3.5355470511366272e-06, "loss": 0.181, "step": 22327 }, { "epoch": 73.20655737704918, "grad_norm": 1.991268515586853, "learning_rate": 3.5347369099273475e-06, "loss": 0.0766, "step": 22328 }, { "epoch": 73.20983606557377, "grad_norm": 2.7197773456573486, "learning_rate": 3.533926841620635e-06, "loss": 0.1191, "step": 22329 }, { "epoch": 73.21311475409836, "grad_norm": 2.8255858421325684, "learning_rate": 3.5331168462256204e-06, "loss": 0.1735, "step": 22330 }, { "epoch": 73.21639344262294, "grad_norm": 2.0852978229522705, "learning_rate": 3.5323069237514362e-06, "loss": 0.0671, "step": 22331 }, { "epoch": 73.21967213114755, "grad_norm": 2.245806932449341, "learning_rate": 3.531497074207214e-06, "loss": 0.055, "step": 22332 }, { "epoch": 73.22295081967214, "grad_norm": 2.47153377532959, "learning_rate": 3.53068729760209e-06, "loss": 0.1359, "step": 22333 }, { "epoch": 73.22622950819672, "grad_norm": 5.485971927642822, "learning_rate": 3.5298775939451945e-06, "loss": 0.1024, "step": 22334 }, { "epoch": 73.22950819672131, "grad_norm": 2.458528518676758, "learning_rate": 3.529067963245656e-06, "loss": 0.0606, "step": 22335 }, { "epoch": 73.2327868852459, "grad_norm": 2.3187942504882812, "learning_rate": 3.5282584055126035e-06, "loss": 0.0529, "step": 22336 }, { "epoch": 73.23606557377049, "grad_norm": 3.070904016494751, "learning_rate": 3.5274489207551632e-06, "loss": 0.1485, "step": 22337 }, { "epoch": 73.23934426229508, "grad_norm": 3.055955648422241, "learning_rate": 3.52663950898247e-06, "loss": 0.0954, "step": 22338 }, { "epoch": 73.24262295081967, "grad_norm": 1.9217615127563477, "learning_rate": 3.5258301702036468e-06, "loss": 0.0395, "step": 22339 }, { "epoch": 73.24590163934427, "grad_norm": 3.7829298973083496, "learning_rate": 3.5250209044278196e-06, "loss": 0.0718, "step": 22340 }, { "epoch": 73.24918032786886, "grad_norm": 2.4637210369110107, "learning_rate": 3.5242117116641106e-06, "loss": 0.0779, "step": 22341 }, { "epoch": 73.25245901639344, "grad_norm": 2.8621299266815186, "learning_rate": 3.523402591921651e-06, "loss": 0.1001, "step": 22342 }, { "epoch": 73.25573770491803, "grad_norm": 1.8820252418518066, "learning_rate": 3.5225935452095616e-06, "loss": 0.1164, "step": 22343 }, { "epoch": 73.25901639344262, "grad_norm": 2.4235215187072754, "learning_rate": 3.5217845715369646e-06, "loss": 0.2023, "step": 22344 }, { "epoch": 73.26229508196721, "grad_norm": 2.171581745147705, "learning_rate": 3.520975670912978e-06, "loss": 0.1473, "step": 22345 }, { "epoch": 73.2655737704918, "grad_norm": 2.0275979042053223, "learning_rate": 3.5201668433467315e-06, "loss": 0.0645, "step": 22346 }, { "epoch": 73.26885245901639, "grad_norm": 3.0022454261779785, "learning_rate": 3.519358088847341e-06, "loss": 0.1368, "step": 22347 }, { "epoch": 73.27213114754099, "grad_norm": 2.2549052238464355, "learning_rate": 3.518549407423927e-06, "loss": 0.0536, "step": 22348 }, { "epoch": 73.27540983606558, "grad_norm": 2.1838183403015137, "learning_rate": 3.517740799085607e-06, "loss": 0.2138, "step": 22349 }, { "epoch": 73.27868852459017, "grad_norm": 2.146160364151001, "learning_rate": 3.516932263841495e-06, "loss": 0.1608, "step": 22350 }, { "epoch": 73.28196721311475, "grad_norm": 1.8662731647491455, "learning_rate": 3.5161238017007173e-06, "loss": 0.1107, "step": 22351 }, { "epoch": 73.28524590163934, "grad_norm": 2.582509994506836, "learning_rate": 3.515315412672384e-06, "loss": 0.1533, "step": 22352 }, { "epoch": 73.28852459016393, "grad_norm": 3.1305549144744873, "learning_rate": 3.5145070967656126e-06, "loss": 0.1567, "step": 22353 }, { "epoch": 73.29180327868852, "grad_norm": 2.4903361797332764, "learning_rate": 3.5136988539895135e-06, "loss": 0.2879, "step": 22354 }, { "epoch": 73.29508196721312, "grad_norm": 2.7007415294647217, "learning_rate": 3.5128906843532063e-06, "loss": 0.1281, "step": 22355 }, { "epoch": 73.29836065573771, "grad_norm": 2.6062052249908447, "learning_rate": 3.5120825878658026e-06, "loss": 0.2133, "step": 22356 }, { "epoch": 73.3016393442623, "grad_norm": 2.412600517272949, "learning_rate": 3.511274564536413e-06, "loss": 0.0854, "step": 22357 }, { "epoch": 73.30491803278689, "grad_norm": 1.7599650621414185, "learning_rate": 3.5104666143741462e-06, "loss": 0.1275, "step": 22358 }, { "epoch": 73.30819672131148, "grad_norm": 2.9777133464813232, "learning_rate": 3.5096587373881187e-06, "loss": 0.1358, "step": 22359 }, { "epoch": 73.31147540983606, "grad_norm": 2.9414939880371094, "learning_rate": 3.508850933587438e-06, "loss": 0.0531, "step": 22360 }, { "epoch": 73.31475409836065, "grad_norm": 2.2423441410064697, "learning_rate": 3.5080432029812105e-06, "loss": 0.1735, "step": 22361 }, { "epoch": 73.31803278688524, "grad_norm": 2.9692766666412354, "learning_rate": 3.5072355455785465e-06, "loss": 0.1671, "step": 22362 }, { "epoch": 73.32131147540984, "grad_norm": 2.439119577407837, "learning_rate": 3.50642796138855e-06, "loss": 0.1871, "step": 22363 }, { "epoch": 73.32459016393443, "grad_norm": 2.5246400833129883, "learning_rate": 3.5056204504203327e-06, "loss": 0.0759, "step": 22364 }, { "epoch": 73.32786885245902, "grad_norm": 3.1701409816741943, "learning_rate": 3.504813012682997e-06, "loss": 0.16, "step": 22365 }, { "epoch": 73.33114754098361, "grad_norm": 2.230309247970581, "learning_rate": 3.504005648185649e-06, "loss": 0.1648, "step": 22366 }, { "epoch": 73.3344262295082, "grad_norm": 2.0613021850585938, "learning_rate": 3.5031983569373874e-06, "loss": 0.0653, "step": 22367 }, { "epoch": 73.33770491803278, "grad_norm": 2.467538833618164, "learning_rate": 3.502391138947322e-06, "loss": 0.2219, "step": 22368 }, { "epoch": 73.34098360655737, "grad_norm": 1.8611156940460205, "learning_rate": 3.5015839942245533e-06, "loss": 0.1619, "step": 22369 }, { "epoch": 73.34426229508196, "grad_norm": 2.7386691570281982, "learning_rate": 3.5007769227781805e-06, "loss": 0.1107, "step": 22370 }, { "epoch": 73.34754098360656, "grad_norm": 1.7117488384246826, "learning_rate": 3.4999699246173038e-06, "loss": 0.043, "step": 22371 }, { "epoch": 73.35081967213115, "grad_norm": 2.3329269886016846, "learning_rate": 3.4991629997510256e-06, "loss": 0.0853, "step": 22372 }, { "epoch": 73.35409836065574, "grad_norm": 2.4290051460266113, "learning_rate": 3.4983561481884453e-06, "loss": 0.1734, "step": 22373 }, { "epoch": 73.35737704918033, "grad_norm": 3.050863265991211, "learning_rate": 3.4975493699386563e-06, "loss": 0.1334, "step": 22374 }, { "epoch": 73.36065573770492, "grad_norm": 1.7104365825653076, "learning_rate": 3.4967426650107615e-06, "loss": 0.0558, "step": 22375 }, { "epoch": 73.3639344262295, "grad_norm": 2.7760350704193115, "learning_rate": 3.495936033413856e-06, "loss": 0.0862, "step": 22376 }, { "epoch": 73.3672131147541, "grad_norm": 3.224184274673462, "learning_rate": 3.49512947515703e-06, "loss": 0.1446, "step": 22377 }, { "epoch": 73.37049180327868, "grad_norm": 2.587599039077759, "learning_rate": 3.494322990249386e-06, "loss": 0.1023, "step": 22378 }, { "epoch": 73.37377049180328, "grad_norm": 3.078000783920288, "learning_rate": 3.4935165787000146e-06, "loss": 0.1774, "step": 22379 }, { "epoch": 73.37704918032787, "grad_norm": 3.110595703125, "learning_rate": 3.4927102405180046e-06, "loss": 0.1261, "step": 22380 }, { "epoch": 73.38032786885246, "grad_norm": 2.373882532119751, "learning_rate": 3.4919039757124573e-06, "loss": 0.2162, "step": 22381 }, { "epoch": 73.38360655737705, "grad_norm": 2.4532878398895264, "learning_rate": 3.491097784292459e-06, "loss": 0.1461, "step": 22382 }, { "epoch": 73.38688524590164, "grad_norm": 3.0215699672698975, "learning_rate": 3.4902916662671003e-06, "loss": 0.0698, "step": 22383 }, { "epoch": 73.39016393442623, "grad_norm": 2.194751501083374, "learning_rate": 3.489485621645469e-06, "loss": 0.1511, "step": 22384 }, { "epoch": 73.39344262295081, "grad_norm": 2.724144697189331, "learning_rate": 3.4886796504366584e-06, "loss": 0.1288, "step": 22385 }, { "epoch": 73.3967213114754, "grad_norm": 3.090449810028076, "learning_rate": 3.487873752649756e-06, "loss": 0.1893, "step": 22386 }, { "epoch": 73.4, "grad_norm": 2.5078632831573486, "learning_rate": 3.487067928293848e-06, "loss": 0.1208, "step": 22387 }, { "epoch": 73.4032786885246, "grad_norm": 3.2900993824005127, "learning_rate": 3.48626217737802e-06, "loss": 0.0652, "step": 22388 }, { "epoch": 73.40655737704918, "grad_norm": 2.649303436279297, "learning_rate": 3.485456499911356e-06, "loss": 0.0856, "step": 22389 }, { "epoch": 73.40983606557377, "grad_norm": 2.320439100265503, "learning_rate": 3.4846508959029457e-06, "loss": 0.117, "step": 22390 }, { "epoch": 73.41311475409836, "grad_norm": 2.7371842861175537, "learning_rate": 3.483845365361872e-06, "loss": 0.238, "step": 22391 }, { "epoch": 73.41639344262295, "grad_norm": 2.9975697994232178, "learning_rate": 3.4830399082972165e-06, "loss": 0.1767, "step": 22392 }, { "epoch": 73.41967213114754, "grad_norm": 2.9635984897613525, "learning_rate": 3.482234524718059e-06, "loss": 0.1751, "step": 22393 }, { "epoch": 73.42295081967212, "grad_norm": 3.330418825149536, "learning_rate": 3.481429214633486e-06, "loss": 0.2278, "step": 22394 }, { "epoch": 73.42622950819673, "grad_norm": 2.5059847831726074, "learning_rate": 3.4806239780525776e-06, "loss": 0.1492, "step": 22395 }, { "epoch": 73.42950819672132, "grad_norm": 2.7009646892547607, "learning_rate": 3.4798188149844115e-06, "loss": 0.1342, "step": 22396 }, { "epoch": 73.4327868852459, "grad_norm": 2.229429244995117, "learning_rate": 3.4790137254380653e-06, "loss": 0.1478, "step": 22397 }, { "epoch": 73.43606557377049, "grad_norm": 2.651888370513916, "learning_rate": 3.4782087094226213e-06, "loss": 0.2438, "step": 22398 }, { "epoch": 73.43934426229508, "grad_norm": 2.371018171310425, "learning_rate": 3.477403766947156e-06, "loss": 0.1678, "step": 22399 }, { "epoch": 73.44262295081967, "grad_norm": 2.389500141143799, "learning_rate": 3.476598898020744e-06, "loss": 0.0707, "step": 22400 }, { "epoch": 73.44590163934426, "grad_norm": 3.7617645263671875, "learning_rate": 3.4757941026524634e-06, "loss": 0.2033, "step": 22401 }, { "epoch": 73.44918032786886, "grad_norm": 3.023618698120117, "learning_rate": 3.4749893808513848e-06, "loss": 0.2436, "step": 22402 }, { "epoch": 73.45245901639345, "grad_norm": 1.8399220705032349, "learning_rate": 3.4741847326265878e-06, "loss": 0.0508, "step": 22403 }, { "epoch": 73.45573770491804, "grad_norm": 2.6058974266052246, "learning_rate": 3.4733801579871428e-06, "loss": 0.1257, "step": 22404 }, { "epoch": 73.45901639344262, "grad_norm": 1.961327075958252, "learning_rate": 3.472575656942122e-06, "loss": 0.1346, "step": 22405 }, { "epoch": 73.46229508196721, "grad_norm": 2.6591098308563232, "learning_rate": 3.4717712295005957e-06, "loss": 0.1613, "step": 22406 }, { "epoch": 73.4655737704918, "grad_norm": 2.7998805046081543, "learning_rate": 3.470966875671639e-06, "loss": 0.1663, "step": 22407 }, { "epoch": 73.46885245901639, "grad_norm": 2.9259653091430664, "learning_rate": 3.4701625954643182e-06, "loss": 0.232, "step": 22408 }, { "epoch": 73.47213114754098, "grad_norm": 2.841071128845215, "learning_rate": 3.4693583888877045e-06, "loss": 0.0729, "step": 22409 }, { "epoch": 73.47540983606558, "grad_norm": 2.4869558811187744, "learning_rate": 3.468554255950862e-06, "loss": 0.1457, "step": 22410 }, { "epoch": 73.47868852459017, "grad_norm": 1.874733328819275, "learning_rate": 3.4677501966628645e-06, "loss": 0.0336, "step": 22411 }, { "epoch": 73.48196721311476, "grad_norm": 2.5994296073913574, "learning_rate": 3.4669462110327758e-06, "loss": 0.177, "step": 22412 }, { "epoch": 73.48524590163935, "grad_norm": 2.8816978931427, "learning_rate": 3.4661422990696604e-06, "loss": 0.1939, "step": 22413 }, { "epoch": 73.48852459016393, "grad_norm": 2.967966318130493, "learning_rate": 3.4653384607825833e-06, "loss": 0.1683, "step": 22414 }, { "epoch": 73.49180327868852, "grad_norm": 1.9535737037658691, "learning_rate": 3.4645346961806083e-06, "loss": 0.0543, "step": 22415 }, { "epoch": 73.49508196721311, "grad_norm": 2.8078341484069824, "learning_rate": 3.4637310052728015e-06, "loss": 0.2794, "step": 22416 }, { "epoch": 73.4983606557377, "grad_norm": 2.508065938949585, "learning_rate": 3.4629273880682244e-06, "loss": 0.1444, "step": 22417 }, { "epoch": 73.5016393442623, "grad_norm": 2.118262767791748, "learning_rate": 3.4621238445759375e-06, "loss": 0.1491, "step": 22418 }, { "epoch": 73.50491803278689, "grad_norm": 2.245979070663452, "learning_rate": 3.4613203748049983e-06, "loss": 0.1039, "step": 22419 }, { "epoch": 73.50819672131148, "grad_norm": 3.695894956588745, "learning_rate": 3.4605169787644745e-06, "loss": 0.0632, "step": 22420 }, { "epoch": 73.51147540983607, "grad_norm": 3.786334991455078, "learning_rate": 3.4597136564634203e-06, "loss": 0.128, "step": 22421 }, { "epoch": 73.51475409836065, "grad_norm": 2.276808738708496, "learning_rate": 3.4589104079108948e-06, "loss": 0.0807, "step": 22422 }, { "epoch": 73.51803278688524, "grad_norm": 2.60168194770813, "learning_rate": 3.4581072331159527e-06, "loss": 0.1118, "step": 22423 }, { "epoch": 73.52131147540983, "grad_norm": 2.534916877746582, "learning_rate": 3.4573041320876566e-06, "loss": 0.0939, "step": 22424 }, { "epoch": 73.52459016393442, "grad_norm": 3.173689126968384, "learning_rate": 3.4565011048350593e-06, "loss": 0.1367, "step": 22425 }, { "epoch": 73.52786885245902, "grad_norm": 1.959256887435913, "learning_rate": 3.455698151367215e-06, "loss": 0.1551, "step": 22426 }, { "epoch": 73.53114754098361, "grad_norm": 2.6679539680480957, "learning_rate": 3.4548952716931762e-06, "loss": 0.1222, "step": 22427 }, { "epoch": 73.5344262295082, "grad_norm": 1.9119166135787964, "learning_rate": 3.4540924658220008e-06, "loss": 0.0624, "step": 22428 }, { "epoch": 73.53770491803279, "grad_norm": 2.7825233936309814, "learning_rate": 3.453289733762739e-06, "loss": 0.1043, "step": 22429 }, { "epoch": 73.54098360655738, "grad_norm": 2.8460493087768555, "learning_rate": 3.452487075524439e-06, "loss": 0.1268, "step": 22430 }, { "epoch": 73.54426229508196, "grad_norm": 3.199382781982422, "learning_rate": 3.4516844911161584e-06, "loss": 0.1971, "step": 22431 }, { "epoch": 73.54754098360655, "grad_norm": 2.326322555541992, "learning_rate": 3.450881980546944e-06, "loss": 0.138, "step": 22432 }, { "epoch": 73.55081967213114, "grad_norm": 3.483548879623413, "learning_rate": 3.4500795438258404e-06, "loss": 0.0633, "step": 22433 }, { "epoch": 73.55409836065574, "grad_norm": 2.8686509132385254, "learning_rate": 3.449277180961904e-06, "loss": 0.1161, "step": 22434 }, { "epoch": 73.55737704918033, "grad_norm": 2.485964775085449, "learning_rate": 3.4484748919641786e-06, "loss": 0.1899, "step": 22435 }, { "epoch": 73.56065573770492, "grad_norm": 2.6636369228363037, "learning_rate": 3.447672676841707e-06, "loss": 0.2754, "step": 22436 }, { "epoch": 73.56393442622951, "grad_norm": 1.9418033361434937, "learning_rate": 3.4468705356035427e-06, "loss": 0.0286, "step": 22437 }, { "epoch": 73.5672131147541, "grad_norm": 2.9688756465911865, "learning_rate": 3.4460684682587253e-06, "loss": 0.1729, "step": 22438 }, { "epoch": 73.57049180327868, "grad_norm": 2.986886739730835, "learning_rate": 3.4452664748163013e-06, "loss": 0.1152, "step": 22439 }, { "epoch": 73.57377049180327, "grad_norm": 2.4770398139953613, "learning_rate": 3.444464555285313e-06, "loss": 0.073, "step": 22440 }, { "epoch": 73.57704918032788, "grad_norm": 2.8997254371643066, "learning_rate": 3.4436627096747997e-06, "loss": 0.165, "step": 22441 }, { "epoch": 73.58032786885246, "grad_norm": 2.8626956939697266, "learning_rate": 3.4428609379938095e-06, "loss": 0.0887, "step": 22442 }, { "epoch": 73.58360655737705, "grad_norm": 2.455336570739746, "learning_rate": 3.44205924025138e-06, "loss": 0.0661, "step": 22443 }, { "epoch": 73.58688524590164, "grad_norm": 2.5790467262268066, "learning_rate": 3.4412576164565503e-06, "loss": 0.1281, "step": 22444 }, { "epoch": 73.59016393442623, "grad_norm": 2.4961395263671875, "learning_rate": 3.4404560666183574e-06, "loss": 0.0686, "step": 22445 }, { "epoch": 73.59344262295082, "grad_norm": 2.487830400466919, "learning_rate": 3.439654590745847e-06, "loss": 0.2319, "step": 22446 }, { "epoch": 73.5967213114754, "grad_norm": 3.1409294605255127, "learning_rate": 3.4388531888480505e-06, "loss": 0.1786, "step": 22447 }, { "epoch": 73.6, "grad_norm": 2.362703561782837, "learning_rate": 3.4380518609340076e-06, "loss": 0.1009, "step": 22448 }, { "epoch": 73.6032786885246, "grad_norm": 2.5809030532836914, "learning_rate": 3.4372506070127476e-06, "loss": 0.1279, "step": 22449 }, { "epoch": 73.60655737704919, "grad_norm": 2.1069681644439697, "learning_rate": 3.4364494270933156e-06, "loss": 0.1576, "step": 22450 }, { "epoch": 73.60983606557377, "grad_norm": 2.5363521575927734, "learning_rate": 3.43564832118474e-06, "loss": 0.0793, "step": 22451 }, { "epoch": 73.61311475409836, "grad_norm": 2.271193027496338, "learning_rate": 3.434847289296055e-06, "loss": 0.2075, "step": 22452 }, { "epoch": 73.61639344262295, "grad_norm": 2.3428611755371094, "learning_rate": 3.434046331436293e-06, "loss": 0.1255, "step": 22453 }, { "epoch": 73.61967213114754, "grad_norm": 1.7041531801223755, "learning_rate": 3.4332454476144815e-06, "loss": 0.0327, "step": 22454 }, { "epoch": 73.62295081967213, "grad_norm": 2.352905511856079, "learning_rate": 3.4324446378396582e-06, "loss": 0.1767, "step": 22455 }, { "epoch": 73.62622950819672, "grad_norm": 1.5153634548187256, "learning_rate": 3.4316439021208514e-06, "loss": 0.0345, "step": 22456 }, { "epoch": 73.62950819672132, "grad_norm": 2.927995204925537, "learning_rate": 3.4308432404670887e-06, "loss": 0.1343, "step": 22457 }, { "epoch": 73.6327868852459, "grad_norm": 2.290757179260254, "learning_rate": 3.4300426528873952e-06, "loss": 0.122, "step": 22458 }, { "epoch": 73.6360655737705, "grad_norm": 2.005877733230591, "learning_rate": 3.4292421393908047e-06, "loss": 0.0938, "step": 22459 }, { "epoch": 73.63934426229508, "grad_norm": 2.6956920623779297, "learning_rate": 3.4284416999863413e-06, "loss": 0.1156, "step": 22460 }, { "epoch": 73.64262295081967, "grad_norm": 2.726025104522705, "learning_rate": 3.4276413346830307e-06, "loss": 0.0706, "step": 22461 }, { "epoch": 73.64590163934426, "grad_norm": 2.673462152481079, "learning_rate": 3.4268410434898937e-06, "loss": 0.1281, "step": 22462 }, { "epoch": 73.64918032786885, "grad_norm": 2.1514201164245605, "learning_rate": 3.4260408264159618e-06, "loss": 0.1264, "step": 22463 }, { "epoch": 73.65245901639344, "grad_norm": 2.4145264625549316, "learning_rate": 3.4252406834702555e-06, "loss": 0.0857, "step": 22464 }, { "epoch": 73.65573770491804, "grad_norm": 2.0143179893493652, "learning_rate": 3.424440614661796e-06, "loss": 0.1189, "step": 22465 }, { "epoch": 73.65901639344263, "grad_norm": 2.6657116413116455, "learning_rate": 3.4236406199996054e-06, "loss": 0.192, "step": 22466 }, { "epoch": 73.66229508196722, "grad_norm": 2.981397867202759, "learning_rate": 3.422840699492701e-06, "loss": 0.102, "step": 22467 }, { "epoch": 73.6655737704918, "grad_norm": 5.030497074127197, "learning_rate": 3.42204085315011e-06, "loss": 0.0961, "step": 22468 }, { "epoch": 73.66885245901639, "grad_norm": 2.5798115730285645, "learning_rate": 3.421241080980847e-06, "loss": 0.1043, "step": 22469 }, { "epoch": 73.67213114754098, "grad_norm": 2.8192458152770996, "learning_rate": 3.420441382993932e-06, "loss": 0.1142, "step": 22470 }, { "epoch": 73.67540983606557, "grad_norm": 2.7815349102020264, "learning_rate": 3.419641759198378e-06, "loss": 0.1962, "step": 22471 }, { "epoch": 73.67868852459016, "grad_norm": 2.83298397064209, "learning_rate": 3.418842209603208e-06, "loss": 0.1907, "step": 22472 }, { "epoch": 73.68196721311476, "grad_norm": 2.6344783306121826, "learning_rate": 3.418042734217435e-06, "loss": 0.1531, "step": 22473 }, { "epoch": 73.68524590163935, "grad_norm": 2.9055898189544678, "learning_rate": 3.4172433330500732e-06, "loss": 0.1362, "step": 22474 }, { "epoch": 73.68852459016394, "grad_norm": 2.9369776248931885, "learning_rate": 3.4164440061101335e-06, "loss": 0.0854, "step": 22475 }, { "epoch": 73.69180327868852, "grad_norm": 2.8090813159942627, "learning_rate": 3.4156447534066372e-06, "loss": 0.1923, "step": 22476 }, { "epoch": 73.69508196721311, "grad_norm": 2.0453128814697266, "learning_rate": 3.414845574948592e-06, "loss": 0.0464, "step": 22477 }, { "epoch": 73.6983606557377, "grad_norm": 3.112783193588257, "learning_rate": 3.4140464707450096e-06, "loss": 0.2322, "step": 22478 }, { "epoch": 73.70163934426229, "grad_norm": 2.4751806259155273, "learning_rate": 3.4132474408048976e-06, "loss": 0.1451, "step": 22479 }, { "epoch": 73.70491803278688, "grad_norm": 2.3683722019195557, "learning_rate": 3.412448485137273e-06, "loss": 0.0523, "step": 22480 }, { "epoch": 73.70819672131148, "grad_norm": 3.238739013671875, "learning_rate": 3.4116496037511405e-06, "loss": 0.2809, "step": 22481 }, { "epoch": 73.71147540983607, "grad_norm": 2.3297603130340576, "learning_rate": 3.410850796655505e-06, "loss": 0.1039, "step": 22482 }, { "epoch": 73.71475409836066, "grad_norm": 2.143301248550415, "learning_rate": 3.4100520638593827e-06, "loss": 0.0503, "step": 22483 }, { "epoch": 73.71803278688525, "grad_norm": 2.008011817932129, "learning_rate": 3.409253405371774e-06, "loss": 0.1023, "step": 22484 }, { "epoch": 73.72131147540983, "grad_norm": 2.5164072513580322, "learning_rate": 3.4084548212016823e-06, "loss": 0.0791, "step": 22485 }, { "epoch": 73.72459016393442, "grad_norm": 3.468900680541992, "learning_rate": 3.4076563113581196e-06, "loss": 0.1153, "step": 22486 }, { "epoch": 73.72786885245901, "grad_norm": 4.022477149963379, "learning_rate": 3.406857875850087e-06, "loss": 0.0848, "step": 22487 }, { "epoch": 73.73114754098361, "grad_norm": 2.964857816696167, "learning_rate": 3.406059514686586e-06, "loss": 0.269, "step": 22488 }, { "epoch": 73.7344262295082, "grad_norm": 2.5184481143951416, "learning_rate": 3.405261227876617e-06, "loss": 0.1176, "step": 22489 }, { "epoch": 73.73770491803279, "grad_norm": 2.696354866027832, "learning_rate": 3.404463015429188e-06, "loss": 0.0816, "step": 22490 }, { "epoch": 73.74098360655738, "grad_norm": 2.653339385986328, "learning_rate": 3.4036648773532967e-06, "loss": 0.1, "step": 22491 }, { "epoch": 73.74426229508197, "grad_norm": 2.433110237121582, "learning_rate": 3.402866813657941e-06, "loss": 0.0735, "step": 22492 }, { "epoch": 73.74754098360656, "grad_norm": 2.2721915245056152, "learning_rate": 3.402068824352119e-06, "loss": 0.1997, "step": 22493 }, { "epoch": 73.75081967213114, "grad_norm": 1.960583209991455, "learning_rate": 3.401270909444835e-06, "loss": 0.0742, "step": 22494 }, { "epoch": 73.75409836065573, "grad_norm": 1.9890438318252563, "learning_rate": 3.4004730689450817e-06, "loss": 0.0426, "step": 22495 }, { "epoch": 73.75737704918033, "grad_norm": 2.452186346054077, "learning_rate": 3.399675302861856e-06, "loss": 0.1356, "step": 22496 }, { "epoch": 73.76065573770492, "grad_norm": 2.81215763092041, "learning_rate": 3.3988776112041497e-06, "loss": 0.1224, "step": 22497 }, { "epoch": 73.76393442622951, "grad_norm": 2.1416561603546143, "learning_rate": 3.3980799939809674e-06, "loss": 0.0583, "step": 22498 }, { "epoch": 73.7672131147541, "grad_norm": 4.535206317901611, "learning_rate": 3.3972824512012957e-06, "loss": 0.0284, "step": 22499 }, { "epoch": 73.77049180327869, "grad_norm": 2.870635747909546, "learning_rate": 3.39648498287413e-06, "loss": 0.0992, "step": 22500 }, { "epoch": 73.77377049180328, "grad_norm": 2.6825270652770996, "learning_rate": 3.3956875890084586e-06, "loss": 0.2211, "step": 22501 }, { "epoch": 73.77704918032786, "grad_norm": 2.1144542694091797, "learning_rate": 3.3948902696132803e-06, "loss": 0.0787, "step": 22502 }, { "epoch": 73.78032786885245, "grad_norm": 3.2534632682800293, "learning_rate": 3.394093024697581e-06, "loss": 0.2291, "step": 22503 }, { "epoch": 73.78360655737706, "grad_norm": 3.1491494178771973, "learning_rate": 3.393295854270352e-06, "loss": 0.2504, "step": 22504 }, { "epoch": 73.78688524590164, "grad_norm": 2.7026212215423584, "learning_rate": 3.392498758340581e-06, "loss": 0.123, "step": 22505 }, { "epoch": 73.79016393442623, "grad_norm": 2.3019585609436035, "learning_rate": 3.3917017369172533e-06, "loss": 0.0655, "step": 22506 }, { "epoch": 73.79344262295082, "grad_norm": 2.7644598484039307, "learning_rate": 3.390904790009363e-06, "loss": 0.1342, "step": 22507 }, { "epoch": 73.79672131147541, "grad_norm": 2.9472827911376953, "learning_rate": 3.3901079176258923e-06, "loss": 0.111, "step": 22508 }, { "epoch": 73.8, "grad_norm": 2.6516714096069336, "learning_rate": 3.3893111197758276e-06, "loss": 0.2273, "step": 22509 }, { "epoch": 73.80327868852459, "grad_norm": 2.272641897201538, "learning_rate": 3.3885143964681513e-06, "loss": 0.179, "step": 22510 }, { "epoch": 73.80655737704917, "grad_norm": 2.774977207183838, "learning_rate": 3.387717747711852e-06, "loss": 0.1742, "step": 22511 }, { "epoch": 73.80983606557378, "grad_norm": 3.1702005863189697, "learning_rate": 3.3869211735159093e-06, "loss": 0.1649, "step": 22512 }, { "epoch": 73.81311475409836, "grad_norm": 2.444775342941284, "learning_rate": 3.386124673889307e-06, "loss": 0.0997, "step": 22513 }, { "epoch": 73.81639344262295, "grad_norm": 2.1660168170928955, "learning_rate": 3.385328248841022e-06, "loss": 0.1638, "step": 22514 }, { "epoch": 73.81967213114754, "grad_norm": 2.773127555847168, "learning_rate": 3.3845318983800423e-06, "loss": 0.134, "step": 22515 }, { "epoch": 73.82295081967213, "grad_norm": 2.5207574367523193, "learning_rate": 3.3837356225153426e-06, "loss": 0.1271, "step": 22516 }, { "epoch": 73.82622950819672, "grad_norm": 3.1090381145477295, "learning_rate": 3.3829394212559043e-06, "loss": 0.1455, "step": 22517 }, { "epoch": 73.8295081967213, "grad_norm": 2.805560350418091, "learning_rate": 3.3821432946107035e-06, "loss": 0.112, "step": 22518 }, { "epoch": 73.8327868852459, "grad_norm": 3.0429368019104004, "learning_rate": 3.381347242588715e-06, "loss": 0.0738, "step": 22519 }, { "epoch": 73.8360655737705, "grad_norm": 2.8981728553771973, "learning_rate": 3.3805512651989215e-06, "loss": 0.2102, "step": 22520 }, { "epoch": 73.83934426229509, "grad_norm": 2.7096199989318848, "learning_rate": 3.3797553624502945e-06, "loss": 0.1813, "step": 22521 }, { "epoch": 73.84262295081967, "grad_norm": 2.360456705093384, "learning_rate": 3.378959534351809e-06, "loss": 0.0811, "step": 22522 }, { "epoch": 73.84590163934426, "grad_norm": 2.5843005180358887, "learning_rate": 3.3781637809124358e-06, "loss": 0.1979, "step": 22523 }, { "epoch": 73.84918032786885, "grad_norm": 2.166248321533203, "learning_rate": 3.377368102141154e-06, "loss": 0.1155, "step": 22524 }, { "epoch": 73.85245901639344, "grad_norm": 2.5358753204345703, "learning_rate": 3.376572498046934e-06, "loss": 0.2809, "step": 22525 }, { "epoch": 73.85573770491803, "grad_norm": 2.6323254108428955, "learning_rate": 3.3757769686387444e-06, "loss": 0.1638, "step": 22526 }, { "epoch": 73.85901639344263, "grad_norm": 2.0700759887695312, "learning_rate": 3.374981513925554e-06, "loss": 0.044, "step": 22527 }, { "epoch": 73.86229508196722, "grad_norm": 3.004793643951416, "learning_rate": 3.3741861339163383e-06, "loss": 0.215, "step": 22528 }, { "epoch": 73.8655737704918, "grad_norm": 2.933572769165039, "learning_rate": 3.373390828620063e-06, "loss": 0.0666, "step": 22529 }, { "epoch": 73.8688524590164, "grad_norm": 2.5576722621917725, "learning_rate": 3.3725955980456958e-06, "loss": 0.1184, "step": 22530 }, { "epoch": 73.87213114754098, "grad_norm": 1.8616957664489746, "learning_rate": 3.3718004422022056e-06, "loss": 0.1969, "step": 22531 }, { "epoch": 73.87540983606557, "grad_norm": 2.521071195602417, "learning_rate": 3.3710053610985517e-06, "loss": 0.1241, "step": 22532 }, { "epoch": 73.87868852459016, "grad_norm": 1.9785282611846924, "learning_rate": 3.3702103547437093e-06, "loss": 0.0789, "step": 22533 }, { "epoch": 73.88196721311475, "grad_norm": 2.333637237548828, "learning_rate": 3.369415423146638e-06, "loss": 0.0692, "step": 22534 }, { "epoch": 73.88524590163935, "grad_norm": 1.925811767578125, "learning_rate": 3.3686205663162987e-06, "loss": 0.0591, "step": 22535 }, { "epoch": 73.88852459016394, "grad_norm": 2.9404025077819824, "learning_rate": 3.3678257842616613e-06, "loss": 0.1536, "step": 22536 }, { "epoch": 73.89180327868853, "grad_norm": 2.921757936477661, "learning_rate": 3.3670310769916827e-06, "loss": 0.1257, "step": 22537 }, { "epoch": 73.89508196721312, "grad_norm": 2.2004828453063965, "learning_rate": 3.3662364445153227e-06, "loss": 0.1426, "step": 22538 }, { "epoch": 73.8983606557377, "grad_norm": 2.5725834369659424, "learning_rate": 3.3654418868415472e-06, "loss": 0.1077, "step": 22539 }, { "epoch": 73.90163934426229, "grad_norm": 2.836156129837036, "learning_rate": 3.3646474039793133e-06, "loss": 0.1388, "step": 22540 }, { "epoch": 73.90491803278688, "grad_norm": 3.8392446041107178, "learning_rate": 3.363852995937575e-06, "loss": 0.1616, "step": 22541 }, { "epoch": 73.90819672131147, "grad_norm": 2.38942551612854, "learning_rate": 3.363058662725297e-06, "loss": 0.0571, "step": 22542 }, { "epoch": 73.91147540983607, "grad_norm": 2.517205238342285, "learning_rate": 3.362264404351434e-06, "loss": 0.0925, "step": 22543 }, { "epoch": 73.91475409836066, "grad_norm": 2.8512039184570312, "learning_rate": 3.361470220824942e-06, "loss": 0.1424, "step": 22544 }, { "epoch": 73.91803278688525, "grad_norm": 2.8649539947509766, "learning_rate": 3.3606761121547703e-06, "loss": 0.1357, "step": 22545 }, { "epoch": 73.92131147540984, "grad_norm": 2.455777168273926, "learning_rate": 3.3598820783498833e-06, "loss": 0.056, "step": 22546 }, { "epoch": 73.92459016393443, "grad_norm": 2.7150204181671143, "learning_rate": 3.359088119419229e-06, "loss": 0.1566, "step": 22547 }, { "epoch": 73.92786885245901, "grad_norm": 2.398996353149414, "learning_rate": 3.358294235371763e-06, "loss": 0.0737, "step": 22548 }, { "epoch": 73.9311475409836, "grad_norm": 4.061474800109863, "learning_rate": 3.35750042621643e-06, "loss": 0.1005, "step": 22549 }, { "epoch": 73.93442622950819, "grad_norm": 2.706041097640991, "learning_rate": 3.3567066919621894e-06, "loss": 0.281, "step": 22550 }, { "epoch": 73.9377049180328, "grad_norm": 2.4461562633514404, "learning_rate": 3.355913032617989e-06, "loss": 0.1313, "step": 22551 }, { "epoch": 73.94098360655738, "grad_norm": 2.6410038471221924, "learning_rate": 3.3551194481927763e-06, "loss": 0.1587, "step": 22552 }, { "epoch": 73.94426229508197, "grad_norm": 1.967308521270752, "learning_rate": 3.3543259386954984e-06, "loss": 0.0559, "step": 22553 }, { "epoch": 73.94754098360656, "grad_norm": 3.213892936706543, "learning_rate": 3.3535325041351084e-06, "loss": 0.2477, "step": 22554 }, { "epoch": 73.95081967213115, "grad_norm": 2.161639451980591, "learning_rate": 3.3527391445205494e-06, "loss": 0.1142, "step": 22555 }, { "epoch": 73.95409836065573, "grad_norm": 2.5882465839385986, "learning_rate": 3.351945859860769e-06, "loss": 0.1446, "step": 22556 }, { "epoch": 73.95737704918032, "grad_norm": 3.0109150409698486, "learning_rate": 3.351152650164711e-06, "loss": 0.1988, "step": 22557 }, { "epoch": 73.96065573770491, "grad_norm": 2.085843563079834, "learning_rate": 3.350359515441316e-06, "loss": 0.0337, "step": 22558 }, { "epoch": 73.96393442622951, "grad_norm": 1.9209247827529907, "learning_rate": 3.3495664556995345e-06, "loss": 0.0789, "step": 22559 }, { "epoch": 73.9672131147541, "grad_norm": 3.4800920486450195, "learning_rate": 3.3487734709483065e-06, "loss": 0.1101, "step": 22560 }, { "epoch": 73.97049180327869, "grad_norm": 2.3245766162872314, "learning_rate": 3.347980561196573e-06, "loss": 0.1447, "step": 22561 }, { "epoch": 73.97377049180328, "grad_norm": 2.379912853240967, "learning_rate": 3.347187726453273e-06, "loss": 0.1047, "step": 22562 }, { "epoch": 73.97704918032787, "grad_norm": 1.937309741973877, "learning_rate": 3.3463949667273497e-06, "loss": 0.1317, "step": 22563 }, { "epoch": 73.98032786885246, "grad_norm": 2.8733506202697754, "learning_rate": 3.3456022820277422e-06, "loss": 0.115, "step": 22564 }, { "epoch": 73.98360655737704, "grad_norm": 3.3083560466766357, "learning_rate": 3.3448096723633882e-06, "loss": 0.1904, "step": 22565 }, { "epoch": 73.98688524590163, "grad_norm": 2.166677713394165, "learning_rate": 3.3440171377432205e-06, "loss": 0.0993, "step": 22566 }, { "epoch": 73.99016393442623, "grad_norm": 3.576936721801758, "learning_rate": 3.3432246781761845e-06, "loss": 0.2957, "step": 22567 }, { "epoch": 73.99344262295082, "grad_norm": 3.335200309753418, "learning_rate": 3.3424322936712106e-06, "loss": 0.1743, "step": 22568 }, { "epoch": 73.99672131147541, "grad_norm": 3.0045080184936523, "learning_rate": 3.341639984237235e-06, "loss": 0.0791, "step": 22569 }, { "epoch": 74.0, "grad_norm": 3.1553173065185547, "learning_rate": 3.3408477498831917e-06, "loss": 0.148, "step": 22570 }, { "epoch": 74.00327868852459, "grad_norm": 3.0880677700042725, "learning_rate": 3.340055590618011e-06, "loss": 0.1439, "step": 22571 }, { "epoch": 74.00655737704918, "grad_norm": 2.425565004348755, "learning_rate": 3.3392635064506308e-06, "loss": 0.1805, "step": 22572 }, { "epoch": 74.00983606557377, "grad_norm": 3.049088478088379, "learning_rate": 3.33847149738998e-06, "loss": 0.2025, "step": 22573 }, { "epoch": 74.01311475409837, "grad_norm": 2.8489954471588135, "learning_rate": 3.3376795634449887e-06, "loss": 0.1674, "step": 22574 }, { "epoch": 74.01639344262296, "grad_norm": 2.1470789909362793, "learning_rate": 3.336887704624585e-06, "loss": 0.0573, "step": 22575 }, { "epoch": 74.01967213114754, "grad_norm": 2.635305881500244, "learning_rate": 3.336095920937703e-06, "loss": 0.0802, "step": 22576 }, { "epoch": 74.02295081967213, "grad_norm": 2.2296810150146484, "learning_rate": 3.335304212393269e-06, "loss": 0.0593, "step": 22577 }, { "epoch": 74.02622950819672, "grad_norm": 3.039189100265503, "learning_rate": 3.3345125790002096e-06, "loss": 0.1332, "step": 22578 }, { "epoch": 74.02950819672131, "grad_norm": 3.3281915187835693, "learning_rate": 3.3337210207674508e-06, "loss": 0.0968, "step": 22579 }, { "epoch": 74.0327868852459, "grad_norm": 1.5461310148239136, "learning_rate": 3.332929537703915e-06, "loss": 0.0233, "step": 22580 }, { "epoch": 74.03606557377049, "grad_norm": 2.5855977535247803, "learning_rate": 3.3321381298185353e-06, "loss": 0.2341, "step": 22581 }, { "epoch": 74.03934426229509, "grad_norm": 2.8225018978118896, "learning_rate": 3.3313467971202296e-06, "loss": 0.1796, "step": 22582 }, { "epoch": 74.04262295081968, "grad_norm": 2.747941017150879, "learning_rate": 3.3305555396179225e-06, "loss": 0.0761, "step": 22583 }, { "epoch": 74.04590163934427, "grad_norm": 2.3944413661956787, "learning_rate": 3.329764357320534e-06, "loss": 0.1771, "step": 22584 }, { "epoch": 74.04918032786885, "grad_norm": 3.2532618045806885, "learning_rate": 3.3289732502369896e-06, "loss": 0.179, "step": 22585 }, { "epoch": 74.05245901639344, "grad_norm": 3.2314956188201904, "learning_rate": 3.328182218376209e-06, "loss": 0.0895, "step": 22586 }, { "epoch": 74.05573770491803, "grad_norm": 3.4141931533813477, "learning_rate": 3.3273912617471073e-06, "loss": 0.1583, "step": 22587 }, { "epoch": 74.05901639344262, "grad_norm": 3.105802059173584, "learning_rate": 3.32660038035861e-06, "loss": 0.2136, "step": 22588 }, { "epoch": 74.0622950819672, "grad_norm": 2.369626045227051, "learning_rate": 3.3258095742196316e-06, "loss": 0.1195, "step": 22589 }, { "epoch": 74.06557377049181, "grad_norm": 2.1556851863861084, "learning_rate": 3.3250188433390908e-06, "loss": 0.14, "step": 22590 }, { "epoch": 74.0688524590164, "grad_norm": 3.4043614864349365, "learning_rate": 3.3242281877258985e-06, "loss": 0.1548, "step": 22591 }, { "epoch": 74.07213114754099, "grad_norm": 4.110113143920898, "learning_rate": 3.3234376073889787e-06, "loss": 0.201, "step": 22592 }, { "epoch": 74.07540983606557, "grad_norm": 2.1101503372192383, "learning_rate": 3.322647102337241e-06, "loss": 0.0732, "step": 22593 }, { "epoch": 74.07868852459016, "grad_norm": 2.0822744369506836, "learning_rate": 3.3218566725795966e-06, "loss": 0.0869, "step": 22594 }, { "epoch": 74.08196721311475, "grad_norm": 3.2359704971313477, "learning_rate": 3.3210663181249647e-06, "loss": 0.1093, "step": 22595 }, { "epoch": 74.08524590163934, "grad_norm": 2.2376291751861572, "learning_rate": 3.3202760389822553e-06, "loss": 0.0931, "step": 22596 }, { "epoch": 74.08852459016393, "grad_norm": 2.2442750930786133, "learning_rate": 3.3194858351603744e-06, "loss": 0.087, "step": 22597 }, { "epoch": 74.09180327868853, "grad_norm": 2.2037906646728516, "learning_rate": 3.318695706668241e-06, "loss": 0.0596, "step": 22598 }, { "epoch": 74.09508196721312, "grad_norm": 2.8115811347961426, "learning_rate": 3.3179056535147602e-06, "loss": 0.1983, "step": 22599 }, { "epoch": 74.09836065573771, "grad_norm": 2.5215601921081543, "learning_rate": 3.3171156757088406e-06, "loss": 0.2107, "step": 22600 }, { "epoch": 74.1016393442623, "grad_norm": 2.402634620666504, "learning_rate": 3.3163257732593866e-06, "loss": 0.0935, "step": 22601 }, { "epoch": 74.10491803278688, "grad_norm": 2.570535659790039, "learning_rate": 3.315535946175312e-06, "loss": 0.1272, "step": 22602 }, { "epoch": 74.10819672131147, "grad_norm": 2.5672340393066406, "learning_rate": 3.3147461944655203e-06, "loss": 0.0822, "step": 22603 }, { "epoch": 74.11147540983606, "grad_norm": 2.074010133743286, "learning_rate": 3.3139565181389155e-06, "loss": 0.0386, "step": 22604 }, { "epoch": 74.11475409836065, "grad_norm": 2.9892966747283936, "learning_rate": 3.3131669172044035e-06, "loss": 0.1729, "step": 22605 }, { "epoch": 74.11803278688525, "grad_norm": 2.680476188659668, "learning_rate": 3.3123773916708836e-06, "loss": 0.1135, "step": 22606 }, { "epoch": 74.12131147540984, "grad_norm": 2.2915501594543457, "learning_rate": 3.311587941547265e-06, "loss": 0.1752, "step": 22607 }, { "epoch": 74.12459016393443, "grad_norm": 2.909956932067871, "learning_rate": 3.310798566842447e-06, "loss": 0.1934, "step": 22608 }, { "epoch": 74.12786885245902, "grad_norm": 1.84333074092865, "learning_rate": 3.3100092675653296e-06, "loss": 0.1387, "step": 22609 }, { "epoch": 74.1311475409836, "grad_norm": 2.6120195388793945, "learning_rate": 3.30922004372481e-06, "loss": 0.0843, "step": 22610 }, { "epoch": 74.1344262295082, "grad_norm": 2.1459832191467285, "learning_rate": 3.308430895329795e-06, "loss": 0.0425, "step": 22611 }, { "epoch": 74.13770491803278, "grad_norm": 2.4559779167175293, "learning_rate": 3.3076418223891792e-06, "loss": 0.1414, "step": 22612 }, { "epoch": 74.14098360655737, "grad_norm": 2.4037716388702393, "learning_rate": 3.3068528249118602e-06, "loss": 0.0697, "step": 22613 }, { "epoch": 74.14426229508197, "grad_norm": 2.5390419960021973, "learning_rate": 3.306063902906731e-06, "loss": 0.0705, "step": 22614 }, { "epoch": 74.14754098360656, "grad_norm": 2.7511327266693115, "learning_rate": 3.305275056382695e-06, "loss": 0.1032, "step": 22615 }, { "epoch": 74.15081967213115, "grad_norm": 3.8860254287719727, "learning_rate": 3.304486285348644e-06, "loss": 0.1447, "step": 22616 }, { "epoch": 74.15409836065574, "grad_norm": 2.0752933025360107, "learning_rate": 3.3036975898134704e-06, "loss": 0.0964, "step": 22617 }, { "epoch": 74.15737704918033, "grad_norm": 2.8344197273254395, "learning_rate": 3.3029089697860694e-06, "loss": 0.1079, "step": 22618 }, { "epoch": 74.16065573770491, "grad_norm": 3.1020452976226807, "learning_rate": 3.3021204252753293e-06, "loss": 0.0601, "step": 22619 }, { "epoch": 74.1639344262295, "grad_norm": 8.111727714538574, "learning_rate": 3.301331956290149e-06, "loss": 0.0933, "step": 22620 }, { "epoch": 74.1672131147541, "grad_norm": 1.9963369369506836, "learning_rate": 3.3005435628394167e-06, "loss": 0.0375, "step": 22621 }, { "epoch": 74.1704918032787, "grad_norm": 2.111835241317749, "learning_rate": 3.2997552449320204e-06, "loss": 0.1129, "step": 22622 }, { "epoch": 74.17377049180328, "grad_norm": 1.9601846933364868, "learning_rate": 3.2989670025768473e-06, "loss": 0.0764, "step": 22623 }, { "epoch": 74.17704918032787, "grad_norm": 2.511963129043579, "learning_rate": 3.2981788357827914e-06, "loss": 0.0987, "step": 22624 }, { "epoch": 74.18032786885246, "grad_norm": 2.521423101425171, "learning_rate": 3.2973907445587384e-06, "loss": 0.1571, "step": 22625 }, { "epoch": 74.18360655737705, "grad_norm": 2.0226993560791016, "learning_rate": 3.2966027289135725e-06, "loss": 0.0895, "step": 22626 }, { "epoch": 74.18688524590164, "grad_norm": 5.574810028076172, "learning_rate": 3.2958147888561776e-06, "loss": 0.124, "step": 22627 }, { "epoch": 74.19016393442622, "grad_norm": 3.0244803428649902, "learning_rate": 3.295026924395446e-06, "loss": 0.1289, "step": 22628 }, { "epoch": 74.19344262295083, "grad_norm": 2.3970589637756348, "learning_rate": 3.2942391355402557e-06, "loss": 0.0553, "step": 22629 }, { "epoch": 74.19672131147541, "grad_norm": 2.0674335956573486, "learning_rate": 3.293451422299492e-06, "loss": 0.0909, "step": 22630 }, { "epoch": 74.2, "grad_norm": 2.9450058937072754, "learning_rate": 3.2926637846820366e-06, "loss": 0.2191, "step": 22631 }, { "epoch": 74.20327868852459, "grad_norm": 2.85343074798584, "learning_rate": 3.291876222696767e-06, "loss": 0.2438, "step": 22632 }, { "epoch": 74.20655737704918, "grad_norm": 2.62608003616333, "learning_rate": 3.2910887363525723e-06, "loss": 0.2781, "step": 22633 }, { "epoch": 74.20983606557377, "grad_norm": 3.149473190307617, "learning_rate": 3.2903013256583273e-06, "loss": 0.1241, "step": 22634 }, { "epoch": 74.21311475409836, "grad_norm": 2.3395872116088867, "learning_rate": 3.2895139906229103e-06, "loss": 0.0565, "step": 22635 }, { "epoch": 74.21639344262294, "grad_norm": 2.018381118774414, "learning_rate": 3.2887267312551975e-06, "loss": 0.1669, "step": 22636 }, { "epoch": 74.21967213114755, "grad_norm": 2.700002908706665, "learning_rate": 3.2879395475640717e-06, "loss": 0.0802, "step": 22637 }, { "epoch": 74.22295081967214, "grad_norm": 2.885472059249878, "learning_rate": 3.2871524395584065e-06, "loss": 0.1134, "step": 22638 }, { "epoch": 74.22622950819672, "grad_norm": 2.5530521869659424, "learning_rate": 3.2863654072470774e-06, "loss": 0.1217, "step": 22639 }, { "epoch": 74.22950819672131, "grad_norm": 2.398833751678467, "learning_rate": 3.2855784506389554e-06, "loss": 0.0848, "step": 22640 }, { "epoch": 74.2327868852459, "grad_norm": 2.208442449569702, "learning_rate": 3.2847915697429213e-06, "loss": 0.1, "step": 22641 }, { "epoch": 74.23606557377049, "grad_norm": 2.7121071815490723, "learning_rate": 3.284004764567843e-06, "loss": 0.0725, "step": 22642 }, { "epoch": 74.23934426229508, "grad_norm": 2.9659361839294434, "learning_rate": 3.2832180351225916e-06, "loss": 0.174, "step": 22643 }, { "epoch": 74.24262295081967, "grad_norm": 3.5128672122955322, "learning_rate": 3.2824313814160434e-06, "loss": 0.3128, "step": 22644 }, { "epoch": 74.24590163934427, "grad_norm": 1.751803994178772, "learning_rate": 3.281644803457067e-06, "loss": 0.0376, "step": 22645 }, { "epoch": 74.24918032786886, "grad_norm": 2.929875373840332, "learning_rate": 3.280858301254526e-06, "loss": 0.2641, "step": 22646 }, { "epoch": 74.25245901639344, "grad_norm": 3.495497226715088, "learning_rate": 3.280071874817298e-06, "loss": 0.292, "step": 22647 }, { "epoch": 74.25573770491803, "grad_norm": 2.6958065032958984, "learning_rate": 3.2792855241542465e-06, "loss": 0.0747, "step": 22648 }, { "epoch": 74.25901639344262, "grad_norm": 3.482053756713867, "learning_rate": 3.2784992492742385e-06, "loss": 0.179, "step": 22649 }, { "epoch": 74.26229508196721, "grad_norm": 1.9579716920852661, "learning_rate": 3.2777130501861364e-06, "loss": 0.0829, "step": 22650 }, { "epoch": 74.2655737704918, "grad_norm": 1.3798731565475464, "learning_rate": 3.2769269268988125e-06, "loss": 0.0226, "step": 22651 }, { "epoch": 74.26885245901639, "grad_norm": 2.408142328262329, "learning_rate": 3.276140879421128e-06, "loss": 0.1486, "step": 22652 }, { "epoch": 74.27213114754099, "grad_norm": 2.915644407272339, "learning_rate": 3.2753549077619417e-06, "loss": 0.1471, "step": 22653 }, { "epoch": 74.27540983606558, "grad_norm": 3.1179330348968506, "learning_rate": 3.2745690119301255e-06, "loss": 0.2433, "step": 22654 }, { "epoch": 74.27868852459017, "grad_norm": 2.264841318130493, "learning_rate": 3.2737831919345353e-06, "loss": 0.0645, "step": 22655 }, { "epoch": 74.28196721311475, "grad_norm": 2.3527755737304688, "learning_rate": 3.2729974477840344e-06, "loss": 0.1383, "step": 22656 }, { "epoch": 74.28524590163934, "grad_norm": 2.250150442123413, "learning_rate": 3.272211779487481e-06, "loss": 0.1104, "step": 22657 }, { "epoch": 74.28852459016393, "grad_norm": 1.7832382917404175, "learning_rate": 3.271426187053731e-06, "loss": 0.1259, "step": 22658 }, { "epoch": 74.29180327868852, "grad_norm": 2.2847626209259033, "learning_rate": 3.2706406704916505e-06, "loss": 0.1119, "step": 22659 }, { "epoch": 74.29508196721312, "grad_norm": 2.510371685028076, "learning_rate": 3.2698552298100938e-06, "loss": 0.1765, "step": 22660 }, { "epoch": 74.29836065573771, "grad_norm": 1.8575077056884766, "learning_rate": 3.2690698650179165e-06, "loss": 0.0474, "step": 22661 }, { "epoch": 74.3016393442623, "grad_norm": 3.0511903762817383, "learning_rate": 3.2682845761239714e-06, "loss": 0.1326, "step": 22662 }, { "epoch": 74.30491803278689, "grad_norm": 2.4543590545654297, "learning_rate": 3.2674993631371198e-06, "loss": 0.2062, "step": 22663 }, { "epoch": 74.30819672131148, "grad_norm": 2.166315793991089, "learning_rate": 3.266714226066213e-06, "loss": 0.1377, "step": 22664 }, { "epoch": 74.31147540983606, "grad_norm": 2.7799556255340576, "learning_rate": 3.265929164920105e-06, "loss": 0.1243, "step": 22665 }, { "epoch": 74.31475409836065, "grad_norm": 4.4703826904296875, "learning_rate": 3.2651441797076432e-06, "loss": 0.3852, "step": 22666 }, { "epoch": 74.31803278688524, "grad_norm": 2.7398650646209717, "learning_rate": 3.264359270437688e-06, "loss": 0.06, "step": 22667 }, { "epoch": 74.32131147540984, "grad_norm": 2.467442035675049, "learning_rate": 3.2635744371190834e-06, "loss": 0.1069, "step": 22668 }, { "epoch": 74.32459016393443, "grad_norm": 2.3188962936401367, "learning_rate": 3.262789679760683e-06, "loss": 0.1603, "step": 22669 }, { "epoch": 74.32786885245902, "grad_norm": 3.2145731449127197, "learning_rate": 3.2620049983713333e-06, "loss": 0.1862, "step": 22670 }, { "epoch": 74.33114754098361, "grad_norm": 2.820404529571533, "learning_rate": 3.2612203929598786e-06, "loss": 0.1041, "step": 22671 }, { "epoch": 74.3344262295082, "grad_norm": 2.960148811340332, "learning_rate": 3.2604358635351752e-06, "loss": 0.1313, "step": 22672 }, { "epoch": 74.33770491803278, "grad_norm": 2.2891321182250977, "learning_rate": 3.259651410106064e-06, "loss": 0.1499, "step": 22673 }, { "epoch": 74.34098360655737, "grad_norm": 2.0145628452301025, "learning_rate": 3.258867032681392e-06, "loss": 0.0423, "step": 22674 }, { "epoch": 74.34426229508196, "grad_norm": 2.1719484329223633, "learning_rate": 3.2580827312699993e-06, "loss": 0.0741, "step": 22675 }, { "epoch": 74.34754098360656, "grad_norm": 2.6064958572387695, "learning_rate": 3.2572985058807373e-06, "loss": 0.3156, "step": 22676 }, { "epoch": 74.35081967213115, "grad_norm": 2.2243196964263916, "learning_rate": 3.2565143565224453e-06, "loss": 0.0652, "step": 22677 }, { "epoch": 74.35409836065574, "grad_norm": 2.53543758392334, "learning_rate": 3.255730283203965e-06, "loss": 0.0819, "step": 22678 }, { "epoch": 74.35737704918033, "grad_norm": 2.1694445610046387, "learning_rate": 3.254946285934135e-06, "loss": 0.1256, "step": 22679 }, { "epoch": 74.36065573770492, "grad_norm": 2.6806752681732178, "learning_rate": 3.2541623647218025e-06, "loss": 0.1678, "step": 22680 }, { "epoch": 74.3639344262295, "grad_norm": 2.857687473297119, "learning_rate": 3.2533785195758026e-06, "loss": 0.1501, "step": 22681 }, { "epoch": 74.3672131147541, "grad_norm": 2.480436086654663, "learning_rate": 3.252594750504975e-06, "loss": 0.1478, "step": 22682 }, { "epoch": 74.37049180327868, "grad_norm": 1.9251970052719116, "learning_rate": 3.2518110575181573e-06, "loss": 0.1108, "step": 22683 }, { "epoch": 74.37377049180328, "grad_norm": 3.3388209342956543, "learning_rate": 3.2510274406241814e-06, "loss": 0.2225, "step": 22684 }, { "epoch": 74.37704918032787, "grad_norm": 2.801506757736206, "learning_rate": 3.250243899831893e-06, "loss": 0.2387, "step": 22685 }, { "epoch": 74.38032786885246, "grad_norm": 3.0799825191497803, "learning_rate": 3.2494604351501223e-06, "loss": 0.1706, "step": 22686 }, { "epoch": 74.38360655737705, "grad_norm": 2.6189115047454834, "learning_rate": 3.2486770465877048e-06, "loss": 0.0997, "step": 22687 }, { "epoch": 74.38688524590164, "grad_norm": 2.10886812210083, "learning_rate": 3.247893734153469e-06, "loss": 0.0735, "step": 22688 }, { "epoch": 74.39016393442623, "grad_norm": 2.9351694583892822, "learning_rate": 3.247110497856255e-06, "loss": 0.3423, "step": 22689 }, { "epoch": 74.39344262295081, "grad_norm": 2.761030912399292, "learning_rate": 3.246327337704892e-06, "loss": 0.1758, "step": 22690 }, { "epoch": 74.3967213114754, "grad_norm": 2.59521222114563, "learning_rate": 3.2455442537082106e-06, "loss": 0.0854, "step": 22691 }, { "epoch": 74.4, "grad_norm": 2.438471555709839, "learning_rate": 3.2447612458750365e-06, "loss": 0.0601, "step": 22692 }, { "epoch": 74.4032786885246, "grad_norm": 1.7640279531478882, "learning_rate": 3.2439783142142067e-06, "loss": 0.1373, "step": 22693 }, { "epoch": 74.40655737704918, "grad_norm": 2.914773941040039, "learning_rate": 3.243195458734546e-06, "loss": 0.1795, "step": 22694 }, { "epoch": 74.40983606557377, "grad_norm": 2.41814923286438, "learning_rate": 3.2424126794448816e-06, "loss": 0.0716, "step": 22695 }, { "epoch": 74.41311475409836, "grad_norm": 2.3264729976654053, "learning_rate": 3.2416299763540372e-06, "loss": 0.0753, "step": 22696 }, { "epoch": 74.41639344262295, "grad_norm": 3.039729356765747, "learning_rate": 3.240847349470845e-06, "loss": 0.0977, "step": 22697 }, { "epoch": 74.41967213114754, "grad_norm": 2.5277135372161865, "learning_rate": 3.2400647988041266e-06, "loss": 0.1497, "step": 22698 }, { "epoch": 74.42295081967212, "grad_norm": 2.054776668548584, "learning_rate": 3.2392823243627024e-06, "loss": 0.0456, "step": 22699 }, { "epoch": 74.42622950819673, "grad_norm": 2.7545289993286133, "learning_rate": 3.238499926155403e-06, "loss": 0.1808, "step": 22700 }, { "epoch": 74.42950819672132, "grad_norm": 4.056042671203613, "learning_rate": 3.2377176041910473e-06, "loss": 0.198, "step": 22701 }, { "epoch": 74.4327868852459, "grad_norm": 2.590395927429199, "learning_rate": 3.2369353584784534e-06, "loss": 0.1723, "step": 22702 }, { "epoch": 74.43606557377049, "grad_norm": 2.400981903076172, "learning_rate": 3.2361531890264486e-06, "loss": 0.1392, "step": 22703 }, { "epoch": 74.43934426229508, "grad_norm": 2.443345069885254, "learning_rate": 3.2353710958438488e-06, "loss": 0.1653, "step": 22704 }, { "epoch": 74.44262295081967, "grad_norm": 2.072868585586548, "learning_rate": 3.234589078939471e-06, "loss": 0.0726, "step": 22705 }, { "epoch": 74.44590163934426, "grad_norm": 2.726276397705078, "learning_rate": 3.233807138322138e-06, "loss": 0.1455, "step": 22706 }, { "epoch": 74.44918032786886, "grad_norm": 2.3615360260009766, "learning_rate": 3.233025274000664e-06, "loss": 0.2156, "step": 22707 }, { "epoch": 74.45245901639345, "grad_norm": 3.1959314346313477, "learning_rate": 3.232243485983867e-06, "loss": 0.1055, "step": 22708 }, { "epoch": 74.45573770491804, "grad_norm": 1.787837266921997, "learning_rate": 3.231461774280561e-06, "loss": 0.1326, "step": 22709 }, { "epoch": 74.45901639344262, "grad_norm": 1.9269063472747803, "learning_rate": 3.230680138899558e-06, "loss": 0.0493, "step": 22710 }, { "epoch": 74.46229508196721, "grad_norm": 2.5543124675750732, "learning_rate": 3.2298985798496785e-06, "loss": 0.0831, "step": 22711 }, { "epoch": 74.4655737704918, "grad_norm": 2.5754759311676025, "learning_rate": 3.22911709713973e-06, "loss": 0.1359, "step": 22712 }, { "epoch": 74.46885245901639, "grad_norm": 2.529510498046875, "learning_rate": 3.228335690778528e-06, "loss": 0.3187, "step": 22713 }, { "epoch": 74.47213114754098, "grad_norm": 2.4094390869140625, "learning_rate": 3.227554360774877e-06, "loss": 0.0861, "step": 22714 }, { "epoch": 74.47540983606558, "grad_norm": 2.374232292175293, "learning_rate": 3.2267731071375964e-06, "loss": 0.1034, "step": 22715 }, { "epoch": 74.47868852459017, "grad_norm": 2.9923458099365234, "learning_rate": 3.2259919298754915e-06, "loss": 0.1001, "step": 22716 }, { "epoch": 74.48196721311476, "grad_norm": 2.3830416202545166, "learning_rate": 3.2252108289973707e-06, "loss": 0.1904, "step": 22717 }, { "epoch": 74.48524590163935, "grad_norm": 2.5869455337524414, "learning_rate": 3.2244298045120383e-06, "loss": 0.0708, "step": 22718 }, { "epoch": 74.48852459016393, "grad_norm": 2.127674102783203, "learning_rate": 3.2236488564283074e-06, "loss": 0.0794, "step": 22719 }, { "epoch": 74.49180327868852, "grad_norm": 3.4661457538604736, "learning_rate": 3.2228679847549825e-06, "loss": 0.1928, "step": 22720 }, { "epoch": 74.49508196721311, "grad_norm": 2.5548489093780518, "learning_rate": 3.2220871895008667e-06, "loss": 0.1801, "step": 22721 }, { "epoch": 74.4983606557377, "grad_norm": 2.546245574951172, "learning_rate": 3.2213064706747654e-06, "loss": 0.1672, "step": 22722 }, { "epoch": 74.5016393442623, "grad_norm": 2.2342233657836914, "learning_rate": 3.220525828285478e-06, "loss": 0.0597, "step": 22723 }, { "epoch": 74.50491803278689, "grad_norm": 2.3946290016174316, "learning_rate": 3.2197452623418146e-06, "loss": 0.0914, "step": 22724 }, { "epoch": 74.50819672131148, "grad_norm": 2.5078539848327637, "learning_rate": 3.218964772852573e-06, "loss": 0.0747, "step": 22725 }, { "epoch": 74.51147540983607, "grad_norm": 1.8236092329025269, "learning_rate": 3.218184359826554e-06, "loss": 0.0594, "step": 22726 }, { "epoch": 74.51475409836065, "grad_norm": 2.72611141204834, "learning_rate": 3.2174040232725546e-06, "loss": 0.0998, "step": 22727 }, { "epoch": 74.51803278688524, "grad_norm": 1.9703953266143799, "learning_rate": 3.216623763199379e-06, "loss": 0.1264, "step": 22728 }, { "epoch": 74.52131147540983, "grad_norm": 3.2060470581054688, "learning_rate": 3.215843579615825e-06, "loss": 0.1523, "step": 22729 }, { "epoch": 74.52459016393442, "grad_norm": 3.485023260116577, "learning_rate": 3.2150634725306873e-06, "loss": 0.1556, "step": 22730 }, { "epoch": 74.52786885245902, "grad_norm": 2.8143723011016846, "learning_rate": 3.214283441952761e-06, "loss": 0.1316, "step": 22731 }, { "epoch": 74.53114754098361, "grad_norm": 3.006240129470825, "learning_rate": 3.2135034878908477e-06, "loss": 0.1724, "step": 22732 }, { "epoch": 74.5344262295082, "grad_norm": 7.005686283111572, "learning_rate": 3.2127236103537384e-06, "loss": 0.2458, "step": 22733 }, { "epoch": 74.53770491803279, "grad_norm": 2.1883838176727295, "learning_rate": 3.2119438093502266e-06, "loss": 0.0957, "step": 22734 }, { "epoch": 74.54098360655738, "grad_norm": 2.618809938430786, "learning_rate": 3.2111640848891068e-06, "loss": 0.0981, "step": 22735 }, { "epoch": 74.54426229508196, "grad_norm": 2.878671884536743, "learning_rate": 3.210384436979168e-06, "loss": 0.2358, "step": 22736 }, { "epoch": 74.54754098360655, "grad_norm": 2.999208927154541, "learning_rate": 3.209604865629207e-06, "loss": 0.1387, "step": 22737 }, { "epoch": 74.55081967213114, "grad_norm": 1.85587739944458, "learning_rate": 3.208825370848011e-06, "loss": 0.0586, "step": 22738 }, { "epoch": 74.55409836065574, "grad_norm": 2.582557201385498, "learning_rate": 3.2080459526443696e-06, "loss": 0.1351, "step": 22739 }, { "epoch": 74.55737704918033, "grad_norm": 1.8321048021316528, "learning_rate": 3.207266611027069e-06, "loss": 0.1983, "step": 22740 }, { "epoch": 74.56065573770492, "grad_norm": 3.4139842987060547, "learning_rate": 3.206487346004904e-06, "loss": 0.0939, "step": 22741 }, { "epoch": 74.56393442622951, "grad_norm": 2.7118585109710693, "learning_rate": 3.2057081575866566e-06, "loss": 0.168, "step": 22742 }, { "epoch": 74.5672131147541, "grad_norm": 3.9898760318756104, "learning_rate": 3.2049290457811154e-06, "loss": 0.1124, "step": 22743 }, { "epoch": 74.57049180327868, "grad_norm": 4.2325215339660645, "learning_rate": 3.2041500105970603e-06, "loss": 0.1914, "step": 22744 }, { "epoch": 74.57377049180327, "grad_norm": 3.1174371242523193, "learning_rate": 3.2033710520432827e-06, "loss": 0.0337, "step": 22745 }, { "epoch": 74.57704918032788, "grad_norm": 2.669835090637207, "learning_rate": 3.2025921701285633e-06, "loss": 0.0713, "step": 22746 }, { "epoch": 74.58032786885246, "grad_norm": 2.6852667331695557, "learning_rate": 3.2018133648616847e-06, "loss": 0.1976, "step": 22747 }, { "epoch": 74.58360655737705, "grad_norm": 1.9136778116226196, "learning_rate": 3.2010346362514254e-06, "loss": 0.1043, "step": 22748 }, { "epoch": 74.58688524590164, "grad_norm": 2.3711366653442383, "learning_rate": 3.200255984306574e-06, "loss": 0.0922, "step": 22749 }, { "epoch": 74.59016393442623, "grad_norm": 2.5155649185180664, "learning_rate": 3.199477409035905e-06, "loss": 0.1068, "step": 22750 }, { "epoch": 74.59344262295082, "grad_norm": 2.49849796295166, "learning_rate": 3.198698910448197e-06, "loss": 0.0915, "step": 22751 }, { "epoch": 74.5967213114754, "grad_norm": 2.0496108531951904, "learning_rate": 3.1979204885522317e-06, "loss": 0.0957, "step": 22752 }, { "epoch": 74.6, "grad_norm": 1.6713526248931885, "learning_rate": 3.197142143356787e-06, "loss": 0.0347, "step": 22753 }, { "epoch": 74.6032786885246, "grad_norm": 1.7033236026763916, "learning_rate": 3.1963638748706373e-06, "loss": 0.0235, "step": 22754 }, { "epoch": 74.60655737704919, "grad_norm": 2.410813093185425, "learning_rate": 3.1955856831025556e-06, "loss": 0.1268, "step": 22755 }, { "epoch": 74.60983606557377, "grad_norm": 1.9344635009765625, "learning_rate": 3.1948075680613233e-06, "loss": 0.0341, "step": 22756 }, { "epoch": 74.61311475409836, "grad_norm": 2.7647922039031982, "learning_rate": 3.1940295297557123e-06, "loss": 0.2316, "step": 22757 }, { "epoch": 74.61639344262295, "grad_norm": 1.6463398933410645, "learning_rate": 3.19325156819449e-06, "loss": 0.1379, "step": 22758 }, { "epoch": 74.61967213114754, "grad_norm": 2.1121199131011963, "learning_rate": 3.192473683386438e-06, "loss": 0.2165, "step": 22759 }, { "epoch": 74.62295081967213, "grad_norm": 2.394819974899292, "learning_rate": 3.191695875340323e-06, "loss": 0.0873, "step": 22760 }, { "epoch": 74.62622950819672, "grad_norm": 1.8987809419631958, "learning_rate": 3.190918144064915e-06, "loss": 0.0943, "step": 22761 }, { "epoch": 74.62950819672132, "grad_norm": 2.4358558654785156, "learning_rate": 3.1901404895689825e-06, "loss": 0.0679, "step": 22762 }, { "epoch": 74.6327868852459, "grad_norm": 2.6496522426605225, "learning_rate": 3.189362911861299e-06, "loss": 0.049, "step": 22763 }, { "epoch": 74.6360655737705, "grad_norm": 2.23838210105896, "learning_rate": 3.18858541095063e-06, "loss": 0.1337, "step": 22764 }, { "epoch": 74.63934426229508, "grad_norm": 2.215632915496826, "learning_rate": 3.1878079868457422e-06, "loss": 0.1143, "step": 22765 }, { "epoch": 74.64262295081967, "grad_norm": 2.5699405670166016, "learning_rate": 3.1870306395553995e-06, "loss": 0.0925, "step": 22766 }, { "epoch": 74.64590163934426, "grad_norm": 2.3854472637176514, "learning_rate": 3.1862533690883735e-06, "loss": 0.0577, "step": 22767 }, { "epoch": 74.64918032786885, "grad_norm": 4.618350028991699, "learning_rate": 3.185476175453426e-06, "loss": 0.2885, "step": 22768 }, { "epoch": 74.65245901639344, "grad_norm": 2.416438341140747, "learning_rate": 3.1846990586593185e-06, "loss": 0.1231, "step": 22769 }, { "epoch": 74.65573770491804, "grad_norm": 2.1200296878814697, "learning_rate": 3.183922018714812e-06, "loss": 0.1491, "step": 22770 }, { "epoch": 74.65901639344263, "grad_norm": 1.9552639722824097, "learning_rate": 3.1831450556286756e-06, "loss": 0.0591, "step": 22771 }, { "epoch": 74.66229508196722, "grad_norm": 2.3193142414093018, "learning_rate": 3.1823681694096665e-06, "loss": 0.1298, "step": 22772 }, { "epoch": 74.6655737704918, "grad_norm": 3.009813070297241, "learning_rate": 3.1815913600665448e-06, "loss": 0.1734, "step": 22773 }, { "epoch": 74.66885245901639, "grad_norm": 2.640512704849243, "learning_rate": 3.1808146276080696e-06, "loss": 0.1394, "step": 22774 }, { "epoch": 74.67213114754098, "grad_norm": 2.3504085540771484, "learning_rate": 3.1800379720429964e-06, "loss": 0.0559, "step": 22775 }, { "epoch": 74.67540983606557, "grad_norm": 3.341796398162842, "learning_rate": 3.1792613933800898e-06, "loss": 0.1749, "step": 22776 }, { "epoch": 74.67868852459016, "grad_norm": 2.5962862968444824, "learning_rate": 3.178484891628103e-06, "loss": 0.1363, "step": 22777 }, { "epoch": 74.68196721311476, "grad_norm": 3.3795177936553955, "learning_rate": 3.17770846679579e-06, "loss": 0.2136, "step": 22778 }, { "epoch": 74.68524590163935, "grad_norm": 1.8515375852584839, "learning_rate": 3.1769321188919056e-06, "loss": 0.0595, "step": 22779 }, { "epoch": 74.68852459016394, "grad_norm": 3.0017805099487305, "learning_rate": 3.176155847925209e-06, "loss": 0.1329, "step": 22780 }, { "epoch": 74.69180327868852, "grad_norm": 2.87834095954895, "learning_rate": 3.1753796539044502e-06, "loss": 0.1231, "step": 22781 }, { "epoch": 74.69508196721311, "grad_norm": 2.2785980701446533, "learning_rate": 3.174603536838381e-06, "loss": 0.1265, "step": 22782 }, { "epoch": 74.6983606557377, "grad_norm": 2.388366460800171, "learning_rate": 3.173827496735751e-06, "loss": 0.1012, "step": 22783 }, { "epoch": 74.70163934426229, "grad_norm": 2.798872232437134, "learning_rate": 3.173051533605316e-06, "loss": 0.2591, "step": 22784 }, { "epoch": 74.70491803278688, "grad_norm": 3.8084778785705566, "learning_rate": 3.1722756474558235e-06, "loss": 0.0615, "step": 22785 }, { "epoch": 74.70819672131148, "grad_norm": 3.4307310581207275, "learning_rate": 3.171499838296023e-06, "loss": 0.1585, "step": 22786 }, { "epoch": 74.71147540983607, "grad_norm": 2.7440881729125977, "learning_rate": 3.1707241061346604e-06, "loss": 0.077, "step": 22787 }, { "epoch": 74.71475409836066, "grad_norm": 2.6340596675872803, "learning_rate": 3.169948450980481e-06, "loss": 0.075, "step": 22788 }, { "epoch": 74.71803278688525, "grad_norm": 2.574910879135132, "learning_rate": 3.169172872842238e-06, "loss": 0.0803, "step": 22789 }, { "epoch": 74.72131147540983, "grad_norm": 2.9598209857940674, "learning_rate": 3.168397371728673e-06, "loss": 0.098, "step": 22790 }, { "epoch": 74.72459016393442, "grad_norm": 2.0068750381469727, "learning_rate": 3.1676219476485317e-06, "loss": 0.0436, "step": 22791 }, { "epoch": 74.72786885245901, "grad_norm": 2.45617938041687, "learning_rate": 3.1668466006105523e-06, "loss": 0.0998, "step": 22792 }, { "epoch": 74.73114754098361, "grad_norm": 2.283613681793213, "learning_rate": 3.1660713306234857e-06, "loss": 0.0636, "step": 22793 }, { "epoch": 74.7344262295082, "grad_norm": 2.821622133255005, "learning_rate": 3.16529613769607e-06, "loss": 0.174, "step": 22794 }, { "epoch": 74.73770491803279, "grad_norm": 2.566866636276245, "learning_rate": 3.164521021837047e-06, "loss": 0.1742, "step": 22795 }, { "epoch": 74.74098360655738, "grad_norm": 2.579218864440918, "learning_rate": 3.163745983055154e-06, "loss": 0.0625, "step": 22796 }, { "epoch": 74.74426229508197, "grad_norm": 2.7800095081329346, "learning_rate": 3.162971021359136e-06, "loss": 0.0778, "step": 22797 }, { "epoch": 74.74754098360656, "grad_norm": 3.141233205795288, "learning_rate": 3.162196136757727e-06, "loss": 0.2002, "step": 22798 }, { "epoch": 74.75081967213114, "grad_norm": 3.0412614345550537, "learning_rate": 3.1614213292596674e-06, "loss": 0.166, "step": 22799 }, { "epoch": 74.75409836065573, "grad_norm": 3.1262683868408203, "learning_rate": 3.160646598873692e-06, "loss": 0.2317, "step": 22800 }, { "epoch": 74.75737704918033, "grad_norm": 2.4913382530212402, "learning_rate": 3.1598719456085345e-06, "loss": 0.2281, "step": 22801 }, { "epoch": 74.76065573770492, "grad_norm": 2.4063186645507812, "learning_rate": 3.1590973694729367e-06, "loss": 0.1499, "step": 22802 }, { "epoch": 74.76393442622951, "grad_norm": 2.5078518390655518, "learning_rate": 3.1583228704756285e-06, "loss": 0.1388, "step": 22803 }, { "epoch": 74.7672131147541, "grad_norm": 2.1837141513824463, "learning_rate": 3.1575484486253393e-06, "loss": 0.068, "step": 22804 }, { "epoch": 74.77049180327869, "grad_norm": 2.615729331970215, "learning_rate": 3.1567741039308098e-06, "loss": 0.1264, "step": 22805 }, { "epoch": 74.77377049180328, "grad_norm": 3.259148359298706, "learning_rate": 3.155999836400767e-06, "loss": 0.1847, "step": 22806 }, { "epoch": 74.77704918032786, "grad_norm": 3.356276035308838, "learning_rate": 3.155225646043939e-06, "loss": 0.1863, "step": 22807 }, { "epoch": 74.78032786885245, "grad_norm": 3.0208005905151367, "learning_rate": 3.1544515328690617e-06, "loss": 0.1552, "step": 22808 }, { "epoch": 74.78360655737706, "grad_norm": 2.4047563076019287, "learning_rate": 3.1536774968848615e-06, "loss": 0.1509, "step": 22809 }, { "epoch": 74.78688524590164, "grad_norm": 3.0013022422790527, "learning_rate": 3.152903538100063e-06, "loss": 0.182, "step": 22810 }, { "epoch": 74.79016393442623, "grad_norm": 3.5963306427001953, "learning_rate": 3.1521296565233985e-06, "loss": 0.1474, "step": 22811 }, { "epoch": 74.79344262295082, "grad_norm": 3.4180071353912354, "learning_rate": 3.151355852163592e-06, "loss": 0.1572, "step": 22812 }, { "epoch": 74.79672131147541, "grad_norm": 2.2223777770996094, "learning_rate": 3.15058212502937e-06, "loss": 0.062, "step": 22813 }, { "epoch": 74.8, "grad_norm": 2.907453775405884, "learning_rate": 3.1498084751294523e-06, "loss": 0.104, "step": 22814 }, { "epoch": 74.80327868852459, "grad_norm": 4.571503162384033, "learning_rate": 3.14903490247257e-06, "loss": 0.133, "step": 22815 }, { "epoch": 74.80655737704917, "grad_norm": 2.7722301483154297, "learning_rate": 3.1482614070674423e-06, "loss": 0.1779, "step": 22816 }, { "epoch": 74.80983606557378, "grad_norm": 3.5144665241241455, "learning_rate": 3.1474879889227907e-06, "loss": 0.1596, "step": 22817 }, { "epoch": 74.81311475409836, "grad_norm": 4.017956256866455, "learning_rate": 3.1467146480473344e-06, "loss": 0.0934, "step": 22818 }, { "epoch": 74.81639344262295, "grad_norm": 2.7275664806365967, "learning_rate": 3.1459413844497986e-06, "loss": 0.1864, "step": 22819 }, { "epoch": 74.81967213114754, "grad_norm": 2.5803017616271973, "learning_rate": 3.1451681981389003e-06, "loss": 0.1242, "step": 22820 }, { "epoch": 74.82295081967213, "grad_norm": 2.627558469772339, "learning_rate": 3.1443950891233587e-06, "loss": 0.1586, "step": 22821 }, { "epoch": 74.82622950819672, "grad_norm": 2.1327691078186035, "learning_rate": 3.143622057411887e-06, "loss": 0.1127, "step": 22822 }, { "epoch": 74.8295081967213, "grad_norm": 2.4280195236206055, "learning_rate": 3.142849103013208e-06, "loss": 0.0813, "step": 22823 }, { "epoch": 74.8327868852459, "grad_norm": 4.525713920593262, "learning_rate": 3.142076225936035e-06, "loss": 0.1394, "step": 22824 }, { "epoch": 74.8360655737705, "grad_norm": 2.775980234146118, "learning_rate": 3.141303426189083e-06, "loss": 0.1596, "step": 22825 }, { "epoch": 74.83934426229509, "grad_norm": 2.0855417251586914, "learning_rate": 3.1405307037810672e-06, "loss": 0.0361, "step": 22826 }, { "epoch": 74.84262295081967, "grad_norm": 2.243143081665039, "learning_rate": 3.1397580587206955e-06, "loss": 0.1345, "step": 22827 }, { "epoch": 74.84590163934426, "grad_norm": 1.976274847984314, "learning_rate": 3.138985491016688e-06, "loss": 0.1249, "step": 22828 }, { "epoch": 74.84918032786885, "grad_norm": 2.600773572921753, "learning_rate": 3.138213000677752e-06, "loss": 0.3053, "step": 22829 }, { "epoch": 74.85245901639344, "grad_norm": 2.8155243396759033, "learning_rate": 3.1374405877126e-06, "loss": 0.098, "step": 22830 }, { "epoch": 74.85573770491803, "grad_norm": 2.424003839492798, "learning_rate": 3.1366682521299374e-06, "loss": 0.1815, "step": 22831 }, { "epoch": 74.85901639344263, "grad_norm": 2.561091899871826, "learning_rate": 3.135895993938479e-06, "loss": 0.189, "step": 22832 }, { "epoch": 74.86229508196722, "grad_norm": 2.5241599082946777, "learning_rate": 3.1351238131469307e-06, "loss": 0.1937, "step": 22833 }, { "epoch": 74.8655737704918, "grad_norm": 3.0912911891937256, "learning_rate": 3.134351709763999e-06, "loss": 0.1135, "step": 22834 }, { "epoch": 74.8688524590164, "grad_norm": 2.035287380218506, "learning_rate": 3.1335796837983866e-06, "loss": 0.0768, "step": 22835 }, { "epoch": 74.87213114754098, "grad_norm": 2.5219380855560303, "learning_rate": 3.132807735258806e-06, "loss": 0.1926, "step": 22836 }, { "epoch": 74.87540983606557, "grad_norm": 3.390946388244629, "learning_rate": 3.1320358641539583e-06, "loss": 0.0866, "step": 22837 }, { "epoch": 74.87868852459016, "grad_norm": 2.373587131500244, "learning_rate": 3.131264070492548e-06, "loss": 0.2442, "step": 22838 }, { "epoch": 74.88196721311475, "grad_norm": 2.39679217338562, "learning_rate": 3.1304923542832753e-06, "loss": 0.2329, "step": 22839 }, { "epoch": 74.88524590163935, "grad_norm": 2.091560125350952, "learning_rate": 3.1297207155348417e-06, "loss": 0.0659, "step": 22840 }, { "epoch": 74.88852459016394, "grad_norm": 2.6852073669433594, "learning_rate": 3.1289491542559535e-06, "loss": 0.3061, "step": 22841 }, { "epoch": 74.89180327868853, "grad_norm": 2.5998294353485107, "learning_rate": 3.128177670455307e-06, "loss": 0.1043, "step": 22842 }, { "epoch": 74.89508196721312, "grad_norm": 2.7040932178497314, "learning_rate": 3.1274062641416025e-06, "loss": 0.1045, "step": 22843 }, { "epoch": 74.8983606557377, "grad_norm": 2.4920594692230225, "learning_rate": 3.126634935323535e-06, "loss": 0.0771, "step": 22844 }, { "epoch": 74.90163934426229, "grad_norm": 3.2863683700561523, "learning_rate": 3.125863684009809e-06, "loss": 0.1976, "step": 22845 }, { "epoch": 74.90491803278688, "grad_norm": 3.6367342472076416, "learning_rate": 3.1250925102091156e-06, "loss": 0.1715, "step": 22846 }, { "epoch": 74.90819672131147, "grad_norm": 2.925893783569336, "learning_rate": 3.1243214139301535e-06, "loss": 0.0885, "step": 22847 }, { "epoch": 74.91147540983607, "grad_norm": 3.0484068393707275, "learning_rate": 3.123550395181614e-06, "loss": 0.0808, "step": 22848 }, { "epoch": 74.91475409836066, "grad_norm": 2.2964024543762207, "learning_rate": 3.1227794539721947e-06, "loss": 0.1115, "step": 22849 }, { "epoch": 74.91803278688525, "grad_norm": 2.1299798488616943, "learning_rate": 3.122008590310589e-06, "loss": 0.1166, "step": 22850 }, { "epoch": 74.92131147540984, "grad_norm": 3.143826723098755, "learning_rate": 3.121237804205487e-06, "loss": 0.1639, "step": 22851 }, { "epoch": 74.92459016393443, "grad_norm": 1.7664463520050049, "learning_rate": 3.1204670956655813e-06, "loss": 0.0952, "step": 22852 }, { "epoch": 74.92786885245901, "grad_norm": 2.0748231410980225, "learning_rate": 3.119696464699559e-06, "loss": 0.1728, "step": 22853 }, { "epoch": 74.9311475409836, "grad_norm": 3.0358777046203613, "learning_rate": 3.1189259113161152e-06, "loss": 0.253, "step": 22854 }, { "epoch": 74.93442622950819, "grad_norm": 1.9731487035751343, "learning_rate": 3.1181554355239363e-06, "loss": 0.0375, "step": 22855 }, { "epoch": 74.9377049180328, "grad_norm": 2.6296322345733643, "learning_rate": 3.1173850373317106e-06, "loss": 0.1669, "step": 22856 }, { "epoch": 74.94098360655738, "grad_norm": 2.5131828784942627, "learning_rate": 3.116614716748122e-06, "loss": 0.1326, "step": 22857 }, { "epoch": 74.94426229508197, "grad_norm": 3.2195167541503906, "learning_rate": 3.1158444737818616e-06, "loss": 0.1774, "step": 22858 }, { "epoch": 74.94754098360656, "grad_norm": 2.9660909175872803, "learning_rate": 3.1150743084416133e-06, "loss": 0.0827, "step": 22859 }, { "epoch": 74.95081967213115, "grad_norm": 2.551321268081665, "learning_rate": 3.1143042207360573e-06, "loss": 0.2344, "step": 22860 }, { "epoch": 74.95409836065573, "grad_norm": 5.620745658874512, "learning_rate": 3.113534210673883e-06, "loss": 0.0904, "step": 22861 }, { "epoch": 74.95737704918032, "grad_norm": 3.1468875408172607, "learning_rate": 3.1127642782637714e-06, "loss": 0.1564, "step": 22862 }, { "epoch": 74.96065573770491, "grad_norm": 2.824082136154175, "learning_rate": 3.111994423514401e-06, "loss": 0.2242, "step": 22863 }, { "epoch": 74.96393442622951, "grad_norm": 2.7772064208984375, "learning_rate": 3.1112246464344565e-06, "loss": 0.1107, "step": 22864 }, { "epoch": 74.9672131147541, "grad_norm": 2.810084342956543, "learning_rate": 3.1104549470326182e-06, "loss": 0.1789, "step": 22865 }, { "epoch": 74.97049180327869, "grad_norm": 2.2766189575195312, "learning_rate": 3.1096853253175595e-06, "loss": 0.0856, "step": 22866 }, { "epoch": 74.97377049180328, "grad_norm": 3.045167922973633, "learning_rate": 3.1089157812979662e-06, "loss": 0.1637, "step": 22867 }, { "epoch": 74.97704918032787, "grad_norm": 3.6454803943634033, "learning_rate": 3.108146314982513e-06, "loss": 0.2218, "step": 22868 }, { "epoch": 74.98032786885246, "grad_norm": 2.5908260345458984, "learning_rate": 3.1073769263798757e-06, "loss": 0.1649, "step": 22869 }, { "epoch": 74.98360655737704, "grad_norm": 3.5879738330841064, "learning_rate": 3.106607615498727e-06, "loss": 0.3252, "step": 22870 }, { "epoch": 74.98688524590163, "grad_norm": 2.223090648651123, "learning_rate": 3.1058383823477478e-06, "loss": 0.1255, "step": 22871 }, { "epoch": 74.99016393442623, "grad_norm": 2.590667486190796, "learning_rate": 3.1050692269356086e-06, "loss": 0.0942, "step": 22872 }, { "epoch": 74.99344262295082, "grad_norm": 3.380244016647339, "learning_rate": 3.1043001492709833e-06, "loss": 0.1019, "step": 22873 }, { "epoch": 74.99672131147541, "grad_norm": 3.451915740966797, "learning_rate": 3.103531149362541e-06, "loss": 0.1384, "step": 22874 }, { "epoch": 75.0, "grad_norm": 2.9146292209625244, "learning_rate": 3.1027622272189572e-06, "loss": 0.1366, "step": 22875 }, { "epoch": 75.00327868852459, "grad_norm": 3.5717663764953613, "learning_rate": 3.101993382848901e-06, "loss": 0.3192, "step": 22876 }, { "epoch": 75.00655737704918, "grad_norm": 3.67301607131958, "learning_rate": 3.101224616261043e-06, "loss": 0.1441, "step": 22877 }, { "epoch": 75.00983606557377, "grad_norm": 2.222831964492798, "learning_rate": 3.100455927464049e-06, "loss": 0.0674, "step": 22878 }, { "epoch": 75.01311475409837, "grad_norm": 3.472548007965088, "learning_rate": 3.099687316466584e-06, "loss": 0.1091, "step": 22879 }, { "epoch": 75.01639344262296, "grad_norm": 2.5467708110809326, "learning_rate": 3.098918783277324e-06, "loss": 0.1214, "step": 22880 }, { "epoch": 75.01967213114754, "grad_norm": 2.3203413486480713, "learning_rate": 3.0981503279049295e-06, "loss": 0.0495, "step": 22881 }, { "epoch": 75.02295081967213, "grad_norm": 2.4025189876556396, "learning_rate": 3.097381950358066e-06, "loss": 0.0947, "step": 22882 }, { "epoch": 75.02622950819672, "grad_norm": 3.6965436935424805, "learning_rate": 3.096613650645395e-06, "loss": 0.2006, "step": 22883 }, { "epoch": 75.02950819672131, "grad_norm": 2.815551519393921, "learning_rate": 3.095845428775586e-06, "loss": 0.1468, "step": 22884 }, { "epoch": 75.0327868852459, "grad_norm": 2.9033701419830322, "learning_rate": 3.0950772847572987e-06, "loss": 0.1341, "step": 22885 }, { "epoch": 75.03606557377049, "grad_norm": 2.220942735671997, "learning_rate": 3.094309218599193e-06, "loss": 0.1026, "step": 22886 }, { "epoch": 75.03934426229509, "grad_norm": 2.4906351566314697, "learning_rate": 3.093541230309929e-06, "loss": 0.0859, "step": 22887 }, { "epoch": 75.04262295081968, "grad_norm": 2.215325355529785, "learning_rate": 3.0927733198981714e-06, "loss": 0.3507, "step": 22888 }, { "epoch": 75.04590163934427, "grad_norm": 2.4995651245117188, "learning_rate": 3.092005487372576e-06, "loss": 0.1126, "step": 22889 }, { "epoch": 75.04918032786885, "grad_norm": 2.238555669784546, "learning_rate": 3.091237732741802e-06, "loss": 0.0491, "step": 22890 }, { "epoch": 75.05245901639344, "grad_norm": 2.2200257778167725, "learning_rate": 3.090470056014505e-06, "loss": 0.0907, "step": 22891 }, { "epoch": 75.05573770491803, "grad_norm": 2.3160769939422607, "learning_rate": 3.0897024571993396e-06, "loss": 0.0685, "step": 22892 }, { "epoch": 75.05901639344262, "grad_norm": 2.3391196727752686, "learning_rate": 3.088934936304967e-06, "loss": 0.1077, "step": 22893 }, { "epoch": 75.0622950819672, "grad_norm": 2.5077998638153076, "learning_rate": 3.0881674933400385e-06, "loss": 0.047, "step": 22894 }, { "epoch": 75.06557377049181, "grad_norm": 12.706571578979492, "learning_rate": 3.087400128313208e-06, "loss": 0.0554, "step": 22895 }, { "epoch": 75.0688524590164, "grad_norm": 3.430018901824951, "learning_rate": 3.086632841233125e-06, "loss": 0.1939, "step": 22896 }, { "epoch": 75.07213114754099, "grad_norm": 3.398216724395752, "learning_rate": 3.085865632108448e-06, "loss": 0.2095, "step": 22897 }, { "epoch": 75.07540983606557, "grad_norm": 2.497403383255005, "learning_rate": 3.085098500947825e-06, "loss": 0.1523, "step": 22898 }, { "epoch": 75.07868852459016, "grad_norm": 2.8044440746307373, "learning_rate": 3.0843314477599072e-06, "loss": 0.1419, "step": 22899 }, { "epoch": 75.08196721311475, "grad_norm": 3.2903432846069336, "learning_rate": 3.0835644725533385e-06, "loss": 0.14, "step": 22900 }, { "epoch": 75.08524590163934, "grad_norm": 2.617899179458618, "learning_rate": 3.0827975753367745e-06, "loss": 0.2451, "step": 22901 }, { "epoch": 75.08852459016393, "grad_norm": 2.7885568141937256, "learning_rate": 3.0820307561188612e-06, "loss": 0.1608, "step": 22902 }, { "epoch": 75.09180327868853, "grad_norm": 2.6512861251831055, "learning_rate": 3.081264014908243e-06, "loss": 0.0793, "step": 22903 }, { "epoch": 75.09508196721312, "grad_norm": 2.614389657974243, "learning_rate": 3.080497351713567e-06, "loss": 0.1733, "step": 22904 }, { "epoch": 75.09836065573771, "grad_norm": 6.220271110534668, "learning_rate": 3.079730766543475e-06, "loss": 0.1087, "step": 22905 }, { "epoch": 75.1016393442623, "grad_norm": 1.9101148843765259, "learning_rate": 3.078964259406616e-06, "loss": 0.0464, "step": 22906 }, { "epoch": 75.10491803278688, "grad_norm": 2.4059553146362305, "learning_rate": 3.0781978303116323e-06, "loss": 0.0696, "step": 22907 }, { "epoch": 75.10819672131147, "grad_norm": 2.294832229614258, "learning_rate": 3.0774314792671643e-06, "loss": 0.0894, "step": 22908 }, { "epoch": 75.11147540983606, "grad_norm": 2.474637031555176, "learning_rate": 3.0766652062818514e-06, "loss": 0.0817, "step": 22909 }, { "epoch": 75.11475409836065, "grad_norm": 2.7408084869384766, "learning_rate": 3.0758990113643393e-06, "loss": 0.1953, "step": 22910 }, { "epoch": 75.11803278688525, "grad_norm": 2.7686593532562256, "learning_rate": 3.075132894523265e-06, "loss": 0.1589, "step": 22911 }, { "epoch": 75.12131147540984, "grad_norm": 1.9238519668579102, "learning_rate": 3.0743668557672648e-06, "loss": 0.0521, "step": 22912 }, { "epoch": 75.12459016393443, "grad_norm": 3.134467840194702, "learning_rate": 3.073600895104982e-06, "loss": 0.1057, "step": 22913 }, { "epoch": 75.12786885245902, "grad_norm": 2.8820574283599854, "learning_rate": 3.0728350125450513e-06, "loss": 0.1481, "step": 22914 }, { "epoch": 75.1311475409836, "grad_norm": 2.4140567779541016, "learning_rate": 3.072069208096108e-06, "loss": 0.0858, "step": 22915 }, { "epoch": 75.1344262295082, "grad_norm": 2.344252824783325, "learning_rate": 3.071303481766783e-06, "loss": 0.186, "step": 22916 }, { "epoch": 75.13770491803278, "grad_norm": 2.351006507873535, "learning_rate": 3.0705378335657197e-06, "loss": 0.0623, "step": 22917 }, { "epoch": 75.14098360655737, "grad_norm": 2.454580545425415, "learning_rate": 3.0697722635015482e-06, "loss": 0.1761, "step": 22918 }, { "epoch": 75.14426229508197, "grad_norm": 2.8476486206054688, "learning_rate": 3.0690067715828953e-06, "loss": 0.1353, "step": 22919 }, { "epoch": 75.14754098360656, "grad_norm": 3.4414379596710205, "learning_rate": 3.0682413578184012e-06, "loss": 0.077, "step": 22920 }, { "epoch": 75.15081967213115, "grad_norm": 1.9110126495361328, "learning_rate": 3.0674760222166934e-06, "loss": 0.0581, "step": 22921 }, { "epoch": 75.15409836065574, "grad_norm": 2.6047401428222656, "learning_rate": 3.066710764786398e-06, "loss": 0.1227, "step": 22922 }, { "epoch": 75.15737704918033, "grad_norm": 2.5398058891296387, "learning_rate": 3.0659455855361496e-06, "loss": 0.0723, "step": 22923 }, { "epoch": 75.16065573770491, "grad_norm": 2.577536106109619, "learning_rate": 3.065180484474576e-06, "loss": 0.1823, "step": 22924 }, { "epoch": 75.1639344262295, "grad_norm": 2.675907611846924, "learning_rate": 3.0644154616103015e-06, "loss": 0.163, "step": 22925 }, { "epoch": 75.1672131147541, "grad_norm": 2.4066896438598633, "learning_rate": 3.0636505169519516e-06, "loss": 0.1294, "step": 22926 }, { "epoch": 75.1704918032787, "grad_norm": 2.715503215789795, "learning_rate": 3.0628856505081573e-06, "loss": 0.2494, "step": 22927 }, { "epoch": 75.17377049180328, "grad_norm": 2.4140636920928955, "learning_rate": 3.0621208622875397e-06, "loss": 0.2404, "step": 22928 }, { "epoch": 75.17704918032787, "grad_norm": 3.123499631881714, "learning_rate": 3.0613561522987233e-06, "loss": 0.1031, "step": 22929 }, { "epoch": 75.18032786885246, "grad_norm": 6.489049911499023, "learning_rate": 3.06059152055033e-06, "loss": 0.2595, "step": 22930 }, { "epoch": 75.18360655737705, "grad_norm": 2.401085376739502, "learning_rate": 3.059826967050981e-06, "loss": 0.1688, "step": 22931 }, { "epoch": 75.18688524590164, "grad_norm": 2.3113152980804443, "learning_rate": 3.0590624918093002e-06, "loss": 0.0669, "step": 22932 }, { "epoch": 75.19016393442622, "grad_norm": 2.466857671737671, "learning_rate": 3.058298094833907e-06, "loss": 0.1666, "step": 22933 }, { "epoch": 75.19344262295083, "grad_norm": 2.4606540203094482, "learning_rate": 3.0575337761334213e-06, "loss": 0.0816, "step": 22934 }, { "epoch": 75.19672131147541, "grad_norm": 2.7287755012512207, "learning_rate": 3.0567695357164563e-06, "loss": 0.2522, "step": 22935 }, { "epoch": 75.2, "grad_norm": 2.7054641246795654, "learning_rate": 3.0560053735916372e-06, "loss": 0.1444, "step": 22936 }, { "epoch": 75.20327868852459, "grad_norm": 3.598079204559326, "learning_rate": 3.0552412897675776e-06, "loss": 0.196, "step": 22937 }, { "epoch": 75.20655737704918, "grad_norm": 2.6349737644195557, "learning_rate": 3.0544772842528935e-06, "loss": 0.1638, "step": 22938 }, { "epoch": 75.20983606557377, "grad_norm": 2.2645115852355957, "learning_rate": 3.0537133570561974e-06, "loss": 0.0946, "step": 22939 }, { "epoch": 75.21311475409836, "grad_norm": 2.1344540119171143, "learning_rate": 3.052949508186107e-06, "loss": 0.1065, "step": 22940 }, { "epoch": 75.21639344262294, "grad_norm": 2.7482845783233643, "learning_rate": 3.052185737651234e-06, "loss": 0.1665, "step": 22941 }, { "epoch": 75.21967213114755, "grad_norm": 3.391157388687134, "learning_rate": 3.0514220454601917e-06, "loss": 0.2926, "step": 22942 }, { "epoch": 75.22295081967214, "grad_norm": 2.8270463943481445, "learning_rate": 3.0506584316215904e-06, "loss": 0.1901, "step": 22943 }, { "epoch": 75.22622950819672, "grad_norm": 12.517684936523438, "learning_rate": 3.049894896144038e-06, "loss": 0.0576, "step": 22944 }, { "epoch": 75.22950819672131, "grad_norm": 3.152236223220825, "learning_rate": 3.0491314390361492e-06, "loss": 0.1667, "step": 22945 }, { "epoch": 75.2327868852459, "grad_norm": 2.5196900367736816, "learning_rate": 3.0483680603065303e-06, "loss": 0.1138, "step": 22946 }, { "epoch": 75.23606557377049, "grad_norm": 2.0618133544921875, "learning_rate": 3.0476047599637904e-06, "loss": 0.353, "step": 22947 }, { "epoch": 75.23934426229508, "grad_norm": 2.1030266284942627, "learning_rate": 3.046841538016532e-06, "loss": 0.1391, "step": 22948 }, { "epoch": 75.24262295081967, "grad_norm": 2.2196273803710938, "learning_rate": 3.0460783944733675e-06, "loss": 0.0686, "step": 22949 }, { "epoch": 75.24590163934427, "grad_norm": 2.7908544540405273, "learning_rate": 3.0453153293428996e-06, "loss": 0.0776, "step": 22950 }, { "epoch": 75.24918032786886, "grad_norm": 1.6760978698730469, "learning_rate": 3.044552342633732e-06, "loss": 0.1069, "step": 22951 }, { "epoch": 75.25245901639344, "grad_norm": 2.121725559234619, "learning_rate": 3.043789434354466e-06, "loss": 0.0855, "step": 22952 }, { "epoch": 75.25573770491803, "grad_norm": 2.0696451663970947, "learning_rate": 3.04302660451371e-06, "loss": 0.1576, "step": 22953 }, { "epoch": 75.25901639344262, "grad_norm": 2.364182710647583, "learning_rate": 3.042263853120062e-06, "loss": 0.084, "step": 22954 }, { "epoch": 75.26229508196721, "grad_norm": 1.4840013980865479, "learning_rate": 3.0415011801821236e-06, "loss": 0.0238, "step": 22955 }, { "epoch": 75.2655737704918, "grad_norm": 2.786409378051758, "learning_rate": 3.040738585708495e-06, "loss": 0.18, "step": 22956 }, { "epoch": 75.26885245901639, "grad_norm": 2.100386381149292, "learning_rate": 3.0399760697077706e-06, "loss": 0.0755, "step": 22957 }, { "epoch": 75.27213114754099, "grad_norm": 2.7276458740234375, "learning_rate": 3.039213632188556e-06, "loss": 0.0952, "step": 22958 }, { "epoch": 75.27540983606558, "grad_norm": 2.75803804397583, "learning_rate": 3.038451273159445e-06, "loss": 0.1036, "step": 22959 }, { "epoch": 75.27868852459017, "grad_norm": 2.3265655040740967, "learning_rate": 3.0376889926290342e-06, "loss": 0.1154, "step": 22960 }, { "epoch": 75.28196721311475, "grad_norm": 3.223629951477051, "learning_rate": 3.036926790605916e-06, "loss": 0.1029, "step": 22961 }, { "epoch": 75.28524590163934, "grad_norm": 3.5570554733276367, "learning_rate": 3.036164667098691e-06, "loss": 0.1239, "step": 22962 }, { "epoch": 75.28852459016393, "grad_norm": 2.4776296615600586, "learning_rate": 3.0354026221159505e-06, "loss": 0.0793, "step": 22963 }, { "epoch": 75.29180327868852, "grad_norm": 2.248485803604126, "learning_rate": 3.0346406556662853e-06, "loss": 0.2371, "step": 22964 }, { "epoch": 75.29508196721312, "grad_norm": 2.693415880203247, "learning_rate": 3.0338787677582872e-06, "loss": 0.1825, "step": 22965 }, { "epoch": 75.29836065573771, "grad_norm": 3.37227463722229, "learning_rate": 3.0331169584005514e-06, "loss": 0.0854, "step": 22966 }, { "epoch": 75.3016393442623, "grad_norm": 2.1338858604431152, "learning_rate": 3.0323552276016664e-06, "loss": 0.152, "step": 22967 }, { "epoch": 75.30491803278689, "grad_norm": 2.682065010070801, "learning_rate": 3.0315935753702174e-06, "loss": 0.0659, "step": 22968 }, { "epoch": 75.30819672131148, "grad_norm": 2.798790454864502, "learning_rate": 3.0308320017147986e-06, "loss": 0.1865, "step": 22969 }, { "epoch": 75.31147540983606, "grad_norm": 2.2100350856781006, "learning_rate": 3.030070506643995e-06, "loss": 0.0644, "step": 22970 }, { "epoch": 75.31475409836065, "grad_norm": 8.101114273071289, "learning_rate": 3.0293090901663913e-06, "loss": 0.0521, "step": 22971 }, { "epoch": 75.31803278688524, "grad_norm": 3.044276714324951, "learning_rate": 3.0285477522905784e-06, "loss": 0.0923, "step": 22972 }, { "epoch": 75.32131147540984, "grad_norm": 2.9978559017181396, "learning_rate": 3.0277864930251366e-06, "loss": 0.1474, "step": 22973 }, { "epoch": 75.32459016393443, "grad_norm": 2.8292829990386963, "learning_rate": 3.02702531237865e-06, "loss": 0.073, "step": 22974 }, { "epoch": 75.32786885245902, "grad_norm": 2.546649217605591, "learning_rate": 3.026264210359705e-06, "loss": 0.1531, "step": 22975 }, { "epoch": 75.33114754098361, "grad_norm": 2.505842685699463, "learning_rate": 3.0255031869768827e-06, "loss": 0.1823, "step": 22976 }, { "epoch": 75.3344262295082, "grad_norm": 2.1884214878082275, "learning_rate": 3.024742242238763e-06, "loss": 0.0902, "step": 22977 }, { "epoch": 75.33770491803278, "grad_norm": 2.6624701023101807, "learning_rate": 3.0239813761539236e-06, "loss": 0.0918, "step": 22978 }, { "epoch": 75.34098360655737, "grad_norm": 3.3643798828125, "learning_rate": 3.0232205887309507e-06, "loss": 0.1363, "step": 22979 }, { "epoch": 75.34426229508196, "grad_norm": 3.2898857593536377, "learning_rate": 3.0224598799784197e-06, "loss": 0.2366, "step": 22980 }, { "epoch": 75.34754098360656, "grad_norm": 3.479555130004883, "learning_rate": 3.021699249904909e-06, "loss": 0.1274, "step": 22981 }, { "epoch": 75.35081967213115, "grad_norm": 2.8573238849639893, "learning_rate": 3.0209386985189946e-06, "loss": 0.211, "step": 22982 }, { "epoch": 75.35409836065574, "grad_norm": 2.7367775440216064, "learning_rate": 3.02017822582925e-06, "loss": 0.0725, "step": 22983 }, { "epoch": 75.35737704918033, "grad_norm": 4.353555679321289, "learning_rate": 3.0194178318442558e-06, "loss": 0.1957, "step": 22984 }, { "epoch": 75.36065573770492, "grad_norm": 2.932504653930664, "learning_rate": 3.018657516572583e-06, "loss": 0.1327, "step": 22985 }, { "epoch": 75.3639344262295, "grad_norm": 2.2823410034179688, "learning_rate": 3.0178972800228066e-06, "loss": 0.1471, "step": 22986 }, { "epoch": 75.3672131147541, "grad_norm": 3.6783220767974854, "learning_rate": 3.017137122203494e-06, "loss": 0.1528, "step": 22987 }, { "epoch": 75.37049180327868, "grad_norm": 2.105405330657959, "learning_rate": 3.016377043123224e-06, "loss": 0.1961, "step": 22988 }, { "epoch": 75.37377049180328, "grad_norm": 2.5017952919006348, "learning_rate": 3.015617042790564e-06, "loss": 0.1942, "step": 22989 }, { "epoch": 75.37704918032787, "grad_norm": 2.8303163051605225, "learning_rate": 3.014857121214084e-06, "loss": 0.3061, "step": 22990 }, { "epoch": 75.38032786885246, "grad_norm": 3.3993921279907227, "learning_rate": 3.0140972784023493e-06, "loss": 0.1018, "step": 22991 }, { "epoch": 75.38360655737705, "grad_norm": 2.6875181198120117, "learning_rate": 3.0133375143639344e-06, "loss": 0.1398, "step": 22992 }, { "epoch": 75.38688524590164, "grad_norm": 2.7494683265686035, "learning_rate": 3.012577829107404e-06, "loss": 0.0551, "step": 22993 }, { "epoch": 75.39016393442623, "grad_norm": 2.373228073120117, "learning_rate": 3.011818222641323e-06, "loss": 0.0823, "step": 22994 }, { "epoch": 75.39344262295081, "grad_norm": 2.6040611267089844, "learning_rate": 3.011058694974257e-06, "loss": 0.2233, "step": 22995 }, { "epoch": 75.3967213114754, "grad_norm": 2.2236831188201904, "learning_rate": 3.0102992461147685e-06, "loss": 0.1022, "step": 22996 }, { "epoch": 75.4, "grad_norm": 3.145613431930542, "learning_rate": 3.009539876071427e-06, "loss": 0.1332, "step": 22997 }, { "epoch": 75.4032786885246, "grad_norm": 2.3571054935455322, "learning_rate": 3.008780584852791e-06, "loss": 0.1497, "step": 22998 }, { "epoch": 75.40655737704918, "grad_norm": 2.8185362815856934, "learning_rate": 3.0080213724674223e-06, "loss": 0.1324, "step": 22999 }, { "epoch": 75.40983606557377, "grad_norm": 2.5520248413085938, "learning_rate": 3.0072622389238805e-06, "loss": 0.0695, "step": 23000 }, { "epoch": 75.41311475409836, "grad_norm": 3.6819448471069336, "learning_rate": 3.00650318423073e-06, "loss": 0.1269, "step": 23001 }, { "epoch": 75.41639344262295, "grad_norm": 2.581963539123535, "learning_rate": 3.005744208396527e-06, "loss": 0.1389, "step": 23002 }, { "epoch": 75.41967213114754, "grad_norm": 2.2086007595062256, "learning_rate": 3.004985311429832e-06, "loss": 0.0622, "step": 23003 }, { "epoch": 75.42295081967212, "grad_norm": 2.2878334522247314, "learning_rate": 3.0042264933391997e-06, "loss": 0.076, "step": 23004 }, { "epoch": 75.42622950819673, "grad_norm": 2.4290595054626465, "learning_rate": 3.0034677541331835e-06, "loss": 0.1398, "step": 23005 }, { "epoch": 75.42950819672132, "grad_norm": 2.0076398849487305, "learning_rate": 3.0027090938203475e-06, "loss": 0.0411, "step": 23006 }, { "epoch": 75.4327868852459, "grad_norm": 2.4519221782684326, "learning_rate": 3.001950512409241e-06, "loss": 0.1466, "step": 23007 }, { "epoch": 75.43606557377049, "grad_norm": 2.3067820072174072, "learning_rate": 3.001192009908419e-06, "loss": 0.1134, "step": 23008 }, { "epoch": 75.43934426229508, "grad_norm": 5.128652095794678, "learning_rate": 3.00043358632643e-06, "loss": 0.1913, "step": 23009 }, { "epoch": 75.44262295081967, "grad_norm": 2.4121651649475098, "learning_rate": 2.9996752416718345e-06, "loss": 0.0536, "step": 23010 }, { "epoch": 75.44590163934426, "grad_norm": 2.516247034072876, "learning_rate": 2.9989169759531777e-06, "loss": 0.1043, "step": 23011 }, { "epoch": 75.44918032786886, "grad_norm": 3.3410544395446777, "learning_rate": 2.998158789179012e-06, "loss": 0.1236, "step": 23012 }, { "epoch": 75.45245901639345, "grad_norm": 3.491739511489868, "learning_rate": 2.997400681357884e-06, "loss": 0.2515, "step": 23013 }, { "epoch": 75.45573770491804, "grad_norm": 3.7536356449127197, "learning_rate": 2.996642652498346e-06, "loss": 0.186, "step": 23014 }, { "epoch": 75.45901639344262, "grad_norm": 2.830498695373535, "learning_rate": 2.9958847026089444e-06, "loss": 0.0622, "step": 23015 }, { "epoch": 75.46229508196721, "grad_norm": 2.389443874359131, "learning_rate": 2.995126831698224e-06, "loss": 0.1887, "step": 23016 }, { "epoch": 75.4655737704918, "grad_norm": 2.483771324157715, "learning_rate": 2.9943690397747337e-06, "loss": 0.0745, "step": 23017 }, { "epoch": 75.46885245901639, "grad_norm": 3.07869553565979, "learning_rate": 2.9936113268470124e-06, "loss": 0.0534, "step": 23018 }, { "epoch": 75.47213114754098, "grad_norm": 2.682211399078369, "learning_rate": 2.9928536929236106e-06, "loss": 0.1574, "step": 23019 }, { "epoch": 75.47540983606558, "grad_norm": 2.374765396118164, "learning_rate": 2.9920961380130696e-06, "loss": 0.0926, "step": 23020 }, { "epoch": 75.47868852459017, "grad_norm": 2.7756152153015137, "learning_rate": 2.991338662123928e-06, "loss": 0.3122, "step": 23021 }, { "epoch": 75.48196721311476, "grad_norm": 4.487934589385986, "learning_rate": 2.9905812652647337e-06, "loss": 0.2629, "step": 23022 }, { "epoch": 75.48524590163935, "grad_norm": 3.25820255279541, "learning_rate": 2.989823947444024e-06, "loss": 0.1867, "step": 23023 }, { "epoch": 75.48852459016393, "grad_norm": 2.9637887477874756, "learning_rate": 2.989066708670334e-06, "loss": 0.2258, "step": 23024 }, { "epoch": 75.49180327868852, "grad_norm": 2.2059218883514404, "learning_rate": 2.9883095489522096e-06, "loss": 0.051, "step": 23025 }, { "epoch": 75.49508196721311, "grad_norm": 1.9603424072265625, "learning_rate": 2.9875524682981848e-06, "loss": 0.0631, "step": 23026 }, { "epoch": 75.4983606557377, "grad_norm": 3.150179147720337, "learning_rate": 2.9867954667167955e-06, "loss": 0.1778, "step": 23027 }, { "epoch": 75.5016393442623, "grad_norm": 2.918558359146118, "learning_rate": 2.9860385442165807e-06, "loss": 0.2434, "step": 23028 }, { "epoch": 75.50491803278689, "grad_norm": 2.3726601600646973, "learning_rate": 2.9852817008060752e-06, "loss": 0.0836, "step": 23029 }, { "epoch": 75.50819672131148, "grad_norm": 2.7654635906219482, "learning_rate": 2.984524936493811e-06, "loss": 0.1744, "step": 23030 }, { "epoch": 75.51147540983607, "grad_norm": 3.493288040161133, "learning_rate": 2.9837682512883205e-06, "loss": 0.3545, "step": 23031 }, { "epoch": 75.51475409836065, "grad_norm": 1.8508607149124146, "learning_rate": 2.9830116451981408e-06, "loss": 0.0318, "step": 23032 }, { "epoch": 75.51803278688524, "grad_norm": 2.7660958766937256, "learning_rate": 2.9822551182317993e-06, "loss": 0.1258, "step": 23033 }, { "epoch": 75.52131147540983, "grad_norm": 2.443187952041626, "learning_rate": 2.98149867039783e-06, "loss": 0.1332, "step": 23034 }, { "epoch": 75.52459016393442, "grad_norm": 2.6168313026428223, "learning_rate": 2.9807423017047553e-06, "loss": 0.1301, "step": 23035 }, { "epoch": 75.52786885245902, "grad_norm": 2.511592388153076, "learning_rate": 2.9799860121611145e-06, "loss": 0.1501, "step": 23036 }, { "epoch": 75.53114754098361, "grad_norm": 2.617607593536377, "learning_rate": 2.9792298017754296e-06, "loss": 0.0879, "step": 23037 }, { "epoch": 75.5344262295082, "grad_norm": 1.984655499458313, "learning_rate": 2.978473670556228e-06, "loss": 0.049, "step": 23038 }, { "epoch": 75.53770491803279, "grad_norm": 2.3212666511535645, "learning_rate": 2.9777176185120336e-06, "loss": 0.1024, "step": 23039 }, { "epoch": 75.54098360655738, "grad_norm": 2.7770955562591553, "learning_rate": 2.9769616456513774e-06, "loss": 0.1788, "step": 23040 }, { "epoch": 75.54426229508196, "grad_norm": 2.5763795375823975, "learning_rate": 2.976205751982781e-06, "loss": 0.1026, "step": 23041 }, { "epoch": 75.54754098360655, "grad_norm": 2.8400745391845703, "learning_rate": 2.975449937514767e-06, "loss": 0.0906, "step": 23042 }, { "epoch": 75.55081967213114, "grad_norm": 2.2853338718414307, "learning_rate": 2.97469420225586e-06, "loss": 0.1391, "step": 23043 }, { "epoch": 75.55409836065574, "grad_norm": 2.547060489654541, "learning_rate": 2.9739385462145766e-06, "loss": 0.1687, "step": 23044 }, { "epoch": 75.55737704918033, "grad_norm": 2.6026620864868164, "learning_rate": 2.973182969399444e-06, "loss": 0.134, "step": 23045 }, { "epoch": 75.56065573770492, "grad_norm": 2.490532636642456, "learning_rate": 2.9724274718189796e-06, "loss": 0.0586, "step": 23046 }, { "epoch": 75.56393442622951, "grad_norm": 2.253648281097412, "learning_rate": 2.971672053481702e-06, "loss": 0.0513, "step": 23047 }, { "epoch": 75.5672131147541, "grad_norm": 2.5859265327453613, "learning_rate": 2.970916714396128e-06, "loss": 0.1656, "step": 23048 }, { "epoch": 75.57049180327868, "grad_norm": 1.751308798789978, "learning_rate": 2.970161454570778e-06, "loss": 0.0391, "step": 23049 }, { "epoch": 75.57377049180327, "grad_norm": 3.1876583099365234, "learning_rate": 2.969406274014167e-06, "loss": 0.1045, "step": 23050 }, { "epoch": 75.57704918032788, "grad_norm": 2.5284574031829834, "learning_rate": 2.968651172734811e-06, "loss": 0.132, "step": 23051 }, { "epoch": 75.58032786885246, "grad_norm": 1.9234873056411743, "learning_rate": 2.9678961507412205e-06, "loss": 0.0341, "step": 23052 }, { "epoch": 75.58360655737705, "grad_norm": 2.3967082500457764, "learning_rate": 2.967141208041915e-06, "loss": 0.1278, "step": 23053 }, { "epoch": 75.58688524590164, "grad_norm": 1.8965023756027222, "learning_rate": 2.9663863446454053e-06, "loss": 0.063, "step": 23054 }, { "epoch": 75.59016393442623, "grad_norm": 2.8428547382354736, "learning_rate": 2.9656315605602028e-06, "loss": 0.1819, "step": 23055 }, { "epoch": 75.59344262295082, "grad_norm": 1.8891723155975342, "learning_rate": 2.9648768557948182e-06, "loss": 0.0417, "step": 23056 }, { "epoch": 75.5967213114754, "grad_norm": 3.688870429992676, "learning_rate": 2.9641222303577576e-06, "loss": 0.2932, "step": 23057 }, { "epoch": 75.6, "grad_norm": 2.5286924839019775, "learning_rate": 2.9633676842575386e-06, "loss": 0.074, "step": 23058 }, { "epoch": 75.6032786885246, "grad_norm": 2.2219457626342773, "learning_rate": 2.9626132175026656e-06, "loss": 0.0852, "step": 23059 }, { "epoch": 75.60655737704919, "grad_norm": 2.4722015857696533, "learning_rate": 2.9618588301016447e-06, "loss": 0.0709, "step": 23060 }, { "epoch": 75.60983606557377, "grad_norm": 2.6638121604919434, "learning_rate": 2.9611045220629807e-06, "loss": 0.0735, "step": 23061 }, { "epoch": 75.61311475409836, "grad_norm": 1.7251416444778442, "learning_rate": 2.9603502933951846e-06, "loss": 0.1065, "step": 23062 }, { "epoch": 75.61639344262295, "grad_norm": 2.3379714488983154, "learning_rate": 2.959596144106758e-06, "loss": 0.049, "step": 23063 }, { "epoch": 75.61967213114754, "grad_norm": 3.156353712081909, "learning_rate": 2.9588420742062063e-06, "loss": 0.2671, "step": 23064 }, { "epoch": 75.62295081967213, "grad_norm": 2.653042793273926, "learning_rate": 2.9580880837020266e-06, "loss": 0.1002, "step": 23065 }, { "epoch": 75.62622950819672, "grad_norm": 1.671486496925354, "learning_rate": 2.9573341726027295e-06, "loss": 0.0427, "step": 23066 }, { "epoch": 75.62950819672132, "grad_norm": 1.7939543724060059, "learning_rate": 2.9565803409168116e-06, "loss": 0.1028, "step": 23067 }, { "epoch": 75.6327868852459, "grad_norm": 2.5083506107330322, "learning_rate": 2.955826588652775e-06, "loss": 0.1289, "step": 23068 }, { "epoch": 75.6360655737705, "grad_norm": 1.7486132383346558, "learning_rate": 2.955072915819116e-06, "loss": 0.0266, "step": 23069 }, { "epoch": 75.63934426229508, "grad_norm": 2.6291134357452393, "learning_rate": 2.954319322424333e-06, "loss": 0.1027, "step": 23070 }, { "epoch": 75.64262295081967, "grad_norm": 2.131124496459961, "learning_rate": 2.953565808476927e-06, "loss": 0.0891, "step": 23071 }, { "epoch": 75.64590163934426, "grad_norm": 2.167341709136963, "learning_rate": 2.9528123739853943e-06, "loss": 0.0508, "step": 23072 }, { "epoch": 75.64918032786885, "grad_norm": 2.6742441654205322, "learning_rate": 2.9520590189582254e-06, "loss": 0.0926, "step": 23073 }, { "epoch": 75.65245901639344, "grad_norm": 2.4816720485687256, "learning_rate": 2.9513057434039227e-06, "loss": 0.0936, "step": 23074 }, { "epoch": 75.65573770491804, "grad_norm": 2.8389055728912354, "learning_rate": 2.950552547330976e-06, "loss": 0.2232, "step": 23075 }, { "epoch": 75.65901639344263, "grad_norm": 2.305806875228882, "learning_rate": 2.9497994307478763e-06, "loss": 0.0519, "step": 23076 }, { "epoch": 75.66229508196722, "grad_norm": 1.712167501449585, "learning_rate": 2.949046393663121e-06, "loss": 0.1005, "step": 23077 }, { "epoch": 75.6655737704918, "grad_norm": 1.911038875579834, "learning_rate": 2.948293436085199e-06, "loss": 0.1311, "step": 23078 }, { "epoch": 75.66885245901639, "grad_norm": 1.9712413549423218, "learning_rate": 2.9475405580226015e-06, "loss": 0.0446, "step": 23079 }, { "epoch": 75.67213114754098, "grad_norm": 1.868290901184082, "learning_rate": 2.946787759483812e-06, "loss": 0.1337, "step": 23080 }, { "epoch": 75.67540983606557, "grad_norm": 2.9551947116851807, "learning_rate": 2.9460350404773288e-06, "loss": 0.0976, "step": 23081 }, { "epoch": 75.67868852459016, "grad_norm": 2.8934457302093506, "learning_rate": 2.945282401011633e-06, "loss": 0.2101, "step": 23082 }, { "epoch": 75.68196721311476, "grad_norm": 2.5732972621917725, "learning_rate": 2.9445298410952117e-06, "loss": 0.0916, "step": 23083 }, { "epoch": 75.68524590163935, "grad_norm": 2.876560688018799, "learning_rate": 2.943777360736555e-06, "loss": 0.3073, "step": 23084 }, { "epoch": 75.68852459016394, "grad_norm": 1.9943612813949585, "learning_rate": 2.943024959944144e-06, "loss": 0.1188, "step": 23085 }, { "epoch": 75.69180327868852, "grad_norm": 3.0623044967651367, "learning_rate": 2.9422726387264657e-06, "loss": 0.1177, "step": 23086 }, { "epoch": 75.69508196721311, "grad_norm": 3.0444788932800293, "learning_rate": 2.9415203970919983e-06, "loss": 0.1908, "step": 23087 }, { "epoch": 75.6983606557377, "grad_norm": 3.3911428451538086, "learning_rate": 2.9407682350492295e-06, "loss": 0.1041, "step": 23088 }, { "epoch": 75.70163934426229, "grad_norm": 2.4390194416046143, "learning_rate": 2.9400161526066386e-06, "loss": 0.0713, "step": 23089 }, { "epoch": 75.70491803278688, "grad_norm": 3.129181146621704, "learning_rate": 2.939264149772707e-06, "loss": 0.108, "step": 23090 }, { "epoch": 75.70819672131148, "grad_norm": 2.0465593338012695, "learning_rate": 2.9385122265559094e-06, "loss": 0.2808, "step": 23091 }, { "epoch": 75.71147540983607, "grad_norm": 2.5841856002807617, "learning_rate": 2.937760382964733e-06, "loss": 0.2331, "step": 23092 }, { "epoch": 75.71475409836066, "grad_norm": 2.1485838890075684, "learning_rate": 2.9370086190076496e-06, "loss": 0.0315, "step": 23093 }, { "epoch": 75.71803278688525, "grad_norm": 2.2053537368774414, "learning_rate": 2.936256934693139e-06, "loss": 0.0889, "step": 23094 }, { "epoch": 75.72131147540983, "grad_norm": 2.1779704093933105, "learning_rate": 2.9355053300296755e-06, "loss": 0.0609, "step": 23095 }, { "epoch": 75.72459016393442, "grad_norm": 2.7901947498321533, "learning_rate": 2.9347538050257306e-06, "loss": 0.1534, "step": 23096 }, { "epoch": 75.72786885245901, "grad_norm": 2.62947416305542, "learning_rate": 2.934002359689787e-06, "loss": 0.0924, "step": 23097 }, { "epoch": 75.73114754098361, "grad_norm": 2.9029393196105957, "learning_rate": 2.9332509940303134e-06, "loss": 0.204, "step": 23098 }, { "epoch": 75.7344262295082, "grad_norm": 2.8208096027374268, "learning_rate": 2.9324997080557814e-06, "loss": 0.2319, "step": 23099 }, { "epoch": 75.73770491803279, "grad_norm": 2.6728270053863525, "learning_rate": 2.9317485017746615e-06, "loss": 0.1749, "step": 23100 }, { "epoch": 75.74098360655738, "grad_norm": 2.39215350151062, "learning_rate": 2.930997375195429e-06, "loss": 0.0769, "step": 23101 }, { "epoch": 75.74426229508197, "grad_norm": 2.591433048248291, "learning_rate": 2.9302463283265505e-06, "loss": 0.2106, "step": 23102 }, { "epoch": 75.74754098360656, "grad_norm": 2.6484618186950684, "learning_rate": 2.9294953611764963e-06, "loss": 0.1054, "step": 23103 }, { "epoch": 75.75081967213114, "grad_norm": 3.8821096420288086, "learning_rate": 2.9287444737537296e-06, "loss": 0.1266, "step": 23104 }, { "epoch": 75.75409836065573, "grad_norm": 2.5304999351501465, "learning_rate": 2.9279936660667253e-06, "loss": 0.0744, "step": 23105 }, { "epoch": 75.75737704918033, "grad_norm": 2.7048866748809814, "learning_rate": 2.927242938123944e-06, "loss": 0.0553, "step": 23106 }, { "epoch": 75.76065573770492, "grad_norm": 2.3197999000549316, "learning_rate": 2.926492289933853e-06, "loss": 0.0435, "step": 23107 }, { "epoch": 75.76393442622951, "grad_norm": 2.808786630630493, "learning_rate": 2.9257417215049166e-06, "loss": 0.2194, "step": 23108 }, { "epoch": 75.7672131147541, "grad_norm": 2.2961690425872803, "learning_rate": 2.924991232845594e-06, "loss": 0.1752, "step": 23109 }, { "epoch": 75.77049180327869, "grad_norm": 2.706434488296509, "learning_rate": 2.924240823964355e-06, "loss": 0.1473, "step": 23110 }, { "epoch": 75.77377049180328, "grad_norm": 3.4344451427459717, "learning_rate": 2.9234904948696573e-06, "loss": 0.0826, "step": 23111 }, { "epoch": 75.77704918032786, "grad_norm": 2.4670097827911377, "learning_rate": 2.9227402455699627e-06, "loss": 0.1002, "step": 23112 }, { "epoch": 75.78032786885245, "grad_norm": 2.338996171951294, "learning_rate": 2.9219900760737263e-06, "loss": 0.0845, "step": 23113 }, { "epoch": 75.78360655737706, "grad_norm": 2.1944005489349365, "learning_rate": 2.9212399863894146e-06, "loss": 0.0849, "step": 23114 }, { "epoch": 75.78688524590164, "grad_norm": 2.485022783279419, "learning_rate": 2.920489976525482e-06, "loss": 0.1424, "step": 23115 }, { "epoch": 75.79016393442623, "grad_norm": 2.874429225921631, "learning_rate": 2.919740046490386e-06, "loss": 0.2924, "step": 23116 }, { "epoch": 75.79344262295082, "grad_norm": 2.7833003997802734, "learning_rate": 2.918990196292579e-06, "loss": 0.1057, "step": 23117 }, { "epoch": 75.79672131147541, "grad_norm": 2.821868658065796, "learning_rate": 2.918240425940523e-06, "loss": 0.1936, "step": 23118 }, { "epoch": 75.8, "grad_norm": 2.876129627227783, "learning_rate": 2.9174907354426696e-06, "loss": 0.088, "step": 23119 }, { "epoch": 75.80327868852459, "grad_norm": 3.885343074798584, "learning_rate": 2.9167411248074728e-06, "loss": 0.0824, "step": 23120 }, { "epoch": 75.80655737704917, "grad_norm": 1.9799987077713013, "learning_rate": 2.9159915940433837e-06, "loss": 0.0591, "step": 23121 }, { "epoch": 75.80983606557378, "grad_norm": 2.203627586364746, "learning_rate": 2.915242143158852e-06, "loss": 0.0885, "step": 23122 }, { "epoch": 75.81311475409836, "grad_norm": 4.769912242889404, "learning_rate": 2.914492772162335e-06, "loss": 0.3297, "step": 23123 }, { "epoch": 75.81639344262295, "grad_norm": 2.3470990657806396, "learning_rate": 2.9137434810622788e-06, "loss": 0.0406, "step": 23124 }, { "epoch": 75.81967213114754, "grad_norm": 2.4738211631774902, "learning_rate": 2.912994269867132e-06, "loss": 0.1623, "step": 23125 }, { "epoch": 75.82295081967213, "grad_norm": 2.924643039703369, "learning_rate": 2.912245138585341e-06, "loss": 0.2433, "step": 23126 }, { "epoch": 75.82622950819672, "grad_norm": 4.275934219360352, "learning_rate": 2.9114960872253585e-06, "loss": 0.1327, "step": 23127 }, { "epoch": 75.8295081967213, "grad_norm": 3.4465296268463135, "learning_rate": 2.910747115795628e-06, "loss": 0.1228, "step": 23128 }, { "epoch": 75.8327868852459, "grad_norm": 2.6760201454162598, "learning_rate": 2.909998224304592e-06, "loss": 0.1631, "step": 23129 }, { "epoch": 75.8360655737705, "grad_norm": 2.9322805404663086, "learning_rate": 2.9092494127606997e-06, "loss": 0.0948, "step": 23130 }, { "epoch": 75.83934426229509, "grad_norm": 2.5155396461486816, "learning_rate": 2.9085006811723937e-06, "loss": 0.0704, "step": 23131 }, { "epoch": 75.84262295081967, "grad_norm": 2.4587459564208984, "learning_rate": 2.907752029548112e-06, "loss": 0.1492, "step": 23132 }, { "epoch": 75.84590163934426, "grad_norm": 3.2939443588256836, "learning_rate": 2.907003457896305e-06, "loss": 0.2098, "step": 23133 }, { "epoch": 75.84918032786885, "grad_norm": 2.324861764907837, "learning_rate": 2.906254966225407e-06, "loss": 0.0399, "step": 23134 }, { "epoch": 75.85245901639344, "grad_norm": 2.1775879859924316, "learning_rate": 2.9055065545438576e-06, "loss": 0.1043, "step": 23135 }, { "epoch": 75.85573770491803, "grad_norm": 6.586817741394043, "learning_rate": 2.9047582228601014e-06, "loss": 0.1557, "step": 23136 }, { "epoch": 75.85901639344263, "grad_norm": 2.6954572200775146, "learning_rate": 2.904009971182573e-06, "loss": 0.1725, "step": 23137 }, { "epoch": 75.86229508196722, "grad_norm": 1.9976266622543335, "learning_rate": 2.903261799519711e-06, "loss": 0.0463, "step": 23138 }, { "epoch": 75.8655737704918, "grad_norm": 3.184968948364258, "learning_rate": 2.902513707879947e-06, "loss": 0.2762, "step": 23139 }, { "epoch": 75.8688524590164, "grad_norm": 2.675950527191162, "learning_rate": 2.9017656962717235e-06, "loss": 0.0888, "step": 23140 }, { "epoch": 75.87213114754098, "grad_norm": 2.397676944732666, "learning_rate": 2.901017764703473e-06, "loss": 0.2071, "step": 23141 }, { "epoch": 75.87540983606557, "grad_norm": 3.299924850463867, "learning_rate": 2.9002699131836274e-06, "loss": 0.0526, "step": 23142 }, { "epoch": 75.87868852459016, "grad_norm": 3.040417194366455, "learning_rate": 2.8995221417206176e-06, "loss": 0.1468, "step": 23143 }, { "epoch": 75.88196721311475, "grad_norm": 2.933518886566162, "learning_rate": 2.898774450322882e-06, "loss": 0.0805, "step": 23144 }, { "epoch": 75.88524590163935, "grad_norm": 4.701473236083984, "learning_rate": 2.8980268389988477e-06, "loss": 0.3235, "step": 23145 }, { "epoch": 75.88852459016394, "grad_norm": 3.0644662380218506, "learning_rate": 2.897279307756944e-06, "loss": 0.1694, "step": 23146 }, { "epoch": 75.89180327868853, "grad_norm": 1.3135182857513428, "learning_rate": 2.8965318566056024e-06, "loss": 0.0231, "step": 23147 }, { "epoch": 75.89508196721312, "grad_norm": 2.6576316356658936, "learning_rate": 2.8957844855532457e-06, "loss": 0.0856, "step": 23148 }, { "epoch": 75.8983606557377, "grad_norm": 1.766695499420166, "learning_rate": 2.895037194608309e-06, "loss": 0.1159, "step": 23149 }, { "epoch": 75.90163934426229, "grad_norm": 1.7962188720703125, "learning_rate": 2.894289983779215e-06, "loss": 0.0568, "step": 23150 }, { "epoch": 75.90491803278688, "grad_norm": 2.1223983764648438, "learning_rate": 2.8935428530743892e-06, "loss": 0.0768, "step": 23151 }, { "epoch": 75.90819672131147, "grad_norm": 2.4010872840881348, "learning_rate": 2.892795802502254e-06, "loss": 0.1368, "step": 23152 }, { "epoch": 75.91147540983607, "grad_norm": 1.693619728088379, "learning_rate": 2.8920488320712394e-06, "loss": 0.0862, "step": 23153 }, { "epoch": 75.91475409836066, "grad_norm": 2.6689388751983643, "learning_rate": 2.8913019417897637e-06, "loss": 0.0959, "step": 23154 }, { "epoch": 75.91803278688525, "grad_norm": 2.4401557445526123, "learning_rate": 2.8905551316662506e-06, "loss": 0.1901, "step": 23155 }, { "epoch": 75.92131147540984, "grad_norm": 2.2811405658721924, "learning_rate": 2.8898084017091166e-06, "loss": 0.0819, "step": 23156 }, { "epoch": 75.92459016393443, "grad_norm": 2.408294916152954, "learning_rate": 2.8890617519267894e-06, "loss": 0.085, "step": 23157 }, { "epoch": 75.92786885245901, "grad_norm": 2.9094650745391846, "learning_rate": 2.8883151823276833e-06, "loss": 0.2118, "step": 23158 }, { "epoch": 75.9311475409836, "grad_norm": 3.477982521057129, "learning_rate": 2.8875686929202196e-06, "loss": 0.2759, "step": 23159 }, { "epoch": 75.93442622950819, "grad_norm": 3.000842809677124, "learning_rate": 2.886822283712812e-06, "loss": 0.1781, "step": 23160 }, { "epoch": 75.9377049180328, "grad_norm": 2.81583833694458, "learning_rate": 2.8860759547138773e-06, "loss": 0.2239, "step": 23161 }, { "epoch": 75.94098360655738, "grad_norm": 3.0849626064300537, "learning_rate": 2.885329705931835e-06, "loss": 0.1112, "step": 23162 }, { "epoch": 75.94426229508197, "grad_norm": 2.8026010990142822, "learning_rate": 2.8845835373750986e-06, "loss": 0.1595, "step": 23163 }, { "epoch": 75.94754098360656, "grad_norm": 2.735771656036377, "learning_rate": 2.8838374490520803e-06, "loss": 0.1938, "step": 23164 }, { "epoch": 75.95081967213115, "grad_norm": 2.9024312496185303, "learning_rate": 2.883091440971191e-06, "loss": 0.1096, "step": 23165 }, { "epoch": 75.95409836065573, "grad_norm": 2.150871515274048, "learning_rate": 2.8823455131408486e-06, "loss": 0.1468, "step": 23166 }, { "epoch": 75.95737704918032, "grad_norm": 2.283433675765991, "learning_rate": 2.881599665569461e-06, "loss": 0.0723, "step": 23167 }, { "epoch": 75.96065573770491, "grad_norm": 2.684812545776367, "learning_rate": 2.8808538982654378e-06, "loss": 0.1902, "step": 23168 }, { "epoch": 75.96393442622951, "grad_norm": 1.9349859952926636, "learning_rate": 2.880108211237187e-06, "loss": 0.0511, "step": 23169 }, { "epoch": 75.9672131147541, "grad_norm": 3.2852365970611572, "learning_rate": 2.879362604493121e-06, "loss": 0.1201, "step": 23170 }, { "epoch": 75.97049180327869, "grad_norm": 2.7479822635650635, "learning_rate": 2.8786170780416454e-06, "loss": 0.1795, "step": 23171 }, { "epoch": 75.97377049180328, "grad_norm": 2.9690656661987305, "learning_rate": 2.877871631891167e-06, "loss": 0.116, "step": 23172 }, { "epoch": 75.97704918032787, "grad_norm": 2.6067986488342285, "learning_rate": 2.877126266050091e-06, "loss": 0.0385, "step": 23173 }, { "epoch": 75.98032786885246, "grad_norm": 2.2814078330993652, "learning_rate": 2.8763809805268195e-06, "loss": 0.0551, "step": 23174 }, { "epoch": 75.98360655737704, "grad_norm": 1.907675862312317, "learning_rate": 2.8756357753297613e-06, "loss": 0.1413, "step": 23175 }, { "epoch": 75.98688524590163, "grad_norm": 1.9314038753509521, "learning_rate": 2.8748906504673178e-06, "loss": 0.0318, "step": 23176 }, { "epoch": 75.99016393442623, "grad_norm": 2.380526304244995, "learning_rate": 2.874145605947891e-06, "loss": 0.132, "step": 23177 }, { "epoch": 75.99344262295082, "grad_norm": 2.365624189376831, "learning_rate": 2.8734006417798776e-06, "loss": 0.0955, "step": 23178 }, { "epoch": 75.99672131147541, "grad_norm": 3.3104045391082764, "learning_rate": 2.8726557579716852e-06, "loss": 0.1417, "step": 23179 }, { "epoch": 76.0, "grad_norm": 2.6441221237182617, "learning_rate": 2.8719109545317102e-06, "loss": 0.3114, "step": 23180 }, { "epoch": 76.00327868852459, "grad_norm": 2.8144354820251465, "learning_rate": 2.8711662314683496e-06, "loss": 0.1436, "step": 23181 }, { "epoch": 76.00655737704918, "grad_norm": 2.591212034225464, "learning_rate": 2.870421588789999e-06, "loss": 0.2071, "step": 23182 }, { "epoch": 76.00983606557377, "grad_norm": 2.1292097568511963, "learning_rate": 2.869677026505061e-06, "loss": 0.0745, "step": 23183 }, { "epoch": 76.01311475409837, "grad_norm": 3.275726556777954, "learning_rate": 2.8689325446219285e-06, "loss": 0.1527, "step": 23184 }, { "epoch": 76.01639344262296, "grad_norm": 2.466486692428589, "learning_rate": 2.8681881431489933e-06, "loss": 0.179, "step": 23185 }, { "epoch": 76.01967213114754, "grad_norm": 1.832565188407898, "learning_rate": 2.8674438220946544e-06, "loss": 0.1028, "step": 23186 }, { "epoch": 76.02295081967213, "grad_norm": 3.7115626335144043, "learning_rate": 2.8666995814673027e-06, "loss": 0.0691, "step": 23187 }, { "epoch": 76.02622950819672, "grad_norm": 2.2066009044647217, "learning_rate": 2.865955421275327e-06, "loss": 0.0876, "step": 23188 }, { "epoch": 76.02950819672131, "grad_norm": 4.470715045928955, "learning_rate": 2.8652113415271243e-06, "loss": 0.0659, "step": 23189 }, { "epoch": 76.0327868852459, "grad_norm": 2.547619104385376, "learning_rate": 2.864467342231082e-06, "loss": 0.0982, "step": 23190 }, { "epoch": 76.03606557377049, "grad_norm": 2.789390802383423, "learning_rate": 2.863723423395587e-06, "loss": 0.2604, "step": 23191 }, { "epoch": 76.03934426229509, "grad_norm": 1.9236568212509155, "learning_rate": 2.862979585029032e-06, "loss": 0.0489, "step": 23192 }, { "epoch": 76.04262295081968, "grad_norm": 3.3989930152893066, "learning_rate": 2.8622358271398044e-06, "loss": 0.2293, "step": 23193 }, { "epoch": 76.04590163934427, "grad_norm": 2.3170361518859863, "learning_rate": 2.861492149736288e-06, "loss": 0.1246, "step": 23194 }, { "epoch": 76.04918032786885, "grad_norm": 2.5426485538482666, "learning_rate": 2.8607485528268676e-06, "loss": 0.0455, "step": 23195 }, { "epoch": 76.05245901639344, "grad_norm": 2.511303186416626, "learning_rate": 2.860005036419933e-06, "loss": 0.1953, "step": 23196 }, { "epoch": 76.05573770491803, "grad_norm": 2.039039134979248, "learning_rate": 2.859261600523865e-06, "loss": 0.0944, "step": 23197 }, { "epoch": 76.05901639344262, "grad_norm": 8.764710426330566, "learning_rate": 2.8585182451470473e-06, "loss": 0.2854, "step": 23198 }, { "epoch": 76.0622950819672, "grad_norm": 3.6778030395507812, "learning_rate": 2.8577749702978617e-06, "loss": 0.1459, "step": 23199 }, { "epoch": 76.06557377049181, "grad_norm": 2.2637500762939453, "learning_rate": 2.8570317759846865e-06, "loss": 0.0911, "step": 23200 }, { "epoch": 76.0688524590164, "grad_norm": 1.9801647663116455, "learning_rate": 2.856288662215908e-06, "loss": 0.0884, "step": 23201 }, { "epoch": 76.07213114754099, "grad_norm": 2.4703967571258545, "learning_rate": 2.8555456289999016e-06, "loss": 0.2159, "step": 23202 }, { "epoch": 76.07540983606557, "grad_norm": 1.6064752340316772, "learning_rate": 2.8548026763450475e-06, "loss": 0.0368, "step": 23203 }, { "epoch": 76.07868852459016, "grad_norm": 2.315986156463623, "learning_rate": 2.854059804259719e-06, "loss": 0.1213, "step": 23204 }, { "epoch": 76.08196721311475, "grad_norm": 2.145472526550293, "learning_rate": 2.8533170127522992e-06, "loss": 0.0622, "step": 23205 }, { "epoch": 76.08524590163934, "grad_norm": 3.096122980117798, "learning_rate": 2.8525743018311603e-06, "loss": 0.1255, "step": 23206 }, { "epoch": 76.08852459016393, "grad_norm": 3.334876775741577, "learning_rate": 2.851831671504678e-06, "loss": 0.1226, "step": 23207 }, { "epoch": 76.09180327868853, "grad_norm": 2.671416997909546, "learning_rate": 2.851089121781223e-06, "loss": 0.0896, "step": 23208 }, { "epoch": 76.09508196721312, "grad_norm": 1.9853498935699463, "learning_rate": 2.8503466526691737e-06, "loss": 0.1686, "step": 23209 }, { "epoch": 76.09836065573771, "grad_norm": 1.8850512504577637, "learning_rate": 2.8496042641769007e-06, "loss": 0.0518, "step": 23210 }, { "epoch": 76.1016393442623, "grad_norm": 2.4895381927490234, "learning_rate": 2.848861956312773e-06, "loss": 0.2271, "step": 23211 }, { "epoch": 76.10491803278688, "grad_norm": 2.5772693157196045, "learning_rate": 2.8481197290851626e-06, "loss": 0.1222, "step": 23212 }, { "epoch": 76.10819672131147, "grad_norm": 2.3624815940856934, "learning_rate": 2.847377582502435e-06, "loss": 0.1022, "step": 23213 }, { "epoch": 76.11147540983606, "grad_norm": 2.6040518283843994, "learning_rate": 2.8466355165729653e-06, "loss": 0.1072, "step": 23214 }, { "epoch": 76.11475409836065, "grad_norm": 2.5671634674072266, "learning_rate": 2.8458935313051174e-06, "loss": 0.175, "step": 23215 }, { "epoch": 76.11803278688525, "grad_norm": 1.6119956970214844, "learning_rate": 2.845151626707259e-06, "loss": 0.0444, "step": 23216 }, { "epoch": 76.12131147540984, "grad_norm": 2.717890501022339, "learning_rate": 2.844409802787752e-06, "loss": 0.1354, "step": 23217 }, { "epoch": 76.12459016393443, "grad_norm": 1.883766770362854, "learning_rate": 2.843668059554967e-06, "loss": 0.0414, "step": 23218 }, { "epoch": 76.12786885245902, "grad_norm": 1.8719792366027832, "learning_rate": 2.842926397017266e-06, "loss": 0.0473, "step": 23219 }, { "epoch": 76.1311475409836, "grad_norm": 3.004056453704834, "learning_rate": 2.842184815183011e-06, "loss": 0.0961, "step": 23220 }, { "epoch": 76.1344262295082, "grad_norm": 2.4845612049102783, "learning_rate": 2.8414433140605614e-06, "loss": 0.1131, "step": 23221 }, { "epoch": 76.13770491803278, "grad_norm": 2.551712989807129, "learning_rate": 2.840701893658284e-06, "loss": 0.1425, "step": 23222 }, { "epoch": 76.14098360655737, "grad_norm": 2.3661365509033203, "learning_rate": 2.8399605539845376e-06, "loss": 0.1068, "step": 23223 }, { "epoch": 76.14426229508197, "grad_norm": 2.0330328941345215, "learning_rate": 2.8392192950476796e-06, "loss": 0.041, "step": 23224 }, { "epoch": 76.14754098360656, "grad_norm": 2.1867334842681885, "learning_rate": 2.8384781168560693e-06, "loss": 0.0402, "step": 23225 }, { "epoch": 76.15081967213115, "grad_norm": 2.992274761199951, "learning_rate": 2.837737019418062e-06, "loss": 0.2069, "step": 23226 }, { "epoch": 76.15409836065574, "grad_norm": 2.6604061126708984, "learning_rate": 2.8369960027420197e-06, "loss": 0.1067, "step": 23227 }, { "epoch": 76.15737704918033, "grad_norm": 2.029869556427002, "learning_rate": 2.8362550668362952e-06, "loss": 0.0399, "step": 23228 }, { "epoch": 76.16065573770491, "grad_norm": 3.2020387649536133, "learning_rate": 2.8355142117092425e-06, "loss": 0.2286, "step": 23229 }, { "epoch": 76.1639344262295, "grad_norm": 2.338602066040039, "learning_rate": 2.8347734373692137e-06, "loss": 0.0931, "step": 23230 }, { "epoch": 76.1672131147541, "grad_norm": 2.675426483154297, "learning_rate": 2.8340327438245673e-06, "loss": 0.1323, "step": 23231 }, { "epoch": 76.1704918032787, "grad_norm": 3.344789505004883, "learning_rate": 2.833292131083654e-06, "loss": 0.164, "step": 23232 }, { "epoch": 76.17377049180328, "grad_norm": 2.5546793937683105, "learning_rate": 2.832551599154821e-06, "loss": 0.0645, "step": 23233 }, { "epoch": 76.17704918032787, "grad_norm": 2.2965247631073, "learning_rate": 2.8318111480464194e-06, "loss": 0.0408, "step": 23234 }, { "epoch": 76.18032786885246, "grad_norm": 2.429502248764038, "learning_rate": 2.8310707777668025e-06, "loss": 0.0738, "step": 23235 }, { "epoch": 76.18360655737705, "grad_norm": 2.4341766834259033, "learning_rate": 2.8303304883243165e-06, "loss": 0.1255, "step": 23236 }, { "epoch": 76.18688524590164, "grad_norm": 2.925211191177368, "learning_rate": 2.8295902797273057e-06, "loss": 0.1419, "step": 23237 }, { "epoch": 76.19016393442622, "grad_norm": 2.612504720687866, "learning_rate": 2.828850151984124e-06, "loss": 0.1741, "step": 23238 }, { "epoch": 76.19344262295083, "grad_norm": 2.6702888011932373, "learning_rate": 2.8281101051031112e-06, "loss": 0.0831, "step": 23239 }, { "epoch": 76.19672131147541, "grad_norm": 3.048330307006836, "learning_rate": 2.827370139092612e-06, "loss": 0.1564, "step": 23240 }, { "epoch": 76.2, "grad_norm": 2.5055975914001465, "learning_rate": 2.8266302539609747e-06, "loss": 0.0874, "step": 23241 }, { "epoch": 76.20327868852459, "grad_norm": 2.829197883605957, "learning_rate": 2.8258904497165406e-06, "loss": 0.0743, "step": 23242 }, { "epoch": 76.20655737704918, "grad_norm": 3.4235076904296875, "learning_rate": 2.8251507263676503e-06, "loss": 0.0934, "step": 23243 }, { "epoch": 76.20983606557377, "grad_norm": 2.8015904426574707, "learning_rate": 2.8244110839226426e-06, "loss": 0.1332, "step": 23244 }, { "epoch": 76.21311475409836, "grad_norm": 2.147540807723999, "learning_rate": 2.8236715223898626e-06, "loss": 0.0955, "step": 23245 }, { "epoch": 76.21639344262294, "grad_norm": 1.6998974084854126, "learning_rate": 2.8229320417776497e-06, "loss": 0.0952, "step": 23246 }, { "epoch": 76.21967213114755, "grad_norm": 2.3754801750183105, "learning_rate": 2.822192642094336e-06, "loss": 0.1086, "step": 23247 }, { "epoch": 76.22295081967214, "grad_norm": 1.9569000005722046, "learning_rate": 2.8214533233482654e-06, "loss": 0.0415, "step": 23248 }, { "epoch": 76.22622950819672, "grad_norm": 2.2095415592193604, "learning_rate": 2.820714085547774e-06, "loss": 0.1012, "step": 23249 }, { "epoch": 76.22950819672131, "grad_norm": 2.830810546875, "learning_rate": 2.8199749287011957e-06, "loss": 0.2461, "step": 23250 }, { "epoch": 76.2327868852459, "grad_norm": 2.645813226699829, "learning_rate": 2.819235852816865e-06, "loss": 0.1349, "step": 23251 }, { "epoch": 76.23606557377049, "grad_norm": 2.0931811332702637, "learning_rate": 2.8184968579031134e-06, "loss": 0.1169, "step": 23252 }, { "epoch": 76.23934426229508, "grad_norm": 3.044348955154419, "learning_rate": 2.817757943968279e-06, "loss": 0.1416, "step": 23253 }, { "epoch": 76.24262295081967, "grad_norm": 2.0888664722442627, "learning_rate": 2.8170191110206924e-06, "loss": 0.1249, "step": 23254 }, { "epoch": 76.24590163934427, "grad_norm": 2.0261387825012207, "learning_rate": 2.816280359068684e-06, "loss": 0.0564, "step": 23255 }, { "epoch": 76.24918032786886, "grad_norm": 3.138312339782715, "learning_rate": 2.81554168812058e-06, "loss": 0.2175, "step": 23256 }, { "epoch": 76.25245901639344, "grad_norm": 2.7650485038757324, "learning_rate": 2.8148030981847164e-06, "loss": 0.128, "step": 23257 }, { "epoch": 76.25573770491803, "grad_norm": 2.08611798286438, "learning_rate": 2.8140645892694184e-06, "loss": 0.1825, "step": 23258 }, { "epoch": 76.25901639344262, "grad_norm": 2.4520065784454346, "learning_rate": 2.8133261613830145e-06, "loss": 0.0226, "step": 23259 }, { "epoch": 76.26229508196721, "grad_norm": 2.6687941551208496, "learning_rate": 2.812587814533826e-06, "loss": 0.1388, "step": 23260 }, { "epoch": 76.2655737704918, "grad_norm": 2.411297559738159, "learning_rate": 2.8118495487301865e-06, "loss": 0.1797, "step": 23261 }, { "epoch": 76.26885245901639, "grad_norm": 2.6545021533966064, "learning_rate": 2.8111113639804177e-06, "loss": 0.1295, "step": 23262 }, { "epoch": 76.27213114754099, "grad_norm": 1.600890040397644, "learning_rate": 2.8103732602928424e-06, "loss": 0.1214, "step": 23263 }, { "epoch": 76.27540983606558, "grad_norm": 4.0789289474487305, "learning_rate": 2.809635237675784e-06, "loss": 0.2297, "step": 23264 }, { "epoch": 76.27868852459017, "grad_norm": 2.7678041458129883, "learning_rate": 2.8088972961375614e-06, "loss": 0.1583, "step": 23265 }, { "epoch": 76.28196721311475, "grad_norm": 3.262711763381958, "learning_rate": 2.808159435686503e-06, "loss": 0.0704, "step": 23266 }, { "epoch": 76.28524590163934, "grad_norm": 2.499638557434082, "learning_rate": 2.8074216563309233e-06, "loss": 0.2607, "step": 23267 }, { "epoch": 76.28852459016393, "grad_norm": 2.5445284843444824, "learning_rate": 2.8066839580791427e-06, "loss": 0.2375, "step": 23268 }, { "epoch": 76.29180327868852, "grad_norm": 3.9151782989501953, "learning_rate": 2.805946340939476e-06, "loss": 0.063, "step": 23269 }, { "epoch": 76.29508196721312, "grad_norm": 2.716048240661621, "learning_rate": 2.805208804920249e-06, "loss": 0.1484, "step": 23270 }, { "epoch": 76.29836065573771, "grad_norm": 2.234739303588867, "learning_rate": 2.8044713500297716e-06, "loss": 0.0759, "step": 23271 }, { "epoch": 76.3016393442623, "grad_norm": 2.7579681873321533, "learning_rate": 2.8037339762763627e-06, "loss": 0.0822, "step": 23272 }, { "epoch": 76.30491803278689, "grad_norm": 2.223130226135254, "learning_rate": 2.802996683668332e-06, "loss": 0.073, "step": 23273 }, { "epoch": 76.30819672131148, "grad_norm": 2.8069944381713867, "learning_rate": 2.8022594722139997e-06, "loss": 0.2697, "step": 23274 }, { "epoch": 76.31147540983606, "grad_norm": 3.3035361766815186, "learning_rate": 2.8015223419216754e-06, "loss": 0.1625, "step": 23275 }, { "epoch": 76.31475409836065, "grad_norm": 2.9855706691741943, "learning_rate": 2.80078529279967e-06, "loss": 0.0603, "step": 23276 }, { "epoch": 76.31803278688524, "grad_norm": 2.060770034790039, "learning_rate": 2.800048324856298e-06, "loss": 0.0625, "step": 23277 }, { "epoch": 76.32131147540984, "grad_norm": 2.817068576812744, "learning_rate": 2.7993114380998633e-06, "loss": 0.2161, "step": 23278 }, { "epoch": 76.32459016393443, "grad_norm": 2.2854275703430176, "learning_rate": 2.798574632538682e-06, "loss": 0.1595, "step": 23279 }, { "epoch": 76.32786885245902, "grad_norm": 2.855520009994507, "learning_rate": 2.7978379081810592e-06, "loss": 0.079, "step": 23280 }, { "epoch": 76.33114754098361, "grad_norm": 1.6854171752929688, "learning_rate": 2.7971012650353023e-06, "loss": 0.0453, "step": 23281 }, { "epoch": 76.3344262295082, "grad_norm": 2.7684714794158936, "learning_rate": 2.7963647031097153e-06, "loss": 0.2155, "step": 23282 }, { "epoch": 76.33770491803278, "grad_norm": 2.177661895751953, "learning_rate": 2.7956282224126084e-06, "loss": 0.0669, "step": 23283 }, { "epoch": 76.34098360655737, "grad_norm": 2.659466505050659, "learning_rate": 2.7948918229522847e-06, "loss": 0.1485, "step": 23284 }, { "epoch": 76.34426229508196, "grad_norm": 3.346979856491089, "learning_rate": 2.794155504737046e-06, "loss": 0.1761, "step": 23285 }, { "epoch": 76.34754098360656, "grad_norm": 2.333280563354492, "learning_rate": 2.793419267775194e-06, "loss": 0.0943, "step": 23286 }, { "epoch": 76.35081967213115, "grad_norm": 4.036673545837402, "learning_rate": 2.7926831120750362e-06, "loss": 0.1313, "step": 23287 }, { "epoch": 76.35409836065574, "grad_norm": 2.2886881828308105, "learning_rate": 2.791947037644869e-06, "loss": 0.0547, "step": 23288 }, { "epoch": 76.35737704918033, "grad_norm": 3.0134081840515137, "learning_rate": 2.7912110444929942e-06, "loss": 0.097, "step": 23289 }, { "epoch": 76.36065573770492, "grad_norm": 2.5736114978790283, "learning_rate": 2.7904751326277067e-06, "loss": 0.1647, "step": 23290 }, { "epoch": 76.3639344262295, "grad_norm": 2.466968536376953, "learning_rate": 2.7897393020573117e-06, "loss": 0.1396, "step": 23291 }, { "epoch": 76.3672131147541, "grad_norm": 3.189641237258911, "learning_rate": 2.7890035527901027e-06, "loss": 0.123, "step": 23292 }, { "epoch": 76.37049180327868, "grad_norm": 2.4281575679779053, "learning_rate": 2.7882678848343724e-06, "loss": 0.1434, "step": 23293 }, { "epoch": 76.37377049180328, "grad_norm": 1.8769714832305908, "learning_rate": 2.787532298198423e-06, "loss": 0.1367, "step": 23294 }, { "epoch": 76.37704918032787, "grad_norm": 2.6889219284057617, "learning_rate": 2.786796792890547e-06, "loss": 0.1481, "step": 23295 }, { "epoch": 76.38032786885246, "grad_norm": 2.521862506866455, "learning_rate": 2.786061368919034e-06, "loss": 0.2171, "step": 23296 }, { "epoch": 76.38360655737705, "grad_norm": 2.6311800479888916, "learning_rate": 2.7853260262921823e-06, "loss": 0.2887, "step": 23297 }, { "epoch": 76.38688524590164, "grad_norm": 2.6120223999023438, "learning_rate": 2.7845907650182814e-06, "loss": 0.0763, "step": 23298 }, { "epoch": 76.39016393442623, "grad_norm": 2.3896636962890625, "learning_rate": 2.783855585105618e-06, "loss": 0.0976, "step": 23299 }, { "epoch": 76.39344262295081, "grad_norm": 2.886017084121704, "learning_rate": 2.7831204865624897e-06, "loss": 0.1269, "step": 23300 }, { "epoch": 76.3967213114754, "grad_norm": 2.6011486053466797, "learning_rate": 2.782385469397181e-06, "loss": 0.0802, "step": 23301 }, { "epoch": 76.4, "grad_norm": 2.2432029247283936, "learning_rate": 2.78165053361798e-06, "loss": 0.1677, "step": 23302 }, { "epoch": 76.4032786885246, "grad_norm": 1.953948974609375, "learning_rate": 2.7809156792331748e-06, "loss": 0.1697, "step": 23303 }, { "epoch": 76.40655737704918, "grad_norm": 2.552602767944336, "learning_rate": 2.7801809062510488e-06, "loss": 0.1705, "step": 23304 }, { "epoch": 76.40983606557377, "grad_norm": 2.381169319152832, "learning_rate": 2.7794462146798928e-06, "loss": 0.116, "step": 23305 }, { "epoch": 76.41311475409836, "grad_norm": 2.04394268989563, "learning_rate": 2.778711604527988e-06, "loss": 0.1188, "step": 23306 }, { "epoch": 76.41639344262295, "grad_norm": 2.940487861633301, "learning_rate": 2.7779770758036173e-06, "loss": 0.2393, "step": 23307 }, { "epoch": 76.41967213114754, "grad_norm": 2.7131214141845703, "learning_rate": 2.777242628515062e-06, "loss": 0.1693, "step": 23308 }, { "epoch": 76.42295081967212, "grad_norm": 2.786360025405884, "learning_rate": 2.7765082626706097e-06, "loss": 0.1342, "step": 23309 }, { "epoch": 76.42622950819673, "grad_norm": 3.2002642154693604, "learning_rate": 2.7757739782785354e-06, "loss": 0.1057, "step": 23310 }, { "epoch": 76.42950819672132, "grad_norm": 2.3560192584991455, "learning_rate": 2.7750397753471227e-06, "loss": 0.0612, "step": 23311 }, { "epoch": 76.4327868852459, "grad_norm": 3.8269741535186768, "learning_rate": 2.7743056538846437e-06, "loss": 0.1949, "step": 23312 }, { "epoch": 76.43606557377049, "grad_norm": 2.761383533477783, "learning_rate": 2.773571613899385e-06, "loss": 0.1577, "step": 23313 }, { "epoch": 76.43934426229508, "grad_norm": 2.844104766845703, "learning_rate": 2.7728376553996207e-06, "loss": 0.3204, "step": 23314 }, { "epoch": 76.44262295081967, "grad_norm": 2.447052240371704, "learning_rate": 2.7721037783936256e-06, "loss": 0.222, "step": 23315 }, { "epoch": 76.44590163934426, "grad_norm": 2.793217897415161, "learning_rate": 2.7713699828896756e-06, "loss": 0.129, "step": 23316 }, { "epoch": 76.44918032786886, "grad_norm": 1.8590315580368042, "learning_rate": 2.770636268896042e-06, "loss": 0.1009, "step": 23317 }, { "epoch": 76.45245901639345, "grad_norm": 2.097651243209839, "learning_rate": 2.7699026364210048e-06, "loss": 0.0607, "step": 23318 }, { "epoch": 76.45573770491804, "grad_norm": 3.010730743408203, "learning_rate": 2.7691690854728317e-06, "loss": 0.1301, "step": 23319 }, { "epoch": 76.45901639344262, "grad_norm": 3.1797430515289307, "learning_rate": 2.768435616059796e-06, "loss": 0.1894, "step": 23320 }, { "epoch": 76.46229508196721, "grad_norm": 1.3973520994186401, "learning_rate": 2.7677022281901634e-06, "loss": 0.0981, "step": 23321 }, { "epoch": 76.4655737704918, "grad_norm": 3.1574878692626953, "learning_rate": 2.766968921872213e-06, "loss": 0.08, "step": 23322 }, { "epoch": 76.46885245901639, "grad_norm": 3.0219318866729736, "learning_rate": 2.766235697114207e-06, "loss": 0.1161, "step": 23323 }, { "epoch": 76.47213114754098, "grad_norm": 3.107478141784668, "learning_rate": 2.765502553924415e-06, "loss": 0.1233, "step": 23324 }, { "epoch": 76.47540983606558, "grad_norm": 2.3916149139404297, "learning_rate": 2.7647694923111e-06, "loss": 0.1127, "step": 23325 }, { "epoch": 76.47868852459017, "grad_norm": 3.152834415435791, "learning_rate": 2.7640365122825363e-06, "loss": 0.0485, "step": 23326 }, { "epoch": 76.48196721311476, "grad_norm": 2.3879966735839844, "learning_rate": 2.7633036138469836e-06, "loss": 0.1013, "step": 23327 }, { "epoch": 76.48524590163935, "grad_norm": 2.7621726989746094, "learning_rate": 2.762570797012707e-06, "loss": 0.125, "step": 23328 }, { "epoch": 76.48852459016393, "grad_norm": 2.351724624633789, "learning_rate": 2.761838061787969e-06, "loss": 0.1436, "step": 23329 }, { "epoch": 76.49180327868852, "grad_norm": 2.2343029975891113, "learning_rate": 2.7611054081810307e-06, "loss": 0.0962, "step": 23330 }, { "epoch": 76.49508196721311, "grad_norm": 1.4470465183258057, "learning_rate": 2.760372836200158e-06, "loss": 0.0237, "step": 23331 }, { "epoch": 76.4983606557377, "grad_norm": 3.0868124961853027, "learning_rate": 2.7596403458536092e-06, "loss": 0.1618, "step": 23332 }, { "epoch": 76.5016393442623, "grad_norm": 2.1709439754486084, "learning_rate": 2.758907937149643e-06, "loss": 0.0568, "step": 23333 }, { "epoch": 76.50491803278689, "grad_norm": 2.465745687484741, "learning_rate": 2.7581756100965164e-06, "loss": 0.0698, "step": 23334 }, { "epoch": 76.50819672131148, "grad_norm": 2.7028543949127197, "learning_rate": 2.757443364702492e-06, "loss": 0.1679, "step": 23335 }, { "epoch": 76.51147540983607, "grad_norm": 1.5619933605194092, "learning_rate": 2.7567112009758245e-06, "loss": 0.0289, "step": 23336 }, { "epoch": 76.51475409836065, "grad_norm": 3.0284993648529053, "learning_rate": 2.7559791189247688e-06, "loss": 0.1142, "step": 23337 }, { "epoch": 76.51803278688524, "grad_norm": 2.3001623153686523, "learning_rate": 2.7552471185575767e-06, "loss": 0.0663, "step": 23338 }, { "epoch": 76.52131147540983, "grad_norm": 2.388956069946289, "learning_rate": 2.7545151998825103e-06, "loss": 0.0605, "step": 23339 }, { "epoch": 76.52459016393442, "grad_norm": 2.6149396896362305, "learning_rate": 2.753783362907818e-06, "loss": 0.0618, "step": 23340 }, { "epoch": 76.52786885245902, "grad_norm": 2.2941207885742188, "learning_rate": 2.7530516076417522e-06, "loss": 0.1255, "step": 23341 }, { "epoch": 76.53114754098361, "grad_norm": 3.0033202171325684, "learning_rate": 2.752319934092562e-06, "loss": 0.1718, "step": 23342 }, { "epoch": 76.5344262295082, "grad_norm": 2.0652945041656494, "learning_rate": 2.751588342268503e-06, "loss": 0.1638, "step": 23343 }, { "epoch": 76.53770491803279, "grad_norm": 2.4376654624938965, "learning_rate": 2.7508568321778218e-06, "loss": 0.1127, "step": 23344 }, { "epoch": 76.54098360655738, "grad_norm": 2.069866418838501, "learning_rate": 2.750125403828767e-06, "loss": 0.0549, "step": 23345 }, { "epoch": 76.54426229508196, "grad_norm": 2.3256993293762207, "learning_rate": 2.7493940572295843e-06, "loss": 0.1169, "step": 23346 }, { "epoch": 76.54754098360655, "grad_norm": 2.8660318851470947, "learning_rate": 2.7486627923885236e-06, "loss": 0.1892, "step": 23347 }, { "epoch": 76.55081967213114, "grad_norm": 1.9914915561676025, "learning_rate": 2.7479316093138297e-06, "loss": 0.1508, "step": 23348 }, { "epoch": 76.55409836065574, "grad_norm": 3.116715908050537, "learning_rate": 2.7472005080137455e-06, "loss": 0.1733, "step": 23349 }, { "epoch": 76.55737704918033, "grad_norm": 2.771667242050171, "learning_rate": 2.746469488496518e-06, "loss": 0.1086, "step": 23350 }, { "epoch": 76.56065573770492, "grad_norm": 3.1502342224121094, "learning_rate": 2.7457385507703905e-06, "loss": 0.0472, "step": 23351 }, { "epoch": 76.56393442622951, "grad_norm": 2.581336259841919, "learning_rate": 2.745007694843599e-06, "loss": 0.1451, "step": 23352 }, { "epoch": 76.5672131147541, "grad_norm": 2.8021509647369385, "learning_rate": 2.7442769207243926e-06, "loss": 0.1951, "step": 23353 }, { "epoch": 76.57049180327868, "grad_norm": 2.7653372287750244, "learning_rate": 2.743546228421008e-06, "loss": 0.09, "step": 23354 }, { "epoch": 76.57377049180327, "grad_norm": 1.724440336227417, "learning_rate": 2.7428156179416842e-06, "loss": 0.0418, "step": 23355 }, { "epoch": 76.57704918032788, "grad_norm": 2.356717586517334, "learning_rate": 2.742085089294657e-06, "loss": 0.1274, "step": 23356 }, { "epoch": 76.58032786885246, "grad_norm": 2.4007821083068848, "learning_rate": 2.7413546424881698e-06, "loss": 0.0997, "step": 23357 }, { "epoch": 76.58360655737705, "grad_norm": 4.046225547790527, "learning_rate": 2.740624277530456e-06, "loss": 0.1611, "step": 23358 }, { "epoch": 76.58688524590164, "grad_norm": 2.230302095413208, "learning_rate": 2.739893994429751e-06, "loss": 0.0635, "step": 23359 }, { "epoch": 76.59016393442623, "grad_norm": 2.473318338394165, "learning_rate": 2.739163793194287e-06, "loss": 0.0671, "step": 23360 }, { "epoch": 76.59344262295082, "grad_norm": 2.25216007232666, "learning_rate": 2.7384336738323047e-06, "loss": 0.2113, "step": 23361 }, { "epoch": 76.5967213114754, "grad_norm": 2.7438650131225586, "learning_rate": 2.7377036363520317e-06, "loss": 0.0978, "step": 23362 }, { "epoch": 76.6, "grad_norm": 2.270393133163452, "learning_rate": 2.736973680761702e-06, "loss": 0.2007, "step": 23363 }, { "epoch": 76.6032786885246, "grad_norm": 1.7666378021240234, "learning_rate": 2.7362438070695418e-06, "loss": 0.1267, "step": 23364 }, { "epoch": 76.60655737704919, "grad_norm": 3.0860373973846436, "learning_rate": 2.735514015283789e-06, "loss": 0.2528, "step": 23365 }, { "epoch": 76.60983606557377, "grad_norm": 1.8211109638214111, "learning_rate": 2.734784305412668e-06, "loss": 0.0422, "step": 23366 }, { "epoch": 76.61311475409836, "grad_norm": 2.4685826301574707, "learning_rate": 2.7340546774644083e-06, "loss": 0.281, "step": 23367 }, { "epoch": 76.61639344262295, "grad_norm": 1.8236955404281616, "learning_rate": 2.7333251314472363e-06, "loss": 0.0667, "step": 23368 }, { "epoch": 76.61967213114754, "grad_norm": 2.2938284873962402, "learning_rate": 2.7325956673693766e-06, "loss": 0.1011, "step": 23369 }, { "epoch": 76.62295081967213, "grad_norm": 2.309063196182251, "learning_rate": 2.7318662852390586e-06, "loss": 0.1629, "step": 23370 }, { "epoch": 76.62622950819672, "grad_norm": 2.455780506134033, "learning_rate": 2.7311369850645064e-06, "loss": 0.0997, "step": 23371 }, { "epoch": 76.62950819672132, "grad_norm": 3.2072131633758545, "learning_rate": 2.730407766853943e-06, "loss": 0.0751, "step": 23372 }, { "epoch": 76.6327868852459, "grad_norm": 2.40728759765625, "learning_rate": 2.7296786306155864e-06, "loss": 0.2024, "step": 23373 }, { "epoch": 76.6360655737705, "grad_norm": 4.557993412017822, "learning_rate": 2.7289495763576657e-06, "loss": 0.1574, "step": 23374 }, { "epoch": 76.63934426229508, "grad_norm": 2.642287492752075, "learning_rate": 2.7282206040883987e-06, "loss": 0.1717, "step": 23375 }, { "epoch": 76.64262295081967, "grad_norm": 2.6573846340179443, "learning_rate": 2.727491713816005e-06, "loss": 0.1565, "step": 23376 }, { "epoch": 76.64590163934426, "grad_norm": 2.2912285327911377, "learning_rate": 2.726762905548701e-06, "loss": 0.1698, "step": 23377 }, { "epoch": 76.64918032786885, "grad_norm": 2.88192081451416, "learning_rate": 2.7260341792947097e-06, "loss": 0.2215, "step": 23378 }, { "epoch": 76.65245901639344, "grad_norm": 2.117525577545166, "learning_rate": 2.725305535062247e-06, "loss": 0.082, "step": 23379 }, { "epoch": 76.65573770491804, "grad_norm": 2.8818233013153076, "learning_rate": 2.7245769728595284e-06, "loss": 0.1939, "step": 23380 }, { "epoch": 76.65901639344263, "grad_norm": 2.5795421600341797, "learning_rate": 2.7238484926947684e-06, "loss": 0.1075, "step": 23381 }, { "epoch": 76.66229508196722, "grad_norm": 2.660783529281616, "learning_rate": 2.723120094576178e-06, "loss": 0.1319, "step": 23382 }, { "epoch": 76.6655737704918, "grad_norm": 2.5779054164886475, "learning_rate": 2.722391778511979e-06, "loss": 0.1035, "step": 23383 }, { "epoch": 76.66885245901639, "grad_norm": 2.80501127243042, "learning_rate": 2.721663544510379e-06, "loss": 0.0796, "step": 23384 }, { "epoch": 76.67213114754098, "grad_norm": 2.5981249809265137, "learning_rate": 2.72093539257959e-06, "loss": 0.0757, "step": 23385 }, { "epoch": 76.67540983606557, "grad_norm": 2.238036870956421, "learning_rate": 2.72020732272782e-06, "loss": 0.0593, "step": 23386 }, { "epoch": 76.67868852459016, "grad_norm": 2.4681506156921387, "learning_rate": 2.7194793349632854e-06, "loss": 0.143, "step": 23387 }, { "epoch": 76.68196721311476, "grad_norm": 1.9188076257705688, "learning_rate": 2.71875142929419e-06, "loss": 0.2349, "step": 23388 }, { "epoch": 76.68524590163935, "grad_norm": 3.229585647583008, "learning_rate": 2.7180236057287423e-06, "loss": 0.2346, "step": 23389 }, { "epoch": 76.68852459016394, "grad_norm": 2.168210744857788, "learning_rate": 2.717295864275148e-06, "loss": 0.0454, "step": 23390 }, { "epoch": 76.69180327868852, "grad_norm": 3.4541103839874268, "learning_rate": 2.7165682049416175e-06, "loss": 0.0808, "step": 23391 }, { "epoch": 76.69508196721311, "grad_norm": 2.0270702838897705, "learning_rate": 2.7158406277363537e-06, "loss": 0.0591, "step": 23392 }, { "epoch": 76.6983606557377, "grad_norm": 4.086715221405029, "learning_rate": 2.7151131326675596e-06, "loss": 0.14, "step": 23393 }, { "epoch": 76.70163934426229, "grad_norm": 2.518101692199707, "learning_rate": 2.7143857197434397e-06, "loss": 0.0907, "step": 23394 }, { "epoch": 76.70491803278688, "grad_norm": 2.5488390922546387, "learning_rate": 2.7136583889721933e-06, "loss": 0.1698, "step": 23395 }, { "epoch": 76.70819672131148, "grad_norm": 2.082148790359497, "learning_rate": 2.712931140362027e-06, "loss": 0.0875, "step": 23396 }, { "epoch": 76.71147540983607, "grad_norm": 2.5016517639160156, "learning_rate": 2.7122039739211394e-06, "loss": 0.1455, "step": 23397 }, { "epoch": 76.71475409836066, "grad_norm": 2.548816680908203, "learning_rate": 2.711476889657726e-06, "loss": 0.1706, "step": 23398 }, { "epoch": 76.71803278688525, "grad_norm": 2.659006357192993, "learning_rate": 2.7107498875799922e-06, "loss": 0.0612, "step": 23399 }, { "epoch": 76.72131147540983, "grad_norm": 2.8244848251342773, "learning_rate": 2.710022967696132e-06, "loss": 0.0943, "step": 23400 }, { "epoch": 76.72459016393442, "grad_norm": 2.1160717010498047, "learning_rate": 2.7092961300143396e-06, "loss": 0.0565, "step": 23401 }, { "epoch": 76.72786885245901, "grad_norm": 2.651660919189453, "learning_rate": 2.7085693745428167e-06, "loss": 0.1974, "step": 23402 }, { "epoch": 76.73114754098361, "grad_norm": 3.1005847454071045, "learning_rate": 2.7078427012897547e-06, "loss": 0.2934, "step": 23403 }, { "epoch": 76.7344262295082, "grad_norm": 1.8205041885375977, "learning_rate": 2.707116110263346e-06, "loss": 0.0997, "step": 23404 }, { "epoch": 76.73770491803279, "grad_norm": 2.4423272609710693, "learning_rate": 2.7063896014717884e-06, "loss": 0.1843, "step": 23405 }, { "epoch": 76.74098360655738, "grad_norm": 12.172706604003906, "learning_rate": 2.705663174923272e-06, "loss": 0.2301, "step": 23406 }, { "epoch": 76.74426229508197, "grad_norm": 2.2810404300689697, "learning_rate": 2.704936830625987e-06, "loss": 0.0738, "step": 23407 }, { "epoch": 76.74754098360656, "grad_norm": 2.976485252380371, "learning_rate": 2.7042105685881213e-06, "loss": 0.0939, "step": 23408 }, { "epoch": 76.75081967213114, "grad_norm": 2.359849691390991, "learning_rate": 2.7034843888178698e-06, "loss": 0.2358, "step": 23409 }, { "epoch": 76.75409836065573, "grad_norm": 2.243697166442871, "learning_rate": 2.7027582913234186e-06, "loss": 0.0801, "step": 23410 }, { "epoch": 76.75737704918033, "grad_norm": 2.7160532474517822, "learning_rate": 2.702032276112956e-06, "loss": 0.1286, "step": 23411 }, { "epoch": 76.76065573770492, "grad_norm": 2.6230039596557617, "learning_rate": 2.7013063431946627e-06, "loss": 0.1032, "step": 23412 }, { "epoch": 76.76393442622951, "grad_norm": 2.217892646789551, "learning_rate": 2.7005804925767333e-06, "loss": 0.0646, "step": 23413 }, { "epoch": 76.7672131147541, "grad_norm": 2.554497480392456, "learning_rate": 2.699854724267348e-06, "loss": 0.1165, "step": 23414 }, { "epoch": 76.77049180327869, "grad_norm": 2.3326385021209717, "learning_rate": 2.6991290382746905e-06, "loss": 0.1161, "step": 23415 }, { "epoch": 76.77377049180328, "grad_norm": 2.138286828994751, "learning_rate": 2.698403434606942e-06, "loss": 0.0944, "step": 23416 }, { "epoch": 76.77704918032786, "grad_norm": 3.143646001815796, "learning_rate": 2.6976779132722887e-06, "loss": 0.2384, "step": 23417 }, { "epoch": 76.78032786885245, "grad_norm": 2.334660530090332, "learning_rate": 2.69695247427891e-06, "loss": 0.1106, "step": 23418 }, { "epoch": 76.78360655737706, "grad_norm": 2.9515604972839355, "learning_rate": 2.696227117634985e-06, "loss": 0.1587, "step": 23419 }, { "epoch": 76.78688524590164, "grad_norm": 2.1050522327423096, "learning_rate": 2.6955018433486933e-06, "loss": 0.0497, "step": 23420 }, { "epoch": 76.79016393442623, "grad_norm": 1.4929593801498413, "learning_rate": 2.6947766514282104e-06, "loss": 0.0193, "step": 23421 }, { "epoch": 76.79344262295082, "grad_norm": 2.5594403743743896, "learning_rate": 2.6940515418817194e-06, "loss": 0.1657, "step": 23422 }, { "epoch": 76.79672131147541, "grad_norm": 2.5680997371673584, "learning_rate": 2.6933265147173924e-06, "loss": 0.087, "step": 23423 }, { "epoch": 76.8, "grad_norm": 2.7841782569885254, "learning_rate": 2.692601569943407e-06, "loss": 0.157, "step": 23424 }, { "epoch": 76.80327868852459, "grad_norm": 2.4369282722473145, "learning_rate": 2.6918767075679342e-06, "loss": 0.2331, "step": 23425 }, { "epoch": 76.80655737704917, "grad_norm": 1.7458384037017822, "learning_rate": 2.6911519275991517e-06, "loss": 0.1289, "step": 23426 }, { "epoch": 76.80983606557378, "grad_norm": 2.3895986080169678, "learning_rate": 2.6904272300452316e-06, "loss": 0.0799, "step": 23427 }, { "epoch": 76.81311475409836, "grad_norm": 2.0513200759887695, "learning_rate": 2.6897026149143435e-06, "loss": 0.147, "step": 23428 }, { "epoch": 76.81639344262295, "grad_norm": 2.1033968925476074, "learning_rate": 2.6889780822146605e-06, "loss": 0.0881, "step": 23429 }, { "epoch": 76.81967213114754, "grad_norm": 2.9517195224761963, "learning_rate": 2.688253631954347e-06, "loss": 0.1701, "step": 23430 }, { "epoch": 76.82295081967213, "grad_norm": 2.972024917602539, "learning_rate": 2.6875292641415794e-06, "loss": 0.0461, "step": 23431 }, { "epoch": 76.82622950819672, "grad_norm": 2.60837721824646, "learning_rate": 2.686804978784523e-06, "loss": 0.158, "step": 23432 }, { "epoch": 76.8295081967213, "grad_norm": 2.9686601161956787, "learning_rate": 2.6860807758913445e-06, "loss": 0.1317, "step": 23433 }, { "epoch": 76.8327868852459, "grad_norm": 2.112151861190796, "learning_rate": 2.685356655470206e-06, "loss": 0.1084, "step": 23434 }, { "epoch": 76.8360655737705, "grad_norm": 2.629490852355957, "learning_rate": 2.68463261752928e-06, "loss": 0.2369, "step": 23435 }, { "epoch": 76.83934426229509, "grad_norm": 2.2199666500091553, "learning_rate": 2.6839086620767273e-06, "loss": 0.122, "step": 23436 }, { "epoch": 76.84262295081967, "grad_norm": 2.506425619125366, "learning_rate": 2.683184789120711e-06, "loss": 0.0529, "step": 23437 }, { "epoch": 76.84590163934426, "grad_norm": 2.72475528717041, "learning_rate": 2.6824609986693906e-06, "loss": 0.0901, "step": 23438 }, { "epoch": 76.84918032786885, "grad_norm": 2.0937185287475586, "learning_rate": 2.6817372907309336e-06, "loss": 0.0711, "step": 23439 }, { "epoch": 76.85245901639344, "grad_norm": 2.191049098968506, "learning_rate": 2.681013665313499e-06, "loss": 0.0612, "step": 23440 }, { "epoch": 76.85573770491803, "grad_norm": 1.6997507810592651, "learning_rate": 2.6802901224252444e-06, "loss": 0.0307, "step": 23441 }, { "epoch": 76.85901639344263, "grad_norm": 1.9371683597564697, "learning_rate": 2.679566662074329e-06, "loss": 0.1209, "step": 23442 }, { "epoch": 76.86229508196722, "grad_norm": 2.7609524726867676, "learning_rate": 2.678843284268907e-06, "loss": 0.0578, "step": 23443 }, { "epoch": 76.8655737704918, "grad_norm": 2.94964337348938, "learning_rate": 2.6781199890171438e-06, "loss": 0.1323, "step": 23444 }, { "epoch": 76.8688524590164, "grad_norm": 3.111555814743042, "learning_rate": 2.6773967763271903e-06, "loss": 0.0837, "step": 23445 }, { "epoch": 76.87213114754098, "grad_norm": 2.750430107116699, "learning_rate": 2.676673646207201e-06, "loss": 0.102, "step": 23446 }, { "epoch": 76.87540983606557, "grad_norm": 1.9344784021377563, "learning_rate": 2.675950598665328e-06, "loss": 0.0489, "step": 23447 }, { "epoch": 76.87868852459016, "grad_norm": 2.3308093547821045, "learning_rate": 2.6752276337097293e-06, "loss": 0.0636, "step": 23448 }, { "epoch": 76.88196721311475, "grad_norm": 2.604893684387207, "learning_rate": 2.6745047513485557e-06, "loss": 0.1437, "step": 23449 }, { "epoch": 76.88524590163935, "grad_norm": 2.505357503890991, "learning_rate": 2.6737819515899576e-06, "loss": 0.0871, "step": 23450 }, { "epoch": 76.88852459016394, "grad_norm": 2.7228763103485107, "learning_rate": 2.6730592344420826e-06, "loss": 0.2643, "step": 23451 }, { "epoch": 76.89180327868853, "grad_norm": 2.307770013809204, "learning_rate": 2.6723365999130855e-06, "loss": 0.1343, "step": 23452 }, { "epoch": 76.89508196721312, "grad_norm": 2.2466869354248047, "learning_rate": 2.671614048011112e-06, "loss": 0.1339, "step": 23453 }, { "epoch": 76.8983606557377, "grad_norm": 2.303079605102539, "learning_rate": 2.6708915787443068e-06, "loss": 0.0446, "step": 23454 }, { "epoch": 76.90163934426229, "grad_norm": 2.250062942504883, "learning_rate": 2.670169192120823e-06, "loss": 0.1531, "step": 23455 }, { "epoch": 76.90491803278688, "grad_norm": 2.7617557048797607, "learning_rate": 2.669446888148802e-06, "loss": 0.0589, "step": 23456 }, { "epoch": 76.90819672131147, "grad_norm": 2.2505228519439697, "learning_rate": 2.6687246668363865e-06, "loss": 0.1271, "step": 23457 }, { "epoch": 76.91147540983607, "grad_norm": 2.5032050609588623, "learning_rate": 2.6680025281917255e-06, "loss": 0.1312, "step": 23458 }, { "epoch": 76.91475409836066, "grad_norm": 2.5090134143829346, "learning_rate": 2.6672804722229604e-06, "loss": 0.0587, "step": 23459 }, { "epoch": 76.91803278688525, "grad_norm": 2.2760653495788574, "learning_rate": 2.6665584989382288e-06, "loss": 0.0598, "step": 23460 }, { "epoch": 76.92131147540984, "grad_norm": 2.8535232543945312, "learning_rate": 2.6658366083456765e-06, "loss": 0.0829, "step": 23461 }, { "epoch": 76.92459016393443, "grad_norm": 1.7876383066177368, "learning_rate": 2.6651148004534434e-06, "loss": 0.1299, "step": 23462 }, { "epoch": 76.92786885245901, "grad_norm": 1.9905656576156616, "learning_rate": 2.664393075269668e-06, "loss": 0.0491, "step": 23463 }, { "epoch": 76.9311475409836, "grad_norm": 2.0336875915527344, "learning_rate": 2.663671432802484e-06, "loss": 0.0533, "step": 23464 }, { "epoch": 76.93442622950819, "grad_norm": 2.736673593521118, "learning_rate": 2.6629498730600346e-06, "loss": 0.1896, "step": 23465 }, { "epoch": 76.9377049180328, "grad_norm": 3.186551332473755, "learning_rate": 2.6622283960504546e-06, "loss": 0.1149, "step": 23466 }, { "epoch": 76.94098360655738, "grad_norm": 2.105377435684204, "learning_rate": 2.661507001781879e-06, "loss": 0.183, "step": 23467 }, { "epoch": 76.94426229508197, "grad_norm": 2.892740488052368, "learning_rate": 2.6607856902624417e-06, "loss": 0.1361, "step": 23468 }, { "epoch": 76.94754098360656, "grad_norm": 2.665074348449707, "learning_rate": 2.6600644615002745e-06, "loss": 0.1549, "step": 23469 }, { "epoch": 76.95081967213115, "grad_norm": 2.5725743770599365, "learning_rate": 2.6593433155035143e-06, "loss": 0.2311, "step": 23470 }, { "epoch": 76.95409836065573, "grad_norm": 3.1942689418792725, "learning_rate": 2.6586222522802905e-06, "loss": 0.2429, "step": 23471 }, { "epoch": 76.95737704918032, "grad_norm": 2.851715326309204, "learning_rate": 2.657901271838734e-06, "loss": 0.2119, "step": 23472 }, { "epoch": 76.96065573770491, "grad_norm": 2.0502684116363525, "learning_rate": 2.6571803741869727e-06, "loss": 0.0614, "step": 23473 }, { "epoch": 76.96393442622951, "grad_norm": 2.258615493774414, "learning_rate": 2.6564595593331387e-06, "loss": 0.0795, "step": 23474 }, { "epoch": 76.9672131147541, "grad_norm": 2.1576781272888184, "learning_rate": 2.65573882728536e-06, "loss": 0.1927, "step": 23475 }, { "epoch": 76.97049180327869, "grad_norm": 3.0732123851776123, "learning_rate": 2.6550181780517614e-06, "loss": 0.235, "step": 23476 }, { "epoch": 76.97377049180328, "grad_norm": 2.3890819549560547, "learning_rate": 2.6542976116404662e-06, "loss": 0.1585, "step": 23477 }, { "epoch": 76.97704918032787, "grad_norm": 2.965029239654541, "learning_rate": 2.653577128059607e-06, "loss": 0.2392, "step": 23478 }, { "epoch": 76.98032786885246, "grad_norm": 2.7221288681030273, "learning_rate": 2.6528567273173035e-06, "loss": 0.0365, "step": 23479 }, { "epoch": 76.98360655737704, "grad_norm": 2.7432024478912354, "learning_rate": 2.6521364094216795e-06, "loss": 0.1641, "step": 23480 }, { "epoch": 76.98688524590163, "grad_norm": 3.224546194076538, "learning_rate": 2.6514161743808575e-06, "loss": 0.2896, "step": 23481 }, { "epoch": 76.99016393442623, "grad_norm": 2.680906295776367, "learning_rate": 2.6506960222029555e-06, "loss": 0.148, "step": 23482 }, { "epoch": 76.99344262295082, "grad_norm": 2.429548740386963, "learning_rate": 2.6499759528961e-06, "loss": 0.1371, "step": 23483 }, { "epoch": 76.99672131147541, "grad_norm": 2.737682342529297, "learning_rate": 2.6492559664684083e-06, "loss": 0.0857, "step": 23484 }, { "epoch": 77.0, "grad_norm": 2.693408966064453, "learning_rate": 2.648536062927999e-06, "loss": 0.1281, "step": 23485 }, { "epoch": 77.00327868852459, "grad_norm": 2.8501689434051514, "learning_rate": 2.6478162422829845e-06, "loss": 0.1657, "step": 23486 }, { "epoch": 77.00655737704918, "grad_norm": 2.8673694133758545, "learning_rate": 2.647096504541491e-06, "loss": 0.223, "step": 23487 }, { "epoch": 77.00983606557377, "grad_norm": 2.351851463317871, "learning_rate": 2.6463768497116283e-06, "loss": 0.1321, "step": 23488 }, { "epoch": 77.01311475409837, "grad_norm": 2.232050895690918, "learning_rate": 2.6456572778015132e-06, "loss": 0.1429, "step": 23489 }, { "epoch": 77.01639344262296, "grad_norm": 2.360212802886963, "learning_rate": 2.644937788819255e-06, "loss": 0.1226, "step": 23490 }, { "epoch": 77.01967213114754, "grad_norm": 2.534489154815674, "learning_rate": 2.644218382772974e-06, "loss": 0.1741, "step": 23491 }, { "epoch": 77.02295081967213, "grad_norm": 3.0752460956573486, "learning_rate": 2.6434990596707788e-06, "loss": 0.1264, "step": 23492 }, { "epoch": 77.02622950819672, "grad_norm": 2.140078067779541, "learning_rate": 2.642779819520781e-06, "loss": 0.1978, "step": 23493 }, { "epoch": 77.02950819672131, "grad_norm": 3.030512571334839, "learning_rate": 2.6420606623310894e-06, "loss": 0.3835, "step": 23494 }, { "epoch": 77.0327868852459, "grad_norm": 2.7631871700286865, "learning_rate": 2.6413415881098124e-06, "loss": 0.1565, "step": 23495 }, { "epoch": 77.03606557377049, "grad_norm": 3.535224199295044, "learning_rate": 2.6406225968650625e-06, "loss": 0.1086, "step": 23496 }, { "epoch": 77.03934426229509, "grad_norm": 2.316077947616577, "learning_rate": 2.6399036886049443e-06, "loss": 0.051, "step": 23497 }, { "epoch": 77.04262295081968, "grad_norm": 2.4709107875823975, "learning_rate": 2.639184863337565e-06, "loss": 0.1055, "step": 23498 }, { "epoch": 77.04590163934427, "grad_norm": 2.21004319190979, "learning_rate": 2.638466121071027e-06, "loss": 0.125, "step": 23499 }, { "epoch": 77.04918032786885, "grad_norm": 2.4803805351257324, "learning_rate": 2.6377474618134403e-06, "loss": 0.1824, "step": 23500 }, { "epoch": 77.05245901639344, "grad_norm": 2.386221170425415, "learning_rate": 2.6370288855729055e-06, "loss": 0.1065, "step": 23501 }, { "epoch": 77.05573770491803, "grad_norm": 3.0921971797943115, "learning_rate": 2.6363103923575263e-06, "loss": 0.1593, "step": 23502 }, { "epoch": 77.05901639344262, "grad_norm": 2.783268690109253, "learning_rate": 2.6355919821753995e-06, "loss": 0.0969, "step": 23503 }, { "epoch": 77.0622950819672, "grad_norm": 2.405066728591919, "learning_rate": 2.634873655034634e-06, "loss": 0.0462, "step": 23504 }, { "epoch": 77.06557377049181, "grad_norm": 1.6000277996063232, "learning_rate": 2.6341554109433253e-06, "loss": 0.1073, "step": 23505 }, { "epoch": 77.0688524590164, "grad_norm": 2.4614787101745605, "learning_rate": 2.6334372499095706e-06, "loss": 0.1499, "step": 23506 }, { "epoch": 77.07213114754099, "grad_norm": 3.1623377799987793, "learning_rate": 2.6327191719414737e-06, "loss": 0.2241, "step": 23507 }, { "epoch": 77.07540983606557, "grad_norm": 2.4962329864501953, "learning_rate": 2.6320011770471267e-06, "loss": 0.0954, "step": 23508 }, { "epoch": 77.07868852459016, "grad_norm": 2.9626238346099854, "learning_rate": 2.6312832652346276e-06, "loss": 0.2656, "step": 23509 }, { "epoch": 77.08196721311475, "grad_norm": 11.760268211364746, "learning_rate": 2.6305654365120694e-06, "loss": 0.1228, "step": 23510 }, { "epoch": 77.08524590163934, "grad_norm": 2.6457345485687256, "learning_rate": 2.62984769088755e-06, "loss": 0.1354, "step": 23511 }, { "epoch": 77.08852459016393, "grad_norm": 1.9512486457824707, "learning_rate": 2.629130028369161e-06, "loss": 0.0955, "step": 23512 }, { "epoch": 77.09180327868853, "grad_norm": 2.558473587036133, "learning_rate": 2.628412448964992e-06, "loss": 0.0922, "step": 23513 }, { "epoch": 77.09508196721312, "grad_norm": 1.9445719718933105, "learning_rate": 2.6276949526831407e-06, "loss": 0.0851, "step": 23514 }, { "epoch": 77.09836065573771, "grad_norm": 2.258472442626953, "learning_rate": 2.626977539531693e-06, "loss": 0.0901, "step": 23515 }, { "epoch": 77.1016393442623, "grad_norm": 2.6760363578796387, "learning_rate": 2.626260209518737e-06, "loss": 0.1594, "step": 23516 }, { "epoch": 77.10491803278688, "grad_norm": 2.7712366580963135, "learning_rate": 2.6255429626523677e-06, "loss": 0.1888, "step": 23517 }, { "epoch": 77.10819672131147, "grad_norm": 3.0682873725891113, "learning_rate": 2.624825798940668e-06, "loss": 0.1607, "step": 23518 }, { "epoch": 77.11147540983606, "grad_norm": 2.210479974746704, "learning_rate": 2.6241087183917257e-06, "loss": 0.1402, "step": 23519 }, { "epoch": 77.11475409836065, "grad_norm": 4.1317572593688965, "learning_rate": 2.623391721013627e-06, "loss": 0.1942, "step": 23520 }, { "epoch": 77.11803278688525, "grad_norm": 2.777146100997925, "learning_rate": 2.6226748068144537e-06, "loss": 0.1025, "step": 23521 }, { "epoch": 77.12131147540984, "grad_norm": 2.155616283416748, "learning_rate": 2.621957975802295e-06, "loss": 0.1834, "step": 23522 }, { "epoch": 77.12459016393443, "grad_norm": 2.294140338897705, "learning_rate": 2.6212412279852316e-06, "loss": 0.1504, "step": 23523 }, { "epoch": 77.12786885245902, "grad_norm": 2.770622491836548, "learning_rate": 2.620524563371345e-06, "loss": 0.1798, "step": 23524 }, { "epoch": 77.1311475409836, "grad_norm": 1.9440420866012573, "learning_rate": 2.619807981968714e-06, "loss": 0.0912, "step": 23525 }, { "epoch": 77.1344262295082, "grad_norm": 2.6134514808654785, "learning_rate": 2.619091483785424e-06, "loss": 0.1262, "step": 23526 }, { "epoch": 77.13770491803278, "grad_norm": 2.4645116329193115, "learning_rate": 2.618375068829552e-06, "loss": 0.14, "step": 23527 }, { "epoch": 77.14098360655737, "grad_norm": 2.0409164428710938, "learning_rate": 2.6176587371091767e-06, "loss": 0.0593, "step": 23528 }, { "epoch": 77.14426229508197, "grad_norm": 1.9651503562927246, "learning_rate": 2.6169424886323714e-06, "loss": 0.0803, "step": 23529 }, { "epoch": 77.14754098360656, "grad_norm": 3.288844108581543, "learning_rate": 2.616226323407218e-06, "loss": 0.1876, "step": 23530 }, { "epoch": 77.15081967213115, "grad_norm": 2.0460195541381836, "learning_rate": 2.615510241441791e-06, "loss": 0.0902, "step": 23531 }, { "epoch": 77.15409836065574, "grad_norm": 2.571218967437744, "learning_rate": 2.614794242744164e-06, "loss": 0.1805, "step": 23532 }, { "epoch": 77.15737704918033, "grad_norm": 3.130796432495117, "learning_rate": 2.6140783273224103e-06, "loss": 0.1425, "step": 23533 }, { "epoch": 77.16065573770491, "grad_norm": 2.854950189590454, "learning_rate": 2.6133624951846005e-06, "loss": 0.1291, "step": 23534 }, { "epoch": 77.1639344262295, "grad_norm": 3.773313522338867, "learning_rate": 2.6126467463388104e-06, "loss": 0.1009, "step": 23535 }, { "epoch": 77.1672131147541, "grad_norm": 2.985136032104492, "learning_rate": 2.611931080793111e-06, "loss": 0.11, "step": 23536 }, { "epoch": 77.1704918032787, "grad_norm": 2.46681547164917, "learning_rate": 2.6112154985555695e-06, "loss": 0.1601, "step": 23537 }, { "epoch": 77.17377049180328, "grad_norm": 2.1107819080352783, "learning_rate": 2.610499999634252e-06, "loss": 0.1983, "step": 23538 }, { "epoch": 77.17704918032787, "grad_norm": 2.2808375358581543, "learning_rate": 2.609784584037234e-06, "loss": 0.0722, "step": 23539 }, { "epoch": 77.18032786885246, "grad_norm": 1.851128101348877, "learning_rate": 2.609069251772578e-06, "loss": 0.0815, "step": 23540 }, { "epoch": 77.18360655737705, "grad_norm": 3.3633291721343994, "learning_rate": 2.608354002848351e-06, "loss": 0.2891, "step": 23541 }, { "epoch": 77.18688524590164, "grad_norm": 2.555959701538086, "learning_rate": 2.607638837272616e-06, "loss": 0.1649, "step": 23542 }, { "epoch": 77.19016393442622, "grad_norm": 2.0231082439422607, "learning_rate": 2.606923755053441e-06, "loss": 0.1071, "step": 23543 }, { "epoch": 77.19344262295083, "grad_norm": 2.317136526107788, "learning_rate": 2.606208756198888e-06, "loss": 0.0589, "step": 23544 }, { "epoch": 77.19672131147541, "grad_norm": 3.1070146560668945, "learning_rate": 2.6054938407170193e-06, "loss": 0.0963, "step": 23545 }, { "epoch": 77.2, "grad_norm": 2.9548864364624023, "learning_rate": 2.604779008615895e-06, "loss": 0.1168, "step": 23546 }, { "epoch": 77.20327868852459, "grad_norm": 2.231036424636841, "learning_rate": 2.604064259903574e-06, "loss": 0.0998, "step": 23547 }, { "epoch": 77.20655737704918, "grad_norm": 2.1047942638397217, "learning_rate": 2.6033495945881215e-06, "loss": 0.0721, "step": 23548 }, { "epoch": 77.20983606557377, "grad_norm": 1.9402662515640259, "learning_rate": 2.6026350126775923e-06, "loss": 0.0588, "step": 23549 }, { "epoch": 77.21311475409836, "grad_norm": 1.609035849571228, "learning_rate": 2.601920514180045e-06, "loss": 0.0964, "step": 23550 }, { "epoch": 77.21639344262294, "grad_norm": 2.448701858520508, "learning_rate": 2.6012060991035337e-06, "loss": 0.1373, "step": 23551 }, { "epoch": 77.21967213114755, "grad_norm": 3.1196579933166504, "learning_rate": 2.600491767456118e-06, "loss": 0.0729, "step": 23552 }, { "epoch": 77.22295081967214, "grad_norm": 2.9585940837860107, "learning_rate": 2.599777519245853e-06, "loss": 0.1135, "step": 23553 }, { "epoch": 77.22622950819672, "grad_norm": 2.418123960494995, "learning_rate": 2.5990633544807895e-06, "loss": 0.1734, "step": 23554 }, { "epoch": 77.22950819672131, "grad_norm": 1.91825532913208, "learning_rate": 2.5983492731689785e-06, "loss": 0.1535, "step": 23555 }, { "epoch": 77.2327868852459, "grad_norm": 2.5259664058685303, "learning_rate": 2.5976352753184785e-06, "loss": 0.2024, "step": 23556 }, { "epoch": 77.23606557377049, "grad_norm": 2.554985761642456, "learning_rate": 2.5969213609373377e-06, "loss": 0.1693, "step": 23557 }, { "epoch": 77.23934426229508, "grad_norm": 3.1339516639709473, "learning_rate": 2.5962075300336054e-06, "loss": 0.1238, "step": 23558 }, { "epoch": 77.24262295081967, "grad_norm": 2.9381394386291504, "learning_rate": 2.5954937826153293e-06, "loss": 0.1625, "step": 23559 }, { "epoch": 77.24590163934427, "grad_norm": 2.3725931644439697, "learning_rate": 2.5947801186905608e-06, "loss": 0.0889, "step": 23560 }, { "epoch": 77.24918032786886, "grad_norm": 2.353717803955078, "learning_rate": 2.5940665382673467e-06, "loss": 0.1828, "step": 23561 }, { "epoch": 77.25245901639344, "grad_norm": 2.3434152603149414, "learning_rate": 2.59335304135373e-06, "loss": 0.0632, "step": 23562 }, { "epoch": 77.25573770491803, "grad_norm": 2.311765193939209, "learning_rate": 2.5926396279577616e-06, "loss": 0.1525, "step": 23563 }, { "epoch": 77.25901639344262, "grad_norm": 2.5734522342681885, "learning_rate": 2.5919262980874837e-06, "loss": 0.1021, "step": 23564 }, { "epoch": 77.26229508196721, "grad_norm": 2.608585834503174, "learning_rate": 2.591213051750935e-06, "loss": 0.0765, "step": 23565 }, { "epoch": 77.2655737704918, "grad_norm": 2.8365702629089355, "learning_rate": 2.590499888956166e-06, "loss": 0.0765, "step": 23566 }, { "epoch": 77.26885245901639, "grad_norm": 2.77958345413208, "learning_rate": 2.5897868097112143e-06, "loss": 0.2548, "step": 23567 }, { "epoch": 77.27213114754099, "grad_norm": 2.656277656555176, "learning_rate": 2.589073814024119e-06, "loss": 0.2229, "step": 23568 }, { "epoch": 77.27540983606558, "grad_norm": 2.17191481590271, "learning_rate": 2.5883609019029244e-06, "loss": 0.0461, "step": 23569 }, { "epoch": 77.27868852459017, "grad_norm": 2.9043779373168945, "learning_rate": 2.5876480733556664e-06, "loss": 0.1184, "step": 23570 }, { "epoch": 77.28196721311475, "grad_norm": 2.3653106689453125, "learning_rate": 2.586935328390383e-06, "loss": 0.1724, "step": 23571 }, { "epoch": 77.28524590163934, "grad_norm": 2.527712345123291, "learning_rate": 2.5862226670151124e-06, "loss": 0.067, "step": 23572 }, { "epoch": 77.28852459016393, "grad_norm": 2.4643287658691406, "learning_rate": 2.585510089237886e-06, "loss": 0.1831, "step": 23573 }, { "epoch": 77.29180327868852, "grad_norm": 2.2306230068206787, "learning_rate": 2.584797595066746e-06, "loss": 0.0887, "step": 23574 }, { "epoch": 77.29508196721312, "grad_norm": 3.1130409240722656, "learning_rate": 2.5840851845097224e-06, "loss": 0.1159, "step": 23575 }, { "epoch": 77.29836065573771, "grad_norm": 2.607177257537842, "learning_rate": 2.5833728575748497e-06, "loss": 0.1437, "step": 23576 }, { "epoch": 77.3016393442623, "grad_norm": 2.6518096923828125, "learning_rate": 2.582660614270156e-06, "loss": 0.0614, "step": 23577 }, { "epoch": 77.30491803278689, "grad_norm": 2.345367908477783, "learning_rate": 2.5819484546036787e-06, "loss": 0.0623, "step": 23578 }, { "epoch": 77.30819672131148, "grad_norm": 2.214923143386841, "learning_rate": 2.5812363785834448e-06, "loss": 0.1175, "step": 23579 }, { "epoch": 77.31147540983606, "grad_norm": 3.118882179260254, "learning_rate": 2.5805243862174857e-06, "loss": 0.1116, "step": 23580 }, { "epoch": 77.31475409836065, "grad_norm": 2.3869547843933105, "learning_rate": 2.5798124775138243e-06, "loss": 0.0663, "step": 23581 }, { "epoch": 77.31803278688524, "grad_norm": 2.223228693008423, "learning_rate": 2.579100652480496e-06, "loss": 0.109, "step": 23582 }, { "epoch": 77.32131147540984, "grad_norm": 2.9620676040649414, "learning_rate": 2.5783889111255234e-06, "loss": 0.199, "step": 23583 }, { "epoch": 77.32459016393443, "grad_norm": 2.013016939163208, "learning_rate": 2.5776772534569326e-06, "loss": 0.0747, "step": 23584 }, { "epoch": 77.32786885245902, "grad_norm": 2.626887321472168, "learning_rate": 2.576965679482748e-06, "loss": 0.0805, "step": 23585 }, { "epoch": 77.33114754098361, "grad_norm": 3.8854963779449463, "learning_rate": 2.5762541892109904e-06, "loss": 0.0841, "step": 23586 }, { "epoch": 77.3344262295082, "grad_norm": 1.6255488395690918, "learning_rate": 2.575542782649688e-06, "loss": 0.0348, "step": 23587 }, { "epoch": 77.33770491803278, "grad_norm": 2.4285213947296143, "learning_rate": 2.5748314598068613e-06, "loss": 0.4084, "step": 23588 }, { "epoch": 77.34098360655737, "grad_norm": 2.4224295616149902, "learning_rate": 2.5741202206905293e-06, "loss": 0.142, "step": 23589 }, { "epoch": 77.34426229508196, "grad_norm": 2.578563928604126, "learning_rate": 2.5734090653087096e-06, "loss": 0.2131, "step": 23590 }, { "epoch": 77.34754098360656, "grad_norm": 3.0449366569519043, "learning_rate": 2.5726979936694285e-06, "loss": 0.2503, "step": 23591 }, { "epoch": 77.35081967213115, "grad_norm": 2.47055983543396, "learning_rate": 2.5719870057806996e-06, "loss": 0.1051, "step": 23592 }, { "epoch": 77.35409836065574, "grad_norm": 2.8831028938293457, "learning_rate": 2.5712761016505394e-06, "loss": 0.1639, "step": 23593 }, { "epoch": 77.35737704918033, "grad_norm": 2.4865593910217285, "learning_rate": 2.570565281286963e-06, "loss": 0.0939, "step": 23594 }, { "epoch": 77.36065573770492, "grad_norm": 2.5410139560699463, "learning_rate": 2.5698545446979907e-06, "loss": 0.2231, "step": 23595 }, { "epoch": 77.3639344262295, "grad_norm": 2.239656448364258, "learning_rate": 2.5691438918916332e-06, "loss": 0.186, "step": 23596 }, { "epoch": 77.3672131147541, "grad_norm": 2.746612548828125, "learning_rate": 2.568433322875905e-06, "loss": 0.1046, "step": 23597 }, { "epoch": 77.37049180327868, "grad_norm": 2.0801453590393066, "learning_rate": 2.567722837658818e-06, "loss": 0.1252, "step": 23598 }, { "epoch": 77.37377049180328, "grad_norm": 2.261399030685425, "learning_rate": 2.5670124362483805e-06, "loss": 0.114, "step": 23599 }, { "epoch": 77.37704918032787, "grad_norm": 3.042232036590576, "learning_rate": 2.5663021186526094e-06, "loss": 0.227, "step": 23600 }, { "epoch": 77.38032786885246, "grad_norm": 9.74543285369873, "learning_rate": 2.56559188487951e-06, "loss": 0.1631, "step": 23601 }, { "epoch": 77.38360655737705, "grad_norm": 2.4835588932037354, "learning_rate": 2.5648817349370935e-06, "loss": 0.2355, "step": 23602 }, { "epoch": 77.38688524590164, "grad_norm": 2.2586097717285156, "learning_rate": 2.564171668833362e-06, "loss": 0.0902, "step": 23603 }, { "epoch": 77.39016393442623, "grad_norm": 1.828678846359253, "learning_rate": 2.5634616865763295e-06, "loss": 0.0333, "step": 23604 }, { "epoch": 77.39344262295081, "grad_norm": 2.4723544120788574, "learning_rate": 2.5627517881739982e-06, "loss": 0.0821, "step": 23605 }, { "epoch": 77.3967213114754, "grad_norm": 2.4956374168395996, "learning_rate": 2.5620419736343738e-06, "loss": 0.0369, "step": 23606 }, { "epoch": 77.4, "grad_norm": 2.29026198387146, "learning_rate": 2.5613322429654573e-06, "loss": 0.0951, "step": 23607 }, { "epoch": 77.4032786885246, "grad_norm": 2.976715564727783, "learning_rate": 2.560622596175256e-06, "loss": 0.0808, "step": 23608 }, { "epoch": 77.40655737704918, "grad_norm": 2.7349307537078857, "learning_rate": 2.5599130332717705e-06, "loss": 0.1869, "step": 23609 }, { "epoch": 77.40983606557377, "grad_norm": 1.7824243307113647, "learning_rate": 2.559203554263001e-06, "loss": 0.0401, "step": 23610 }, { "epoch": 77.41311475409836, "grad_norm": 2.7358546257019043, "learning_rate": 2.558494159156948e-06, "loss": 0.1708, "step": 23611 }, { "epoch": 77.41639344262295, "grad_norm": 2.6400084495544434, "learning_rate": 2.5577848479616074e-06, "loss": 0.3271, "step": 23612 }, { "epoch": 77.41967213114754, "grad_norm": 3.0840399265289307, "learning_rate": 2.5570756206849834e-06, "loss": 0.1917, "step": 23613 }, { "epoch": 77.42295081967212, "grad_norm": 1.64871346950531, "learning_rate": 2.5563664773350706e-06, "loss": 0.0271, "step": 23614 }, { "epoch": 77.42622950819673, "grad_norm": 2.8992104530334473, "learning_rate": 2.5556574179198625e-06, "loss": 0.0599, "step": 23615 }, { "epoch": 77.42950819672132, "grad_norm": 2.4930388927459717, "learning_rate": 2.55494844244736e-06, "loss": 0.1193, "step": 23616 }, { "epoch": 77.4327868852459, "grad_norm": 2.55788254737854, "learning_rate": 2.5542395509255547e-06, "loss": 0.1547, "step": 23617 }, { "epoch": 77.43606557377049, "grad_norm": 2.620558023452759, "learning_rate": 2.553530743362438e-06, "loss": 0.1634, "step": 23618 }, { "epoch": 77.43934426229508, "grad_norm": 2.1363742351531982, "learning_rate": 2.5528220197660056e-06, "loss": 0.0494, "step": 23619 }, { "epoch": 77.44262295081967, "grad_norm": 2.4798998832702637, "learning_rate": 2.55211338014425e-06, "loss": 0.0825, "step": 23620 }, { "epoch": 77.44590163934426, "grad_norm": 3.768045663833618, "learning_rate": 2.551404824505156e-06, "loss": 0.1531, "step": 23621 }, { "epoch": 77.44918032786886, "grad_norm": 2.177338123321533, "learning_rate": 2.5506963528567208e-06, "loss": 0.0473, "step": 23622 }, { "epoch": 77.45245901639345, "grad_norm": 2.5294060707092285, "learning_rate": 2.5499879652069293e-06, "loss": 0.1268, "step": 23623 }, { "epoch": 77.45573770491804, "grad_norm": 1.9411003589630127, "learning_rate": 2.5492796615637683e-06, "loss": 0.1405, "step": 23624 }, { "epoch": 77.45901639344262, "grad_norm": 1.6713993549346924, "learning_rate": 2.548571441935225e-06, "loss": 0.0777, "step": 23625 }, { "epoch": 77.46229508196721, "grad_norm": 3.0077967643737793, "learning_rate": 2.5478633063292877e-06, "loss": 0.1301, "step": 23626 }, { "epoch": 77.4655737704918, "grad_norm": 1.9858472347259521, "learning_rate": 2.5471552547539403e-06, "loss": 0.0656, "step": 23627 }, { "epoch": 77.46885245901639, "grad_norm": 2.6986141204833984, "learning_rate": 2.5464472872171665e-06, "loss": 0.1726, "step": 23628 }, { "epoch": 77.47213114754098, "grad_norm": 3.349721670150757, "learning_rate": 2.545739403726947e-06, "loss": 0.237, "step": 23629 }, { "epoch": 77.47540983606558, "grad_norm": 2.6936264038085938, "learning_rate": 2.545031604291268e-06, "loss": 0.1191, "step": 23630 }, { "epoch": 77.47868852459017, "grad_norm": 2.255380868911743, "learning_rate": 2.54432388891811e-06, "loss": 0.1628, "step": 23631 }, { "epoch": 77.48196721311476, "grad_norm": 2.2175114154815674, "learning_rate": 2.5436162576154513e-06, "loss": 0.1476, "step": 23632 }, { "epoch": 77.48524590163935, "grad_norm": 2.9023239612579346, "learning_rate": 2.5429087103912685e-06, "loss": 0.1056, "step": 23633 }, { "epoch": 77.48852459016393, "grad_norm": 2.915153980255127, "learning_rate": 2.5422012472535474e-06, "loss": 0.2257, "step": 23634 }, { "epoch": 77.49180327868852, "grad_norm": 2.274153709411621, "learning_rate": 2.5414938682102606e-06, "loss": 0.0838, "step": 23635 }, { "epoch": 77.49508196721311, "grad_norm": 2.5106942653656006, "learning_rate": 2.540786573269386e-06, "loss": 0.0693, "step": 23636 }, { "epoch": 77.4983606557377, "grad_norm": 1.8995124101638794, "learning_rate": 2.5400793624388985e-06, "loss": 0.1051, "step": 23637 }, { "epoch": 77.5016393442623, "grad_norm": 2.237319231033325, "learning_rate": 2.539372235726769e-06, "loss": 0.0434, "step": 23638 }, { "epoch": 77.50491803278689, "grad_norm": 2.161961793899536, "learning_rate": 2.538665193140979e-06, "loss": 0.1993, "step": 23639 }, { "epoch": 77.50819672131148, "grad_norm": 2.2926032543182373, "learning_rate": 2.537958234689496e-06, "loss": 0.0635, "step": 23640 }, { "epoch": 77.51147540983607, "grad_norm": 2.1561803817749023, "learning_rate": 2.5372513603802915e-06, "loss": 0.052, "step": 23641 }, { "epoch": 77.51475409836065, "grad_norm": 2.818032741546631, "learning_rate": 2.5365445702213355e-06, "loss": 0.138, "step": 23642 }, { "epoch": 77.51803278688524, "grad_norm": 3.5207035541534424, "learning_rate": 2.5358378642206017e-06, "loss": 0.1813, "step": 23643 }, { "epoch": 77.52131147540983, "grad_norm": 4.090522766113281, "learning_rate": 2.5351312423860574e-06, "loss": 0.1406, "step": 23644 }, { "epoch": 77.52459016393442, "grad_norm": 2.495041847229004, "learning_rate": 2.5344247047256697e-06, "loss": 0.0655, "step": 23645 }, { "epoch": 77.52786885245902, "grad_norm": 2.5160982608795166, "learning_rate": 2.5337182512474025e-06, "loss": 0.1287, "step": 23646 }, { "epoch": 77.53114754098361, "grad_norm": 2.3316986560821533, "learning_rate": 2.5330118819592275e-06, "loss": 0.0368, "step": 23647 }, { "epoch": 77.5344262295082, "grad_norm": 4.219166278839111, "learning_rate": 2.5323055968691077e-06, "loss": 0.047, "step": 23648 }, { "epoch": 77.53770491803279, "grad_norm": 2.601930618286133, "learning_rate": 2.531599395985007e-06, "loss": 0.0836, "step": 23649 }, { "epoch": 77.54098360655738, "grad_norm": 3.7948005199432373, "learning_rate": 2.530893279314889e-06, "loss": 0.1155, "step": 23650 }, { "epoch": 77.54426229508196, "grad_norm": 2.3389697074890137, "learning_rate": 2.530187246866711e-06, "loss": 0.1923, "step": 23651 }, { "epoch": 77.54754098360655, "grad_norm": 2.4432168006896973, "learning_rate": 2.5294812986484416e-06, "loss": 0.0569, "step": 23652 }, { "epoch": 77.55081967213114, "grad_norm": 2.5557494163513184, "learning_rate": 2.5287754346680382e-06, "loss": 0.0665, "step": 23653 }, { "epoch": 77.55409836065574, "grad_norm": 2.5921173095703125, "learning_rate": 2.5280696549334593e-06, "loss": 0.0624, "step": 23654 }, { "epoch": 77.55737704918033, "grad_norm": 2.220418691635132, "learning_rate": 2.5273639594526624e-06, "loss": 0.1592, "step": 23655 }, { "epoch": 77.56065573770492, "grad_norm": 2.698906660079956, "learning_rate": 2.5266583482336083e-06, "loss": 0.0907, "step": 23656 }, { "epoch": 77.56393442622951, "grad_norm": 2.4584174156188965, "learning_rate": 2.5259528212842523e-06, "loss": 0.0547, "step": 23657 }, { "epoch": 77.5672131147541, "grad_norm": 1.9441405534744263, "learning_rate": 2.5252473786125485e-06, "loss": 0.0766, "step": 23658 }, { "epoch": 77.57049180327868, "grad_norm": 3.6475465297698975, "learning_rate": 2.5245420202264493e-06, "loss": 0.1671, "step": 23659 }, { "epoch": 77.57377049180327, "grad_norm": 2.2943546772003174, "learning_rate": 2.5238367461339155e-06, "loss": 0.1467, "step": 23660 }, { "epoch": 77.57704918032788, "grad_norm": 2.7888553142547607, "learning_rate": 2.523131556342894e-06, "loss": 0.2088, "step": 23661 }, { "epoch": 77.58032786885246, "grad_norm": 3.0546460151672363, "learning_rate": 2.5224264508613395e-06, "loss": 0.1506, "step": 23662 }, { "epoch": 77.58360655737705, "grad_norm": 2.3281874656677246, "learning_rate": 2.5217214296972005e-06, "loss": 0.0758, "step": 23663 }, { "epoch": 77.58688524590164, "grad_norm": 2.288174867630005, "learning_rate": 2.5210164928584257e-06, "loss": 0.1182, "step": 23664 }, { "epoch": 77.59016393442623, "grad_norm": 2.006795644760132, "learning_rate": 2.5203116403529693e-06, "loss": 0.138, "step": 23665 }, { "epoch": 77.59344262295082, "grad_norm": 3.333010196685791, "learning_rate": 2.5196068721887756e-06, "loss": 0.2178, "step": 23666 }, { "epoch": 77.5967213114754, "grad_norm": 2.370941638946533, "learning_rate": 2.5189021883737885e-06, "loss": 0.1243, "step": 23667 }, { "epoch": 77.6, "grad_norm": 1.62144935131073, "learning_rate": 2.5181975889159615e-06, "loss": 0.0268, "step": 23668 }, { "epoch": 77.6032786885246, "grad_norm": 2.6421573162078857, "learning_rate": 2.517493073823235e-06, "loss": 0.1114, "step": 23669 }, { "epoch": 77.60655737704919, "grad_norm": 1.9758691787719727, "learning_rate": 2.5167886431035517e-06, "loss": 0.0688, "step": 23670 }, { "epoch": 77.60983606557377, "grad_norm": 2.1903703212738037, "learning_rate": 2.5160842967648603e-06, "loss": 0.0584, "step": 23671 }, { "epoch": 77.61311475409836, "grad_norm": 2.4762730598449707, "learning_rate": 2.5153800348150993e-06, "loss": 0.0308, "step": 23672 }, { "epoch": 77.61639344262295, "grad_norm": 2.3798937797546387, "learning_rate": 2.51467585726221e-06, "loss": 0.0731, "step": 23673 }, { "epoch": 77.61967213114754, "grad_norm": 2.260453939437866, "learning_rate": 2.5139717641141305e-06, "loss": 0.081, "step": 23674 }, { "epoch": 77.62295081967213, "grad_norm": 2.1832656860351562, "learning_rate": 2.5132677553788064e-06, "loss": 0.0809, "step": 23675 }, { "epoch": 77.62622950819672, "grad_norm": 2.0479023456573486, "learning_rate": 2.512563831064172e-06, "loss": 0.1147, "step": 23676 }, { "epoch": 77.62950819672132, "grad_norm": 3.0624163150787354, "learning_rate": 2.5118599911781626e-06, "loss": 0.1385, "step": 23677 }, { "epoch": 77.6327868852459, "grad_norm": 2.625082015991211, "learning_rate": 2.511156235728721e-06, "loss": 0.171, "step": 23678 }, { "epoch": 77.6360655737705, "grad_norm": 2.299567699432373, "learning_rate": 2.510452564723779e-06, "loss": 0.1307, "step": 23679 }, { "epoch": 77.63934426229508, "grad_norm": 2.429600238800049, "learning_rate": 2.5097489781712704e-06, "loss": 0.1694, "step": 23680 }, { "epoch": 77.64262295081967, "grad_norm": 2.4489078521728516, "learning_rate": 2.5090454760791284e-06, "loss": 0.1363, "step": 23681 }, { "epoch": 77.64590163934426, "grad_norm": 2.7691471576690674, "learning_rate": 2.5083420584552896e-06, "loss": 0.1189, "step": 23682 }, { "epoch": 77.64918032786885, "grad_norm": 2.6193289756774902, "learning_rate": 2.507638725307684e-06, "loss": 0.1589, "step": 23683 }, { "epoch": 77.65245901639344, "grad_norm": 3.027862310409546, "learning_rate": 2.506935476644241e-06, "loss": 0.1675, "step": 23684 }, { "epoch": 77.65573770491804, "grad_norm": 4.136300563812256, "learning_rate": 2.5062323124728893e-06, "loss": 0.183, "step": 23685 }, { "epoch": 77.65901639344263, "grad_norm": 3.2993597984313965, "learning_rate": 2.5055292328015622e-06, "loss": 0.1732, "step": 23686 }, { "epoch": 77.66229508196722, "grad_norm": 2.647826910018921, "learning_rate": 2.504826237638186e-06, "loss": 0.0914, "step": 23687 }, { "epoch": 77.6655737704918, "grad_norm": 3.1528797149658203, "learning_rate": 2.5041233269906863e-06, "loss": 0.1428, "step": 23688 }, { "epoch": 77.66885245901639, "grad_norm": 2.394601345062256, "learning_rate": 2.5034205008669898e-06, "loss": 0.0901, "step": 23689 }, { "epoch": 77.67213114754098, "grad_norm": 2.8534369468688965, "learning_rate": 2.5027177592750184e-06, "loss": 0.1267, "step": 23690 }, { "epoch": 77.67540983606557, "grad_norm": 2.6506552696228027, "learning_rate": 2.5020151022227035e-06, "loss": 0.1825, "step": 23691 }, { "epoch": 77.67868852459016, "grad_norm": 2.3260574340820312, "learning_rate": 2.5013125297179643e-06, "loss": 0.1201, "step": 23692 }, { "epoch": 77.68196721311476, "grad_norm": 2.4366984367370605, "learning_rate": 2.500610041768722e-06, "loss": 0.1256, "step": 23693 }, { "epoch": 77.68524590163935, "grad_norm": 2.7746994495391846, "learning_rate": 2.4999076383828957e-06, "loss": 0.2689, "step": 23694 }, { "epoch": 77.68852459016394, "grad_norm": 2.6964383125305176, "learning_rate": 2.499205319568413e-06, "loss": 0.1563, "step": 23695 }, { "epoch": 77.69180327868852, "grad_norm": 2.472982168197632, "learning_rate": 2.498503085333188e-06, "loss": 0.0865, "step": 23696 }, { "epoch": 77.69508196721311, "grad_norm": 2.7937397956848145, "learning_rate": 2.497800935685142e-06, "loss": 0.0378, "step": 23697 }, { "epoch": 77.6983606557377, "grad_norm": 2.103789806365967, "learning_rate": 2.4970988706321866e-06, "loss": 0.1252, "step": 23698 }, { "epoch": 77.70163934426229, "grad_norm": 2.952877998352051, "learning_rate": 2.4963968901822454e-06, "loss": 0.0652, "step": 23699 }, { "epoch": 77.70491803278688, "grad_norm": 1.6771138906478882, "learning_rate": 2.4956949943432317e-06, "loss": 0.0574, "step": 23700 }, { "epoch": 77.70819672131148, "grad_norm": 1.7881779670715332, "learning_rate": 2.494993183123059e-06, "loss": 0.0261, "step": 23701 }, { "epoch": 77.71147540983607, "grad_norm": 1.862908124923706, "learning_rate": 2.4942914565296416e-06, "loss": 0.0415, "step": 23702 }, { "epoch": 77.71475409836066, "grad_norm": 3.5105812549591064, "learning_rate": 2.493589814570889e-06, "loss": 0.072, "step": 23703 }, { "epoch": 77.71803278688525, "grad_norm": 2.349547863006592, "learning_rate": 2.4928882572547184e-06, "loss": 0.0502, "step": 23704 }, { "epoch": 77.72131147540983, "grad_norm": 2.8967878818511963, "learning_rate": 2.492186784589039e-06, "loss": 0.0968, "step": 23705 }, { "epoch": 77.72459016393442, "grad_norm": 2.169381618499756, "learning_rate": 2.4914853965817598e-06, "loss": 0.1038, "step": 23706 }, { "epoch": 77.72786885245901, "grad_norm": 2.0330851078033447, "learning_rate": 2.4907840932407866e-06, "loss": 0.035, "step": 23707 }, { "epoch": 77.73114754098361, "grad_norm": 1.8835389614105225, "learning_rate": 2.4900828745740326e-06, "loss": 0.0609, "step": 23708 }, { "epoch": 77.7344262295082, "grad_norm": 2.919459342956543, "learning_rate": 2.489381740589403e-06, "loss": 0.134, "step": 23709 }, { "epoch": 77.73770491803279, "grad_norm": 2.3645734786987305, "learning_rate": 2.4886806912948034e-06, "loss": 0.147, "step": 23710 }, { "epoch": 77.74098360655738, "grad_norm": 2.6990063190460205, "learning_rate": 2.4879797266981352e-06, "loss": 0.1126, "step": 23711 }, { "epoch": 77.74426229508197, "grad_norm": 1.9492636919021606, "learning_rate": 2.4872788468073105e-06, "loss": 0.1272, "step": 23712 }, { "epoch": 77.74754098360656, "grad_norm": 2.165595769882202, "learning_rate": 2.4865780516302274e-06, "loss": 0.0967, "step": 23713 }, { "epoch": 77.75081967213114, "grad_norm": 2.041799306869507, "learning_rate": 2.4858773411747883e-06, "loss": 0.0458, "step": 23714 }, { "epoch": 77.75409836065573, "grad_norm": 3.347423553466797, "learning_rate": 2.4851767154488947e-06, "loss": 0.1875, "step": 23715 }, { "epoch": 77.75737704918033, "grad_norm": 2.357426166534424, "learning_rate": 2.4844761744604453e-06, "loss": 0.1106, "step": 23716 }, { "epoch": 77.76065573770492, "grad_norm": 1.8784141540527344, "learning_rate": 2.483775718217344e-06, "loss": 0.1601, "step": 23717 }, { "epoch": 77.76393442622951, "grad_norm": 2.6529674530029297, "learning_rate": 2.483075346727486e-06, "loss": 0.172, "step": 23718 }, { "epoch": 77.7672131147541, "grad_norm": 2.6292173862457275, "learning_rate": 2.4823750599987683e-06, "loss": 0.1452, "step": 23719 }, { "epoch": 77.77049180327869, "grad_norm": 3.158310890197754, "learning_rate": 2.481674858039086e-06, "loss": 0.1736, "step": 23720 }, { "epoch": 77.77377049180328, "grad_norm": 2.592726469039917, "learning_rate": 2.48097474085634e-06, "loss": 0.2661, "step": 23721 }, { "epoch": 77.77704918032786, "grad_norm": 1.710453987121582, "learning_rate": 2.480274708458421e-06, "loss": 0.0535, "step": 23722 }, { "epoch": 77.78032786885245, "grad_norm": 2.3621840476989746, "learning_rate": 2.4795747608532204e-06, "loss": 0.0808, "step": 23723 }, { "epoch": 77.78360655737706, "grad_norm": 2.5312912464141846, "learning_rate": 2.478874898048638e-06, "loss": 0.0583, "step": 23724 }, { "epoch": 77.78688524590164, "grad_norm": 2.190967321395874, "learning_rate": 2.4781751200525595e-06, "loss": 0.2588, "step": 23725 }, { "epoch": 77.79016393442623, "grad_norm": 3.200237512588501, "learning_rate": 2.477475426872874e-06, "loss": 0.0869, "step": 23726 }, { "epoch": 77.79344262295082, "grad_norm": 2.3958914279937744, "learning_rate": 2.4767758185174786e-06, "loss": 0.0845, "step": 23727 }, { "epoch": 77.79672131147541, "grad_norm": 2.417799949645996, "learning_rate": 2.4760762949942575e-06, "loss": 0.1362, "step": 23728 }, { "epoch": 77.8, "grad_norm": 4.87496280670166, "learning_rate": 2.475376856311097e-06, "loss": 0.1, "step": 23729 }, { "epoch": 77.80327868852459, "grad_norm": 2.20648455619812, "learning_rate": 2.4746775024758874e-06, "loss": 0.0552, "step": 23730 }, { "epoch": 77.80655737704917, "grad_norm": 2.7107229232788086, "learning_rate": 2.4739782334965147e-06, "loss": 0.0772, "step": 23731 }, { "epoch": 77.80983606557378, "grad_norm": 2.305783987045288, "learning_rate": 2.4732790493808622e-06, "loss": 0.1288, "step": 23732 }, { "epoch": 77.81311475409836, "grad_norm": 2.6222422122955322, "learning_rate": 2.472579950136811e-06, "loss": 0.1828, "step": 23733 }, { "epoch": 77.81639344262295, "grad_norm": 2.6061389446258545, "learning_rate": 2.4718809357722505e-06, "loss": 0.1587, "step": 23734 }, { "epoch": 77.81967213114754, "grad_norm": 2.979752540588379, "learning_rate": 2.4711820062950596e-06, "loss": 0.2573, "step": 23735 }, { "epoch": 77.82295081967213, "grad_norm": 2.564631700515747, "learning_rate": 2.4704831617131197e-06, "loss": 0.0831, "step": 23736 }, { "epoch": 77.82622950819672, "grad_norm": 2.1377031803131104, "learning_rate": 2.4697844020343087e-06, "loss": 0.0717, "step": 23737 }, { "epoch": 77.8295081967213, "grad_norm": 2.5767624378204346, "learning_rate": 2.4690857272665102e-06, "loss": 0.2214, "step": 23738 }, { "epoch": 77.8327868852459, "grad_norm": 2.262929916381836, "learning_rate": 2.4683871374176006e-06, "loss": 0.0764, "step": 23739 }, { "epoch": 77.8360655737705, "grad_norm": 2.603459596633911, "learning_rate": 2.467688632495456e-06, "loss": 0.1261, "step": 23740 }, { "epoch": 77.83934426229509, "grad_norm": 1.8876696825027466, "learning_rate": 2.466990212507955e-06, "loss": 0.1248, "step": 23741 }, { "epoch": 77.84262295081967, "grad_norm": 2.118034839630127, "learning_rate": 2.4662918774629675e-06, "loss": 0.0683, "step": 23742 }, { "epoch": 77.84590163934426, "grad_norm": 2.2447144985198975, "learning_rate": 2.4655936273683768e-06, "loss": 0.1378, "step": 23743 }, { "epoch": 77.84918032786885, "grad_norm": 2.117203712463379, "learning_rate": 2.46489546223205e-06, "loss": 0.1906, "step": 23744 }, { "epoch": 77.85245901639344, "grad_norm": 2.9764444828033447, "learning_rate": 2.4641973820618627e-06, "loss": 0.0422, "step": 23745 }, { "epoch": 77.85573770491803, "grad_norm": 3.6447484493255615, "learning_rate": 2.463499386865681e-06, "loss": 0.1031, "step": 23746 }, { "epoch": 77.85901639344263, "grad_norm": 2.3774523735046387, "learning_rate": 2.4628014766513842e-06, "loss": 0.0982, "step": 23747 }, { "epoch": 77.86229508196722, "grad_norm": 2.7969369888305664, "learning_rate": 2.462103651426836e-06, "loss": 0.1128, "step": 23748 }, { "epoch": 77.8655737704918, "grad_norm": 2.9412059783935547, "learning_rate": 2.4614059111999076e-06, "loss": 0.194, "step": 23749 }, { "epoch": 77.8688524590164, "grad_norm": 3.565946102142334, "learning_rate": 2.4607082559784624e-06, "loss": 0.1894, "step": 23750 }, { "epoch": 77.87213114754098, "grad_norm": 2.4494240283966064, "learning_rate": 2.4600106857703734e-06, "loss": 0.0436, "step": 23751 }, { "epoch": 77.87540983606557, "grad_norm": 2.6458730697631836, "learning_rate": 2.459313200583504e-06, "loss": 0.0689, "step": 23752 }, { "epoch": 77.87868852459016, "grad_norm": 2.3547611236572266, "learning_rate": 2.4586158004257177e-06, "loss": 0.1542, "step": 23753 }, { "epoch": 77.88196721311475, "grad_norm": 2.7017650604248047, "learning_rate": 2.45791848530488e-06, "loss": 0.1354, "step": 23754 }, { "epoch": 77.88524590163935, "grad_norm": 2.409449338912964, "learning_rate": 2.4572212552288497e-06, "loss": 0.1162, "step": 23755 }, { "epoch": 77.88852459016394, "grad_norm": 2.3982248306274414, "learning_rate": 2.456524110205496e-06, "loss": 0.1252, "step": 23756 }, { "epoch": 77.89180327868853, "grad_norm": 3.971360921859741, "learning_rate": 2.455827050242676e-06, "loss": 0.1543, "step": 23757 }, { "epoch": 77.89508196721312, "grad_norm": 2.2479805946350098, "learning_rate": 2.45513007534825e-06, "loss": 0.0508, "step": 23758 }, { "epoch": 77.8983606557377, "grad_norm": 2.085966110229492, "learning_rate": 2.4544331855300742e-06, "loss": 0.0542, "step": 23759 }, { "epoch": 77.90163934426229, "grad_norm": 2.5334372520446777, "learning_rate": 2.4537363807960125e-06, "loss": 0.0969, "step": 23760 }, { "epoch": 77.90491803278688, "grad_norm": 2.0990922451019287, "learning_rate": 2.453039661153919e-06, "loss": 0.0359, "step": 23761 }, { "epoch": 77.90819672131147, "grad_norm": 3.622523546218872, "learning_rate": 2.45234302661165e-06, "loss": 0.0704, "step": 23762 }, { "epoch": 77.91147540983607, "grad_norm": 2.580789804458618, "learning_rate": 2.4516464771770577e-06, "loss": 0.0879, "step": 23763 }, { "epoch": 77.91475409836066, "grad_norm": 2.4907240867614746, "learning_rate": 2.450950012858003e-06, "loss": 0.1233, "step": 23764 }, { "epoch": 77.91803278688525, "grad_norm": 2.063477039337158, "learning_rate": 2.4502536336623373e-06, "loss": 0.0636, "step": 23765 }, { "epoch": 77.92131147540984, "grad_norm": 3.2431037425994873, "learning_rate": 2.44955733959791e-06, "loss": 0.0858, "step": 23766 }, { "epoch": 77.92459016393443, "grad_norm": 2.9445366859436035, "learning_rate": 2.4488611306725753e-06, "loss": 0.1233, "step": 23767 }, { "epoch": 77.92786885245901, "grad_norm": 2.5112552642822266, "learning_rate": 2.448165006894179e-06, "loss": 0.1718, "step": 23768 }, { "epoch": 77.9311475409836, "grad_norm": 2.301358222961426, "learning_rate": 2.4474689682705766e-06, "loss": 0.0605, "step": 23769 }, { "epoch": 77.93442622950819, "grad_norm": 2.674079656600952, "learning_rate": 2.4467730148096146e-06, "loss": 0.141, "step": 23770 }, { "epoch": 77.9377049180328, "grad_norm": 2.376917600631714, "learning_rate": 2.4460771465191415e-06, "loss": 0.1293, "step": 23771 }, { "epoch": 77.94098360655738, "grad_norm": 3.6487674713134766, "learning_rate": 2.445381363406998e-06, "loss": 0.1938, "step": 23772 }, { "epoch": 77.94426229508197, "grad_norm": 2.4255824089050293, "learning_rate": 2.444685665481038e-06, "loss": 0.076, "step": 23773 }, { "epoch": 77.94754098360656, "grad_norm": 3.4353480339050293, "learning_rate": 2.4439900527491034e-06, "loss": 0.1405, "step": 23774 }, { "epoch": 77.95081967213115, "grad_norm": 2.5785553455352783, "learning_rate": 2.4432945252190367e-06, "loss": 0.0646, "step": 23775 }, { "epoch": 77.95409836065573, "grad_norm": 2.492936611175537, "learning_rate": 2.442599082898678e-06, "loss": 0.1725, "step": 23776 }, { "epoch": 77.95737704918032, "grad_norm": 3.468677043914795, "learning_rate": 2.4419037257958757e-06, "loss": 0.1679, "step": 23777 }, { "epoch": 77.96065573770491, "grad_norm": 3.1793344020843506, "learning_rate": 2.4412084539184667e-06, "loss": 0.1644, "step": 23778 }, { "epoch": 77.96393442622951, "grad_norm": 2.5449609756469727, "learning_rate": 2.4405132672742893e-06, "loss": 0.0906, "step": 23779 }, { "epoch": 77.9672131147541, "grad_norm": 2.6799488067626953, "learning_rate": 2.4398181658711874e-06, "loss": 0.1983, "step": 23780 }, { "epoch": 77.97049180327869, "grad_norm": 1.9411511421203613, "learning_rate": 2.4391231497169965e-06, "loss": 0.0566, "step": 23781 }, { "epoch": 77.97377049180328, "grad_norm": 2.8735437393188477, "learning_rate": 2.438428218819551e-06, "loss": 0.0765, "step": 23782 }, { "epoch": 77.97704918032787, "grad_norm": 2.7616772651672363, "learning_rate": 2.4377333731866914e-06, "loss": 0.2037, "step": 23783 }, { "epoch": 77.98032786885246, "grad_norm": 2.440613269805908, "learning_rate": 2.4370386128262514e-06, "loss": 0.1627, "step": 23784 }, { "epoch": 77.98360655737704, "grad_norm": 2.3490641117095947, "learning_rate": 2.4363439377460606e-06, "loss": 0.1447, "step": 23785 }, { "epoch": 77.98688524590163, "grad_norm": 1.748305082321167, "learning_rate": 2.43564934795396e-06, "loss": 0.0781, "step": 23786 }, { "epoch": 77.99016393442623, "grad_norm": 2.7309584617614746, "learning_rate": 2.4349548434577774e-06, "loss": 0.1205, "step": 23787 }, { "epoch": 77.99344262295082, "grad_norm": 3.5052144527435303, "learning_rate": 2.4342604242653455e-06, "loss": 0.0878, "step": 23788 }, { "epoch": 77.99672131147541, "grad_norm": 2.7249834537506104, "learning_rate": 2.4335660903844893e-06, "loss": 0.218, "step": 23789 }, { "epoch": 78.0, "grad_norm": 2.651291608810425, "learning_rate": 2.432871841823047e-06, "loss": 0.1314, "step": 23790 }, { "epoch": 78.00327868852459, "grad_norm": 1.907983660697937, "learning_rate": 2.432177678588842e-06, "loss": 0.043, "step": 23791 }, { "epoch": 78.00655737704918, "grad_norm": 2.654142379760742, "learning_rate": 2.4314836006897027e-06, "loss": 0.267, "step": 23792 }, { "epoch": 78.00983606557377, "grad_norm": 2.363013744354248, "learning_rate": 2.4307896081334535e-06, "loss": 0.1228, "step": 23793 }, { "epoch": 78.01311475409837, "grad_norm": 2.8394691944122314, "learning_rate": 2.4300957009279203e-06, "loss": 0.0903, "step": 23794 }, { "epoch": 78.01639344262296, "grad_norm": 3.2054941654205322, "learning_rate": 2.4294018790809303e-06, "loss": 0.2714, "step": 23795 }, { "epoch": 78.01967213114754, "grad_norm": 2.4221885204315186, "learning_rate": 2.428708142600307e-06, "loss": 0.1208, "step": 23796 }, { "epoch": 78.02295081967213, "grad_norm": 2.5231199264526367, "learning_rate": 2.4280144914938706e-06, "loss": 0.1684, "step": 23797 }, { "epoch": 78.02622950819672, "grad_norm": 2.117757797241211, "learning_rate": 2.427320925769441e-06, "loss": 0.0631, "step": 23798 }, { "epoch": 78.02950819672131, "grad_norm": 2.3068833351135254, "learning_rate": 2.426627445434845e-06, "loss": 0.0483, "step": 23799 }, { "epoch": 78.0327868852459, "grad_norm": 2.3359317779541016, "learning_rate": 2.425934050497898e-06, "loss": 0.2078, "step": 23800 }, { "epoch": 78.03606557377049, "grad_norm": 3.026165246963501, "learning_rate": 2.42524074096642e-06, "loss": 0.0915, "step": 23801 }, { "epoch": 78.03934426229509, "grad_norm": 2.2181689739227295, "learning_rate": 2.424547516848226e-06, "loss": 0.1625, "step": 23802 }, { "epoch": 78.04262295081968, "grad_norm": 5.417883396148682, "learning_rate": 2.423854378151137e-06, "loss": 0.1714, "step": 23803 }, { "epoch": 78.04590163934427, "grad_norm": 2.1368260383605957, "learning_rate": 2.4231613248829677e-06, "loss": 0.0688, "step": 23804 }, { "epoch": 78.04918032786885, "grad_norm": 2.28548264503479, "learning_rate": 2.4224683570515326e-06, "loss": 0.1286, "step": 23805 }, { "epoch": 78.05245901639344, "grad_norm": 2.7162015438079834, "learning_rate": 2.4217754746646447e-06, "loss": 0.1038, "step": 23806 }, { "epoch": 78.05573770491803, "grad_norm": 2.9040210247039795, "learning_rate": 2.4210826777301154e-06, "loss": 0.1374, "step": 23807 }, { "epoch": 78.05901639344262, "grad_norm": 2.5733566284179688, "learning_rate": 2.420389966255763e-06, "loss": 0.0751, "step": 23808 }, { "epoch": 78.0622950819672, "grad_norm": 3.0815844535827637, "learning_rate": 2.4196973402493927e-06, "loss": 0.158, "step": 23809 }, { "epoch": 78.06557377049181, "grad_norm": 2.208430290222168, "learning_rate": 2.419004799718817e-06, "loss": 0.0854, "step": 23810 }, { "epoch": 78.0688524590164, "grad_norm": 2.6075143814086914, "learning_rate": 2.4183123446718425e-06, "loss": 0.2095, "step": 23811 }, { "epoch": 78.07213114754099, "grad_norm": 1.9823660850524902, "learning_rate": 2.417619975116281e-06, "loss": 0.202, "step": 23812 }, { "epoch": 78.07540983606557, "grad_norm": 2.0277607440948486, "learning_rate": 2.416927691059938e-06, "loss": 0.1209, "step": 23813 }, { "epoch": 78.07868852459016, "grad_norm": 3.2694060802459717, "learning_rate": 2.4162354925106214e-06, "loss": 0.182, "step": 23814 }, { "epoch": 78.08196721311475, "grad_norm": 2.4754021167755127, "learning_rate": 2.4155433794761294e-06, "loss": 0.1523, "step": 23815 }, { "epoch": 78.08524590163934, "grad_norm": 7.464511871337891, "learning_rate": 2.4148513519642756e-06, "loss": 0.0859, "step": 23816 }, { "epoch": 78.08852459016393, "grad_norm": 2.1541948318481445, "learning_rate": 2.4141594099828603e-06, "loss": 0.1371, "step": 23817 }, { "epoch": 78.09180327868853, "grad_norm": 2.5111148357391357, "learning_rate": 2.4134675535396847e-06, "loss": 0.0924, "step": 23818 }, { "epoch": 78.09508196721312, "grad_norm": 1.6247549057006836, "learning_rate": 2.412775782642549e-06, "loss": 0.0265, "step": 23819 }, { "epoch": 78.09836065573771, "grad_norm": 2.4542291164398193, "learning_rate": 2.4120840972992533e-06, "loss": 0.1052, "step": 23820 }, { "epoch": 78.1016393442623, "grad_norm": 3.5971219539642334, "learning_rate": 2.4113924975176016e-06, "loss": 0.2359, "step": 23821 }, { "epoch": 78.10491803278688, "grad_norm": 2.6203219890594482, "learning_rate": 2.4107009833053896e-06, "loss": 0.0774, "step": 23822 }, { "epoch": 78.10819672131147, "grad_norm": 5.776360034942627, "learning_rate": 2.410009554670415e-06, "loss": 0.1848, "step": 23823 }, { "epoch": 78.11147540983606, "grad_norm": 2.446908473968506, "learning_rate": 2.4093182116204716e-06, "loss": 0.0683, "step": 23824 }, { "epoch": 78.11475409836065, "grad_norm": 2.9514262676239014, "learning_rate": 2.40862695416336e-06, "loss": 0.2372, "step": 23825 }, { "epoch": 78.11803278688525, "grad_norm": 1.917506217956543, "learning_rate": 2.4079357823068727e-06, "loss": 0.0425, "step": 23826 }, { "epoch": 78.12131147540984, "grad_norm": 2.8591885566711426, "learning_rate": 2.4072446960588035e-06, "loss": 0.0642, "step": 23827 }, { "epoch": 78.12459016393443, "grad_norm": 2.565061092376709, "learning_rate": 2.406553695426942e-06, "loss": 0.1493, "step": 23828 }, { "epoch": 78.12786885245902, "grad_norm": 4.426124095916748, "learning_rate": 2.405862780419085e-06, "loss": 0.0634, "step": 23829 }, { "epoch": 78.1311475409836, "grad_norm": 2.434124708175659, "learning_rate": 2.4051719510430215e-06, "loss": 0.0536, "step": 23830 }, { "epoch": 78.1344262295082, "grad_norm": 3.0855331420898438, "learning_rate": 2.4044812073065384e-06, "loss": 0.1209, "step": 23831 }, { "epoch": 78.13770491803278, "grad_norm": 2.9523837566375732, "learning_rate": 2.4037905492174296e-06, "loss": 0.2118, "step": 23832 }, { "epoch": 78.14098360655737, "grad_norm": 1.7343482971191406, "learning_rate": 2.4030999767834807e-06, "loss": 0.0587, "step": 23833 }, { "epoch": 78.14426229508197, "grad_norm": 1.9896659851074219, "learning_rate": 2.402409490012475e-06, "loss": 0.1049, "step": 23834 }, { "epoch": 78.14754098360656, "grad_norm": 2.1192703247070312, "learning_rate": 2.4017190889122045e-06, "loss": 0.0652, "step": 23835 }, { "epoch": 78.15081967213115, "grad_norm": 2.456305742263794, "learning_rate": 2.4010287734904525e-06, "loss": 0.0595, "step": 23836 }, { "epoch": 78.15409836065574, "grad_norm": 2.3255579471588135, "learning_rate": 2.400338543755001e-06, "loss": 0.0826, "step": 23837 }, { "epoch": 78.15737704918033, "grad_norm": 1.8984938859939575, "learning_rate": 2.3996483997136322e-06, "loss": 0.1547, "step": 23838 }, { "epoch": 78.16065573770491, "grad_norm": 2.3041341304779053, "learning_rate": 2.3989583413741325e-06, "loss": 0.1683, "step": 23839 }, { "epoch": 78.1639344262295, "grad_norm": 2.5032312870025635, "learning_rate": 2.39826836874428e-06, "loss": 0.1732, "step": 23840 }, { "epoch": 78.1672131147541, "grad_norm": 2.294009208679199, "learning_rate": 2.3975784818318536e-06, "loss": 0.2201, "step": 23841 }, { "epoch": 78.1704918032787, "grad_norm": 1.930098533630371, "learning_rate": 2.396888680644637e-06, "loss": 0.0536, "step": 23842 }, { "epoch": 78.17377049180328, "grad_norm": 2.9609556198120117, "learning_rate": 2.396198965190405e-06, "loss": 0.1833, "step": 23843 }, { "epoch": 78.17704918032787, "grad_norm": 1.7930912971496582, "learning_rate": 2.3955093354769366e-06, "loss": 0.0418, "step": 23844 }, { "epoch": 78.18032786885246, "grad_norm": 2.378875732421875, "learning_rate": 2.3948197915120063e-06, "loss": 0.0644, "step": 23845 }, { "epoch": 78.18360655737705, "grad_norm": 2.1069469451904297, "learning_rate": 2.394130333303387e-06, "loss": 0.1441, "step": 23846 }, { "epoch": 78.18688524590164, "grad_norm": 2.40574049949646, "learning_rate": 2.3934409608588607e-06, "loss": 0.0583, "step": 23847 }, { "epoch": 78.19016393442622, "grad_norm": 2.17353892326355, "learning_rate": 2.3927516741861956e-06, "loss": 0.0442, "step": 23848 }, { "epoch": 78.19344262295083, "grad_norm": 2.186323881149292, "learning_rate": 2.3920624732931654e-06, "loss": 0.0566, "step": 23849 }, { "epoch": 78.19672131147541, "grad_norm": 2.088393449783325, "learning_rate": 2.3913733581875376e-06, "loss": 0.0894, "step": 23850 }, { "epoch": 78.2, "grad_norm": 2.659280300140381, "learning_rate": 2.390684328877089e-06, "loss": 0.1164, "step": 23851 }, { "epoch": 78.20327868852459, "grad_norm": 3.4510903358459473, "learning_rate": 2.389995385369587e-06, "loss": 0.1914, "step": 23852 }, { "epoch": 78.20655737704918, "grad_norm": 2.7063071727752686, "learning_rate": 2.3893065276727976e-06, "loss": 0.1684, "step": 23853 }, { "epoch": 78.20983606557377, "grad_norm": 2.121272087097168, "learning_rate": 2.388617755794489e-06, "loss": 0.0538, "step": 23854 }, { "epoch": 78.21311475409836, "grad_norm": 1.8684731721878052, "learning_rate": 2.387929069742432e-06, "loss": 0.1117, "step": 23855 }, { "epoch": 78.21639344262294, "grad_norm": 3.524268388748169, "learning_rate": 2.3872404695243887e-06, "loss": 0.166, "step": 23856 }, { "epoch": 78.21967213114755, "grad_norm": 2.0605554580688477, "learning_rate": 2.3865519551481254e-06, "loss": 0.0907, "step": 23857 }, { "epoch": 78.22295081967214, "grad_norm": 2.5159895420074463, "learning_rate": 2.3858635266214036e-06, "loss": 0.0679, "step": 23858 }, { "epoch": 78.22622950819672, "grad_norm": 3.102335214614868, "learning_rate": 2.3851751839519853e-06, "loss": 0.1206, "step": 23859 }, { "epoch": 78.22950819672131, "grad_norm": 2.795060157775879, "learning_rate": 2.384486927147637e-06, "loss": 0.1345, "step": 23860 }, { "epoch": 78.2327868852459, "grad_norm": 2.1645586490631104, "learning_rate": 2.383798756216116e-06, "loss": 0.0807, "step": 23861 }, { "epoch": 78.23606557377049, "grad_norm": 4.558911323547363, "learning_rate": 2.383110671165183e-06, "loss": 0.1008, "step": 23862 }, { "epoch": 78.23934426229508, "grad_norm": 2.361417770385742, "learning_rate": 2.3824226720025944e-06, "loss": 0.0573, "step": 23863 }, { "epoch": 78.24262295081967, "grad_norm": 3.1306874752044678, "learning_rate": 2.381734758736113e-06, "loss": 0.107, "step": 23864 }, { "epoch": 78.24590163934427, "grad_norm": 2.3747012615203857, "learning_rate": 2.381046931373493e-06, "loss": 0.2055, "step": 23865 }, { "epoch": 78.24918032786886, "grad_norm": 3.2329649925231934, "learning_rate": 2.380359189922491e-06, "loss": 0.0485, "step": 23866 }, { "epoch": 78.25245901639344, "grad_norm": 3.163177728652954, "learning_rate": 2.379671534390862e-06, "loss": 0.3127, "step": 23867 }, { "epoch": 78.25573770491803, "grad_norm": 2.780609607696533, "learning_rate": 2.3789839647863556e-06, "loss": 0.0732, "step": 23868 }, { "epoch": 78.25901639344262, "grad_norm": 2.7080142498016357, "learning_rate": 2.378296481116733e-06, "loss": 0.1419, "step": 23869 }, { "epoch": 78.26229508196721, "grad_norm": 2.5388872623443604, "learning_rate": 2.3776090833897414e-06, "loss": 0.1425, "step": 23870 }, { "epoch": 78.2655737704918, "grad_norm": 2.9857254028320312, "learning_rate": 2.3769217716131332e-06, "loss": 0.0941, "step": 23871 }, { "epoch": 78.26885245901639, "grad_norm": 2.0169975757598877, "learning_rate": 2.3762345457946546e-06, "loss": 0.1251, "step": 23872 }, { "epoch": 78.27213114754099, "grad_norm": 2.1301767826080322, "learning_rate": 2.3755474059420614e-06, "loss": 0.1102, "step": 23873 }, { "epoch": 78.27540983606558, "grad_norm": 2.3926284313201904, "learning_rate": 2.3748603520630977e-06, "loss": 0.0801, "step": 23874 }, { "epoch": 78.27868852459017, "grad_norm": 2.0278351306915283, "learning_rate": 2.3741733841655124e-06, "loss": 0.0605, "step": 23875 }, { "epoch": 78.28196721311475, "grad_norm": 2.718254804611206, "learning_rate": 2.3734865022570484e-06, "loss": 0.0894, "step": 23876 }, { "epoch": 78.28524590163934, "grad_norm": 2.6958107948303223, "learning_rate": 2.372799706345457e-06, "loss": 0.1474, "step": 23877 }, { "epoch": 78.28852459016393, "grad_norm": 2.449881076812744, "learning_rate": 2.3721129964384783e-06, "loss": 0.0831, "step": 23878 }, { "epoch": 78.29180327868852, "grad_norm": 1.9775145053863525, "learning_rate": 2.371426372543857e-06, "loss": 0.0339, "step": 23879 }, { "epoch": 78.29508196721312, "grad_norm": 2.2911033630371094, "learning_rate": 2.3707398346693346e-06, "loss": 0.1058, "step": 23880 }, { "epoch": 78.29836065573771, "grad_norm": 2.896928548812866, "learning_rate": 2.370053382822651e-06, "loss": 0.098, "step": 23881 }, { "epoch": 78.3016393442623, "grad_norm": 2.4625988006591797, "learning_rate": 2.3693670170115512e-06, "loss": 0.0956, "step": 23882 }, { "epoch": 78.30491803278689, "grad_norm": 2.945725917816162, "learning_rate": 2.3686807372437716e-06, "loss": 0.1195, "step": 23883 }, { "epoch": 78.30819672131148, "grad_norm": 2.3494515419006348, "learning_rate": 2.367994543527049e-06, "loss": 0.143, "step": 23884 }, { "epoch": 78.31147540983606, "grad_norm": 2.5165209770202637, "learning_rate": 2.3673084358691257e-06, "loss": 0.0773, "step": 23885 }, { "epoch": 78.31475409836065, "grad_norm": 2.2824175357818604, "learning_rate": 2.3666224142777363e-06, "loss": 0.0728, "step": 23886 }, { "epoch": 78.31803278688524, "grad_norm": 2.465602159500122, "learning_rate": 2.365936478760613e-06, "loss": 0.1082, "step": 23887 }, { "epoch": 78.32131147540984, "grad_norm": 3.2764408588409424, "learning_rate": 2.3652506293254953e-06, "loss": 0.241, "step": 23888 }, { "epoch": 78.32459016393443, "grad_norm": 3.219505548477173, "learning_rate": 2.3645648659801155e-06, "loss": 0.1443, "step": 23889 }, { "epoch": 78.32786885245902, "grad_norm": 2.6201717853546143, "learning_rate": 2.363879188732203e-06, "loss": 0.1811, "step": 23890 }, { "epoch": 78.33114754098361, "grad_norm": 2.970764398574829, "learning_rate": 2.3631935975894947e-06, "loss": 0.1026, "step": 23891 }, { "epoch": 78.3344262295082, "grad_norm": 3.2684786319732666, "learning_rate": 2.3625080925597198e-06, "loss": 0.2121, "step": 23892 }, { "epoch": 78.33770491803278, "grad_norm": 9.49886703491211, "learning_rate": 2.361822673650607e-06, "loss": 0.0894, "step": 23893 }, { "epoch": 78.34098360655737, "grad_norm": 2.7147552967071533, "learning_rate": 2.3611373408698813e-06, "loss": 0.1347, "step": 23894 }, { "epoch": 78.34426229508196, "grad_norm": 3.3766918182373047, "learning_rate": 2.3604520942252783e-06, "loss": 0.1527, "step": 23895 }, { "epoch": 78.34754098360656, "grad_norm": 2.2860965728759766, "learning_rate": 2.35976693372452e-06, "loss": 0.0571, "step": 23896 }, { "epoch": 78.35081967213115, "grad_norm": 3.034818172454834, "learning_rate": 2.359081859375334e-06, "loss": 0.2723, "step": 23897 }, { "epoch": 78.35409836065574, "grad_norm": 2.4372353553771973, "learning_rate": 2.358396871185442e-06, "loss": 0.1948, "step": 23898 }, { "epoch": 78.35737704918033, "grad_norm": 2.2788186073303223, "learning_rate": 2.357711969162574e-06, "loss": 0.1085, "step": 23899 }, { "epoch": 78.36065573770492, "grad_norm": 2.4634788036346436, "learning_rate": 2.3570271533144485e-06, "loss": 0.067, "step": 23900 }, { "epoch": 78.3639344262295, "grad_norm": 2.427356004714966, "learning_rate": 2.3563424236487887e-06, "loss": 0.1352, "step": 23901 }, { "epoch": 78.3672131147541, "grad_norm": 3.528977870941162, "learning_rate": 2.3556577801733126e-06, "loss": 0.1462, "step": 23902 }, { "epoch": 78.37049180327868, "grad_norm": 3.2552413940429688, "learning_rate": 2.3549732228957466e-06, "loss": 0.1387, "step": 23903 }, { "epoch": 78.37377049180328, "grad_norm": 1.8043725490570068, "learning_rate": 2.3542887518238056e-06, "loss": 0.0489, "step": 23904 }, { "epoch": 78.37704918032787, "grad_norm": 3.593222141265869, "learning_rate": 2.3536043669652086e-06, "loss": 0.1459, "step": 23905 }, { "epoch": 78.38032786885246, "grad_norm": 1.9642508029937744, "learning_rate": 2.3529200683276733e-06, "loss": 0.0548, "step": 23906 }, { "epoch": 78.38360655737705, "grad_norm": 2.8026952743530273, "learning_rate": 2.352235855918912e-06, "loss": 0.2497, "step": 23907 }, { "epoch": 78.38688524590164, "grad_norm": 2.4379358291625977, "learning_rate": 2.351551729746646e-06, "loss": 0.0772, "step": 23908 }, { "epoch": 78.39016393442623, "grad_norm": 3.3909316062927246, "learning_rate": 2.3508676898185856e-06, "loss": 0.1648, "step": 23909 }, { "epoch": 78.39344262295081, "grad_norm": 1.8344696760177612, "learning_rate": 2.350183736142446e-06, "loss": 0.061, "step": 23910 }, { "epoch": 78.3967213114754, "grad_norm": 2.410508632659912, "learning_rate": 2.349499868725936e-06, "loss": 0.1374, "step": 23911 }, { "epoch": 78.4, "grad_norm": 2.3948707580566406, "learning_rate": 2.3488160875767717e-06, "loss": 0.0864, "step": 23912 }, { "epoch": 78.4032786885246, "grad_norm": 2.8669273853302, "learning_rate": 2.3481323927026623e-06, "loss": 0.0781, "step": 23913 }, { "epoch": 78.40655737704918, "grad_norm": 3.0183680057525635, "learning_rate": 2.3474487841113146e-06, "loss": 0.2269, "step": 23914 }, { "epoch": 78.40983606557377, "grad_norm": 2.230452299118042, "learning_rate": 2.3467652618104375e-06, "loss": 0.112, "step": 23915 }, { "epoch": 78.41311475409836, "grad_norm": 2.14048171043396, "learning_rate": 2.346081825807741e-06, "loss": 0.1154, "step": 23916 }, { "epoch": 78.41639344262295, "grad_norm": 2.4862256050109863, "learning_rate": 2.3453984761109304e-06, "loss": 0.0442, "step": 23917 }, { "epoch": 78.41967213114754, "grad_norm": 2.9792840480804443, "learning_rate": 2.3447152127277105e-06, "loss": 0.088, "step": 23918 }, { "epoch": 78.42295081967212, "grad_norm": 3.5440218448638916, "learning_rate": 2.3440320356657863e-06, "loss": 0.223, "step": 23919 }, { "epoch": 78.42622950819673, "grad_norm": 2.2677724361419678, "learning_rate": 2.3433489449328583e-06, "loss": 0.0767, "step": 23920 }, { "epoch": 78.42950819672132, "grad_norm": 2.322706937789917, "learning_rate": 2.342665940536635e-06, "loss": 0.2521, "step": 23921 }, { "epoch": 78.4327868852459, "grad_norm": 1.6972426176071167, "learning_rate": 2.341983022484814e-06, "loss": 0.1714, "step": 23922 }, { "epoch": 78.43606557377049, "grad_norm": 2.52629017829895, "learning_rate": 2.3413001907850975e-06, "loss": 0.0673, "step": 23923 }, { "epoch": 78.43934426229508, "grad_norm": 5.11346435546875, "learning_rate": 2.340617445445181e-06, "loss": 0.0859, "step": 23924 }, { "epoch": 78.44262295081967, "grad_norm": 2.6757640838623047, "learning_rate": 2.3399347864727693e-06, "loss": 0.0895, "step": 23925 }, { "epoch": 78.44590163934426, "grad_norm": 2.4685440063476562, "learning_rate": 2.339252213875558e-06, "loss": 0.1069, "step": 23926 }, { "epoch": 78.44918032786886, "grad_norm": 2.29840087890625, "learning_rate": 2.338569727661243e-06, "loss": 0.1115, "step": 23927 }, { "epoch": 78.45245901639345, "grad_norm": 2.9399561882019043, "learning_rate": 2.337887327837517e-06, "loss": 0.1818, "step": 23928 }, { "epoch": 78.45573770491804, "grad_norm": 2.5885446071624756, "learning_rate": 2.3372050144120815e-06, "loss": 0.1289, "step": 23929 }, { "epoch": 78.45901639344262, "grad_norm": 2.851985216140747, "learning_rate": 2.3365227873926266e-06, "loss": 0.1478, "step": 23930 }, { "epoch": 78.46229508196721, "grad_norm": 2.460573673248291, "learning_rate": 2.3358406467868445e-06, "loss": 0.2217, "step": 23931 }, { "epoch": 78.4655737704918, "grad_norm": 2.5542409420013428, "learning_rate": 2.335158592602429e-06, "loss": 0.0773, "step": 23932 }, { "epoch": 78.46885245901639, "grad_norm": 2.615872383117676, "learning_rate": 2.334476624847066e-06, "loss": 0.1018, "step": 23933 }, { "epoch": 78.47213114754098, "grad_norm": 2.2335987091064453, "learning_rate": 2.3337947435284525e-06, "loss": 0.1339, "step": 23934 }, { "epoch": 78.47540983606558, "grad_norm": 2.043100595474243, "learning_rate": 2.3331129486542738e-06, "loss": 0.0911, "step": 23935 }, { "epoch": 78.47868852459017, "grad_norm": 2.0957159996032715, "learning_rate": 2.332431240232216e-06, "loss": 0.0953, "step": 23936 }, { "epoch": 78.48196721311476, "grad_norm": 2.1178979873657227, "learning_rate": 2.3317496182699704e-06, "loss": 0.1093, "step": 23937 }, { "epoch": 78.48524590163935, "grad_norm": 2.1320860385894775, "learning_rate": 2.3310680827752207e-06, "loss": 0.0606, "step": 23938 }, { "epoch": 78.48852459016393, "grad_norm": 2.783644914627075, "learning_rate": 2.3303866337556523e-06, "loss": 0.0891, "step": 23939 }, { "epoch": 78.49180327868852, "grad_norm": 3.674236536026001, "learning_rate": 2.3297052712189462e-06, "loss": 0.0681, "step": 23940 }, { "epoch": 78.49508196721311, "grad_norm": 1.9400123357772827, "learning_rate": 2.3290239951727913e-06, "loss": 0.0535, "step": 23941 }, { "epoch": 78.4983606557377, "grad_norm": 3.8020732402801514, "learning_rate": 2.3283428056248668e-06, "loss": 0.3057, "step": 23942 }, { "epoch": 78.5016393442623, "grad_norm": 2.2244832515716553, "learning_rate": 2.3276617025828505e-06, "loss": 0.1395, "step": 23943 }, { "epoch": 78.50491803278689, "grad_norm": 2.63531756401062, "learning_rate": 2.3269806860544286e-06, "loss": 0.0948, "step": 23944 }, { "epoch": 78.50819672131148, "grad_norm": 2.888925075531006, "learning_rate": 2.3262997560472776e-06, "loss": 0.216, "step": 23945 }, { "epoch": 78.51147540983607, "grad_norm": 2.1878867149353027, "learning_rate": 2.325618912569073e-06, "loss": 0.1401, "step": 23946 }, { "epoch": 78.51475409836065, "grad_norm": 2.9025368690490723, "learning_rate": 2.3249381556274964e-06, "loss": 0.1807, "step": 23947 }, { "epoch": 78.51803278688524, "grad_norm": 2.045248508453369, "learning_rate": 2.3242574852302225e-06, "loss": 0.0506, "step": 23948 }, { "epoch": 78.52131147540983, "grad_norm": 2.9295122623443604, "learning_rate": 2.323576901384926e-06, "loss": 0.264, "step": 23949 }, { "epoch": 78.52459016393442, "grad_norm": 2.7145731449127197, "learning_rate": 2.3228964040992798e-06, "loss": 0.116, "step": 23950 }, { "epoch": 78.52786885245902, "grad_norm": 2.5491769313812256, "learning_rate": 2.3222159933809606e-06, "loss": 0.1848, "step": 23951 }, { "epoch": 78.53114754098361, "grad_norm": 1.912379503250122, "learning_rate": 2.321535669237639e-06, "loss": 0.0648, "step": 23952 }, { "epoch": 78.5344262295082, "grad_norm": 2.984064817428589, "learning_rate": 2.320855431676987e-06, "loss": 0.1089, "step": 23953 }, { "epoch": 78.53770491803279, "grad_norm": 2.2171316146850586, "learning_rate": 2.3201752807066715e-06, "loss": 0.166, "step": 23954 }, { "epoch": 78.54098360655738, "grad_norm": 2.844801425933838, "learning_rate": 2.3194952163343667e-06, "loss": 0.1704, "step": 23955 }, { "epoch": 78.54426229508196, "grad_norm": 2.499972105026245, "learning_rate": 2.3188152385677397e-06, "loss": 0.0709, "step": 23956 }, { "epoch": 78.54754098360655, "grad_norm": 2.788947820663452, "learning_rate": 2.3181353474144565e-06, "loss": 0.1387, "step": 23957 }, { "epoch": 78.55081967213114, "grad_norm": 3.8916430473327637, "learning_rate": 2.3174555428821854e-06, "loss": 0.1696, "step": 23958 }, { "epoch": 78.55409836065574, "grad_norm": 1.689725637435913, "learning_rate": 2.3167758249785877e-06, "loss": 0.0567, "step": 23959 }, { "epoch": 78.55737704918033, "grad_norm": 2.240882396697998, "learning_rate": 2.3160961937113334e-06, "loss": 0.1462, "step": 23960 }, { "epoch": 78.56065573770492, "grad_norm": 2.3674561977386475, "learning_rate": 2.3154166490880847e-06, "loss": 0.1171, "step": 23961 }, { "epoch": 78.56393442622951, "grad_norm": 1.8985103368759155, "learning_rate": 2.314737191116503e-06, "loss": 0.132, "step": 23962 }, { "epoch": 78.5672131147541, "grad_norm": 2.06660532951355, "learning_rate": 2.3140578198042463e-06, "loss": 0.0476, "step": 23963 }, { "epoch": 78.57049180327868, "grad_norm": 2.029811382293701, "learning_rate": 2.3133785351589833e-06, "loss": 0.0719, "step": 23964 }, { "epoch": 78.57377049180327, "grad_norm": 3.6140732765197754, "learning_rate": 2.3126993371883686e-06, "loss": 0.1236, "step": 23965 }, { "epoch": 78.57704918032788, "grad_norm": 2.604950189590454, "learning_rate": 2.312020225900061e-06, "loss": 0.0832, "step": 23966 }, { "epoch": 78.58032786885246, "grad_norm": 2.411280870437622, "learning_rate": 2.311341201301718e-06, "loss": 0.1293, "step": 23967 }, { "epoch": 78.58360655737705, "grad_norm": 2.4822916984558105, "learning_rate": 2.310662263400998e-06, "loss": 0.1348, "step": 23968 }, { "epoch": 78.58688524590164, "grad_norm": 2.8460354804992676, "learning_rate": 2.3099834122055574e-06, "loss": 0.1128, "step": 23969 }, { "epoch": 78.59016393442623, "grad_norm": 2.3455793857574463, "learning_rate": 2.3093046477230486e-06, "loss": 0.183, "step": 23970 }, { "epoch": 78.59344262295082, "grad_norm": 2.11635422706604, "learning_rate": 2.308625969961127e-06, "loss": 0.1516, "step": 23971 }, { "epoch": 78.5967213114754, "grad_norm": 2.5039048194885254, "learning_rate": 2.307947378927441e-06, "loss": 0.1931, "step": 23972 }, { "epoch": 78.6, "grad_norm": 2.154900312423706, "learning_rate": 2.307268874629649e-06, "loss": 0.0715, "step": 23973 }, { "epoch": 78.6032786885246, "grad_norm": 2.906905174255371, "learning_rate": 2.3065904570753983e-06, "loss": 0.2751, "step": 23974 }, { "epoch": 78.60655737704919, "grad_norm": 3.7049853801727295, "learning_rate": 2.3059121262723405e-06, "loss": 0.3352, "step": 23975 }, { "epoch": 78.60983606557377, "grad_norm": 3.5382168292999268, "learning_rate": 2.3052338822281194e-06, "loss": 0.1098, "step": 23976 }, { "epoch": 78.61311475409836, "grad_norm": 2.253187656402588, "learning_rate": 2.3045557249503903e-06, "loss": 0.0845, "step": 23977 }, { "epoch": 78.61639344262295, "grad_norm": 2.980398654937744, "learning_rate": 2.303877654446797e-06, "loss": 0.168, "step": 23978 }, { "epoch": 78.61967213114754, "grad_norm": 2.9956395626068115, "learning_rate": 2.3031996707249837e-06, "loss": 0.1015, "step": 23979 }, { "epoch": 78.62295081967213, "grad_norm": 2.4411072731018066, "learning_rate": 2.3025217737925955e-06, "loss": 0.0405, "step": 23980 }, { "epoch": 78.62622950819672, "grad_norm": 2.504512071609497, "learning_rate": 2.30184396365728e-06, "loss": 0.0968, "step": 23981 }, { "epoch": 78.62950819672132, "grad_norm": 2.982649326324463, "learning_rate": 2.3011662403266778e-06, "loss": 0.1046, "step": 23982 }, { "epoch": 78.6327868852459, "grad_norm": 2.372695207595825, "learning_rate": 2.3004886038084308e-06, "loss": 0.0709, "step": 23983 }, { "epoch": 78.6360655737705, "grad_norm": 2.601206064224243, "learning_rate": 2.2998110541101804e-06, "loss": 0.1519, "step": 23984 }, { "epoch": 78.63934426229508, "grad_norm": 2.555483341217041, "learning_rate": 2.299133591239564e-06, "loss": 0.1411, "step": 23985 }, { "epoch": 78.64262295081967, "grad_norm": 2.5382096767425537, "learning_rate": 2.2984562152042258e-06, "loss": 0.1175, "step": 23986 }, { "epoch": 78.64590163934426, "grad_norm": 2.6206068992614746, "learning_rate": 2.297778926011801e-06, "loss": 0.1053, "step": 23987 }, { "epoch": 78.64918032786885, "grad_norm": 2.4972786903381348, "learning_rate": 2.2971017236699277e-06, "loss": 0.0711, "step": 23988 }, { "epoch": 78.65245901639344, "grad_norm": 2.0980958938598633, "learning_rate": 2.2964246081862385e-06, "loss": 0.0836, "step": 23989 }, { "epoch": 78.65573770491804, "grad_norm": 3.1563680171966553, "learning_rate": 2.295747579568375e-06, "loss": 0.0664, "step": 23990 }, { "epoch": 78.65901639344263, "grad_norm": 8.768284797668457, "learning_rate": 2.2950706378239673e-06, "loss": 0.2676, "step": 23991 }, { "epoch": 78.66229508196722, "grad_norm": 2.1647400856018066, "learning_rate": 2.2943937829606464e-06, "loss": 0.1021, "step": 23992 }, { "epoch": 78.6655737704918, "grad_norm": 2.684229850769043, "learning_rate": 2.29371701498605e-06, "loss": 0.1452, "step": 23993 }, { "epoch": 78.66885245901639, "grad_norm": 2.4166882038116455, "learning_rate": 2.293040333907808e-06, "loss": 0.0838, "step": 23994 }, { "epoch": 78.67213114754098, "grad_norm": 3.197141647338867, "learning_rate": 2.292363739733545e-06, "loss": 0.1518, "step": 23995 }, { "epoch": 78.67540983606557, "grad_norm": 2.6667685508728027, "learning_rate": 2.2916872324708985e-06, "loss": 0.1231, "step": 23996 }, { "epoch": 78.67868852459016, "grad_norm": 2.0714123249053955, "learning_rate": 2.2910108121274933e-06, "loss": 0.1193, "step": 23997 }, { "epoch": 78.68196721311476, "grad_norm": 2.314462423324585, "learning_rate": 2.2903344787109527e-06, "loss": 0.1085, "step": 23998 }, { "epoch": 78.68524590163935, "grad_norm": 2.2840144634246826, "learning_rate": 2.2896582322289106e-06, "loss": 0.0445, "step": 23999 }, { "epoch": 78.68852459016394, "grad_norm": 1.9790117740631104, "learning_rate": 2.2889820726889887e-06, "loss": 0.1269, "step": 24000 }, { "epoch": 78.69180327868852, "grad_norm": 2.1020233631134033, "learning_rate": 2.288306000098811e-06, "loss": 0.1193, "step": 24001 }, { "epoch": 78.69508196721311, "grad_norm": 2.756448745727539, "learning_rate": 2.287630014465999e-06, "loss": 0.0859, "step": 24002 }, { "epoch": 78.6983606557377, "grad_norm": 2.2845540046691895, "learning_rate": 2.28695411579818e-06, "loss": 0.0742, "step": 24003 }, { "epoch": 78.70163934426229, "grad_norm": 2.081949234008789, "learning_rate": 2.286278304102972e-06, "loss": 0.0476, "step": 24004 }, { "epoch": 78.70491803278688, "grad_norm": 2.0449013710021973, "learning_rate": 2.2856025793879978e-06, "loss": 0.1436, "step": 24005 }, { "epoch": 78.70819672131148, "grad_norm": 2.9159319400787354, "learning_rate": 2.284926941660872e-06, "loss": 0.0517, "step": 24006 }, { "epoch": 78.71147540983607, "grad_norm": 2.7180263996124268, "learning_rate": 2.2842513909292197e-06, "loss": 0.2049, "step": 24007 }, { "epoch": 78.71475409836066, "grad_norm": 2.1741766929626465, "learning_rate": 2.2835759272006554e-06, "loss": 0.0344, "step": 24008 }, { "epoch": 78.71803278688525, "grad_norm": 2.111375093460083, "learning_rate": 2.282900550482795e-06, "loss": 0.2519, "step": 24009 }, { "epoch": 78.72131147540983, "grad_norm": 2.2653005123138428, "learning_rate": 2.282225260783254e-06, "loss": 0.064, "step": 24010 }, { "epoch": 78.72459016393442, "grad_norm": 3.1161398887634277, "learning_rate": 2.281550058109646e-06, "loss": 0.1361, "step": 24011 }, { "epoch": 78.72786885245901, "grad_norm": 1.9122469425201416, "learning_rate": 2.2808749424695896e-06, "loss": 0.2153, "step": 24012 }, { "epoch": 78.73114754098361, "grad_norm": 2.436480760574341, "learning_rate": 2.2801999138706933e-06, "loss": 0.2296, "step": 24013 }, { "epoch": 78.7344262295082, "grad_norm": 2.258831024169922, "learning_rate": 2.2795249723205693e-06, "loss": 0.162, "step": 24014 }, { "epoch": 78.73770491803279, "grad_norm": 2.5494346618652344, "learning_rate": 2.2788501178268262e-06, "loss": 0.103, "step": 24015 }, { "epoch": 78.74098360655738, "grad_norm": 2.358483076095581, "learning_rate": 2.2781753503970782e-06, "loss": 0.1073, "step": 24016 }, { "epoch": 78.74426229508197, "grad_norm": 2.23290753364563, "learning_rate": 2.2775006700389325e-06, "loss": 0.0305, "step": 24017 }, { "epoch": 78.74754098360656, "grad_norm": 2.522068738937378, "learning_rate": 2.2768260767599958e-06, "loss": 0.0581, "step": 24018 }, { "epoch": 78.75081967213114, "grad_norm": 2.1706581115722656, "learning_rate": 2.2761515705678727e-06, "loss": 0.1466, "step": 24019 }, { "epoch": 78.75409836065573, "grad_norm": 2.18628191947937, "learning_rate": 2.2754771514701735e-06, "loss": 0.055, "step": 24020 }, { "epoch": 78.75737704918033, "grad_norm": 2.6293785572052, "learning_rate": 2.274802819474502e-06, "loss": 0.1789, "step": 24021 }, { "epoch": 78.76065573770492, "grad_norm": 3.328843832015991, "learning_rate": 2.27412857458846e-06, "loss": 0.0806, "step": 24022 }, { "epoch": 78.76393442622951, "grad_norm": 2.3704378604888916, "learning_rate": 2.2734544168196515e-06, "loss": 0.1296, "step": 24023 }, { "epoch": 78.7672131147541, "grad_norm": 2.713409185409546, "learning_rate": 2.2727803461756748e-06, "loss": 0.1378, "step": 24024 }, { "epoch": 78.77049180327869, "grad_norm": 2.4489493370056152, "learning_rate": 2.272106362664137e-06, "loss": 0.0804, "step": 24025 }, { "epoch": 78.77377049180328, "grad_norm": 1.9327201843261719, "learning_rate": 2.2714324662926357e-06, "loss": 0.0459, "step": 24026 }, { "epoch": 78.77704918032786, "grad_norm": 2.0791165828704834, "learning_rate": 2.270758657068769e-06, "loss": 0.1118, "step": 24027 }, { "epoch": 78.78032786885245, "grad_norm": 3.037649393081665, "learning_rate": 2.2700849350001306e-06, "loss": 0.293, "step": 24028 }, { "epoch": 78.78360655737706, "grad_norm": 2.14587140083313, "learning_rate": 2.269411300094326e-06, "loss": 0.0469, "step": 24029 }, { "epoch": 78.78688524590164, "grad_norm": 2.1415224075317383, "learning_rate": 2.268737752358946e-06, "loss": 0.1184, "step": 24030 }, { "epoch": 78.79016393442623, "grad_norm": 2.7922468185424805, "learning_rate": 2.268064291801587e-06, "loss": 0.07, "step": 24031 }, { "epoch": 78.79344262295082, "grad_norm": 2.6296536922454834, "learning_rate": 2.267390918429839e-06, "loss": 0.066, "step": 24032 }, { "epoch": 78.79672131147541, "grad_norm": 2.1234452724456787, "learning_rate": 2.2667176322513005e-06, "loss": 0.0358, "step": 24033 }, { "epoch": 78.8, "grad_norm": 2.0166616439819336, "learning_rate": 2.266044433273562e-06, "loss": 0.0579, "step": 24034 }, { "epoch": 78.80327868852459, "grad_norm": 2.614384412765503, "learning_rate": 2.2653713215042137e-06, "loss": 0.0565, "step": 24035 }, { "epoch": 78.80655737704917, "grad_norm": 2.821880578994751, "learning_rate": 2.2646982969508456e-06, "loss": 0.2041, "step": 24036 }, { "epoch": 78.80983606557378, "grad_norm": 2.618736743927002, "learning_rate": 2.264025359621045e-06, "loss": 0.0338, "step": 24037 }, { "epoch": 78.81311475409836, "grad_norm": 1.8753483295440674, "learning_rate": 2.263352509522404e-06, "loss": 0.0319, "step": 24038 }, { "epoch": 78.81639344262295, "grad_norm": 2.772217035293579, "learning_rate": 2.262679746662507e-06, "loss": 0.1954, "step": 24039 }, { "epoch": 78.81967213114754, "grad_norm": 2.767335891723633, "learning_rate": 2.2620070710489416e-06, "loss": 0.1404, "step": 24040 }, { "epoch": 78.82295081967213, "grad_norm": 2.738593816757202, "learning_rate": 2.261334482689289e-06, "loss": 0.1934, "step": 24041 }, { "epoch": 78.82622950819672, "grad_norm": 2.3143370151519775, "learning_rate": 2.260661981591139e-06, "loss": 0.1895, "step": 24042 }, { "epoch": 78.8295081967213, "grad_norm": 2.1024816036224365, "learning_rate": 2.259989567762072e-06, "loss": 0.1081, "step": 24043 }, { "epoch": 78.8327868852459, "grad_norm": 6.392049312591553, "learning_rate": 2.2593172412096698e-06, "loss": 0.1002, "step": 24044 }, { "epoch": 78.8360655737705, "grad_norm": 2.492861270904541, "learning_rate": 2.2586450019415118e-06, "loss": 0.1837, "step": 24045 }, { "epoch": 78.83934426229509, "grad_norm": 2.209482431411743, "learning_rate": 2.257972849965182e-06, "loss": 0.1249, "step": 24046 }, { "epoch": 78.84262295081967, "grad_norm": 3.486022472381592, "learning_rate": 2.257300785288259e-06, "loss": 0.182, "step": 24047 }, { "epoch": 78.84590163934426, "grad_norm": 3.139402389526367, "learning_rate": 2.2566288079183172e-06, "loss": 0.1257, "step": 24048 }, { "epoch": 78.84918032786885, "grad_norm": 1.6896917819976807, "learning_rate": 2.2559569178629394e-06, "loss": 0.0567, "step": 24049 }, { "epoch": 78.85245901639344, "grad_norm": 1.4992684125900269, "learning_rate": 2.2552851151296995e-06, "loss": 0.024, "step": 24050 }, { "epoch": 78.85573770491803, "grad_norm": 2.881845474243164, "learning_rate": 2.254613399726169e-06, "loss": 0.0441, "step": 24051 }, { "epoch": 78.85901639344263, "grad_norm": 2.5857317447662354, "learning_rate": 2.2539417716599286e-06, "loss": 0.0942, "step": 24052 }, { "epoch": 78.86229508196722, "grad_norm": 2.2992401123046875, "learning_rate": 2.253270230938549e-06, "loss": 0.1003, "step": 24053 }, { "epoch": 78.8655737704918, "grad_norm": 2.7004406452178955, "learning_rate": 2.252598777569599e-06, "loss": 0.1243, "step": 24054 }, { "epoch": 78.8688524590164, "grad_norm": 3.6153602600097656, "learning_rate": 2.2519274115606558e-06, "loss": 0.1021, "step": 24055 }, { "epoch": 78.87213114754098, "grad_norm": 3.180424213409424, "learning_rate": 2.251256132919287e-06, "loss": 0.0846, "step": 24056 }, { "epoch": 78.87540983606557, "grad_norm": 2.3656489849090576, "learning_rate": 2.2505849416530624e-06, "loss": 0.0567, "step": 24057 }, { "epoch": 78.87868852459016, "grad_norm": 2.154365301132202, "learning_rate": 2.2499138377695463e-06, "loss": 0.1366, "step": 24058 }, { "epoch": 78.88196721311475, "grad_norm": 1.9294062852859497, "learning_rate": 2.249242821276313e-06, "loss": 0.1285, "step": 24059 }, { "epoch": 78.88524590163935, "grad_norm": 2.574688673019409, "learning_rate": 2.2485718921809263e-06, "loss": 0.1223, "step": 24060 }, { "epoch": 78.88852459016394, "grad_norm": 1.9492400884628296, "learning_rate": 2.2479010504909504e-06, "loss": 0.0422, "step": 24061 }, { "epoch": 78.89180327868853, "grad_norm": 5.854201316833496, "learning_rate": 2.2472302962139504e-06, "loss": 0.1547, "step": 24062 }, { "epoch": 78.89508196721312, "grad_norm": 1.6911214590072632, "learning_rate": 2.2465596293574867e-06, "loss": 0.0779, "step": 24063 }, { "epoch": 78.8983606557377, "grad_norm": 1.7648684978485107, "learning_rate": 2.2458890499291273e-06, "loss": 0.0635, "step": 24064 }, { "epoch": 78.90163934426229, "grad_norm": 2.362813711166382, "learning_rate": 2.2452185579364318e-06, "loss": 0.0672, "step": 24065 }, { "epoch": 78.90491803278688, "grad_norm": 2.743501663208008, "learning_rate": 2.2445481533869597e-06, "loss": 0.0893, "step": 24066 }, { "epoch": 78.90819672131147, "grad_norm": 2.4161674976348877, "learning_rate": 2.2438778362882672e-06, "loss": 0.1715, "step": 24067 }, { "epoch": 78.91147540983607, "grad_norm": 2.6255879402160645, "learning_rate": 2.2432076066479203e-06, "loss": 0.1557, "step": 24068 }, { "epoch": 78.91475409836066, "grad_norm": 2.916642904281616, "learning_rate": 2.242537464473472e-06, "loss": 0.074, "step": 24069 }, { "epoch": 78.91803278688525, "grad_norm": 2.6259357929229736, "learning_rate": 2.24186740977248e-06, "loss": 0.0645, "step": 24070 }, { "epoch": 78.92131147540984, "grad_norm": 2.292178153991699, "learning_rate": 2.2411974425524964e-06, "loss": 0.2485, "step": 24071 }, { "epoch": 78.92459016393443, "grad_norm": 3.530891180038452, "learning_rate": 2.240527562821082e-06, "loss": 0.223, "step": 24072 }, { "epoch": 78.92786885245901, "grad_norm": 2.4203078746795654, "learning_rate": 2.239857770585787e-06, "loss": 0.0843, "step": 24073 }, { "epoch": 78.9311475409836, "grad_norm": 1.7783198356628418, "learning_rate": 2.2391880658541644e-06, "loss": 0.0473, "step": 24074 }, { "epoch": 78.93442622950819, "grad_norm": 3.1946921348571777, "learning_rate": 2.2385184486337643e-06, "loss": 0.2531, "step": 24075 }, { "epoch": 78.9377049180328, "grad_norm": 3.3174917697906494, "learning_rate": 2.237848918932137e-06, "loss": 0.1133, "step": 24076 }, { "epoch": 78.94098360655738, "grad_norm": 3.3995635509490967, "learning_rate": 2.2371794767568367e-06, "loss": 0.1084, "step": 24077 }, { "epoch": 78.94426229508197, "grad_norm": 2.610185146331787, "learning_rate": 2.2365101221154084e-06, "loss": 0.1992, "step": 24078 }, { "epoch": 78.94754098360656, "grad_norm": 2.399167060852051, "learning_rate": 2.2358408550154e-06, "loss": 0.2657, "step": 24079 }, { "epoch": 78.95081967213115, "grad_norm": 2.621631383895874, "learning_rate": 2.235171675464357e-06, "loss": 0.1371, "step": 24080 }, { "epoch": 78.95409836065573, "grad_norm": 2.172571897506714, "learning_rate": 2.2345025834698285e-06, "loss": 0.0432, "step": 24081 }, { "epoch": 78.95737704918032, "grad_norm": 2.453714370727539, "learning_rate": 2.2338335790393583e-06, "loss": 0.1994, "step": 24082 }, { "epoch": 78.96065573770491, "grad_norm": 2.5567784309387207, "learning_rate": 2.233164662180489e-06, "loss": 0.1858, "step": 24083 }, { "epoch": 78.96393442622951, "grad_norm": 2.597871780395508, "learning_rate": 2.232495832900762e-06, "loss": 0.1846, "step": 24084 }, { "epoch": 78.9672131147541, "grad_norm": 2.6628148555755615, "learning_rate": 2.231827091207721e-06, "loss": 0.1103, "step": 24085 }, { "epoch": 78.97049180327869, "grad_norm": 2.725159168243408, "learning_rate": 2.231158437108908e-06, "loss": 0.1597, "step": 24086 }, { "epoch": 78.97377049180328, "grad_norm": 2.659539222717285, "learning_rate": 2.2304898706118614e-06, "loss": 0.0857, "step": 24087 }, { "epoch": 78.97704918032787, "grad_norm": 3.255573272705078, "learning_rate": 2.2298213917241196e-06, "loss": 0.127, "step": 24088 }, { "epoch": 78.98032786885246, "grad_norm": 2.1340444087982178, "learning_rate": 2.2291530004532168e-06, "loss": 0.0288, "step": 24089 }, { "epoch": 78.98360655737704, "grad_norm": 2.9655954837799072, "learning_rate": 2.2284846968066976e-06, "loss": 0.1381, "step": 24090 }, { "epoch": 78.98688524590163, "grad_norm": 3.290705442428589, "learning_rate": 2.2278164807920933e-06, "loss": 0.1577, "step": 24091 }, { "epoch": 78.99016393442623, "grad_norm": 2.771977186203003, "learning_rate": 2.227148352416939e-06, "loss": 0.09, "step": 24092 }, { "epoch": 78.99344262295082, "grad_norm": 2.7874691486358643, "learning_rate": 2.2264803116887666e-06, "loss": 0.1625, "step": 24093 }, { "epoch": 78.99672131147541, "grad_norm": 3.4072492122650146, "learning_rate": 2.2258123586151137e-06, "loss": 0.1258, "step": 24094 }, { "epoch": 79.0, "grad_norm": 2.054152488708496, "learning_rate": 2.2251444932035094e-06, "loss": 0.0916, "step": 24095 }, { "epoch": 79.00327868852459, "grad_norm": 2.077890157699585, "learning_rate": 2.2244767154614843e-06, "loss": 0.0484, "step": 24096 }, { "epoch": 79.00655737704918, "grad_norm": 8.681817054748535, "learning_rate": 2.2238090253965662e-06, "loss": 0.0718, "step": 24097 }, { "epoch": 79.00983606557377, "grad_norm": 4.123477935791016, "learning_rate": 2.2231414230162897e-06, "loss": 0.1801, "step": 24098 }, { "epoch": 79.01311475409837, "grad_norm": 1.988189458847046, "learning_rate": 2.222473908328179e-06, "loss": 0.1527, "step": 24099 }, { "epoch": 79.01639344262296, "grad_norm": 3.026151418685913, "learning_rate": 2.22180648133976e-06, "loss": 0.1069, "step": 24100 }, { "epoch": 79.01967213114754, "grad_norm": 1.4711308479309082, "learning_rate": 2.2211391420585614e-06, "loss": 0.0515, "step": 24101 }, { "epoch": 79.02295081967213, "grad_norm": 3.7359440326690674, "learning_rate": 2.2204718904921084e-06, "loss": 0.0789, "step": 24102 }, { "epoch": 79.02622950819672, "grad_norm": 1.8199807405471802, "learning_rate": 2.219804726647923e-06, "loss": 0.0437, "step": 24103 }, { "epoch": 79.02950819672131, "grad_norm": 3.3120498657226562, "learning_rate": 2.219137650533525e-06, "loss": 0.0743, "step": 24104 }, { "epoch": 79.0327868852459, "grad_norm": 2.9135069847106934, "learning_rate": 2.2184706621564433e-06, "loss": 0.1313, "step": 24105 }, { "epoch": 79.03606557377049, "grad_norm": 2.4521751403808594, "learning_rate": 2.2178037615241967e-06, "loss": 0.1641, "step": 24106 }, { "epoch": 79.03934426229509, "grad_norm": 2.3830528259277344, "learning_rate": 2.217136948644301e-06, "loss": 0.115, "step": 24107 }, { "epoch": 79.04262295081968, "grad_norm": 1.9358378648757935, "learning_rate": 2.2164702235242795e-06, "loss": 0.0475, "step": 24108 }, { "epoch": 79.04590163934427, "grad_norm": 2.0593245029449463, "learning_rate": 2.215803586171651e-06, "loss": 0.0451, "step": 24109 }, { "epoch": 79.04918032786885, "grad_norm": 4.911606788635254, "learning_rate": 2.2151370365939275e-06, "loss": 0.1563, "step": 24110 }, { "epoch": 79.05245901639344, "grad_norm": 1.9070978164672852, "learning_rate": 2.2144705747986304e-06, "loss": 0.1798, "step": 24111 }, { "epoch": 79.05573770491803, "grad_norm": 2.284803867340088, "learning_rate": 2.2138042007932725e-06, "loss": 0.1685, "step": 24112 }, { "epoch": 79.05901639344262, "grad_norm": 3.222079277038574, "learning_rate": 2.213137914585368e-06, "loss": 0.3716, "step": 24113 }, { "epoch": 79.0622950819672, "grad_norm": 2.7768523693084717, "learning_rate": 2.2124717161824296e-06, "loss": 0.1206, "step": 24114 }, { "epoch": 79.06557377049181, "grad_norm": 2.254532814025879, "learning_rate": 2.211805605591967e-06, "loss": 0.0427, "step": 24115 }, { "epoch": 79.0688524590164, "grad_norm": 2.8413939476013184, "learning_rate": 2.2111395828214967e-06, "loss": 0.1008, "step": 24116 }, { "epoch": 79.07213114754099, "grad_norm": 2.622960329055786, "learning_rate": 2.210473647878526e-06, "loss": 0.0871, "step": 24117 }, { "epoch": 79.07540983606557, "grad_norm": 2.95332932472229, "learning_rate": 2.209807800770565e-06, "loss": 0.1048, "step": 24118 }, { "epoch": 79.07868852459016, "grad_norm": 2.168203353881836, "learning_rate": 2.2091420415051168e-06, "loss": 0.185, "step": 24119 }, { "epoch": 79.08196721311475, "grad_norm": 2.3660643100738525, "learning_rate": 2.208476370089695e-06, "loss": 0.0386, "step": 24120 }, { "epoch": 79.08524590163934, "grad_norm": 1.868765115737915, "learning_rate": 2.2078107865318044e-06, "loss": 0.0426, "step": 24121 }, { "epoch": 79.08852459016393, "grad_norm": 2.472295045852661, "learning_rate": 2.2071452908389478e-06, "loss": 0.2253, "step": 24122 }, { "epoch": 79.09180327868853, "grad_norm": 3.2186055183410645, "learning_rate": 2.2064798830186283e-06, "loss": 0.1992, "step": 24123 }, { "epoch": 79.09508196721312, "grad_norm": 2.2708847522735596, "learning_rate": 2.2058145630783545e-06, "loss": 0.0947, "step": 24124 }, { "epoch": 79.09836065573771, "grad_norm": 2.282604694366455, "learning_rate": 2.2051493310256255e-06, "loss": 0.1377, "step": 24125 }, { "epoch": 79.1016393442623, "grad_norm": 2.472022294998169, "learning_rate": 2.2044841868679422e-06, "loss": 0.0615, "step": 24126 }, { "epoch": 79.10491803278688, "grad_norm": 2.8268320560455322, "learning_rate": 2.2038191306128043e-06, "loss": 0.1283, "step": 24127 }, { "epoch": 79.10819672131147, "grad_norm": 2.499892234802246, "learning_rate": 2.20315416226771e-06, "loss": 0.1347, "step": 24128 }, { "epoch": 79.11147540983606, "grad_norm": 3.1015164852142334, "learning_rate": 2.202489281840161e-06, "loss": 0.1966, "step": 24129 }, { "epoch": 79.11475409836065, "grad_norm": 3.0146195888519287, "learning_rate": 2.201824489337654e-06, "loss": 0.1434, "step": 24130 }, { "epoch": 79.11803278688525, "grad_norm": 2.5643134117126465, "learning_rate": 2.2011597847676825e-06, "loss": 0.1582, "step": 24131 }, { "epoch": 79.12131147540984, "grad_norm": 3.2787413597106934, "learning_rate": 2.2004951681377417e-06, "loss": 0.1622, "step": 24132 }, { "epoch": 79.12459016393443, "grad_norm": 2.5278213024139404, "learning_rate": 2.1998306394553293e-06, "loss": 0.1182, "step": 24133 }, { "epoch": 79.12786885245902, "grad_norm": 2.6347525119781494, "learning_rate": 2.1991661987279368e-06, "loss": 0.0997, "step": 24134 }, { "epoch": 79.1311475409836, "grad_norm": 2.415283441543579, "learning_rate": 2.1985018459630557e-06, "loss": 0.1472, "step": 24135 }, { "epoch": 79.1344262295082, "grad_norm": 3.448200225830078, "learning_rate": 2.197837581168176e-06, "loss": 0.1614, "step": 24136 }, { "epoch": 79.13770491803278, "grad_norm": 2.276296854019165, "learning_rate": 2.197173404350792e-06, "loss": 0.0454, "step": 24137 }, { "epoch": 79.14098360655737, "grad_norm": 2.7027628421783447, "learning_rate": 2.1965093155183914e-06, "loss": 0.1295, "step": 24138 }, { "epoch": 79.14426229508197, "grad_norm": 3.335838556289673, "learning_rate": 2.1958453146784607e-06, "loss": 0.1724, "step": 24139 }, { "epoch": 79.14754098360656, "grad_norm": 2.7030293941497803, "learning_rate": 2.1951814018384897e-06, "loss": 0.1822, "step": 24140 }, { "epoch": 79.15081967213115, "grad_norm": 2.6092288494110107, "learning_rate": 2.19451757700596e-06, "loss": 0.1513, "step": 24141 }, { "epoch": 79.15409836065574, "grad_norm": 2.143704414367676, "learning_rate": 2.1938538401883625e-06, "loss": 0.0984, "step": 24142 }, { "epoch": 79.15737704918033, "grad_norm": 2.85292911529541, "learning_rate": 2.1931901913931797e-06, "loss": 0.1347, "step": 24143 }, { "epoch": 79.16065573770491, "grad_norm": 3.3131611347198486, "learning_rate": 2.1925266306278945e-06, "loss": 0.0784, "step": 24144 }, { "epoch": 79.1639344262295, "grad_norm": 2.682610511779785, "learning_rate": 2.191863157899987e-06, "loss": 0.0658, "step": 24145 }, { "epoch": 79.1672131147541, "grad_norm": 1.7813692092895508, "learning_rate": 2.191199773216943e-06, "loss": 0.1014, "step": 24146 }, { "epoch": 79.1704918032787, "grad_norm": 2.760054111480713, "learning_rate": 2.1905364765862415e-06, "loss": 0.0961, "step": 24147 }, { "epoch": 79.17377049180328, "grad_norm": 2.2832584381103516, "learning_rate": 2.18987326801536e-06, "loss": 0.0988, "step": 24148 }, { "epoch": 79.17704918032787, "grad_norm": 2.485926628112793, "learning_rate": 2.1892101475117754e-06, "loss": 0.0548, "step": 24149 }, { "epoch": 79.18032786885246, "grad_norm": 1.6828750371932983, "learning_rate": 2.1885471150829705e-06, "loss": 0.1113, "step": 24150 }, { "epoch": 79.18360655737705, "grad_norm": 2.2588791847229004, "learning_rate": 2.1878841707364196e-06, "loss": 0.1823, "step": 24151 }, { "epoch": 79.18688524590164, "grad_norm": 3.1669695377349854, "learning_rate": 2.187221314479596e-06, "loss": 0.1053, "step": 24152 }, { "epoch": 79.19016393442622, "grad_norm": 2.9597439765930176, "learning_rate": 2.1865585463199736e-06, "loss": 0.14, "step": 24153 }, { "epoch": 79.19344262295083, "grad_norm": 2.5066659450531006, "learning_rate": 2.1858958662650287e-06, "loss": 0.1608, "step": 24154 }, { "epoch": 79.19672131147541, "grad_norm": 2.459346294403076, "learning_rate": 2.185233274322234e-06, "loss": 0.1201, "step": 24155 }, { "epoch": 79.2, "grad_norm": 2.9349312782287598, "learning_rate": 2.184570770499056e-06, "loss": 0.1558, "step": 24156 }, { "epoch": 79.20327868852459, "grad_norm": 2.135474920272827, "learning_rate": 2.1839083548029715e-06, "loss": 0.0507, "step": 24157 }, { "epoch": 79.20655737704918, "grad_norm": 2.5632941722869873, "learning_rate": 2.1832460272414466e-06, "loss": 0.0832, "step": 24158 }, { "epoch": 79.20983606557377, "grad_norm": 3.0896005630493164, "learning_rate": 2.182583787821948e-06, "loss": 0.1836, "step": 24159 }, { "epoch": 79.21311475409836, "grad_norm": 3.073272466659546, "learning_rate": 2.181921636551948e-06, "loss": 0.174, "step": 24160 }, { "epoch": 79.21639344262294, "grad_norm": 2.8492543697357178, "learning_rate": 2.181259573438911e-06, "loss": 0.1591, "step": 24161 }, { "epoch": 79.21967213114755, "grad_norm": 2.4327213764190674, "learning_rate": 2.1805975984903007e-06, "loss": 0.0951, "step": 24162 }, { "epoch": 79.22295081967214, "grad_norm": 2.3955934047698975, "learning_rate": 2.17993571171358e-06, "loss": 0.0562, "step": 24163 }, { "epoch": 79.22622950819672, "grad_norm": 2.7534351348876953, "learning_rate": 2.1792739131162177e-06, "loss": 0.1209, "step": 24164 }, { "epoch": 79.22950819672131, "grad_norm": 2.3869638442993164, "learning_rate": 2.1786122027056735e-06, "loss": 0.1288, "step": 24165 }, { "epoch": 79.2327868852459, "grad_norm": 2.131187677383423, "learning_rate": 2.1779505804894085e-06, "loss": 0.0358, "step": 24166 }, { "epoch": 79.23606557377049, "grad_norm": 2.5826539993286133, "learning_rate": 2.177289046474882e-06, "loss": 0.1826, "step": 24167 }, { "epoch": 79.23934426229508, "grad_norm": 2.6280605792999268, "learning_rate": 2.1766276006695573e-06, "loss": 0.0644, "step": 24168 }, { "epoch": 79.24262295081967, "grad_norm": 1.9555516242980957, "learning_rate": 2.1759662430808904e-06, "loss": 0.0495, "step": 24169 }, { "epoch": 79.24590163934427, "grad_norm": 2.477069616317749, "learning_rate": 2.175304973716339e-06, "loss": 0.2053, "step": 24170 }, { "epoch": 79.24918032786886, "grad_norm": 2.7279672622680664, "learning_rate": 2.1746437925833575e-06, "loss": 0.1291, "step": 24171 }, { "epoch": 79.25245901639344, "grad_norm": 1.7797930240631104, "learning_rate": 2.1739826996894063e-06, "loss": 0.0402, "step": 24172 }, { "epoch": 79.25573770491803, "grad_norm": 3.010312080383301, "learning_rate": 2.1733216950419366e-06, "loss": 0.2883, "step": 24173 }, { "epoch": 79.25901639344262, "grad_norm": 4.147518157958984, "learning_rate": 2.1726607786484035e-06, "loss": 0.2997, "step": 24174 }, { "epoch": 79.26229508196721, "grad_norm": 2.5798346996307373, "learning_rate": 2.171999950516255e-06, "loss": 0.1473, "step": 24175 }, { "epoch": 79.2655737704918, "grad_norm": 2.886537551879883, "learning_rate": 2.1713392106529485e-06, "loss": 0.1586, "step": 24176 }, { "epoch": 79.26885245901639, "grad_norm": 2.3715660572052, "learning_rate": 2.170678559065933e-06, "loss": 0.1276, "step": 24177 }, { "epoch": 79.27213114754099, "grad_norm": 2.364917278289795, "learning_rate": 2.1700179957626567e-06, "loss": 0.102, "step": 24178 }, { "epoch": 79.27540983606558, "grad_norm": 2.858935832977295, "learning_rate": 2.1693575207505677e-06, "loss": 0.0776, "step": 24179 }, { "epoch": 79.27868852459017, "grad_norm": 2.4053852558135986, "learning_rate": 2.1686971340371132e-06, "loss": 0.2255, "step": 24180 }, { "epoch": 79.28196721311475, "grad_norm": 3.2165238857269287, "learning_rate": 2.1680368356297433e-06, "loss": 0.2223, "step": 24181 }, { "epoch": 79.28524590163934, "grad_norm": 2.429722547531128, "learning_rate": 2.167376625535902e-06, "loss": 0.1048, "step": 24182 }, { "epoch": 79.28852459016393, "grad_norm": 2.715116500854492, "learning_rate": 2.166716503763032e-06, "loss": 0.0964, "step": 24183 }, { "epoch": 79.29180327868852, "grad_norm": 2.5889878273010254, "learning_rate": 2.166056470318576e-06, "loss": 0.0601, "step": 24184 }, { "epoch": 79.29508196721312, "grad_norm": 7.007682800292969, "learning_rate": 2.1653965252099808e-06, "loss": 0.119, "step": 24185 }, { "epoch": 79.29836065573771, "grad_norm": 2.527707815170288, "learning_rate": 2.1647366684446858e-06, "loss": 0.1237, "step": 24186 }, { "epoch": 79.3016393442623, "grad_norm": 2.616701364517212, "learning_rate": 2.164076900030132e-06, "loss": 0.1581, "step": 24187 }, { "epoch": 79.30491803278689, "grad_norm": 2.7381904125213623, "learning_rate": 2.163417219973755e-06, "loss": 0.193, "step": 24188 }, { "epoch": 79.30819672131148, "grad_norm": 1.9298818111419678, "learning_rate": 2.162757628283e-06, "loss": 0.0626, "step": 24189 }, { "epoch": 79.31147540983606, "grad_norm": 2.6883134841918945, "learning_rate": 2.1620981249653016e-06, "loss": 0.1162, "step": 24190 }, { "epoch": 79.31475409836065, "grad_norm": 3.2722625732421875, "learning_rate": 2.1614387100280954e-06, "loss": 0.1413, "step": 24191 }, { "epoch": 79.31803278688524, "grad_norm": 2.4276928901672363, "learning_rate": 2.1607793834788184e-06, "loss": 0.1031, "step": 24192 }, { "epoch": 79.32131147540984, "grad_norm": 1.7147847414016724, "learning_rate": 2.160120145324902e-06, "loss": 0.1082, "step": 24193 }, { "epoch": 79.32459016393443, "grad_norm": 2.9693386554718018, "learning_rate": 2.1594609955737855e-06, "loss": 0.074, "step": 24194 }, { "epoch": 79.32786885245902, "grad_norm": 1.8210352659225464, "learning_rate": 2.158801934232897e-06, "loss": 0.0465, "step": 24195 }, { "epoch": 79.33114754098361, "grad_norm": 3.5867178440093994, "learning_rate": 2.1581429613096706e-06, "loss": 0.1637, "step": 24196 }, { "epoch": 79.3344262295082, "grad_norm": 2.2213499546051025, "learning_rate": 2.1574840768115333e-06, "loss": 0.1624, "step": 24197 }, { "epoch": 79.33770491803278, "grad_norm": 2.1288535594940186, "learning_rate": 2.156825280745919e-06, "loss": 0.1376, "step": 24198 }, { "epoch": 79.34098360655737, "grad_norm": 2.588362693786621, "learning_rate": 2.1561665731202554e-06, "loss": 0.0597, "step": 24199 }, { "epoch": 79.34426229508196, "grad_norm": 2.506831169128418, "learning_rate": 2.1555079539419687e-06, "loss": 0.1227, "step": 24200 }, { "epoch": 79.34754098360656, "grad_norm": 2.538433313369751, "learning_rate": 2.1548494232184836e-06, "loss": 0.221, "step": 24201 }, { "epoch": 79.35081967213115, "grad_norm": 1.9354972839355469, "learning_rate": 2.15419098095723e-06, "loss": 0.0468, "step": 24202 }, { "epoch": 79.35409836065574, "grad_norm": 6.721894264221191, "learning_rate": 2.153532627165632e-06, "loss": 0.1148, "step": 24203 }, { "epoch": 79.35737704918033, "grad_norm": 3.7847084999084473, "learning_rate": 2.1528743618511116e-06, "loss": 0.1744, "step": 24204 }, { "epoch": 79.36065573770492, "grad_norm": 2.0635979175567627, "learning_rate": 2.1522161850210908e-06, "loss": 0.1526, "step": 24205 }, { "epoch": 79.3639344262295, "grad_norm": 2.6449639797210693, "learning_rate": 2.151558096682991e-06, "loss": 0.0939, "step": 24206 }, { "epoch": 79.3672131147541, "grad_norm": 1.8957005739212036, "learning_rate": 2.150900096844235e-06, "loss": 0.0496, "step": 24207 }, { "epoch": 79.37049180327868, "grad_norm": 2.6277694702148438, "learning_rate": 2.1502421855122425e-06, "loss": 0.0775, "step": 24208 }, { "epoch": 79.37377049180328, "grad_norm": 3.2335126399993896, "learning_rate": 2.149584362694428e-06, "loss": 0.0471, "step": 24209 }, { "epoch": 79.37704918032787, "grad_norm": 1.8807405233383179, "learning_rate": 2.1489266283982147e-06, "loss": 0.0319, "step": 24210 }, { "epoch": 79.38032786885246, "grad_norm": 2.672811508178711, "learning_rate": 2.1482689826310177e-06, "loss": 0.0808, "step": 24211 }, { "epoch": 79.38360655737705, "grad_norm": 2.9180562496185303, "learning_rate": 2.147611425400248e-06, "loss": 0.3352, "step": 24212 }, { "epoch": 79.38688524590164, "grad_norm": 2.379655361175537, "learning_rate": 2.146953956713327e-06, "loss": 0.148, "step": 24213 }, { "epoch": 79.39016393442623, "grad_norm": 3.453803777694702, "learning_rate": 2.1462965765776646e-06, "loss": 0.1615, "step": 24214 }, { "epoch": 79.39344262295081, "grad_norm": 1.8324447870254517, "learning_rate": 2.1456392850006725e-06, "loss": 0.0434, "step": 24215 }, { "epoch": 79.3967213114754, "grad_norm": 3.859588623046875, "learning_rate": 2.144982081989766e-06, "loss": 0.2502, "step": 24216 }, { "epoch": 79.4, "grad_norm": 1.6940507888793945, "learning_rate": 2.1443249675523536e-06, "loss": 0.0726, "step": 24217 }, { "epoch": 79.4032786885246, "grad_norm": 2.22882342338562, "learning_rate": 2.143667941695845e-06, "loss": 0.0496, "step": 24218 }, { "epoch": 79.40655737704918, "grad_norm": 1.738215446472168, "learning_rate": 2.1430110044276464e-06, "loss": 0.1975, "step": 24219 }, { "epoch": 79.40983606557377, "grad_norm": 2.09163498878479, "learning_rate": 2.142354155755171e-06, "loss": 0.0583, "step": 24220 }, { "epoch": 79.41311475409836, "grad_norm": 1.869728922843933, "learning_rate": 2.1416973956858224e-06, "loss": 0.0455, "step": 24221 }, { "epoch": 79.41639344262295, "grad_norm": 2.3839194774627686, "learning_rate": 2.141040724227006e-06, "loss": 0.0575, "step": 24222 }, { "epoch": 79.41967213114754, "grad_norm": 2.763488531112671, "learning_rate": 2.1403841413861236e-06, "loss": 0.1268, "step": 24223 }, { "epoch": 79.42295081967212, "grad_norm": 3.6822915077209473, "learning_rate": 2.1397276471705853e-06, "loss": 0.0751, "step": 24224 }, { "epoch": 79.42622950819673, "grad_norm": 2.78467059135437, "learning_rate": 2.13907124158779e-06, "loss": 0.1294, "step": 24225 }, { "epoch": 79.42950819672132, "grad_norm": 4.158019542694092, "learning_rate": 2.13841492464514e-06, "loss": 0.1512, "step": 24226 }, { "epoch": 79.4327868852459, "grad_norm": 1.7510018348693848, "learning_rate": 2.137758696350033e-06, "loss": 0.0429, "step": 24227 }, { "epoch": 79.43606557377049, "grad_norm": 5.556652069091797, "learning_rate": 2.1371025567098735e-06, "loss": 0.0664, "step": 24228 }, { "epoch": 79.43934426229508, "grad_norm": 2.1363353729248047, "learning_rate": 2.1364465057320584e-06, "loss": 0.0754, "step": 24229 }, { "epoch": 79.44262295081967, "grad_norm": 3.2396132946014404, "learning_rate": 2.1357905434239858e-06, "loss": 0.0973, "step": 24230 }, { "epoch": 79.44590163934426, "grad_norm": 21.541818618774414, "learning_rate": 2.1351346697930507e-06, "loss": 0.0276, "step": 24231 }, { "epoch": 79.44918032786886, "grad_norm": 2.7826037406921387, "learning_rate": 2.134478884846647e-06, "loss": 0.1241, "step": 24232 }, { "epoch": 79.45245901639345, "grad_norm": 2.614778995513916, "learning_rate": 2.1338231885921743e-06, "loss": 0.1095, "step": 24233 }, { "epoch": 79.45573770491804, "grad_norm": 1.994672179222107, "learning_rate": 2.1331675810370244e-06, "loss": 0.0988, "step": 24234 }, { "epoch": 79.45901639344262, "grad_norm": 2.3584794998168945, "learning_rate": 2.1325120621885896e-06, "loss": 0.2356, "step": 24235 }, { "epoch": 79.46229508196721, "grad_norm": 3.1189522743225098, "learning_rate": 2.131856632054259e-06, "loss": 0.1947, "step": 24236 }, { "epoch": 79.4655737704918, "grad_norm": 3.19537615776062, "learning_rate": 2.1312012906414282e-06, "loss": 0.071, "step": 24237 }, { "epoch": 79.46885245901639, "grad_norm": 2.927595376968384, "learning_rate": 2.1305460379574837e-06, "loss": 0.1283, "step": 24238 }, { "epoch": 79.47213114754098, "grad_norm": 2.4766411781311035, "learning_rate": 2.1298908740098157e-06, "loss": 0.0809, "step": 24239 }, { "epoch": 79.47540983606558, "grad_norm": 2.1844286918640137, "learning_rate": 2.1292357988058078e-06, "loss": 0.0491, "step": 24240 }, { "epoch": 79.47868852459017, "grad_norm": 2.348820447921753, "learning_rate": 2.1285808123528516e-06, "loss": 0.0884, "step": 24241 }, { "epoch": 79.48196721311476, "grad_norm": 2.082747459411621, "learning_rate": 2.127925914658332e-06, "loss": 0.1267, "step": 24242 }, { "epoch": 79.48524590163935, "grad_norm": 3.0147013664245605, "learning_rate": 2.1272711057296325e-06, "loss": 0.1654, "step": 24243 }, { "epoch": 79.48852459016393, "grad_norm": 2.20920991897583, "learning_rate": 2.1266163855741373e-06, "loss": 0.1865, "step": 24244 }, { "epoch": 79.49180327868852, "grad_norm": 1.7707493305206299, "learning_rate": 2.125961754199225e-06, "loss": 0.1106, "step": 24245 }, { "epoch": 79.49508196721311, "grad_norm": 1.6821486949920654, "learning_rate": 2.1253072116122843e-06, "loss": 0.0401, "step": 24246 }, { "epoch": 79.4983606557377, "grad_norm": 2.2795822620391846, "learning_rate": 2.124652757820692e-06, "loss": 0.0648, "step": 24247 }, { "epoch": 79.5016393442623, "grad_norm": 2.8284454345703125, "learning_rate": 2.1239983928318287e-06, "loss": 0.2232, "step": 24248 }, { "epoch": 79.50491803278689, "grad_norm": 2.335540533065796, "learning_rate": 2.1233441166530688e-06, "loss": 0.0678, "step": 24249 }, { "epoch": 79.50819672131148, "grad_norm": 2.9970033168792725, "learning_rate": 2.1226899292917967e-06, "loss": 0.1118, "step": 24250 }, { "epoch": 79.51147540983607, "grad_norm": 2.6458818912506104, "learning_rate": 2.122035830755387e-06, "loss": 0.1172, "step": 24251 }, { "epoch": 79.51475409836065, "grad_norm": 2.323821544647217, "learning_rate": 2.121381821051214e-06, "loss": 0.0683, "step": 24252 }, { "epoch": 79.51803278688524, "grad_norm": 1.9033889770507812, "learning_rate": 2.1207279001866487e-06, "loss": 0.0634, "step": 24253 }, { "epoch": 79.52131147540983, "grad_norm": 2.788053274154663, "learning_rate": 2.1200740681690722e-06, "loss": 0.1294, "step": 24254 }, { "epoch": 79.52459016393442, "grad_norm": 2.836967945098877, "learning_rate": 2.119420325005854e-06, "loss": 0.0856, "step": 24255 }, { "epoch": 79.52786885245902, "grad_norm": 3.4356067180633545, "learning_rate": 2.1187666707043654e-06, "loss": 0.0678, "step": 24256 }, { "epoch": 79.53114754098361, "grad_norm": 3.0633561611175537, "learning_rate": 2.1181131052719773e-06, "loss": 0.124, "step": 24257 }, { "epoch": 79.5344262295082, "grad_norm": 1.9868831634521484, "learning_rate": 2.1174596287160555e-06, "loss": 0.0768, "step": 24258 }, { "epoch": 79.53770491803279, "grad_norm": 1.5868903398513794, "learning_rate": 2.116806241043975e-06, "loss": 0.0972, "step": 24259 }, { "epoch": 79.54098360655738, "grad_norm": 2.312819480895996, "learning_rate": 2.116152942263101e-06, "loss": 0.1118, "step": 24260 }, { "epoch": 79.54426229508196, "grad_norm": 2.4242055416107178, "learning_rate": 2.115499732380797e-06, "loss": 0.0584, "step": 24261 }, { "epoch": 79.54754098360655, "grad_norm": 2.518528938293457, "learning_rate": 2.114846611404433e-06, "loss": 0.1792, "step": 24262 }, { "epoch": 79.55081967213114, "grad_norm": 2.940027952194214, "learning_rate": 2.1141935793413726e-06, "loss": 0.0863, "step": 24263 }, { "epoch": 79.55409836065574, "grad_norm": 1.926546335220337, "learning_rate": 2.1135406361989763e-06, "loss": 0.0996, "step": 24264 }, { "epoch": 79.55737704918033, "grad_norm": 2.8060176372528076, "learning_rate": 2.112887781984613e-06, "loss": 0.1407, "step": 24265 }, { "epoch": 79.56065573770492, "grad_norm": 3.7259888648986816, "learning_rate": 2.1122350167056384e-06, "loss": 0.0969, "step": 24266 }, { "epoch": 79.56393442622951, "grad_norm": 2.1413350105285645, "learning_rate": 2.111582340369417e-06, "loss": 0.1385, "step": 24267 }, { "epoch": 79.5672131147541, "grad_norm": 3.046454668045044, "learning_rate": 2.1109297529833027e-06, "loss": 0.1404, "step": 24268 }, { "epoch": 79.57049180327868, "grad_norm": 2.3850576877593994, "learning_rate": 2.110277254554661e-06, "loss": 0.0731, "step": 24269 }, { "epoch": 79.57377049180327, "grad_norm": 1.890297293663025, "learning_rate": 2.1096248450908463e-06, "loss": 0.043, "step": 24270 }, { "epoch": 79.57704918032788, "grad_norm": 2.439760446548462, "learning_rate": 2.108972524599213e-06, "loss": 0.082, "step": 24271 }, { "epoch": 79.58032786885246, "grad_norm": 2.3159425258636475, "learning_rate": 2.1083202930871216e-06, "loss": 0.0849, "step": 24272 }, { "epoch": 79.58360655737705, "grad_norm": 2.2544829845428467, "learning_rate": 2.1076681505619247e-06, "loss": 0.1499, "step": 24273 }, { "epoch": 79.58688524590164, "grad_norm": 4.000511646270752, "learning_rate": 2.107016097030975e-06, "loss": 0.2399, "step": 24274 }, { "epoch": 79.59016393442623, "grad_norm": 1.8430100679397583, "learning_rate": 2.106364132501623e-06, "loss": 0.1008, "step": 24275 }, { "epoch": 79.59344262295082, "grad_norm": 1.9912567138671875, "learning_rate": 2.105712256981225e-06, "loss": 0.0894, "step": 24276 }, { "epoch": 79.5967213114754, "grad_norm": 2.8573315143585205, "learning_rate": 2.1050604704771294e-06, "loss": 0.1307, "step": 24277 }, { "epoch": 79.6, "grad_norm": 2.590266704559326, "learning_rate": 2.1044087729966856e-06, "loss": 0.0637, "step": 24278 }, { "epoch": 79.6032786885246, "grad_norm": 2.190274953842163, "learning_rate": 2.1037571645472397e-06, "loss": 0.0724, "step": 24279 }, { "epoch": 79.60655737704919, "grad_norm": 2.331165313720703, "learning_rate": 2.103105645136145e-06, "loss": 0.1321, "step": 24280 }, { "epoch": 79.60983606557377, "grad_norm": 2.143524169921875, "learning_rate": 2.102454214770745e-06, "loss": 0.0741, "step": 24281 }, { "epoch": 79.61311475409836, "grad_norm": 1.7108325958251953, "learning_rate": 2.101802873458384e-06, "loss": 0.0312, "step": 24282 }, { "epoch": 79.61639344262295, "grad_norm": 2.8070502281188965, "learning_rate": 2.101151621206409e-06, "loss": 0.2224, "step": 24283 }, { "epoch": 79.61967213114754, "grad_norm": 2.142369031906128, "learning_rate": 2.1005004580221578e-06, "loss": 0.0783, "step": 24284 }, { "epoch": 79.62295081967213, "grad_norm": 2.2076544761657715, "learning_rate": 2.0998493839129807e-06, "loss": 0.0705, "step": 24285 }, { "epoch": 79.62622950819672, "grad_norm": 2.367607355117798, "learning_rate": 2.0991983988862163e-06, "loss": 0.0899, "step": 24286 }, { "epoch": 79.62950819672132, "grad_norm": 2.1642444133758545, "learning_rate": 2.098547502949205e-06, "loss": 0.0414, "step": 24287 }, { "epoch": 79.6327868852459, "grad_norm": 2.3058888912200928, "learning_rate": 2.0978966961092826e-06, "loss": 0.153, "step": 24288 }, { "epoch": 79.6360655737705, "grad_norm": 3.646348714828491, "learning_rate": 2.097245978373794e-06, "loss": 0.0924, "step": 24289 }, { "epoch": 79.63934426229508, "grad_norm": 2.4678733348846436, "learning_rate": 2.0965953497500747e-06, "loss": 0.1114, "step": 24290 }, { "epoch": 79.64262295081967, "grad_norm": 2.3022384643554688, "learning_rate": 2.0959448102454594e-06, "loss": 0.0417, "step": 24291 }, { "epoch": 79.64590163934426, "grad_norm": 2.883988380432129, "learning_rate": 2.0952943598672847e-06, "loss": 0.2234, "step": 24292 }, { "epoch": 79.64918032786885, "grad_norm": 2.062870979309082, "learning_rate": 2.0946439986228817e-06, "loss": 0.0815, "step": 24293 }, { "epoch": 79.65245901639344, "grad_norm": 2.580900192260742, "learning_rate": 2.093993726519591e-06, "loss": 0.101, "step": 24294 }, { "epoch": 79.65573770491804, "grad_norm": 1.7030521631240845, "learning_rate": 2.0933435435647398e-06, "loss": 0.111, "step": 24295 }, { "epoch": 79.65901639344263, "grad_norm": 3.5514190196990967, "learning_rate": 2.0926934497656616e-06, "loss": 0.183, "step": 24296 }, { "epoch": 79.66229508196722, "grad_norm": 2.4416756629943848, "learning_rate": 2.0920434451296845e-06, "loss": 0.1279, "step": 24297 }, { "epoch": 79.6655737704918, "grad_norm": 2.408076524734497, "learning_rate": 2.091393529664141e-06, "loss": 0.0717, "step": 24298 }, { "epoch": 79.66885245901639, "grad_norm": 2.010749101638794, "learning_rate": 2.0907437033763587e-06, "loss": 0.1097, "step": 24299 }, { "epoch": 79.67213114754098, "grad_norm": 3.589460849761963, "learning_rate": 2.0900939662736654e-06, "loss": 0.1082, "step": 24300 }, { "epoch": 79.67540983606557, "grad_norm": 2.254276990890503, "learning_rate": 2.089444318363384e-06, "loss": 0.1083, "step": 24301 }, { "epoch": 79.67868852459016, "grad_norm": 2.661068916320801, "learning_rate": 2.0887947596528455e-06, "loss": 0.1839, "step": 24302 }, { "epoch": 79.68196721311476, "grad_norm": 1.96619713306427, "learning_rate": 2.0881452901493714e-06, "loss": 0.2634, "step": 24303 }, { "epoch": 79.68524590163935, "grad_norm": 3.0253355503082275, "learning_rate": 2.0874959098602854e-06, "loss": 0.0829, "step": 24304 }, { "epoch": 79.68852459016394, "grad_norm": 3.3845725059509277, "learning_rate": 2.0868466187929105e-06, "loss": 0.1916, "step": 24305 }, { "epoch": 79.69180327868852, "grad_norm": 2.8983407020568848, "learning_rate": 2.086197416954564e-06, "loss": 0.0773, "step": 24306 }, { "epoch": 79.69508196721311, "grad_norm": 2.210940361022949, "learning_rate": 2.0855483043525737e-06, "loss": 0.1264, "step": 24307 }, { "epoch": 79.6983606557377, "grad_norm": 1.8987269401550293, "learning_rate": 2.0848992809942537e-06, "loss": 0.1934, "step": 24308 }, { "epoch": 79.70163934426229, "grad_norm": 2.6660425662994385, "learning_rate": 2.084250346886926e-06, "loss": 0.0991, "step": 24309 }, { "epoch": 79.70491803278688, "grad_norm": 1.9320068359375, "learning_rate": 2.0836015020379018e-06, "loss": 0.0828, "step": 24310 }, { "epoch": 79.70819672131148, "grad_norm": 3.1888587474823, "learning_rate": 2.082952746454504e-06, "loss": 0.1147, "step": 24311 }, { "epoch": 79.71147540983607, "grad_norm": 2.7001657485961914, "learning_rate": 2.0823040801440464e-06, "loss": 0.1096, "step": 24312 }, { "epoch": 79.71475409836066, "grad_norm": 2.0617125034332275, "learning_rate": 2.081655503113843e-06, "loss": 0.0513, "step": 24313 }, { "epoch": 79.71803278688525, "grad_norm": 2.8580758571624756, "learning_rate": 2.0810070153712035e-06, "loss": 0.2902, "step": 24314 }, { "epoch": 79.72131147540983, "grad_norm": 2.4553232192993164, "learning_rate": 2.080358616923447e-06, "loss": 0.0759, "step": 24315 }, { "epoch": 79.72459016393442, "grad_norm": 2.1615307331085205, "learning_rate": 2.0797103077778803e-06, "loss": 0.1299, "step": 24316 }, { "epoch": 79.72786885245901, "grad_norm": 2.05161714553833, "learning_rate": 2.0790620879418133e-06, "loss": 0.0718, "step": 24317 }, { "epoch": 79.73114754098361, "grad_norm": 2.6388776302337646, "learning_rate": 2.0784139574225593e-06, "loss": 0.0472, "step": 24318 }, { "epoch": 79.7344262295082, "grad_norm": 1.9996812343597412, "learning_rate": 2.0777659162274244e-06, "loss": 0.1101, "step": 24319 }, { "epoch": 79.73770491803279, "grad_norm": 2.6982433795928955, "learning_rate": 2.077117964363713e-06, "loss": 0.0603, "step": 24320 }, { "epoch": 79.74098360655738, "grad_norm": 2.7874982357025146, "learning_rate": 2.076470101838737e-06, "loss": 0.0945, "step": 24321 }, { "epoch": 79.74426229508197, "grad_norm": 2.712820291519165, "learning_rate": 2.075822328659799e-06, "loss": 0.1086, "step": 24322 }, { "epoch": 79.74754098360656, "grad_norm": 2.7927486896514893, "learning_rate": 2.0751746448342004e-06, "loss": 0.1889, "step": 24323 }, { "epoch": 79.75081967213114, "grad_norm": 2.2560441493988037, "learning_rate": 2.0745270503692503e-06, "loss": 0.0675, "step": 24324 }, { "epoch": 79.75409836065573, "grad_norm": 2.5255730152130127, "learning_rate": 2.0738795452722482e-06, "loss": 0.1561, "step": 24325 }, { "epoch": 79.75737704918033, "grad_norm": 2.2604775428771973, "learning_rate": 2.0732321295504955e-06, "loss": 0.1157, "step": 24326 }, { "epoch": 79.76065573770492, "grad_norm": 2.9314980506896973, "learning_rate": 2.0725848032112893e-06, "loss": 0.1997, "step": 24327 }, { "epoch": 79.76393442622951, "grad_norm": 2.4066050052642822, "learning_rate": 2.0719375662619345e-06, "loss": 0.1052, "step": 24328 }, { "epoch": 79.7672131147541, "grad_norm": 2.6367673873901367, "learning_rate": 2.071290418709727e-06, "loss": 0.131, "step": 24329 }, { "epoch": 79.77049180327869, "grad_norm": 2.2216808795928955, "learning_rate": 2.0706433605619635e-06, "loss": 0.141, "step": 24330 }, { "epoch": 79.77377049180328, "grad_norm": 2.623192071914673, "learning_rate": 2.069996391825941e-06, "loss": 0.1475, "step": 24331 }, { "epoch": 79.77704918032786, "grad_norm": 3.002357244491577, "learning_rate": 2.0693495125089515e-06, "loss": 0.1149, "step": 24332 }, { "epoch": 79.78032786885245, "grad_norm": 1.6783649921417236, "learning_rate": 2.0687027226182944e-06, "loss": 0.0282, "step": 24333 }, { "epoch": 79.78360655737706, "grad_norm": 2.313398838043213, "learning_rate": 2.068056022161261e-06, "loss": 0.1353, "step": 24334 }, { "epoch": 79.78688524590164, "grad_norm": 2.728463649749756, "learning_rate": 2.0674094111451436e-06, "loss": 0.1458, "step": 24335 }, { "epoch": 79.79016393442623, "grad_norm": 3.0447752475738525, "learning_rate": 2.0667628895772295e-06, "loss": 0.1912, "step": 24336 }, { "epoch": 79.79344262295082, "grad_norm": 3.135317325592041, "learning_rate": 2.066116457464815e-06, "loss": 0.0727, "step": 24337 }, { "epoch": 79.79672131147541, "grad_norm": 1.7872893810272217, "learning_rate": 2.065470114815187e-06, "loss": 0.186, "step": 24338 }, { "epoch": 79.8, "grad_norm": 2.5320680141448975, "learning_rate": 2.064823861635633e-06, "loss": 0.1029, "step": 24339 }, { "epoch": 79.80327868852459, "grad_norm": 2.6941685676574707, "learning_rate": 2.064177697933437e-06, "loss": 0.1215, "step": 24340 }, { "epoch": 79.80655737704917, "grad_norm": 2.0736868381500244, "learning_rate": 2.063531623715893e-06, "loss": 0.0417, "step": 24341 }, { "epoch": 79.80983606557378, "grad_norm": 5.093869686126709, "learning_rate": 2.0628856389902806e-06, "loss": 0.2153, "step": 24342 }, { "epoch": 79.81311475409836, "grad_norm": 3.04056453704834, "learning_rate": 2.0622397437638854e-06, "loss": 0.203, "step": 24343 }, { "epoch": 79.81639344262295, "grad_norm": 2.1808300018310547, "learning_rate": 2.0615939380439908e-06, "loss": 0.0509, "step": 24344 }, { "epoch": 79.81967213114754, "grad_norm": 2.759850025177002, "learning_rate": 2.060948221837875e-06, "loss": 0.1511, "step": 24345 }, { "epoch": 79.82295081967213, "grad_norm": 2.6987569332122803, "learning_rate": 2.0603025951528257e-06, "loss": 0.0951, "step": 24346 }, { "epoch": 79.82622950819672, "grad_norm": 2.7720131874084473, "learning_rate": 2.0596570579961196e-06, "loss": 0.0862, "step": 24347 }, { "epoch": 79.8295081967213, "grad_norm": 2.055327892303467, "learning_rate": 2.0590116103750366e-06, "loss": 0.1247, "step": 24348 }, { "epoch": 79.8327868852459, "grad_norm": 2.301920175552368, "learning_rate": 2.0583662522968508e-06, "loss": 0.0754, "step": 24349 }, { "epoch": 79.8360655737705, "grad_norm": 1.7037569284439087, "learning_rate": 2.057720983768846e-06, "loss": 0.1169, "step": 24350 }, { "epoch": 79.83934426229509, "grad_norm": 2.6857621669769287, "learning_rate": 2.0570758047982943e-06, "loss": 0.1224, "step": 24351 }, { "epoch": 79.84262295081967, "grad_norm": 2.3716602325439453, "learning_rate": 2.0564307153924723e-06, "loss": 0.0884, "step": 24352 }, { "epoch": 79.84590163934426, "grad_norm": 2.7174479961395264, "learning_rate": 2.0557857155586502e-06, "loss": 0.1257, "step": 24353 }, { "epoch": 79.84918032786885, "grad_norm": 1.9955748319625854, "learning_rate": 2.0551408053041066e-06, "loss": 0.1, "step": 24354 }, { "epoch": 79.85245901639344, "grad_norm": 2.71626353263855, "learning_rate": 2.0544959846361114e-06, "loss": 0.061, "step": 24355 }, { "epoch": 79.85573770491803, "grad_norm": 2.3380136489868164, "learning_rate": 2.053851253561935e-06, "loss": 0.1312, "step": 24356 }, { "epoch": 79.85901639344263, "grad_norm": 2.5689873695373535, "learning_rate": 2.0532066120888473e-06, "loss": 0.1168, "step": 24357 }, { "epoch": 79.86229508196722, "grad_norm": 3.205213785171509, "learning_rate": 2.0525620602241157e-06, "loss": 0.1022, "step": 24358 }, { "epoch": 79.8655737704918, "grad_norm": 2.595858573913574, "learning_rate": 2.0519175979750116e-06, "loss": 0.0795, "step": 24359 }, { "epoch": 79.8688524590164, "grad_norm": 2.0272552967071533, "learning_rate": 2.051273225348802e-06, "loss": 0.1456, "step": 24360 }, { "epoch": 79.87213114754098, "grad_norm": 2.506054162979126, "learning_rate": 2.0506289423527503e-06, "loss": 0.1968, "step": 24361 }, { "epoch": 79.87540983606557, "grad_norm": 1.7343170642852783, "learning_rate": 2.0499847489941207e-06, "loss": 0.0409, "step": 24362 }, { "epoch": 79.87868852459016, "grad_norm": 3.0855422019958496, "learning_rate": 2.049340645280181e-06, "loss": 0.2318, "step": 24363 }, { "epoch": 79.88196721311475, "grad_norm": 2.589174270629883, "learning_rate": 2.048696631218192e-06, "loss": 0.2034, "step": 24364 }, { "epoch": 79.88524590163935, "grad_norm": 2.5575778484344482, "learning_rate": 2.0480527068154167e-06, "loss": 0.2519, "step": 24365 }, { "epoch": 79.88852459016394, "grad_norm": 2.186823606491089, "learning_rate": 2.0474088720791117e-06, "loss": 0.2539, "step": 24366 }, { "epoch": 79.89180327868853, "grad_norm": 2.365656852722168, "learning_rate": 2.0467651270165433e-06, "loss": 0.0666, "step": 24367 }, { "epoch": 79.89508196721312, "grad_norm": 2.8535797595977783, "learning_rate": 2.0461214716349675e-06, "loss": 0.2729, "step": 24368 }, { "epoch": 79.8983606557377, "grad_norm": 1.6822773218154907, "learning_rate": 2.045477905941642e-06, "loss": 0.0791, "step": 24369 }, { "epoch": 79.90163934426229, "grad_norm": 2.2914788722991943, "learning_rate": 2.0448344299438206e-06, "loss": 0.1169, "step": 24370 }, { "epoch": 79.90491803278688, "grad_norm": 2.2481722831726074, "learning_rate": 2.0441910436487646e-06, "loss": 0.1129, "step": 24371 }, { "epoch": 79.90819672131147, "grad_norm": 2.6342360973358154, "learning_rate": 2.043547747063728e-06, "loss": 0.1641, "step": 24372 }, { "epoch": 79.91147540983607, "grad_norm": 3.458142042160034, "learning_rate": 2.042904540195959e-06, "loss": 0.1303, "step": 24373 }, { "epoch": 79.91475409836066, "grad_norm": 2.861983299255371, "learning_rate": 2.0422614230527183e-06, "loss": 0.1818, "step": 24374 }, { "epoch": 79.91803278688525, "grad_norm": 2.8495047092437744, "learning_rate": 2.041618395641254e-06, "loss": 0.1551, "step": 24375 }, { "epoch": 79.92131147540984, "grad_norm": 3.080474376678467, "learning_rate": 2.0409754579688137e-06, "loss": 0.1387, "step": 24376 }, { "epoch": 79.92459016393443, "grad_norm": 1.536850929260254, "learning_rate": 2.0403326100426533e-06, "loss": 0.0903, "step": 24377 }, { "epoch": 79.92786885245901, "grad_norm": 2.3803422451019287, "learning_rate": 2.0396898518700183e-06, "loss": 0.097, "step": 24378 }, { "epoch": 79.9311475409836, "grad_norm": 2.610060453414917, "learning_rate": 2.039047183458155e-06, "loss": 0.1704, "step": 24379 }, { "epoch": 79.93442622950819, "grad_norm": 2.731524705886841, "learning_rate": 2.0384046048143146e-06, "loss": 0.1888, "step": 24380 }, { "epoch": 79.9377049180328, "grad_norm": 2.6955997943878174, "learning_rate": 2.0377621159457395e-06, "loss": 0.0467, "step": 24381 }, { "epoch": 79.94098360655738, "grad_norm": 3.3338754177093506, "learning_rate": 2.0371197168596757e-06, "loss": 0.1814, "step": 24382 }, { "epoch": 79.94426229508197, "grad_norm": 8.866854667663574, "learning_rate": 2.0364774075633665e-06, "loss": 0.1233, "step": 24383 }, { "epoch": 79.94754098360656, "grad_norm": 2.839071035385132, "learning_rate": 2.0358351880640516e-06, "loss": 0.08, "step": 24384 }, { "epoch": 79.95081967213115, "grad_norm": 4.525280952453613, "learning_rate": 2.035193058368978e-06, "loss": 0.1508, "step": 24385 }, { "epoch": 79.95409836065573, "grad_norm": 2.766540050506592, "learning_rate": 2.0345510184853846e-06, "loss": 0.1162, "step": 24386 }, { "epoch": 79.95737704918032, "grad_norm": 2.4841253757476807, "learning_rate": 2.0339090684205108e-06, "loss": 0.1304, "step": 24387 }, { "epoch": 79.96065573770491, "grad_norm": 3.4658799171447754, "learning_rate": 2.0332672081815917e-06, "loss": 0.3136, "step": 24388 }, { "epoch": 79.96393442622951, "grad_norm": 2.8013041019439697, "learning_rate": 2.0326254377758704e-06, "loss": 0.1503, "step": 24389 }, { "epoch": 79.9672131147541, "grad_norm": 2.3712410926818848, "learning_rate": 2.0319837572105817e-06, "loss": 0.2161, "step": 24390 }, { "epoch": 79.97049180327869, "grad_norm": 1.671574592590332, "learning_rate": 2.031342166492961e-06, "loss": 0.1065, "step": 24391 }, { "epoch": 79.97377049180328, "grad_norm": 1.9919546842575073, "learning_rate": 2.0307006656302396e-06, "loss": 0.1214, "step": 24392 }, { "epoch": 79.97704918032787, "grad_norm": 2.168124198913574, "learning_rate": 2.0300592546296572e-06, "loss": 0.2296, "step": 24393 }, { "epoch": 79.98032786885246, "grad_norm": 2.358072519302368, "learning_rate": 2.029417933498443e-06, "loss": 0.0896, "step": 24394 }, { "epoch": 79.98360655737704, "grad_norm": 2.6439764499664307, "learning_rate": 2.02877670224383e-06, "loss": 0.0624, "step": 24395 }, { "epoch": 79.98688524590163, "grad_norm": 2.290701389312744, "learning_rate": 2.028135560873047e-06, "loss": 0.1205, "step": 24396 }, { "epoch": 79.99016393442623, "grad_norm": 2.6331706047058105, "learning_rate": 2.0274945093933205e-06, "loss": 0.1264, "step": 24397 }, { "epoch": 79.99344262295082, "grad_norm": 4.781612873077393, "learning_rate": 2.0268535478118868e-06, "loss": 0.0949, "step": 24398 }, { "epoch": 79.99672131147541, "grad_norm": 1.462594747543335, "learning_rate": 2.026212676135969e-06, "loss": 0.0698, "step": 24399 }, { "epoch": 80.0, "grad_norm": 2.7310879230499268, "learning_rate": 2.025571894372794e-06, "loss": 0.096, "step": 24400 }, { "epoch": 80.00327868852459, "grad_norm": 2.882734775543213, "learning_rate": 2.0249312025295842e-06, "loss": 0.1591, "step": 24401 }, { "epoch": 80.00655737704918, "grad_norm": 2.5843751430511475, "learning_rate": 2.02429060061357e-06, "loss": 0.1626, "step": 24402 }, { "epoch": 80.00983606557377, "grad_norm": 2.742837429046631, "learning_rate": 2.023650088631972e-06, "loss": 0.0767, "step": 24403 }, { "epoch": 80.01311475409837, "grad_norm": 2.0576131343841553, "learning_rate": 2.0230096665920117e-06, "loss": 0.1648, "step": 24404 }, { "epoch": 80.01639344262296, "grad_norm": 2.0688042640686035, "learning_rate": 2.0223693345009097e-06, "loss": 0.0534, "step": 24405 }, { "epoch": 80.01967213114754, "grad_norm": 1.852074384689331, "learning_rate": 2.0217290923658904e-06, "loss": 0.0536, "step": 24406 }, { "epoch": 80.02295081967213, "grad_norm": 2.1164677143096924, "learning_rate": 2.0210889401941714e-06, "loss": 0.0698, "step": 24407 }, { "epoch": 80.02622950819672, "grad_norm": 2.531789541244507, "learning_rate": 2.02044887799297e-06, "loss": 0.0674, "step": 24408 }, { "epoch": 80.02950819672131, "grad_norm": 3.5849504470825195, "learning_rate": 2.0198089057695046e-06, "loss": 0.1742, "step": 24409 }, { "epoch": 80.0327868852459, "grad_norm": 2.6146738529205322, "learning_rate": 2.019169023530988e-06, "loss": 0.0825, "step": 24410 }, { "epoch": 80.03606557377049, "grad_norm": 2.7191967964172363, "learning_rate": 2.0185292312846417e-06, "loss": 0.2243, "step": 24411 }, { "epoch": 80.03934426229509, "grad_norm": 2.1982550621032715, "learning_rate": 2.0178895290376767e-06, "loss": 0.0424, "step": 24412 }, { "epoch": 80.04262295081968, "grad_norm": 2.76072359085083, "learning_rate": 2.0172499167973068e-06, "loss": 0.0884, "step": 24413 }, { "epoch": 80.04590163934427, "grad_norm": 2.2919375896453857, "learning_rate": 2.0166103945707415e-06, "loss": 0.2068, "step": 24414 }, { "epoch": 80.04918032786885, "grad_norm": 1.8036391735076904, "learning_rate": 2.0159709623651967e-06, "loss": 0.0488, "step": 24415 }, { "epoch": 80.05245901639344, "grad_norm": 2.883948564529419, "learning_rate": 2.0153316201878816e-06, "loss": 0.1696, "step": 24416 }, { "epoch": 80.05573770491803, "grad_norm": 2.88038969039917, "learning_rate": 2.014692368046003e-06, "loss": 0.1537, "step": 24417 }, { "epoch": 80.05901639344262, "grad_norm": 2.070063591003418, "learning_rate": 2.014053205946769e-06, "loss": 0.0532, "step": 24418 }, { "epoch": 80.0622950819672, "grad_norm": 2.7261083126068115, "learning_rate": 2.013414133897391e-06, "loss": 0.1216, "step": 24419 }, { "epoch": 80.06557377049181, "grad_norm": 2.907719373703003, "learning_rate": 2.012775151905072e-06, "loss": 0.2586, "step": 24420 }, { "epoch": 80.0688524590164, "grad_norm": 2.8094775676727295, "learning_rate": 2.0121362599770187e-06, "loss": 0.1512, "step": 24421 }, { "epoch": 80.07213114754099, "grad_norm": 2.8513357639312744, "learning_rate": 2.0114974581204303e-06, "loss": 0.172, "step": 24422 }, { "epoch": 80.07540983606557, "grad_norm": 7.584289073944092, "learning_rate": 2.0108587463425187e-06, "loss": 0.149, "step": 24423 }, { "epoch": 80.07868852459016, "grad_norm": 2.2645411491394043, "learning_rate": 2.0102201246504806e-06, "loss": 0.0617, "step": 24424 }, { "epoch": 80.08196721311475, "grad_norm": 2.639848470687866, "learning_rate": 2.009581593051514e-06, "loss": 0.1717, "step": 24425 }, { "epoch": 80.08524590163934, "grad_norm": 2.5570218563079834, "learning_rate": 2.008943151552827e-06, "loss": 0.1916, "step": 24426 }, { "epoch": 80.08852459016393, "grad_norm": 2.1817233562469482, "learning_rate": 2.0083048001616134e-06, "loss": 0.1439, "step": 24427 }, { "epoch": 80.09180327868853, "grad_norm": 2.3256707191467285, "learning_rate": 2.0076665388850734e-06, "loss": 0.0322, "step": 24428 }, { "epoch": 80.09508196721312, "grad_norm": 2.0871617794036865, "learning_rate": 2.0070283677304004e-06, "loss": 0.0654, "step": 24429 }, { "epoch": 80.09836065573771, "grad_norm": 2.816941499710083, "learning_rate": 2.006390286704796e-06, "loss": 0.0678, "step": 24430 }, { "epoch": 80.1016393442623, "grad_norm": 2.85770845413208, "learning_rate": 2.005752295815452e-06, "loss": 0.074, "step": 24431 }, { "epoch": 80.10491803278688, "grad_norm": 2.9054486751556396, "learning_rate": 2.0051143950695595e-06, "loss": 0.1349, "step": 24432 }, { "epoch": 80.10819672131147, "grad_norm": 2.579493999481201, "learning_rate": 2.0044765844743175e-06, "loss": 0.2306, "step": 24433 }, { "epoch": 80.11147540983606, "grad_norm": 2.3466527462005615, "learning_rate": 2.003838864036917e-06, "loss": 0.1086, "step": 24434 }, { "epoch": 80.11475409836065, "grad_norm": 2.8095436096191406, "learning_rate": 2.0032012337645458e-06, "loss": 0.1994, "step": 24435 }, { "epoch": 80.11803278688525, "grad_norm": 2.2972848415374756, "learning_rate": 2.0025636936643923e-06, "loss": 0.1069, "step": 24436 }, { "epoch": 80.12131147540984, "grad_norm": 2.3786165714263916, "learning_rate": 2.0019262437436516e-06, "loss": 0.116, "step": 24437 }, { "epoch": 80.12459016393443, "grad_norm": 1.605781078338623, "learning_rate": 2.001288884009509e-06, "loss": 0.0152, "step": 24438 }, { "epoch": 80.12786885245902, "grad_norm": 2.7239627838134766, "learning_rate": 2.00065161446915e-06, "loss": 0.2507, "step": 24439 }, { "epoch": 80.1311475409836, "grad_norm": 2.683753490447998, "learning_rate": 2.000014435129759e-06, "loss": 0.0733, "step": 24440 }, { "epoch": 80.1344262295082, "grad_norm": 2.686152696609497, "learning_rate": 1.9993773459985254e-06, "loss": 0.0511, "step": 24441 }, { "epoch": 80.13770491803278, "grad_norm": 2.0213937759399414, "learning_rate": 1.9987403470826306e-06, "loss": 0.0751, "step": 24442 }, { "epoch": 80.14098360655737, "grad_norm": 2.401531934738159, "learning_rate": 1.998103438389258e-06, "loss": 0.1218, "step": 24443 }, { "epoch": 80.14426229508197, "grad_norm": 3.3472487926483154, "learning_rate": 1.9974666199255864e-06, "loss": 0.0918, "step": 24444 }, { "epoch": 80.14754098360656, "grad_norm": 3.1057088375091553, "learning_rate": 1.996829891698803e-06, "loss": 0.1751, "step": 24445 }, { "epoch": 80.15081967213115, "grad_norm": 1.8396167755126953, "learning_rate": 1.996193253716082e-06, "loss": 0.0551, "step": 24446 }, { "epoch": 80.15409836065574, "grad_norm": 2.2852225303649902, "learning_rate": 1.9955567059846046e-06, "loss": 0.1008, "step": 24447 }, { "epoch": 80.15737704918033, "grad_norm": 1.95900297164917, "learning_rate": 1.994920248511548e-06, "loss": 0.093, "step": 24448 }, { "epoch": 80.16065573770491, "grad_norm": 2.814753532409668, "learning_rate": 1.9942838813040857e-06, "loss": 0.1773, "step": 24449 }, { "epoch": 80.1639344262295, "grad_norm": 2.1418309211730957, "learning_rate": 1.9936476043693997e-06, "loss": 0.0503, "step": 24450 }, { "epoch": 80.1672131147541, "grad_norm": 2.7247438430786133, "learning_rate": 1.993011417714661e-06, "loss": 0.1677, "step": 24451 }, { "epoch": 80.1704918032787, "grad_norm": 2.3552401065826416, "learning_rate": 1.992375321347044e-06, "loss": 0.051, "step": 24452 }, { "epoch": 80.17377049180328, "grad_norm": 2.364596128463745, "learning_rate": 1.9917393152737186e-06, "loss": 0.1817, "step": 24453 }, { "epoch": 80.17704918032787, "grad_norm": 1.8343759775161743, "learning_rate": 1.9911033995018615e-06, "loss": 0.0385, "step": 24454 }, { "epoch": 80.18032786885246, "grad_norm": 2.44130802154541, "learning_rate": 1.9904675740386405e-06, "loss": 0.1323, "step": 24455 }, { "epoch": 80.18360655737705, "grad_norm": 1.9608794450759888, "learning_rate": 1.9898318388912265e-06, "loss": 0.0532, "step": 24456 }, { "epoch": 80.18688524590164, "grad_norm": 2.950237274169922, "learning_rate": 1.9891961940667825e-06, "loss": 0.1277, "step": 24457 }, { "epoch": 80.19016393442622, "grad_norm": 2.1538238525390625, "learning_rate": 1.9885606395724845e-06, "loss": 0.0936, "step": 24458 }, { "epoch": 80.19344262295083, "grad_norm": 2.0254719257354736, "learning_rate": 1.987925175415495e-06, "loss": 0.1712, "step": 24459 }, { "epoch": 80.19672131147541, "grad_norm": 2.201679229736328, "learning_rate": 1.9872898016029796e-06, "loss": 0.1064, "step": 24460 }, { "epoch": 80.2, "grad_norm": 2.4025683403015137, "learning_rate": 1.9866545181421016e-06, "loss": 0.0832, "step": 24461 }, { "epoch": 80.20327868852459, "grad_norm": 2.7677001953125, "learning_rate": 1.986019325040024e-06, "loss": 0.1215, "step": 24462 }, { "epoch": 80.20655737704918, "grad_norm": 1.8899790048599243, "learning_rate": 1.9853842223039144e-06, "loss": 0.1137, "step": 24463 }, { "epoch": 80.20983606557377, "grad_norm": 2.525655746459961, "learning_rate": 1.9847492099409294e-06, "loss": 0.2135, "step": 24464 }, { "epoch": 80.21311475409836, "grad_norm": 2.080254077911377, "learning_rate": 1.984114287958232e-06, "loss": 0.2141, "step": 24465 }, { "epoch": 80.21639344262294, "grad_norm": 2.9469733238220215, "learning_rate": 1.9834794563629767e-06, "loss": 0.1428, "step": 24466 }, { "epoch": 80.21967213114755, "grad_norm": 2.0285134315490723, "learning_rate": 1.9828447151623288e-06, "loss": 0.0691, "step": 24467 }, { "epoch": 80.22295081967214, "grad_norm": 2.0093064308166504, "learning_rate": 1.9822100643634436e-06, "loss": 0.1225, "step": 24468 }, { "epoch": 80.22622950819672, "grad_norm": 2.471738815307617, "learning_rate": 1.981575503973474e-06, "loss": 0.0932, "step": 24469 }, { "epoch": 80.22950819672131, "grad_norm": 1.7845773696899414, "learning_rate": 1.9809410339995773e-06, "loss": 0.0559, "step": 24470 }, { "epoch": 80.2327868852459, "grad_norm": 3.0502288341522217, "learning_rate": 1.980306654448909e-06, "loss": 0.0933, "step": 24471 }, { "epoch": 80.23606557377049, "grad_norm": 2.676166534423828, "learning_rate": 1.9796723653286233e-06, "loss": 0.2254, "step": 24472 }, { "epoch": 80.23934426229508, "grad_norm": 2.2110042572021484, "learning_rate": 1.979038166645869e-06, "loss": 0.1296, "step": 24473 }, { "epoch": 80.24262295081967, "grad_norm": 2.345182418823242, "learning_rate": 1.9784040584078003e-06, "loss": 0.1114, "step": 24474 }, { "epoch": 80.24590163934427, "grad_norm": 2.8174521923065186, "learning_rate": 1.9777700406215626e-06, "loss": 0.0879, "step": 24475 }, { "epoch": 80.24918032786886, "grad_norm": 3.3132123947143555, "learning_rate": 1.9771361132943123e-06, "loss": 0.2237, "step": 24476 }, { "epoch": 80.25245901639344, "grad_norm": 2.2036941051483154, "learning_rate": 1.9765022764331932e-06, "loss": 0.0839, "step": 24477 }, { "epoch": 80.25573770491803, "grad_norm": 2.347074508666992, "learning_rate": 1.975868530045352e-06, "loss": 0.1389, "step": 24478 }, { "epoch": 80.25901639344262, "grad_norm": 3.3625526428222656, "learning_rate": 1.9752348741379366e-06, "loss": 0.0804, "step": 24479 }, { "epoch": 80.26229508196721, "grad_norm": 3.1356918811798096, "learning_rate": 1.9746013087180936e-06, "loss": 0.2193, "step": 24480 }, { "epoch": 80.2655737704918, "grad_norm": 2.644516706466675, "learning_rate": 1.9739678337929615e-06, "loss": 0.2131, "step": 24481 }, { "epoch": 80.26885245901639, "grad_norm": 2.3588943481445312, "learning_rate": 1.9733344493696902e-06, "loss": 0.0787, "step": 24482 }, { "epoch": 80.27213114754099, "grad_norm": 2.528799295425415, "learning_rate": 1.9727011554554177e-06, "loss": 0.1222, "step": 24483 }, { "epoch": 80.27540983606558, "grad_norm": 2.709683895111084, "learning_rate": 1.9720679520572848e-06, "loss": 0.1155, "step": 24484 }, { "epoch": 80.27868852459017, "grad_norm": 3.016902446746826, "learning_rate": 1.9714348391824345e-06, "loss": 0.1466, "step": 24485 }, { "epoch": 80.28196721311475, "grad_norm": 2.642364025115967, "learning_rate": 1.970801816838004e-06, "loss": 0.0735, "step": 24486 }, { "epoch": 80.28524590163934, "grad_norm": 2.7495055198669434, "learning_rate": 1.970168885031131e-06, "loss": 0.2708, "step": 24487 }, { "epoch": 80.28852459016393, "grad_norm": 2.688633680343628, "learning_rate": 1.9695360437689504e-06, "loss": 0.0949, "step": 24488 }, { "epoch": 80.29180327868852, "grad_norm": 3.171522855758667, "learning_rate": 1.968903293058604e-06, "loss": 0.1853, "step": 24489 }, { "epoch": 80.29508196721312, "grad_norm": 1.993018627166748, "learning_rate": 1.968270632907222e-06, "loss": 0.0841, "step": 24490 }, { "epoch": 80.29836065573771, "grad_norm": 1.772063970565796, "learning_rate": 1.9676380633219396e-06, "loss": 0.1579, "step": 24491 }, { "epoch": 80.3016393442623, "grad_norm": 1.8396704196929932, "learning_rate": 1.9670055843098877e-06, "loss": 0.059, "step": 24492 }, { "epoch": 80.30491803278689, "grad_norm": 6.791411399841309, "learning_rate": 1.966373195878202e-06, "loss": 0.0559, "step": 24493 }, { "epoch": 80.30819672131148, "grad_norm": 2.2845191955566406, "learning_rate": 1.965740898034012e-06, "loss": 0.076, "step": 24494 }, { "epoch": 80.31147540983606, "grad_norm": 3.9990220069885254, "learning_rate": 1.965108690784446e-06, "loss": 0.0912, "step": 24495 }, { "epoch": 80.31475409836065, "grad_norm": 3.2460877895355225, "learning_rate": 1.9644765741366323e-06, "loss": 0.0656, "step": 24496 }, { "epoch": 80.31803278688524, "grad_norm": 2.1138174533843994, "learning_rate": 1.963844548097702e-06, "loss": 0.1687, "step": 24497 }, { "epoch": 80.32131147540984, "grad_norm": 2.0521299839019775, "learning_rate": 1.9632126126747796e-06, "loss": 0.1176, "step": 24498 }, { "epoch": 80.32459016393443, "grad_norm": 2.65277099609375, "learning_rate": 1.962580767874992e-06, "loss": 0.1374, "step": 24499 }, { "epoch": 80.32786885245902, "grad_norm": 2.120236396789551, "learning_rate": 1.9619490137054633e-06, "loss": 0.0442, "step": 24500 }, { "epoch": 80.33114754098361, "grad_norm": 1.6323715448379517, "learning_rate": 1.961317350173313e-06, "loss": 0.035, "step": 24501 }, { "epoch": 80.3344262295082, "grad_norm": 2.030468463897705, "learning_rate": 1.9606857772856713e-06, "loss": 0.1987, "step": 24502 }, { "epoch": 80.33770491803278, "grad_norm": 2.6399004459381104, "learning_rate": 1.960054295049656e-06, "loss": 0.0847, "step": 24503 }, { "epoch": 80.34098360655737, "grad_norm": 2.906876564025879, "learning_rate": 1.9594229034723877e-06, "loss": 0.0883, "step": 24504 }, { "epoch": 80.34426229508196, "grad_norm": 2.6038575172424316, "learning_rate": 1.958791602560983e-06, "loss": 0.0532, "step": 24505 }, { "epoch": 80.34754098360656, "grad_norm": 2.6472744941711426, "learning_rate": 1.958160392322568e-06, "loss": 0.0747, "step": 24506 }, { "epoch": 80.35081967213115, "grad_norm": 2.5822930335998535, "learning_rate": 1.9575292727642547e-06, "loss": 0.1995, "step": 24507 }, { "epoch": 80.35409836065574, "grad_norm": 2.182635545730591, "learning_rate": 1.9568982438931614e-06, "loss": 0.1023, "step": 24508 }, { "epoch": 80.35737704918033, "grad_norm": 2.363743305206299, "learning_rate": 1.9562673057164007e-06, "loss": 0.0705, "step": 24509 }, { "epoch": 80.36065573770492, "grad_norm": 1.847452998161316, "learning_rate": 1.9556364582410925e-06, "loss": 0.0612, "step": 24510 }, { "epoch": 80.3639344262295, "grad_norm": 2.0560343265533447, "learning_rate": 1.9550057014743462e-06, "loss": 0.1325, "step": 24511 }, { "epoch": 80.3672131147541, "grad_norm": 3.1064465045928955, "learning_rate": 1.954375035423276e-06, "loss": 0.0893, "step": 24512 }, { "epoch": 80.37049180327868, "grad_norm": 2.1633853912353516, "learning_rate": 1.953744460094993e-06, "loss": 0.1379, "step": 24513 }, { "epoch": 80.37377049180328, "grad_norm": 1.7866100072860718, "learning_rate": 1.953113975496603e-06, "loss": 0.0514, "step": 24514 }, { "epoch": 80.37704918032787, "grad_norm": 1.9894765615463257, "learning_rate": 1.952483581635224e-06, "loss": 0.1247, "step": 24515 }, { "epoch": 80.38032786885246, "grad_norm": 2.1938681602478027, "learning_rate": 1.951853278517959e-06, "loss": 0.0693, "step": 24516 }, { "epoch": 80.38360655737705, "grad_norm": 2.1997435092926025, "learning_rate": 1.951223066151917e-06, "loss": 0.066, "step": 24517 }, { "epoch": 80.38688524590164, "grad_norm": 2.0254170894622803, "learning_rate": 1.9505929445442007e-06, "loss": 0.0359, "step": 24518 }, { "epoch": 80.39016393442623, "grad_norm": 2.1135125160217285, "learning_rate": 1.9499629137019205e-06, "loss": 0.1567, "step": 24519 }, { "epoch": 80.39344262295081, "grad_norm": 2.407846212387085, "learning_rate": 1.9493329736321786e-06, "loss": 0.1521, "step": 24520 }, { "epoch": 80.3967213114754, "grad_norm": 2.0952882766723633, "learning_rate": 1.948703124342077e-06, "loss": 0.0792, "step": 24521 }, { "epoch": 80.4, "grad_norm": 2.741981267929077, "learning_rate": 1.9480733658387175e-06, "loss": 0.1136, "step": 24522 }, { "epoch": 80.4032786885246, "grad_norm": 1.4149264097213745, "learning_rate": 1.9474436981292057e-06, "loss": 0.0441, "step": 24523 }, { "epoch": 80.40655737704918, "grad_norm": 2.3318068981170654, "learning_rate": 1.946814121220637e-06, "loss": 0.1139, "step": 24524 }, { "epoch": 80.40983606557377, "grad_norm": 2.705209732055664, "learning_rate": 1.9461846351201143e-06, "loss": 0.1342, "step": 24525 }, { "epoch": 80.41311475409836, "grad_norm": 2.0571630001068115, "learning_rate": 1.9455552398347323e-06, "loss": 0.1542, "step": 24526 }, { "epoch": 80.41639344262295, "grad_norm": 2.1472694873809814, "learning_rate": 1.944925935371588e-06, "loss": 0.1068, "step": 24527 }, { "epoch": 80.41967213114754, "grad_norm": 2.4533650875091553, "learning_rate": 1.9442967217377805e-06, "loss": 0.1646, "step": 24528 }, { "epoch": 80.42295081967212, "grad_norm": 2.9273672103881836, "learning_rate": 1.943667598940404e-06, "loss": 0.1142, "step": 24529 }, { "epoch": 80.42622950819673, "grad_norm": 3.487478494644165, "learning_rate": 1.9430385669865513e-06, "loss": 0.0814, "step": 24530 }, { "epoch": 80.42950819672132, "grad_norm": 2.4149763584136963, "learning_rate": 1.942409625883314e-06, "loss": 0.1074, "step": 24531 }, { "epoch": 80.4327868852459, "grad_norm": 3.2609493732452393, "learning_rate": 1.941780775637787e-06, "loss": 0.0671, "step": 24532 }, { "epoch": 80.43606557377049, "grad_norm": 2.2198150157928467, "learning_rate": 1.941152016257062e-06, "loss": 0.0698, "step": 24533 }, { "epoch": 80.43934426229508, "grad_norm": 3.3559114933013916, "learning_rate": 1.940523347748223e-06, "loss": 0.0715, "step": 24534 }, { "epoch": 80.44262295081967, "grad_norm": 3.295330286026001, "learning_rate": 1.9398947701183666e-06, "loss": 0.0756, "step": 24535 }, { "epoch": 80.44590163934426, "grad_norm": 2.418546676635742, "learning_rate": 1.939266283374578e-06, "loss": 0.0952, "step": 24536 }, { "epoch": 80.44918032786886, "grad_norm": 3.6243045330047607, "learning_rate": 1.938637887523939e-06, "loss": 0.2489, "step": 24537 }, { "epoch": 80.45245901639345, "grad_norm": 1.9614673852920532, "learning_rate": 1.9380095825735423e-06, "loss": 0.0523, "step": 24538 }, { "epoch": 80.45573770491804, "grad_norm": 1.8687942028045654, "learning_rate": 1.9373813685304697e-06, "loss": 0.085, "step": 24539 }, { "epoch": 80.45901639344262, "grad_norm": 2.968031644821167, "learning_rate": 1.9367532454018036e-06, "loss": 0.1049, "step": 24540 }, { "epoch": 80.46229508196721, "grad_norm": 3.252424716949463, "learning_rate": 1.9361252131946307e-06, "loss": 0.1357, "step": 24541 }, { "epoch": 80.4655737704918, "grad_norm": 2.378704071044922, "learning_rate": 1.9354972719160304e-06, "loss": 0.1721, "step": 24542 }, { "epoch": 80.46885245901639, "grad_norm": 3.4160635471343994, "learning_rate": 1.9348694215730824e-06, "loss": 0.1772, "step": 24543 }, { "epoch": 80.47213114754098, "grad_norm": 3.0709707736968994, "learning_rate": 1.9342416621728656e-06, "loss": 0.1442, "step": 24544 }, { "epoch": 80.47540983606558, "grad_norm": 2.5044567584991455, "learning_rate": 1.9336139937224618e-06, "loss": 0.1082, "step": 24545 }, { "epoch": 80.47868852459017, "grad_norm": 2.774503231048584, "learning_rate": 1.932986416228949e-06, "loss": 0.1285, "step": 24546 }, { "epoch": 80.48196721311476, "grad_norm": 2.3007640838623047, "learning_rate": 1.9323589296994005e-06, "loss": 0.0795, "step": 24547 }, { "epoch": 80.48524590163935, "grad_norm": 1.6989142894744873, "learning_rate": 1.9317315341408915e-06, "loss": 0.0392, "step": 24548 }, { "epoch": 80.48852459016393, "grad_norm": 2.2684614658355713, "learning_rate": 1.9311042295605e-06, "loss": 0.0834, "step": 24549 }, { "epoch": 80.49180327868852, "grad_norm": 2.593343734741211, "learning_rate": 1.9304770159652984e-06, "loss": 0.2679, "step": 24550 }, { "epoch": 80.49508196721311, "grad_norm": 2.444748878479004, "learning_rate": 1.9298498933623588e-06, "loss": 0.0884, "step": 24551 }, { "epoch": 80.4983606557377, "grad_norm": 2.695887804031372, "learning_rate": 1.9292228617587525e-06, "loss": 0.129, "step": 24552 }, { "epoch": 80.5016393442623, "grad_norm": 2.4040307998657227, "learning_rate": 1.928595921161548e-06, "loss": 0.0879, "step": 24553 }, { "epoch": 80.50491803278689, "grad_norm": 2.2770333290100098, "learning_rate": 1.9279690715778176e-06, "loss": 0.1158, "step": 24554 }, { "epoch": 80.50819672131148, "grad_norm": 5.0805134773254395, "learning_rate": 1.9273423130146298e-06, "loss": 0.1468, "step": 24555 }, { "epoch": 80.51147540983607, "grad_norm": 2.561062812805176, "learning_rate": 1.9267156454790514e-06, "loss": 0.0937, "step": 24556 }, { "epoch": 80.51475409836065, "grad_norm": 1.4093670845031738, "learning_rate": 1.926089068978144e-06, "loss": 0.0165, "step": 24557 }, { "epoch": 80.51803278688524, "grad_norm": 2.694366216659546, "learning_rate": 1.9254625835189813e-06, "loss": 0.1114, "step": 24558 }, { "epoch": 80.52131147540983, "grad_norm": 2.5066752433776855, "learning_rate": 1.924836189108622e-06, "loss": 0.2016, "step": 24559 }, { "epoch": 80.52459016393442, "grad_norm": 2.29478120803833, "learning_rate": 1.9242098857541315e-06, "loss": 0.1715, "step": 24560 }, { "epoch": 80.52786885245902, "grad_norm": 2.21698260307312, "learning_rate": 1.923583673462569e-06, "loss": 0.0578, "step": 24561 }, { "epoch": 80.53114754098361, "grad_norm": 2.2405831813812256, "learning_rate": 1.9229575522410006e-06, "loss": 0.0636, "step": 24562 }, { "epoch": 80.5344262295082, "grad_norm": 3.3365464210510254, "learning_rate": 1.9223315220964834e-06, "loss": 0.107, "step": 24563 }, { "epoch": 80.53770491803279, "grad_norm": 8.861113548278809, "learning_rate": 1.9217055830360766e-06, "loss": 0.0739, "step": 24564 }, { "epoch": 80.54098360655738, "grad_norm": 2.361833333969116, "learning_rate": 1.9210797350668385e-06, "loss": 0.0726, "step": 24565 }, { "epoch": 80.54426229508196, "grad_norm": 3.1698200702667236, "learning_rate": 1.920453978195824e-06, "loss": 0.2817, "step": 24566 }, { "epoch": 80.54754098360655, "grad_norm": 3.0972812175750732, "learning_rate": 1.9198283124300954e-06, "loss": 0.1086, "step": 24567 }, { "epoch": 80.55081967213114, "grad_norm": 2.377560615539551, "learning_rate": 1.919202737776702e-06, "loss": 0.0379, "step": 24568 }, { "epoch": 80.55409836065574, "grad_norm": 1.9826399087905884, "learning_rate": 1.9185772542427008e-06, "loss": 0.0758, "step": 24569 }, { "epoch": 80.55737704918033, "grad_norm": 6.353381156921387, "learning_rate": 1.9179518618351413e-06, "loss": 0.2108, "step": 24570 }, { "epoch": 80.56065573770492, "grad_norm": 2.897803783416748, "learning_rate": 1.9173265605610793e-06, "loss": 0.2537, "step": 24571 }, { "epoch": 80.56393442622951, "grad_norm": 2.6821184158325195, "learning_rate": 1.9167013504275643e-06, "loss": 0.1408, "step": 24572 }, { "epoch": 80.5672131147541, "grad_norm": 2.7570016384124756, "learning_rate": 1.916076231441647e-06, "loss": 0.2376, "step": 24573 }, { "epoch": 80.57049180327868, "grad_norm": 3.132359266281128, "learning_rate": 1.915451203610372e-06, "loss": 0.0966, "step": 24574 }, { "epoch": 80.57377049180327, "grad_norm": 2.8952338695526123, "learning_rate": 1.9148262669407936e-06, "loss": 0.0569, "step": 24575 }, { "epoch": 80.57704918032788, "grad_norm": 2.6118338108062744, "learning_rate": 1.914201421439955e-06, "loss": 0.0939, "step": 24576 }, { "epoch": 80.58032786885246, "grad_norm": 1.5520999431610107, "learning_rate": 1.9135766671149025e-06, "loss": 0.092, "step": 24577 }, { "epoch": 80.58360655737705, "grad_norm": 2.5025599002838135, "learning_rate": 1.912952003972681e-06, "loss": 0.0809, "step": 24578 }, { "epoch": 80.58688524590164, "grad_norm": 2.029402494430542, "learning_rate": 1.912327432020332e-06, "loss": 0.2131, "step": 24579 }, { "epoch": 80.59016393442623, "grad_norm": 1.5600731372833252, "learning_rate": 1.911702951264902e-06, "loss": 0.0273, "step": 24580 }, { "epoch": 80.59344262295082, "grad_norm": 2.913712978363037, "learning_rate": 1.911078561713432e-06, "loss": 0.2559, "step": 24581 }, { "epoch": 80.5967213114754, "grad_norm": 2.691020965576172, "learning_rate": 1.9104542633729604e-06, "loss": 0.2595, "step": 24582 }, { "epoch": 80.6, "grad_norm": 2.2312395572662354, "learning_rate": 1.9098300562505266e-06, "loss": 0.0847, "step": 24583 }, { "epoch": 80.6032786885246, "grad_norm": 2.4628801345825195, "learning_rate": 1.9092059403531727e-06, "loss": 0.1092, "step": 24584 }, { "epoch": 80.60655737704919, "grad_norm": 2.129347324371338, "learning_rate": 1.908581915687934e-06, "loss": 0.1712, "step": 24585 }, { "epoch": 80.60983606557377, "grad_norm": 2.455275297164917, "learning_rate": 1.907957982261844e-06, "loss": 0.0442, "step": 24586 }, { "epoch": 80.61311475409836, "grad_norm": 2.8903441429138184, "learning_rate": 1.9073341400819444e-06, "loss": 0.1199, "step": 24587 }, { "epoch": 80.61639344262295, "grad_norm": 2.3697588443756104, "learning_rate": 1.9067103891552675e-06, "loss": 0.0895, "step": 24588 }, { "epoch": 80.61967213114754, "grad_norm": 2.6457138061523438, "learning_rate": 1.9060867294888419e-06, "loss": 0.0929, "step": 24589 }, { "epoch": 80.62295081967213, "grad_norm": 3.4619970321655273, "learning_rate": 1.9054631610897079e-06, "loss": 0.1037, "step": 24590 }, { "epoch": 80.62622950819672, "grad_norm": 2.2795097827911377, "learning_rate": 1.9048396839648919e-06, "loss": 0.145, "step": 24591 }, { "epoch": 80.62950819672132, "grad_norm": 2.947695732116699, "learning_rate": 1.9042162981214264e-06, "loss": 0.1901, "step": 24592 }, { "epoch": 80.6327868852459, "grad_norm": 2.124718189239502, "learning_rate": 1.9035930035663364e-06, "loss": 0.0534, "step": 24593 }, { "epoch": 80.6360655737705, "grad_norm": 1.9847588539123535, "learning_rate": 1.9029698003066555e-06, "loss": 0.0851, "step": 24594 }, { "epoch": 80.63934426229508, "grad_norm": 2.615281343460083, "learning_rate": 1.902346688349409e-06, "loss": 0.1637, "step": 24595 }, { "epoch": 80.64262295081967, "grad_norm": 2.3540878295898438, "learning_rate": 1.9017236677016215e-06, "loss": 0.0486, "step": 24596 }, { "epoch": 80.64590163934426, "grad_norm": 2.514594078063965, "learning_rate": 1.9011007383703218e-06, "loss": 0.1643, "step": 24597 }, { "epoch": 80.64918032786885, "grad_norm": 2.7010793685913086, "learning_rate": 1.9004779003625317e-06, "loss": 0.0895, "step": 24598 }, { "epoch": 80.65245901639344, "grad_norm": 2.345484495162964, "learning_rate": 1.899855153685275e-06, "loss": 0.1011, "step": 24599 }, { "epoch": 80.65573770491804, "grad_norm": 2.3852808475494385, "learning_rate": 1.89923249834557e-06, "loss": 0.0682, "step": 24600 }, { "epoch": 80.65901639344263, "grad_norm": 2.9100213050842285, "learning_rate": 1.898609934350445e-06, "loss": 0.1484, "step": 24601 }, { "epoch": 80.66229508196722, "grad_norm": 1.5283910036087036, "learning_rate": 1.8979874617069161e-06, "loss": 0.0292, "step": 24602 }, { "epoch": 80.6655737704918, "grad_norm": 3.357316493988037, "learning_rate": 1.8973650804220024e-06, "loss": 0.1472, "step": 24603 }, { "epoch": 80.66885245901639, "grad_norm": 1.8691869974136353, "learning_rate": 1.8967427905027225e-06, "loss": 0.0748, "step": 24604 }, { "epoch": 80.67213114754098, "grad_norm": 2.4064579010009766, "learning_rate": 1.89612059195609e-06, "loss": 0.0803, "step": 24605 }, { "epoch": 80.67540983606557, "grad_norm": 2.4238572120666504, "learning_rate": 1.8954984847891257e-06, "loss": 0.2386, "step": 24606 }, { "epoch": 80.67868852459016, "grad_norm": 2.2534546852111816, "learning_rate": 1.8948764690088427e-06, "loss": 0.0401, "step": 24607 }, { "epoch": 80.68196721311476, "grad_norm": 2.832737922668457, "learning_rate": 1.8942545446222548e-06, "loss": 0.1698, "step": 24608 }, { "epoch": 80.68524590163935, "grad_norm": 2.0880935192108154, "learning_rate": 1.8936327116363728e-06, "loss": 0.1148, "step": 24609 }, { "epoch": 80.68852459016394, "grad_norm": 1.7242387533187866, "learning_rate": 1.8930109700582133e-06, "loss": 0.0245, "step": 24610 }, { "epoch": 80.69180327868852, "grad_norm": 2.8045461177825928, "learning_rate": 1.892389319894783e-06, "loss": 0.0716, "step": 24611 }, { "epoch": 80.69508196721311, "grad_norm": 2.489457845687866, "learning_rate": 1.8917677611530939e-06, "loss": 0.0777, "step": 24612 }, { "epoch": 80.6983606557377, "grad_norm": 2.6539647579193115, "learning_rate": 1.8911462938401503e-06, "loss": 0.1457, "step": 24613 }, { "epoch": 80.70163934426229, "grad_norm": 2.7782838344573975, "learning_rate": 1.890524917962967e-06, "loss": 0.1457, "step": 24614 }, { "epoch": 80.70491803278688, "grad_norm": 2.6343464851379395, "learning_rate": 1.889903633528547e-06, "loss": 0.0831, "step": 24615 }, { "epoch": 80.70819672131148, "grad_norm": 2.064781904220581, "learning_rate": 1.8892824405438948e-06, "loss": 0.034, "step": 24616 }, { "epoch": 80.71147540983607, "grad_norm": 1.9347957372665405, "learning_rate": 1.8886613390160168e-06, "loss": 0.1315, "step": 24617 }, { "epoch": 80.71475409836066, "grad_norm": 2.2839267253875732, "learning_rate": 1.8880403289519133e-06, "loss": 0.1743, "step": 24618 }, { "epoch": 80.71803278688525, "grad_norm": 1.9100323915481567, "learning_rate": 1.8874194103585918e-06, "loss": 0.0495, "step": 24619 }, { "epoch": 80.72131147540983, "grad_norm": 2.2280681133270264, "learning_rate": 1.8867985832430514e-06, "loss": 0.1331, "step": 24620 }, { "epoch": 80.72459016393442, "grad_norm": 2.350313901901245, "learning_rate": 1.8861778476122926e-06, "loss": 0.1478, "step": 24621 }, { "epoch": 80.72786885245901, "grad_norm": 3.9408419132232666, "learning_rate": 1.8855572034733128e-06, "loss": 0.0637, "step": 24622 }, { "epoch": 80.73114754098361, "grad_norm": 3.0791518688201904, "learning_rate": 1.8849366508331146e-06, "loss": 0.0879, "step": 24623 }, { "epoch": 80.7344262295082, "grad_norm": 2.501683473587036, "learning_rate": 1.8843161896986928e-06, "loss": 0.1541, "step": 24624 }, { "epoch": 80.73770491803279, "grad_norm": 4.111865520477295, "learning_rate": 1.883695820077045e-06, "loss": 0.1666, "step": 24625 }, { "epoch": 80.74098360655738, "grad_norm": 2.17234468460083, "learning_rate": 1.8830755419751623e-06, "loss": 0.0563, "step": 24626 }, { "epoch": 80.74426229508197, "grad_norm": 2.915266513824463, "learning_rate": 1.8824553554000457e-06, "loss": 0.0452, "step": 24627 }, { "epoch": 80.74754098360656, "grad_norm": 2.12813138961792, "learning_rate": 1.8818352603586843e-06, "loss": 0.1315, "step": 24628 }, { "epoch": 80.75081967213114, "grad_norm": 3.098062753677368, "learning_rate": 1.881215256858071e-06, "loss": 0.1293, "step": 24629 }, { "epoch": 80.75409836065573, "grad_norm": 1.8926703929901123, "learning_rate": 1.8805953449051984e-06, "loss": 0.1804, "step": 24630 }, { "epoch": 80.75737704918033, "grad_norm": 3.1357781887054443, "learning_rate": 1.8799755245070516e-06, "loss": 0.1011, "step": 24631 }, { "epoch": 80.76065573770492, "grad_norm": 7.712053298950195, "learning_rate": 1.8793557956706265e-06, "loss": 0.0693, "step": 24632 }, { "epoch": 80.76393442622951, "grad_norm": 1.6886333227157593, "learning_rate": 1.8787361584029084e-06, "loss": 0.0337, "step": 24633 }, { "epoch": 80.7672131147541, "grad_norm": 3.4297921657562256, "learning_rate": 1.878116612710883e-06, "loss": 0.2068, "step": 24634 }, { "epoch": 80.77049180327869, "grad_norm": 2.1537210941314697, "learning_rate": 1.8774971586015356e-06, "loss": 0.1606, "step": 24635 }, { "epoch": 80.77377049180328, "grad_norm": 2.5125279426574707, "learning_rate": 1.8768777960818563e-06, "loss": 0.1542, "step": 24636 }, { "epoch": 80.77704918032786, "grad_norm": 1.7346819639205933, "learning_rate": 1.876258525158825e-06, "loss": 0.0298, "step": 24637 }, { "epoch": 80.78032786885245, "grad_norm": 1.8384678363800049, "learning_rate": 1.875639345839425e-06, "loss": 0.1346, "step": 24638 }, { "epoch": 80.78360655737706, "grad_norm": 1.8645962476730347, "learning_rate": 1.8750202581306365e-06, "loss": 0.1079, "step": 24639 }, { "epoch": 80.78688524590164, "grad_norm": 3.0234389305114746, "learning_rate": 1.8744012620394458e-06, "loss": 0.1232, "step": 24640 }, { "epoch": 80.79016393442623, "grad_norm": 2.851086139678955, "learning_rate": 1.8737823575728287e-06, "loss": 0.1791, "step": 24641 }, { "epoch": 80.79344262295082, "grad_norm": 2.7603280544281006, "learning_rate": 1.8731635447377617e-06, "loss": 0.1931, "step": 24642 }, { "epoch": 80.79672131147541, "grad_norm": 2.1969563961029053, "learning_rate": 1.872544823541228e-06, "loss": 0.0517, "step": 24643 }, { "epoch": 80.8, "grad_norm": 2.427950143814087, "learning_rate": 1.8719261939902023e-06, "loss": 0.1343, "step": 24644 }, { "epoch": 80.80327868852459, "grad_norm": 2.0032787322998047, "learning_rate": 1.8713076560916577e-06, "loss": 0.2491, "step": 24645 }, { "epoch": 80.80655737704917, "grad_norm": 2.0294299125671387, "learning_rate": 1.870689209852573e-06, "loss": 0.1045, "step": 24646 }, { "epoch": 80.80983606557378, "grad_norm": 2.3035545349121094, "learning_rate": 1.8700708552799196e-06, "loss": 0.1084, "step": 24647 }, { "epoch": 80.81311475409836, "grad_norm": 2.471050262451172, "learning_rate": 1.8694525923806683e-06, "loss": 0.0964, "step": 24648 }, { "epoch": 80.81639344262295, "grad_norm": 1.8937008380889893, "learning_rate": 1.8688344211617948e-06, "loss": 0.0446, "step": 24649 }, { "epoch": 80.81967213114754, "grad_norm": 2.3958966732025146, "learning_rate": 1.868216341630268e-06, "loss": 0.1857, "step": 24650 }, { "epoch": 80.82295081967213, "grad_norm": 1.7339441776275635, "learning_rate": 1.8675983537930564e-06, "loss": 0.0463, "step": 24651 }, { "epoch": 80.82622950819672, "grad_norm": 2.520014762878418, "learning_rate": 1.8669804576571271e-06, "loss": 0.168, "step": 24652 }, { "epoch": 80.8295081967213, "grad_norm": 2.962588310241699, "learning_rate": 1.866362653229451e-06, "loss": 0.0834, "step": 24653 }, { "epoch": 80.8327868852459, "grad_norm": 2.412919521331787, "learning_rate": 1.8657449405169937e-06, "loss": 0.156, "step": 24654 }, { "epoch": 80.8360655737705, "grad_norm": 1.624295949935913, "learning_rate": 1.8651273195267184e-06, "loss": 0.0694, "step": 24655 }, { "epoch": 80.83934426229509, "grad_norm": 3.0859837532043457, "learning_rate": 1.8645097902655917e-06, "loss": 0.159, "step": 24656 }, { "epoch": 80.84262295081967, "grad_norm": 3.219278573989868, "learning_rate": 1.8638923527405728e-06, "loss": 0.1419, "step": 24657 }, { "epoch": 80.84590163934426, "grad_norm": 2.4348466396331787, "learning_rate": 1.8632750069586304e-06, "loss": 0.2898, "step": 24658 }, { "epoch": 80.84918032786885, "grad_norm": 2.8321056365966797, "learning_rate": 1.862657752926722e-06, "loss": 0.2599, "step": 24659 }, { "epoch": 80.85245901639344, "grad_norm": 2.474595785140991, "learning_rate": 1.862040590651808e-06, "loss": 0.0645, "step": 24660 }, { "epoch": 80.85573770491803, "grad_norm": 2.38192081451416, "learning_rate": 1.861423520140846e-06, "loss": 0.1309, "step": 24661 }, { "epoch": 80.85901639344263, "grad_norm": 3.1460299491882324, "learning_rate": 1.8608065414007969e-06, "loss": 0.1308, "step": 24662 }, { "epoch": 80.86229508196722, "grad_norm": 2.9550275802612305, "learning_rate": 1.8601896544386177e-06, "loss": 0.219, "step": 24663 }, { "epoch": 80.8655737704918, "grad_norm": 2.699094295501709, "learning_rate": 1.8595728592612627e-06, "loss": 0.1601, "step": 24664 }, { "epoch": 80.8688524590164, "grad_norm": 3.1359922885894775, "learning_rate": 1.8589561558756864e-06, "loss": 0.3161, "step": 24665 }, { "epoch": 80.87213114754098, "grad_norm": 2.485595226287842, "learning_rate": 1.8583395442888452e-06, "loss": 0.0811, "step": 24666 }, { "epoch": 80.87540983606557, "grad_norm": 2.8216967582702637, "learning_rate": 1.8577230245076915e-06, "loss": 0.0769, "step": 24667 }, { "epoch": 80.87868852459016, "grad_norm": 3.2381250858306885, "learning_rate": 1.8571065965391767e-06, "loss": 0.225, "step": 24668 }, { "epoch": 80.88196721311475, "grad_norm": 2.3546955585479736, "learning_rate": 1.85649026039025e-06, "loss": 0.0541, "step": 24669 }, { "epoch": 80.88524590163935, "grad_norm": 2.100701332092285, "learning_rate": 1.8558740160678622e-06, "loss": 0.0687, "step": 24670 }, { "epoch": 80.88852459016394, "grad_norm": 1.5533024072647095, "learning_rate": 1.8552578635789642e-06, "loss": 0.0795, "step": 24671 }, { "epoch": 80.89180327868853, "grad_norm": 2.758767604827881, "learning_rate": 1.8546418029305023e-06, "loss": 0.3223, "step": 24672 }, { "epoch": 80.89508196721312, "grad_norm": 2.073669195175171, "learning_rate": 1.8540258341294227e-06, "loss": 0.0458, "step": 24673 }, { "epoch": 80.8983606557377, "grad_norm": 2.2505650520324707, "learning_rate": 1.8534099571826702e-06, "loss": 0.0711, "step": 24674 }, { "epoch": 80.90163934426229, "grad_norm": 2.7659173011779785, "learning_rate": 1.852794172097192e-06, "loss": 0.0836, "step": 24675 }, { "epoch": 80.90491803278688, "grad_norm": 3.461205005645752, "learning_rate": 1.8521784788799314e-06, "loss": 0.1503, "step": 24676 }, { "epoch": 80.90819672131147, "grad_norm": 2.371041774749756, "learning_rate": 1.8515628775378292e-06, "loss": 0.081, "step": 24677 }, { "epoch": 80.91147540983607, "grad_norm": 2.7879726886749268, "learning_rate": 1.8509473680778256e-06, "loss": 0.138, "step": 24678 }, { "epoch": 80.91475409836066, "grad_norm": 2.3677382469177246, "learning_rate": 1.8503319505068662e-06, "loss": 0.0715, "step": 24679 }, { "epoch": 80.91803278688525, "grad_norm": 2.6549665927886963, "learning_rate": 1.8497166248318876e-06, "loss": 0.0737, "step": 24680 }, { "epoch": 80.92131147540984, "grad_norm": 1.817215085029602, "learning_rate": 1.8491013910598277e-06, "loss": 0.0536, "step": 24681 }, { "epoch": 80.92459016393443, "grad_norm": 1.819455623626709, "learning_rate": 1.8484862491976252e-06, "loss": 0.0405, "step": 24682 }, { "epoch": 80.92786885245901, "grad_norm": 2.1207468509674072, "learning_rate": 1.8478711992522125e-06, "loss": 0.1236, "step": 24683 }, { "epoch": 80.9311475409836, "grad_norm": 2.5448672771453857, "learning_rate": 1.8472562412305307e-06, "loss": 0.1396, "step": 24684 }, { "epoch": 80.93442622950819, "grad_norm": 3.7402632236480713, "learning_rate": 1.8466413751395117e-06, "loss": 0.072, "step": 24685 }, { "epoch": 80.9377049180328, "grad_norm": 2.4385156631469727, "learning_rate": 1.8460266009860884e-06, "loss": 0.1377, "step": 24686 }, { "epoch": 80.94098360655738, "grad_norm": 2.1594624519348145, "learning_rate": 1.8454119187771912e-06, "loss": 0.0632, "step": 24687 }, { "epoch": 80.94426229508197, "grad_norm": 2.5896432399749756, "learning_rate": 1.844797328519755e-06, "loss": 0.2, "step": 24688 }, { "epoch": 80.94754098360656, "grad_norm": 2.0105178356170654, "learning_rate": 1.8441828302207089e-06, "loss": 0.0455, "step": 24689 }, { "epoch": 80.95081967213115, "grad_norm": 2.452474355697632, "learning_rate": 1.843568423886981e-06, "loss": 0.1465, "step": 24690 }, { "epoch": 80.95409836065573, "grad_norm": 2.375756025314331, "learning_rate": 1.8429541095254965e-06, "loss": 0.1723, "step": 24691 }, { "epoch": 80.95737704918032, "grad_norm": 2.37449312210083, "learning_rate": 1.8423398871431897e-06, "loss": 0.0823, "step": 24692 }, { "epoch": 80.96065573770491, "grad_norm": 2.3777246475219727, "learning_rate": 1.8417257567469815e-06, "loss": 0.094, "step": 24693 }, { "epoch": 80.96393442622951, "grad_norm": 2.5038652420043945, "learning_rate": 1.8411117183437977e-06, "loss": 0.2463, "step": 24694 }, { "epoch": 80.9672131147541, "grad_norm": 2.6450021266937256, "learning_rate": 1.8404977719405603e-06, "loss": 0.2113, "step": 24695 }, { "epoch": 80.97049180327869, "grad_norm": 2.221146583557129, "learning_rate": 1.8398839175441962e-06, "loss": 0.1281, "step": 24696 }, { "epoch": 80.97377049180328, "grad_norm": 1.9691118001937866, "learning_rate": 1.8392701551616255e-06, "loss": 0.0607, "step": 24697 }, { "epoch": 80.97704918032787, "grad_norm": 2.6796886920928955, "learning_rate": 1.8386564847997668e-06, "loss": 0.1408, "step": 24698 }, { "epoch": 80.98032786885246, "grad_norm": 2.1281046867370605, "learning_rate": 1.8380429064655448e-06, "loss": 0.0504, "step": 24699 }, { "epoch": 80.98360655737704, "grad_norm": 2.5662341117858887, "learning_rate": 1.8374294201658738e-06, "loss": 0.1431, "step": 24700 }, { "epoch": 80.98688524590163, "grad_norm": 2.399062156677246, "learning_rate": 1.8368160259076718e-06, "loss": 0.2184, "step": 24701 }, { "epoch": 80.99016393442623, "grad_norm": 4.443282127380371, "learning_rate": 1.8362027236978585e-06, "loss": 0.1546, "step": 24702 }, { "epoch": 80.99344262295082, "grad_norm": 2.413973331451416, "learning_rate": 1.8355895135433488e-06, "loss": 0.0891, "step": 24703 }, { "epoch": 80.99672131147541, "grad_norm": 1.4264318943023682, "learning_rate": 1.8349763954510525e-06, "loss": 0.0342, "step": 24704 }, { "epoch": 81.0, "grad_norm": 3.323479652404785, "learning_rate": 1.8343633694278895e-06, "loss": 0.2471, "step": 24705 }, { "epoch": 81.00327868852459, "grad_norm": 2.373112201690674, "learning_rate": 1.83375043548077e-06, "loss": 0.149, "step": 24706 }, { "epoch": 81.00655737704918, "grad_norm": 2.7804160118103027, "learning_rate": 1.8331375936166052e-06, "loss": 0.0731, "step": 24707 }, { "epoch": 81.00983606557377, "grad_norm": 2.443281888961792, "learning_rate": 1.832524843842306e-06, "loss": 0.1947, "step": 24708 }, { "epoch": 81.01311475409837, "grad_norm": 2.550814628601074, "learning_rate": 1.831912186164777e-06, "loss": 0.0999, "step": 24709 }, { "epoch": 81.01639344262296, "grad_norm": 1.2516001462936401, "learning_rate": 1.8312996205909351e-06, "loss": 0.0362, "step": 24710 }, { "epoch": 81.01967213114754, "grad_norm": 2.104783773422241, "learning_rate": 1.8306871471276821e-06, "loss": 0.1619, "step": 24711 }, { "epoch": 81.02295081967213, "grad_norm": 2.6436402797698975, "learning_rate": 1.8300747657819263e-06, "loss": 0.1058, "step": 24712 }, { "epoch": 81.02622950819672, "grad_norm": 2.2352471351623535, "learning_rate": 1.8294624765605684e-06, "loss": 0.0601, "step": 24713 }, { "epoch": 81.02950819672131, "grad_norm": 2.712918758392334, "learning_rate": 1.82885027947052e-06, "loss": 0.1196, "step": 24714 }, { "epoch": 81.0327868852459, "grad_norm": 2.650934934616089, "learning_rate": 1.8282381745186805e-06, "loss": 0.2469, "step": 24715 }, { "epoch": 81.03606557377049, "grad_norm": 2.371018409729004, "learning_rate": 1.8276261617119517e-06, "loss": 0.2289, "step": 24716 }, { "epoch": 81.03934426229509, "grad_norm": 2.7399561405181885, "learning_rate": 1.8270142410572344e-06, "loss": 0.2202, "step": 24717 }, { "epoch": 81.04262295081968, "grad_norm": 2.9423630237579346, "learning_rate": 1.8264024125614277e-06, "loss": 0.1596, "step": 24718 }, { "epoch": 81.04590163934427, "grad_norm": 2.621025323867798, "learning_rate": 1.8257906762314348e-06, "loss": 0.0751, "step": 24719 }, { "epoch": 81.04918032786885, "grad_norm": 2.2959444522857666, "learning_rate": 1.8251790320741502e-06, "loss": 0.1514, "step": 24720 }, { "epoch": 81.05245901639344, "grad_norm": 2.154578685760498, "learning_rate": 1.824567480096473e-06, "loss": 0.0552, "step": 24721 }, { "epoch": 81.05573770491803, "grad_norm": 2.9263319969177246, "learning_rate": 1.8239560203052941e-06, "loss": 0.0628, "step": 24722 }, { "epoch": 81.05901639344262, "grad_norm": 3.37842059135437, "learning_rate": 1.823344652707515e-06, "loss": 0.3378, "step": 24723 }, { "epoch": 81.0622950819672, "grad_norm": 2.775283098220825, "learning_rate": 1.8227333773100263e-06, "loss": 0.1238, "step": 24724 }, { "epoch": 81.06557377049181, "grad_norm": 2.7703185081481934, "learning_rate": 1.822122194119722e-06, "loss": 0.0713, "step": 24725 }, { "epoch": 81.0688524590164, "grad_norm": 2.4532482624053955, "learning_rate": 1.8215111031434895e-06, "loss": 0.1798, "step": 24726 }, { "epoch": 81.07213114754099, "grad_norm": 1.8932420015335083, "learning_rate": 1.8209001043882246e-06, "loss": 0.196, "step": 24727 }, { "epoch": 81.07540983606557, "grad_norm": 2.6717658042907715, "learning_rate": 1.8202891978608161e-06, "loss": 0.0843, "step": 24728 }, { "epoch": 81.07868852459016, "grad_norm": 1.8311764001846313, "learning_rate": 1.819678383568152e-06, "loss": 0.1079, "step": 24729 }, { "epoch": 81.08196721311475, "grad_norm": 2.6636462211608887, "learning_rate": 1.8190676615171187e-06, "loss": 0.1131, "step": 24730 }, { "epoch": 81.08524590163934, "grad_norm": 2.37785267829895, "learning_rate": 1.8184570317146012e-06, "loss": 0.0754, "step": 24731 }, { "epoch": 81.08852459016393, "grad_norm": 2.4571714401245117, "learning_rate": 1.81784649416749e-06, "loss": 0.2446, "step": 24732 }, { "epoch": 81.09180327868853, "grad_norm": 2.1719110012054443, "learning_rate": 1.8172360488826668e-06, "loss": 0.0817, "step": 24733 }, { "epoch": 81.09508196721312, "grad_norm": 3.45320463180542, "learning_rate": 1.8166256958670147e-06, "loss": 0.0812, "step": 24734 }, { "epoch": 81.09836065573771, "grad_norm": 4.286313533782959, "learning_rate": 1.816015435127415e-06, "loss": 0.0547, "step": 24735 }, { "epoch": 81.1016393442623, "grad_norm": 2.779933214187622, "learning_rate": 1.8154052666707523e-06, "loss": 0.2321, "step": 24736 }, { "epoch": 81.10491803278688, "grad_norm": 2.2682316303253174, "learning_rate": 1.814795190503905e-06, "loss": 0.1011, "step": 24737 }, { "epoch": 81.10819672131147, "grad_norm": 2.7696893215179443, "learning_rate": 1.8141852066337529e-06, "loss": 0.1109, "step": 24738 }, { "epoch": 81.11147540983606, "grad_norm": 3.0373270511627197, "learning_rate": 1.8135753150671708e-06, "loss": 0.1675, "step": 24739 }, { "epoch": 81.11475409836065, "grad_norm": 2.3877763748168945, "learning_rate": 1.8129655158110415e-06, "loss": 0.1864, "step": 24740 }, { "epoch": 81.11803278688525, "grad_norm": 1.69003427028656, "learning_rate": 1.812355808872238e-06, "loss": 0.0223, "step": 24741 }, { "epoch": 81.12131147540984, "grad_norm": 2.356476068496704, "learning_rate": 1.8117461942576353e-06, "loss": 0.2067, "step": 24742 }, { "epoch": 81.12459016393443, "grad_norm": 3.1662817001342773, "learning_rate": 1.8111366719741085e-06, "loss": 0.1064, "step": 24743 }, { "epoch": 81.12786885245902, "grad_norm": 2.953787326812744, "learning_rate": 1.810527242028528e-06, "loss": 0.1035, "step": 24744 }, { "epoch": 81.1311475409836, "grad_norm": 2.3351657390594482, "learning_rate": 1.8099179044277704e-06, "loss": 0.0839, "step": 24745 }, { "epoch": 81.1344262295082, "grad_norm": 1.7689827680587769, "learning_rate": 1.8093086591787036e-06, "loss": 0.074, "step": 24746 }, { "epoch": 81.13770491803278, "grad_norm": 2.025531530380249, "learning_rate": 1.8086995062881952e-06, "loss": 0.1165, "step": 24747 }, { "epoch": 81.14098360655737, "grad_norm": 2.758237838745117, "learning_rate": 1.8080904457631187e-06, "loss": 0.1444, "step": 24748 }, { "epoch": 81.14426229508197, "grad_norm": 3.2500545978546143, "learning_rate": 1.80748147761034e-06, "loss": 0.1021, "step": 24749 }, { "epoch": 81.14754098360656, "grad_norm": 2.084357261657715, "learning_rate": 1.8068726018367244e-06, "loss": 0.0702, "step": 24750 }, { "epoch": 81.15081967213115, "grad_norm": 2.76088547706604, "learning_rate": 1.8062638184491399e-06, "loss": 0.183, "step": 24751 }, { "epoch": 81.15409836065574, "grad_norm": 3.182551622390747, "learning_rate": 1.8056551274544508e-06, "loss": 0.2885, "step": 24752 }, { "epoch": 81.15737704918033, "grad_norm": 3.9931678771972656, "learning_rate": 1.8050465288595177e-06, "loss": 0.0992, "step": 24753 }, { "epoch": 81.16065573770491, "grad_norm": 2.7136008739471436, "learning_rate": 1.804438022671209e-06, "loss": 0.1293, "step": 24754 }, { "epoch": 81.1639344262295, "grad_norm": 2.812432289123535, "learning_rate": 1.8038296088963813e-06, "loss": 0.0982, "step": 24755 }, { "epoch": 81.1672131147541, "grad_norm": 2.5082569122314453, "learning_rate": 1.8032212875418976e-06, "loss": 0.0665, "step": 24756 }, { "epoch": 81.1704918032787, "grad_norm": 2.2330243587493896, "learning_rate": 1.802613058614614e-06, "loss": 0.0758, "step": 24757 }, { "epoch": 81.17377049180328, "grad_norm": 2.445042133331299, "learning_rate": 1.802004922121393e-06, "loss": 0.1196, "step": 24758 }, { "epoch": 81.17704918032787, "grad_norm": 2.4092001914978027, "learning_rate": 1.8013968780690905e-06, "loss": 0.1904, "step": 24759 }, { "epoch": 81.18032786885246, "grad_norm": 3.4356539249420166, "learning_rate": 1.8007889264645629e-06, "loss": 0.2093, "step": 24760 }, { "epoch": 81.18360655737705, "grad_norm": 2.715557813644409, "learning_rate": 1.8001810673146625e-06, "loss": 0.0896, "step": 24761 }, { "epoch": 81.18688524590164, "grad_norm": 2.363295793533325, "learning_rate": 1.7995733006262494e-06, "loss": 0.1037, "step": 24762 }, { "epoch": 81.19016393442622, "grad_norm": 2.23828125, "learning_rate": 1.7989656264061727e-06, "loss": 0.1876, "step": 24763 }, { "epoch": 81.19344262295083, "grad_norm": 1.7917340993881226, "learning_rate": 1.7983580446612859e-06, "loss": 0.0612, "step": 24764 }, { "epoch": 81.19672131147541, "grad_norm": 2.548351764678955, "learning_rate": 1.7977505553984376e-06, "loss": 0.1454, "step": 24765 }, { "epoch": 81.2, "grad_norm": 1.6914501190185547, "learning_rate": 1.7971431586244814e-06, "loss": 0.102, "step": 24766 }, { "epoch": 81.20327868852459, "grad_norm": 2.293470859527588, "learning_rate": 1.7965358543462663e-06, "loss": 0.0739, "step": 24767 }, { "epoch": 81.20655737704918, "grad_norm": 2.7318670749664307, "learning_rate": 1.795928642570638e-06, "loss": 0.1163, "step": 24768 }, { "epoch": 81.20983606557377, "grad_norm": 2.5430333614349365, "learning_rate": 1.795321523304444e-06, "loss": 0.136, "step": 24769 }, { "epoch": 81.21311475409836, "grad_norm": 1.533003568649292, "learning_rate": 1.7947144965545294e-06, "loss": 0.1044, "step": 24770 }, { "epoch": 81.21639344262294, "grad_norm": 1.7894701957702637, "learning_rate": 1.7941075623277416e-06, "loss": 0.0669, "step": 24771 }, { "epoch": 81.21967213114755, "grad_norm": 2.094872236251831, "learning_rate": 1.793500720630923e-06, "loss": 0.0438, "step": 24772 }, { "epoch": 81.22295081967214, "grad_norm": 2.7848105430603027, "learning_rate": 1.7928939714709171e-06, "loss": 0.0621, "step": 24773 }, { "epoch": 81.22622950819672, "grad_norm": 1.799965739250183, "learning_rate": 1.7922873148545617e-06, "loss": 0.0363, "step": 24774 }, { "epoch": 81.22950819672131, "grad_norm": 1.8275043964385986, "learning_rate": 1.7916807507887035e-06, "loss": 0.1724, "step": 24775 }, { "epoch": 81.2327868852459, "grad_norm": 2.4587714672088623, "learning_rate": 1.7910742792801793e-06, "loss": 0.0694, "step": 24776 }, { "epoch": 81.23606557377049, "grad_norm": 1.900395154953003, "learning_rate": 1.7904679003358283e-06, "loss": 0.1362, "step": 24777 }, { "epoch": 81.23934426229508, "grad_norm": 1.8467636108398438, "learning_rate": 1.7898616139624848e-06, "loss": 0.0987, "step": 24778 }, { "epoch": 81.24262295081967, "grad_norm": 2.9945027828216553, "learning_rate": 1.7892554201669898e-06, "loss": 0.1313, "step": 24779 }, { "epoch": 81.24590163934427, "grad_norm": 2.673344373703003, "learning_rate": 1.7886493189561783e-06, "loss": 0.0991, "step": 24780 }, { "epoch": 81.24918032786886, "grad_norm": 2.807814598083496, "learning_rate": 1.7880433103368822e-06, "loss": 0.2476, "step": 24781 }, { "epoch": 81.25245901639344, "grad_norm": 2.074536085128784, "learning_rate": 1.7874373943159362e-06, "loss": 0.1339, "step": 24782 }, { "epoch": 81.25573770491803, "grad_norm": 3.6112618446350098, "learning_rate": 1.7868315709001704e-06, "loss": 0.2175, "step": 24783 }, { "epoch": 81.25901639344262, "grad_norm": 2.239576578140259, "learning_rate": 1.7862258400964206e-06, "loss": 0.1312, "step": 24784 }, { "epoch": 81.26229508196721, "grad_norm": 2.2969284057617188, "learning_rate": 1.7856202019115144e-06, "loss": 0.0705, "step": 24785 }, { "epoch": 81.2655737704918, "grad_norm": 1.8986274003982544, "learning_rate": 1.7850146563522809e-06, "loss": 0.0314, "step": 24786 }, { "epoch": 81.26885245901639, "grad_norm": 2.0707263946533203, "learning_rate": 1.7844092034255466e-06, "loss": 0.098, "step": 24787 }, { "epoch": 81.27213114754099, "grad_norm": 2.6490349769592285, "learning_rate": 1.7838038431381433e-06, "loss": 0.1735, "step": 24788 }, { "epoch": 81.27540983606558, "grad_norm": 2.106912612915039, "learning_rate": 1.7831985754968938e-06, "loss": 0.0493, "step": 24789 }, { "epoch": 81.27868852459017, "grad_norm": 2.222837209701538, "learning_rate": 1.7825934005086243e-06, "loss": 0.0991, "step": 24790 }, { "epoch": 81.28196721311475, "grad_norm": 1.7415941953659058, "learning_rate": 1.7819883181801557e-06, "loss": 0.0415, "step": 24791 }, { "epoch": 81.28524590163934, "grad_norm": 2.4885025024414062, "learning_rate": 1.7813833285183156e-06, "loss": 0.0896, "step": 24792 }, { "epoch": 81.28852459016393, "grad_norm": 2.8340868949890137, "learning_rate": 1.7807784315299237e-06, "loss": 0.0865, "step": 24793 }, { "epoch": 81.29180327868852, "grad_norm": 2.5855727195739746, "learning_rate": 1.7801736272218007e-06, "loss": 0.1194, "step": 24794 }, { "epoch": 81.29508196721312, "grad_norm": 2.9526684284210205, "learning_rate": 1.7795689156007667e-06, "loss": 0.1319, "step": 24795 }, { "epoch": 81.29836065573771, "grad_norm": 2.884596586227417, "learning_rate": 1.7789642966736376e-06, "loss": 0.1082, "step": 24796 }, { "epoch": 81.3016393442623, "grad_norm": 3.3670878410339355, "learning_rate": 1.7783597704472365e-06, "loss": 0.1681, "step": 24797 }, { "epoch": 81.30491803278689, "grad_norm": 2.245407819747925, "learning_rate": 1.7777553369283773e-06, "loss": 0.0634, "step": 24798 }, { "epoch": 81.30819672131148, "grad_norm": 2.5928075313568115, "learning_rate": 1.7771509961238754e-06, "loss": 0.0826, "step": 24799 }, { "epoch": 81.31147540983606, "grad_norm": 2.212677001953125, "learning_rate": 1.776546748040544e-06, "loss": 0.0614, "step": 24800 }, { "epoch": 81.31475409836065, "grad_norm": 2.56139874458313, "learning_rate": 1.7759425926852002e-06, "loss": 0.0467, "step": 24801 }, { "epoch": 81.31803278688524, "grad_norm": 2.60561466217041, "learning_rate": 1.7753385300646542e-06, "loss": 0.1472, "step": 24802 }, { "epoch": 81.32131147540984, "grad_norm": 2.7566206455230713, "learning_rate": 1.7747345601857157e-06, "loss": 0.0583, "step": 24803 }, { "epoch": 81.32459016393443, "grad_norm": 2.68585205078125, "learning_rate": 1.7741306830551996e-06, "loss": 0.0866, "step": 24804 }, { "epoch": 81.32786885245902, "grad_norm": 6.990877628326416, "learning_rate": 1.7735268986799125e-06, "loss": 0.1677, "step": 24805 }, { "epoch": 81.33114754098361, "grad_norm": 1.9279505014419556, "learning_rate": 1.7729232070666602e-06, "loss": 0.0582, "step": 24806 }, { "epoch": 81.3344262295082, "grad_norm": 3.1089982986450195, "learning_rate": 1.772319608222256e-06, "loss": 0.1353, "step": 24807 }, { "epoch": 81.33770491803278, "grad_norm": 2.401906967163086, "learning_rate": 1.7717161021535034e-06, "loss": 0.1218, "step": 24808 }, { "epoch": 81.34098360655737, "grad_norm": 24.07102394104004, "learning_rate": 1.7711126888672037e-06, "loss": 0.1107, "step": 24809 }, { "epoch": 81.34426229508196, "grad_norm": 2.7368640899658203, "learning_rate": 1.7705093683701669e-06, "loss": 0.1692, "step": 24810 }, { "epoch": 81.34754098360656, "grad_norm": 1.7082257270812988, "learning_rate": 1.7699061406691931e-06, "loss": 0.073, "step": 24811 }, { "epoch": 81.35081967213115, "grad_norm": 2.416964054107666, "learning_rate": 1.7693030057710847e-06, "loss": 0.0537, "step": 24812 }, { "epoch": 81.35409836065574, "grad_norm": 2.419255256652832, "learning_rate": 1.7686999636826407e-06, "loss": 0.0764, "step": 24813 }, { "epoch": 81.35737704918033, "grad_norm": 3.152817726135254, "learning_rate": 1.7680970144106657e-06, "loss": 0.1069, "step": 24814 }, { "epoch": 81.36065573770492, "grad_norm": 1.920857310295105, "learning_rate": 1.7674941579619553e-06, "loss": 0.0526, "step": 24815 }, { "epoch": 81.3639344262295, "grad_norm": 2.4658303260803223, "learning_rate": 1.7668913943433087e-06, "loss": 0.0607, "step": 24816 }, { "epoch": 81.3672131147541, "grad_norm": 2.8355653285980225, "learning_rate": 1.7662887235615189e-06, "loss": 0.0871, "step": 24817 }, { "epoch": 81.37049180327868, "grad_norm": 3.1417527198791504, "learning_rate": 1.7656861456233876e-06, "loss": 0.2013, "step": 24818 }, { "epoch": 81.37377049180328, "grad_norm": 2.5220680236816406, "learning_rate": 1.7650836605357058e-06, "loss": 0.0702, "step": 24819 }, { "epoch": 81.37704918032787, "grad_norm": 2.5021700859069824, "learning_rate": 1.7644812683052682e-06, "loss": 0.0669, "step": 24820 }, { "epoch": 81.38032786885246, "grad_norm": 2.3896920680999756, "learning_rate": 1.7638789689388669e-06, "loss": 0.1498, "step": 24821 }, { "epoch": 81.38360655737705, "grad_norm": 2.9231700897216797, "learning_rate": 1.7632767624432923e-06, "loss": 0.1092, "step": 24822 }, { "epoch": 81.38688524590164, "grad_norm": 2.743133068084717, "learning_rate": 1.7626746488253377e-06, "loss": 0.1816, "step": 24823 }, { "epoch": 81.39016393442623, "grad_norm": 2.6237833499908447, "learning_rate": 1.7620726280917911e-06, "loss": 0.1928, "step": 24824 }, { "epoch": 81.39344262295081, "grad_norm": 2.6853392124176025, "learning_rate": 1.7614707002494413e-06, "loss": 0.1025, "step": 24825 }, { "epoch": 81.3967213114754, "grad_norm": 2.7938954830169678, "learning_rate": 1.760868865305072e-06, "loss": 0.1108, "step": 24826 }, { "epoch": 81.4, "grad_norm": 2.521690845489502, "learning_rate": 1.7602671232654755e-06, "loss": 0.0874, "step": 24827 }, { "epoch": 81.4032786885246, "grad_norm": 2.077296733856201, "learning_rate": 1.7596654741374353e-06, "loss": 0.0739, "step": 24828 }, { "epoch": 81.40655737704918, "grad_norm": 2.078392744064331, "learning_rate": 1.7590639179277335e-06, "loss": 0.1563, "step": 24829 }, { "epoch": 81.40983606557377, "grad_norm": 2.510279893875122, "learning_rate": 1.7584624546431527e-06, "loss": 0.2175, "step": 24830 }, { "epoch": 81.41311475409836, "grad_norm": 2.1413075923919678, "learning_rate": 1.7578610842904798e-06, "loss": 0.0439, "step": 24831 }, { "epoch": 81.41639344262295, "grad_norm": 5.299345016479492, "learning_rate": 1.7572598068764913e-06, "loss": 0.0567, "step": 24832 }, { "epoch": 81.41967213114754, "grad_norm": 2.1006152629852295, "learning_rate": 1.7566586224079695e-06, "loss": 0.0706, "step": 24833 }, { "epoch": 81.42295081967212, "grad_norm": 1.9470388889312744, "learning_rate": 1.756057530891694e-06, "loss": 0.0563, "step": 24834 }, { "epoch": 81.42622950819673, "grad_norm": 2.198558807373047, "learning_rate": 1.7554565323344375e-06, "loss": 0.075, "step": 24835 }, { "epoch": 81.42950819672132, "grad_norm": 2.71793270111084, "learning_rate": 1.7548556267429829e-06, "loss": 0.0826, "step": 24836 }, { "epoch": 81.4327868852459, "grad_norm": 2.2400388717651367, "learning_rate": 1.7542548141241044e-06, "loss": 0.053, "step": 24837 }, { "epoch": 81.43606557377049, "grad_norm": 4.546755790710449, "learning_rate": 1.753654094484577e-06, "loss": 0.1666, "step": 24838 }, { "epoch": 81.43934426229508, "grad_norm": 3.5696797370910645, "learning_rate": 1.7530534678311706e-06, "loss": 0.1778, "step": 24839 }, { "epoch": 81.44262295081967, "grad_norm": 1.578139066696167, "learning_rate": 1.752452934170663e-06, "loss": 0.0343, "step": 24840 }, { "epoch": 81.44590163934426, "grad_norm": 2.5810937881469727, "learning_rate": 1.7518524935098247e-06, "loss": 0.0706, "step": 24841 }, { "epoch": 81.44918032786886, "grad_norm": 1.8796322345733643, "learning_rate": 1.7512521458554254e-06, "loss": 0.0425, "step": 24842 }, { "epoch": 81.45245901639345, "grad_norm": 2.908876657485962, "learning_rate": 1.7506518912142313e-06, "loss": 0.1484, "step": 24843 }, { "epoch": 81.45573770491804, "grad_norm": 2.4176039695739746, "learning_rate": 1.7500517295930174e-06, "loss": 0.0735, "step": 24844 }, { "epoch": 81.45901639344262, "grad_norm": 2.2539637088775635, "learning_rate": 1.749451660998548e-06, "loss": 0.1098, "step": 24845 }, { "epoch": 81.46229508196721, "grad_norm": 2.5271339416503906, "learning_rate": 1.7488516854375904e-06, "loss": 0.0996, "step": 24846 }, { "epoch": 81.4655737704918, "grad_norm": 2.6774821281433105, "learning_rate": 1.7482518029169082e-06, "loss": 0.0842, "step": 24847 }, { "epoch": 81.46885245901639, "grad_norm": 1.5726382732391357, "learning_rate": 1.7476520134432639e-06, "loss": 0.0354, "step": 24848 }, { "epoch": 81.47213114754098, "grad_norm": 3.495736837387085, "learning_rate": 1.7470523170234265e-06, "loss": 0.1109, "step": 24849 }, { "epoch": 81.47540983606558, "grad_norm": 7.105278491973877, "learning_rate": 1.7464527136641552e-06, "loss": 0.1178, "step": 24850 }, { "epoch": 81.47868852459017, "grad_norm": 3.296854019165039, "learning_rate": 1.7458532033722098e-06, "loss": 0.1305, "step": 24851 }, { "epoch": 81.48196721311476, "grad_norm": 2.5162062644958496, "learning_rate": 1.7452537861543507e-06, "loss": 0.0925, "step": 24852 }, { "epoch": 81.48524590163935, "grad_norm": 1.8003522157669067, "learning_rate": 1.74465446201734e-06, "loss": 0.0551, "step": 24853 }, { "epoch": 81.48852459016393, "grad_norm": 2.059828519821167, "learning_rate": 1.7440552309679337e-06, "loss": 0.142, "step": 24854 }, { "epoch": 81.49180327868852, "grad_norm": 2.2594780921936035, "learning_rate": 1.7434560930128853e-06, "loss": 0.0743, "step": 24855 }, { "epoch": 81.49508196721311, "grad_norm": 2.3971283435821533, "learning_rate": 1.742857048158958e-06, "loss": 0.224, "step": 24856 }, { "epoch": 81.4983606557377, "grad_norm": 2.2725439071655273, "learning_rate": 1.7422580964129022e-06, "loss": 0.0947, "step": 24857 }, { "epoch": 81.5016393442623, "grad_norm": 2.2668569087982178, "learning_rate": 1.7416592377814722e-06, "loss": 0.0667, "step": 24858 }, { "epoch": 81.50491803278689, "grad_norm": 2.180528402328491, "learning_rate": 1.7410604722714187e-06, "loss": 0.0975, "step": 24859 }, { "epoch": 81.50819672131148, "grad_norm": 2.7576589584350586, "learning_rate": 1.7404617998894967e-06, "loss": 0.1649, "step": 24860 }, { "epoch": 81.51147540983607, "grad_norm": 2.7841339111328125, "learning_rate": 1.739863220642457e-06, "loss": 0.2024, "step": 24861 }, { "epoch": 81.51475409836065, "grad_norm": 1.8695640563964844, "learning_rate": 1.739264734537045e-06, "loss": 0.0711, "step": 24862 }, { "epoch": 81.51803278688524, "grad_norm": 2.8927736282348633, "learning_rate": 1.7386663415800142e-06, "loss": 0.1207, "step": 24863 }, { "epoch": 81.52131147540983, "grad_norm": 2.170966625213623, "learning_rate": 1.7380680417781104e-06, "loss": 0.1281, "step": 24864 }, { "epoch": 81.52459016393442, "grad_norm": 2.0464937686920166, "learning_rate": 1.737469835138078e-06, "loss": 0.2502, "step": 24865 }, { "epoch": 81.52786885245902, "grad_norm": 2.169673204421997, "learning_rate": 1.7368717216666654e-06, "loss": 0.1102, "step": 24866 }, { "epoch": 81.53114754098361, "grad_norm": 2.1799967288970947, "learning_rate": 1.7362737013706165e-06, "loss": 0.1553, "step": 24867 }, { "epoch": 81.5344262295082, "grad_norm": 2.318648338317871, "learning_rate": 1.7356757742566732e-06, "loss": 0.1316, "step": 24868 }, { "epoch": 81.53770491803279, "grad_norm": 2.4864020347595215, "learning_rate": 1.7350779403315754e-06, "loss": 0.1885, "step": 24869 }, { "epoch": 81.54098360655738, "grad_norm": 2.68426251411438, "learning_rate": 1.7344801996020699e-06, "loss": 0.0772, "step": 24870 }, { "epoch": 81.54426229508196, "grad_norm": 4.044527053833008, "learning_rate": 1.7338825520748947e-06, "loss": 0.2199, "step": 24871 }, { "epoch": 81.54754098360655, "grad_norm": 2.7290256023406982, "learning_rate": 1.7332849977567878e-06, "loss": 0.1553, "step": 24872 }, { "epoch": 81.55081967213114, "grad_norm": 1.7455499172210693, "learning_rate": 1.7326875366544882e-06, "loss": 0.0481, "step": 24873 }, { "epoch": 81.55409836065574, "grad_norm": 1.8024027347564697, "learning_rate": 1.7320901687747294e-06, "loss": 0.0368, "step": 24874 }, { "epoch": 81.55737704918033, "grad_norm": 2.3261325359344482, "learning_rate": 1.7314928941242537e-06, "loss": 0.069, "step": 24875 }, { "epoch": 81.56065573770492, "grad_norm": 2.8066205978393555, "learning_rate": 1.7308957127097926e-06, "loss": 0.1083, "step": 24876 }, { "epoch": 81.56393442622951, "grad_norm": 2.615671157836914, "learning_rate": 1.7302986245380792e-06, "loss": 0.0816, "step": 24877 }, { "epoch": 81.5672131147541, "grad_norm": 1.753106713294983, "learning_rate": 1.7297016296158453e-06, "loss": 0.0369, "step": 24878 }, { "epoch": 81.57049180327868, "grad_norm": 2.4864449501037598, "learning_rate": 1.7291047279498274e-06, "loss": 0.1871, "step": 24879 }, { "epoch": 81.57377049180327, "grad_norm": 4.715540885925293, "learning_rate": 1.7285079195467524e-06, "loss": 0.3324, "step": 24880 }, { "epoch": 81.57704918032788, "grad_norm": 2.1046218872070312, "learning_rate": 1.7279112044133516e-06, "loss": 0.1353, "step": 24881 }, { "epoch": 81.58032786885246, "grad_norm": 2.2734527587890625, "learning_rate": 1.7273145825563498e-06, "loss": 0.0875, "step": 24882 }, { "epoch": 81.58360655737705, "grad_norm": 2.260404586791992, "learning_rate": 1.7267180539824802e-06, "loss": 0.1145, "step": 24883 }, { "epoch": 81.58688524590164, "grad_norm": 2.1817827224731445, "learning_rate": 1.7261216186984665e-06, "loss": 0.1267, "step": 24884 }, { "epoch": 81.59016393442623, "grad_norm": 3.169522285461426, "learning_rate": 1.7255252767110342e-06, "loss": 0.1952, "step": 24885 }, { "epoch": 81.59344262295082, "grad_norm": 2.362793445587158, "learning_rate": 1.7249290280269071e-06, "loss": 0.1082, "step": 24886 }, { "epoch": 81.5967213114754, "grad_norm": 2.14746356010437, "learning_rate": 1.7243328726528074e-06, "loss": 0.1389, "step": 24887 }, { "epoch": 81.6, "grad_norm": 2.2221415042877197, "learning_rate": 1.723736810595461e-06, "loss": 0.1501, "step": 24888 }, { "epoch": 81.6032786885246, "grad_norm": 5.740417957305908, "learning_rate": 1.723140841861588e-06, "loss": 0.1115, "step": 24889 }, { "epoch": 81.60655737704919, "grad_norm": 2.9601528644561768, "learning_rate": 1.7225449664579076e-06, "loss": 0.1594, "step": 24890 }, { "epoch": 81.60983606557377, "grad_norm": 2.224269151687622, "learning_rate": 1.7219491843911362e-06, "loss": 0.1122, "step": 24891 }, { "epoch": 81.61311475409836, "grad_norm": 2.5397086143493652, "learning_rate": 1.7213534956679978e-06, "loss": 0.0731, "step": 24892 }, { "epoch": 81.61639344262295, "grad_norm": 2.2333858013153076, "learning_rate": 1.7207579002952057e-06, "loss": 0.1804, "step": 24893 }, { "epoch": 81.61967213114754, "grad_norm": 2.6322224140167236, "learning_rate": 1.7201623982794769e-06, "loss": 0.1062, "step": 24894 }, { "epoch": 81.62295081967213, "grad_norm": 2.1627964973449707, "learning_rate": 1.7195669896275235e-06, "loss": 0.1042, "step": 24895 }, { "epoch": 81.62622950819672, "grad_norm": 2.228168487548828, "learning_rate": 1.718971674346064e-06, "loss": 0.1232, "step": 24896 }, { "epoch": 81.62950819672132, "grad_norm": 2.490054130554199, "learning_rate": 1.718376452441809e-06, "loss": 0.0843, "step": 24897 }, { "epoch": 81.6327868852459, "grad_norm": 2.226933717727661, "learning_rate": 1.7177813239214702e-06, "loss": 0.0703, "step": 24898 }, { "epoch": 81.6360655737705, "grad_norm": 2.052232027053833, "learning_rate": 1.717186288791759e-06, "loss": 0.0479, "step": 24899 }, { "epoch": 81.63934426229508, "grad_norm": 2.6341712474823, "learning_rate": 1.7165913470593809e-06, "loss": 0.1267, "step": 24900 }, { "epoch": 81.64262295081967, "grad_norm": 2.1529948711395264, "learning_rate": 1.7159964987310519e-06, "loss": 0.054, "step": 24901 }, { "epoch": 81.64590163934426, "grad_norm": 2.075622320175171, "learning_rate": 1.715401743813474e-06, "loss": 0.1612, "step": 24902 }, { "epoch": 81.64918032786885, "grad_norm": 2.1122493743896484, "learning_rate": 1.7148070823133555e-06, "loss": 0.149, "step": 24903 }, { "epoch": 81.65245901639344, "grad_norm": 2.053392171859741, "learning_rate": 1.7142125142374e-06, "loss": 0.05, "step": 24904 }, { "epoch": 81.65573770491804, "grad_norm": 2.754563570022583, "learning_rate": 1.7136180395923152e-06, "loss": 0.2043, "step": 24905 }, { "epoch": 81.65901639344263, "grad_norm": 2.277974843978882, "learning_rate": 1.7130236583848026e-06, "loss": 0.0571, "step": 24906 }, { "epoch": 81.66229508196722, "grad_norm": 2.6984128952026367, "learning_rate": 1.7124293706215656e-06, "loss": 0.2568, "step": 24907 }, { "epoch": 81.6655737704918, "grad_norm": 2.1947202682495117, "learning_rate": 1.711835176309301e-06, "loss": 0.1434, "step": 24908 }, { "epoch": 81.66885245901639, "grad_norm": 2.0744411945343018, "learning_rate": 1.7112410754547149e-06, "loss": 0.0629, "step": 24909 }, { "epoch": 81.67213114754098, "grad_norm": 4.34985876083374, "learning_rate": 1.7106470680645037e-06, "loss": 0.0706, "step": 24910 }, { "epoch": 81.67540983606557, "grad_norm": 2.166921377182007, "learning_rate": 1.7100531541453623e-06, "loss": 0.1221, "step": 24911 }, { "epoch": 81.67868852459016, "grad_norm": 3.908323287963867, "learning_rate": 1.7094593337039956e-06, "loss": 0.165, "step": 24912 }, { "epoch": 81.68196721311476, "grad_norm": 2.194988250732422, "learning_rate": 1.7088656067470932e-06, "loss": 0.1905, "step": 24913 }, { "epoch": 81.68524590163935, "grad_norm": 1.9329575300216675, "learning_rate": 1.7082719732813501e-06, "loss": 0.0413, "step": 24914 }, { "epoch": 81.68852459016394, "grad_norm": 3.348335027694702, "learning_rate": 1.7076784333134634e-06, "loss": 0.1612, "step": 24915 }, { "epoch": 81.69180327868852, "grad_norm": 1.778623104095459, "learning_rate": 1.707084986850125e-06, "loss": 0.1169, "step": 24916 }, { "epoch": 81.69508196721311, "grad_norm": 2.114274740219116, "learning_rate": 1.706491633898023e-06, "loss": 0.0593, "step": 24917 }, { "epoch": 81.6983606557377, "grad_norm": 3.581099033355713, "learning_rate": 1.7058983744638535e-06, "loss": 0.2108, "step": 24918 }, { "epoch": 81.70163934426229, "grad_norm": 3.2119626998901367, "learning_rate": 1.705305208554303e-06, "loss": 0.2014, "step": 24919 }, { "epoch": 81.70491803278688, "grad_norm": 2.7114853858947754, "learning_rate": 1.704712136176061e-06, "loss": 0.0817, "step": 24920 }, { "epoch": 81.70819672131148, "grad_norm": 1.7791023254394531, "learning_rate": 1.7041191573358118e-06, "loss": 0.0525, "step": 24921 }, { "epoch": 81.71147540983607, "grad_norm": 1.5309780836105347, "learning_rate": 1.703526272040248e-06, "loss": 0.0805, "step": 24922 }, { "epoch": 81.71475409836066, "grad_norm": 2.3172178268432617, "learning_rate": 1.7029334802960507e-06, "loss": 0.152, "step": 24923 }, { "epoch": 81.71803278688525, "grad_norm": 2.2782812118530273, "learning_rate": 1.7023407821099058e-06, "loss": 0.0762, "step": 24924 }, { "epoch": 81.72131147540983, "grad_norm": 2.1919612884521484, "learning_rate": 1.7017481774884959e-06, "loss": 0.1178, "step": 24925 }, { "epoch": 81.72459016393442, "grad_norm": 2.831490993499756, "learning_rate": 1.701155666438501e-06, "loss": 0.278, "step": 24926 }, { "epoch": 81.72786885245901, "grad_norm": 2.058565855026245, "learning_rate": 1.7005632489666068e-06, "loss": 0.1461, "step": 24927 }, { "epoch": 81.73114754098361, "grad_norm": 2.9479727745056152, "learning_rate": 1.6999709250794916e-06, "loss": 0.1567, "step": 24928 }, { "epoch": 81.7344262295082, "grad_norm": 2.3354132175445557, "learning_rate": 1.699378694783833e-06, "loss": 0.0715, "step": 24929 }, { "epoch": 81.73770491803279, "grad_norm": 2.56671142578125, "learning_rate": 1.6987865580863083e-06, "loss": 0.0581, "step": 24930 }, { "epoch": 81.74098360655738, "grad_norm": 1.5811117887496948, "learning_rate": 1.6981945149935996e-06, "loss": 0.1036, "step": 24931 }, { "epoch": 81.74426229508197, "grad_norm": 2.461717367172241, "learning_rate": 1.6976025655123784e-06, "loss": 0.166, "step": 24932 }, { "epoch": 81.74754098360656, "grad_norm": 2.905402898788452, "learning_rate": 1.6970107096493204e-06, "loss": 0.1444, "step": 24933 }, { "epoch": 81.75081967213114, "grad_norm": 2.135852336883545, "learning_rate": 1.6964189474110981e-06, "loss": 0.1907, "step": 24934 }, { "epoch": 81.75409836065573, "grad_norm": 2.7604820728302, "learning_rate": 1.6958272788043872e-06, "loss": 0.3197, "step": 24935 }, { "epoch": 81.75737704918033, "grad_norm": 2.563631772994995, "learning_rate": 1.695235703835858e-06, "loss": 0.0948, "step": 24936 }, { "epoch": 81.76065573770492, "grad_norm": 3.1671862602233887, "learning_rate": 1.6946442225121817e-06, "loss": 0.0778, "step": 24937 }, { "epoch": 81.76393442622951, "grad_norm": 3.388005256652832, "learning_rate": 1.6940528348400276e-06, "loss": 0.0669, "step": 24938 }, { "epoch": 81.7672131147541, "grad_norm": 3.246255874633789, "learning_rate": 1.6934615408260602e-06, "loss": 0.045, "step": 24939 }, { "epoch": 81.77049180327869, "grad_norm": 2.6700210571289062, "learning_rate": 1.6928703404769531e-06, "loss": 0.1248, "step": 24940 }, { "epoch": 81.77377049180328, "grad_norm": 1.2721692323684692, "learning_rate": 1.692279233799371e-06, "loss": 0.0218, "step": 24941 }, { "epoch": 81.77704918032786, "grad_norm": 2.425694227218628, "learning_rate": 1.6916882207999774e-06, "loss": 0.1352, "step": 24942 }, { "epoch": 81.78032786885245, "grad_norm": 3.5875375270843506, "learning_rate": 1.691097301485436e-06, "loss": 0.1368, "step": 24943 }, { "epoch": 81.78360655737706, "grad_norm": 5.071976661682129, "learning_rate": 1.6905064758624134e-06, "loss": 0.1006, "step": 24944 }, { "epoch": 81.78688524590164, "grad_norm": 2.98110294342041, "learning_rate": 1.6899157439375714e-06, "loss": 0.0966, "step": 24945 }, { "epoch": 81.79016393442623, "grad_norm": 2.488921880722046, "learning_rate": 1.6893251057175675e-06, "loss": 0.2359, "step": 24946 }, { "epoch": 81.79344262295082, "grad_norm": 9.027594566345215, "learning_rate": 1.6887345612090633e-06, "loss": 0.1031, "step": 24947 }, { "epoch": 81.79672131147541, "grad_norm": 1.5877914428710938, "learning_rate": 1.6881441104187203e-06, "loss": 0.0253, "step": 24948 }, { "epoch": 81.8, "grad_norm": 2.4010250568389893, "learning_rate": 1.687553753353195e-06, "loss": 0.1163, "step": 24949 }, { "epoch": 81.80327868852459, "grad_norm": 2.1001555919647217, "learning_rate": 1.6869634900191434e-06, "loss": 0.2101, "step": 24950 }, { "epoch": 81.80655737704917, "grad_norm": 1.9271926879882812, "learning_rate": 1.6863733204232213e-06, "loss": 0.1558, "step": 24951 }, { "epoch": 81.80983606557378, "grad_norm": 1.470967173576355, "learning_rate": 1.6857832445720823e-06, "loss": 0.0791, "step": 24952 }, { "epoch": 81.81311475409836, "grad_norm": 2.9174134731292725, "learning_rate": 1.6851932624723844e-06, "loss": 0.0664, "step": 24953 }, { "epoch": 81.81639344262295, "grad_norm": 2.3175623416900635, "learning_rate": 1.6846033741307778e-06, "loss": 0.0306, "step": 24954 }, { "epoch": 81.81967213114754, "grad_norm": 2.0173399448394775, "learning_rate": 1.6840135795539137e-06, "loss": 0.1726, "step": 24955 }, { "epoch": 81.82295081967213, "grad_norm": 2.5050208568573, "learning_rate": 1.6834238787484403e-06, "loss": 0.1617, "step": 24956 }, { "epoch": 81.82622950819672, "grad_norm": 2.530580759048462, "learning_rate": 1.6828342717210121e-06, "loss": 0.0507, "step": 24957 }, { "epoch": 81.8295081967213, "grad_norm": 3.0036563873291016, "learning_rate": 1.6822447584782754e-06, "loss": 0.1293, "step": 24958 }, { "epoch": 81.8327868852459, "grad_norm": 1.9102197885513306, "learning_rate": 1.6816553390268774e-06, "loss": 0.0861, "step": 24959 }, { "epoch": 81.8360655737705, "grad_norm": 2.419356107711792, "learning_rate": 1.6810660133734625e-06, "loss": 0.1314, "step": 24960 }, { "epoch": 81.83934426229509, "grad_norm": 3.14422607421875, "learning_rate": 1.680476781524679e-06, "loss": 0.0806, "step": 24961 }, { "epoch": 81.84262295081967, "grad_norm": 2.3894755840301514, "learning_rate": 1.6798876434871703e-06, "loss": 0.1001, "step": 24962 }, { "epoch": 81.84590163934426, "grad_norm": 2.5084376335144043, "learning_rate": 1.6792985992675803e-06, "loss": 0.0749, "step": 24963 }, { "epoch": 81.84918032786885, "grad_norm": 2.268836736679077, "learning_rate": 1.6787096488725462e-06, "loss": 0.0507, "step": 24964 }, { "epoch": 81.85245901639344, "grad_norm": 2.646657943725586, "learning_rate": 1.678120792308716e-06, "loss": 0.1449, "step": 24965 }, { "epoch": 81.85573770491803, "grad_norm": 1.8105014562606812, "learning_rate": 1.6775320295827257e-06, "loss": 0.0316, "step": 24966 }, { "epoch": 81.85901639344263, "grad_norm": 2.3614509105682373, "learning_rate": 1.6769433607012132e-06, "loss": 0.1587, "step": 24967 }, { "epoch": 81.86229508196722, "grad_norm": 2.5676767826080322, "learning_rate": 1.6763547856708206e-06, "loss": 0.0808, "step": 24968 }, { "epoch": 81.8655737704918, "grad_norm": 2.0422654151916504, "learning_rate": 1.675766304498182e-06, "loss": 0.1904, "step": 24969 }, { "epoch": 81.8688524590164, "grad_norm": 2.4188594818115234, "learning_rate": 1.6751779171899307e-06, "loss": 0.1912, "step": 24970 }, { "epoch": 81.87213114754098, "grad_norm": 2.9514431953430176, "learning_rate": 1.6745896237527071e-06, "loss": 0.1111, "step": 24971 }, { "epoch": 81.87540983606557, "grad_norm": 2.036369800567627, "learning_rate": 1.6740014241931414e-06, "loss": 0.038, "step": 24972 }, { "epoch": 81.87868852459016, "grad_norm": 2.6832985877990723, "learning_rate": 1.6734133185178636e-06, "loss": 0.173, "step": 24973 }, { "epoch": 81.88196721311475, "grad_norm": 3.298021078109741, "learning_rate": 1.672825306733511e-06, "loss": 0.1523, "step": 24974 }, { "epoch": 81.88524590163935, "grad_norm": 1.9328359365463257, "learning_rate": 1.6722373888467102e-06, "loss": 0.0846, "step": 24975 }, { "epoch": 81.88852459016394, "grad_norm": 1.775975227355957, "learning_rate": 1.6716495648640928e-06, "loss": 0.0879, "step": 24976 }, { "epoch": 81.89180327868853, "grad_norm": 2.0559487342834473, "learning_rate": 1.6710618347922858e-06, "loss": 0.0404, "step": 24977 }, { "epoch": 81.89508196721312, "grad_norm": 1.9882620573043823, "learning_rate": 1.6704741986379136e-06, "loss": 0.0982, "step": 24978 }, { "epoch": 81.8983606557377, "grad_norm": 2.5539844036102295, "learning_rate": 1.6698866564076077e-06, "loss": 0.1486, "step": 24979 }, { "epoch": 81.90163934426229, "grad_norm": 2.219184637069702, "learning_rate": 1.6692992081079918e-06, "loss": 0.0632, "step": 24980 }, { "epoch": 81.90491803278688, "grad_norm": 2.3250856399536133, "learning_rate": 1.6687118537456881e-06, "loss": 0.0662, "step": 24981 }, { "epoch": 81.90819672131147, "grad_norm": 3.2425918579101562, "learning_rate": 1.6681245933273182e-06, "loss": 0.1139, "step": 24982 }, { "epoch": 81.91147540983607, "grad_norm": 2.3871469497680664, "learning_rate": 1.667537426859509e-06, "loss": 0.0743, "step": 24983 }, { "epoch": 81.91475409836066, "grad_norm": 2.1851515769958496, "learning_rate": 1.6669503543488797e-06, "loss": 0.0798, "step": 24984 }, { "epoch": 81.91803278688525, "grad_norm": 2.3334743976593018, "learning_rate": 1.6663633758020482e-06, "loss": 0.158, "step": 24985 }, { "epoch": 81.92131147540984, "grad_norm": 2.3560428619384766, "learning_rate": 1.6657764912256324e-06, "loss": 0.0821, "step": 24986 }, { "epoch": 81.92459016393443, "grad_norm": 2.7786705493927, "learning_rate": 1.6651897006262552e-06, "loss": 0.23, "step": 24987 }, { "epoch": 81.92786885245901, "grad_norm": 2.2203235626220703, "learning_rate": 1.66460300401053e-06, "loss": 0.0947, "step": 24988 }, { "epoch": 81.9311475409836, "grad_norm": 2.4440627098083496, "learning_rate": 1.6640164013850724e-06, "loss": 0.1674, "step": 24989 }, { "epoch": 81.93442622950819, "grad_norm": 3.4789469242095947, "learning_rate": 1.6634298927564962e-06, "loss": 0.0928, "step": 24990 }, { "epoch": 81.9377049180328, "grad_norm": 2.471013069152832, "learning_rate": 1.6628434781314152e-06, "loss": 0.1841, "step": 24991 }, { "epoch": 81.94098360655738, "grad_norm": 3.0896778106689453, "learning_rate": 1.6622571575164437e-06, "loss": 0.1218, "step": 24992 }, { "epoch": 81.94426229508197, "grad_norm": 2.997851610183716, "learning_rate": 1.661670930918191e-06, "loss": 0.2147, "step": 24993 }, { "epoch": 81.94754098360656, "grad_norm": 2.4483251571655273, "learning_rate": 1.6610847983432698e-06, "loss": 0.0883, "step": 24994 }, { "epoch": 81.95081967213115, "grad_norm": 2.924570322036743, "learning_rate": 1.6604987597982846e-06, "loss": 0.0732, "step": 24995 }, { "epoch": 81.95409836065573, "grad_norm": 2.5001392364501953, "learning_rate": 1.6599128152898492e-06, "loss": 0.0527, "step": 24996 }, { "epoch": 81.95737704918032, "grad_norm": 2.3950178623199463, "learning_rate": 1.6593269648245691e-06, "loss": 0.1735, "step": 24997 }, { "epoch": 81.96065573770491, "grad_norm": 2.541318655014038, "learning_rate": 1.6587412084090492e-06, "loss": 0.0839, "step": 24998 }, { "epoch": 81.96393442622951, "grad_norm": 3.3065805435180664, "learning_rate": 1.658155546049892e-06, "loss": 0.1073, "step": 24999 }, { "epoch": 81.9672131147541, "grad_norm": 2.3607850074768066, "learning_rate": 1.657569977753707e-06, "loss": 0.1402, "step": 25000 }, { "epoch": 81.97049180327869, "grad_norm": 2.6185719966888428, "learning_rate": 1.6569845035270948e-06, "loss": 0.0822, "step": 25001 }, { "epoch": 81.97377049180328, "grad_norm": 3.510194778442383, "learning_rate": 1.6563991233766563e-06, "loss": 0.2683, "step": 25002 }, { "epoch": 81.97704918032787, "grad_norm": 1.9871487617492676, "learning_rate": 1.6558138373089927e-06, "loss": 0.0511, "step": 25003 }, { "epoch": 81.98032786885246, "grad_norm": 2.837308645248413, "learning_rate": 1.655228645330702e-06, "loss": 0.145, "step": 25004 }, { "epoch": 81.98360655737704, "grad_norm": 2.0907976627349854, "learning_rate": 1.654643547448388e-06, "loss": 0.0403, "step": 25005 }, { "epoch": 81.98688524590163, "grad_norm": 2.6406571865081787, "learning_rate": 1.654058543668644e-06, "loss": 0.1373, "step": 25006 }, { "epoch": 81.99016393442623, "grad_norm": 2.4441704750061035, "learning_rate": 1.6534736339980673e-06, "loss": 0.0648, "step": 25007 }, { "epoch": 81.99344262295082, "grad_norm": 2.858135461807251, "learning_rate": 1.6528888184432513e-06, "loss": 0.0749, "step": 25008 }, { "epoch": 81.99672131147541, "grad_norm": 2.837139844894409, "learning_rate": 1.652304097010795e-06, "loss": 0.1433, "step": 25009 }, { "epoch": 82.0, "grad_norm": 2.848459482192993, "learning_rate": 1.6517194697072903e-06, "loss": 0.116, "step": 25010 }, { "epoch": 82.00327868852459, "grad_norm": 2.7739293575286865, "learning_rate": 1.6511349365393282e-06, "loss": 0.0673, "step": 25011 }, { "epoch": 82.00655737704918, "grad_norm": 2.423912525177002, "learning_rate": 1.6505504975134968e-06, "loss": 0.0706, "step": 25012 }, { "epoch": 82.00983606557377, "grad_norm": 2.473818778991699, "learning_rate": 1.6499661526363931e-06, "loss": 0.2002, "step": 25013 }, { "epoch": 82.01311475409837, "grad_norm": 3.101043939590454, "learning_rate": 1.6493819019146028e-06, "loss": 0.2316, "step": 25014 }, { "epoch": 82.01639344262296, "grad_norm": 2.6174118518829346, "learning_rate": 1.6487977453547143e-06, "loss": 0.0907, "step": 25015 }, { "epoch": 82.01967213114754, "grad_norm": 2.171449661254883, "learning_rate": 1.6482136829633122e-06, "loss": 0.0476, "step": 25016 }, { "epoch": 82.02295081967213, "grad_norm": 3.057535171508789, "learning_rate": 1.6476297147469866e-06, "loss": 0.1005, "step": 25017 }, { "epoch": 82.02622950819672, "grad_norm": 2.6414718627929688, "learning_rate": 1.64704584071232e-06, "loss": 0.1158, "step": 25018 }, { "epoch": 82.02950819672131, "grad_norm": 2.208256483078003, "learning_rate": 1.6464620608658943e-06, "loss": 0.0737, "step": 25019 }, { "epoch": 82.0327868852459, "grad_norm": 7.907289028167725, "learning_rate": 1.645878375214296e-06, "loss": 0.0478, "step": 25020 }, { "epoch": 82.03606557377049, "grad_norm": 2.636129140853882, "learning_rate": 1.6452947837641054e-06, "loss": 0.0846, "step": 25021 }, { "epoch": 82.03934426229509, "grad_norm": 2.944378137588501, "learning_rate": 1.644711286521904e-06, "loss": 0.1817, "step": 25022 }, { "epoch": 82.04262295081968, "grad_norm": 2.5553653240203857, "learning_rate": 1.6441278834942665e-06, "loss": 0.2374, "step": 25023 }, { "epoch": 82.04590163934427, "grad_norm": 2.249101400375366, "learning_rate": 1.6435445746877776e-06, "loss": 0.1098, "step": 25024 }, { "epoch": 82.04918032786885, "grad_norm": 2.019144058227539, "learning_rate": 1.6429613601090123e-06, "loss": 0.0805, "step": 25025 }, { "epoch": 82.05245901639344, "grad_norm": 2.1637349128723145, "learning_rate": 1.642378239764545e-06, "loss": 0.0599, "step": 25026 }, { "epoch": 82.05573770491803, "grad_norm": 2.035550117492676, "learning_rate": 1.641795213660955e-06, "loss": 0.0657, "step": 25027 }, { "epoch": 82.05901639344262, "grad_norm": 2.5991694927215576, "learning_rate": 1.6412122818048137e-06, "loss": 0.1297, "step": 25028 }, { "epoch": 82.0622950819672, "grad_norm": 1.9530590772628784, "learning_rate": 1.6406294442026949e-06, "loss": 0.1003, "step": 25029 }, { "epoch": 82.06557377049181, "grad_norm": 6.294688701629639, "learning_rate": 1.6400467008611686e-06, "loss": 0.053, "step": 25030 }, { "epoch": 82.0688524590164, "grad_norm": 2.4430315494537354, "learning_rate": 1.639464051786811e-06, "loss": 0.0804, "step": 25031 }, { "epoch": 82.07213114754099, "grad_norm": 2.3326923847198486, "learning_rate": 1.6388814969861876e-06, "loss": 0.2043, "step": 25032 }, { "epoch": 82.07540983606557, "grad_norm": 2.7073261737823486, "learning_rate": 1.6382990364658702e-06, "loss": 0.1203, "step": 25033 }, { "epoch": 82.07868852459016, "grad_norm": 2.721928358078003, "learning_rate": 1.6377166702324209e-06, "loss": 0.2427, "step": 25034 }, { "epoch": 82.08196721311475, "grad_norm": 1.8761922121047974, "learning_rate": 1.6371343982924148e-06, "loss": 0.0363, "step": 25035 }, { "epoch": 82.08524590163934, "grad_norm": 2.410520553588867, "learning_rate": 1.6365522206524132e-06, "loss": 0.0993, "step": 25036 }, { "epoch": 82.08852459016393, "grad_norm": 2.447758436203003, "learning_rate": 1.6359701373189806e-06, "loss": 0.134, "step": 25037 }, { "epoch": 82.09180327868853, "grad_norm": 2.2055482864379883, "learning_rate": 1.6353881482986789e-06, "loss": 0.1228, "step": 25038 }, { "epoch": 82.09508196721312, "grad_norm": 3.639308214187622, "learning_rate": 1.6348062535980757e-06, "loss": 0.1094, "step": 25039 }, { "epoch": 82.09836065573771, "grad_norm": 2.0603079795837402, "learning_rate": 1.6342244532237294e-06, "loss": 0.0661, "step": 25040 }, { "epoch": 82.1016393442623, "grad_norm": 3.7999308109283447, "learning_rate": 1.6336427471822002e-06, "loss": 0.0945, "step": 25041 }, { "epoch": 82.10491803278688, "grad_norm": 2.6631879806518555, "learning_rate": 1.6330611354800485e-06, "loss": 0.1138, "step": 25042 }, { "epoch": 82.10819672131147, "grad_norm": 3.088045120239258, "learning_rate": 1.632479618123829e-06, "loss": 0.2254, "step": 25043 }, { "epoch": 82.11147540983606, "grad_norm": 2.5374820232391357, "learning_rate": 1.631898195120104e-06, "loss": 0.0982, "step": 25044 }, { "epoch": 82.11475409836065, "grad_norm": 2.393516778945923, "learning_rate": 1.6313168664754287e-06, "loss": 0.1036, "step": 25045 }, { "epoch": 82.11803278688525, "grad_norm": 2.7360308170318604, "learning_rate": 1.6307356321963551e-06, "loss": 0.275, "step": 25046 }, { "epoch": 82.12131147540984, "grad_norm": 2.0853238105773926, "learning_rate": 1.6301544922894385e-06, "loss": 0.1198, "step": 25047 }, { "epoch": 82.12459016393443, "grad_norm": 2.745086431503296, "learning_rate": 1.6295734467612334e-06, "loss": 0.1036, "step": 25048 }, { "epoch": 82.12786885245902, "grad_norm": 2.8759751319885254, "learning_rate": 1.6289924956182923e-06, "loss": 0.0769, "step": 25049 }, { "epoch": 82.1311475409836, "grad_norm": 1.8956657648086548, "learning_rate": 1.6284116388671633e-06, "loss": 0.1612, "step": 25050 }, { "epoch": 82.1344262295082, "grad_norm": 2.8219153881073, "learning_rate": 1.6278308765143958e-06, "loss": 0.1722, "step": 25051 }, { "epoch": 82.13770491803278, "grad_norm": 2.0219779014587402, "learning_rate": 1.627250208566542e-06, "loss": 0.0973, "step": 25052 }, { "epoch": 82.14098360655737, "grad_norm": 1.969748854637146, "learning_rate": 1.626669635030147e-06, "loss": 0.0893, "step": 25053 }, { "epoch": 82.14426229508197, "grad_norm": 2.3904004096984863, "learning_rate": 1.6260891559117587e-06, "loss": 0.1864, "step": 25054 }, { "epoch": 82.14754098360656, "grad_norm": 3.77477765083313, "learning_rate": 1.6255087712179218e-06, "loss": 0.1569, "step": 25055 }, { "epoch": 82.15081967213115, "grad_norm": 2.977602481842041, "learning_rate": 1.624928480955178e-06, "loss": 0.0855, "step": 25056 }, { "epoch": 82.15409836065574, "grad_norm": 2.9267070293426514, "learning_rate": 1.6243482851300763e-06, "loss": 0.1318, "step": 25057 }, { "epoch": 82.15737704918033, "grad_norm": 2.7190310955047607, "learning_rate": 1.6237681837491559e-06, "loss": 0.2216, "step": 25058 }, { "epoch": 82.16065573770491, "grad_norm": 1.780804991722107, "learning_rate": 1.6231881768189583e-06, "loss": 0.0385, "step": 25059 }, { "epoch": 82.1639344262295, "grad_norm": 2.3912549018859863, "learning_rate": 1.6226082643460206e-06, "loss": 0.1609, "step": 25060 }, { "epoch": 82.1672131147541, "grad_norm": 2.4178342819213867, "learning_rate": 1.6220284463368885e-06, "loss": 0.0463, "step": 25061 }, { "epoch": 82.1704918032787, "grad_norm": 2.6874210834503174, "learning_rate": 1.6214487227980969e-06, "loss": 0.1479, "step": 25062 }, { "epoch": 82.17377049180328, "grad_norm": 2.515935182571411, "learning_rate": 1.6208690937361816e-06, "loss": 0.1797, "step": 25063 }, { "epoch": 82.17704918032787, "grad_norm": 2.3487184047698975, "learning_rate": 1.6202895591576772e-06, "loss": 0.1656, "step": 25064 }, { "epoch": 82.18032786885246, "grad_norm": 2.3823623657226562, "learning_rate": 1.6197101190691233e-06, "loss": 0.1242, "step": 25065 }, { "epoch": 82.18360655737705, "grad_norm": 2.4883615970611572, "learning_rate": 1.619130773477051e-06, "loss": 0.2302, "step": 25066 }, { "epoch": 82.18688524590164, "grad_norm": 2.082710027694702, "learning_rate": 1.6185515223879934e-06, "loss": 0.0613, "step": 25067 }, { "epoch": 82.19016393442622, "grad_norm": 3.4657137393951416, "learning_rate": 1.6179723658084811e-06, "loss": 0.0956, "step": 25068 }, { "epoch": 82.19344262295083, "grad_norm": 2.4534332752227783, "learning_rate": 1.617393303745044e-06, "loss": 0.1311, "step": 25069 }, { "epoch": 82.19672131147541, "grad_norm": 2.0363962650299072, "learning_rate": 1.6168143362042144e-06, "loss": 0.0428, "step": 25070 }, { "epoch": 82.2, "grad_norm": 2.391775131225586, "learning_rate": 1.6162354631925203e-06, "loss": 0.1443, "step": 25071 }, { "epoch": 82.20327868852459, "grad_norm": 2.2508580684661865, "learning_rate": 1.6156566847164868e-06, "loss": 0.0836, "step": 25072 }, { "epoch": 82.20655737704918, "grad_norm": 2.2051825523376465, "learning_rate": 1.6150780007826428e-06, "loss": 0.0659, "step": 25073 }, { "epoch": 82.20983606557377, "grad_norm": 2.0516200065612793, "learning_rate": 1.6144994113975122e-06, "loss": 0.0284, "step": 25074 }, { "epoch": 82.21311475409836, "grad_norm": 2.7202200889587402, "learning_rate": 1.6139209165676184e-06, "loss": 0.196, "step": 25075 }, { "epoch": 82.21639344262294, "grad_norm": 4.794836521148682, "learning_rate": 1.6133425162994864e-06, "loss": 0.23, "step": 25076 }, { "epoch": 82.21967213114755, "grad_norm": 1.9575282335281372, "learning_rate": 1.6127642105996388e-06, "loss": 0.1256, "step": 25077 }, { "epoch": 82.22295081967214, "grad_norm": 2.506702423095703, "learning_rate": 1.6121859994745925e-06, "loss": 0.103, "step": 25078 }, { "epoch": 82.22622950819672, "grad_norm": 2.5457763671875, "learning_rate": 1.6116078829308724e-06, "loss": 0.0906, "step": 25079 }, { "epoch": 82.22950819672131, "grad_norm": 3.6832315921783447, "learning_rate": 1.6110298609749953e-06, "loss": 0.1586, "step": 25080 }, { "epoch": 82.2327868852459, "grad_norm": 2.3686888217926025, "learning_rate": 1.6104519336134794e-06, "loss": 0.0633, "step": 25081 }, { "epoch": 82.23606557377049, "grad_norm": 2.1073575019836426, "learning_rate": 1.6098741008528373e-06, "loss": 0.0571, "step": 25082 }, { "epoch": 82.23934426229508, "grad_norm": 3.1818840503692627, "learning_rate": 1.6092963626995918e-06, "loss": 0.2057, "step": 25083 }, { "epoch": 82.24262295081967, "grad_norm": 2.347851514816284, "learning_rate": 1.6087187191602527e-06, "loss": 0.0905, "step": 25084 }, { "epoch": 82.24590163934427, "grad_norm": 3.131054401397705, "learning_rate": 1.6081411702413364e-06, "loss": 0.1289, "step": 25085 }, { "epoch": 82.24918032786886, "grad_norm": 2.2174935340881348, "learning_rate": 1.6075637159493495e-06, "loss": 0.1381, "step": 25086 }, { "epoch": 82.25245901639344, "grad_norm": 2.7276198863983154, "learning_rate": 1.6069863562908117e-06, "loss": 0.1872, "step": 25087 }, { "epoch": 82.25573770491803, "grad_norm": 1.6608903408050537, "learning_rate": 1.6064090912722286e-06, "loss": 0.041, "step": 25088 }, { "epoch": 82.25901639344262, "grad_norm": 3.7089245319366455, "learning_rate": 1.6058319209001105e-06, "loss": 0.0907, "step": 25089 }, { "epoch": 82.26229508196721, "grad_norm": 2.955094575881958, "learning_rate": 1.6052548451809623e-06, "loss": 0.0742, "step": 25090 }, { "epoch": 82.2655737704918, "grad_norm": 2.883366346359253, "learning_rate": 1.6046778641212968e-06, "loss": 0.0821, "step": 25091 }, { "epoch": 82.26885245901639, "grad_norm": 6.436603546142578, "learning_rate": 1.6041009777276173e-06, "loss": 0.0711, "step": 25092 }, { "epoch": 82.27213114754099, "grad_norm": 2.5904128551483154, "learning_rate": 1.6035241860064278e-06, "loss": 0.127, "step": 25093 }, { "epoch": 82.27540983606558, "grad_norm": 2.428814172744751, "learning_rate": 1.6029474889642348e-06, "loss": 0.0691, "step": 25094 }, { "epoch": 82.27868852459017, "grad_norm": 2.325144052505493, "learning_rate": 1.602370886607536e-06, "loss": 0.0774, "step": 25095 }, { "epoch": 82.28196721311475, "grad_norm": 2.0904507637023926, "learning_rate": 1.6017943789428402e-06, "loss": 0.0696, "step": 25096 }, { "epoch": 82.28524590163934, "grad_norm": 2.90531849861145, "learning_rate": 1.6012179659766447e-06, "loss": 0.0422, "step": 25097 }, { "epoch": 82.28852459016393, "grad_norm": 3.2390708923339844, "learning_rate": 1.6006416477154485e-06, "loss": 0.1336, "step": 25098 }, { "epoch": 82.29180327868852, "grad_norm": 4.8247971534729, "learning_rate": 1.6000654241657488e-06, "loss": 0.1482, "step": 25099 }, { "epoch": 82.29508196721312, "grad_norm": 6.622519493103027, "learning_rate": 1.5994892953340468e-06, "loss": 0.1567, "step": 25100 }, { "epoch": 82.29836065573771, "grad_norm": 2.2566301822662354, "learning_rate": 1.5989132612268388e-06, "loss": 0.0632, "step": 25101 }, { "epoch": 82.3016393442623, "grad_norm": 3.0092194080352783, "learning_rate": 1.5983373218506171e-06, "loss": 0.1068, "step": 25102 }, { "epoch": 82.30491803278689, "grad_norm": 2.2491612434387207, "learning_rate": 1.5977614772118765e-06, "loss": 0.0484, "step": 25103 }, { "epoch": 82.30819672131148, "grad_norm": 2.81494140625, "learning_rate": 1.5971857273171132e-06, "loss": 0.0967, "step": 25104 }, { "epoch": 82.31147540983606, "grad_norm": 2.223263740539551, "learning_rate": 1.5966100721728184e-06, "loss": 0.0557, "step": 25105 }, { "epoch": 82.31475409836065, "grad_norm": 2.723827838897705, "learning_rate": 1.5960345117854814e-06, "loss": 0.1275, "step": 25106 }, { "epoch": 82.31803278688524, "grad_norm": 3.58585262298584, "learning_rate": 1.5954590461615937e-06, "loss": 0.0694, "step": 25107 }, { "epoch": 82.32131147540984, "grad_norm": 3.0925674438476562, "learning_rate": 1.5948836753076412e-06, "loss": 0.1314, "step": 25108 }, { "epoch": 82.32459016393443, "grad_norm": 2.7427403926849365, "learning_rate": 1.5943083992301166e-06, "loss": 0.1464, "step": 25109 }, { "epoch": 82.32786885245902, "grad_norm": 4.1637492179870605, "learning_rate": 1.5937332179355047e-06, "loss": 0.3361, "step": 25110 }, { "epoch": 82.33114754098361, "grad_norm": 2.84460711479187, "learning_rate": 1.5931581314302914e-06, "loss": 0.1096, "step": 25111 }, { "epoch": 82.3344262295082, "grad_norm": 2.4479868412017822, "learning_rate": 1.592583139720958e-06, "loss": 0.0762, "step": 25112 }, { "epoch": 82.33770491803278, "grad_norm": 2.2259278297424316, "learning_rate": 1.5920082428139938e-06, "loss": 0.0806, "step": 25113 }, { "epoch": 82.34098360655737, "grad_norm": 1.9205894470214844, "learning_rate": 1.5914334407158793e-06, "loss": 0.0667, "step": 25114 }, { "epoch": 82.34426229508196, "grad_norm": 2.2968742847442627, "learning_rate": 1.590858733433095e-06, "loss": 0.1197, "step": 25115 }, { "epoch": 82.34754098360656, "grad_norm": 2.323523998260498, "learning_rate": 1.5902841209721198e-06, "loss": 0.0574, "step": 25116 }, { "epoch": 82.35081967213115, "grad_norm": 2.5740928649902344, "learning_rate": 1.5897096033394377e-06, "loss": 0.1896, "step": 25117 }, { "epoch": 82.35409836065574, "grad_norm": 2.2291266918182373, "learning_rate": 1.5891351805415233e-06, "loss": 0.0692, "step": 25118 }, { "epoch": 82.35737704918033, "grad_norm": 1.7714840173721313, "learning_rate": 1.5885608525848561e-06, "loss": 0.0821, "step": 25119 }, { "epoch": 82.36065573770492, "grad_norm": 2.37389874458313, "learning_rate": 1.5879866194759096e-06, "loss": 0.1118, "step": 25120 }, { "epoch": 82.3639344262295, "grad_norm": 2.1894304752349854, "learning_rate": 1.587412481221159e-06, "loss": 0.1441, "step": 25121 }, { "epoch": 82.3672131147541, "grad_norm": 3.3551316261291504, "learning_rate": 1.5868384378270817e-06, "loss": 0.1127, "step": 25122 }, { "epoch": 82.37049180327868, "grad_norm": 1.923729419708252, "learning_rate": 1.5862644893001478e-06, "loss": 0.0397, "step": 25123 }, { "epoch": 82.37377049180328, "grad_norm": 2.291343927383423, "learning_rate": 1.5856906356468305e-06, "loss": 0.1131, "step": 25124 }, { "epoch": 82.37704918032787, "grad_norm": 2.759572982788086, "learning_rate": 1.5851168768735981e-06, "loss": 0.2246, "step": 25125 }, { "epoch": 82.38032786885246, "grad_norm": 2.227553606033325, "learning_rate": 1.584543212986923e-06, "loss": 0.1061, "step": 25126 }, { "epoch": 82.38360655737705, "grad_norm": 1.923211693763733, "learning_rate": 1.583969643993275e-06, "loss": 0.0426, "step": 25127 }, { "epoch": 82.38688524590164, "grad_norm": 2.2825310230255127, "learning_rate": 1.5833961698991163e-06, "loss": 0.0297, "step": 25128 }, { "epoch": 82.39016393442623, "grad_norm": 3.4214794635772705, "learning_rate": 1.5828227907109194e-06, "loss": 0.1588, "step": 25129 }, { "epoch": 82.39344262295081, "grad_norm": 3.5532782077789307, "learning_rate": 1.5822495064351474e-06, "loss": 0.2108, "step": 25130 }, { "epoch": 82.3967213114754, "grad_norm": 1.943608045578003, "learning_rate": 1.5816763170782612e-06, "loss": 0.0695, "step": 25131 }, { "epoch": 82.4, "grad_norm": 1.8136498928070068, "learning_rate": 1.5811032226467304e-06, "loss": 0.1157, "step": 25132 }, { "epoch": 82.4032786885246, "grad_norm": 2.4796814918518066, "learning_rate": 1.5805302231470144e-06, "loss": 0.2068, "step": 25133 }, { "epoch": 82.40655737704918, "grad_norm": 1.769975185394287, "learning_rate": 1.5799573185855722e-06, "loss": 0.0512, "step": 25134 }, { "epoch": 82.40983606557377, "grad_norm": 1.7266802787780762, "learning_rate": 1.5793845089688676e-06, "loss": 0.0453, "step": 25135 }, { "epoch": 82.41311475409836, "grad_norm": 2.1125316619873047, "learning_rate": 1.5788117943033577e-06, "loss": 0.0906, "step": 25136 }, { "epoch": 82.41639344262295, "grad_norm": 3.2774853706359863, "learning_rate": 1.5782391745955006e-06, "loss": 0.1073, "step": 25137 }, { "epoch": 82.41967213114754, "grad_norm": 2.1225178241729736, "learning_rate": 1.5776666498517511e-06, "loss": 0.0293, "step": 25138 }, { "epoch": 82.42295081967212, "grad_norm": 2.9501757621765137, "learning_rate": 1.57709422007857e-06, "loss": 0.1999, "step": 25139 }, { "epoch": 82.42622950819673, "grad_norm": 2.121558666229248, "learning_rate": 1.5765218852824094e-06, "loss": 0.0843, "step": 25140 }, { "epoch": 82.42950819672132, "grad_norm": 3.046907424926758, "learning_rate": 1.5759496454697221e-06, "loss": 0.1635, "step": 25141 }, { "epoch": 82.4327868852459, "grad_norm": 2.370638608932495, "learning_rate": 1.5753775006469607e-06, "loss": 0.0878, "step": 25142 }, { "epoch": 82.43606557377049, "grad_norm": 1.9935343265533447, "learning_rate": 1.5748054508205767e-06, "loss": 0.1065, "step": 25143 }, { "epoch": 82.43934426229508, "grad_norm": 2.158555269241333, "learning_rate": 1.574233495997022e-06, "loss": 0.1212, "step": 25144 }, { "epoch": 82.44262295081967, "grad_norm": 3.1023712158203125, "learning_rate": 1.5736616361827461e-06, "loss": 0.3196, "step": 25145 }, { "epoch": 82.44590163934426, "grad_norm": 2.659290075302124, "learning_rate": 1.5730898713841968e-06, "loss": 0.3082, "step": 25146 }, { "epoch": 82.44918032786886, "grad_norm": 1.8168801069259644, "learning_rate": 1.5725182016078178e-06, "loss": 0.0938, "step": 25147 }, { "epoch": 82.45245901639345, "grad_norm": 4.032325744628906, "learning_rate": 1.5719466268600615e-06, "loss": 0.083, "step": 25148 }, { "epoch": 82.45573770491804, "grad_norm": 2.761427640914917, "learning_rate": 1.5713751471473692e-06, "loss": 0.1099, "step": 25149 }, { "epoch": 82.45901639344262, "grad_norm": 2.39851450920105, "learning_rate": 1.5708037624761862e-06, "loss": 0.0681, "step": 25150 }, { "epoch": 82.46229508196721, "grad_norm": 2.515266180038452, "learning_rate": 1.5702324728529527e-06, "loss": 0.0676, "step": 25151 }, { "epoch": 82.4655737704918, "grad_norm": 3.2402896881103516, "learning_rate": 1.5696612782841147e-06, "loss": 0.1032, "step": 25152 }, { "epoch": 82.46885245901639, "grad_norm": 2.4697442054748535, "learning_rate": 1.5690901787761114e-06, "loss": 0.1756, "step": 25153 }, { "epoch": 82.47213114754098, "grad_norm": 2.5014519691467285, "learning_rate": 1.5685191743353823e-06, "loss": 0.1115, "step": 25154 }, { "epoch": 82.47540983606558, "grad_norm": 3.716083526611328, "learning_rate": 1.5679482649683664e-06, "loss": 0.2745, "step": 25155 }, { "epoch": 82.47868852459017, "grad_norm": 1.5827367305755615, "learning_rate": 1.5673774506814977e-06, "loss": 0.0312, "step": 25156 }, { "epoch": 82.48196721311476, "grad_norm": 2.3466315269470215, "learning_rate": 1.5668067314812197e-06, "loss": 0.1322, "step": 25157 }, { "epoch": 82.48524590163935, "grad_norm": 1.8243581056594849, "learning_rate": 1.5662361073739629e-06, "loss": 0.1473, "step": 25158 }, { "epoch": 82.48852459016393, "grad_norm": 2.288248062133789, "learning_rate": 1.5656655783661634e-06, "loss": 0.0842, "step": 25159 }, { "epoch": 82.49180327868852, "grad_norm": 2.5028605461120605, "learning_rate": 1.5650951444642516e-06, "loss": 0.0978, "step": 25160 }, { "epoch": 82.49508196721311, "grad_norm": 2.4233479499816895, "learning_rate": 1.5645248056746642e-06, "loss": 0.1105, "step": 25161 }, { "epoch": 82.4983606557377, "grad_norm": 1.8405152559280396, "learning_rate": 1.563954562003831e-06, "loss": 0.0338, "step": 25162 }, { "epoch": 82.5016393442623, "grad_norm": 2.103384494781494, "learning_rate": 1.5633844134581811e-06, "loss": 0.062, "step": 25163 }, { "epoch": 82.50491803278689, "grad_norm": 3.43505859375, "learning_rate": 1.5628143600441415e-06, "loss": 0.078, "step": 25164 }, { "epoch": 82.50819672131148, "grad_norm": 3.047032356262207, "learning_rate": 1.5622444017681438e-06, "loss": 0.3108, "step": 25165 }, { "epoch": 82.51147540983607, "grad_norm": 2.827159881591797, "learning_rate": 1.561674538636615e-06, "loss": 0.1494, "step": 25166 }, { "epoch": 82.51475409836065, "grad_norm": 2.5761899948120117, "learning_rate": 1.561104770655979e-06, "loss": 0.0898, "step": 25167 }, { "epoch": 82.51803278688524, "grad_norm": 2.3239669799804688, "learning_rate": 1.5605350978326606e-06, "loss": 0.0728, "step": 25168 }, { "epoch": 82.52131147540983, "grad_norm": 3.2986109256744385, "learning_rate": 1.5599655201730812e-06, "loss": 0.2614, "step": 25169 }, { "epoch": 82.52459016393442, "grad_norm": 2.133526086807251, "learning_rate": 1.5593960376836693e-06, "loss": 0.1121, "step": 25170 }, { "epoch": 82.52786885245902, "grad_norm": 2.586696147918701, "learning_rate": 1.5588266503708428e-06, "loss": 0.1936, "step": 25171 }, { "epoch": 82.53114754098361, "grad_norm": 2.4269039630889893, "learning_rate": 1.558257358241022e-06, "loss": 0.0632, "step": 25172 }, { "epoch": 82.5344262295082, "grad_norm": 2.420762777328491, "learning_rate": 1.5576881613006246e-06, "loss": 0.2503, "step": 25173 }, { "epoch": 82.53770491803279, "grad_norm": 1.9060776233673096, "learning_rate": 1.5571190595560736e-06, "loss": 0.0558, "step": 25174 }, { "epoch": 82.54098360655738, "grad_norm": 3.254361391067505, "learning_rate": 1.5565500530137834e-06, "loss": 0.0969, "step": 25175 }, { "epoch": 82.54426229508196, "grad_norm": 1.9136261940002441, "learning_rate": 1.5559811416801695e-06, "loss": 0.1082, "step": 25176 }, { "epoch": 82.54754098360655, "grad_norm": 2.5470073223114014, "learning_rate": 1.555412325561646e-06, "loss": 0.078, "step": 25177 }, { "epoch": 82.55081967213114, "grad_norm": 1.8893846273422241, "learning_rate": 1.554843604664632e-06, "loss": 0.0773, "step": 25178 }, { "epoch": 82.55409836065574, "grad_norm": 2.0701446533203125, "learning_rate": 1.5542749789955358e-06, "loss": 0.1127, "step": 25179 }, { "epoch": 82.55737704918033, "grad_norm": 3.1744115352630615, "learning_rate": 1.553706448560769e-06, "loss": 0.0909, "step": 25180 }, { "epoch": 82.56065573770492, "grad_norm": 2.5084547996520996, "learning_rate": 1.5531380133667461e-06, "loss": 0.0852, "step": 25181 }, { "epoch": 82.56393442622951, "grad_norm": 2.7770328521728516, "learning_rate": 1.5525696734198748e-06, "loss": 0.1305, "step": 25182 }, { "epoch": 82.5672131147541, "grad_norm": 3.0266284942626953, "learning_rate": 1.5520014287265605e-06, "loss": 0.2003, "step": 25183 }, { "epoch": 82.57049180327868, "grad_norm": 2.830608606338501, "learning_rate": 1.5514332792932174e-06, "loss": 0.1395, "step": 25184 }, { "epoch": 82.57377049180327, "grad_norm": 1.9164516925811768, "learning_rate": 1.550865225126248e-06, "loss": 0.026, "step": 25185 }, { "epoch": 82.57704918032788, "grad_norm": 2.850735902786255, "learning_rate": 1.550297266232057e-06, "loss": 0.0554, "step": 25186 }, { "epoch": 82.58032786885246, "grad_norm": 1.8979963064193726, "learning_rate": 1.5497294026170484e-06, "loss": 0.1786, "step": 25187 }, { "epoch": 82.58360655737705, "grad_norm": 2.503951072692871, "learning_rate": 1.5491616342876292e-06, "loss": 0.1775, "step": 25188 }, { "epoch": 82.58688524590164, "grad_norm": 2.2922451496124268, "learning_rate": 1.5485939612501988e-06, "loss": 0.0811, "step": 25189 }, { "epoch": 82.59016393442623, "grad_norm": 2.40480375289917, "learning_rate": 1.5480263835111564e-06, "loss": 0.0665, "step": 25190 }, { "epoch": 82.59344262295082, "grad_norm": 2.1041526794433594, "learning_rate": 1.5474589010769082e-06, "loss": 0.0954, "step": 25191 }, { "epoch": 82.5967213114754, "grad_norm": 2.2405779361724854, "learning_rate": 1.5468915139538476e-06, "loss": 0.2266, "step": 25192 }, { "epoch": 82.6, "grad_norm": 2.593698263168335, "learning_rate": 1.5463242221483742e-06, "loss": 0.1547, "step": 25193 }, { "epoch": 82.6032786885246, "grad_norm": 2.373914957046509, "learning_rate": 1.5457570256668864e-06, "loss": 0.1459, "step": 25194 }, { "epoch": 82.60655737704919, "grad_norm": 2.293910264968872, "learning_rate": 1.5451899245157742e-06, "loss": 0.1509, "step": 25195 }, { "epoch": 82.60983606557377, "grad_norm": 2.1327435970306396, "learning_rate": 1.5446229187014393e-06, "loss": 0.0733, "step": 25196 }, { "epoch": 82.61311475409836, "grad_norm": 2.1608846187591553, "learning_rate": 1.5440560082302725e-06, "loss": 0.0836, "step": 25197 }, { "epoch": 82.61639344262295, "grad_norm": 2.828805685043335, "learning_rate": 1.5434891931086671e-06, "loss": 0.1676, "step": 25198 }, { "epoch": 82.61967213114754, "grad_norm": 2.5145254135131836, "learning_rate": 1.5429224733430104e-06, "loss": 0.1896, "step": 25199 }, { "epoch": 82.62295081967213, "grad_norm": 2.4676079750061035, "learning_rate": 1.5423558489396983e-06, "loss": 0.0464, "step": 25200 }, { "epoch": 82.62622950819672, "grad_norm": 2.100053071975708, "learning_rate": 1.541789319905117e-06, "loss": 0.0533, "step": 25201 }, { "epoch": 82.62950819672132, "grad_norm": 2.4630115032196045, "learning_rate": 1.5412228862456569e-06, "loss": 0.1764, "step": 25202 }, { "epoch": 82.6327868852459, "grad_norm": 3.0831894874572754, "learning_rate": 1.5406565479677006e-06, "loss": 0.0911, "step": 25203 }, { "epoch": 82.6360655737705, "grad_norm": 1.9528427124023438, "learning_rate": 1.5400903050776394e-06, "loss": 0.1351, "step": 25204 }, { "epoch": 82.63934426229508, "grad_norm": 2.4499640464782715, "learning_rate": 1.5395241575818565e-06, "loss": 0.1058, "step": 25205 }, { "epoch": 82.64262295081967, "grad_norm": 1.826369047164917, "learning_rate": 1.5389581054867354e-06, "loss": 0.0366, "step": 25206 }, { "epoch": 82.64590163934426, "grad_norm": 2.7442712783813477, "learning_rate": 1.5383921487986585e-06, "loss": 0.1914, "step": 25207 }, { "epoch": 82.64918032786885, "grad_norm": 2.772118330001831, "learning_rate": 1.5378262875240058e-06, "loss": 0.2147, "step": 25208 }, { "epoch": 82.65245901639344, "grad_norm": 3.485215187072754, "learning_rate": 1.5372605216691627e-06, "loss": 0.0893, "step": 25209 }, { "epoch": 82.65573770491804, "grad_norm": 1.8041813373565674, "learning_rate": 1.5366948512405066e-06, "loss": 0.0696, "step": 25210 }, { "epoch": 82.65901639344263, "grad_norm": 2.268620491027832, "learning_rate": 1.5361292762444157e-06, "loss": 0.1332, "step": 25211 }, { "epoch": 82.66229508196722, "grad_norm": 1.7180263996124268, "learning_rate": 1.5355637966872638e-06, "loss": 0.0828, "step": 25212 }, { "epoch": 82.6655737704918, "grad_norm": 3.266524076461792, "learning_rate": 1.5349984125754347e-06, "loss": 0.176, "step": 25213 }, { "epoch": 82.66885245901639, "grad_norm": 2.545422315597534, "learning_rate": 1.5344331239152998e-06, "loss": 0.0994, "step": 25214 }, { "epoch": 82.67213114754098, "grad_norm": 2.1214263439178467, "learning_rate": 1.533867930713232e-06, "loss": 0.1272, "step": 25215 }, { "epoch": 82.67540983606557, "grad_norm": 2.2332897186279297, "learning_rate": 1.533302832975605e-06, "loss": 0.0785, "step": 25216 }, { "epoch": 82.67868852459016, "grad_norm": 2.150728225708008, "learning_rate": 1.5327378307087937e-06, "loss": 0.0683, "step": 25217 }, { "epoch": 82.68196721311476, "grad_norm": 2.865919589996338, "learning_rate": 1.5321729239191663e-06, "loss": 0.0783, "step": 25218 }, { "epoch": 82.68524590163935, "grad_norm": 2.180121898651123, "learning_rate": 1.5316081126130944e-06, "loss": 0.0745, "step": 25219 }, { "epoch": 82.68852459016394, "grad_norm": 2.7590506076812744, "learning_rate": 1.5310433967969463e-06, "loss": 0.0948, "step": 25220 }, { "epoch": 82.69180327868852, "grad_norm": 1.6100006103515625, "learning_rate": 1.5304787764770857e-06, "loss": 0.0746, "step": 25221 }, { "epoch": 82.69508196721311, "grad_norm": 1.9460771083831787, "learning_rate": 1.5299142516598864e-06, "loss": 0.2287, "step": 25222 }, { "epoch": 82.6983606557377, "grad_norm": 1.6090121269226074, "learning_rate": 1.5293498223517101e-06, "loss": 0.1938, "step": 25223 }, { "epoch": 82.70163934426229, "grad_norm": 2.5832250118255615, "learning_rate": 1.5287854885589227e-06, "loss": 0.1525, "step": 25224 }, { "epoch": 82.70491803278688, "grad_norm": 2.661283493041992, "learning_rate": 1.5282212502878835e-06, "loss": 0.1299, "step": 25225 }, { "epoch": 82.70819672131148, "grad_norm": 1.8241040706634521, "learning_rate": 1.5276571075449608e-06, "loss": 0.0308, "step": 25226 }, { "epoch": 82.71147540983607, "grad_norm": 2.4145596027374268, "learning_rate": 1.527093060336513e-06, "loss": 0.1347, "step": 25227 }, { "epoch": 82.71475409836066, "grad_norm": 2.265883445739746, "learning_rate": 1.5265291086689005e-06, "loss": 0.0979, "step": 25228 }, { "epoch": 82.71803278688525, "grad_norm": 2.458878993988037, "learning_rate": 1.5259652525484803e-06, "loss": 0.1576, "step": 25229 }, { "epoch": 82.72131147540983, "grad_norm": 2.406489133834839, "learning_rate": 1.525401491981615e-06, "loss": 0.0726, "step": 25230 }, { "epoch": 82.72459016393442, "grad_norm": 2.693783760070801, "learning_rate": 1.52483782697466e-06, "loss": 0.1175, "step": 25231 }, { "epoch": 82.72786885245901, "grad_norm": 2.601870536804199, "learning_rate": 1.5242742575339696e-06, "loss": 0.0725, "step": 25232 }, { "epoch": 82.73114754098361, "grad_norm": 2.119157075881958, "learning_rate": 1.523710783665897e-06, "loss": 0.0662, "step": 25233 }, { "epoch": 82.7344262295082, "grad_norm": 2.452564001083374, "learning_rate": 1.5231474053768013e-06, "loss": 0.103, "step": 25234 }, { "epoch": 82.73770491803279, "grad_norm": 2.666022539138794, "learning_rate": 1.522584122673032e-06, "loss": 0.1596, "step": 25235 }, { "epoch": 82.74098360655738, "grad_norm": 2.030156373977661, "learning_rate": 1.5220209355609383e-06, "loss": 0.0481, "step": 25236 }, { "epoch": 82.74426229508197, "grad_norm": 2.5073044300079346, "learning_rate": 1.5214578440468764e-06, "loss": 0.0672, "step": 25237 }, { "epoch": 82.74754098360656, "grad_norm": 2.2977418899536133, "learning_rate": 1.5208948481371932e-06, "loss": 0.0663, "step": 25238 }, { "epoch": 82.75081967213114, "grad_norm": 1.9640271663665771, "learning_rate": 1.520331947838234e-06, "loss": 0.0618, "step": 25239 }, { "epoch": 82.75409836065573, "grad_norm": 2.030156373977661, "learning_rate": 1.519769143156351e-06, "loss": 0.0823, "step": 25240 }, { "epoch": 82.75737704918033, "grad_norm": 1.7363975048065186, "learning_rate": 1.5192064340978874e-06, "loss": 0.0276, "step": 25241 }, { "epoch": 82.76065573770492, "grad_norm": 2.1491332054138184, "learning_rate": 1.5186438206691879e-06, "loss": 0.0526, "step": 25242 }, { "epoch": 82.76393442622951, "grad_norm": 2.1194164752960205, "learning_rate": 1.5180813028765995e-06, "loss": 0.1624, "step": 25243 }, { "epoch": 82.7672131147541, "grad_norm": 4.51901912689209, "learning_rate": 1.5175188807264628e-06, "loss": 0.1734, "step": 25244 }, { "epoch": 82.77049180327869, "grad_norm": 2.2990121841430664, "learning_rate": 1.5169565542251208e-06, "loss": 0.0521, "step": 25245 }, { "epoch": 82.77377049180328, "grad_norm": 2.689525604248047, "learning_rate": 1.5163943233789147e-06, "loss": 0.1822, "step": 25246 }, { "epoch": 82.77704918032786, "grad_norm": 1.75571870803833, "learning_rate": 1.5158321881941796e-06, "loss": 0.0584, "step": 25247 }, { "epoch": 82.78032786885245, "grad_norm": 2.269350290298462, "learning_rate": 1.5152701486772613e-06, "loss": 0.076, "step": 25248 }, { "epoch": 82.78360655737706, "grad_norm": 2.0652565956115723, "learning_rate": 1.514708204834493e-06, "loss": 0.0399, "step": 25249 }, { "epoch": 82.78688524590164, "grad_norm": 2.354058027267456, "learning_rate": 1.5141463566722126e-06, "loss": 0.0813, "step": 25250 }, { "epoch": 82.79016393442623, "grad_norm": 2.621582269668579, "learning_rate": 1.5135846041967529e-06, "loss": 0.2011, "step": 25251 }, { "epoch": 82.79344262295082, "grad_norm": 2.1007893085479736, "learning_rate": 1.513022947414453e-06, "loss": 0.0396, "step": 25252 }, { "epoch": 82.79672131147541, "grad_norm": 2.4856810569763184, "learning_rate": 1.5124613863316429e-06, "loss": 0.1022, "step": 25253 }, { "epoch": 82.8, "grad_norm": 2.522085666656494, "learning_rate": 1.511899920954656e-06, "loss": 0.0799, "step": 25254 }, { "epoch": 82.80327868852459, "grad_norm": 2.4570419788360596, "learning_rate": 1.5113385512898204e-06, "loss": 0.2586, "step": 25255 }, { "epoch": 82.80655737704917, "grad_norm": 1.5972801446914673, "learning_rate": 1.5107772773434715e-06, "loss": 0.1043, "step": 25256 }, { "epoch": 82.80983606557378, "grad_norm": 3.0188026428222656, "learning_rate": 1.510216099121935e-06, "loss": 0.1543, "step": 25257 }, { "epoch": 82.81311475409836, "grad_norm": 2.468191385269165, "learning_rate": 1.5096550166315393e-06, "loss": 0.1476, "step": 25258 }, { "epoch": 82.81639344262295, "grad_norm": 2.100886821746826, "learning_rate": 1.5090940298786105e-06, "loss": 0.0627, "step": 25259 }, { "epoch": 82.81967213114754, "grad_norm": 2.464221239089966, "learning_rate": 1.5085331388694736e-06, "loss": 0.1121, "step": 25260 }, { "epoch": 82.82295081967213, "grad_norm": 2.324633836746216, "learning_rate": 1.5079723436104553e-06, "loss": 0.2008, "step": 25261 }, { "epoch": 82.82622950819672, "grad_norm": 2.747525453567505, "learning_rate": 1.507411644107879e-06, "loss": 0.1731, "step": 25262 }, { "epoch": 82.8295081967213, "grad_norm": 2.2733314037323, "learning_rate": 1.506851040368067e-06, "loss": 0.052, "step": 25263 }, { "epoch": 82.8327868852459, "grad_norm": 2.005946636199951, "learning_rate": 1.5062905323973375e-06, "loss": 0.1119, "step": 25264 }, { "epoch": 82.8360655737705, "grad_norm": 2.765986204147339, "learning_rate": 1.505730120202016e-06, "loss": 0.0648, "step": 25265 }, { "epoch": 82.83934426229509, "grad_norm": 2.4504616260528564, "learning_rate": 1.5051698037884187e-06, "loss": 0.2951, "step": 25266 }, { "epoch": 82.84262295081967, "grad_norm": 2.433410406112671, "learning_rate": 1.5046095831628638e-06, "loss": 0.0611, "step": 25267 }, { "epoch": 82.84590163934426, "grad_norm": 1.6263015270233154, "learning_rate": 1.5040494583316678e-06, "loss": 0.0333, "step": 25268 }, { "epoch": 82.84918032786885, "grad_norm": 2.4120101928710938, "learning_rate": 1.5034894293011492e-06, "loss": 0.0658, "step": 25269 }, { "epoch": 82.85245901639344, "grad_norm": 2.240260601043701, "learning_rate": 1.502929496077622e-06, "loss": 0.1105, "step": 25270 }, { "epoch": 82.85573770491803, "grad_norm": 2.4132745265960693, "learning_rate": 1.5023696586673985e-06, "loss": 0.2062, "step": 25271 }, { "epoch": 82.85901639344263, "grad_norm": 2.1184937953948975, "learning_rate": 1.5018099170767942e-06, "loss": 0.0683, "step": 25272 }, { "epoch": 82.86229508196722, "grad_norm": 2.1570518016815186, "learning_rate": 1.5012502713121145e-06, "loss": 0.1504, "step": 25273 }, { "epoch": 82.8655737704918, "grad_norm": 2.4393584728240967, "learning_rate": 1.5006907213796785e-06, "loss": 0.1485, "step": 25274 }, { "epoch": 82.8688524590164, "grad_norm": 2.7967793941497803, "learning_rate": 1.5001312672857915e-06, "loss": 0.0975, "step": 25275 }, { "epoch": 82.87213114754098, "grad_norm": 2.7121994495391846, "learning_rate": 1.499571909036761e-06, "loss": 0.159, "step": 25276 }, { "epoch": 82.87540983606557, "grad_norm": 2.0434179306030273, "learning_rate": 1.4990126466388944e-06, "loss": 0.0642, "step": 25277 }, { "epoch": 82.87868852459016, "grad_norm": 2.6036040782928467, "learning_rate": 1.4984534800985008e-06, "loss": 0.1507, "step": 25278 }, { "epoch": 82.88196721311475, "grad_norm": 2.5215985774993896, "learning_rate": 1.4978944094218828e-06, "loss": 0.2489, "step": 25279 }, { "epoch": 82.88524590163935, "grad_norm": 1.756888508796692, "learning_rate": 1.4973354346153468e-06, "loss": 0.0471, "step": 25280 }, { "epoch": 82.88852459016394, "grad_norm": 2.429762363433838, "learning_rate": 1.496776555685191e-06, "loss": 0.2675, "step": 25281 }, { "epoch": 82.89180327868853, "grad_norm": 1.7902956008911133, "learning_rate": 1.4962177726377235e-06, "loss": 0.0316, "step": 25282 }, { "epoch": 82.89508196721312, "grad_norm": 1.844582200050354, "learning_rate": 1.495659085479242e-06, "loss": 0.0628, "step": 25283 }, { "epoch": 82.8983606557377, "grad_norm": 2.7840163707733154, "learning_rate": 1.4951004942160463e-06, "loss": 0.0846, "step": 25284 }, { "epoch": 82.90163934426229, "grad_norm": 2.4021215438842773, "learning_rate": 1.494541998854434e-06, "loss": 0.1433, "step": 25285 }, { "epoch": 82.90491803278688, "grad_norm": 1.889587163925171, "learning_rate": 1.4939835994007058e-06, "loss": 0.0413, "step": 25286 }, { "epoch": 82.90819672131147, "grad_norm": 1.303999662399292, "learning_rate": 1.493425295861156e-06, "loss": 0.0875, "step": 25287 }, { "epoch": 82.91147540983607, "grad_norm": 2.3756253719329834, "learning_rate": 1.4928670882420816e-06, "loss": 0.1352, "step": 25288 }, { "epoch": 82.91475409836066, "grad_norm": 1.9433505535125732, "learning_rate": 1.4923089765497733e-06, "loss": 0.0516, "step": 25289 }, { "epoch": 82.91803278688525, "grad_norm": 2.396986722946167, "learning_rate": 1.49175096079053e-06, "loss": 0.0475, "step": 25290 }, { "epoch": 82.92131147540984, "grad_norm": 3.2292659282684326, "learning_rate": 1.4911930409706398e-06, "loss": 0.2014, "step": 25291 }, { "epoch": 82.92459016393443, "grad_norm": 2.0820765495300293, "learning_rate": 1.4906352170963933e-06, "loss": 0.0758, "step": 25292 }, { "epoch": 82.92786885245901, "grad_norm": 2.8918917179107666, "learning_rate": 1.4900774891740855e-06, "loss": 0.1181, "step": 25293 }, { "epoch": 82.9311475409836, "grad_norm": 2.0820138454437256, "learning_rate": 1.489519857210001e-06, "loss": 0.0471, "step": 25294 }, { "epoch": 82.93442622950819, "grad_norm": 2.31087589263916, "learning_rate": 1.4889623212104266e-06, "loss": 0.1133, "step": 25295 }, { "epoch": 82.9377049180328, "grad_norm": 2.1265344619750977, "learning_rate": 1.4884048811816532e-06, "loss": 0.1238, "step": 25296 }, { "epoch": 82.94098360655738, "grad_norm": 2.4526126384735107, "learning_rate": 1.487847537129965e-06, "loss": 0.041, "step": 25297 }, { "epoch": 82.94426229508197, "grad_norm": 1.8067567348480225, "learning_rate": 1.487290289061647e-06, "loss": 0.2028, "step": 25298 }, { "epoch": 82.94754098360656, "grad_norm": 2.2580699920654297, "learning_rate": 1.4867331369829786e-06, "loss": 0.2017, "step": 25299 }, { "epoch": 82.95081967213115, "grad_norm": 2.4618613719940186, "learning_rate": 1.486176080900248e-06, "loss": 0.0956, "step": 25300 }, { "epoch": 82.95409836065573, "grad_norm": 2.2479865550994873, "learning_rate": 1.4856191208197347e-06, "loss": 0.1147, "step": 25301 }, { "epoch": 82.95737704918032, "grad_norm": 2.526674270629883, "learning_rate": 1.485062256747719e-06, "loss": 0.0595, "step": 25302 }, { "epoch": 82.96065573770491, "grad_norm": 2.4527151584625244, "learning_rate": 1.4845054886904764e-06, "loss": 0.1941, "step": 25303 }, { "epoch": 82.96393442622951, "grad_norm": 2.551013708114624, "learning_rate": 1.4839488166542914e-06, "loss": 0.1468, "step": 25304 }, { "epoch": 82.9672131147541, "grad_norm": 1.8794059753417969, "learning_rate": 1.483392240645437e-06, "loss": 0.0937, "step": 25305 }, { "epoch": 82.97049180327869, "grad_norm": 2.1645820140838623, "learning_rate": 1.4828357606701915e-06, "loss": 0.0453, "step": 25306 }, { "epoch": 82.97377049180328, "grad_norm": 2.1983189582824707, "learning_rate": 1.4822793767348253e-06, "loss": 0.1801, "step": 25307 }, { "epoch": 82.97704918032787, "grad_norm": 2.31329345703125, "learning_rate": 1.481723088845617e-06, "loss": 0.0978, "step": 25308 }, { "epoch": 82.98032786885246, "grad_norm": 2.4862473011016846, "learning_rate": 1.4811668970088388e-06, "loss": 0.1364, "step": 25309 }, { "epoch": 82.98360655737704, "grad_norm": 1.9588247537612915, "learning_rate": 1.4806108012307607e-06, "loss": 0.1431, "step": 25310 }, { "epoch": 82.98688524590163, "grad_norm": 2.5291366577148438, "learning_rate": 1.480054801517654e-06, "loss": 0.2728, "step": 25311 }, { "epoch": 82.99016393442623, "grad_norm": 3.1689987182617188, "learning_rate": 1.4794988978757852e-06, "loss": 0.1398, "step": 25312 }, { "epoch": 82.99344262295082, "grad_norm": 2.2038633823394775, "learning_rate": 1.4789430903114277e-06, "loss": 0.0459, "step": 25313 }, { "epoch": 82.99672131147541, "grad_norm": 1.981109857559204, "learning_rate": 1.4783873788308466e-06, "loss": 0.1114, "step": 25314 }, { "epoch": 83.0, "grad_norm": 1.9606808423995972, "learning_rate": 1.4778317634403082e-06, "loss": 0.0737, "step": 25315 }, { "epoch": 83.00327868852459, "grad_norm": 2.01446795463562, "learning_rate": 1.4772762441460764e-06, "loss": 0.0413, "step": 25316 }, { "epoch": 83.00655737704918, "grad_norm": 2.139528512954712, "learning_rate": 1.476720820954417e-06, "loss": 0.0396, "step": 25317 }, { "epoch": 83.00983606557377, "grad_norm": 2.785203456878662, "learning_rate": 1.4761654938715931e-06, "loss": 0.0936, "step": 25318 }, { "epoch": 83.01311475409837, "grad_norm": 4.4368720054626465, "learning_rate": 1.475610262903865e-06, "loss": 0.0872, "step": 25319 }, { "epoch": 83.01639344262296, "grad_norm": 2.8950726985931396, "learning_rate": 1.4750551280574931e-06, "loss": 0.1078, "step": 25320 }, { "epoch": 83.01967213114754, "grad_norm": 2.6572153568267822, "learning_rate": 1.4745000893387395e-06, "loss": 0.2749, "step": 25321 }, { "epoch": 83.02295081967213, "grad_norm": 2.5523924827575684, "learning_rate": 1.4739451467538634e-06, "loss": 0.0834, "step": 25322 }, { "epoch": 83.02622950819672, "grad_norm": 2.877393960952759, "learning_rate": 1.4733903003091189e-06, "loss": 0.1396, "step": 25323 }, { "epoch": 83.02950819672131, "grad_norm": 1.9564118385314941, "learning_rate": 1.472835550010765e-06, "loss": 0.0405, "step": 25324 }, { "epoch": 83.0327868852459, "grad_norm": 2.964205265045166, "learning_rate": 1.4722808958650537e-06, "loss": 0.1452, "step": 25325 }, { "epoch": 83.03606557377049, "grad_norm": 1.4731175899505615, "learning_rate": 1.4717263378782442e-06, "loss": 0.0199, "step": 25326 }, { "epoch": 83.03934426229509, "grad_norm": 3.2127792835235596, "learning_rate": 1.4711718760565884e-06, "loss": 0.2415, "step": 25327 }, { "epoch": 83.04262295081968, "grad_norm": 3.005197286605835, "learning_rate": 1.4706175104063369e-06, "loss": 0.1943, "step": 25328 }, { "epoch": 83.04590163934427, "grad_norm": 2.283298969268799, "learning_rate": 1.470063240933739e-06, "loss": 0.1658, "step": 25329 }, { "epoch": 83.04918032786885, "grad_norm": 2.3577661514282227, "learning_rate": 1.4695090676450484e-06, "loss": 0.1903, "step": 25330 }, { "epoch": 83.05245901639344, "grad_norm": 1.9976638555526733, "learning_rate": 1.468954990546514e-06, "loss": 0.0827, "step": 25331 }, { "epoch": 83.05573770491803, "grad_norm": 2.770716667175293, "learning_rate": 1.4684010096443813e-06, "loss": 0.2461, "step": 25332 }, { "epoch": 83.05901639344262, "grad_norm": 2.2340266704559326, "learning_rate": 1.4678471249448955e-06, "loss": 0.1051, "step": 25333 }, { "epoch": 83.0622950819672, "grad_norm": 2.2842929363250732, "learning_rate": 1.4672933364543062e-06, "loss": 0.1312, "step": 25334 }, { "epoch": 83.06557377049181, "grad_norm": 2.078922748565674, "learning_rate": 1.4667396441788572e-06, "loss": 0.0693, "step": 25335 }, { "epoch": 83.0688524590164, "grad_norm": 2.4207818508148193, "learning_rate": 1.4661860481247913e-06, "loss": 0.0685, "step": 25336 }, { "epoch": 83.07213114754099, "grad_norm": 2.7487499713897705, "learning_rate": 1.4656325482983503e-06, "loss": 0.1391, "step": 25337 }, { "epoch": 83.07540983606557, "grad_norm": 3.149383544921875, "learning_rate": 1.4650791447057734e-06, "loss": 0.0461, "step": 25338 }, { "epoch": 83.07868852459016, "grad_norm": 2.4560952186584473, "learning_rate": 1.4645258373533056e-06, "loss": 0.1839, "step": 25339 }, { "epoch": 83.08196721311475, "grad_norm": 2.4696052074432373, "learning_rate": 1.4639726262471842e-06, "loss": 0.1177, "step": 25340 }, { "epoch": 83.08524590163934, "grad_norm": 2.1598074436187744, "learning_rate": 1.4634195113936446e-06, "loss": 0.118, "step": 25341 }, { "epoch": 83.08852459016393, "grad_norm": 2.5804052352905273, "learning_rate": 1.4628664927989268e-06, "loss": 0.0731, "step": 25342 }, { "epoch": 83.09180327868853, "grad_norm": 2.826584815979004, "learning_rate": 1.4623135704692658e-06, "loss": 0.2354, "step": 25343 }, { "epoch": 83.09508196721312, "grad_norm": 2.4375078678131104, "learning_rate": 1.461760744410895e-06, "loss": 0.0864, "step": 25344 }, { "epoch": 83.09836065573771, "grad_norm": 3.645533561706543, "learning_rate": 1.4612080146300512e-06, "loss": 0.2226, "step": 25345 }, { "epoch": 83.1016393442623, "grad_norm": 1.74141526222229, "learning_rate": 1.460655381132965e-06, "loss": 0.0447, "step": 25346 }, { "epoch": 83.10491803278688, "grad_norm": 2.2432830333709717, "learning_rate": 1.4601028439258657e-06, "loss": 0.0703, "step": 25347 }, { "epoch": 83.10819672131147, "grad_norm": 2.791924476623535, "learning_rate": 1.4595504030149888e-06, "loss": 0.1677, "step": 25348 }, { "epoch": 83.11147540983606, "grad_norm": 2.6704812049865723, "learning_rate": 1.458998058406561e-06, "loss": 0.258, "step": 25349 }, { "epoch": 83.11475409836065, "grad_norm": 1.8574542999267578, "learning_rate": 1.45844581010681e-06, "loss": 0.035, "step": 25350 }, { "epoch": 83.11803278688525, "grad_norm": 2.4174041748046875, "learning_rate": 1.4578936581219616e-06, "loss": 0.0689, "step": 25351 }, { "epoch": 83.12131147540984, "grad_norm": 2.4888250827789307, "learning_rate": 1.4573416024582465e-06, "loss": 0.1271, "step": 25352 }, { "epoch": 83.12459016393443, "grad_norm": 3.03341007232666, "learning_rate": 1.4567896431218863e-06, "loss": 0.1498, "step": 25353 }, { "epoch": 83.12786885245902, "grad_norm": 2.7715585231781006, "learning_rate": 1.4562377801191053e-06, "loss": 0.0962, "step": 25354 }, { "epoch": 83.1311475409836, "grad_norm": 1.7305654287338257, "learning_rate": 1.4556860134561246e-06, "loss": 0.0947, "step": 25355 }, { "epoch": 83.1344262295082, "grad_norm": 2.750699758529663, "learning_rate": 1.4551343431391707e-06, "loss": 0.0929, "step": 25356 }, { "epoch": 83.13770491803278, "grad_norm": 3.8346776962280273, "learning_rate": 1.4545827691744607e-06, "loss": 0.1788, "step": 25357 }, { "epoch": 83.14098360655737, "grad_norm": 2.7950010299682617, "learning_rate": 1.4540312915682154e-06, "loss": 0.0482, "step": 25358 }, { "epoch": 83.14426229508197, "grad_norm": 2.5656604766845703, "learning_rate": 1.4534799103266505e-06, "loss": 0.0855, "step": 25359 }, { "epoch": 83.14754098360656, "grad_norm": 2.17403507232666, "learning_rate": 1.4529286254559882e-06, "loss": 0.1353, "step": 25360 }, { "epoch": 83.15081967213115, "grad_norm": 3.122840642929077, "learning_rate": 1.4523774369624422e-06, "loss": 0.2555, "step": 25361 }, { "epoch": 83.15409836065574, "grad_norm": 2.4679043292999268, "learning_rate": 1.4518263448522285e-06, "loss": 0.0455, "step": 25362 }, { "epoch": 83.15737704918033, "grad_norm": 2.207641124725342, "learning_rate": 1.4512753491315601e-06, "loss": 0.0847, "step": 25363 }, { "epoch": 83.16065573770491, "grad_norm": 2.1997156143188477, "learning_rate": 1.4507244498066485e-06, "loss": 0.0995, "step": 25364 }, { "epoch": 83.1639344262295, "grad_norm": 3.2289717197418213, "learning_rate": 1.4501736468837101e-06, "loss": 0.0797, "step": 25365 }, { "epoch": 83.1672131147541, "grad_norm": 2.279207944869995, "learning_rate": 1.4496229403689532e-06, "loss": 0.0768, "step": 25366 }, { "epoch": 83.1704918032787, "grad_norm": 1.9155875444412231, "learning_rate": 1.4490723302685872e-06, "loss": 0.1181, "step": 25367 }, { "epoch": 83.17377049180328, "grad_norm": 6.832091331481934, "learning_rate": 1.4485218165888204e-06, "loss": 0.0775, "step": 25368 }, { "epoch": 83.17704918032787, "grad_norm": 2.147860288619995, "learning_rate": 1.4479713993358624e-06, "loss": 0.0879, "step": 25369 }, { "epoch": 83.18032786885246, "grad_norm": 2.2815423011779785, "learning_rate": 1.4474210785159205e-06, "loss": 0.121, "step": 25370 }, { "epoch": 83.18360655737705, "grad_norm": 2.838531255722046, "learning_rate": 1.4468708541351973e-06, "loss": 0.0913, "step": 25371 }, { "epoch": 83.18688524590164, "grad_norm": 1.726348638534546, "learning_rate": 1.4463207261998958e-06, "loss": 0.0343, "step": 25372 }, { "epoch": 83.19016393442622, "grad_norm": 2.0449795722961426, "learning_rate": 1.4457706947162242e-06, "loss": 0.0669, "step": 25373 }, { "epoch": 83.19344262295083, "grad_norm": 6.931090831756592, "learning_rate": 1.4452207596903822e-06, "loss": 0.0706, "step": 25374 }, { "epoch": 83.19672131147541, "grad_norm": 1.985330581665039, "learning_rate": 1.4446709211285703e-06, "loss": 0.1439, "step": 25375 }, { "epoch": 83.2, "grad_norm": 2.404728412628174, "learning_rate": 1.4441211790369892e-06, "loss": 0.2121, "step": 25376 }, { "epoch": 83.20327868852459, "grad_norm": 1.911881446838379, "learning_rate": 1.443571533421836e-06, "loss": 0.0322, "step": 25377 }, { "epoch": 83.20655737704918, "grad_norm": 3.282074451446533, "learning_rate": 1.4430219842893123e-06, "loss": 0.0676, "step": 25378 }, { "epoch": 83.20983606557377, "grad_norm": 1.6753580570220947, "learning_rate": 1.4424725316456133e-06, "loss": 0.0362, "step": 25379 }, { "epoch": 83.21311475409836, "grad_norm": 3.5113484859466553, "learning_rate": 1.441923175496933e-06, "loss": 0.0643, "step": 25380 }, { "epoch": 83.21639344262294, "grad_norm": 2.525125026702881, "learning_rate": 1.4413739158494654e-06, "loss": 0.1067, "step": 25381 }, { "epoch": 83.21967213114755, "grad_norm": 10.969902992248535, "learning_rate": 1.4408247527094077e-06, "loss": 0.0657, "step": 25382 }, { "epoch": 83.22295081967214, "grad_norm": 2.8504397869110107, "learning_rate": 1.4402756860829503e-06, "loss": 0.0743, "step": 25383 }, { "epoch": 83.22622950819672, "grad_norm": 3.054901123046875, "learning_rate": 1.439726715976285e-06, "loss": 0.1509, "step": 25384 }, { "epoch": 83.22950819672131, "grad_norm": 1.4910110235214233, "learning_rate": 1.4391778423955983e-06, "loss": 0.0665, "step": 25385 }, { "epoch": 83.2327868852459, "grad_norm": 1.9106874465942383, "learning_rate": 1.4386290653470859e-06, "loss": 0.0826, "step": 25386 }, { "epoch": 83.23606557377049, "grad_norm": 3.4637928009033203, "learning_rate": 1.4380803848369306e-06, "loss": 0.155, "step": 25387 }, { "epoch": 83.23934426229508, "grad_norm": 2.1277387142181396, "learning_rate": 1.4375318008713224e-06, "loss": 0.0509, "step": 25388 }, { "epoch": 83.24262295081967, "grad_norm": 2.26255202293396, "learning_rate": 1.4369833134564458e-06, "loss": 0.0488, "step": 25389 }, { "epoch": 83.24590163934427, "grad_norm": 2.448160171508789, "learning_rate": 1.436434922598483e-06, "loss": 0.1046, "step": 25390 }, { "epoch": 83.24918032786886, "grad_norm": 2.1233863830566406, "learning_rate": 1.4358866283036233e-06, "loss": 0.1349, "step": 25391 }, { "epoch": 83.25245901639344, "grad_norm": 6.574907302856445, "learning_rate": 1.435338430578046e-06, "loss": 0.094, "step": 25392 }, { "epoch": 83.25573770491803, "grad_norm": 2.118880271911621, "learning_rate": 1.434790329427932e-06, "loss": 0.0798, "step": 25393 }, { "epoch": 83.25901639344262, "grad_norm": 2.259105682373047, "learning_rate": 1.4342423248594616e-06, "loss": 0.0628, "step": 25394 }, { "epoch": 83.26229508196721, "grad_norm": 2.2916760444641113, "learning_rate": 1.433694416878817e-06, "loss": 0.1257, "step": 25395 }, { "epoch": 83.2655737704918, "grad_norm": 1.8256868124008179, "learning_rate": 1.4331466054921739e-06, "loss": 0.0414, "step": 25396 }, { "epoch": 83.26885245901639, "grad_norm": 2.492565631866455, "learning_rate": 1.4325988907057087e-06, "loss": 0.1278, "step": 25397 }, { "epoch": 83.27213114754099, "grad_norm": 4.565526008605957, "learning_rate": 1.4320512725256007e-06, "loss": 0.1111, "step": 25398 }, { "epoch": 83.27540983606558, "grad_norm": 1.7920681238174438, "learning_rate": 1.431503750958023e-06, "loss": 0.044, "step": 25399 }, { "epoch": 83.27868852459017, "grad_norm": 1.6834282875061035, "learning_rate": 1.4309563260091474e-06, "loss": 0.0448, "step": 25400 }, { "epoch": 83.28196721311475, "grad_norm": 3.319359302520752, "learning_rate": 1.430408997685151e-06, "loss": 0.1774, "step": 25401 }, { "epoch": 83.28524590163934, "grad_norm": 2.606382131576538, "learning_rate": 1.4298617659922031e-06, "loss": 0.2275, "step": 25402 }, { "epoch": 83.28852459016393, "grad_norm": 2.5517795085906982, "learning_rate": 1.4293146309364726e-06, "loss": 0.1681, "step": 25403 }, { "epoch": 83.29180327868852, "grad_norm": 2.169666290283203, "learning_rate": 1.428767592524133e-06, "loss": 0.1575, "step": 25404 }, { "epoch": 83.29508196721312, "grad_norm": 2.3030412197113037, "learning_rate": 1.4282206507613506e-06, "loss": 0.0687, "step": 25405 }, { "epoch": 83.29836065573771, "grad_norm": 2.191240072250366, "learning_rate": 1.4276738056542928e-06, "loss": 0.0528, "step": 25406 }, { "epoch": 83.3016393442623, "grad_norm": 2.321850538253784, "learning_rate": 1.4271270572091234e-06, "loss": 0.0494, "step": 25407 }, { "epoch": 83.30491803278689, "grad_norm": 2.7964820861816406, "learning_rate": 1.426580405432013e-06, "loss": 0.1035, "step": 25408 }, { "epoch": 83.30819672131148, "grad_norm": 2.9585628509521484, "learning_rate": 1.4260338503291216e-06, "loss": 0.1511, "step": 25409 }, { "epoch": 83.31147540983606, "grad_norm": 2.595162868499756, "learning_rate": 1.4254873919066137e-06, "loss": 0.0962, "step": 25410 }, { "epoch": 83.31475409836065, "grad_norm": 2.5274343490600586, "learning_rate": 1.424941030170649e-06, "loss": 0.0781, "step": 25411 }, { "epoch": 83.31803278688524, "grad_norm": 3.23333477973938, "learning_rate": 1.4243947651273915e-06, "loss": 0.206, "step": 25412 }, { "epoch": 83.32131147540984, "grad_norm": 1.9321801662445068, "learning_rate": 1.4238485967829995e-06, "loss": 0.0491, "step": 25413 }, { "epoch": 83.32459016393443, "grad_norm": 2.1132400035858154, "learning_rate": 1.4233025251436317e-06, "loss": 0.0352, "step": 25414 }, { "epoch": 83.32786885245902, "grad_norm": 2.150782585144043, "learning_rate": 1.4227565502154461e-06, "loss": 0.0584, "step": 25415 }, { "epoch": 83.33114754098361, "grad_norm": 2.621774435043335, "learning_rate": 1.4222106720045959e-06, "loss": 0.0823, "step": 25416 }, { "epoch": 83.3344262295082, "grad_norm": 2.5158140659332275, "learning_rate": 1.4216648905172402e-06, "loss": 0.1479, "step": 25417 }, { "epoch": 83.33770491803278, "grad_norm": 2.241464853286743, "learning_rate": 1.4211192057595335e-06, "loss": 0.145, "step": 25418 }, { "epoch": 83.34098360655737, "grad_norm": 2.1750993728637695, "learning_rate": 1.420573617737626e-06, "loss": 0.1314, "step": 25419 }, { "epoch": 83.34426229508196, "grad_norm": 1.7453625202178955, "learning_rate": 1.4200281264576709e-06, "loss": 0.135, "step": 25420 }, { "epoch": 83.34754098360656, "grad_norm": 2.6907846927642822, "learning_rate": 1.4194827319258208e-06, "loss": 0.0531, "step": 25421 }, { "epoch": 83.35081967213115, "grad_norm": 2.4314839839935303, "learning_rate": 1.4189374341482243e-06, "loss": 0.122, "step": 25422 }, { "epoch": 83.35409836065574, "grad_norm": 2.1397628784179688, "learning_rate": 1.4183922331310306e-06, "loss": 0.0648, "step": 25423 }, { "epoch": 83.35737704918033, "grad_norm": 2.2778286933898926, "learning_rate": 1.4178471288803852e-06, "loss": 0.0374, "step": 25424 }, { "epoch": 83.36065573770492, "grad_norm": 2.549687385559082, "learning_rate": 1.4173021214024384e-06, "loss": 0.0877, "step": 25425 }, { "epoch": 83.3639344262295, "grad_norm": 3.0585808753967285, "learning_rate": 1.4167572107033346e-06, "loss": 0.2012, "step": 25426 }, { "epoch": 83.3672131147541, "grad_norm": 3.522770643234253, "learning_rate": 1.4162123967892161e-06, "loss": 0.1214, "step": 25427 }, { "epoch": 83.37049180327868, "grad_norm": 2.8246803283691406, "learning_rate": 1.4156676796662293e-06, "loss": 0.0817, "step": 25428 }, { "epoch": 83.37377049180328, "grad_norm": 1.7960710525512695, "learning_rate": 1.4151230593405118e-06, "loss": 0.0899, "step": 25429 }, { "epoch": 83.37704918032787, "grad_norm": 2.151134967803955, "learning_rate": 1.4145785358182107e-06, "loss": 0.1468, "step": 25430 }, { "epoch": 83.38032786885246, "grad_norm": 1.764695167541504, "learning_rate": 1.414034109105462e-06, "loss": 0.0273, "step": 25431 }, { "epoch": 83.38360655737705, "grad_norm": 2.306081771850586, "learning_rate": 1.4134897792084067e-06, "loss": 0.152, "step": 25432 }, { "epoch": 83.38688524590164, "grad_norm": 1.7966444492340088, "learning_rate": 1.4129455461331797e-06, "loss": 0.0511, "step": 25433 }, { "epoch": 83.39016393442623, "grad_norm": 2.553269147872925, "learning_rate": 1.412401409885923e-06, "loss": 0.0949, "step": 25434 }, { "epoch": 83.39344262295081, "grad_norm": 2.0571413040161133, "learning_rate": 1.4118573704727678e-06, "loss": 0.0534, "step": 25435 }, { "epoch": 83.3967213114754, "grad_norm": 3.2089574337005615, "learning_rate": 1.4113134278998508e-06, "loss": 0.1393, "step": 25436 }, { "epoch": 83.4, "grad_norm": 2.4166760444641113, "learning_rate": 1.4107695821733026e-06, "loss": 0.1317, "step": 25437 }, { "epoch": 83.4032786885246, "grad_norm": 2.5213875770568848, "learning_rate": 1.4102258332992602e-06, "loss": 0.1932, "step": 25438 }, { "epoch": 83.40655737704918, "grad_norm": 2.3297119140625, "learning_rate": 1.4096821812838525e-06, "loss": 0.1359, "step": 25439 }, { "epoch": 83.40983606557377, "grad_norm": 2.822716474533081, "learning_rate": 1.4091386261332107e-06, "loss": 0.1307, "step": 25440 }, { "epoch": 83.41311475409836, "grad_norm": 3.0433528423309326, "learning_rate": 1.4085951678534627e-06, "loss": 0.1798, "step": 25441 }, { "epoch": 83.41639344262295, "grad_norm": 2.375290870666504, "learning_rate": 1.4080518064507342e-06, "loss": 0.0632, "step": 25442 }, { "epoch": 83.41967213114754, "grad_norm": 2.506641149520874, "learning_rate": 1.4075085419311573e-06, "loss": 0.1194, "step": 25443 }, { "epoch": 83.42295081967212, "grad_norm": 2.563544750213623, "learning_rate": 1.406965374300856e-06, "loss": 0.0995, "step": 25444 }, { "epoch": 83.42622950819673, "grad_norm": 2.313913583755493, "learning_rate": 1.406422303565954e-06, "loss": 0.0976, "step": 25445 }, { "epoch": 83.42950819672132, "grad_norm": 2.274470329284668, "learning_rate": 1.4058793297325745e-06, "loss": 0.1398, "step": 25446 }, { "epoch": 83.4327868852459, "grad_norm": 2.7192091941833496, "learning_rate": 1.4053364528068425e-06, "loss": 0.169, "step": 25447 }, { "epoch": 83.43606557377049, "grad_norm": 2.7592408657073975, "learning_rate": 1.4047936727948786e-06, "loss": 0.1901, "step": 25448 }, { "epoch": 83.43934426229508, "grad_norm": 1.8348971605300903, "learning_rate": 1.4042509897028e-06, "loss": 0.1014, "step": 25449 }, { "epoch": 83.44262295081967, "grad_norm": 2.2633543014526367, "learning_rate": 1.403708403536731e-06, "loss": 0.0778, "step": 25450 }, { "epoch": 83.44590163934426, "grad_norm": 2.3179383277893066, "learning_rate": 1.4031659143027886e-06, "loss": 0.1252, "step": 25451 }, { "epoch": 83.44918032786886, "grad_norm": 2.559183359146118, "learning_rate": 1.4026235220070883e-06, "loss": 0.0656, "step": 25452 }, { "epoch": 83.45245901639345, "grad_norm": 2.7078919410705566, "learning_rate": 1.402081226655745e-06, "loss": 0.124, "step": 25453 }, { "epoch": 83.45573770491804, "grad_norm": 2.6310794353485107, "learning_rate": 1.4015390282548779e-06, "loss": 0.048, "step": 25454 }, { "epoch": 83.45901639344262, "grad_norm": 1.5225114822387695, "learning_rate": 1.4009969268105973e-06, "loss": 0.0415, "step": 25455 }, { "epoch": 83.46229508196721, "grad_norm": 2.6348936557769775, "learning_rate": 1.4004549223290165e-06, "loss": 0.1472, "step": 25456 }, { "epoch": 83.4655737704918, "grad_norm": 2.5130727291107178, "learning_rate": 1.3999130148162487e-06, "loss": 0.085, "step": 25457 }, { "epoch": 83.46885245901639, "grad_norm": 3.188059091567993, "learning_rate": 1.3993712042784035e-06, "loss": 0.0842, "step": 25458 }, { "epoch": 83.47213114754098, "grad_norm": 2.2415380477905273, "learning_rate": 1.3988294907215883e-06, "loss": 0.0554, "step": 25459 }, { "epoch": 83.47540983606558, "grad_norm": 1.8782857656478882, "learning_rate": 1.3982878741519167e-06, "loss": 0.0597, "step": 25460 }, { "epoch": 83.47868852459017, "grad_norm": 2.0317351818084717, "learning_rate": 1.3977463545754922e-06, "loss": 0.1861, "step": 25461 }, { "epoch": 83.48196721311476, "grad_norm": 3.14764142036438, "learning_rate": 1.3972049319984216e-06, "loss": 0.0778, "step": 25462 }, { "epoch": 83.48524590163935, "grad_norm": 5.383913993835449, "learning_rate": 1.3966636064268068e-06, "loss": 0.1036, "step": 25463 }, { "epoch": 83.48852459016393, "grad_norm": 2.2357301712036133, "learning_rate": 1.3961223778667587e-06, "loss": 0.0493, "step": 25464 }, { "epoch": 83.49180327868852, "grad_norm": 2.3050622940063477, "learning_rate": 1.3955812463243767e-06, "loss": 0.2183, "step": 25465 }, { "epoch": 83.49508196721311, "grad_norm": 2.6180484294891357, "learning_rate": 1.3950402118057615e-06, "loss": 0.0833, "step": 25466 }, { "epoch": 83.4983606557377, "grad_norm": 1.8696370124816895, "learning_rate": 1.3944992743170149e-06, "loss": 0.1839, "step": 25467 }, { "epoch": 83.5016393442623, "grad_norm": 2.420593023300171, "learning_rate": 1.3939584338642353e-06, "loss": 0.1854, "step": 25468 }, { "epoch": 83.50491803278689, "grad_norm": 2.127747058868408, "learning_rate": 1.3934176904535235e-06, "loss": 0.0644, "step": 25469 }, { "epoch": 83.50819672131148, "grad_norm": 2.981307029724121, "learning_rate": 1.3928770440909766e-06, "loss": 0.1712, "step": 25470 }, { "epoch": 83.51147540983607, "grad_norm": 2.8444459438323975, "learning_rate": 1.39233649478269e-06, "loss": 0.0971, "step": 25471 }, { "epoch": 83.51475409836065, "grad_norm": 1.8658225536346436, "learning_rate": 1.3917960425347565e-06, "loss": 0.1804, "step": 25472 }, { "epoch": 83.51803278688524, "grad_norm": 2.1088688373565674, "learning_rate": 1.3912556873532756e-06, "loss": 0.0485, "step": 25473 }, { "epoch": 83.52131147540983, "grad_norm": 2.2298340797424316, "learning_rate": 1.3907154292443381e-06, "loss": 0.1267, "step": 25474 }, { "epoch": 83.52459016393442, "grad_norm": 2.6118457317352295, "learning_rate": 1.3901752682140345e-06, "loss": 0.1535, "step": 25475 }, { "epoch": 83.52786885245902, "grad_norm": 2.4278669357299805, "learning_rate": 1.3896352042684546e-06, "loss": 0.0914, "step": 25476 }, { "epoch": 83.53114754098361, "grad_norm": 3.0839717388153076, "learning_rate": 1.3890952374136934e-06, "loss": 0.1068, "step": 25477 }, { "epoch": 83.5344262295082, "grad_norm": 2.0921034812927246, "learning_rate": 1.3885553676558361e-06, "loss": 0.025, "step": 25478 }, { "epoch": 83.53770491803279, "grad_norm": 1.7458367347717285, "learning_rate": 1.3880155950009699e-06, "loss": 0.0878, "step": 25479 }, { "epoch": 83.54098360655738, "grad_norm": 2.9440970420837402, "learning_rate": 1.3874759194551835e-06, "loss": 0.2593, "step": 25480 }, { "epoch": 83.54426229508196, "grad_norm": 2.288414478302002, "learning_rate": 1.3869363410245574e-06, "loss": 0.1043, "step": 25481 }, { "epoch": 83.54754098360655, "grad_norm": 2.9473865032196045, "learning_rate": 1.3863968597151822e-06, "loss": 0.1469, "step": 25482 }, { "epoch": 83.55081967213114, "grad_norm": 2.6226956844329834, "learning_rate": 1.3858574755331388e-06, "loss": 0.0913, "step": 25483 }, { "epoch": 83.55409836065574, "grad_norm": 1.8815736770629883, "learning_rate": 1.385318188484508e-06, "loss": 0.0396, "step": 25484 }, { "epoch": 83.55737704918033, "grad_norm": 2.4938976764678955, "learning_rate": 1.3847789985753701e-06, "loss": 0.1432, "step": 25485 }, { "epoch": 83.56065573770492, "grad_norm": 2.3797285556793213, "learning_rate": 1.3842399058118083e-06, "loss": 0.2277, "step": 25486 }, { "epoch": 83.56393442622951, "grad_norm": 2.6126596927642822, "learning_rate": 1.3837009101998999e-06, "loss": 0.0896, "step": 25487 }, { "epoch": 83.5672131147541, "grad_norm": 2.5654337406158447, "learning_rate": 1.3831620117457222e-06, "loss": 0.1262, "step": 25488 }, { "epoch": 83.57049180327868, "grad_norm": 1.6394540071487427, "learning_rate": 1.3826232104553505e-06, "loss": 0.0232, "step": 25489 }, { "epoch": 83.57377049180327, "grad_norm": 1.7127903699874878, "learning_rate": 1.382084506334863e-06, "loss": 0.0285, "step": 25490 }, { "epoch": 83.57704918032788, "grad_norm": 2.8645477294921875, "learning_rate": 1.3815458993903341e-06, "loss": 0.1416, "step": 25491 }, { "epoch": 83.58032786885246, "grad_norm": 2.1893222332000732, "learning_rate": 1.3810073896278352e-06, "loss": 0.0631, "step": 25492 }, { "epoch": 83.58360655737705, "grad_norm": 2.5276739597320557, "learning_rate": 1.3804689770534408e-06, "loss": 0.1251, "step": 25493 }, { "epoch": 83.58688524590164, "grad_norm": 2.163503408432007, "learning_rate": 1.3799306616732178e-06, "loss": 0.1173, "step": 25494 }, { "epoch": 83.59016393442623, "grad_norm": 1.7394287586212158, "learning_rate": 1.3793924434932416e-06, "loss": 0.0274, "step": 25495 }, { "epoch": 83.59344262295082, "grad_norm": 2.34177565574646, "learning_rate": 1.3788543225195782e-06, "loss": 0.1049, "step": 25496 }, { "epoch": 83.5967213114754, "grad_norm": 2.5782508850097656, "learning_rate": 1.3783162987582965e-06, "loss": 0.209, "step": 25497 }, { "epoch": 83.6, "grad_norm": 1.955331563949585, "learning_rate": 1.3777783722154603e-06, "loss": 0.1095, "step": 25498 }, { "epoch": 83.6032786885246, "grad_norm": 3.1450819969177246, "learning_rate": 1.3772405428971403e-06, "loss": 0.1766, "step": 25499 }, { "epoch": 83.60655737704919, "grad_norm": 2.5357532501220703, "learning_rate": 1.3767028108093994e-06, "loss": 0.1019, "step": 25500 }, { "epoch": 83.60983606557377, "grad_norm": 2.4631707668304443, "learning_rate": 1.3761651759582994e-06, "loss": 0.0859, "step": 25501 }, { "epoch": 83.61311475409836, "grad_norm": 2.4078476428985596, "learning_rate": 1.3756276383499012e-06, "loss": 0.1394, "step": 25502 }, { "epoch": 83.61639344262295, "grad_norm": 2.7958273887634277, "learning_rate": 1.375090197990271e-06, "loss": 0.1385, "step": 25503 }, { "epoch": 83.61967213114754, "grad_norm": 2.661010980606079, "learning_rate": 1.3745528548854658e-06, "loss": 0.159, "step": 25504 }, { "epoch": 83.62295081967213, "grad_norm": 2.5817055702209473, "learning_rate": 1.3740156090415447e-06, "loss": 0.1893, "step": 25505 }, { "epoch": 83.62622950819672, "grad_norm": 2.9413552284240723, "learning_rate": 1.3734784604645667e-06, "loss": 0.1708, "step": 25506 }, { "epoch": 83.62950819672132, "grad_norm": 2.171520233154297, "learning_rate": 1.3729414091605898e-06, "loss": 0.0634, "step": 25507 }, { "epoch": 83.6327868852459, "grad_norm": 2.0564537048339844, "learning_rate": 1.3724044551356662e-06, "loss": 0.0852, "step": 25508 }, { "epoch": 83.6360655737705, "grad_norm": 3.734233856201172, "learning_rate": 1.371867598395854e-06, "loss": 0.105, "step": 25509 }, { "epoch": 83.63934426229508, "grad_norm": 2.725700855255127, "learning_rate": 1.3713308389472068e-06, "loss": 0.0966, "step": 25510 }, { "epoch": 83.64262295081967, "grad_norm": 2.148041009902954, "learning_rate": 1.3707941767957734e-06, "loss": 0.0828, "step": 25511 }, { "epoch": 83.64590163934426, "grad_norm": 2.7956690788269043, "learning_rate": 1.3702576119476098e-06, "loss": 0.0934, "step": 25512 }, { "epoch": 83.64918032786885, "grad_norm": 1.9786241054534912, "learning_rate": 1.3697211444087644e-06, "loss": 0.0465, "step": 25513 }, { "epoch": 83.65245901639344, "grad_norm": 2.607126474380493, "learning_rate": 1.369184774185286e-06, "loss": 0.203, "step": 25514 }, { "epoch": 83.65573770491804, "grad_norm": 2.646974802017212, "learning_rate": 1.3686485012832207e-06, "loss": 0.1027, "step": 25515 }, { "epoch": 83.65901639344263, "grad_norm": 2.2567508220672607, "learning_rate": 1.3681123257086204e-06, "loss": 0.1184, "step": 25516 }, { "epoch": 83.66229508196722, "grad_norm": 2.6861889362335205, "learning_rate": 1.3675762474675291e-06, "loss": 0.1089, "step": 25517 }, { "epoch": 83.6655737704918, "grad_norm": 2.7773687839508057, "learning_rate": 1.367040266565991e-06, "loss": 0.1231, "step": 25518 }, { "epoch": 83.66885245901639, "grad_norm": 2.6403300762176514, "learning_rate": 1.3665043830100489e-06, "loss": 0.179, "step": 25519 }, { "epoch": 83.67213114754098, "grad_norm": 2.609647512435913, "learning_rate": 1.3659685968057457e-06, "loss": 0.1264, "step": 25520 }, { "epoch": 83.67540983606557, "grad_norm": 1.8255170583724976, "learning_rate": 1.3654329079591243e-06, "loss": 0.1187, "step": 25521 }, { "epoch": 83.67868852459016, "grad_norm": 2.3976900577545166, "learning_rate": 1.364897316476226e-06, "loss": 0.0965, "step": 25522 }, { "epoch": 83.68196721311476, "grad_norm": 1.9044607877731323, "learning_rate": 1.3643618223630883e-06, "loss": 0.0293, "step": 25523 }, { "epoch": 83.68524590163935, "grad_norm": 2.027653932571411, "learning_rate": 1.3638264256257473e-06, "loss": 0.0528, "step": 25524 }, { "epoch": 83.68852459016394, "grad_norm": 2.513946771621704, "learning_rate": 1.3632911262702454e-06, "loss": 0.1148, "step": 25525 }, { "epoch": 83.69180327868852, "grad_norm": 4.3245015144348145, "learning_rate": 1.3627559243026155e-06, "loss": 0.0849, "step": 25526 }, { "epoch": 83.69508196721311, "grad_norm": 2.2636239528656006, "learning_rate": 1.3622208197288933e-06, "loss": 0.1027, "step": 25527 }, { "epoch": 83.6983606557377, "grad_norm": 2.415592670440674, "learning_rate": 1.3616858125551092e-06, "loss": 0.098, "step": 25528 }, { "epoch": 83.70163934426229, "grad_norm": 2.726423978805542, "learning_rate": 1.3611509027873027e-06, "loss": 0.3105, "step": 25529 }, { "epoch": 83.70491803278688, "grad_norm": 2.3540101051330566, "learning_rate": 1.3606160904315013e-06, "loss": 0.2488, "step": 25530 }, { "epoch": 83.70819672131148, "grad_norm": 2.3771324157714844, "learning_rate": 1.360081375493737e-06, "loss": 0.1203, "step": 25531 }, { "epoch": 83.71147540983607, "grad_norm": 3.145411252975464, "learning_rate": 1.359546757980037e-06, "loss": 0.09, "step": 25532 }, { "epoch": 83.71475409836066, "grad_norm": 2.746351718902588, "learning_rate": 1.3590122378964299e-06, "loss": 0.2408, "step": 25533 }, { "epoch": 83.71803278688525, "grad_norm": 2.3399503231048584, "learning_rate": 1.3584778152489465e-06, "loss": 0.2215, "step": 25534 }, { "epoch": 83.72131147540983, "grad_norm": 1.682934045791626, "learning_rate": 1.3579434900436105e-06, "loss": 0.099, "step": 25535 }, { "epoch": 83.72459016393442, "grad_norm": 2.358715057373047, "learning_rate": 1.3574092622864465e-06, "loss": 0.0851, "step": 25536 }, { "epoch": 83.72786885245901, "grad_norm": 2.4065353870391846, "learning_rate": 1.3568751319834783e-06, "loss": 0.1827, "step": 25537 }, { "epoch": 83.73114754098361, "grad_norm": 2.9614624977111816, "learning_rate": 1.35634109914073e-06, "loss": 0.0934, "step": 25538 }, { "epoch": 83.7344262295082, "grad_norm": 2.1258819103240967, "learning_rate": 1.3558071637642245e-06, "loss": 0.1505, "step": 25539 }, { "epoch": 83.73770491803279, "grad_norm": 2.921825647354126, "learning_rate": 1.3552733258599804e-06, "loss": 0.0663, "step": 25540 }, { "epoch": 83.74098360655738, "grad_norm": 2.9687488079071045, "learning_rate": 1.354739585434015e-06, "loss": 0.0569, "step": 25541 }, { "epoch": 83.74426229508197, "grad_norm": 1.9610346555709839, "learning_rate": 1.3542059424923526e-06, "loss": 0.0316, "step": 25542 }, { "epoch": 83.74754098360656, "grad_norm": 2.019037961959839, "learning_rate": 1.353672397041007e-06, "loss": 0.0507, "step": 25543 }, { "epoch": 83.75081967213114, "grad_norm": 2.6870946884155273, "learning_rate": 1.3531389490859958e-06, "loss": 0.1221, "step": 25544 }, { "epoch": 83.75409836065573, "grad_norm": 1.6594431400299072, "learning_rate": 1.352605598633333e-06, "loss": 0.0238, "step": 25545 }, { "epoch": 83.75737704918033, "grad_norm": 2.3875062465667725, "learning_rate": 1.3520723456890305e-06, "loss": 0.1104, "step": 25546 }, { "epoch": 83.76065573770492, "grad_norm": 2.2141101360321045, "learning_rate": 1.351539190259107e-06, "loss": 0.1152, "step": 25547 }, { "epoch": 83.76393442622951, "grad_norm": 2.4742469787597656, "learning_rate": 1.3510061323495704e-06, "loss": 0.0873, "step": 25548 }, { "epoch": 83.7672131147541, "grad_norm": 2.9492969512939453, "learning_rate": 1.3504731719664333e-06, "loss": 0.1306, "step": 25549 }, { "epoch": 83.77049180327869, "grad_norm": 2.4189796447753906, "learning_rate": 1.3499403091157015e-06, "loss": 0.103, "step": 25550 }, { "epoch": 83.77377049180328, "grad_norm": 1.9346892833709717, "learning_rate": 1.3494075438033882e-06, "loss": 0.0825, "step": 25551 }, { "epoch": 83.77704918032786, "grad_norm": 1.494080901145935, "learning_rate": 1.3488748760355009e-06, "loss": 0.0409, "step": 25552 }, { "epoch": 83.78032786885245, "grad_norm": 2.5299072265625, "learning_rate": 1.3483423058180423e-06, "loss": 0.0837, "step": 25553 }, { "epoch": 83.78360655737706, "grad_norm": 2.6640546321868896, "learning_rate": 1.3478098331570188e-06, "loss": 0.0626, "step": 25554 }, { "epoch": 83.78688524590164, "grad_norm": 1.8999865055084229, "learning_rate": 1.347277458058437e-06, "loss": 0.1283, "step": 25555 }, { "epoch": 83.79016393442623, "grad_norm": 2.5986218452453613, "learning_rate": 1.3467451805282995e-06, "loss": 0.2251, "step": 25556 }, { "epoch": 83.79344262295082, "grad_norm": 1.8460619449615479, "learning_rate": 1.346213000572606e-06, "loss": 0.1356, "step": 25557 }, { "epoch": 83.79672131147541, "grad_norm": 1.995910406112671, "learning_rate": 1.3456809181973573e-06, "loss": 0.0888, "step": 25558 }, { "epoch": 83.8, "grad_norm": 2.1528584957122803, "learning_rate": 1.3451489334085555e-06, "loss": 0.1594, "step": 25559 }, { "epoch": 83.80327868852459, "grad_norm": 1.9227176904678345, "learning_rate": 1.3446170462121987e-06, "loss": 0.0442, "step": 25560 }, { "epoch": 83.80655737704917, "grad_norm": 2.436394691467285, "learning_rate": 1.3440852566142825e-06, "loss": 0.1244, "step": 25561 }, { "epoch": 83.80983606557378, "grad_norm": 2.7264137268066406, "learning_rate": 1.3435535646208076e-06, "loss": 0.145, "step": 25562 }, { "epoch": 83.81311475409836, "grad_norm": 1.8278627395629883, "learning_rate": 1.3430219702377655e-06, "loss": 0.0936, "step": 25563 }, { "epoch": 83.81639344262295, "grad_norm": 2.014535903930664, "learning_rate": 1.3424904734711497e-06, "loss": 0.1294, "step": 25564 }, { "epoch": 83.81967213114754, "grad_norm": 2.691359758377075, "learning_rate": 1.3419590743269573e-06, "loss": 0.1391, "step": 25565 }, { "epoch": 83.82295081967213, "grad_norm": 2.4762585163116455, "learning_rate": 1.341427772811179e-06, "loss": 0.1923, "step": 25566 }, { "epoch": 83.82622950819672, "grad_norm": 2.747225522994995, "learning_rate": 1.3408965689298037e-06, "loss": 0.0385, "step": 25567 }, { "epoch": 83.8295081967213, "grad_norm": 1.8915538787841797, "learning_rate": 1.3403654626888241e-06, "loss": 0.0528, "step": 25568 }, { "epoch": 83.8327868852459, "grad_norm": 2.3827786445617676, "learning_rate": 1.3398344540942277e-06, "loss": 0.0748, "step": 25569 }, { "epoch": 83.8360655737705, "grad_norm": 2.381317377090454, "learning_rate": 1.3393035431520018e-06, "loss": 0.1091, "step": 25570 }, { "epoch": 83.83934426229509, "grad_norm": 1.9216519594192505, "learning_rate": 1.338772729868134e-06, "loss": 0.0555, "step": 25571 }, { "epoch": 83.84262295081967, "grad_norm": 2.78543758392334, "learning_rate": 1.3382420142486064e-06, "loss": 0.1536, "step": 25572 }, { "epoch": 83.84590163934426, "grad_norm": 10.006476402282715, "learning_rate": 1.3377113962994081e-06, "loss": 0.0746, "step": 25573 }, { "epoch": 83.84918032786885, "grad_norm": 5.904873371124268, "learning_rate": 1.3371808760265214e-06, "loss": 0.15, "step": 25574 }, { "epoch": 83.85245901639344, "grad_norm": 2.6240198612213135, "learning_rate": 1.336650453435926e-06, "loss": 0.0891, "step": 25575 }, { "epoch": 83.85573770491803, "grad_norm": 3.9406237602233887, "learning_rate": 1.3361201285336034e-06, "loss": 0.1674, "step": 25576 }, { "epoch": 83.85901639344263, "grad_norm": 2.056934118270874, "learning_rate": 1.3355899013255358e-06, "loss": 0.1303, "step": 25577 }, { "epoch": 83.86229508196722, "grad_norm": 2.3362209796905518, "learning_rate": 1.3350597718177017e-06, "loss": 0.1342, "step": 25578 }, { "epoch": 83.8655737704918, "grad_norm": 2.5026140213012695, "learning_rate": 1.3345297400160773e-06, "loss": 0.1079, "step": 25579 }, { "epoch": 83.8688524590164, "grad_norm": 2.0380489826202393, "learning_rate": 1.3339998059266402e-06, "loss": 0.0624, "step": 25580 }, { "epoch": 83.87213114754098, "grad_norm": 2.394932508468628, "learning_rate": 1.3334699695553633e-06, "loss": 0.1412, "step": 25581 }, { "epoch": 83.87540983606557, "grad_norm": 2.2839319705963135, "learning_rate": 1.3329402309082252e-06, "loss": 0.2731, "step": 25582 }, { "epoch": 83.87868852459016, "grad_norm": 1.7720519304275513, "learning_rate": 1.3324105899911977e-06, "loss": 0.2382, "step": 25583 }, { "epoch": 83.88196721311475, "grad_norm": 2.530210018157959, "learning_rate": 1.3318810468102528e-06, "loss": 0.0595, "step": 25584 }, { "epoch": 83.88524590163935, "grad_norm": 2.0486202239990234, "learning_rate": 1.3313516013713602e-06, "loss": 0.0431, "step": 25585 }, { "epoch": 83.88852459016394, "grad_norm": 2.029825448989868, "learning_rate": 1.3308222536804927e-06, "loss": 0.1317, "step": 25586 }, { "epoch": 83.89180327868853, "grad_norm": 1.952051043510437, "learning_rate": 1.330293003743619e-06, "loss": 0.1363, "step": 25587 }, { "epoch": 83.89508196721312, "grad_norm": 3.3212881088256836, "learning_rate": 1.3297638515667055e-06, "loss": 0.1263, "step": 25588 }, { "epoch": 83.8983606557377, "grad_norm": 2.7978999614715576, "learning_rate": 1.3292347971557162e-06, "loss": 0.2016, "step": 25589 }, { "epoch": 83.90163934426229, "grad_norm": 2.0447399616241455, "learning_rate": 1.328705840516623e-06, "loss": 0.1054, "step": 25590 }, { "epoch": 83.90491803278688, "grad_norm": 2.7816381454467773, "learning_rate": 1.328176981655388e-06, "loss": 0.1327, "step": 25591 }, { "epoch": 83.90819672131147, "grad_norm": 1.9916632175445557, "learning_rate": 1.3276482205779727e-06, "loss": 0.1542, "step": 25592 }, { "epoch": 83.91147540983607, "grad_norm": 2.7312350273132324, "learning_rate": 1.3271195572903418e-06, "loss": 0.1956, "step": 25593 }, { "epoch": 83.91475409836066, "grad_norm": 2.3524136543273926, "learning_rate": 1.326590991798452e-06, "loss": 0.1035, "step": 25594 }, { "epoch": 83.91803278688525, "grad_norm": 2.4875733852386475, "learning_rate": 1.3260625241082704e-06, "loss": 0.12, "step": 25595 }, { "epoch": 83.92131147540984, "grad_norm": 1.9529130458831787, "learning_rate": 1.3255341542257515e-06, "loss": 0.0731, "step": 25596 }, { "epoch": 83.92459016393443, "grad_norm": 2.742982864379883, "learning_rate": 1.3250058821568546e-06, "loss": 0.0519, "step": 25597 }, { "epoch": 83.92786885245901, "grad_norm": 2.2729713916778564, "learning_rate": 1.3244777079075332e-06, "loss": 0.0775, "step": 25598 }, { "epoch": 83.9311475409836, "grad_norm": 1.8539795875549316, "learning_rate": 1.3239496314837486e-06, "loss": 0.0495, "step": 25599 }, { "epoch": 83.93442622950819, "grad_norm": 2.52205228805542, "learning_rate": 1.3234216528914534e-06, "loss": 0.1679, "step": 25600 }, { "epoch": 83.9377049180328, "grad_norm": 2.351344108581543, "learning_rate": 1.3228937721365997e-06, "loss": 0.0492, "step": 25601 }, { "epoch": 83.94098360655738, "grad_norm": 3.4679372310638428, "learning_rate": 1.3223659892251384e-06, "loss": 0.0596, "step": 25602 }, { "epoch": 83.94426229508197, "grad_norm": 2.764866828918457, "learning_rate": 1.3218383041630257e-06, "loss": 0.1457, "step": 25603 }, { "epoch": 83.94754098360656, "grad_norm": 2.890258550643921, "learning_rate": 1.321310716956209e-06, "loss": 0.2064, "step": 25604 }, { "epoch": 83.95081967213115, "grad_norm": 3.108785629272461, "learning_rate": 1.320783227610637e-06, "loss": 0.3056, "step": 25605 }, { "epoch": 83.95409836065573, "grad_norm": 2.3922555446624756, "learning_rate": 1.3202558361322593e-06, "loss": 0.1102, "step": 25606 }, { "epoch": 83.95737704918032, "grad_norm": 1.9209495782852173, "learning_rate": 1.319728542527019e-06, "loss": 0.0543, "step": 25607 }, { "epoch": 83.96065573770491, "grad_norm": 2.6460769176483154, "learning_rate": 1.3192013468008659e-06, "loss": 0.2399, "step": 25608 }, { "epoch": 83.96393442622951, "grad_norm": 2.022484064102173, "learning_rate": 1.3186742489597448e-06, "loss": 0.1214, "step": 25609 }, { "epoch": 83.9672131147541, "grad_norm": 2.9730477333068848, "learning_rate": 1.3181472490095949e-06, "loss": 0.1442, "step": 25610 }, { "epoch": 83.97049180327869, "grad_norm": 1.8646931648254395, "learning_rate": 1.3176203469563641e-06, "loss": 0.1807, "step": 25611 }, { "epoch": 83.97377049180328, "grad_norm": 3.437762975692749, "learning_rate": 1.3170935428059905e-06, "loss": 0.1632, "step": 25612 }, { "epoch": 83.97704918032787, "grad_norm": 2.4613308906555176, "learning_rate": 1.3165668365644136e-06, "loss": 0.1522, "step": 25613 }, { "epoch": 83.98032786885246, "grad_norm": 1.9646697044372559, "learning_rate": 1.3160402282375762e-06, "loss": 0.1899, "step": 25614 }, { "epoch": 83.98360655737704, "grad_norm": 2.448983669281006, "learning_rate": 1.3155137178314148e-06, "loss": 0.1624, "step": 25615 }, { "epoch": 83.98688524590163, "grad_norm": 2.2181050777435303, "learning_rate": 1.3149873053518659e-06, "loss": 0.1111, "step": 25616 }, { "epoch": 83.99016393442623, "grad_norm": 3.0081589221954346, "learning_rate": 1.3144609908048622e-06, "loss": 0.1005, "step": 25617 }, { "epoch": 83.99344262295082, "grad_norm": 2.8036632537841797, "learning_rate": 1.313934774196345e-06, "loss": 0.064, "step": 25618 }, { "epoch": 83.99672131147541, "grad_norm": 1.7253910303115845, "learning_rate": 1.3134086555322435e-06, "loss": 0.0774, "step": 25619 }, { "epoch": 84.0, "grad_norm": 2.406346559524536, "learning_rate": 1.3128826348184886e-06, "loss": 0.0441, "step": 25620 }, { "epoch": 84.00327868852459, "grad_norm": 2.7505385875701904, "learning_rate": 1.312356712061017e-06, "loss": 0.103, "step": 25621 }, { "epoch": 84.00655737704918, "grad_norm": 2.750643014907837, "learning_rate": 1.311830887265757e-06, "loss": 0.1846, "step": 25622 }, { "epoch": 84.00983606557377, "grad_norm": 2.205155372619629, "learning_rate": 1.3113051604386361e-06, "loss": 0.0771, "step": 25623 }, { "epoch": 84.01311475409837, "grad_norm": 2.118840456008911, "learning_rate": 1.310779531585582e-06, "loss": 0.0653, "step": 25624 }, { "epoch": 84.01639344262296, "grad_norm": 2.0809452533721924, "learning_rate": 1.3102540007125254e-06, "loss": 0.2472, "step": 25625 }, { "epoch": 84.01967213114754, "grad_norm": 2.084820508956909, "learning_rate": 1.309728567825389e-06, "loss": 0.1005, "step": 25626 }, { "epoch": 84.02295081967213, "grad_norm": 1.7426739931106567, "learning_rate": 1.3092032329300997e-06, "loss": 0.0768, "step": 25627 }, { "epoch": 84.02622950819672, "grad_norm": 2.006714344024658, "learning_rate": 1.308677996032578e-06, "loss": 0.0508, "step": 25628 }, { "epoch": 84.02950819672131, "grad_norm": 2.1633408069610596, "learning_rate": 1.3081528571387504e-06, "loss": 0.1885, "step": 25629 }, { "epoch": 84.0327868852459, "grad_norm": 2.3905258178710938, "learning_rate": 1.3076278162545365e-06, "loss": 0.1684, "step": 25630 }, { "epoch": 84.03606557377049, "grad_norm": 2.7397286891937256, "learning_rate": 1.307102873385857e-06, "loss": 0.1008, "step": 25631 }, { "epoch": 84.03934426229509, "grad_norm": 2.6210427284240723, "learning_rate": 1.3065780285386308e-06, "loss": 0.1099, "step": 25632 }, { "epoch": 84.04262295081968, "grad_norm": 2.5043833255767822, "learning_rate": 1.3060532817187743e-06, "loss": 0.0631, "step": 25633 }, { "epoch": 84.04590163934427, "grad_norm": 2.4168429374694824, "learning_rate": 1.3055286329322082e-06, "loss": 0.0777, "step": 25634 }, { "epoch": 84.04918032786885, "grad_norm": 1.993207573890686, "learning_rate": 1.3050040821848476e-06, "loss": 0.0632, "step": 25635 }, { "epoch": 84.05245901639344, "grad_norm": 2.18981671333313, "learning_rate": 1.3044796294826056e-06, "loss": 0.0507, "step": 25636 }, { "epoch": 84.05573770491803, "grad_norm": 1.5418990850448608, "learning_rate": 1.3039552748313945e-06, "loss": 0.0153, "step": 25637 }, { "epoch": 84.05901639344262, "grad_norm": 2.323727607727051, "learning_rate": 1.3034310182371323e-06, "loss": 0.1498, "step": 25638 }, { "epoch": 84.0622950819672, "grad_norm": 2.988905668258667, "learning_rate": 1.3029068597057272e-06, "loss": 0.127, "step": 25639 }, { "epoch": 84.06557377049181, "grad_norm": 2.130293369293213, "learning_rate": 1.3023827992430904e-06, "loss": 0.1002, "step": 25640 }, { "epoch": 84.0688524590164, "grad_norm": 2.9396843910217285, "learning_rate": 1.3018588368551278e-06, "loss": 0.2133, "step": 25641 }, { "epoch": 84.07213114754099, "grad_norm": 1.8545374870300293, "learning_rate": 1.301334972547753e-06, "loss": 0.0519, "step": 25642 }, { "epoch": 84.07540983606557, "grad_norm": 2.4988598823547363, "learning_rate": 1.3008112063268707e-06, "loss": 0.0695, "step": 25643 }, { "epoch": 84.07868852459016, "grad_norm": 2.570540428161621, "learning_rate": 1.300287538198387e-06, "loss": 0.171, "step": 25644 }, { "epoch": 84.08196721311475, "grad_norm": 1.879152536392212, "learning_rate": 1.2997639681682072e-06, "loss": 0.072, "step": 25645 }, { "epoch": 84.08524590163934, "grad_norm": 2.0100581645965576, "learning_rate": 1.2992404962422323e-06, "loss": 0.0722, "step": 25646 }, { "epoch": 84.08852459016393, "grad_norm": 2.318070411682129, "learning_rate": 1.2987171224263695e-06, "loss": 0.0687, "step": 25647 }, { "epoch": 84.09180327868853, "grad_norm": 1.771432876586914, "learning_rate": 1.2981938467265176e-06, "loss": 0.1378, "step": 25648 }, { "epoch": 84.09508196721312, "grad_norm": 1.8380029201507568, "learning_rate": 1.2976706691485786e-06, "loss": 0.0586, "step": 25649 }, { "epoch": 84.09836065573771, "grad_norm": 2.2649857997894287, "learning_rate": 1.2971475896984475e-06, "loss": 0.0635, "step": 25650 }, { "epoch": 84.1016393442623, "grad_norm": 2.854405403137207, "learning_rate": 1.29662460838203e-06, "loss": 0.1455, "step": 25651 }, { "epoch": 84.10491803278688, "grad_norm": 1.5206196308135986, "learning_rate": 1.2961017252052176e-06, "loss": 0.1129, "step": 25652 }, { "epoch": 84.10819672131147, "grad_norm": 2.4061057567596436, "learning_rate": 1.2955789401739094e-06, "loss": 0.1324, "step": 25653 }, { "epoch": 84.11147540983606, "grad_norm": 2.247628688812256, "learning_rate": 1.295056253293996e-06, "loss": 0.1285, "step": 25654 }, { "epoch": 84.11475409836065, "grad_norm": 2.587712526321411, "learning_rate": 1.2945336645713758e-06, "loss": 0.1611, "step": 25655 }, { "epoch": 84.11803278688525, "grad_norm": 1.8943085670471191, "learning_rate": 1.294011174011941e-06, "loss": 0.0435, "step": 25656 }, { "epoch": 84.12131147540984, "grad_norm": 3.1167964935302734, "learning_rate": 1.2934887816215825e-06, "loss": 0.2511, "step": 25657 }, { "epoch": 84.12459016393443, "grad_norm": 3.6221697330474854, "learning_rate": 1.2929664874061898e-06, "loss": 0.137, "step": 25658 }, { "epoch": 84.12786885245902, "grad_norm": 2.166046619415283, "learning_rate": 1.2924442913716507e-06, "loss": 0.1352, "step": 25659 }, { "epoch": 84.1311475409836, "grad_norm": 3.0533907413482666, "learning_rate": 1.291922193523858e-06, "loss": 0.0769, "step": 25660 }, { "epoch": 84.1344262295082, "grad_norm": 3.4948976039886475, "learning_rate": 1.291400193868697e-06, "loss": 0.2009, "step": 25661 }, { "epoch": 84.13770491803278, "grad_norm": 2.3082735538482666, "learning_rate": 1.2908782924120534e-06, "loss": 0.0667, "step": 25662 }, { "epoch": 84.14098360655737, "grad_norm": 2.214904546737671, "learning_rate": 1.2903564891598097e-06, "loss": 0.0817, "step": 25663 }, { "epoch": 84.14426229508197, "grad_norm": 1.9511644840240479, "learning_rate": 1.289834784117855e-06, "loss": 0.0588, "step": 25664 }, { "epoch": 84.14754098360656, "grad_norm": 2.1769051551818848, "learning_rate": 1.2893131772920685e-06, "loss": 0.1871, "step": 25665 }, { "epoch": 84.15081967213115, "grad_norm": 2.591655969619751, "learning_rate": 1.2887916686883317e-06, "loss": 0.1383, "step": 25666 }, { "epoch": 84.15409836065574, "grad_norm": 2.326167583465576, "learning_rate": 1.2882702583125284e-06, "loss": 0.1028, "step": 25667 }, { "epoch": 84.15737704918033, "grad_norm": 2.0433859825134277, "learning_rate": 1.2877489461705351e-06, "loss": 0.0858, "step": 25668 }, { "epoch": 84.16065573770491, "grad_norm": 3.4598801136016846, "learning_rate": 1.2872277322682292e-06, "loss": 0.1161, "step": 25669 }, { "epoch": 84.1639344262295, "grad_norm": 2.254960060119629, "learning_rate": 1.2867066166114917e-06, "loss": 0.1546, "step": 25670 }, { "epoch": 84.1672131147541, "grad_norm": 2.502711772918701, "learning_rate": 1.2861855992061966e-06, "loss": 0.046, "step": 25671 }, { "epoch": 84.1704918032787, "grad_norm": 2.4236302375793457, "learning_rate": 1.2856646800582172e-06, "loss": 0.2433, "step": 25672 }, { "epoch": 84.17377049180328, "grad_norm": 1.8502086400985718, "learning_rate": 1.285143859173431e-06, "loss": 0.0983, "step": 25673 }, { "epoch": 84.17704918032787, "grad_norm": 2.0841901302337646, "learning_rate": 1.284623136557709e-06, "loss": 0.0613, "step": 25674 }, { "epoch": 84.18032786885246, "grad_norm": 2.0710880756378174, "learning_rate": 1.2841025122169225e-06, "loss": 0.0609, "step": 25675 }, { "epoch": 84.18360655737705, "grad_norm": 2.102665662765503, "learning_rate": 1.283581986156941e-06, "loss": 0.0532, "step": 25676 }, { "epoch": 84.18688524590164, "grad_norm": 1.967650294303894, "learning_rate": 1.283061558383637e-06, "loss": 0.1378, "step": 25677 }, { "epoch": 84.19016393442622, "grad_norm": 2.528218984603882, "learning_rate": 1.282541228902877e-06, "loss": 0.0977, "step": 25678 }, { "epoch": 84.19344262295083, "grad_norm": 2.4900755882263184, "learning_rate": 1.28202099772053e-06, "loss": 0.1751, "step": 25679 }, { "epoch": 84.19672131147541, "grad_norm": 2.4999122619628906, "learning_rate": 1.2815008648424565e-06, "loss": 0.127, "step": 25680 }, { "epoch": 84.2, "grad_norm": 2.2524192333221436, "learning_rate": 1.2809808302745298e-06, "loss": 0.0531, "step": 25681 }, { "epoch": 84.20327868852459, "grad_norm": 2.9860973358154297, "learning_rate": 1.2804608940226082e-06, "loss": 0.3381, "step": 25682 }, { "epoch": 84.20655737704918, "grad_norm": 2.719589948654175, "learning_rate": 1.2799410560925573e-06, "loss": 0.1609, "step": 25683 }, { "epoch": 84.20983606557377, "grad_norm": 3.263742208480835, "learning_rate": 1.2794213164902368e-06, "loss": 0.1092, "step": 25684 }, { "epoch": 84.21311475409836, "grad_norm": 2.1885688304901123, "learning_rate": 1.2789016752215055e-06, "loss": 0.1033, "step": 25685 }, { "epoch": 84.21639344262294, "grad_norm": 1.364294409751892, "learning_rate": 1.2783821322922286e-06, "loss": 0.0279, "step": 25686 }, { "epoch": 84.21967213114755, "grad_norm": 2.4187848567962646, "learning_rate": 1.2778626877082611e-06, "loss": 0.1206, "step": 25687 }, { "epoch": 84.22295081967214, "grad_norm": 2.8455212116241455, "learning_rate": 1.27734334147546e-06, "loss": 0.1952, "step": 25688 }, { "epoch": 84.22622950819672, "grad_norm": 2.206449270248413, "learning_rate": 1.27682409359968e-06, "loss": 0.0949, "step": 25689 }, { "epoch": 84.22950819672131, "grad_norm": 2.138273000717163, "learning_rate": 1.2763049440867814e-06, "loss": 0.0754, "step": 25690 }, { "epoch": 84.2327868852459, "grad_norm": 3.2194042205810547, "learning_rate": 1.2757858929426136e-06, "loss": 0.0712, "step": 25691 }, { "epoch": 84.23606557377049, "grad_norm": 3.2463266849517822, "learning_rate": 1.2752669401730321e-06, "loss": 0.0925, "step": 25692 }, { "epoch": 84.23934426229508, "grad_norm": 1.9890328645706177, "learning_rate": 1.2747480857838846e-06, "loss": 0.062, "step": 25693 }, { "epoch": 84.24262295081967, "grad_norm": 2.7125747203826904, "learning_rate": 1.274229329781026e-06, "loss": 0.1734, "step": 25694 }, { "epoch": 84.24590163934427, "grad_norm": 2.5094525814056396, "learning_rate": 1.2737106721703042e-06, "loss": 0.2674, "step": 25695 }, { "epoch": 84.24918032786886, "grad_norm": 2.5904598236083984, "learning_rate": 1.2731921129575685e-06, "loss": 0.142, "step": 25696 }, { "epoch": 84.25245901639344, "grad_norm": 2.167605400085449, "learning_rate": 1.272673652148665e-06, "loss": 0.0968, "step": 25697 }, { "epoch": 84.25573770491803, "grad_norm": 2.4639956951141357, "learning_rate": 1.2721552897494372e-06, "loss": 0.0446, "step": 25698 }, { "epoch": 84.25901639344262, "grad_norm": 2.6923210620880127, "learning_rate": 1.2716370257657362e-06, "loss": 0.0828, "step": 25699 }, { "epoch": 84.26229508196721, "grad_norm": 1.5489201545715332, "learning_rate": 1.2711188602034031e-06, "loss": 0.0328, "step": 25700 }, { "epoch": 84.2655737704918, "grad_norm": 3.1270713806152344, "learning_rate": 1.2706007930682795e-06, "loss": 0.1212, "step": 25701 }, { "epoch": 84.26885245901639, "grad_norm": 1.929532766342163, "learning_rate": 1.2700828243662078e-06, "loss": 0.0507, "step": 25702 }, { "epoch": 84.27213114754099, "grad_norm": 2.726191520690918, "learning_rate": 1.2695649541030297e-06, "loss": 0.0996, "step": 25703 }, { "epoch": 84.27540983606558, "grad_norm": 3.217263698577881, "learning_rate": 1.2690471822845852e-06, "loss": 0.2427, "step": 25704 }, { "epoch": 84.27868852459017, "grad_norm": 3.154780626296997, "learning_rate": 1.2685295089167115e-06, "loss": 0.1354, "step": 25705 }, { "epoch": 84.28196721311475, "grad_norm": 1.889373779296875, "learning_rate": 1.2680119340052432e-06, "loss": 0.1279, "step": 25706 }, { "epoch": 84.28524590163934, "grad_norm": 3.0335466861724854, "learning_rate": 1.2674944575560221e-06, "loss": 0.1326, "step": 25707 }, { "epoch": 84.28852459016393, "grad_norm": 2.230586051940918, "learning_rate": 1.2669770795748803e-06, "loss": 0.119, "step": 25708 }, { "epoch": 84.29180327868852, "grad_norm": 2.0485270023345947, "learning_rate": 1.266459800067652e-06, "loss": 0.0739, "step": 25709 }, { "epoch": 84.29508196721312, "grad_norm": 2.5709786415100098, "learning_rate": 1.2659426190401703e-06, "loss": 0.0638, "step": 25710 }, { "epoch": 84.29836065573771, "grad_norm": 2.3138647079467773, "learning_rate": 1.2654255364982636e-06, "loss": 0.1067, "step": 25711 }, { "epoch": 84.3016393442623, "grad_norm": 1.8162373304367065, "learning_rate": 1.264908552447769e-06, "loss": 0.1201, "step": 25712 }, { "epoch": 84.30491803278689, "grad_norm": 2.551168441772461, "learning_rate": 1.2643916668945123e-06, "loss": 0.0768, "step": 25713 }, { "epoch": 84.30819672131148, "grad_norm": 2.3297841548919678, "learning_rate": 1.2638748798443224e-06, "loss": 0.1307, "step": 25714 }, { "epoch": 84.31147540983606, "grad_norm": 1.9585119485855103, "learning_rate": 1.2633581913030236e-06, "loss": 0.0704, "step": 25715 }, { "epoch": 84.31475409836065, "grad_norm": 2.3352627754211426, "learning_rate": 1.2628416012764477e-06, "loss": 0.0664, "step": 25716 }, { "epoch": 84.31803278688524, "grad_norm": 2.1866023540496826, "learning_rate": 1.262325109770418e-06, "loss": 0.0817, "step": 25717 }, { "epoch": 84.32131147540984, "grad_norm": 2.2651236057281494, "learning_rate": 1.2618087167907567e-06, "loss": 0.1083, "step": 25718 }, { "epoch": 84.32459016393443, "grad_norm": 2.1337060928344727, "learning_rate": 1.2612924223432854e-06, "loss": 0.1286, "step": 25719 }, { "epoch": 84.32786885245902, "grad_norm": 3.016883134841919, "learning_rate": 1.2607762264338297e-06, "loss": 0.2072, "step": 25720 }, { "epoch": 84.33114754098361, "grad_norm": 1.9482741355895996, "learning_rate": 1.2602601290682094e-06, "loss": 0.0736, "step": 25721 }, { "epoch": 84.3344262295082, "grad_norm": 2.7930476665496826, "learning_rate": 1.2597441302522407e-06, "loss": 0.1151, "step": 25722 }, { "epoch": 84.33770491803278, "grad_norm": 2.3206162452697754, "learning_rate": 1.2592282299917468e-06, "loss": 0.0883, "step": 25723 }, { "epoch": 84.34098360655737, "grad_norm": 4.604703426361084, "learning_rate": 1.2587124282925435e-06, "loss": 0.0698, "step": 25724 }, { "epoch": 84.34426229508196, "grad_norm": 1.9003514051437378, "learning_rate": 1.2581967251604422e-06, "loss": 0.0499, "step": 25725 }, { "epoch": 84.34754098360656, "grad_norm": 2.412214756011963, "learning_rate": 1.257681120601265e-06, "loss": 0.1039, "step": 25726 }, { "epoch": 84.35081967213115, "grad_norm": 3.1877355575561523, "learning_rate": 1.2571656146208233e-06, "loss": 0.0853, "step": 25727 }, { "epoch": 84.35409836065574, "grad_norm": 2.6456902027130127, "learning_rate": 1.2566502072249276e-06, "loss": 0.1075, "step": 25728 }, { "epoch": 84.35737704918033, "grad_norm": 1.9670538902282715, "learning_rate": 1.2561348984193932e-06, "loss": 0.0318, "step": 25729 }, { "epoch": 84.36065573770492, "grad_norm": 2.4754037857055664, "learning_rate": 1.2556196882100302e-06, "loss": 0.1977, "step": 25730 }, { "epoch": 84.3639344262295, "grad_norm": 2.300837516784668, "learning_rate": 1.2551045766026459e-06, "loss": 0.0805, "step": 25731 }, { "epoch": 84.3672131147541, "grad_norm": 2.9438326358795166, "learning_rate": 1.254589563603048e-06, "loss": 0.1046, "step": 25732 }, { "epoch": 84.37049180327868, "grad_norm": 1.9913840293884277, "learning_rate": 1.2540746492170476e-06, "loss": 0.2454, "step": 25733 }, { "epoch": 84.37377049180328, "grad_norm": 2.1609041690826416, "learning_rate": 1.2535598334504496e-06, "loss": 0.157, "step": 25734 }, { "epoch": 84.37704918032787, "grad_norm": 2.232755184173584, "learning_rate": 1.2530451163090585e-06, "loss": 0.0868, "step": 25735 }, { "epoch": 84.38032786885246, "grad_norm": 5.392131805419922, "learning_rate": 1.2525304977986784e-06, "loss": 0.1754, "step": 25736 }, { "epoch": 84.38360655737705, "grad_norm": 5.343664646148682, "learning_rate": 1.2520159779251096e-06, "loss": 0.0874, "step": 25737 }, { "epoch": 84.38688524590164, "grad_norm": 2.7167677879333496, "learning_rate": 1.251501556694158e-06, "loss": 0.1581, "step": 25738 }, { "epoch": 84.39016393442623, "grad_norm": 1.5903773307800293, "learning_rate": 1.2509872341116225e-06, "loss": 0.0931, "step": 25739 }, { "epoch": 84.39344262295081, "grad_norm": 2.291236639022827, "learning_rate": 1.2504730101833029e-06, "loss": 0.0561, "step": 25740 }, { "epoch": 84.3967213114754, "grad_norm": 2.658018112182617, "learning_rate": 1.2499588849149957e-06, "loss": 0.0721, "step": 25741 }, { "epoch": 84.4, "grad_norm": 1.9523801803588867, "learning_rate": 1.249444858312502e-06, "loss": 0.0295, "step": 25742 }, { "epoch": 84.4032786885246, "grad_norm": 2.314816474914551, "learning_rate": 1.2489309303816144e-06, "loss": 0.1803, "step": 25743 }, { "epoch": 84.40655737704918, "grad_norm": 2.1388349533081055, "learning_rate": 1.248417101128131e-06, "loss": 0.049, "step": 25744 }, { "epoch": 84.40983606557377, "grad_norm": 2.3244118690490723, "learning_rate": 1.2479033705578414e-06, "loss": 0.0966, "step": 25745 }, { "epoch": 84.41311475409836, "grad_norm": 2.2718358039855957, "learning_rate": 1.2473897386765432e-06, "loss": 0.0574, "step": 25746 }, { "epoch": 84.41639344262295, "grad_norm": 2.3019702434539795, "learning_rate": 1.2468762054900264e-06, "loss": 0.1797, "step": 25747 }, { "epoch": 84.41967213114754, "grad_norm": 2.7275896072387695, "learning_rate": 1.2463627710040816e-06, "loss": 0.1337, "step": 25748 }, { "epoch": 84.42295081967212, "grad_norm": 2.4511122703552246, "learning_rate": 1.2458494352244966e-06, "loss": 0.0901, "step": 25749 }, { "epoch": 84.42622950819673, "grad_norm": 2.167818784713745, "learning_rate": 1.245336198157061e-06, "loss": 0.109, "step": 25750 }, { "epoch": 84.42950819672132, "grad_norm": 2.7356810569763184, "learning_rate": 1.2448230598075627e-06, "loss": 0.1611, "step": 25751 }, { "epoch": 84.4327868852459, "grad_norm": 2.9488930702209473, "learning_rate": 1.2443100201817892e-06, "loss": 0.1969, "step": 25752 }, { "epoch": 84.43606557377049, "grad_norm": 2.2420713901519775, "learning_rate": 1.2437970792855225e-06, "loss": 0.1117, "step": 25753 }, { "epoch": 84.43934426229508, "grad_norm": 3.821808338165283, "learning_rate": 1.2432842371245468e-06, "loss": 0.1333, "step": 25754 }, { "epoch": 84.44262295081967, "grad_norm": 3.4569671154022217, "learning_rate": 1.2427714937046476e-06, "loss": 0.092, "step": 25755 }, { "epoch": 84.44590163934426, "grad_norm": 3.1217682361602783, "learning_rate": 1.2422588490316056e-06, "loss": 0.0684, "step": 25756 }, { "epoch": 84.44918032786886, "grad_norm": 11.96255111694336, "learning_rate": 1.2417463031111998e-06, "loss": 0.1961, "step": 25757 }, { "epoch": 84.45245901639345, "grad_norm": 2.5098893642425537, "learning_rate": 1.2412338559492099e-06, "loss": 0.2095, "step": 25758 }, { "epoch": 84.45573770491804, "grad_norm": 2.458265781402588, "learning_rate": 1.2407215075514157e-06, "loss": 0.1028, "step": 25759 }, { "epoch": 84.45901639344262, "grad_norm": 2.3231935501098633, "learning_rate": 1.2402092579235948e-06, "loss": 0.0482, "step": 25760 }, { "epoch": 84.46229508196721, "grad_norm": 2.4498181343078613, "learning_rate": 1.2396971070715226e-06, "loss": 0.2377, "step": 25761 }, { "epoch": 84.4655737704918, "grad_norm": 2.3628735542297363, "learning_rate": 1.2391850550009743e-06, "loss": 0.1631, "step": 25762 }, { "epoch": 84.46885245901639, "grad_norm": 2.24460506439209, "learning_rate": 1.23867310171772e-06, "loss": 0.1057, "step": 25763 }, { "epoch": 84.47213114754098, "grad_norm": 1.4499719142913818, "learning_rate": 1.2381612472275395e-06, "loss": 0.0195, "step": 25764 }, { "epoch": 84.47540983606558, "grad_norm": 2.5316550731658936, "learning_rate": 1.2376494915362003e-06, "loss": 0.0747, "step": 25765 }, { "epoch": 84.47868852459017, "grad_norm": 2.501638889312744, "learning_rate": 1.2371378346494733e-06, "loss": 0.119, "step": 25766 }, { "epoch": 84.48196721311476, "grad_norm": 2.7357637882232666, "learning_rate": 1.2366262765731264e-06, "loss": 0.1044, "step": 25767 }, { "epoch": 84.48524590163935, "grad_norm": 2.279863119125366, "learning_rate": 1.2361148173129323e-06, "loss": 0.058, "step": 25768 }, { "epoch": 84.48852459016393, "grad_norm": 2.320448160171509, "learning_rate": 1.2356034568746554e-06, "loss": 0.0676, "step": 25769 }, { "epoch": 84.49180327868852, "grad_norm": 2.416351556777954, "learning_rate": 1.2350921952640627e-06, "loss": 0.2736, "step": 25770 }, { "epoch": 84.49508196721311, "grad_norm": 2.4814043045043945, "learning_rate": 1.2345810324869156e-06, "loss": 0.1852, "step": 25771 }, { "epoch": 84.4983606557377, "grad_norm": 2.4897425174713135, "learning_rate": 1.2340699685489844e-06, "loss": 0.0628, "step": 25772 }, { "epoch": 84.5016393442623, "grad_norm": 2.53035569190979, "learning_rate": 1.2335590034560285e-06, "loss": 0.1785, "step": 25773 }, { "epoch": 84.50491803278689, "grad_norm": 2.6460304260253906, "learning_rate": 1.233048137213807e-06, "loss": 0.1108, "step": 25774 }, { "epoch": 84.50819672131148, "grad_norm": 2.420642852783203, "learning_rate": 1.2325373698280852e-06, "loss": 0.0858, "step": 25775 }, { "epoch": 84.51147540983607, "grad_norm": 2.2661452293395996, "learning_rate": 1.2320267013046206e-06, "loss": 0.0982, "step": 25776 }, { "epoch": 84.51475409836065, "grad_norm": 2.4329020977020264, "learning_rate": 1.2315161316491685e-06, "loss": 0.1734, "step": 25777 }, { "epoch": 84.51803278688524, "grad_norm": 2.4688808917999268, "learning_rate": 1.2310056608674925e-06, "loss": 0.0452, "step": 25778 }, { "epoch": 84.52131147540983, "grad_norm": 11.343814849853516, "learning_rate": 1.2304952889653444e-06, "loss": 0.1192, "step": 25779 }, { "epoch": 84.52459016393442, "grad_norm": 2.5853450298309326, "learning_rate": 1.2299850159484794e-06, "loss": 0.2467, "step": 25780 }, { "epoch": 84.52786885245902, "grad_norm": 2.4507389068603516, "learning_rate": 1.229474841822651e-06, "loss": 0.1224, "step": 25781 }, { "epoch": 84.53114754098361, "grad_norm": 1.9352517127990723, "learning_rate": 1.2289647665936143e-06, "loss": 0.1355, "step": 25782 }, { "epoch": 84.5344262295082, "grad_norm": 2.582050085067749, "learning_rate": 1.2284547902671195e-06, "loss": 0.0548, "step": 25783 }, { "epoch": 84.53770491803279, "grad_norm": 1.581181526184082, "learning_rate": 1.227944912848914e-06, "loss": 0.0325, "step": 25784 }, { "epoch": 84.54098360655738, "grad_norm": 2.0861642360687256, "learning_rate": 1.2274351343447533e-06, "loss": 0.1122, "step": 25785 }, { "epoch": 84.54426229508196, "grad_norm": 2.510899066925049, "learning_rate": 1.2269254547603826e-06, "loss": 0.1597, "step": 25786 }, { "epoch": 84.54754098360655, "grad_norm": 2.162632703781128, "learning_rate": 1.2264158741015497e-06, "loss": 0.1438, "step": 25787 }, { "epoch": 84.55081967213114, "grad_norm": 3.602130889892578, "learning_rate": 1.2259063923739988e-06, "loss": 0.2442, "step": 25788 }, { "epoch": 84.55409836065574, "grad_norm": 2.081514358520508, "learning_rate": 1.2253970095834744e-06, "loss": 0.0596, "step": 25789 }, { "epoch": 84.55737704918033, "grad_norm": 2.6625473499298096, "learning_rate": 1.224887725735725e-06, "loss": 0.2429, "step": 25790 }, { "epoch": 84.56065573770492, "grad_norm": 3.1951406002044678, "learning_rate": 1.2243785408364895e-06, "loss": 0.0788, "step": 25791 }, { "epoch": 84.56393442622951, "grad_norm": 2.4303994178771973, "learning_rate": 1.2238694548915109e-06, "loss": 0.0541, "step": 25792 }, { "epoch": 84.5672131147541, "grad_norm": 2.151549816131592, "learning_rate": 1.2233604679065259e-06, "loss": 0.1514, "step": 25793 }, { "epoch": 84.57049180327868, "grad_norm": 2.2441508769989014, "learning_rate": 1.2228515798872797e-06, "loss": 0.0694, "step": 25794 }, { "epoch": 84.57377049180327, "grad_norm": 2.651695489883423, "learning_rate": 1.222342790839508e-06, "loss": 0.1592, "step": 25795 }, { "epoch": 84.57704918032788, "grad_norm": 1.991646409034729, "learning_rate": 1.2218341007689483e-06, "loss": 0.0971, "step": 25796 }, { "epoch": 84.58032786885246, "grad_norm": 2.3016390800476074, "learning_rate": 1.2213255096813325e-06, "loss": 0.0553, "step": 25797 }, { "epoch": 84.58360655737705, "grad_norm": 2.098407030105591, "learning_rate": 1.220817017582403e-06, "loss": 0.0386, "step": 25798 }, { "epoch": 84.58688524590164, "grad_norm": 2.580005645751953, "learning_rate": 1.2203086244778883e-06, "loss": 0.1387, "step": 25799 }, { "epoch": 84.59016393442623, "grad_norm": 2.3048582077026367, "learning_rate": 1.219800330373524e-06, "loss": 0.1742, "step": 25800 }, { "epoch": 84.59344262295082, "grad_norm": 2.075640916824341, "learning_rate": 1.2192921352750387e-06, "loss": 0.1175, "step": 25801 }, { "epoch": 84.5967213114754, "grad_norm": 2.64322566986084, "learning_rate": 1.2187840391881623e-06, "loss": 0.1351, "step": 25802 }, { "epoch": 84.6, "grad_norm": 2.209771156311035, "learning_rate": 1.218276042118629e-06, "loss": 0.1458, "step": 25803 }, { "epoch": 84.6032786885246, "grad_norm": 2.1465673446655273, "learning_rate": 1.2177681440721635e-06, "loss": 0.0863, "step": 25804 }, { "epoch": 84.60655737704919, "grad_norm": 2.7779550552368164, "learning_rate": 1.2172603450544928e-06, "loss": 0.0494, "step": 25805 }, { "epoch": 84.60983606557377, "grad_norm": 2.0630269050598145, "learning_rate": 1.2167526450713418e-06, "loss": 0.0906, "step": 25806 }, { "epoch": 84.61311475409836, "grad_norm": 2.424370050430298, "learning_rate": 1.216245044128439e-06, "loss": 0.0935, "step": 25807 }, { "epoch": 84.61639344262295, "grad_norm": 2.3137331008911133, "learning_rate": 1.2157375422315065e-06, "loss": 0.0396, "step": 25808 }, { "epoch": 84.61967213114754, "grad_norm": 2.2469024658203125, "learning_rate": 1.2152301393862665e-06, "loss": 0.1159, "step": 25809 }, { "epoch": 84.62295081967213, "grad_norm": 2.2938246726989746, "learning_rate": 1.2147228355984387e-06, "loss": 0.08, "step": 25810 }, { "epoch": 84.62622950819672, "grad_norm": 1.6975479125976562, "learning_rate": 1.2142156308737464e-06, "loss": 0.1234, "step": 25811 }, { "epoch": 84.62950819672132, "grad_norm": 1.9109750986099243, "learning_rate": 1.2137085252179092e-06, "loss": 0.0519, "step": 25812 }, { "epoch": 84.6327868852459, "grad_norm": 2.0170817375183105, "learning_rate": 1.213201518636643e-06, "loss": 0.0364, "step": 25813 }, { "epoch": 84.6360655737705, "grad_norm": 2.2554447650909424, "learning_rate": 1.2126946111356651e-06, "loss": 0.1823, "step": 25814 }, { "epoch": 84.63934426229508, "grad_norm": 2.325287342071533, "learning_rate": 1.2121878027206912e-06, "loss": 0.0672, "step": 25815 }, { "epoch": 84.64262295081967, "grad_norm": 2.5071845054626465, "learning_rate": 1.2116810933974377e-06, "loss": 0.1369, "step": 25816 }, { "epoch": 84.64590163934426, "grad_norm": 2.6399030685424805, "learning_rate": 1.2111744831716188e-06, "loss": 0.123, "step": 25817 }, { "epoch": 84.64918032786885, "grad_norm": 2.912447452545166, "learning_rate": 1.2106679720489445e-06, "loss": 0.1482, "step": 25818 }, { "epoch": 84.65245901639344, "grad_norm": 2.2288637161254883, "learning_rate": 1.2101615600351258e-06, "loss": 0.0499, "step": 25819 }, { "epoch": 84.65573770491804, "grad_norm": 2.7480735778808594, "learning_rate": 1.2096552471358768e-06, "loss": 0.0979, "step": 25820 }, { "epoch": 84.65901639344263, "grad_norm": 3.98576283454895, "learning_rate": 1.2091490333569044e-06, "loss": 0.096, "step": 25821 }, { "epoch": 84.66229508196722, "grad_norm": 1.503905177116394, "learning_rate": 1.2086429187039172e-06, "loss": 0.0457, "step": 25822 }, { "epoch": 84.6655737704918, "grad_norm": 2.3635387420654297, "learning_rate": 1.2081369031826185e-06, "loss": 0.1237, "step": 25823 }, { "epoch": 84.66885245901639, "grad_norm": 2.66232967376709, "learning_rate": 1.2076309867987212e-06, "loss": 0.2063, "step": 25824 }, { "epoch": 84.67213114754098, "grad_norm": 2.809567928314209, "learning_rate": 1.2071251695579255e-06, "loss": 0.1248, "step": 25825 }, { "epoch": 84.67540983606557, "grad_norm": 3.3044447898864746, "learning_rate": 1.2066194514659356e-06, "loss": 0.1265, "step": 25826 }, { "epoch": 84.67868852459016, "grad_norm": 2.106065511703491, "learning_rate": 1.2061138325284528e-06, "loss": 0.0692, "step": 25827 }, { "epoch": 84.68196721311476, "grad_norm": 2.5344178676605225, "learning_rate": 1.2056083127511808e-06, "loss": 0.0781, "step": 25828 }, { "epoch": 84.68524590163935, "grad_norm": 2.4085922241210938, "learning_rate": 1.20510289213982e-06, "loss": 0.15, "step": 25829 }, { "epoch": 84.68852459016394, "grad_norm": 2.441835641860962, "learning_rate": 1.2045975707000657e-06, "loss": 0.1189, "step": 25830 }, { "epoch": 84.69180327868852, "grad_norm": 2.0568370819091797, "learning_rate": 1.2040923484376221e-06, "loss": 0.1373, "step": 25831 }, { "epoch": 84.69508196721311, "grad_norm": 2.556854724884033, "learning_rate": 1.2035872253581816e-06, "loss": 0.1765, "step": 25832 }, { "epoch": 84.6983606557377, "grad_norm": 2.3926990032196045, "learning_rate": 1.2030822014674392e-06, "loss": 0.1099, "step": 25833 }, { "epoch": 84.70163934426229, "grad_norm": 4.657907485961914, "learning_rate": 1.2025772767710931e-06, "loss": 0.2138, "step": 25834 }, { "epoch": 84.70491803278688, "grad_norm": 1.7625058889389038, "learning_rate": 1.2020724512748362e-06, "loss": 0.0482, "step": 25835 }, { "epoch": 84.70819672131148, "grad_norm": 2.3613638877868652, "learning_rate": 1.2015677249843572e-06, "loss": 0.207, "step": 25836 }, { "epoch": 84.71147540983607, "grad_norm": 3.172006607055664, "learning_rate": 1.2010630979053527e-06, "loss": 0.1149, "step": 25837 }, { "epoch": 84.71475409836066, "grad_norm": 2.637211799621582, "learning_rate": 1.2005585700435096e-06, "loss": 0.0694, "step": 25838 }, { "epoch": 84.71803278688525, "grad_norm": 2.8427958488464355, "learning_rate": 1.2000541414045185e-06, "loss": 0.1752, "step": 25839 }, { "epoch": 84.72131147540983, "grad_norm": 2.2037572860717773, "learning_rate": 1.1995498119940663e-06, "loss": 0.1019, "step": 25840 }, { "epoch": 84.72459016393442, "grad_norm": 2.2327277660369873, "learning_rate": 1.1990455818178382e-06, "loss": 0.2444, "step": 25841 }, { "epoch": 84.72786885245901, "grad_norm": 2.2170119285583496, "learning_rate": 1.198541450881524e-06, "loss": 0.0715, "step": 25842 }, { "epoch": 84.73114754098361, "grad_norm": 2.666032552719116, "learning_rate": 1.1980374191908061e-06, "loss": 0.2644, "step": 25843 }, { "epoch": 84.7344262295082, "grad_norm": 2.173203706741333, "learning_rate": 1.1975334867513687e-06, "loss": 0.1115, "step": 25844 }, { "epoch": 84.73770491803279, "grad_norm": 2.353407621383667, "learning_rate": 1.1970296535688909e-06, "loss": 0.0722, "step": 25845 }, { "epoch": 84.74098360655738, "grad_norm": 2.696805238723755, "learning_rate": 1.1965259196490574e-06, "loss": 0.0647, "step": 25846 }, { "epoch": 84.74426229508197, "grad_norm": 3.1298320293426514, "learning_rate": 1.1960222849975488e-06, "loss": 0.2306, "step": 25847 }, { "epoch": 84.74754098360656, "grad_norm": 2.8433432579040527, "learning_rate": 1.1955187496200427e-06, "loss": 0.0843, "step": 25848 }, { "epoch": 84.75081967213114, "grad_norm": 2.3516788482666016, "learning_rate": 1.1950153135222152e-06, "loss": 0.1118, "step": 25849 }, { "epoch": 84.75409836065573, "grad_norm": 3.414057731628418, "learning_rate": 1.194511976709747e-06, "loss": 0.063, "step": 25850 }, { "epoch": 84.75737704918033, "grad_norm": 1.823043942451477, "learning_rate": 1.1940087391883104e-06, "loss": 0.1175, "step": 25851 }, { "epoch": 84.76065573770492, "grad_norm": 2.574615478515625, "learning_rate": 1.1935056009635826e-06, "loss": 0.0984, "step": 25852 }, { "epoch": 84.76393442622951, "grad_norm": 3.327444553375244, "learning_rate": 1.1930025620412355e-06, "loss": 0.1128, "step": 25853 }, { "epoch": 84.7672131147541, "grad_norm": 2.2003183364868164, "learning_rate": 1.192499622426938e-06, "loss": 0.0976, "step": 25854 }, { "epoch": 84.77049180327869, "grad_norm": 3.7282555103302, "learning_rate": 1.191996782126369e-06, "loss": 0.1388, "step": 25855 }, { "epoch": 84.77377049180328, "grad_norm": 2.0200207233428955, "learning_rate": 1.1914940411451925e-06, "loss": 0.108, "step": 25856 }, { "epoch": 84.77704918032786, "grad_norm": 1.9967126846313477, "learning_rate": 1.1909913994890797e-06, "loss": 0.1013, "step": 25857 }, { "epoch": 84.78032786885245, "grad_norm": 2.6327149868011475, "learning_rate": 1.1904888571636963e-06, "loss": 0.175, "step": 25858 }, { "epoch": 84.78360655737706, "grad_norm": 2.3905081748962402, "learning_rate": 1.1899864141747131e-06, "loss": 0.0446, "step": 25859 }, { "epoch": 84.78688524590164, "grad_norm": 1.8780393600463867, "learning_rate": 1.1894840705277922e-06, "loss": 0.1143, "step": 25860 }, { "epoch": 84.79016393442623, "grad_norm": 2.8995602130889893, "learning_rate": 1.188981826228599e-06, "loss": 0.1937, "step": 25861 }, { "epoch": 84.79344262295082, "grad_norm": 2.669943332672119, "learning_rate": 1.188479681282796e-06, "loss": 0.2486, "step": 25862 }, { "epoch": 84.79672131147541, "grad_norm": 1.821841835975647, "learning_rate": 1.187977635696047e-06, "loss": 0.0394, "step": 25863 }, { "epoch": 84.8, "grad_norm": 2.2279574871063232, "learning_rate": 1.1874756894740137e-06, "loss": 0.1143, "step": 25864 }, { "epoch": 84.80327868852459, "grad_norm": 2.8304643630981445, "learning_rate": 1.1869738426223532e-06, "loss": 0.0906, "step": 25865 }, { "epoch": 84.80655737704917, "grad_norm": 3.9637835025787354, "learning_rate": 1.1864720951467267e-06, "loss": 0.1344, "step": 25866 }, { "epoch": 84.80983606557378, "grad_norm": 2.514272689819336, "learning_rate": 1.1859704470527888e-06, "loss": 0.096, "step": 25867 }, { "epoch": 84.81311475409836, "grad_norm": 2.119293451309204, "learning_rate": 1.1854688983462003e-06, "loss": 0.2574, "step": 25868 }, { "epoch": 84.81639344262295, "grad_norm": 2.258903980255127, "learning_rate": 1.1849674490326157e-06, "loss": 0.168, "step": 25869 }, { "epoch": 84.81967213114754, "grad_norm": 2.058181047439575, "learning_rate": 1.1844660991176882e-06, "loss": 0.2164, "step": 25870 }, { "epoch": 84.82295081967213, "grad_norm": 1.9502378702163696, "learning_rate": 1.1839648486070687e-06, "loss": 0.0415, "step": 25871 }, { "epoch": 84.82622950819672, "grad_norm": 2.4456892013549805, "learning_rate": 1.183463697506414e-06, "loss": 0.0489, "step": 25872 }, { "epoch": 84.8295081967213, "grad_norm": 2.823235273361206, "learning_rate": 1.1829626458213738e-06, "loss": 0.0634, "step": 25873 }, { "epoch": 84.8327868852459, "grad_norm": 2.688192129135132, "learning_rate": 1.182461693557596e-06, "loss": 0.0457, "step": 25874 }, { "epoch": 84.8360655737705, "grad_norm": 1.6680301427841187, "learning_rate": 1.1819608407207294e-06, "loss": 0.118, "step": 25875 }, { "epoch": 84.83934426229509, "grad_norm": 2.3344950675964355, "learning_rate": 1.181460087316424e-06, "loss": 0.1315, "step": 25876 }, { "epoch": 84.84262295081967, "grad_norm": 2.241563081741333, "learning_rate": 1.180959433350326e-06, "loss": 0.0783, "step": 25877 }, { "epoch": 84.84590163934426, "grad_norm": 2.0365426540374756, "learning_rate": 1.1804588788280792e-06, "loss": 0.0801, "step": 25878 }, { "epoch": 84.84918032786885, "grad_norm": 2.9129326343536377, "learning_rate": 1.1799584237553274e-06, "loss": 0.1179, "step": 25879 }, { "epoch": 84.85245901639344, "grad_norm": 2.0061843395233154, "learning_rate": 1.1794580681377155e-06, "loss": 0.0331, "step": 25880 }, { "epoch": 84.85573770491803, "grad_norm": 2.043696641921997, "learning_rate": 1.1789578119808864e-06, "loss": 0.051, "step": 25881 }, { "epoch": 84.85901639344263, "grad_norm": 2.214136838912964, "learning_rate": 1.1784576552904792e-06, "loss": 0.1284, "step": 25882 }, { "epoch": 84.86229508196722, "grad_norm": 1.5938259363174438, "learning_rate": 1.1779575980721313e-06, "loss": 0.0252, "step": 25883 }, { "epoch": 84.8655737704918, "grad_norm": 1.945381760597229, "learning_rate": 1.1774576403314864e-06, "loss": 0.0602, "step": 25884 }, { "epoch": 84.8688524590164, "grad_norm": 2.3140032291412354, "learning_rate": 1.1769577820741807e-06, "loss": 0.057, "step": 25885 }, { "epoch": 84.87213114754098, "grad_norm": 2.6554720401763916, "learning_rate": 1.1764580233058464e-06, "loss": 0.1542, "step": 25886 }, { "epoch": 84.87540983606557, "grad_norm": 2.17366886138916, "learning_rate": 1.1759583640321248e-06, "loss": 0.1293, "step": 25887 }, { "epoch": 84.87868852459016, "grad_norm": 1.5034104585647583, "learning_rate": 1.1754588042586469e-06, "loss": 0.0261, "step": 25888 }, { "epoch": 84.88196721311475, "grad_norm": 2.5618205070495605, "learning_rate": 1.1749593439910444e-06, "loss": 0.1415, "step": 25889 }, { "epoch": 84.88524590163935, "grad_norm": 3.0178093910217285, "learning_rate": 1.1744599832349535e-06, "loss": 0.1195, "step": 25890 }, { "epoch": 84.88852459016394, "grad_norm": 1.6357296705245972, "learning_rate": 1.1739607219960026e-06, "loss": 0.0727, "step": 25891 }, { "epoch": 84.89180327868853, "grad_norm": 2.2398059368133545, "learning_rate": 1.1734615602798205e-06, "loss": 0.0788, "step": 25892 }, { "epoch": 84.89508196721312, "grad_norm": 2.773022174835205, "learning_rate": 1.1729624980920352e-06, "loss": 0.1017, "step": 25893 }, { "epoch": 84.8983606557377, "grad_norm": 3.2181777954101562, "learning_rate": 1.1724635354382775e-06, "loss": 0.1817, "step": 25894 }, { "epoch": 84.90163934426229, "grad_norm": 2.0131537914276123, "learning_rate": 1.1719646723241707e-06, "loss": 0.0824, "step": 25895 }, { "epoch": 84.90491803278688, "grad_norm": 2.0393338203430176, "learning_rate": 1.1714659087553426e-06, "loss": 0.2069, "step": 25896 }, { "epoch": 84.90819672131147, "grad_norm": 1.8715355396270752, "learning_rate": 1.1709672447374132e-06, "loss": 0.0227, "step": 25897 }, { "epoch": 84.91147540983607, "grad_norm": 2.3271381855010986, "learning_rate": 1.17046868027601e-06, "loss": 0.1115, "step": 25898 }, { "epoch": 84.91475409836066, "grad_norm": 2.317850351333618, "learning_rate": 1.1699702153767523e-06, "loss": 0.0826, "step": 25899 }, { "epoch": 84.91803278688525, "grad_norm": 1.7859832048416138, "learning_rate": 1.1694718500452618e-06, "loss": 0.0679, "step": 25900 }, { "epoch": 84.92131147540984, "grad_norm": 2.0571401119232178, "learning_rate": 1.1689735842871552e-06, "loss": 0.0351, "step": 25901 }, { "epoch": 84.92459016393443, "grad_norm": 2.035909414291382, "learning_rate": 1.1684754181080559e-06, "loss": 0.0549, "step": 25902 }, { "epoch": 84.92786885245901, "grad_norm": 3.45090651512146, "learning_rate": 1.1679773515135796e-06, "loss": 0.2016, "step": 25903 }, { "epoch": 84.9311475409836, "grad_norm": 1.957152009010315, "learning_rate": 1.1674793845093402e-06, "loss": 0.1306, "step": 25904 }, { "epoch": 84.93442622950819, "grad_norm": 2.0131161212921143, "learning_rate": 1.1669815171009557e-06, "loss": 0.1602, "step": 25905 }, { "epoch": 84.9377049180328, "grad_norm": 2.1667304039001465, "learning_rate": 1.166483749294035e-06, "loss": 0.0461, "step": 25906 }, { "epoch": 84.94098360655738, "grad_norm": 2.8745102882385254, "learning_rate": 1.165986081094198e-06, "loss": 0.1724, "step": 25907 }, { "epoch": 84.94426229508197, "grad_norm": 2.1795151233673096, "learning_rate": 1.1654885125070525e-06, "loss": 0.1996, "step": 25908 }, { "epoch": 84.94754098360656, "grad_norm": 1.7534160614013672, "learning_rate": 1.1649910435382095e-06, "loss": 0.0325, "step": 25909 }, { "epoch": 84.95081967213115, "grad_norm": 2.7516486644744873, "learning_rate": 1.1644936741932755e-06, "loss": 0.1681, "step": 25910 }, { "epoch": 84.95409836065573, "grad_norm": 1.8513444662094116, "learning_rate": 1.1639964044778652e-06, "loss": 0.1179, "step": 25911 }, { "epoch": 84.95737704918032, "grad_norm": 1.5379458665847778, "learning_rate": 1.1634992343975826e-06, "loss": 0.0587, "step": 25912 }, { "epoch": 84.96065573770491, "grad_norm": 1.8955236673355103, "learning_rate": 1.1630021639580335e-06, "loss": 0.1216, "step": 25913 }, { "epoch": 84.96393442622951, "grad_norm": 2.342628002166748, "learning_rate": 1.1625051931648212e-06, "loss": 0.0911, "step": 25914 }, { "epoch": 84.9672131147541, "grad_norm": 2.6007418632507324, "learning_rate": 1.1620083220235534e-06, "loss": 0.096, "step": 25915 }, { "epoch": 84.97049180327869, "grad_norm": 1.6828525066375732, "learning_rate": 1.1615115505398323e-06, "loss": 0.1053, "step": 25916 }, { "epoch": 84.97377049180328, "grad_norm": 2.011667490005493, "learning_rate": 1.1610148787192565e-06, "loss": 0.1249, "step": 25917 }, { "epoch": 84.97704918032787, "grad_norm": 3.1530792713165283, "learning_rate": 1.1605183065674285e-06, "loss": 0.1448, "step": 25918 }, { "epoch": 84.98032786885246, "grad_norm": 2.1352553367614746, "learning_rate": 1.1600218340899461e-06, "loss": 0.0757, "step": 25919 }, { "epoch": 84.98360655737704, "grad_norm": 3.101691246032715, "learning_rate": 1.15952546129241e-06, "loss": 0.2766, "step": 25920 }, { "epoch": 84.98688524590163, "grad_norm": 2.067167282104492, "learning_rate": 1.1590291881804162e-06, "loss": 0.0529, "step": 25921 }, { "epoch": 84.99016393442623, "grad_norm": 2.257272720336914, "learning_rate": 1.1585330147595608e-06, "loss": 0.0876, "step": 25922 }, { "epoch": 84.99344262295082, "grad_norm": 2.276327610015869, "learning_rate": 1.1580369410354365e-06, "loss": 0.0658, "step": 25923 }, { "epoch": 84.99672131147541, "grad_norm": 2.413464307785034, "learning_rate": 1.1575409670136417e-06, "loss": 0.0681, "step": 25924 }, { "epoch": 85.0, "grad_norm": 3.3994762897491455, "learning_rate": 1.1570450926997657e-06, "loss": 0.1647, "step": 25925 }, { "epoch": 85.00327868852459, "grad_norm": 2.1149826049804688, "learning_rate": 1.1565493180994002e-06, "loss": 0.0665, "step": 25926 }, { "epoch": 85.00655737704918, "grad_norm": 2.333371877670288, "learning_rate": 1.1560536432181346e-06, "loss": 0.1148, "step": 25927 }, { "epoch": 85.00983606557377, "grad_norm": 2.8971095085144043, "learning_rate": 1.1555580680615608e-06, "loss": 0.0859, "step": 25928 }, { "epoch": 85.01311475409837, "grad_norm": 2.2748324871063232, "learning_rate": 1.1550625926352665e-06, "loss": 0.1446, "step": 25929 }, { "epoch": 85.01639344262296, "grad_norm": 1.771273136138916, "learning_rate": 1.1545672169448375e-06, "loss": 0.0865, "step": 25930 }, { "epoch": 85.01967213114754, "grad_norm": 2.305210828781128, "learning_rate": 1.1540719409958612e-06, "loss": 0.0929, "step": 25931 }, { "epoch": 85.02295081967213, "grad_norm": 2.0414035320281982, "learning_rate": 1.1535767647939177e-06, "loss": 0.0699, "step": 25932 }, { "epoch": 85.02622950819672, "grad_norm": 2.6230428218841553, "learning_rate": 1.1530816883445972e-06, "loss": 0.1494, "step": 25933 }, { "epoch": 85.02950819672131, "grad_norm": 2.7316393852233887, "learning_rate": 1.1525867116534782e-06, "loss": 0.0595, "step": 25934 }, { "epoch": 85.0327868852459, "grad_norm": 1.8818007707595825, "learning_rate": 1.1520918347261412e-06, "loss": 0.0326, "step": 25935 }, { "epoch": 85.03606557377049, "grad_norm": 3.081266403198242, "learning_rate": 1.1515970575681712e-06, "loss": 0.1595, "step": 25936 }, { "epoch": 85.03934426229509, "grad_norm": 2.4661176204681396, "learning_rate": 1.151102380185144e-06, "loss": 0.0694, "step": 25937 }, { "epoch": 85.04262295081968, "grad_norm": 3.1423637866973877, "learning_rate": 1.150607802582635e-06, "loss": 0.1407, "step": 25938 }, { "epoch": 85.04590163934427, "grad_norm": 2.3904402256011963, "learning_rate": 1.1501133247662278e-06, "loss": 0.0626, "step": 25939 }, { "epoch": 85.04918032786885, "grad_norm": 2.2074332237243652, "learning_rate": 1.1496189467414932e-06, "loss": 0.084, "step": 25940 }, { "epoch": 85.05245901639344, "grad_norm": 2.4769339561462402, "learning_rate": 1.1491246685140078e-06, "loss": 0.1314, "step": 25941 }, { "epoch": 85.05573770491803, "grad_norm": 3.017462968826294, "learning_rate": 1.1486304900893418e-06, "loss": 0.0651, "step": 25942 }, { "epoch": 85.05901639344262, "grad_norm": 2.2998809814453125, "learning_rate": 1.148136411473072e-06, "loss": 0.1749, "step": 25943 }, { "epoch": 85.0622950819672, "grad_norm": 1.602027177810669, "learning_rate": 1.147642432670768e-06, "loss": 0.028, "step": 25944 }, { "epoch": 85.06557377049181, "grad_norm": 4.365691184997559, "learning_rate": 1.147148553687998e-06, "loss": 0.1338, "step": 25945 }, { "epoch": 85.0688524590164, "grad_norm": 2.4038445949554443, "learning_rate": 1.1466547745303348e-06, "loss": 0.0557, "step": 25946 }, { "epoch": 85.07213114754099, "grad_norm": 2.0612282752990723, "learning_rate": 1.1461610952033442e-06, "loss": 0.0569, "step": 25947 }, { "epoch": 85.07540983606557, "grad_norm": 2.5788023471832275, "learning_rate": 1.1456675157125918e-06, "loss": 0.1507, "step": 25948 }, { "epoch": 85.07868852459016, "grad_norm": 1.7904025316238403, "learning_rate": 1.1451740360636432e-06, "loss": 0.0332, "step": 25949 }, { "epoch": 85.08196721311475, "grad_norm": 2.7274303436279297, "learning_rate": 1.144680656262066e-06, "loss": 0.1071, "step": 25950 }, { "epoch": 85.08524590163934, "grad_norm": 2.330763816833496, "learning_rate": 1.1441873763134227e-06, "loss": 0.1209, "step": 25951 }, { "epoch": 85.08852459016393, "grad_norm": 11.998517036437988, "learning_rate": 1.1436941962232729e-06, "loss": 0.1323, "step": 25952 }, { "epoch": 85.09180327868853, "grad_norm": 2.2623114585876465, "learning_rate": 1.1432011159971778e-06, "loss": 0.0605, "step": 25953 }, { "epoch": 85.09508196721312, "grad_norm": 2.1975300312042236, "learning_rate": 1.142708135640701e-06, "loss": 0.032, "step": 25954 }, { "epoch": 85.09836065573771, "grad_norm": 1.236235499382019, "learning_rate": 1.1422152551593991e-06, "loss": 0.0179, "step": 25955 }, { "epoch": 85.1016393442623, "grad_norm": 2.0738322734832764, "learning_rate": 1.1417224745588306e-06, "loss": 0.2021, "step": 25956 }, { "epoch": 85.10491803278688, "grad_norm": 2.6754019260406494, "learning_rate": 1.1412297938445505e-06, "loss": 0.0896, "step": 25957 }, { "epoch": 85.10819672131147, "grad_norm": 2.141615390777588, "learning_rate": 1.1407372130221138e-06, "loss": 0.1129, "step": 25958 }, { "epoch": 85.11147540983606, "grad_norm": 1.9974967241287231, "learning_rate": 1.1402447320970788e-06, "loss": 0.1437, "step": 25959 }, { "epoch": 85.11475409836065, "grad_norm": 2.1324548721313477, "learning_rate": 1.1397523510749952e-06, "loss": 0.1023, "step": 25960 }, { "epoch": 85.11803278688525, "grad_norm": 2.309405565261841, "learning_rate": 1.1392600699614175e-06, "loss": 0.1211, "step": 25961 }, { "epoch": 85.12131147540984, "grad_norm": 2.976287364959717, "learning_rate": 1.1387678887618926e-06, "loss": 0.0436, "step": 25962 }, { "epoch": 85.12459016393443, "grad_norm": 2.9978253841400146, "learning_rate": 1.1382758074819744e-06, "loss": 0.0399, "step": 25963 }, { "epoch": 85.12786885245902, "grad_norm": 2.3796961307525635, "learning_rate": 1.1377838261272111e-06, "loss": 0.1125, "step": 25964 }, { "epoch": 85.1311475409836, "grad_norm": 2.3113932609558105, "learning_rate": 1.1372919447031505e-06, "loss": 0.0966, "step": 25965 }, { "epoch": 85.1344262295082, "grad_norm": 2.2965877056121826, "learning_rate": 1.1368001632153348e-06, "loss": 0.1588, "step": 25966 }, { "epoch": 85.13770491803278, "grad_norm": 2.035996675491333, "learning_rate": 1.1363084816693148e-06, "loss": 0.071, "step": 25967 }, { "epoch": 85.14098360655737, "grad_norm": 2.951870918273926, "learning_rate": 1.1358169000706331e-06, "loss": 0.1656, "step": 25968 }, { "epoch": 85.14426229508197, "grad_norm": 2.6749894618988037, "learning_rate": 1.135325418424832e-06, "loss": 0.1087, "step": 25969 }, { "epoch": 85.14754098360656, "grad_norm": 1.6441394090652466, "learning_rate": 1.1348340367374543e-06, "loss": 0.1514, "step": 25970 }, { "epoch": 85.15081967213115, "grad_norm": 2.5432980060577393, "learning_rate": 1.1343427550140373e-06, "loss": 0.1691, "step": 25971 }, { "epoch": 85.15409836065574, "grad_norm": 3.1083030700683594, "learning_rate": 1.1338515732601262e-06, "loss": 0.2359, "step": 25972 }, { "epoch": 85.15737704918033, "grad_norm": 3.900430202484131, "learning_rate": 1.133360491481258e-06, "loss": 0.1004, "step": 25973 }, { "epoch": 85.16065573770491, "grad_norm": 2.1623685359954834, "learning_rate": 1.1328695096829678e-06, "loss": 0.0704, "step": 25974 }, { "epoch": 85.1639344262295, "grad_norm": 1.5280840396881104, "learning_rate": 1.1323786278707916e-06, "loss": 0.0295, "step": 25975 }, { "epoch": 85.1672131147541, "grad_norm": 2.3048791885375977, "learning_rate": 1.1318878460502692e-06, "loss": 0.1611, "step": 25976 }, { "epoch": 85.1704918032787, "grad_norm": 3.5514700412750244, "learning_rate": 1.1313971642269317e-06, "loss": 0.1558, "step": 25977 }, { "epoch": 85.17377049180328, "grad_norm": 1.9237399101257324, "learning_rate": 1.1309065824063115e-06, "loss": 0.1655, "step": 25978 }, { "epoch": 85.17704918032787, "grad_norm": 2.020265579223633, "learning_rate": 1.1304161005939397e-06, "loss": 0.0512, "step": 25979 }, { "epoch": 85.18032786885246, "grad_norm": 2.3959546089172363, "learning_rate": 1.1299257187953505e-06, "loss": 0.1769, "step": 25980 }, { "epoch": 85.18360655737705, "grad_norm": 2.2326931953430176, "learning_rate": 1.129435437016071e-06, "loss": 0.1001, "step": 25981 }, { "epoch": 85.18688524590164, "grad_norm": 2.2039341926574707, "learning_rate": 1.12894525526163e-06, "loss": 0.0756, "step": 25982 }, { "epoch": 85.19016393442622, "grad_norm": 1.7907419204711914, "learning_rate": 1.1284551735375548e-06, "loss": 0.1622, "step": 25983 }, { "epoch": 85.19344262295083, "grad_norm": 2.124624013900757, "learning_rate": 1.1279651918493706e-06, "loss": 0.1529, "step": 25984 }, { "epoch": 85.19672131147541, "grad_norm": 3.93267822265625, "learning_rate": 1.1274753102026037e-06, "loss": 0.0761, "step": 25985 }, { "epoch": 85.2, "grad_norm": 2.1381168365478516, "learning_rate": 1.1269855286027798e-06, "loss": 0.0505, "step": 25986 }, { "epoch": 85.20327868852459, "grad_norm": 2.0356359481811523, "learning_rate": 1.1264958470554178e-06, "loss": 0.0559, "step": 25987 }, { "epoch": 85.20655737704918, "grad_norm": 1.7309975624084473, "learning_rate": 1.1260062655660408e-06, "loss": 0.0452, "step": 25988 }, { "epoch": 85.20983606557377, "grad_norm": 5.567139625549316, "learning_rate": 1.1255167841401704e-06, "loss": 0.0659, "step": 25989 }, { "epoch": 85.21311475409836, "grad_norm": 2.0213515758514404, "learning_rate": 1.1250274027833264e-06, "loss": 0.0743, "step": 25990 }, { "epoch": 85.21639344262294, "grad_norm": 3.4132702350616455, "learning_rate": 1.1245381215010243e-06, "loss": 0.0725, "step": 25991 }, { "epoch": 85.21967213114755, "grad_norm": 1.8210036754608154, "learning_rate": 1.1240489402987841e-06, "loss": 0.1572, "step": 25992 }, { "epoch": 85.22295081967214, "grad_norm": 2.191042900085449, "learning_rate": 1.1235598591821217e-06, "loss": 0.1015, "step": 25993 }, { "epoch": 85.22622950819672, "grad_norm": 1.8854777812957764, "learning_rate": 1.1230708781565481e-06, "loss": 0.2041, "step": 25994 }, { "epoch": 85.22950819672131, "grad_norm": 2.7948694229125977, "learning_rate": 1.122581997227583e-06, "loss": 0.1593, "step": 25995 }, { "epoch": 85.2327868852459, "grad_norm": 2.4559788703918457, "learning_rate": 1.122093216400736e-06, "loss": 0.1867, "step": 25996 }, { "epoch": 85.23606557377049, "grad_norm": 2.083125114440918, "learning_rate": 1.1216045356815153e-06, "loss": 0.0371, "step": 25997 }, { "epoch": 85.23934426229508, "grad_norm": 2.7985446453094482, "learning_rate": 1.121115955075438e-06, "loss": 0.1812, "step": 25998 }, { "epoch": 85.24262295081967, "grad_norm": 2.3204517364501953, "learning_rate": 1.1206274745880097e-06, "loss": 0.1426, "step": 25999 }, { "epoch": 85.24590163934427, "grad_norm": 2.483731985092163, "learning_rate": 1.1201390942247392e-06, "loss": 0.0399, "step": 26000 }, { "epoch": 85.24918032786886, "grad_norm": 2.86759090423584, "learning_rate": 1.119650813991131e-06, "loss": 0.0849, "step": 26001 }, { "epoch": 85.25245901639344, "grad_norm": 2.097355604171753, "learning_rate": 1.1191626338926943e-06, "loss": 0.1288, "step": 26002 }, { "epoch": 85.25573770491803, "grad_norm": 2.6889641284942627, "learning_rate": 1.118674553934934e-06, "loss": 0.146, "step": 26003 }, { "epoch": 85.25901639344262, "grad_norm": 1.7162986993789673, "learning_rate": 1.118186574123351e-06, "loss": 0.0457, "step": 26004 }, { "epoch": 85.26229508196721, "grad_norm": 2.5877127647399902, "learning_rate": 1.1176986944634505e-06, "loss": 0.0729, "step": 26005 }, { "epoch": 85.2655737704918, "grad_norm": 1.9113951921463013, "learning_rate": 1.1172109149607292e-06, "loss": 0.2084, "step": 26006 }, { "epoch": 85.26885245901639, "grad_norm": 2.6672213077545166, "learning_rate": 1.1167232356206936e-06, "loss": 0.0918, "step": 26007 }, { "epoch": 85.27213114754099, "grad_norm": 2.8390681743621826, "learning_rate": 1.1162356564488398e-06, "loss": 0.1865, "step": 26008 }, { "epoch": 85.27540983606558, "grad_norm": 1.8782801628112793, "learning_rate": 1.115748177450665e-06, "loss": 0.069, "step": 26009 }, { "epoch": 85.27868852459017, "grad_norm": 2.070220470428467, "learning_rate": 1.1152607986316655e-06, "loss": 0.1393, "step": 26010 }, { "epoch": 85.28196721311475, "grad_norm": 1.4207688570022583, "learning_rate": 1.1147735199973397e-06, "loss": 0.0534, "step": 26011 }, { "epoch": 85.28524590163934, "grad_norm": 2.41086483001709, "learning_rate": 1.1142863415531813e-06, "loss": 0.0432, "step": 26012 }, { "epoch": 85.28852459016393, "grad_norm": 3.5059123039245605, "learning_rate": 1.1137992633046835e-06, "loss": 0.0596, "step": 26013 }, { "epoch": 85.29180327868852, "grad_norm": 2.0959537029266357, "learning_rate": 1.1133122852573352e-06, "loss": 0.0846, "step": 26014 }, { "epoch": 85.29508196721312, "grad_norm": 1.5992029905319214, "learning_rate": 1.1128254074166334e-06, "loss": 0.0992, "step": 26015 }, { "epoch": 85.29836065573771, "grad_norm": 1.9851130247116089, "learning_rate": 1.1123386297880657e-06, "loss": 0.0457, "step": 26016 }, { "epoch": 85.3016393442623, "grad_norm": 2.891984224319458, "learning_rate": 1.111851952377121e-06, "loss": 0.1437, "step": 26017 }, { "epoch": 85.30491803278689, "grad_norm": 2.764075994491577, "learning_rate": 1.1113653751892862e-06, "loss": 0.1224, "step": 26018 }, { "epoch": 85.30819672131148, "grad_norm": 2.4614617824554443, "learning_rate": 1.1108788982300467e-06, "loss": 0.1931, "step": 26019 }, { "epoch": 85.31147540983606, "grad_norm": 2.5139451026916504, "learning_rate": 1.1103925215048927e-06, "loss": 0.0736, "step": 26020 }, { "epoch": 85.31475409836065, "grad_norm": 1.878305435180664, "learning_rate": 1.1099062450193054e-06, "loss": 0.0537, "step": 26021 }, { "epoch": 85.31803278688524, "grad_norm": 1.9837673902511597, "learning_rate": 1.109420068778768e-06, "loss": 0.3095, "step": 26022 }, { "epoch": 85.32131147540984, "grad_norm": 2.5966053009033203, "learning_rate": 1.108933992788762e-06, "loss": 0.1276, "step": 26023 }, { "epoch": 85.32459016393443, "grad_norm": 1.822542428970337, "learning_rate": 1.1084480170547718e-06, "loss": 0.0764, "step": 26024 }, { "epoch": 85.32786885245902, "grad_norm": 2.574263572692871, "learning_rate": 1.107962141582275e-06, "loss": 0.15, "step": 26025 }, { "epoch": 85.33114754098361, "grad_norm": 2.0966413021087646, "learning_rate": 1.1074763663767497e-06, "loss": 0.1119, "step": 26026 }, { "epoch": 85.3344262295082, "grad_norm": 2.888688564300537, "learning_rate": 1.1069906914436735e-06, "loss": 0.1514, "step": 26027 }, { "epoch": 85.33770491803278, "grad_norm": 2.3334996700286865, "learning_rate": 1.1065051167885244e-06, "loss": 0.157, "step": 26028 }, { "epoch": 85.34098360655737, "grad_norm": 1.8081786632537842, "learning_rate": 1.1060196424167779e-06, "loss": 0.0475, "step": 26029 }, { "epoch": 85.34426229508196, "grad_norm": 2.2867140769958496, "learning_rate": 1.1055342683339066e-06, "loss": 0.0795, "step": 26030 }, { "epoch": 85.34754098360656, "grad_norm": 2.355614185333252, "learning_rate": 1.1050489945453847e-06, "loss": 0.1341, "step": 26031 }, { "epoch": 85.35081967213115, "grad_norm": 2.955773115158081, "learning_rate": 1.1045638210566823e-06, "loss": 0.1297, "step": 26032 }, { "epoch": 85.35409836065574, "grad_norm": 2.9982359409332275, "learning_rate": 1.1040787478732728e-06, "loss": 0.1031, "step": 26033 }, { "epoch": 85.35737704918033, "grad_norm": 2.6425957679748535, "learning_rate": 1.1035937750006254e-06, "loss": 0.168, "step": 26034 }, { "epoch": 85.36065573770492, "grad_norm": 2.7351911067962646, "learning_rate": 1.1031089024442088e-06, "loss": 0.1285, "step": 26035 }, { "epoch": 85.3639344262295, "grad_norm": 2.0563344955444336, "learning_rate": 1.1026241302094864e-06, "loss": 0.0451, "step": 26036 }, { "epoch": 85.3672131147541, "grad_norm": 2.8164124488830566, "learning_rate": 1.1021394583019306e-06, "loss": 0.138, "step": 26037 }, { "epoch": 85.37049180327868, "grad_norm": 2.033627986907959, "learning_rate": 1.1016548867270037e-06, "loss": 0.1244, "step": 26038 }, { "epoch": 85.37377049180328, "grad_norm": 1.6820120811462402, "learning_rate": 1.1011704154901704e-06, "loss": 0.1432, "step": 26039 }, { "epoch": 85.37704918032787, "grad_norm": 3.3808553218841553, "learning_rate": 1.1006860445968902e-06, "loss": 0.2476, "step": 26040 }, { "epoch": 85.38032786885246, "grad_norm": 1.9090489149093628, "learning_rate": 1.1002017740526305e-06, "loss": 0.0789, "step": 26041 }, { "epoch": 85.38360655737705, "grad_norm": 2.1412179470062256, "learning_rate": 1.0997176038628498e-06, "loss": 0.0824, "step": 26042 }, { "epoch": 85.38688524590164, "grad_norm": 1.7931132316589355, "learning_rate": 1.0992335340330062e-06, "loss": 0.1312, "step": 26043 }, { "epoch": 85.39016393442623, "grad_norm": 2.8623111248016357, "learning_rate": 1.0987495645685575e-06, "loss": 0.1486, "step": 26044 }, { "epoch": 85.39344262295081, "grad_norm": 2.99556303024292, "learning_rate": 1.0982656954749637e-06, "loss": 0.2357, "step": 26045 }, { "epoch": 85.3967213114754, "grad_norm": 2.1849303245544434, "learning_rate": 1.0977819267576807e-06, "loss": 0.0882, "step": 26046 }, { "epoch": 85.4, "grad_norm": 1.3631055355072021, "learning_rate": 1.0972982584221592e-06, "loss": 0.0205, "step": 26047 }, { "epoch": 85.4032786885246, "grad_norm": 1.6355624198913574, "learning_rate": 1.0968146904738596e-06, "loss": 0.0527, "step": 26048 }, { "epoch": 85.40655737704918, "grad_norm": 2.4206788539886475, "learning_rate": 1.096331222918231e-06, "loss": 0.1992, "step": 26049 }, { "epoch": 85.40983606557377, "grad_norm": 2.4178764820098877, "learning_rate": 1.0958478557607222e-06, "loss": 0.113, "step": 26050 }, { "epoch": 85.41311475409836, "grad_norm": 2.1823341846466064, "learning_rate": 1.09536458900679e-06, "loss": 0.2393, "step": 26051 }, { "epoch": 85.41639344262295, "grad_norm": 2.1762189865112305, "learning_rate": 1.094881422661881e-06, "loss": 0.0456, "step": 26052 }, { "epoch": 85.41967213114754, "grad_norm": 4.265509128570557, "learning_rate": 1.0943983567314399e-06, "loss": 0.157, "step": 26053 }, { "epoch": 85.42295081967212, "grad_norm": 2.3027050495147705, "learning_rate": 1.0939153912209187e-06, "loss": 0.1245, "step": 26054 }, { "epoch": 85.42622950819673, "grad_norm": 2.323408365249634, "learning_rate": 1.0934325261357625e-06, "loss": 0.1719, "step": 26055 }, { "epoch": 85.42950819672132, "grad_norm": 3.642169713973999, "learning_rate": 1.0929497614814145e-06, "loss": 0.2057, "step": 26056 }, { "epoch": 85.4327868852459, "grad_norm": 3.692805290222168, "learning_rate": 1.092467097263319e-06, "loss": 0.0716, "step": 26057 }, { "epoch": 85.43606557377049, "grad_norm": 2.1433098316192627, "learning_rate": 1.091984533486916e-06, "loss": 0.0343, "step": 26058 }, { "epoch": 85.43934426229508, "grad_norm": 2.601402759552002, "learning_rate": 1.0915020701576529e-06, "loss": 0.1711, "step": 26059 }, { "epoch": 85.44262295081967, "grad_norm": 2.7359509468078613, "learning_rate": 1.0910197072809647e-06, "loss": 0.1538, "step": 26060 }, { "epoch": 85.44590163934426, "grad_norm": 2.2585229873657227, "learning_rate": 1.090537444862293e-06, "loss": 0.1465, "step": 26061 }, { "epoch": 85.44918032786886, "grad_norm": 2.968790054321289, "learning_rate": 1.0900552829070731e-06, "loss": 0.2353, "step": 26062 }, { "epoch": 85.45245901639345, "grad_norm": 2.5771312713623047, "learning_rate": 1.0895732214207465e-06, "loss": 0.0719, "step": 26063 }, { "epoch": 85.45573770491804, "grad_norm": 1.4920001029968262, "learning_rate": 1.0890912604087456e-06, "loss": 0.1235, "step": 26064 }, { "epoch": 85.45901639344262, "grad_norm": 3.283548355102539, "learning_rate": 1.0886093998765069e-06, "loss": 0.1637, "step": 26065 }, { "epoch": 85.46229508196721, "grad_norm": 2.057729482650757, "learning_rate": 1.0881276398294593e-06, "loss": 0.0494, "step": 26066 }, { "epoch": 85.4655737704918, "grad_norm": 2.02663516998291, "learning_rate": 1.087645980273041e-06, "loss": 0.1364, "step": 26067 }, { "epoch": 85.46885245901639, "grad_norm": 2.043087959289551, "learning_rate": 1.0871644212126808e-06, "loss": 0.0777, "step": 26068 }, { "epoch": 85.47213114754098, "grad_norm": 2.8323912620544434, "learning_rate": 1.086682962653809e-06, "loss": 0.1429, "step": 26069 }, { "epoch": 85.47540983606558, "grad_norm": 1.7734723091125488, "learning_rate": 1.0862016046018541e-06, "loss": 0.0604, "step": 26070 }, { "epoch": 85.47868852459017, "grad_norm": 2.170222282409668, "learning_rate": 1.0857203470622424e-06, "loss": 0.0706, "step": 26071 }, { "epoch": 85.48196721311476, "grad_norm": 2.1198973655700684, "learning_rate": 1.0852391900404046e-06, "loss": 0.0502, "step": 26072 }, { "epoch": 85.48524590163935, "grad_norm": 1.9238231182098389, "learning_rate": 1.0847581335417634e-06, "loss": 0.0414, "step": 26073 }, { "epoch": 85.48852459016393, "grad_norm": 2.133544445037842, "learning_rate": 1.0842771775717443e-06, "loss": 0.0389, "step": 26074 }, { "epoch": 85.49180327868852, "grad_norm": 2.654768943786621, "learning_rate": 1.0837963221357672e-06, "loss": 0.1212, "step": 26075 }, { "epoch": 85.49508196721311, "grad_norm": 2.9091174602508545, "learning_rate": 1.0833155672392592e-06, "loss": 0.0735, "step": 26076 }, { "epoch": 85.4983606557377, "grad_norm": 2.027343988418579, "learning_rate": 1.0828349128876404e-06, "loss": 0.0871, "step": 26077 }, { "epoch": 85.5016393442623, "grad_norm": 2.9160940647125244, "learning_rate": 1.0823543590863283e-06, "loss": 0.2384, "step": 26078 }, { "epoch": 85.50491803278689, "grad_norm": 3.9856181144714355, "learning_rate": 1.0818739058407413e-06, "loss": 0.1248, "step": 26079 }, { "epoch": 85.50819672131148, "grad_norm": 2.75264310836792, "learning_rate": 1.0813935531563002e-06, "loss": 0.1906, "step": 26080 }, { "epoch": 85.51147540983607, "grad_norm": 2.0983543395996094, "learning_rate": 1.08091330103842e-06, "loss": 0.0772, "step": 26081 }, { "epoch": 85.51475409836065, "grad_norm": 2.648104190826416, "learning_rate": 1.0804331494925157e-06, "loss": 0.0456, "step": 26082 }, { "epoch": 85.51803278688524, "grad_norm": 2.511411190032959, "learning_rate": 1.0799530985240025e-06, "loss": 0.1294, "step": 26083 }, { "epoch": 85.52131147540983, "grad_norm": 2.376386880874634, "learning_rate": 1.0794731481382902e-06, "loss": 0.0868, "step": 26084 }, { "epoch": 85.52459016393442, "grad_norm": 1.849066138267517, "learning_rate": 1.0789932983407946e-06, "loss": 0.0704, "step": 26085 }, { "epoch": 85.52786885245902, "grad_norm": 2.3897359371185303, "learning_rate": 1.0785135491369259e-06, "loss": 0.0985, "step": 26086 }, { "epoch": 85.53114754098361, "grad_norm": 2.6278960704803467, "learning_rate": 1.0780339005320917e-06, "loss": 0.1133, "step": 26087 }, { "epoch": 85.5344262295082, "grad_norm": 2.601816415786743, "learning_rate": 1.077554352531701e-06, "loss": 0.1575, "step": 26088 }, { "epoch": 85.53770491803279, "grad_norm": 3.460054636001587, "learning_rate": 1.0770749051411633e-06, "loss": 0.1279, "step": 26089 }, { "epoch": 85.54098360655738, "grad_norm": 2.115812301635742, "learning_rate": 1.076595558365884e-06, "loss": 0.1362, "step": 26090 }, { "epoch": 85.54426229508196, "grad_norm": 3.1013994216918945, "learning_rate": 1.0761163122112672e-06, "loss": 0.1133, "step": 26091 }, { "epoch": 85.54754098360655, "grad_norm": 2.40848445892334, "learning_rate": 1.075637166682717e-06, "loss": 0.1303, "step": 26092 }, { "epoch": 85.55081967213114, "grad_norm": 2.7565524578094482, "learning_rate": 1.0751581217856378e-06, "loss": 0.1253, "step": 26093 }, { "epoch": 85.55409836065574, "grad_norm": 1.1048660278320312, "learning_rate": 1.0746791775254296e-06, "loss": 0.0123, "step": 26094 }, { "epoch": 85.55737704918033, "grad_norm": 2.4589831829071045, "learning_rate": 1.0742003339074946e-06, "loss": 0.0777, "step": 26095 }, { "epoch": 85.56065573770492, "grad_norm": 2.2244625091552734, "learning_rate": 1.073721590937229e-06, "loss": 0.1476, "step": 26096 }, { "epoch": 85.56393442622951, "grad_norm": 2.1839334964752197, "learning_rate": 1.0732429486200346e-06, "loss": 0.1486, "step": 26097 }, { "epoch": 85.5672131147541, "grad_norm": 2.2869739532470703, "learning_rate": 1.0727644069613085e-06, "loss": 0.0731, "step": 26098 }, { "epoch": 85.57049180327868, "grad_norm": 2.24820613861084, "learning_rate": 1.0722859659664442e-06, "loss": 0.0324, "step": 26099 }, { "epoch": 85.57377049180327, "grad_norm": 1.5496309995651245, "learning_rate": 1.0718076256408394e-06, "loss": 0.0341, "step": 26100 }, { "epoch": 85.57704918032788, "grad_norm": 2.05265736579895, "learning_rate": 1.0713293859898865e-06, "loss": 0.1101, "step": 26101 }, { "epoch": 85.58032786885246, "grad_norm": 2.107442855834961, "learning_rate": 1.070851247018977e-06, "loss": 0.0811, "step": 26102 }, { "epoch": 85.58360655737705, "grad_norm": 2.0184943675994873, "learning_rate": 1.070373208733505e-06, "loss": 0.0793, "step": 26103 }, { "epoch": 85.58688524590164, "grad_norm": 2.571545362472534, "learning_rate": 1.0698952711388588e-06, "loss": 0.1945, "step": 26104 }, { "epoch": 85.59016393442623, "grad_norm": 2.12785005569458, "learning_rate": 1.0694174342404295e-06, "loss": 0.0575, "step": 26105 }, { "epoch": 85.59344262295082, "grad_norm": 3.3558919429779053, "learning_rate": 1.0689396980436017e-06, "loss": 0.2315, "step": 26106 }, { "epoch": 85.5967213114754, "grad_norm": 3.2280313968658447, "learning_rate": 1.0684620625537677e-06, "loss": 0.2507, "step": 26107 }, { "epoch": 85.6, "grad_norm": 2.826724052429199, "learning_rate": 1.067984527776309e-06, "loss": 0.0609, "step": 26108 }, { "epoch": 85.6032786885246, "grad_norm": 1.6995294094085693, "learning_rate": 1.0675070937166131e-06, "loss": 0.0367, "step": 26109 }, { "epoch": 85.60655737704919, "grad_norm": 2.4565629959106445, "learning_rate": 1.0670297603800595e-06, "loss": 0.1783, "step": 26110 }, { "epoch": 85.60983606557377, "grad_norm": 2.5492374897003174, "learning_rate": 1.0665525277720345e-06, "loss": 0.0837, "step": 26111 }, { "epoch": 85.61311475409836, "grad_norm": 4.65441370010376, "learning_rate": 1.0660753958979198e-06, "loss": 0.2611, "step": 26112 }, { "epoch": 85.61639344262295, "grad_norm": 1.5880792140960693, "learning_rate": 1.065598364763093e-06, "loss": 0.0329, "step": 26113 }, { "epoch": 85.61967213114754, "grad_norm": 2.5326685905456543, "learning_rate": 1.065121434372932e-06, "loss": 0.1241, "step": 26114 }, { "epoch": 85.62295081967213, "grad_norm": 2.5651371479034424, "learning_rate": 1.0646446047328186e-06, "loss": 0.0635, "step": 26115 }, { "epoch": 85.62622950819672, "grad_norm": 2.588501214981079, "learning_rate": 1.064167875848129e-06, "loss": 0.0733, "step": 26116 }, { "epoch": 85.62950819672132, "grad_norm": 2.027834177017212, "learning_rate": 1.0636912477242367e-06, "loss": 0.0784, "step": 26117 }, { "epoch": 85.6327868852459, "grad_norm": 2.1130921840667725, "learning_rate": 1.0632147203665144e-06, "loss": 0.0466, "step": 26118 }, { "epoch": 85.6360655737705, "grad_norm": 1.9680016040802002, "learning_rate": 1.062738293780341e-06, "loss": 0.1144, "step": 26119 }, { "epoch": 85.63934426229508, "grad_norm": 1.8561768531799316, "learning_rate": 1.0622619679710856e-06, "loss": 0.1208, "step": 26120 }, { "epoch": 85.64262295081967, "grad_norm": 2.754455327987671, "learning_rate": 1.0617857429441191e-06, "loss": 0.1053, "step": 26121 }, { "epoch": 85.64590163934426, "grad_norm": 2.68407940864563, "learning_rate": 1.061309618704811e-06, "loss": 0.0567, "step": 26122 }, { "epoch": 85.64918032786885, "grad_norm": 2.856759548187256, "learning_rate": 1.0608335952585302e-06, "loss": 0.1613, "step": 26123 }, { "epoch": 85.65245901639344, "grad_norm": 2.5305984020233154, "learning_rate": 1.0603576726106468e-06, "loss": 0.0906, "step": 26124 }, { "epoch": 85.65573770491804, "grad_norm": 2.5191712379455566, "learning_rate": 1.0598818507665255e-06, "loss": 0.1307, "step": 26125 }, { "epoch": 85.65901639344263, "grad_norm": 1.6712803840637207, "learning_rate": 1.0594061297315316e-06, "loss": 0.0258, "step": 26126 }, { "epoch": 85.66229508196722, "grad_norm": 2.4700522422790527, "learning_rate": 1.058930509511027e-06, "loss": 0.0732, "step": 26127 }, { "epoch": 85.6655737704918, "grad_norm": 2.1726677417755127, "learning_rate": 1.058454990110379e-06, "loss": 0.1059, "step": 26128 }, { "epoch": 85.66885245901639, "grad_norm": 2.0883729457855225, "learning_rate": 1.0579795715349494e-06, "loss": 0.0699, "step": 26129 }, { "epoch": 85.67213114754098, "grad_norm": 1.9269179105758667, "learning_rate": 1.057504253790096e-06, "loss": 0.1181, "step": 26130 }, { "epoch": 85.67540983606557, "grad_norm": 2.3620262145996094, "learning_rate": 1.0570290368811786e-06, "loss": 0.0863, "step": 26131 }, { "epoch": 85.67868852459016, "grad_norm": 2.59226655960083, "learning_rate": 1.05655392081356e-06, "loss": 0.1286, "step": 26132 }, { "epoch": 85.68196721311476, "grad_norm": 3.0495989322662354, "learning_rate": 1.0560789055925935e-06, "loss": 0.0806, "step": 26133 }, { "epoch": 85.68524590163935, "grad_norm": 3.0112578868865967, "learning_rate": 1.0556039912236371e-06, "loss": 0.1686, "step": 26134 }, { "epoch": 85.68852459016394, "grad_norm": 1.8027504682540894, "learning_rate": 1.0551291777120465e-06, "loss": 0.0705, "step": 26135 }, { "epoch": 85.69180327868852, "grad_norm": 1.9376001358032227, "learning_rate": 1.0546544650631719e-06, "loss": 0.1393, "step": 26136 }, { "epoch": 85.69508196721311, "grad_norm": 2.3624658584594727, "learning_rate": 1.054179853282371e-06, "loss": 0.1132, "step": 26137 }, { "epoch": 85.6983606557377, "grad_norm": 2.5398340225219727, "learning_rate": 1.0537053423749932e-06, "loss": 0.1147, "step": 26138 }, { "epoch": 85.70163934426229, "grad_norm": 2.639113664627075, "learning_rate": 1.0532309323463896e-06, "loss": 0.1597, "step": 26139 }, { "epoch": 85.70491803278688, "grad_norm": 2.012371301651001, "learning_rate": 1.0527566232019083e-06, "loss": 0.0849, "step": 26140 }, { "epoch": 85.70819672131148, "grad_norm": 2.859090566635132, "learning_rate": 1.0522824149469003e-06, "loss": 0.085, "step": 26141 }, { "epoch": 85.71147540983607, "grad_norm": 2.1817188262939453, "learning_rate": 1.0518083075867113e-06, "loss": 0.0689, "step": 26142 }, { "epoch": 85.71475409836066, "grad_norm": 2.4369006156921387, "learning_rate": 1.0513343011266873e-06, "loss": 0.1056, "step": 26143 }, { "epoch": 85.71803278688525, "grad_norm": 2.7105090618133545, "learning_rate": 1.0508603955721718e-06, "loss": 0.1052, "step": 26144 }, { "epoch": 85.72131147540983, "grad_norm": 1.8177655935287476, "learning_rate": 1.0503865909285116e-06, "loss": 0.1355, "step": 26145 }, { "epoch": 85.72459016393442, "grad_norm": 2.0992496013641357, "learning_rate": 1.049912887201049e-06, "loss": 0.0544, "step": 26146 }, { "epoch": 85.72786885245901, "grad_norm": 2.4334914684295654, "learning_rate": 1.049439284395123e-06, "loss": 0.0619, "step": 26147 }, { "epoch": 85.73114754098361, "grad_norm": 2.2209584712982178, "learning_rate": 1.048965782516076e-06, "loss": 0.0808, "step": 26148 }, { "epoch": 85.7344262295082, "grad_norm": 2.2202770709991455, "learning_rate": 1.0484923815692449e-06, "loss": 0.0752, "step": 26149 }, { "epoch": 85.73770491803279, "grad_norm": 1.7436970472335815, "learning_rate": 1.04801908155997e-06, "loss": 0.1788, "step": 26150 }, { "epoch": 85.74098360655738, "grad_norm": 2.2105607986450195, "learning_rate": 1.047545882493589e-06, "loss": 0.1842, "step": 26151 }, { "epoch": 85.74426229508197, "grad_norm": 1.9889931678771973, "learning_rate": 1.0470727843754336e-06, "loss": 0.0618, "step": 26152 }, { "epoch": 85.74754098360656, "grad_norm": 2.283900499343872, "learning_rate": 1.0465997872108447e-06, "loss": 0.0915, "step": 26153 }, { "epoch": 85.75081967213114, "grad_norm": 1.6341346502304077, "learning_rate": 1.0461268910051515e-06, "loss": 0.0794, "step": 26154 }, { "epoch": 85.75409836065573, "grad_norm": 2.2604870796203613, "learning_rate": 1.045654095763684e-06, "loss": 0.1265, "step": 26155 }, { "epoch": 85.75737704918033, "grad_norm": 2.8930346965789795, "learning_rate": 1.0451814014917805e-06, "loss": 0.1207, "step": 26156 }, { "epoch": 85.76065573770492, "grad_norm": 1.5514293909072876, "learning_rate": 1.0447088081947664e-06, "loss": 0.1196, "step": 26157 }, { "epoch": 85.76393442622951, "grad_norm": 1.9324371814727783, "learning_rate": 1.04423631587797e-06, "loss": 0.1206, "step": 26158 }, { "epoch": 85.7672131147541, "grad_norm": 2.622429609298706, "learning_rate": 1.0437639245467234e-06, "loss": 0.1265, "step": 26159 }, { "epoch": 85.77049180327869, "grad_norm": 2.283914566040039, "learning_rate": 1.0432916342063503e-06, "loss": 0.1534, "step": 26160 }, { "epoch": 85.77377049180328, "grad_norm": 1.9964141845703125, "learning_rate": 1.0428194448621764e-06, "loss": 0.0994, "step": 26161 }, { "epoch": 85.77704918032786, "grad_norm": 2.6858513355255127, "learning_rate": 1.042347356519524e-06, "loss": 0.0786, "step": 26162 }, { "epoch": 85.78032786885245, "grad_norm": 1.893237829208374, "learning_rate": 1.0418753691837213e-06, "loss": 0.0453, "step": 26163 }, { "epoch": 85.78360655737706, "grad_norm": 2.309267282485962, "learning_rate": 1.0414034828600883e-06, "loss": 0.0553, "step": 26164 }, { "epoch": 85.78688524590164, "grad_norm": 1.7841960191726685, "learning_rate": 1.040931697553945e-06, "loss": 0.0927, "step": 26165 }, { "epoch": 85.79016393442623, "grad_norm": 2.6680397987365723, "learning_rate": 1.04046001327061e-06, "loss": 0.1151, "step": 26166 }, { "epoch": 85.79344262295082, "grad_norm": 2.8728363513946533, "learning_rate": 1.0399884300154062e-06, "loss": 0.3214, "step": 26167 }, { "epoch": 85.79672131147541, "grad_norm": 2.051318407058716, "learning_rate": 1.0395169477936495e-06, "loss": 0.1416, "step": 26168 }, { "epoch": 85.8, "grad_norm": 2.225930690765381, "learning_rate": 1.0390455666106547e-06, "loss": 0.1843, "step": 26169 }, { "epoch": 85.80327868852459, "grad_norm": 2.3857710361480713, "learning_rate": 1.0385742864717364e-06, "loss": 0.2101, "step": 26170 }, { "epoch": 85.80655737704917, "grad_norm": 6.614795684814453, "learning_rate": 1.0381031073822135e-06, "loss": 0.1513, "step": 26171 }, { "epoch": 85.80983606557378, "grad_norm": 2.301292657852173, "learning_rate": 1.0376320293473952e-06, "loss": 0.0489, "step": 26172 }, { "epoch": 85.81311475409836, "grad_norm": 2.9103972911834717, "learning_rate": 1.0371610523725939e-06, "loss": 0.1033, "step": 26173 }, { "epoch": 85.81639344262295, "grad_norm": 3.497738838195801, "learning_rate": 1.0366901764631221e-06, "loss": 0.0689, "step": 26174 }, { "epoch": 85.81967213114754, "grad_norm": 2.4775798320770264, "learning_rate": 1.0362194016242843e-06, "loss": 0.1006, "step": 26175 }, { "epoch": 85.82295081967213, "grad_norm": 4.390932559967041, "learning_rate": 1.0357487278613964e-06, "loss": 0.0597, "step": 26176 }, { "epoch": 85.82622950819672, "grad_norm": 2.3936758041381836, "learning_rate": 1.0352781551797608e-06, "loss": 0.0482, "step": 26177 }, { "epoch": 85.8295081967213, "grad_norm": 1.9132989645004272, "learning_rate": 1.0348076835846866e-06, "loss": 0.0658, "step": 26178 }, { "epoch": 85.8327868852459, "grad_norm": 2.9446563720703125, "learning_rate": 1.0343373130814737e-06, "loss": 0.0817, "step": 26179 }, { "epoch": 85.8360655737705, "grad_norm": 1.640571117401123, "learning_rate": 1.0338670436754316e-06, "loss": 0.0916, "step": 26180 }, { "epoch": 85.83934426229509, "grad_norm": 2.521440029144287, "learning_rate": 1.0333968753718616e-06, "loss": 0.1748, "step": 26181 }, { "epoch": 85.84262295081967, "grad_norm": 1.6714153289794922, "learning_rate": 1.0329268081760646e-06, "loss": 0.0981, "step": 26182 }, { "epoch": 85.84590163934426, "grad_norm": 2.2479422092437744, "learning_rate": 1.03245684209334e-06, "loss": 0.1168, "step": 26183 }, { "epoch": 85.84918032786885, "grad_norm": 1.9302785396575928, "learning_rate": 1.0319869771289893e-06, "loss": 0.0425, "step": 26184 }, { "epoch": 85.85245901639344, "grad_norm": 2.4836721420288086, "learning_rate": 1.03151721328831e-06, "loss": 0.1037, "step": 26185 }, { "epoch": 85.85573770491803, "grad_norm": 2.1556484699249268, "learning_rate": 1.0310475505765993e-06, "loss": 0.0649, "step": 26186 }, { "epoch": 85.85901639344263, "grad_norm": 2.4001758098602295, "learning_rate": 1.030577988999153e-06, "loss": 0.1021, "step": 26187 }, { "epoch": 85.86229508196722, "grad_norm": 2.2878329753875732, "learning_rate": 1.0301085285612632e-06, "loss": 0.1107, "step": 26188 }, { "epoch": 85.8655737704918, "grad_norm": 2.0604615211486816, "learning_rate": 1.0296391692682284e-06, "loss": 0.0582, "step": 26189 }, { "epoch": 85.8688524590164, "grad_norm": 3.400643825531006, "learning_rate": 1.0291699111253395e-06, "loss": 0.1192, "step": 26190 }, { "epoch": 85.87213114754098, "grad_norm": 4.66178035736084, "learning_rate": 1.0287007541378857e-06, "loss": 0.1458, "step": 26191 }, { "epoch": 85.87540983606557, "grad_norm": 3.3280351161956787, "learning_rate": 1.0282316983111584e-06, "loss": 0.2181, "step": 26192 }, { "epoch": 85.87868852459016, "grad_norm": 1.7889198064804077, "learning_rate": 1.0277627436504478e-06, "loss": 0.0611, "step": 26193 }, { "epoch": 85.88196721311475, "grad_norm": 2.699145555496216, "learning_rate": 1.0272938901610408e-06, "loss": 0.1105, "step": 26194 }, { "epoch": 85.88524590163935, "grad_norm": 3.0576038360595703, "learning_rate": 1.0268251378482252e-06, "loss": 0.1213, "step": 26195 }, { "epoch": 85.88852459016394, "grad_norm": 3.4899613857269287, "learning_rate": 1.0263564867172838e-06, "loss": 0.2548, "step": 26196 }, { "epoch": 85.89180327868853, "grad_norm": 2.03621768951416, "learning_rate": 1.0258879367735053e-06, "loss": 0.0784, "step": 26197 }, { "epoch": 85.89508196721312, "grad_norm": 2.2404720783233643, "learning_rate": 1.0254194880221712e-06, "loss": 0.1481, "step": 26198 }, { "epoch": 85.8983606557377, "grad_norm": 3.1438210010528564, "learning_rate": 1.024951140468563e-06, "loss": 0.0874, "step": 26199 }, { "epoch": 85.90163934426229, "grad_norm": 2.6921463012695312, "learning_rate": 1.024482894117963e-06, "loss": 0.1055, "step": 26200 }, { "epoch": 85.90491803278688, "grad_norm": 2.3611068725585938, "learning_rate": 1.0240147489756481e-06, "loss": 0.0659, "step": 26201 }, { "epoch": 85.90819672131147, "grad_norm": 3.055318593978882, "learning_rate": 1.0235467050469016e-06, "loss": 0.194, "step": 26202 }, { "epoch": 85.91147540983607, "grad_norm": 5.706377983093262, "learning_rate": 1.0230787623369997e-06, "loss": 0.1177, "step": 26203 }, { "epoch": 85.91475409836066, "grad_norm": 3.0464930534362793, "learning_rate": 1.0226109208512158e-06, "loss": 0.2221, "step": 26204 }, { "epoch": 85.91803278688525, "grad_norm": 2.3913018703460693, "learning_rate": 1.02214318059483e-06, "loss": 0.0837, "step": 26205 }, { "epoch": 85.92131147540984, "grad_norm": 2.544447660446167, "learning_rate": 1.021675541573115e-06, "loss": 0.127, "step": 26206 }, { "epoch": 85.92459016393443, "grad_norm": 1.7105553150177002, "learning_rate": 1.021208003791343e-06, "loss": 0.0234, "step": 26207 }, { "epoch": 85.92786885245901, "grad_norm": 2.2314653396606445, "learning_rate": 1.0207405672547842e-06, "loss": 0.115, "step": 26208 }, { "epoch": 85.9311475409836, "grad_norm": 2.4330220222473145, "learning_rate": 1.0202732319687147e-06, "loss": 0.164, "step": 26209 }, { "epoch": 85.93442622950819, "grad_norm": 2.188852310180664, "learning_rate": 1.0198059979384e-06, "loss": 0.2403, "step": 26210 }, { "epoch": 85.9377049180328, "grad_norm": 3.010422468185425, "learning_rate": 1.0193388651691082e-06, "loss": 0.2135, "step": 26211 }, { "epoch": 85.94098360655738, "grad_norm": 2.3598430156707764, "learning_rate": 1.0188718336661096e-06, "loss": 0.0788, "step": 26212 }, { "epoch": 85.94426229508197, "grad_norm": 2.230118989944458, "learning_rate": 1.01840490343467e-06, "loss": 0.1394, "step": 26213 }, { "epoch": 85.94754098360656, "grad_norm": 2.7221226692199707, "learning_rate": 1.0179380744800505e-06, "loss": 0.0563, "step": 26214 }, { "epoch": 85.95081967213115, "grad_norm": 2.613314390182495, "learning_rate": 1.0174713468075214e-06, "loss": 0.0514, "step": 26215 }, { "epoch": 85.95409836065573, "grad_norm": 2.2750494480133057, "learning_rate": 1.017004720422341e-06, "loss": 0.0772, "step": 26216 }, { "epoch": 85.95737704918032, "grad_norm": 2.489198684692383, "learning_rate": 1.0165381953297737e-06, "loss": 0.2473, "step": 26217 }, { "epoch": 85.96065573770491, "grad_norm": 2.0722594261169434, "learning_rate": 1.0160717715350755e-06, "loss": 0.0273, "step": 26218 }, { "epoch": 85.96393442622951, "grad_norm": 2.616968870162964, "learning_rate": 1.015605449043512e-06, "loss": 0.1346, "step": 26219 }, { "epoch": 85.9672131147541, "grad_norm": 1.9318814277648926, "learning_rate": 1.0151392278603378e-06, "loss": 0.0984, "step": 26220 }, { "epoch": 85.97049180327869, "grad_norm": 2.8002138137817383, "learning_rate": 1.0146731079908112e-06, "loss": 0.1014, "step": 26221 }, { "epoch": 85.97377049180328, "grad_norm": 1.6942850351333618, "learning_rate": 1.0142070894401856e-06, "loss": 0.092, "step": 26222 }, { "epoch": 85.97704918032787, "grad_norm": 1.8012363910675049, "learning_rate": 1.0137411722137202e-06, "loss": 0.0406, "step": 26223 }, { "epoch": 85.98032786885246, "grad_norm": 2.1037893295288086, "learning_rate": 1.0132753563166663e-06, "loss": 0.115, "step": 26224 }, { "epoch": 85.98360655737704, "grad_norm": 2.0104501247406006, "learning_rate": 1.0128096417542765e-06, "loss": 0.0584, "step": 26225 }, { "epoch": 85.98688524590163, "grad_norm": 2.20470929145813, "learning_rate": 1.012344028531802e-06, "loss": 0.1496, "step": 26226 }, { "epoch": 85.99016393442623, "grad_norm": 2.1276192665100098, "learning_rate": 1.011878516654492e-06, "loss": 0.042, "step": 26227 }, { "epoch": 85.99344262295082, "grad_norm": 2.716879367828369, "learning_rate": 1.0114131061275988e-06, "loss": 0.1466, "step": 26228 }, { "epoch": 85.99672131147541, "grad_norm": 2.4211130142211914, "learning_rate": 1.0109477969563685e-06, "loss": 0.057, "step": 26229 }, { "epoch": 86.0, "grad_norm": 2.893902063369751, "learning_rate": 1.010482589146048e-06, "loss": 0.1164, "step": 26230 }, { "epoch": 86.00327868852459, "grad_norm": 2.3463661670684814, "learning_rate": 1.0100174827018815e-06, "loss": 0.2022, "step": 26231 }, { "epoch": 86.00655737704918, "grad_norm": 1.9953806400299072, "learning_rate": 1.0095524776291165e-06, "loss": 0.1362, "step": 26232 }, { "epoch": 86.00983606557377, "grad_norm": 2.7869021892547607, "learning_rate": 1.0090875739329953e-06, "loss": 0.1869, "step": 26233 }, { "epoch": 86.01311475409837, "grad_norm": 1.728245496749878, "learning_rate": 1.0086227716187602e-06, "loss": 0.0866, "step": 26234 }, { "epoch": 86.01639344262296, "grad_norm": 3.469648599624634, "learning_rate": 1.0081580706916493e-06, "loss": 0.2662, "step": 26235 }, { "epoch": 86.01967213114754, "grad_norm": 2.4519846439361572, "learning_rate": 1.0076934711569076e-06, "loss": 0.0579, "step": 26236 }, { "epoch": 86.02295081967213, "grad_norm": 4.768594741821289, "learning_rate": 1.0072289730197725e-06, "loss": 0.1967, "step": 26237 }, { "epoch": 86.02622950819672, "grad_norm": 1.9423056840896606, "learning_rate": 1.0067645762854794e-06, "loss": 0.1123, "step": 26238 }, { "epoch": 86.02950819672131, "grad_norm": 3.271893262863159, "learning_rate": 1.0063002809592682e-06, "loss": 0.0547, "step": 26239 }, { "epoch": 86.0327868852459, "grad_norm": 2.0695927143096924, "learning_rate": 1.005836087046369e-06, "loss": 0.0456, "step": 26240 }, { "epoch": 86.03606557377049, "grad_norm": 2.986783742904663, "learning_rate": 1.0053719945520213e-06, "loss": 0.1633, "step": 26241 }, { "epoch": 86.03934426229509, "grad_norm": 2.3556203842163086, "learning_rate": 1.0049080034814574e-06, "loss": 0.1032, "step": 26242 }, { "epoch": 86.04262295081968, "grad_norm": 1.8566807508468628, "learning_rate": 1.0044441138399074e-06, "loss": 0.0554, "step": 26243 }, { "epoch": 86.04590163934427, "grad_norm": 2.003955125808716, "learning_rate": 1.003980325632602e-06, "loss": 0.0987, "step": 26244 }, { "epoch": 86.04918032786885, "grad_norm": 2.165496349334717, "learning_rate": 1.0035166388647732e-06, "loss": 0.0901, "step": 26245 }, { "epoch": 86.05245901639344, "grad_norm": 2.5310001373291016, "learning_rate": 1.0030530535416494e-06, "loss": 0.1641, "step": 26246 }, { "epoch": 86.05573770491803, "grad_norm": 2.349961042404175, "learning_rate": 1.0025895696684563e-06, "loss": 0.1248, "step": 26247 }, { "epoch": 86.05901639344262, "grad_norm": 1.9959419965744019, "learning_rate": 1.0021261872504184e-06, "loss": 0.0834, "step": 26248 }, { "epoch": 86.0622950819672, "grad_norm": 2.092083215713501, "learning_rate": 1.0016629062927653e-06, "loss": 0.0584, "step": 26249 }, { "epoch": 86.06557377049181, "grad_norm": 2.232553243637085, "learning_rate": 1.001199726800719e-06, "loss": 0.0698, "step": 26250 }, { "epoch": 86.0688524590164, "grad_norm": 1.9150227308273315, "learning_rate": 1.0007366487795024e-06, "loss": 0.1127, "step": 26251 }, { "epoch": 86.07213114754099, "grad_norm": 3.0852701663970947, "learning_rate": 1.0002736722343365e-06, "loss": 0.1095, "step": 26252 }, { "epoch": 86.07540983606557, "grad_norm": 2.2016539573669434, "learning_rate": 9.998107971704396e-07, "loss": 0.0644, "step": 26253 }, { "epoch": 86.07868852459016, "grad_norm": 2.6630165576934814, "learning_rate": 9.993480235930376e-07, "loss": 0.3195, "step": 26254 }, { "epoch": 86.08196721311475, "grad_norm": 1.942038893699646, "learning_rate": 9.988853515073437e-07, "loss": 0.0444, "step": 26255 }, { "epoch": 86.08524590163934, "grad_norm": 2.2139906883239746, "learning_rate": 9.984227809185764e-07, "loss": 0.2355, "step": 26256 }, { "epoch": 86.08852459016393, "grad_norm": 1.9713077545166016, "learning_rate": 9.979603118319504e-07, "loss": 0.0725, "step": 26257 }, { "epoch": 86.09180327868853, "grad_norm": 2.3562228679656982, "learning_rate": 9.974979442526821e-07, "loss": 0.1435, "step": 26258 }, { "epoch": 86.09508196721312, "grad_norm": 2.8201284408569336, "learning_rate": 9.970356781859858e-07, "loss": 0.1329, "step": 26259 }, { "epoch": 86.09836065573771, "grad_norm": 3.0466468334198, "learning_rate": 9.965735136370713e-07, "loss": 0.0885, "step": 26260 }, { "epoch": 86.1016393442623, "grad_norm": 1.8826779127120972, "learning_rate": 9.961114506111537e-07, "loss": 0.0851, "step": 26261 }, { "epoch": 86.10491803278688, "grad_norm": 2.0227015018463135, "learning_rate": 9.956494891134405e-07, "loss": 0.0523, "step": 26262 }, { "epoch": 86.10819672131147, "grad_norm": 2.599759101867676, "learning_rate": 9.951876291491391e-07, "loss": 0.1539, "step": 26263 }, { "epoch": 86.11147540983606, "grad_norm": 1.9185463190078735, "learning_rate": 9.947258707234631e-07, "loss": 0.0325, "step": 26264 }, { "epoch": 86.11475409836065, "grad_norm": 2.5408928394317627, "learning_rate": 9.942642138416147e-07, "loss": 0.1726, "step": 26265 }, { "epoch": 86.11803278688525, "grad_norm": 2.329195022583008, "learning_rate": 9.938026585088e-07, "loss": 0.1394, "step": 26266 }, { "epoch": 86.12131147540984, "grad_norm": 2.0122177600860596, "learning_rate": 9.93341204730226e-07, "loss": 0.0452, "step": 26267 }, { "epoch": 86.12459016393443, "grad_norm": 2.5679028034210205, "learning_rate": 9.928798525110938e-07, "loss": 0.1265, "step": 26268 }, { "epoch": 86.12786885245902, "grad_norm": 2.562363624572754, "learning_rate": 9.924186018566072e-07, "loss": 0.136, "step": 26269 }, { "epoch": 86.1311475409836, "grad_norm": 1.7530597448349, "learning_rate": 9.919574527719645e-07, "loss": 0.0763, "step": 26270 }, { "epoch": 86.1344262295082, "grad_norm": 3.0932915210723877, "learning_rate": 9.914964052623688e-07, "loss": 0.111, "step": 26271 }, { "epoch": 86.13770491803278, "grad_norm": 1.779921054840088, "learning_rate": 9.910354593330185e-07, "loss": 0.0504, "step": 26272 }, { "epoch": 86.14098360655737, "grad_norm": 2.2890892028808594, "learning_rate": 9.905746149891104e-07, "loss": 0.1153, "step": 26273 }, { "epoch": 86.14426229508197, "grad_norm": 2.3464949131011963, "learning_rate": 9.901138722358383e-07, "loss": 0.1052, "step": 26274 }, { "epoch": 86.14754098360656, "grad_norm": 1.662733793258667, "learning_rate": 9.896532310784023e-07, "loss": 0.0736, "step": 26275 }, { "epoch": 86.15081967213115, "grad_norm": 2.660010576248169, "learning_rate": 9.89192691521994e-07, "loss": 0.0752, "step": 26276 }, { "epoch": 86.15409836065574, "grad_norm": 2.167295455932617, "learning_rate": 9.88732253571808e-07, "loss": 0.139, "step": 26277 }, { "epoch": 86.15737704918033, "grad_norm": 3.0712990760803223, "learning_rate": 9.882719172330347e-07, "loss": 0.2207, "step": 26278 }, { "epoch": 86.16065573770491, "grad_norm": 1.8049962520599365, "learning_rate": 9.878116825108641e-07, "loss": 0.1376, "step": 26279 }, { "epoch": 86.1639344262295, "grad_norm": 2.3136985301971436, "learning_rate": 9.87351549410488e-07, "loss": 0.1242, "step": 26280 }, { "epoch": 86.1672131147541, "grad_norm": 2.650303363800049, "learning_rate": 9.868915179370953e-07, "loss": 0.0828, "step": 26281 }, { "epoch": 86.1704918032787, "grad_norm": 3.4127392768859863, "learning_rate": 9.864315880958708e-07, "loss": 0.1676, "step": 26282 }, { "epoch": 86.17377049180328, "grad_norm": 1.927368402481079, "learning_rate": 9.859717598920003e-07, "loss": 0.1004, "step": 26283 }, { "epoch": 86.17704918032787, "grad_norm": 1.5910673141479492, "learning_rate": 9.855120333306722e-07, "loss": 0.0745, "step": 26284 }, { "epoch": 86.18032786885246, "grad_norm": 3.412076711654663, "learning_rate": 9.850524084170698e-07, "loss": 0.2215, "step": 26285 }, { "epoch": 86.18360655737705, "grad_norm": 2.8267972469329834, "learning_rate": 9.845928851563735e-07, "loss": 0.1375, "step": 26286 }, { "epoch": 86.18688524590164, "grad_norm": 2.149864912033081, "learning_rate": 9.841334635537647e-07, "loss": 0.0749, "step": 26287 }, { "epoch": 86.19016393442622, "grad_norm": 2.241487503051758, "learning_rate": 9.83674143614427e-07, "loss": 0.044, "step": 26288 }, { "epoch": 86.19344262295083, "grad_norm": 2.3266847133636475, "learning_rate": 9.832149253435375e-07, "loss": 0.0473, "step": 26289 }, { "epoch": 86.19672131147541, "grad_norm": 1.8815836906433105, "learning_rate": 9.827558087462751e-07, "loss": 0.0603, "step": 26290 }, { "epoch": 86.2, "grad_norm": 2.3526084423065186, "learning_rate": 9.822967938278172e-07, "loss": 0.1112, "step": 26291 }, { "epoch": 86.20327868852459, "grad_norm": 3.129833221435547, "learning_rate": 9.81837880593336e-07, "loss": 0.1827, "step": 26292 }, { "epoch": 86.20655737704918, "grad_norm": 2.7730281352996826, "learning_rate": 9.813790690480118e-07, "loss": 0.2708, "step": 26293 }, { "epoch": 86.20983606557377, "grad_norm": 2.045257806777954, "learning_rate": 9.809203591970163e-07, "loss": 0.1114, "step": 26294 }, { "epoch": 86.21311475409836, "grad_norm": 3.0353641510009766, "learning_rate": 9.804617510455194e-07, "loss": 0.1055, "step": 26295 }, { "epoch": 86.21639344262294, "grad_norm": 2.147040605545044, "learning_rate": 9.80003244598694e-07, "loss": 0.0555, "step": 26296 }, { "epoch": 86.21967213114755, "grad_norm": 2.2267868518829346, "learning_rate": 9.795448398617114e-07, "loss": 0.1103, "step": 26297 }, { "epoch": 86.22295081967214, "grad_norm": 2.767540216445923, "learning_rate": 9.790865368397406e-07, "loss": 0.2991, "step": 26298 }, { "epoch": 86.22622950819672, "grad_norm": 2.5558629035949707, "learning_rate": 9.786283355379477e-07, "loss": 0.063, "step": 26299 }, { "epoch": 86.22950819672131, "grad_norm": 2.7921690940856934, "learning_rate": 9.781702359614986e-07, "loss": 0.2456, "step": 26300 }, { "epoch": 86.2327868852459, "grad_norm": 2.9430036544799805, "learning_rate": 9.777122381155623e-07, "loss": 0.1423, "step": 26301 }, { "epoch": 86.23606557377049, "grad_norm": 2.578434705734253, "learning_rate": 9.772543420053015e-07, "loss": 0.068, "step": 26302 }, { "epoch": 86.23934426229508, "grad_norm": 1.5699454545974731, "learning_rate": 9.767965476358786e-07, "loss": 0.0413, "step": 26303 }, { "epoch": 86.24262295081967, "grad_norm": 3.2946465015411377, "learning_rate": 9.763388550124564e-07, "loss": 0.101, "step": 26304 }, { "epoch": 86.24590163934427, "grad_norm": 2.3171186447143555, "learning_rate": 9.758812641401948e-07, "loss": 0.0516, "step": 26305 }, { "epoch": 86.24918032786886, "grad_norm": 2.375901222229004, "learning_rate": 9.754237750242569e-07, "loss": 0.239, "step": 26306 }, { "epoch": 86.25245901639344, "grad_norm": 2.1229448318481445, "learning_rate": 9.74966387669798e-07, "loss": 0.0899, "step": 26307 }, { "epoch": 86.25573770491803, "grad_norm": 3.123760938644409, "learning_rate": 9.745091020819775e-07, "loss": 0.1282, "step": 26308 }, { "epoch": 86.25901639344262, "grad_norm": 2.6388938426971436, "learning_rate": 9.74051918265948e-07, "loss": 0.0562, "step": 26309 }, { "epoch": 86.26229508196721, "grad_norm": 2.1690030097961426, "learning_rate": 9.73594836226871e-07, "loss": 0.0835, "step": 26310 }, { "epoch": 86.2655737704918, "grad_norm": 3.0048930644989014, "learning_rate": 9.731378559698968e-07, "loss": 0.1231, "step": 26311 }, { "epoch": 86.26885245901639, "grad_norm": 2.2302935123443604, "learning_rate": 9.726809775001788e-07, "loss": 0.0374, "step": 26312 }, { "epoch": 86.27213114754099, "grad_norm": 2.231599807739258, "learning_rate": 9.722242008228676e-07, "loss": 0.1467, "step": 26313 }, { "epoch": 86.27540983606558, "grad_norm": 2.4274954795837402, "learning_rate": 9.71767525943116e-07, "loss": 0.0791, "step": 26314 }, { "epoch": 86.27868852459017, "grad_norm": 1.4408953189849854, "learning_rate": 9.713109528660737e-07, "loss": 0.0257, "step": 26315 }, { "epoch": 86.28196721311475, "grad_norm": 2.0753531455993652, "learning_rate": 9.708544815968845e-07, "loss": 0.0498, "step": 26316 }, { "epoch": 86.28524590163934, "grad_norm": 2.0669069290161133, "learning_rate": 9.703981121407013e-07, "loss": 0.103, "step": 26317 }, { "epoch": 86.28852459016393, "grad_norm": 2.362248659133911, "learning_rate": 9.699418445026687e-07, "loss": 0.1294, "step": 26318 }, { "epoch": 86.29180327868852, "grad_norm": 2.2145562171936035, "learning_rate": 9.69485678687928e-07, "loss": 0.2146, "step": 26319 }, { "epoch": 86.29508196721312, "grad_norm": 4.116206645965576, "learning_rate": 9.690296147016277e-07, "loss": 0.2958, "step": 26320 }, { "epoch": 86.29836065573771, "grad_norm": 3.206847667694092, "learning_rate": 9.685736525489088e-07, "loss": 0.1875, "step": 26321 }, { "epoch": 86.3016393442623, "grad_norm": 3.3780534267425537, "learning_rate": 9.681177922349105e-07, "loss": 0.0965, "step": 26322 }, { "epoch": 86.30491803278689, "grad_norm": 2.332127332687378, "learning_rate": 9.676620337647758e-07, "loss": 0.1193, "step": 26323 }, { "epoch": 86.30819672131148, "grad_norm": 2.0088038444519043, "learning_rate": 9.672063771436434e-07, "loss": 0.0481, "step": 26324 }, { "epoch": 86.31147540983606, "grad_norm": 2.448702335357666, "learning_rate": 9.66750822376651e-07, "loss": 0.0624, "step": 26325 }, { "epoch": 86.31475409836065, "grad_norm": 1.551844835281372, "learning_rate": 9.662953694689337e-07, "loss": 0.0174, "step": 26326 }, { "epoch": 86.31803278688524, "grad_norm": 1.4715343713760376, "learning_rate": 9.6584001842563e-07, "loss": 0.0896, "step": 26327 }, { "epoch": 86.32131147540984, "grad_norm": 2.1322484016418457, "learning_rate": 9.653847692518737e-07, "loss": 0.0321, "step": 26328 }, { "epoch": 86.32459016393443, "grad_norm": 2.321552276611328, "learning_rate": 9.649296219527982e-07, "loss": 0.15, "step": 26329 }, { "epoch": 86.32786885245902, "grad_norm": 1.5980513095855713, "learning_rate": 9.64474576533534e-07, "loss": 0.0329, "step": 26330 }, { "epoch": 86.33114754098361, "grad_norm": 2.1203489303588867, "learning_rate": 9.640196329992124e-07, "loss": 0.0572, "step": 26331 }, { "epoch": 86.3344262295082, "grad_norm": 2.1477017402648926, "learning_rate": 9.635647913549672e-07, "loss": 0.1461, "step": 26332 }, { "epoch": 86.33770491803278, "grad_norm": 2.0654726028442383, "learning_rate": 9.631100516059234e-07, "loss": 0.0797, "step": 26333 }, { "epoch": 86.34098360655737, "grad_norm": 2.850412368774414, "learning_rate": 9.626554137572109e-07, "loss": 0.1205, "step": 26334 }, { "epoch": 86.34426229508196, "grad_norm": 2.024641513824463, "learning_rate": 9.622008778139514e-07, "loss": 0.1089, "step": 26335 }, { "epoch": 86.34754098360656, "grad_norm": 2.0627779960632324, "learning_rate": 9.617464437812774e-07, "loss": 0.1533, "step": 26336 }, { "epoch": 86.35081967213115, "grad_norm": 2.2316102981567383, "learning_rate": 9.612921116643092e-07, "loss": 0.1634, "step": 26337 }, { "epoch": 86.35409836065574, "grad_norm": 1.8794535398483276, "learning_rate": 9.608378814681706e-07, "loss": 0.0763, "step": 26338 }, { "epoch": 86.35737704918033, "grad_norm": 2.364799976348877, "learning_rate": 9.60383753197981e-07, "loss": 0.1434, "step": 26339 }, { "epoch": 86.36065573770492, "grad_norm": 2.652357816696167, "learning_rate": 9.599297268588647e-07, "loss": 0.2452, "step": 26340 }, { "epoch": 86.3639344262295, "grad_norm": 1.7610220909118652, "learning_rate": 9.594758024559415e-07, "loss": 0.0783, "step": 26341 }, { "epoch": 86.3672131147541, "grad_norm": 2.6560401916503906, "learning_rate": 9.590219799943278e-07, "loss": 0.0669, "step": 26342 }, { "epoch": 86.37049180327868, "grad_norm": 2.2637929916381836, "learning_rate": 9.585682594791413e-07, "loss": 0.0678, "step": 26343 }, { "epoch": 86.37377049180328, "grad_norm": 1.7989681959152222, "learning_rate": 9.581146409154962e-07, "loss": 0.0454, "step": 26344 }, { "epoch": 86.37704918032787, "grad_norm": 2.5541465282440186, "learning_rate": 9.57661124308512e-07, "loss": 0.0811, "step": 26345 }, { "epoch": 86.38032786885246, "grad_norm": 2.385512590408325, "learning_rate": 9.572077096633003e-07, "loss": 0.1681, "step": 26346 }, { "epoch": 86.38360655737705, "grad_norm": 2.1882565021514893, "learning_rate": 9.567543969849746e-07, "loss": 0.0661, "step": 26347 }, { "epoch": 86.38688524590164, "grad_norm": 2.2360708713531494, "learning_rate": 9.56301186278643e-07, "loss": 0.0613, "step": 26348 }, { "epoch": 86.39016393442623, "grad_norm": 2.609076499938965, "learning_rate": 9.558480775494205e-07, "loss": 0.1543, "step": 26349 }, { "epoch": 86.39344262295081, "grad_norm": 2.57600998878479, "learning_rate": 9.55395070802414e-07, "loss": 0.037, "step": 26350 }, { "epoch": 86.3967213114754, "grad_norm": 2.3247485160827637, "learning_rate": 9.549421660427326e-07, "loss": 0.1104, "step": 26351 }, { "epoch": 86.4, "grad_norm": 2.413520574569702, "learning_rate": 9.544893632754816e-07, "loss": 0.0279, "step": 26352 }, { "epoch": 86.4032786885246, "grad_norm": 2.435987949371338, "learning_rate": 9.540366625057683e-07, "loss": 0.0605, "step": 26353 }, { "epoch": 86.40655737704918, "grad_norm": 2.2134053707122803, "learning_rate": 9.535840637386983e-07, "loss": 0.1519, "step": 26354 }, { "epoch": 86.40983606557377, "grad_norm": 2.7059082984924316, "learning_rate": 9.531315669793739e-07, "loss": 0.1263, "step": 26355 }, { "epoch": 86.41311475409836, "grad_norm": 3.0030112266540527, "learning_rate": 9.526791722328977e-07, "loss": 0.0926, "step": 26356 }, { "epoch": 86.41639344262295, "grad_norm": 2.753793478012085, "learning_rate": 9.522268795043676e-07, "loss": 0.1418, "step": 26357 }, { "epoch": 86.41967213114754, "grad_norm": 2.02433705329895, "learning_rate": 9.517746887988899e-07, "loss": 0.056, "step": 26358 }, { "epoch": 86.42295081967212, "grad_norm": 2.933022975921631, "learning_rate": 9.513226001215592e-07, "loss": 0.2155, "step": 26359 }, { "epoch": 86.42622950819673, "grad_norm": 1.9835292100906372, "learning_rate": 9.508706134774748e-07, "loss": 0.0886, "step": 26360 }, { "epoch": 86.42950819672132, "grad_norm": 2.357046127319336, "learning_rate": 9.504187288717315e-07, "loss": 0.094, "step": 26361 }, { "epoch": 86.4327868852459, "grad_norm": 1.834070086479187, "learning_rate": 9.499669463094285e-07, "loss": 0.0839, "step": 26362 }, { "epoch": 86.43606557377049, "grad_norm": 2.226153612136841, "learning_rate": 9.495152657956574e-07, "loss": 0.0696, "step": 26363 }, { "epoch": 86.43934426229508, "grad_norm": 1.3515692949295044, "learning_rate": 9.490636873355108e-07, "loss": 0.0181, "step": 26364 }, { "epoch": 86.44262295081967, "grad_norm": 3.451805353164673, "learning_rate": 9.486122109340812e-07, "loss": 0.1606, "step": 26365 }, { "epoch": 86.44590163934426, "grad_norm": 2.6306674480438232, "learning_rate": 9.481608365964612e-07, "loss": 0.1208, "step": 26366 }, { "epoch": 86.44918032786886, "grad_norm": 2.2340614795684814, "learning_rate": 9.4770956432774e-07, "loss": 0.0681, "step": 26367 }, { "epoch": 86.45245901639345, "grad_norm": 2.290701389312744, "learning_rate": 9.472583941330027e-07, "loss": 0.0755, "step": 26368 }, { "epoch": 86.45573770491804, "grad_norm": 1.9495292901992798, "learning_rate": 9.468073260173427e-07, "loss": 0.0585, "step": 26369 }, { "epoch": 86.45901639344262, "grad_norm": 2.1196069717407227, "learning_rate": 9.463563599858417e-07, "loss": 0.0793, "step": 26370 }, { "epoch": 86.46229508196721, "grad_norm": 2.453749179840088, "learning_rate": 9.459054960435865e-07, "loss": 0.077, "step": 26371 }, { "epoch": 86.4655737704918, "grad_norm": 2.3112378120422363, "learning_rate": 9.4545473419566e-07, "loss": 0.1114, "step": 26372 }, { "epoch": 86.46885245901639, "grad_norm": 2.557934045791626, "learning_rate": 9.450040744471467e-07, "loss": 0.0943, "step": 26373 }, { "epoch": 86.47213114754098, "grad_norm": 2.081529140472412, "learning_rate": 9.445535168031273e-07, "loss": 0.0802, "step": 26374 }, { "epoch": 86.47540983606558, "grad_norm": 2.8279991149902344, "learning_rate": 9.441030612686797e-07, "loss": 0.203, "step": 26375 }, { "epoch": 86.47868852459017, "grad_norm": 4.256728649139404, "learning_rate": 9.436527078488888e-07, "loss": 0.1376, "step": 26376 }, { "epoch": 86.48196721311476, "grad_norm": 2.1895108222961426, "learning_rate": 9.432024565488295e-07, "loss": 0.0976, "step": 26377 }, { "epoch": 86.48524590163935, "grad_norm": 2.273775577545166, "learning_rate": 9.427523073735767e-07, "loss": 0.0946, "step": 26378 }, { "epoch": 86.48852459016393, "grad_norm": 3.1844184398651123, "learning_rate": 9.423022603282117e-07, "loss": 0.1457, "step": 26379 }, { "epoch": 86.49180327868852, "grad_norm": 2.2758188247680664, "learning_rate": 9.418523154178061e-07, "loss": 0.1257, "step": 26380 }, { "epoch": 86.49508196721311, "grad_norm": 2.599623441696167, "learning_rate": 9.414024726474335e-07, "loss": 0.1582, "step": 26381 }, { "epoch": 86.4983606557377, "grad_norm": 2.4431142807006836, "learning_rate": 9.409527320221668e-07, "loss": 0.0889, "step": 26382 }, { "epoch": 86.5016393442623, "grad_norm": 1.731220006942749, "learning_rate": 9.405030935470749e-07, "loss": 0.1397, "step": 26383 }, { "epoch": 86.50491803278689, "grad_norm": 2.8424501419067383, "learning_rate": 9.400535572272329e-07, "loss": 0.1615, "step": 26384 }, { "epoch": 86.50819672131148, "grad_norm": 2.57997465133667, "learning_rate": 9.396041230677056e-07, "loss": 0.1199, "step": 26385 }, { "epoch": 86.51147540983607, "grad_norm": 2.137173891067505, "learning_rate": 9.391547910735632e-07, "loss": 0.0559, "step": 26386 }, { "epoch": 86.51475409836065, "grad_norm": 2.4067747592926025, "learning_rate": 9.387055612498697e-07, "loss": 0.2304, "step": 26387 }, { "epoch": 86.51803278688524, "grad_norm": 1.5827652215957642, "learning_rate": 9.382564336016942e-07, "loss": 0.1321, "step": 26388 }, { "epoch": 86.52131147540983, "grad_norm": 2.8495125770568848, "learning_rate": 9.378074081340983e-07, "loss": 0.1436, "step": 26389 }, { "epoch": 86.52459016393442, "grad_norm": 2.7469232082366943, "learning_rate": 9.373584848521477e-07, "loss": 0.1053, "step": 26390 }, { "epoch": 86.52786885245902, "grad_norm": 1.8187623023986816, "learning_rate": 9.369096637608998e-07, "loss": 0.1545, "step": 26391 }, { "epoch": 86.53114754098361, "grad_norm": 2.5451138019561768, "learning_rate": 9.364609448654217e-07, "loss": 0.0927, "step": 26392 }, { "epoch": 86.5344262295082, "grad_norm": 5.307992935180664, "learning_rate": 9.360123281707689e-07, "loss": 0.0646, "step": 26393 }, { "epoch": 86.53770491803279, "grad_norm": 2.0700042247772217, "learning_rate": 9.355638136820022e-07, "loss": 0.0561, "step": 26394 }, { "epoch": 86.54098360655738, "grad_norm": 1.99447500705719, "learning_rate": 9.351154014041775e-07, "loss": 0.0373, "step": 26395 }, { "epoch": 86.54426229508196, "grad_norm": 2.3088016510009766, "learning_rate": 9.346670913423506e-07, "loss": 0.0433, "step": 26396 }, { "epoch": 86.54754098360655, "grad_norm": 6.995096206665039, "learning_rate": 9.342188835015798e-07, "loss": 0.1493, "step": 26397 }, { "epoch": 86.55081967213114, "grad_norm": 2.4840962886810303, "learning_rate": 9.337707778869165e-07, "loss": 0.2323, "step": 26398 }, { "epoch": 86.55409836065574, "grad_norm": 2.3921775817871094, "learning_rate": 9.333227745034146e-07, "loss": 0.207, "step": 26399 }, { "epoch": 86.55737704918033, "grad_norm": 1.6678003072738647, "learning_rate": 9.328748733561233e-07, "loss": 0.0547, "step": 26400 }, { "epoch": 86.56065573770492, "grad_norm": 2.1391987800598145, "learning_rate": 9.324270744500973e-07, "loss": 0.237, "step": 26401 }, { "epoch": 86.56393442622951, "grad_norm": 1.971970558166504, "learning_rate": 9.319793777903829e-07, "loss": 0.0544, "step": 26402 }, { "epoch": 86.5672131147541, "grad_norm": 2.7850306034088135, "learning_rate": 9.315317833820303e-07, "loss": 0.0631, "step": 26403 }, { "epoch": 86.57049180327868, "grad_norm": 2.566218137741089, "learning_rate": 9.310842912300844e-07, "loss": 0.085, "step": 26404 }, { "epoch": 86.57377049180327, "grad_norm": 1.6984196901321411, "learning_rate": 9.306369013395933e-07, "loss": 0.0843, "step": 26405 }, { "epoch": 86.57704918032788, "grad_norm": 1.8611903190612793, "learning_rate": 9.301896137156019e-07, "loss": 0.1216, "step": 26406 }, { "epoch": 86.58032786885246, "grad_norm": 2.2122673988342285, "learning_rate": 9.297424283631517e-07, "loss": 0.0408, "step": 26407 }, { "epoch": 86.58360655737705, "grad_norm": 1.4501872062683105, "learning_rate": 9.292953452872877e-07, "loss": 0.1056, "step": 26408 }, { "epoch": 86.58688524590164, "grad_norm": 2.0259830951690674, "learning_rate": 9.288483644930469e-07, "loss": 0.1421, "step": 26409 }, { "epoch": 86.59016393442623, "grad_norm": 1.7520201206207275, "learning_rate": 9.28401485985474e-07, "loss": 0.042, "step": 26410 }, { "epoch": 86.59344262295082, "grad_norm": 2.8230271339416504, "learning_rate": 9.279547097696073e-07, "loss": 0.0926, "step": 26411 }, { "epoch": 86.5967213114754, "grad_norm": 2.0319390296936035, "learning_rate": 9.27508035850484e-07, "loss": 0.0614, "step": 26412 }, { "epoch": 86.6, "grad_norm": 2.979379415512085, "learning_rate": 9.270614642331377e-07, "loss": 0.0849, "step": 26413 }, { "epoch": 86.6032786885246, "grad_norm": 2.471560478210449, "learning_rate": 9.2661499492261e-07, "loss": 0.0504, "step": 26414 }, { "epoch": 86.60655737704919, "grad_norm": 2.0957019329071045, "learning_rate": 9.261686279239313e-07, "loss": 0.1006, "step": 26415 }, { "epoch": 86.60983606557377, "grad_norm": 2.2680723667144775, "learning_rate": 9.257223632421353e-07, "loss": 0.0855, "step": 26416 }, { "epoch": 86.61311475409836, "grad_norm": 2.2553305625915527, "learning_rate": 9.252762008822535e-07, "loss": 0.0684, "step": 26417 }, { "epoch": 86.61639344262295, "grad_norm": 2.2952096462249756, "learning_rate": 9.248301408493199e-07, "loss": 0.0712, "step": 26418 }, { "epoch": 86.61967213114754, "grad_norm": 2.356288194656372, "learning_rate": 9.243841831483613e-07, "loss": 0.1773, "step": 26419 }, { "epoch": 86.62295081967213, "grad_norm": 2.3614883422851562, "learning_rate": 9.239383277844083e-07, "loss": 0.0954, "step": 26420 }, { "epoch": 86.62622950819672, "grad_norm": 2.5429892539978027, "learning_rate": 9.234925747624857e-07, "loss": 0.1477, "step": 26421 }, { "epoch": 86.62950819672132, "grad_norm": 1.897330641746521, "learning_rate": 9.230469240876228e-07, "loss": 0.0628, "step": 26422 }, { "epoch": 86.6327868852459, "grad_norm": 2.214210271835327, "learning_rate": 9.226013757648433e-07, "loss": 0.1835, "step": 26423 }, { "epoch": 86.6360655737705, "grad_norm": 2.4855399131774902, "learning_rate": 9.2215592979917e-07, "loss": 0.2123, "step": 26424 }, { "epoch": 86.63934426229508, "grad_norm": 1.8950718641281128, "learning_rate": 9.217105861956288e-07, "loss": 0.0449, "step": 26425 }, { "epoch": 86.64262295081967, "grad_norm": 3.7179369926452637, "learning_rate": 9.21265344959239e-07, "loss": 0.0365, "step": 26426 }, { "epoch": 86.64590163934426, "grad_norm": 2.244826316833496, "learning_rate": 9.208202060950211e-07, "loss": 0.1768, "step": 26427 }, { "epoch": 86.64918032786885, "grad_norm": 2.0403754711151123, "learning_rate": 9.203751696079976e-07, "loss": 0.0495, "step": 26428 }, { "epoch": 86.65245901639344, "grad_norm": 2.376871109008789, "learning_rate": 9.199302355031836e-07, "loss": 0.1233, "step": 26429 }, { "epoch": 86.65573770491804, "grad_norm": 2.0508809089660645, "learning_rate": 9.194854037855982e-07, "loss": 0.0566, "step": 26430 }, { "epoch": 86.65901639344263, "grad_norm": 2.1475937366485596, "learning_rate": 9.190406744602531e-07, "loss": 0.0556, "step": 26431 }, { "epoch": 86.66229508196722, "grad_norm": 2.7481558322906494, "learning_rate": 9.185960475321687e-07, "loss": 0.0646, "step": 26432 }, { "epoch": 86.6655737704918, "grad_norm": 3.464381694793701, "learning_rate": 9.181515230063564e-07, "loss": 0.0461, "step": 26433 }, { "epoch": 86.66885245901639, "grad_norm": 2.1062002182006836, "learning_rate": 9.17707100887828e-07, "loss": 0.0846, "step": 26434 }, { "epoch": 86.67213114754098, "grad_norm": 4.65173864364624, "learning_rate": 9.172627811815938e-07, "loss": 0.1053, "step": 26435 }, { "epoch": 86.67540983606557, "grad_norm": 2.6351397037506104, "learning_rate": 9.168185638926664e-07, "loss": 0.0908, "step": 26436 }, { "epoch": 86.67868852459016, "grad_norm": 1.655562162399292, "learning_rate": 9.16374449026054e-07, "loss": 0.0361, "step": 26437 }, { "epoch": 86.68196721311476, "grad_norm": 2.8945810794830322, "learning_rate": 9.15930436586765e-07, "loss": 0.0652, "step": 26438 }, { "epoch": 86.68524590163935, "grad_norm": 2.4988577365875244, "learning_rate": 9.154865265798029e-07, "loss": 0.1358, "step": 26439 }, { "epoch": 86.68852459016394, "grad_norm": 2.3695149421691895, "learning_rate": 9.150427190101785e-07, "loss": 0.0579, "step": 26440 }, { "epoch": 86.69180327868852, "grad_norm": 2.5717217922210693, "learning_rate": 9.145990138828931e-07, "loss": 0.117, "step": 26441 }, { "epoch": 86.69508196721311, "grad_norm": 2.086982250213623, "learning_rate": 9.141554112029494e-07, "loss": 0.1354, "step": 26442 }, { "epoch": 86.6983606557377, "grad_norm": 2.7228875160217285, "learning_rate": 9.137119109753512e-07, "loss": 0.1047, "step": 26443 }, { "epoch": 86.70163934426229, "grad_norm": 2.3244845867156982, "learning_rate": 9.132685132050967e-07, "loss": 0.1084, "step": 26444 }, { "epoch": 86.70491803278688, "grad_norm": 1.632956862449646, "learning_rate": 9.128252178971896e-07, "loss": 0.027, "step": 26445 }, { "epoch": 86.70819672131148, "grad_norm": 2.2651307582855225, "learning_rate": 9.123820250566262e-07, "loss": 0.0784, "step": 26446 }, { "epoch": 86.71147540983607, "grad_norm": 2.4403014183044434, "learning_rate": 9.119389346884034e-07, "loss": 0.0638, "step": 26447 }, { "epoch": 86.71475409836066, "grad_norm": 2.0238728523254395, "learning_rate": 9.114959467975171e-07, "loss": 0.0702, "step": 26448 }, { "epoch": 86.71803278688525, "grad_norm": 2.3432023525238037, "learning_rate": 9.110530613889656e-07, "loss": 0.1084, "step": 26449 }, { "epoch": 86.72131147540983, "grad_norm": 3.5668752193450928, "learning_rate": 9.106102784677418e-07, "loss": 0.0992, "step": 26450 }, { "epoch": 86.72459016393442, "grad_norm": 2.6003456115722656, "learning_rate": 9.10167598038837e-07, "loss": 0.1152, "step": 26451 }, { "epoch": 86.72786885245901, "grad_norm": 3.2135183811187744, "learning_rate": 9.097250201072405e-07, "loss": 0.2741, "step": 26452 }, { "epoch": 86.73114754098361, "grad_norm": 2.31337571144104, "learning_rate": 9.092825446779496e-07, "loss": 0.062, "step": 26453 }, { "epoch": 86.7344262295082, "grad_norm": 2.2783424854278564, "learning_rate": 9.088401717559492e-07, "loss": 0.1151, "step": 26454 }, { "epoch": 86.73770491803279, "grad_norm": 2.3824472427368164, "learning_rate": 9.083979013462285e-07, "loss": 0.0748, "step": 26455 }, { "epoch": 86.74098360655738, "grad_norm": 1.9566881656646729, "learning_rate": 9.079557334537736e-07, "loss": 0.0935, "step": 26456 }, { "epoch": 86.74426229508197, "grad_norm": 2.960968255996704, "learning_rate": 9.075136680835705e-07, "loss": 0.1551, "step": 26457 }, { "epoch": 86.74754098360656, "grad_norm": 2.6869421005249023, "learning_rate": 9.070717052406052e-07, "loss": 0.1211, "step": 26458 }, { "epoch": 86.75081967213114, "grad_norm": 2.8763599395751953, "learning_rate": 9.066298449298616e-07, "loss": 0.1169, "step": 26459 }, { "epoch": 86.75409836065573, "grad_norm": 1.7911196947097778, "learning_rate": 9.061880871563211e-07, "loss": 0.1659, "step": 26460 }, { "epoch": 86.75737704918033, "grad_norm": 2.3275671005249023, "learning_rate": 9.057464319249631e-07, "loss": 0.0497, "step": 26461 }, { "epoch": 86.76065573770492, "grad_norm": 2.0879340171813965, "learning_rate": 9.053048792407715e-07, "loss": 0.1406, "step": 26462 }, { "epoch": 86.76393442622951, "grad_norm": 1.6230344772338867, "learning_rate": 9.048634291087244e-07, "loss": 0.1228, "step": 26463 }, { "epoch": 86.7672131147541, "grad_norm": 1.6646589040756226, "learning_rate": 9.044220815337979e-07, "loss": 0.13, "step": 26464 }, { "epoch": 86.77049180327869, "grad_norm": 2.102530002593994, "learning_rate": 9.039808365209668e-07, "loss": 0.1103, "step": 26465 }, { "epoch": 86.77377049180328, "grad_norm": 3.0688960552215576, "learning_rate": 9.035396940752128e-07, "loss": 0.1274, "step": 26466 }, { "epoch": 86.77704918032786, "grad_norm": 1.879393458366394, "learning_rate": 9.030986542015052e-07, "loss": 0.0348, "step": 26467 }, { "epoch": 86.78032786885245, "grad_norm": 2.4524946212768555, "learning_rate": 9.026577169048201e-07, "loss": 0.1283, "step": 26468 }, { "epoch": 86.78360655737706, "grad_norm": 2.1068198680877686, "learning_rate": 9.022168821901267e-07, "loss": 0.1352, "step": 26469 }, { "epoch": 86.78688524590164, "grad_norm": 2.2308156490325928, "learning_rate": 9.017761500623968e-07, "loss": 0.0543, "step": 26470 }, { "epoch": 86.79016393442623, "grad_norm": 2.189429759979248, "learning_rate": 9.013355205266017e-07, "loss": 0.0626, "step": 26471 }, { "epoch": 86.79344262295082, "grad_norm": 1.8966253995895386, "learning_rate": 9.008949935877087e-07, "loss": 0.0446, "step": 26472 }, { "epoch": 86.79672131147541, "grad_norm": 2.3413565158843994, "learning_rate": 9.00454569250685e-07, "loss": 0.1252, "step": 26473 }, { "epoch": 86.8, "grad_norm": 2.528313636779785, "learning_rate": 9.000142475204965e-07, "loss": 0.0986, "step": 26474 }, { "epoch": 86.80327868852459, "grad_norm": 2.813307523727417, "learning_rate": 8.995740284021104e-07, "loss": 0.3578, "step": 26475 }, { "epoch": 86.80655737704917, "grad_norm": 2.3226583003997803, "learning_rate": 8.991339119004882e-07, "loss": 0.2108, "step": 26476 }, { "epoch": 86.80983606557378, "grad_norm": 1.9932365417480469, "learning_rate": 8.986938980205928e-07, "loss": 0.0929, "step": 26477 }, { "epoch": 86.81311475409836, "grad_norm": 2.8681578636169434, "learning_rate": 8.98253986767389e-07, "loss": 0.2328, "step": 26478 }, { "epoch": 86.81639344262295, "grad_norm": 2.06406307220459, "learning_rate": 8.978141781458339e-07, "loss": 0.0588, "step": 26479 }, { "epoch": 86.81967213114754, "grad_norm": 1.7837988138198853, "learning_rate": 8.973744721608857e-07, "loss": 0.0726, "step": 26480 }, { "epoch": 86.82295081967213, "grad_norm": 2.0178072452545166, "learning_rate": 8.969348688175073e-07, "loss": 0.0675, "step": 26481 }, { "epoch": 86.82622950819672, "grad_norm": 3.8523614406585693, "learning_rate": 8.964953681206534e-07, "loss": 0.2519, "step": 26482 }, { "epoch": 86.8295081967213, "grad_norm": 2.4765431880950928, "learning_rate": 8.960559700752769e-07, "loss": 0.1242, "step": 26483 }, { "epoch": 86.8327868852459, "grad_norm": 2.4374115467071533, "learning_rate": 8.956166746863371e-07, "loss": 0.0754, "step": 26484 }, { "epoch": 86.8360655737705, "grad_norm": 2.378384828567505, "learning_rate": 8.951774819587855e-07, "loss": 0.0964, "step": 26485 }, { "epoch": 86.83934426229509, "grad_norm": 2.4144492149353027, "learning_rate": 8.947383918975749e-07, "loss": 0.1519, "step": 26486 }, { "epoch": 86.84262295081967, "grad_norm": 2.194697618484497, "learning_rate": 8.942994045076536e-07, "loss": 0.052, "step": 26487 }, { "epoch": 86.84590163934426, "grad_norm": 2.130178213119507, "learning_rate": 8.938605197939765e-07, "loss": 0.0608, "step": 26488 }, { "epoch": 86.84918032786885, "grad_norm": 1.5826122760772705, "learning_rate": 8.934217377614896e-07, "loss": 0.0419, "step": 26489 }, { "epoch": 86.85245901639344, "grad_norm": 3.2992563247680664, "learning_rate": 8.929830584151411e-07, "loss": 0.1732, "step": 26490 }, { "epoch": 86.85573770491803, "grad_norm": 2.367374897003174, "learning_rate": 8.92544481759876e-07, "loss": 0.1891, "step": 26491 }, { "epoch": 86.85901639344263, "grad_norm": 3.1297056674957275, "learning_rate": 8.921060078006427e-07, "loss": 0.0623, "step": 26492 }, { "epoch": 86.86229508196722, "grad_norm": 2.746016502380371, "learning_rate": 8.916676365423848e-07, "loss": 0.1253, "step": 26493 }, { "epoch": 86.8655737704918, "grad_norm": 2.4534995555877686, "learning_rate": 8.91229367990043e-07, "loss": 0.0671, "step": 26494 }, { "epoch": 86.8688524590164, "grad_norm": 2.850062370300293, "learning_rate": 8.90791202148562e-07, "loss": 0.0987, "step": 26495 }, { "epoch": 86.87213114754098, "grad_norm": 2.3560338020324707, "learning_rate": 8.903531390228792e-07, "loss": 0.1897, "step": 26496 }, { "epoch": 86.87540983606557, "grad_norm": 2.099674940109253, "learning_rate": 8.899151786179383e-07, "loss": 0.066, "step": 26497 }, { "epoch": 86.87868852459016, "grad_norm": 2.2504963874816895, "learning_rate": 8.894773209386764e-07, "loss": 0.1122, "step": 26498 }, { "epoch": 86.88196721311475, "grad_norm": 1.53219473361969, "learning_rate": 8.890395659900297e-07, "loss": 0.0319, "step": 26499 }, { "epoch": 86.88524590163935, "grad_norm": 2.7019615173339844, "learning_rate": 8.88601913776933e-07, "loss": 0.1607, "step": 26500 }, { "epoch": 86.88852459016394, "grad_norm": 2.164008617401123, "learning_rate": 8.881643643043258e-07, "loss": 0.0983, "step": 26501 }, { "epoch": 86.89180327868853, "grad_norm": 2.303875684738159, "learning_rate": 8.877269175771386e-07, "loss": 0.0929, "step": 26502 }, { "epoch": 86.89508196721312, "grad_norm": 2.0822596549987793, "learning_rate": 8.872895736003051e-07, "loss": 0.1048, "step": 26503 }, { "epoch": 86.8983606557377, "grad_norm": 2.238399028778076, "learning_rate": 8.868523323787548e-07, "loss": 0.0555, "step": 26504 }, { "epoch": 86.90163934426229, "grad_norm": 2.452265501022339, "learning_rate": 8.864151939174204e-07, "loss": 0.1425, "step": 26505 }, { "epoch": 86.90491803278688, "grad_norm": 2.695446729660034, "learning_rate": 8.859781582212323e-07, "loss": 0.1697, "step": 26506 }, { "epoch": 86.90819672131147, "grad_norm": 2.0965306758880615, "learning_rate": 8.855412252951157e-07, "loss": 0.1554, "step": 26507 }, { "epoch": 86.91147540983607, "grad_norm": 2.407646656036377, "learning_rate": 8.851043951439975e-07, "loss": 0.0905, "step": 26508 }, { "epoch": 86.91475409836066, "grad_norm": 2.4062633514404297, "learning_rate": 8.846676677728039e-07, "loss": 0.0842, "step": 26509 }, { "epoch": 86.91803278688525, "grad_norm": 1.8517653942108154, "learning_rate": 8.84231043186461e-07, "loss": 0.1367, "step": 26510 }, { "epoch": 86.92131147540984, "grad_norm": 2.6062674522399902, "learning_rate": 8.837945213898924e-07, "loss": 0.1041, "step": 26511 }, { "epoch": 86.92459016393443, "grad_norm": 2.368014097213745, "learning_rate": 8.833581023880178e-07, "loss": 0.1042, "step": 26512 }, { "epoch": 86.92786885245901, "grad_norm": 2.2210378646850586, "learning_rate": 8.829217861857575e-07, "loss": 0.3286, "step": 26513 }, { "epoch": 86.9311475409836, "grad_norm": 2.9189236164093018, "learning_rate": 8.824855727880366e-07, "loss": 0.0945, "step": 26514 }, { "epoch": 86.93442622950819, "grad_norm": 1.8944252729415894, "learning_rate": 8.820494621997699e-07, "loss": 0.0852, "step": 26515 }, { "epoch": 86.9377049180328, "grad_norm": 2.197530746459961, "learning_rate": 8.816134544258748e-07, "loss": 0.1187, "step": 26516 }, { "epoch": 86.94098360655738, "grad_norm": 1.9719302654266357, "learning_rate": 8.811775494712682e-07, "loss": 0.0395, "step": 26517 }, { "epoch": 86.94426229508197, "grad_norm": 2.5273098945617676, "learning_rate": 8.807417473408675e-07, "loss": 0.1228, "step": 26518 }, { "epoch": 86.94754098360656, "grad_norm": 2.403413772583008, "learning_rate": 8.803060480395853e-07, "loss": 0.2656, "step": 26519 }, { "epoch": 86.95081967213115, "grad_norm": 1.9778246879577637, "learning_rate": 8.798704515723344e-07, "loss": 0.1233, "step": 26520 }, { "epoch": 86.95409836065573, "grad_norm": 1.985470175743103, "learning_rate": 8.794349579440264e-07, "loss": 0.0386, "step": 26521 }, { "epoch": 86.95737704918032, "grad_norm": 1.9078530073165894, "learning_rate": 8.789995671595708e-07, "loss": 0.2031, "step": 26522 }, { "epoch": 86.96065573770491, "grad_norm": 1.9226397275924683, "learning_rate": 8.785642792238814e-07, "loss": 0.0834, "step": 26523 }, { "epoch": 86.96393442622951, "grad_norm": 2.620330333709717, "learning_rate": 8.781290941418619e-07, "loss": 0.0945, "step": 26524 }, { "epoch": 86.9672131147541, "grad_norm": 1.8523261547088623, "learning_rate": 8.776940119184219e-07, "loss": 0.0855, "step": 26525 }, { "epoch": 86.97049180327869, "grad_norm": 2.1248509883880615, "learning_rate": 8.772590325584651e-07, "loss": 0.0474, "step": 26526 }, { "epoch": 86.97377049180328, "grad_norm": 2.7507541179656982, "learning_rate": 8.768241560669e-07, "loss": 0.2664, "step": 26527 }, { "epoch": 86.97704918032787, "grad_norm": 2.228637933731079, "learning_rate": 8.76389382448628e-07, "loss": 0.063, "step": 26528 }, { "epoch": 86.98032786885246, "grad_norm": 1.8137507438659668, "learning_rate": 8.759547117085498e-07, "loss": 0.1078, "step": 26529 }, { "epoch": 86.98360655737704, "grad_norm": 2.6229822635650635, "learning_rate": 8.755201438515703e-07, "loss": 0.0887, "step": 26530 }, { "epoch": 86.98688524590163, "grad_norm": 2.0828938484191895, "learning_rate": 8.75085678882589e-07, "loss": 0.0994, "step": 26531 }, { "epoch": 86.99016393442623, "grad_norm": 1.5645486116409302, "learning_rate": 8.746513168065019e-07, "loss": 0.0305, "step": 26532 }, { "epoch": 86.99344262295082, "grad_norm": 1.8771543502807617, "learning_rate": 8.742170576282116e-07, "loss": 0.0818, "step": 26533 }, { "epoch": 86.99672131147541, "grad_norm": 3.7022268772125244, "learning_rate": 8.737829013526122e-07, "loss": 0.1024, "step": 26534 }, { "epoch": 87.0, "grad_norm": 2.378970146179199, "learning_rate": 8.733488479845997e-07, "loss": 0.1017, "step": 26535 }, { "epoch": 87.00327868852459, "grad_norm": 2.352015972137451, "learning_rate": 8.729148975290658e-07, "loss": 0.0555, "step": 26536 }, { "epoch": 87.00655737704918, "grad_norm": 3.025385618209839, "learning_rate": 8.724810499909087e-07, "loss": 0.1629, "step": 26537 }, { "epoch": 87.00983606557377, "grad_norm": 2.4219725131988525, "learning_rate": 8.720473053750178e-07, "loss": 0.1129, "step": 26538 }, { "epoch": 87.01311475409837, "grad_norm": 2.272952079772949, "learning_rate": 8.716136636862815e-07, "loss": 0.0517, "step": 26539 }, { "epoch": 87.01639344262296, "grad_norm": 1.789900541305542, "learning_rate": 8.711801249295959e-07, "loss": 0.0398, "step": 26540 }, { "epoch": 87.01967213114754, "grad_norm": 1.858224630355835, "learning_rate": 8.707466891098449e-07, "loss": 0.1349, "step": 26541 }, { "epoch": 87.02295081967213, "grad_norm": 2.5048019886016846, "learning_rate": 8.703133562319166e-07, "loss": 0.1898, "step": 26542 }, { "epoch": 87.02622950819672, "grad_norm": 2.3530519008636475, "learning_rate": 8.698801263006962e-07, "loss": 0.1227, "step": 26543 }, { "epoch": 87.02950819672131, "grad_norm": 2.8343849182128906, "learning_rate": 8.69446999321073e-07, "loss": 0.1997, "step": 26544 }, { "epoch": 87.0327868852459, "grad_norm": 2.027541160583496, "learning_rate": 8.690139752979277e-07, "loss": 0.1261, "step": 26545 }, { "epoch": 87.03606557377049, "grad_norm": 2.1645803451538086, "learning_rate": 8.685810542361429e-07, "loss": 0.0509, "step": 26546 }, { "epoch": 87.03934426229509, "grad_norm": 1.6742870807647705, "learning_rate": 8.681482361406024e-07, "loss": 0.057, "step": 26547 }, { "epoch": 87.04262295081968, "grad_norm": 2.6639599800109863, "learning_rate": 8.677155210161825e-07, "loss": 0.0493, "step": 26548 }, { "epoch": 87.04590163934427, "grad_norm": 3.118274688720703, "learning_rate": 8.67282908867767e-07, "loss": 0.1784, "step": 26549 }, { "epoch": 87.04918032786885, "grad_norm": 2.526252269744873, "learning_rate": 8.668503997002331e-07, "loss": 0.1422, "step": 26550 }, { "epoch": 87.05245901639344, "grad_norm": 2.6348137855529785, "learning_rate": 8.664179935184569e-07, "loss": 0.0802, "step": 26551 }, { "epoch": 87.05573770491803, "grad_norm": 2.203157424926758, "learning_rate": 8.659856903273123e-07, "loss": 0.0705, "step": 26552 }, { "epoch": 87.05901639344262, "grad_norm": 1.9772168397903442, "learning_rate": 8.655534901316776e-07, "loss": 0.1433, "step": 26553 }, { "epoch": 87.0622950819672, "grad_norm": 2.747424840927124, "learning_rate": 8.651213929364244e-07, "loss": 0.0651, "step": 26554 }, { "epoch": 87.06557377049181, "grad_norm": 2.537050247192383, "learning_rate": 8.646893987464266e-07, "loss": 0.1441, "step": 26555 }, { "epoch": 87.0688524590164, "grad_norm": 4.346243381500244, "learning_rate": 8.642575075665516e-07, "loss": 0.1767, "step": 26556 }, { "epoch": 87.07213114754099, "grad_norm": 2.3032338619232178, "learning_rate": 8.638257194016741e-07, "loss": 0.0763, "step": 26557 }, { "epoch": 87.07540983606557, "grad_norm": 3.7407586574554443, "learning_rate": 8.633940342566604e-07, "loss": 0.203, "step": 26558 }, { "epoch": 87.07868852459016, "grad_norm": 1.8020875453948975, "learning_rate": 8.629624521363789e-07, "loss": 0.0898, "step": 26559 }, { "epoch": 87.08196721311475, "grad_norm": 2.4014618396759033, "learning_rate": 8.625309730456966e-07, "loss": 0.1255, "step": 26560 }, { "epoch": 87.08524590163934, "grad_norm": 2.8301990032196045, "learning_rate": 8.620995969894752e-07, "loss": 0.1236, "step": 26561 }, { "epoch": 87.08852459016393, "grad_norm": 2.1623237133026123, "learning_rate": 8.616683239725842e-07, "loss": 0.077, "step": 26562 }, { "epoch": 87.09180327868853, "grad_norm": 1.5361344814300537, "learning_rate": 8.612371539998843e-07, "loss": 0.0857, "step": 26563 }, { "epoch": 87.09508196721312, "grad_norm": 2.182004451751709, "learning_rate": 8.60806087076238e-07, "loss": 0.1415, "step": 26564 }, { "epoch": 87.09836065573771, "grad_norm": 2.596893548965454, "learning_rate": 8.603751232065027e-07, "loss": 0.079, "step": 26565 }, { "epoch": 87.1016393442623, "grad_norm": 3.4786605834960938, "learning_rate": 8.599442623955423e-07, "loss": 0.1545, "step": 26566 }, { "epoch": 87.10491803278688, "grad_norm": 2.075392723083496, "learning_rate": 8.59513504648215e-07, "loss": 0.1116, "step": 26567 }, { "epoch": 87.10819672131147, "grad_norm": 2.466456174850464, "learning_rate": 8.59082849969376e-07, "loss": 0.0822, "step": 26568 }, { "epoch": 87.11147540983606, "grad_norm": 4.034155368804932, "learning_rate": 8.586522983638801e-07, "loss": 0.2513, "step": 26569 }, { "epoch": 87.11475409836065, "grad_norm": 2.269508123397827, "learning_rate": 8.582218498365857e-07, "loss": 0.1199, "step": 26570 }, { "epoch": 87.11803278688525, "grad_norm": 2.528071403503418, "learning_rate": 8.577915043923457e-07, "loss": 0.2103, "step": 26571 }, { "epoch": 87.12131147540984, "grad_norm": 2.0305655002593994, "learning_rate": 8.573612620360106e-07, "loss": 0.1109, "step": 26572 }, { "epoch": 87.12459016393443, "grad_norm": 2.2746429443359375, "learning_rate": 8.569311227724342e-07, "loss": 0.0661, "step": 26573 }, { "epoch": 87.12786885245902, "grad_norm": 2.0108373165130615, "learning_rate": 8.565010866064649e-07, "loss": 0.057, "step": 26574 }, { "epoch": 87.1311475409836, "grad_norm": 2.3199093341827393, "learning_rate": 8.560711535429533e-07, "loss": 0.132, "step": 26575 }, { "epoch": 87.1344262295082, "grad_norm": 2.212594509124756, "learning_rate": 8.556413235867467e-07, "loss": 0.1459, "step": 26576 }, { "epoch": 87.13770491803278, "grad_norm": 2.5199127197265625, "learning_rate": 8.552115967426922e-07, "loss": 0.28, "step": 26577 }, { "epoch": 87.14098360655737, "grad_norm": 1.7034568786621094, "learning_rate": 8.547819730156337e-07, "loss": 0.1039, "step": 26578 }, { "epoch": 87.14426229508197, "grad_norm": 2.4031972885131836, "learning_rate": 8.543524524104185e-07, "loss": 0.0524, "step": 26579 }, { "epoch": 87.14754098360656, "grad_norm": 1.9246584177017212, "learning_rate": 8.539230349318883e-07, "loss": 0.0859, "step": 26580 }, { "epoch": 87.15081967213115, "grad_norm": 2.1244966983795166, "learning_rate": 8.534937205848859e-07, "loss": 0.0811, "step": 26581 }, { "epoch": 87.15409836065574, "grad_norm": 2.2277166843414307, "learning_rate": 8.530645093742506e-07, "loss": 0.1048, "step": 26582 }, { "epoch": 87.15737704918033, "grad_norm": 3.328570604324341, "learning_rate": 8.526354013048244e-07, "loss": 0.1199, "step": 26583 }, { "epoch": 87.16065573770491, "grad_norm": 2.3460185527801514, "learning_rate": 8.522063963814442e-07, "loss": 0.0871, "step": 26584 }, { "epoch": 87.1639344262295, "grad_norm": 2.1313135623931885, "learning_rate": 8.517774946089475e-07, "loss": 0.1038, "step": 26585 }, { "epoch": 87.1672131147541, "grad_norm": 3.688537120819092, "learning_rate": 8.513486959921724e-07, "loss": 0.1856, "step": 26586 }, { "epoch": 87.1704918032787, "grad_norm": 2.204563617706299, "learning_rate": 8.50920000535953e-07, "loss": 0.0836, "step": 26587 }, { "epoch": 87.17377049180328, "grad_norm": 3.1523900032043457, "learning_rate": 8.504914082451221e-07, "loss": 0.2105, "step": 26588 }, { "epoch": 87.17704918032787, "grad_norm": 2.2612802982330322, "learning_rate": 8.500629191245157e-07, "loss": 0.0718, "step": 26589 }, { "epoch": 87.18032786885246, "grad_norm": 1.9965921640396118, "learning_rate": 8.496345331789624e-07, "loss": 0.0528, "step": 26590 }, { "epoch": 87.18360655737705, "grad_norm": 2.268812894821167, "learning_rate": 8.492062504132925e-07, "loss": 0.0885, "step": 26591 }, { "epoch": 87.18688524590164, "grad_norm": 2.6835412979125977, "learning_rate": 8.487780708323379e-07, "loss": 0.126, "step": 26592 }, { "epoch": 87.19016393442622, "grad_norm": 2.6751554012298584, "learning_rate": 8.483499944409257e-07, "loss": 0.1928, "step": 26593 }, { "epoch": 87.19344262295083, "grad_norm": 3.111602544784546, "learning_rate": 8.479220212438832e-07, "loss": 0.0941, "step": 26594 }, { "epoch": 87.19672131147541, "grad_norm": 1.6767933368682861, "learning_rate": 8.474941512460333e-07, "loss": 0.0376, "step": 26595 }, { "epoch": 87.2, "grad_norm": 2.5177481174468994, "learning_rate": 8.470663844522053e-07, "loss": 0.1668, "step": 26596 }, { "epoch": 87.20327868852459, "grad_norm": 3.0956363677978516, "learning_rate": 8.4663872086722e-07, "loss": 0.1193, "step": 26597 }, { "epoch": 87.20655737704918, "grad_norm": 2.225433588027954, "learning_rate": 8.462111604959e-07, "loss": 0.0373, "step": 26598 }, { "epoch": 87.20983606557377, "grad_norm": 2.9774792194366455, "learning_rate": 8.457837033430672e-07, "loss": 0.1198, "step": 26599 }, { "epoch": 87.21311475409836, "grad_norm": 2.435947895050049, "learning_rate": 8.453563494135397e-07, "loss": 0.0599, "step": 26600 }, { "epoch": 87.21639344262294, "grad_norm": 2.895638942718506, "learning_rate": 8.449290987121395e-07, "loss": 0.1539, "step": 26601 }, { "epoch": 87.21967213114755, "grad_norm": 1.9731521606445312, "learning_rate": 8.445019512436814e-07, "loss": 0.0356, "step": 26602 }, { "epoch": 87.22295081967214, "grad_norm": 2.2609941959381104, "learning_rate": 8.440749070129839e-07, "loss": 0.1674, "step": 26603 }, { "epoch": 87.22622950819672, "grad_norm": 2.000863790512085, "learning_rate": 8.436479660248608e-07, "loss": 0.0549, "step": 26604 }, { "epoch": 87.22950819672131, "grad_norm": 2.614328622817993, "learning_rate": 8.432211282841274e-07, "loss": 0.22, "step": 26605 }, { "epoch": 87.2327868852459, "grad_norm": 2.2165255546569824, "learning_rate": 8.427943937955974e-07, "loss": 0.0509, "step": 26606 }, { "epoch": 87.23606557377049, "grad_norm": 2.7475292682647705, "learning_rate": 8.423677625640814e-07, "loss": 0.1793, "step": 26607 }, { "epoch": 87.23934426229508, "grad_norm": 2.8996474742889404, "learning_rate": 8.41941234594389e-07, "loss": 0.1359, "step": 26608 }, { "epoch": 87.24262295081967, "grad_norm": 3.830845355987549, "learning_rate": 8.415148098913318e-07, "loss": 0.1957, "step": 26609 }, { "epoch": 87.24590163934427, "grad_norm": 2.308729648590088, "learning_rate": 8.410884884597182e-07, "loss": 0.1402, "step": 26610 }, { "epoch": 87.24918032786886, "grad_norm": 2.508089542388916, "learning_rate": 8.406622703043554e-07, "loss": 0.0898, "step": 26611 }, { "epoch": 87.25245901639344, "grad_norm": 2.1597399711608887, "learning_rate": 8.402361554300475e-07, "loss": 0.0601, "step": 26612 }, { "epoch": 87.25573770491803, "grad_norm": 2.7777464389801025, "learning_rate": 8.398101438416007e-07, "loss": 0.1348, "step": 26613 }, { "epoch": 87.25901639344262, "grad_norm": 3.1637744903564453, "learning_rate": 8.393842355438186e-07, "loss": 0.0796, "step": 26614 }, { "epoch": 87.26229508196721, "grad_norm": 2.7666714191436768, "learning_rate": 8.389584305415055e-07, "loss": 0.1112, "step": 26615 }, { "epoch": 87.2655737704918, "grad_norm": 2.3009533882141113, "learning_rate": 8.385327288394607e-07, "loss": 0.0771, "step": 26616 }, { "epoch": 87.26885245901639, "grad_norm": 2.3330535888671875, "learning_rate": 8.381071304424826e-07, "loss": 0.1405, "step": 26617 }, { "epoch": 87.27213114754099, "grad_norm": 2.4888150691986084, "learning_rate": 8.376816353553751e-07, "loss": 0.0508, "step": 26618 }, { "epoch": 87.27540983606558, "grad_norm": 2.437282085418701, "learning_rate": 8.372562435829335e-07, "loss": 0.0551, "step": 26619 }, { "epoch": 87.27868852459017, "grad_norm": 2.7695956230163574, "learning_rate": 8.368309551299536e-07, "loss": 0.1289, "step": 26620 }, { "epoch": 87.28196721311475, "grad_norm": 2.4558894634246826, "learning_rate": 8.364057700012318e-07, "loss": 0.2336, "step": 26621 }, { "epoch": 87.28524590163934, "grad_norm": 2.0912282466888428, "learning_rate": 8.359806882015631e-07, "loss": 0.1002, "step": 26622 }, { "epoch": 87.28852459016393, "grad_norm": 2.0548393726348877, "learning_rate": 8.355557097357414e-07, "loss": 0.1053, "step": 26623 }, { "epoch": 87.29180327868852, "grad_norm": 1.5369337797164917, "learning_rate": 8.351308346085562e-07, "loss": 0.0364, "step": 26624 }, { "epoch": 87.29508196721312, "grad_norm": 2.9042510986328125, "learning_rate": 8.347060628248016e-07, "loss": 0.1516, "step": 26625 }, { "epoch": 87.29836065573771, "grad_norm": 1.7412407398223877, "learning_rate": 8.342813943892625e-07, "loss": 0.0178, "step": 26626 }, { "epoch": 87.3016393442623, "grad_norm": 2.244046926498413, "learning_rate": 8.33856829306734e-07, "loss": 0.1163, "step": 26627 }, { "epoch": 87.30491803278689, "grad_norm": 1.9318569898605347, "learning_rate": 8.334323675819989e-07, "loss": 0.1292, "step": 26628 }, { "epoch": 87.30819672131148, "grad_norm": 2.4497432708740234, "learning_rate": 8.330080092198445e-07, "loss": 0.0756, "step": 26629 }, { "epoch": 87.31147540983606, "grad_norm": 2.4800543785095215, "learning_rate": 8.325837542250548e-07, "loss": 0.055, "step": 26630 }, { "epoch": 87.31475409836065, "grad_norm": 3.120274782180786, "learning_rate": 8.32159602602417e-07, "loss": 0.236, "step": 26631 }, { "epoch": 87.31803278688524, "grad_norm": 2.677196979522705, "learning_rate": 8.317355543567119e-07, "loss": 0.0855, "step": 26632 }, { "epoch": 87.32131147540984, "grad_norm": 2.889082431793213, "learning_rate": 8.313116094927209e-07, "loss": 0.2427, "step": 26633 }, { "epoch": 87.32459016393443, "grad_norm": 2.2486438751220703, "learning_rate": 8.308877680152227e-07, "loss": 0.0759, "step": 26634 }, { "epoch": 87.32786885245902, "grad_norm": 3.0039546489715576, "learning_rate": 8.304640299290001e-07, "loss": 0.1021, "step": 26635 }, { "epoch": 87.33114754098361, "grad_norm": 1.8075734376907349, "learning_rate": 8.300403952388292e-07, "loss": 0.048, "step": 26636 }, { "epoch": 87.3344262295082, "grad_norm": 2.529270648956299, "learning_rate": 8.296168639494872e-07, "loss": 0.3448, "step": 26637 }, { "epoch": 87.33770491803278, "grad_norm": 2.993190288543701, "learning_rate": 8.291934360657494e-07, "loss": 0.1938, "step": 26638 }, { "epoch": 87.34098360655737, "grad_norm": 2.0768990516662598, "learning_rate": 8.287701115923907e-07, "loss": 0.0478, "step": 26639 }, { "epoch": 87.34426229508196, "grad_norm": 2.3613924980163574, "learning_rate": 8.283468905341862e-07, "loss": 0.1838, "step": 26640 }, { "epoch": 87.34754098360656, "grad_norm": 2.0443501472473145, "learning_rate": 8.279237728959044e-07, "loss": 0.038, "step": 26641 }, { "epoch": 87.35081967213115, "grad_norm": 1.5235166549682617, "learning_rate": 8.275007586823203e-07, "loss": 0.0329, "step": 26642 }, { "epoch": 87.35409836065574, "grad_norm": 2.255042791366577, "learning_rate": 8.270778478982022e-07, "loss": 0.1001, "step": 26643 }, { "epoch": 87.35737704918033, "grad_norm": 2.5494208335876465, "learning_rate": 8.266550405483164e-07, "loss": 0.1252, "step": 26644 }, { "epoch": 87.36065573770492, "grad_norm": 3.311944007873535, "learning_rate": 8.262323366374358e-07, "loss": 0.2336, "step": 26645 }, { "epoch": 87.3639344262295, "grad_norm": 5.9022016525268555, "learning_rate": 8.258097361703232e-07, "loss": 0.2429, "step": 26646 }, { "epoch": 87.3672131147541, "grad_norm": 2.732556104660034, "learning_rate": 8.253872391517426e-07, "loss": 0.0734, "step": 26647 }, { "epoch": 87.37049180327868, "grad_norm": 2.4902427196502686, "learning_rate": 8.249648455864623e-07, "loss": 0.0839, "step": 26648 }, { "epoch": 87.37377049180328, "grad_norm": 2.0589897632598877, "learning_rate": 8.24542555479243e-07, "loss": 0.1167, "step": 26649 }, { "epoch": 87.37704918032787, "grad_norm": 2.3147239685058594, "learning_rate": 8.241203688348464e-07, "loss": 0.1712, "step": 26650 }, { "epoch": 87.38032786885246, "grad_norm": 1.928398847579956, "learning_rate": 8.236982856580333e-07, "loss": 0.0817, "step": 26651 }, { "epoch": 87.38360655737705, "grad_norm": 2.492936372756958, "learning_rate": 8.232763059535609e-07, "loss": 0.201, "step": 26652 }, { "epoch": 87.38688524590164, "grad_norm": 3.152433395385742, "learning_rate": 8.22854429726192e-07, "loss": 0.1389, "step": 26653 }, { "epoch": 87.39016393442623, "grad_norm": 2.4029064178466797, "learning_rate": 8.224326569806806e-07, "loss": 0.163, "step": 26654 }, { "epoch": 87.39344262295081, "grad_norm": 3.8650221824645996, "learning_rate": 8.220109877217842e-07, "loss": 0.2088, "step": 26655 }, { "epoch": 87.3967213114754, "grad_norm": 1.8902734518051147, "learning_rate": 8.215894219542541e-07, "loss": 0.0666, "step": 26656 }, { "epoch": 87.4, "grad_norm": 2.6558823585510254, "learning_rate": 8.211679596828481e-07, "loss": 0.1181, "step": 26657 }, { "epoch": 87.4032786885246, "grad_norm": 3.0801212787628174, "learning_rate": 8.207466009123166e-07, "loss": 0.1251, "step": 26658 }, { "epoch": 87.40655737704918, "grad_norm": 2.7682383060455322, "learning_rate": 8.203253456474114e-07, "loss": 0.0869, "step": 26659 }, { "epoch": 87.40983606557377, "grad_norm": 2.7968122959136963, "learning_rate": 8.199041938928809e-07, "loss": 0.0557, "step": 26660 }, { "epoch": 87.41311475409836, "grad_norm": 1.645125150680542, "learning_rate": 8.19483145653478e-07, "loss": 0.1009, "step": 26661 }, { "epoch": 87.41639344262295, "grad_norm": 2.117611885070801, "learning_rate": 8.190622009339466e-07, "loss": 0.0316, "step": 26662 }, { "epoch": 87.41967213114754, "grad_norm": 1.8440167903900146, "learning_rate": 8.186413597390353e-07, "loss": 0.065, "step": 26663 }, { "epoch": 87.42295081967212, "grad_norm": 2.260610818862915, "learning_rate": 8.182206220734889e-07, "loss": 0.081, "step": 26664 }, { "epoch": 87.42622950819673, "grad_norm": 2.54697847366333, "learning_rate": 8.177999879420507e-07, "loss": 0.1256, "step": 26665 }, { "epoch": 87.42950819672132, "grad_norm": 3.44191575050354, "learning_rate": 8.173794573494654e-07, "loss": 0.1032, "step": 26666 }, { "epoch": 87.4327868852459, "grad_norm": 2.699392318725586, "learning_rate": 8.169590303004749e-07, "loss": 0.073, "step": 26667 }, { "epoch": 87.43606557377049, "grad_norm": 1.9389245510101318, "learning_rate": 8.165387067998187e-07, "loss": 0.0736, "step": 26668 }, { "epoch": 87.43934426229508, "grad_norm": 2.020352363586426, "learning_rate": 8.161184868522354e-07, "loss": 0.07, "step": 26669 }, { "epoch": 87.44262295081967, "grad_norm": 1.8132576942443848, "learning_rate": 8.156983704624665e-07, "loss": 0.1544, "step": 26670 }, { "epoch": 87.44590163934426, "grad_norm": 2.5940017700195312, "learning_rate": 8.152783576352486e-07, "loss": 0.1053, "step": 26671 }, { "epoch": 87.44918032786886, "grad_norm": 2.4488108158111572, "learning_rate": 8.148584483753163e-07, "loss": 0.0793, "step": 26672 }, { "epoch": 87.45245901639345, "grad_norm": 2.181830644607544, "learning_rate": 8.144386426874029e-07, "loss": 0.1453, "step": 26673 }, { "epoch": 87.45573770491804, "grad_norm": 2.6672263145446777, "learning_rate": 8.140189405762478e-07, "loss": 0.1119, "step": 26674 }, { "epoch": 87.45901639344262, "grad_norm": 1.9416601657867432, "learning_rate": 8.135993420465782e-07, "loss": 0.1838, "step": 26675 }, { "epoch": 87.46229508196721, "grad_norm": 2.840923547744751, "learning_rate": 8.131798471031294e-07, "loss": 0.0916, "step": 26676 }, { "epoch": 87.4655737704918, "grad_norm": 1.7957133054733276, "learning_rate": 8.127604557506285e-07, "loss": 0.103, "step": 26677 }, { "epoch": 87.46885245901639, "grad_norm": 1.889933466911316, "learning_rate": 8.123411679938043e-07, "loss": 0.1324, "step": 26678 }, { "epoch": 87.47213114754098, "grad_norm": 2.650590658187866, "learning_rate": 8.119219838373871e-07, "loss": 0.0696, "step": 26679 }, { "epoch": 87.47540983606558, "grad_norm": 2.163159132003784, "learning_rate": 8.115029032861044e-07, "loss": 0.1352, "step": 26680 }, { "epoch": 87.47868852459017, "grad_norm": 1.9227924346923828, "learning_rate": 8.110839263446791e-07, "loss": 0.1127, "step": 26681 }, { "epoch": 87.48196721311476, "grad_norm": 1.539474606513977, "learning_rate": 8.10665053017835e-07, "loss": 0.0325, "step": 26682 }, { "epoch": 87.48524590163935, "grad_norm": 1.7380468845367432, "learning_rate": 8.102462833102986e-07, "loss": 0.0543, "step": 26683 }, { "epoch": 87.48852459016393, "grad_norm": 3.0398809909820557, "learning_rate": 8.098276172267905e-07, "loss": 0.0938, "step": 26684 }, { "epoch": 87.49180327868852, "grad_norm": 2.4305310249328613, "learning_rate": 8.094090547720312e-07, "loss": 0.1449, "step": 26685 }, { "epoch": 87.49508196721311, "grad_norm": 2.5856056213378906, "learning_rate": 8.089905959507394e-07, "loss": 0.2154, "step": 26686 }, { "epoch": 87.4983606557377, "grad_norm": 2.320366144180298, "learning_rate": 8.085722407676355e-07, "loss": 0.0571, "step": 26687 }, { "epoch": 87.5016393442623, "grad_norm": 1.833586573600769, "learning_rate": 8.08153989227437e-07, "loss": 0.0327, "step": 26688 }, { "epoch": 87.50491803278689, "grad_norm": 1.4185417890548706, "learning_rate": 8.077358413348602e-07, "loss": 0.0253, "step": 26689 }, { "epoch": 87.50819672131148, "grad_norm": 2.2987821102142334, "learning_rate": 8.073177970946167e-07, "loss": 0.144, "step": 26690 }, { "epoch": 87.51147540983607, "grad_norm": 1.9512988328933716, "learning_rate": 8.06899856511425e-07, "loss": 0.0653, "step": 26691 }, { "epoch": 87.51475409836065, "grad_norm": 1.7277615070343018, "learning_rate": 8.064820195899958e-07, "loss": 0.0414, "step": 26692 }, { "epoch": 87.51803278688524, "grad_norm": 2.0330612659454346, "learning_rate": 8.060642863350387e-07, "loss": 0.0692, "step": 26693 }, { "epoch": 87.52131147540983, "grad_norm": 2.582932233810425, "learning_rate": 8.056466567512677e-07, "loss": 0.1, "step": 26694 }, { "epoch": 87.52459016393442, "grad_norm": 3.0383503437042236, "learning_rate": 8.052291308433901e-07, "loss": 0.1057, "step": 26695 }, { "epoch": 87.52786885245902, "grad_norm": 2.8490419387817383, "learning_rate": 8.048117086161134e-07, "loss": 0.2063, "step": 26696 }, { "epoch": 87.53114754098361, "grad_norm": 1.6239091157913208, "learning_rate": 8.043943900741469e-07, "loss": 0.041, "step": 26697 }, { "epoch": 87.5344262295082, "grad_norm": 1.698691487312317, "learning_rate": 8.039771752221948e-07, "loss": 0.0374, "step": 26698 }, { "epoch": 87.53770491803279, "grad_norm": 2.3313989639282227, "learning_rate": 8.03560064064961e-07, "loss": 0.1508, "step": 26699 }, { "epoch": 87.54098360655738, "grad_norm": 3.5324084758758545, "learning_rate": 8.031430566071474e-07, "loss": 0.1125, "step": 26700 }, { "epoch": 87.54426229508196, "grad_norm": 1.844304084777832, "learning_rate": 8.027261528534602e-07, "loss": 0.0352, "step": 26701 }, { "epoch": 87.54754098360655, "grad_norm": 3.291672706604004, "learning_rate": 8.02309352808599e-07, "loss": 0.0959, "step": 26702 }, { "epoch": 87.55081967213114, "grad_norm": 2.8020405769348145, "learning_rate": 8.018926564772622e-07, "loss": 0.0811, "step": 26703 }, { "epoch": 87.55409836065574, "grad_norm": 2.5250470638275146, "learning_rate": 8.014760638641484e-07, "loss": 0.1022, "step": 26704 }, { "epoch": 87.55737704918033, "grad_norm": 2.5926454067230225, "learning_rate": 8.01059574973957e-07, "loss": 0.2286, "step": 26705 }, { "epoch": 87.56065573770492, "grad_norm": 2.848867654800415, "learning_rate": 8.006431898113843e-07, "loss": 0.1076, "step": 26706 }, { "epoch": 87.56393442622951, "grad_norm": 2.724719524383545, "learning_rate": 8.002269083811232e-07, "loss": 0.1673, "step": 26707 }, { "epoch": 87.5672131147541, "grad_norm": 2.527552604675293, "learning_rate": 7.998107306878688e-07, "loss": 0.1418, "step": 26708 }, { "epoch": 87.57049180327868, "grad_norm": 2.1419599056243896, "learning_rate": 7.993946567363154e-07, "loss": 0.1451, "step": 26709 }, { "epoch": 87.57377049180327, "grad_norm": 3.2859439849853516, "learning_rate": 7.989786865311533e-07, "loss": 0.1625, "step": 26710 }, { "epoch": 87.57704918032788, "grad_norm": 2.1462440490722656, "learning_rate": 7.985628200770724e-07, "loss": 0.2265, "step": 26711 }, { "epoch": 87.58032786885246, "grad_norm": 2.420569658279419, "learning_rate": 7.98147057378762e-07, "loss": 0.0774, "step": 26712 }, { "epoch": 87.58360655737705, "grad_norm": 1.8072068691253662, "learning_rate": 7.977313984409129e-07, "loss": 0.0524, "step": 26713 }, { "epoch": 87.58688524590164, "grad_norm": 2.0464658737182617, "learning_rate": 7.973158432682104e-07, "loss": 0.0937, "step": 26714 }, { "epoch": 87.59016393442623, "grad_norm": 2.3198413848876953, "learning_rate": 7.969003918653395e-07, "loss": 0.0591, "step": 26715 }, { "epoch": 87.59344262295082, "grad_norm": 1.2556438446044922, "learning_rate": 7.964850442369854e-07, "loss": 0.0194, "step": 26716 }, { "epoch": 87.5967213114754, "grad_norm": 1.6471859216690063, "learning_rate": 7.960698003878309e-07, "loss": 0.0239, "step": 26717 }, { "epoch": 87.6, "grad_norm": 2.6038882732391357, "learning_rate": 7.956546603225601e-07, "loss": 0.3097, "step": 26718 }, { "epoch": 87.6032786885246, "grad_norm": 2.5548057556152344, "learning_rate": 7.952396240458538e-07, "loss": 0.0708, "step": 26719 }, { "epoch": 87.60655737704919, "grad_norm": 1.5967369079589844, "learning_rate": 7.948246915623903e-07, "loss": 0.1057, "step": 26720 }, { "epoch": 87.60983606557377, "grad_norm": 1.6212060451507568, "learning_rate": 7.944098628768481e-07, "loss": 0.0429, "step": 26721 }, { "epoch": 87.61311475409836, "grad_norm": 2.2831294536590576, "learning_rate": 7.939951379939081e-07, "loss": 0.0469, "step": 26722 }, { "epoch": 87.61639344262295, "grad_norm": 2.7597391605377197, "learning_rate": 7.935805169182442e-07, "loss": 0.0611, "step": 26723 }, { "epoch": 87.61967213114754, "grad_norm": 1.9174809455871582, "learning_rate": 7.931659996545326e-07, "loss": 0.1041, "step": 26724 }, { "epoch": 87.62295081967213, "grad_norm": 2.440422534942627, "learning_rate": 7.927515862074453e-07, "loss": 0.1953, "step": 26725 }, { "epoch": 87.62622950819672, "grad_norm": 1.8762571811676025, "learning_rate": 7.923372765816584e-07, "loss": 0.0553, "step": 26726 }, { "epoch": 87.62950819672132, "grad_norm": 2.551863670349121, "learning_rate": 7.919230707818426e-07, "loss": 0.1844, "step": 26727 }, { "epoch": 87.6327868852459, "grad_norm": 2.57681941986084, "learning_rate": 7.915089688126687e-07, "loss": 0.1426, "step": 26728 }, { "epoch": 87.6360655737705, "grad_norm": 2.1942903995513916, "learning_rate": 7.910949706788051e-07, "loss": 0.0892, "step": 26729 }, { "epoch": 87.63934426229508, "grad_norm": 2.6062393188476562, "learning_rate": 7.906810763849182e-07, "loss": 0.1608, "step": 26730 }, { "epoch": 87.64262295081967, "grad_norm": 2.8974547386169434, "learning_rate": 7.902672859356808e-07, "loss": 0.1042, "step": 26731 }, { "epoch": 87.64590163934426, "grad_norm": 2.003267526626587, "learning_rate": 7.898535993357537e-07, "loss": 0.0958, "step": 26732 }, { "epoch": 87.64918032786885, "grad_norm": 6.221687316894531, "learning_rate": 7.894400165898042e-07, "loss": 0.1619, "step": 26733 }, { "epoch": 87.65245901639344, "grad_norm": 2.626417636871338, "learning_rate": 7.890265377024942e-07, "loss": 0.0962, "step": 26734 }, { "epoch": 87.65573770491804, "grad_norm": 3.267169952392578, "learning_rate": 7.886131626784876e-07, "loss": 0.0748, "step": 26735 }, { "epoch": 87.65901639344263, "grad_norm": 2.4252750873565674, "learning_rate": 7.881998915224453e-07, "loss": 0.1571, "step": 26736 }, { "epoch": 87.66229508196722, "grad_norm": 2.809468984603882, "learning_rate": 7.877867242390269e-07, "loss": 0.1945, "step": 26737 }, { "epoch": 87.6655737704918, "grad_norm": 2.6698591709136963, "learning_rate": 7.873736608328896e-07, "loss": 0.0848, "step": 26738 }, { "epoch": 87.66885245901639, "grad_norm": 2.764922618865967, "learning_rate": 7.869607013086955e-07, "loss": 0.1332, "step": 26739 }, { "epoch": 87.67213114754098, "grad_norm": 2.6296417713165283, "learning_rate": 7.865478456710984e-07, "loss": 0.1552, "step": 26740 }, { "epoch": 87.67540983606557, "grad_norm": 2.2225887775421143, "learning_rate": 7.861350939247536e-07, "loss": 0.0607, "step": 26741 }, { "epoch": 87.67868852459016, "grad_norm": 2.5250203609466553, "learning_rate": 7.857224460743163e-07, "loss": 0.0527, "step": 26742 }, { "epoch": 87.68196721311476, "grad_norm": 1.5338239669799805, "learning_rate": 7.85309902124436e-07, "loss": 0.0268, "step": 26743 }, { "epoch": 87.68524590163935, "grad_norm": 1.743371605873108, "learning_rate": 7.848974620797701e-07, "loss": 0.1526, "step": 26744 }, { "epoch": 87.68852459016394, "grad_norm": 2.633909225463867, "learning_rate": 7.844851259449659e-07, "loss": 0.087, "step": 26745 }, { "epoch": 87.69180327868852, "grad_norm": 2.6248135566711426, "learning_rate": 7.840728937246733e-07, "loss": 0.0568, "step": 26746 }, { "epoch": 87.69508196721311, "grad_norm": 2.380202054977417, "learning_rate": 7.836607654235418e-07, "loss": 0.0789, "step": 26747 }, { "epoch": 87.6983606557377, "grad_norm": 2.191377878189087, "learning_rate": 7.832487410462175e-07, "loss": 0.0725, "step": 26748 }, { "epoch": 87.70163934426229, "grad_norm": 2.4854302406311035, "learning_rate": 7.828368205973447e-07, "loss": 0.2177, "step": 26749 }, { "epoch": 87.70491803278688, "grad_norm": 3.6520206928253174, "learning_rate": 7.824250040815729e-07, "loss": 0.1397, "step": 26750 }, { "epoch": 87.70819672131148, "grad_norm": 2.111696481704712, "learning_rate": 7.820132915035428e-07, "loss": 0.0661, "step": 26751 }, { "epoch": 87.71147540983607, "grad_norm": 2.360391855239868, "learning_rate": 7.816016828678952e-07, "loss": 0.0671, "step": 26752 }, { "epoch": 87.71475409836066, "grad_norm": 2.2198853492736816, "learning_rate": 7.811901781792741e-07, "loss": 0.0445, "step": 26753 }, { "epoch": 87.71803278688525, "grad_norm": 5.30035400390625, "learning_rate": 7.807787774423204e-07, "loss": 0.1086, "step": 26754 }, { "epoch": 87.72131147540983, "grad_norm": 2.1215035915374756, "learning_rate": 7.803674806616712e-07, "loss": 0.05, "step": 26755 }, { "epoch": 87.72459016393442, "grad_norm": 4.082373142242432, "learning_rate": 7.79956287841962e-07, "loss": 0.1002, "step": 26756 }, { "epoch": 87.72786885245901, "grad_norm": 2.771094560623169, "learning_rate": 7.795451989878355e-07, "loss": 0.1288, "step": 26757 }, { "epoch": 87.73114754098361, "grad_norm": 3.6138548851013184, "learning_rate": 7.791342141039227e-07, "loss": 0.178, "step": 26758 }, { "epoch": 87.7344262295082, "grad_norm": 2.334712266921997, "learning_rate": 7.787233331948584e-07, "loss": 0.0721, "step": 26759 }, { "epoch": 87.73770491803279, "grad_norm": 2.787168502807617, "learning_rate": 7.78312556265276e-07, "loss": 0.102, "step": 26760 }, { "epoch": 87.74098360655738, "grad_norm": 2.6281521320343018, "learning_rate": 7.779018833198082e-07, "loss": 0.1253, "step": 26761 }, { "epoch": 87.74426229508197, "grad_norm": 2.6982972621917725, "learning_rate": 7.774913143630858e-07, "loss": 0.0568, "step": 26762 }, { "epoch": 87.74754098360656, "grad_norm": 2.586251974105835, "learning_rate": 7.770808493997372e-07, "loss": 0.1344, "step": 26763 }, { "epoch": 87.75081967213114, "grad_norm": 2.2256646156311035, "learning_rate": 7.7667048843439e-07, "loss": 0.0679, "step": 26764 }, { "epoch": 87.75409836065573, "grad_norm": 2.6868250370025635, "learning_rate": 7.76260231471675e-07, "loss": 0.1979, "step": 26765 }, { "epoch": 87.75737704918033, "grad_norm": 6.792315483093262, "learning_rate": 7.75850078516216e-07, "loss": 0.2633, "step": 26766 }, { "epoch": 87.76065573770492, "grad_norm": 2.1247854232788086, "learning_rate": 7.754400295726383e-07, "loss": 0.1305, "step": 26767 }, { "epoch": 87.76393442622951, "grad_norm": 2.700885057449341, "learning_rate": 7.75030084645565e-07, "loss": 0.0644, "step": 26768 }, { "epoch": 87.7672131147541, "grad_norm": 2.253878355026245, "learning_rate": 7.746202437396178e-07, "loss": 0.0398, "step": 26769 }, { "epoch": 87.77049180327869, "grad_norm": 2.1875410079956055, "learning_rate": 7.742105068594208e-07, "loss": 0.1206, "step": 26770 }, { "epoch": 87.77377049180328, "grad_norm": 2.639535427093506, "learning_rate": 7.738008740095925e-07, "loss": 0.1207, "step": 26771 }, { "epoch": 87.77704918032786, "grad_norm": 1.8840887546539307, "learning_rate": 7.733913451947528e-07, "loss": 0.0548, "step": 26772 }, { "epoch": 87.78032786885245, "grad_norm": 2.0763907432556152, "learning_rate": 7.729819204195166e-07, "loss": 0.1322, "step": 26773 }, { "epoch": 87.78360655737706, "grad_norm": 1.339045524597168, "learning_rate": 7.725725996885047e-07, "loss": 0.017, "step": 26774 }, { "epoch": 87.78688524590164, "grad_norm": 3.1367435455322266, "learning_rate": 7.721633830063313e-07, "loss": 0.0568, "step": 26775 }, { "epoch": 87.79016393442623, "grad_norm": 1.9516373872756958, "learning_rate": 7.717542703776105e-07, "loss": 0.0411, "step": 26776 }, { "epoch": 87.79344262295082, "grad_norm": 3.6828293800354004, "learning_rate": 7.713452618069528e-07, "loss": 0.2385, "step": 26777 }, { "epoch": 87.79672131147541, "grad_norm": 2.4411120414733887, "learning_rate": 7.709363572989747e-07, "loss": 0.061, "step": 26778 }, { "epoch": 87.8, "grad_norm": 2.039456605911255, "learning_rate": 7.705275568582848e-07, "loss": 0.0995, "step": 26779 }, { "epoch": 87.80327868852459, "grad_norm": 2.5017945766448975, "learning_rate": 7.701188604894927e-07, "loss": 0.1184, "step": 26780 }, { "epoch": 87.80655737704917, "grad_norm": 1.889934778213501, "learning_rate": 7.69710268197208e-07, "loss": 0.0639, "step": 26781 }, { "epoch": 87.80983606557378, "grad_norm": 2.751685857772827, "learning_rate": 7.693017799860347e-07, "loss": 0.2232, "step": 26782 }, { "epoch": 87.81311475409836, "grad_norm": 3.47037672996521, "learning_rate": 7.688933958605837e-07, "loss": 0.2041, "step": 26783 }, { "epoch": 87.81639344262295, "grad_norm": 2.788236618041992, "learning_rate": 7.684851158254569e-07, "loss": 0.2977, "step": 26784 }, { "epoch": 87.81967213114754, "grad_norm": 2.3780086040496826, "learning_rate": 7.680769398852594e-07, "loss": 0.0767, "step": 26785 }, { "epoch": 87.82295081967213, "grad_norm": 3.0143442153930664, "learning_rate": 7.67668868044591e-07, "loss": 0.1644, "step": 26786 }, { "epoch": 87.82622950819672, "grad_norm": 2.07975172996521, "learning_rate": 7.672609003080578e-07, "loss": 0.1184, "step": 26787 }, { "epoch": 87.8295081967213, "grad_norm": 2.6294641494750977, "learning_rate": 7.668530366802562e-07, "loss": 0.2562, "step": 26788 }, { "epoch": 87.8327868852459, "grad_norm": 2.939401865005493, "learning_rate": 7.664452771657882e-07, "loss": 0.2432, "step": 26789 }, { "epoch": 87.8360655737705, "grad_norm": 3.520500659942627, "learning_rate": 7.660376217692477e-07, "loss": 0.095, "step": 26790 }, { "epoch": 87.83934426229509, "grad_norm": 8.182442665100098, "learning_rate": 7.656300704952358e-07, "loss": 0.137, "step": 26791 }, { "epoch": 87.84262295081967, "grad_norm": 1.5476473569869995, "learning_rate": 7.652226233483462e-07, "loss": 0.0248, "step": 26792 }, { "epoch": 87.84590163934426, "grad_norm": 3.4478604793548584, "learning_rate": 7.648152803331732e-07, "loss": 0.0507, "step": 26793 }, { "epoch": 87.84918032786885, "grad_norm": 2.248197317123413, "learning_rate": 7.644080414543098e-07, "loss": 0.1521, "step": 26794 }, { "epoch": 87.85245901639344, "grad_norm": 2.7855849266052246, "learning_rate": 7.640009067163468e-07, "loss": 0.2252, "step": 26795 }, { "epoch": 87.85573770491803, "grad_norm": 1.5263392925262451, "learning_rate": 7.635938761238781e-07, "loss": 0.0192, "step": 26796 }, { "epoch": 87.85901639344263, "grad_norm": 2.4626998901367188, "learning_rate": 7.631869496814926e-07, "loss": 0.0547, "step": 26797 }, { "epoch": 87.86229508196722, "grad_norm": 2.4377293586730957, "learning_rate": 7.627801273937762e-07, "loss": 0.138, "step": 26798 }, { "epoch": 87.8655737704918, "grad_norm": 1.8715856075286865, "learning_rate": 7.623734092653201e-07, "loss": 0.0777, "step": 26799 }, { "epoch": 87.8688524590164, "grad_norm": 2.4589927196502686, "learning_rate": 7.619667953007081e-07, "loss": 0.0897, "step": 26800 }, { "epoch": 87.87213114754098, "grad_norm": 1.7986129522323608, "learning_rate": 7.615602855045256e-07, "loss": 0.1246, "step": 26801 }, { "epoch": 87.87540983606557, "grad_norm": 2.4191994667053223, "learning_rate": 7.611538798813545e-07, "loss": 0.1519, "step": 26802 }, { "epoch": 87.87868852459016, "grad_norm": 2.117945432662964, "learning_rate": 7.60747578435782e-07, "loss": 0.0708, "step": 26803 }, { "epoch": 87.88196721311475, "grad_norm": 2.2696027755737305, "learning_rate": 7.603413811723858e-07, "loss": 0.2086, "step": 26804 }, { "epoch": 87.88524590163935, "grad_norm": 2.409900426864624, "learning_rate": 7.599352880957467e-07, "loss": 0.0689, "step": 26805 }, { "epoch": 87.88852459016394, "grad_norm": 2.229020357131958, "learning_rate": 7.595292992104453e-07, "loss": 0.0656, "step": 26806 }, { "epoch": 87.89180327868853, "grad_norm": 1.79445481300354, "learning_rate": 7.591234145210591e-07, "loss": 0.1078, "step": 26807 }, { "epoch": 87.89508196721312, "grad_norm": 2.4544761180877686, "learning_rate": 7.587176340321634e-07, "loss": 0.1218, "step": 26808 }, { "epoch": 87.8983606557377, "grad_norm": 3.449094772338867, "learning_rate": 7.583119577483356e-07, "loss": 0.1406, "step": 26809 }, { "epoch": 87.90163934426229, "grad_norm": 1.9777922630310059, "learning_rate": 7.579063856741498e-07, "loss": 0.0849, "step": 26810 }, { "epoch": 87.90491803278688, "grad_norm": 1.8771251440048218, "learning_rate": 7.57500917814179e-07, "loss": 0.1116, "step": 26811 }, { "epoch": 87.90819672131147, "grad_norm": 2.262660264968872, "learning_rate": 7.570955541729941e-07, "loss": 0.1252, "step": 26812 }, { "epoch": 87.91147540983607, "grad_norm": 2.1336984634399414, "learning_rate": 7.566902947551679e-07, "loss": 0.061, "step": 26813 }, { "epoch": 87.91475409836066, "grad_norm": 2.204042911529541, "learning_rate": 7.562851395652693e-07, "loss": 0.1606, "step": 26814 }, { "epoch": 87.91803278688525, "grad_norm": 2.404541015625, "learning_rate": 7.558800886078665e-07, "loss": 0.137, "step": 26815 }, { "epoch": 87.92131147540984, "grad_norm": 3.6926236152648926, "learning_rate": 7.554751418875261e-07, "loss": 0.1712, "step": 26816 }, { "epoch": 87.92459016393443, "grad_norm": 1.5928473472595215, "learning_rate": 7.550702994088177e-07, "loss": 0.1086, "step": 26817 }, { "epoch": 87.92786885245901, "grad_norm": 3.2635915279388428, "learning_rate": 7.546655611763032e-07, "loss": 0.1346, "step": 26818 }, { "epoch": 87.9311475409836, "grad_norm": 2.2562806606292725, "learning_rate": 7.542609271945467e-07, "loss": 0.0932, "step": 26819 }, { "epoch": 87.93442622950819, "grad_norm": 3.4251303672790527, "learning_rate": 7.538563974681123e-07, "loss": 0.21, "step": 26820 }, { "epoch": 87.9377049180328, "grad_norm": 2.1930298805236816, "learning_rate": 7.534519720015576e-07, "loss": 0.0642, "step": 26821 }, { "epoch": 87.94098360655738, "grad_norm": 1.9554320573806763, "learning_rate": 7.530476507994488e-07, "loss": 0.047, "step": 26822 }, { "epoch": 87.94426229508197, "grad_norm": 2.594214916229248, "learning_rate": 7.526434338663424e-07, "loss": 0.0684, "step": 26823 }, { "epoch": 87.94754098360656, "grad_norm": 2.0568816661834717, "learning_rate": 7.522393212067958e-07, "loss": 0.0675, "step": 26824 }, { "epoch": 87.95081967213115, "grad_norm": 2.191356658935547, "learning_rate": 7.518353128253642e-07, "loss": 0.0971, "step": 26825 }, { "epoch": 87.95409836065573, "grad_norm": 1.7900104522705078, "learning_rate": 7.514314087266062e-07, "loss": 0.0372, "step": 26826 }, { "epoch": 87.95737704918032, "grad_norm": 3.353759288787842, "learning_rate": 7.510276089150758e-07, "loss": 0.2262, "step": 26827 }, { "epoch": 87.96065573770491, "grad_norm": 2.814087152481079, "learning_rate": 7.506239133953264e-07, "loss": 0.0519, "step": 26828 }, { "epoch": 87.96393442622951, "grad_norm": 2.5064663887023926, "learning_rate": 7.502203221719062e-07, "loss": 0.0597, "step": 26829 }, { "epoch": 87.9672131147541, "grad_norm": 1.9969418048858643, "learning_rate": 7.498168352493718e-07, "loss": 0.0437, "step": 26830 }, { "epoch": 87.97049180327869, "grad_norm": 2.36464262008667, "learning_rate": 7.494134526322705e-07, "loss": 0.0703, "step": 26831 }, { "epoch": 87.97377049180328, "grad_norm": 1.4164730310440063, "learning_rate": 7.490101743251499e-07, "loss": 0.0829, "step": 26832 }, { "epoch": 87.97704918032787, "grad_norm": 2.153111457824707, "learning_rate": 7.486070003325585e-07, "loss": 0.072, "step": 26833 }, { "epoch": 87.98032786885246, "grad_norm": 2.9164316654205322, "learning_rate": 7.482039306590405e-07, "loss": 0.1598, "step": 26834 }, { "epoch": 87.98360655737704, "grad_norm": 2.3534367084503174, "learning_rate": 7.478009653091444e-07, "loss": 0.0718, "step": 26835 }, { "epoch": 87.98688524590163, "grad_norm": 1.8098832368850708, "learning_rate": 7.473981042874135e-07, "loss": 0.0585, "step": 26836 }, { "epoch": 87.99016393442623, "grad_norm": 2.501889228820801, "learning_rate": 7.469953475983871e-07, "loss": 0.088, "step": 26837 }, { "epoch": 87.99344262295082, "grad_norm": 1.8870763778686523, "learning_rate": 7.465926952466085e-07, "loss": 0.1013, "step": 26838 }, { "epoch": 87.99672131147541, "grad_norm": 2.1196181774139404, "learning_rate": 7.461901472366195e-07, "loss": 0.1862, "step": 26839 }, { "epoch": 88.0, "grad_norm": 1.7786203622817993, "learning_rate": 7.457877035729588e-07, "loss": 0.0495, "step": 26840 }, { "epoch": 88.00327868852459, "grad_norm": 2.0608532428741455, "learning_rate": 7.453853642601638e-07, "loss": 0.0901, "step": 26841 }, { "epoch": 88.00655737704918, "grad_norm": 2.051459789276123, "learning_rate": 7.449831293027687e-07, "loss": 0.1048, "step": 26842 }, { "epoch": 88.00983606557377, "grad_norm": 2.708513021469116, "learning_rate": 7.445809987053143e-07, "loss": 0.0809, "step": 26843 }, { "epoch": 88.01311475409837, "grad_norm": 2.388010263442993, "learning_rate": 7.441789724723314e-07, "loss": 0.1939, "step": 26844 }, { "epoch": 88.01639344262296, "grad_norm": 3.0083136558532715, "learning_rate": 7.437770506083542e-07, "loss": 0.136, "step": 26845 }, { "epoch": 88.01967213114754, "grad_norm": 1.6469630002975464, "learning_rate": 7.433752331179156e-07, "loss": 0.1059, "step": 26846 }, { "epoch": 88.02295081967213, "grad_norm": 2.551603317260742, "learning_rate": 7.429735200055432e-07, "loss": 0.0579, "step": 26847 }, { "epoch": 88.02622950819672, "grad_norm": 2.497056484222412, "learning_rate": 7.425719112757723e-07, "loss": 0.0919, "step": 26848 }, { "epoch": 88.02950819672131, "grad_norm": 2.1935207843780518, "learning_rate": 7.42170406933127e-07, "loss": 0.1312, "step": 26849 }, { "epoch": 88.0327868852459, "grad_norm": 3.9835946559906006, "learning_rate": 7.417690069821371e-07, "loss": 0.1135, "step": 26850 }, { "epoch": 88.03606557377049, "grad_norm": 7.650518894195557, "learning_rate": 7.413677114273255e-07, "loss": 0.1059, "step": 26851 }, { "epoch": 88.03934426229509, "grad_norm": 2.1796488761901855, "learning_rate": 7.409665202732208e-07, "loss": 0.088, "step": 26852 }, { "epoch": 88.04262295081968, "grad_norm": 2.4775352478027344, "learning_rate": 7.405654335243461e-07, "loss": 0.126, "step": 26853 }, { "epoch": 88.04590163934427, "grad_norm": 2.960329055786133, "learning_rate": 7.401644511852224e-07, "loss": 0.0533, "step": 26854 }, { "epoch": 88.04918032786885, "grad_norm": 2.65376353263855, "learning_rate": 7.397635732603725e-07, "loss": 0.0536, "step": 26855 }, { "epoch": 88.05245901639344, "grad_norm": 2.6663994789123535, "learning_rate": 7.393627997543184e-07, "loss": 0.0716, "step": 26856 }, { "epoch": 88.05573770491803, "grad_norm": 2.2304770946502686, "learning_rate": 7.389621306715744e-07, "loss": 0.0953, "step": 26857 }, { "epoch": 88.05901639344262, "grad_norm": 3.349451780319214, "learning_rate": 7.385615660166634e-07, "loss": 0.2974, "step": 26858 }, { "epoch": 88.0622950819672, "grad_norm": 1.6699732542037964, "learning_rate": 7.381611057941007e-07, "loss": 0.0418, "step": 26859 }, { "epoch": 88.06557377049181, "grad_norm": 2.4180755615234375, "learning_rate": 7.377607500083994e-07, "loss": 0.1897, "step": 26860 }, { "epoch": 88.0688524590164, "grad_norm": 1.5186008214950562, "learning_rate": 7.37360498664077e-07, "loss": 0.0968, "step": 26861 }, { "epoch": 88.07213114754099, "grad_norm": 2.4308197498321533, "learning_rate": 7.369603517656465e-07, "loss": 0.1751, "step": 26862 }, { "epoch": 88.07540983606557, "grad_norm": 2.12469482421875, "learning_rate": 7.365603093176188e-07, "loss": 0.1885, "step": 26863 }, { "epoch": 88.07868852459016, "grad_norm": 2.104611396789551, "learning_rate": 7.361603713245036e-07, "loss": 0.1663, "step": 26864 }, { "epoch": 88.08196721311475, "grad_norm": 3.1769516468048096, "learning_rate": 7.357605377908139e-07, "loss": 0.1249, "step": 26865 }, { "epoch": 88.08524590163934, "grad_norm": 2.269303321838379, "learning_rate": 7.353608087210573e-07, "loss": 0.0532, "step": 26866 }, { "epoch": 88.08852459016393, "grad_norm": 2.1420469284057617, "learning_rate": 7.349611841197391e-07, "loss": 0.1007, "step": 26867 }, { "epoch": 88.09180327868853, "grad_norm": 3.0759987831115723, "learning_rate": 7.345616639913678e-07, "loss": 0.092, "step": 26868 }, { "epoch": 88.09508196721312, "grad_norm": 7.112762451171875, "learning_rate": 7.341622483404454e-07, "loss": 0.054, "step": 26869 }, { "epoch": 88.09836065573771, "grad_norm": 2.1342766284942627, "learning_rate": 7.337629371714794e-07, "loss": 0.0881, "step": 26870 }, { "epoch": 88.1016393442623, "grad_norm": 1.9859040975570679, "learning_rate": 7.333637304889707e-07, "loss": 0.1376, "step": 26871 }, { "epoch": 88.10491803278688, "grad_norm": 2.641038417816162, "learning_rate": 7.329646282974201e-07, "loss": 0.0661, "step": 26872 }, { "epoch": 88.10819672131147, "grad_norm": 1.883233666419983, "learning_rate": 7.325656306013274e-07, "loss": 0.0479, "step": 26873 }, { "epoch": 88.11147540983606, "grad_norm": 1.879963994026184, "learning_rate": 7.321667374051955e-07, "loss": 0.0309, "step": 26874 }, { "epoch": 88.11475409836065, "grad_norm": 2.377248525619507, "learning_rate": 7.317679487135188e-07, "loss": 0.0561, "step": 26875 }, { "epoch": 88.11803278688525, "grad_norm": 2.399746894836426, "learning_rate": 7.313692645307946e-07, "loss": 0.0564, "step": 26876 }, { "epoch": 88.12131147540984, "grad_norm": 2.1064789295196533, "learning_rate": 7.309706848615183e-07, "loss": 0.0877, "step": 26877 }, { "epoch": 88.12459016393443, "grad_norm": 3.0331342220306396, "learning_rate": 7.305722097101864e-07, "loss": 0.2293, "step": 26878 }, { "epoch": 88.12786885245902, "grad_norm": 3.6069371700286865, "learning_rate": 7.301738390812907e-07, "loss": 0.2272, "step": 26879 }, { "epoch": 88.1311475409836, "grad_norm": 2.361480712890625, "learning_rate": 7.297755729793221e-07, "loss": 0.0914, "step": 26880 }, { "epoch": 88.1344262295082, "grad_norm": 3.263720750808716, "learning_rate": 7.293774114087737e-07, "loss": 0.2405, "step": 26881 }, { "epoch": 88.13770491803278, "grad_norm": 2.124316453933716, "learning_rate": 7.289793543741319e-07, "loss": 0.1097, "step": 26882 }, { "epoch": 88.14098360655737, "grad_norm": 2.326418161392212, "learning_rate": 7.285814018798887e-07, "loss": 0.1192, "step": 26883 }, { "epoch": 88.14426229508197, "grad_norm": 1.6697213649749756, "learning_rate": 7.281835539305304e-07, "loss": 0.0381, "step": 26884 }, { "epoch": 88.14754098360656, "grad_norm": 1.5063942670822144, "learning_rate": 7.277858105305436e-07, "loss": 0.0831, "step": 26885 }, { "epoch": 88.15081967213115, "grad_norm": 3.280310869216919, "learning_rate": 7.273881716844089e-07, "loss": 0.0756, "step": 26886 }, { "epoch": 88.15409836065574, "grad_norm": 2.0566868782043457, "learning_rate": 7.269906373966174e-07, "loss": 0.0657, "step": 26887 }, { "epoch": 88.15737704918033, "grad_norm": 3.510096311569214, "learning_rate": 7.265932076716464e-07, "loss": 0.0967, "step": 26888 }, { "epoch": 88.16065573770491, "grad_norm": 1.9448500871658325, "learning_rate": 7.261958825139792e-07, "loss": 0.1129, "step": 26889 }, { "epoch": 88.1639344262295, "grad_norm": 2.848951578140259, "learning_rate": 7.257986619280943e-07, "loss": 0.1243, "step": 26890 }, { "epoch": 88.1672131147541, "grad_norm": 4.05155086517334, "learning_rate": 7.254015459184748e-07, "loss": 0.2288, "step": 26891 }, { "epoch": 88.1704918032787, "grad_norm": 2.7132186889648438, "learning_rate": 7.250045344895951e-07, "loss": 0.0856, "step": 26892 }, { "epoch": 88.17377049180328, "grad_norm": 8.120584487915039, "learning_rate": 7.246076276459324e-07, "loss": 0.0887, "step": 26893 }, { "epoch": 88.17704918032787, "grad_norm": 4.365988731384277, "learning_rate": 7.242108253919633e-07, "loss": 0.0419, "step": 26894 }, { "epoch": 88.18032786885246, "grad_norm": 2.5760679244995117, "learning_rate": 7.238141277321608e-07, "loss": 0.1079, "step": 26895 }, { "epoch": 88.18360655737705, "grad_norm": 2.2011032104492188, "learning_rate": 7.234175346709993e-07, "loss": 0.1059, "step": 26896 }, { "epoch": 88.18688524590164, "grad_norm": 1.808087944984436, "learning_rate": 7.230210462129505e-07, "loss": 0.1118, "step": 26897 }, { "epoch": 88.19016393442622, "grad_norm": 1.9633123874664307, "learning_rate": 7.226246623624844e-07, "loss": 0.1249, "step": 26898 }, { "epoch": 88.19344262295083, "grad_norm": 2.157636880874634, "learning_rate": 7.222283831240706e-07, "loss": 0.0315, "step": 26899 }, { "epoch": 88.19672131147541, "grad_norm": 2.6682963371276855, "learning_rate": 7.218322085021801e-07, "loss": 0.1818, "step": 26900 }, { "epoch": 88.2, "grad_norm": 1.8536310195922852, "learning_rate": 7.21436138501278e-07, "loss": 0.0536, "step": 26901 }, { "epoch": 88.20327868852459, "grad_norm": 2.4780454635620117, "learning_rate": 7.210401731258298e-07, "loss": 0.0754, "step": 26902 }, { "epoch": 88.20655737704918, "grad_norm": 2.542398452758789, "learning_rate": 7.206443123803009e-07, "loss": 0.0975, "step": 26903 }, { "epoch": 88.20983606557377, "grad_norm": 2.113724946975708, "learning_rate": 7.202485562691563e-07, "loss": 0.0475, "step": 26904 }, { "epoch": 88.21311475409836, "grad_norm": 1.792153000831604, "learning_rate": 7.198529047968583e-07, "loss": 0.0324, "step": 26905 }, { "epoch": 88.21639344262294, "grad_norm": 2.1946349143981934, "learning_rate": 7.194573579678677e-07, "loss": 0.0977, "step": 26906 }, { "epoch": 88.21967213114755, "grad_norm": 1.9266479015350342, "learning_rate": 7.190619157866429e-07, "loss": 0.0541, "step": 26907 }, { "epoch": 88.22295081967214, "grad_norm": 3.8891594409942627, "learning_rate": 7.186665782576474e-07, "loss": 0.1395, "step": 26908 }, { "epoch": 88.22622950819672, "grad_norm": 2.5274264812469482, "learning_rate": 7.182713453853352e-07, "loss": 0.2185, "step": 26909 }, { "epoch": 88.22950819672131, "grad_norm": 1.8230842351913452, "learning_rate": 7.178762171741626e-07, "loss": 0.0783, "step": 26910 }, { "epoch": 88.2327868852459, "grad_norm": 2.0959513187408447, "learning_rate": 7.174811936285886e-07, "loss": 0.1698, "step": 26911 }, { "epoch": 88.23606557377049, "grad_norm": 2.5767290592193604, "learning_rate": 7.170862747530649e-07, "loss": 0.195, "step": 26912 }, { "epoch": 88.23934426229508, "grad_norm": 1.9112207889556885, "learning_rate": 7.166914605520447e-07, "loss": 0.0349, "step": 26913 }, { "epoch": 88.24262295081967, "grad_norm": 1.9507958889007568, "learning_rate": 7.162967510299811e-07, "loss": 0.1707, "step": 26914 }, { "epoch": 88.24590163934427, "grad_norm": 1.9616974592208862, "learning_rate": 7.159021461913251e-07, "loss": 0.0647, "step": 26915 }, { "epoch": 88.24918032786886, "grad_norm": 1.731315016746521, "learning_rate": 7.155076460405231e-07, "loss": 0.0532, "step": 26916 }, { "epoch": 88.25245901639344, "grad_norm": 3.314690589904785, "learning_rate": 7.151132505820279e-07, "loss": 0.0541, "step": 26917 }, { "epoch": 88.25573770491803, "grad_norm": 7.141096115112305, "learning_rate": 7.147189598202853e-07, "loss": 0.0881, "step": 26918 }, { "epoch": 88.25901639344262, "grad_norm": 1.5227382183074951, "learning_rate": 7.143247737597392e-07, "loss": 0.0264, "step": 26919 }, { "epoch": 88.26229508196721, "grad_norm": 2.1701178550720215, "learning_rate": 7.139306924048373e-07, "loss": 0.061, "step": 26920 }, { "epoch": 88.2655737704918, "grad_norm": 2.7153847217559814, "learning_rate": 7.135367157600193e-07, "loss": 0.1249, "step": 26921 }, { "epoch": 88.26885245901639, "grad_norm": 2.63889479637146, "learning_rate": 7.131428438297327e-07, "loss": 0.0654, "step": 26922 }, { "epoch": 88.27213114754099, "grad_norm": 2.2485029697418213, "learning_rate": 7.127490766184164e-07, "loss": 0.1144, "step": 26923 }, { "epoch": 88.27540983606558, "grad_norm": 2.9990813732147217, "learning_rate": 7.1235541413051e-07, "loss": 0.0814, "step": 26924 }, { "epoch": 88.27868852459017, "grad_norm": 2.4141364097595215, "learning_rate": 7.119618563704522e-07, "loss": 0.1236, "step": 26925 }, { "epoch": 88.28196721311475, "grad_norm": 1.9717023372650146, "learning_rate": 7.115684033426829e-07, "loss": 0.1449, "step": 26926 }, { "epoch": 88.28524590163934, "grad_norm": 2.250636100769043, "learning_rate": 7.111750550516372e-07, "loss": 0.0657, "step": 26927 }, { "epoch": 88.28852459016393, "grad_norm": 2.138427495956421, "learning_rate": 7.107818115017507e-07, "loss": 0.1344, "step": 26928 }, { "epoch": 88.29180327868852, "grad_norm": 2.6214771270751953, "learning_rate": 7.103886726974562e-07, "loss": 0.2055, "step": 26929 }, { "epoch": 88.29508196721312, "grad_norm": 2.4979841709136963, "learning_rate": 7.099956386431894e-07, "loss": 0.0444, "step": 26930 }, { "epoch": 88.29836065573771, "grad_norm": 2.934901237487793, "learning_rate": 7.09602709343381e-07, "loss": 0.0872, "step": 26931 }, { "epoch": 88.3016393442623, "grad_norm": 3.100081443786621, "learning_rate": 7.09209884802462e-07, "loss": 0.1156, "step": 26932 }, { "epoch": 88.30491803278689, "grad_norm": 2.346907138824463, "learning_rate": 7.088171650248621e-07, "loss": 0.0475, "step": 26933 }, { "epoch": 88.30819672131148, "grad_norm": 2.396744966506958, "learning_rate": 7.084245500150066e-07, "loss": 0.0731, "step": 26934 }, { "epoch": 88.31147540983606, "grad_norm": 2.6709625720977783, "learning_rate": 7.080320397773266e-07, "loss": 0.0923, "step": 26935 }, { "epoch": 88.31475409836065, "grad_norm": 2.20139741897583, "learning_rate": 7.076396343162473e-07, "loss": 0.0725, "step": 26936 }, { "epoch": 88.31803278688524, "grad_norm": 2.419919967651367, "learning_rate": 7.072473336361929e-07, "loss": 0.1936, "step": 26937 }, { "epoch": 88.32131147540984, "grad_norm": 3.3284947872161865, "learning_rate": 7.068551377415844e-07, "loss": 0.0649, "step": 26938 }, { "epoch": 88.32459016393443, "grad_norm": 2.949643135070801, "learning_rate": 7.064630466368483e-07, "loss": 0.117, "step": 26939 }, { "epoch": 88.32786885245902, "grad_norm": 2.057713270187378, "learning_rate": 7.060710603264054e-07, "loss": 0.1939, "step": 26940 }, { "epoch": 88.33114754098361, "grad_norm": 2.314582586288452, "learning_rate": 7.056791788146733e-07, "loss": 0.1509, "step": 26941 }, { "epoch": 88.3344262295082, "grad_norm": 3.0500874519348145, "learning_rate": 7.052874021060707e-07, "loss": 0.1874, "step": 26942 }, { "epoch": 88.33770491803278, "grad_norm": 2.3123741149902344, "learning_rate": 7.048957302050186e-07, "loss": 0.149, "step": 26943 }, { "epoch": 88.34098360655737, "grad_norm": 4.214249610900879, "learning_rate": 7.04504163115931e-07, "loss": 0.0893, "step": 26944 }, { "epoch": 88.34426229508196, "grad_norm": 2.5626392364501953, "learning_rate": 7.041127008432247e-07, "loss": 0.1706, "step": 26945 }, { "epoch": 88.34754098360656, "grad_norm": 2.372532844543457, "learning_rate": 7.037213433913126e-07, "loss": 0.2379, "step": 26946 }, { "epoch": 88.35081967213115, "grad_norm": 2.183504819869995, "learning_rate": 7.033300907646068e-07, "loss": 0.1018, "step": 26947 }, { "epoch": 88.35409836065574, "grad_norm": 2.5769846439361572, "learning_rate": 7.029389429675215e-07, "loss": 0.0739, "step": 26948 }, { "epoch": 88.35737704918033, "grad_norm": 1.931851863861084, "learning_rate": 7.025479000044666e-07, "loss": 0.0748, "step": 26949 }, { "epoch": 88.36065573770492, "grad_norm": 2.116389751434326, "learning_rate": 7.021569618798507e-07, "loss": 0.0691, "step": 26950 }, { "epoch": 88.3639344262295, "grad_norm": 2.2501230239868164, "learning_rate": 7.017661285980814e-07, "loss": 0.1641, "step": 26951 }, { "epoch": 88.3672131147541, "grad_norm": 4.061010360717773, "learning_rate": 7.013754001635676e-07, "loss": 0.0991, "step": 26952 }, { "epoch": 88.37049180327868, "grad_norm": 2.159407615661621, "learning_rate": 7.009847765807143e-07, "loss": 0.0748, "step": 26953 }, { "epoch": 88.37377049180328, "grad_norm": 1.9710537195205688, "learning_rate": 7.005942578539271e-07, "loss": 0.1302, "step": 26954 }, { "epoch": 88.37704918032787, "grad_norm": 70.17322540283203, "learning_rate": 7.002038439876057e-07, "loss": 0.209, "step": 26955 }, { "epoch": 88.38032786885246, "grad_norm": 2.3749704360961914, "learning_rate": 6.998135349861579e-07, "loss": 0.1641, "step": 26956 }, { "epoch": 88.38360655737705, "grad_norm": 2.5816311836242676, "learning_rate": 6.994233308539822e-07, "loss": 0.0443, "step": 26957 }, { "epoch": 88.38688524590164, "grad_norm": 3.2020769119262695, "learning_rate": 6.990332315954784e-07, "loss": 0.2102, "step": 26958 }, { "epoch": 88.39016393442623, "grad_norm": 2.1943485736846924, "learning_rate": 6.986432372150431e-07, "loss": 0.0543, "step": 26959 }, { "epoch": 88.39344262295081, "grad_norm": 2.3965375423431396, "learning_rate": 6.982533477170795e-07, "loss": 0.0684, "step": 26960 }, { "epoch": 88.3967213114754, "grad_norm": 2.348918914794922, "learning_rate": 6.978635631059794e-07, "loss": 0.0791, "step": 26961 }, { "epoch": 88.4, "grad_norm": 1.8675206899642944, "learning_rate": 6.974738833861383e-07, "loss": 0.068, "step": 26962 }, { "epoch": 88.4032786885246, "grad_norm": 2.3085763454437256, "learning_rate": 6.970843085619528e-07, "loss": 0.1722, "step": 26963 }, { "epoch": 88.40655737704918, "grad_norm": 1.7948448657989502, "learning_rate": 6.966948386378147e-07, "loss": 0.1177, "step": 26964 }, { "epoch": 88.40983606557377, "grad_norm": 2.57053279876709, "learning_rate": 6.963054736181152e-07, "loss": 0.1285, "step": 26965 }, { "epoch": 88.41311475409836, "grad_norm": 1.59464693069458, "learning_rate": 6.959162135072428e-07, "loss": 0.1793, "step": 26966 }, { "epoch": 88.41639344262295, "grad_norm": 6.7786478996276855, "learning_rate": 6.955270583095919e-07, "loss": 0.1577, "step": 26967 }, { "epoch": 88.41967213114754, "grad_norm": 1.711273431777954, "learning_rate": 6.951380080295467e-07, "loss": 0.0532, "step": 26968 }, { "epoch": 88.42295081967212, "grad_norm": 5.093185901641846, "learning_rate": 6.947490626714926e-07, "loss": 0.106, "step": 26969 }, { "epoch": 88.42622950819673, "grad_norm": 2.476055383682251, "learning_rate": 6.943602222398204e-07, "loss": 0.0703, "step": 26970 }, { "epoch": 88.42950819672132, "grad_norm": 2.3658292293548584, "learning_rate": 6.939714867389124e-07, "loss": 0.1547, "step": 26971 }, { "epoch": 88.4327868852459, "grad_norm": 2.5273094177246094, "learning_rate": 6.935828561731517e-07, "loss": 0.1631, "step": 26972 }, { "epoch": 88.43606557377049, "grad_norm": 1.638983964920044, "learning_rate": 6.93194330546918e-07, "loss": 0.0964, "step": 26973 }, { "epoch": 88.43934426229508, "grad_norm": 2.342616319656372, "learning_rate": 6.928059098645979e-07, "loss": 0.1585, "step": 26974 }, { "epoch": 88.44262295081967, "grad_norm": 2.0066845417022705, "learning_rate": 6.924175941305666e-07, "loss": 0.0654, "step": 26975 }, { "epoch": 88.44590163934426, "grad_norm": 2.22109055519104, "learning_rate": 6.920293833492053e-07, "loss": 0.0945, "step": 26976 }, { "epoch": 88.44918032786886, "grad_norm": 2.1006784439086914, "learning_rate": 6.916412775248893e-07, "loss": 0.1124, "step": 26977 }, { "epoch": 88.45245901639345, "grad_norm": 2.844224452972412, "learning_rate": 6.912532766619973e-07, "loss": 0.1666, "step": 26978 }, { "epoch": 88.45573770491804, "grad_norm": 2.1843440532684326, "learning_rate": 6.908653807649035e-07, "loss": 0.1335, "step": 26979 }, { "epoch": 88.45901639344262, "grad_norm": 2.4501423835754395, "learning_rate": 6.904775898379811e-07, "loss": 0.1172, "step": 26980 }, { "epoch": 88.46229508196721, "grad_norm": 2.6188321113586426, "learning_rate": 6.900899038856024e-07, "loss": 0.2191, "step": 26981 }, { "epoch": 88.4655737704918, "grad_norm": 2.131743907928467, "learning_rate": 6.897023229121413e-07, "loss": 0.0498, "step": 26982 }, { "epoch": 88.46885245901639, "grad_norm": 3.7002408504486084, "learning_rate": 6.893148469219669e-07, "loss": 0.0726, "step": 26983 }, { "epoch": 88.47213114754098, "grad_norm": 2.4894721508026123, "learning_rate": 6.889274759194475e-07, "loss": 0.1371, "step": 26984 }, { "epoch": 88.47540983606558, "grad_norm": 2.734034299850464, "learning_rate": 6.885402099089533e-07, "loss": 0.1195, "step": 26985 }, { "epoch": 88.47868852459017, "grad_norm": 2.239654064178467, "learning_rate": 6.881530488948474e-07, "loss": 0.1542, "step": 26986 }, { "epoch": 88.48196721311476, "grad_norm": 2.1393167972564697, "learning_rate": 6.877659928814984e-07, "loss": 0.1628, "step": 26987 }, { "epoch": 88.48524590163935, "grad_norm": 2.504157781600952, "learning_rate": 6.873790418732718e-07, "loss": 0.0536, "step": 26988 }, { "epoch": 88.48852459016393, "grad_norm": 2.8334789276123047, "learning_rate": 6.869921958745285e-07, "loss": 0.1201, "step": 26989 }, { "epoch": 88.49180327868852, "grad_norm": 2.65718674659729, "learning_rate": 6.866054548896295e-07, "loss": 0.1744, "step": 26990 }, { "epoch": 88.49508196721311, "grad_norm": 3.3774702548980713, "learning_rate": 6.86218818922939e-07, "loss": 0.0558, "step": 26991 }, { "epoch": 88.4983606557377, "grad_norm": 2.4037578105926514, "learning_rate": 6.858322879788148e-07, "loss": 0.1453, "step": 26992 }, { "epoch": 88.5016393442623, "grad_norm": 2.170832872390747, "learning_rate": 6.854458620616166e-07, "loss": 0.0952, "step": 26993 }, { "epoch": 88.50491803278689, "grad_norm": 1.8847531080245972, "learning_rate": 6.850595411756999e-07, "loss": 0.0324, "step": 26994 }, { "epoch": 88.50819672131148, "grad_norm": 2.563769817352295, "learning_rate": 6.846733253254223e-07, "loss": 0.096, "step": 26995 }, { "epoch": 88.51147540983607, "grad_norm": 1.4336802959442139, "learning_rate": 6.842872145151391e-07, "loss": 0.0799, "step": 26996 }, { "epoch": 88.51475409836065, "grad_norm": 2.6710329055786133, "learning_rate": 6.839012087492037e-07, "loss": 0.0687, "step": 26997 }, { "epoch": 88.51803278688524, "grad_norm": 2.663407564163208, "learning_rate": 6.83515308031969e-07, "loss": 0.0863, "step": 26998 }, { "epoch": 88.52131147540983, "grad_norm": 2.3138515949249268, "learning_rate": 6.831295123677829e-07, "loss": 0.0758, "step": 26999 }, { "epoch": 88.52459016393442, "grad_norm": 2.020151138305664, "learning_rate": 6.827438217610016e-07, "loss": 0.0494, "step": 27000 }, { "epoch": 88.52786885245902, "grad_norm": 2.2220890522003174, "learning_rate": 6.823582362159697e-07, "loss": 0.0665, "step": 27001 }, { "epoch": 88.53114754098361, "grad_norm": 1.54338800907135, "learning_rate": 6.819727557370381e-07, "loss": 0.0174, "step": 27002 }, { "epoch": 88.5344262295082, "grad_norm": 2.2964892387390137, "learning_rate": 6.8158738032855e-07, "loss": 0.1725, "step": 27003 }, { "epoch": 88.53770491803279, "grad_norm": 2.4801530838012695, "learning_rate": 6.81202109994854e-07, "loss": 0.1133, "step": 27004 }, { "epoch": 88.54098360655738, "grad_norm": 3.055471897125244, "learning_rate": 6.808169447402935e-07, "loss": 0.1053, "step": 27005 }, { "epoch": 88.54426229508196, "grad_norm": 2.7719080448150635, "learning_rate": 6.804318845692115e-07, "loss": 0.1584, "step": 27006 }, { "epoch": 88.54754098360655, "grad_norm": 3.0596272945404053, "learning_rate": 6.800469294859491e-07, "loss": 0.172, "step": 27007 }, { "epoch": 88.55081967213114, "grad_norm": 2.6856281757354736, "learning_rate": 6.796620794948483e-07, "loss": 0.0501, "step": 27008 }, { "epoch": 88.55409836065574, "grad_norm": 2.579369306564331, "learning_rate": 6.79277334600249e-07, "loss": 0.2008, "step": 27009 }, { "epoch": 88.55737704918033, "grad_norm": 2.783442735671997, "learning_rate": 6.788926948064889e-07, "loss": 0.1896, "step": 27010 }, { "epoch": 88.56065573770492, "grad_norm": 1.8484983444213867, "learning_rate": 6.785081601179044e-07, "loss": 0.0473, "step": 27011 }, { "epoch": 88.56393442622951, "grad_norm": 2.481127977371216, "learning_rate": 6.78123730538831e-07, "loss": 0.135, "step": 27012 }, { "epoch": 88.5672131147541, "grad_norm": 1.5943372249603271, "learning_rate": 6.777394060736076e-07, "loss": 0.0635, "step": 27013 }, { "epoch": 88.57049180327868, "grad_norm": 1.8258014917373657, "learning_rate": 6.773551867265637e-07, "loss": 0.0678, "step": 27014 }, { "epoch": 88.57377049180327, "grad_norm": 1.742203950881958, "learning_rate": 6.769710725020329e-07, "loss": 0.064, "step": 27015 }, { "epoch": 88.57704918032788, "grad_norm": 3.0454938411712646, "learning_rate": 6.765870634043469e-07, "loss": 0.1366, "step": 27016 }, { "epoch": 88.58032786885246, "grad_norm": 2.533426523208618, "learning_rate": 6.76203159437837e-07, "loss": 0.1312, "step": 27017 }, { "epoch": 88.58360655737705, "grad_norm": 3.9438111782073975, "learning_rate": 6.758193606068286e-07, "loss": 0.2021, "step": 27018 }, { "epoch": 88.58688524590164, "grad_norm": 2.086465835571289, "learning_rate": 6.754356669156526e-07, "loss": 0.0613, "step": 27019 }, { "epoch": 88.59016393442623, "grad_norm": 4.166975498199463, "learning_rate": 6.750520783686354e-07, "loss": 0.2069, "step": 27020 }, { "epoch": 88.59344262295082, "grad_norm": 2.004302501678467, "learning_rate": 6.746685949700993e-07, "loss": 0.0626, "step": 27021 }, { "epoch": 88.5967213114754, "grad_norm": 3.232884407043457, "learning_rate": 6.742852167243729e-07, "loss": 0.1215, "step": 27022 }, { "epoch": 88.6, "grad_norm": 4.495321750640869, "learning_rate": 6.739019436357774e-07, "loss": 0.1431, "step": 27023 }, { "epoch": 88.6032786885246, "grad_norm": 3.2118442058563232, "learning_rate": 6.735187757086337e-07, "loss": 0.0571, "step": 27024 }, { "epoch": 88.60655737704919, "grad_norm": 2.4615280628204346, "learning_rate": 6.731357129472605e-07, "loss": 0.128, "step": 27025 }, { "epoch": 88.60983606557377, "grad_norm": 2.9456703662872314, "learning_rate": 6.727527553559821e-07, "loss": 0.1301, "step": 27026 }, { "epoch": 88.61311475409836, "grad_norm": 2.7961034774780273, "learning_rate": 6.72369902939114e-07, "loss": 0.1839, "step": 27027 }, { "epoch": 88.61639344262295, "grad_norm": 3.099256992340088, "learning_rate": 6.719871557009738e-07, "loss": 0.0822, "step": 27028 }, { "epoch": 88.61967213114754, "grad_norm": 1.7878018617630005, "learning_rate": 6.71604513645876e-07, "loss": 0.1707, "step": 27029 }, { "epoch": 88.62295081967213, "grad_norm": 2.9674830436706543, "learning_rate": 6.712219767781369e-07, "loss": 0.1665, "step": 27030 }, { "epoch": 88.62622950819672, "grad_norm": 2.2424156665802, "learning_rate": 6.7083954510207e-07, "loss": 0.0516, "step": 27031 }, { "epoch": 88.62950819672132, "grad_norm": 1.6683807373046875, "learning_rate": 6.704572186219871e-07, "loss": 0.0462, "step": 27032 }, { "epoch": 88.6327868852459, "grad_norm": 2.177870035171509, "learning_rate": 6.700749973421982e-07, "loss": 0.083, "step": 27033 }, { "epoch": 88.6360655737705, "grad_norm": 2.2332215309143066, "learning_rate": 6.696928812670156e-07, "loss": 0.0694, "step": 27034 }, { "epoch": 88.63934426229508, "grad_norm": 2.19104266166687, "learning_rate": 6.693108704007467e-07, "loss": 0.0959, "step": 27035 }, { "epoch": 88.64262295081967, "grad_norm": 2.5378270149230957, "learning_rate": 6.689289647476993e-07, "loss": 0.1834, "step": 27036 }, { "epoch": 88.64590163934426, "grad_norm": 1.9651811122894287, "learning_rate": 6.685471643121799e-07, "loss": 0.0828, "step": 27037 }, { "epoch": 88.64918032786885, "grad_norm": 2.9544625282287598, "learning_rate": 6.681654690984917e-07, "loss": 0.0653, "step": 27038 }, { "epoch": 88.65245901639344, "grad_norm": 3.3990683555603027, "learning_rate": 6.677838791109425e-07, "loss": 0.0939, "step": 27039 }, { "epoch": 88.65573770491804, "grad_norm": 2.2105484008789062, "learning_rate": 6.674023943538333e-07, "loss": 0.1643, "step": 27040 }, { "epoch": 88.65901639344263, "grad_norm": 2.0345075130462646, "learning_rate": 6.670210148314648e-07, "loss": 0.0492, "step": 27041 }, { "epoch": 88.66229508196722, "grad_norm": 1.797776460647583, "learning_rate": 6.666397405481373e-07, "loss": 0.101, "step": 27042 }, { "epoch": 88.6655737704918, "grad_norm": 3.5204148292541504, "learning_rate": 6.662585715081515e-07, "loss": 0.1347, "step": 27043 }, { "epoch": 88.66885245901639, "grad_norm": 1.8438239097595215, "learning_rate": 6.658775077158064e-07, "loss": 0.0841, "step": 27044 }, { "epoch": 88.67213114754098, "grad_norm": 2.511549234390259, "learning_rate": 6.654965491753962e-07, "loss": 0.1772, "step": 27045 }, { "epoch": 88.67540983606557, "grad_norm": 1.5042963027954102, "learning_rate": 6.651156958912175e-07, "loss": 0.0207, "step": 27046 }, { "epoch": 88.67868852459016, "grad_norm": 2.561593770980835, "learning_rate": 6.647349478675658e-07, "loss": 0.2559, "step": 27047 }, { "epoch": 88.68196721311476, "grad_norm": 2.2820117473602295, "learning_rate": 6.643543051087342e-07, "loss": 0.0954, "step": 27048 }, { "epoch": 88.68524590163935, "grad_norm": 2.547600746154785, "learning_rate": 6.639737676190138e-07, "loss": 0.1404, "step": 27049 }, { "epoch": 88.68852459016394, "grad_norm": 2.2932817935943604, "learning_rate": 6.635933354026969e-07, "loss": 0.0695, "step": 27050 }, { "epoch": 88.69180327868852, "grad_norm": 1.8111425638198853, "learning_rate": 6.632130084640708e-07, "loss": 0.0559, "step": 27051 }, { "epoch": 88.69508196721311, "grad_norm": 2.4871068000793457, "learning_rate": 6.62832786807428e-07, "loss": 0.1437, "step": 27052 }, { "epoch": 88.6983606557377, "grad_norm": 4.129235744476318, "learning_rate": 6.624526704370526e-07, "loss": 0.2261, "step": 27053 }, { "epoch": 88.70163934426229, "grad_norm": 4.069896697998047, "learning_rate": 6.620726593572324e-07, "loss": 0.1803, "step": 27054 }, { "epoch": 88.70491803278688, "grad_norm": 2.039496898651123, "learning_rate": 6.616927535722506e-07, "loss": 0.0859, "step": 27055 }, { "epoch": 88.70819672131148, "grad_norm": 2.847085475921631, "learning_rate": 6.613129530863948e-07, "loss": 0.0651, "step": 27056 }, { "epoch": 88.71147540983607, "grad_norm": 9.367301940917969, "learning_rate": 6.609332579039441e-07, "loss": 0.2134, "step": 27057 }, { "epoch": 88.71475409836066, "grad_norm": 2.5657479763031006, "learning_rate": 6.605536680291813e-07, "loss": 0.1015, "step": 27058 }, { "epoch": 88.71803278688525, "grad_norm": 2.3068864345550537, "learning_rate": 6.601741834663855e-07, "loss": 0.0863, "step": 27059 }, { "epoch": 88.72131147540983, "grad_norm": 1.9518109560012817, "learning_rate": 6.597948042198377e-07, "loss": 0.0694, "step": 27060 }, { "epoch": 88.72459016393442, "grad_norm": 1.506588339805603, "learning_rate": 6.594155302938143e-07, "loss": 0.0281, "step": 27061 }, { "epoch": 88.72786885245901, "grad_norm": 2.943737506866455, "learning_rate": 6.590363616925933e-07, "loss": 0.0649, "step": 27062 }, { "epoch": 88.73114754098361, "grad_norm": 2.5082294940948486, "learning_rate": 6.586572984204498e-07, "loss": 0.0993, "step": 27063 }, { "epoch": 88.7344262295082, "grad_norm": 2.450618267059326, "learning_rate": 6.582783404816562e-07, "loss": 0.1934, "step": 27064 }, { "epoch": 88.73770491803279, "grad_norm": 1.798516869544983, "learning_rate": 6.578994878804878e-07, "loss": 0.0838, "step": 27065 }, { "epoch": 88.74098360655738, "grad_norm": 2.002976655960083, "learning_rate": 6.575207406212169e-07, "loss": 0.1541, "step": 27066 }, { "epoch": 88.74426229508197, "grad_norm": 2.223599433898926, "learning_rate": 6.571420987081134e-07, "loss": 0.1061, "step": 27067 }, { "epoch": 88.74754098360656, "grad_norm": 2.3595023155212402, "learning_rate": 6.56763562145446e-07, "loss": 0.0687, "step": 27068 }, { "epoch": 88.75081967213114, "grad_norm": 2.076474905014038, "learning_rate": 6.563851309374847e-07, "loss": 0.0415, "step": 27069 }, { "epoch": 88.75409836065573, "grad_norm": 2.962602376937866, "learning_rate": 6.560068050884961e-07, "loss": 0.1012, "step": 27070 }, { "epoch": 88.75737704918033, "grad_norm": 2.7023017406463623, "learning_rate": 6.556285846027444e-07, "loss": 0.1474, "step": 27071 }, { "epoch": 88.76065573770492, "grad_norm": 2.9335408210754395, "learning_rate": 6.552504694844974e-07, "loss": 0.2359, "step": 27072 }, { "epoch": 88.76393442622951, "grad_norm": 4.177870750427246, "learning_rate": 6.548724597380174e-07, "loss": 0.0452, "step": 27073 }, { "epoch": 88.7672131147541, "grad_norm": 1.7722911834716797, "learning_rate": 6.544945553675663e-07, "loss": 0.0872, "step": 27074 }, { "epoch": 88.77049180327869, "grad_norm": 2.113004207611084, "learning_rate": 6.541167563774065e-07, "loss": 0.0327, "step": 27075 }, { "epoch": 88.77377049180328, "grad_norm": 2.4056711196899414, "learning_rate": 6.537390627717977e-07, "loss": 0.118, "step": 27076 }, { "epoch": 88.77704918032786, "grad_norm": 2.168280601501465, "learning_rate": 6.533614745549977e-07, "loss": 0.0679, "step": 27077 }, { "epoch": 88.78032786885245, "grad_norm": 2.6091716289520264, "learning_rate": 6.529839917312664e-07, "loss": 0.066, "step": 27078 }, { "epoch": 88.78360655737706, "grad_norm": 2.182069778442383, "learning_rate": 6.526066143048593e-07, "loss": 0.0715, "step": 27079 }, { "epoch": 88.78688524590164, "grad_norm": 3.7827861309051514, "learning_rate": 6.522293422800308e-07, "loss": 0.0322, "step": 27080 }, { "epoch": 88.79016393442623, "grad_norm": 1.5622636079788208, "learning_rate": 6.518521756610352e-07, "loss": 0.0413, "step": 27081 }, { "epoch": 88.79344262295082, "grad_norm": 2.1151702404022217, "learning_rate": 6.51475114452127e-07, "loss": 0.0664, "step": 27082 }, { "epoch": 88.79672131147541, "grad_norm": 2.473381280899048, "learning_rate": 6.51098158657556e-07, "loss": 0.09, "step": 27083 }, { "epoch": 88.8, "grad_norm": 2.279613971710205, "learning_rate": 6.507213082815745e-07, "loss": 0.1019, "step": 27084 }, { "epoch": 88.80327868852459, "grad_norm": 1.8903279304504395, "learning_rate": 6.5034456332843e-07, "loss": 0.1409, "step": 27085 }, { "epoch": 88.80655737704917, "grad_norm": 2.294574737548828, "learning_rate": 6.499679238023726e-07, "loss": 0.1, "step": 27086 }, { "epoch": 88.80983606557378, "grad_norm": 3.3252224922180176, "learning_rate": 6.495913897076489e-07, "loss": 0.1809, "step": 27087 }, { "epoch": 88.81311475409836, "grad_norm": 2.6041958332061768, "learning_rate": 6.492149610485032e-07, "loss": 0.0761, "step": 27088 }, { "epoch": 88.81639344262295, "grad_norm": 1.3810855150222778, "learning_rate": 6.488386378291823e-07, "loss": 0.1359, "step": 27089 }, { "epoch": 88.81967213114754, "grad_norm": 4.845653533935547, "learning_rate": 6.48462420053927e-07, "loss": 0.1636, "step": 27090 }, { "epoch": 88.82295081967213, "grad_norm": 2.217841148376465, "learning_rate": 6.480863077269827e-07, "loss": 0.0957, "step": 27091 }, { "epoch": 88.82622950819672, "grad_norm": 2.2879137992858887, "learning_rate": 6.477103008525875e-07, "loss": 0.0691, "step": 27092 }, { "epoch": 88.8295081967213, "grad_norm": 2.2625656127929688, "learning_rate": 6.473343994349845e-07, "loss": 0.1298, "step": 27093 }, { "epoch": 88.8327868852459, "grad_norm": 2.3952643871307373, "learning_rate": 6.46958603478407e-07, "loss": 0.2071, "step": 27094 }, { "epoch": 88.8360655737705, "grad_norm": 2.286126136779785, "learning_rate": 6.465829129870993e-07, "loss": 0.1197, "step": 27095 }, { "epoch": 88.83934426229509, "grad_norm": 2.5870020389556885, "learning_rate": 6.462073279652936e-07, "loss": 0.158, "step": 27096 }, { "epoch": 88.84262295081967, "grad_norm": 2.3821823596954346, "learning_rate": 6.458318484172255e-07, "loss": 0.0867, "step": 27097 }, { "epoch": 88.84590163934426, "grad_norm": 1.6688032150268555, "learning_rate": 6.454564743471281e-07, "loss": 0.1673, "step": 27098 }, { "epoch": 88.84918032786885, "grad_norm": 2.4248149394989014, "learning_rate": 6.45081205759236e-07, "loss": 0.2073, "step": 27099 }, { "epoch": 88.85245901639344, "grad_norm": 1.7980700731277466, "learning_rate": 6.447060426577812e-07, "loss": 0.0451, "step": 27100 }, { "epoch": 88.85573770491803, "grad_norm": 1.879704236984253, "learning_rate": 6.443309850469915e-07, "loss": 0.0411, "step": 27101 }, { "epoch": 88.85901639344263, "grad_norm": 1.4687529802322388, "learning_rate": 6.43956032931099e-07, "loss": 0.0247, "step": 27102 }, { "epoch": 88.86229508196722, "grad_norm": 1.8256243467330933, "learning_rate": 6.435811863143271e-07, "loss": 0.1088, "step": 27103 }, { "epoch": 88.8655737704918, "grad_norm": 2.455840587615967, "learning_rate": 6.432064452009079e-07, "loss": 0.1771, "step": 27104 }, { "epoch": 88.8688524590164, "grad_norm": 2.0012528896331787, "learning_rate": 6.428318095950648e-07, "loss": 0.0575, "step": 27105 }, { "epoch": 88.87213114754098, "grad_norm": 1.8872098922729492, "learning_rate": 6.424572795010209e-07, "loss": 0.0983, "step": 27106 }, { "epoch": 88.87540983606557, "grad_norm": 4.708655834197998, "learning_rate": 6.420828549229996e-07, "loss": 0.1554, "step": 27107 }, { "epoch": 88.87868852459016, "grad_norm": 3.490051507949829, "learning_rate": 6.417085358652264e-07, "loss": 0.1502, "step": 27108 }, { "epoch": 88.88196721311475, "grad_norm": 2.149437427520752, "learning_rate": 6.413343223319191e-07, "loss": 0.0686, "step": 27109 }, { "epoch": 88.88524590163935, "grad_norm": 1.7736389636993408, "learning_rate": 6.409602143272975e-07, "loss": 0.0328, "step": 27110 }, { "epoch": 88.88852459016394, "grad_norm": 2.559213399887085, "learning_rate": 6.405862118555784e-07, "loss": 0.0861, "step": 27111 }, { "epoch": 88.89180327868853, "grad_norm": 2.2332983016967773, "learning_rate": 6.402123149209838e-07, "loss": 0.0741, "step": 27112 }, { "epoch": 88.89508196721312, "grad_norm": 2.096916675567627, "learning_rate": 6.398385235277271e-07, "loss": 0.0313, "step": 27113 }, { "epoch": 88.8983606557377, "grad_norm": 2.6442220211029053, "learning_rate": 6.394648376800217e-07, "loss": 0.1, "step": 27114 }, { "epoch": 88.90163934426229, "grad_norm": 2.1350033283233643, "learning_rate": 6.39091257382084e-07, "loss": 0.0556, "step": 27115 }, { "epoch": 88.90491803278688, "grad_norm": 2.115403890609741, "learning_rate": 6.387177826381241e-07, "loss": 0.1645, "step": 27116 }, { "epoch": 88.90819672131147, "grad_norm": 2.5143258571624756, "learning_rate": 6.383444134523554e-07, "loss": 0.1325, "step": 27117 }, { "epoch": 88.91147540983607, "grad_norm": 2.8313522338867188, "learning_rate": 6.379711498289864e-07, "loss": 0.1506, "step": 27118 }, { "epoch": 88.91475409836066, "grad_norm": 2.6842141151428223, "learning_rate": 6.375979917722286e-07, "loss": 0.2173, "step": 27119 }, { "epoch": 88.91803278688525, "grad_norm": 2.3965907096862793, "learning_rate": 6.37224939286285e-07, "loss": 0.2026, "step": 27120 }, { "epoch": 88.92131147540984, "grad_norm": 2.090297222137451, "learning_rate": 6.368519923753669e-07, "loss": 0.0768, "step": 27121 }, { "epoch": 88.92459016393443, "grad_norm": 2.198997735977173, "learning_rate": 6.364791510436774e-07, "loss": 0.1124, "step": 27122 }, { "epoch": 88.92786885245901, "grad_norm": 2.8498404026031494, "learning_rate": 6.361064152954199e-07, "loss": 0.1508, "step": 27123 }, { "epoch": 88.9311475409836, "grad_norm": 2.3556203842163086, "learning_rate": 6.357337851348e-07, "loss": 0.1054, "step": 27124 }, { "epoch": 88.93442622950819, "grad_norm": 3.1154773235321045, "learning_rate": 6.353612605660186e-07, "loss": 0.2022, "step": 27125 }, { "epoch": 88.9377049180328, "grad_norm": 1.6402279138565063, "learning_rate": 6.349888415932737e-07, "loss": 0.0474, "step": 27126 }, { "epoch": 88.94098360655738, "grad_norm": 1.9044256210327148, "learning_rate": 6.346165282207684e-07, "loss": 0.0223, "step": 27127 }, { "epoch": 88.94426229508197, "grad_norm": 2.2358312606811523, "learning_rate": 6.342443204526993e-07, "loss": 0.0493, "step": 27128 }, { "epoch": 88.94754098360656, "grad_norm": 1.6928421258926392, "learning_rate": 6.338722182932632e-07, "loss": 0.1012, "step": 27129 }, { "epoch": 88.95081967213115, "grad_norm": 2.3752925395965576, "learning_rate": 6.335002217466557e-07, "loss": 0.0747, "step": 27130 }, { "epoch": 88.95409836065573, "grad_norm": 1.8702174425125122, "learning_rate": 6.331283308170721e-07, "loss": 0.0518, "step": 27131 }, { "epoch": 88.95737704918032, "grad_norm": 1.9363235235214233, "learning_rate": 6.32756545508707e-07, "loss": 0.1635, "step": 27132 }, { "epoch": 88.96065573770491, "grad_norm": 2.1969823837280273, "learning_rate": 6.323848658257493e-07, "loss": 0.2695, "step": 27133 }, { "epoch": 88.96393442622951, "grad_norm": 2.427640199661255, "learning_rate": 6.320132917723931e-07, "loss": 0.1268, "step": 27134 }, { "epoch": 88.9672131147541, "grad_norm": 2.969905376434326, "learning_rate": 6.316418233528277e-07, "loss": 0.3485, "step": 27135 }, { "epoch": 88.97049180327869, "grad_norm": 1.511130690574646, "learning_rate": 6.312704605712417e-07, "loss": 0.026, "step": 27136 }, { "epoch": 88.97377049180328, "grad_norm": 3.2148690223693848, "learning_rate": 6.308992034318196e-07, "loss": 0.1796, "step": 27137 }, { "epoch": 88.97704918032787, "grad_norm": 2.658158540725708, "learning_rate": 6.305280519387525e-07, "loss": 0.0888, "step": 27138 }, { "epoch": 88.98032786885246, "grad_norm": 33.522613525390625, "learning_rate": 6.301570060962237e-07, "loss": 0.1055, "step": 27139 }, { "epoch": 88.98360655737704, "grad_norm": 1.9774401187896729, "learning_rate": 6.297860659084176e-07, "loss": 0.0373, "step": 27140 }, { "epoch": 88.98688524590163, "grad_norm": 2.1345105171203613, "learning_rate": 6.294152313795155e-07, "loss": 0.0617, "step": 27141 }, { "epoch": 88.99016393442623, "grad_norm": 2.367588996887207, "learning_rate": 6.290445025136971e-07, "loss": 0.0972, "step": 27142 }, { "epoch": 88.99344262295082, "grad_norm": 4.04055643081665, "learning_rate": 6.286738793151482e-07, "loss": 0.1034, "step": 27143 }, { "epoch": 88.99672131147541, "grad_norm": 2.168747901916504, "learning_rate": 6.283033617880441e-07, "loss": 0.0331, "step": 27144 }, { "epoch": 89.0, "grad_norm": 2.4807424545288086, "learning_rate": 6.279329499365649e-07, "loss": 0.1245, "step": 27145 }, { "epoch": 89.00327868852459, "grad_norm": 2.038893461227417, "learning_rate": 6.27562643764883e-07, "loss": 0.0354, "step": 27146 }, { "epoch": 89.00655737704918, "grad_norm": 2.291405439376831, "learning_rate": 6.271924432771803e-07, "loss": 0.0708, "step": 27147 }, { "epoch": 89.00983606557377, "grad_norm": 1.9536693096160889, "learning_rate": 6.26822348477627e-07, "loss": 0.068, "step": 27148 }, { "epoch": 89.01311475409837, "grad_norm": 2.156550645828247, "learning_rate": 6.264523593703975e-07, "loss": 0.125, "step": 27149 }, { "epoch": 89.01639344262296, "grad_norm": 2.3036019802093506, "learning_rate": 6.260824759596629e-07, "loss": 0.1466, "step": 27150 }, { "epoch": 89.01967213114754, "grad_norm": 1.892128586769104, "learning_rate": 6.257126982495965e-07, "loss": 0.1093, "step": 27151 }, { "epoch": 89.02295081967213, "grad_norm": 2.338420867919922, "learning_rate": 6.25343026244365e-07, "loss": 0.1765, "step": 27152 }, { "epoch": 89.02622950819672, "grad_norm": 1.9232476949691772, "learning_rate": 6.249734599481394e-07, "loss": 0.0919, "step": 27153 }, { "epoch": 89.02950819672131, "grad_norm": 1.5227888822555542, "learning_rate": 6.246039993650844e-07, "loss": 0.04, "step": 27154 }, { "epoch": 89.0327868852459, "grad_norm": 1.6377551555633545, "learning_rate": 6.242346444993664e-07, "loss": 0.117, "step": 27155 }, { "epoch": 89.03606557377049, "grad_norm": 2.1917150020599365, "learning_rate": 6.238653953551521e-07, "loss": 0.0579, "step": 27156 }, { "epoch": 89.03934426229509, "grad_norm": 3.8345553874969482, "learning_rate": 6.23496251936605e-07, "loss": 0.1818, "step": 27157 }, { "epoch": 89.04262295081968, "grad_norm": 1.8342913389205933, "learning_rate": 6.231272142478862e-07, "loss": 0.0797, "step": 27158 }, { "epoch": 89.04590163934427, "grad_norm": 2.1270883083343506, "learning_rate": 6.227582822931566e-07, "loss": 0.1333, "step": 27159 }, { "epoch": 89.04918032786885, "grad_norm": 1.6270946264266968, "learning_rate": 6.223894560765786e-07, "loss": 0.0458, "step": 27160 }, { "epoch": 89.05245901639344, "grad_norm": 2.44694185256958, "learning_rate": 6.220207356023101e-07, "loss": 0.1594, "step": 27161 }, { "epoch": 89.05573770491803, "grad_norm": 2.696592092514038, "learning_rate": 6.216521208745074e-07, "loss": 0.1333, "step": 27162 }, { "epoch": 89.05901639344262, "grad_norm": 2.078678846359253, "learning_rate": 6.212836118973276e-07, "loss": 0.0344, "step": 27163 }, { "epoch": 89.0622950819672, "grad_norm": 2.4749467372894287, "learning_rate": 6.20915208674927e-07, "loss": 0.1605, "step": 27164 }, { "epoch": 89.06557377049181, "grad_norm": 2.7275338172912598, "learning_rate": 6.205469112114603e-07, "loss": 0.1483, "step": 27165 }, { "epoch": 89.0688524590164, "grad_norm": 2.046959400177002, "learning_rate": 6.201787195110787e-07, "loss": 0.0816, "step": 27166 }, { "epoch": 89.07213114754099, "grad_norm": 2.2953217029571533, "learning_rate": 6.198106335779342e-07, "loss": 0.0608, "step": 27167 }, { "epoch": 89.07540983606557, "grad_norm": 2.7671685218811035, "learning_rate": 6.194426534161768e-07, "loss": 0.0808, "step": 27168 }, { "epoch": 89.07868852459016, "grad_norm": 2.7353246212005615, "learning_rate": 6.190747790299589e-07, "loss": 0.11, "step": 27169 }, { "epoch": 89.08196721311475, "grad_norm": 3.5686962604522705, "learning_rate": 6.187070104234261e-07, "loss": 0.0588, "step": 27170 }, { "epoch": 89.08524590163934, "grad_norm": 3.0383617877960205, "learning_rate": 6.183393476007248e-07, "loss": 0.1034, "step": 27171 }, { "epoch": 89.08852459016393, "grad_norm": 1.5128142833709717, "learning_rate": 6.179717905660021e-07, "loss": 0.0867, "step": 27172 }, { "epoch": 89.09180327868853, "grad_norm": 2.3473525047302246, "learning_rate": 6.176043393234021e-07, "loss": 0.0648, "step": 27173 }, { "epoch": 89.09508196721312, "grad_norm": 2.3539273738861084, "learning_rate": 6.172369938770695e-07, "loss": 0.0695, "step": 27174 }, { "epoch": 89.09836065573771, "grad_norm": 2.2551114559173584, "learning_rate": 6.168697542311453e-07, "loss": 0.0804, "step": 27175 }, { "epoch": 89.1016393442623, "grad_norm": 2.2444376945495605, "learning_rate": 6.165026203897695e-07, "loss": 0.0407, "step": 27176 }, { "epoch": 89.10491803278688, "grad_norm": 2.1999011039733887, "learning_rate": 6.161355923570844e-07, "loss": 0.0654, "step": 27177 }, { "epoch": 89.10819672131147, "grad_norm": 2.2350728511810303, "learning_rate": 6.157686701372267e-07, "loss": 0.1298, "step": 27178 }, { "epoch": 89.11147540983606, "grad_norm": 2.046497344970703, "learning_rate": 6.154018537343331e-07, "loss": 0.0905, "step": 27179 }, { "epoch": 89.11475409836065, "grad_norm": 2.0289838314056396, "learning_rate": 6.150351431525425e-07, "loss": 0.0503, "step": 27180 }, { "epoch": 89.11803278688525, "grad_norm": 2.238034248352051, "learning_rate": 6.146685383959894e-07, "loss": 0.2261, "step": 27181 }, { "epoch": 89.12131147540984, "grad_norm": 2.773488759994507, "learning_rate": 6.143020394688049e-07, "loss": 0.2041, "step": 27182 }, { "epoch": 89.12459016393443, "grad_norm": 2.354053258895874, "learning_rate": 6.139356463751245e-07, "loss": 0.0975, "step": 27183 }, { "epoch": 89.12786885245902, "grad_norm": 2.66394305229187, "learning_rate": 6.135693591190795e-07, "loss": 0.0952, "step": 27184 }, { "epoch": 89.1311475409836, "grad_norm": 2.540894031524658, "learning_rate": 6.132031777047976e-07, "loss": 0.1038, "step": 27185 }, { "epoch": 89.1344262295082, "grad_norm": 1.978078842163086, "learning_rate": 6.12837102136411e-07, "loss": 0.0574, "step": 27186 }, { "epoch": 89.13770491803278, "grad_norm": 2.813722610473633, "learning_rate": 6.124711324180466e-07, "loss": 0.0927, "step": 27187 }, { "epoch": 89.14098360655737, "grad_norm": 1.8123841285705566, "learning_rate": 6.121052685538298e-07, "loss": 0.0478, "step": 27188 }, { "epoch": 89.14426229508197, "grad_norm": 1.7824454307556152, "learning_rate": 6.117395105478863e-07, "loss": 0.0852, "step": 27189 }, { "epoch": 89.14754098360656, "grad_norm": 2.183929204940796, "learning_rate": 6.113738584043427e-07, "loss": 0.2126, "step": 27190 }, { "epoch": 89.15081967213115, "grad_norm": 1.792616605758667, "learning_rate": 6.110083121273214e-07, "loss": 0.1312, "step": 27191 }, { "epoch": 89.15409836065574, "grad_norm": 2.5315916538238525, "learning_rate": 6.106428717209423e-07, "loss": 0.0806, "step": 27192 }, { "epoch": 89.15737704918033, "grad_norm": 1.981261134147644, "learning_rate": 6.102775371893277e-07, "loss": 0.0528, "step": 27193 }, { "epoch": 89.16065573770491, "grad_norm": 1.7346117496490479, "learning_rate": 6.099123085365954e-07, "loss": 0.0448, "step": 27194 }, { "epoch": 89.1639344262295, "grad_norm": 1.6653634309768677, "learning_rate": 6.095471857668667e-07, "loss": 0.0762, "step": 27195 }, { "epoch": 89.1672131147541, "grad_norm": 1.9784501791000366, "learning_rate": 6.09182168884257e-07, "loss": 0.0989, "step": 27196 }, { "epoch": 89.1704918032787, "grad_norm": 1.6844162940979004, "learning_rate": 6.088172578928819e-07, "loss": 0.0488, "step": 27197 }, { "epoch": 89.17377049180328, "grad_norm": 2.506669044494629, "learning_rate": 6.084524527968549e-07, "loss": 0.2315, "step": 27198 }, { "epoch": 89.17704918032787, "grad_norm": 2.555166006088257, "learning_rate": 6.080877536002938e-07, "loss": 0.0797, "step": 27199 }, { "epoch": 89.18032786885246, "grad_norm": 3.134488582611084, "learning_rate": 6.077231603073075e-07, "loss": 0.0695, "step": 27200 }, { "epoch": 89.18360655737705, "grad_norm": 1.8603284358978271, "learning_rate": 6.073586729220082e-07, "loss": 0.1033, "step": 27201 }, { "epoch": 89.18688524590164, "grad_norm": 2.539376735687256, "learning_rate": 6.06994291448505e-07, "loss": 0.1271, "step": 27202 }, { "epoch": 89.19016393442622, "grad_norm": 2.015498399734497, "learning_rate": 6.066300158909077e-07, "loss": 0.046, "step": 27203 }, { "epoch": 89.19344262295083, "grad_norm": 6.004548072814941, "learning_rate": 6.062658462533244e-07, "loss": 0.2278, "step": 27204 }, { "epoch": 89.19672131147541, "grad_norm": 2.588693380355835, "learning_rate": 6.059017825398606e-07, "loss": 0.0816, "step": 27205 }, { "epoch": 89.2, "grad_norm": 2.0818705558776855, "learning_rate": 6.055378247546217e-07, "loss": 0.1015, "step": 27206 }, { "epoch": 89.20327868852459, "grad_norm": 2.345506191253662, "learning_rate": 6.051739729017103e-07, "loss": 0.081, "step": 27207 }, { "epoch": 89.20655737704918, "grad_norm": 3.03778076171875, "learning_rate": 6.048102269852318e-07, "loss": 0.178, "step": 27208 }, { "epoch": 89.20983606557377, "grad_norm": 2.262054204940796, "learning_rate": 6.044465870092863e-07, "loss": 0.0956, "step": 27209 }, { "epoch": 89.21311475409836, "grad_norm": 2.459681510925293, "learning_rate": 6.040830529779751e-07, "loss": 0.143, "step": 27210 }, { "epoch": 89.21639344262294, "grad_norm": 2.855137825012207, "learning_rate": 6.037196248953947e-07, "loss": 0.1374, "step": 27211 }, { "epoch": 89.21967213114755, "grad_norm": 1.6568176746368408, "learning_rate": 6.033563027656475e-07, "loss": 0.0396, "step": 27212 }, { "epoch": 89.22295081967214, "grad_norm": 2.262775421142578, "learning_rate": 6.029930865928268e-07, "loss": 0.1853, "step": 27213 }, { "epoch": 89.22622950819672, "grad_norm": 3.630946636199951, "learning_rate": 6.026299763810306e-07, "loss": 0.2088, "step": 27214 }, { "epoch": 89.22950819672131, "grad_norm": 1.9136308431625366, "learning_rate": 6.022669721343499e-07, "loss": 0.0866, "step": 27215 }, { "epoch": 89.2327868852459, "grad_norm": 2.1029579639434814, "learning_rate": 6.019040738568826e-07, "loss": 0.0557, "step": 27216 }, { "epoch": 89.23606557377049, "grad_norm": 2.1718671321868896, "learning_rate": 6.015412815527177e-07, "loss": 0.0923, "step": 27217 }, { "epoch": 89.23934426229508, "grad_norm": 1.9750224351882935, "learning_rate": 6.011785952259474e-07, "loss": 0.0354, "step": 27218 }, { "epoch": 89.24262295081967, "grad_norm": 2.1393749713897705, "learning_rate": 6.008160148806596e-07, "loss": 0.0954, "step": 27219 }, { "epoch": 89.24590163934427, "grad_norm": 2.2719180583953857, "learning_rate": 6.004535405209433e-07, "loss": 0.1221, "step": 27220 }, { "epoch": 89.24918032786886, "grad_norm": 3.125946044921875, "learning_rate": 6.000911721508884e-07, "loss": 0.1079, "step": 27221 }, { "epoch": 89.25245901639344, "grad_norm": 2.284606456756592, "learning_rate": 5.997289097745784e-07, "loss": 0.0991, "step": 27222 }, { "epoch": 89.25573770491803, "grad_norm": 2.382467269897461, "learning_rate": 5.99366753396099e-07, "loss": 0.1541, "step": 27223 }, { "epoch": 89.25901639344262, "grad_norm": 2.2875919342041016, "learning_rate": 5.990047030195323e-07, "loss": 0.0838, "step": 27224 }, { "epoch": 89.26229508196721, "grad_norm": 1.4176467657089233, "learning_rate": 5.986427586489629e-07, "loss": 0.0223, "step": 27225 }, { "epoch": 89.2655737704918, "grad_norm": 2.4935739040374756, "learning_rate": 5.982809202884721e-07, "loss": 0.0875, "step": 27226 }, { "epoch": 89.26885245901639, "grad_norm": 2.3389174938201904, "learning_rate": 5.979191879421386e-07, "loss": 0.0592, "step": 27227 }, { "epoch": 89.27213114754099, "grad_norm": 2.1057331562042236, "learning_rate": 5.975575616140406e-07, "loss": 0.055, "step": 27228 }, { "epoch": 89.27540983606558, "grad_norm": 2.5852510929107666, "learning_rate": 5.971960413082589e-07, "loss": 0.0515, "step": 27229 }, { "epoch": 89.27868852459017, "grad_norm": 2.474896192550659, "learning_rate": 5.968346270288683e-07, "loss": 0.1334, "step": 27230 }, { "epoch": 89.28196721311475, "grad_norm": 2.458841562271118, "learning_rate": 5.964733187799444e-07, "loss": 0.1587, "step": 27231 }, { "epoch": 89.28524590163934, "grad_norm": 2.5901503562927246, "learning_rate": 5.961121165655592e-07, "loss": 0.1424, "step": 27232 }, { "epoch": 89.28852459016393, "grad_norm": 1.8417366743087769, "learning_rate": 5.957510203897898e-07, "loss": 0.0774, "step": 27233 }, { "epoch": 89.29180327868852, "grad_norm": 2.318467378616333, "learning_rate": 5.953900302567039e-07, "loss": 0.2346, "step": 27234 }, { "epoch": 89.29508196721312, "grad_norm": 2.4331905841827393, "learning_rate": 5.950291461703739e-07, "loss": 0.0983, "step": 27235 }, { "epoch": 89.29836065573771, "grad_norm": 2.1238510608673096, "learning_rate": 5.946683681348697e-07, "loss": 0.0904, "step": 27236 }, { "epoch": 89.3016393442623, "grad_norm": 2.5260813236236572, "learning_rate": 5.943076961542594e-07, "loss": 0.034, "step": 27237 }, { "epoch": 89.30491803278689, "grad_norm": 2.1011970043182373, "learning_rate": 5.939471302326072e-07, "loss": 0.0864, "step": 27238 }, { "epoch": 89.30819672131148, "grad_norm": 2.3479838371276855, "learning_rate": 5.935866703739824e-07, "loss": 0.0647, "step": 27239 }, { "epoch": 89.31147540983606, "grad_norm": 2.683924674987793, "learning_rate": 5.932263165824481e-07, "loss": 0.0278, "step": 27240 }, { "epoch": 89.31475409836065, "grad_norm": 2.179227113723755, "learning_rate": 5.928660688620658e-07, "loss": 0.0708, "step": 27241 }, { "epoch": 89.31803278688524, "grad_norm": 2.345120429992676, "learning_rate": 5.92505927216902e-07, "loss": 0.0606, "step": 27242 }, { "epoch": 89.32131147540984, "grad_norm": 1.6736122369766235, "learning_rate": 5.921458916510147e-07, "loss": 0.0415, "step": 27243 }, { "epoch": 89.32459016393443, "grad_norm": 2.6120965480804443, "learning_rate": 5.91785962168464e-07, "loss": 0.1381, "step": 27244 }, { "epoch": 89.32786885245902, "grad_norm": 2.3471155166625977, "learning_rate": 5.914261387733089e-07, "loss": 0.0536, "step": 27245 }, { "epoch": 89.33114754098361, "grad_norm": 2.3921244144439697, "learning_rate": 5.910664214696049e-07, "loss": 0.0883, "step": 27246 }, { "epoch": 89.3344262295082, "grad_norm": 3.095309257507324, "learning_rate": 5.907068102614122e-07, "loss": 0.1594, "step": 27247 }, { "epoch": 89.33770491803278, "grad_norm": 2.610403060913086, "learning_rate": 5.903473051527831e-07, "loss": 0.1735, "step": 27248 }, { "epoch": 89.34098360655737, "grad_norm": 1.4762040376663208, "learning_rate": 5.899879061477709e-07, "loss": 0.0237, "step": 27249 }, { "epoch": 89.34426229508196, "grad_norm": 2.4465208053588867, "learning_rate": 5.896286132504281e-07, "loss": 0.0395, "step": 27250 }, { "epoch": 89.34754098360656, "grad_norm": 2.2256577014923096, "learning_rate": 5.89269426464808e-07, "loss": 0.1055, "step": 27251 }, { "epoch": 89.35081967213115, "grad_norm": 2.7474279403686523, "learning_rate": 5.889103457949608e-07, "loss": 0.1689, "step": 27252 }, { "epoch": 89.35409836065574, "grad_norm": 2.046556234359741, "learning_rate": 5.885513712449331e-07, "loss": 0.0997, "step": 27253 }, { "epoch": 89.35737704918033, "grad_norm": 2.668888568878174, "learning_rate": 5.881925028187741e-07, "loss": 0.1723, "step": 27254 }, { "epoch": 89.36065573770492, "grad_norm": 2.6364481449127197, "learning_rate": 5.878337405205314e-07, "loss": 0.1012, "step": 27255 }, { "epoch": 89.3639344262295, "grad_norm": 2.7105934619903564, "learning_rate": 5.874750843542487e-07, "loss": 0.109, "step": 27256 }, { "epoch": 89.3672131147541, "grad_norm": 2.0877797603607178, "learning_rate": 5.871165343239726e-07, "loss": 0.2737, "step": 27257 }, { "epoch": 89.37049180327868, "grad_norm": 2.7722175121307373, "learning_rate": 5.867580904337433e-07, "loss": 0.2094, "step": 27258 }, { "epoch": 89.37377049180328, "grad_norm": 2.4362239837646484, "learning_rate": 5.863997526876019e-07, "loss": 0.0824, "step": 27259 }, { "epoch": 89.37704918032787, "grad_norm": 3.119934320449829, "learning_rate": 5.860415210895942e-07, "loss": 0.1258, "step": 27260 }, { "epoch": 89.38032786885246, "grad_norm": 1.4681774377822876, "learning_rate": 5.856833956437546e-07, "loss": 0.0296, "step": 27261 }, { "epoch": 89.38360655737705, "grad_norm": 3.2471673488616943, "learning_rate": 5.853253763541244e-07, "loss": 0.075, "step": 27262 }, { "epoch": 89.38688524590164, "grad_norm": 1.8230468034744263, "learning_rate": 5.849674632247382e-07, "loss": 0.0413, "step": 27263 }, { "epoch": 89.39016393442623, "grad_norm": 2.0364797115325928, "learning_rate": 5.846096562596338e-07, "loss": 0.1397, "step": 27264 }, { "epoch": 89.39344262295081, "grad_norm": 1.619361162185669, "learning_rate": 5.842519554628445e-07, "loss": 0.0335, "step": 27265 }, { "epoch": 89.3967213114754, "grad_norm": 2.3851587772369385, "learning_rate": 5.838943608384051e-07, "loss": 0.1403, "step": 27266 }, { "epoch": 89.4, "grad_norm": 2.111738681793213, "learning_rate": 5.835368723903456e-07, "loss": 0.1317, "step": 27267 }, { "epoch": 89.4032786885246, "grad_norm": 2.102062702178955, "learning_rate": 5.831794901226995e-07, "loss": 0.1407, "step": 27268 }, { "epoch": 89.40655737704918, "grad_norm": 2.797283411026001, "learning_rate": 5.828222140394957e-07, "loss": 0.1304, "step": 27269 }, { "epoch": 89.40983606557377, "grad_norm": 2.357619285583496, "learning_rate": 5.824650441447632e-07, "loss": 0.1535, "step": 27270 }, { "epoch": 89.41311475409836, "grad_norm": 2.35638165473938, "learning_rate": 5.821079804425301e-07, "loss": 0.0955, "step": 27271 }, { "epoch": 89.41639344262295, "grad_norm": 3.397367000579834, "learning_rate": 5.817510229368184e-07, "loss": 0.1374, "step": 27272 }, { "epoch": 89.41967213114754, "grad_norm": 2.192898988723755, "learning_rate": 5.813941716316585e-07, "loss": 0.0401, "step": 27273 }, { "epoch": 89.42295081967212, "grad_norm": 2.21736216545105, "learning_rate": 5.810374265310726e-07, "loss": 0.0891, "step": 27274 }, { "epoch": 89.42622950819673, "grad_norm": 1.6811274290084839, "learning_rate": 5.80680787639083e-07, "loss": 0.0425, "step": 27275 }, { "epoch": 89.42950819672132, "grad_norm": 2.147345542907715, "learning_rate": 5.803242549597099e-07, "loss": 0.1174, "step": 27276 }, { "epoch": 89.4327868852459, "grad_norm": 2.602402448654175, "learning_rate": 5.799678284969757e-07, "loss": 0.0552, "step": 27277 }, { "epoch": 89.43606557377049, "grad_norm": 2.3426551818847656, "learning_rate": 5.796115082548981e-07, "loss": 0.0912, "step": 27278 }, { "epoch": 89.43934426229508, "grad_norm": 1.948075771331787, "learning_rate": 5.792552942374962e-07, "loss": 0.0831, "step": 27279 }, { "epoch": 89.44262295081967, "grad_norm": 2.4450604915618896, "learning_rate": 5.788991864487847e-07, "loss": 0.1594, "step": 27280 }, { "epoch": 89.44590163934426, "grad_norm": 2.6958978176116943, "learning_rate": 5.785431848927814e-07, "loss": 0.164, "step": 27281 }, { "epoch": 89.44918032786886, "grad_norm": 2.5559067726135254, "learning_rate": 5.781872895735008e-07, "loss": 0.1133, "step": 27282 }, { "epoch": 89.45245901639345, "grad_norm": 3.3745296001434326, "learning_rate": 5.77831500494953e-07, "loss": 0.0684, "step": 27283 }, { "epoch": 89.45573770491804, "grad_norm": 3.149521589279175, "learning_rate": 5.774758176611505e-07, "loss": 0.1346, "step": 27284 }, { "epoch": 89.45901639344262, "grad_norm": 2.2714145183563232, "learning_rate": 5.771202410761079e-07, "loss": 0.1213, "step": 27285 }, { "epoch": 89.46229508196721, "grad_norm": 2.8848488330841064, "learning_rate": 5.767647707438306e-07, "loss": 0.1524, "step": 27286 }, { "epoch": 89.4655737704918, "grad_norm": 2.476189136505127, "learning_rate": 5.764094066683268e-07, "loss": 0.0532, "step": 27287 }, { "epoch": 89.46885245901639, "grad_norm": 2.6637542247772217, "learning_rate": 5.760541488536076e-07, "loss": 0.1135, "step": 27288 }, { "epoch": 89.47213114754098, "grad_norm": 3.298828125, "learning_rate": 5.756989973036753e-07, "loss": 0.074, "step": 27289 }, { "epoch": 89.47540983606558, "grad_norm": 2.2289745807647705, "learning_rate": 5.753439520225356e-07, "loss": 0.1741, "step": 27290 }, { "epoch": 89.47868852459017, "grad_norm": 3.5042436122894287, "learning_rate": 5.74989013014191e-07, "loss": 0.2596, "step": 27291 }, { "epoch": 89.48196721311476, "grad_norm": 2.2107529640197754, "learning_rate": 5.746341802826461e-07, "loss": 0.0746, "step": 27292 }, { "epoch": 89.48524590163935, "grad_norm": 1.7209327220916748, "learning_rate": 5.742794538319008e-07, "loss": 0.0503, "step": 27293 }, { "epoch": 89.48852459016393, "grad_norm": 2.7659783363342285, "learning_rate": 5.739248336659531e-07, "loss": 0.0965, "step": 27294 }, { "epoch": 89.49180327868852, "grad_norm": 1.8409931659698486, "learning_rate": 5.735703197888054e-07, "loss": 0.0729, "step": 27295 }, { "epoch": 89.49508196721311, "grad_norm": 2.3234446048736572, "learning_rate": 5.732159122044534e-07, "loss": 0.0898, "step": 27296 }, { "epoch": 89.4983606557377, "grad_norm": 2.391714096069336, "learning_rate": 5.728616109168938e-07, "loss": 0.2042, "step": 27297 }, { "epoch": 89.5016393442623, "grad_norm": 1.9619370698928833, "learning_rate": 5.725074159301192e-07, "loss": 0.0642, "step": 27298 }, { "epoch": 89.50491803278689, "grad_norm": 2.0088794231414795, "learning_rate": 5.721533272481272e-07, "loss": 0.0581, "step": 27299 }, { "epoch": 89.50819672131148, "grad_norm": 1.968276023864746, "learning_rate": 5.717993448749093e-07, "loss": 0.0823, "step": 27300 }, { "epoch": 89.51147540983607, "grad_norm": 2.581912040710449, "learning_rate": 5.714454688144556e-07, "loss": 0.2097, "step": 27301 }, { "epoch": 89.51475409836065, "grad_norm": 2.217932939529419, "learning_rate": 5.710916990707571e-07, "loss": 0.0846, "step": 27302 }, { "epoch": 89.51803278688524, "grad_norm": 8.33527946472168, "learning_rate": 5.707380356478042e-07, "loss": 0.0459, "step": 27303 }, { "epoch": 89.52131147540983, "grad_norm": 2.133753538131714, "learning_rate": 5.703844785495838e-07, "loss": 0.195, "step": 27304 }, { "epoch": 89.52459016393442, "grad_norm": 1.9149997234344482, "learning_rate": 5.700310277800836e-07, "loss": 0.0274, "step": 27305 }, { "epoch": 89.52786885245902, "grad_norm": 2.58134126663208, "learning_rate": 5.696776833432882e-07, "loss": 0.0711, "step": 27306 }, { "epoch": 89.53114754098361, "grad_norm": 2.2206149101257324, "learning_rate": 5.693244452431801e-07, "loss": 0.1171, "step": 27307 }, { "epoch": 89.5344262295082, "grad_norm": 2.7548205852508545, "learning_rate": 5.68971313483745e-07, "loss": 0.1759, "step": 27308 }, { "epoch": 89.53770491803279, "grad_norm": 2.4973902702331543, "learning_rate": 5.68618288068965e-07, "loss": 0.172, "step": 27309 }, { "epoch": 89.54098360655738, "grad_norm": 1.9691200256347656, "learning_rate": 5.682653690028206e-07, "loss": 0.0549, "step": 27310 }, { "epoch": 89.54426229508196, "grad_norm": 1.885907769203186, "learning_rate": 5.679125562892884e-07, "loss": 0.1295, "step": 27311 }, { "epoch": 89.54754098360655, "grad_norm": 1.7369122505187988, "learning_rate": 5.675598499323509e-07, "loss": 0.1084, "step": 27312 }, { "epoch": 89.55081967213114, "grad_norm": 2.5543203353881836, "learning_rate": 5.672072499359826e-07, "loss": 0.0699, "step": 27313 }, { "epoch": 89.55409836065574, "grad_norm": 2.265665054321289, "learning_rate": 5.668547563041604e-07, "loss": 0.0504, "step": 27314 }, { "epoch": 89.55737704918033, "grad_norm": 4.0453948974609375, "learning_rate": 5.665023690408577e-07, "loss": 0.118, "step": 27315 }, { "epoch": 89.56065573770492, "grad_norm": 2.1200180053710938, "learning_rate": 5.661500881500514e-07, "loss": 0.174, "step": 27316 }, { "epoch": 89.56393442622951, "grad_norm": 2.846066951751709, "learning_rate": 5.657979136357106e-07, "loss": 0.0979, "step": 27317 }, { "epoch": 89.5672131147541, "grad_norm": 2.3913073539733887, "learning_rate": 5.654458455018075e-07, "loss": 0.0938, "step": 27318 }, { "epoch": 89.57049180327868, "grad_norm": 3.0695841312408447, "learning_rate": 5.650938837523124e-07, "loss": 0.1079, "step": 27319 }, { "epoch": 89.57377049180327, "grad_norm": 3.547804832458496, "learning_rate": 5.64742028391192e-07, "loss": 0.0866, "step": 27320 }, { "epoch": 89.57704918032788, "grad_norm": 2.2250399589538574, "learning_rate": 5.643902794224165e-07, "loss": 0.1148, "step": 27321 }, { "epoch": 89.58032786885246, "grad_norm": 2.024961233139038, "learning_rate": 5.640386368499517e-07, "loss": 0.0813, "step": 27322 }, { "epoch": 89.58360655737705, "grad_norm": 2.879424571990967, "learning_rate": 5.636871006777622e-07, "loss": 0.2159, "step": 27323 }, { "epoch": 89.58688524590164, "grad_norm": 2.8868274688720703, "learning_rate": 5.633356709098103e-07, "loss": 0.0949, "step": 27324 }, { "epoch": 89.59016393442623, "grad_norm": 2.5066566467285156, "learning_rate": 5.629843475500618e-07, "loss": 0.0697, "step": 27325 }, { "epoch": 89.59344262295082, "grad_norm": 2.375852346420288, "learning_rate": 5.626331306024768e-07, "loss": 0.1349, "step": 27326 }, { "epoch": 89.5967213114754, "grad_norm": 1.6614434719085693, "learning_rate": 5.622820200710156e-07, "loss": 0.0245, "step": 27327 }, { "epoch": 89.6, "grad_norm": 2.052234411239624, "learning_rate": 5.619310159596358e-07, "loss": 0.0687, "step": 27328 }, { "epoch": 89.6032786885246, "grad_norm": 8.047658920288086, "learning_rate": 5.615801182722979e-07, "loss": 0.1509, "step": 27329 }, { "epoch": 89.60655737704919, "grad_norm": 2.0746450424194336, "learning_rate": 5.612293270129588e-07, "loss": 0.1241, "step": 27330 }, { "epoch": 89.60983606557377, "grad_norm": 2.800300359725952, "learning_rate": 5.608786421855728e-07, "loss": 0.1537, "step": 27331 }, { "epoch": 89.61311475409836, "grad_norm": 2.2092318534851074, "learning_rate": 5.605280637940935e-07, "loss": 0.1477, "step": 27332 }, { "epoch": 89.61639344262295, "grad_norm": 1.4787683486938477, "learning_rate": 5.601775918424745e-07, "loss": 0.0336, "step": 27333 }, { "epoch": 89.61967213114754, "grad_norm": 2.4724113941192627, "learning_rate": 5.598272263346682e-07, "loss": 0.1065, "step": 27334 }, { "epoch": 89.62295081967213, "grad_norm": 2.3164865970611572, "learning_rate": 5.594769672746259e-07, "loss": 0.0468, "step": 27335 }, { "epoch": 89.62622950819672, "grad_norm": 2.5027883052825928, "learning_rate": 5.591268146662975e-07, "loss": 0.1502, "step": 27336 }, { "epoch": 89.62950819672132, "grad_norm": 2.2083873748779297, "learning_rate": 5.58776768513628e-07, "loss": 0.0719, "step": 27337 }, { "epoch": 89.6327868852459, "grad_norm": 2.572740077972412, "learning_rate": 5.584268288205674e-07, "loss": 0.2967, "step": 27338 }, { "epoch": 89.6360655737705, "grad_norm": 2.3848659992218018, "learning_rate": 5.580769955910625e-07, "loss": 0.2413, "step": 27339 }, { "epoch": 89.63934426229508, "grad_norm": 1.8765884637832642, "learning_rate": 5.577272688290547e-07, "loss": 0.0801, "step": 27340 }, { "epoch": 89.64262295081967, "grad_norm": 2.0986390113830566, "learning_rate": 5.573776485384908e-07, "loss": 0.0204, "step": 27341 }, { "epoch": 89.64590163934426, "grad_norm": 2.052213430404663, "learning_rate": 5.570281347233109e-07, "loss": 0.0442, "step": 27342 }, { "epoch": 89.64918032786885, "grad_norm": 5.526979923248291, "learning_rate": 5.566787273874563e-07, "loss": 0.1507, "step": 27343 }, { "epoch": 89.65245901639344, "grad_norm": 2.2151036262512207, "learning_rate": 5.563294265348695e-07, "loss": 0.1418, "step": 27344 }, { "epoch": 89.65573770491804, "grad_norm": 2.2453930377960205, "learning_rate": 5.559802321694874e-07, "loss": 0.0592, "step": 27345 }, { "epoch": 89.65901639344263, "grad_norm": 2.0494837760925293, "learning_rate": 5.556311442952455e-07, "loss": 0.0935, "step": 27346 }, { "epoch": 89.66229508196722, "grad_norm": 1.5267125368118286, "learning_rate": 5.552821629160842e-07, "loss": 0.0253, "step": 27347 }, { "epoch": 89.6655737704918, "grad_norm": 2.6973235607147217, "learning_rate": 5.549332880359359e-07, "loss": 0.1419, "step": 27348 }, { "epoch": 89.66885245901639, "grad_norm": 3.6387970447540283, "learning_rate": 5.545845196587352e-07, "loss": 0.0742, "step": 27349 }, { "epoch": 89.67213114754098, "grad_norm": 2.28847074508667, "learning_rate": 5.542358577884144e-07, "loss": 0.0866, "step": 27350 }, { "epoch": 89.67540983606557, "grad_norm": 2.223519802093506, "learning_rate": 5.538873024289059e-07, "loss": 0.0695, "step": 27351 }, { "epoch": 89.67868852459016, "grad_norm": 1.7490410804748535, "learning_rate": 5.535388535841391e-07, "loss": 0.0569, "step": 27352 }, { "epoch": 89.68196721311476, "grad_norm": 2.858635902404785, "learning_rate": 5.531905112580449e-07, "loss": 0.0791, "step": 27353 }, { "epoch": 89.68524590163935, "grad_norm": 2.3184425830841064, "learning_rate": 5.528422754545471e-07, "loss": 0.2131, "step": 27354 }, { "epoch": 89.68852459016394, "grad_norm": 2.3297789096832275, "learning_rate": 5.524941461775779e-07, "loss": 0.1252, "step": 27355 }, { "epoch": 89.69180327868852, "grad_norm": 2.714846134185791, "learning_rate": 5.521461234310599e-07, "loss": 0.1771, "step": 27356 }, { "epoch": 89.69508196721311, "grad_norm": 2.4561541080474854, "learning_rate": 5.517982072189165e-07, "loss": 0.0603, "step": 27357 }, { "epoch": 89.6983606557377, "grad_norm": 3.5362117290496826, "learning_rate": 5.514503975450735e-07, "loss": 0.0823, "step": 27358 }, { "epoch": 89.70163934426229, "grad_norm": 2.354079008102417, "learning_rate": 5.51102694413449e-07, "loss": 0.1064, "step": 27359 }, { "epoch": 89.70491803278688, "grad_norm": 1.911316990852356, "learning_rate": 5.507550978279674e-07, "loss": 0.0681, "step": 27360 }, { "epoch": 89.70819672131148, "grad_norm": 2.5664334297180176, "learning_rate": 5.504076077925468e-07, "loss": 0.1771, "step": 27361 }, { "epoch": 89.71147540983607, "grad_norm": 2.604254961013794, "learning_rate": 5.500602243111064e-07, "loss": 0.0805, "step": 27362 }, { "epoch": 89.71475409836066, "grad_norm": 2.6935017108917236, "learning_rate": 5.497129473875606e-07, "loss": 0.1509, "step": 27363 }, { "epoch": 89.71803278688525, "grad_norm": 2.523409843444824, "learning_rate": 5.493657770258287e-07, "loss": 0.1202, "step": 27364 }, { "epoch": 89.72131147540983, "grad_norm": 2.530505895614624, "learning_rate": 5.49018713229823e-07, "loss": 0.1129, "step": 27365 }, { "epoch": 89.72459016393442, "grad_norm": 2.1010403633117676, "learning_rate": 5.486717560034582e-07, "loss": 0.0938, "step": 27366 }, { "epoch": 89.72786885245901, "grad_norm": 3.2915070056915283, "learning_rate": 5.483249053506456e-07, "loss": 0.1801, "step": 27367 }, { "epoch": 89.73114754098361, "grad_norm": 2.702317714691162, "learning_rate": 5.479781612752976e-07, "loss": 0.1172, "step": 27368 }, { "epoch": 89.7344262295082, "grad_norm": 2.902799129486084, "learning_rate": 5.476315237813235e-07, "loss": 0.0977, "step": 27369 }, { "epoch": 89.73770491803279, "grad_norm": 5.879055023193359, "learning_rate": 5.47284992872632e-07, "loss": 0.1161, "step": 27370 }, { "epoch": 89.74098360655738, "grad_norm": 1.9679839611053467, "learning_rate": 5.469385685531314e-07, "loss": 0.1083, "step": 27371 }, { "epoch": 89.74426229508197, "grad_norm": 1.8483721017837524, "learning_rate": 5.465922508267252e-07, "loss": 0.1017, "step": 27372 }, { "epoch": 89.74754098360656, "grad_norm": 2.037369966506958, "learning_rate": 5.462460396973212e-07, "loss": 0.0765, "step": 27373 }, { "epoch": 89.75081967213114, "grad_norm": 2.291097402572632, "learning_rate": 5.458999351688232e-07, "loss": 0.0622, "step": 27374 }, { "epoch": 89.75409836065573, "grad_norm": 1.5985980033874512, "learning_rate": 5.455539372451335e-07, "loss": 0.1196, "step": 27375 }, { "epoch": 89.75737704918033, "grad_norm": 2.041785717010498, "learning_rate": 5.452080459301512e-07, "loss": 0.1354, "step": 27376 }, { "epoch": 89.76065573770492, "grad_norm": 2.027477741241455, "learning_rate": 5.44862261227781e-07, "loss": 0.0538, "step": 27377 }, { "epoch": 89.76393442622951, "grad_norm": 3.055379629135132, "learning_rate": 5.445165831419186e-07, "loss": 0.098, "step": 27378 }, { "epoch": 89.7672131147541, "grad_norm": 2.3456146717071533, "learning_rate": 5.441710116764642e-07, "loss": 0.0551, "step": 27379 }, { "epoch": 89.77049180327869, "grad_norm": 2.1107306480407715, "learning_rate": 5.438255468353104e-07, "loss": 0.0473, "step": 27380 }, { "epoch": 89.77377049180328, "grad_norm": 2.6793479919433594, "learning_rate": 5.434801886223584e-07, "loss": 0.2489, "step": 27381 }, { "epoch": 89.77704918032786, "grad_norm": 2.281991720199585, "learning_rate": 5.431349370414984e-07, "loss": 0.1056, "step": 27382 }, { "epoch": 89.78032786885245, "grad_norm": 1.0781114101409912, "learning_rate": 5.427897920966252e-07, "loss": 0.0183, "step": 27383 }, { "epoch": 89.78360655737706, "grad_norm": 2.7377660274505615, "learning_rate": 5.4244475379163e-07, "loss": 0.1003, "step": 27384 }, { "epoch": 89.78688524590164, "grad_norm": 1.8456051349639893, "learning_rate": 5.42099822130403e-07, "loss": 0.0861, "step": 27385 }, { "epoch": 89.79016393442623, "grad_norm": 2.657876968383789, "learning_rate": 5.417549971168345e-07, "loss": 0.1474, "step": 27386 }, { "epoch": 89.79344262295082, "grad_norm": 1.6986148357391357, "learning_rate": 5.414102787548126e-07, "loss": 0.0492, "step": 27387 }, { "epoch": 89.79672131147541, "grad_norm": 2.1319849491119385, "learning_rate": 5.41065667048225e-07, "loss": 0.2849, "step": 27388 }, { "epoch": 89.8, "grad_norm": 2.2425918579101562, "learning_rate": 5.407211620009545e-07, "loss": 0.0429, "step": 27389 }, { "epoch": 89.80327868852459, "grad_norm": 2.3004071712493896, "learning_rate": 5.4037676361689e-07, "loss": 0.0786, "step": 27390 }, { "epoch": 89.80655737704917, "grad_norm": 1.8251594305038452, "learning_rate": 5.400324718999139e-07, "loss": 0.0475, "step": 27391 }, { "epoch": 89.80983606557378, "grad_norm": 2.1521875858306885, "learning_rate": 5.396882868539044e-07, "loss": 0.0519, "step": 27392 }, { "epoch": 89.81311475409836, "grad_norm": 2.2871594429016113, "learning_rate": 5.393442084827482e-07, "loss": 0.1255, "step": 27393 }, { "epoch": 89.81639344262295, "grad_norm": 2.258838653564453, "learning_rate": 5.390002367903225e-07, "loss": 0.2353, "step": 27394 }, { "epoch": 89.81967213114754, "grad_norm": 1.6737035512924194, "learning_rate": 5.38656371780506e-07, "loss": 0.1059, "step": 27395 }, { "epoch": 89.82295081967213, "grad_norm": 2.219108819961548, "learning_rate": 5.383126134571748e-07, "loss": 0.0687, "step": 27396 }, { "epoch": 89.82622950819672, "grad_norm": 2.2314038276672363, "learning_rate": 5.379689618242089e-07, "loss": 0.1413, "step": 27397 }, { "epoch": 89.8295081967213, "grad_norm": 2.2022788524627686, "learning_rate": 5.3762541688548e-07, "loss": 0.0607, "step": 27398 }, { "epoch": 89.8327868852459, "grad_norm": 2.321185827255249, "learning_rate": 5.372819786448613e-07, "loss": 0.0464, "step": 27399 }, { "epoch": 89.8360655737705, "grad_norm": 1.820523738861084, "learning_rate": 5.369386471062287e-07, "loss": 0.1056, "step": 27400 }, { "epoch": 89.83934426229509, "grad_norm": 2.0975892543792725, "learning_rate": 5.365954222734526e-07, "loss": 0.1739, "step": 27401 }, { "epoch": 89.84262295081967, "grad_norm": 2.1907153129577637, "learning_rate": 5.362523041504009e-07, "loss": 0.224, "step": 27402 }, { "epoch": 89.84590163934426, "grad_norm": 1.0953279733657837, "learning_rate": 5.359092927409459e-07, "loss": 0.0158, "step": 27403 }, { "epoch": 89.84918032786885, "grad_norm": 2.710120916366577, "learning_rate": 5.355663880489537e-07, "loss": 0.1693, "step": 27404 }, { "epoch": 89.85245901639344, "grad_norm": 1.9404293298721313, "learning_rate": 5.352235900782899e-07, "loss": 0.0778, "step": 27405 }, { "epoch": 89.85573770491803, "grad_norm": 1.379082441329956, "learning_rate": 5.348808988328213e-07, "loss": 0.0275, "step": 27406 }, { "epoch": 89.85901639344263, "grad_norm": 2.757542133331299, "learning_rate": 5.345383143164118e-07, "loss": 0.2879, "step": 27407 }, { "epoch": 89.86229508196722, "grad_norm": 2.0694172382354736, "learning_rate": 5.341958365329247e-07, "loss": 0.0648, "step": 27408 }, { "epoch": 89.8655737704918, "grad_norm": 2.1857714653015137, "learning_rate": 5.338534654862226e-07, "loss": 0.134, "step": 27409 }, { "epoch": 89.8688524590164, "grad_norm": 2.481797695159912, "learning_rate": 5.335112011801635e-07, "loss": 0.174, "step": 27410 }, { "epoch": 89.87213114754098, "grad_norm": 2.7772908210754395, "learning_rate": 5.331690436186076e-07, "loss": 0.1823, "step": 27411 }, { "epoch": 89.87540983606557, "grad_norm": 2.2315597534179688, "learning_rate": 5.328269928054164e-07, "loss": 0.1988, "step": 27412 }, { "epoch": 89.87868852459016, "grad_norm": 1.930225133895874, "learning_rate": 5.324850487444422e-07, "loss": 0.1072, "step": 27413 }, { "epoch": 89.88196721311475, "grad_norm": 2.0201499462127686, "learning_rate": 5.321432114395441e-07, "loss": 0.0927, "step": 27414 }, { "epoch": 89.88524590163935, "grad_norm": 1.9753830432891846, "learning_rate": 5.318014808945737e-07, "loss": 0.0638, "step": 27415 }, { "epoch": 89.88852459016394, "grad_norm": 2.7022764682769775, "learning_rate": 5.314598571133867e-07, "loss": 0.0383, "step": 27416 }, { "epoch": 89.89180327868853, "grad_norm": 2.9270286560058594, "learning_rate": 5.311183400998355e-07, "loss": 0.1057, "step": 27417 }, { "epoch": 89.89508196721312, "grad_norm": 2.1410272121429443, "learning_rate": 5.307769298577703e-07, "loss": 0.1204, "step": 27418 }, { "epoch": 89.8983606557377, "grad_norm": 2.447328567504883, "learning_rate": 5.304356263910393e-07, "loss": 0.2123, "step": 27419 }, { "epoch": 89.90163934426229, "grad_norm": 3.400057792663574, "learning_rate": 5.300944297034927e-07, "loss": 0.1716, "step": 27420 }, { "epoch": 89.90491803278688, "grad_norm": 38.81071853637695, "learning_rate": 5.297533397989785e-07, "loss": 0.0846, "step": 27421 }, { "epoch": 89.90819672131147, "grad_norm": 2.5018324851989746, "learning_rate": 5.294123566813425e-07, "loss": 0.085, "step": 27422 }, { "epoch": 89.91147540983607, "grad_norm": 3.019191026687622, "learning_rate": 5.290714803544284e-07, "loss": 0.2688, "step": 27423 }, { "epoch": 89.91475409836066, "grad_norm": 2.350911855697632, "learning_rate": 5.287307108220796e-07, "loss": 0.2191, "step": 27424 }, { "epoch": 89.91803278688525, "grad_norm": 3.136544942855835, "learning_rate": 5.28390048088141e-07, "loss": 0.1113, "step": 27425 }, { "epoch": 89.92131147540984, "grad_norm": 1.6070647239685059, "learning_rate": 5.280494921564527e-07, "loss": 0.0319, "step": 27426 }, { "epoch": 89.92459016393443, "grad_norm": 1.9015312194824219, "learning_rate": 5.27709043030854e-07, "loss": 0.1152, "step": 27427 }, { "epoch": 89.92786885245901, "grad_norm": 1.635533094406128, "learning_rate": 5.27368700715184e-07, "loss": 0.0686, "step": 27428 }, { "epoch": 89.9311475409836, "grad_norm": 1.8516801595687866, "learning_rate": 5.270284652132829e-07, "loss": 0.1068, "step": 27429 }, { "epoch": 89.93442622950819, "grad_norm": 1.7385867834091187, "learning_rate": 5.266883365289844e-07, "loss": 0.0992, "step": 27430 }, { "epoch": 89.9377049180328, "grad_norm": 3.196249485015869, "learning_rate": 5.263483146661242e-07, "loss": 0.2402, "step": 27431 }, { "epoch": 89.94098360655738, "grad_norm": 2.4176747798919678, "learning_rate": 5.260083996285359e-07, "loss": 0.0608, "step": 27432 }, { "epoch": 89.94426229508197, "grad_norm": 2.719337224960327, "learning_rate": 5.256685914200555e-07, "loss": 0.0986, "step": 27433 }, { "epoch": 89.94754098360656, "grad_norm": 2.6463944911956787, "learning_rate": 5.25328890044512e-07, "loss": 0.0783, "step": 27434 }, { "epoch": 89.95081967213115, "grad_norm": 2.64052677154541, "learning_rate": 5.249892955057368e-07, "loss": 0.1712, "step": 27435 }, { "epoch": 89.95409836065573, "grad_norm": 2.2742958068847656, "learning_rate": 5.246498078075579e-07, "loss": 0.1423, "step": 27436 }, { "epoch": 89.95737704918032, "grad_norm": 2.350618839263916, "learning_rate": 5.243104269538035e-07, "loss": 0.1141, "step": 27437 }, { "epoch": 89.96065573770491, "grad_norm": 2.018988609313965, "learning_rate": 5.239711529483027e-07, "loss": 0.0375, "step": 27438 }, { "epoch": 89.96393442622951, "grad_norm": 1.7852760553359985, "learning_rate": 5.236319857948802e-07, "loss": 0.0339, "step": 27439 }, { "epoch": 89.9672131147541, "grad_norm": 2.0035202503204346, "learning_rate": 5.232929254973595e-07, "loss": 0.1076, "step": 27440 }, { "epoch": 89.97049180327869, "grad_norm": 7.546799659729004, "learning_rate": 5.229539720595634e-07, "loss": 0.0326, "step": 27441 }, { "epoch": 89.97377049180328, "grad_norm": 2.126035451889038, "learning_rate": 5.226151254853152e-07, "loss": 0.0433, "step": 27442 }, { "epoch": 89.97704918032787, "grad_norm": 2.5377800464630127, "learning_rate": 5.222763857784364e-07, "loss": 0.1085, "step": 27443 }, { "epoch": 89.98032786885246, "grad_norm": 2.4431748390197754, "learning_rate": 5.219377529427461e-07, "loss": 0.095, "step": 27444 }, { "epoch": 89.98360655737704, "grad_norm": 2.327075481414795, "learning_rate": 5.215992269820602e-07, "loss": 0.041, "step": 27445 }, { "epoch": 89.98688524590163, "grad_norm": 2.3116917610168457, "learning_rate": 5.212608079001991e-07, "loss": 0.1547, "step": 27446 }, { "epoch": 89.99016393442623, "grad_norm": 2.1992385387420654, "learning_rate": 5.209224957009785e-07, "loss": 0.186, "step": 27447 }, { "epoch": 89.99344262295082, "grad_norm": 2.836360454559326, "learning_rate": 5.205842903882108e-07, "loss": 0.1298, "step": 27448 }, { "epoch": 89.99672131147541, "grad_norm": 2.315814971923828, "learning_rate": 5.202461919657131e-07, "loss": 0.1696, "step": 27449 }, { "epoch": 90.0, "grad_norm": 1.7996597290039062, "learning_rate": 5.199082004372958e-07, "loss": 0.0966, "step": 27450 }, { "epoch": 90.00327868852459, "grad_norm": 2.303461790084839, "learning_rate": 5.195703158067689e-07, "loss": 0.2927, "step": 27451 }, { "epoch": 90.00655737704918, "grad_norm": 1.6446009874343872, "learning_rate": 5.192325380779461e-07, "loss": 0.0356, "step": 27452 }, { "epoch": 90.00983606557377, "grad_norm": 1.994246482849121, "learning_rate": 5.188948672546335e-07, "loss": 0.117, "step": 27453 }, { "epoch": 90.01311475409837, "grad_norm": 3.2143969535827637, "learning_rate": 5.185573033406388e-07, "loss": 0.1371, "step": 27454 }, { "epoch": 90.01639344262296, "grad_norm": 2.469339609146118, "learning_rate": 5.18219846339767e-07, "loss": 0.1787, "step": 27455 }, { "epoch": 90.01967213114754, "grad_norm": 1.6266840696334839, "learning_rate": 5.178824962558271e-07, "loss": 0.0252, "step": 27456 }, { "epoch": 90.02295081967213, "grad_norm": 2.289875030517578, "learning_rate": 5.175452530926206e-07, "loss": 0.0313, "step": 27457 }, { "epoch": 90.02622950819672, "grad_norm": 1.8386223316192627, "learning_rate": 5.1720811685395e-07, "loss": 0.0302, "step": 27458 }, { "epoch": 90.02950819672131, "grad_norm": 1.6116602420806885, "learning_rate": 5.168710875436178e-07, "loss": 0.0926, "step": 27459 }, { "epoch": 90.0327868852459, "grad_norm": 2.422379493713379, "learning_rate": 5.165341651654243e-07, "loss": 0.0971, "step": 27460 }, { "epoch": 90.03606557377049, "grad_norm": 2.9332165718078613, "learning_rate": 5.161973497231687e-07, "loss": 0.1133, "step": 27461 }, { "epoch": 90.03934426229509, "grad_norm": 1.8585175275802612, "learning_rate": 5.158606412206491e-07, "loss": 0.0516, "step": 27462 }, { "epoch": 90.04262295081968, "grad_norm": 2.8933472633361816, "learning_rate": 5.155240396616601e-07, "loss": 0.0807, "step": 27463 }, { "epoch": 90.04590163934427, "grad_norm": 2.409034490585327, "learning_rate": 5.151875450499999e-07, "loss": 0.1024, "step": 27464 }, { "epoch": 90.04918032786885, "grad_norm": 2.335493803024292, "learning_rate": 5.148511573894621e-07, "loss": 0.0978, "step": 27465 }, { "epoch": 90.05245901639344, "grad_norm": 2.4697866439819336, "learning_rate": 5.145148766838404e-07, "loss": 0.0546, "step": 27466 }, { "epoch": 90.05573770491803, "grad_norm": 3.207336902618408, "learning_rate": 5.141787029369238e-07, "loss": 0.1246, "step": 27467 }, { "epoch": 90.05901639344262, "grad_norm": 3.063589334487915, "learning_rate": 5.138426361525062e-07, "loss": 0.1578, "step": 27468 }, { "epoch": 90.0622950819672, "grad_norm": 2.4418253898620605, "learning_rate": 5.135066763343765e-07, "loss": 0.0958, "step": 27469 }, { "epoch": 90.06557377049181, "grad_norm": 2.658055067062378, "learning_rate": 5.13170823486322e-07, "loss": 0.1819, "step": 27470 }, { "epoch": 90.0688524590164, "grad_norm": 2.5527360439300537, "learning_rate": 5.128350776121294e-07, "loss": 0.0807, "step": 27471 }, { "epoch": 90.07213114754099, "grad_norm": 2.1352579593658447, "learning_rate": 5.124994387155868e-07, "loss": 0.0767, "step": 27472 }, { "epoch": 90.07540983606557, "grad_norm": 2.526240587234497, "learning_rate": 5.121639068004769e-07, "loss": 0.261, "step": 27473 }, { "epoch": 90.07868852459016, "grad_norm": 1.949692964553833, "learning_rate": 5.118284818705843e-07, "loss": 0.0664, "step": 27474 }, { "epoch": 90.08196721311475, "grad_norm": 2.4527971744537354, "learning_rate": 5.114931639296916e-07, "loss": 0.11, "step": 27475 }, { "epoch": 90.08524590163934, "grad_norm": 2.575509548187256, "learning_rate": 5.111579529815768e-07, "loss": 0.1085, "step": 27476 }, { "epoch": 90.08852459016393, "grad_norm": 2.9734768867492676, "learning_rate": 5.108228490300227e-07, "loss": 0.1484, "step": 27477 }, { "epoch": 90.09180327868853, "grad_norm": 1.9584404230117798, "learning_rate": 5.104878520788082e-07, "loss": 0.1283, "step": 27478 }, { "epoch": 90.09508196721312, "grad_norm": 2.9649932384490967, "learning_rate": 5.101529621317103e-07, "loss": 0.1098, "step": 27479 }, { "epoch": 90.09836065573771, "grad_norm": 2.611516237258911, "learning_rate": 5.098181791925016e-07, "loss": 0.2163, "step": 27480 }, { "epoch": 90.1016393442623, "grad_norm": 2.412214994430542, "learning_rate": 5.094835032649637e-07, "loss": 0.1158, "step": 27481 }, { "epoch": 90.10491803278688, "grad_norm": 2.6730434894561768, "learning_rate": 5.091489343528655e-07, "loss": 0.2377, "step": 27482 }, { "epoch": 90.10819672131147, "grad_norm": 2.082759380340576, "learning_rate": 5.08814472459982e-07, "loss": 0.1977, "step": 27483 }, { "epoch": 90.11147540983606, "grad_norm": 1.8653539419174194, "learning_rate": 5.084801175900811e-07, "loss": 0.1588, "step": 27484 }, { "epoch": 90.11475409836065, "grad_norm": 2.783338785171509, "learning_rate": 5.081458697469377e-07, "loss": 0.1258, "step": 27485 }, { "epoch": 90.11803278688525, "grad_norm": 1.8510359525680542, "learning_rate": 5.078117289343188e-07, "loss": 0.1852, "step": 27486 }, { "epoch": 90.12131147540984, "grad_norm": 2.0204341411590576, "learning_rate": 5.074776951559923e-07, "loss": 0.0552, "step": 27487 }, { "epoch": 90.12459016393443, "grad_norm": 2.036987781524658, "learning_rate": 5.071437684157243e-07, "loss": 0.0726, "step": 27488 }, { "epoch": 90.12786885245902, "grad_norm": 1.5226813554763794, "learning_rate": 5.068099487172785e-07, "loss": 0.0771, "step": 27489 }, { "epoch": 90.1311475409836, "grad_norm": 2.014302968978882, "learning_rate": 5.064762360644226e-07, "loss": 0.0279, "step": 27490 }, { "epoch": 90.1344262295082, "grad_norm": 2.3204848766326904, "learning_rate": 5.061426304609184e-07, "loss": 0.1731, "step": 27491 }, { "epoch": 90.13770491803278, "grad_norm": 2.1851003170013428, "learning_rate": 5.058091319105263e-07, "loss": 0.1585, "step": 27492 }, { "epoch": 90.14098360655737, "grad_norm": 1.8394652605056763, "learning_rate": 5.054757404170074e-07, "loss": 0.1057, "step": 27493 }, { "epoch": 90.14426229508197, "grad_norm": 2.7357258796691895, "learning_rate": 5.051424559841223e-07, "loss": 0.0995, "step": 27494 }, { "epoch": 90.14754098360656, "grad_norm": 2.2520735263824463, "learning_rate": 5.048092786156278e-07, "loss": 0.1404, "step": 27495 }, { "epoch": 90.15081967213115, "grad_norm": 4.314436912536621, "learning_rate": 5.044762083152821e-07, "loss": 0.2206, "step": 27496 }, { "epoch": 90.15409836065574, "grad_norm": 3.237809896469116, "learning_rate": 5.041432450868377e-07, "loss": 0.135, "step": 27497 }, { "epoch": 90.15737704918033, "grad_norm": 2.079493522644043, "learning_rate": 5.038103889340529e-07, "loss": 0.174, "step": 27498 }, { "epoch": 90.16065573770491, "grad_norm": 2.5709004402160645, "learning_rate": 5.0347763986068e-07, "loss": 0.1767, "step": 27499 }, { "epoch": 90.1639344262295, "grad_norm": 2.452578544616699, "learning_rate": 5.031449978704705e-07, "loss": 0.084, "step": 27500 }, { "epoch": 90.1672131147541, "grad_norm": 2.5439705848693848, "learning_rate": 5.028124629671737e-07, "loss": 0.0592, "step": 27501 }, { "epoch": 90.1704918032787, "grad_norm": 1.8297377824783325, "learning_rate": 5.024800351545423e-07, "loss": 0.1281, "step": 27502 }, { "epoch": 90.17377049180328, "grad_norm": 2.0682501792907715, "learning_rate": 5.02147714436324e-07, "loss": 0.1294, "step": 27503 }, { "epoch": 90.17704918032787, "grad_norm": 2.1108858585357666, "learning_rate": 5.01815500816264e-07, "loss": 0.0535, "step": 27504 }, { "epoch": 90.18032786885246, "grad_norm": 2.25964617729187, "learning_rate": 5.014833942981112e-07, "loss": 0.1413, "step": 27505 }, { "epoch": 90.18360655737705, "grad_norm": 2.3127925395965576, "learning_rate": 5.011513948856083e-07, "loss": 0.133, "step": 27506 }, { "epoch": 90.18688524590164, "grad_norm": 2.402604103088379, "learning_rate": 5.008195025824991e-07, "loss": 0.1233, "step": 27507 }, { "epoch": 90.19016393442622, "grad_norm": 1.7336933612823486, "learning_rate": 5.004877173925282e-07, "loss": 0.0258, "step": 27508 }, { "epoch": 90.19344262295083, "grad_norm": 3.0264859199523926, "learning_rate": 5.00156039319436e-07, "loss": 0.1331, "step": 27509 }, { "epoch": 90.19672131147541, "grad_norm": 2.4209437370300293, "learning_rate": 4.998244683669595e-07, "loss": 0.071, "step": 27510 }, { "epoch": 90.2, "grad_norm": 2.124575614929199, "learning_rate": 4.994930045388414e-07, "loss": 0.1245, "step": 27511 }, { "epoch": 90.20327868852459, "grad_norm": 1.9027231931686401, "learning_rate": 4.991616478388173e-07, "loss": 0.0254, "step": 27512 }, { "epoch": 90.20655737704918, "grad_norm": 2.212862253189087, "learning_rate": 4.988303982706244e-07, "loss": 0.0712, "step": 27513 }, { "epoch": 90.20983606557377, "grad_norm": 2.550314426422119, "learning_rate": 4.984992558379976e-07, "loss": 0.0884, "step": 27514 }, { "epoch": 90.21311475409836, "grad_norm": 2.809446096420288, "learning_rate": 4.981682205446692e-07, "loss": 0.1026, "step": 27515 }, { "epoch": 90.21639344262294, "grad_norm": 2.3288414478302, "learning_rate": 4.978372923943742e-07, "loss": 0.0565, "step": 27516 }, { "epoch": 90.21967213114755, "grad_norm": 1.9122111797332764, "learning_rate": 4.975064713908451e-07, "loss": 0.0505, "step": 27517 }, { "epoch": 90.22295081967214, "grad_norm": 2.7063539028167725, "learning_rate": 4.971757575378089e-07, "loss": 0.0842, "step": 27518 }, { "epoch": 90.22622950819672, "grad_norm": 2.298280715942383, "learning_rate": 4.96845150838996e-07, "loss": 0.0479, "step": 27519 }, { "epoch": 90.22950819672131, "grad_norm": 2.847702741622925, "learning_rate": 4.965146512981367e-07, "loss": 0.1069, "step": 27520 }, { "epoch": 90.2327868852459, "grad_norm": 2.003354787826538, "learning_rate": 4.961842589189559e-07, "loss": 0.0768, "step": 27521 }, { "epoch": 90.23606557377049, "grad_norm": 2.3997724056243896, "learning_rate": 4.958539737051782e-07, "loss": 0.0827, "step": 27522 }, { "epoch": 90.23934426229508, "grad_norm": 2.0173516273498535, "learning_rate": 4.955237956605274e-07, "loss": 0.1397, "step": 27523 }, { "epoch": 90.24262295081967, "grad_norm": 4.198952674865723, "learning_rate": 4.951937247887295e-07, "loss": 0.0622, "step": 27524 }, { "epoch": 90.24590163934427, "grad_norm": 2.1559815406799316, "learning_rate": 4.948637610935058e-07, "loss": 0.0783, "step": 27525 }, { "epoch": 90.24918032786886, "grad_norm": 2.019587755203247, "learning_rate": 4.945339045785747e-07, "loss": 0.076, "step": 27526 }, { "epoch": 90.25245901639344, "grad_norm": 2.690021276473999, "learning_rate": 4.942041552476585e-07, "loss": 0.078, "step": 27527 }, { "epoch": 90.25573770491803, "grad_norm": 2.745496988296509, "learning_rate": 4.938745131044708e-07, "loss": 0.1847, "step": 27528 }, { "epoch": 90.25901639344262, "grad_norm": 2.2959961891174316, "learning_rate": 4.935449781527346e-07, "loss": 0.1423, "step": 27529 }, { "epoch": 90.26229508196721, "grad_norm": 2.544966220855713, "learning_rate": 4.932155503961621e-07, "loss": 0.1416, "step": 27530 }, { "epoch": 90.2655737704918, "grad_norm": 2.350891351699829, "learning_rate": 4.928862298384695e-07, "loss": 0.1032, "step": 27531 }, { "epoch": 90.26885245901639, "grad_norm": 3.099447011947632, "learning_rate": 4.925570164833681e-07, "loss": 0.0763, "step": 27532 }, { "epoch": 90.27213114754099, "grad_norm": 2.3977580070495605, "learning_rate": 4.922279103345729e-07, "loss": 0.1964, "step": 27533 }, { "epoch": 90.27540983606558, "grad_norm": 2.207655668258667, "learning_rate": 4.918989113957939e-07, "loss": 0.0622, "step": 27534 }, { "epoch": 90.27868852459017, "grad_norm": 1.7803051471710205, "learning_rate": 4.915700196707407e-07, "loss": 0.0579, "step": 27535 }, { "epoch": 90.28196721311475, "grad_norm": 2.3367319107055664, "learning_rate": 4.912412351631202e-07, "loss": 0.0913, "step": 27536 }, { "epoch": 90.28524590163934, "grad_norm": 1.868190884590149, "learning_rate": 4.909125578766427e-07, "loss": 0.1448, "step": 27537 }, { "epoch": 90.28852459016393, "grad_norm": 2.2014591693878174, "learning_rate": 4.905839878150131e-07, "loss": 0.0662, "step": 27538 }, { "epoch": 90.29180327868852, "grad_norm": 2.959887981414795, "learning_rate": 4.902555249819363e-07, "loss": 0.1766, "step": 27539 }, { "epoch": 90.29508196721312, "grad_norm": 2.2397799491882324, "learning_rate": 4.899271693811159e-07, "loss": 0.2051, "step": 27540 }, { "epoch": 90.29836065573771, "grad_norm": 2.852663516998291, "learning_rate": 4.895989210162532e-07, "loss": 0.1377, "step": 27541 }, { "epoch": 90.3016393442623, "grad_norm": 2.576869010925293, "learning_rate": 4.892707798910535e-07, "loss": 0.0879, "step": 27542 }, { "epoch": 90.30491803278689, "grad_norm": 3.3494813442230225, "learning_rate": 4.889427460092133e-07, "loss": 0.1619, "step": 27543 }, { "epoch": 90.30819672131148, "grad_norm": 2.7576401233673096, "learning_rate": 4.886148193744333e-07, "loss": 0.0943, "step": 27544 }, { "epoch": 90.31147540983606, "grad_norm": 2.5048718452453613, "learning_rate": 4.882869999904083e-07, "loss": 0.1112, "step": 27545 }, { "epoch": 90.31475409836065, "grad_norm": 2.5160491466522217, "learning_rate": 4.879592878608396e-07, "loss": 0.1421, "step": 27546 }, { "epoch": 90.31803278688524, "grad_norm": 1.7056435346603394, "learning_rate": 4.87631682989419e-07, "loss": 0.028, "step": 27547 }, { "epoch": 90.32131147540984, "grad_norm": 1.4205710887908936, "learning_rate": 4.873041853798421e-07, "loss": 0.0215, "step": 27548 }, { "epoch": 90.32459016393443, "grad_norm": 2.3107900619506836, "learning_rate": 4.869767950357995e-07, "loss": 0.0552, "step": 27549 }, { "epoch": 90.32786885245902, "grad_norm": 2.15339994430542, "learning_rate": 4.866495119609871e-07, "loss": 0.0602, "step": 27550 }, { "epoch": 90.33114754098361, "grad_norm": 1.7895350456237793, "learning_rate": 4.863223361590919e-07, "loss": 0.0741, "step": 27551 }, { "epoch": 90.3344262295082, "grad_norm": 1.8072214126586914, "learning_rate": 4.859952676338042e-07, "loss": 0.0837, "step": 27552 }, { "epoch": 90.33770491803278, "grad_norm": 2.3586792945861816, "learning_rate": 4.856683063888101e-07, "loss": 0.123, "step": 27553 }, { "epoch": 90.34098360655737, "grad_norm": 1.9142032861709595, "learning_rate": 4.853414524278e-07, "loss": 0.124, "step": 27554 }, { "epoch": 90.34426229508196, "grad_norm": 1.8674713373184204, "learning_rate": 4.850147057544585e-07, "loss": 0.1282, "step": 27555 }, { "epoch": 90.34754098360656, "grad_norm": 1.7842766046524048, "learning_rate": 4.846880663724685e-07, "loss": 0.0641, "step": 27556 }, { "epoch": 90.35081967213115, "grad_norm": 2.444403648376465, "learning_rate": 4.843615342855123e-07, "loss": 0.1097, "step": 27557 }, { "epoch": 90.35409836065574, "grad_norm": 1.8247672319412231, "learning_rate": 4.840351094972761e-07, "loss": 0.1314, "step": 27558 }, { "epoch": 90.35737704918033, "grad_norm": 1.4828420877456665, "learning_rate": 4.837087920114369e-07, "loss": 0.0322, "step": 27559 }, { "epoch": 90.36065573770492, "grad_norm": 2.241145610809326, "learning_rate": 4.833825818316751e-07, "loss": 0.0885, "step": 27560 }, { "epoch": 90.3639344262295, "grad_norm": 2.8120946884155273, "learning_rate": 4.83056478961671e-07, "loss": 0.0844, "step": 27561 }, { "epoch": 90.3672131147541, "grad_norm": 2.0612714290618896, "learning_rate": 4.827304834050994e-07, "loss": 0.091, "step": 27562 }, { "epoch": 90.37049180327868, "grad_norm": 1.7789124250411987, "learning_rate": 4.824045951656364e-07, "loss": 0.0595, "step": 27563 }, { "epoch": 90.37377049180328, "grad_norm": 2.9883954524993896, "learning_rate": 4.820788142469579e-07, "loss": 0.086, "step": 27564 }, { "epoch": 90.37704918032787, "grad_norm": 2.79996395111084, "learning_rate": 4.817531406527376e-07, "loss": 0.1424, "step": 27565 }, { "epoch": 90.38032786885246, "grad_norm": 2.0096230506896973, "learning_rate": 4.81427574386647e-07, "loss": 0.2251, "step": 27566 }, { "epoch": 90.38360655737705, "grad_norm": 1.867224931716919, "learning_rate": 4.811021154523566e-07, "loss": 0.1546, "step": 27567 }, { "epoch": 90.38688524590164, "grad_norm": 2.5241730213165283, "learning_rate": 4.807767638535376e-07, "loss": 0.1057, "step": 27568 }, { "epoch": 90.39016393442623, "grad_norm": 1.7118372917175293, "learning_rate": 4.804515195938586e-07, "loss": 0.036, "step": 27569 }, { "epoch": 90.39344262295081, "grad_norm": 2.683182716369629, "learning_rate": 4.801263826769864e-07, "loss": 0.0761, "step": 27570 }, { "epoch": 90.3967213114754, "grad_norm": 2.1130545139312744, "learning_rate": 4.798013531065859e-07, "loss": 0.1125, "step": 27571 }, { "epoch": 90.4, "grad_norm": 2.213613271713257, "learning_rate": 4.794764308863242e-07, "loss": 0.1703, "step": 27572 }, { "epoch": 90.4032786885246, "grad_norm": 3.1479172706604004, "learning_rate": 4.791516160198661e-07, "loss": 0.139, "step": 27573 }, { "epoch": 90.40655737704918, "grad_norm": 2.1962852478027344, "learning_rate": 4.788269085108721e-07, "loss": 0.1061, "step": 27574 }, { "epoch": 90.40983606557377, "grad_norm": 2.573413610458374, "learning_rate": 4.785023083630025e-07, "loss": 0.1365, "step": 27575 }, { "epoch": 90.41311475409836, "grad_norm": 2.6270341873168945, "learning_rate": 4.78177815579921e-07, "loss": 0.0845, "step": 27576 }, { "epoch": 90.41639344262295, "grad_norm": 2.0817604064941406, "learning_rate": 4.778534301652849e-07, "loss": 0.1287, "step": 27577 }, { "epoch": 90.41967213114754, "grad_norm": 6.323949337005615, "learning_rate": 4.775291521227521e-07, "loss": 0.0652, "step": 27578 }, { "epoch": 90.42295081967212, "grad_norm": 18.883182525634766, "learning_rate": 4.772049814559787e-07, "loss": 0.0362, "step": 27579 }, { "epoch": 90.42622950819673, "grad_norm": 2.3753812313079834, "learning_rate": 4.768809181686185e-07, "loss": 0.0538, "step": 27580 }, { "epoch": 90.42950819672132, "grad_norm": 2.304006338119507, "learning_rate": 4.7655696226432957e-07, "loss": 0.2329, "step": 27581 }, { "epoch": 90.4327868852459, "grad_norm": 1.9057807922363281, "learning_rate": 4.762331137467624e-07, "loss": 0.1593, "step": 27582 }, { "epoch": 90.43606557377049, "grad_norm": 1.9238795042037964, "learning_rate": 4.759093726195696e-07, "loss": 0.043, "step": 27583 }, { "epoch": 90.43934426229508, "grad_norm": 2.296276330947876, "learning_rate": 4.7558573888639937e-07, "loss": 0.0887, "step": 27584 }, { "epoch": 90.44262295081967, "grad_norm": 1.6104183197021484, "learning_rate": 4.752622125509043e-07, "loss": 0.1018, "step": 27585 }, { "epoch": 90.44590163934426, "grad_norm": 2.3889710903167725, "learning_rate": 4.749387936167316e-07, "loss": 0.1474, "step": 27586 }, { "epoch": 90.44918032786886, "grad_norm": 3.0992465019226074, "learning_rate": 4.7461548208752706e-07, "loss": 0.304, "step": 27587 }, { "epoch": 90.45245901639345, "grad_norm": 2.1823136806488037, "learning_rate": 4.7429227796693564e-07, "loss": 0.0966, "step": 27588 }, { "epoch": 90.45573770491804, "grad_norm": 2.0061328411102295, "learning_rate": 4.7396918125860445e-07, "loss": 0.0658, "step": 27589 }, { "epoch": 90.45901639344262, "grad_norm": 2.7489583492279053, "learning_rate": 4.73646191966175e-07, "loss": 0.0614, "step": 27590 }, { "epoch": 90.46229508196721, "grad_norm": 3.4018137454986572, "learning_rate": 4.7332331009328993e-07, "loss": 0.0687, "step": 27591 }, { "epoch": 90.4655737704918, "grad_norm": 1.9419772624969482, "learning_rate": 4.730005356435896e-07, "loss": 0.0402, "step": 27592 }, { "epoch": 90.46885245901639, "grad_norm": 2.135468006134033, "learning_rate": 4.726778686207123e-07, "loss": 0.065, "step": 27593 }, { "epoch": 90.47213114754098, "grad_norm": 2.4498345851898193, "learning_rate": 4.7235530902829954e-07, "loss": 0.1022, "step": 27594 }, { "epoch": 90.47540983606558, "grad_norm": 1.6866848468780518, "learning_rate": 4.7203285686998723e-07, "loss": 0.0791, "step": 27595 }, { "epoch": 90.47868852459017, "grad_norm": 2.6542530059814453, "learning_rate": 4.7171051214941146e-07, "loss": 0.1081, "step": 27596 }, { "epoch": 90.48196721311476, "grad_norm": 4.058559894561768, "learning_rate": 4.713882748702048e-07, "loss": 0.0912, "step": 27597 }, { "epoch": 90.48524590163935, "grad_norm": 1.5611920356750488, "learning_rate": 4.7106614503600323e-07, "loss": 0.1834, "step": 27598 }, { "epoch": 90.48852459016393, "grad_norm": 1.722636342048645, "learning_rate": 4.707441226504395e-07, "loss": 0.0381, "step": 27599 }, { "epoch": 90.49180327868852, "grad_norm": 2.4969849586486816, "learning_rate": 4.7042220771714273e-07, "loss": 0.0785, "step": 27600 }, { "epoch": 90.49508196721311, "grad_norm": 1.49663507938385, "learning_rate": 4.7010040023974355e-07, "loss": 0.0387, "step": 27601 }, { "epoch": 90.4983606557377, "grad_norm": 3.1111176013946533, "learning_rate": 4.6977870022187124e-07, "loss": 0.1705, "step": 27602 }, { "epoch": 90.5016393442623, "grad_norm": 1.9309214353561401, "learning_rate": 4.694571076671539e-07, "loss": 0.0386, "step": 27603 }, { "epoch": 90.50491803278689, "grad_norm": 2.344998836517334, "learning_rate": 4.691356225792165e-07, "loss": 0.1647, "step": 27604 }, { "epoch": 90.50819672131148, "grad_norm": 2.0398805141448975, "learning_rate": 4.68814244961685e-07, "loss": 0.1602, "step": 27605 }, { "epoch": 90.51147540983607, "grad_norm": 2.5230188369750977, "learning_rate": 4.68492974818181e-07, "loss": 0.1709, "step": 27606 }, { "epoch": 90.51475409836065, "grad_norm": 2.368718147277832, "learning_rate": 4.681718121523304e-07, "loss": 0.1384, "step": 27607 }, { "epoch": 90.51803278688524, "grad_norm": 2.8605329990386963, "learning_rate": 4.678507569677537e-07, "loss": 0.1536, "step": 27608 }, { "epoch": 90.52131147540983, "grad_norm": 2.7138071060180664, "learning_rate": 4.6752980926806915e-07, "loss": 0.1231, "step": 27609 }, { "epoch": 90.52459016393442, "grad_norm": 1.6474885940551758, "learning_rate": 4.6720896905689815e-07, "loss": 0.051, "step": 27610 }, { "epoch": 90.52786885245902, "grad_norm": 2.54075026512146, "learning_rate": 4.6688823633785796e-07, "loss": 0.1287, "step": 27611 }, { "epoch": 90.53114754098361, "grad_norm": 2.359328269958496, "learning_rate": 4.6656761111456337e-07, "loss": 0.0713, "step": 27612 }, { "epoch": 90.5344262295082, "grad_norm": 2.9416186809539795, "learning_rate": 4.6624709339063267e-07, "loss": 0.0992, "step": 27613 }, { "epoch": 90.53770491803279, "grad_norm": 2.038228750228882, "learning_rate": 4.659266831696796e-07, "loss": 0.101, "step": 27614 }, { "epoch": 90.54098360655738, "grad_norm": 1.981541395187378, "learning_rate": 4.656063804553135e-07, "loss": 0.1186, "step": 27615 }, { "epoch": 90.54426229508196, "grad_norm": 2.46707820892334, "learning_rate": 4.6528618525115034e-07, "loss": 0.0657, "step": 27616 }, { "epoch": 90.54754098360655, "grad_norm": 2.112752676010132, "learning_rate": 4.649660975607995e-07, "loss": 0.071, "step": 27617 }, { "epoch": 90.55081967213114, "grad_norm": 2.3175299167633057, "learning_rate": 4.646461173878691e-07, "loss": 0.277, "step": 27618 }, { "epoch": 90.55409836065574, "grad_norm": 2.5232908725738525, "learning_rate": 4.6432624473596756e-07, "loss": 0.055, "step": 27619 }, { "epoch": 90.55737704918033, "grad_norm": 2.1061649322509766, "learning_rate": 4.6400647960870294e-07, "loss": 0.1158, "step": 27620 }, { "epoch": 90.56065573770492, "grad_norm": 1.941404104232788, "learning_rate": 4.6368682200968016e-07, "loss": 0.1154, "step": 27621 }, { "epoch": 90.56393442622951, "grad_norm": 2.256943941116333, "learning_rate": 4.633672719425042e-07, "loss": 0.088, "step": 27622 }, { "epoch": 90.5672131147541, "grad_norm": 2.209904193878174, "learning_rate": 4.6304782941077654e-07, "loss": 0.061, "step": 27623 }, { "epoch": 90.57049180327868, "grad_norm": 3.3068020343780518, "learning_rate": 4.6272849441810097e-07, "loss": 0.1059, "step": 27624 }, { "epoch": 90.57377049180327, "grad_norm": 2.629026412963867, "learning_rate": 4.624092669680791e-07, "loss": 0.1466, "step": 27625 }, { "epoch": 90.57704918032788, "grad_norm": 2.039994478225708, "learning_rate": 4.620901470643091e-07, "loss": 0.0573, "step": 27626 }, { "epoch": 90.58032786885246, "grad_norm": 2.3498780727386475, "learning_rate": 4.6177113471038813e-07, "loss": 0.0796, "step": 27627 }, { "epoch": 90.58360655737705, "grad_norm": 2.0065829753875732, "learning_rate": 4.6145222990991554e-07, "loss": 0.1531, "step": 27628 }, { "epoch": 90.58688524590164, "grad_norm": 3.653799057006836, "learning_rate": 4.6113343266648735e-07, "loss": 0.2273, "step": 27629 }, { "epoch": 90.59016393442623, "grad_norm": 1.9275425672531128, "learning_rate": 4.608147429836973e-07, "loss": 0.0976, "step": 27630 }, { "epoch": 90.59344262295082, "grad_norm": 1.902726173400879, "learning_rate": 4.6049616086513925e-07, "loss": 0.0354, "step": 27631 }, { "epoch": 90.5967213114754, "grad_norm": 2.0093095302581787, "learning_rate": 4.601776863144047e-07, "loss": 0.1996, "step": 27632 }, { "epoch": 90.6, "grad_norm": 2.324683427810669, "learning_rate": 4.5985931933508757e-07, "loss": 0.2143, "step": 27633 }, { "epoch": 90.6032786885246, "grad_norm": 2.1976609230041504, "learning_rate": 4.595410599307748e-07, "loss": 0.0977, "step": 27634 }, { "epoch": 90.60655737704919, "grad_norm": 2.6639747619628906, "learning_rate": 4.592229081050559e-07, "loss": 0.0936, "step": 27635 }, { "epoch": 90.60983606557377, "grad_norm": 3.0544607639312744, "learning_rate": 4.5890486386151787e-07, "loss": 0.0986, "step": 27636 }, { "epoch": 90.61311475409836, "grad_norm": 2.5682919025421143, "learning_rate": 4.5858692720374907e-07, "loss": 0.287, "step": 27637 }, { "epoch": 90.61639344262295, "grad_norm": 1.9623260498046875, "learning_rate": 4.5826909813533326e-07, "loss": 0.0592, "step": 27638 }, { "epoch": 90.61967213114754, "grad_norm": 2.4002127647399902, "learning_rate": 4.5795137665985424e-07, "loss": 0.0723, "step": 27639 }, { "epoch": 90.62295081967213, "grad_norm": 2.1511762142181396, "learning_rate": 4.5763376278089353e-07, "loss": 0.1272, "step": 27640 }, { "epoch": 90.62622950819672, "grad_norm": 1.9695311784744263, "learning_rate": 4.5731625650203504e-07, "loss": 0.0592, "step": 27641 }, { "epoch": 90.62950819672132, "grad_norm": 1.9259119033813477, "learning_rate": 4.5699885782685806e-07, "loss": 0.0634, "step": 27642 }, { "epoch": 90.6327868852459, "grad_norm": 2.0016238689422607, "learning_rate": 4.5668156675894083e-07, "loss": 0.0459, "step": 27643 }, { "epoch": 90.6360655737705, "grad_norm": 2.8096020221710205, "learning_rate": 4.5636438330186053e-07, "loss": 0.1287, "step": 27644 }, { "epoch": 90.63934426229508, "grad_norm": 2.0543453693389893, "learning_rate": 4.560473074591942e-07, "loss": 0.1309, "step": 27645 }, { "epoch": 90.64262295081967, "grad_norm": 2.27803111076355, "learning_rate": 4.5573033923451915e-07, "loss": 0.0886, "step": 27646 }, { "epoch": 90.64590163934426, "grad_norm": 2.356567621231079, "learning_rate": 4.5541347863140794e-07, "loss": 0.1708, "step": 27647 }, { "epoch": 90.64918032786885, "grad_norm": 2.279548168182373, "learning_rate": 4.5509672565343443e-07, "loss": 0.0764, "step": 27648 }, { "epoch": 90.65245901639344, "grad_norm": 2.3518857955932617, "learning_rate": 4.5478008030416686e-07, "loss": 0.0696, "step": 27649 }, { "epoch": 90.65573770491804, "grad_norm": 2.3989391326904297, "learning_rate": 4.5446354258718017e-07, "loss": 0.1004, "step": 27650 }, { "epoch": 90.65901639344263, "grad_norm": 3.3387298583984375, "learning_rate": 4.541471125060426e-07, "loss": 0.1387, "step": 27651 }, { "epoch": 90.66229508196722, "grad_norm": 1.989269495010376, "learning_rate": 4.538307900643213e-07, "loss": 0.034, "step": 27652 }, { "epoch": 90.6655737704918, "grad_norm": 2.81368350982666, "learning_rate": 4.5351457526558116e-07, "loss": 0.1445, "step": 27653 }, { "epoch": 90.66885245901639, "grad_norm": 2.3851914405822754, "learning_rate": 4.531984681133916e-07, "loss": 0.0844, "step": 27654 }, { "epoch": 90.67213114754098, "grad_norm": 1.8484574556350708, "learning_rate": 4.5288246861131646e-07, "loss": 0.107, "step": 27655 }, { "epoch": 90.67540983606557, "grad_norm": 2.3906466960906982, "learning_rate": 4.525665767629173e-07, "loss": 0.0661, "step": 27656 }, { "epoch": 90.67868852459016, "grad_norm": 1.6443969011306763, "learning_rate": 4.5225079257175677e-07, "loss": 0.045, "step": 27657 }, { "epoch": 90.68196721311476, "grad_norm": 3.4500021934509277, "learning_rate": 4.5193511604139426e-07, "loss": 0.1074, "step": 27658 }, { "epoch": 90.68524590163935, "grad_norm": 3.019484281539917, "learning_rate": 4.516195471753926e-07, "loss": 0.1437, "step": 27659 }, { "epoch": 90.68852459016394, "grad_norm": 2.2591707706451416, "learning_rate": 4.513040859773088e-07, "loss": 0.1667, "step": 27660 }, { "epoch": 90.69180327868852, "grad_norm": 2.414269208908081, "learning_rate": 4.50988732450699e-07, "loss": 0.1268, "step": 27661 }, { "epoch": 90.69508196721311, "grad_norm": 2.295790195465088, "learning_rate": 4.5067348659911804e-07, "loss": 0.1818, "step": 27662 }, { "epoch": 90.6983606557377, "grad_norm": 1.7223562002182007, "learning_rate": 4.5035834842612423e-07, "loss": 0.0883, "step": 27663 }, { "epoch": 90.70163934426229, "grad_norm": 2.2140209674835205, "learning_rate": 4.5004331793526926e-07, "loss": 0.2116, "step": 27664 }, { "epoch": 90.70491803278688, "grad_norm": 2.29477858543396, "learning_rate": 4.4972839513010346e-07, "loss": 0.1018, "step": 27665 }, { "epoch": 90.70819672131148, "grad_norm": 7.669157028198242, "learning_rate": 4.494135800141808e-07, "loss": 0.0911, "step": 27666 }, { "epoch": 90.71147540983607, "grad_norm": 2.0997490882873535, "learning_rate": 4.4909887259105165e-07, "loss": 0.0569, "step": 27667 }, { "epoch": 90.71475409836066, "grad_norm": 1.711596131324768, "learning_rate": 4.4878427286425997e-07, "loss": 0.0345, "step": 27668 }, { "epoch": 90.71803278688525, "grad_norm": 2.541166305541992, "learning_rate": 4.484697808373595e-07, "loss": 0.0755, "step": 27669 }, { "epoch": 90.72131147540983, "grad_norm": 2.2836217880249023, "learning_rate": 4.4815539651389186e-07, "loss": 0.0948, "step": 27670 }, { "epoch": 90.72459016393442, "grad_norm": 2.037017345428467, "learning_rate": 4.478411198974031e-07, "loss": 0.0629, "step": 27671 }, { "epoch": 90.72786885245901, "grad_norm": 2.282221794128418, "learning_rate": 4.475269509914382e-07, "loss": 0.1636, "step": 27672 }, { "epoch": 90.73114754098361, "grad_norm": 3.2056150436401367, "learning_rate": 4.472128897995398e-07, "loss": 0.1496, "step": 27673 }, { "epoch": 90.7344262295082, "grad_norm": 2.1463353633880615, "learning_rate": 4.468989363252485e-07, "loss": 0.0471, "step": 27674 }, { "epoch": 90.73770491803279, "grad_norm": 2.7254295349121094, "learning_rate": 4.465850905721025e-07, "loss": 0.132, "step": 27675 }, { "epoch": 90.74098360655738, "grad_norm": 3.4975666999816895, "learning_rate": 4.462713525436435e-07, "loss": 0.1331, "step": 27676 }, { "epoch": 90.74426229508197, "grad_norm": 2.5387637615203857, "learning_rate": 4.4595772224340974e-07, "loss": 0.0861, "step": 27677 }, { "epoch": 90.74754098360656, "grad_norm": 2.880103588104248, "learning_rate": 4.45644199674935e-07, "loss": 0.2229, "step": 27678 }, { "epoch": 90.75081967213114, "grad_norm": 2.772780656814575, "learning_rate": 4.453307848417554e-07, "loss": 0.0797, "step": 27679 }, { "epoch": 90.75409836065573, "grad_norm": 2.589141607284546, "learning_rate": 4.45017477747407e-07, "loss": 0.0874, "step": 27680 }, { "epoch": 90.75737704918033, "grad_norm": 2.8710405826568604, "learning_rate": 4.447042783954214e-07, "loss": 0.1887, "step": 27681 }, { "epoch": 90.76065573770492, "grad_norm": 2.4493401050567627, "learning_rate": 4.4439118678932913e-07, "loss": 0.0657, "step": 27682 }, { "epoch": 90.76393442622951, "grad_norm": 1.4818905591964722, "learning_rate": 4.440782029326618e-07, "loss": 0.0206, "step": 27683 }, { "epoch": 90.7672131147541, "grad_norm": 1.7775449752807617, "learning_rate": 4.437653268289477e-07, "loss": 0.0417, "step": 27684 }, { "epoch": 90.77049180327869, "grad_norm": 2.526585102081299, "learning_rate": 4.434525584817162e-07, "loss": 0.0251, "step": 27685 }, { "epoch": 90.77377049180328, "grad_norm": 2.1604089736938477, "learning_rate": 4.431398978944945e-07, "loss": 0.0842, "step": 27686 }, { "epoch": 90.77704918032786, "grad_norm": 2.947417974472046, "learning_rate": 4.428273450708065e-07, "loss": 0.1324, "step": 27687 }, { "epoch": 90.78032786885245, "grad_norm": 1.6925737857818604, "learning_rate": 4.4251490001417594e-07, "loss": 0.0816, "step": 27688 }, { "epoch": 90.78360655737706, "grad_norm": 2.2536370754241943, "learning_rate": 4.422025627281279e-07, "loss": 0.0634, "step": 27689 }, { "epoch": 90.78688524590164, "grad_norm": 2.6375324726104736, "learning_rate": 4.418903332161839e-07, "loss": 0.0777, "step": 27690 }, { "epoch": 90.79016393442623, "grad_norm": 2.565168619155884, "learning_rate": 4.4157821148186453e-07, "loss": 0.2097, "step": 27691 }, { "epoch": 90.79344262295082, "grad_norm": 2.62084698677063, "learning_rate": 4.4126619752868695e-07, "loss": 0.2914, "step": 27692 }, { "epoch": 90.79672131147541, "grad_norm": 2.6388466358184814, "learning_rate": 4.4095429136017386e-07, "loss": 0.0987, "step": 27693 }, { "epoch": 90.8, "grad_norm": 2.7930033206939697, "learning_rate": 4.406424929798403e-07, "loss": 0.1787, "step": 27694 }, { "epoch": 90.80327868852459, "grad_norm": 1.5919657945632935, "learning_rate": 4.403308023912012e-07, "loss": 0.1539, "step": 27695 }, { "epoch": 90.80655737704917, "grad_norm": 2.227734088897705, "learning_rate": 4.400192195977715e-07, "loss": 0.1116, "step": 27696 }, { "epoch": 90.80983606557378, "grad_norm": 1.545201301574707, "learning_rate": 4.39707744603064e-07, "loss": 0.0344, "step": 27697 }, { "epoch": 90.81311475409836, "grad_norm": 2.342690944671631, "learning_rate": 4.393963774105936e-07, "loss": 0.0705, "step": 27698 }, { "epoch": 90.81639344262295, "grad_norm": 2.0404298305511475, "learning_rate": 4.390851180238698e-07, "loss": 0.096, "step": 27699 }, { "epoch": 90.81967213114754, "grad_norm": 2.6539928913116455, "learning_rate": 4.387739664464019e-07, "loss": 0.1998, "step": 27700 }, { "epoch": 90.82295081967213, "grad_norm": 2.0615034103393555, "learning_rate": 4.3846292268169723e-07, "loss": 0.1852, "step": 27701 }, { "epoch": 90.82622950819672, "grad_norm": 3.080669641494751, "learning_rate": 4.381519867332651e-07, "loss": 0.0638, "step": 27702 }, { "epoch": 90.8295081967213, "grad_norm": 2.056746482849121, "learning_rate": 4.3784115860461273e-07, "loss": 0.1313, "step": 27703 }, { "epoch": 90.8327868852459, "grad_norm": 4.471701622009277, "learning_rate": 4.375304382992418e-07, "loss": 0.1369, "step": 27704 }, { "epoch": 90.8360655737705, "grad_norm": 6.377279281616211, "learning_rate": 4.3721982582065724e-07, "loss": 0.0687, "step": 27705 }, { "epoch": 90.83934426229509, "grad_norm": 1.8221362829208374, "learning_rate": 4.3690932117236404e-07, "loss": 0.0931, "step": 27706 }, { "epoch": 90.84262295081967, "grad_norm": 2.0018815994262695, "learning_rate": 4.3659892435786056e-07, "loss": 0.1674, "step": 27707 }, { "epoch": 90.84590163934426, "grad_norm": 2.1522107124328613, "learning_rate": 4.3628863538064726e-07, "loss": 0.0831, "step": 27708 }, { "epoch": 90.84918032786885, "grad_norm": 1.8508681058883667, "learning_rate": 4.359784542442236e-07, "loss": 0.1047, "step": 27709 }, { "epoch": 90.85245901639344, "grad_norm": 1.778312087059021, "learning_rate": 4.356683809520856e-07, "loss": 0.0431, "step": 27710 }, { "epoch": 90.85573770491803, "grad_norm": 2.443711996078491, "learning_rate": 4.3535841550773285e-07, "loss": 0.0681, "step": 27711 }, { "epoch": 90.85901639344263, "grad_norm": 1.4617549180984497, "learning_rate": 4.3504855791465797e-07, "loss": 0.0286, "step": 27712 }, { "epoch": 90.86229508196722, "grad_norm": 2.172426223754883, "learning_rate": 4.3473880817635703e-07, "loss": 0.1176, "step": 27713 }, { "epoch": 90.8655737704918, "grad_norm": 2.4921247959136963, "learning_rate": 4.344291662963185e-07, "loss": 0.0799, "step": 27714 }, { "epoch": 90.8688524590164, "grad_norm": 2.121713638305664, "learning_rate": 4.341196322780394e-07, "loss": 0.1585, "step": 27715 }, { "epoch": 90.87213114754098, "grad_norm": 5.3195481300354, "learning_rate": 4.33810206125006e-07, "loss": 0.1518, "step": 27716 }, { "epoch": 90.87540983606557, "grad_norm": 3.565458059310913, "learning_rate": 4.335008878407088e-07, "loss": 0.1142, "step": 27717 }, { "epoch": 90.87868852459016, "grad_norm": 2.2826685905456543, "learning_rate": 4.331916774286371e-07, "loss": 0.0724, "step": 27718 }, { "epoch": 90.88196721311475, "grad_norm": 1.8863736391067505, "learning_rate": 4.328825748922749e-07, "loss": 0.0798, "step": 27719 }, { "epoch": 90.88524590163935, "grad_norm": 2.2794737815856934, "learning_rate": 4.3257358023511054e-07, "loss": 0.0623, "step": 27720 }, { "epoch": 90.88852459016394, "grad_norm": 1.8672267198562622, "learning_rate": 4.322646934606245e-07, "loss": 0.0427, "step": 27721 }, { "epoch": 90.89180327868853, "grad_norm": 2.742845058441162, "learning_rate": 4.319559145723029e-07, "loss": 0.1599, "step": 27722 }, { "epoch": 90.89508196721312, "grad_norm": 2.860281467437744, "learning_rate": 4.316472435736274e-07, "loss": 0.1058, "step": 27723 }, { "epoch": 90.8983606557377, "grad_norm": 2.5440890789031982, "learning_rate": 4.3133868046807636e-07, "loss": 0.0709, "step": 27724 }, { "epoch": 90.90163934426229, "grad_norm": 6.188136100769043, "learning_rate": 4.310302252591325e-07, "loss": 0.1125, "step": 27725 }, { "epoch": 90.90491803278688, "grad_norm": 3.1291394233703613, "learning_rate": 4.3072187795027087e-07, "loss": 0.1498, "step": 27726 }, { "epoch": 90.90819672131147, "grad_norm": 1.7746326923370361, "learning_rate": 4.3041363854496974e-07, "loss": 0.1033, "step": 27727 }, { "epoch": 90.91147540983607, "grad_norm": 3.063758373260498, "learning_rate": 4.3010550704670416e-07, "loss": 0.0895, "step": 27728 }, { "epoch": 90.91475409836066, "grad_norm": 2.4066967964172363, "learning_rate": 4.297974834589502e-07, "loss": 0.1764, "step": 27729 }, { "epoch": 90.91803278688525, "grad_norm": 1.7468022108078003, "learning_rate": 4.294895677851807e-07, "loss": 0.0263, "step": 27730 }, { "epoch": 90.92131147540984, "grad_norm": 2.570066452026367, "learning_rate": 4.2918176002886616e-07, "loss": 0.1054, "step": 27731 }, { "epoch": 90.92459016393443, "grad_norm": 1.988088846206665, "learning_rate": 4.288740601934782e-07, "loss": 0.0701, "step": 27732 }, { "epoch": 90.92786885245901, "grad_norm": 2.444247007369995, "learning_rate": 4.285664682824875e-07, "loss": 0.051, "step": 27733 }, { "epoch": 90.9311475409836, "grad_norm": 2.560499429702759, "learning_rate": 4.282589842993612e-07, "loss": 0.1235, "step": 27734 }, { "epoch": 90.93442622950819, "grad_norm": 1.9812372922897339, "learning_rate": 4.2795160824756764e-07, "loss": 0.049, "step": 27735 }, { "epoch": 90.9377049180328, "grad_norm": 3.4567835330963135, "learning_rate": 4.2764434013057077e-07, "loss": 0.1382, "step": 27736 }, { "epoch": 90.94098360655738, "grad_norm": 1.5160212516784668, "learning_rate": 4.2733717995183776e-07, "loss": 0.0756, "step": 27737 }, { "epoch": 90.94426229508197, "grad_norm": 2.119460344314575, "learning_rate": 4.2703012771483034e-07, "loss": 0.0764, "step": 27738 }, { "epoch": 90.94754098360656, "grad_norm": 1.8919986486434937, "learning_rate": 4.2672318342301233e-07, "loss": 0.0679, "step": 27739 }, { "epoch": 90.95081967213115, "grad_norm": 2.3593456745147705, "learning_rate": 4.2641634707984324e-07, "loss": 0.1677, "step": 27740 }, { "epoch": 90.95409836065573, "grad_norm": 2.2111878395080566, "learning_rate": 4.2610961868878473e-07, "loss": 0.0825, "step": 27741 }, { "epoch": 90.95737704918032, "grad_norm": 1.9967268705368042, "learning_rate": 4.258029982532952e-07, "loss": 0.0489, "step": 27742 }, { "epoch": 90.96065573770491, "grad_norm": 1.7003862857818604, "learning_rate": 4.2549648577683064e-07, "loss": 0.0415, "step": 27743 }, { "epoch": 90.96393442622951, "grad_norm": 2.1238906383514404, "learning_rate": 4.2519008126284845e-07, "loss": 0.0586, "step": 27744 }, { "epoch": 90.9672131147541, "grad_norm": 1.9205092191696167, "learning_rate": 4.248837847148024e-07, "loss": 0.0462, "step": 27745 }, { "epoch": 90.97049180327869, "grad_norm": 1.8021776676177979, "learning_rate": 4.245775961361487e-07, "loss": 0.0348, "step": 27746 }, { "epoch": 90.97377049180328, "grad_norm": 3.0146048069000244, "learning_rate": 4.2427151553033783e-07, "loss": 0.2035, "step": 27747 }, { "epoch": 90.97704918032787, "grad_norm": 2.407923936843872, "learning_rate": 4.239655429008227e-07, "loss": 0.1039, "step": 27748 }, { "epoch": 90.98032786885246, "grad_norm": 2.2353484630584717, "learning_rate": 4.236596782510505e-07, "loss": 0.1569, "step": 27749 }, { "epoch": 90.98360655737704, "grad_norm": 2.2333059310913086, "learning_rate": 4.233539215844751e-07, "loss": 0.0603, "step": 27750 }, { "epoch": 90.98688524590163, "grad_norm": 4.184105396270752, "learning_rate": 4.2304827290454045e-07, "loss": 0.1252, "step": 27751 }, { "epoch": 90.99016393442623, "grad_norm": 2.1210286617279053, "learning_rate": 4.2274273221469373e-07, "loss": 0.0633, "step": 27752 }, { "epoch": 90.99344262295082, "grad_norm": 2.7261886596679688, "learning_rate": 4.22437299518379e-07, "loss": 0.0857, "step": 27753 }, { "epoch": 90.99672131147541, "grad_norm": 3.5817501544952393, "learning_rate": 4.2213197481904443e-07, "loss": 0.1117, "step": 27754 }, { "epoch": 91.0, "grad_norm": 2.094034433364868, "learning_rate": 4.218267581201296e-07, "loss": 0.1804, "step": 27755 }, { "epoch": 91.00327868852459, "grad_norm": 2.4771182537078857, "learning_rate": 4.215216494250773e-07, "loss": 0.154, "step": 27756 }, { "epoch": 91.00655737704918, "grad_norm": 1.8084454536437988, "learning_rate": 4.21216648737327e-07, "loss": 0.0359, "step": 27757 }, { "epoch": 91.00983606557377, "grad_norm": 1.584031343460083, "learning_rate": 4.209117560603171e-07, "loss": 0.0299, "step": 27758 }, { "epoch": 91.01311475409837, "grad_norm": 2.259953022003174, "learning_rate": 4.206069713974881e-07, "loss": 0.1147, "step": 27759 }, { "epoch": 91.01639344262296, "grad_norm": 2.4740917682647705, "learning_rate": 4.2030229475227615e-07, "loss": 0.2373, "step": 27760 }, { "epoch": 91.01967213114754, "grad_norm": 2.1453988552093506, "learning_rate": 4.199977261281163e-07, "loss": 0.0416, "step": 27761 }, { "epoch": 91.02295081967213, "grad_norm": 2.5763981342315674, "learning_rate": 4.1969326552844136e-07, "loss": 0.116, "step": 27762 }, { "epoch": 91.02622950819672, "grad_norm": 1.6333304643630981, "learning_rate": 4.1938891295668636e-07, "loss": 0.128, "step": 27763 }, { "epoch": 91.02950819672131, "grad_norm": 2.1056456565856934, "learning_rate": 4.1908466841628303e-07, "loss": 0.1385, "step": 27764 }, { "epoch": 91.0327868852459, "grad_norm": 2.8298792839050293, "learning_rate": 4.187805319106619e-07, "loss": 0.1599, "step": 27765 }, { "epoch": 91.03606557377049, "grad_norm": 2.4540059566497803, "learning_rate": 4.1847650344325143e-07, "loss": 0.1056, "step": 27766 }, { "epoch": 91.03934426229509, "grad_norm": 2.3497893810272217, "learning_rate": 4.181725830174821e-07, "loss": 0.1856, "step": 27767 }, { "epoch": 91.04262295081968, "grad_norm": 3.5880613327026367, "learning_rate": 4.17868770636779e-07, "loss": 0.2418, "step": 27768 }, { "epoch": 91.04590163934427, "grad_norm": 1.7254526615142822, "learning_rate": 4.1756506630456827e-07, "loss": 0.0477, "step": 27769 }, { "epoch": 91.04918032786885, "grad_norm": 2.349220037460327, "learning_rate": 4.1726147002427385e-07, "loss": 0.0395, "step": 27770 }, { "epoch": 91.05245901639344, "grad_norm": 2.1521902084350586, "learning_rate": 4.169579817993208e-07, "loss": 0.1314, "step": 27771 }, { "epoch": 91.05573770491803, "grad_norm": 1.9943805932998657, "learning_rate": 4.1665460163312963e-07, "loss": 0.1195, "step": 27772 }, { "epoch": 91.05901639344262, "grad_norm": 2.1483304500579834, "learning_rate": 4.1635132952912216e-07, "loss": 0.1728, "step": 27773 }, { "epoch": 91.0622950819672, "grad_norm": 1.9988371133804321, "learning_rate": 4.1604816549071783e-07, "loss": 0.0408, "step": 27774 }, { "epoch": 91.06557377049181, "grad_norm": 1.893466591835022, "learning_rate": 4.1574510952133607e-07, "loss": 0.0459, "step": 27775 }, { "epoch": 91.0688524590164, "grad_norm": 2.7313930988311768, "learning_rate": 4.15442161624392e-07, "loss": 0.1106, "step": 27776 }, { "epoch": 91.07213114754099, "grad_norm": 1.5429834127426147, "learning_rate": 4.1513932180330505e-07, "loss": 0.0578, "step": 27777 }, { "epoch": 91.07540983606557, "grad_norm": 2.4896326065063477, "learning_rate": 4.14836590061487e-07, "loss": 0.156, "step": 27778 }, { "epoch": 91.07868852459016, "grad_norm": 2.2787928581237793, "learning_rate": 4.145339664023507e-07, "loss": 0.0861, "step": 27779 }, { "epoch": 91.08196721311475, "grad_norm": 2.303934335708618, "learning_rate": 4.1423145082931216e-07, "loss": 0.1938, "step": 27780 }, { "epoch": 91.08524590163934, "grad_norm": 2.2889626026153564, "learning_rate": 4.13929043345781e-07, "loss": 0.122, "step": 27781 }, { "epoch": 91.08852459016393, "grad_norm": 1.5682798624038696, "learning_rate": 4.136267439551667e-07, "loss": 0.0341, "step": 27782 }, { "epoch": 91.09180327868853, "grad_norm": 1.9675917625427246, "learning_rate": 4.1332455266087866e-07, "loss": 0.0518, "step": 27783 }, { "epoch": 91.09508196721312, "grad_norm": 2.040498733520508, "learning_rate": 4.1302246946632206e-07, "loss": 0.1316, "step": 27784 }, { "epoch": 91.09836065573771, "grad_norm": 3.246638774871826, "learning_rate": 4.1272049437490636e-07, "loss": 0.139, "step": 27785 }, { "epoch": 91.1016393442623, "grad_norm": 2.3532090187072754, "learning_rate": 4.1241862739003546e-07, "loss": 0.0886, "step": 27786 }, { "epoch": 91.10491803278688, "grad_norm": 1.735715627670288, "learning_rate": 4.1211686851511333e-07, "loss": 0.0217, "step": 27787 }, { "epoch": 91.10819672131147, "grad_norm": 2.489427328109741, "learning_rate": 4.1181521775354063e-07, "loss": 0.0721, "step": 27788 }, { "epoch": 91.11147540983606, "grad_norm": 2.6011650562286377, "learning_rate": 4.115136751087223e-07, "loss": 0.1224, "step": 27789 }, { "epoch": 91.11475409836065, "grad_norm": 2.374831438064575, "learning_rate": 4.1121224058405687e-07, "loss": 0.0927, "step": 27790 }, { "epoch": 91.11803278688525, "grad_norm": 2.6946794986724854, "learning_rate": 4.109109141829437e-07, "loss": 0.1352, "step": 27791 }, { "epoch": 91.12131147540984, "grad_norm": 2.9292097091674805, "learning_rate": 4.10609695908778e-07, "loss": 0.1803, "step": 27792 }, { "epoch": 91.12459016393443, "grad_norm": 2.1287708282470703, "learning_rate": 4.1030858576496025e-07, "loss": 0.2404, "step": 27793 }, { "epoch": 91.12786885245902, "grad_norm": 2.0662806034088135, "learning_rate": 4.1000758375488336e-07, "loss": 0.0621, "step": 27794 }, { "epoch": 91.1311475409836, "grad_norm": 1.9046238660812378, "learning_rate": 4.097066898819424e-07, "loss": 0.1213, "step": 27795 }, { "epoch": 91.1344262295082, "grad_norm": 1.9954177141189575, "learning_rate": 4.094059041495302e-07, "loss": 0.0901, "step": 27796 }, { "epoch": 91.13770491803278, "grad_norm": 2.1458988189697266, "learning_rate": 4.091052265610362e-07, "loss": 0.184, "step": 27797 }, { "epoch": 91.14098360655737, "grad_norm": 1.8547821044921875, "learning_rate": 4.088046571198545e-07, "loss": 0.0454, "step": 27798 }, { "epoch": 91.14426229508197, "grad_norm": 2.0925281047821045, "learning_rate": 4.0850419582937227e-07, "loss": 0.0621, "step": 27799 }, { "epoch": 91.14754098360656, "grad_norm": 1.7983267307281494, "learning_rate": 4.0820384269297796e-07, "loss": 0.0506, "step": 27800 }, { "epoch": 91.15081967213115, "grad_norm": 1.8173394203186035, "learning_rate": 4.0790359771405774e-07, "loss": 0.0534, "step": 27801 }, { "epoch": 91.15409836065574, "grad_norm": 2.0305826663970947, "learning_rate": 4.076034608959978e-07, "loss": 0.0653, "step": 27802 }, { "epoch": 91.15737704918033, "grad_norm": 2.156872510910034, "learning_rate": 4.0730343224218314e-07, "loss": 0.0564, "step": 27803 }, { "epoch": 91.16065573770491, "grad_norm": 2.7002692222595215, "learning_rate": 4.070035117559967e-07, "loss": 0.2602, "step": 27804 }, { "epoch": 91.1639344262295, "grad_norm": 1.9594885110855103, "learning_rate": 4.06703699440818e-07, "loss": 0.0985, "step": 27805 }, { "epoch": 91.1672131147541, "grad_norm": 4.544848442077637, "learning_rate": 4.0640399530003095e-07, "loss": 0.1011, "step": 27806 }, { "epoch": 91.1704918032787, "grad_norm": 1.6724039316177368, "learning_rate": 4.0610439933701396e-07, "loss": 0.1346, "step": 27807 }, { "epoch": 91.17377049180328, "grad_norm": 1.9964473247528076, "learning_rate": 4.0580491155514436e-07, "loss": 0.0386, "step": 27808 }, { "epoch": 91.17704918032787, "grad_norm": 1.4532088041305542, "learning_rate": 4.0550553195780053e-07, "loss": 0.0989, "step": 27809 }, { "epoch": 91.18032786885246, "grad_norm": 1.8990490436553955, "learning_rate": 4.052062605483564e-07, "loss": 0.096, "step": 27810 }, { "epoch": 91.18360655737705, "grad_norm": 3.487652063369751, "learning_rate": 4.049070973301883e-07, "loss": 0.0893, "step": 27811 }, { "epoch": 91.18688524590164, "grad_norm": 2.5728654861450195, "learning_rate": 4.0460804230667004e-07, "loss": 0.1431, "step": 27812 }, { "epoch": 91.19016393442622, "grad_norm": 1.9442702531814575, "learning_rate": 4.0430909548117236e-07, "loss": 0.091, "step": 27813 }, { "epoch": 91.19344262295083, "grad_norm": 3.1113719940185547, "learning_rate": 4.0401025685706473e-07, "loss": 0.1537, "step": 27814 }, { "epoch": 91.19672131147541, "grad_norm": 2.455082416534424, "learning_rate": 4.0371152643772003e-07, "loss": 0.1816, "step": 27815 }, { "epoch": 91.2, "grad_norm": 2.3266501426696777, "learning_rate": 4.034129042265067e-07, "loss": 0.1384, "step": 27816 }, { "epoch": 91.20327868852459, "grad_norm": 2.48785138130188, "learning_rate": 4.031143902267898e-07, "loss": 0.1913, "step": 27817 }, { "epoch": 91.20655737704918, "grad_norm": 1.6554826498031616, "learning_rate": 4.0281598444193546e-07, "loss": 0.1085, "step": 27818 }, { "epoch": 91.20983606557377, "grad_norm": 2.654012441635132, "learning_rate": 4.0251768687531115e-07, "loss": 0.0692, "step": 27819 }, { "epoch": 91.21311475409836, "grad_norm": 4.03281831741333, "learning_rate": 4.0221949753027845e-07, "loss": 0.0722, "step": 27820 }, { "epoch": 91.21639344262294, "grad_norm": 3.541482925415039, "learning_rate": 4.019214164102003e-07, "loss": 0.1095, "step": 27821 }, { "epoch": 91.21967213114755, "grad_norm": 1.9307920932769775, "learning_rate": 4.016234435184374e-07, "loss": 0.0515, "step": 27822 }, { "epoch": 91.22295081967214, "grad_norm": 2.639857769012451, "learning_rate": 4.013255788583492e-07, "loss": 0.0943, "step": 27823 }, { "epoch": 91.22622950819672, "grad_norm": 1.5282800197601318, "learning_rate": 4.010278224332953e-07, "loss": 0.0424, "step": 27824 }, { "epoch": 91.22950819672131, "grad_norm": 2.1474292278289795, "learning_rate": 4.007301742466341e-07, "loss": 0.2857, "step": 27825 }, { "epoch": 91.2327868852459, "grad_norm": 2.537576675415039, "learning_rate": 4.004326343017195e-07, "loss": 0.1304, "step": 27826 }, { "epoch": 91.23606557377049, "grad_norm": 1.942337155342102, "learning_rate": 4.0013520260190897e-07, "loss": 0.1062, "step": 27827 }, { "epoch": 91.23934426229508, "grad_norm": 2.351762533187866, "learning_rate": 3.998378791505564e-07, "loss": 0.1806, "step": 27828 }, { "epoch": 91.24262295081967, "grad_norm": 2.304096221923828, "learning_rate": 3.9954066395101134e-07, "loss": 0.1288, "step": 27829 }, { "epoch": 91.24590163934427, "grad_norm": 1.8697998523712158, "learning_rate": 3.992435570066278e-07, "loss": 0.0883, "step": 27830 }, { "epoch": 91.24918032786886, "grad_norm": 2.887343645095825, "learning_rate": 3.9894655832075636e-07, "loss": 0.1193, "step": 27831 }, { "epoch": 91.25245901639344, "grad_norm": 2.1269137859344482, "learning_rate": 3.986496678967444e-07, "loss": 0.0463, "step": 27832 }, { "epoch": 91.25573770491803, "grad_norm": 1.2613085508346558, "learning_rate": 3.983528857379404e-07, "loss": 0.1535, "step": 27833 }, { "epoch": 91.25901639344262, "grad_norm": 2.6948962211608887, "learning_rate": 3.980562118476916e-07, "loss": 0.2446, "step": 27834 }, { "epoch": 91.26229508196721, "grad_norm": 1.7816153764724731, "learning_rate": 3.9775964622934203e-07, "loss": 0.0642, "step": 27835 }, { "epoch": 91.2655737704918, "grad_norm": 1.4402343034744263, "learning_rate": 3.974631888862357e-07, "loss": 0.0401, "step": 27836 }, { "epoch": 91.26885245901639, "grad_norm": 1.8312463760375977, "learning_rate": 3.9716683982171653e-07, "loss": 0.1181, "step": 27837 }, { "epoch": 91.27213114754099, "grad_norm": 2.6919801235198975, "learning_rate": 3.968705990391253e-07, "loss": 0.1082, "step": 27838 }, { "epoch": 91.27540983606558, "grad_norm": 2.234388589859009, "learning_rate": 3.9657446654180363e-07, "loss": 0.0572, "step": 27839 }, { "epoch": 91.27868852459017, "grad_norm": 2.4514591693878174, "learning_rate": 3.96278442333089e-07, "loss": 0.1295, "step": 27840 }, { "epoch": 91.28196721311475, "grad_norm": 1.8037586212158203, "learning_rate": 3.9598252641632086e-07, "loss": 0.0583, "step": 27841 }, { "epoch": 91.28524590163934, "grad_norm": 1.7610785961151123, "learning_rate": 3.9568671879483547e-07, "loss": 0.0826, "step": 27842 }, { "epoch": 91.28852459016393, "grad_norm": 2.975081205368042, "learning_rate": 3.95391019471969e-07, "loss": 0.0952, "step": 27843 }, { "epoch": 91.29180327868852, "grad_norm": 1.8582115173339844, "learning_rate": 3.950954284510533e-07, "loss": 0.0614, "step": 27844 }, { "epoch": 91.29508196721312, "grad_norm": 2.874279737472534, "learning_rate": 3.947999457354246e-07, "loss": 0.1304, "step": 27845 }, { "epoch": 91.29836065573771, "grad_norm": 2.4832913875579834, "learning_rate": 3.9450457132841236e-07, "loss": 0.1685, "step": 27846 }, { "epoch": 91.3016393442623, "grad_norm": 2.9222185611724854, "learning_rate": 3.9420930523334953e-07, "loss": 0.165, "step": 27847 }, { "epoch": 91.30491803278689, "grad_norm": 2.012986421585083, "learning_rate": 3.939141474535646e-07, "loss": 0.1089, "step": 27848 }, { "epoch": 91.30819672131148, "grad_norm": 2.1275522708892822, "learning_rate": 3.9361909799238264e-07, "loss": 0.168, "step": 27849 }, { "epoch": 91.31147540983606, "grad_norm": 3.165066719055176, "learning_rate": 3.9332415685313653e-07, "loss": 0.1566, "step": 27850 }, { "epoch": 91.31475409836065, "grad_norm": 1.598006010055542, "learning_rate": 3.9302932403914807e-07, "loss": 0.0512, "step": 27851 }, { "epoch": 91.31803278688524, "grad_norm": 3.192207098007202, "learning_rate": 3.927345995537424e-07, "loss": 0.0808, "step": 27852 }, { "epoch": 91.32131147540984, "grad_norm": 2.664773464202881, "learning_rate": 3.9243998340024237e-07, "loss": 0.2123, "step": 27853 }, { "epoch": 91.32459016393443, "grad_norm": 2.8626742362976074, "learning_rate": 3.92145475581972e-07, "loss": 0.0968, "step": 27854 }, { "epoch": 91.32786885245902, "grad_norm": 1.3803151845932007, "learning_rate": 3.9185107610225095e-07, "loss": 0.0652, "step": 27855 }, { "epoch": 91.33114754098361, "grad_norm": 3.1372153759002686, "learning_rate": 3.9155678496439977e-07, "loss": 0.2136, "step": 27856 }, { "epoch": 91.3344262295082, "grad_norm": 2.3210930824279785, "learning_rate": 3.9126260217173475e-07, "loss": 0.1526, "step": 27857 }, { "epoch": 91.33770491803278, "grad_norm": 1.938705325126648, "learning_rate": 3.909685277275743e-07, "loss": 0.0973, "step": 27858 }, { "epoch": 91.34098360655737, "grad_norm": 1.7683067321777344, "learning_rate": 3.9067456163523587e-07, "loss": 0.0327, "step": 27859 }, { "epoch": 91.34426229508196, "grad_norm": 2.624469757080078, "learning_rate": 3.903807038980323e-07, "loss": 0.1421, "step": 27860 }, { "epoch": 91.34754098360656, "grad_norm": 2.6673078536987305, "learning_rate": 3.900869545192787e-07, "loss": 0.1239, "step": 27861 }, { "epoch": 91.35081967213115, "grad_norm": 2.147763967514038, "learning_rate": 3.8979331350228466e-07, "loss": 0.0926, "step": 27862 }, { "epoch": 91.35409836065574, "grad_norm": 2.357283115386963, "learning_rate": 3.894997808503642e-07, "loss": 0.1147, "step": 27863 }, { "epoch": 91.35737704918033, "grad_norm": 2.010681390762329, "learning_rate": 3.892063565668269e-07, "loss": 0.0795, "step": 27864 }, { "epoch": 91.36065573770492, "grad_norm": 1.6907113790512085, "learning_rate": 3.8891304065498016e-07, "loss": 0.0655, "step": 27865 }, { "epoch": 91.3639344262295, "grad_norm": 2.2500698566436768, "learning_rate": 3.886198331181301e-07, "loss": 0.1296, "step": 27866 }, { "epoch": 91.3672131147541, "grad_norm": 2.126814126968384, "learning_rate": 3.883267339595864e-07, "loss": 0.0257, "step": 27867 }, { "epoch": 91.37049180327868, "grad_norm": 1.4687546491622925, "learning_rate": 3.8803374318265195e-07, "loss": 0.0919, "step": 27868 }, { "epoch": 91.37377049180328, "grad_norm": 2.9305341243743896, "learning_rate": 3.877408607906319e-07, "loss": 0.1488, "step": 27869 }, { "epoch": 91.37704918032787, "grad_norm": 1.3603545427322388, "learning_rate": 3.874480867868269e-07, "loss": 0.0403, "step": 27870 }, { "epoch": 91.38032786885246, "grad_norm": 2.0445141792297363, "learning_rate": 3.8715542117453987e-07, "loss": 0.1389, "step": 27871 }, { "epoch": 91.38360655737705, "grad_norm": 1.4933462142944336, "learning_rate": 3.8686286395706927e-07, "loss": 0.0813, "step": 27872 }, { "epoch": 91.38688524590164, "grad_norm": 1.4793301820755005, "learning_rate": 3.86570415137717e-07, "loss": 0.0436, "step": 27873 }, { "epoch": 91.39016393442623, "grad_norm": 2.8095765113830566, "learning_rate": 3.86278074719777e-07, "loss": 0.1113, "step": 27874 }, { "epoch": 91.39344262295081, "grad_norm": 1.6170964241027832, "learning_rate": 3.859858427065477e-07, "loss": 0.0954, "step": 27875 }, { "epoch": 91.3967213114754, "grad_norm": 1.6955710649490356, "learning_rate": 3.8569371910132436e-07, "loss": 0.022, "step": 27876 }, { "epoch": 91.4, "grad_norm": 1.8678995370864868, "learning_rate": 3.8540170390740097e-07, "loss": 0.0737, "step": 27877 }, { "epoch": 91.4032786885246, "grad_norm": 3.155529737472534, "learning_rate": 3.851097971280693e-07, "loss": 0.1996, "step": 27878 }, { "epoch": 91.40655737704918, "grad_norm": 1.9030331373214722, "learning_rate": 3.848179987666223e-07, "loss": 0.0596, "step": 27879 }, { "epoch": 91.40983606557377, "grad_norm": 1.8016791343688965, "learning_rate": 3.845263088263496e-07, "loss": 0.1076, "step": 27880 }, { "epoch": 91.41311475409836, "grad_norm": 1.882747769355774, "learning_rate": 3.842347273105396e-07, "loss": 0.0965, "step": 27881 }, { "epoch": 91.41639344262295, "grad_norm": 2.32602596282959, "learning_rate": 3.839432542224819e-07, "loss": 0.0812, "step": 27882 }, { "epoch": 91.41967213114754, "grad_norm": 3.870915412902832, "learning_rate": 3.836518895654617e-07, "loss": 0.0355, "step": 27883 }, { "epoch": 91.42295081967212, "grad_norm": 2.4584712982177734, "learning_rate": 3.833606333427664e-07, "loss": 0.1039, "step": 27884 }, { "epoch": 91.42622950819673, "grad_norm": 2.347290515899658, "learning_rate": 3.8306948555767663e-07, "loss": 0.1679, "step": 27885 }, { "epoch": 91.42950819672132, "grad_norm": 3.1090011596679688, "learning_rate": 3.827784462134787e-07, "loss": 0.173, "step": 27886 }, { "epoch": 91.4327868852459, "grad_norm": 2.1497576236724854, "learning_rate": 3.824875153134522e-07, "loss": 0.0706, "step": 27887 }, { "epoch": 91.43606557377049, "grad_norm": 2.051043748855591, "learning_rate": 3.821966928608789e-07, "loss": 0.0384, "step": 27888 }, { "epoch": 91.43934426229508, "grad_norm": 1.756813645362854, "learning_rate": 3.8190597885903845e-07, "loss": 0.0904, "step": 27889 }, { "epoch": 91.44262295081967, "grad_norm": 2.495295524597168, "learning_rate": 3.816153733112093e-07, "loss": 0.0746, "step": 27890 }, { "epoch": 91.44590163934426, "grad_norm": 2.834519147872925, "learning_rate": 3.8132487622066673e-07, "loss": 0.111, "step": 27891 }, { "epoch": 91.44918032786886, "grad_norm": 1.9502109289169312, "learning_rate": 3.810344875906857e-07, "loss": 0.0595, "step": 27892 }, { "epoch": 91.45245901639345, "grad_norm": 2.0904929637908936, "learning_rate": 3.807442074245427e-07, "loss": 0.063, "step": 27893 }, { "epoch": 91.45573770491804, "grad_norm": 4.533738136291504, "learning_rate": 3.8045403572551153e-07, "loss": 0.1341, "step": 27894 }, { "epoch": 91.45901639344262, "grad_norm": 1.259280800819397, "learning_rate": 3.80163972496862e-07, "loss": 0.1002, "step": 27895 }, { "epoch": 91.46229508196721, "grad_norm": 2.876699447631836, "learning_rate": 3.7987401774186585e-07, "loss": 0.0884, "step": 27896 }, { "epoch": 91.4655737704918, "grad_norm": 1.8945406675338745, "learning_rate": 3.795841714637927e-07, "loss": 0.049, "step": 27897 }, { "epoch": 91.46885245901639, "grad_norm": 3.9502146244049072, "learning_rate": 3.7929443366591104e-07, "loss": 0.1311, "step": 27898 }, { "epoch": 91.47213114754098, "grad_norm": 2.622960329055786, "learning_rate": 3.790048043514871e-07, "loss": 0.0496, "step": 27899 }, { "epoch": 91.47540983606558, "grad_norm": 1.9131501913070679, "learning_rate": 3.787152835237884e-07, "loss": 0.0295, "step": 27900 }, { "epoch": 91.47868852459017, "grad_norm": 2.520214080810547, "learning_rate": 3.7842587118607666e-07, "loss": 0.0927, "step": 27901 }, { "epoch": 91.48196721311476, "grad_norm": 2.00787353515625, "learning_rate": 3.781365673416182e-07, "loss": 0.1356, "step": 27902 }, { "epoch": 91.48524590163935, "grad_norm": 2.5634613037109375, "learning_rate": 3.7784737199367373e-07, "loss": 0.1206, "step": 27903 }, { "epoch": 91.48852459016393, "grad_norm": 2.104921579360962, "learning_rate": 3.775582851455062e-07, "loss": 0.1417, "step": 27904 }, { "epoch": 91.49180327868852, "grad_norm": 2.4291255474090576, "learning_rate": 3.772693068003719e-07, "loss": 0.0686, "step": 27905 }, { "epoch": 91.49508196721311, "grad_norm": 2.334425210952759, "learning_rate": 3.7698043696153155e-07, "loss": 0.0687, "step": 27906 }, { "epoch": 91.4983606557377, "grad_norm": 2.5736005306243896, "learning_rate": 3.766916756322436e-07, "loss": 0.1185, "step": 27907 }, { "epoch": 91.5016393442623, "grad_norm": 2.5059521198272705, "learning_rate": 3.7640302281576225e-07, "loss": 0.1648, "step": 27908 }, { "epoch": 91.50491803278689, "grad_norm": 2.1126832962036133, "learning_rate": 3.7611447851534145e-07, "loss": 0.1882, "step": 27909 }, { "epoch": 91.50819672131148, "grad_norm": 2.4980931282043457, "learning_rate": 3.7582604273423753e-07, "loss": 0.0729, "step": 27910 }, { "epoch": 91.51147540983607, "grad_norm": 2.0517449378967285, "learning_rate": 3.755377154757012e-07, "loss": 0.056, "step": 27911 }, { "epoch": 91.51475409836065, "grad_norm": 3.4399869441986084, "learning_rate": 3.7524949674298427e-07, "loss": 0.0939, "step": 27912 }, { "epoch": 91.51803278688524, "grad_norm": 1.9064346551895142, "learning_rate": 3.7496138653933755e-07, "loss": 0.0527, "step": 27913 }, { "epoch": 91.52131147540983, "grad_norm": 2.9302687644958496, "learning_rate": 3.7467338486800617e-07, "loss": 0.0881, "step": 27914 }, { "epoch": 91.52459016393442, "grad_norm": 1.439705491065979, "learning_rate": 3.7438549173224204e-07, "loss": 0.0236, "step": 27915 }, { "epoch": 91.52786885245902, "grad_norm": 2.84000825881958, "learning_rate": 3.7409770713528915e-07, "loss": 0.1258, "step": 27916 }, { "epoch": 91.53114754098361, "grad_norm": 3.636084794998169, "learning_rate": 3.7381003108039385e-07, "loss": 0.1822, "step": 27917 }, { "epoch": 91.5344262295082, "grad_norm": 2.5523107051849365, "learning_rate": 3.735224635707968e-07, "loss": 0.0955, "step": 27918 }, { "epoch": 91.53770491803279, "grad_norm": 2.340053081512451, "learning_rate": 3.7323500460974546e-07, "loss": 0.1297, "step": 27919 }, { "epoch": 91.54098360655738, "grad_norm": 2.8707218170166016, "learning_rate": 3.729476542004784e-07, "loss": 0.122, "step": 27920 }, { "epoch": 91.54426229508196, "grad_norm": 2.675729274749756, "learning_rate": 3.7266041234623627e-07, "loss": 0.1122, "step": 27921 }, { "epoch": 91.54754098360655, "grad_norm": 4.890963077545166, "learning_rate": 3.7237327905025545e-07, "loss": 0.0482, "step": 27922 }, { "epoch": 91.55081967213114, "grad_norm": 2.3458664417266846, "learning_rate": 3.720862543157788e-07, "loss": 0.0438, "step": 27923 }, { "epoch": 91.55409836065574, "grad_norm": 1.8347502946853638, "learning_rate": 3.717993381460394e-07, "loss": 0.1159, "step": 27924 }, { "epoch": 91.55737704918033, "grad_norm": 2.9024574756622314, "learning_rate": 3.715125305442735e-07, "loss": 0.192, "step": 27925 }, { "epoch": 91.56065573770492, "grad_norm": 2.7801365852355957, "learning_rate": 3.7122583151371515e-07, "loss": 0.2236, "step": 27926 }, { "epoch": 91.56393442622951, "grad_norm": 1.5263534784317017, "learning_rate": 3.709392410575963e-07, "loss": 0.0157, "step": 27927 }, { "epoch": 91.5672131147541, "grad_norm": 1.8536280393600464, "learning_rate": 3.706527591791487e-07, "loss": 0.076, "step": 27928 }, { "epoch": 91.57049180327868, "grad_norm": 1.806296706199646, "learning_rate": 3.7036638588160424e-07, "loss": 0.1021, "step": 27929 }, { "epoch": 91.57377049180327, "grad_norm": 2.1257433891296387, "learning_rate": 3.7008012116819147e-07, "loss": 0.1171, "step": 27930 }, { "epoch": 91.57704918032788, "grad_norm": 2.273169994354248, "learning_rate": 3.6979396504213673e-07, "loss": 0.071, "step": 27931 }, { "epoch": 91.58032786885246, "grad_norm": 1.704087257385254, "learning_rate": 3.6950791750666847e-07, "loss": 0.0277, "step": 27932 }, { "epoch": 91.58360655737705, "grad_norm": 2.8504204750061035, "learning_rate": 3.6922197856501195e-07, "loss": 0.0955, "step": 27933 }, { "epoch": 91.58688524590164, "grad_norm": 1.8912461996078491, "learning_rate": 3.6893614822039016e-07, "loss": 0.0581, "step": 27934 }, { "epoch": 91.59016393442623, "grad_norm": 1.5586628913879395, "learning_rate": 3.6865042647602825e-07, "loss": 0.0313, "step": 27935 }, { "epoch": 91.59344262295082, "grad_norm": 1.877437949180603, "learning_rate": 3.6836481333514694e-07, "loss": 0.0995, "step": 27936 }, { "epoch": 91.5967213114754, "grad_norm": 1.9444005489349365, "learning_rate": 3.6807930880096487e-07, "loss": 0.0184, "step": 27937 }, { "epoch": 91.6, "grad_norm": 1.5495703220367432, "learning_rate": 3.67793912876705e-07, "loss": 0.1404, "step": 27938 }, { "epoch": 91.6032786885246, "grad_norm": 2.8294618129730225, "learning_rate": 3.675086255655835e-07, "loss": 0.1537, "step": 27939 }, { "epoch": 91.60655737704919, "grad_norm": 1.92719566822052, "learning_rate": 3.672234468708169e-07, "loss": 0.1177, "step": 27940 }, { "epoch": 91.60983606557377, "grad_norm": 2.7015469074249268, "learning_rate": 3.669383767956214e-07, "loss": 0.1382, "step": 27941 }, { "epoch": 91.61311475409836, "grad_norm": 1.8349839448928833, "learning_rate": 3.666534153432133e-07, "loss": 0.2532, "step": 27942 }, { "epoch": 91.61639344262295, "grad_norm": 2.216080665588379, "learning_rate": 3.6636856251680343e-07, "loss": 0.1622, "step": 27943 }, { "epoch": 91.61967213114754, "grad_norm": 1.886691689491272, "learning_rate": 3.660838183196025e-07, "loss": 0.04, "step": 27944 }, { "epoch": 91.62295081967213, "grad_norm": 2.8993542194366455, "learning_rate": 3.6579918275482574e-07, "loss": 0.1707, "step": 27945 }, { "epoch": 91.62622950819672, "grad_norm": 2.3080477714538574, "learning_rate": 3.6551465582567945e-07, "loss": 0.0497, "step": 27946 }, { "epoch": 91.62950819672132, "grad_norm": 2.3416121006011963, "learning_rate": 3.652302375353722e-07, "loss": 0.1251, "step": 27947 }, { "epoch": 91.6327868852459, "grad_norm": 3.7123825550079346, "learning_rate": 3.6494592788711147e-07, "loss": 0.1671, "step": 27948 }, { "epoch": 91.6360655737705, "grad_norm": 2.5794901847839355, "learning_rate": 3.646617268841046e-07, "loss": 0.249, "step": 27949 }, { "epoch": 91.63934426229508, "grad_norm": 3.414410352706909, "learning_rate": 3.6437763452955465e-07, "loss": 0.0605, "step": 27950 }, { "epoch": 91.64262295081967, "grad_norm": 1.805423617362976, "learning_rate": 3.640936508266657e-07, "loss": 0.0724, "step": 27951 }, { "epoch": 91.64590163934426, "grad_norm": 2.627429962158203, "learning_rate": 3.6380977577863965e-07, "loss": 0.1592, "step": 27952 }, { "epoch": 91.64918032786885, "grad_norm": 1.5944706201553345, "learning_rate": 3.635260093886761e-07, "loss": 0.1912, "step": 27953 }, { "epoch": 91.65245901639344, "grad_norm": 2.2542624473571777, "learning_rate": 3.63242351659977e-07, "loss": 0.152, "step": 27954 }, { "epoch": 91.65573770491804, "grad_norm": 2.4342029094696045, "learning_rate": 3.629588025957409e-07, "loss": 0.0476, "step": 27955 }, { "epoch": 91.65901639344263, "grad_norm": 2.794257402420044, "learning_rate": 3.62675362199163e-07, "loss": 0.0713, "step": 27956 }, { "epoch": 91.66229508196722, "grad_norm": 2.767066478729248, "learning_rate": 3.623920304734407e-07, "loss": 0.1125, "step": 27957 }, { "epoch": 91.6655737704918, "grad_norm": 3.31762433052063, "learning_rate": 3.6210880742176933e-07, "loss": 0.1458, "step": 27958 }, { "epoch": 91.66885245901639, "grad_norm": 2.4305191040039062, "learning_rate": 3.6182569304734295e-07, "loss": 0.1037, "step": 27959 }, { "epoch": 91.67213114754098, "grad_norm": 2.53759503364563, "learning_rate": 3.615426873533523e-07, "loss": 0.1663, "step": 27960 }, { "epoch": 91.67540983606557, "grad_norm": 2.556384325027466, "learning_rate": 3.612597903429882e-07, "loss": 0.118, "step": 27961 }, { "epoch": 91.67868852459016, "grad_norm": 2.291653633117676, "learning_rate": 3.609770020194436e-07, "loss": 0.1057, "step": 27962 }, { "epoch": 91.68196721311476, "grad_norm": 2.567348003387451, "learning_rate": 3.6069432238590497e-07, "loss": 0.0997, "step": 27963 }, { "epoch": 91.68524590163935, "grad_norm": 1.6813157796859741, "learning_rate": 3.6041175144556075e-07, "loss": 0.0466, "step": 27964 }, { "epoch": 91.68852459016394, "grad_norm": 2.1732964515686035, "learning_rate": 3.6012928920159617e-07, "loss": 0.1871, "step": 27965 }, { "epoch": 91.69180327868852, "grad_norm": 2.818037271499634, "learning_rate": 3.5984693565719543e-07, "loss": 0.0568, "step": 27966 }, { "epoch": 91.69508196721311, "grad_norm": 2.242997407913208, "learning_rate": 3.595646908155448e-07, "loss": 0.0774, "step": 27967 }, { "epoch": 91.6983606557377, "grad_norm": 2.732530355453491, "learning_rate": 3.592825546798262e-07, "loss": 0.0765, "step": 27968 }, { "epoch": 91.70163934426229, "grad_norm": 1.4920905828475952, "learning_rate": 3.590005272532204e-07, "loss": 0.0261, "step": 27969 }, { "epoch": 91.70491803278688, "grad_norm": 1.909579873085022, "learning_rate": 3.5871860853890714e-07, "loss": 0.0387, "step": 27970 }, { "epoch": 91.70819672131148, "grad_norm": 2.0161306858062744, "learning_rate": 3.584367985400661e-07, "loss": 0.068, "step": 27971 }, { "epoch": 91.71147540983607, "grad_norm": 2.5363340377807617, "learning_rate": 3.581550972598757e-07, "loss": 0.2015, "step": 27972 }, { "epoch": 91.71475409836066, "grad_norm": 2.698593854904175, "learning_rate": 3.578735047015114e-07, "loss": 0.0952, "step": 27973 }, { "epoch": 91.71803278688525, "grad_norm": 2.610527515411377, "learning_rate": 3.5759202086814713e-07, "loss": 0.1555, "step": 27974 }, { "epoch": 91.72131147540983, "grad_norm": 2.346275806427002, "learning_rate": 3.573106457629605e-07, "loss": 0.1628, "step": 27975 }, { "epoch": 91.72459016393442, "grad_norm": 1.8271727561950684, "learning_rate": 3.570293793891211e-07, "loss": 0.0527, "step": 27976 }, { "epoch": 91.72786885245901, "grad_norm": 2.5170631408691406, "learning_rate": 3.5674822174980195e-07, "loss": 0.1108, "step": 27977 }, { "epoch": 91.73114754098361, "grad_norm": 2.1290647983551025, "learning_rate": 3.564671728481739e-07, "loss": 0.1149, "step": 27978 }, { "epoch": 91.7344262295082, "grad_norm": 2.2608890533447266, "learning_rate": 3.5618623268740324e-07, "loss": 0.0597, "step": 27979 }, { "epoch": 91.73770491803279, "grad_norm": 2.2216269969940186, "learning_rate": 3.5590540127066086e-07, "loss": 0.0849, "step": 27980 }, { "epoch": 91.74098360655738, "grad_norm": 1.7338180541992188, "learning_rate": 3.5562467860111306e-07, "loss": 0.0538, "step": 27981 }, { "epoch": 91.74426229508197, "grad_norm": 2.5066463947296143, "learning_rate": 3.553440646819251e-07, "loss": 0.1282, "step": 27982 }, { "epoch": 91.74754098360656, "grad_norm": 2.0273334980010986, "learning_rate": 3.550635595162588e-07, "loss": 0.037, "step": 27983 }, { "epoch": 91.75081967213114, "grad_norm": 2.543762445449829, "learning_rate": 3.5478316310727957e-07, "loss": 0.0702, "step": 27984 }, { "epoch": 91.75409836065573, "grad_norm": 2.426705837249756, "learning_rate": 3.545028754581492e-07, "loss": 0.0849, "step": 27985 }, { "epoch": 91.75737704918033, "grad_norm": 2.3046040534973145, "learning_rate": 3.5422269657202855e-07, "loss": 0.1578, "step": 27986 }, { "epoch": 91.76065573770492, "grad_norm": 1.8773311376571655, "learning_rate": 3.53942626452074e-07, "loss": 0.1532, "step": 27987 }, { "epoch": 91.76393442622951, "grad_norm": 2.070707082748413, "learning_rate": 3.536626651014474e-07, "loss": 0.0791, "step": 27988 }, { "epoch": 91.7672131147541, "grad_norm": 1.872833490371704, "learning_rate": 3.533828125233041e-07, "loss": 0.1333, "step": 27989 }, { "epoch": 91.77049180327869, "grad_norm": 2.4161627292633057, "learning_rate": 3.5310306872079816e-07, "loss": 0.1314, "step": 27990 }, { "epoch": 91.77377049180328, "grad_norm": 2.231961250305176, "learning_rate": 3.5282343369708595e-07, "loss": 0.1071, "step": 27991 }, { "epoch": 91.77704918032786, "grad_norm": 2.102134943008423, "learning_rate": 3.5254390745532054e-07, "loss": 0.1429, "step": 27992 }, { "epoch": 91.78032786885245, "grad_norm": 2.2850375175476074, "learning_rate": 3.5226448999865273e-07, "loss": 0.1288, "step": 27993 }, { "epoch": 91.78360655737706, "grad_norm": 1.621445655822754, "learning_rate": 3.519851813302344e-07, "loss": 0.031, "step": 27994 }, { "epoch": 91.78688524590164, "grad_norm": 2.2702221870422363, "learning_rate": 3.5170598145321424e-07, "loss": 0.0697, "step": 27995 }, { "epoch": 91.79016393442623, "grad_norm": 2.2200255393981934, "learning_rate": 3.514268903707407e-07, "loss": 0.0328, "step": 27996 }, { "epoch": 91.79344262295082, "grad_norm": 2.506941556930542, "learning_rate": 3.5114790808596146e-07, "loss": 0.1669, "step": 27997 }, { "epoch": 91.79672131147541, "grad_norm": 2.3522136211395264, "learning_rate": 3.508690346020216e-07, "loss": 0.1512, "step": 27998 }, { "epoch": 91.8, "grad_norm": 1.8531079292297363, "learning_rate": 3.5059026992206645e-07, "loss": 0.0432, "step": 27999 }, { "epoch": 91.80327868852459, "grad_norm": 1.1970971822738647, "learning_rate": 3.5031161404923797e-07, "loss": 0.0211, "step": 28000 }, { "epoch": 91.80655737704917, "grad_norm": 1.4357022047042847, "learning_rate": 3.500330669866803e-07, "loss": 0.0384, "step": 28001 }, { "epoch": 91.80983606557378, "grad_norm": 1.653127670288086, "learning_rate": 3.4975462873753305e-07, "loss": 0.0967, "step": 28002 }, { "epoch": 91.81311475409836, "grad_norm": 2.1959609985351562, "learning_rate": 3.494762993049361e-07, "loss": 0.1461, "step": 28003 }, { "epoch": 91.81639344262295, "grad_norm": 3.4010961055755615, "learning_rate": 3.491980786920279e-07, "loss": 0.0775, "step": 28004 }, { "epoch": 91.81967213114754, "grad_norm": 2.0785326957702637, "learning_rate": 3.4891996690194383e-07, "loss": 0.0742, "step": 28005 }, { "epoch": 91.82295081967213, "grad_norm": 2.157546043395996, "learning_rate": 3.4864196393782355e-07, "loss": 0.0617, "step": 28006 }, { "epoch": 91.82622950819672, "grad_norm": 1.9149384498596191, "learning_rate": 3.483640698028001e-07, "loss": 0.0603, "step": 28007 }, { "epoch": 91.8295081967213, "grad_norm": 2.3136050701141357, "learning_rate": 3.480862845000066e-07, "loss": 0.0766, "step": 28008 }, { "epoch": 91.8327868852459, "grad_norm": 2.403290033340454, "learning_rate": 3.478086080325749e-07, "loss": 0.102, "step": 28009 }, { "epoch": 91.8360655737705, "grad_norm": 1.7266476154327393, "learning_rate": 3.4753104040363804e-07, "loss": 0.0279, "step": 28010 }, { "epoch": 91.83934426229509, "grad_norm": 1.801749587059021, "learning_rate": 3.4725358161632474e-07, "loss": 0.0869, "step": 28011 }, { "epoch": 91.84262295081967, "grad_norm": 3.263957977294922, "learning_rate": 3.469762316737635e-07, "loss": 0.2321, "step": 28012 }, { "epoch": 91.84590163934426, "grad_norm": 2.998344659805298, "learning_rate": 3.4669899057908073e-07, "loss": 0.071, "step": 28013 }, { "epoch": 91.84918032786885, "grad_norm": 2.3428266048431396, "learning_rate": 3.464218583354051e-07, "loss": 0.1545, "step": 28014 }, { "epoch": 91.85245901639344, "grad_norm": 5.549681186676025, "learning_rate": 3.461448349458607e-07, "loss": 0.1578, "step": 28015 }, { "epoch": 91.85573770491803, "grad_norm": 2.406898021697998, "learning_rate": 3.4586792041356954e-07, "loss": 0.0542, "step": 28016 }, { "epoch": 91.85901639344263, "grad_norm": 1.728642225265503, "learning_rate": 3.4559111474165684e-07, "loss": 0.085, "step": 28017 }, { "epoch": 91.86229508196722, "grad_norm": 2.4497907161712646, "learning_rate": 3.4531441793324014e-07, "loss": 0.151, "step": 28018 }, { "epoch": 91.8655737704918, "grad_norm": 2.799992322921753, "learning_rate": 3.450378299914425e-07, "loss": 0.1861, "step": 28019 }, { "epoch": 91.8688524590164, "grad_norm": 2.335735321044922, "learning_rate": 3.447613509193826e-07, "loss": 0.0639, "step": 28020 }, { "epoch": 91.87213114754098, "grad_norm": 2.080216646194458, "learning_rate": 3.444849807201778e-07, "loss": 0.3279, "step": 28021 }, { "epoch": 91.87540983606557, "grad_norm": 2.988496780395508, "learning_rate": 3.4420871939694235e-07, "loss": 0.1927, "step": 28022 }, { "epoch": 91.87868852459016, "grad_norm": 1.855544090270996, "learning_rate": 3.439325669527949e-07, "loss": 0.0763, "step": 28023 }, { "epoch": 91.88196721311475, "grad_norm": 2.3840246200561523, "learning_rate": 3.436565233908473e-07, "loss": 0.0739, "step": 28024 }, { "epoch": 91.88524590163935, "grad_norm": 3.2471323013305664, "learning_rate": 3.4338058871421163e-07, "loss": 0.0688, "step": 28025 }, { "epoch": 91.88852459016394, "grad_norm": 1.8793461322784424, "learning_rate": 3.4310476292599983e-07, "loss": 0.0894, "step": 28026 }, { "epoch": 91.89180327868853, "grad_norm": 1.9444403648376465, "learning_rate": 3.4282904602932374e-07, "loss": 0.1882, "step": 28027 }, { "epoch": 91.89508196721312, "grad_norm": 2.4628405570983887, "learning_rate": 3.42553438027291e-07, "loss": 0.1081, "step": 28028 }, { "epoch": 91.8983606557377, "grad_norm": 2.455226421356201, "learning_rate": 3.422779389230091e-07, "loss": 0.1632, "step": 28029 }, { "epoch": 91.90163934426229, "grad_norm": 2.3049612045288086, "learning_rate": 3.420025487195855e-07, "loss": 0.0541, "step": 28030 }, { "epoch": 91.90491803278688, "grad_norm": 3.057111978530884, "learning_rate": 3.417272674201233e-07, "loss": 0.1523, "step": 28031 }, { "epoch": 91.90819672131147, "grad_norm": 2.4174513816833496, "learning_rate": 3.414520950277289e-07, "loss": 0.1244, "step": 28032 }, { "epoch": 91.91147540983607, "grad_norm": 2.014822483062744, "learning_rate": 3.4117703154550544e-07, "loss": 0.02, "step": 28033 }, { "epoch": 91.91475409836066, "grad_norm": 2.513864278793335, "learning_rate": 3.4090207697655366e-07, "loss": 0.0793, "step": 28034 }, { "epoch": 91.91803278688525, "grad_norm": 2.1178088188171387, "learning_rate": 3.406272313239722e-07, "loss": 0.1214, "step": 28035 }, { "epoch": 91.92131147540984, "grad_norm": 2.1352477073669434, "learning_rate": 3.403524945908632e-07, "loss": 0.07, "step": 28036 }, { "epoch": 91.92459016393443, "grad_norm": 3.1434485912323, "learning_rate": 3.4007786678032397e-07, "loss": 0.2536, "step": 28037 }, { "epoch": 91.92786885245901, "grad_norm": 3.399465799331665, "learning_rate": 3.398033478954499e-07, "loss": 0.062, "step": 28038 }, { "epoch": 91.9311475409836, "grad_norm": 2.362183094024658, "learning_rate": 3.395289379393363e-07, "loss": 0.0715, "step": 28039 }, { "epoch": 91.93442622950819, "grad_norm": 1.7090777158737183, "learning_rate": 3.3925463691507956e-07, "loss": 0.0414, "step": 28040 }, { "epoch": 91.9377049180328, "grad_norm": 3.6161677837371826, "learning_rate": 3.389804448257705e-07, "loss": 0.1127, "step": 28041 }, { "epoch": 91.94098360655738, "grad_norm": 2.4858334064483643, "learning_rate": 3.387063616745023e-07, "loss": 0.1334, "step": 28042 }, { "epoch": 91.94426229508197, "grad_norm": 2.5426008701324463, "learning_rate": 3.3843238746436466e-07, "loss": 0.1597, "step": 28043 }, { "epoch": 91.94754098360656, "grad_norm": 2.3261027336120605, "learning_rate": 3.381585221984485e-07, "loss": 0.1364, "step": 28044 }, { "epoch": 91.95081967213115, "grad_norm": 2.317322254180908, "learning_rate": 3.37884765879839e-07, "loss": 0.0597, "step": 28045 }, { "epoch": 91.95409836065573, "grad_norm": 2.2261457443237305, "learning_rate": 3.37611118511626e-07, "loss": 0.0608, "step": 28046 }, { "epoch": 91.95737704918032, "grad_norm": 2.0371992588043213, "learning_rate": 3.373375800968948e-07, "loss": 0.0591, "step": 28047 }, { "epoch": 91.96065573770491, "grad_norm": 2.0778205394744873, "learning_rate": 3.3706415063872843e-07, "loss": 0.2661, "step": 28048 }, { "epoch": 91.96393442622951, "grad_norm": 2.486942768096924, "learning_rate": 3.3679083014020897e-07, "loss": 0.0802, "step": 28049 }, { "epoch": 91.9672131147541, "grad_norm": 1.9567975997924805, "learning_rate": 3.365176186044228e-07, "loss": 0.1428, "step": 28050 }, { "epoch": 91.97049180327869, "grad_norm": 2.0866522789001465, "learning_rate": 3.362445160344463e-07, "loss": 0.1004, "step": 28051 }, { "epoch": 91.97377049180328, "grad_norm": 2.6861507892608643, "learning_rate": 3.359715224333604e-07, "loss": 0.0722, "step": 28052 }, { "epoch": 91.97704918032787, "grad_norm": 2.337184429168701, "learning_rate": 3.356986378042448e-07, "loss": 0.0937, "step": 28053 }, { "epoch": 91.98032786885246, "grad_norm": 1.4910460710525513, "learning_rate": 3.3542586215017603e-07, "loss": 0.0924, "step": 28054 }, { "epoch": 91.98360655737704, "grad_norm": 1.854406476020813, "learning_rate": 3.351531954742282e-07, "loss": 0.0991, "step": 28055 }, { "epoch": 91.98688524590163, "grad_norm": 2.7497613430023193, "learning_rate": 3.3488063777947775e-07, "loss": 0.2263, "step": 28056 }, { "epoch": 91.99016393442623, "grad_norm": 2.086195468902588, "learning_rate": 3.346081890689956e-07, "loss": 0.0695, "step": 28057 }, { "epoch": 91.99344262295082, "grad_norm": 2.916712760925293, "learning_rate": 3.3433584934585704e-07, "loss": 0.1269, "step": 28058 }, { "epoch": 91.99672131147541, "grad_norm": 1.9443479776382446, "learning_rate": 3.3406361861313074e-07, "loss": 0.1079, "step": 28059 }, { "epoch": 92.0, "grad_norm": 2.4434635639190674, "learning_rate": 3.3379149687388866e-07, "loss": 0.1733, "step": 28060 }, { "epoch": 92.00327868852459, "grad_norm": 2.1962597370147705, "learning_rate": 3.3351948413119616e-07, "loss": 0.0659, "step": 28061 }, { "epoch": 92.00655737704918, "grad_norm": 2.2500064373016357, "learning_rate": 3.33247580388123e-07, "loss": 0.2643, "step": 28062 }, { "epoch": 92.00983606557377, "grad_norm": 2.7551088333129883, "learning_rate": 3.3297578564773336e-07, "loss": 0.1599, "step": 28063 }, { "epoch": 92.01311475409837, "grad_norm": 1.7924854755401611, "learning_rate": 3.3270409991309485e-07, "loss": 0.103, "step": 28064 }, { "epoch": 92.01639344262296, "grad_norm": 2.189821481704712, "learning_rate": 3.3243252318726603e-07, "loss": 0.0382, "step": 28065 }, { "epoch": 92.01967213114754, "grad_norm": 2.6583638191223145, "learning_rate": 3.3216105547331454e-07, "loss": 0.1416, "step": 28066 }, { "epoch": 92.02295081967213, "grad_norm": 1.9924927949905396, "learning_rate": 3.3188969677429907e-07, "loss": 0.0567, "step": 28067 }, { "epoch": 92.02622950819672, "grad_norm": 1.5708953142166138, "learning_rate": 3.3161844709327927e-07, "loss": 0.0322, "step": 28068 }, { "epoch": 92.02950819672131, "grad_norm": 2.321242332458496, "learning_rate": 3.313473064333139e-07, "loss": 0.1903, "step": 28069 }, { "epoch": 92.0327868852459, "grad_norm": 3.369528293609619, "learning_rate": 3.310762747974605e-07, "loss": 0.1113, "step": 28070 }, { "epoch": 92.03606557377049, "grad_norm": 2.6484947204589844, "learning_rate": 3.3080535218877554e-07, "loss": 0.121, "step": 28071 }, { "epoch": 92.03934426229509, "grad_norm": 1.8621413707733154, "learning_rate": 3.3053453861031316e-07, "loss": 0.0577, "step": 28072 }, { "epoch": 92.04262295081968, "grad_norm": 2.889880895614624, "learning_rate": 3.3026383406512877e-07, "loss": 0.0975, "step": 28073 }, { "epoch": 92.04590163934427, "grad_norm": 2.28420352935791, "learning_rate": 3.2999323855627205e-07, "loss": 0.0505, "step": 28074 }, { "epoch": 92.04918032786885, "grad_norm": 1.1919180154800415, "learning_rate": 3.2972275208679625e-07, "loss": 0.0153, "step": 28075 }, { "epoch": 92.05245901639344, "grad_norm": 2.934368848800659, "learning_rate": 3.2945237465975223e-07, "loss": 0.0875, "step": 28076 }, { "epoch": 92.05573770491803, "grad_norm": 2.7689366340637207, "learning_rate": 3.291821062781864e-07, "loss": 0.1272, "step": 28077 }, { "epoch": 92.05901639344262, "grad_norm": 1.6704350709915161, "learning_rate": 3.289119469451474e-07, "loss": 0.1235, "step": 28078 }, { "epoch": 92.0622950819672, "grad_norm": 3.90924072265625, "learning_rate": 3.286418966636817e-07, "loss": 0.2469, "step": 28079 }, { "epoch": 92.06557377049181, "grad_norm": 2.1396424770355225, "learning_rate": 3.2837195543683476e-07, "loss": 0.2066, "step": 28080 }, { "epoch": 92.0688524590164, "grad_norm": 3.377420663833618, "learning_rate": 3.2810212326765066e-07, "loss": 0.2946, "step": 28081 }, { "epoch": 92.07213114754099, "grad_norm": 1.6919976472854614, "learning_rate": 3.2783240015917037e-07, "loss": 0.0474, "step": 28082 }, { "epoch": 92.07540983606557, "grad_norm": 2.265873670578003, "learning_rate": 3.2756278611443595e-07, "loss": 0.1318, "step": 28083 }, { "epoch": 92.07868852459016, "grad_norm": 2.896942377090454, "learning_rate": 3.272932811364882e-07, "loss": 0.0877, "step": 28084 }, { "epoch": 92.08196721311475, "grad_norm": 2.0763471126556396, "learning_rate": 3.270238852283669e-07, "loss": 0.0563, "step": 28085 }, { "epoch": 92.08524590163934, "grad_norm": 2.0294432640075684, "learning_rate": 3.267545983931075e-07, "loss": 0.0604, "step": 28086 }, { "epoch": 92.08852459016393, "grad_norm": 2.1344542503356934, "learning_rate": 3.264854206337475e-07, "loss": 0.0396, "step": 28087 }, { "epoch": 92.09180327868853, "grad_norm": 2.1772756576538086, "learning_rate": 3.2621635195332236e-07, "loss": 0.13, "step": 28088 }, { "epoch": 92.09508196721312, "grad_norm": 1.8856242895126343, "learning_rate": 3.2594739235486725e-07, "loss": 0.0453, "step": 28089 }, { "epoch": 92.09836065573771, "grad_norm": 2.5352084636688232, "learning_rate": 3.2567854184141324e-07, "loss": 0.0562, "step": 28090 }, { "epoch": 92.1016393442623, "grad_norm": 2.4927124977111816, "learning_rate": 3.2540980041599113e-07, "loss": 0.0833, "step": 28091 }, { "epoch": 92.10491803278688, "grad_norm": 2.0453641414642334, "learning_rate": 3.251411680816341e-07, "loss": 0.0494, "step": 28092 }, { "epoch": 92.10819672131147, "grad_norm": 2.262906074523926, "learning_rate": 3.248726448413686e-07, "loss": 0.0646, "step": 28093 }, { "epoch": 92.11147540983606, "grad_norm": 2.2443864345550537, "learning_rate": 3.2460423069822554e-07, "loss": 0.104, "step": 28094 }, { "epoch": 92.11475409836065, "grad_norm": 3.1869864463806152, "learning_rate": 3.24335925655227e-07, "loss": 0.1026, "step": 28095 }, { "epoch": 92.11803278688525, "grad_norm": 1.7375537157058716, "learning_rate": 3.240677297154027e-07, "loss": 0.1872, "step": 28096 }, { "epoch": 92.12131147540984, "grad_norm": 2.082510232925415, "learning_rate": 3.237996428817758e-07, "loss": 0.1348, "step": 28097 }, { "epoch": 92.12459016393443, "grad_norm": 2.0302894115448, "learning_rate": 3.235316651573661e-07, "loss": 0.1231, "step": 28098 }, { "epoch": 92.12786885245902, "grad_norm": 2.2893667221069336, "learning_rate": 3.232637965452001e-07, "loss": 0.1159, "step": 28099 }, { "epoch": 92.1311475409836, "grad_norm": 1.4523097276687622, "learning_rate": 3.2299603704829654e-07, "loss": 0.0565, "step": 28100 }, { "epoch": 92.1344262295082, "grad_norm": 1.7358778715133667, "learning_rate": 3.2272838666967177e-07, "loss": 0.1019, "step": 28101 }, { "epoch": 92.13770491803278, "grad_norm": 1.722172498703003, "learning_rate": 3.224608454123479e-07, "loss": 0.0327, "step": 28102 }, { "epoch": 92.14098360655737, "grad_norm": 2.511448621749878, "learning_rate": 3.221934132793403e-07, "loss": 0.1263, "step": 28103 }, { "epoch": 92.14426229508197, "grad_norm": 2.350919723510742, "learning_rate": 3.219260902736632e-07, "loss": 0.0833, "step": 28104 }, { "epoch": 92.14754098360656, "grad_norm": 2.7608377933502197, "learning_rate": 3.2165887639833305e-07, "loss": 0.1541, "step": 28105 }, { "epoch": 92.15081967213115, "grad_norm": 5.222855091094971, "learning_rate": 3.2139177165636304e-07, "loss": 0.0898, "step": 28106 }, { "epoch": 92.15409836065574, "grad_norm": 1.6814582347869873, "learning_rate": 3.2112477605076297e-07, "loss": 0.115, "step": 28107 }, { "epoch": 92.15737704918033, "grad_norm": 3.088770866394043, "learning_rate": 3.208578895845449e-07, "loss": 0.1601, "step": 28108 }, { "epoch": 92.16065573770491, "grad_norm": 2.5890133380889893, "learning_rate": 3.2059111226071637e-07, "loss": 0.0555, "step": 28109 }, { "epoch": 92.1639344262295, "grad_norm": 1.846461534500122, "learning_rate": 3.203244440822884e-07, "loss": 0.1018, "step": 28110 }, { "epoch": 92.1672131147541, "grad_norm": 2.0600719451904297, "learning_rate": 3.200578850522673e-07, "loss": 0.0503, "step": 28111 }, { "epoch": 92.1704918032787, "grad_norm": 1.8693400621414185, "learning_rate": 3.1979143517365753e-07, "loss": 0.1283, "step": 28112 }, { "epoch": 92.17377049180328, "grad_norm": 3.115769624710083, "learning_rate": 3.195250944494632e-07, "loss": 0.0635, "step": 28113 }, { "epoch": 92.17704918032787, "grad_norm": 2.1229052543640137, "learning_rate": 3.192588628826898e-07, "loss": 0.1362, "step": 28114 }, { "epoch": 92.18032786885246, "grad_norm": 2.6064810752868652, "learning_rate": 3.189927404763382e-07, "loss": 0.193, "step": 28115 }, { "epoch": 92.18360655737705, "grad_norm": 1.825932502746582, "learning_rate": 3.187267272334083e-07, "loss": 0.0427, "step": 28116 }, { "epoch": 92.18688524590164, "grad_norm": 2.380444049835205, "learning_rate": 3.1846082315690086e-07, "loss": 0.1483, "step": 28117 }, { "epoch": 92.19016393442622, "grad_norm": 1.958207607269287, "learning_rate": 3.181950282498136e-07, "loss": 0.1262, "step": 28118 }, { "epoch": 92.19344262295083, "grad_norm": 2.1844735145568848, "learning_rate": 3.179293425151453e-07, "loss": 0.0962, "step": 28119 }, { "epoch": 92.19672131147541, "grad_norm": 2.3113648891448975, "learning_rate": 3.1766376595589013e-07, "loss": 0.0857, "step": 28120 }, { "epoch": 92.2, "grad_norm": 2.7008137702941895, "learning_rate": 3.1739829857504235e-07, "loss": 0.1239, "step": 28121 }, { "epoch": 92.20327868852459, "grad_norm": 5.498716831207275, "learning_rate": 3.171329403755963e-07, "loss": 0.2447, "step": 28122 }, { "epoch": 92.20655737704918, "grad_norm": 1.9064301252365112, "learning_rate": 3.1686769136054396e-07, "loss": 0.0918, "step": 28123 }, { "epoch": 92.20983606557377, "grad_norm": 2.7805562019348145, "learning_rate": 3.166025515328763e-07, "loss": 0.1216, "step": 28124 }, { "epoch": 92.21311475409836, "grad_norm": 1.934138536453247, "learning_rate": 3.1633752089558434e-07, "loss": 0.0536, "step": 28125 }, { "epoch": 92.21639344262294, "grad_norm": 2.468585729598999, "learning_rate": 3.160725994516534e-07, "loss": 0.1321, "step": 28126 }, { "epoch": 92.21967213114755, "grad_norm": 2.0877749919891357, "learning_rate": 3.1580778720407325e-07, "loss": 0.0707, "step": 28127 }, { "epoch": 92.22295081967214, "grad_norm": 3.2596819400787354, "learning_rate": 3.1554308415583045e-07, "loss": 0.142, "step": 28128 }, { "epoch": 92.22622950819672, "grad_norm": 2.633636236190796, "learning_rate": 3.152784903099082e-07, "loss": 0.0774, "step": 28129 }, { "epoch": 92.22950819672131, "grad_norm": 2.889296770095825, "learning_rate": 3.150140056692885e-07, "loss": 0.1247, "step": 28130 }, { "epoch": 92.2327868852459, "grad_norm": 1.7925543785095215, "learning_rate": 3.147496302369579e-07, "loss": 0.0598, "step": 28131 }, { "epoch": 92.23606557377049, "grad_norm": 2.2439308166503906, "learning_rate": 3.144853640158951e-07, "loss": 0.1074, "step": 28132 }, { "epoch": 92.23934426229508, "grad_norm": 1.8829011917114258, "learning_rate": 3.142212070090811e-07, "loss": 0.0611, "step": 28133 }, { "epoch": 92.24262295081967, "grad_norm": 1.8110960721969604, "learning_rate": 3.1395715921949234e-07, "loss": 0.0381, "step": 28134 }, { "epoch": 92.24590163934427, "grad_norm": 2.342400312423706, "learning_rate": 3.1369322065010756e-07, "loss": 0.1726, "step": 28135 }, { "epoch": 92.24918032786886, "grad_norm": 1.9223283529281616, "learning_rate": 3.134293913039033e-07, "loss": 0.08, "step": 28136 }, { "epoch": 92.25245901639344, "grad_norm": 3.051697015762329, "learning_rate": 3.1316567118385375e-07, "loss": 0.2566, "step": 28137 }, { "epoch": 92.25573770491803, "grad_norm": 3.8125216960906982, "learning_rate": 3.1290206029293444e-07, "loss": 0.1241, "step": 28138 }, { "epoch": 92.25901639344262, "grad_norm": 2.1898794174194336, "learning_rate": 3.126385586341141e-07, "loss": 0.0714, "step": 28139 }, { "epoch": 92.26229508196721, "grad_norm": 1.922253966331482, "learning_rate": 3.1237516621036803e-07, "loss": 0.1302, "step": 28140 }, { "epoch": 92.2655737704918, "grad_norm": 2.606949806213379, "learning_rate": 3.1211188302466386e-07, "loss": 0.0383, "step": 28141 }, { "epoch": 92.26885245901639, "grad_norm": 2.1955208778381348, "learning_rate": 3.1184870907997156e-07, "loss": 0.1346, "step": 28142 }, { "epoch": 92.27213114754099, "grad_norm": 2.105712413787842, "learning_rate": 3.1158564437925866e-07, "loss": 0.0709, "step": 28143 }, { "epoch": 92.27540983606558, "grad_norm": 1.7209445238113403, "learning_rate": 3.1132268892548945e-07, "loss": 0.0293, "step": 28144 }, { "epoch": 92.27868852459017, "grad_norm": 1.6503989696502686, "learning_rate": 3.110598427216327e-07, "loss": 0.0294, "step": 28145 }, { "epoch": 92.28196721311475, "grad_norm": 2.41093111038208, "learning_rate": 3.1079710577064935e-07, "loss": 0.1229, "step": 28146 }, { "epoch": 92.28524590163934, "grad_norm": 3.9504568576812744, "learning_rate": 3.105344780755015e-07, "loss": 0.2627, "step": 28147 }, { "epoch": 92.28852459016393, "grad_norm": 2.023329019546509, "learning_rate": 3.102719596391535e-07, "loss": 0.1124, "step": 28148 }, { "epoch": 92.29180327868852, "grad_norm": 3.5553462505340576, "learning_rate": 3.100095504645639e-07, "loss": 0.0571, "step": 28149 }, { "epoch": 92.29508196721312, "grad_norm": 2.4273149967193604, "learning_rate": 3.0974725055469054e-07, "loss": 0.0634, "step": 28150 }, { "epoch": 92.29836065573771, "grad_norm": 2.077000379562378, "learning_rate": 3.094850599124932e-07, "loss": 0.1133, "step": 28151 }, { "epoch": 92.3016393442623, "grad_norm": 2.055788993835449, "learning_rate": 3.092229785409273e-07, "loss": 0.0728, "step": 28152 }, { "epoch": 92.30491803278689, "grad_norm": 2.076789379119873, "learning_rate": 3.0896100644294823e-07, "loss": 0.1417, "step": 28153 }, { "epoch": 92.30819672131148, "grad_norm": 2.4639194011688232, "learning_rate": 3.0869914362150923e-07, "loss": 0.1041, "step": 28154 }, { "epoch": 92.31147540983606, "grad_norm": 2.5609827041625977, "learning_rate": 3.0843739007956454e-07, "loss": 0.1615, "step": 28155 }, { "epoch": 92.31475409836065, "grad_norm": 2.4746787548065186, "learning_rate": 3.0817574582006513e-07, "loss": 0.0405, "step": 28156 }, { "epoch": 92.31803278688524, "grad_norm": 2.9939327239990234, "learning_rate": 3.0791421084595984e-07, "loss": 0.0658, "step": 28157 }, { "epoch": 92.32131147540984, "grad_norm": 2.7466187477111816, "learning_rate": 3.076527851602007e-07, "loss": 0.1367, "step": 28158 }, { "epoch": 92.32459016393443, "grad_norm": 2.863044500350952, "learning_rate": 3.073914687657331e-07, "loss": 0.0809, "step": 28159 }, { "epoch": 92.32786885245902, "grad_norm": 2.371365785598755, "learning_rate": 3.0713026166550586e-07, "loss": 0.0953, "step": 28160 }, { "epoch": 92.33114754098361, "grad_norm": 1.936585783958435, "learning_rate": 3.06869163862461e-07, "loss": 0.0613, "step": 28161 }, { "epoch": 92.3344262295082, "grad_norm": 1.8934733867645264, "learning_rate": 3.066081753595451e-07, "loss": 0.0298, "step": 28162 }, { "epoch": 92.33770491803278, "grad_norm": 1.7868868112564087, "learning_rate": 3.0634729615970136e-07, "loss": 0.0601, "step": 28163 }, { "epoch": 92.34098360655737, "grad_norm": 2.750852346420288, "learning_rate": 3.060865262658708e-07, "loss": 0.1787, "step": 28164 }, { "epoch": 92.34426229508196, "grad_norm": 4.3095903396606445, "learning_rate": 3.0582586568099206e-07, "loss": 0.1847, "step": 28165 }, { "epoch": 92.34754098360656, "grad_norm": 1.733620524406433, "learning_rate": 3.055653144080084e-07, "loss": 0.0922, "step": 28166 }, { "epoch": 92.35081967213115, "grad_norm": 3.05444598197937, "learning_rate": 3.053048724498542e-07, "loss": 0.151, "step": 28167 }, { "epoch": 92.35409836065574, "grad_norm": 2.003415107727051, "learning_rate": 3.0504453980946813e-07, "loss": 0.0836, "step": 28168 }, { "epoch": 92.35737704918033, "grad_norm": 2.4660024642944336, "learning_rate": 3.047843164897857e-07, "loss": 0.2147, "step": 28169 }, { "epoch": 92.36065573770492, "grad_norm": 2.0616767406463623, "learning_rate": 3.045242024937389e-07, "loss": 0.2471, "step": 28170 }, { "epoch": 92.3639344262295, "grad_norm": 2.2486443519592285, "learning_rate": 3.042641978242633e-07, "loss": 0.1565, "step": 28171 }, { "epoch": 92.3672131147541, "grad_norm": 2.0259897708892822, "learning_rate": 3.0400430248428983e-07, "loss": 0.0825, "step": 28172 }, { "epoch": 92.37049180327868, "grad_norm": 2.3873398303985596, "learning_rate": 3.037445164767494e-07, "loss": 0.1383, "step": 28173 }, { "epoch": 92.37377049180328, "grad_norm": 1.5462695360183716, "learning_rate": 3.0348483980457086e-07, "loss": 0.0309, "step": 28174 }, { "epoch": 92.37704918032787, "grad_norm": 2.051642417907715, "learning_rate": 3.03225272470683e-07, "loss": 0.1598, "step": 28175 }, { "epoch": 92.38032786885246, "grad_norm": 2.244152307510376, "learning_rate": 3.029658144780123e-07, "loss": 0.0699, "step": 28176 }, { "epoch": 92.38360655737705, "grad_norm": 2.2096798419952393, "learning_rate": 3.0270646582948425e-07, "loss": 0.0508, "step": 28177 }, { "epoch": 92.38688524590164, "grad_norm": 2.6434648036956787, "learning_rate": 3.0244722652802203e-07, "loss": 0.1088, "step": 28178 }, { "epoch": 92.39016393442623, "grad_norm": 1.9892572164535522, "learning_rate": 3.0218809657655226e-07, "loss": 0.1085, "step": 28179 }, { "epoch": 92.39344262295081, "grad_norm": 1.7509896755218506, "learning_rate": 3.019290759779947e-07, "loss": 0.0555, "step": 28180 }, { "epoch": 92.3967213114754, "grad_norm": 1.6020069122314453, "learning_rate": 3.016701647352693e-07, "loss": 0.0848, "step": 28181 }, { "epoch": 92.4, "grad_norm": 2.2821784019470215, "learning_rate": 3.0141136285129825e-07, "loss": 0.0533, "step": 28182 }, { "epoch": 92.4032786885246, "grad_norm": 2.203731060028076, "learning_rate": 3.0115267032899577e-07, "loss": 0.2079, "step": 28183 }, { "epoch": 92.40655737704918, "grad_norm": 2.362699508666992, "learning_rate": 3.0089408717128287e-07, "loss": 0.1565, "step": 28184 }, { "epoch": 92.40983606557377, "grad_norm": 2.1548848152160645, "learning_rate": 3.006356133810728e-07, "loss": 0.0381, "step": 28185 }, { "epoch": 92.41311475409836, "grad_norm": 2.5167155265808105, "learning_rate": 3.003772489612811e-07, "loss": 0.2494, "step": 28186 }, { "epoch": 92.41639344262295, "grad_norm": 2.84769344329834, "learning_rate": 3.001189939148208e-07, "loss": 0.1008, "step": 28187 }, { "epoch": 92.41967213114754, "grad_norm": 1.9484912157058716, "learning_rate": 2.9986084824460527e-07, "loss": 0.1533, "step": 28188 }, { "epoch": 92.42295081967212, "grad_norm": 2.7785089015960693, "learning_rate": 2.9960281195354325e-07, "loss": 0.1201, "step": 28189 }, { "epoch": 92.42622950819673, "grad_norm": 2.837653636932373, "learning_rate": 2.9934488504454686e-07, "loss": 0.1232, "step": 28190 }, { "epoch": 92.42950819672132, "grad_norm": 1.8557206392288208, "learning_rate": 2.990870675205204e-07, "loss": 0.1164, "step": 28191 }, { "epoch": 92.4327868852459, "grad_norm": 2.2076900005340576, "learning_rate": 2.988293593843761e-07, "loss": 0.0975, "step": 28192 }, { "epoch": 92.43606557377049, "grad_norm": 2.016671895980835, "learning_rate": 2.9857176063901593e-07, "loss": 0.0412, "step": 28193 }, { "epoch": 92.43934426229508, "grad_norm": 9.739516258239746, "learning_rate": 2.983142712873477e-07, "loss": 0.1788, "step": 28194 }, { "epoch": 92.44262295081967, "grad_norm": 2.1241347789764404, "learning_rate": 2.9805689133227235e-07, "loss": 0.0659, "step": 28195 }, { "epoch": 92.44590163934426, "grad_norm": 1.9778997898101807, "learning_rate": 2.97799620776692e-07, "loss": 0.1108, "step": 28196 }, { "epoch": 92.44918032786886, "grad_norm": 2.2171313762664795, "learning_rate": 2.9754245962350993e-07, "loss": 0.1314, "step": 28197 }, { "epoch": 92.45245901639345, "grad_norm": 2.330918788909912, "learning_rate": 2.9728540787562486e-07, "loss": 0.0683, "step": 28198 }, { "epoch": 92.45573770491804, "grad_norm": 1.9923855066299438, "learning_rate": 2.970284655359357e-07, "loss": 0.0989, "step": 28199 }, { "epoch": 92.45901639344262, "grad_norm": 2.83650803565979, "learning_rate": 2.9677163260733667e-07, "loss": 0.0712, "step": 28200 }, { "epoch": 92.46229508196721, "grad_norm": 2.3404672145843506, "learning_rate": 2.9651490909272773e-07, "loss": 0.2549, "step": 28201 }, { "epoch": 92.4655737704918, "grad_norm": 2.100694179534912, "learning_rate": 2.9625829499500324e-07, "loss": 0.0402, "step": 28202 }, { "epoch": 92.46885245901639, "grad_norm": 2.687318801879883, "learning_rate": 2.960017903170542e-07, "loss": 0.1306, "step": 28203 }, { "epoch": 92.47213114754098, "grad_norm": 2.210770606994629, "learning_rate": 2.95745395061775e-07, "loss": 0.1023, "step": 28204 }, { "epoch": 92.47540983606558, "grad_norm": 3.274299144744873, "learning_rate": 2.9548910923205776e-07, "loss": 0.0992, "step": 28205 }, { "epoch": 92.47868852459017, "grad_norm": 1.983944296836853, "learning_rate": 2.95232932830789e-07, "loss": 0.0477, "step": 28206 }, { "epoch": 92.48196721311476, "grad_norm": 2.266237735748291, "learning_rate": 2.9497686586085983e-07, "loss": 0.2003, "step": 28207 }, { "epoch": 92.48524590163935, "grad_norm": 2.651977062225342, "learning_rate": 2.947209083251579e-07, "loss": 0.1024, "step": 28208 }, { "epoch": 92.48852459016393, "grad_norm": 1.8359116315841675, "learning_rate": 2.944650602265686e-07, "loss": 0.0531, "step": 28209 }, { "epoch": 92.49180327868852, "grad_norm": 1.522286057472229, "learning_rate": 2.942093215679764e-07, "loss": 0.0262, "step": 28210 }, { "epoch": 92.49508196721311, "grad_norm": 1.931323528289795, "learning_rate": 2.9395369235226677e-07, "loss": 0.0889, "step": 28211 }, { "epoch": 92.4983606557377, "grad_norm": 1.9673346281051636, "learning_rate": 2.936981725823207e-07, "loss": 0.0528, "step": 28212 }, { "epoch": 92.5016393442623, "grad_norm": 2.1872618198394775, "learning_rate": 2.934427622610181e-07, "loss": 0.1278, "step": 28213 }, { "epoch": 92.50491803278689, "grad_norm": 2.262173891067505, "learning_rate": 2.9318746139124224e-07, "loss": 0.0636, "step": 28214 }, { "epoch": 92.50819672131148, "grad_norm": 1.689808964729309, "learning_rate": 2.9293226997586966e-07, "loss": 0.1204, "step": 28215 }, { "epoch": 92.51147540983607, "grad_norm": 4.53270149230957, "learning_rate": 2.9267718801777924e-07, "loss": 0.1321, "step": 28216 }, { "epoch": 92.51475409836065, "grad_norm": 2.2003567218780518, "learning_rate": 2.924222155198453e-07, "loss": 0.0492, "step": 28217 }, { "epoch": 92.51803278688524, "grad_norm": 1.3010135889053345, "learning_rate": 2.9216735248494556e-07, "loss": 0.0382, "step": 28218 }, { "epoch": 92.52131147540983, "grad_norm": 2.801713705062866, "learning_rate": 2.919125989159521e-07, "loss": 0.0872, "step": 28219 }, { "epoch": 92.52459016393442, "grad_norm": 2.393923282623291, "learning_rate": 2.9165795481573836e-07, "loss": 0.0732, "step": 28220 }, { "epoch": 92.52786885245902, "grad_norm": 2.5405168533325195, "learning_rate": 2.9140342018717516e-07, "loss": 0.2029, "step": 28221 }, { "epoch": 92.53114754098361, "grad_norm": 1.9587290287017822, "learning_rate": 2.9114899503313145e-07, "loss": 0.0364, "step": 28222 }, { "epoch": 92.5344262295082, "grad_norm": 2.6511619091033936, "learning_rate": 2.908946793564793e-07, "loss": 0.0793, "step": 28223 }, { "epoch": 92.53770491803279, "grad_norm": 2.004589796066284, "learning_rate": 2.9064047316008423e-07, "loss": 0.0759, "step": 28224 }, { "epoch": 92.54098360655738, "grad_norm": 1.7942935228347778, "learning_rate": 2.903863764468129e-07, "loss": 0.1346, "step": 28225 }, { "epoch": 92.54426229508196, "grad_norm": 2.1523754596710205, "learning_rate": 2.9013238921952955e-07, "loss": 0.204, "step": 28226 }, { "epoch": 92.54754098360655, "grad_norm": 2.230391502380371, "learning_rate": 2.898785114811009e-07, "loss": 0.0753, "step": 28227 }, { "epoch": 92.55081967213114, "grad_norm": 2.0513546466827393, "learning_rate": 2.8962474323438685e-07, "loss": 0.0579, "step": 28228 }, { "epoch": 92.55409836065574, "grad_norm": 2.184129476547241, "learning_rate": 2.893710844822506e-07, "loss": 0.0866, "step": 28229 }, { "epoch": 92.55737704918033, "grad_norm": 2.870351552963257, "learning_rate": 2.8911753522755105e-07, "loss": 0.1611, "step": 28230 }, { "epoch": 92.56065573770492, "grad_norm": 2.432551622390747, "learning_rate": 2.888640954731492e-07, "loss": 0.1267, "step": 28231 }, { "epoch": 92.56393442622951, "grad_norm": 2.1616129875183105, "learning_rate": 2.886107652219017e-07, "loss": 0.0389, "step": 28232 }, { "epoch": 92.5672131147541, "grad_norm": 1.7633821964263916, "learning_rate": 2.883575444766651e-07, "loss": 0.1022, "step": 28233 }, { "epoch": 92.57049180327868, "grad_norm": 2.343451738357544, "learning_rate": 2.881044332402949e-07, "loss": 0.1331, "step": 28234 }, { "epoch": 92.57377049180327, "grad_norm": 1.8975688219070435, "learning_rate": 2.878514315156433e-07, "loss": 0.0497, "step": 28235 }, { "epoch": 92.57704918032788, "grad_norm": 2.472198486328125, "learning_rate": 2.875985393055669e-07, "loss": 0.1119, "step": 28236 }, { "epoch": 92.58032786885246, "grad_norm": 1.8086655139923096, "learning_rate": 2.873457566129145e-07, "loss": 0.0453, "step": 28237 }, { "epoch": 92.58360655737705, "grad_norm": 2.650916814804077, "learning_rate": 2.870930834405372e-07, "loss": 0.0681, "step": 28238 }, { "epoch": 92.58688524590164, "grad_norm": 1.9152253866195679, "learning_rate": 2.868405197912838e-07, "loss": 0.0537, "step": 28239 }, { "epoch": 92.59016393442623, "grad_norm": 3.3188412189483643, "learning_rate": 2.865880656680042e-07, "loss": 0.1549, "step": 28240 }, { "epoch": 92.59344262295082, "grad_norm": 2.777594804763794, "learning_rate": 2.863357210735429e-07, "loss": 0.2084, "step": 28241 }, { "epoch": 92.5967213114754, "grad_norm": 1.4655165672302246, "learning_rate": 2.8608348601074644e-07, "loss": 0.018, "step": 28242 }, { "epoch": 92.6, "grad_norm": 3.300875186920166, "learning_rate": 2.8583136048245697e-07, "loss": 0.1644, "step": 28243 }, { "epoch": 92.6032786885246, "grad_norm": 2.4162895679473877, "learning_rate": 2.8557934449152115e-07, "loss": 0.2029, "step": 28244 }, { "epoch": 92.60655737704919, "grad_norm": 3.0437395572662354, "learning_rate": 2.853274380407778e-07, "loss": 0.103, "step": 28245 }, { "epoch": 92.60983606557377, "grad_norm": 2.7566022872924805, "learning_rate": 2.8507564113306795e-07, "loss": 0.1269, "step": 28246 }, { "epoch": 92.61311475409836, "grad_norm": 2.9787726402282715, "learning_rate": 2.848239537712316e-07, "loss": 0.132, "step": 28247 }, { "epoch": 92.61639344262295, "grad_norm": 2.5167789459228516, "learning_rate": 2.845723759581065e-07, "loss": 0.1017, "step": 28248 }, { "epoch": 92.61967213114754, "grad_norm": 3.721013069152832, "learning_rate": 2.843209076965292e-07, "loss": 0.0612, "step": 28249 }, { "epoch": 92.62295081967213, "grad_norm": 2.002178192138672, "learning_rate": 2.8406954898933525e-07, "loss": 0.1292, "step": 28250 }, { "epoch": 92.62622950819672, "grad_norm": 2.6186113357543945, "learning_rate": 2.8381829983936013e-07, "loss": 0.1825, "step": 28251 }, { "epoch": 92.62950819672132, "grad_norm": 2.3404645919799805, "learning_rate": 2.8356716024943385e-07, "loss": 0.0541, "step": 28252 }, { "epoch": 92.6327868852459, "grad_norm": 1.5125423669815063, "learning_rate": 2.83316130222393e-07, "loss": 0.026, "step": 28253 }, { "epoch": 92.6360655737705, "grad_norm": 2.6140074729919434, "learning_rate": 2.8306520976106423e-07, "loss": 0.1315, "step": 28254 }, { "epoch": 92.63934426229508, "grad_norm": 2.133624792098999, "learning_rate": 2.8281439886827854e-07, "loss": 0.0452, "step": 28255 }, { "epoch": 92.64262295081967, "grad_norm": 1.921072244644165, "learning_rate": 2.8256369754686377e-07, "loss": 0.1864, "step": 28256 }, { "epoch": 92.64590163934426, "grad_norm": 3.0784552097320557, "learning_rate": 2.8231310579964646e-07, "loss": 0.1574, "step": 28257 }, { "epoch": 92.64918032786885, "grad_norm": 3.1511809825897217, "learning_rate": 2.820626236294532e-07, "loss": 0.0877, "step": 28258 }, { "epoch": 92.65245901639344, "grad_norm": 1.8882468938827515, "learning_rate": 2.818122510391075e-07, "loss": 0.1841, "step": 28259 }, { "epoch": 92.65573770491804, "grad_norm": 3.133899688720703, "learning_rate": 2.8156198803143355e-07, "loss": 0.1194, "step": 28260 }, { "epoch": 92.65901639344263, "grad_norm": 2.172978162765503, "learning_rate": 2.813118346092536e-07, "loss": 0.0305, "step": 28261 }, { "epoch": 92.66229508196722, "grad_norm": 2.2645394802093506, "learning_rate": 2.8106179077538543e-07, "loss": 0.1385, "step": 28262 }, { "epoch": 92.6655737704918, "grad_norm": 3.4601452350616455, "learning_rate": 2.8081185653265343e-07, "loss": 0.2191, "step": 28263 }, { "epoch": 92.66885245901639, "grad_norm": 2.504211902618408, "learning_rate": 2.8056203188387197e-07, "loss": 0.0691, "step": 28264 }, { "epoch": 92.67213114754098, "grad_norm": 2.603299140930176, "learning_rate": 2.8031231683185775e-07, "loss": 0.1043, "step": 28265 }, { "epoch": 92.67540983606557, "grad_norm": 2.949610948562622, "learning_rate": 2.8006271137942965e-07, "loss": 0.1496, "step": 28266 }, { "epoch": 92.67868852459016, "grad_norm": 2.4879543781280518, "learning_rate": 2.7981321552940086e-07, "loss": 0.1449, "step": 28267 }, { "epoch": 92.68196721311476, "grad_norm": 2.7394652366638184, "learning_rate": 2.795638292845848e-07, "loss": 0.132, "step": 28268 }, { "epoch": 92.68524590163935, "grad_norm": 2.0739190578460693, "learning_rate": 2.793145526477914e-07, "loss": 0.0968, "step": 28269 }, { "epoch": 92.68852459016394, "grad_norm": 2.1660385131835938, "learning_rate": 2.7906538562183506e-07, "loss": 0.0703, "step": 28270 }, { "epoch": 92.69180327868852, "grad_norm": 2.2062623500823975, "learning_rate": 2.788163282095235e-07, "loss": 0.1225, "step": 28271 }, { "epoch": 92.69508196721311, "grad_norm": 2.003019332885742, "learning_rate": 2.785673804136657e-07, "loss": 0.0815, "step": 28272 }, { "epoch": 92.6983606557377, "grad_norm": 2.894526243209839, "learning_rate": 2.7831854223706824e-07, "loss": 0.2239, "step": 28273 }, { "epoch": 92.70163934426229, "grad_norm": 2.939511299133301, "learning_rate": 2.780698136825366e-07, "loss": 0.1511, "step": 28274 }, { "epoch": 92.70491803278688, "grad_norm": 4.139492988586426, "learning_rate": 2.7782119475287637e-07, "loss": 0.195, "step": 28275 }, { "epoch": 92.70819672131148, "grad_norm": 6.389438629150391, "learning_rate": 2.775726854508909e-07, "loss": 0.2329, "step": 28276 }, { "epoch": 92.71147540983607, "grad_norm": 1.6028721332550049, "learning_rate": 2.773242857793823e-07, "loss": 0.0271, "step": 28277 }, { "epoch": 92.71475409836066, "grad_norm": 2.554211139678955, "learning_rate": 2.770759957411506e-07, "loss": 0.1508, "step": 28278 }, { "epoch": 92.71803278688525, "grad_norm": 2.1953532695770264, "learning_rate": 2.768278153389969e-07, "loss": 0.0766, "step": 28279 }, { "epoch": 92.72131147540983, "grad_norm": 2.0183815956115723, "learning_rate": 2.765797445757201e-07, "loss": 0.1718, "step": 28280 }, { "epoch": 92.72459016393442, "grad_norm": 2.17535662651062, "learning_rate": 2.763317834541157e-07, "loss": 0.1015, "step": 28281 }, { "epoch": 92.72786885245901, "grad_norm": 2.7193961143493652, "learning_rate": 2.760839319769792e-07, "loss": 0.0776, "step": 28282 }, { "epoch": 92.73114754098361, "grad_norm": 2.7674269676208496, "learning_rate": 2.758361901471085e-07, "loss": 0.15, "step": 28283 }, { "epoch": 92.7344262295082, "grad_norm": 1.9011512994766235, "learning_rate": 2.755885579672946e-07, "loss": 0.1631, "step": 28284 }, { "epoch": 92.73770491803279, "grad_norm": 2.5751450061798096, "learning_rate": 2.7534103544033185e-07, "loss": 0.0695, "step": 28285 }, { "epoch": 92.74098360655738, "grad_norm": 2.426455497741699, "learning_rate": 2.750936225690093e-07, "loss": 0.1787, "step": 28286 }, { "epoch": 92.74426229508197, "grad_norm": 2.8426249027252197, "learning_rate": 2.748463193561157e-07, "loss": 0.2816, "step": 28287 }, { "epoch": 92.74754098360656, "grad_norm": 2.1568589210510254, "learning_rate": 2.745991258044434e-07, "loss": 0.1055, "step": 28288 }, { "epoch": 92.75081967213114, "grad_norm": 2.457275390625, "learning_rate": 2.7435204191677776e-07, "loss": 0.0546, "step": 28289 }, { "epoch": 92.75409836065573, "grad_norm": 4.2560529708862305, "learning_rate": 2.741050676959045e-07, "loss": 0.0623, "step": 28290 }, { "epoch": 92.75737704918033, "grad_norm": 1.9543733596801758, "learning_rate": 2.73858203144608e-07, "loss": 0.0442, "step": 28291 }, { "epoch": 92.76065573770492, "grad_norm": 1.5199196338653564, "learning_rate": 2.736114482656749e-07, "loss": 0.0342, "step": 28292 }, { "epoch": 92.76393442622951, "grad_norm": 3.002751111984253, "learning_rate": 2.733648030618852e-07, "loss": 0.0523, "step": 28293 }, { "epoch": 92.7672131147541, "grad_norm": 1.7348272800445557, "learning_rate": 2.7311826753602e-07, "loss": 0.0464, "step": 28294 }, { "epoch": 92.77049180327869, "grad_norm": 2.1046719551086426, "learning_rate": 2.728718416908582e-07, "loss": 0.103, "step": 28295 }, { "epoch": 92.77377049180328, "grad_norm": 1.8976566791534424, "learning_rate": 2.726255255291821e-07, "loss": 0.0579, "step": 28296 }, { "epoch": 92.77704918032786, "grad_norm": 2.67032790184021, "learning_rate": 2.7237931905376714e-07, "loss": 0.0876, "step": 28297 }, { "epoch": 92.78032786885245, "grad_norm": 2.521977186203003, "learning_rate": 2.721332222673889e-07, "loss": 0.0576, "step": 28298 }, { "epoch": 92.78360655737706, "grad_norm": 2.082427501678467, "learning_rate": 2.718872351728241e-07, "loss": 0.2097, "step": 28299 }, { "epoch": 92.78688524590164, "grad_norm": 2.6628432273864746, "learning_rate": 2.7164135777284383e-07, "loss": 0.0991, "step": 28300 }, { "epoch": 92.79016393442623, "grad_norm": 2.576356887817383, "learning_rate": 2.713955900702225e-07, "loss": 0.1748, "step": 28301 }, { "epoch": 92.79344262295082, "grad_norm": 1.98890221118927, "learning_rate": 2.711499320677324e-07, "loss": 0.0495, "step": 28302 }, { "epoch": 92.79672131147541, "grad_norm": 1.9907169342041016, "learning_rate": 2.7090438376814135e-07, "loss": 0.1079, "step": 28303 }, { "epoch": 92.8, "grad_norm": 2.09132981300354, "learning_rate": 2.706589451742181e-07, "loss": 0.0695, "step": 28304 }, { "epoch": 92.80327868852459, "grad_norm": 1.947126030921936, "learning_rate": 2.7041361628873276e-07, "loss": 0.0815, "step": 28305 }, { "epoch": 92.80655737704917, "grad_norm": 1.962925672531128, "learning_rate": 2.7016839711444977e-07, "loss": 0.067, "step": 28306 }, { "epoch": 92.80983606557378, "grad_norm": 1.420340895652771, "learning_rate": 2.699232876541347e-07, "loss": 0.0301, "step": 28307 }, { "epoch": 92.81311475409836, "grad_norm": 2.7619121074676514, "learning_rate": 2.6967828791055083e-07, "loss": 0.151, "step": 28308 }, { "epoch": 92.81639344262295, "grad_norm": 2.499074935913086, "learning_rate": 2.6943339788646163e-07, "loss": 0.1517, "step": 28309 }, { "epoch": 92.81967213114754, "grad_norm": 2.5490965843200684, "learning_rate": 2.691886175846281e-07, "loss": 0.1409, "step": 28310 }, { "epoch": 92.82295081967213, "grad_norm": 2.140890121459961, "learning_rate": 2.689439470078092e-07, "loss": 0.0671, "step": 28311 }, { "epoch": 92.82622950819672, "grad_norm": 2.6634883880615234, "learning_rate": 2.6869938615876723e-07, "loss": 0.2378, "step": 28312 }, { "epoch": 92.8295081967213, "grad_norm": 2.419590711593628, "learning_rate": 2.6845493504025657e-07, "loss": 0.0693, "step": 28313 }, { "epoch": 92.8327868852459, "grad_norm": 2.001805305480957, "learning_rate": 2.68210593655035e-07, "loss": 0.1193, "step": 28314 }, { "epoch": 92.8360655737705, "grad_norm": 1.186440348625183, "learning_rate": 2.6796636200585593e-07, "loss": 0.016, "step": 28315 }, { "epoch": 92.83934426229509, "grad_norm": 1.3248569965362549, "learning_rate": 2.6772224009547707e-07, "loss": 0.0234, "step": 28316 }, { "epoch": 92.84262295081967, "grad_norm": 2.006819725036621, "learning_rate": 2.674782279266486e-07, "loss": 0.1264, "step": 28317 }, { "epoch": 92.84590163934426, "grad_norm": 1.4793760776519775, "learning_rate": 2.6723432550212146e-07, "loss": 0.0421, "step": 28318 }, { "epoch": 92.84918032786885, "grad_norm": 1.9539755582809448, "learning_rate": 2.6699053282464693e-07, "loss": 0.0865, "step": 28319 }, { "epoch": 92.85245901639344, "grad_norm": 2.2930023670196533, "learning_rate": 2.6674684989697494e-07, "loss": 0.0512, "step": 28320 }, { "epoch": 92.85573770491803, "grad_norm": 2.3479907512664795, "learning_rate": 2.6650327672185115e-07, "loss": 0.085, "step": 28321 }, { "epoch": 92.85901639344263, "grad_norm": 3.184418201446533, "learning_rate": 2.6625981330202443e-07, "loss": 0.0959, "step": 28322 }, { "epoch": 92.86229508196722, "grad_norm": 1.712030053138733, "learning_rate": 2.6601645964023813e-07, "loss": 0.0966, "step": 28323 }, { "epoch": 92.8655737704918, "grad_norm": 2.219451904296875, "learning_rate": 2.657732157392379e-07, "loss": 0.0766, "step": 28324 }, { "epoch": 92.8688524590164, "grad_norm": 4.367158889770508, "learning_rate": 2.6553008160176476e-07, "loss": 0.1077, "step": 28325 }, { "epoch": 92.87213114754098, "grad_norm": 2.5726685523986816, "learning_rate": 2.65287057230561e-07, "loss": 0.1403, "step": 28326 }, { "epoch": 92.87540983606557, "grad_norm": 2.581162452697754, "learning_rate": 2.650441426283679e-07, "loss": 0.1417, "step": 28327 }, { "epoch": 92.87868852459016, "grad_norm": 2.6130106449127197, "learning_rate": 2.6480133779792417e-07, "loss": 0.1549, "step": 28328 }, { "epoch": 92.88196721311475, "grad_norm": 2.648531198501587, "learning_rate": 2.6455864274196664e-07, "loss": 0.1056, "step": 28329 }, { "epoch": 92.88524590163935, "grad_norm": 2.12884783744812, "learning_rate": 2.6431605746323196e-07, "loss": 0.038, "step": 28330 }, { "epoch": 92.88852459016394, "grad_norm": 2.2804009914398193, "learning_rate": 2.64073581964458e-07, "loss": 0.1641, "step": 28331 }, { "epoch": 92.89180327868853, "grad_norm": 2.500446319580078, "learning_rate": 2.638312162483769e-07, "loss": 0.1246, "step": 28332 }, { "epoch": 92.89508196721312, "grad_norm": 1.5751374959945679, "learning_rate": 2.635889603177222e-07, "loss": 0.1381, "step": 28333 }, { "epoch": 92.8983606557377, "grad_norm": 2.557283639907837, "learning_rate": 2.6334681417522377e-07, "loss": 0.1968, "step": 28334 }, { "epoch": 92.90163934426229, "grad_norm": 2.723376512527466, "learning_rate": 2.631047778236151e-07, "loss": 0.1299, "step": 28335 }, { "epoch": 92.90491803278688, "grad_norm": 2.383227825164795, "learning_rate": 2.6286285126562395e-07, "loss": 0.0724, "step": 28336 }, { "epoch": 92.90819672131147, "grad_norm": 2.2435824871063232, "learning_rate": 2.6262103450397703e-07, "loss": 0.0706, "step": 28337 }, { "epoch": 92.91147540983607, "grad_norm": 2.6211535930633545, "learning_rate": 2.623793275414033e-07, "loss": 0.0679, "step": 28338 }, { "epoch": 92.91475409836066, "grad_norm": 2.0883147716522217, "learning_rate": 2.621377303806261e-07, "loss": 0.1043, "step": 28339 }, { "epoch": 92.91803278688525, "grad_norm": 6.423635005950928, "learning_rate": 2.618962430243721e-07, "loss": 0.0905, "step": 28340 }, { "epoch": 92.92131147540984, "grad_norm": 2.6017870903015137, "learning_rate": 2.6165486547536255e-07, "loss": 0.0822, "step": 28341 }, { "epoch": 92.92459016393443, "grad_norm": 9.334217071533203, "learning_rate": 2.614135977363208e-07, "loss": 0.271, "step": 28342 }, { "epoch": 92.92786885245901, "grad_norm": 1.9245258569717407, "learning_rate": 2.6117243980996356e-07, "loss": 0.0998, "step": 28343 }, { "epoch": 92.9311475409836, "grad_norm": 2.761310338973999, "learning_rate": 2.609313916990153e-07, "loss": 0.1167, "step": 28344 }, { "epoch": 92.93442622950819, "grad_norm": 1.753244400024414, "learning_rate": 2.6069045340619157e-07, "loss": 0.0419, "step": 28345 }, { "epoch": 92.9377049180328, "grad_norm": 2.446993589401245, "learning_rate": 2.604496249342081e-07, "loss": 0.1132, "step": 28346 }, { "epoch": 92.94098360655738, "grad_norm": 2.4320971965789795, "learning_rate": 2.6020890628578153e-07, "loss": 0.03, "step": 28347 }, { "epoch": 92.94426229508197, "grad_norm": 2.649672031402588, "learning_rate": 2.599682974636275e-07, "loss": 0.1553, "step": 28348 }, { "epoch": 92.94754098360656, "grad_norm": 3.1082708835601807, "learning_rate": 2.5972779847045826e-07, "loss": 0.0975, "step": 28349 }, { "epoch": 92.95081967213115, "grad_norm": 1.8084670305252075, "learning_rate": 2.59487409308985e-07, "loss": 0.0315, "step": 28350 }, { "epoch": 92.95409836065573, "grad_norm": 1.9023258686065674, "learning_rate": 2.592471299819188e-07, "loss": 0.1268, "step": 28351 }, { "epoch": 92.95737704918032, "grad_norm": 1.5194191932678223, "learning_rate": 2.590069604919687e-07, "loss": 0.0206, "step": 28352 }, { "epoch": 92.96065573770491, "grad_norm": 2.5264828205108643, "learning_rate": 2.5876690084184366e-07, "loss": 0.0334, "step": 28353 }, { "epoch": 92.96393442622951, "grad_norm": 2.747715711593628, "learning_rate": 2.585269510342503e-07, "loss": 0.0978, "step": 28354 }, { "epoch": 92.9672131147541, "grad_norm": 2.155839204788208, "learning_rate": 2.582871110718943e-07, "loss": 0.1514, "step": 28355 }, { "epoch": 92.97049180327869, "grad_norm": 3.4237191677093506, "learning_rate": 2.5804738095747793e-07, "loss": 0.0682, "step": 28356 }, { "epoch": 92.97377049180328, "grad_norm": 1.784644603729248, "learning_rate": 2.5780776069370794e-07, "loss": 0.1403, "step": 28357 }, { "epoch": 92.97704918032787, "grad_norm": 2.2716634273529053, "learning_rate": 2.5756825028328546e-07, "loss": 0.0929, "step": 28358 }, { "epoch": 92.98032786885246, "grad_norm": 1.954646110534668, "learning_rate": 2.573288497289106e-07, "loss": 0.1874, "step": 28359 }, { "epoch": 92.98360655737704, "grad_norm": 1.940795660018921, "learning_rate": 2.5708955903328116e-07, "loss": 0.0662, "step": 28360 }, { "epoch": 92.98688524590163, "grad_norm": 2.095506191253662, "learning_rate": 2.568503781990983e-07, "loss": 0.0656, "step": 28361 }, { "epoch": 92.99016393442623, "grad_norm": 3.1282129287719727, "learning_rate": 2.566113072290577e-07, "loss": 0.2658, "step": 28362 }, { "epoch": 92.99344262295082, "grad_norm": 1.7230006456375122, "learning_rate": 2.563723461258549e-07, "loss": 0.0379, "step": 28363 }, { "epoch": 92.99672131147541, "grad_norm": 2.8031790256500244, "learning_rate": 2.5613349489218454e-07, "loss": 0.1455, "step": 28364 }, { "epoch": 93.0, "grad_norm": 1.6692992448806763, "learning_rate": 2.5589475353073987e-07, "loss": 0.0544, "step": 28365 }, { "epoch": 93.00327868852459, "grad_norm": 1.690517544746399, "learning_rate": 2.556561220442144e-07, "loss": 0.0887, "step": 28366 }, { "epoch": 93.00655737704918, "grad_norm": 4.115451335906982, "learning_rate": 2.5541760043529597e-07, "loss": 0.2025, "step": 28367 }, { "epoch": 93.00983606557377, "grad_norm": 2.4326703548431396, "learning_rate": 2.551791887066768e-07, "loss": 0.197, "step": 28368 }, { "epoch": 93.01311475409837, "grad_norm": 2.428297281265259, "learning_rate": 2.549408868610448e-07, "loss": 0.073, "step": 28369 }, { "epoch": 93.01639344262296, "grad_norm": 2.080599546432495, "learning_rate": 2.5470269490108556e-07, "loss": 0.0505, "step": 28370 }, { "epoch": 93.01967213114754, "grad_norm": 2.433189630508423, "learning_rate": 2.54464612829487e-07, "loss": 0.0975, "step": 28371 }, { "epoch": 93.02295081967213, "grad_norm": 2.7599897384643555, "learning_rate": 2.5422664064893244e-07, "loss": 0.0815, "step": 28372 }, { "epoch": 93.02622950819672, "grad_norm": 2.506761312484741, "learning_rate": 2.5398877836210534e-07, "loss": 0.1211, "step": 28373 }, { "epoch": 93.02950819672131, "grad_norm": 2.8410303592681885, "learning_rate": 2.53751025971688e-07, "loss": 0.1226, "step": 28374 }, { "epoch": 93.0327868852459, "grad_norm": 1.9567230939865112, "learning_rate": 2.535133834803627e-07, "loss": 0.086, "step": 28375 }, { "epoch": 93.03606557377049, "grad_norm": 3.0495667457580566, "learning_rate": 2.5327585089080733e-07, "loss": 0.1171, "step": 28376 }, { "epoch": 93.03934426229509, "grad_norm": 2.8811194896698, "learning_rate": 2.530384282056997e-07, "loss": 0.1368, "step": 28377 }, { "epoch": 93.04262295081968, "grad_norm": 2.2630326747894287, "learning_rate": 2.5280111542771877e-07, "loss": 0.1713, "step": 28378 }, { "epoch": 93.04590163934427, "grad_norm": 1.4440693855285645, "learning_rate": 2.5256391255953915e-07, "loss": 0.0716, "step": 28379 }, { "epoch": 93.04918032786885, "grad_norm": 2.308466672897339, "learning_rate": 2.5232681960383754e-07, "loss": 0.0581, "step": 28380 }, { "epoch": 93.05245901639344, "grad_norm": 3.6764228343963623, "learning_rate": 2.5208983656328513e-07, "loss": 0.2086, "step": 28381 }, { "epoch": 93.05573770491803, "grad_norm": 1.997995376586914, "learning_rate": 2.518529634405553e-07, "loss": 0.0865, "step": 28382 }, { "epoch": 93.05901639344262, "grad_norm": 2.3345019817352295, "learning_rate": 2.5161620023831823e-07, "loss": 0.0447, "step": 28383 }, { "epoch": 93.0622950819672, "grad_norm": 2.484011173248291, "learning_rate": 2.51379546959245e-07, "loss": 0.0912, "step": 28384 }, { "epoch": 93.06557377049181, "grad_norm": 2.4192028045654297, "learning_rate": 2.5114300360600363e-07, "loss": 0.0571, "step": 28385 }, { "epoch": 93.0688524590164, "grad_norm": 3.5345025062561035, "learning_rate": 2.509065701812607e-07, "loss": 0.1041, "step": 28386 }, { "epoch": 93.07213114754099, "grad_norm": 2.5443809032440186, "learning_rate": 2.5067024668768313e-07, "loss": 0.1691, "step": 28387 }, { "epoch": 93.07540983606557, "grad_norm": 2.7993922233581543, "learning_rate": 2.5043403312793535e-07, "loss": 0.0433, "step": 28388 }, { "epoch": 93.07868852459016, "grad_norm": 5.165956020355225, "learning_rate": 2.501979295046808e-07, "loss": 0.1492, "step": 28389 }, { "epoch": 93.08196721311475, "grad_norm": 2.4083590507507324, "learning_rate": 2.4996193582058183e-07, "loss": 0.1106, "step": 28390 }, { "epoch": 93.08524590163934, "grad_norm": 1.7793524265289307, "learning_rate": 2.497260520782985e-07, "loss": 0.0375, "step": 28391 }, { "epoch": 93.08852459016393, "grad_norm": 2.2386865615844727, "learning_rate": 2.49490278280492e-07, "loss": 0.0885, "step": 28392 }, { "epoch": 93.09180327868853, "grad_norm": 1.8238142728805542, "learning_rate": 2.4925461442982136e-07, "loss": 0.1594, "step": 28393 }, { "epoch": 93.09508196721312, "grad_norm": 1.9586857557296753, "learning_rate": 2.490190605289433e-07, "loss": 0.03, "step": 28394 }, { "epoch": 93.09836065573771, "grad_norm": 1.8810518980026245, "learning_rate": 2.487836165805124e-07, "loss": 0.0493, "step": 28395 }, { "epoch": 93.1016393442623, "grad_norm": 2.2450411319732666, "learning_rate": 2.4854828258718653e-07, "loss": 0.1, "step": 28396 }, { "epoch": 93.10491803278688, "grad_norm": 2.408540964126587, "learning_rate": 2.483130585516169e-07, "loss": 0.2344, "step": 28397 }, { "epoch": 93.10819672131147, "grad_norm": 2.154710531234741, "learning_rate": 2.480779444764569e-07, "loss": 0.064, "step": 28398 }, { "epoch": 93.11147540983606, "grad_norm": 2.410315990447998, "learning_rate": 2.4784294036435673e-07, "loss": 0.1397, "step": 28399 }, { "epoch": 93.11475409836065, "grad_norm": 2.468510627746582, "learning_rate": 2.476080462179686e-07, "loss": 0.0604, "step": 28400 }, { "epoch": 93.11803278688525, "grad_norm": 2.1377646923065186, "learning_rate": 2.473732620399394e-07, "loss": 0.197, "step": 28401 }, { "epoch": 93.12131147540984, "grad_norm": 2.7124085426330566, "learning_rate": 2.4713858783291686e-07, "loss": 0.1196, "step": 28402 }, { "epoch": 93.12459016393443, "grad_norm": 1.909329891204834, "learning_rate": 2.469040235995468e-07, "loss": 0.1176, "step": 28403 }, { "epoch": 93.12786885245902, "grad_norm": 2.8667376041412354, "learning_rate": 2.466695693424737e-07, "loss": 0.105, "step": 28404 }, { "epoch": 93.1311475409836, "grad_norm": 2.4259331226348877, "learning_rate": 2.4643522506434313e-07, "loss": 0.212, "step": 28405 }, { "epoch": 93.1344262295082, "grad_norm": 1.8330918550491333, "learning_rate": 2.462009907677976e-07, "loss": 0.1386, "step": 28406 }, { "epoch": 93.13770491803278, "grad_norm": 2.05287504196167, "learning_rate": 2.4596686645547596e-07, "loss": 0.1445, "step": 28407 }, { "epoch": 93.14098360655737, "grad_norm": 2.5967628955841064, "learning_rate": 2.457328521300195e-07, "loss": 0.1005, "step": 28408 }, { "epoch": 93.14426229508197, "grad_norm": 2.551982879638672, "learning_rate": 2.4549894779406725e-07, "loss": 0.1499, "step": 28409 }, { "epoch": 93.14754098360656, "grad_norm": 2.017275810241699, "learning_rate": 2.4526515345025706e-07, "loss": 0.0768, "step": 28410 }, { "epoch": 93.15081967213115, "grad_norm": 3.032552480697632, "learning_rate": 2.4503146910122345e-07, "loss": 0.0687, "step": 28411 }, { "epoch": 93.15409836065574, "grad_norm": 1.80006742477417, "learning_rate": 2.4479789474960325e-07, "loss": 0.1666, "step": 28412 }, { "epoch": 93.15737704918033, "grad_norm": 2.246438503265381, "learning_rate": 2.4456443039802993e-07, "loss": 0.1323, "step": 28413 }, { "epoch": 93.16065573770491, "grad_norm": 1.6569812297821045, "learning_rate": 2.4433107604913575e-07, "loss": 0.0261, "step": 28414 }, { "epoch": 93.1639344262295, "grad_norm": 3.3319058418273926, "learning_rate": 2.44097831705552e-07, "loss": 0.0295, "step": 28415 }, { "epoch": 93.1672131147541, "grad_norm": 2.466304063796997, "learning_rate": 2.438646973699088e-07, "loss": 0.2171, "step": 28416 }, { "epoch": 93.1704918032787, "grad_norm": 1.7546513080596924, "learning_rate": 2.4363167304483404e-07, "loss": 0.042, "step": 28417 }, { "epoch": 93.17377049180328, "grad_norm": 2.4890644550323486, "learning_rate": 2.433987587329567e-07, "loss": 0.1303, "step": 28418 }, { "epoch": 93.17704918032787, "grad_norm": 2.403866767883301, "learning_rate": 2.4316595443690363e-07, "loss": 0.1474, "step": 28419 }, { "epoch": 93.18032786885246, "grad_norm": 2.027310609817505, "learning_rate": 2.429332601592982e-07, "loss": 0.0484, "step": 28420 }, { "epoch": 93.18360655737705, "grad_norm": 1.5218229293823242, "learning_rate": 2.4270067590276505e-07, "loss": 0.0378, "step": 28421 }, { "epoch": 93.18688524590164, "grad_norm": 4.041204929351807, "learning_rate": 2.424682016699276e-07, "loss": 0.0568, "step": 28422 }, { "epoch": 93.19016393442622, "grad_norm": 1.4974749088287354, "learning_rate": 2.4223583746340486e-07, "loss": 0.1148, "step": 28423 }, { "epoch": 93.19344262295083, "grad_norm": 2.070659637451172, "learning_rate": 2.420035832858192e-07, "loss": 0.063, "step": 28424 }, { "epoch": 93.19672131147541, "grad_norm": 2.6747820377349854, "learning_rate": 2.417714391397896e-07, "loss": 0.0498, "step": 28425 }, { "epoch": 93.2, "grad_norm": 2.1500372886657715, "learning_rate": 2.4153940502793185e-07, "loss": 0.0731, "step": 28426 }, { "epoch": 93.20327868852459, "grad_norm": 2.872292995452881, "learning_rate": 2.4130748095286484e-07, "loss": 0.1056, "step": 28427 }, { "epoch": 93.20655737704918, "grad_norm": 2.8095784187316895, "learning_rate": 2.410756669172032e-07, "loss": 0.0941, "step": 28428 }, { "epoch": 93.20983606557377, "grad_norm": 2.6683194637298584, "learning_rate": 2.4084396292355814e-07, "loss": 0.1862, "step": 28429 }, { "epoch": 93.21311475409836, "grad_norm": 3.190842390060425, "learning_rate": 2.4061236897454544e-07, "loss": 0.1056, "step": 28430 }, { "epoch": 93.21639344262294, "grad_norm": 2.294369697570801, "learning_rate": 2.4038088507277513e-07, "loss": 0.0875, "step": 28431 }, { "epoch": 93.21967213114755, "grad_norm": 1.6656633615493774, "learning_rate": 2.401495112208585e-07, "loss": 0.0353, "step": 28432 }, { "epoch": 93.22295081967214, "grad_norm": 2.4119679927825928, "learning_rate": 2.399182474214035e-07, "loss": 0.179, "step": 28433 }, { "epoch": 93.22622950819672, "grad_norm": 1.9562488794326782, "learning_rate": 2.396870936770168e-07, "loss": 0.1078, "step": 28434 }, { "epoch": 93.22950819672131, "grad_norm": 1.8907421827316284, "learning_rate": 2.394560499903087e-07, "loss": 0.11, "step": 28435 }, { "epoch": 93.2327868852459, "grad_norm": 2.2597544193267822, "learning_rate": 2.3922511636388035e-07, "loss": 0.1429, "step": 28436 }, { "epoch": 93.23606557377049, "grad_norm": 1.7902207374572754, "learning_rate": 2.3899429280033856e-07, "loss": 0.0866, "step": 28437 }, { "epoch": 93.23934426229508, "grad_norm": 2.412048816680908, "learning_rate": 2.387635793022836e-07, "loss": 0.0672, "step": 28438 }, { "epoch": 93.24262295081967, "grad_norm": 2.459563970565796, "learning_rate": 2.3853297587231984e-07, "loss": 0.0577, "step": 28439 }, { "epoch": 93.24590163934427, "grad_norm": 2.813563346862793, "learning_rate": 2.3830248251304533e-07, "loss": 0.0387, "step": 28440 }, { "epoch": 93.24918032786886, "grad_norm": 2.6807656288146973, "learning_rate": 2.3807209922706132e-07, "loss": 0.1748, "step": 28441 }, { "epoch": 93.25245901639344, "grad_norm": 2.297247886657715, "learning_rate": 2.3784182601696236e-07, "loss": 0.1143, "step": 28442 }, { "epoch": 93.25573770491803, "grad_norm": 2.587817668914795, "learning_rate": 2.3761166288534754e-07, "loss": 0.1052, "step": 28443 }, { "epoch": 93.25901639344262, "grad_norm": 2.226651906967163, "learning_rate": 2.373816098348114e-07, "loss": 0.1943, "step": 28444 }, { "epoch": 93.26229508196721, "grad_norm": 2.327597141265869, "learning_rate": 2.3715166686794967e-07, "loss": 0.1715, "step": 28445 }, { "epoch": 93.2655737704918, "grad_norm": 2.2374532222747803, "learning_rate": 2.3692183398735246e-07, "loss": 0.0717, "step": 28446 }, { "epoch": 93.26885245901639, "grad_norm": 2.4229960441589355, "learning_rate": 2.366921111956122e-07, "loss": 0.1656, "step": 28447 }, { "epoch": 93.27213114754099, "grad_norm": 1.1807572841644287, "learning_rate": 2.3646249849532012e-07, "loss": 0.0182, "step": 28448 }, { "epoch": 93.27540983606558, "grad_norm": 2.1716408729553223, "learning_rate": 2.3623299588906524e-07, "loss": 0.1717, "step": 28449 }, { "epoch": 93.27868852459017, "grad_norm": 1.4712605476379395, "learning_rate": 2.3600360337943552e-07, "loss": 0.1171, "step": 28450 }, { "epoch": 93.28196721311475, "grad_norm": 2.5055456161499023, "learning_rate": 2.3577432096901554e-07, "loss": 0.1627, "step": 28451 }, { "epoch": 93.28524590163934, "grad_norm": 1.6780236959457397, "learning_rate": 2.3554514866039325e-07, "loss": 0.0465, "step": 28452 }, { "epoch": 93.28852459016393, "grad_norm": 2.0286970138549805, "learning_rate": 2.353160864561521e-07, "loss": 0.0619, "step": 28453 }, { "epoch": 93.29180327868852, "grad_norm": 2.0263125896453857, "learning_rate": 2.3508713435887563e-07, "loss": 0.0757, "step": 28454 }, { "epoch": 93.29508196721312, "grad_norm": 1.7070599794387817, "learning_rate": 2.34858292371144e-07, "loss": 0.0331, "step": 28455 }, { "epoch": 93.29836065573771, "grad_norm": 3.2718780040740967, "learning_rate": 2.346295604955373e-07, "loss": 0.0875, "step": 28456 }, { "epoch": 93.3016393442623, "grad_norm": 2.194279432296753, "learning_rate": 2.3440093873463689e-07, "loss": 0.0941, "step": 28457 }, { "epoch": 93.30491803278689, "grad_norm": 1.7356045246124268, "learning_rate": 2.3417242709101951e-07, "loss": 0.1129, "step": 28458 }, { "epoch": 93.30819672131148, "grad_norm": 1.7001166343688965, "learning_rate": 2.3394402556726093e-07, "loss": 0.057, "step": 28459 }, { "epoch": 93.31147540983606, "grad_norm": 2.1716349124908447, "learning_rate": 2.3371573416593795e-07, "loss": 0.0509, "step": 28460 }, { "epoch": 93.31475409836065, "grad_norm": 2.2087526321411133, "learning_rate": 2.334875528896252e-07, "loss": 0.1019, "step": 28461 }, { "epoch": 93.31803278688524, "grad_norm": 3.1276166439056396, "learning_rate": 2.3325948174089507e-07, "loss": 0.0757, "step": 28462 }, { "epoch": 93.32131147540984, "grad_norm": 1.665633201599121, "learning_rate": 2.3303152072231883e-07, "loss": 0.0469, "step": 28463 }, { "epoch": 93.32459016393443, "grad_norm": 1.9265596866607666, "learning_rate": 2.328036698364655e-07, "loss": 0.0757, "step": 28464 }, { "epoch": 93.32786885245902, "grad_norm": 1.6534459590911865, "learning_rate": 2.3257592908590863e-07, "loss": 0.1053, "step": 28465 }, { "epoch": 93.33114754098361, "grad_norm": 2.0305843353271484, "learning_rate": 2.3234829847321283e-07, "loss": 0.0395, "step": 28466 }, { "epoch": 93.3344262295082, "grad_norm": 2.2406482696533203, "learning_rate": 2.32120778000946e-07, "loss": 0.1244, "step": 28467 }, { "epoch": 93.33770491803278, "grad_norm": 2.9075117111206055, "learning_rate": 2.318933676716728e-07, "loss": 0.107, "step": 28468 }, { "epoch": 93.34098360655737, "grad_norm": 1.9483568668365479, "learning_rate": 2.3166606748795782e-07, "loss": 0.1052, "step": 28469 }, { "epoch": 93.34426229508196, "grad_norm": 2.4742982387542725, "learning_rate": 2.3143887745236572e-07, "loss": 0.1001, "step": 28470 }, { "epoch": 93.34754098360656, "grad_norm": 3.0622599124908447, "learning_rate": 2.312117975674566e-07, "loss": 0.1846, "step": 28471 }, { "epoch": 93.35081967213115, "grad_norm": 2.015831708908081, "learning_rate": 2.3098482783579068e-07, "loss": 0.0421, "step": 28472 }, { "epoch": 93.35409836065574, "grad_norm": 3.244333028793335, "learning_rate": 2.3075796825992924e-07, "loss": 0.0559, "step": 28473 }, { "epoch": 93.35737704918033, "grad_norm": 2.355642557144165, "learning_rate": 2.3053121884242912e-07, "loss": 0.0821, "step": 28474 }, { "epoch": 93.36065573770492, "grad_norm": 3.2272582054138184, "learning_rate": 2.3030457958584605e-07, "loss": 0.2546, "step": 28475 }, { "epoch": 93.3639344262295, "grad_norm": 2.5922937393188477, "learning_rate": 2.300780504927369e-07, "loss": 0.078, "step": 28476 }, { "epoch": 93.3672131147541, "grad_norm": 2.225545644760132, "learning_rate": 2.2985163156565736e-07, "loss": 0.1333, "step": 28477 }, { "epoch": 93.37049180327868, "grad_norm": 2.2181925773620605, "learning_rate": 2.2962532280715765e-07, "loss": 0.0998, "step": 28478 }, { "epoch": 93.37377049180328, "grad_norm": 1.5533709526062012, "learning_rate": 2.2939912421979126e-07, "loss": 0.0906, "step": 28479 }, { "epoch": 93.37704918032787, "grad_norm": 2.660947561264038, "learning_rate": 2.291730358061095e-07, "loss": 0.2212, "step": 28480 }, { "epoch": 93.38032786885246, "grad_norm": 2.318049430847168, "learning_rate": 2.2894705756866032e-07, "loss": 0.1434, "step": 28481 }, { "epoch": 93.38360655737705, "grad_norm": 1.9279415607452393, "learning_rate": 2.2872118950999168e-07, "loss": 0.2102, "step": 28482 }, { "epoch": 93.38688524590164, "grad_norm": 2.931607723236084, "learning_rate": 2.2849543163265265e-07, "loss": 0.1276, "step": 28483 }, { "epoch": 93.39016393442623, "grad_norm": 1.6970425844192505, "learning_rate": 2.2826978393918674e-07, "loss": 0.0313, "step": 28484 }, { "epoch": 93.39344262295081, "grad_norm": 2.599125862121582, "learning_rate": 2.2804424643213974e-07, "loss": 0.2136, "step": 28485 }, { "epoch": 93.3967213114754, "grad_norm": 1.1894168853759766, "learning_rate": 2.278188191140529e-07, "loss": 0.0182, "step": 28486 }, { "epoch": 93.4, "grad_norm": 2.248945951461792, "learning_rate": 2.2759350198746978e-07, "loss": 0.0761, "step": 28487 }, { "epoch": 93.4032786885246, "grad_norm": 1.8210612535476685, "learning_rate": 2.2736829505493163e-07, "loss": 0.0517, "step": 28488 }, { "epoch": 93.40655737704918, "grad_norm": 2.7387444972991943, "learning_rate": 2.2714319831897648e-07, "loss": 0.1067, "step": 28489 }, { "epoch": 93.40983606557377, "grad_norm": 4.7150959968566895, "learning_rate": 2.2691821178214114e-07, "loss": 0.07, "step": 28490 }, { "epoch": 93.41311475409836, "grad_norm": 1.7505903244018555, "learning_rate": 2.2669333544696693e-07, "loss": 0.1851, "step": 28491 }, { "epoch": 93.41639344262295, "grad_norm": 2.2335269451141357, "learning_rate": 2.2646856931598626e-07, "loss": 0.0926, "step": 28492 }, { "epoch": 93.41967213114754, "grad_norm": 2.414188861846924, "learning_rate": 2.2624391339173379e-07, "loss": 0.0807, "step": 28493 }, { "epoch": 93.42295081967212, "grad_norm": 2.7413158416748047, "learning_rate": 2.2601936767674416e-07, "loss": 0.2304, "step": 28494 }, { "epoch": 93.42622950819673, "grad_norm": 2.179030418395996, "learning_rate": 2.2579493217354753e-07, "loss": 0.1077, "step": 28495 }, { "epoch": 93.42950819672132, "grad_norm": 2.4489378929138184, "learning_rate": 2.2557060688467748e-07, "loss": 0.0652, "step": 28496 }, { "epoch": 93.4327868852459, "grad_norm": 3.0014994144439697, "learning_rate": 2.2534639181265972e-07, "loss": 0.0677, "step": 28497 }, { "epoch": 93.43606557377049, "grad_norm": 2.4093282222747803, "learning_rate": 2.2512228696002558e-07, "loss": 0.1649, "step": 28498 }, { "epoch": 93.43934426229508, "grad_norm": 2.2323076725006104, "learning_rate": 2.2489829232930082e-07, "loss": 0.1052, "step": 28499 }, { "epoch": 93.44262295081967, "grad_norm": 2.6080844402313232, "learning_rate": 2.246744079230112e-07, "loss": 0.0947, "step": 28500 }, { "epoch": 93.44590163934426, "grad_norm": 2.934175491333008, "learning_rate": 2.2445063374368137e-07, "loss": 0.0899, "step": 28501 }, { "epoch": 93.44918032786886, "grad_norm": 1.6316890716552734, "learning_rate": 2.2422696979383595e-07, "loss": 0.1076, "step": 28502 }, { "epoch": 93.45245901639345, "grad_norm": 2.297273874282837, "learning_rate": 2.2400341607599296e-07, "loss": 0.1217, "step": 28503 }, { "epoch": 93.45573770491804, "grad_norm": 1.7003817558288574, "learning_rate": 2.2377997259267815e-07, "loss": 0.132, "step": 28504 }, { "epoch": 93.45901639344262, "grad_norm": 1.9669451713562012, "learning_rate": 2.2355663934640837e-07, "loss": 0.0675, "step": 28505 }, { "epoch": 93.46229508196721, "grad_norm": 2.489962339401245, "learning_rate": 2.2333341633970273e-07, "loss": 0.0675, "step": 28506 }, { "epoch": 93.4655737704918, "grad_norm": 1.9352068901062012, "learning_rate": 2.2311030357507812e-07, "loss": 0.1412, "step": 28507 }, { "epoch": 93.46885245901639, "grad_norm": 2.2966926097869873, "learning_rate": 2.2288730105504918e-07, "loss": 0.0723, "step": 28508 }, { "epoch": 93.47213114754098, "grad_norm": 2.195936918258667, "learning_rate": 2.2266440878213168e-07, "loss": 0.1168, "step": 28509 }, { "epoch": 93.47540983606558, "grad_norm": 2.041222095489502, "learning_rate": 2.2244162675883918e-07, "loss": 0.1558, "step": 28510 }, { "epoch": 93.47868852459017, "grad_norm": 2.617830276489258, "learning_rate": 2.222189549876841e-07, "loss": 0.1412, "step": 28511 }, { "epoch": 93.48196721311476, "grad_norm": 3.0888054370880127, "learning_rate": 2.2199639347117552e-07, "loss": 0.1433, "step": 28512 }, { "epoch": 93.48524590163935, "grad_norm": 2.0388548374176025, "learning_rate": 2.217739422118248e-07, "loss": 0.1189, "step": 28513 }, { "epoch": 93.48852459016393, "grad_norm": 2.6319353580474854, "learning_rate": 2.215516012121399e-07, "loss": 0.1958, "step": 28514 }, { "epoch": 93.49180327868852, "grad_norm": 2.6882145404815674, "learning_rate": 2.2132937047462777e-07, "loss": 0.0821, "step": 28515 }, { "epoch": 93.49508196721311, "grad_norm": 2.751931667327881, "learning_rate": 2.21107250001793e-07, "loss": 0.0516, "step": 28516 }, { "epoch": 93.4983606557377, "grad_norm": 2.526024103164673, "learning_rate": 2.2088523979614363e-07, "loss": 0.0576, "step": 28517 }, { "epoch": 93.5016393442623, "grad_norm": 1.8095883131027222, "learning_rate": 2.2066333986017986e-07, "loss": 0.1099, "step": 28518 }, { "epoch": 93.50491803278689, "grad_norm": 2.337740898132324, "learning_rate": 2.2044155019640412e-07, "loss": 0.2863, "step": 28519 }, { "epoch": 93.50819672131148, "grad_norm": 1.9272617101669312, "learning_rate": 2.2021987080732e-07, "loss": 0.0606, "step": 28520 }, { "epoch": 93.51147540983607, "grad_norm": 2.7286298274993896, "learning_rate": 2.1999830169542325e-07, "loss": 0.1524, "step": 28521 }, { "epoch": 93.51475409836065, "grad_norm": 2.057511329650879, "learning_rate": 2.197768428632152e-07, "loss": 0.1148, "step": 28522 }, { "epoch": 93.51803278688524, "grad_norm": 2.678360939025879, "learning_rate": 2.1955549431319168e-07, "loss": 0.1075, "step": 28523 }, { "epoch": 93.52131147540983, "grad_norm": 2.6331310272216797, "learning_rate": 2.1933425604784953e-07, "loss": 0.0855, "step": 28524 }, { "epoch": 93.52459016393442, "grad_norm": 2.2797937393188477, "learning_rate": 2.1911312806968233e-07, "loss": 0.0697, "step": 28525 }, { "epoch": 93.52786885245902, "grad_norm": 1.6392470598220825, "learning_rate": 2.1889211038118473e-07, "loss": 0.0626, "step": 28526 }, { "epoch": 93.53114754098361, "grad_norm": 2.1822965145111084, "learning_rate": 2.1867120298484924e-07, "loss": 0.1358, "step": 28527 }, { "epoch": 93.5344262295082, "grad_norm": 2.7931861877441406, "learning_rate": 2.184504058831638e-07, "loss": 0.101, "step": 28528 }, { "epoch": 93.53770491803279, "grad_norm": 1.9562779664993286, "learning_rate": 2.18229719078622e-07, "loss": 0.0348, "step": 28529 }, { "epoch": 93.54098360655738, "grad_norm": 1.75235116481781, "learning_rate": 2.1800914257371076e-07, "loss": 0.0531, "step": 28530 }, { "epoch": 93.54426229508196, "grad_norm": 2.7963502407073975, "learning_rate": 2.1778867637091584e-07, "loss": 0.0853, "step": 28531 }, { "epoch": 93.54754098360655, "grad_norm": 1.866882085800171, "learning_rate": 2.1756832047272525e-07, "loss": 0.0621, "step": 28532 }, { "epoch": 93.55081967213114, "grad_norm": 1.9063572883605957, "learning_rate": 2.1734807488162368e-07, "loss": 0.0456, "step": 28533 }, { "epoch": 93.55409836065574, "grad_norm": 1.8857877254486084, "learning_rate": 2.1712793960009248e-07, "loss": 0.0494, "step": 28534 }, { "epoch": 93.55737704918033, "grad_norm": 2.2751760482788086, "learning_rate": 2.1690791463061633e-07, "loss": 0.0856, "step": 28535 }, { "epoch": 93.56065573770492, "grad_norm": 2.1667263507843018, "learning_rate": 2.1668799997567548e-07, "loss": 0.0416, "step": 28536 }, { "epoch": 93.56393442622951, "grad_norm": 2.2809813022613525, "learning_rate": 2.1646819563774902e-07, "loss": 0.0529, "step": 28537 }, { "epoch": 93.5672131147541, "grad_norm": 2.1176817417144775, "learning_rate": 2.162485016193161e-07, "loss": 0.0631, "step": 28538 }, { "epoch": 93.57049180327868, "grad_norm": 2.9949560165405273, "learning_rate": 2.1602891792285364e-07, "loss": 0.0966, "step": 28539 }, { "epoch": 93.57377049180327, "grad_norm": 2.646106719970703, "learning_rate": 2.1580944455083852e-07, "loss": 0.0932, "step": 28540 }, { "epoch": 93.57704918032788, "grad_norm": 2.7261266708374023, "learning_rate": 2.1559008150574544e-07, "loss": 0.1469, "step": 28541 }, { "epoch": 93.58032786885246, "grad_norm": 2.7456281185150146, "learning_rate": 2.1537082879004578e-07, "loss": 0.112, "step": 28542 }, { "epoch": 93.58360655737705, "grad_norm": 2.5289981365203857, "learning_rate": 2.151516864062142e-07, "loss": 0.1168, "step": 28543 }, { "epoch": 93.58688524590164, "grad_norm": 2.1677284240722656, "learning_rate": 2.1493265435672205e-07, "loss": 0.1185, "step": 28544 }, { "epoch": 93.59016393442623, "grad_norm": 2.285599946975708, "learning_rate": 2.1471373264403738e-07, "loss": 0.1709, "step": 28545 }, { "epoch": 93.59344262295082, "grad_norm": 2.147247076034546, "learning_rate": 2.1449492127062932e-07, "loss": 0.1138, "step": 28546 }, { "epoch": 93.5967213114754, "grad_norm": 2.9833457469940186, "learning_rate": 2.1427622023896587e-07, "loss": 0.1419, "step": 28547 }, { "epoch": 93.6, "grad_norm": 2.1659481525421143, "learning_rate": 2.1405762955151178e-07, "loss": 0.0831, "step": 28548 }, { "epoch": 93.6032786885246, "grad_norm": 2.400252103805542, "learning_rate": 2.138391492107339e-07, "loss": 0.1304, "step": 28549 }, { "epoch": 93.60655737704919, "grad_norm": 1.9719425439834595, "learning_rate": 2.1362077921909364e-07, "loss": 0.0334, "step": 28550 }, { "epoch": 93.60983606557377, "grad_norm": 2.6214494705200195, "learning_rate": 2.1340251957905456e-07, "loss": 0.0855, "step": 28551 }, { "epoch": 93.61311475409836, "grad_norm": 2.0604043006896973, "learning_rate": 2.1318437029307804e-07, "loss": 0.0535, "step": 28552 }, { "epoch": 93.61639344262295, "grad_norm": 1.8740990161895752, "learning_rate": 2.129663313636232e-07, "loss": 0.0845, "step": 28553 }, { "epoch": 93.61967213114754, "grad_norm": 2.033803939819336, "learning_rate": 2.1274840279314923e-07, "loss": 0.1271, "step": 28554 }, { "epoch": 93.62295081967213, "grad_norm": 1.9692156314849854, "learning_rate": 2.1253058458411303e-07, "loss": 0.1147, "step": 28555 }, { "epoch": 93.62622950819672, "grad_norm": 1.728478193283081, "learning_rate": 2.123128767389704e-07, "loss": 0.1125, "step": 28556 }, { "epoch": 93.62950819672132, "grad_norm": 2.1820013523101807, "learning_rate": 2.1209527926017716e-07, "loss": 0.0813, "step": 28557 }, { "epoch": 93.6327868852459, "grad_norm": 3.8545138835906982, "learning_rate": 2.1187779215018688e-07, "loss": 0.2151, "step": 28558 }, { "epoch": 93.6360655737705, "grad_norm": 3.543710947036743, "learning_rate": 2.116604154114521e-07, "loss": 0.1739, "step": 28559 }, { "epoch": 93.63934426229508, "grad_norm": 1.9700970649719238, "learning_rate": 2.1144314904642194e-07, "loss": 0.107, "step": 28560 }, { "epoch": 93.64262295081967, "grad_norm": 2.2324092388153076, "learning_rate": 2.1122599305754775e-07, "loss": 0.1442, "step": 28561 }, { "epoch": 93.64590163934426, "grad_norm": 2.663032054901123, "learning_rate": 2.1100894744727985e-07, "loss": 0.0933, "step": 28562 }, { "epoch": 93.64918032786885, "grad_norm": 4.507453441619873, "learning_rate": 2.107920122180629e-07, "loss": 0.2255, "step": 28563 }, { "epoch": 93.65245901639344, "grad_norm": 1.7653292417526245, "learning_rate": 2.1057518737234383e-07, "loss": 0.0444, "step": 28564 }, { "epoch": 93.65573770491804, "grad_norm": 4.601484298706055, "learning_rate": 2.103584729125696e-07, "loss": 0.0481, "step": 28565 }, { "epoch": 93.65901639344263, "grad_norm": 2.0014686584472656, "learning_rate": 2.101418688411816e-07, "loss": 0.1816, "step": 28566 }, { "epoch": 93.66229508196722, "grad_norm": 2.586927652359009, "learning_rate": 2.0992537516062228e-07, "loss": 0.1306, "step": 28567 }, { "epoch": 93.6655737704918, "grad_norm": 2.4359564781188965, "learning_rate": 2.0970899187333304e-07, "loss": 0.0639, "step": 28568 }, { "epoch": 93.66885245901639, "grad_norm": 2.9473750591278076, "learning_rate": 2.0949271898175528e-07, "loss": 0.1872, "step": 28569 }, { "epoch": 93.67213114754098, "grad_norm": 3.486060619354248, "learning_rate": 2.0927655648832702e-07, "loss": 0.2184, "step": 28570 }, { "epoch": 93.67540983606557, "grad_norm": 2.2729427814483643, "learning_rate": 2.0906050439548518e-07, "loss": 0.0858, "step": 28571 }, { "epoch": 93.67868852459016, "grad_norm": 1.7115617990493774, "learning_rate": 2.0884456270566676e-07, "loss": 0.0314, "step": 28572 }, { "epoch": 93.68196721311476, "grad_norm": 2.82016921043396, "learning_rate": 2.0862873142130425e-07, "loss": 0.078, "step": 28573 }, { "epoch": 93.68524590163935, "grad_norm": 2.9206008911132812, "learning_rate": 2.0841301054483453e-07, "loss": 0.1268, "step": 28574 }, { "epoch": 93.68852459016394, "grad_norm": 2.353166341781616, "learning_rate": 2.0819740007868906e-07, "loss": 0.0828, "step": 28575 }, { "epoch": 93.69180327868852, "grad_norm": 1.8959887027740479, "learning_rate": 2.0798190002529807e-07, "loss": 0.0631, "step": 28576 }, { "epoch": 93.69508196721311, "grad_norm": 2.1158905029296875, "learning_rate": 2.0776651038709184e-07, "loss": 0.0512, "step": 28577 }, { "epoch": 93.6983606557377, "grad_norm": 2.2210471630096436, "learning_rate": 2.0755123116650068e-07, "loss": 0.0751, "step": 28578 }, { "epoch": 93.70163934426229, "grad_norm": 2.279811382293701, "learning_rate": 2.0733606236595038e-07, "loss": 0.1152, "step": 28579 }, { "epoch": 93.70491803278688, "grad_norm": 2.3301844596862793, "learning_rate": 2.0712100398786795e-07, "loss": 0.0765, "step": 28580 }, { "epoch": 93.70819672131148, "grad_norm": 2.3231678009033203, "learning_rate": 2.0690605603467806e-07, "loss": 0.0918, "step": 28581 }, { "epoch": 93.71147540983607, "grad_norm": 2.208069324493408, "learning_rate": 2.0669121850880547e-07, "loss": 0.0998, "step": 28582 }, { "epoch": 93.71475409836066, "grad_norm": 2.6200668811798096, "learning_rate": 2.0647649141267158e-07, "loss": 0.0705, "step": 28583 }, { "epoch": 93.71803278688525, "grad_norm": 6.630270481109619, "learning_rate": 2.0626187474869662e-07, "loss": 0.2196, "step": 28584 }, { "epoch": 93.72131147540983, "grad_norm": 16.747682571411133, "learning_rate": 2.0604736851930317e-07, "loss": 0.1933, "step": 28585 }, { "epoch": 93.72459016393442, "grad_norm": 3.2288267612457275, "learning_rate": 2.0583297272690927e-07, "loss": 0.1893, "step": 28586 }, { "epoch": 93.72786885245901, "grad_norm": 3.739393949508667, "learning_rate": 2.0561868737393075e-07, "loss": 0.0441, "step": 28587 }, { "epoch": 93.73114754098361, "grad_norm": 1.5399627685546875, "learning_rate": 2.054045124627857e-07, "loss": 0.0285, "step": 28588 }, { "epoch": 93.7344262295082, "grad_norm": 2.2218315601348877, "learning_rate": 2.0519044799588883e-07, "loss": 0.1056, "step": 28589 }, { "epoch": 93.73770491803279, "grad_norm": 1.4001133441925049, "learning_rate": 2.0497649397565266e-07, "loss": 0.052, "step": 28590 }, { "epoch": 93.74098360655738, "grad_norm": 1.795960783958435, "learning_rate": 2.0476265040449195e-07, "loss": 0.0346, "step": 28591 }, { "epoch": 93.74426229508197, "grad_norm": 2.217897653579712, "learning_rate": 2.0454891728481695e-07, "loss": 0.0818, "step": 28592 }, { "epoch": 93.74754098360656, "grad_norm": 2.867703437805176, "learning_rate": 2.043352946190369e-07, "loss": 0.0882, "step": 28593 }, { "epoch": 93.75081967213114, "grad_norm": 2.3196702003479004, "learning_rate": 2.0412178240956204e-07, "loss": 0.0928, "step": 28594 }, { "epoch": 93.75409836065573, "grad_norm": 2.1464056968688965, "learning_rate": 2.0390838065879825e-07, "loss": 0.074, "step": 28595 }, { "epoch": 93.75737704918033, "grad_norm": 2.7104501724243164, "learning_rate": 2.036950893691536e-07, "loss": 0.2285, "step": 28596 }, { "epoch": 93.76065573770492, "grad_norm": 2.408031940460205, "learning_rate": 2.0348190854303285e-07, "loss": 0.2525, "step": 28597 }, { "epoch": 93.76393442622951, "grad_norm": 2.84818959236145, "learning_rate": 2.0326883818283848e-07, "loss": 0.0485, "step": 28598 }, { "epoch": 93.7672131147541, "grad_norm": 2.768667697906494, "learning_rate": 2.0305587829097418e-07, "loss": 0.2057, "step": 28599 }, { "epoch": 93.77049180327869, "grad_norm": 1.724870204925537, "learning_rate": 2.028430288698413e-07, "loss": 0.0525, "step": 28600 }, { "epoch": 93.77377049180328, "grad_norm": 2.29689359664917, "learning_rate": 2.026302899218402e-07, "loss": 0.1209, "step": 28601 }, { "epoch": 93.77704918032786, "grad_norm": 1.7410478591918945, "learning_rate": 2.0241766144936892e-07, "loss": 0.0774, "step": 28602 }, { "epoch": 93.78032786885245, "grad_norm": 3.3372273445129395, "learning_rate": 2.0220514345482444e-07, "loss": 0.1281, "step": 28603 }, { "epoch": 93.78360655737706, "grad_norm": 1.6120387315750122, "learning_rate": 2.0199273594060597e-07, "loss": 0.1197, "step": 28604 }, { "epoch": 93.78688524590164, "grad_norm": 1.7953137159347534, "learning_rate": 2.0178043890910603e-07, "loss": 0.0452, "step": 28605 }, { "epoch": 93.79016393442623, "grad_norm": 2.4304752349853516, "learning_rate": 2.0156825236271937e-07, "loss": 0.1633, "step": 28606 }, { "epoch": 93.79344262295082, "grad_norm": 1.9955452680587769, "learning_rate": 2.0135617630383852e-07, "loss": 0.1185, "step": 28607 }, { "epoch": 93.79672131147541, "grad_norm": 1.609889268875122, "learning_rate": 2.011442107348538e-07, "loss": 0.1074, "step": 28608 }, { "epoch": 93.8, "grad_norm": 2.0571908950805664, "learning_rate": 2.009323556581566e-07, "loss": 0.0367, "step": 28609 }, { "epoch": 93.80327868852459, "grad_norm": 1.971040964126587, "learning_rate": 2.0072061107613617e-07, "loss": 0.1319, "step": 28610 }, { "epoch": 93.80655737704917, "grad_norm": 1.9172900915145874, "learning_rate": 2.0050897699117943e-07, "loss": 0.134, "step": 28611 }, { "epoch": 93.80983606557378, "grad_norm": 1.3065165281295776, "learning_rate": 2.002974534056723e-07, "loss": 0.0244, "step": 28612 }, { "epoch": 93.81311475409836, "grad_norm": 2.098475456237793, "learning_rate": 2.0008604032200174e-07, "loss": 0.0586, "step": 28613 }, { "epoch": 93.81639344262295, "grad_norm": 2.3018622398376465, "learning_rate": 1.9987473774255028e-07, "loss": 0.0375, "step": 28614 }, { "epoch": 93.81967213114754, "grad_norm": 2.7943618297576904, "learning_rate": 1.9966354566970048e-07, "loss": 0.1516, "step": 28615 }, { "epoch": 93.82295081967213, "grad_norm": 3.1290504932403564, "learning_rate": 1.9945246410583263e-07, "loss": 0.0884, "step": 28616 }, { "epoch": 93.82622950819672, "grad_norm": 2.3198421001434326, "learning_rate": 1.992414930533293e-07, "loss": 0.0488, "step": 28617 }, { "epoch": 93.8295081967213, "grad_norm": 1.774612307548523, "learning_rate": 1.9903063251456856e-07, "loss": 0.0385, "step": 28618 }, { "epoch": 93.8327868852459, "grad_norm": 2.420405864715576, "learning_rate": 1.9881988249192852e-07, "loss": 0.0823, "step": 28619 }, { "epoch": 93.8360655737705, "grad_norm": 1.6823264360427856, "learning_rate": 1.9860924298778394e-07, "loss": 0.0323, "step": 28620 }, { "epoch": 93.83934426229509, "grad_norm": 2.2565722465515137, "learning_rate": 1.983987140045107e-07, "loss": 0.0817, "step": 28621 }, { "epoch": 93.84262295081967, "grad_norm": 2.0048346519470215, "learning_rate": 1.981882955444836e-07, "loss": 0.0637, "step": 28622 }, { "epoch": 93.84590163934426, "grad_norm": 2.91467022895813, "learning_rate": 1.9797798761007514e-07, "loss": 0.248, "step": 28623 }, { "epoch": 93.84918032786885, "grad_norm": 3.470546245574951, "learning_rate": 1.9776779020365677e-07, "loss": 0.1336, "step": 28624 }, { "epoch": 93.85245901639344, "grad_norm": 2.904115915298462, "learning_rate": 1.9755770332759662e-07, "loss": 0.1274, "step": 28625 }, { "epoch": 93.85573770491803, "grad_norm": 1.593517780303955, "learning_rate": 1.9734772698426717e-07, "loss": 0.1078, "step": 28626 }, { "epoch": 93.85901639344263, "grad_norm": 2.2757303714752197, "learning_rate": 1.9713786117603327e-07, "loss": 0.055, "step": 28627 }, { "epoch": 93.86229508196722, "grad_norm": 2.356764078140259, "learning_rate": 1.96928105905263e-07, "loss": 0.1751, "step": 28628 }, { "epoch": 93.8655737704918, "grad_norm": 2.302635669708252, "learning_rate": 1.9671846117432002e-07, "loss": 0.0559, "step": 28629 }, { "epoch": 93.8688524590164, "grad_norm": 2.608090877532959, "learning_rate": 1.9650892698557021e-07, "loss": 0.1728, "step": 28630 }, { "epoch": 93.87213114754098, "grad_norm": 2.2197840213775635, "learning_rate": 1.9629950334137503e-07, "loss": 0.1426, "step": 28631 }, { "epoch": 93.87540983606557, "grad_norm": 2.4302425384521484, "learning_rate": 1.9609019024409703e-07, "loss": 0.1074, "step": 28632 }, { "epoch": 93.87868852459016, "grad_norm": 1.9910879135131836, "learning_rate": 1.958809876960943e-07, "loss": 0.0972, "step": 28633 }, { "epoch": 93.88196721311475, "grad_norm": 2.5411298274993896, "learning_rate": 1.9567189569972722e-07, "loss": 0.1407, "step": 28634 }, { "epoch": 93.88524590163935, "grad_norm": 1.8506011962890625, "learning_rate": 1.95462914257355e-07, "loss": 0.0471, "step": 28635 }, { "epoch": 93.88852459016394, "grad_norm": 3.2625784873962402, "learning_rate": 1.9525404337133014e-07, "loss": 0.1353, "step": 28636 }, { "epoch": 93.89180327868853, "grad_norm": 2.551234722137451, "learning_rate": 1.9504528304401194e-07, "loss": 0.1482, "step": 28637 }, { "epoch": 93.89508196721312, "grad_norm": 2.2950527667999268, "learning_rate": 1.9483663327775293e-07, "loss": 0.0421, "step": 28638 }, { "epoch": 93.8983606557377, "grad_norm": 1.201276421546936, "learning_rate": 1.9462809407490456e-07, "loss": 0.0949, "step": 28639 }, { "epoch": 93.90163934426229, "grad_norm": 2.6869637966156006, "learning_rate": 1.9441966543782055e-07, "loss": 0.15, "step": 28640 }, { "epoch": 93.90491803278688, "grad_norm": 1.9678065776824951, "learning_rate": 1.9421134736885006e-07, "loss": 0.1229, "step": 28641 }, { "epoch": 93.90819672131147, "grad_norm": 2.117192029953003, "learning_rate": 1.9400313987034236e-07, "loss": 0.0772, "step": 28642 }, { "epoch": 93.91147540983607, "grad_norm": 3.3183178901672363, "learning_rate": 1.9379504294464335e-07, "loss": 0.1421, "step": 28643 }, { "epoch": 93.91475409836066, "grad_norm": 2.4595296382904053, "learning_rate": 1.9358705659410225e-07, "loss": 0.1166, "step": 28644 }, { "epoch": 93.91803278688525, "grad_norm": 2.0679779052734375, "learning_rate": 1.9337918082106278e-07, "loss": 0.0394, "step": 28645 }, { "epoch": 93.92131147540984, "grad_norm": 2.6488494873046875, "learning_rate": 1.931714156278708e-07, "loss": 0.1224, "step": 28646 }, { "epoch": 93.92459016393443, "grad_norm": 1.9669973850250244, "learning_rate": 1.9296376101686552e-07, "loss": 0.11, "step": 28647 }, { "epoch": 93.92786885245901, "grad_norm": 2.6354150772094727, "learning_rate": 1.9275621699039182e-07, "loss": 0.0592, "step": 28648 }, { "epoch": 93.9311475409836, "grad_norm": 2.786109685897827, "learning_rate": 1.9254878355078888e-07, "loss": 0.1915, "step": 28649 }, { "epoch": 93.93442622950819, "grad_norm": 2.2092478275299072, "learning_rate": 1.9234146070039483e-07, "loss": 0.0515, "step": 28650 }, { "epoch": 93.9377049180328, "grad_norm": 2.7260704040527344, "learning_rate": 1.921342484415478e-07, "loss": 0.2245, "step": 28651 }, { "epoch": 93.94098360655738, "grad_norm": 2.365264415740967, "learning_rate": 1.9192714677658598e-07, "loss": 0.1634, "step": 28652 }, { "epoch": 93.94426229508197, "grad_norm": 2.264946460723877, "learning_rate": 1.9172015570784297e-07, "loss": 0.0504, "step": 28653 }, { "epoch": 93.94754098360656, "grad_norm": 2.0064048767089844, "learning_rate": 1.9151327523765362e-07, "loss": 0.0981, "step": 28654 }, { "epoch": 93.95081967213115, "grad_norm": 3.0495922565460205, "learning_rate": 1.913065053683494e-07, "loss": 0.1234, "step": 28655 }, { "epoch": 93.95409836065573, "grad_norm": 2.712150812149048, "learning_rate": 1.9109984610226396e-07, "loss": 0.1283, "step": 28656 }, { "epoch": 93.95737704918032, "grad_norm": 2.6395177841186523, "learning_rate": 1.9089329744172658e-07, "loss": 0.0987, "step": 28657 }, { "epoch": 93.96065573770491, "grad_norm": 2.3020410537719727, "learning_rate": 1.906868593890654e-07, "loss": 0.1893, "step": 28658 }, { "epoch": 93.96393442622951, "grad_norm": 1.7716975212097168, "learning_rate": 1.9048053194660965e-07, "loss": 0.0471, "step": 28659 }, { "epoch": 93.9672131147541, "grad_norm": 2.2754251956939697, "learning_rate": 1.9027431511668414e-07, "loss": 0.2222, "step": 28660 }, { "epoch": 93.97049180327869, "grad_norm": 2.3444666862487793, "learning_rate": 1.9006820890161593e-07, "loss": 0.1465, "step": 28661 }, { "epoch": 93.97377049180328, "grad_norm": 2.106343984603882, "learning_rate": 1.8986221330372867e-07, "loss": 0.0344, "step": 28662 }, { "epoch": 93.97704918032787, "grad_norm": 2.323256492614746, "learning_rate": 1.8965632832534497e-07, "loss": 0.1667, "step": 28663 }, { "epoch": 93.98032786885246, "grad_norm": 2.9191629886627197, "learning_rate": 1.894505539687852e-07, "loss": 0.1589, "step": 28664 }, { "epoch": 93.98360655737704, "grad_norm": 2.863133430480957, "learning_rate": 1.8924489023637193e-07, "loss": 0.0701, "step": 28665 }, { "epoch": 93.98688524590163, "grad_norm": 2.351717948913574, "learning_rate": 1.8903933713042334e-07, "loss": 0.0904, "step": 28666 }, { "epoch": 93.99016393442623, "grad_norm": 2.677706241607666, "learning_rate": 1.8883389465325642e-07, "loss": 0.161, "step": 28667 }, { "epoch": 93.99344262295082, "grad_norm": 2.987083673477173, "learning_rate": 1.8862856280718821e-07, "loss": 0.1102, "step": 28668 }, { "epoch": 93.99672131147541, "grad_norm": 1.7496205568313599, "learning_rate": 1.884233415945347e-07, "loss": 0.1424, "step": 28669 }, { "epoch": 94.0, "grad_norm": 2.011338233947754, "learning_rate": 1.8821823101760949e-07, "loss": 0.0957, "step": 28670 }, { "epoch": 94.00327868852459, "grad_norm": 2.278472661972046, "learning_rate": 1.880132310787264e-07, "loss": 0.1491, "step": 28671 }, { "epoch": 94.00655737704918, "grad_norm": 1.8020607233047485, "learning_rate": 1.8780834178019459e-07, "loss": 0.082, "step": 28672 }, { "epoch": 94.00983606557377, "grad_norm": 1.7175687551498413, "learning_rate": 1.8760356312432558e-07, "loss": 0.1104, "step": 28673 }, { "epoch": 94.01311475409837, "grad_norm": 2.260080337524414, "learning_rate": 1.873988951134298e-07, "loss": 0.0726, "step": 28674 }, { "epoch": 94.01639344262296, "grad_norm": 2.588651180267334, "learning_rate": 1.8719433774981422e-07, "loss": 0.2128, "step": 28675 }, { "epoch": 94.01967213114754, "grad_norm": 2.3633267879486084, "learning_rate": 1.869898910357848e-07, "loss": 0.1201, "step": 28676 }, { "epoch": 94.02295081967213, "grad_norm": 2.962088108062744, "learning_rate": 1.8678555497364636e-07, "loss": 0.0792, "step": 28677 }, { "epoch": 94.02622950819672, "grad_norm": 2.5293681621551514, "learning_rate": 1.8658132956570485e-07, "loss": 0.06, "step": 28678 }, { "epoch": 94.02950819672131, "grad_norm": 2.268541097640991, "learning_rate": 1.8637721481426284e-07, "loss": 0.0816, "step": 28679 }, { "epoch": 94.0327868852459, "grad_norm": 2.3358495235443115, "learning_rate": 1.8617321072162075e-07, "loss": 0.1473, "step": 28680 }, { "epoch": 94.03606557377049, "grad_norm": 2.712735891342163, "learning_rate": 1.8596931729007895e-07, "loss": 0.0789, "step": 28681 }, { "epoch": 94.03934426229509, "grad_norm": 2.0200161933898926, "learning_rate": 1.8576553452193779e-07, "loss": 0.1528, "step": 28682 }, { "epoch": 94.04262295081968, "grad_norm": 3.7073676586151123, "learning_rate": 1.8556186241949437e-07, "loss": 0.2249, "step": 28683 }, { "epoch": 94.04590163934427, "grad_norm": 2.8507537841796875, "learning_rate": 1.853583009850457e-07, "loss": 0.1021, "step": 28684 }, { "epoch": 94.04918032786885, "grad_norm": 1.662102222442627, "learning_rate": 1.8515485022088664e-07, "loss": 0.1164, "step": 28685 }, { "epoch": 94.05245901639344, "grad_norm": 1.9161624908447266, "learning_rate": 1.849515101293109e-07, "loss": 0.0587, "step": 28686 }, { "epoch": 94.05573770491803, "grad_norm": 2.658295154571533, "learning_rate": 1.847482807126122e-07, "loss": 0.0991, "step": 28687 }, { "epoch": 94.05901639344262, "grad_norm": 1.6609283685684204, "learning_rate": 1.8454516197308314e-07, "loss": 0.1023, "step": 28688 }, { "epoch": 94.0622950819672, "grad_norm": 2.5570764541625977, "learning_rate": 1.843421539130108e-07, "loss": 0.1378, "step": 28689 }, { "epoch": 94.06557377049181, "grad_norm": 2.154623031616211, "learning_rate": 1.8413925653468778e-07, "loss": 0.1227, "step": 28690 }, { "epoch": 94.0688524590164, "grad_norm": 2.7157554626464844, "learning_rate": 1.8393646984040115e-07, "loss": 0.1811, "step": 28691 }, { "epoch": 94.07213114754099, "grad_norm": 1.6473807096481323, "learning_rate": 1.8373379383243572e-07, "loss": 0.0581, "step": 28692 }, { "epoch": 94.07540983606557, "grad_norm": 1.5022081136703491, "learning_rate": 1.835312285130786e-07, "loss": 0.0323, "step": 28693 }, { "epoch": 94.07868852459016, "grad_norm": 1.8047033548355103, "learning_rate": 1.8332877388461345e-07, "loss": 0.0526, "step": 28694 }, { "epoch": 94.08196721311475, "grad_norm": 2.9903297424316406, "learning_rate": 1.8312642994932294e-07, "loss": 0.0527, "step": 28695 }, { "epoch": 94.08524590163934, "grad_norm": 2.398736000061035, "learning_rate": 1.829241967094886e-07, "loss": 0.0875, "step": 28696 }, { "epoch": 94.08852459016393, "grad_norm": 1.924952745437622, "learning_rate": 1.8272207416739186e-07, "loss": 0.0568, "step": 28697 }, { "epoch": 94.09180327868853, "grad_norm": 3.0697410106658936, "learning_rate": 1.8252006232531207e-07, "loss": 0.0821, "step": 28698 }, { "epoch": 94.09508196721312, "grad_norm": 2.7055230140686035, "learning_rate": 1.8231816118552405e-07, "loss": 0.1419, "step": 28699 }, { "epoch": 94.09836065573771, "grad_norm": 2.372584819793701, "learning_rate": 1.821163707503082e-07, "loss": 0.1752, "step": 28700 }, { "epoch": 94.1016393442623, "grad_norm": 2.1146371364593506, "learning_rate": 1.8191469102193716e-07, "loss": 0.1141, "step": 28701 }, { "epoch": 94.10491803278688, "grad_norm": 2.2448129653930664, "learning_rate": 1.8171312200268798e-07, "loss": 0.072, "step": 28702 }, { "epoch": 94.10819672131147, "grad_norm": 2.201145887374878, "learning_rate": 1.8151166369482998e-07, "loss": 0.0732, "step": 28703 }, { "epoch": 94.11147540983606, "grad_norm": 3.127969980239868, "learning_rate": 1.8131031610063687e-07, "loss": 0.2248, "step": 28704 }, { "epoch": 94.11475409836065, "grad_norm": 2.8482728004455566, "learning_rate": 1.811090792223802e-07, "loss": 0.172, "step": 28705 }, { "epoch": 94.11803278688525, "grad_norm": 13.228585243225098, "learning_rate": 1.80907953062327e-07, "loss": 0.1774, "step": 28706 }, { "epoch": 94.12131147540984, "grad_norm": 1.8683525323867798, "learning_rate": 1.8070693762274438e-07, "loss": 0.0773, "step": 28707 }, { "epoch": 94.12459016393443, "grad_norm": 2.981961727142334, "learning_rate": 1.8050603290590274e-07, "loss": 0.0538, "step": 28708 }, { "epoch": 94.12786885245902, "grad_norm": 2.3319380283355713, "learning_rate": 1.8030523891406471e-07, "loss": 0.1065, "step": 28709 }, { "epoch": 94.1311475409836, "grad_norm": 5.698070526123047, "learning_rate": 1.8010455564949402e-07, "loss": 0.0957, "step": 28710 }, { "epoch": 94.1344262295082, "grad_norm": 2.069873571395874, "learning_rate": 1.7990398311445555e-07, "loss": 0.1344, "step": 28711 }, { "epoch": 94.13770491803278, "grad_norm": 1.639783501625061, "learning_rate": 1.7970352131120971e-07, "loss": 0.0635, "step": 28712 }, { "epoch": 94.14098360655737, "grad_norm": 1.8268744945526123, "learning_rate": 1.79503170242018e-07, "loss": 0.0755, "step": 28713 }, { "epoch": 94.14426229508197, "grad_norm": 2.3926193714141846, "learning_rate": 1.7930292990913757e-07, "loss": 0.1801, "step": 28714 }, { "epoch": 94.14754098360656, "grad_norm": 1.3380544185638428, "learning_rate": 1.7910280031482873e-07, "loss": 0.0244, "step": 28715 }, { "epoch": 94.15081967213115, "grad_norm": 2.0942158699035645, "learning_rate": 1.7890278146134533e-07, "loss": 0.1321, "step": 28716 }, { "epoch": 94.15409836065574, "grad_norm": 1.9755915403366089, "learning_rate": 1.787028733509455e-07, "loss": 0.0939, "step": 28717 }, { "epoch": 94.15737704918033, "grad_norm": 1.9151207208633423, "learning_rate": 1.78503075985883e-07, "loss": 0.072, "step": 28718 }, { "epoch": 94.16065573770491, "grad_norm": 1.7887781858444214, "learning_rate": 1.783033893684094e-07, "loss": 0.0453, "step": 28719 }, { "epoch": 94.1639344262295, "grad_norm": 2.2373082637786865, "learning_rate": 1.7810381350077731e-07, "loss": 0.1453, "step": 28720 }, { "epoch": 94.1672131147541, "grad_norm": 4.885775089263916, "learning_rate": 1.7790434838523606e-07, "loss": 0.2092, "step": 28721 }, { "epoch": 94.1704918032787, "grad_norm": 2.385317087173462, "learning_rate": 1.7770499402403717e-07, "loss": 0.0387, "step": 28722 }, { "epoch": 94.17377049180328, "grad_norm": 2.1783061027526855, "learning_rate": 1.7750575041942665e-07, "loss": 0.0317, "step": 28723 }, { "epoch": 94.17704918032787, "grad_norm": 2.145829439163208, "learning_rate": 1.7730661757365153e-07, "loss": 0.2077, "step": 28724 }, { "epoch": 94.18032786885246, "grad_norm": 2.138054847717285, "learning_rate": 1.7710759548895672e-07, "loss": 0.1676, "step": 28725 }, { "epoch": 94.18360655737705, "grad_norm": 2.353933811187744, "learning_rate": 1.7690868416758711e-07, "loss": 0.1254, "step": 28726 }, { "epoch": 94.18688524590164, "grad_norm": 1.687632441520691, "learning_rate": 1.7670988361178643e-07, "loss": 0.0806, "step": 28727 }, { "epoch": 94.19016393442622, "grad_norm": 2.5247902870178223, "learning_rate": 1.7651119382379512e-07, "loss": 0.0998, "step": 28728 }, { "epoch": 94.19344262295083, "grad_norm": 1.6563069820404053, "learning_rate": 1.763126148058536e-07, "loss": 0.033, "step": 28729 }, { "epoch": 94.19672131147541, "grad_norm": 2.508188486099243, "learning_rate": 1.761141465602012e-07, "loss": 0.0709, "step": 28730 }, { "epoch": 94.2, "grad_norm": 2.238684892654419, "learning_rate": 1.7591578908907724e-07, "loss": 0.0803, "step": 28731 }, { "epoch": 94.20327868852459, "grad_norm": 1.4853042364120483, "learning_rate": 1.757175423947166e-07, "loss": 0.0249, "step": 28732 }, { "epoch": 94.20655737704918, "grad_norm": 2.340649127960205, "learning_rate": 1.7551940647935417e-07, "loss": 0.074, "step": 28733 }, { "epoch": 94.20983606557377, "grad_norm": 1.7965492010116577, "learning_rate": 1.7532138134522704e-07, "loss": 0.0308, "step": 28734 }, { "epoch": 94.21311475409836, "grad_norm": 3.39084529876709, "learning_rate": 1.7512346699456562e-07, "loss": 0.158, "step": 28735 }, { "epoch": 94.21639344262294, "grad_norm": 2.216320276260376, "learning_rate": 1.749256634296026e-07, "loss": 0.1616, "step": 28736 }, { "epoch": 94.21967213114755, "grad_norm": 1.9691734313964844, "learning_rate": 1.7472797065256842e-07, "loss": 0.0615, "step": 28737 }, { "epoch": 94.22295081967214, "grad_norm": 1.8902124166488647, "learning_rate": 1.7453038866569129e-07, "loss": 0.0441, "step": 28738 }, { "epoch": 94.22622950819672, "grad_norm": 2.396419048309326, "learning_rate": 1.743329174712005e-07, "loss": 0.0753, "step": 28739 }, { "epoch": 94.22950819672131, "grad_norm": 2.1790578365325928, "learning_rate": 1.7413555707132324e-07, "loss": 0.181, "step": 28740 }, { "epoch": 94.2327868852459, "grad_norm": 2.3621625900268555, "learning_rate": 1.7393830746828212e-07, "loss": 0.0482, "step": 28741 }, { "epoch": 94.23606557377049, "grad_norm": 2.8316047191619873, "learning_rate": 1.7374116866430424e-07, "loss": 0.1263, "step": 28742 }, { "epoch": 94.23934426229508, "grad_norm": 2.4125077724456787, "learning_rate": 1.7354414066161118e-07, "loss": 0.2849, "step": 28743 }, { "epoch": 94.24262295081967, "grad_norm": 2.456068754196167, "learning_rate": 1.733472234624245e-07, "loss": 0.147, "step": 28744 }, { "epoch": 94.24590163934427, "grad_norm": 2.502415895462036, "learning_rate": 1.7315041706896573e-07, "loss": 0.1328, "step": 28745 }, { "epoch": 94.24918032786886, "grad_norm": 2.6209840774536133, "learning_rate": 1.7295372148345313e-07, "loss": 0.1349, "step": 28746 }, { "epoch": 94.25245901639344, "grad_norm": 3.536282539367676, "learning_rate": 1.727571367081049e-07, "loss": 0.1648, "step": 28747 }, { "epoch": 94.25573770491803, "grad_norm": 1.2515535354614258, "learning_rate": 1.7256066274513705e-07, "loss": 0.0761, "step": 28748 }, { "epoch": 94.25901639344262, "grad_norm": 2.420295476913452, "learning_rate": 1.723642995967656e-07, "loss": 0.2205, "step": 28749 }, { "epoch": 94.26229508196721, "grad_norm": 1.989476203918457, "learning_rate": 1.7216804726520654e-07, "loss": 0.0963, "step": 28750 }, { "epoch": 94.2655737704918, "grad_norm": 2.254758834838867, "learning_rate": 1.719719057526692e-07, "loss": 0.1008, "step": 28751 }, { "epoch": 94.26885245901639, "grad_norm": 2.6747212409973145, "learning_rate": 1.717758750613685e-07, "loss": 0.1951, "step": 28752 }, { "epoch": 94.27213114754099, "grad_norm": 2.0265750885009766, "learning_rate": 1.7157995519351267e-07, "loss": 0.124, "step": 28753 }, { "epoch": 94.27540983606558, "grad_norm": 2.810511350631714, "learning_rate": 1.7138414615131327e-07, "loss": 0.0812, "step": 28754 }, { "epoch": 94.27868852459017, "grad_norm": 4.0115532875061035, "learning_rate": 1.711884479369752e-07, "loss": 0.1201, "step": 28755 }, { "epoch": 94.28196721311475, "grad_norm": 1.7049875259399414, "learning_rate": 1.7099286055270781e-07, "loss": 0.0357, "step": 28756 }, { "epoch": 94.28524590163934, "grad_norm": 1.501083254814148, "learning_rate": 1.7079738400071488e-07, "loss": 0.0303, "step": 28757 }, { "epoch": 94.28852459016393, "grad_norm": 2.1003077030181885, "learning_rate": 1.7060201828320244e-07, "loss": 0.0833, "step": 28758 }, { "epoch": 94.29180327868852, "grad_norm": 1.7804219722747803, "learning_rate": 1.7040676340236983e-07, "loss": 0.0467, "step": 28759 }, { "epoch": 94.29508196721312, "grad_norm": 2.736851453781128, "learning_rate": 1.7021161936042306e-07, "loss": 0.1009, "step": 28760 }, { "epoch": 94.29836065573771, "grad_norm": 2.255948066711426, "learning_rate": 1.700165861595615e-07, "loss": 0.0963, "step": 28761 }, { "epoch": 94.3016393442623, "grad_norm": 1.9790898561477661, "learning_rate": 1.6982166380198227e-07, "loss": 0.0707, "step": 28762 }, { "epoch": 94.30491803278689, "grad_norm": 1.8050365447998047, "learning_rate": 1.6962685228988472e-07, "loss": 0.1461, "step": 28763 }, { "epoch": 94.30819672131148, "grad_norm": 1.7380783557891846, "learning_rate": 1.694321516254649e-07, "loss": 0.1912, "step": 28764 }, { "epoch": 94.31147540983606, "grad_norm": 2.263170003890991, "learning_rate": 1.6923756181091988e-07, "loss": 0.0956, "step": 28765 }, { "epoch": 94.31475409836065, "grad_norm": 1.3872803449630737, "learning_rate": 1.690430828484424e-07, "loss": 0.0224, "step": 28766 }, { "epoch": 94.31803278688524, "grad_norm": 2.257896661758423, "learning_rate": 1.6884871474022625e-07, "loss": 0.1351, "step": 28767 }, { "epoch": 94.32131147540984, "grad_norm": 2.0385735034942627, "learning_rate": 1.6865445748846075e-07, "loss": 0.1577, "step": 28768 }, { "epoch": 94.32459016393443, "grad_norm": 2.1523447036743164, "learning_rate": 1.6846031109533978e-07, "loss": 0.1672, "step": 28769 }, { "epoch": 94.32786885245902, "grad_norm": 1.979791522026062, "learning_rate": 1.6826627556305152e-07, "loss": 0.1482, "step": 28770 }, { "epoch": 94.33114754098361, "grad_norm": 2.483539581298828, "learning_rate": 1.6807235089378315e-07, "loss": 0.1325, "step": 28771 }, { "epoch": 94.3344262295082, "grad_norm": 2.852905035018921, "learning_rate": 1.6787853708972067e-07, "loss": 0.1614, "step": 28772 }, { "epoch": 94.33770491803278, "grad_norm": 2.631192684173584, "learning_rate": 1.6768483415305125e-07, "loss": 0.1007, "step": 28773 }, { "epoch": 94.34098360655737, "grad_norm": 2.3007402420043945, "learning_rate": 1.6749124208595868e-07, "loss": 0.1379, "step": 28774 }, { "epoch": 94.34426229508196, "grad_norm": 1.9711345434188843, "learning_rate": 1.6729776089062565e-07, "loss": 0.0787, "step": 28775 }, { "epoch": 94.34754098360656, "grad_norm": 1.9600427150726318, "learning_rate": 1.6710439056923266e-07, "loss": 0.1318, "step": 28776 }, { "epoch": 94.35081967213115, "grad_norm": 2.5468432903289795, "learning_rate": 1.669111311239624e-07, "loss": 0.0783, "step": 28777 }, { "epoch": 94.35409836065574, "grad_norm": 1.898914098739624, "learning_rate": 1.6671798255699202e-07, "loss": 0.0151, "step": 28778 }, { "epoch": 94.35737704918033, "grad_norm": 1.921445369720459, "learning_rate": 1.6652494487050198e-07, "loss": 0.0565, "step": 28779 }, { "epoch": 94.36065573770492, "grad_norm": 2.9781789779663086, "learning_rate": 1.663320180666661e-07, "loss": 0.2222, "step": 28780 }, { "epoch": 94.3639344262295, "grad_norm": 1.9742108583450317, "learning_rate": 1.6613920214766155e-07, "loss": 0.0455, "step": 28781 }, { "epoch": 94.3672131147541, "grad_norm": 1.3832124471664429, "learning_rate": 1.6594649711566214e-07, "loss": 0.0178, "step": 28782 }, { "epoch": 94.37049180327868, "grad_norm": 2.8031179904937744, "learning_rate": 1.6575390297284056e-07, "loss": 0.1529, "step": 28783 }, { "epoch": 94.37377049180328, "grad_norm": 2.6602187156677246, "learning_rate": 1.6556141972136952e-07, "loss": 0.156, "step": 28784 }, { "epoch": 94.37704918032787, "grad_norm": 1.8995816707611084, "learning_rate": 1.653690473634173e-07, "loss": 0.0428, "step": 28785 }, { "epoch": 94.38032786885246, "grad_norm": 2.2493066787719727, "learning_rate": 1.651767859011566e-07, "loss": 0.0919, "step": 28786 }, { "epoch": 94.38360655737705, "grad_norm": 2.352769613265991, "learning_rate": 1.6498463533675237e-07, "loss": 0.152, "step": 28787 }, { "epoch": 94.38688524590164, "grad_norm": 2.7752416133880615, "learning_rate": 1.647925956723717e-07, "loss": 0.1911, "step": 28788 }, { "epoch": 94.39016393442623, "grad_norm": 2.202009916305542, "learning_rate": 1.6460066691018183e-07, "loss": 0.1322, "step": 28789 }, { "epoch": 94.39344262295081, "grad_norm": 1.5281484127044678, "learning_rate": 1.6440884905234323e-07, "loss": 0.0388, "step": 28790 }, { "epoch": 94.3967213114754, "grad_norm": 1.670149564743042, "learning_rate": 1.6421714210102303e-07, "loss": 0.08, "step": 28791 }, { "epoch": 94.4, "grad_norm": 2.0234487056732178, "learning_rate": 1.6402554605838173e-07, "loss": 0.0639, "step": 28792 }, { "epoch": 94.4032786885246, "grad_norm": 2.165344715118408, "learning_rate": 1.6383406092657873e-07, "loss": 0.0622, "step": 28793 }, { "epoch": 94.40655737704918, "grad_norm": 2.351771831512451, "learning_rate": 1.636426867077734e-07, "loss": 0.1114, "step": 28794 }, { "epoch": 94.40983606557377, "grad_norm": 2.7743446826934814, "learning_rate": 1.6345142340412402e-07, "loss": 0.1647, "step": 28795 }, { "epoch": 94.41311475409836, "grad_norm": 2.7040505409240723, "learning_rate": 1.6326027101778774e-07, "loss": 0.0962, "step": 28796 }, { "epoch": 94.41639344262295, "grad_norm": 2.515380382537842, "learning_rate": 1.630692295509184e-07, "loss": 0.1016, "step": 28797 }, { "epoch": 94.41967213114754, "grad_norm": 2.138239622116089, "learning_rate": 1.628782990056721e-07, "loss": 0.1348, "step": 28798 }, { "epoch": 94.42295081967212, "grad_norm": 3.1696040630340576, "learning_rate": 1.626874793842015e-07, "loss": 0.0926, "step": 28799 }, { "epoch": 94.42622950819673, "grad_norm": 2.1401195526123047, "learning_rate": 1.624967706886571e-07, "loss": 0.0661, "step": 28800 }, { "epoch": 94.42950819672132, "grad_norm": 2.3650712966918945, "learning_rate": 1.6230617292119056e-07, "loss": 0.2006, "step": 28801 }, { "epoch": 94.4327868852459, "grad_norm": 2.1467127799987793, "learning_rate": 1.6211568608395012e-07, "loss": 0.1263, "step": 28802 }, { "epoch": 94.43606557377049, "grad_norm": 2.1932718753814697, "learning_rate": 1.619253101790852e-07, "loss": 0.131, "step": 28803 }, { "epoch": 94.43934426229508, "grad_norm": 2.708975315093994, "learning_rate": 1.617350452087396e-07, "loss": 0.1081, "step": 28804 }, { "epoch": 94.44262295081967, "grad_norm": 2.1530253887176514, "learning_rate": 1.6154489117506166e-07, "loss": 0.1293, "step": 28805 }, { "epoch": 94.44590163934426, "grad_norm": 2.3492748737335205, "learning_rate": 1.6135484808019518e-07, "loss": 0.2708, "step": 28806 }, { "epoch": 94.44918032786886, "grad_norm": 2.0994040966033936, "learning_rate": 1.611649159262807e-07, "loss": 0.063, "step": 28807 }, { "epoch": 94.45245901639345, "grad_norm": 2.6247942447662354, "learning_rate": 1.6097509471546313e-07, "loss": 0.1469, "step": 28808 }, { "epoch": 94.45573770491804, "grad_norm": 2.2560832500457764, "learning_rate": 1.6078538444988078e-07, "loss": 0.0784, "step": 28809 }, { "epoch": 94.45901639344262, "grad_norm": 2.054910182952881, "learning_rate": 1.605957851316742e-07, "loss": 0.0813, "step": 28810 }, { "epoch": 94.46229508196721, "grad_norm": 1.9011059999465942, "learning_rate": 1.6040629676297936e-07, "loss": 0.1276, "step": 28811 }, { "epoch": 94.4655737704918, "grad_norm": 3.233096122741699, "learning_rate": 1.6021691934593464e-07, "loss": 0.1006, "step": 28812 }, { "epoch": 94.46885245901639, "grad_norm": 2.2702887058258057, "learning_rate": 1.6002765288267497e-07, "loss": 0.1161, "step": 28813 }, { "epoch": 94.47213114754098, "grad_norm": 2.8843648433685303, "learning_rate": 1.598384973753353e-07, "loss": 0.1609, "step": 28814 }, { "epoch": 94.47540983606558, "grad_norm": 1.7696425914764404, "learning_rate": 1.5964945282604726e-07, "loss": 0.1455, "step": 28815 }, { "epoch": 94.47868852459017, "grad_norm": 3.691932201385498, "learning_rate": 1.594605192369425e-07, "loss": 0.1037, "step": 28816 }, { "epoch": 94.48196721311476, "grad_norm": 3.6289474964141846, "learning_rate": 1.5927169661015262e-07, "loss": 0.0574, "step": 28817 }, { "epoch": 94.48524590163935, "grad_norm": 2.1071581840515137, "learning_rate": 1.5908298494780593e-07, "loss": 0.0484, "step": 28818 }, { "epoch": 94.48852459016393, "grad_norm": 3.0456762313842773, "learning_rate": 1.5889438425203075e-07, "loss": 0.1056, "step": 28819 }, { "epoch": 94.49180327868852, "grad_norm": 2.1941287517547607, "learning_rate": 1.58705894524952e-07, "loss": 0.1125, "step": 28820 }, { "epoch": 94.49508196721311, "grad_norm": 2.2706716060638428, "learning_rate": 1.58517515768698e-07, "loss": 0.1199, "step": 28821 }, { "epoch": 94.4983606557377, "grad_norm": 2.5780692100524902, "learning_rate": 1.583292479853926e-07, "loss": 0.156, "step": 28822 }, { "epoch": 94.5016393442623, "grad_norm": 2.655876874923706, "learning_rate": 1.5814109117715636e-07, "loss": 0.1808, "step": 28823 }, { "epoch": 94.50491803278689, "grad_norm": 2.684298038482666, "learning_rate": 1.57953045346112e-07, "loss": 0.1985, "step": 28824 }, { "epoch": 94.50819672131148, "grad_norm": 2.0480740070343018, "learning_rate": 1.5776511049438114e-07, "loss": 0.1152, "step": 28825 }, { "epoch": 94.51147540983607, "grad_norm": 2.574033260345459, "learning_rate": 1.575772866240821e-07, "loss": 0.1476, "step": 28826 }, { "epoch": 94.51475409836065, "grad_norm": 1.6010074615478516, "learning_rate": 1.573895737373321e-07, "loss": 0.085, "step": 28827 }, { "epoch": 94.51803278688524, "grad_norm": 2.239833116531372, "learning_rate": 1.572019718362494e-07, "loss": 0.0486, "step": 28828 }, { "epoch": 94.52131147540983, "grad_norm": 2.0578598976135254, "learning_rate": 1.570144809229468e-07, "loss": 0.1011, "step": 28829 }, { "epoch": 94.52459016393442, "grad_norm": 1.7275073528289795, "learning_rate": 1.5682710099954035e-07, "loss": 0.1688, "step": 28830 }, { "epoch": 94.52786885245902, "grad_norm": 2.3525424003601074, "learning_rate": 1.5663983206814394e-07, "loss": 0.122, "step": 28831 }, { "epoch": 94.53114754098361, "grad_norm": 1.9353652000427246, "learning_rate": 1.5645267413086695e-07, "loss": 0.0628, "step": 28832 }, { "epoch": 94.5344262295082, "grad_norm": 2.1527154445648193, "learning_rate": 1.562656271898211e-07, "loss": 0.1078, "step": 28833 }, { "epoch": 94.53770491803279, "grad_norm": 5.59476375579834, "learning_rate": 1.560786912471146e-07, "loss": 0.0938, "step": 28834 }, { "epoch": 94.54098360655738, "grad_norm": 2.5225765705108643, "learning_rate": 1.5589186630485697e-07, "loss": 0.1526, "step": 28835 }, { "epoch": 94.54426229508196, "grad_norm": 1.763401746749878, "learning_rate": 1.5570515236515315e-07, "loss": 0.1196, "step": 28836 }, { "epoch": 94.54754098360655, "grad_norm": 1.9168099164962769, "learning_rate": 1.5551854943010923e-07, "loss": 0.0717, "step": 28837 }, { "epoch": 94.55081967213114, "grad_norm": 2.9086503982543945, "learning_rate": 1.5533205750183023e-07, "loss": 0.1456, "step": 28838 }, { "epoch": 94.55409836065574, "grad_norm": 2.55236554145813, "learning_rate": 1.5514567658241776e-07, "loss": 0.0785, "step": 28839 }, { "epoch": 94.55737704918033, "grad_norm": 2.1297824382781982, "learning_rate": 1.5495940667397347e-07, "loss": 0.1294, "step": 28840 }, { "epoch": 94.56065573770492, "grad_norm": 2.106966257095337, "learning_rate": 1.5477324777859904e-07, "loss": 0.1775, "step": 28841 }, { "epoch": 94.56393442622951, "grad_norm": 2.4263696670532227, "learning_rate": 1.5458719989839167e-07, "loss": 0.066, "step": 28842 }, { "epoch": 94.5672131147541, "grad_norm": 2.5145697593688965, "learning_rate": 1.5440126303545077e-07, "loss": 0.0784, "step": 28843 }, { "epoch": 94.57049180327868, "grad_norm": 1.900507926940918, "learning_rate": 1.5421543719187248e-07, "loss": 0.1378, "step": 28844 }, { "epoch": 94.57377049180327, "grad_norm": 3.834772825241089, "learning_rate": 1.5402972236975178e-07, "loss": 0.1987, "step": 28845 }, { "epoch": 94.57704918032788, "grad_norm": 6.048754692077637, "learning_rate": 1.5384411857118252e-07, "loss": 0.1378, "step": 28846 }, { "epoch": 94.58032786885246, "grad_norm": 1.8546415567398071, "learning_rate": 1.5365862579825973e-07, "loss": 0.0302, "step": 28847 }, { "epoch": 94.58360655737705, "grad_norm": 1.9912512302398682, "learning_rate": 1.5347324405307283e-07, "loss": 0.0554, "step": 28848 }, { "epoch": 94.58688524590164, "grad_norm": 2.1582348346710205, "learning_rate": 1.5328797333771352e-07, "loss": 0.0985, "step": 28849 }, { "epoch": 94.59016393442623, "grad_norm": 2.157205581665039, "learning_rate": 1.5310281365427003e-07, "loss": 0.2188, "step": 28850 }, { "epoch": 94.59344262295082, "grad_norm": 4.19463586807251, "learning_rate": 1.529177650048297e-07, "loss": 0.1735, "step": 28851 }, { "epoch": 94.5967213114754, "grad_norm": 2.2575175762176514, "learning_rate": 1.5273282739148188e-07, "loss": 0.1123, "step": 28852 }, { "epoch": 94.6, "grad_norm": 2.033205986022949, "learning_rate": 1.5254800081630828e-07, "loss": 0.1136, "step": 28853 }, { "epoch": 94.6032786885246, "grad_norm": 2.0240867137908936, "learning_rate": 1.5236328528139499e-07, "loss": 0.0789, "step": 28854 }, { "epoch": 94.60655737704919, "grad_norm": 2.4912517070770264, "learning_rate": 1.521786807888248e-07, "loss": 0.12, "step": 28855 }, { "epoch": 94.60983606557377, "grad_norm": 2.3811838626861572, "learning_rate": 1.519941873406794e-07, "loss": 0.1102, "step": 28856 }, { "epoch": 94.61311475409836, "grad_norm": 1.1513980627059937, "learning_rate": 1.518098049390393e-07, "loss": 0.0154, "step": 28857 }, { "epoch": 94.61639344262295, "grad_norm": 1.7730048894882202, "learning_rate": 1.5162553358598286e-07, "loss": 0.0499, "step": 28858 }, { "epoch": 94.61967213114754, "grad_norm": 2.3178722858428955, "learning_rate": 1.5144137328358733e-07, "loss": 0.1101, "step": 28859 }, { "epoch": 94.62295081967213, "grad_norm": 3.1898560523986816, "learning_rate": 1.51257324033931e-07, "loss": 0.109, "step": 28860 }, { "epoch": 94.62622950819672, "grad_norm": 1.7704126834869385, "learning_rate": 1.5107338583908893e-07, "loss": 0.1424, "step": 28861 }, { "epoch": 94.62950819672132, "grad_norm": 1.7576518058776855, "learning_rate": 1.5088955870113386e-07, "loss": 0.0994, "step": 28862 }, { "epoch": 94.6327868852459, "grad_norm": 2.50036883354187, "learning_rate": 1.5070584262213973e-07, "loss": 0.0768, "step": 28863 }, { "epoch": 94.6360655737705, "grad_norm": 2.570733070373535, "learning_rate": 1.5052223760417816e-07, "loss": 0.0451, "step": 28864 }, { "epoch": 94.63934426229508, "grad_norm": 3.110569953918457, "learning_rate": 1.5033874364931976e-07, "loss": 0.0896, "step": 28865 }, { "epoch": 94.64262295081967, "grad_norm": 2.5550167560577393, "learning_rate": 1.5015536075963288e-07, "loss": 0.0902, "step": 28866 }, { "epoch": 94.64590163934426, "grad_norm": 2.3267605304718018, "learning_rate": 1.4997208893718586e-07, "loss": 0.1232, "step": 28867 }, { "epoch": 94.64918032786885, "grad_norm": 2.5343356132507324, "learning_rate": 1.4978892818404366e-07, "loss": 0.07, "step": 28868 }, { "epoch": 94.65245901639344, "grad_norm": 2.0453133583068848, "learning_rate": 1.496058785022736e-07, "loss": 0.0793, "step": 28869 }, { "epoch": 94.65573770491804, "grad_norm": 1.875262975692749, "learning_rate": 1.4942293989393953e-07, "loss": 0.0364, "step": 28870 }, { "epoch": 94.65901639344263, "grad_norm": 2.010647773742676, "learning_rate": 1.4924011236110424e-07, "loss": 0.1071, "step": 28871 }, { "epoch": 94.66229508196722, "grad_norm": 2.03722882270813, "learning_rate": 1.4905739590582724e-07, "loss": 0.199, "step": 28872 }, { "epoch": 94.6655737704918, "grad_norm": 1.9843379259109497, "learning_rate": 1.4887479053017128e-07, "loss": 0.0968, "step": 28873 }, { "epoch": 94.66885245901639, "grad_norm": 2.6347198486328125, "learning_rate": 1.4869229623619586e-07, "loss": 0.1039, "step": 28874 }, { "epoch": 94.67213114754098, "grad_norm": 1.7615514993667603, "learning_rate": 1.4850991302595597e-07, "loss": 0.0685, "step": 28875 }, { "epoch": 94.67540983606557, "grad_norm": 2.0832087993621826, "learning_rate": 1.4832764090150997e-07, "loss": 0.0672, "step": 28876 }, { "epoch": 94.67868852459016, "grad_norm": 2.4471025466918945, "learning_rate": 1.4814547986491402e-07, "loss": 0.0831, "step": 28877 }, { "epoch": 94.68196721311476, "grad_norm": 1.8021366596221924, "learning_rate": 1.4796342991822089e-07, "loss": 0.079, "step": 28878 }, { "epoch": 94.68524590163935, "grad_norm": 2.65058970451355, "learning_rate": 1.4778149106348337e-07, "loss": 0.1323, "step": 28879 }, { "epoch": 94.68852459016394, "grad_norm": 3.502228021621704, "learning_rate": 1.475996633027532e-07, "loss": 0.1756, "step": 28880 }, { "epoch": 94.69180327868852, "grad_norm": 1.3638887405395508, "learning_rate": 1.4741794663807984e-07, "loss": 0.1817, "step": 28881 }, { "epoch": 94.69508196721311, "grad_norm": 2.1571903228759766, "learning_rate": 1.4723634107151497e-07, "loss": 0.0802, "step": 28882 }, { "epoch": 94.6983606557377, "grad_norm": 2.3117287158966064, "learning_rate": 1.470548466051036e-07, "loss": 0.147, "step": 28883 }, { "epoch": 94.70163934426229, "grad_norm": 2.3301870822906494, "learning_rate": 1.4687346324089414e-07, "loss": 0.0998, "step": 28884 }, { "epoch": 94.70491803278688, "grad_norm": 2.0728394985198975, "learning_rate": 1.4669219098093046e-07, "loss": 0.0608, "step": 28885 }, { "epoch": 94.70819672131148, "grad_norm": 2.5873615741729736, "learning_rate": 1.465110298272565e-07, "loss": 0.0982, "step": 28886 }, { "epoch": 94.71147540983607, "grad_norm": 2.405787944793701, "learning_rate": 1.463299797819173e-07, "loss": 0.1199, "step": 28887 }, { "epoch": 94.71475409836066, "grad_norm": 2.7668333053588867, "learning_rate": 1.4614904084695235e-07, "loss": 0.2447, "step": 28888 }, { "epoch": 94.71803278688525, "grad_norm": 1.5588675737380981, "learning_rate": 1.4596821302440112e-07, "loss": 0.0364, "step": 28889 }, { "epoch": 94.72131147540983, "grad_norm": 2.0778863430023193, "learning_rate": 1.457874963163053e-07, "loss": 0.0409, "step": 28890 }, { "epoch": 94.72459016393442, "grad_norm": 1.7805453538894653, "learning_rate": 1.4560689072470102e-07, "loss": 0.0259, "step": 28891 }, { "epoch": 94.72786885245901, "grad_norm": 2.5052573680877686, "learning_rate": 1.4542639625162448e-07, "loss": 0.0722, "step": 28892 }, { "epoch": 94.73114754098361, "grad_norm": 2.254535436630249, "learning_rate": 1.452460128991129e-07, "loss": 0.0752, "step": 28893 }, { "epoch": 94.7344262295082, "grad_norm": 2.8026158809661865, "learning_rate": 1.4506574066919686e-07, "loss": 0.192, "step": 28894 }, { "epoch": 94.73770491803279, "grad_norm": 2.891613245010376, "learning_rate": 1.4488557956391258e-07, "loss": 0.2637, "step": 28895 }, { "epoch": 94.74098360655738, "grad_norm": 2.656404972076416, "learning_rate": 1.447055295852895e-07, "loss": 0.0781, "step": 28896 }, { "epoch": 94.74426229508197, "grad_norm": 2.21646785736084, "learning_rate": 1.4452559073535933e-07, "loss": 0.0511, "step": 28897 }, { "epoch": 94.74754098360656, "grad_norm": 2.0456655025482178, "learning_rate": 1.4434576301614932e-07, "loss": 0.1664, "step": 28898 }, { "epoch": 94.75081967213114, "grad_norm": 1.832995057106018, "learning_rate": 1.4416604642968902e-07, "loss": 0.0446, "step": 28899 }, { "epoch": 94.75409836065573, "grad_norm": 1.8557335138320923, "learning_rate": 1.4398644097800342e-07, "loss": 0.1076, "step": 28900 }, { "epoch": 94.75737704918033, "grad_norm": 2.2855751514434814, "learning_rate": 1.4380694666311867e-07, "loss": 0.0859, "step": 28901 }, { "epoch": 94.76065573770492, "grad_norm": 2.634950876235962, "learning_rate": 1.4362756348705765e-07, "loss": 0.1233, "step": 28902 }, { "epoch": 94.76393442622951, "grad_norm": 1.557421326637268, "learning_rate": 1.4344829145184425e-07, "loss": 0.0556, "step": 28903 }, { "epoch": 94.7672131147541, "grad_norm": 2.5622987747192383, "learning_rate": 1.432691305595002e-07, "loss": 0.0971, "step": 28904 }, { "epoch": 94.77049180327869, "grad_norm": 2.7640128135681152, "learning_rate": 1.43090080812045e-07, "loss": 0.0828, "step": 28905 }, { "epoch": 94.77377049180328, "grad_norm": 1.8477132320404053, "learning_rate": 1.4291114221149705e-07, "loss": 0.1303, "step": 28906 }, { "epoch": 94.77704918032786, "grad_norm": 2.580329656600952, "learning_rate": 1.427323147598758e-07, "loss": 0.2374, "step": 28907 }, { "epoch": 94.78032786885245, "grad_norm": 3.766735792160034, "learning_rate": 1.4255359845919635e-07, "loss": 0.0733, "step": 28908 }, { "epoch": 94.78360655737706, "grad_norm": 2.3106160163879395, "learning_rate": 1.4237499331147376e-07, "loss": 0.144, "step": 28909 }, { "epoch": 94.78688524590164, "grad_norm": 2.1446945667266846, "learning_rate": 1.4219649931872303e-07, "loss": 0.0844, "step": 28910 }, { "epoch": 94.79016393442623, "grad_norm": 2.0073766708374023, "learning_rate": 1.4201811648295594e-07, "loss": 0.1506, "step": 28911 }, { "epoch": 94.79344262295082, "grad_norm": 2.6064858436584473, "learning_rate": 1.4183984480618417e-07, "loss": 0.0801, "step": 28912 }, { "epoch": 94.79672131147541, "grad_norm": 3.137976884841919, "learning_rate": 1.4166168429041838e-07, "loss": 0.1582, "step": 28913 }, { "epoch": 94.8, "grad_norm": 2.503896713256836, "learning_rate": 1.4148363493766803e-07, "loss": 0.219, "step": 28914 }, { "epoch": 94.80327868852459, "grad_norm": 1.9702603816986084, "learning_rate": 1.413056967499382e-07, "loss": 0.0699, "step": 28915 }, { "epoch": 94.80655737704917, "grad_norm": 1.974979281425476, "learning_rate": 1.4112786972923842e-07, "loss": 0.1348, "step": 28916 }, { "epoch": 94.80983606557378, "grad_norm": 1.6720646619796753, "learning_rate": 1.4095015387757261e-07, "loss": 0.1345, "step": 28917 }, { "epoch": 94.81311475409836, "grad_norm": 3.457155466079712, "learning_rate": 1.407725491969447e-07, "loss": 0.1506, "step": 28918 }, { "epoch": 94.81639344262295, "grad_norm": 2.460399866104126, "learning_rate": 1.405950556893565e-07, "loss": 0.0588, "step": 28919 }, { "epoch": 94.81967213114754, "grad_norm": 2.0611674785614014, "learning_rate": 1.404176733568108e-07, "loss": 0.0617, "step": 28920 }, { "epoch": 94.82295081967213, "grad_norm": 2.497119188308716, "learning_rate": 1.402404022013071e-07, "loss": 0.1451, "step": 28921 }, { "epoch": 94.82622950819672, "grad_norm": 2.5748491287231445, "learning_rate": 1.400632422248438e-07, "loss": 0.0877, "step": 28922 }, { "epoch": 94.8295081967213, "grad_norm": 3.3440656661987305, "learning_rate": 1.3988619342942045e-07, "loss": 0.0737, "step": 28923 }, { "epoch": 94.8327868852459, "grad_norm": 2.0176384449005127, "learning_rate": 1.3970925581703098e-07, "loss": 0.0399, "step": 28924 }, { "epoch": 94.8360655737705, "grad_norm": 3.9298691749572754, "learning_rate": 1.3953242938967272e-07, "loss": 0.065, "step": 28925 }, { "epoch": 94.83934426229509, "grad_norm": 3.7375693321228027, "learning_rate": 1.3935571414933846e-07, "loss": 0.111, "step": 28926 }, { "epoch": 94.84262295081967, "grad_norm": 2.4676551818847656, "learning_rate": 1.391791100980211e-07, "loss": 0.165, "step": 28927 }, { "epoch": 94.84590163934426, "grad_norm": 2.906705379486084, "learning_rate": 1.3900261723771125e-07, "loss": 0.1088, "step": 28928 }, { "epoch": 94.84918032786885, "grad_norm": 1.9082696437835693, "learning_rate": 1.3882623557040065e-07, "loss": 0.0353, "step": 28929 }, { "epoch": 94.85245901639344, "grad_norm": 2.5980725288391113, "learning_rate": 1.386499650980766e-07, "loss": 0.1219, "step": 28930 }, { "epoch": 94.85573770491803, "grad_norm": 2.294553756713867, "learning_rate": 1.3847380582272861e-07, "loss": 0.0576, "step": 28931 }, { "epoch": 94.85901639344263, "grad_norm": 1.7156378030776978, "learning_rate": 1.382977577463407e-07, "loss": 0.0435, "step": 28932 }, { "epoch": 94.86229508196722, "grad_norm": 2.630263090133667, "learning_rate": 1.3812182087089898e-07, "loss": 0.1281, "step": 28933 }, { "epoch": 94.8655737704918, "grad_norm": 2.725851058959961, "learning_rate": 1.3794599519838858e-07, "loss": 0.07, "step": 28934 }, { "epoch": 94.8688524590164, "grad_norm": 3.689662218093872, "learning_rate": 1.3777028073079012e-07, "loss": 0.1467, "step": 28935 }, { "epoch": 94.87213114754098, "grad_norm": 3.0690577030181885, "learning_rate": 1.3759467747008648e-07, "loss": 0.1126, "step": 28936 }, { "epoch": 94.87540983606557, "grad_norm": 2.477398633956909, "learning_rate": 1.3741918541825606e-07, "loss": 0.0598, "step": 28937 }, { "epoch": 94.87868852459016, "grad_norm": 2.2200498580932617, "learning_rate": 1.372438045772806e-07, "loss": 0.1003, "step": 28938 }, { "epoch": 94.88196721311475, "grad_norm": 3.225303888320923, "learning_rate": 1.370685349491352e-07, "loss": 0.1041, "step": 28939 }, { "epoch": 94.88524590163935, "grad_norm": 2.6202523708343506, "learning_rate": 1.3689337653579714e-07, "loss": 0.0669, "step": 28940 }, { "epoch": 94.88852459016394, "grad_norm": 2.3373055458068848, "learning_rate": 1.3671832933924045e-07, "loss": 0.1116, "step": 28941 }, { "epoch": 94.89180327868853, "grad_norm": 2.0617308616638184, "learning_rate": 1.3654339336144019e-07, "loss": 0.0563, "step": 28942 }, { "epoch": 94.89508196721312, "grad_norm": 2.734877109527588, "learning_rate": 1.363685686043692e-07, "loss": 0.065, "step": 28943 }, { "epoch": 94.8983606557377, "grad_norm": 1.842857837677002, "learning_rate": 1.3619385506999815e-07, "loss": 0.0822, "step": 28944 }, { "epoch": 94.90163934426229, "grad_norm": 1.7387757301330566, "learning_rate": 1.360192527602966e-07, "loss": 0.0646, "step": 28945 }, { "epoch": 94.90491803278688, "grad_norm": 1.9036790132522583, "learning_rate": 1.3584476167723404e-07, "loss": 0.1241, "step": 28946 }, { "epoch": 94.90819672131147, "grad_norm": 2.9193332195281982, "learning_rate": 1.3567038182277782e-07, "loss": 0.1181, "step": 28947 }, { "epoch": 94.91147540983607, "grad_norm": 2.079525947570801, "learning_rate": 1.3549611319889522e-07, "loss": 0.0449, "step": 28948 }, { "epoch": 94.91475409836066, "grad_norm": 2.6209073066711426, "learning_rate": 1.3532195580754914e-07, "loss": 0.0566, "step": 28949 }, { "epoch": 94.91803278688525, "grad_norm": 2.0676798820495605, "learning_rate": 1.351479096507047e-07, "loss": 0.0483, "step": 28950 }, { "epoch": 94.92131147540984, "grad_norm": 2.700828790664673, "learning_rate": 1.349739747303258e-07, "loss": 0.1524, "step": 28951 }, { "epoch": 94.92459016393443, "grad_norm": 2.1663570404052734, "learning_rate": 1.3480015104837207e-07, "loss": 0.1462, "step": 28952 }, { "epoch": 94.92786885245901, "grad_norm": 2.1873855590820312, "learning_rate": 1.3462643860680414e-07, "loss": 0.0262, "step": 28953 }, { "epoch": 94.9311475409836, "grad_norm": 1.910888671875, "learning_rate": 1.344528374075793e-07, "loss": 0.1172, "step": 28954 }, { "epoch": 94.93442622950819, "grad_norm": 2.004671335220337, "learning_rate": 1.3427934745265713e-07, "loss": 0.0789, "step": 28955 }, { "epoch": 94.9377049180328, "grad_norm": 3.9403018951416016, "learning_rate": 1.3410596874399273e-07, "loss": 0.1451, "step": 28956 }, { "epoch": 94.94098360655738, "grad_norm": 2.5397543907165527, "learning_rate": 1.339327012835423e-07, "loss": 0.0682, "step": 28957 }, { "epoch": 94.94426229508197, "grad_norm": 2.2563602924346924, "learning_rate": 1.3375954507325762e-07, "loss": 0.139, "step": 28958 }, { "epoch": 94.94754098360656, "grad_norm": 3.0519816875457764, "learning_rate": 1.3358650011509267e-07, "loss": 0.1611, "step": 28959 }, { "epoch": 94.95081967213115, "grad_norm": 2.0038998126983643, "learning_rate": 1.3341356641099923e-07, "loss": 0.1404, "step": 28960 }, { "epoch": 94.95409836065573, "grad_norm": 3.42325758934021, "learning_rate": 1.3324074396292465e-07, "loss": 0.1263, "step": 28961 }, { "epoch": 94.95737704918032, "grad_norm": 1.436866283416748, "learning_rate": 1.3306803277282176e-07, "loss": 0.0196, "step": 28962 }, { "epoch": 94.96065573770491, "grad_norm": 2.2830944061279297, "learning_rate": 1.3289543284263463e-07, "loss": 0.0856, "step": 28963 }, { "epoch": 94.96393442622951, "grad_norm": 1.928023099899292, "learning_rate": 1.3272294417431054e-07, "loss": 0.0779, "step": 28964 }, { "epoch": 94.9672131147541, "grad_norm": 3.0642361640930176, "learning_rate": 1.325505667697957e-07, "loss": 0.1687, "step": 28965 }, { "epoch": 94.97049180327869, "grad_norm": 2.0138258934020996, "learning_rate": 1.3237830063103197e-07, "loss": 0.0363, "step": 28966 }, { "epoch": 94.97377049180328, "grad_norm": 1.84355890750885, "learning_rate": 1.3220614575996326e-07, "loss": 0.0661, "step": 28967 }, { "epoch": 94.97704918032787, "grad_norm": 2.135826826095581, "learning_rate": 1.3203410215852918e-07, "loss": 0.1541, "step": 28968 }, { "epoch": 94.98032786885246, "grad_norm": 3.449045419692993, "learning_rate": 1.318621698286715e-07, "loss": 0.1516, "step": 28969 }, { "epoch": 94.98360655737704, "grad_norm": 1.6120067834854126, "learning_rate": 1.3169034877232867e-07, "loss": 0.0311, "step": 28970 }, { "epoch": 94.98688524590163, "grad_norm": 2.397075653076172, "learning_rate": 1.315186389914369e-07, "loss": 0.2128, "step": 28971 }, { "epoch": 94.99016393442623, "grad_norm": 2.3390908241271973, "learning_rate": 1.3134704048793246e-07, "loss": 0.0454, "step": 28972 }, { "epoch": 94.99344262295082, "grad_norm": 2.7352752685546875, "learning_rate": 1.3117555326375264e-07, "loss": 0.1952, "step": 28973 }, { "epoch": 94.99672131147541, "grad_norm": 1.6575593948364258, "learning_rate": 1.3100417732082816e-07, "loss": 0.0188, "step": 28974 }, { "epoch": 95.0, "grad_norm": 2.210383892059326, "learning_rate": 1.30832912661093e-07, "loss": 0.0616, "step": 28975 }, { "epoch": 95.00327868852459, "grad_norm": 2.6197030544281006, "learning_rate": 1.3066175928647785e-07, "loss": 0.0869, "step": 28976 }, { "epoch": 95.00655737704918, "grad_norm": 1.7174952030181885, "learning_rate": 1.3049071719891339e-07, "loss": 0.0171, "step": 28977 }, { "epoch": 95.00983606557377, "grad_norm": 2.0335536003112793, "learning_rate": 1.3031978640032806e-07, "loss": 0.0725, "step": 28978 }, { "epoch": 95.01311475409837, "grad_norm": 1.8405283689498901, "learning_rate": 1.301489668926492e-07, "loss": 0.1058, "step": 28979 }, { "epoch": 95.01639344262296, "grad_norm": 2.2283549308776855, "learning_rate": 1.29978258677802e-07, "loss": 0.0557, "step": 28980 }, { "epoch": 95.01967213114754, "grad_norm": 2.325192928314209, "learning_rate": 1.2980766175771264e-07, "loss": 0.0629, "step": 28981 }, { "epoch": 95.02295081967213, "grad_norm": 2.013546943664551, "learning_rate": 1.2963717613430405e-07, "loss": 0.249, "step": 28982 }, { "epoch": 95.02622950819672, "grad_norm": 2.4610354900360107, "learning_rate": 1.2946680180949911e-07, "loss": 0.1981, "step": 28983 }, { "epoch": 95.02950819672131, "grad_norm": 1.5204181671142578, "learning_rate": 1.2929653878521854e-07, "loss": 0.1467, "step": 28984 }, { "epoch": 95.0327868852459, "grad_norm": 1.997156023979187, "learning_rate": 1.2912638706338188e-07, "loss": 0.037, "step": 28985 }, { "epoch": 95.03606557377049, "grad_norm": 1.2153406143188477, "learning_rate": 1.2895634664590984e-07, "loss": 0.0253, "step": 28986 }, { "epoch": 95.03934426229509, "grad_norm": 2.6695783138275146, "learning_rate": 1.2878641753471756e-07, "loss": 0.0902, "step": 28987 }, { "epoch": 95.04262295081968, "grad_norm": 2.165830612182617, "learning_rate": 1.2861659973172235e-07, "loss": 0.0875, "step": 28988 }, { "epoch": 95.04590163934427, "grad_norm": 2.6341640949249268, "learning_rate": 1.2844689323883719e-07, "loss": 0.1628, "step": 28989 }, { "epoch": 95.04918032786885, "grad_norm": 2.041414737701416, "learning_rate": 1.282772980579783e-07, "loss": 0.1402, "step": 28990 }, { "epoch": 95.05245901639344, "grad_norm": 1.7677879333496094, "learning_rate": 1.2810781419105745e-07, "loss": 0.082, "step": 28991 }, { "epoch": 95.05573770491803, "grad_norm": 3.861750841140747, "learning_rate": 1.2793844163998427e-07, "loss": 0.1741, "step": 28992 }, { "epoch": 95.05901639344262, "grad_norm": 1.7012839317321777, "learning_rate": 1.2776918040666941e-07, "loss": 0.0307, "step": 28993 }, { "epoch": 95.0622950819672, "grad_norm": 2.0214314460754395, "learning_rate": 1.2760003049302138e-07, "loss": 0.2058, "step": 28994 }, { "epoch": 95.06557377049181, "grad_norm": 1.6696728467941284, "learning_rate": 1.2743099190094865e-07, "loss": 0.0392, "step": 28995 }, { "epoch": 95.0688524590164, "grad_norm": 2.634434700012207, "learning_rate": 1.2726206463235635e-07, "loss": 0.1379, "step": 28996 }, { "epoch": 95.07213114754099, "grad_norm": 2.001145124435425, "learning_rate": 1.270932486891485e-07, "loss": 0.1324, "step": 28997 }, { "epoch": 95.07540983606557, "grad_norm": 2.4485039710998535, "learning_rate": 1.269245440732303e-07, "loss": 0.1763, "step": 28998 }, { "epoch": 95.07868852459016, "grad_norm": 2.3142311573028564, "learning_rate": 1.267559507865024e-07, "loss": 0.0708, "step": 28999 }, { "epoch": 95.08196721311475, "grad_norm": 2.110459804534912, "learning_rate": 1.265874688308677e-07, "loss": 0.1269, "step": 29000 }, { "epoch": 95.08524590163934, "grad_norm": 2.020233154296875, "learning_rate": 1.2641909820822473e-07, "loss": 0.1879, "step": 29001 }, { "epoch": 95.08852459016393, "grad_norm": 2.0821127891540527, "learning_rate": 1.2625083892047195e-07, "loss": 0.0432, "step": 29002 }, { "epoch": 95.09180327868853, "grad_norm": 2.2824110984802246, "learning_rate": 1.2608269096950787e-07, "loss": 0.1046, "step": 29003 }, { "epoch": 95.09508196721312, "grad_norm": 2.0225157737731934, "learning_rate": 1.259146543572276e-07, "loss": 0.1222, "step": 29004 }, { "epoch": 95.09836065573771, "grad_norm": 1.5715545415878296, "learning_rate": 1.257467290855263e-07, "loss": 0.0415, "step": 29005 }, { "epoch": 95.1016393442623, "grad_norm": 2.8375046253204346, "learning_rate": 1.2557891515629695e-07, "loss": 0.1025, "step": 29006 }, { "epoch": 95.10491803278688, "grad_norm": 2.889451742172241, "learning_rate": 1.254112125714313e-07, "loss": 0.0489, "step": 29007 }, { "epoch": 95.10819672131147, "grad_norm": 2.171696186065674, "learning_rate": 1.252436213328223e-07, "loss": 0.0965, "step": 29008 }, { "epoch": 95.11147540983606, "grad_norm": 2.2281296253204346, "learning_rate": 1.2507614144235847e-07, "loss": 0.0471, "step": 29009 }, { "epoch": 95.11475409836065, "grad_norm": 2.466128349304199, "learning_rate": 1.2490877290192827e-07, "loss": 0.0922, "step": 29010 }, { "epoch": 95.11803278688525, "grad_norm": 2.2031357288360596, "learning_rate": 1.247415157134191e-07, "loss": 0.1024, "step": 29011 }, { "epoch": 95.12131147540984, "grad_norm": 1.7325091361999512, "learning_rate": 1.2457436987871717e-07, "loss": 0.0449, "step": 29012 }, { "epoch": 95.12459016393443, "grad_norm": 2.451915740966797, "learning_rate": 1.2440733539970662e-07, "loss": 0.1149, "step": 29013 }, { "epoch": 95.12786885245902, "grad_norm": 1.4581531286239624, "learning_rate": 1.2424041227827144e-07, "loss": 0.0275, "step": 29014 }, { "epoch": 95.1311475409836, "grad_norm": 2.7424497604370117, "learning_rate": 1.2407360051629457e-07, "loss": 0.1731, "step": 29015 }, { "epoch": 95.1344262295082, "grad_norm": 2.2678534984588623, "learning_rate": 1.2390690011565566e-07, "loss": 0.0896, "step": 29016 }, { "epoch": 95.13770491803278, "grad_norm": 2.1308488845825195, "learning_rate": 1.2374031107823536e-07, "loss": 0.1656, "step": 29017 }, { "epoch": 95.14098360655737, "grad_norm": 2.728003978729248, "learning_rate": 1.235738334059122e-07, "loss": 0.0886, "step": 29018 }, { "epoch": 95.14426229508197, "grad_norm": 2.532313346862793, "learning_rate": 1.2340746710056252e-07, "loss": 0.0698, "step": 29019 }, { "epoch": 95.14754098360656, "grad_norm": 2.5433433055877686, "learning_rate": 1.2324121216406137e-07, "loss": 0.1233, "step": 29020 }, { "epoch": 95.15081967213115, "grad_norm": 2.372410535812378, "learning_rate": 1.2307506859828623e-07, "loss": 0.0992, "step": 29021 }, { "epoch": 95.15409836065574, "grad_norm": 2.000892400741577, "learning_rate": 1.2290903640510998e-07, "loss": 0.1094, "step": 29022 }, { "epoch": 95.15737704918033, "grad_norm": 2.403635263442993, "learning_rate": 1.2274311558640228e-07, "loss": 0.1528, "step": 29023 }, { "epoch": 95.16065573770491, "grad_norm": 2.1965134143829346, "learning_rate": 1.2257730614403607e-07, "loss": 0.1569, "step": 29024 }, { "epoch": 95.1639344262295, "grad_norm": 2.024723529815674, "learning_rate": 1.2241160807988207e-07, "loss": 0.049, "step": 29025 }, { "epoch": 95.1672131147541, "grad_norm": 2.319530963897705, "learning_rate": 1.2224602139580544e-07, "loss": 0.0515, "step": 29026 }, { "epoch": 95.1704918032787, "grad_norm": 2.2010011672973633, "learning_rate": 1.2208054609367693e-07, "loss": 0.1353, "step": 29027 }, { "epoch": 95.17377049180328, "grad_norm": 1.601731777191162, "learning_rate": 1.2191518217535947e-07, "loss": 0.0643, "step": 29028 }, { "epoch": 95.17704918032787, "grad_norm": 2.092048406600952, "learning_rate": 1.2174992964271936e-07, "loss": 0.1535, "step": 29029 }, { "epoch": 95.18032786885246, "grad_norm": 1.6514902114868164, "learning_rate": 1.2158478849761956e-07, "loss": 0.0331, "step": 29030 }, { "epoch": 95.18360655737705, "grad_norm": 4.0092082023620605, "learning_rate": 1.214197587419219e-07, "loss": 0.0506, "step": 29031 }, { "epoch": 95.18688524590164, "grad_norm": 2.308751344680786, "learning_rate": 1.2125484037748824e-07, "loss": 0.058, "step": 29032 }, { "epoch": 95.19016393442622, "grad_norm": 3.27846622467041, "learning_rate": 1.210900334061771e-07, "loss": 0.1329, "step": 29033 }, { "epoch": 95.19344262295083, "grad_norm": 2.6928601264953613, "learning_rate": 1.2092533782984806e-07, "loss": 0.1513, "step": 29034 }, { "epoch": 95.19672131147541, "grad_norm": 3.0670487880706787, "learning_rate": 1.2076075365035633e-07, "loss": 0.1198, "step": 29035 }, { "epoch": 95.2, "grad_norm": 2.6741929054260254, "learning_rate": 1.2059628086956044e-07, "loss": 0.1995, "step": 29036 }, { "epoch": 95.20327868852459, "grad_norm": 3.2379837036132812, "learning_rate": 1.2043191948931222e-07, "loss": 0.1748, "step": 29037 }, { "epoch": 95.20655737704918, "grad_norm": 2.0928852558135986, "learning_rate": 1.2026766951146684e-07, "loss": 0.0711, "step": 29038 }, { "epoch": 95.20983606557377, "grad_norm": 1.8316161632537842, "learning_rate": 1.2010353093787508e-07, "loss": 0.0351, "step": 29039 }, { "epoch": 95.21311475409836, "grad_norm": 3.621830940246582, "learning_rate": 1.1993950377038988e-07, "loss": 0.0771, "step": 29040 }, { "epoch": 95.21639344262294, "grad_norm": 1.9393963813781738, "learning_rate": 1.1977558801085755e-07, "loss": 0.09, "step": 29041 }, { "epoch": 95.21967213114755, "grad_norm": 1.5469167232513428, "learning_rate": 1.1961178366112992e-07, "loss": 0.0872, "step": 29042 }, { "epoch": 95.22295081967214, "grad_norm": 2.4388694763183594, "learning_rate": 1.1944809072305219e-07, "loss": 0.0805, "step": 29043 }, { "epoch": 95.22622950819672, "grad_norm": 2.5529284477233887, "learning_rate": 1.192845091984707e-07, "loss": 0.1589, "step": 29044 }, { "epoch": 95.22950819672131, "grad_norm": 2.042693614959717, "learning_rate": 1.1912103908922945e-07, "loss": 0.2014, "step": 29045 }, { "epoch": 95.2327868852459, "grad_norm": 2.9840826988220215, "learning_rate": 1.1895768039717149e-07, "loss": 0.192, "step": 29046 }, { "epoch": 95.23606557377049, "grad_norm": 2.470031499862671, "learning_rate": 1.1879443312413974e-07, "loss": 0.1729, "step": 29047 }, { "epoch": 95.23934426229508, "grad_norm": 1.9758843183517456, "learning_rate": 1.1863129727197498e-07, "loss": 0.056, "step": 29048 }, { "epoch": 95.24262295081967, "grad_norm": 1.6676769256591797, "learning_rate": 1.1846827284251571e-07, "loss": 0.1197, "step": 29049 }, { "epoch": 95.24590163934427, "grad_norm": 2.6322708129882812, "learning_rate": 1.183053598376005e-07, "loss": 0.0874, "step": 29050 }, { "epoch": 95.24918032786886, "grad_norm": 2.0500848293304443, "learning_rate": 1.1814255825906785e-07, "loss": 0.0702, "step": 29051 }, { "epoch": 95.25245901639344, "grad_norm": 1.8687241077423096, "learning_rate": 1.1797986810875184e-07, "loss": 0.0993, "step": 29052 }, { "epoch": 95.25573770491803, "grad_norm": 1.8897093534469604, "learning_rate": 1.178172893884888e-07, "loss": 0.1183, "step": 29053 }, { "epoch": 95.25901639344262, "grad_norm": 2.6574959754943848, "learning_rate": 1.1765482210010837e-07, "loss": 0.1262, "step": 29054 }, { "epoch": 95.26229508196721, "grad_norm": 2.4899442195892334, "learning_rate": 1.1749246624544686e-07, "loss": 0.0994, "step": 29055 }, { "epoch": 95.2655737704918, "grad_norm": 1.9023576974868774, "learning_rate": 1.1733022182633169e-07, "loss": 0.0748, "step": 29056 }, { "epoch": 95.26885245901639, "grad_norm": 1.6355935335159302, "learning_rate": 1.1716808884459475e-07, "loss": 0.0757, "step": 29057 }, { "epoch": 95.27213114754099, "grad_norm": 2.5174732208251953, "learning_rate": 1.1700606730206344e-07, "loss": 0.1692, "step": 29058 }, { "epoch": 95.27540983606558, "grad_norm": 2.3919894695281982, "learning_rate": 1.1684415720056297e-07, "loss": 0.0842, "step": 29059 }, { "epoch": 95.27868852459017, "grad_norm": 1.837249517440796, "learning_rate": 1.1668235854192189e-07, "loss": 0.0329, "step": 29060 }, { "epoch": 95.28196721311475, "grad_norm": 1.9276789426803589, "learning_rate": 1.1652067132796208e-07, "loss": 0.1563, "step": 29061 }, { "epoch": 95.28524590163934, "grad_norm": 2.060011625289917, "learning_rate": 1.1635909556050873e-07, "loss": 0.1057, "step": 29062 }, { "epoch": 95.28852459016393, "grad_norm": 1.233029842376709, "learning_rate": 1.1619763124138261e-07, "loss": 0.0195, "step": 29063 }, { "epoch": 95.29180327868852, "grad_norm": 2.362865924835205, "learning_rate": 1.160362783724056e-07, "loss": 0.0966, "step": 29064 }, { "epoch": 95.29508196721312, "grad_norm": 1.8203810453414917, "learning_rate": 1.1587503695539515e-07, "loss": 0.1033, "step": 29065 }, { "epoch": 95.29836065573771, "grad_norm": 1.588765025138855, "learning_rate": 1.157139069921709e-07, "loss": 0.0649, "step": 29066 }, { "epoch": 95.3016393442623, "grad_norm": 3.138418674468994, "learning_rate": 1.1555288848455026e-07, "loss": 0.056, "step": 29067 }, { "epoch": 95.30491803278689, "grad_norm": 2.8440818786621094, "learning_rate": 1.1539198143434738e-07, "loss": 0.142, "step": 29068 }, { "epoch": 95.30819672131148, "grad_norm": 2.4886434078216553, "learning_rate": 1.1523118584337745e-07, "loss": 0.1141, "step": 29069 }, { "epoch": 95.31147540983606, "grad_norm": 1.331511378288269, "learning_rate": 1.1507050171345236e-07, "loss": 0.0269, "step": 29070 }, { "epoch": 95.31475409836065, "grad_norm": 2.0436182022094727, "learning_rate": 1.1490992904638732e-07, "loss": 0.1103, "step": 29071 }, { "epoch": 95.31803278688524, "grad_norm": 3.552245855331421, "learning_rate": 1.1474946784398977e-07, "loss": 0.1552, "step": 29072 }, { "epoch": 95.32131147540984, "grad_norm": 2.6882596015930176, "learning_rate": 1.1458911810806939e-07, "loss": 0.0567, "step": 29073 }, { "epoch": 95.32459016393443, "grad_norm": 2.5265796184539795, "learning_rate": 1.1442887984043472e-07, "loss": 0.144, "step": 29074 }, { "epoch": 95.32786885245902, "grad_norm": 1.9948488473892212, "learning_rate": 1.1426875304289431e-07, "loss": 0.0672, "step": 29075 }, { "epoch": 95.33114754098361, "grad_norm": 2.732046604156494, "learning_rate": 1.1410873771725117e-07, "loss": 0.1164, "step": 29076 }, { "epoch": 95.3344262295082, "grad_norm": 5.304200649261475, "learning_rate": 1.1394883386531053e-07, "loss": 0.1525, "step": 29077 }, { "epoch": 95.33770491803278, "grad_norm": 1.7885037660598755, "learning_rate": 1.1378904148887648e-07, "loss": 0.0445, "step": 29078 }, { "epoch": 95.34098360655737, "grad_norm": 2.253319263458252, "learning_rate": 1.1362936058975094e-07, "loss": 0.1897, "step": 29079 }, { "epoch": 95.34426229508196, "grad_norm": 2.3896312713623047, "learning_rate": 1.1346979116973134e-07, "loss": 0.0862, "step": 29080 }, { "epoch": 95.34754098360656, "grad_norm": 2.274963140487671, "learning_rate": 1.1331033323062068e-07, "loss": 0.0638, "step": 29081 }, { "epoch": 95.35081967213115, "grad_norm": 2.459076404571533, "learning_rate": 1.1315098677421643e-07, "loss": 0.1682, "step": 29082 }, { "epoch": 95.35409836065574, "grad_norm": 2.0800797939300537, "learning_rate": 1.129917518023127e-07, "loss": 0.0448, "step": 29083 }, { "epoch": 95.35737704918033, "grad_norm": 1.5256775617599487, "learning_rate": 1.1283262831670804e-07, "loss": 0.0353, "step": 29084 }, { "epoch": 95.36065573770492, "grad_norm": 1.8714696168899536, "learning_rate": 1.1267361631919549e-07, "loss": 0.2256, "step": 29085 }, { "epoch": 95.3639344262295, "grad_norm": 2.9376602172851562, "learning_rate": 1.1251471581156803e-07, "loss": 0.1156, "step": 29086 }, { "epoch": 95.3672131147541, "grad_norm": 2.1369881629943848, "learning_rate": 1.1235592679561757e-07, "loss": 0.1524, "step": 29087 }, { "epoch": 95.37049180327868, "grad_norm": 2.2927112579345703, "learning_rate": 1.121972492731338e-07, "loss": 0.0817, "step": 29088 }, { "epoch": 95.37377049180328, "grad_norm": 2.610499143600464, "learning_rate": 1.120386832459075e-07, "loss": 0.1232, "step": 29089 }, { "epoch": 95.37704918032787, "grad_norm": 2.5614211559295654, "learning_rate": 1.1188022871572612e-07, "loss": 0.1056, "step": 29090 }, { "epoch": 95.38032786885246, "grad_norm": 1.9950083494186401, "learning_rate": 1.1172188568437603e-07, "loss": 0.0568, "step": 29091 }, { "epoch": 95.38360655737705, "grad_norm": 2.8697333335876465, "learning_rate": 1.1156365415364357e-07, "loss": 0.0353, "step": 29092 }, { "epoch": 95.38688524590164, "grad_norm": 1.925675868988037, "learning_rate": 1.1140553412531064e-07, "loss": 0.1518, "step": 29093 }, { "epoch": 95.39016393442623, "grad_norm": 2.312967300415039, "learning_rate": 1.1124752560116247e-07, "loss": 0.0885, "step": 29094 }, { "epoch": 95.39344262295081, "grad_norm": 2.7868289947509766, "learning_rate": 1.11089628582981e-07, "loss": 0.2726, "step": 29095 }, { "epoch": 95.3967213114754, "grad_norm": 2.4031357765197754, "learning_rate": 1.109318430725459e-07, "loss": 0.2543, "step": 29096 }, { "epoch": 95.4, "grad_norm": 2.2627103328704834, "learning_rate": 1.1077416907163573e-07, "loss": 0.0596, "step": 29097 }, { "epoch": 95.4032786885246, "grad_norm": 2.716266632080078, "learning_rate": 1.1061660658202911e-07, "loss": 0.0958, "step": 29098 }, { "epoch": 95.40655737704918, "grad_norm": 2.2517178058624268, "learning_rate": 1.1045915560550235e-07, "loss": 0.058, "step": 29099 }, { "epoch": 95.40983606557377, "grad_norm": 2.3462417125701904, "learning_rate": 1.1030181614383184e-07, "loss": 0.134, "step": 29100 }, { "epoch": 95.41311475409836, "grad_norm": 3.3633930683135986, "learning_rate": 1.101445881987906e-07, "loss": 0.2207, "step": 29101 }, { "epoch": 95.41639344262295, "grad_norm": 2.49106502532959, "learning_rate": 1.0998747177215163e-07, "loss": 0.1392, "step": 29102 }, { "epoch": 95.41967213114754, "grad_norm": 1.7199872732162476, "learning_rate": 1.09830466865688e-07, "loss": 0.0397, "step": 29103 }, { "epoch": 95.42295081967212, "grad_norm": 2.132991313934326, "learning_rate": 1.0967357348116826e-07, "loss": 0.1069, "step": 29104 }, { "epoch": 95.42622950819673, "grad_norm": 3.4601759910583496, "learning_rate": 1.0951679162036322e-07, "loss": 0.1113, "step": 29105 }, { "epoch": 95.42950819672132, "grad_norm": 2.397270441055298, "learning_rate": 1.0936012128503815e-07, "loss": 0.1367, "step": 29106 }, { "epoch": 95.4327868852459, "grad_norm": 1.9923925399780273, "learning_rate": 1.0920356247696273e-07, "loss": 0.1379, "step": 29107 }, { "epoch": 95.43606557377049, "grad_norm": 2.151568651199341, "learning_rate": 1.0904711519790113e-07, "loss": 0.2148, "step": 29108 }, { "epoch": 95.43934426229508, "grad_norm": 2.25262451171875, "learning_rate": 1.0889077944961635e-07, "loss": 0.0548, "step": 29109 }, { "epoch": 95.44262295081967, "grad_norm": 2.162198781967163, "learning_rate": 1.0873455523387366e-07, "loss": 0.0643, "step": 29110 }, { "epoch": 95.44590163934426, "grad_norm": 2.285479784011841, "learning_rate": 1.0857844255243167e-07, "loss": 0.0696, "step": 29111 }, { "epoch": 95.44918032786886, "grad_norm": 3.338728427886963, "learning_rate": 1.0842244140705338e-07, "loss": 0.1465, "step": 29112 }, { "epoch": 95.45245901639345, "grad_norm": 2.3886685371398926, "learning_rate": 1.082665517994963e-07, "loss": 0.1564, "step": 29113 }, { "epoch": 95.45573770491804, "grad_norm": 1.9437310695648193, "learning_rate": 1.0811077373151791e-07, "loss": 0.1393, "step": 29114 }, { "epoch": 95.45901639344262, "grad_norm": 2.005871295928955, "learning_rate": 1.0795510720487568e-07, "loss": 0.0402, "step": 29115 }, { "epoch": 95.46229508196721, "grad_norm": 2.0621893405914307, "learning_rate": 1.0779955222132599e-07, "loss": 0.1598, "step": 29116 }, { "epoch": 95.4655737704918, "grad_norm": 2.0779523849487305, "learning_rate": 1.0764410878262077e-07, "loss": 0.0817, "step": 29117 }, { "epoch": 95.46885245901639, "grad_norm": 2.3661327362060547, "learning_rate": 1.0748877689051418e-07, "loss": 0.1342, "step": 29118 }, { "epoch": 95.47213114754098, "grad_norm": 1.607483983039856, "learning_rate": 1.0733355654675703e-07, "loss": 0.0373, "step": 29119 }, { "epoch": 95.47540983606558, "grad_norm": 2.689199686050415, "learning_rate": 1.0717844775309905e-07, "loss": 0.1196, "step": 29120 }, { "epoch": 95.47868852459017, "grad_norm": 2.4783382415771484, "learning_rate": 1.0702345051129104e-07, "loss": 0.0781, "step": 29121 }, { "epoch": 95.48196721311476, "grad_norm": 2.3622047901153564, "learning_rate": 1.068685648230794e-07, "loss": 0.0547, "step": 29122 }, { "epoch": 95.48524590163935, "grad_norm": 2.1447577476501465, "learning_rate": 1.0671379069021048e-07, "loss": 0.1138, "step": 29123 }, { "epoch": 95.48852459016393, "grad_norm": 1.983465552330017, "learning_rate": 1.0655912811443069e-07, "loss": 0.1008, "step": 29124 }, { "epoch": 95.49180327868852, "grad_norm": 2.0678598880767822, "learning_rate": 1.0640457709748308e-07, "loss": 0.0379, "step": 29125 }, { "epoch": 95.49508196721311, "grad_norm": 2.224560499191284, "learning_rate": 1.062501376411107e-07, "loss": 0.0993, "step": 29126 }, { "epoch": 95.4983606557377, "grad_norm": 1.678867220878601, "learning_rate": 1.0609580974705547e-07, "loss": 0.0846, "step": 29127 }, { "epoch": 95.5016393442623, "grad_norm": 2.826720952987671, "learning_rate": 1.0594159341705601e-07, "loss": 0.1341, "step": 29128 }, { "epoch": 95.50491803278689, "grad_norm": 1.8271335363388062, "learning_rate": 1.0578748865285315e-07, "loss": 0.0438, "step": 29129 }, { "epoch": 95.50819672131148, "grad_norm": 1.9495493173599243, "learning_rate": 1.0563349545618329e-07, "loss": 0.1079, "step": 29130 }, { "epoch": 95.51147540983607, "grad_norm": 2.178483009338379, "learning_rate": 1.0547961382878391e-07, "loss": 0.1008, "step": 29131 }, { "epoch": 95.51475409836065, "grad_norm": 2.3585116863250732, "learning_rate": 1.0532584377238808e-07, "loss": 0.1386, "step": 29132 }, { "epoch": 95.51803278688524, "grad_norm": 3.1943349838256836, "learning_rate": 1.051721852887333e-07, "loss": 0.0499, "step": 29133 }, { "epoch": 95.52131147540983, "grad_norm": 1.8771601915359497, "learning_rate": 1.0501863837954929e-07, "loss": 0.0767, "step": 29134 }, { "epoch": 95.52459016393442, "grad_norm": 2.281156301498413, "learning_rate": 1.04865203046568e-07, "loss": 0.0676, "step": 29135 }, { "epoch": 95.52786885245902, "grad_norm": 2.6146507263183594, "learning_rate": 1.0471187929152027e-07, "loss": 0.1408, "step": 29136 }, { "epoch": 95.53114754098361, "grad_norm": 3.73654842376709, "learning_rate": 1.0455866711613472e-07, "loss": 0.0826, "step": 29137 }, { "epoch": 95.5344262295082, "grad_norm": 2.6329457759857178, "learning_rate": 1.0440556652213885e-07, "loss": 0.1803, "step": 29138 }, { "epoch": 95.53770491803279, "grad_norm": 2.7653703689575195, "learning_rate": 1.0425257751125906e-07, "loss": 0.0944, "step": 29139 }, { "epoch": 95.54098360655738, "grad_norm": 2.366771697998047, "learning_rate": 1.0409970008522063e-07, "loss": 0.1251, "step": 29140 }, { "epoch": 95.54426229508196, "grad_norm": 1.945705533027649, "learning_rate": 1.0394693424574554e-07, "loss": 0.041, "step": 29141 }, { "epoch": 95.54754098360655, "grad_norm": 2.186652421951294, "learning_rate": 1.0379427999456015e-07, "loss": 0.0503, "step": 29142 }, { "epoch": 95.55081967213114, "grad_norm": 3.125446081161499, "learning_rate": 1.0364173733338312e-07, "loss": 0.13, "step": 29143 }, { "epoch": 95.55409836065574, "grad_norm": 2.0753672122955322, "learning_rate": 1.0348930626393527e-07, "loss": 0.0496, "step": 29144 }, { "epoch": 95.55737704918033, "grad_norm": 2.348853588104248, "learning_rate": 1.0333698678793413e-07, "loss": 0.047, "step": 29145 }, { "epoch": 95.56065573770492, "grad_norm": 3.0196175575256348, "learning_rate": 1.0318477890709944e-07, "loss": 0.1899, "step": 29146 }, { "epoch": 95.56393442622951, "grad_norm": 3.1751279830932617, "learning_rate": 1.0303268262314647e-07, "loss": 0.2137, "step": 29147 }, { "epoch": 95.5672131147541, "grad_norm": 2.060046434402466, "learning_rate": 1.0288069793779053e-07, "loss": 0.0605, "step": 29148 }, { "epoch": 95.57049180327868, "grad_norm": 2.086352825164795, "learning_rate": 1.0272882485274472e-07, "loss": 0.0841, "step": 29149 }, { "epoch": 95.57377049180327, "grad_norm": 3.390775680541992, "learning_rate": 1.0257706336972207e-07, "loss": 0.136, "step": 29150 }, { "epoch": 95.57704918032788, "grad_norm": 3.3893260955810547, "learning_rate": 1.0242541349043345e-07, "loss": 0.041, "step": 29151 }, { "epoch": 95.58032786885246, "grad_norm": 1.692573070526123, "learning_rate": 1.0227387521658972e-07, "loss": 0.0276, "step": 29152 }, { "epoch": 95.58360655737705, "grad_norm": 1.8734543323516846, "learning_rate": 1.021224485498995e-07, "loss": 0.0677, "step": 29153 }, { "epoch": 95.58688524590164, "grad_norm": 1.8155642747879028, "learning_rate": 1.0197113349206922e-07, "loss": 0.0234, "step": 29154 }, { "epoch": 95.59016393442623, "grad_norm": 2.347968816757202, "learning_rate": 1.0181993004480528e-07, "loss": 0.2159, "step": 29155 }, { "epoch": 95.59344262295082, "grad_norm": 2.900886058807373, "learning_rate": 1.016688382098141e-07, "loss": 0.2431, "step": 29156 }, { "epoch": 95.5967213114754, "grad_norm": 3.2515015602111816, "learning_rate": 1.0151785798879877e-07, "loss": 0.1891, "step": 29157 }, { "epoch": 95.6, "grad_norm": 1.4963241815567017, "learning_rate": 1.0136698938346012e-07, "loss": 0.0935, "step": 29158 }, { "epoch": 95.6032786885246, "grad_norm": 2.1200695037841797, "learning_rate": 1.0121623239550126e-07, "loss": 0.0508, "step": 29159 }, { "epoch": 95.60655737704919, "grad_norm": 2.1759746074676514, "learning_rate": 1.0106558702662195e-07, "loss": 0.0503, "step": 29160 }, { "epoch": 95.60983606557377, "grad_norm": 2.398245096206665, "learning_rate": 1.0091505327851969e-07, "loss": 0.1928, "step": 29161 }, { "epoch": 95.61311475409836, "grad_norm": 2.0404253005981445, "learning_rate": 1.0076463115289314e-07, "loss": 0.2001, "step": 29162 }, { "epoch": 95.61639344262295, "grad_norm": 1.9949910640716553, "learning_rate": 1.0061432065143761e-07, "loss": 0.0663, "step": 29163 }, { "epoch": 95.61967213114754, "grad_norm": 1.8940629959106445, "learning_rate": 1.0046412177584841e-07, "loss": 0.1753, "step": 29164 }, { "epoch": 95.62295081967213, "grad_norm": 2.232599973678589, "learning_rate": 1.0031403452781974e-07, "loss": 0.0976, "step": 29165 }, { "epoch": 95.62622950819672, "grad_norm": 1.9332904815673828, "learning_rate": 1.0016405890904358e-07, "loss": 0.1384, "step": 29166 }, { "epoch": 95.62950819672132, "grad_norm": 1.3798140287399292, "learning_rate": 1.0001419492120967e-07, "loss": 0.0373, "step": 29167 }, { "epoch": 95.6327868852459, "grad_norm": 1.9988610744476318, "learning_rate": 9.986444256601002e-08, "loss": 0.0694, "step": 29168 }, { "epoch": 95.6360655737705, "grad_norm": 2.5427939891815186, "learning_rate": 9.971480184513216e-08, "loss": 0.0514, "step": 29169 }, { "epoch": 95.63934426229508, "grad_norm": 2.535878896713257, "learning_rate": 9.956527276026473e-08, "loss": 0.0735, "step": 29170 }, { "epoch": 95.64262295081967, "grad_norm": 2.6217920780181885, "learning_rate": 9.941585531309084e-08, "loss": 0.0737, "step": 29171 }, { "epoch": 95.64590163934426, "grad_norm": 2.4330432415008545, "learning_rate": 9.926654950529801e-08, "loss": 0.0962, "step": 29172 }, { "epoch": 95.64918032786885, "grad_norm": 1.722464680671692, "learning_rate": 9.911735533856937e-08, "loss": 0.0573, "step": 29173 }, { "epoch": 95.65245901639344, "grad_norm": 1.6908150911331177, "learning_rate": 9.896827281458687e-08, "loss": 0.0378, "step": 29174 }, { "epoch": 95.65573770491804, "grad_norm": 2.690397024154663, "learning_rate": 9.881930193503031e-08, "loss": 0.0849, "step": 29175 }, { "epoch": 95.65901639344263, "grad_norm": 2.1768343448638916, "learning_rate": 9.867044270158167e-08, "loss": 0.0989, "step": 29176 }, { "epoch": 95.66229508196722, "grad_norm": 2.358426570892334, "learning_rate": 9.852169511591957e-08, "loss": 0.0988, "step": 29177 }, { "epoch": 95.6655737704918, "grad_norm": 2.497169256210327, "learning_rate": 9.83730591797183e-08, "loss": 0.1067, "step": 29178 }, { "epoch": 95.66885245901639, "grad_norm": 2.1580312252044678, "learning_rate": 9.822453489465756e-08, "loss": 0.1603, "step": 29179 }, { "epoch": 95.67213114754098, "grad_norm": 2.104948043823242, "learning_rate": 9.807612226240937e-08, "loss": 0.2097, "step": 29180 }, { "epoch": 95.67540983606557, "grad_norm": 2.5675547122955322, "learning_rate": 9.792782128464906e-08, "loss": 0.0692, "step": 29181 }, { "epoch": 95.67868852459016, "grad_norm": 2.6077895164489746, "learning_rate": 9.777963196304752e-08, "loss": 0.3042, "step": 29182 }, { "epoch": 95.68196721311476, "grad_norm": 2.3055038452148438, "learning_rate": 9.763155429927673e-08, "loss": 0.14, "step": 29183 }, { "epoch": 95.68524590163935, "grad_norm": 3.6878294944763184, "learning_rate": 9.748358829500648e-08, "loss": 0.1269, "step": 29184 }, { "epoch": 95.68852459016394, "grad_norm": 2.7317099571228027, "learning_rate": 9.733573395190432e-08, "loss": 0.1237, "step": 29185 }, { "epoch": 95.69180327868852, "grad_norm": 2.4970271587371826, "learning_rate": 9.71879912716378e-08, "loss": 0.1281, "step": 29186 }, { "epoch": 95.69508196721311, "grad_norm": 2.421579360961914, "learning_rate": 9.704036025587338e-08, "loss": 0.0675, "step": 29187 }, { "epoch": 95.6983606557377, "grad_norm": 2.1739087104797363, "learning_rate": 9.689284090627526e-08, "loss": 0.0485, "step": 29188 }, { "epoch": 95.70163934426229, "grad_norm": 1.9268484115600586, "learning_rate": 9.674543322450658e-08, "loss": 0.0777, "step": 29189 }, { "epoch": 95.70491803278688, "grad_norm": 2.7136662006378174, "learning_rate": 9.659813721223044e-08, "loss": 0.1323, "step": 29190 }, { "epoch": 95.70819672131148, "grad_norm": 2.4441726207733154, "learning_rate": 9.645095287110773e-08, "loss": 0.1602, "step": 29191 }, { "epoch": 95.71147540983607, "grad_norm": 1.4445619583129883, "learning_rate": 9.630388020279713e-08, "loss": 0.0498, "step": 29192 }, { "epoch": 95.71475409836066, "grad_norm": 1.5113334655761719, "learning_rate": 9.615691920895731e-08, "loss": 0.0621, "step": 29193 }, { "epoch": 95.71803278688525, "grad_norm": 2.769329786300659, "learning_rate": 9.601006989124584e-08, "loss": 0.1982, "step": 29194 }, { "epoch": 95.72131147540983, "grad_norm": 2.7517008781433105, "learning_rate": 9.586333225131916e-08, "loss": 0.1935, "step": 29195 }, { "epoch": 95.72459016393442, "grad_norm": 1.5401335954666138, "learning_rate": 9.57167062908304e-08, "loss": 0.0721, "step": 29196 }, { "epoch": 95.72786885245901, "grad_norm": 2.2977097034454346, "learning_rate": 9.557019201143269e-08, "loss": 0.1047, "step": 29197 }, { "epoch": 95.73114754098361, "grad_norm": 2.8864097595214844, "learning_rate": 9.542378941478025e-08, "loss": 0.1904, "step": 29198 }, { "epoch": 95.7344262295082, "grad_norm": 1.956570029258728, "learning_rate": 9.527749850252288e-08, "loss": 0.0524, "step": 29199 }, { "epoch": 95.73770491803279, "grad_norm": 2.5398685932159424, "learning_rate": 9.513131927630925e-08, "loss": 0.0902, "step": 29200 }, { "epoch": 95.74098360655738, "grad_norm": 2.1545515060424805, "learning_rate": 9.498525173778916e-08, "loss": 0.0512, "step": 29201 }, { "epoch": 95.74426229508197, "grad_norm": 2.05915904045105, "learning_rate": 9.483929588860907e-08, "loss": 0.0829, "step": 29202 }, { "epoch": 95.74754098360656, "grad_norm": 4.001560688018799, "learning_rate": 9.469345173041433e-08, "loss": 0.1309, "step": 29203 }, { "epoch": 95.75081967213114, "grad_norm": 1.8055967092514038, "learning_rate": 9.45477192648503e-08, "loss": 0.1096, "step": 29204 }, { "epoch": 95.75409836065573, "grad_norm": 2.285027503967285, "learning_rate": 9.440209849355896e-08, "loss": 0.1459, "step": 29205 }, { "epoch": 95.75737704918033, "grad_norm": 2.04461932182312, "learning_rate": 9.42565894181835e-08, "loss": 0.0509, "step": 29206 }, { "epoch": 95.76065573770492, "grad_norm": 2.2662830352783203, "learning_rate": 9.411119204036478e-08, "loss": 0.1485, "step": 29207 }, { "epoch": 95.76393442622951, "grad_norm": 1.6911234855651855, "learning_rate": 9.396590636174153e-08, "loss": 0.1616, "step": 29208 }, { "epoch": 95.7672131147541, "grad_norm": 2.251087188720703, "learning_rate": 9.38207323839524e-08, "loss": 0.069, "step": 29209 }, { "epoch": 95.77049180327869, "grad_norm": 2.8516039848327637, "learning_rate": 9.367567010863387e-08, "loss": 0.0589, "step": 29210 }, { "epoch": 95.77377049180328, "grad_norm": 2.8583664894104004, "learning_rate": 9.353071953742354e-08, "loss": 0.1451, "step": 29211 }, { "epoch": 95.77704918032786, "grad_norm": 1.8821251392364502, "learning_rate": 9.338588067195342e-08, "loss": 0.0826, "step": 29212 }, { "epoch": 95.78032786885245, "grad_norm": 1.9183359146118164, "learning_rate": 9.324115351385887e-08, "loss": 0.043, "step": 29213 }, { "epoch": 95.78360655737706, "grad_norm": 1.9794490337371826, "learning_rate": 9.30965380647697e-08, "loss": 0.0715, "step": 29214 }, { "epoch": 95.78688524590164, "grad_norm": 3.117445468902588, "learning_rate": 9.295203432631794e-08, "loss": 0.1863, "step": 29215 }, { "epoch": 95.79016393442623, "grad_norm": 2.1089887619018555, "learning_rate": 9.280764230013229e-08, "loss": 0.1217, "step": 29216 }, { "epoch": 95.79344262295082, "grad_norm": 4.0595784187316895, "learning_rate": 9.266336198784254e-08, "loss": 0.0831, "step": 29217 }, { "epoch": 95.79672131147541, "grad_norm": 3.356476306915283, "learning_rate": 9.251919339107407e-08, "loss": 0.1013, "step": 29218 }, { "epoch": 95.8, "grad_norm": 2.4768881797790527, "learning_rate": 9.237513651145224e-08, "loss": 0.0504, "step": 29219 }, { "epoch": 95.80327868852459, "grad_norm": 2.620347023010254, "learning_rate": 9.223119135060244e-08, "loss": 0.087, "step": 29220 }, { "epoch": 95.80655737704917, "grad_norm": 2.581282615661621, "learning_rate": 9.208735791014666e-08, "loss": 0.1464, "step": 29221 }, { "epoch": 95.80983606557378, "grad_norm": 2.1354758739471436, "learning_rate": 9.19436361917092e-08, "loss": 0.2183, "step": 29222 }, { "epoch": 95.81311475409836, "grad_norm": 2.586223840713501, "learning_rate": 9.180002619690765e-08, "loss": 0.1808, "step": 29223 }, { "epoch": 95.81639344262295, "grad_norm": 2.3797268867492676, "learning_rate": 9.165652792736291e-08, "loss": 0.1713, "step": 29224 }, { "epoch": 95.81967213114754, "grad_norm": 1.9424442052841187, "learning_rate": 9.151314138469369e-08, "loss": 0.0597, "step": 29225 }, { "epoch": 95.82295081967213, "grad_norm": 1.7332525253295898, "learning_rate": 9.136986657051538e-08, "loss": 0.0538, "step": 29226 }, { "epoch": 95.82622950819672, "grad_norm": 2.3221397399902344, "learning_rate": 9.122670348644447e-08, "loss": 0.1696, "step": 29227 }, { "epoch": 95.8295081967213, "grad_norm": 2.439021110534668, "learning_rate": 9.108365213409521e-08, "loss": 0.0755, "step": 29228 }, { "epoch": 95.8327868852459, "grad_norm": 2.025756597518921, "learning_rate": 9.094071251508074e-08, "loss": 0.1842, "step": 29229 }, { "epoch": 95.8360655737705, "grad_norm": 1.4938931465148926, "learning_rate": 9.079788463101091e-08, "loss": 0.0395, "step": 29230 }, { "epoch": 95.83934426229509, "grad_norm": 2.3085906505584717, "learning_rate": 9.065516848349997e-08, "loss": 0.1804, "step": 29231 }, { "epoch": 95.84262295081967, "grad_norm": 3.14799165725708, "learning_rate": 9.051256407415443e-08, "loss": 0.1358, "step": 29232 }, { "epoch": 95.84590163934426, "grad_norm": 2.2623043060302734, "learning_rate": 9.037007140458299e-08, "loss": 0.1239, "step": 29233 }, { "epoch": 95.84918032786885, "grad_norm": 1.6049902439117432, "learning_rate": 9.022769047639102e-08, "loss": 0.0366, "step": 29234 }, { "epoch": 95.85245901639344, "grad_norm": 2.4580013751983643, "learning_rate": 9.008542129118725e-08, "loss": 0.2899, "step": 29235 }, { "epoch": 95.85573770491803, "grad_norm": 1.791758418083191, "learning_rate": 8.994326385057373e-08, "loss": 0.082, "step": 29236 }, { "epoch": 95.85901639344263, "grad_norm": 1.9047147035598755, "learning_rate": 8.980121815615362e-08, "loss": 0.1154, "step": 29237 }, { "epoch": 95.86229508196722, "grad_norm": 2.474754810333252, "learning_rate": 8.965928420952785e-08, "loss": 0.2189, "step": 29238 }, { "epoch": 95.8655737704918, "grad_norm": 1.9815250635147095, "learning_rate": 8.951746201229961e-08, "loss": 0.0345, "step": 29239 }, { "epoch": 95.8688524590164, "grad_norm": 2.0318987369537354, "learning_rate": 8.937575156606537e-08, "loss": 0.1071, "step": 29240 }, { "epoch": 95.87213114754098, "grad_norm": 2.115264892578125, "learning_rate": 8.923415287242387e-08, "loss": 0.0947, "step": 29241 }, { "epoch": 95.87540983606557, "grad_norm": 1.9856517314910889, "learning_rate": 8.909266593297162e-08, "loss": 0.0514, "step": 29242 }, { "epoch": 95.87868852459016, "grad_norm": 3.1326560974121094, "learning_rate": 8.895129074930509e-08, "loss": 0.0524, "step": 29243 }, { "epoch": 95.88196721311475, "grad_norm": 1.9109553098678589, "learning_rate": 8.881002732301746e-08, "loss": 0.0736, "step": 29244 }, { "epoch": 95.88524590163935, "grad_norm": 2.3855085372924805, "learning_rate": 8.866887565570192e-08, "loss": 0.1025, "step": 29245 }, { "epoch": 95.88852459016394, "grad_norm": 2.037757396697998, "learning_rate": 8.852783574894941e-08, "loss": 0.0633, "step": 29246 }, { "epoch": 95.89180327868853, "grad_norm": 2.149613618850708, "learning_rate": 8.838690760435198e-08, "loss": 0.0787, "step": 29247 }, { "epoch": 95.89508196721312, "grad_norm": 4.265511989593506, "learning_rate": 8.824609122349726e-08, "loss": 0.1147, "step": 29248 }, { "epoch": 95.8983606557377, "grad_norm": 2.2912046909332275, "learning_rate": 8.810538660797175e-08, "loss": 0.0522, "step": 29249 }, { "epoch": 95.90163934426229, "grad_norm": 2.477850914001465, "learning_rate": 8.79647937593664e-08, "loss": 0.0529, "step": 29250 }, { "epoch": 95.90491803278688, "grad_norm": 2.7355055809020996, "learning_rate": 8.782431267926216e-08, "loss": 0.1798, "step": 29251 }, { "epoch": 95.90819672131147, "grad_norm": 2.017287015914917, "learning_rate": 8.768394336924558e-08, "loss": 0.0293, "step": 29252 }, { "epoch": 95.91147540983607, "grad_norm": 5.197559356689453, "learning_rate": 8.754368583089978e-08, "loss": 0.2323, "step": 29253 }, { "epoch": 95.91475409836066, "grad_norm": 2.741206407546997, "learning_rate": 8.740354006580353e-08, "loss": 0.1016, "step": 29254 }, { "epoch": 95.91803278688525, "grad_norm": 1.9386457204818726, "learning_rate": 8.72635060755389e-08, "loss": 0.0616, "step": 29255 }, { "epoch": 95.92131147540984, "grad_norm": 2.0058443546295166, "learning_rate": 8.712358386168573e-08, "loss": 0.1141, "step": 29256 }, { "epoch": 95.92459016393443, "grad_norm": 2.319718837738037, "learning_rate": 8.698377342582165e-08, "loss": 0.1555, "step": 29257 }, { "epoch": 95.92786885245901, "grad_norm": 1.5802549123764038, "learning_rate": 8.684407476952095e-08, "loss": 0.0368, "step": 29258 }, { "epoch": 95.9311475409836, "grad_norm": 1.7041568756103516, "learning_rate": 8.670448789436126e-08, "loss": 0.0842, "step": 29259 }, { "epoch": 95.93442622950819, "grad_norm": 2.383418083190918, "learning_rate": 8.656501280191576e-08, "loss": 0.0506, "step": 29260 }, { "epoch": 95.9377049180328, "grad_norm": 2.088101625442505, "learning_rate": 8.642564949375654e-08, "loss": 0.0697, "step": 29261 }, { "epoch": 95.94098360655738, "grad_norm": 2.879302501678467, "learning_rate": 8.628639797145566e-08, "loss": 0.1309, "step": 29262 }, { "epoch": 95.94426229508197, "grad_norm": 1.7072941064834595, "learning_rate": 8.614725823658409e-08, "loss": 0.0359, "step": 29263 }, { "epoch": 95.94754098360656, "grad_norm": 2.801894187927246, "learning_rate": 8.600823029070949e-08, "loss": 0.1027, "step": 29264 }, { "epoch": 95.95081967213115, "grad_norm": 1.7786015272140503, "learning_rate": 8.586931413540056e-08, "loss": 0.0284, "step": 29265 }, { "epoch": 95.95409836065573, "grad_norm": 2.712118625640869, "learning_rate": 8.573050977222275e-08, "loss": 0.1825, "step": 29266 }, { "epoch": 95.95737704918032, "grad_norm": 2.8506062030792236, "learning_rate": 8.559181720274145e-08, "loss": 0.0894, "step": 29267 }, { "epoch": 95.96065573770491, "grad_norm": 2.239107131958008, "learning_rate": 8.5453236428521e-08, "loss": 0.1691, "step": 29268 }, { "epoch": 95.96393442622951, "grad_norm": 1.8637665510177612, "learning_rate": 8.531476745112454e-08, "loss": 0.118, "step": 29269 }, { "epoch": 95.9672131147541, "grad_norm": 2.148766279220581, "learning_rate": 8.517641027211198e-08, "loss": 0.1105, "step": 29270 }, { "epoch": 95.97049180327869, "grad_norm": 2.0354411602020264, "learning_rate": 8.503816489304429e-08, "loss": 0.0903, "step": 29271 }, { "epoch": 95.97377049180328, "grad_norm": 2.86716890335083, "learning_rate": 8.490003131548019e-08, "loss": 0.1529, "step": 29272 }, { "epoch": 95.97704918032787, "grad_norm": 2.043975353240967, "learning_rate": 8.476200954097846e-08, "loss": 0.0652, "step": 29273 }, { "epoch": 95.98032786885246, "grad_norm": 1.8718492984771729, "learning_rate": 8.462409957109342e-08, "loss": 0.0753, "step": 29274 }, { "epoch": 95.98360655737704, "grad_norm": 1.6514534950256348, "learning_rate": 8.448630140738046e-08, "loss": 0.0988, "step": 29275 }, { "epoch": 95.98688524590163, "grad_norm": 2.114593744277954, "learning_rate": 8.434861505139502e-08, "loss": 0.0596, "step": 29276 }, { "epoch": 95.99016393442623, "grad_norm": 2.1511144638061523, "learning_rate": 8.421104050468809e-08, "loss": 0.1025, "step": 29277 }, { "epoch": 95.99344262295082, "grad_norm": 1.929741382598877, "learning_rate": 8.407357776881175e-08, "loss": 0.0598, "step": 29278 }, { "epoch": 95.99672131147541, "grad_norm": 1.9902024269104004, "learning_rate": 8.393622684531588e-08, "loss": 0.0722, "step": 29279 }, { "epoch": 96.0, "grad_norm": 2.4881582260131836, "learning_rate": 8.379898773574924e-08, "loss": 0.1129, "step": 29280 }, { "epoch": 96.00327868852459, "grad_norm": 1.9257607460021973, "learning_rate": 8.366186044165948e-08, "loss": 0.0555, "step": 29281 }, { "epoch": 96.00655737704918, "grad_norm": 1.914146065711975, "learning_rate": 8.3524844964592e-08, "loss": 0.0616, "step": 29282 }, { "epoch": 96.00983606557377, "grad_norm": 2.5396623611450195, "learning_rate": 8.338794130609229e-08, "loss": 0.0701, "step": 29283 }, { "epoch": 96.01311475409837, "grad_norm": 1.9204171895980835, "learning_rate": 8.325114946770463e-08, "loss": 0.0626, "step": 29284 }, { "epoch": 96.01639344262296, "grad_norm": 2.5620405673980713, "learning_rate": 8.311446945097112e-08, "loss": 0.0907, "step": 29285 }, { "epoch": 96.01967213114754, "grad_norm": 2.7954163551330566, "learning_rate": 8.297790125743277e-08, "loss": 0.0803, "step": 29286 }, { "epoch": 96.02295081967213, "grad_norm": 2.3294858932495117, "learning_rate": 8.284144488862944e-08, "loss": 0.075, "step": 29287 }, { "epoch": 96.02622950819672, "grad_norm": 2.3036952018737793, "learning_rate": 8.270510034610101e-08, "loss": 0.159, "step": 29288 }, { "epoch": 96.02950819672131, "grad_norm": 1.824272871017456, "learning_rate": 8.256886763138295e-08, "loss": 0.0596, "step": 29289 }, { "epoch": 96.0327868852459, "grad_norm": 2.549072027206421, "learning_rate": 8.243274674601287e-08, "loss": 0.1616, "step": 29290 }, { "epoch": 96.03606557377049, "grad_norm": 2.752091646194458, "learning_rate": 8.229673769152625e-08, "loss": 0.1437, "step": 29291 }, { "epoch": 96.03934426229509, "grad_norm": 1.7816226482391357, "learning_rate": 8.216084046945405e-08, "loss": 0.0513, "step": 29292 }, { "epoch": 96.04262295081968, "grad_norm": 1.9892619848251343, "learning_rate": 8.202505508133063e-08, "loss": 0.0785, "step": 29293 }, { "epoch": 96.04590163934427, "grad_norm": 1.4475317001342773, "learning_rate": 8.188938152868809e-08, "loss": 0.0388, "step": 29294 }, { "epoch": 96.04918032786885, "grad_norm": 2.1895337104797363, "learning_rate": 8.175381981305409e-08, "loss": 0.0998, "step": 29295 }, { "epoch": 96.05245901639344, "grad_norm": 2.4179532527923584, "learning_rate": 8.161836993595851e-08, "loss": 0.1433, "step": 29296 }, { "epoch": 96.05573770491803, "grad_norm": 1.8682619333267212, "learning_rate": 8.148303189892793e-08, "loss": 0.0899, "step": 29297 }, { "epoch": 96.05901639344262, "grad_norm": 2.5508341789245605, "learning_rate": 8.134780570348888e-08, "loss": 0.1477, "step": 29298 }, { "epoch": 96.0622950819672, "grad_norm": 2.200930118560791, "learning_rate": 8.121269135116683e-08, "loss": 0.0837, "step": 29299 }, { "epoch": 96.06557377049181, "grad_norm": 2.34822154045105, "learning_rate": 8.107768884348388e-08, "loss": 0.1842, "step": 29300 }, { "epoch": 96.0688524590164, "grad_norm": 2.132760763168335, "learning_rate": 8.094279818196326e-08, "loss": 0.0592, "step": 29301 }, { "epoch": 96.07213114754099, "grad_norm": 3.1125946044921875, "learning_rate": 8.080801936812599e-08, "loss": 0.0389, "step": 29302 }, { "epoch": 96.07540983606557, "grad_norm": 1.941598892211914, "learning_rate": 8.067335240349194e-08, "loss": 0.0979, "step": 29303 }, { "epoch": 96.07868852459016, "grad_norm": 1.663382649421692, "learning_rate": 8.05387972895788e-08, "loss": 0.0384, "step": 29304 }, { "epoch": 96.08196721311475, "grad_norm": 2.625251293182373, "learning_rate": 8.040435402790425e-08, "loss": 0.0852, "step": 29305 }, { "epoch": 96.08524590163934, "grad_norm": 1.9962494373321533, "learning_rate": 8.027002261998484e-08, "loss": 0.095, "step": 29306 }, { "epoch": 96.08852459016393, "grad_norm": 2.3104584217071533, "learning_rate": 8.013580306733492e-08, "loss": 0.0671, "step": 29307 }, { "epoch": 96.09180327868853, "grad_norm": 2.666743755340576, "learning_rate": 8.000169537146774e-08, "loss": 0.2061, "step": 29308 }, { "epoch": 96.09508196721312, "grad_norm": 2.5219085216522217, "learning_rate": 7.986769953389539e-08, "loss": 0.1029, "step": 29309 }, { "epoch": 96.09836065573771, "grad_norm": 1.4946948289871216, "learning_rate": 7.973381555612891e-08, "loss": 0.074, "step": 29310 }, { "epoch": 96.1016393442623, "grad_norm": 2.4613571166992188, "learning_rate": 7.960004343967931e-08, "loss": 0.037, "step": 29311 }, { "epoch": 96.10491803278688, "grad_norm": 2.075458526611328, "learning_rate": 7.946638318605315e-08, "loss": 0.0955, "step": 29312 }, { "epoch": 96.10819672131147, "grad_norm": 2.4920434951782227, "learning_rate": 7.933283479675813e-08, "loss": 0.0649, "step": 29313 }, { "epoch": 96.11147540983606, "grad_norm": 2.117231845855713, "learning_rate": 7.91993982732997e-08, "loss": 0.1653, "step": 29314 }, { "epoch": 96.11475409836065, "grad_norm": 2.2567503452301025, "learning_rate": 7.906607361718443e-08, "loss": 0.1183, "step": 29315 }, { "epoch": 96.11803278688525, "grad_norm": 1.772141456604004, "learning_rate": 7.893286082991336e-08, "loss": 0.1012, "step": 29316 }, { "epoch": 96.12131147540984, "grad_norm": 3.7211039066314697, "learning_rate": 7.879975991299082e-08, "loss": 0.0468, "step": 29317 }, { "epoch": 96.12459016393443, "grad_norm": 3.172610282897949, "learning_rate": 7.866677086791563e-08, "loss": 0.0978, "step": 29318 }, { "epoch": 96.12786885245902, "grad_norm": 2.0762832164764404, "learning_rate": 7.853389369618768e-08, "loss": 0.0433, "step": 29319 }, { "epoch": 96.1311475409836, "grad_norm": 1.4249145984649658, "learning_rate": 7.84011283993058e-08, "loss": 0.0952, "step": 29320 }, { "epoch": 96.1344262295082, "grad_norm": 2.865906238555908, "learning_rate": 7.826847497876766e-08, "loss": 0.0361, "step": 29321 }, { "epoch": 96.13770491803278, "grad_norm": 2.105630397796631, "learning_rate": 7.813593343606874e-08, "loss": 0.0429, "step": 29322 }, { "epoch": 96.14098360655737, "grad_norm": 1.890844464302063, "learning_rate": 7.800350377270338e-08, "loss": 0.1333, "step": 29323 }, { "epoch": 96.14426229508197, "grad_norm": 2.1584997177124023, "learning_rate": 7.787118599016375e-08, "loss": 0.0846, "step": 29324 }, { "epoch": 96.14754098360656, "grad_norm": 2.5434253215789795, "learning_rate": 7.773898008994418e-08, "loss": 0.0379, "step": 29325 }, { "epoch": 96.15081967213115, "grad_norm": 1.7894983291625977, "learning_rate": 7.760688607353351e-08, "loss": 0.047, "step": 29326 }, { "epoch": 96.15409836065574, "grad_norm": 2.6272470951080322, "learning_rate": 7.747490394242163e-08, "loss": 0.1324, "step": 29327 }, { "epoch": 96.15737704918033, "grad_norm": 2.4344136714935303, "learning_rate": 7.734303369809736e-08, "loss": 0.0776, "step": 29328 }, { "epoch": 96.16065573770491, "grad_norm": 2.4650909900665283, "learning_rate": 7.72112753420473e-08, "loss": 0.0817, "step": 29329 }, { "epoch": 96.1639344262295, "grad_norm": 2.2270114421844482, "learning_rate": 7.707962887575804e-08, "loss": 0.0466, "step": 29330 }, { "epoch": 96.1672131147541, "grad_norm": 2.9346423149108887, "learning_rate": 7.694809430071282e-08, "loss": 0.2437, "step": 29331 }, { "epoch": 96.1704918032787, "grad_norm": 2.084172010421753, "learning_rate": 7.681667161839378e-08, "loss": 0.0579, "step": 29332 }, { "epoch": 96.17377049180328, "grad_norm": 2.2413928508758545, "learning_rate": 7.668536083028644e-08, "loss": 0.0702, "step": 29333 }, { "epoch": 96.17704918032787, "grad_norm": 2.7371225357055664, "learning_rate": 7.655416193786847e-08, "loss": 0.0869, "step": 29334 }, { "epoch": 96.18032786885246, "grad_norm": 1.9922415018081665, "learning_rate": 7.642307494261981e-08, "loss": 0.0603, "step": 29335 }, { "epoch": 96.18360655737705, "grad_norm": 1.9005376100540161, "learning_rate": 7.629209984601816e-08, "loss": 0.0714, "step": 29336 }, { "epoch": 96.18688524590164, "grad_norm": 3.298189401626587, "learning_rate": 7.616123664954233e-08, "loss": 0.064, "step": 29337 }, { "epoch": 96.19016393442622, "grad_norm": 1.7869352102279663, "learning_rate": 7.603048535466672e-08, "loss": 0.0507, "step": 29338 }, { "epoch": 96.19344262295083, "grad_norm": 2.6480305194854736, "learning_rate": 7.589984596286459e-08, "loss": 0.1546, "step": 29339 }, { "epoch": 96.19672131147541, "grad_norm": 2.2472586631774902, "learning_rate": 7.576931847561142e-08, "loss": 0.073, "step": 29340 }, { "epoch": 96.2, "grad_norm": 1.9709129333496094, "learning_rate": 7.563890289437825e-08, "loss": 0.0515, "step": 29341 }, { "epoch": 96.20327868852459, "grad_norm": 2.3947670459747314, "learning_rate": 7.550859922063392e-08, "loss": 0.1192, "step": 29342 }, { "epoch": 96.20655737704918, "grad_norm": 2.093733549118042, "learning_rate": 7.537840745584945e-08, "loss": 0.0565, "step": 29343 }, { "epoch": 96.20983606557377, "grad_norm": 2.0425655841827393, "learning_rate": 7.524832760149258e-08, "loss": 0.0477, "step": 29344 }, { "epoch": 96.21311475409836, "grad_norm": 1.6161835193634033, "learning_rate": 7.51183596590288e-08, "loss": 0.0341, "step": 29345 }, { "epoch": 96.21639344262294, "grad_norm": 2.210649013519287, "learning_rate": 7.498850362992694e-08, "loss": 0.1027, "step": 29346 }, { "epoch": 96.21967213114755, "grad_norm": 2.6618711948394775, "learning_rate": 7.485875951564803e-08, "loss": 0.101, "step": 29347 }, { "epoch": 96.22295081967214, "grad_norm": 2.6155402660369873, "learning_rate": 7.472912731765647e-08, "loss": 0.161, "step": 29348 }, { "epoch": 96.22622950819672, "grad_norm": 1.9726134538650513, "learning_rate": 7.45996070374122e-08, "loss": 0.1121, "step": 29349 }, { "epoch": 96.22950819672131, "grad_norm": 2.3011281490325928, "learning_rate": 7.44701986763785e-08, "loss": 0.1155, "step": 29350 }, { "epoch": 96.2327868852459, "grad_norm": 3.675273895263672, "learning_rate": 7.43409022360142e-08, "loss": 0.1256, "step": 29351 }, { "epoch": 96.23606557377049, "grad_norm": 2.4344842433929443, "learning_rate": 7.42117177177748e-08, "loss": 0.1523, "step": 29352 }, { "epoch": 96.23934426229508, "grad_norm": 2.8545773029327393, "learning_rate": 7.408264512311914e-08, "loss": 0.2372, "step": 29353 }, { "epoch": 96.24262295081967, "grad_norm": 2.986853837966919, "learning_rate": 7.395368445350159e-08, "loss": 0.1206, "step": 29354 }, { "epoch": 96.24590163934427, "grad_norm": 1.8132339715957642, "learning_rate": 7.382483571037768e-08, "loss": 0.114, "step": 29355 }, { "epoch": 96.24918032786886, "grad_norm": 1.6983469724655151, "learning_rate": 7.369609889519847e-08, "loss": 0.0299, "step": 29356 }, { "epoch": 96.25245901639344, "grad_norm": 2.805112838745117, "learning_rate": 7.356747400941722e-08, "loss": 0.0561, "step": 29357 }, { "epoch": 96.25573770491803, "grad_norm": 1.8008079528808594, "learning_rate": 7.343896105448278e-08, "loss": 0.107, "step": 29358 }, { "epoch": 96.25901639344262, "grad_norm": 1.864874243736267, "learning_rate": 7.331056003184511e-08, "loss": 0.0383, "step": 29359 }, { "epoch": 96.26229508196721, "grad_norm": 2.427605628967285, "learning_rate": 7.318227094295305e-08, "loss": 0.0611, "step": 29360 }, { "epoch": 96.2655737704918, "grad_norm": 2.7863223552703857, "learning_rate": 7.30540937892521e-08, "loss": 0.0905, "step": 29361 }, { "epoch": 96.26885245901639, "grad_norm": 1.5815925598144531, "learning_rate": 7.292602857218667e-08, "loss": 0.0777, "step": 29362 }, { "epoch": 96.27213114754099, "grad_norm": 2.5419788360595703, "learning_rate": 7.279807529320226e-08, "loss": 0.1273, "step": 29363 }, { "epoch": 96.27540983606558, "grad_norm": 2.2421207427978516, "learning_rate": 7.267023395374106e-08, "loss": 0.1368, "step": 29364 }, { "epoch": 96.27868852459017, "grad_norm": 1.746031641960144, "learning_rate": 7.254250455524525e-08, "loss": 0.1264, "step": 29365 }, { "epoch": 96.28196721311475, "grad_norm": 2.1666531562805176, "learning_rate": 7.241488709915478e-08, "loss": 0.1433, "step": 29366 }, { "epoch": 96.28524590163934, "grad_norm": 2.2498435974121094, "learning_rate": 7.228738158690852e-08, "loss": 0.1057, "step": 29367 }, { "epoch": 96.28852459016393, "grad_norm": 2.2855172157287598, "learning_rate": 7.215998801994417e-08, "loss": 0.1756, "step": 29368 }, { "epoch": 96.29180327868852, "grad_norm": 2.8235576152801514, "learning_rate": 7.203270639969728e-08, "loss": 0.0936, "step": 29369 }, { "epoch": 96.29508196721312, "grad_norm": 2.24839448928833, "learning_rate": 7.190553672760558e-08, "loss": 0.1447, "step": 29370 }, { "epoch": 96.29836065573771, "grad_norm": 3.0864098072052, "learning_rate": 7.177847900510016e-08, "loss": 0.1779, "step": 29371 }, { "epoch": 96.3016393442623, "grad_norm": 2.587533950805664, "learning_rate": 7.165153323361652e-08, "loss": 0.0598, "step": 29372 }, { "epoch": 96.30491803278689, "grad_norm": 2.4126546382904053, "learning_rate": 7.152469941458462e-08, "loss": 0.0736, "step": 29373 }, { "epoch": 96.30819672131148, "grad_norm": 2.146937370300293, "learning_rate": 7.139797754943444e-08, "loss": 0.0968, "step": 29374 }, { "epoch": 96.31147540983606, "grad_norm": 2.625225305557251, "learning_rate": 7.127136763959485e-08, "loss": 0.19, "step": 29375 }, { "epoch": 96.31475409836065, "grad_norm": 1.6280667781829834, "learning_rate": 7.11448696864936e-08, "loss": 0.0385, "step": 29376 }, { "epoch": 96.31803278688524, "grad_norm": 2.74200439453125, "learning_rate": 7.10184836915584e-08, "loss": 0.2876, "step": 29377 }, { "epoch": 96.32131147540984, "grad_norm": 1.985155701637268, "learning_rate": 7.089220965621368e-08, "loss": 0.027, "step": 29378 }, { "epoch": 96.32459016393443, "grad_norm": 2.4962730407714844, "learning_rate": 7.076604758188166e-08, "loss": 0.1333, "step": 29379 }, { "epoch": 96.32786885245902, "grad_norm": 2.453397274017334, "learning_rate": 7.063999746998673e-08, "loss": 0.162, "step": 29380 }, { "epoch": 96.33114754098361, "grad_norm": 2.1671504974365234, "learning_rate": 7.051405932194999e-08, "loss": 0.1149, "step": 29381 }, { "epoch": 96.3344262295082, "grad_norm": 2.6241488456726074, "learning_rate": 7.038823313919141e-08, "loss": 0.2184, "step": 29382 }, { "epoch": 96.33770491803278, "grad_norm": 2.1929380893707275, "learning_rate": 7.026251892312874e-08, "loss": 0.1015, "step": 29383 }, { "epoch": 96.34098360655737, "grad_norm": 1.9211007356643677, "learning_rate": 7.013691667518086e-08, "loss": 0.0567, "step": 29384 }, { "epoch": 96.34426229508196, "grad_norm": 2.154151201248169, "learning_rate": 7.00114263967644e-08, "loss": 0.2278, "step": 29385 }, { "epoch": 96.34754098360656, "grad_norm": 2.540231466293335, "learning_rate": 6.988604808929377e-08, "loss": 0.1108, "step": 29386 }, { "epoch": 96.35081967213115, "grad_norm": 3.868821620941162, "learning_rate": 6.976078175418233e-08, "loss": 0.1646, "step": 29387 }, { "epoch": 96.35409836065574, "grad_norm": 1.7053483724594116, "learning_rate": 6.963562739284225e-08, "loss": 0.1166, "step": 29388 }, { "epoch": 96.35737704918033, "grad_norm": 2.234104871749878, "learning_rate": 6.951058500668683e-08, "loss": 0.064, "step": 29389 }, { "epoch": 96.36065573770492, "grad_norm": 1.6034866571426392, "learning_rate": 6.938565459712387e-08, "loss": 0.0943, "step": 29390 }, { "epoch": 96.3639344262295, "grad_norm": 2.1439876556396484, "learning_rate": 6.926083616556223e-08, "loss": 0.1238, "step": 29391 }, { "epoch": 96.3672131147541, "grad_norm": 2.2697465419769287, "learning_rate": 6.913612971341077e-08, "loss": 0.0742, "step": 29392 }, { "epoch": 96.37049180327868, "grad_norm": 1.826204538345337, "learning_rate": 6.901153524207616e-08, "loss": 0.0831, "step": 29393 }, { "epoch": 96.37377049180328, "grad_norm": 3.0172882080078125, "learning_rate": 6.88870527529606e-08, "loss": 0.1003, "step": 29394 }, { "epoch": 96.37704918032787, "grad_norm": 2.6769087314605713, "learning_rate": 6.876268224746963e-08, "loss": 0.1781, "step": 29395 }, { "epoch": 96.38032786885246, "grad_norm": 1.8348878622055054, "learning_rate": 6.863842372700546e-08, "loss": 0.0713, "step": 29396 }, { "epoch": 96.38360655737705, "grad_norm": 1.3640869855880737, "learning_rate": 6.851427719296922e-08, "loss": 0.1271, "step": 29397 }, { "epoch": 96.38688524590164, "grad_norm": 2.718313217163086, "learning_rate": 6.839024264675975e-08, "loss": 0.0617, "step": 29398 }, { "epoch": 96.39016393442623, "grad_norm": 2.1510233879089355, "learning_rate": 6.826632008977707e-08, "loss": 0.0795, "step": 29399 }, { "epoch": 96.39344262295081, "grad_norm": 2.0140745639801025, "learning_rate": 6.814250952341894e-08, "loss": 0.0975, "step": 29400 }, { "epoch": 96.3967213114754, "grad_norm": 2.4018728733062744, "learning_rate": 6.801881094907869e-08, "loss": 0.0558, "step": 29401 }, { "epoch": 96.4, "grad_norm": 2.1954526901245117, "learning_rate": 6.78952243681541e-08, "loss": 0.0285, "step": 29402 }, { "epoch": 96.4032786885246, "grad_norm": 2.358164072036743, "learning_rate": 6.777174978203849e-08, "loss": 0.1511, "step": 29403 }, { "epoch": 96.40655737704918, "grad_norm": 2.075671672821045, "learning_rate": 6.764838719212297e-08, "loss": 0.0705, "step": 29404 }, { "epoch": 96.40983606557377, "grad_norm": 2.0930228233337402, "learning_rate": 6.752513659979754e-08, "loss": 0.0747, "step": 29405 }, { "epoch": 96.41311475409836, "grad_norm": 2.924804449081421, "learning_rate": 6.740199800645442e-08, "loss": 0.1006, "step": 29406 }, { "epoch": 96.41639344262295, "grad_norm": 2.290931224822998, "learning_rate": 6.727897141348139e-08, "loss": 0.0484, "step": 29407 }, { "epoch": 96.41967213114754, "grad_norm": 2.5035595893859863, "learning_rate": 6.715605682226511e-08, "loss": 0.1325, "step": 29408 }, { "epoch": 96.42295081967212, "grad_norm": 2.713212251663208, "learning_rate": 6.703325423419227e-08, "loss": 0.1221, "step": 29409 }, { "epoch": 96.42622950819673, "grad_norm": 1.6951031684875488, "learning_rate": 6.691056365064619e-08, "loss": 0.0766, "step": 29410 }, { "epoch": 96.42950819672132, "grad_norm": 2.1778900623321533, "learning_rate": 6.678798507301132e-08, "loss": 0.1539, "step": 29411 }, { "epoch": 96.4327868852459, "grad_norm": 1.185517430305481, "learning_rate": 6.6665518502671e-08, "loss": 0.033, "step": 29412 }, { "epoch": 96.43606557377049, "grad_norm": 1.6739026308059692, "learning_rate": 6.654316394100413e-08, "loss": 0.1113, "step": 29413 }, { "epoch": 96.43934426229508, "grad_norm": 2.525385618209839, "learning_rate": 6.642092138939182e-08, "loss": 0.0905, "step": 29414 }, { "epoch": 96.44262295081967, "grad_norm": 1.8002604246139526, "learning_rate": 6.629879084921187e-08, "loss": 0.1317, "step": 29415 }, { "epoch": 96.44590163934426, "grad_norm": 2.741055727005005, "learning_rate": 6.617677232184095e-08, "loss": 0.1135, "step": 29416 }, { "epoch": 96.44918032786886, "grad_norm": 2.5160810947418213, "learning_rate": 6.605486580865683e-08, "loss": 0.1237, "step": 29417 }, { "epoch": 96.45245901639345, "grad_norm": 2.1270060539245605, "learning_rate": 6.593307131103066e-08, "loss": 0.0861, "step": 29418 }, { "epoch": 96.45573770491804, "grad_norm": 1.9332205057144165, "learning_rate": 6.58113888303391e-08, "loss": 0.043, "step": 29419 }, { "epoch": 96.45901639344262, "grad_norm": 2.0669169425964355, "learning_rate": 6.568981836795441e-08, "loss": 0.0887, "step": 29420 }, { "epoch": 96.46229508196721, "grad_norm": 3.41005802154541, "learning_rate": 6.556835992524436e-08, "loss": 0.1745, "step": 29421 }, { "epoch": 96.4655737704918, "grad_norm": 2.8100314140319824, "learning_rate": 6.544701350358118e-08, "loss": 0.0744, "step": 29422 }, { "epoch": 96.46885245901639, "grad_norm": 2.3074498176574707, "learning_rate": 6.532577910433158e-08, "loss": 0.1885, "step": 29423 }, { "epoch": 96.47213114754098, "grad_norm": 2.5325636863708496, "learning_rate": 6.520465672886333e-08, "loss": 0.1437, "step": 29424 }, { "epoch": 96.47540983606558, "grad_norm": 1.8487998247146606, "learning_rate": 6.508364637854314e-08, "loss": 0.1411, "step": 29425 }, { "epoch": 96.47868852459017, "grad_norm": 2.7249162197113037, "learning_rate": 6.496274805473324e-08, "loss": 0.1185, "step": 29426 }, { "epoch": 96.48196721311476, "grad_norm": 2.1485657691955566, "learning_rate": 6.48419617587992e-08, "loss": 0.1345, "step": 29427 }, { "epoch": 96.48524590163935, "grad_norm": 2.7747507095336914, "learning_rate": 6.472128749210105e-08, "loss": 0.1827, "step": 29428 }, { "epoch": 96.48852459016393, "grad_norm": 1.7105565071105957, "learning_rate": 6.460072525600102e-08, "loss": 0.0328, "step": 29429 }, { "epoch": 96.49180327868852, "grad_norm": 2.223562002182007, "learning_rate": 6.448027505185917e-08, "loss": 0.1618, "step": 29430 }, { "epoch": 96.49508196721311, "grad_norm": 4.798424243927002, "learning_rate": 6.435993688103103e-08, "loss": 0.2772, "step": 29431 }, { "epoch": 96.4983606557377, "grad_norm": 2.567213773727417, "learning_rate": 6.423971074487556e-08, "loss": 0.0836, "step": 29432 }, { "epoch": 96.5016393442623, "grad_norm": 3.24806809425354, "learning_rate": 6.411959664474832e-08, "loss": 0.2739, "step": 29433 }, { "epoch": 96.50491803278689, "grad_norm": 2.487877368927002, "learning_rate": 6.399959458200266e-08, "loss": 0.0565, "step": 29434 }, { "epoch": 96.50819672131148, "grad_norm": 2.131448984146118, "learning_rate": 6.387970455799308e-08, "loss": 0.1205, "step": 29435 }, { "epoch": 96.51147540983607, "grad_norm": 3.1586244106292725, "learning_rate": 6.37599265740696e-08, "loss": 0.2424, "step": 29436 }, { "epoch": 96.51475409836065, "grad_norm": 2.1172373294830322, "learning_rate": 6.364026063158557e-08, "loss": 0.1228, "step": 29437 }, { "epoch": 96.51803278688524, "grad_norm": 3.048460006713867, "learning_rate": 6.352070673188771e-08, "loss": 0.1208, "step": 29438 }, { "epoch": 96.52131147540983, "grad_norm": 2.8489644527435303, "learning_rate": 6.340126487632602e-08, "loss": 0.0647, "step": 29439 }, { "epoch": 96.52459016393442, "grad_norm": 3.268126964569092, "learning_rate": 6.328193506624614e-08, "loss": 0.1975, "step": 29440 }, { "epoch": 96.52786885245902, "grad_norm": 3.629676580429077, "learning_rate": 6.316271730299361e-08, "loss": 0.0984, "step": 29441 }, { "epoch": 96.53114754098361, "grad_norm": 1.9431830644607544, "learning_rate": 6.304361158791405e-08, "loss": 0.0967, "step": 29442 }, { "epoch": 96.5344262295082, "grad_norm": 2.535909652709961, "learning_rate": 6.292461792234972e-08, "loss": 0.2076, "step": 29443 }, { "epoch": 96.53770491803279, "grad_norm": 1.8887147903442383, "learning_rate": 6.280573630764064e-08, "loss": 0.0997, "step": 29444 }, { "epoch": 96.54098360655738, "grad_norm": 1.9685893058776855, "learning_rate": 6.268696674513019e-08, "loss": 0.0566, "step": 29445 }, { "epoch": 96.54426229508196, "grad_norm": 2.398360013961792, "learning_rate": 6.256830923615732e-08, "loss": 0.1348, "step": 29446 }, { "epoch": 96.54754098360655, "grad_norm": 2.1996583938598633, "learning_rate": 6.244976378205759e-08, "loss": 0.1062, "step": 29447 }, { "epoch": 96.55081967213114, "grad_norm": 2.523575782775879, "learning_rate": 6.233133038416994e-08, "loss": 0.135, "step": 29448 }, { "epoch": 96.55409836065574, "grad_norm": 2.8279566764831543, "learning_rate": 6.221300904382888e-08, "loss": 0.2423, "step": 29449 }, { "epoch": 96.55737704918033, "grad_norm": 2.401139736175537, "learning_rate": 6.209479976236887e-08, "loss": 0.1703, "step": 29450 }, { "epoch": 96.56065573770492, "grad_norm": 2.0444469451904297, "learning_rate": 6.19767025411222e-08, "loss": 0.1268, "step": 29451 }, { "epoch": 96.56393442622951, "grad_norm": 2.8912179470062256, "learning_rate": 6.185871738142224e-08, "loss": 0.1161, "step": 29452 }, { "epoch": 96.5672131147541, "grad_norm": 2.2501702308654785, "learning_rate": 6.174084428459792e-08, "loss": 0.0667, "step": 29453 }, { "epoch": 96.57049180327868, "grad_norm": 2.918212652206421, "learning_rate": 6.162308325197819e-08, "loss": 0.2018, "step": 29454 }, { "epoch": 96.57377049180327, "grad_norm": 3.381666660308838, "learning_rate": 6.150543428489308e-08, "loss": 0.1489, "step": 29455 }, { "epoch": 96.57704918032788, "grad_norm": 2.3870675563812256, "learning_rate": 6.1387897384666e-08, "loss": 0.064, "step": 29456 }, { "epoch": 96.58032786885246, "grad_norm": 1.8563250303268433, "learning_rate": 6.127047255262475e-08, "loss": 0.105, "step": 29457 }, { "epoch": 96.58360655737705, "grad_norm": 5.187170028686523, "learning_rate": 6.115315979009273e-08, "loss": 0.2096, "step": 29458 }, { "epoch": 96.58688524590164, "grad_norm": 3.1413536071777344, "learning_rate": 6.103595909839222e-08, "loss": 0.0794, "step": 29459 }, { "epoch": 96.59016393442623, "grad_norm": 2.4064676761627197, "learning_rate": 6.091887047884548e-08, "loss": 0.1006, "step": 29460 }, { "epoch": 96.59344262295082, "grad_norm": 2.9290056228637695, "learning_rate": 6.080189393277259e-08, "loss": 0.2979, "step": 29461 }, { "epoch": 96.5967213114754, "grad_norm": 2.112638235092163, "learning_rate": 6.068502946149135e-08, "loss": 0.0746, "step": 29462 }, { "epoch": 96.6, "grad_norm": 1.9065210819244385, "learning_rate": 6.056827706632185e-08, "loss": 0.1843, "step": 29463 }, { "epoch": 96.6032786885246, "grad_norm": 2.051144599914551, "learning_rate": 6.045163674857968e-08, "loss": 0.1929, "step": 29464 }, { "epoch": 96.60655737704919, "grad_norm": 2.268944501876831, "learning_rate": 6.033510850957936e-08, "loss": 0.0773, "step": 29465 }, { "epoch": 96.60983606557377, "grad_norm": 1.5643254518508911, "learning_rate": 6.021869235063538e-08, "loss": 0.0404, "step": 29466 }, { "epoch": 96.61311475409836, "grad_norm": 2.3331797122955322, "learning_rate": 6.010238827306114e-08, "loss": 0.1341, "step": 29467 }, { "epoch": 96.61639344262295, "grad_norm": 1.726672649383545, "learning_rate": 5.998619627816671e-08, "loss": 0.05, "step": 29468 }, { "epoch": 96.61967213114754, "grad_norm": 2.1107044219970703, "learning_rate": 5.987011636726326e-08, "loss": 0.1109, "step": 29469 }, { "epoch": 96.62295081967213, "grad_norm": 2.5913338661193848, "learning_rate": 5.975414854165862e-08, "loss": 0.1262, "step": 29470 }, { "epoch": 96.62622950819672, "grad_norm": 3.3512768745422363, "learning_rate": 5.963829280266176e-08, "loss": 0.0947, "step": 29471 }, { "epoch": 96.62950819672132, "grad_norm": 2.347015619277954, "learning_rate": 5.952254915157829e-08, "loss": 0.2029, "step": 29472 }, { "epoch": 96.6327868852459, "grad_norm": 1.9515777826309204, "learning_rate": 5.940691758971384e-08, "loss": 0.0713, "step": 29473 }, { "epoch": 96.6360655737705, "grad_norm": 1.5248595476150513, "learning_rate": 5.9291398118371815e-08, "loss": 0.0327, "step": 29474 }, { "epoch": 96.63934426229508, "grad_norm": 2.475888967514038, "learning_rate": 5.9175990738854495e-08, "loss": 0.1142, "step": 29475 }, { "epoch": 96.64262295081967, "grad_norm": 2.063737630844116, "learning_rate": 5.906069545246529e-08, "loss": 0.1131, "step": 29476 }, { "epoch": 96.64590163934426, "grad_norm": 2.0330538749694824, "learning_rate": 5.8945512260502045e-08, "loss": 0.1083, "step": 29477 }, { "epoch": 96.64918032786885, "grad_norm": 2.986978769302368, "learning_rate": 5.883044116426373e-08, "loss": 0.1052, "step": 29478 }, { "epoch": 96.65245901639344, "grad_norm": 1.7898305654525757, "learning_rate": 5.871548216504819e-08, "loss": 0.0497, "step": 29479 }, { "epoch": 96.65573770491804, "grad_norm": 1.998872995376587, "learning_rate": 5.8600635264152164e-08, "loss": 0.0319, "step": 29480 }, { "epoch": 96.65901639344263, "grad_norm": 3.016425609588623, "learning_rate": 5.848590046287128e-08, "loss": 0.0721, "step": 29481 }, { "epoch": 96.66229508196722, "grad_norm": 2.3328604698181152, "learning_rate": 5.837127776249785e-08, "loss": 0.0963, "step": 29482 }, { "epoch": 96.6655737704918, "grad_norm": 2.3647208213806152, "learning_rate": 5.825676716432527e-08, "loss": 0.1166, "step": 29483 }, { "epoch": 96.66885245901639, "grad_norm": 2.677665948867798, "learning_rate": 5.8142368669643625e-08, "loss": 0.1914, "step": 29484 }, { "epoch": 96.67213114754098, "grad_norm": 2.5859084129333496, "learning_rate": 5.802808227974521e-08, "loss": 0.1587, "step": 29485 }, { "epoch": 96.67540983606557, "grad_norm": 2.3022055625915527, "learning_rate": 5.7913907995915675e-08, "loss": 0.0597, "step": 29486 }, { "epoch": 96.67868852459016, "grad_norm": 2.550987958908081, "learning_rate": 5.7799845819445086e-08, "loss": 0.1044, "step": 29487 }, { "epoch": 96.68196721311476, "grad_norm": 2.6972908973693848, "learning_rate": 5.768589575161798e-08, "loss": 0.0768, "step": 29488 }, { "epoch": 96.68524590163935, "grad_norm": 2.468183755874634, "learning_rate": 5.7572057793719995e-08, "loss": 0.1494, "step": 29489 }, { "epoch": 96.68852459016394, "grad_norm": 1.6881383657455444, "learning_rate": 5.745833194703454e-08, "loss": 0.1442, "step": 29490 }, { "epoch": 96.69180327868852, "grad_norm": 1.5003862380981445, "learning_rate": 5.734471821284393e-08, "loss": 0.021, "step": 29491 }, { "epoch": 96.69508196721311, "grad_norm": 1.7401654720306396, "learning_rate": 5.723121659242936e-08, "loss": 0.0283, "step": 29492 }, { "epoch": 96.6983606557377, "grad_norm": 2.7476892471313477, "learning_rate": 5.711782708707092e-08, "loss": 0.1877, "step": 29493 }, { "epoch": 96.70163934426229, "grad_norm": 2.1749656200408936, "learning_rate": 5.7004549698046474e-08, "loss": 0.1824, "step": 29494 }, { "epoch": 96.70491803278688, "grad_norm": 2.006681203842163, "learning_rate": 5.6891384426635e-08, "loss": 0.0541, "step": 29495 }, { "epoch": 96.70819672131148, "grad_norm": 2.364851474761963, "learning_rate": 5.6778331274109924e-08, "loss": 0.1203, "step": 29496 }, { "epoch": 96.71147540983607, "grad_norm": 3.4764657020568848, "learning_rate": 5.666539024174911e-08, "loss": 0.0499, "step": 29497 }, { "epoch": 96.71475409836066, "grad_norm": 2.8056843280792236, "learning_rate": 5.6552561330823765e-08, "loss": 0.1953, "step": 29498 }, { "epoch": 96.71803278688525, "grad_norm": 1.8500851392745972, "learning_rate": 5.643984454260621e-08, "loss": 0.0537, "step": 29499 }, { "epoch": 96.72131147540983, "grad_norm": 3.0721986293792725, "learning_rate": 5.6327239878368745e-08, "loss": 0.0447, "step": 29500 }, { "epoch": 96.72459016393442, "grad_norm": 1.760635256767273, "learning_rate": 5.621474733938037e-08, "loss": 0.0563, "step": 29501 }, { "epoch": 96.72786885245901, "grad_norm": 1.973623514175415, "learning_rate": 5.6102366926910066e-08, "loss": 0.0862, "step": 29502 }, { "epoch": 96.73114754098361, "grad_norm": 1.6661286354064941, "learning_rate": 5.599009864222349e-08, "loss": 0.1164, "step": 29503 }, { "epoch": 96.7344262295082, "grad_norm": 2.2457168102264404, "learning_rate": 5.587794248658851e-08, "loss": 0.1934, "step": 29504 }, { "epoch": 96.73770491803279, "grad_norm": 2.112370252609253, "learning_rate": 5.576589846126968e-08, "loss": 0.1065, "step": 29505 }, { "epoch": 96.74098360655738, "grad_norm": 2.226358652114868, "learning_rate": 5.5653966567528194e-08, "loss": 0.0832, "step": 29506 }, { "epoch": 96.74426229508197, "grad_norm": 3.2883689403533936, "learning_rate": 5.554214680662973e-08, "loss": 0.1605, "step": 29507 }, { "epoch": 96.74754098360656, "grad_norm": 2.6604440212249756, "learning_rate": 5.5430439179832154e-08, "loss": 0.0966, "step": 29508 }, { "epoch": 96.75081967213114, "grad_norm": 2.1984989643096924, "learning_rate": 5.5318843688395575e-08, "loss": 0.0855, "step": 29509 }, { "epoch": 96.75409836065573, "grad_norm": 2.5332632064819336, "learning_rate": 5.52073603335801e-08, "loss": 0.2325, "step": 29510 }, { "epoch": 96.75737704918033, "grad_norm": 1.7322111129760742, "learning_rate": 5.509598911664027e-08, "loss": 0.092, "step": 29511 }, { "epoch": 96.76065573770492, "grad_norm": 3.197237253189087, "learning_rate": 5.498473003883398e-08, "loss": 0.0913, "step": 29512 }, { "epoch": 96.76393442622951, "grad_norm": 2.2404892444610596, "learning_rate": 5.487358310141577e-08, "loss": 0.1092, "step": 29513 }, { "epoch": 96.7672131147541, "grad_norm": 2.3842668533325195, "learning_rate": 5.476254830563688e-08, "loss": 0.1502, "step": 29514 }, { "epoch": 96.77049180327869, "grad_norm": 1.2882736921310425, "learning_rate": 5.465162565275184e-08, "loss": 0.0152, "step": 29515 }, { "epoch": 96.77377049180328, "grad_norm": 2.1250991821289062, "learning_rate": 5.4540815144009665e-08, "loss": 0.0462, "step": 29516 }, { "epoch": 96.77704918032786, "grad_norm": 2.5225002765655518, "learning_rate": 5.4430116780661565e-08, "loss": 0.24, "step": 29517 }, { "epoch": 96.78032786885245, "grad_norm": 1.507308840751648, "learning_rate": 5.431953056395323e-08, "loss": 0.0551, "step": 29518 }, { "epoch": 96.78360655737706, "grad_norm": 2.6870737075805664, "learning_rate": 5.4209056495133636e-08, "loss": 0.1538, "step": 29519 }, { "epoch": 96.78688524590164, "grad_norm": 2.1902363300323486, "learning_rate": 5.409869457544847e-08, "loss": 0.1148, "step": 29520 }, { "epoch": 96.79016393442623, "grad_norm": 2.1904971599578857, "learning_rate": 5.3988444806141184e-08, "loss": 0.1753, "step": 29521 }, { "epoch": 96.79344262295082, "grad_norm": 1.9827131032943726, "learning_rate": 5.387830718845521e-08, "loss": 0.0638, "step": 29522 }, { "epoch": 96.79672131147541, "grad_norm": 1.890000820159912, "learning_rate": 5.376828172363291e-08, "loss": 0.0423, "step": 29523 }, { "epoch": 96.8, "grad_norm": 2.085829734802246, "learning_rate": 5.365836841291439e-08, "loss": 0.08, "step": 29524 }, { "epoch": 96.80327868852459, "grad_norm": 2.5474798679351807, "learning_rate": 5.3548567257540873e-08, "loss": 0.3169, "step": 29525 }, { "epoch": 96.80655737704917, "grad_norm": 1.4331364631652832, "learning_rate": 5.343887825874694e-08, "loss": 0.0993, "step": 29526 }, { "epoch": 96.80983606557378, "grad_norm": 2.9529271125793457, "learning_rate": 5.3329301417772704e-08, "loss": 0.0729, "step": 29527 }, { "epoch": 96.81311475409836, "grad_norm": 2.026672840118408, "learning_rate": 5.3219836735852736e-08, "loss": 0.0415, "step": 29528 }, { "epoch": 96.81639344262295, "grad_norm": 2.2461776733398438, "learning_rate": 5.3110484214220495e-08, "loss": 0.0805, "step": 29529 }, { "epoch": 96.81967213114754, "grad_norm": 2.1675455570220947, "learning_rate": 5.300124385410943e-08, "loss": 0.0994, "step": 29530 }, { "epoch": 96.82295081967213, "grad_norm": 2.712496280670166, "learning_rate": 5.2892115656751894e-08, "loss": 0.2291, "step": 29531 }, { "epoch": 96.82622950819672, "grad_norm": 2.1120269298553467, "learning_rate": 5.278309962337913e-08, "loss": 0.052, "step": 29532 }, { "epoch": 96.8295081967213, "grad_norm": 2.5182580947875977, "learning_rate": 5.267419575521793e-08, "loss": 0.0993, "step": 29533 }, { "epoch": 96.8327868852459, "grad_norm": 2.104208469390869, "learning_rate": 5.2565404053499525e-08, "loss": 0.0782, "step": 29534 }, { "epoch": 96.8360655737705, "grad_norm": 2.325662136077881, "learning_rate": 5.245672451944739e-08, "loss": 0.0902, "step": 29535 }, { "epoch": 96.83934426229509, "grad_norm": 2.0287747383117676, "learning_rate": 5.234815715428943e-08, "loss": 0.109, "step": 29536 }, { "epoch": 96.84262295081967, "grad_norm": 1.924148440361023, "learning_rate": 5.223970195924799e-08, "loss": 0.1399, "step": 29537 }, { "epoch": 96.84590163934426, "grad_norm": 2.5437703132629395, "learning_rate": 5.213135893554766e-08, "loss": 0.2907, "step": 29538 }, { "epoch": 96.84918032786885, "grad_norm": 1.5994174480438232, "learning_rate": 5.202312808440968e-08, "loss": 0.1367, "step": 29539 }, { "epoch": 96.85245901639344, "grad_norm": 2.857288122177124, "learning_rate": 5.191500940705418e-08, "loss": 0.0795, "step": 29540 }, { "epoch": 96.85573770491803, "grad_norm": 2.4434332847595215, "learning_rate": 5.18070029047002e-08, "loss": 0.0983, "step": 29541 }, { "epoch": 96.85901639344263, "grad_norm": 1.9096808433532715, "learning_rate": 5.1699108578565636e-08, "loss": 0.0363, "step": 29542 }, { "epoch": 96.86229508196722, "grad_norm": 3.3046987056732178, "learning_rate": 5.159132642986731e-08, "loss": 0.2765, "step": 29543 }, { "epoch": 96.8655737704918, "grad_norm": 2.3663315773010254, "learning_rate": 5.1483656459819785e-08, "loss": 0.2293, "step": 29544 }, { "epoch": 96.8688524590164, "grad_norm": 1.842419981956482, "learning_rate": 5.137609866963877e-08, "loss": 0.0285, "step": 29545 }, { "epoch": 96.87213114754098, "grad_norm": 2.6653244495391846, "learning_rate": 5.126865306053663e-08, "loss": 0.1136, "step": 29546 }, { "epoch": 96.87540983606557, "grad_norm": 1.5105210542678833, "learning_rate": 5.116131963372462e-08, "loss": 0.0467, "step": 29547 }, { "epoch": 96.87868852459016, "grad_norm": 1.7499477863311768, "learning_rate": 5.105409839041175e-08, "loss": 0.0936, "step": 29548 }, { "epoch": 96.88196721311475, "grad_norm": 1.715718388557434, "learning_rate": 5.0946989331808196e-08, "loss": 0.0384, "step": 29549 }, { "epoch": 96.88524590163935, "grad_norm": 2.725234270095825, "learning_rate": 5.083999245912297e-08, "loss": 0.0834, "step": 29550 }, { "epoch": 96.88852459016394, "grad_norm": 2.4405899047851562, "learning_rate": 5.073310777356066e-08, "loss": 0.113, "step": 29551 }, { "epoch": 96.89180327868853, "grad_norm": 2.7747602462768555, "learning_rate": 5.0626335276326986e-08, "loss": 0.0645, "step": 29552 }, { "epoch": 96.89508196721312, "grad_norm": 1.9018148183822632, "learning_rate": 5.051967496862653e-08, "loss": 0.1167, "step": 29553 }, { "epoch": 96.8983606557377, "grad_norm": 2.227715492248535, "learning_rate": 5.041312685166166e-08, "loss": 0.1437, "step": 29554 }, { "epoch": 96.90163934426229, "grad_norm": 2.0935356616973877, "learning_rate": 5.030669092663365e-08, "loss": 0.0564, "step": 29555 }, { "epoch": 96.90491803278688, "grad_norm": 3.018360137939453, "learning_rate": 5.0200367194742636e-08, "loss": 0.1306, "step": 29556 }, { "epoch": 96.90819672131147, "grad_norm": 1.5716986656188965, "learning_rate": 5.009415565718767e-08, "loss": 0.0455, "step": 29557 }, { "epoch": 96.91147540983607, "grad_norm": 1.6919498443603516, "learning_rate": 4.9988056315166675e-08, "loss": 0.0906, "step": 29558 }, { "epoch": 96.91475409836066, "grad_norm": 1.554048776626587, "learning_rate": 4.988206916987537e-08, "loss": 0.0938, "step": 29559 }, { "epoch": 96.91803278688525, "grad_norm": 2.1982686519622803, "learning_rate": 4.977619422250946e-08, "loss": 0.0883, "step": 29560 }, { "epoch": 96.92131147540984, "grad_norm": 2.2750959396362305, "learning_rate": 4.967043147426354e-08, "loss": 0.0673, "step": 29561 }, { "epoch": 96.92459016393443, "grad_norm": 1.2558670043945312, "learning_rate": 4.956478092632777e-08, "loss": 0.0184, "step": 29562 }, { "epoch": 96.92786885245901, "grad_norm": 2.6196043491363525, "learning_rate": 4.945924257989565e-08, "loss": 0.1489, "step": 29563 }, { "epoch": 96.9311475409836, "grad_norm": 1.4601777791976929, "learning_rate": 4.9353816436156224e-08, "loss": 0.0156, "step": 29564 }, { "epoch": 96.93442622950819, "grad_norm": 1.4851933717727661, "learning_rate": 4.9248502496298534e-08, "loss": 0.0264, "step": 29565 }, { "epoch": 96.9377049180328, "grad_norm": 3.121814489364624, "learning_rate": 4.914330076151053e-08, "loss": 0.1419, "step": 29566 }, { "epoch": 96.94098360655738, "grad_norm": 2.796325206756592, "learning_rate": 4.903821123297792e-08, "loss": 0.159, "step": 29567 }, { "epoch": 96.94426229508197, "grad_norm": 3.1711504459381104, "learning_rate": 4.8933233911886426e-08, "loss": 0.1163, "step": 29568 }, { "epoch": 96.94754098360656, "grad_norm": 1.9117895364761353, "learning_rate": 4.8828368799418436e-08, "loss": 0.1169, "step": 29569 }, { "epoch": 96.95081967213115, "grad_norm": 2.3905701637268066, "learning_rate": 4.872361589675745e-08, "loss": 0.0895, "step": 29570 }, { "epoch": 96.95409836065573, "grad_norm": 1.633486032485962, "learning_rate": 4.861897520508474e-08, "loss": 0.0307, "step": 29571 }, { "epoch": 96.95737704918032, "grad_norm": 2.3458375930786133, "learning_rate": 4.8514446725580476e-08, "loss": 0.1231, "step": 29572 }, { "epoch": 96.96065573770491, "grad_norm": 2.208991050720215, "learning_rate": 4.8410030459421497e-08, "loss": 0.0483, "step": 29573 }, { "epoch": 96.96393442622951, "grad_norm": 2.0097968578338623, "learning_rate": 4.8305726407786855e-08, "loss": 0.1863, "step": 29574 }, { "epoch": 96.9672131147541, "grad_norm": 2.2950453758239746, "learning_rate": 4.820153457185228e-08, "loss": 0.1423, "step": 29575 }, { "epoch": 96.97049180327869, "grad_norm": 2.7403769493103027, "learning_rate": 4.80974549527935e-08, "loss": 0.159, "step": 29576 }, { "epoch": 96.97377049180328, "grad_norm": 2.939480781555176, "learning_rate": 4.79934875517829e-08, "loss": 0.272, "step": 29577 }, { "epoch": 96.97704918032787, "grad_norm": 2.4053289890289307, "learning_rate": 4.788963236999289e-08, "loss": 0.0557, "step": 29578 }, { "epoch": 96.98032786885246, "grad_norm": 2.205416202545166, "learning_rate": 4.778588940859474e-08, "loss": 0.0577, "step": 29579 }, { "epoch": 96.98360655737704, "grad_norm": 1.8138636350631714, "learning_rate": 4.7682258668758643e-08, "loss": 0.0539, "step": 29580 }, { "epoch": 96.98688524590163, "grad_norm": 1.371731162071228, "learning_rate": 4.757874015165365e-08, "loss": 0.0682, "step": 29581 }, { "epoch": 96.99016393442623, "grad_norm": 2.8563003540039062, "learning_rate": 4.7475333858445496e-08, "loss": 0.1026, "step": 29582 }, { "epoch": 96.99344262295082, "grad_norm": 2.557936668395996, "learning_rate": 4.7372039790299916e-08, "loss": 0.0505, "step": 29583 }, { "epoch": 96.99672131147541, "grad_norm": 2.842926502227783, "learning_rate": 4.7268857948384875e-08, "loss": 0.0806, "step": 29584 }, { "epoch": 97.0, "grad_norm": 3.4080686569213867, "learning_rate": 4.716578833386054e-08, "loss": 0.0404, "step": 29585 }, { "epoch": 97.00327868852459, "grad_norm": 1.8492918014526367, "learning_rate": 4.706283094789044e-08, "loss": 0.0389, "step": 29586 }, { "epoch": 97.00655737704918, "grad_norm": 2.28877854347229, "learning_rate": 4.6959985791634746e-08, "loss": 0.152, "step": 29587 }, { "epoch": 97.00983606557377, "grad_norm": 2.3223912715911865, "learning_rate": 4.6857252866254754e-08, "loss": 0.1929, "step": 29588 }, { "epoch": 97.01311475409837, "grad_norm": 2.5933797359466553, "learning_rate": 4.675463217290732e-08, "loss": 0.0887, "step": 29589 }, { "epoch": 97.01639344262296, "grad_norm": 2.14874529838562, "learning_rate": 4.665212371275041e-08, "loss": 0.0568, "step": 29590 }, { "epoch": 97.01967213114754, "grad_norm": 3.064631700515747, "learning_rate": 4.654972748693976e-08, "loss": 0.2099, "step": 29591 }, { "epoch": 97.02295081967213, "grad_norm": 2.7490437030792236, "learning_rate": 4.644744349662999e-08, "loss": 0.0861, "step": 29592 }, { "epoch": 97.02622950819672, "grad_norm": 3.51310133934021, "learning_rate": 4.634527174297465e-08, "loss": 0.1847, "step": 29593 }, { "epoch": 97.02950819672131, "grad_norm": 2.280529499053955, "learning_rate": 4.624321222712502e-08, "loss": 0.0742, "step": 29594 }, { "epoch": 97.0327868852459, "grad_norm": 2.586660861968994, "learning_rate": 4.614126495023241e-08, "loss": 0.0857, "step": 29595 }, { "epoch": 97.03606557377049, "grad_norm": 2.480710506439209, "learning_rate": 4.603942991344701e-08, "loss": 0.0629, "step": 29596 }, { "epoch": 97.03934426229509, "grad_norm": 2.0747013092041016, "learning_rate": 4.5937707117915675e-08, "loss": 0.0408, "step": 29597 }, { "epoch": 97.04262295081968, "grad_norm": 2.65350341796875, "learning_rate": 4.583609656478749e-08, "loss": 0.1086, "step": 29598 }, { "epoch": 97.04590163934427, "grad_norm": 1.8378205299377441, "learning_rate": 4.573459825520599e-08, "loss": 0.0694, "step": 29599 }, { "epoch": 97.04918032786885, "grad_norm": 1.9170387983322144, "learning_rate": 4.5633212190318024e-08, "loss": 0.1735, "step": 29600 }, { "epoch": 97.05245901639344, "grad_norm": 2.294471502304077, "learning_rate": 4.553193837126379e-08, "loss": 0.0547, "step": 29601 }, { "epoch": 97.05573770491803, "grad_norm": 2.4191927909851074, "learning_rate": 4.543077679918795e-08, "loss": 0.0793, "step": 29602 }, { "epoch": 97.05901639344262, "grad_norm": 2.7180559635162354, "learning_rate": 4.532972747523068e-08, "loss": 0.2437, "step": 29603 }, { "epoch": 97.0622950819672, "grad_norm": 2.3114237785339355, "learning_rate": 4.5228790400531077e-08, "loss": 0.193, "step": 29604 }, { "epoch": 97.06557377049181, "grad_norm": 2.334533452987671, "learning_rate": 4.512796557622601e-08, "loss": 0.1643, "step": 29605 }, { "epoch": 97.0688524590164, "grad_norm": 2.4096615314483643, "learning_rate": 4.5027253003454566e-08, "loss": 0.0623, "step": 29606 }, { "epoch": 97.07213114754099, "grad_norm": 2.4085006713867188, "learning_rate": 4.492665268335139e-08, "loss": 0.0995, "step": 29607 }, { "epoch": 97.07540983606557, "grad_norm": 2.6203219890594482, "learning_rate": 4.482616461705003e-08, "loss": 0.1976, "step": 29608 }, { "epoch": 97.07868852459016, "grad_norm": 2.5900373458862305, "learning_rate": 4.4725788805685125e-08, "loss": 0.1019, "step": 29609 }, { "epoch": 97.08196721311475, "grad_norm": 1.5732296705245972, "learning_rate": 4.462552525038799e-08, "loss": 0.0821, "step": 29610 }, { "epoch": 97.08524590163934, "grad_norm": 2.2658438682556152, "learning_rate": 4.452537395228884e-08, "loss": 0.0852, "step": 29611 }, { "epoch": 97.08852459016393, "grad_norm": 2.129471778869629, "learning_rate": 4.442533491251677e-08, "loss": 0.1185, "step": 29612 }, { "epoch": 97.09180327868853, "grad_norm": 2.2398064136505127, "learning_rate": 4.432540813220088e-08, "loss": 0.0924, "step": 29613 }, { "epoch": 97.09508196721312, "grad_norm": 2.7898099422454834, "learning_rate": 4.422559361246692e-08, "loss": 0.1022, "step": 29614 }, { "epoch": 97.09836065573771, "grad_norm": 1.8426488637924194, "learning_rate": 4.4125891354441786e-08, "loss": 0.1357, "step": 29615 }, { "epoch": 97.1016393442623, "grad_norm": 2.337052822113037, "learning_rate": 4.40263013592479e-08, "loss": 0.0599, "step": 29616 }, { "epoch": 97.10491803278688, "grad_norm": 1.8901578187942505, "learning_rate": 4.392682362800882e-08, "loss": 0.0626, "step": 29617 }, { "epoch": 97.10819672131147, "grad_norm": 1.3804584741592407, "learning_rate": 4.382745816184697e-08, "loss": 0.021, "step": 29618 }, { "epoch": 97.11147540983606, "grad_norm": 2.464719295501709, "learning_rate": 4.372820496188257e-08, "loss": 0.1984, "step": 29619 }, { "epoch": 97.11475409836065, "grad_norm": 1.985147476196289, "learning_rate": 4.3629064029233615e-08, "loss": 0.1082, "step": 29620 }, { "epoch": 97.11803278688525, "grad_norm": 1.8329273462295532, "learning_rate": 4.353003536502032e-08, "loss": 0.0807, "step": 29621 }, { "epoch": 97.12131147540984, "grad_norm": 2.3824119567871094, "learning_rate": 4.343111897035623e-08, "loss": 0.0823, "step": 29622 }, { "epoch": 97.12459016393443, "grad_norm": 2.6525604724884033, "learning_rate": 4.333231484636047e-08, "loss": 0.1425, "step": 29623 }, { "epoch": 97.12786885245902, "grad_norm": 1.9392317533493042, "learning_rate": 4.323362299414435e-08, "loss": 0.1293, "step": 29624 }, { "epoch": 97.1311475409836, "grad_norm": 3.9152867794036865, "learning_rate": 4.313504341482144e-08, "loss": 0.0543, "step": 29625 }, { "epoch": 97.1344262295082, "grad_norm": 2.340548515319824, "learning_rate": 4.303657610950418e-08, "loss": 0.0463, "step": 29626 }, { "epoch": 97.13770491803278, "grad_norm": 2.2652761936187744, "learning_rate": 4.2938221079300566e-08, "loss": 0.0425, "step": 29627 }, { "epoch": 97.14098360655737, "grad_norm": 2.6362669467926025, "learning_rate": 4.283997832532305e-08, "loss": 0.0971, "step": 29628 }, { "epoch": 97.14426229508197, "grad_norm": 1.8252443075180054, "learning_rate": 4.27418478486763e-08, "loss": 0.0427, "step": 29629 }, { "epoch": 97.14754098360656, "grad_norm": 2.0880653858184814, "learning_rate": 4.2643829650469426e-08, "loss": 0.1125, "step": 29630 }, { "epoch": 97.15081967213115, "grad_norm": 1.5238951444625854, "learning_rate": 4.254592373180488e-08, "loss": 0.0221, "step": 29631 }, { "epoch": 97.15409836065574, "grad_norm": 2.288464069366455, "learning_rate": 4.244813009378956e-08, "loss": 0.1323, "step": 29632 }, { "epoch": 97.15737704918033, "grad_norm": 2.996887445449829, "learning_rate": 4.23504487375237e-08, "loss": 0.0972, "step": 29633 }, { "epoch": 97.16065573770491, "grad_norm": 2.533500909805298, "learning_rate": 4.225287966411085e-08, "loss": 0.1318, "step": 29634 }, { "epoch": 97.1639344262295, "grad_norm": 2.527221441268921, "learning_rate": 4.215542287465013e-08, "loss": 0.2125, "step": 29635 }, { "epoch": 97.1672131147541, "grad_norm": 3.3803083896636963, "learning_rate": 4.205807837023956e-08, "loss": 0.2235, "step": 29636 }, { "epoch": 97.1704918032787, "grad_norm": 1.3490477800369263, "learning_rate": 4.1960846151979374e-08, "loss": 0.0243, "step": 29637 }, { "epoch": 97.17377049180328, "grad_norm": 2.4727277755737305, "learning_rate": 4.186372622096313e-08, "loss": 0.1394, "step": 29638 }, { "epoch": 97.17704918032787, "grad_norm": 2.1361827850341797, "learning_rate": 4.176671857828773e-08, "loss": 0.0662, "step": 29639 }, { "epoch": 97.18032786885246, "grad_norm": 2.5166831016540527, "learning_rate": 4.1669823225046756e-08, "loss": 0.1069, "step": 29640 }, { "epoch": 97.18360655737705, "grad_norm": 2.092316150665283, "learning_rate": 4.157304016233266e-08, "loss": 0.0535, "step": 29641 }, { "epoch": 97.18688524590164, "grad_norm": 2.508729934692383, "learning_rate": 4.1476369391236785e-08, "loss": 0.0812, "step": 29642 }, { "epoch": 97.19016393442622, "grad_norm": 1.7812647819519043, "learning_rate": 4.1379810912848264e-08, "loss": 0.1377, "step": 29643 }, { "epoch": 97.19344262295083, "grad_norm": 2.3919804096221924, "learning_rate": 4.128336472825734e-08, "loss": 0.179, "step": 29644 }, { "epoch": 97.19672131147541, "grad_norm": 1.729231357574463, "learning_rate": 4.118703083855091e-08, "loss": 0.033, "step": 29645 }, { "epoch": 97.2, "grad_norm": 2.0959041118621826, "learning_rate": 4.109080924481479e-08, "loss": 0.0352, "step": 29646 }, { "epoch": 97.20327868852459, "grad_norm": 1.4870951175689697, "learning_rate": 4.0994699948135876e-08, "loss": 0.1124, "step": 29647 }, { "epoch": 97.20655737704918, "grad_norm": 2.450425148010254, "learning_rate": 4.0898702949594415e-08, "loss": 0.1709, "step": 29648 }, { "epoch": 97.20983606557377, "grad_norm": 3.4992945194244385, "learning_rate": 4.080281825027621e-08, "loss": 0.2523, "step": 29649 }, { "epoch": 97.21311475409836, "grad_norm": 1.9400211572647095, "learning_rate": 4.070704585126151e-08, "loss": 0.0688, "step": 29650 }, { "epoch": 97.21639344262294, "grad_norm": 2.2787294387817383, "learning_rate": 4.061138575362944e-08, "loss": 0.0497, "step": 29651 }, { "epoch": 97.21967213114755, "grad_norm": 2.556375503540039, "learning_rate": 4.051583795845915e-08, "loss": 0.0801, "step": 29652 }, { "epoch": 97.22295081967214, "grad_norm": 1.3063262701034546, "learning_rate": 4.042040246682755e-08, "loss": 0.0903, "step": 29653 }, { "epoch": 97.22622950819672, "grad_norm": 2.042860984802246, "learning_rate": 4.032507927981266e-08, "loss": 0.0997, "step": 29654 }, { "epoch": 97.22950819672131, "grad_norm": 2.3491714000701904, "learning_rate": 4.022986839848697e-08, "loss": 0.1046, "step": 29655 }, { "epoch": 97.2327868852459, "grad_norm": 1.7482086420059204, "learning_rate": 4.013476982392517e-08, "loss": 0.0869, "step": 29656 }, { "epoch": 97.23606557377049, "grad_norm": 2.3090953826904297, "learning_rate": 4.0039783557199727e-08, "loss": 0.1076, "step": 29657 }, { "epoch": 97.23934426229508, "grad_norm": 2.6479427814483643, "learning_rate": 3.994490959938091e-08, "loss": 0.1239, "step": 29658 }, { "epoch": 97.24262295081967, "grad_norm": 2.7063138484954834, "learning_rate": 3.985014795154008e-08, "loss": 0.1002, "step": 29659 }, { "epoch": 97.24590163934427, "grad_norm": 2.793900966644287, "learning_rate": 3.9755498614743036e-08, "loss": 0.1952, "step": 29660 }, { "epoch": 97.24918032786886, "grad_norm": 2.3502957820892334, "learning_rate": 3.9660961590060056e-08, "loss": 0.1913, "step": 29661 }, { "epoch": 97.25245901639344, "grad_norm": 2.2046544551849365, "learning_rate": 3.9566536878555825e-08, "loss": 0.0573, "step": 29662 }, { "epoch": 97.25573770491803, "grad_norm": 2.2349672317504883, "learning_rate": 3.9472224481296174e-08, "loss": 0.1762, "step": 29663 }, { "epoch": 97.25901639344262, "grad_norm": 2.3904449939727783, "learning_rate": 3.937802439934135e-08, "loss": 0.1596, "step": 29664 }, { "epoch": 97.26229508196721, "grad_norm": 1.813651442527771, "learning_rate": 3.928393663375718e-08, "loss": 0.037, "step": 29665 }, { "epoch": 97.2655737704918, "grad_norm": 1.8168865442276, "learning_rate": 3.918996118560281e-08, "loss": 0.0296, "step": 29666 }, { "epoch": 97.26885245901639, "grad_norm": 2.428475856781006, "learning_rate": 3.9096098055938505e-08, "loss": 0.0883, "step": 29667 }, { "epoch": 97.27213114754099, "grad_norm": 3.5453927516937256, "learning_rate": 3.9002347245822304e-08, "loss": 0.1077, "step": 29668 }, { "epoch": 97.27540983606558, "grad_norm": 2.0414655208587646, "learning_rate": 3.890870875631225e-08, "loss": 0.162, "step": 29669 }, { "epoch": 97.27868852459017, "grad_norm": 1.8593826293945312, "learning_rate": 3.881518258846195e-08, "loss": 0.1286, "step": 29670 }, { "epoch": 97.28196721311475, "grad_norm": 2.514836549758911, "learning_rate": 3.8721768743328334e-08, "loss": 0.1323, "step": 29671 }, { "epoch": 97.28524590163934, "grad_norm": 2.110865592956543, "learning_rate": 3.862846722196389e-08, "loss": 0.1235, "step": 29672 }, { "epoch": 97.28852459016393, "grad_norm": 2.2835421562194824, "learning_rate": 3.8535278025421116e-08, "loss": 0.1693, "step": 29673 }, { "epoch": 97.29180327868852, "grad_norm": 1.1816766262054443, "learning_rate": 3.844220115474917e-08, "loss": 0.0194, "step": 29674 }, { "epoch": 97.29508196721312, "grad_norm": 2.0510756969451904, "learning_rate": 3.834923661099943e-08, "loss": 0.0497, "step": 29675 }, { "epoch": 97.29836065573771, "grad_norm": 2.1595571041107178, "learning_rate": 3.825638439521995e-08, "loss": 0.1158, "step": 29676 }, { "epoch": 97.3016393442623, "grad_norm": 2.710966110229492, "learning_rate": 3.816364450845766e-08, "loss": 0.0868, "step": 29677 }, { "epoch": 97.30491803278689, "grad_norm": 2.753204345703125, "learning_rate": 3.80710169517573e-08, "loss": 0.0668, "step": 29678 }, { "epoch": 97.30819672131148, "grad_norm": 2.3355929851531982, "learning_rate": 3.797850172616358e-08, "loss": 0.1848, "step": 29679 }, { "epoch": 97.31147540983606, "grad_norm": 2.5072121620178223, "learning_rate": 3.7886098832721205e-08, "loss": 0.1298, "step": 29680 }, { "epoch": 97.31475409836065, "grad_norm": 1.6895811557769775, "learning_rate": 3.779380827247048e-08, "loss": 0.0372, "step": 29681 }, { "epoch": 97.31803278688524, "grad_norm": 1.5129139423370361, "learning_rate": 3.770163004645277e-08, "loss": 0.0309, "step": 29682 }, { "epoch": 97.32131147540984, "grad_norm": 2.2687437534332275, "learning_rate": 3.7609564155707265e-08, "loss": 0.1485, "step": 29683 }, { "epoch": 97.32459016393443, "grad_norm": 2.0719547271728516, "learning_rate": 3.7517610601272016e-08, "loss": 0.1093, "step": 29684 }, { "epoch": 97.32786885245902, "grad_norm": 3.4400806427001953, "learning_rate": 3.742576938418507e-08, "loss": 0.1035, "step": 29685 }, { "epoch": 97.33114754098361, "grad_norm": 2.0902316570281982, "learning_rate": 3.733404050548006e-08, "loss": 0.2185, "step": 29686 }, { "epoch": 97.3344262295082, "grad_norm": 1.849893569946289, "learning_rate": 3.724242396619282e-08, "loss": 0.0855, "step": 29687 }, { "epoch": 97.33770491803278, "grad_norm": 2.3840925693511963, "learning_rate": 3.715091976735585e-08, "loss": 0.0775, "step": 29688 }, { "epoch": 97.34098360655737, "grad_norm": 2.917167901992798, "learning_rate": 3.7059527910000556e-08, "loss": 0.1081, "step": 29689 }, { "epoch": 97.34426229508196, "grad_norm": 1.7544220685958862, "learning_rate": 3.696824839515834e-08, "loss": 0.0457, "step": 29690 }, { "epoch": 97.34754098360656, "grad_norm": 2.3015859127044678, "learning_rate": 3.6877081223858357e-08, "loss": 0.2485, "step": 29691 }, { "epoch": 97.35081967213115, "grad_norm": 1.9845317602157593, "learning_rate": 3.6786026397127583e-08, "loss": 0.0381, "step": 29692 }, { "epoch": 97.35409836065574, "grad_norm": 2.351228713989258, "learning_rate": 3.669508391599408e-08, "loss": 0.1604, "step": 29693 }, { "epoch": 97.35737704918033, "grad_norm": 2.0142879486083984, "learning_rate": 3.660425378148258e-08, "loss": 0.0838, "step": 29694 }, { "epoch": 97.36065573770492, "grad_norm": 1.6954712867736816, "learning_rate": 3.651353599461782e-08, "loss": 0.0186, "step": 29695 }, { "epoch": 97.3639344262295, "grad_norm": 2.279661178588867, "learning_rate": 3.642293055642232e-08, "loss": 0.0971, "step": 29696 }, { "epoch": 97.3672131147541, "grad_norm": 2.8113796710968018, "learning_rate": 3.633243746791748e-08, "loss": 0.0946, "step": 29697 }, { "epoch": 97.37049180327868, "grad_norm": 2.2277796268463135, "learning_rate": 3.62420567301236e-08, "loss": 0.1961, "step": 29698 }, { "epoch": 97.37377049180328, "grad_norm": 2.248478889465332, "learning_rate": 3.615178834406097e-08, "loss": 0.1146, "step": 29699 }, { "epoch": 97.37704918032787, "grad_norm": 2.93328857421875, "learning_rate": 3.6061632310746554e-08, "loss": 0.1562, "step": 29700 }, { "epoch": 97.38032786885246, "grad_norm": 2.3730599880218506, "learning_rate": 3.597158863119732e-08, "loss": 0.1698, "step": 29701 }, { "epoch": 97.38360655737705, "grad_norm": 1.9321177005767822, "learning_rate": 3.588165730642801e-08, "loss": 0.0465, "step": 29702 }, { "epoch": 97.38688524590164, "grad_norm": 1.9868810176849365, "learning_rate": 3.579183833745337e-08, "loss": 0.0762, "step": 29703 }, { "epoch": 97.39016393442623, "grad_norm": 2.4149460792541504, "learning_rate": 3.5702131725285914e-08, "loss": 0.0623, "step": 29704 }, { "epoch": 97.39344262295081, "grad_norm": 2.542747735977173, "learning_rate": 3.561253747093707e-08, "loss": 0.1744, "step": 29705 }, { "epoch": 97.3967213114754, "grad_norm": 2.4217844009399414, "learning_rate": 3.552305557541713e-08, "loss": 0.1028, "step": 29706 }, { "epoch": 97.4, "grad_norm": 1.7936879396438599, "learning_rate": 3.543368603973529e-08, "loss": 0.052, "step": 29707 }, { "epoch": 97.4032786885246, "grad_norm": 2.553591012954712, "learning_rate": 3.534442886489964e-08, "loss": 0.0622, "step": 29708 }, { "epoch": 97.40655737704918, "grad_norm": 2.4697391986846924, "learning_rate": 3.525528405191492e-08, "loss": 0.1169, "step": 29709 }, { "epoch": 97.40983606557377, "grad_norm": 2.0615930557250977, "learning_rate": 3.516625160178921e-08, "loss": 0.1309, "step": 29710 }, { "epoch": 97.41311475409836, "grad_norm": 2.078369617462158, "learning_rate": 3.507733151552395e-08, "loss": 0.0501, "step": 29711 }, { "epoch": 97.41639344262295, "grad_norm": 1.5683902502059937, "learning_rate": 3.498852379412276e-08, "loss": 0.0266, "step": 29712 }, { "epoch": 97.41967213114754, "grad_norm": 4.820764541625977, "learning_rate": 3.489982843858708e-08, "loss": 0.15, "step": 29713 }, { "epoch": 97.42295081967212, "grad_norm": 2.650524616241455, "learning_rate": 3.481124544991721e-08, "loss": 0.2445, "step": 29714 }, { "epoch": 97.42622950819673, "grad_norm": 2.864624261856079, "learning_rate": 3.472277482911124e-08, "loss": 0.1232, "step": 29715 }, { "epoch": 97.42950819672132, "grad_norm": 2.8413870334625244, "learning_rate": 3.463441657716726e-08, "loss": 0.1252, "step": 29716 }, { "epoch": 97.4327868852459, "grad_norm": 2.4494028091430664, "learning_rate": 3.454617069508226e-08, "loss": 0.1279, "step": 29717 }, { "epoch": 97.43606557377049, "grad_norm": 1.9597400426864624, "learning_rate": 3.445803718384988e-08, "loss": 0.055, "step": 29718 }, { "epoch": 97.43934426229508, "grad_norm": 2.338724374771118, "learning_rate": 3.437001604446488e-08, "loss": 0.1624, "step": 29719 }, { "epoch": 97.44262295081967, "grad_norm": 2.538985013961792, "learning_rate": 3.428210727791981e-08, "loss": 0.1338, "step": 29720 }, { "epoch": 97.44590163934426, "grad_norm": 1.6779476404190063, "learning_rate": 3.419431088520608e-08, "loss": 0.0461, "step": 29721 }, { "epoch": 97.44918032786886, "grad_norm": 1.8801912069320679, "learning_rate": 3.4106626867312917e-08, "loss": 0.0539, "step": 29722 }, { "epoch": 97.45245901639345, "grad_norm": 1.8990089893341064, "learning_rate": 3.4019055225229524e-08, "loss": 0.0532, "step": 29723 }, { "epoch": 97.45573770491804, "grad_norm": 1.4613311290740967, "learning_rate": 3.3931595959942885e-08, "loss": 0.0334, "step": 29724 }, { "epoch": 97.45901639344262, "grad_norm": 2.541680097579956, "learning_rate": 3.3844249072439997e-08, "loss": 0.0873, "step": 29725 }, { "epoch": 97.46229508196721, "grad_norm": 2.1612727642059326, "learning_rate": 3.3757014563705615e-08, "loss": 0.1944, "step": 29726 }, { "epoch": 97.4655737704918, "grad_norm": 1.9958388805389404, "learning_rate": 3.36698924347234e-08, "loss": 0.1321, "step": 29727 }, { "epoch": 97.46885245901639, "grad_norm": 1.3720968961715698, "learning_rate": 3.358288268647481e-08, "loss": 0.0726, "step": 29728 }, { "epoch": 97.47213114754098, "grad_norm": 2.7258265018463135, "learning_rate": 3.349598531994236e-08, "loss": 0.1616, "step": 29729 }, { "epoch": 97.47540983606558, "grad_norm": 1.6016817092895508, "learning_rate": 3.340920033610418e-08, "loss": 0.0897, "step": 29730 }, { "epoch": 97.47868852459017, "grad_norm": 2.5975728034973145, "learning_rate": 3.332252773594058e-08, "loss": 0.1777, "step": 29731 }, { "epoch": 97.48196721311476, "grad_norm": 2.6232266426086426, "learning_rate": 3.323596752042857e-08, "loss": 0.1068, "step": 29732 }, { "epoch": 97.48524590163935, "grad_norm": 8.372872352600098, "learning_rate": 3.314951969054403e-08, "loss": 0.1131, "step": 29733 }, { "epoch": 97.48852459016393, "grad_norm": 2.550178289413452, "learning_rate": 3.3063184247260626e-08, "loss": 0.1804, "step": 29734 }, { "epoch": 97.49180327868852, "grad_norm": 1.5776671171188354, "learning_rate": 3.2976961191553135e-08, "loss": 0.0433, "step": 29735 }, { "epoch": 97.49508196721311, "grad_norm": 2.362004041671753, "learning_rate": 3.289085052439411e-08, "loss": 0.0944, "step": 29736 }, { "epoch": 97.4983606557377, "grad_norm": 2.1371331214904785, "learning_rate": 3.2804852246753893e-08, "loss": 0.2112, "step": 29737 }, { "epoch": 97.5016393442623, "grad_norm": 2.026317834854126, "learning_rate": 3.27189663596017e-08, "loss": 0.0556, "step": 29738 }, { "epoch": 97.50491803278689, "grad_norm": 2.512012004852295, "learning_rate": 3.263319286390676e-08, "loss": 0.0954, "step": 29739 }, { "epoch": 97.50819672131148, "grad_norm": 1.9096674919128418, "learning_rate": 3.254753176063608e-08, "loss": 0.0469, "step": 29740 }, { "epoch": 97.51147540983607, "grad_norm": 2.224266290664673, "learning_rate": 3.246198305075554e-08, "loss": 0.1188, "step": 29741 }, { "epoch": 97.51475409836065, "grad_norm": 2.5418684482574463, "learning_rate": 3.237654673522994e-08, "loss": 0.165, "step": 29742 }, { "epoch": 97.51803278688524, "grad_norm": 2.6435649394989014, "learning_rate": 3.229122281502184e-08, "loss": 0.0742, "step": 29743 }, { "epoch": 97.52131147540983, "grad_norm": 1.4647687673568726, "learning_rate": 3.220601129109491e-08, "loss": 0.0388, "step": 29744 }, { "epoch": 97.52459016393442, "grad_norm": 1.7317008972167969, "learning_rate": 3.212091216440838e-08, "loss": 0.0396, "step": 29745 }, { "epoch": 97.52786885245902, "grad_norm": 1.723590612411499, "learning_rate": 3.20359254359226e-08, "loss": 0.179, "step": 29746 }, { "epoch": 97.53114754098361, "grad_norm": 2.271975040435791, "learning_rate": 3.19510511065968e-08, "loss": 0.084, "step": 29747 }, { "epoch": 97.5344262295082, "grad_norm": 2.350311756134033, "learning_rate": 3.186628917738577e-08, "loss": 0.1465, "step": 29748 }, { "epoch": 97.53770491803279, "grad_norm": 1.9600944519042969, "learning_rate": 3.178163964924763e-08, "loss": 0.0707, "step": 29749 }, { "epoch": 97.54098360655738, "grad_norm": 1.8927282094955444, "learning_rate": 3.169710252313496e-08, "loss": 0.1781, "step": 29750 }, { "epoch": 97.54426229508196, "grad_norm": 2.510087490081787, "learning_rate": 3.161267780000255e-08, "loss": 0.06, "step": 29751 }, { "epoch": 97.54754098360655, "grad_norm": 2.422255754470825, "learning_rate": 3.152836548080185e-08, "loss": 0.0922, "step": 29752 }, { "epoch": 97.55081967213114, "grad_norm": 2.529689073562622, "learning_rate": 3.144416556648211e-08, "loss": 0.2038, "step": 29753 }, { "epoch": 97.55409836065574, "grad_norm": 2.0514421463012695, "learning_rate": 3.1360078057995905e-08, "loss": 0.0393, "step": 29754 }, { "epoch": 97.55737704918033, "grad_norm": 2.2639079093933105, "learning_rate": 3.1276102956289134e-08, "loss": 0.0651, "step": 29755 }, { "epoch": 97.56065573770492, "grad_norm": 1.4954417943954468, "learning_rate": 3.119224026230883e-08, "loss": 0.0206, "step": 29756 }, { "epoch": 97.56393442622951, "grad_norm": 1.960267186164856, "learning_rate": 3.1108489977000885e-08, "loss": 0.0465, "step": 29757 }, { "epoch": 97.5672131147541, "grad_norm": 1.8031290769577026, "learning_rate": 3.102485210130901e-08, "loss": 0.1215, "step": 29758 }, { "epoch": 97.57049180327868, "grad_norm": 2.1479504108428955, "learning_rate": 3.0941326636177995e-08, "loss": 0.0644, "step": 29759 }, { "epoch": 97.57377049180327, "grad_norm": 3.430809736251831, "learning_rate": 3.0857913582549304e-08, "loss": 0.2141, "step": 29760 }, { "epoch": 97.57704918032788, "grad_norm": 1.8427937030792236, "learning_rate": 3.0774612941362194e-08, "loss": 0.0692, "step": 29761 }, { "epoch": 97.58032786885246, "grad_norm": 2.483416795730591, "learning_rate": 3.0691424713557015e-08, "loss": 0.058, "step": 29762 }, { "epoch": 97.58360655737705, "grad_norm": 2.1340293884277344, "learning_rate": 3.0608348900070806e-08, "loss": 0.0358, "step": 29763 }, { "epoch": 97.58688524590164, "grad_norm": 1.6132773160934448, "learning_rate": 3.05253855018417e-08, "loss": 0.0256, "step": 29764 }, { "epoch": 97.59016393442623, "grad_norm": 1.6993374824523926, "learning_rate": 3.04425345198045e-08, "loss": 0.1188, "step": 29765 }, { "epoch": 97.59344262295082, "grad_norm": 2.4876363277435303, "learning_rate": 3.035979595489291e-08, "loss": 0.0489, "step": 29766 }, { "epoch": 97.5967213114754, "grad_norm": 1.631692886352539, "learning_rate": 3.027716980804174e-08, "loss": 0.0951, "step": 29767 }, { "epoch": 97.6, "grad_norm": 2.5209949016571045, "learning_rate": 3.019465608018024e-08, "loss": 0.0652, "step": 29768 }, { "epoch": 97.6032786885246, "grad_norm": 1.5493346452713013, "learning_rate": 3.011225477223989e-08, "loss": 0.0778, "step": 29769 }, { "epoch": 97.60655737704919, "grad_norm": 2.4011099338531494, "learning_rate": 3.0029965885151055e-08, "loss": 0.1059, "step": 29770 }, { "epoch": 97.60983606557377, "grad_norm": 1.9388136863708496, "learning_rate": 2.994778941983967e-08, "loss": 0.0333, "step": 29771 }, { "epoch": 97.61311475409836, "grad_norm": 3.2208855152130127, "learning_rate": 2.986572537723276e-08, "loss": 0.0875, "step": 29772 }, { "epoch": 97.61639344262295, "grad_norm": 2.3113889694213867, "learning_rate": 2.978377375825736e-08, "loss": 0.0532, "step": 29773 }, { "epoch": 97.61967213114754, "grad_norm": 1.943501353263855, "learning_rate": 2.9701934563834968e-08, "loss": 0.0431, "step": 29774 }, { "epoch": 97.62295081967213, "grad_norm": 1.5957436561584473, "learning_rate": 2.9620207794890386e-08, "loss": 0.055, "step": 29775 }, { "epoch": 97.62622950819672, "grad_norm": 2.4908454418182373, "learning_rate": 2.953859345234511e-08, "loss": 0.2185, "step": 29776 }, { "epoch": 97.62950819672132, "grad_norm": 0.9832823276519775, "learning_rate": 2.9457091537118398e-08, "loss": 0.013, "step": 29777 }, { "epoch": 97.6327868852459, "grad_norm": 2.327420473098755, "learning_rate": 2.9375702050129516e-08, "loss": 0.0615, "step": 29778 }, { "epoch": 97.6360655737705, "grad_norm": 3.4152474403381348, "learning_rate": 2.9294424992296623e-08, "loss": 0.1683, "step": 29779 }, { "epoch": 97.63934426229508, "grad_norm": 2.0308773517608643, "learning_rate": 2.9213260364536754e-08, "loss": 0.0454, "step": 29780 }, { "epoch": 97.64262295081967, "grad_norm": 2.68475341796875, "learning_rate": 2.9132208167763633e-08, "loss": 0.1208, "step": 29781 }, { "epoch": 97.64590163934426, "grad_norm": 2.0370566844940186, "learning_rate": 2.9051268402892074e-08, "loss": 0.0698, "step": 29782 }, { "epoch": 97.64918032786885, "grad_norm": 2.232219934463501, "learning_rate": 2.8970441070834688e-08, "loss": 0.0759, "step": 29783 }, { "epoch": 97.65245901639344, "grad_norm": 2.2707788944244385, "learning_rate": 2.8889726172502963e-08, "loss": 0.15, "step": 29784 }, { "epoch": 97.65573770491804, "grad_norm": 2.537644147872925, "learning_rate": 2.8809123708806176e-08, "loss": 0.0996, "step": 29785 }, { "epoch": 97.65901639344263, "grad_norm": 1.8826698064804077, "learning_rate": 2.8728633680654707e-08, "loss": 0.1329, "step": 29786 }, { "epoch": 97.66229508196722, "grad_norm": 1.993721842765808, "learning_rate": 2.8648256088955607e-08, "loss": 0.0765, "step": 29787 }, { "epoch": 97.6655737704918, "grad_norm": 2.3193304538726807, "learning_rate": 2.856799093461482e-08, "loss": 0.1486, "step": 29788 }, { "epoch": 97.66885245901639, "grad_norm": 2.2688443660736084, "learning_rate": 2.848783821853718e-08, "loss": 0.0497, "step": 29789 }, { "epoch": 97.67213114754098, "grad_norm": 2.3243279457092285, "learning_rate": 2.8407797941627512e-08, "loss": 0.3069, "step": 29790 }, { "epoch": 97.67540983606557, "grad_norm": 1.9104373455047607, "learning_rate": 2.8327870104787325e-08, "loss": 0.0747, "step": 29791 }, { "epoch": 97.67868852459016, "grad_norm": 1.6225175857543945, "learning_rate": 2.8248054708919226e-08, "loss": 0.0577, "step": 29792 }, { "epoch": 97.68196721311476, "grad_norm": 2.4180092811584473, "learning_rate": 2.8168351754921387e-08, "loss": 0.2124, "step": 29793 }, { "epoch": 97.68524590163935, "grad_norm": 1.7561343908309937, "learning_rate": 2.8088761243694195e-08, "loss": 0.1028, "step": 29794 }, { "epoch": 97.68852459016394, "grad_norm": 3.0326759815216064, "learning_rate": 2.8009283176133606e-08, "loss": 0.1854, "step": 29795 }, { "epoch": 97.69180327868852, "grad_norm": 2.10099458694458, "learning_rate": 2.7929917553136677e-08, "loss": 0.1232, "step": 29796 }, { "epoch": 97.69508196721311, "grad_norm": 2.0113484859466553, "learning_rate": 2.7850664375599358e-08, "loss": 0.0465, "step": 29797 }, { "epoch": 97.6983606557377, "grad_norm": 2.254471778869629, "learning_rate": 2.7771523644413156e-08, "loss": 0.0748, "step": 29798 }, { "epoch": 97.70163934426229, "grad_norm": 2.0418386459350586, "learning_rate": 2.7692495360471804e-08, "loss": 0.081, "step": 29799 }, { "epoch": 97.70491803278688, "grad_norm": 1.3849172592163086, "learning_rate": 2.76135795246657e-08, "loss": 0.0231, "step": 29800 }, { "epoch": 97.70819672131148, "grad_norm": 2.0214290618896484, "learning_rate": 2.7534776137886356e-08, "loss": 0.1942, "step": 29801 }, { "epoch": 97.71147540983607, "grad_norm": 2.5964255332946777, "learning_rate": 2.7456085201020832e-08, "loss": 0.1294, "step": 29802 }, { "epoch": 97.71475409836066, "grad_norm": 5.563587665557861, "learning_rate": 2.7377506714956205e-08, "loss": 0.0578, "step": 29803 }, { "epoch": 97.71803278688525, "grad_norm": 2.2739152908325195, "learning_rate": 2.7299040680579536e-08, "loss": 0.1815, "step": 29804 }, { "epoch": 97.72131147540983, "grad_norm": 2.488654136657715, "learning_rate": 2.722068709877457e-08, "loss": 0.0731, "step": 29805 }, { "epoch": 97.72459016393442, "grad_norm": 2.9478206634521484, "learning_rate": 2.7142445970426145e-08, "loss": 0.0557, "step": 29806 }, { "epoch": 97.72786885245901, "grad_norm": 2.053290843963623, "learning_rate": 2.7064317296415787e-08, "loss": 0.0457, "step": 29807 }, { "epoch": 97.73114754098361, "grad_norm": 2.9200515747070312, "learning_rate": 2.69863010776239e-08, "loss": 0.1594, "step": 29808 }, { "epoch": 97.7344262295082, "grad_norm": 2.7280495166778564, "learning_rate": 2.690839731493089e-08, "loss": 0.1232, "step": 29809 }, { "epoch": 97.73770491803279, "grad_norm": 1.6697361469268799, "learning_rate": 2.6830606009216053e-08, "loss": 0.031, "step": 29810 }, { "epoch": 97.74098360655738, "grad_norm": 1.7754579782485962, "learning_rate": 2.6752927161355357e-08, "loss": 0.1033, "step": 29811 }, { "epoch": 97.74426229508197, "grad_norm": 2.416395425796509, "learning_rate": 2.667536077222477e-08, "loss": 0.1365, "step": 29812 }, { "epoch": 97.74754098360656, "grad_norm": 2.2645750045776367, "learning_rate": 2.659790684269803e-08, "loss": 0.0645, "step": 29813 }, { "epoch": 97.75081967213114, "grad_norm": 2.7175750732421875, "learning_rate": 2.6520565373651108e-08, "loss": 0.0555, "step": 29814 }, { "epoch": 97.75409836065573, "grad_norm": 1.9727190732955933, "learning_rate": 2.644333636595442e-08, "loss": 0.1515, "step": 29815 }, { "epoch": 97.75737704918033, "grad_norm": 2.026374340057373, "learning_rate": 2.6366219820478378e-08, "loss": 0.1492, "step": 29816 }, { "epoch": 97.76065573770492, "grad_norm": 1.872443675994873, "learning_rate": 2.62892157380934e-08, "loss": 0.1053, "step": 29817 }, { "epoch": 97.76393442622951, "grad_norm": 2.6461665630340576, "learning_rate": 2.6212324119667677e-08, "loss": 0.0365, "step": 29818 }, { "epoch": 97.7672131147541, "grad_norm": 2.4194400310516357, "learning_rate": 2.6135544966068294e-08, "loss": 0.1118, "step": 29819 }, { "epoch": 97.77049180327869, "grad_norm": 2.385679006576538, "learning_rate": 2.6058878278161225e-08, "loss": 0.0811, "step": 29820 }, { "epoch": 97.77377049180328, "grad_norm": 2.657813787460327, "learning_rate": 2.5982324056810227e-08, "loss": 0.2285, "step": 29821 }, { "epoch": 97.77704918032786, "grad_norm": 1.9183534383773804, "learning_rate": 2.5905882302877938e-08, "loss": 0.0396, "step": 29822 }, { "epoch": 97.78032786885245, "grad_norm": 5.544907093048096, "learning_rate": 2.5829553017228114e-08, "loss": 0.1009, "step": 29823 }, { "epoch": 97.78360655737706, "grad_norm": 2.4029006958007812, "learning_rate": 2.575333620072118e-08, "loss": 0.0611, "step": 29824 }, { "epoch": 97.78688524590164, "grad_norm": 2.045506238937378, "learning_rate": 2.5677231854215333e-08, "loss": 0.0866, "step": 29825 }, { "epoch": 97.79016393442623, "grad_norm": 4.207647323608398, "learning_rate": 2.560123997856989e-08, "loss": 0.213, "step": 29826 }, { "epoch": 97.79344262295082, "grad_norm": 2.3916962146759033, "learning_rate": 2.5525360574640834e-08, "loss": 0.17, "step": 29827 }, { "epoch": 97.79672131147541, "grad_norm": 2.2458274364471436, "learning_rate": 2.5449593643284144e-08, "loss": 0.0722, "step": 29828 }, { "epoch": 97.8, "grad_norm": 2.84671688079834, "learning_rate": 2.537393918535358e-08, "loss": 0.2082, "step": 29829 }, { "epoch": 97.80327868852459, "grad_norm": 1.7513152360916138, "learning_rate": 2.5298397201704015e-08, "loss": 0.055, "step": 29830 }, { "epoch": 97.80655737704917, "grad_norm": 9.553598403930664, "learning_rate": 2.5222967693185886e-08, "loss": 0.1285, "step": 29831 }, { "epoch": 97.80983606557378, "grad_norm": 2.460895538330078, "learning_rate": 2.5147650660649613e-08, "loss": 0.1332, "step": 29832 }, { "epoch": 97.81311475409836, "grad_norm": 51.087677001953125, "learning_rate": 2.5072446104944524e-08, "loss": 0.0559, "step": 29833 }, { "epoch": 97.81639344262295, "grad_norm": 2.346663475036621, "learning_rate": 2.499735402691994e-08, "loss": 0.0531, "step": 29834 }, { "epoch": 97.81967213114754, "grad_norm": 2.54445219039917, "learning_rate": 2.4922374427420736e-08, "loss": 0.0583, "step": 29835 }, { "epoch": 97.82295081967213, "grad_norm": 1.8544321060180664, "learning_rate": 2.4847507307294018e-08, "loss": 0.0994, "step": 29836 }, { "epoch": 97.82622950819672, "grad_norm": 2.0916340351104736, "learning_rate": 2.4772752667382437e-08, "loss": 0.0551, "step": 29837 }, { "epoch": 97.8295081967213, "grad_norm": 1.538800835609436, "learning_rate": 2.4698110508529772e-08, "loss": 0.1153, "step": 29838 }, { "epoch": 97.8327868852459, "grad_norm": 2.7607948780059814, "learning_rate": 2.4623580831577565e-08, "loss": 0.2105, "step": 29839 }, { "epoch": 97.8360655737705, "grad_norm": 1.8482600450515747, "learning_rate": 2.4549163637367368e-08, "loss": 0.088, "step": 29840 }, { "epoch": 97.83934426229509, "grad_norm": 2.13197660446167, "learning_rate": 2.447485892673629e-08, "loss": 0.0708, "step": 29841 }, { "epoch": 97.84262295081967, "grad_norm": 2.8647940158843994, "learning_rate": 2.4400666700523657e-08, "loss": 0.2633, "step": 29842 }, { "epoch": 97.84590163934426, "grad_norm": 1.4291521310806274, "learning_rate": 2.432658695956436e-08, "loss": 0.0292, "step": 29843 }, { "epoch": 97.84918032786885, "grad_norm": 1.8819587230682373, "learning_rate": 2.4252619704695502e-08, "loss": 0.0883, "step": 29844 }, { "epoch": 97.85245901639344, "grad_norm": 2.3817989826202393, "learning_rate": 2.4178764936750864e-08, "loss": 0.1334, "step": 29845 }, { "epoch": 97.85573770491803, "grad_norm": 2.169555902481079, "learning_rate": 2.4105022656563114e-08, "loss": 0.3385, "step": 29846 }, { "epoch": 97.85901639344263, "grad_norm": 2.3701608180999756, "learning_rate": 2.40313928649627e-08, "loss": 0.0669, "step": 29847 }, { "epoch": 97.86229508196722, "grad_norm": 3.232895612716675, "learning_rate": 2.3957875562781176e-08, "loss": 0.0656, "step": 29848 }, { "epoch": 97.8655737704918, "grad_norm": 1.7450379133224487, "learning_rate": 2.3884470750847878e-08, "loss": 0.1155, "step": 29849 }, { "epoch": 97.8688524590164, "grad_norm": 1.4536858797073364, "learning_rate": 2.3811178429988812e-08, "loss": 0.0678, "step": 29850 }, { "epoch": 97.87213114754098, "grad_norm": 2.926992177963257, "learning_rate": 2.3737998601031097e-08, "loss": 0.0901, "step": 29851 }, { "epoch": 97.87540983606557, "grad_norm": 2.7205662727355957, "learning_rate": 2.366493126480074e-08, "loss": 0.1344, "step": 29852 }, { "epoch": 97.87868852459016, "grad_norm": 2.9709320068359375, "learning_rate": 2.3591976422121522e-08, "loss": 0.1075, "step": 29853 }, { "epoch": 97.88196721311475, "grad_norm": 2.7533905506134033, "learning_rate": 2.3519134073815007e-08, "loss": 0.0763, "step": 29854 }, { "epoch": 97.88524590163935, "grad_norm": 2.5298023223876953, "learning_rate": 2.344640422070277e-08, "loss": 0.0891, "step": 29855 }, { "epoch": 97.88852459016394, "grad_norm": 2.342012643814087, "learning_rate": 2.3373786863605252e-08, "loss": 0.0448, "step": 29856 }, { "epoch": 97.89180327868853, "grad_norm": 2.1385841369628906, "learning_rate": 2.3301282003341808e-08, "loss": 0.1083, "step": 29857 }, { "epoch": 97.89508196721312, "grad_norm": 2.952322006225586, "learning_rate": 2.3228889640730668e-08, "loss": 0.1097, "step": 29858 }, { "epoch": 97.8983606557377, "grad_norm": 2.429555892944336, "learning_rate": 2.3156609776585625e-08, "loss": 0.0813, "step": 29859 }, { "epoch": 97.90163934426229, "grad_norm": 2.668433666229248, "learning_rate": 2.3084442411723805e-08, "loss": 0.1529, "step": 29860 }, { "epoch": 97.90491803278688, "grad_norm": 1.9017813205718994, "learning_rate": 2.3012387546957893e-08, "loss": 0.0544, "step": 29861 }, { "epoch": 97.90819672131147, "grad_norm": 2.6114914417266846, "learning_rate": 2.294044518310057e-08, "loss": 0.0979, "step": 29862 }, { "epoch": 97.91147540983607, "grad_norm": 1.8659907579421997, "learning_rate": 2.2868615320963406e-08, "loss": 0.0406, "step": 29863 }, { "epoch": 97.91475409836066, "grad_norm": 1.6725963354110718, "learning_rate": 2.2796897961356868e-08, "loss": 0.0264, "step": 29864 }, { "epoch": 97.91803278688525, "grad_norm": 1.8086835145950317, "learning_rate": 2.2725293105088086e-08, "loss": 0.1179, "step": 29865 }, { "epoch": 97.92131147540984, "grad_norm": 2.501070737838745, "learning_rate": 2.2653800752966416e-08, "loss": 0.1073, "step": 29866 }, { "epoch": 97.92459016393443, "grad_norm": 2.532396078109741, "learning_rate": 2.2582420905796766e-08, "loss": 0.1225, "step": 29867 }, { "epoch": 97.92786885245901, "grad_norm": 2.4622325897216797, "learning_rate": 2.2511153564384046e-08, "loss": 0.1301, "step": 29868 }, { "epoch": 97.9311475409836, "grad_norm": 2.2915689945220947, "learning_rate": 2.2439998729530952e-08, "loss": 0.1206, "step": 29869 }, { "epoch": 97.93442622950819, "grad_norm": 2.3301758766174316, "learning_rate": 2.2368956402042398e-08, "loss": 0.1876, "step": 29870 }, { "epoch": 97.9377049180328, "grad_norm": 2.0577073097229004, "learning_rate": 2.2298026582717736e-08, "loss": 0.1368, "step": 29871 }, { "epoch": 97.94098360655738, "grad_norm": 1.32089102268219, "learning_rate": 2.2227209272356332e-08, "loss": 0.0271, "step": 29872 }, { "epoch": 97.94426229508197, "grad_norm": 2.5832085609436035, "learning_rate": 2.2156504471757546e-08, "loss": 0.1132, "step": 29873 }, { "epoch": 97.94754098360656, "grad_norm": 2.8944711685180664, "learning_rate": 2.2085912181719628e-08, "loss": 0.1834, "step": 29874 }, { "epoch": 97.95081967213115, "grad_norm": 1.6225950717926025, "learning_rate": 2.2015432403036386e-08, "loss": 0.0595, "step": 29875 }, { "epoch": 97.95409836065573, "grad_norm": 3.9325411319732666, "learning_rate": 2.1945065136503853e-08, "loss": 0.06, "step": 29876 }, { "epoch": 97.95737704918032, "grad_norm": 1.9683573246002197, "learning_rate": 2.1874810382914725e-08, "loss": 0.144, "step": 29877 }, { "epoch": 97.96065573770491, "grad_norm": 1.5559556484222412, "learning_rate": 2.1804668143062812e-08, "loss": 0.0873, "step": 29878 }, { "epoch": 97.96393442622951, "grad_norm": 2.3759541511535645, "learning_rate": 2.1734638417737485e-08, "loss": 0.072, "step": 29879 }, { "epoch": 97.9672131147541, "grad_norm": 2.275331974029541, "learning_rate": 2.166472120772922e-08, "loss": 0.0176, "step": 29880 }, { "epoch": 97.97049180327869, "grad_norm": 1.356019139289856, "learning_rate": 2.159491651382628e-08, "loss": 0.0334, "step": 29881 }, { "epoch": 97.97377049180328, "grad_norm": 2.44707989692688, "learning_rate": 2.15252243368147e-08, "loss": 0.2157, "step": 29882 }, { "epoch": 97.97704918032787, "grad_norm": 1.8683408498764038, "learning_rate": 2.1455644677481624e-08, "loss": 0.156, "step": 29883 }, { "epoch": 97.98032786885246, "grad_norm": 2.3757104873657227, "learning_rate": 2.1386177536611986e-08, "loss": 0.0953, "step": 29884 }, { "epoch": 97.98360655737704, "grad_norm": 1.9841855764389038, "learning_rate": 2.1316822914987378e-08, "loss": 0.0711, "step": 29885 }, { "epoch": 97.98688524590163, "grad_norm": 2.807512044906616, "learning_rate": 2.1247580813391622e-08, "loss": 0.1506, "step": 29886 }, { "epoch": 97.99016393442623, "grad_norm": 2.020881414413452, "learning_rate": 2.1178451232604092e-08, "loss": 0.152, "step": 29887 }, { "epoch": 97.99344262295082, "grad_norm": 2.0443243980407715, "learning_rate": 2.1109434173404165e-08, "loss": 0.1304, "step": 29888 }, { "epoch": 97.99672131147541, "grad_norm": 2.9598989486694336, "learning_rate": 2.1040529636572327e-08, "loss": 0.2047, "step": 29889 }, { "epoch": 98.0, "grad_norm": 1.6397974491119385, "learning_rate": 2.0971737622883515e-08, "loss": 0.0461, "step": 29890 }, { "epoch": 98.00327868852459, "grad_norm": 2.35581636428833, "learning_rate": 2.0903058133113773e-08, "loss": 0.0712, "step": 29891 }, { "epoch": 98.00655737704918, "grad_norm": 1.8906605243682861, "learning_rate": 2.083449116803804e-08, "loss": 0.0328, "step": 29892 }, { "epoch": 98.00983606557377, "grad_norm": 2.256443738937378, "learning_rate": 2.076603672842903e-08, "loss": 0.2396, "step": 29893 }, { "epoch": 98.01311475409837, "grad_norm": 4.382108211517334, "learning_rate": 2.0697694815058343e-08, "loss": 0.1856, "step": 29894 }, { "epoch": 98.01639344262296, "grad_norm": 2.261521339416504, "learning_rate": 2.0629465428697594e-08, "loss": 0.128, "step": 29895 }, { "epoch": 98.01967213114754, "grad_norm": 2.1128618717193604, "learning_rate": 2.0561348570115046e-08, "loss": 0.0601, "step": 29896 }, { "epoch": 98.02295081967213, "grad_norm": 3.092548370361328, "learning_rate": 2.0493344240078983e-08, "loss": 0.2341, "step": 29897 }, { "epoch": 98.02622950819672, "grad_norm": 1.907358169555664, "learning_rate": 2.0425452439357675e-08, "loss": 0.0702, "step": 29898 }, { "epoch": 98.02950819672131, "grad_norm": 2.6252427101135254, "learning_rate": 2.0357673168714952e-08, "loss": 0.068, "step": 29899 }, { "epoch": 98.0327868852459, "grad_norm": 1.6944849491119385, "learning_rate": 2.0290006428914655e-08, "loss": 0.0359, "step": 29900 }, { "epoch": 98.03606557377049, "grad_norm": 2.452953577041626, "learning_rate": 2.0222452220722832e-08, "loss": 0.115, "step": 29901 }, { "epoch": 98.03934426229509, "grad_norm": 2.5273358821868896, "learning_rate": 2.0155010544897768e-08, "loss": 0.0937, "step": 29902 }, { "epoch": 98.04262295081968, "grad_norm": 2.0215117931365967, "learning_rate": 2.0087681402202185e-08, "loss": 0.091, "step": 29903 }, { "epoch": 98.04590163934427, "grad_norm": 1.8151546716690063, "learning_rate": 2.0020464793394366e-08, "loss": 0.0567, "step": 29904 }, { "epoch": 98.04918032786885, "grad_norm": 2.1169638633728027, "learning_rate": 1.9953360719231484e-08, "loss": 0.1382, "step": 29905 }, { "epoch": 98.05245901639344, "grad_norm": 2.770066499710083, "learning_rate": 1.988636918047182e-08, "loss": 0.3377, "step": 29906 }, { "epoch": 98.05573770491803, "grad_norm": 1.9825793504714966, "learning_rate": 1.9819490177870326e-08, "loss": 0.1581, "step": 29907 }, { "epoch": 98.05901639344262, "grad_norm": 2.31335711479187, "learning_rate": 1.9752723712180845e-08, "loss": 0.0424, "step": 29908 }, { "epoch": 98.0622950819672, "grad_norm": 2.5771353244781494, "learning_rate": 1.9686069784156104e-08, "loss": 0.1048, "step": 29909 }, { "epoch": 98.06557377049181, "grad_norm": 1.9711272716522217, "learning_rate": 1.9619528394547727e-08, "loss": 0.0932, "step": 29910 }, { "epoch": 98.0688524590164, "grad_norm": 1.5031648874282837, "learning_rate": 1.9553099544106223e-08, "loss": 0.1789, "step": 29911 }, { "epoch": 98.07213114754099, "grad_norm": 1.70377516746521, "learning_rate": 1.9486783233580997e-08, "loss": 0.0905, "step": 29912 }, { "epoch": 98.07540983606557, "grad_norm": 2.0982000827789307, "learning_rate": 1.9420579463718114e-08, "loss": 0.0556, "step": 29913 }, { "epoch": 98.07868852459016, "grad_norm": 3.582343101501465, "learning_rate": 1.9354488235266977e-08, "loss": 0.0528, "step": 29914 }, { "epoch": 98.08196721311475, "grad_norm": 2.0180416107177734, "learning_rate": 1.9288509548970325e-08, "loss": 0.1332, "step": 29915 }, { "epoch": 98.08524590163934, "grad_norm": 2.048892021179199, "learning_rate": 1.9222643405573114e-08, "loss": 0.0504, "step": 29916 }, { "epoch": 98.08852459016393, "grad_norm": 1.942814588546753, "learning_rate": 1.915688980581809e-08, "loss": 0.1168, "step": 29917 }, { "epoch": 98.09180327868853, "grad_norm": 1.568605661392212, "learning_rate": 1.9091248750446877e-08, "loss": 0.1095, "step": 29918 }, { "epoch": 98.09508196721312, "grad_norm": 1.7542822360992432, "learning_rate": 1.9025720240199996e-08, "loss": 0.0285, "step": 29919 }, { "epoch": 98.09836065573771, "grad_norm": 2.5085833072662354, "learning_rate": 1.8960304275814634e-08, "loss": 0.1, "step": 29920 }, { "epoch": 98.1016393442623, "grad_norm": 1.7963628768920898, "learning_rate": 1.88950008580302e-08, "loss": 0.0459, "step": 29921 }, { "epoch": 98.10491803278688, "grad_norm": 2.2635958194732666, "learning_rate": 1.882980998758166e-08, "loss": 0.1545, "step": 29922 }, { "epoch": 98.10819672131147, "grad_norm": 2.3273680210113525, "learning_rate": 1.8764731665205093e-08, "loss": 0.0614, "step": 29923 }, { "epoch": 98.11147540983606, "grad_norm": 1.8401267528533936, "learning_rate": 1.8699765891634357e-08, "loss": 0.0535, "step": 29924 }, { "epoch": 98.11475409836065, "grad_norm": 2.0466058254241943, "learning_rate": 1.863491266760109e-08, "loss": 0.0464, "step": 29925 }, { "epoch": 98.11803278688525, "grad_norm": 3.34609055519104, "learning_rate": 1.857017199383804e-08, "loss": 0.1255, "step": 29926 }, { "epoch": 98.12131147540984, "grad_norm": 2.452979803085327, "learning_rate": 1.8505543871073506e-08, "loss": 0.2027, "step": 29927 }, { "epoch": 98.12459016393443, "grad_norm": 2.0766892433166504, "learning_rate": 1.844102830003802e-08, "loss": 0.1277, "step": 29928 }, { "epoch": 98.12786885245902, "grad_norm": 1.8082691431045532, "learning_rate": 1.8376625281457672e-08, "loss": 0.0586, "step": 29929 }, { "epoch": 98.1311475409836, "grad_norm": 2.690819501876831, "learning_rate": 1.831233481605854e-08, "loss": 0.0542, "step": 29930 }, { "epoch": 98.1344262295082, "grad_norm": 1.93976628780365, "learning_rate": 1.8248156904567825e-08, "loss": 0.2054, "step": 29931 }, { "epoch": 98.13770491803278, "grad_norm": 2.5424952507019043, "learning_rate": 1.818409154770606e-08, "loss": 0.1331, "step": 29932 }, { "epoch": 98.14098360655737, "grad_norm": 1.7786322832107544, "learning_rate": 1.8120138746198225e-08, "loss": 0.0846, "step": 29933 }, { "epoch": 98.14426229508197, "grad_norm": 2.8374271392822266, "learning_rate": 1.8056298500763736e-08, "loss": 0.0891, "step": 29934 }, { "epoch": 98.14754098360656, "grad_norm": 3.3346617221832275, "learning_rate": 1.7992570812123132e-08, "loss": 0.1246, "step": 29935 }, { "epoch": 98.15081967213115, "grad_norm": 2.5357813835144043, "learning_rate": 1.792895568099473e-08, "loss": 0.0609, "step": 29936 }, { "epoch": 98.15409836065574, "grad_norm": 1.7054758071899414, "learning_rate": 1.7865453108096843e-08, "loss": 0.0361, "step": 29937 }, { "epoch": 98.15737704918033, "grad_norm": 1.9514265060424805, "learning_rate": 1.780206309414445e-08, "loss": 0.1434, "step": 29938 }, { "epoch": 98.16065573770491, "grad_norm": 13.91183853149414, "learning_rate": 1.773878563985143e-08, "loss": 0.0413, "step": 29939 }, { "epoch": 98.1639344262295, "grad_norm": 2.1270675659179688, "learning_rate": 1.7675620745933874e-08, "loss": 0.0648, "step": 29940 }, { "epoch": 98.1672131147541, "grad_norm": 2.19750714302063, "learning_rate": 1.7612568413103436e-08, "loss": 0.0911, "step": 29941 }, { "epoch": 98.1704918032787, "grad_norm": 3.1184048652648926, "learning_rate": 1.7549628642069548e-08, "loss": 0.1643, "step": 29942 }, { "epoch": 98.17377049180328, "grad_norm": 2.1898136138916016, "learning_rate": 1.7486801433541644e-08, "loss": 0.1049, "step": 29943 }, { "epoch": 98.17704918032787, "grad_norm": 2.454946756362915, "learning_rate": 1.7424086788230264e-08, "loss": 0.2418, "step": 29944 }, { "epoch": 98.18032786885246, "grad_norm": 3.045454978942871, "learning_rate": 1.7361484706842623e-08, "loss": 0.1536, "step": 29945 }, { "epoch": 98.18360655737705, "grad_norm": 2.486300468444824, "learning_rate": 1.72989951900826e-08, "loss": 0.151, "step": 29946 }, { "epoch": 98.18688524590164, "grad_norm": 2.693438768386841, "learning_rate": 1.723661823865519e-08, "loss": 0.1038, "step": 29947 }, { "epoch": 98.19016393442622, "grad_norm": 1.620774507522583, "learning_rate": 1.7174353853265378e-08, "loss": 0.0833, "step": 29948 }, { "epoch": 98.19344262295083, "grad_norm": 2.0315942764282227, "learning_rate": 1.7112202034613723e-08, "loss": 0.0591, "step": 29949 }, { "epoch": 98.19672131147541, "grad_norm": 1.6838126182556152, "learning_rate": 1.705016278340188e-08, "loss": 0.0312, "step": 29950 }, { "epoch": 98.2, "grad_norm": 2.4591150283813477, "learning_rate": 1.698823610032929e-08, "loss": 0.1284, "step": 29951 }, { "epoch": 98.20327868852459, "grad_norm": 1.7450073957443237, "learning_rate": 1.692642198609318e-08, "loss": 0.0417, "step": 29952 }, { "epoch": 98.20655737704918, "grad_norm": 2.7703769207000732, "learning_rate": 1.686472044139187e-08, "loss": 0.0631, "step": 29953 }, { "epoch": 98.20983606557377, "grad_norm": 1.7428737878799438, "learning_rate": 1.6803131466921473e-08, "loss": 0.0396, "step": 29954 }, { "epoch": 98.21311475409836, "grad_norm": 2.594977378845215, "learning_rate": 1.6741655063374775e-08, "loss": 0.0952, "step": 29955 }, { "epoch": 98.21639344262294, "grad_norm": 2.484380006790161, "learning_rate": 1.6680291231445656e-08, "loss": 0.1546, "step": 29956 }, { "epoch": 98.21967213114755, "grad_norm": 2.803349733352661, "learning_rate": 1.66190399718269e-08, "loss": 0.1644, "step": 29957 }, { "epoch": 98.22295081967214, "grad_norm": 3.074528694152832, "learning_rate": 1.6557901285209066e-08, "loss": 0.1064, "step": 29958 }, { "epoch": 98.22622950819672, "grad_norm": 2.7039637565612793, "learning_rate": 1.649687517228049e-08, "loss": 0.0736, "step": 29959 }, { "epoch": 98.22950819672131, "grad_norm": 1.6954203844070435, "learning_rate": 1.6435961633729514e-08, "loss": 0.035, "step": 29960 }, { "epoch": 98.2327868852459, "grad_norm": 1.653275728225708, "learning_rate": 1.6375160670244473e-08, "loss": 0.0973, "step": 29961 }, { "epoch": 98.23606557377049, "grad_norm": 2.242846965789795, "learning_rate": 1.6314472282509262e-08, "loss": 0.2301, "step": 29962 }, { "epoch": 98.23934426229508, "grad_norm": 3.0105297565460205, "learning_rate": 1.6253896471207785e-08, "loss": 0.1713, "step": 29963 }, { "epoch": 98.24262295081967, "grad_norm": 2.2056970596313477, "learning_rate": 1.6193433237026157e-08, "loss": 0.1728, "step": 29964 }, { "epoch": 98.24590163934427, "grad_norm": 1.4155933856964111, "learning_rate": 1.6133082580642722e-08, "loss": 0.1402, "step": 29965 }, { "epoch": 98.24918032786886, "grad_norm": 2.2736964225769043, "learning_rate": 1.607284450273916e-08, "loss": 0.0507, "step": 29966 }, { "epoch": 98.25245901639344, "grad_norm": 2.567512035369873, "learning_rate": 1.6012719003996036e-08, "loss": 0.1737, "step": 29967 }, { "epoch": 98.25573770491803, "grad_norm": 1.7714245319366455, "learning_rate": 1.5952706085089475e-08, "loss": 0.0458, "step": 29968 }, { "epoch": 98.25901639344262, "grad_norm": 2.6947309970855713, "learning_rate": 1.5892805746696716e-08, "loss": 0.1655, "step": 29969 }, { "epoch": 98.26229508196721, "grad_norm": 2.530273914337158, "learning_rate": 1.5833017989493882e-08, "loss": 0.0837, "step": 29970 }, { "epoch": 98.2655737704918, "grad_norm": 1.7944995164871216, "learning_rate": 1.577334281415488e-08, "loss": 0.0178, "step": 29971 }, { "epoch": 98.26885245901639, "grad_norm": 1.7866284847259521, "learning_rate": 1.5713780221352503e-08, "loss": 0.0653, "step": 29972 }, { "epoch": 98.27213114754099, "grad_norm": 2.0166869163513184, "learning_rate": 1.565433021175844e-08, "loss": 0.1101, "step": 29973 }, { "epoch": 98.27540983606558, "grad_norm": 2.559385061264038, "learning_rate": 1.559499278604215e-08, "loss": 0.0844, "step": 29974 }, { "epoch": 98.27868852459017, "grad_norm": 2.374727249145508, "learning_rate": 1.5535767944874215e-08, "loss": 0.1417, "step": 29975 }, { "epoch": 98.28196721311475, "grad_norm": 1.7225924730300903, "learning_rate": 1.5476655688921872e-08, "loss": 0.0629, "step": 29976 }, { "epoch": 98.28524590163934, "grad_norm": 1.7405749559402466, "learning_rate": 1.5417656018851257e-08, "loss": 0.0264, "step": 29977 }, { "epoch": 98.28852459016393, "grad_norm": 2.6041698455810547, "learning_rate": 1.5358768935327395e-08, "loss": 0.0948, "step": 29978 }, { "epoch": 98.29180327868852, "grad_norm": 6.078493118286133, "learning_rate": 1.529999443901531e-08, "loss": 0.0761, "step": 29979 }, { "epoch": 98.29508196721312, "grad_norm": 2.4638657569885254, "learning_rate": 1.524133253057669e-08, "loss": 0.2086, "step": 29980 }, { "epoch": 98.29836065573771, "grad_norm": 1.670217514038086, "learning_rate": 1.5182783210674347e-08, "loss": 0.1267, "step": 29981 }, { "epoch": 98.3016393442623, "grad_norm": 2.05678129196167, "learning_rate": 1.5124346479967744e-08, "loss": 0.0356, "step": 29982 }, { "epoch": 98.30491803278689, "grad_norm": 2.7762041091918945, "learning_rate": 1.506602233911525e-08, "loss": 0.1405, "step": 29983 }, { "epoch": 98.30819672131148, "grad_norm": 3.395017623901367, "learning_rate": 1.5007810788775222e-08, "loss": 0.0502, "step": 29984 }, { "epoch": 98.31147540983606, "grad_norm": 2.187243700027466, "learning_rate": 1.4949711829603807e-08, "loss": 0.0734, "step": 29985 }, { "epoch": 98.31475409836065, "grad_norm": 4.942701816558838, "learning_rate": 1.4891725462257145e-08, "loss": 0.1935, "step": 29986 }, { "epoch": 98.31803278688524, "grad_norm": 2.0573010444641113, "learning_rate": 1.4833851687386935e-08, "loss": 0.0774, "step": 29987 }, { "epoch": 98.32131147540984, "grad_norm": 2.1903157234191895, "learning_rate": 1.4776090505648211e-08, "loss": 0.0551, "step": 29988 }, { "epoch": 98.32459016393443, "grad_norm": 3.13287615776062, "learning_rate": 1.4718441917690452e-08, "loss": 0.2141, "step": 29989 }, { "epoch": 98.32786885245902, "grad_norm": 2.8105642795562744, "learning_rate": 1.4660905924164248e-08, "loss": 0.181, "step": 29990 }, { "epoch": 98.33114754098361, "grad_norm": 2.1365420818328857, "learning_rate": 1.4603482525717972e-08, "loss": 0.0718, "step": 29991 }, { "epoch": 98.3344262295082, "grad_norm": 2.482522964477539, "learning_rate": 1.45461717230011e-08, "loss": 0.101, "step": 29992 }, { "epoch": 98.33770491803278, "grad_norm": 1.4092611074447632, "learning_rate": 1.4488973516657568e-08, "loss": 0.0338, "step": 29993 }, { "epoch": 98.34098360655737, "grad_norm": 4.196657657623291, "learning_rate": 1.4431887907332409e-08, "loss": 0.2353, "step": 29994 }, { "epoch": 98.34426229508196, "grad_norm": 1.3984901905059814, "learning_rate": 1.4374914895671776e-08, "loss": 0.024, "step": 29995 }, { "epoch": 98.34754098360656, "grad_norm": 1.9540642499923706, "learning_rate": 1.4318054482315158e-08, "loss": 0.1546, "step": 29996 }, { "epoch": 98.35081967213115, "grad_norm": 2.2836132049560547, "learning_rate": 1.4261306667905372e-08, "loss": 0.0652, "step": 29997 }, { "epoch": 98.35409836065574, "grad_norm": 10.328777313232422, "learning_rate": 1.4204671453081909e-08, "loss": 0.1007, "step": 29998 }, { "epoch": 98.35737704918033, "grad_norm": 1.703665018081665, "learning_rate": 1.4148148838483145e-08, "loss": 0.0273, "step": 29999 }, { "epoch": 98.36065573770492, "grad_norm": 2.3900692462921143, "learning_rate": 1.4091738824747458e-08, "loss": 0.1879, "step": 30000 }, { "epoch": 98.3639344262295, "grad_norm": 3.6457154750823975, "learning_rate": 1.4035441412509899e-08, "loss": 0.1657, "step": 30001 }, { "epoch": 98.3672131147541, "grad_norm": 2.463503122329712, "learning_rate": 1.397925660240551e-08, "loss": 0.0811, "step": 30002 }, { "epoch": 98.37049180327868, "grad_norm": 1.7901524305343628, "learning_rate": 1.3923184395067124e-08, "loss": 0.0699, "step": 30003 }, { "epoch": 98.37377049180328, "grad_norm": 2.513571262359619, "learning_rate": 1.3867224791128675e-08, "loss": 0.3065, "step": 30004 }, { "epoch": 98.37704918032787, "grad_norm": 1.9573066234588623, "learning_rate": 1.381137779121966e-08, "loss": 0.0465, "step": 30005 }, { "epoch": 98.38032786885246, "grad_norm": 3.0797488689422607, "learning_rate": 1.3755643395970685e-08, "loss": 0.0705, "step": 30006 }, { "epoch": 98.38360655737705, "grad_norm": 2.082064151763916, "learning_rate": 1.370002160601014e-08, "loss": 0.0728, "step": 30007 }, { "epoch": 98.38688524590164, "grad_norm": 2.200930118560791, "learning_rate": 1.3644512421964184e-08, "loss": 0.1252, "step": 30008 }, { "epoch": 98.39016393442623, "grad_norm": 1.8007093667984009, "learning_rate": 1.3589115844460098e-08, "loss": 0.1027, "step": 30009 }, { "epoch": 98.39344262295081, "grad_norm": 2.7322657108306885, "learning_rate": 1.3533831874121828e-08, "loss": 0.1359, "step": 30010 }, { "epoch": 98.3967213114754, "grad_norm": 2.015838146209717, "learning_rate": 1.3478660511573316e-08, "loss": 0.06, "step": 30011 }, { "epoch": 98.4, "grad_norm": 3.625669002532959, "learning_rate": 1.3423601757436289e-08, "loss": 0.0783, "step": 30012 }, { "epoch": 98.4032786885246, "grad_norm": 2.6162068843841553, "learning_rate": 1.336865561233136e-08, "loss": 0.1733, "step": 30013 }, { "epoch": 98.40655737704918, "grad_norm": 3.02321195602417, "learning_rate": 1.3313822076878036e-08, "loss": 0.1224, "step": 30014 }, { "epoch": 98.40983606557377, "grad_norm": 1.8435481786727905, "learning_rate": 1.325910115169471e-08, "loss": 0.1285, "step": 30015 }, { "epoch": 98.41311475409836, "grad_norm": 2.612159252166748, "learning_rate": 1.3204492837399774e-08, "loss": 0.204, "step": 30016 }, { "epoch": 98.41639344262295, "grad_norm": 2.0669896602630615, "learning_rate": 1.3149997134607185e-08, "loss": 0.0776, "step": 30017 }, { "epoch": 98.41967213114754, "grad_norm": 3.1847991943359375, "learning_rate": 1.3095614043932004e-08, "loss": 0.1902, "step": 30018 }, { "epoch": 98.42295081967212, "grad_norm": 2.566502332687378, "learning_rate": 1.3041343565987074e-08, "loss": 0.2218, "step": 30019 }, { "epoch": 98.42622950819673, "grad_norm": 2.5045714378356934, "learning_rate": 1.2987185701385242e-08, "loss": 0.1439, "step": 30020 }, { "epoch": 98.42950819672132, "grad_norm": 2.595900774002075, "learning_rate": 1.2933140450737125e-08, "loss": 0.2307, "step": 30021 }, { "epoch": 98.4327868852459, "grad_norm": 1.9983900785446167, "learning_rate": 1.287920781465224e-08, "loss": 0.1046, "step": 30022 }, { "epoch": 98.43606557377049, "grad_norm": 1.3964678049087524, "learning_rate": 1.2825387793736766e-08, "loss": 0.019, "step": 30023 }, { "epoch": 98.43934426229508, "grad_norm": 2.882553815841675, "learning_rate": 1.2771680388600216e-08, "loss": 0.1084, "step": 30024 }, { "epoch": 98.44262295081967, "grad_norm": 1.9711201190948486, "learning_rate": 1.2718085599847662e-08, "loss": 0.0305, "step": 30025 }, { "epoch": 98.44590163934426, "grad_norm": 2.2597949504852295, "learning_rate": 1.2664603428080847e-08, "loss": 0.1161, "step": 30026 }, { "epoch": 98.44918032786886, "grad_norm": 1.8549593687057495, "learning_rate": 1.2611233873907059e-08, "loss": 0.0565, "step": 30027 }, { "epoch": 98.45245901639345, "grad_norm": 2.534135580062866, "learning_rate": 1.2557976937924709e-08, "loss": 0.0631, "step": 30028 }, { "epoch": 98.45573770491804, "grad_norm": 1.6743651628494263, "learning_rate": 1.2504832620735542e-08, "loss": 0.0848, "step": 30029 }, { "epoch": 98.45901639344262, "grad_norm": 2.6208438873291016, "learning_rate": 1.2451800922939072e-08, "loss": 0.185, "step": 30030 }, { "epoch": 98.46229508196721, "grad_norm": 3.8679661750793457, "learning_rate": 1.2398881845132605e-08, "loss": 0.1388, "step": 30031 }, { "epoch": 98.4655737704918, "grad_norm": 2.0143558979034424, "learning_rate": 1.2346075387913436e-08, "loss": 0.1005, "step": 30032 }, { "epoch": 98.46885245901639, "grad_norm": 2.2777318954467773, "learning_rate": 1.2293381551876649e-08, "loss": 0.0948, "step": 30033 }, { "epoch": 98.47213114754098, "grad_norm": 1.8126568794250488, "learning_rate": 1.224080033761732e-08, "loss": 0.0641, "step": 30034 }, { "epoch": 98.47540983606558, "grad_norm": 1.4513319730758667, "learning_rate": 1.21883317457272e-08, "loss": 0.0203, "step": 30035 }, { "epoch": 98.47868852459017, "grad_norm": 2.4353702068328857, "learning_rate": 1.2135975776798036e-08, "loss": 0.1356, "step": 30036 }, { "epoch": 98.48196721311476, "grad_norm": 2.3159914016723633, "learning_rate": 1.208373243142047e-08, "loss": 0.1944, "step": 30037 }, { "epoch": 98.48524590163935, "grad_norm": 3.5910723209381104, "learning_rate": 1.2031601710184026e-08, "loss": 0.1661, "step": 30038 }, { "epoch": 98.48852459016393, "grad_norm": 2.2783255577087402, "learning_rate": 1.1979583613676015e-08, "loss": 0.057, "step": 30039 }, { "epoch": 98.49180327868852, "grad_norm": 2.312199115753174, "learning_rate": 1.1927678142483746e-08, "loss": 0.1816, "step": 30040 }, { "epoch": 98.49508196721311, "grad_norm": 1.8027266263961792, "learning_rate": 1.1875885297191192e-08, "loss": 0.035, "step": 30041 }, { "epoch": 98.4983606557377, "grad_norm": 2.1080915927886963, "learning_rate": 1.1824205078383444e-08, "loss": 0.0638, "step": 30042 }, { "epoch": 98.5016393442623, "grad_norm": 3.853651523590088, "learning_rate": 1.1772637486642258e-08, "loss": 0.1274, "step": 30043 }, { "epoch": 98.50491803278689, "grad_norm": 1.7813751697540283, "learning_rate": 1.172118252254939e-08, "loss": 0.0279, "step": 30044 }, { "epoch": 98.50819672131148, "grad_norm": 2.6655585765838623, "learning_rate": 1.1669840186686599e-08, "loss": 0.1325, "step": 30045 }, { "epoch": 98.51147540983607, "grad_norm": 2.1408374309539795, "learning_rate": 1.1618610479631198e-08, "loss": 0.0726, "step": 30046 }, { "epoch": 98.51475409836065, "grad_norm": 2.6502573490142822, "learning_rate": 1.1567493401961616e-08, "loss": 0.1283, "step": 30047 }, { "epoch": 98.51803278688524, "grad_norm": 2.9078049659729004, "learning_rate": 1.1516488954252947e-08, "loss": 0.281, "step": 30048 }, { "epoch": 98.52131147540983, "grad_norm": 2.0400333404541016, "learning_rate": 1.1465597137082507e-08, "loss": 0.0621, "step": 30049 }, { "epoch": 98.52459016393442, "grad_norm": 1.8033256530761719, "learning_rate": 1.1414817951022062e-08, "loss": 0.049, "step": 30050 }, { "epoch": 98.52786885245902, "grad_norm": 2.2976744174957275, "learning_rate": 1.1364151396645596e-08, "loss": 0.1165, "step": 30051 }, { "epoch": 98.53114754098361, "grad_norm": 1.6440093517303467, "learning_rate": 1.1313597474523764e-08, "loss": 0.0341, "step": 30052 }, { "epoch": 98.5344262295082, "grad_norm": 1.988258957862854, "learning_rate": 1.1263156185226109e-08, "loss": 0.0986, "step": 30053 }, { "epoch": 98.53770491803279, "grad_norm": 2.120529890060425, "learning_rate": 1.1212827529322178e-08, "loss": 0.0616, "step": 30054 }, { "epoch": 98.54098360655738, "grad_norm": 2.7425994873046875, "learning_rate": 1.1162611507380406e-08, "loss": 0.1689, "step": 30055 }, { "epoch": 98.54426229508196, "grad_norm": 2.290138006210327, "learning_rate": 1.1112508119964782e-08, "loss": 0.1539, "step": 30056 }, { "epoch": 98.54754098360655, "grad_norm": 1.7370368242263794, "learning_rate": 1.1062517367642633e-08, "loss": 0.12, "step": 30057 }, { "epoch": 98.55081967213114, "grad_norm": 1.8587604761123657, "learning_rate": 1.1012639250975731e-08, "loss": 0.0954, "step": 30058 }, { "epoch": 98.55409836065574, "grad_norm": 3.1046440601348877, "learning_rate": 1.096287377052696e-08, "loss": 0.1185, "step": 30059 }, { "epoch": 98.55737704918033, "grad_norm": 1.7949126958847046, "learning_rate": 1.0913220926858092e-08, "loss": 0.043, "step": 30060 }, { "epoch": 98.56065573770492, "grad_norm": 2.330597162246704, "learning_rate": 1.086368072052868e-08, "loss": 0.1343, "step": 30061 }, { "epoch": 98.56393442622951, "grad_norm": 2.655237913131714, "learning_rate": 1.0814253152098275e-08, "loss": 0.2642, "step": 30062 }, { "epoch": 98.5672131147541, "grad_norm": 1.7795099020004272, "learning_rate": 1.076493822212199e-08, "loss": 0.0632, "step": 30063 }, { "epoch": 98.57049180327868, "grad_norm": 2.0528151988983154, "learning_rate": 1.0715735931158266e-08, "loss": 0.0763, "step": 30064 }, { "epoch": 98.57377049180327, "grad_norm": 2.169806480407715, "learning_rate": 1.0666646279759996e-08, "loss": 0.2024, "step": 30065 }, { "epoch": 98.57704918032788, "grad_norm": 37.47180938720703, "learning_rate": 1.061766926848229e-08, "loss": 0.1393, "step": 30066 }, { "epoch": 98.58032786885246, "grad_norm": 2.668724536895752, "learning_rate": 1.0568804897875818e-08, "loss": 0.1847, "step": 30067 }, { "epoch": 98.58360655737705, "grad_norm": 1.7777791023254395, "learning_rate": 1.0520053168493471e-08, "loss": 0.1044, "step": 30068 }, { "epoch": 98.58688524590164, "grad_norm": 1.8360083103179932, "learning_rate": 1.0471414080883702e-08, "loss": 0.0996, "step": 30069 }, { "epoch": 98.59016393442623, "grad_norm": 2.9157824516296387, "learning_rate": 1.0422887635594959e-08, "loss": 0.1872, "step": 30070 }, { "epoch": 98.59344262295082, "grad_norm": 1.8463335037231445, "learning_rate": 1.0374473833174581e-08, "loss": 0.0457, "step": 30071 }, { "epoch": 98.5967213114754, "grad_norm": 1.807242512702942, "learning_rate": 1.03261726741688e-08, "loss": 0.0687, "step": 30072 }, { "epoch": 98.6, "grad_norm": 2.264691114425659, "learning_rate": 1.0277984159122734e-08, "loss": 0.0939, "step": 30073 }, { "epoch": 98.6032786885246, "grad_norm": 2.7395589351654053, "learning_rate": 1.0229908288578171e-08, "loss": 0.1568, "step": 30074 }, { "epoch": 98.60655737704919, "grad_norm": 2.7784860134124756, "learning_rate": 1.0181945063079125e-08, "loss": 0.0924, "step": 30075 }, { "epoch": 98.60983606557377, "grad_norm": 2.5223259925842285, "learning_rate": 1.0134094483164048e-08, "loss": 0.085, "step": 30076 }, { "epoch": 98.61311475409836, "grad_norm": 1.6532922983169556, "learning_rate": 1.0086356549374731e-08, "loss": 0.0346, "step": 30077 }, { "epoch": 98.61639344262295, "grad_norm": 2.411806344985962, "learning_rate": 1.0038731262248524e-08, "loss": 0.188, "step": 30078 }, { "epoch": 98.61967213114754, "grad_norm": 3.5012459754943848, "learning_rate": 9.991218622322774e-09, "loss": 0.2388, "step": 30079 }, { "epoch": 98.62295081967213, "grad_norm": 2.146517038345337, "learning_rate": 9.943818630133716e-09, "loss": 0.1931, "step": 30080 }, { "epoch": 98.62622950819672, "grad_norm": 2.9661567211151123, "learning_rate": 9.896531286214261e-09, "loss": 0.1028, "step": 30081 }, { "epoch": 98.62950819672132, "grad_norm": 2.9816782474517822, "learning_rate": 9.849356591098424e-09, "loss": 0.1604, "step": 30082 }, { "epoch": 98.6327868852459, "grad_norm": 2.9365553855895996, "learning_rate": 9.802294545318003e-09, "loss": 0.0999, "step": 30083 }, { "epoch": 98.6360655737705, "grad_norm": 3.022219181060791, "learning_rate": 9.755345149404794e-09, "loss": 0.196, "step": 30084 }, { "epoch": 98.63934426229508, "grad_norm": 2.2271435260772705, "learning_rate": 9.708508403887262e-09, "loss": 0.0683, "step": 30085 }, { "epoch": 98.64262295081967, "grad_norm": 2.642394542694092, "learning_rate": 9.661784309292765e-09, "loss": 0.1434, "step": 30086 }, { "epoch": 98.64590163934426, "grad_norm": 1.8007584810256958, "learning_rate": 9.615172866149768e-09, "loss": 0.055, "step": 30087 }, { "epoch": 98.64918032786885, "grad_norm": 2.3440351486206055, "learning_rate": 9.568674074982298e-09, "loss": 0.141, "step": 30088 }, { "epoch": 98.65245901639344, "grad_norm": 2.8871004581451416, "learning_rate": 9.522287936316599e-09, "loss": 0.1671, "step": 30089 }, { "epoch": 98.65573770491804, "grad_norm": 1.6987236738204956, "learning_rate": 9.476014450673365e-09, "loss": 0.048, "step": 30090 }, { "epoch": 98.65901639344263, "grad_norm": 1.8157142400741577, "learning_rate": 9.429853618576622e-09, "loss": 0.09, "step": 30091 }, { "epoch": 98.66229508196722, "grad_norm": 2.3950774669647217, "learning_rate": 9.383805440545957e-09, "loss": 0.1078, "step": 30092 }, { "epoch": 98.6655737704918, "grad_norm": 2.4190964698791504, "learning_rate": 9.33786991709984e-09, "loss": 0.1375, "step": 30093 }, { "epoch": 98.66885245901639, "grad_norm": 2.4955267906188965, "learning_rate": 9.292047048756747e-09, "loss": 0.0921, "step": 30094 }, { "epoch": 98.67213114754098, "grad_norm": 2.715076446533203, "learning_rate": 9.246336836034043e-09, "loss": 0.1588, "step": 30095 }, { "epoch": 98.67540983606557, "grad_norm": 2.1009347438812256, "learning_rate": 9.200739279446868e-09, "loss": 0.1711, "step": 30096 }, { "epoch": 98.67868852459016, "grad_norm": 2.1006808280944824, "learning_rate": 9.155254379508149e-09, "loss": 0.132, "step": 30097 }, { "epoch": 98.68196721311476, "grad_norm": 2.92031192779541, "learning_rate": 9.109882136733029e-09, "loss": 0.1665, "step": 30098 }, { "epoch": 98.68524590163935, "grad_norm": 1.6737922430038452, "learning_rate": 9.064622551631098e-09, "loss": 0.0289, "step": 30099 }, { "epoch": 98.68852459016394, "grad_norm": 2.251236915588379, "learning_rate": 9.019475624714169e-09, "loss": 0.1531, "step": 30100 }, { "epoch": 98.69180327868852, "grad_norm": 2.2486867904663086, "learning_rate": 8.974441356489616e-09, "loss": 0.099, "step": 30101 }, { "epoch": 98.69508196721311, "grad_norm": 2.443026065826416, "learning_rate": 8.92951974746703e-09, "loss": 0.0221, "step": 30102 }, { "epoch": 98.6983606557377, "grad_norm": 2.709740161895752, "learning_rate": 8.884710798152674e-09, "loss": 0.0741, "step": 30103 }, { "epoch": 98.70163934426229, "grad_norm": 1.751780390739441, "learning_rate": 8.840014509050588e-09, "loss": 0.021, "step": 30104 }, { "epoch": 98.70491803278688, "grad_norm": 1.9415446519851685, "learning_rate": 8.795430880665922e-09, "loss": 0.1258, "step": 30105 }, { "epoch": 98.70819672131148, "grad_norm": 3.329927444458008, "learning_rate": 8.750959913500501e-09, "loss": 0.1376, "step": 30106 }, { "epoch": 98.71147540983607, "grad_norm": 1.7823575735092163, "learning_rate": 8.706601608057252e-09, "loss": 0.0288, "step": 30107 }, { "epoch": 98.71475409836066, "grad_norm": 3.438586473464966, "learning_rate": 8.662355964834667e-09, "loss": 0.114, "step": 30108 }, { "epoch": 98.71803278688525, "grad_norm": 2.816682815551758, "learning_rate": 8.618222984332347e-09, "loss": 0.1362, "step": 30109 }, { "epoch": 98.72131147540983, "grad_norm": 2.558320999145508, "learning_rate": 8.574202667048782e-09, "loss": 0.2116, "step": 30110 }, { "epoch": 98.72459016393442, "grad_norm": 2.007495880126953, "learning_rate": 8.530295013479129e-09, "loss": 0.0494, "step": 30111 }, { "epoch": 98.72786885245901, "grad_norm": 2.507706880569458, "learning_rate": 8.48650002411855e-09, "loss": 0.0996, "step": 30112 }, { "epoch": 98.73114754098361, "grad_norm": 1.820471167564392, "learning_rate": 8.442817699462202e-09, "loss": 0.0325, "step": 30113 }, { "epoch": 98.7344262295082, "grad_norm": 1.723832130432129, "learning_rate": 8.399248040000808e-09, "loss": 0.0414, "step": 30114 }, { "epoch": 98.73770491803279, "grad_norm": 2.418025255203247, "learning_rate": 8.355791046226191e-09, "loss": 0.1215, "step": 30115 }, { "epoch": 98.74098360655738, "grad_norm": 2.5994324684143066, "learning_rate": 8.312446718630186e-09, "loss": 0.0925, "step": 30116 }, { "epoch": 98.74426229508197, "grad_norm": 1.5028743743896484, "learning_rate": 8.269215057699066e-09, "loss": 0.0841, "step": 30117 }, { "epoch": 98.74754098360656, "grad_norm": 2.528975248336792, "learning_rate": 8.22609606392133e-09, "loss": 0.2067, "step": 30118 }, { "epoch": 98.75081967213114, "grad_norm": 2.671027183532715, "learning_rate": 8.183089737783256e-09, "loss": 0.0991, "step": 30119 }, { "epoch": 98.75409836065573, "grad_norm": 1.3517897129058838, "learning_rate": 8.140196079770013e-09, "loss": 0.0234, "step": 30120 }, { "epoch": 98.75737704918033, "grad_norm": 2.4427871704101562, "learning_rate": 8.097415090364547e-09, "loss": 0.046, "step": 30121 }, { "epoch": 98.76065573770492, "grad_norm": 2.1020708084106445, "learning_rate": 8.054746770049804e-09, "loss": 0.132, "step": 30122 }, { "epoch": 98.76393442622951, "grad_norm": 2.4238791465759277, "learning_rate": 8.012191119307622e-09, "loss": 0.1569, "step": 30123 }, { "epoch": 98.7672131147541, "grad_norm": 2.2644870281219482, "learning_rate": 7.969748138616507e-09, "loss": 0.0743, "step": 30124 }, { "epoch": 98.77049180327869, "grad_norm": 2.0860445499420166, "learning_rate": 7.927417828454965e-09, "loss": 0.0628, "step": 30125 }, { "epoch": 98.77377049180328, "grad_norm": 2.1763176918029785, "learning_rate": 7.8852001893015e-09, "loss": 0.1999, "step": 30126 }, { "epoch": 98.77704918032786, "grad_norm": 1.9138213396072388, "learning_rate": 7.843095221631291e-09, "loss": 0.074, "step": 30127 }, { "epoch": 98.78032786885245, "grad_norm": 1.72942316532135, "learning_rate": 7.801102925920622e-09, "loss": 0.0428, "step": 30128 }, { "epoch": 98.78360655737706, "grad_norm": 1.8660542964935303, "learning_rate": 7.759223302640228e-09, "loss": 0.0663, "step": 30129 }, { "epoch": 98.78688524590164, "grad_norm": 2.2613937854766846, "learning_rate": 7.717456352264175e-09, "loss": 0.147, "step": 30130 }, { "epoch": 98.79016393442623, "grad_norm": 2.338191032409668, "learning_rate": 7.675802075264305e-09, "loss": 0.0966, "step": 30131 }, { "epoch": 98.79344262295082, "grad_norm": 2.0391616821289062, "learning_rate": 7.634260472108023e-09, "loss": 0.0407, "step": 30132 }, { "epoch": 98.79672131147541, "grad_norm": 2.8900890350341797, "learning_rate": 7.592831543266066e-09, "loss": 0.11, "step": 30133 }, { "epoch": 98.8, "grad_norm": 1.686813235282898, "learning_rate": 7.551515289203615e-09, "loss": 0.0317, "step": 30134 }, { "epoch": 98.80327868852459, "grad_norm": 2.7955713272094727, "learning_rate": 7.510311710386964e-09, "loss": 0.0992, "step": 30135 }, { "epoch": 98.80655737704917, "grad_norm": 2.2832114696502686, "learning_rate": 7.469220807281297e-09, "loss": 0.0998, "step": 30136 }, { "epoch": 98.80983606557378, "grad_norm": 2.3713631629943848, "learning_rate": 7.428242580350686e-09, "loss": 0.1119, "step": 30137 }, { "epoch": 98.81311475409836, "grad_norm": 2.2125539779663086, "learning_rate": 7.387377030055875e-09, "loss": 0.1438, "step": 30138 }, { "epoch": 98.81639344262295, "grad_norm": 2.729743719100952, "learning_rate": 7.3466241568576065e-09, "loss": 0.0702, "step": 30139 }, { "epoch": 98.81967213114754, "grad_norm": 2.010347843170166, "learning_rate": 7.305983961216623e-09, "loss": 0.0704, "step": 30140 }, { "epoch": 98.82295081967213, "grad_norm": 2.271327257156372, "learning_rate": 7.265456443590335e-09, "loss": 0.0748, "step": 30141 }, { "epoch": 98.82622950819672, "grad_norm": 1.9718835353851318, "learning_rate": 7.225041604435046e-09, "loss": 0.0661, "step": 30142 }, { "epoch": 98.8295081967213, "grad_norm": 1.9139741659164429, "learning_rate": 7.1847394442081665e-09, "loss": 0.0427, "step": 30143 }, { "epoch": 98.8327868852459, "grad_norm": 1.6171306371688843, "learning_rate": 7.144549963362668e-09, "loss": 0.0311, "step": 30144 }, { "epoch": 98.8360655737705, "grad_norm": 2.1148478984832764, "learning_rate": 7.104473162352632e-09, "loss": 0.0644, "step": 30145 }, { "epoch": 98.83934426229509, "grad_norm": 2.7952182292938232, "learning_rate": 7.064509041629919e-09, "loss": 0.1307, "step": 30146 }, { "epoch": 98.84262295081967, "grad_norm": 2.3900346755981445, "learning_rate": 7.02465760164417e-09, "loss": 0.1675, "step": 30147 }, { "epoch": 98.84590163934426, "grad_norm": 2.417673110961914, "learning_rate": 6.984918842846133e-09, "loss": 0.0393, "step": 30148 }, { "epoch": 98.84918032786885, "grad_norm": 2.8649988174438477, "learning_rate": 6.945292765683231e-09, "loss": 0.1039, "step": 30149 }, { "epoch": 98.85245901639344, "grad_norm": 2.334704637527466, "learning_rate": 6.905779370601773e-09, "loss": 0.0638, "step": 30150 }, { "epoch": 98.85573770491803, "grad_norm": 2.0614724159240723, "learning_rate": 6.866378658049178e-09, "loss": 0.0672, "step": 30151 }, { "epoch": 98.85901639344263, "grad_norm": 2.7122974395751953, "learning_rate": 6.8270906284673145e-09, "loss": 0.0913, "step": 30152 }, { "epoch": 98.86229508196722, "grad_norm": 2.8168301582336426, "learning_rate": 6.7879152823002726e-09, "loss": 0.2531, "step": 30153 }, { "epoch": 98.8655737704918, "grad_norm": 2.527911901473999, "learning_rate": 6.74885261998992e-09, "loss": 0.109, "step": 30154 }, { "epoch": 98.8688524590164, "grad_norm": 2.1330819129943848, "learning_rate": 6.709902641977018e-09, "loss": 0.1597, "step": 30155 }, { "epoch": 98.87213114754098, "grad_norm": 2.3429484367370605, "learning_rate": 6.6710653487001005e-09, "loss": 0.0698, "step": 30156 }, { "epoch": 98.87540983606557, "grad_norm": 2.2581846714019775, "learning_rate": 6.632340740597709e-09, "loss": 0.1213, "step": 30157 }, { "epoch": 98.87868852459016, "grad_norm": 1.2880868911743164, "learning_rate": 6.5937288181061595e-09, "loss": 0.0281, "step": 30158 }, { "epoch": 98.88196721311475, "grad_norm": 2.2779428958892822, "learning_rate": 6.555229581660661e-09, "loss": 0.0862, "step": 30159 }, { "epoch": 98.88524590163935, "grad_norm": 2.4110777378082275, "learning_rate": 6.516843031695308e-09, "loss": 0.068, "step": 30160 }, { "epoch": 98.88852459016394, "grad_norm": 2.3259949684143066, "learning_rate": 6.47856916864309e-09, "loss": 0.0784, "step": 30161 }, { "epoch": 98.89180327868853, "grad_norm": 2.5495173931121826, "learning_rate": 6.440407992935882e-09, "loss": 0.0758, "step": 30162 }, { "epoch": 98.89508196721312, "grad_norm": 3.051332712173462, "learning_rate": 6.4023595050044514e-09, "loss": 0.2208, "step": 30163 }, { "epoch": 98.8983606557377, "grad_norm": 2.754591464996338, "learning_rate": 6.3644237052762346e-09, "loss": 0.1184, "step": 30164 }, { "epoch": 98.90163934426229, "grad_norm": 1.7873109579086304, "learning_rate": 6.326600594179777e-09, "loss": 0.1221, "step": 30165 }, { "epoch": 98.90491803278688, "grad_norm": 2.0353825092315674, "learning_rate": 6.288890172142515e-09, "loss": 0.0537, "step": 30166 }, { "epoch": 98.90819672131147, "grad_norm": 1.900235652923584, "learning_rate": 6.251292439588552e-09, "loss": 0.0427, "step": 30167 }, { "epoch": 98.91147540983607, "grad_norm": 2.588801145553589, "learning_rate": 6.213807396941995e-09, "loss": 0.1009, "step": 30168 }, { "epoch": 98.91475409836066, "grad_norm": 1.9704303741455078, "learning_rate": 6.176435044625839e-09, "loss": 0.1511, "step": 30169 }, { "epoch": 98.91803278688525, "grad_norm": 2.46897029876709, "learning_rate": 6.139175383060858e-09, "loss": 0.07, "step": 30170 }, { "epoch": 98.92131147540984, "grad_norm": 2.5568439960479736, "learning_rate": 6.102028412667827e-09, "loss": 0.153, "step": 30171 }, { "epoch": 98.92459016393443, "grad_norm": 1.9122005701065063, "learning_rate": 6.064994133866409e-09, "loss": 0.0497, "step": 30172 }, { "epoch": 98.92786885245901, "grad_norm": 1.6565065383911133, "learning_rate": 6.028072547071828e-09, "loss": 0.0347, "step": 30173 }, { "epoch": 98.9311475409836, "grad_norm": 3.136988639831543, "learning_rate": 5.991263652703749e-09, "loss": 0.186, "step": 30174 }, { "epoch": 98.93442622950819, "grad_norm": 1.7039719820022583, "learning_rate": 5.954567451174065e-09, "loss": 0.0266, "step": 30175 }, { "epoch": 98.9377049180328, "grad_norm": 2.3183200359344482, "learning_rate": 5.917983942897998e-09, "loss": 0.0534, "step": 30176 }, { "epoch": 98.94098360655738, "grad_norm": 2.4517314434051514, "learning_rate": 5.881513128287442e-09, "loss": 0.1117, "step": 30177 }, { "epoch": 98.94426229508197, "grad_norm": 1.847192406654358, "learning_rate": 5.845155007754288e-09, "loss": 0.1832, "step": 30178 }, { "epoch": 98.94754098360656, "grad_norm": 2.160222053527832, "learning_rate": 5.808909581709321e-09, "loss": 0.1505, "step": 30179 }, { "epoch": 98.95081967213115, "grad_norm": 2.0202693939208984, "learning_rate": 5.772776850558881e-09, "loss": 0.1912, "step": 30180 }, { "epoch": 98.95409836065573, "grad_norm": 1.8713618516921997, "learning_rate": 5.73675681471264e-09, "loss": 0.0515, "step": 30181 }, { "epoch": 98.95737704918032, "grad_norm": 2.1281933784484863, "learning_rate": 5.700849474575831e-09, "loss": 0.0426, "step": 30182 }, { "epoch": 98.96065573770491, "grad_norm": 1.8898347616195679, "learning_rate": 5.665054830553684e-09, "loss": 0.1066, "step": 30183 }, { "epoch": 98.96393442622951, "grad_norm": 2.150033712387085, "learning_rate": 5.6293728830492115e-09, "loss": 0.0785, "step": 30184 }, { "epoch": 98.9672131147541, "grad_norm": 1.9421231746673584, "learning_rate": 5.593803632464312e-09, "loss": 0.0596, "step": 30185 }, { "epoch": 98.97049180327869, "grad_norm": 2.3011152744293213, "learning_rate": 5.5583470792019975e-09, "loss": 0.0532, "step": 30186 }, { "epoch": 98.97377049180328, "grad_norm": 1.609649419784546, "learning_rate": 5.523003223659729e-09, "loss": 0.0604, "step": 30187 }, { "epoch": 98.97704918032787, "grad_norm": 1.9653677940368652, "learning_rate": 5.487772066238295e-09, "loss": 0.0632, "step": 30188 }, { "epoch": 98.98032786885246, "grad_norm": 2.4815189838409424, "learning_rate": 5.452653607334046e-09, "loss": 0.0979, "step": 30189 }, { "epoch": 98.98360655737704, "grad_norm": 2.258544921875, "learning_rate": 5.417647847342222e-09, "loss": 0.1279, "step": 30190 }, { "epoch": 98.98688524590163, "grad_norm": 1.950515866279602, "learning_rate": 5.382754786658062e-09, "loss": 0.0884, "step": 30191 }, { "epoch": 98.99016393442623, "grad_norm": 1.5699107646942139, "learning_rate": 5.347974425675695e-09, "loss": 0.0231, "step": 30192 }, { "epoch": 98.99344262295082, "grad_norm": 2.26651668548584, "learning_rate": 5.313306764787029e-09, "loss": 0.0715, "step": 30193 }, { "epoch": 98.99672131147541, "grad_norm": 2.5384349822998047, "learning_rate": 5.278751804381754e-09, "loss": 0.09, "step": 30194 }, { "epoch": 99.0, "grad_norm": 2.3176329135894775, "learning_rate": 5.2443095448506674e-09, "loss": 0.0681, "step": 30195 }, { "epoch": 99.00327868852459, "grad_norm": 2.349560260772705, "learning_rate": 5.209979986582347e-09, "loss": 0.0857, "step": 30196 }, { "epoch": 99.00655737704918, "grad_norm": 2.5774388313293457, "learning_rate": 5.175763129963152e-09, "loss": 0.0837, "step": 30197 }, { "epoch": 99.00983606557377, "grad_norm": 1.9940346479415894, "learning_rate": 5.1416589753794376e-09, "loss": 0.0848, "step": 30198 }, { "epoch": 99.01311475409837, "grad_norm": 3.015000820159912, "learning_rate": 5.1076675232153426e-09, "loss": 0.1166, "step": 30199 }, { "epoch": 99.01639344262296, "grad_norm": 1.6278749704360962, "learning_rate": 5.073788773855004e-09, "loss": 0.0327, "step": 30200 }, { "epoch": 99.01967213114754, "grad_norm": 2.3579843044281006, "learning_rate": 5.040022727679228e-09, "loss": 0.1838, "step": 30201 }, { "epoch": 99.02295081967213, "grad_norm": 3.3477683067321777, "learning_rate": 5.0063693850699315e-09, "loss": 0.1724, "step": 30202 }, { "epoch": 99.02622950819672, "grad_norm": 2.378828287124634, "learning_rate": 4.9728287464057e-09, "loss": 0.17, "step": 30203 }, { "epoch": 99.02950819672131, "grad_norm": 2.332108736038208, "learning_rate": 4.93940081206401e-09, "loss": 0.1715, "step": 30204 }, { "epoch": 99.0327868852459, "grad_norm": 2.3296878337860107, "learning_rate": 4.906085582424558e-09, "loss": 0.1914, "step": 30205 }, { "epoch": 99.03606557377049, "grad_norm": 1.8559507131576538, "learning_rate": 4.872883057860378e-09, "loss": 0.037, "step": 30206 }, { "epoch": 99.03934426229509, "grad_norm": 1.8813265562057495, "learning_rate": 4.8397932387467265e-09, "loss": 0.1049, "step": 30207 }, { "epoch": 99.04262295081968, "grad_norm": 3.161968469619751, "learning_rate": 4.806816125456637e-09, "loss": 0.0612, "step": 30208 }, { "epoch": 99.04590163934427, "grad_norm": 2.1867520809173584, "learning_rate": 4.773951718362035e-09, "loss": 0.1593, "step": 30209 }, { "epoch": 99.04918032786885, "grad_norm": 1.88020658493042, "learning_rate": 4.741200017833736e-09, "loss": 0.0649, "step": 30210 }, { "epoch": 99.05245901639344, "grad_norm": 2.076478958129883, "learning_rate": 4.708561024241443e-09, "loss": 0.052, "step": 30211 }, { "epoch": 99.05573770491803, "grad_norm": 2.386718988418579, "learning_rate": 4.676034737951529e-09, "loss": 0.1192, "step": 30212 }, { "epoch": 99.05901639344262, "grad_norm": 1.7904729843139648, "learning_rate": 4.64362115933259e-09, "loss": 0.0516, "step": 30213 }, { "epoch": 99.0622950819672, "grad_norm": 2.462702512741089, "learning_rate": 4.611320288749887e-09, "loss": 0.0862, "step": 30214 }, { "epoch": 99.06557377049181, "grad_norm": 2.5891733169555664, "learning_rate": 4.579132126566465e-09, "loss": 0.1666, "step": 30215 }, { "epoch": 99.0688524590164, "grad_norm": 2.6911118030548096, "learning_rate": 4.547056673145367e-09, "loss": 0.1044, "step": 30216 }, { "epoch": 99.07213114754099, "grad_norm": 1.5957988500595093, "learning_rate": 4.515093928849634e-09, "loss": 0.1064, "step": 30217 }, { "epoch": 99.07540983606557, "grad_norm": 2.5953028202056885, "learning_rate": 4.48324389403898e-09, "loss": 0.0605, "step": 30218 }, { "epoch": 99.07868852459016, "grad_norm": 5.43726110458374, "learning_rate": 4.451506569073116e-09, "loss": 0.0897, "step": 30219 }, { "epoch": 99.08196721311475, "grad_norm": 2.3312783241271973, "learning_rate": 4.4198819543084244e-09, "loss": 0.1106, "step": 30220 }, { "epoch": 99.08524590163934, "grad_norm": 2.4790711402893066, "learning_rate": 4.388370050102397e-09, "loss": 0.1174, "step": 30221 }, { "epoch": 99.08852459016393, "grad_norm": 1.6586079597473145, "learning_rate": 4.356970856810305e-09, "loss": 0.0475, "step": 30222 }, { "epoch": 99.09180327868853, "grad_norm": 1.8119244575500488, "learning_rate": 4.3256843747863095e-09, "loss": 0.0491, "step": 30223 }, { "epoch": 99.09508196721312, "grad_norm": 2.2722511291503906, "learning_rate": 4.294510604382352e-09, "loss": 0.0698, "step": 30224 }, { "epoch": 99.09836065573771, "grad_norm": 2.191455364227295, "learning_rate": 4.263449545951481e-09, "loss": 0.1436, "step": 30225 }, { "epoch": 99.1016393442623, "grad_norm": 1.7399766445159912, "learning_rate": 4.232501199843419e-09, "loss": 0.0413, "step": 30226 }, { "epoch": 99.10491803278688, "grad_norm": 2.6740972995758057, "learning_rate": 4.201665566406776e-09, "loss": 0.2174, "step": 30227 }, { "epoch": 99.10819672131147, "grad_norm": 2.4125125408172607, "learning_rate": 4.170942645989051e-09, "loss": 0.1007, "step": 30228 }, { "epoch": 99.11147540983606, "grad_norm": 2.906301498413086, "learning_rate": 4.140332438937744e-09, "loss": 0.0547, "step": 30229 }, { "epoch": 99.11475409836065, "grad_norm": 1.8977270126342773, "learning_rate": 4.109834945595914e-09, "loss": 0.0403, "step": 30230 }, { "epoch": 99.11803278688525, "grad_norm": 1.8443576097488403, "learning_rate": 4.07945016630995e-09, "loss": 0.0496, "step": 30231 }, { "epoch": 99.12131147540984, "grad_norm": 2.489854335784912, "learning_rate": 4.049178101421802e-09, "loss": 0.0491, "step": 30232 }, { "epoch": 99.12459016393443, "grad_norm": 2.4204838275909424, "learning_rate": 4.0190187512711976e-09, "loss": 0.1484, "step": 30233 }, { "epoch": 99.12786885245902, "grad_norm": 1.9746938943862915, "learning_rate": 3.9889721161989745e-09, "loss": 0.0922, "step": 30234 }, { "epoch": 99.1311475409836, "grad_norm": 1.377820611000061, "learning_rate": 3.959038196545972e-09, "loss": 0.0165, "step": 30235 }, { "epoch": 99.1344262295082, "grad_norm": 2.4125781059265137, "learning_rate": 3.929216992647477e-09, "loss": 0.109, "step": 30236 }, { "epoch": 99.13770491803278, "grad_norm": 2.361997365951538, "learning_rate": 3.899508504839888e-09, "loss": 0.1091, "step": 30237 }, { "epoch": 99.14098360655737, "grad_norm": 2.171743154525757, "learning_rate": 3.869912733458492e-09, "loss": 0.0531, "step": 30238 }, { "epoch": 99.14426229508197, "grad_norm": 2.322862148284912, "learning_rate": 3.8404296788374654e-09, "loss": 0.2518, "step": 30239 }, { "epoch": 99.14754098360656, "grad_norm": 2.4697654247283936, "learning_rate": 3.8110593413098755e-09, "loss": 0.166, "step": 30240 }, { "epoch": 99.15081967213115, "grad_norm": 2.4458842277526855, "learning_rate": 3.781801721204348e-09, "loss": 0.1545, "step": 30241 }, { "epoch": 99.15409836065574, "grad_norm": 1.8626981973648071, "learning_rate": 3.75265681885395e-09, "loss": 0.1275, "step": 30242 }, { "epoch": 99.15737704918033, "grad_norm": 2.181880235671997, "learning_rate": 3.723624634585088e-09, "loss": 0.1658, "step": 30243 }, { "epoch": 99.16065573770491, "grad_norm": 2.2536768913269043, "learning_rate": 3.694705168726387e-09, "loss": 0.1206, "step": 30244 }, { "epoch": 99.1639344262295, "grad_norm": 1.577263593673706, "learning_rate": 3.6658984216031425e-09, "loss": 0.047, "step": 30245 }, { "epoch": 99.1672131147541, "grad_norm": 2.0362207889556885, "learning_rate": 3.63720439354065e-09, "loss": 0.0418, "step": 30246 }, { "epoch": 99.1704918032787, "grad_norm": 1.6073088645935059, "learning_rate": 3.608623084861984e-09, "loss": 0.1227, "step": 30247 }, { "epoch": 99.17377049180328, "grad_norm": 1.6273030042648315, "learning_rate": 3.5801544958891097e-09, "loss": 0.0259, "step": 30248 }, { "epoch": 99.17704918032787, "grad_norm": 2.039372682571411, "learning_rate": 3.551798626945102e-09, "loss": 0.1336, "step": 30249 }, { "epoch": 99.18032786885246, "grad_norm": 2.4145114421844482, "learning_rate": 3.5235554783474845e-09, "loss": 0.0375, "step": 30250 }, { "epoch": 99.18360655737705, "grad_norm": 2.4259603023529053, "learning_rate": 3.4954250504148913e-09, "loss": 0.1158, "step": 30251 }, { "epoch": 99.18688524590164, "grad_norm": 2.113369941711426, "learning_rate": 3.467407343465956e-09, "loss": 0.1067, "step": 30252 }, { "epoch": 99.19016393442622, "grad_norm": 2.0359652042388916, "learning_rate": 3.4395023578159823e-09, "loss": 0.0904, "step": 30253 }, { "epoch": 99.19344262295083, "grad_norm": 2.4027328491210938, "learning_rate": 3.4117100937791634e-09, "loss": 0.0838, "step": 30254 }, { "epoch": 99.19672131147541, "grad_norm": 1.9421045780181885, "learning_rate": 3.3840305516696927e-09, "loss": 0.0552, "step": 30255 }, { "epoch": 99.2, "grad_norm": 1.9880554676055908, "learning_rate": 3.3564637317984318e-09, "loss": 0.1032, "step": 30256 }, { "epoch": 99.20327868852459, "grad_norm": 3.5579450130462646, "learning_rate": 3.3290096344773538e-09, "loss": 0.2212, "step": 30257 }, { "epoch": 99.20655737704918, "grad_norm": 2.4428508281707764, "learning_rate": 3.3016682600151005e-09, "loss": 0.0655, "step": 30258 }, { "epoch": 99.20983606557377, "grad_norm": 2.693154811859131, "learning_rate": 3.2744396087203146e-09, "loss": 0.1704, "step": 30259 }, { "epoch": 99.21311475409836, "grad_norm": 2.773261547088623, "learning_rate": 3.247323680900527e-09, "loss": 0.0698, "step": 30260 }, { "epoch": 99.21639344262294, "grad_norm": 2.4422857761383057, "learning_rate": 3.2203204768610497e-09, "loss": 0.2424, "step": 30261 }, { "epoch": 99.21967213114755, "grad_norm": 2.7265639305114746, "learning_rate": 3.1934299969071934e-09, "loss": 0.1471, "step": 30262 }, { "epoch": 99.22295081967214, "grad_norm": 2.481107473373413, "learning_rate": 3.1666522413409397e-09, "loss": 0.069, "step": 30263 }, { "epoch": 99.22622950819672, "grad_norm": 2.4939799308776855, "learning_rate": 3.1399872104653782e-09, "loss": 0.121, "step": 30264 }, { "epoch": 99.22950819672131, "grad_norm": 2.465707778930664, "learning_rate": 3.1134349045802703e-09, "loss": 0.07, "step": 30265 }, { "epoch": 99.2327868852459, "grad_norm": 3.331225633621216, "learning_rate": 3.0869953239853757e-09, "loss": 0.1032, "step": 30266 }, { "epoch": 99.23606557377049, "grad_norm": 2.235339879989624, "learning_rate": 3.060668468978234e-09, "loss": 0.1082, "step": 30267 }, { "epoch": 99.23934426229508, "grad_norm": 2.937600612640381, "learning_rate": 3.0344543398563852e-09, "loss": 0.1542, "step": 30268 }, { "epoch": 99.24262295081967, "grad_norm": 2.509794235229492, "learning_rate": 3.0083529369151487e-09, "loss": 0.2664, "step": 30269 }, { "epoch": 99.24590163934427, "grad_norm": 3.0822181701660156, "learning_rate": 2.9823642604498435e-09, "loss": 0.1591, "step": 30270 }, { "epoch": 99.24918032786886, "grad_norm": 2.6586861610412598, "learning_rate": 2.956488310752459e-09, "loss": 0.0956, "step": 30271 }, { "epoch": 99.25245901639344, "grad_norm": 2.153454542160034, "learning_rate": 2.930725088114983e-09, "loss": 0.0822, "step": 30272 }, { "epoch": 99.25573770491803, "grad_norm": 2.0518431663513184, "learning_rate": 2.905074592827184e-09, "loss": 0.0418, "step": 30273 }, { "epoch": 99.25901639344262, "grad_norm": 2.171394109725952, "learning_rate": 2.8795368251799403e-09, "loss": 0.1015, "step": 30274 }, { "epoch": 99.26229508196721, "grad_norm": 2.0980472564697266, "learning_rate": 2.85411178545969e-09, "loss": 0.0848, "step": 30275 }, { "epoch": 99.2655737704918, "grad_norm": 2.363974094390869, "learning_rate": 2.8287994739539803e-09, "loss": 0.0783, "step": 30276 }, { "epoch": 99.26885245901639, "grad_norm": 2.569345235824585, "learning_rate": 2.8035998909481387e-09, "loss": 0.1553, "step": 30277 }, { "epoch": 99.27213114754099, "grad_norm": 2.3992769718170166, "learning_rate": 2.7785130367263823e-09, "loss": 0.0358, "step": 30278 }, { "epoch": 99.27540983606558, "grad_norm": 2.4940903186798096, "learning_rate": 2.753538911570708e-09, "loss": 0.0483, "step": 30279 }, { "epoch": 99.27868852459017, "grad_norm": 1.9265252351760864, "learning_rate": 2.728677515764222e-09, "loss": 0.0504, "step": 30280 }, { "epoch": 99.28196721311475, "grad_norm": 2.3872761726379395, "learning_rate": 2.703928849585591e-09, "loss": 0.1187, "step": 30281 }, { "epoch": 99.28524590163934, "grad_norm": 1.5676213502883911, "learning_rate": 2.6792929133157006e-09, "loss": 0.0813, "step": 30282 }, { "epoch": 99.28852459016393, "grad_norm": 2.1165685653686523, "learning_rate": 2.6547697072309977e-09, "loss": 0.0808, "step": 30283 }, { "epoch": 99.29180327868852, "grad_norm": 2.8106679916381836, "learning_rate": 2.6303592316079263e-09, "loss": 0.1735, "step": 30284 }, { "epoch": 99.29508196721312, "grad_norm": 2.718512535095215, "learning_rate": 2.6060614867229327e-09, "loss": 0.1015, "step": 30285 }, { "epoch": 99.29836065573771, "grad_norm": 1.7738080024719238, "learning_rate": 2.5818764728480217e-09, "loss": 0.0407, "step": 30286 }, { "epoch": 99.3016393442623, "grad_norm": 2.10764741897583, "learning_rate": 2.557804190258528e-09, "loss": 0.1276, "step": 30287 }, { "epoch": 99.30491803278689, "grad_norm": 2.1687958240509033, "learning_rate": 2.5338446392242365e-09, "loss": 0.1668, "step": 30288 }, { "epoch": 99.30819672131148, "grad_norm": 1.8419398069381714, "learning_rate": 2.509997820014931e-09, "loss": 0.0455, "step": 30289 }, { "epoch": 99.31147540983606, "grad_norm": 1.6387169361114502, "learning_rate": 2.486263732900396e-09, "loss": 0.0619, "step": 30290 }, { "epoch": 99.31475409836065, "grad_norm": 7.118724822998047, "learning_rate": 2.462642378149305e-09, "loss": 0.1578, "step": 30291 }, { "epoch": 99.31803278688524, "grad_norm": 2.62971568107605, "learning_rate": 2.4391337560247807e-09, "loss": 0.2212, "step": 30292 }, { "epoch": 99.32131147540984, "grad_norm": 1.9427647590637207, "learning_rate": 2.4157378667954978e-09, "loss": 0.0693, "step": 30293 }, { "epoch": 99.32459016393443, "grad_norm": 2.7762227058410645, "learning_rate": 2.3924547107223583e-09, "loss": 0.0889, "step": 30294 }, { "epoch": 99.32786885245902, "grad_norm": 2.835646390914917, "learning_rate": 2.3692842880707056e-09, "loss": 0.1146, "step": 30295 }, { "epoch": 99.33114754098361, "grad_norm": 2.239800214767456, "learning_rate": 2.3462265990992216e-09, "loss": 0.0731, "step": 30296 }, { "epoch": 99.3344262295082, "grad_norm": 2.5660266876220703, "learning_rate": 2.323281644068809e-09, "loss": 0.1383, "step": 30297 }, { "epoch": 99.33770491803278, "grad_norm": 2.5213119983673096, "learning_rate": 2.3004494232392593e-09, "loss": 0.1026, "step": 30298 }, { "epoch": 99.34098360655737, "grad_norm": 4.197694778442383, "learning_rate": 2.2777299368659246e-09, "loss": 0.0988, "step": 30299 }, { "epoch": 99.34426229508196, "grad_norm": 3.038020133972168, "learning_rate": 2.2551231852074862e-09, "loss": 0.1217, "step": 30300 }, { "epoch": 99.34754098360656, "grad_norm": 1.8953735828399658, "learning_rate": 2.2326291685170755e-09, "loss": 0.1067, "step": 30301 }, { "epoch": 99.35081967213115, "grad_norm": 2.711609125137329, "learning_rate": 2.210247887048933e-09, "loss": 0.2517, "step": 30302 }, { "epoch": 99.35409836065574, "grad_norm": 3.405215263366699, "learning_rate": 2.1879793410550797e-09, "loss": 0.1064, "step": 30303 }, { "epoch": 99.35737704918033, "grad_norm": 2.708073377609253, "learning_rate": 2.1658235307875364e-09, "loss": 0.1507, "step": 30304 }, { "epoch": 99.36065573770492, "grad_norm": 3.1089324951171875, "learning_rate": 2.1437804564949928e-09, "loss": 0.1847, "step": 30305 }, { "epoch": 99.3639344262295, "grad_norm": 2.2413196563720703, "learning_rate": 2.1218501184261385e-09, "loss": 0.1246, "step": 30306 }, { "epoch": 99.3672131147541, "grad_norm": 2.9395859241485596, "learning_rate": 2.100032516828554e-09, "loss": 0.0428, "step": 30307 }, { "epoch": 99.37049180327868, "grad_norm": 2.4660258293151855, "learning_rate": 2.0783276519487084e-09, "loss": 0.0974, "step": 30308 }, { "epoch": 99.37377049180328, "grad_norm": 2.3471014499664307, "learning_rate": 2.0567355240308507e-09, "loss": 0.0642, "step": 30309 }, { "epoch": 99.37704918032787, "grad_norm": 2.4567794799804688, "learning_rate": 2.03525613331923e-09, "loss": 0.1484, "step": 30310 }, { "epoch": 99.38032786885246, "grad_norm": 1.8054113388061523, "learning_rate": 2.013889480054765e-09, "loss": 0.036, "step": 30311 }, { "epoch": 99.38360655737705, "grad_norm": 2.634908676147461, "learning_rate": 1.9926355644783735e-09, "loss": 0.1236, "step": 30312 }, { "epoch": 99.38688524590164, "grad_norm": 2.688535213470459, "learning_rate": 1.9714943868309744e-09, "loss": 0.1256, "step": 30313 }, { "epoch": 99.39016393442623, "grad_norm": 1.7135671377182007, "learning_rate": 1.950465947350155e-09, "loss": 0.0369, "step": 30314 }, { "epoch": 99.39344262295081, "grad_norm": 1.4121520519256592, "learning_rate": 1.9295502462735037e-09, "loss": 0.018, "step": 30315 }, { "epoch": 99.3967213114754, "grad_norm": 2.1720409393310547, "learning_rate": 1.9087472838363875e-09, "loss": 0.1091, "step": 30316 }, { "epoch": 99.4, "grad_norm": 1.9575440883636475, "learning_rate": 1.888057060274173e-09, "loss": 0.0335, "step": 30317 }, { "epoch": 99.4032786885246, "grad_norm": 2.984705924987793, "learning_rate": 1.8674795758188978e-09, "loss": 0.2506, "step": 30318 }, { "epoch": 99.40655737704918, "grad_norm": 2.5226328372955322, "learning_rate": 1.8470148307025981e-09, "loss": 0.0808, "step": 30319 }, { "epoch": 99.40983606557377, "grad_norm": 2.513986110687256, "learning_rate": 1.8266628251584206e-09, "loss": 0.0748, "step": 30320 }, { "epoch": 99.41311475409836, "grad_norm": 4.307724475860596, "learning_rate": 1.806423559412851e-09, "loss": 0.0906, "step": 30321 }, { "epoch": 99.41639344262295, "grad_norm": 2.2808947563171387, "learning_rate": 1.7862970336957054e-09, "loss": 0.1134, "step": 30322 }, { "epoch": 99.41967213114754, "grad_norm": 2.005040407180786, "learning_rate": 1.7662832482334692e-09, "loss": 0.0655, "step": 30323 }, { "epoch": 99.42295081967212, "grad_norm": 2.2731776237487793, "learning_rate": 1.7463822032515177e-09, "loss": 0.0709, "step": 30324 }, { "epoch": 99.42622950819673, "grad_norm": 1.9823722839355469, "learning_rate": 1.7265938989752261e-09, "loss": 0.0533, "step": 30325 }, { "epoch": 99.42950819672132, "grad_norm": 2.023656129837036, "learning_rate": 1.7069183356266394e-09, "loss": 0.1028, "step": 30326 }, { "epoch": 99.4327868852459, "grad_norm": 1.4822819232940674, "learning_rate": 1.6873555134289121e-09, "loss": 0.2426, "step": 30327 }, { "epoch": 99.43606557377049, "grad_norm": 1.6636271476745605, "learning_rate": 1.6679054326018685e-09, "loss": 0.044, "step": 30328 }, { "epoch": 99.43934426229508, "grad_norm": 2.5260071754455566, "learning_rate": 1.6485680933642223e-09, "loss": 0.1961, "step": 30329 }, { "epoch": 99.44262295081967, "grad_norm": 2.702178955078125, "learning_rate": 1.6293434959346877e-09, "loss": 0.1053, "step": 30330 }, { "epoch": 99.44590163934426, "grad_norm": 1.5296785831451416, "learning_rate": 1.6102316405308682e-09, "loss": 0.0957, "step": 30331 }, { "epoch": 99.44918032786886, "grad_norm": 2.632253885269165, "learning_rate": 1.591232527367037e-09, "loss": 0.1267, "step": 30332 }, { "epoch": 99.45245901639345, "grad_norm": 1.98368239402771, "learning_rate": 1.572346156657467e-09, "loss": 0.1184, "step": 30333 }, { "epoch": 99.45573770491804, "grad_norm": 2.3245198726654053, "learning_rate": 1.5535725286153213e-09, "loss": 0.0685, "step": 30334 }, { "epoch": 99.45901639344262, "grad_norm": 2.5466291904449463, "learning_rate": 1.5349116434526524e-09, "loss": 0.1214, "step": 30335 }, { "epoch": 99.46229508196721, "grad_norm": 3.222346305847168, "learning_rate": 1.5163635013804023e-09, "loss": 0.1264, "step": 30336 }, { "epoch": 99.4655737704918, "grad_norm": 1.9189106225967407, "learning_rate": 1.497928102606183e-09, "loss": 0.0401, "step": 30337 }, { "epoch": 99.46885245901639, "grad_norm": 2.1047656536102295, "learning_rate": 1.4796054473387166e-09, "loss": 0.0991, "step": 30338 }, { "epoch": 99.47213114754098, "grad_norm": 2.21158766746521, "learning_rate": 1.4613955357845045e-09, "loss": 0.0758, "step": 30339 }, { "epoch": 99.47540983606558, "grad_norm": 1.581982970237732, "learning_rate": 1.4432983681489377e-09, "loss": 0.0592, "step": 30340 }, { "epoch": 99.47868852459017, "grad_norm": 2.404369831085205, "learning_rate": 1.4253139446362974e-09, "loss": 0.1485, "step": 30341 }, { "epoch": 99.48196721311476, "grad_norm": 1.3682183027267456, "learning_rate": 1.407442265448644e-09, "loss": 0.0839, "step": 30342 }, { "epoch": 99.48524590163935, "grad_norm": 3.0942769050598145, "learning_rate": 1.3896833307880387e-09, "loss": 0.0651, "step": 30343 }, { "epoch": 99.48852459016393, "grad_norm": 2.9617085456848145, "learning_rate": 1.3720371408554311e-09, "loss": 0.1419, "step": 30344 }, { "epoch": 99.49180327868852, "grad_norm": 2.511962890625, "learning_rate": 1.3545036958484415e-09, "loss": 0.3049, "step": 30345 }, { "epoch": 99.49508196721311, "grad_norm": 2.7635042667388916, "learning_rate": 1.3370829959657994e-09, "loss": 0.1837, "step": 30346 }, { "epoch": 99.4983606557377, "grad_norm": 2.3066840171813965, "learning_rate": 1.3197750414029043e-09, "loss": 0.2257, "step": 30347 }, { "epoch": 99.5016393442623, "grad_norm": 2.4100589752197266, "learning_rate": 1.3025798323562655e-09, "loss": 0.0521, "step": 30348 }, { "epoch": 99.50491803278689, "grad_norm": 1.9201240539550781, "learning_rate": 1.285497369019062e-09, "loss": 0.0658, "step": 30349 }, { "epoch": 99.50819672131148, "grad_norm": 3.938716173171997, "learning_rate": 1.2685276515844724e-09, "loss": 0.1038, "step": 30350 }, { "epoch": 99.51147540983607, "grad_norm": 1.5384790897369385, "learning_rate": 1.2516706802423451e-09, "loss": 0.0231, "step": 30351 }, { "epoch": 99.51475409836065, "grad_norm": 2.769549608230591, "learning_rate": 1.2349264551836383e-09, "loss": 0.0703, "step": 30352 }, { "epoch": 99.51803278688524, "grad_norm": 3.909864664077759, "learning_rate": 1.2182949765970898e-09, "loss": 0.2144, "step": 30353 }, { "epoch": 99.52131147540983, "grad_norm": 1.7211079597473145, "learning_rate": 1.2017762446714375e-09, "loss": 0.1069, "step": 30354 }, { "epoch": 99.52459016393442, "grad_norm": 2.5489604473114014, "learning_rate": 1.1853702595909788e-09, "loss": 0.1871, "step": 30355 }, { "epoch": 99.52786885245902, "grad_norm": 1.484422206878662, "learning_rate": 1.169077021542231e-09, "loss": 0.0238, "step": 30356 }, { "epoch": 99.53114754098361, "grad_norm": 2.4936351776123047, "learning_rate": 1.1528965307083806e-09, "loss": 0.1619, "step": 30357 }, { "epoch": 99.5344262295082, "grad_norm": 1.744260311126709, "learning_rate": 1.1368287872715045e-09, "loss": 0.0778, "step": 30358 }, { "epoch": 99.53770491803279, "grad_norm": 2.8509762287139893, "learning_rate": 1.1208737914125689e-09, "loss": 0.1064, "step": 30359 }, { "epoch": 99.54098360655738, "grad_norm": 2.197777032852173, "learning_rate": 1.1050315433125402e-09, "loss": 0.1204, "step": 30360 }, { "epoch": 99.54426229508196, "grad_norm": 6.9260077476501465, "learning_rate": 1.0893020431501645e-09, "loss": 0.2024, "step": 30361 }, { "epoch": 99.54754098360655, "grad_norm": 1.5583579540252686, "learning_rate": 1.0736852911008566e-09, "loss": 0.062, "step": 30362 }, { "epoch": 99.55081967213114, "grad_norm": 1.946007490158081, "learning_rate": 1.0581812873422525e-09, "loss": 0.0396, "step": 30363 }, { "epoch": 99.55409836065574, "grad_norm": 1.9968301057815552, "learning_rate": 1.0427900320497674e-09, "loss": 0.0341, "step": 30364 }, { "epoch": 99.55737704918033, "grad_norm": 2.0176596641540527, "learning_rate": 1.027511525395486e-09, "loss": 0.1143, "step": 30365 }, { "epoch": 99.56065573770492, "grad_norm": 2.2361810207366943, "learning_rate": 1.0123457675526026e-09, "loss": 0.0536, "step": 30366 }, { "epoch": 99.56393442622951, "grad_norm": 1.5929217338562012, "learning_rate": 9.972927586920923e-10, "loss": 0.0733, "step": 30367 }, { "epoch": 99.5672131147541, "grad_norm": 2.454756259918213, "learning_rate": 9.823524989838185e-10, "loss": 0.1545, "step": 30368 }, { "epoch": 99.57049180327868, "grad_norm": 2.025460958480835, "learning_rate": 9.67524988594315e-10, "loss": 0.0794, "step": 30369 }, { "epoch": 99.57377049180327, "grad_norm": 2.3798201084136963, "learning_rate": 9.528102276934459e-10, "loss": 0.0597, "step": 30370 }, { "epoch": 99.57704918032788, "grad_norm": 2.4038805961608887, "learning_rate": 9.382082164466345e-10, "loss": 0.0934, "step": 30371 }, { "epoch": 99.58032786885246, "grad_norm": 2.3333263397216797, "learning_rate": 9.237189550170833e-10, "loss": 0.152, "step": 30372 }, { "epoch": 99.58360655737705, "grad_norm": 2.2469213008880615, "learning_rate": 9.093424435691056e-10, "loss": 0.1112, "step": 30373 }, { "epoch": 99.58688524590164, "grad_norm": 2.8366236686706543, "learning_rate": 8.950786822647939e-10, "loss": 0.1184, "step": 30374 }, { "epoch": 99.59016393442623, "grad_norm": 2.5518012046813965, "learning_rate": 8.809276712651305e-10, "loss": 0.1365, "step": 30375 }, { "epoch": 99.59344262295082, "grad_norm": 1.4805843830108643, "learning_rate": 8.668894107288772e-10, "loss": 0.0396, "step": 30376 }, { "epoch": 99.5967213114754, "grad_norm": 2.0348761081695557, "learning_rate": 8.529639008159063e-10, "loss": 0.0498, "step": 30377 }, { "epoch": 99.6, "grad_norm": 2.3066112995147705, "learning_rate": 8.391511416816489e-10, "loss": 0.0763, "step": 30378 }, { "epoch": 99.6032786885246, "grad_norm": 1.7863349914550781, "learning_rate": 8.254511334826465e-10, "loss": 0.1711, "step": 30379 }, { "epoch": 99.60655737704919, "grad_norm": 2.7692575454711914, "learning_rate": 8.1186387637322e-10, "loss": 0.0633, "step": 30380 }, { "epoch": 99.60983606557377, "grad_norm": 1.4701424837112427, "learning_rate": 7.983893705065804e-10, "loss": 0.084, "step": 30381 }, { "epoch": 99.61311475409836, "grad_norm": 2.4315781593322754, "learning_rate": 7.850276160337178e-10, "loss": 0.0508, "step": 30382 }, { "epoch": 99.61639344262295, "grad_norm": 2.2624332904815674, "learning_rate": 7.717786131078431e-10, "loss": 0.0641, "step": 30383 }, { "epoch": 99.61967213114754, "grad_norm": 4.907903671264648, "learning_rate": 7.586423618755056e-10, "loss": 0.0689, "step": 30384 }, { "epoch": 99.62295081967213, "grad_norm": 2.8452398777008057, "learning_rate": 7.456188624865856e-10, "loss": 0.1159, "step": 30385 }, { "epoch": 99.62622950819672, "grad_norm": 4.268280029296875, "learning_rate": 7.327081150876325e-10, "loss": 0.1096, "step": 30386 }, { "epoch": 99.62950819672132, "grad_norm": 2.0734238624572754, "learning_rate": 7.199101198240854e-10, "loss": 0.0437, "step": 30387 }, { "epoch": 99.6327868852459, "grad_norm": 4.71419620513916, "learning_rate": 7.072248768402734e-10, "loss": 0.1562, "step": 30388 }, { "epoch": 99.6360655737705, "grad_norm": 1.6964905261993408, "learning_rate": 6.94652386278305e-10, "loss": 0.0501, "step": 30389 }, { "epoch": 99.63934426229508, "grad_norm": 2.1413614749908447, "learning_rate": 6.82192648281399e-10, "loss": 0.0446, "step": 30390 }, { "epoch": 99.64262295081967, "grad_norm": 2.7858495712280273, "learning_rate": 6.698456629894435e-10, "loss": 0.1271, "step": 30391 }, { "epoch": 99.64590163934426, "grad_norm": 2.1969826221466064, "learning_rate": 6.576114305412162e-10, "loss": 0.1002, "step": 30392 }, { "epoch": 99.64918032786885, "grad_norm": 1.7053306102752686, "learning_rate": 6.454899510754953e-10, "loss": 0.0563, "step": 30393 }, { "epoch": 99.65245901639344, "grad_norm": 1.9350361824035645, "learning_rate": 6.33481224728838e-10, "loss": 0.1326, "step": 30394 }, { "epoch": 99.65573770491804, "grad_norm": 2.305903911590576, "learning_rate": 6.215852516366916e-10, "loss": 0.1724, "step": 30395 }, { "epoch": 99.65901639344263, "grad_norm": 2.742352247238159, "learning_rate": 6.098020319322828e-10, "loss": 0.1355, "step": 30396 }, { "epoch": 99.66229508196722, "grad_norm": 3.0631840229034424, "learning_rate": 5.981315657488385e-10, "loss": 0.17, "step": 30397 }, { "epoch": 99.6655737704918, "grad_norm": 3.207521915435791, "learning_rate": 5.865738532195852e-10, "loss": 0.2188, "step": 30398 }, { "epoch": 99.66885245901639, "grad_norm": 2.073772430419922, "learning_rate": 5.751288944721989e-10, "loss": 0.1033, "step": 30399 }, { "epoch": 99.67213114754098, "grad_norm": 2.986632823944092, "learning_rate": 5.637966896376857e-10, "loss": 0.0903, "step": 30400 }, { "epoch": 99.67540983606557, "grad_norm": 1.8070025444030762, "learning_rate": 5.525772388426109e-10, "loss": 0.1116, "step": 30401 }, { "epoch": 99.67868852459016, "grad_norm": 2.6637189388275146, "learning_rate": 5.414705422146505e-10, "loss": 0.0517, "step": 30402 }, { "epoch": 99.68196721311476, "grad_norm": 2.6297521591186523, "learning_rate": 5.304765998781491e-10, "loss": 0.1101, "step": 30403 }, { "epoch": 99.68524590163935, "grad_norm": 2.3828155994415283, "learning_rate": 5.195954119563418e-10, "loss": 0.2623, "step": 30404 }, { "epoch": 99.68852459016394, "grad_norm": 1.6372008323669434, "learning_rate": 5.088269785746835e-10, "loss": 0.0346, "step": 30405 }, { "epoch": 99.69180327868852, "grad_norm": 1.8031538724899292, "learning_rate": 4.981712998519683e-10, "loss": 0.1141, "step": 30406 }, { "epoch": 99.69508196721311, "grad_norm": 3.1575980186462402, "learning_rate": 4.876283759092105e-10, "loss": 0.1303, "step": 30407 }, { "epoch": 99.6983606557377, "grad_norm": 2.4844157695770264, "learning_rate": 4.771982068652037e-10, "loss": 0.1446, "step": 30408 }, { "epoch": 99.70163934426229, "grad_norm": 2.685375452041626, "learning_rate": 4.668807928387419e-10, "loss": 0.2224, "step": 30409 }, { "epoch": 99.70491803278688, "grad_norm": 2.755742073059082, "learning_rate": 4.566761339441783e-10, "loss": 0.2628, "step": 30410 }, { "epoch": 99.70819672131148, "grad_norm": 1.6466609239578247, "learning_rate": 4.4658423029808606e-10, "loss": 0.0708, "step": 30411 }, { "epoch": 99.71147540983607, "grad_norm": 1.801673173904419, "learning_rate": 4.3660508201259775e-10, "loss": 0.0942, "step": 30412 }, { "epoch": 99.71475409836066, "grad_norm": 2.7056455612182617, "learning_rate": 4.267386892020664e-10, "loss": 0.0952, "step": 30413 }, { "epoch": 99.71803278688525, "grad_norm": 3.099992275238037, "learning_rate": 4.1698505197751427e-10, "loss": 0.1864, "step": 30414 }, { "epoch": 99.72131147540983, "grad_norm": 1.6583360433578491, "learning_rate": 4.0734417044774323e-10, "loss": 0.0326, "step": 30415 }, { "epoch": 99.72459016393442, "grad_norm": 1.6843584775924683, "learning_rate": 3.978160447215551e-10, "loss": 0.1131, "step": 30416 }, { "epoch": 99.72786885245901, "grad_norm": 3.4656624794006348, "learning_rate": 3.884006749077518e-10, "loss": 0.1827, "step": 30417 }, { "epoch": 99.73114754098361, "grad_norm": 3.030026435852051, "learning_rate": 3.7909806111180447e-10, "loss": 0.2016, "step": 30418 }, { "epoch": 99.7344262295082, "grad_norm": 2.926525831222534, "learning_rate": 3.699082034380741e-10, "loss": 0.1563, "step": 30419 }, { "epoch": 99.73770491803279, "grad_norm": 1.9351072311401367, "learning_rate": 3.6083110199092165e-10, "loss": 0.0641, "step": 30420 }, { "epoch": 99.74098360655738, "grad_norm": 3.1816093921661377, "learning_rate": 3.5186675687248763e-10, "loss": 0.1008, "step": 30421 }, { "epoch": 99.74426229508197, "grad_norm": 1.7459568977355957, "learning_rate": 3.430151681838023e-10, "loss": 0.1337, "step": 30422 }, { "epoch": 99.74754098360656, "grad_norm": 1.8185216188430786, "learning_rate": 3.342763360247858e-10, "loss": 0.0634, "step": 30423 }, { "epoch": 99.75081967213114, "grad_norm": 1.8362312316894531, "learning_rate": 3.25650260494248e-10, "loss": 0.3127, "step": 30424 }, { "epoch": 99.75409836065573, "grad_norm": 2.3163669109344482, "learning_rate": 3.1713694168766793e-10, "loss": 0.0735, "step": 30425 }, { "epoch": 99.75737704918033, "grad_norm": 1.6624757051467896, "learning_rate": 3.087363797038556e-10, "loss": 0.0579, "step": 30426 }, { "epoch": 99.76065573770492, "grad_norm": 2.5731589794158936, "learning_rate": 3.0044857463495945e-10, "loss": 0.223, "step": 30427 }, { "epoch": 99.76393442622951, "grad_norm": 2.801537036895752, "learning_rate": 2.922735265764587e-10, "loss": 0.199, "step": 30428 }, { "epoch": 99.7672131147541, "grad_norm": 2.0518381595611572, "learning_rate": 2.8421123561939157e-10, "loss": 0.0601, "step": 30429 }, { "epoch": 99.77049180327869, "grad_norm": 2.5805342197418213, "learning_rate": 2.762617018547964e-10, "loss": 0.0656, "step": 30430 }, { "epoch": 99.77377049180328, "grad_norm": 2.2730913162231445, "learning_rate": 2.6842492537260124e-10, "loss": 0.0781, "step": 30431 }, { "epoch": 99.77704918032786, "grad_norm": 2.5326385498046875, "learning_rate": 2.607009062605137e-10, "loss": 0.1121, "step": 30432 }, { "epoch": 99.78032786885245, "grad_norm": 2.0353472232818604, "learning_rate": 2.530896446062414e-10, "loss": 0.0704, "step": 30433 }, { "epoch": 99.78360655737706, "grad_norm": 2.3903310298919678, "learning_rate": 2.4559114049638177e-10, "loss": 0.1218, "step": 30434 }, { "epoch": 99.78688524590164, "grad_norm": 1.9062761068344116, "learning_rate": 2.382053940142015e-10, "loss": 0.0684, "step": 30435 }, { "epoch": 99.79016393442623, "grad_norm": 2.4686248302459717, "learning_rate": 2.3093240524296733e-10, "loss": 0.1231, "step": 30436 }, { "epoch": 99.79344262295082, "grad_norm": 2.177877426147461, "learning_rate": 2.23772174265946e-10, "loss": 0.0459, "step": 30437 }, { "epoch": 99.79672131147541, "grad_norm": 2.1814870834350586, "learning_rate": 2.167247011619633e-10, "loss": 0.1354, "step": 30438 }, { "epoch": 99.8, "grad_norm": 2.6186351776123047, "learning_rate": 2.0978998601206558e-10, "loss": 0.0745, "step": 30439 }, { "epoch": 99.80327868852459, "grad_norm": 2.253091812133789, "learning_rate": 2.029680288939684e-10, "loss": 0.1147, "step": 30440 }, { "epoch": 99.80655737704917, "grad_norm": 1.6791592836380005, "learning_rate": 1.9625882988538737e-10, "loss": 0.0702, "step": 30441 }, { "epoch": 99.80983606557378, "grad_norm": 2.2959578037261963, "learning_rate": 1.8966238905959722e-10, "loss": 0.136, "step": 30442 }, { "epoch": 99.81311475409836, "grad_norm": 1.7199362516403198, "learning_rate": 1.8317870649431357e-10, "loss": 0.1194, "step": 30443 }, { "epoch": 99.81639344262295, "grad_norm": 1.8913697004318237, "learning_rate": 1.7680778225948046e-10, "loss": 0.0609, "step": 30444 }, { "epoch": 99.81967213114754, "grad_norm": 2.4453001022338867, "learning_rate": 1.7054961642948286e-10, "loss": 0.1351, "step": 30445 }, { "epoch": 99.82295081967213, "grad_norm": 2.36494779586792, "learning_rate": 1.6440420907204434e-10, "loss": 0.0574, "step": 30446 }, { "epoch": 99.82622950819672, "grad_norm": 2.654515504837036, "learning_rate": 1.5837156025932943e-10, "loss": 0.0854, "step": 30447 }, { "epoch": 99.8295081967213, "grad_norm": 2.2573673725128174, "learning_rate": 1.524516700590617e-10, "loss": 0.0747, "step": 30448 }, { "epoch": 99.8327868852459, "grad_norm": 1.6437819004058838, "learning_rate": 1.466445385356341e-10, "loss": 0.0972, "step": 30449 }, { "epoch": 99.8360655737705, "grad_norm": 1.9482934474945068, "learning_rate": 1.4095016575677022e-10, "loss": 0.0361, "step": 30450 }, { "epoch": 99.83934426229509, "grad_norm": 2.9736123085021973, "learning_rate": 1.3536855178575281e-10, "loss": 0.1199, "step": 30451 }, { "epoch": 99.84262295081967, "grad_norm": 2.6755261421203613, "learning_rate": 1.2989969668586455e-10, "loss": 0.0982, "step": 30452 }, { "epoch": 99.84590163934426, "grad_norm": 1.9018477201461792, "learning_rate": 1.2454360051816773e-10, "loss": 0.1574, "step": 30453 }, { "epoch": 99.84918032786885, "grad_norm": 4.120143413543701, "learning_rate": 1.1930026334372458e-10, "loss": 0.0613, "step": 30454 }, { "epoch": 99.85245901639344, "grad_norm": 1.7925949096679688, "learning_rate": 1.1416968522137695e-10, "loss": 0.0689, "step": 30455 }, { "epoch": 99.85573770491803, "grad_norm": 1.9440011978149414, "learning_rate": 1.0915186620996665e-10, "loss": 0.0989, "step": 30456 }, { "epoch": 99.85901639344263, "grad_norm": 2.3904106616973877, "learning_rate": 1.0424680636389462e-10, "loss": 0.1037, "step": 30457 }, { "epoch": 99.86229508196722, "grad_norm": 2.2863316535949707, "learning_rate": 9.945450574089244e-11, "loss": 0.0581, "step": 30458 }, { "epoch": 99.8655737704918, "grad_norm": 2.617366075515747, "learning_rate": 9.47749643931406e-11, "loss": 0.0688, "step": 30459 }, { "epoch": 99.8688524590164, "grad_norm": 2.380014181137085, "learning_rate": 9.020818237392982e-11, "loss": 0.1154, "step": 30460 }, { "epoch": 99.87213114754098, "grad_norm": 2.2707767486572266, "learning_rate": 8.575415973433032e-11, "loss": 0.174, "step": 30461 }, { "epoch": 99.87540983606557, "grad_norm": 2.168504238128662, "learning_rate": 8.141289652652262e-11, "loss": 0.1216, "step": 30462 }, { "epoch": 99.87868852459016, "grad_norm": 2.0810353755950928, "learning_rate": 7.718439279713608e-11, "loss": 0.0848, "step": 30463 }, { "epoch": 99.88196721311475, "grad_norm": 2.470689535140991, "learning_rate": 7.30686485950205e-11, "loss": 0.1501, "step": 30464 }, { "epoch": 99.88524590163935, "grad_norm": 2.2837789058685303, "learning_rate": 6.906566396680525e-11, "loss": 0.2025, "step": 30465 }, { "epoch": 99.88852459016394, "grad_norm": 2.319566011428833, "learning_rate": 6.517543895689926e-11, "loss": 0.0965, "step": 30466 }, { "epoch": 99.89180327868853, "grad_norm": 1.4874961376190186, "learning_rate": 6.139797360971145e-11, "loss": 0.0364, "step": 30467 }, { "epoch": 99.89508196721312, "grad_norm": 2.1274163722991943, "learning_rate": 5.7733267967430284e-11, "loss": 0.0667, "step": 30468 }, { "epoch": 99.8983606557377, "grad_norm": 2.2290303707122803, "learning_rate": 5.4181322071134025e-11, "loss": 0.0997, "step": 30469 }, { "epoch": 99.90163934426229, "grad_norm": 2.264496088027954, "learning_rate": 5.0742135961900917e-11, "loss": 0.066, "step": 30470 }, { "epoch": 99.90491803278688, "grad_norm": 1.8742761611938477, "learning_rate": 4.741570967747855e-11, "loss": 0.1219, "step": 30471 }, { "epoch": 99.90819672131147, "grad_norm": 3.3886098861694336, "learning_rate": 4.42020432556145e-11, "loss": 0.1751, "step": 30472 }, { "epoch": 99.91147540983607, "grad_norm": 2.383883476257324, "learning_rate": 4.110113673294613e-11, "loss": 0.068, "step": 30473 }, { "epoch": 99.91475409836066, "grad_norm": 2.7119929790496826, "learning_rate": 3.8112990145000585e-11, "loss": 0.1122, "step": 30474 }, { "epoch": 99.91803278688525, "grad_norm": 2.3841896057128906, "learning_rate": 3.523760352397432e-11, "loss": 0.0339, "step": 30475 }, { "epoch": 99.92131147540984, "grad_norm": 2.4614124298095703, "learning_rate": 3.247497690317403e-11, "loss": 0.0623, "step": 30476 }, { "epoch": 99.92459016393443, "grad_norm": 1.870155930519104, "learning_rate": 2.982511031257573e-11, "loss": 0.138, "step": 30477 }, { "epoch": 99.92786885245901, "grad_norm": 1.87362802028656, "learning_rate": 2.7288003784375904e-11, "loss": 0.1042, "step": 30478 }, { "epoch": 99.9311475409836, "grad_norm": 1.3899941444396973, "learning_rate": 2.4863657345219893e-11, "loss": 0.0639, "step": 30479 }, { "epoch": 99.93442622950819, "grad_norm": 2.109954833984375, "learning_rate": 2.2552071022863274e-11, "loss": 0.1965, "step": 30480 }, { "epoch": 99.9377049180328, "grad_norm": 2.501086473464966, "learning_rate": 2.0353244843951404e-11, "loss": 0.0536, "step": 30481 }, { "epoch": 99.94098360655738, "grad_norm": 2.185713291168213, "learning_rate": 1.8267178832909182e-11, "loss": 0.1011, "step": 30482 }, { "epoch": 99.94426229508197, "grad_norm": 2.3429930210113525, "learning_rate": 1.6293873014161522e-11, "loss": 0.0267, "step": 30483 }, { "epoch": 99.94754098360656, "grad_norm": 2.123687982559204, "learning_rate": 1.4433327407692433e-11, "loss": 0.1727, "step": 30484 }, { "epoch": 99.95081967213115, "grad_norm": 2.640380620956421, "learning_rate": 1.2685542036816601e-11, "loss": 0.1183, "step": 30485 }, { "epoch": 99.95409836065573, "grad_norm": 2.9477360248565674, "learning_rate": 1.1050516919297593e-11, "loss": 0.0998, "step": 30486 }, { "epoch": 99.95737704918032, "grad_norm": 2.4947867393493652, "learning_rate": 9.528252075119426e-12, "loss": 0.1459, "step": 30487 }, { "epoch": 99.96065573770491, "grad_norm": 4.079095363616943, "learning_rate": 8.118747520935445e-12, "loss": 0.1322, "step": 30488 }, { "epoch": 99.96393442622951, "grad_norm": 1.660362720489502, "learning_rate": 6.822003273398991e-12, "loss": 0.056, "step": 30489 }, { "epoch": 99.9672131147541, "grad_norm": 1.4518661499023438, "learning_rate": 5.638019344722523e-12, "loss": 0.0357, "step": 30490 }, { "epoch": 99.97049180327869, "grad_norm": 2.2773492336273193, "learning_rate": 4.566795751559383e-12, "loss": 0.0857, "step": 30491 }, { "epoch": 99.97377049180328, "grad_norm": 1.9762102365493774, "learning_rate": 3.608332502791356e-12, "loss": 0.0916, "step": 30492 }, { "epoch": 99.97704918032787, "grad_norm": 1.3666291236877441, "learning_rate": 2.762629611741119e-12, "loss": 0.0431, "step": 30493 }, { "epoch": 99.98032786885246, "grad_norm": 2.191751718521118, "learning_rate": 2.029687086180232e-12, "loss": 0.04, "step": 30494 }, { "epoch": 99.98360655737704, "grad_norm": 2.2597529888153076, "learning_rate": 1.4095049361007028e-12, "loss": 0.0699, "step": 30495 }, { "epoch": 99.98688524590163, "grad_norm": 2.369460105895996, "learning_rate": 9.020831659434237e-13, "loss": 0.1411, "step": 30496 }, { "epoch": 99.99016393442623, "grad_norm": 1.9548797607421875, "learning_rate": 5.074217845901785e-13, "loss": 0.0878, "step": 30497 }, { "epoch": 99.99344262295082, "grad_norm": 2.677663564682007, "learning_rate": 2.2552079426141349e-13, "loss": 0.0583, "step": 30498 }, { "epoch": 99.99672131147541, "grad_norm": 1.2980780601501465, "learning_rate": 5.6380198287797616e-14, "loss": 0.0445, "step": 30499 }, { "epoch": 100.0, "grad_norm": 2.2538843154907227, "learning_rate": 0.0, "loss": 0.0682, "step": 30500 }, { "epoch": 100.0, "step": 30500, "total_flos": 7.1224519032832e+16, "train_loss": 0.6104957264954927, "train_runtime": 17685.638, "train_samples_per_second": 55.039, "train_steps_per_second": 1.725 } ], "logging_steps": 1.0, "max_steps": 30500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.1224519032832e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }