{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 8985, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000333889816360601, "grad_norm": 1287.0480329814236, "learning_rate": 1.1123470522803115e-08, "loss": 11.9232, "step": 1 }, { "epoch": 0.000667779632721202, "grad_norm": 1295.143450852494, "learning_rate": 2.224694104560623e-08, "loss": 11.9422, "step": 2 }, { "epoch": 0.001001669449081803, "grad_norm": 1299.3648281231324, "learning_rate": 3.337041156840935e-08, "loss": 11.9406, "step": 3 }, { "epoch": 0.001335559265442404, "grad_norm": 1301.2282602527096, "learning_rate": 4.449388209121246e-08, "loss": 11.9641, "step": 4 }, { "epoch": 0.001669449081803005, "grad_norm": 1368.8036386752597, "learning_rate": 5.561735261401558e-08, "loss": 11.9358, "step": 5 }, { "epoch": 0.002003338898163606, "grad_norm": 1445.1916677508684, "learning_rate": 6.67408231368187e-08, "loss": 11.9393, "step": 6 }, { "epoch": 0.002337228714524207, "grad_norm": 1452.212695791872, "learning_rate": 7.78642936596218e-08, "loss": 11.9463, "step": 7 }, { "epoch": 0.002671118530884808, "grad_norm": 1455.9004613952682, "learning_rate": 8.898776418242492e-08, "loss": 11.9269, "step": 8 }, { "epoch": 0.003005008347245409, "grad_norm": 1536.531747024118, "learning_rate": 1.0011123470522804e-07, "loss": 11.8996, "step": 9 }, { "epoch": 0.00333889816360601, "grad_norm": 1509.977343091974, "learning_rate": 1.1123470522803116e-07, "loss": 11.8825, "step": 10 }, { "epoch": 0.003672787979966611, "grad_norm": 1823.735728844712, "learning_rate": 1.2235817575083427e-07, "loss": 11.7337, "step": 11 }, { "epoch": 0.004006677796327212, "grad_norm": 1760.6979665433378, "learning_rate": 1.334816462736374e-07, "loss": 11.7122, "step": 12 }, { "epoch": 0.004340567612687813, "grad_norm": 1756.2696866256383, "learning_rate": 1.446051167964405e-07, "loss": 11.7008, "step": 13 }, { "epoch": 0.004674457429048414, "grad_norm": 2048.354263499814, "learning_rate": 1.557285873192436e-07, "loss": 11.624, "step": 14 }, { "epoch": 0.005008347245409015, "grad_norm": 1024.7142398622582, "learning_rate": 1.6685205784204674e-07, "loss": 11.1919, "step": 15 }, { "epoch": 0.005342237061769616, "grad_norm": 1058.495842824166, "learning_rate": 1.7797552836484985e-07, "loss": 11.1161, "step": 16 }, { "epoch": 0.005676126878130217, "grad_norm": 1076.8820968275313, "learning_rate": 1.8909899888765295e-07, "loss": 11.0569, "step": 17 }, { "epoch": 0.006010016694490818, "grad_norm": 967.845693913664, "learning_rate": 2.0022246941045608e-07, "loss": 10.9698, "step": 18 }, { "epoch": 0.006343906510851419, "grad_norm": 1006.0912476395131, "learning_rate": 2.113459399332592e-07, "loss": 10.9174, "step": 19 }, { "epoch": 0.00667779632721202, "grad_norm": 1099.0382463062647, "learning_rate": 2.2246941045606232e-07, "loss": 10.6138, "step": 20 }, { "epoch": 0.007011686143572621, "grad_norm": 1134.826842372751, "learning_rate": 2.3359288097886543e-07, "loss": 9.9894, "step": 21 }, { "epoch": 0.007345575959933222, "grad_norm": 1061.8196832489184, "learning_rate": 2.4471635150166853e-07, "loss": 9.9477, "step": 22 }, { "epoch": 0.007679465776293823, "grad_norm": 940.820120886196, "learning_rate": 2.5583982202447166e-07, "loss": 9.8484, "step": 23 }, { "epoch": 0.008013355592654424, "grad_norm": 833.2767774421342, "learning_rate": 2.669632925472748e-07, "loss": 9.7261, "step": 24 }, { "epoch": 0.008347245409015025, "grad_norm": 757.2179734227236, "learning_rate": 2.780867630700779e-07, "loss": 9.5909, "step": 25 }, { "epoch": 0.008681135225375626, "grad_norm": 680.2265107567655, "learning_rate": 2.89210233592881e-07, "loss": 9.4846, "step": 26 }, { "epoch": 0.009015025041736227, "grad_norm": 567.8655787442851, "learning_rate": 3.003337041156841e-07, "loss": 9.3079, "step": 27 }, { "epoch": 0.009348914858096828, "grad_norm": 249.75299847205213, "learning_rate": 3.114571746384872e-07, "loss": 8.8346, "step": 28 }, { "epoch": 0.009682804674457429, "grad_norm": 367.48261077330363, "learning_rate": 3.2258064516129035e-07, "loss": 8.7605, "step": 29 }, { "epoch": 0.01001669449081803, "grad_norm": 308.27715310857803, "learning_rate": 3.337041156840935e-07, "loss": 8.7801, "step": 30 }, { "epoch": 0.010350584307178631, "grad_norm": 337.86826352953875, "learning_rate": 3.4482758620689656e-07, "loss": 8.7515, "step": 31 }, { "epoch": 0.010684474123539232, "grad_norm": 209.78128287689697, "learning_rate": 3.559510567296997e-07, "loss": 8.6972, "step": 32 }, { "epoch": 0.011018363939899833, "grad_norm": 195.42336780734433, "learning_rate": 3.670745272525028e-07, "loss": 8.595, "step": 33 }, { "epoch": 0.011352253756260434, "grad_norm": 214.33064141550335, "learning_rate": 3.781979977753059e-07, "loss": 8.5687, "step": 34 }, { "epoch": 0.011686143572621035, "grad_norm": 218.55651727613943, "learning_rate": 3.8932146829810904e-07, "loss": 8.4986, "step": 35 }, { "epoch": 0.012020033388981636, "grad_norm": 189.84346475980473, "learning_rate": 4.0044493882091217e-07, "loss": 8.4916, "step": 36 }, { "epoch": 0.012353923205342237, "grad_norm": 222.8441496659349, "learning_rate": 4.115684093437153e-07, "loss": 8.2615, "step": 37 }, { "epoch": 0.012687813021702838, "grad_norm": 162.42954842138204, "learning_rate": 4.226918798665184e-07, "loss": 8.1325, "step": 38 }, { "epoch": 0.01302170283806344, "grad_norm": 199.7506901491005, "learning_rate": 4.338153503893215e-07, "loss": 8.034, "step": 39 }, { "epoch": 0.01335559265442404, "grad_norm": 170.5206448447141, "learning_rate": 4.4493882091212464e-07, "loss": 7.8948, "step": 40 }, { "epoch": 0.013689482470784642, "grad_norm": 132.02336716825278, "learning_rate": 4.560622914349278e-07, "loss": 7.8486, "step": 41 }, { "epoch": 0.014023372287145243, "grad_norm": 131.01028349792065, "learning_rate": 4.6718576195773085e-07, "loss": 7.7451, "step": 42 }, { "epoch": 0.014357262103505844, "grad_norm": 208.72898803344034, "learning_rate": 4.783092324805339e-07, "loss": 7.7273, "step": 43 }, { "epoch": 0.014691151919866445, "grad_norm": 116.0798602584657, "learning_rate": 4.894327030033371e-07, "loss": 7.5904, "step": 44 }, { "epoch": 0.015025041736227046, "grad_norm": 101.49436717811712, "learning_rate": 5.005561735261402e-07, "loss": 7.5182, "step": 45 }, { "epoch": 0.015358931552587647, "grad_norm": 90.77986733668398, "learning_rate": 5.116796440489433e-07, "loss": 7.4804, "step": 46 }, { "epoch": 0.015692821368948246, "grad_norm": 100.852178972112, "learning_rate": 5.228031145717465e-07, "loss": 7.4258, "step": 47 }, { "epoch": 0.016026711185308847, "grad_norm": 113.17697059941423, "learning_rate": 5.339265850945496e-07, "loss": 7.4132, "step": 48 }, { "epoch": 0.016360601001669448, "grad_norm": 125.58491313233608, "learning_rate": 5.450500556173527e-07, "loss": 7.2492, "step": 49 }, { "epoch": 0.01669449081803005, "grad_norm": 94.8481796938426, "learning_rate": 5.561735261401558e-07, "loss": 7.2183, "step": 50 }, { "epoch": 0.01702838063439065, "grad_norm": 65.16258035645745, "learning_rate": 5.672969966629589e-07, "loss": 7.0912, "step": 51 }, { "epoch": 0.01736227045075125, "grad_norm": 68.93110834477764, "learning_rate": 5.78420467185762e-07, "loss": 7.0857, "step": 52 }, { "epoch": 0.017696160267111852, "grad_norm": 70.82991245873612, "learning_rate": 5.89543937708565e-07, "loss": 6.9734, "step": 53 }, { "epoch": 0.018030050083472453, "grad_norm": 92.38277942180727, "learning_rate": 6.006674082313682e-07, "loss": 6.9461, "step": 54 }, { "epoch": 0.018363939899833055, "grad_norm": 46.41267556617961, "learning_rate": 6.117908787541713e-07, "loss": 6.9468, "step": 55 }, { "epoch": 0.018697829716193656, "grad_norm": 78.34144556850109, "learning_rate": 6.229143492769744e-07, "loss": 6.8162, "step": 56 }, { "epoch": 0.019031719532554257, "grad_norm": 63.9598029957539, "learning_rate": 6.340378197997777e-07, "loss": 6.8664, "step": 57 }, { "epoch": 0.019365609348914858, "grad_norm": 134.15332360297074, "learning_rate": 6.451612903225807e-07, "loss": 6.784, "step": 58 }, { "epoch": 0.01969949916527546, "grad_norm": 92.76603793266649, "learning_rate": 6.562847608453838e-07, "loss": 6.6961, "step": 59 }, { "epoch": 0.02003338898163606, "grad_norm": 97.97210850553671, "learning_rate": 6.67408231368187e-07, "loss": 6.6904, "step": 60 }, { "epoch": 0.02036727879799666, "grad_norm": 44.59726069482417, "learning_rate": 6.785317018909901e-07, "loss": 6.6124, "step": 61 }, { "epoch": 0.020701168614357262, "grad_norm": 51.77481595710821, "learning_rate": 6.896551724137931e-07, "loss": 6.5969, "step": 62 }, { "epoch": 0.021035058430717863, "grad_norm": 98.73631668572287, "learning_rate": 7.007786429365964e-07, "loss": 6.5475, "step": 63 }, { "epoch": 0.021368948247078464, "grad_norm": 55.510703770585174, "learning_rate": 7.119021134593994e-07, "loss": 6.5186, "step": 64 }, { "epoch": 0.021702838063439065, "grad_norm": 90.97675128536918, "learning_rate": 7.230255839822026e-07, "loss": 6.4541, "step": 65 }, { "epoch": 0.022036727879799666, "grad_norm": 55.6301593605774, "learning_rate": 7.341490545050057e-07, "loss": 6.4067, "step": 66 }, { "epoch": 0.022370617696160267, "grad_norm": 39.63560672753181, "learning_rate": 7.452725250278087e-07, "loss": 6.4226, "step": 67 }, { "epoch": 0.02270450751252087, "grad_norm": 58.97349732485422, "learning_rate": 7.563959955506118e-07, "loss": 6.3187, "step": 68 }, { "epoch": 0.02303839732888147, "grad_norm": 76.90522368578092, "learning_rate": 7.675194660734149e-07, "loss": 6.302, "step": 69 }, { "epoch": 0.02337228714524207, "grad_norm": 61.80619588614267, "learning_rate": 7.786429365962181e-07, "loss": 6.2978, "step": 70 }, { "epoch": 0.02370617696160267, "grad_norm": 38.93449586461408, "learning_rate": 7.897664071190211e-07, "loss": 6.2416, "step": 71 }, { "epoch": 0.024040066777963272, "grad_norm": 34.56658870964989, "learning_rate": 8.008898776418243e-07, "loss": 6.1725, "step": 72 }, { "epoch": 0.024373956594323874, "grad_norm": 105.20474196578041, "learning_rate": 8.120133481646274e-07, "loss": 6.1593, "step": 73 }, { "epoch": 0.024707846410684475, "grad_norm": 66.50327179526974, "learning_rate": 8.231368186874306e-07, "loss": 6.1327, "step": 74 }, { "epoch": 0.025041736227045076, "grad_norm": 68.19504004285164, "learning_rate": 8.342602892102336e-07, "loss": 6.0962, "step": 75 }, { "epoch": 0.025375626043405677, "grad_norm": 151.23453341915774, "learning_rate": 8.453837597330368e-07, "loss": 6.0397, "step": 76 }, { "epoch": 0.025709515859766278, "grad_norm": 49.23240878329714, "learning_rate": 8.565072302558399e-07, "loss": 5.9815, "step": 77 }, { "epoch": 0.02604340567612688, "grad_norm": 158.94230532909987, "learning_rate": 8.67630700778643e-07, "loss": 5.9764, "step": 78 }, { "epoch": 0.02637729549248748, "grad_norm": 55.13491797625038, "learning_rate": 8.78754171301446e-07, "loss": 5.9113, "step": 79 }, { "epoch": 0.02671118530884808, "grad_norm": 133.61394331295475, "learning_rate": 8.898776418242493e-07, "loss": 5.895, "step": 80 }, { "epoch": 0.027045075125208682, "grad_norm": 92.25057412323504, "learning_rate": 9.010011123470523e-07, "loss": 5.8838, "step": 81 }, { "epoch": 0.027378964941569283, "grad_norm": 124.34967991840215, "learning_rate": 9.121245828698556e-07, "loss": 5.8452, "step": 82 }, { "epoch": 0.027712854757929884, "grad_norm": 68.0445808827216, "learning_rate": 9.232480533926586e-07, "loss": 5.7886, "step": 83 }, { "epoch": 0.028046744574290485, "grad_norm": 65.79750611587649, "learning_rate": 9.343715239154617e-07, "loss": 5.6513, "step": 84 }, { "epoch": 0.028380634390651086, "grad_norm": 153.84861375996604, "learning_rate": 9.454949944382647e-07, "loss": 5.7493, "step": 85 }, { "epoch": 0.028714524207011687, "grad_norm": 70.5384142083367, "learning_rate": 9.566184649610679e-07, "loss": 5.6607, "step": 86 }, { "epoch": 0.02904841402337229, "grad_norm": 64.03172810594731, "learning_rate": 9.67741935483871e-07, "loss": 5.6244, "step": 87 }, { "epoch": 0.02938230383973289, "grad_norm": 59.66057323197273, "learning_rate": 9.788654060066741e-07, "loss": 5.6109, "step": 88 }, { "epoch": 0.02971619365609349, "grad_norm": 160.13212941549682, "learning_rate": 9.899888765294773e-07, "loss": 5.5513, "step": 89 }, { "epoch": 0.03005008347245409, "grad_norm": 101.13292054802967, "learning_rate": 1.0011123470522804e-06, "loss": 5.5507, "step": 90 }, { "epoch": 0.030383973288814693, "grad_norm": 87.38134950363536, "learning_rate": 1.0122358175750835e-06, "loss": 5.477, "step": 91 }, { "epoch": 0.030717863105175294, "grad_norm": 101.4373375081066, "learning_rate": 1.0233592880978867e-06, "loss": 5.4166, "step": 92 }, { "epoch": 0.031051752921535895, "grad_norm": 199.59553676678837, "learning_rate": 1.0344827586206898e-06, "loss": 5.4549, "step": 93 }, { "epoch": 0.03138564273789649, "grad_norm": 116.8928466951783, "learning_rate": 1.045606229143493e-06, "loss": 5.4251, "step": 94 }, { "epoch": 0.03171953255425709, "grad_norm": 92.12456801515826, "learning_rate": 1.056729699666296e-06, "loss": 5.3301, "step": 95 }, { "epoch": 0.032053422370617694, "grad_norm": 71.97719979974413, "learning_rate": 1.0678531701890992e-06, "loss": 5.2959, "step": 96 }, { "epoch": 0.032387312186978295, "grad_norm": 248.63794262233492, "learning_rate": 1.0789766407119021e-06, "loss": 5.4024, "step": 97 }, { "epoch": 0.032721202003338896, "grad_norm": 183.30362192464503, "learning_rate": 1.0901001112347055e-06, "loss": 5.2293, "step": 98 }, { "epoch": 0.0330550918196995, "grad_norm": 203.35024008371082, "learning_rate": 1.1012235817575084e-06, "loss": 5.2214, "step": 99 }, { "epoch": 0.0333889816360601, "grad_norm": 195.01790394880922, "learning_rate": 1.1123470522803115e-06, "loss": 5.142, "step": 100 }, { "epoch": 0.0337228714524207, "grad_norm": 66.66998365741918, "learning_rate": 1.1234705228031146e-06, "loss": 5.1244, "step": 101 }, { "epoch": 0.0340567612687813, "grad_norm": 123.41721921192016, "learning_rate": 1.1345939933259178e-06, "loss": 5.0934, "step": 102 }, { "epoch": 0.0343906510851419, "grad_norm": 274.54408521568376, "learning_rate": 1.145717463848721e-06, "loss": 5.0894, "step": 103 }, { "epoch": 0.0347245409015025, "grad_norm": 154.9499354775396, "learning_rate": 1.156840934371524e-06, "loss": 5.0639, "step": 104 }, { "epoch": 0.035058430717863104, "grad_norm": 242.05817581328606, "learning_rate": 1.1679644048943272e-06, "loss": 5.1062, "step": 105 }, { "epoch": 0.035392320534223705, "grad_norm": 119.58946251847932, "learning_rate": 1.17908787541713e-06, "loss": 4.922, "step": 106 }, { "epoch": 0.035726210350584306, "grad_norm": 156.28078787230294, "learning_rate": 1.1902113459399334e-06, "loss": 4.999, "step": 107 }, { "epoch": 0.03606010016694491, "grad_norm": 187.0907922946408, "learning_rate": 1.2013348164627363e-06, "loss": 4.8835, "step": 108 }, { "epoch": 0.03639398998330551, "grad_norm": 90.65088246316944, "learning_rate": 1.2124582869855397e-06, "loss": 4.8479, "step": 109 }, { "epoch": 0.03672787979966611, "grad_norm": 95.99697810315715, "learning_rate": 1.2235817575083426e-06, "loss": 4.8726, "step": 110 }, { "epoch": 0.03706176961602671, "grad_norm": 232.01922093522725, "learning_rate": 1.2347052280311457e-06, "loss": 4.8559, "step": 111 }, { "epoch": 0.03739565943238731, "grad_norm": 101.02389854216038, "learning_rate": 1.2458286985539489e-06, "loss": 4.7486, "step": 112 }, { "epoch": 0.03772954924874791, "grad_norm": 300.45751738844723, "learning_rate": 1.256952169076752e-06, "loss": 4.8966, "step": 113 }, { "epoch": 0.03806343906510851, "grad_norm": 169.55453604249786, "learning_rate": 1.2680756395995554e-06, "loss": 4.6904, "step": 114 }, { "epoch": 0.038397328881469114, "grad_norm": 239.09652696876913, "learning_rate": 1.2791991101223583e-06, "loss": 4.8345, "step": 115 }, { "epoch": 0.038731218697829715, "grad_norm": 175.5541608378981, "learning_rate": 1.2903225806451614e-06, "loss": 4.7291, "step": 116 }, { "epoch": 0.039065108514190316, "grad_norm": 198.23144387379332, "learning_rate": 1.3014460511679643e-06, "loss": 4.725, "step": 117 }, { "epoch": 0.03939899833055092, "grad_norm": 174.36408325903204, "learning_rate": 1.3125695216907677e-06, "loss": 4.6986, "step": 118 }, { "epoch": 0.03973288814691152, "grad_norm": 159.02484492579288, "learning_rate": 1.3236929922135708e-06, "loss": 4.6595, "step": 119 }, { "epoch": 0.04006677796327212, "grad_norm": 201.73338616852456, "learning_rate": 1.334816462736374e-06, "loss": 4.5912, "step": 120 }, { "epoch": 0.04040066777963272, "grad_norm": 127.02808058622512, "learning_rate": 1.3459399332591769e-06, "loss": 4.5643, "step": 121 }, { "epoch": 0.04073455759599332, "grad_norm": 108.25811328786924, "learning_rate": 1.3570634037819802e-06, "loss": 4.5018, "step": 122 }, { "epoch": 0.04106844741235392, "grad_norm": 162.49022169399458, "learning_rate": 1.3681868743047833e-06, "loss": 4.4681, "step": 123 }, { "epoch": 0.041402337228714524, "grad_norm": 130.85076877808774, "learning_rate": 1.3793103448275862e-06, "loss": 4.4357, "step": 124 }, { "epoch": 0.041736227045075125, "grad_norm": 197.54065546397842, "learning_rate": 1.3904338153503894e-06, "loss": 4.4471, "step": 125 }, { "epoch": 0.042070116861435726, "grad_norm": 89.90707131181375, "learning_rate": 1.4015572858731927e-06, "loss": 4.3742, "step": 126 }, { "epoch": 0.04240400667779633, "grad_norm": 155.90928496923297, "learning_rate": 1.4126807563959956e-06, "loss": 4.3428, "step": 127 }, { "epoch": 0.04273789649415693, "grad_norm": 120.55751132566563, "learning_rate": 1.4238042269187988e-06, "loss": 4.3605, "step": 128 }, { "epoch": 0.04307178631051753, "grad_norm": 167.00889569439354, "learning_rate": 1.434927697441602e-06, "loss": 4.3507, "step": 129 }, { "epoch": 0.04340567612687813, "grad_norm": 205.35179362655944, "learning_rate": 1.4460511679644053e-06, "loss": 4.2726, "step": 130 }, { "epoch": 0.04373956594323873, "grad_norm": 95.63358756189014, "learning_rate": 1.4571746384872082e-06, "loss": 4.2919, "step": 131 }, { "epoch": 0.04407345575959933, "grad_norm": 126.5794737787058, "learning_rate": 1.4682981090100113e-06, "loss": 4.1781, "step": 132 }, { "epoch": 0.04440734557595993, "grad_norm": 167.5270625363879, "learning_rate": 1.4794215795328142e-06, "loss": 4.1282, "step": 133 }, { "epoch": 0.044741235392320534, "grad_norm": 173.15817162885753, "learning_rate": 1.4905450500556174e-06, "loss": 4.2039, "step": 134 }, { "epoch": 0.045075125208681135, "grad_norm": 170.14117016536457, "learning_rate": 1.5016685205784207e-06, "loss": 4.1892, "step": 135 }, { "epoch": 0.04540901502504174, "grad_norm": 119.95458845251275, "learning_rate": 1.5127919911012236e-06, "loss": 4.0762, "step": 136 }, { "epoch": 0.04574290484140234, "grad_norm": 134.5597700370907, "learning_rate": 1.5239154616240268e-06, "loss": 4.0247, "step": 137 }, { "epoch": 0.04607679465776294, "grad_norm": 119.20983678327627, "learning_rate": 1.5350389321468299e-06, "loss": 4.0068, "step": 138 }, { "epoch": 0.04641068447412354, "grad_norm": 132.00519172675575, "learning_rate": 1.5461624026696332e-06, "loss": 4.0485, "step": 139 }, { "epoch": 0.04674457429048414, "grad_norm": 87.18559278631152, "learning_rate": 1.5572858731924361e-06, "loss": 4.0505, "step": 140 }, { "epoch": 0.04707846410684474, "grad_norm": 120.77634907859779, "learning_rate": 1.5684093437152393e-06, "loss": 3.9474, "step": 141 }, { "epoch": 0.04741235392320534, "grad_norm": 117.82933560675065, "learning_rate": 1.5795328142380422e-06, "loss": 3.9909, "step": 142 }, { "epoch": 0.047746243739565944, "grad_norm": 208.70752611192867, "learning_rate": 1.5906562847608455e-06, "loss": 3.9833, "step": 143 }, { "epoch": 0.048080133555926545, "grad_norm": 118.61439895546647, "learning_rate": 1.6017797552836487e-06, "loss": 3.9022, "step": 144 }, { "epoch": 0.048414023372287146, "grad_norm": 93.87456241157244, "learning_rate": 1.6129032258064516e-06, "loss": 3.8388, "step": 145 }, { "epoch": 0.04874791318864775, "grad_norm": 163.45731088475577, "learning_rate": 1.6240266963292547e-06, "loss": 3.8678, "step": 146 }, { "epoch": 0.04908180300500835, "grad_norm": 198.39807569519633, "learning_rate": 1.635150166852058e-06, "loss": 3.8578, "step": 147 }, { "epoch": 0.04941569282136895, "grad_norm": 114.27351567696023, "learning_rate": 1.6462736373748612e-06, "loss": 3.8824, "step": 148 }, { "epoch": 0.04974958263772955, "grad_norm": 134.7079865824267, "learning_rate": 1.6573971078976641e-06, "loss": 3.8538, "step": 149 }, { "epoch": 0.05008347245409015, "grad_norm": 125.6726570124528, "learning_rate": 1.6685205784204673e-06, "loss": 3.832, "step": 150 }, { "epoch": 0.05041736227045075, "grad_norm": 174.76906665179945, "learning_rate": 1.6796440489432706e-06, "loss": 3.8902, "step": 151 }, { "epoch": 0.05075125208681135, "grad_norm": 78.92594538850578, "learning_rate": 1.6907675194660735e-06, "loss": 3.7063, "step": 152 }, { "epoch": 0.051085141903171954, "grad_norm": 190.97424491042142, "learning_rate": 1.7018909899888767e-06, "loss": 3.8559, "step": 153 }, { "epoch": 0.051419031719532556, "grad_norm": 134.0847500840248, "learning_rate": 1.7130144605116798e-06, "loss": 3.8576, "step": 154 }, { "epoch": 0.05175292153589316, "grad_norm": 116.1028757180608, "learning_rate": 1.724137931034483e-06, "loss": 3.7552, "step": 155 }, { "epoch": 0.05208681135225376, "grad_norm": 96.70658387618464, "learning_rate": 1.735261401557286e-06, "loss": 3.6765, "step": 156 }, { "epoch": 0.05242070116861436, "grad_norm": 87.85903446131341, "learning_rate": 1.7463848720800892e-06, "loss": 3.6532, "step": 157 }, { "epoch": 0.05275459098497496, "grad_norm": 104.06306755999711, "learning_rate": 1.757508342602892e-06, "loss": 3.6619, "step": 158 }, { "epoch": 0.05308848080133556, "grad_norm": 113.25388251513911, "learning_rate": 1.7686318131256954e-06, "loss": 3.6375, "step": 159 }, { "epoch": 0.05342237061769616, "grad_norm": 78.34098415105056, "learning_rate": 1.7797552836484986e-06, "loss": 3.6895, "step": 160 }, { "epoch": 0.05375626043405676, "grad_norm": 158.32338824895228, "learning_rate": 1.7908787541713015e-06, "loss": 3.7019, "step": 161 }, { "epoch": 0.054090150250417364, "grad_norm": 88.57457127406087, "learning_rate": 1.8020022246941046e-06, "loss": 3.563, "step": 162 }, { "epoch": 0.054424040066777965, "grad_norm": 151.39816865248517, "learning_rate": 1.813125695216908e-06, "loss": 3.6432, "step": 163 }, { "epoch": 0.054757929883138566, "grad_norm": 83.18472949320682, "learning_rate": 1.824249165739711e-06, "loss": 3.609, "step": 164 }, { "epoch": 0.05509181969949917, "grad_norm": 120.30416715966894, "learning_rate": 1.835372636262514e-06, "loss": 3.6699, "step": 165 }, { "epoch": 0.05542570951585977, "grad_norm": 110.3427275827079, "learning_rate": 1.8464961067853172e-06, "loss": 3.6364, "step": 166 }, { "epoch": 0.05575959933222037, "grad_norm": 122.09863024775031, "learning_rate": 1.85761957730812e-06, "loss": 3.6099, "step": 167 }, { "epoch": 0.05609348914858097, "grad_norm": 105.14519843424003, "learning_rate": 1.8687430478309234e-06, "loss": 3.6623, "step": 168 }, { "epoch": 0.05642737896494157, "grad_norm": 84.27252299302224, "learning_rate": 1.8798665183537266e-06, "loss": 3.5156, "step": 169 }, { "epoch": 0.05676126878130217, "grad_norm": 83.44156103388129, "learning_rate": 1.8909899888765295e-06, "loss": 3.4071, "step": 170 }, { "epoch": 0.05709515859766277, "grad_norm": 134.23018888836117, "learning_rate": 1.9021134593993326e-06, "loss": 3.559, "step": 171 }, { "epoch": 0.057429048414023375, "grad_norm": 113.03081927294559, "learning_rate": 1.9132369299221357e-06, "loss": 3.5709, "step": 172 }, { "epoch": 0.057762938230383976, "grad_norm": 99.64566046918779, "learning_rate": 1.924360400444939e-06, "loss": 3.5239, "step": 173 }, { "epoch": 0.05809682804674458, "grad_norm": 117.28852575274705, "learning_rate": 1.935483870967742e-06, "loss": 3.5614, "step": 174 }, { "epoch": 0.05843071786310518, "grad_norm": 83.82066694317069, "learning_rate": 1.946607341490545e-06, "loss": 3.4658, "step": 175 }, { "epoch": 0.05876460767946578, "grad_norm": 128.92619338151562, "learning_rate": 1.9577308120133483e-06, "loss": 3.5121, "step": 176 }, { "epoch": 0.05909849749582638, "grad_norm": 93.44903810361156, "learning_rate": 1.9688542825361514e-06, "loss": 3.4032, "step": 177 }, { "epoch": 0.05943238731218698, "grad_norm": 84.50188344867463, "learning_rate": 1.9799777530589545e-06, "loss": 3.4875, "step": 178 }, { "epoch": 0.05976627712854758, "grad_norm": 76.1246698034132, "learning_rate": 1.9911012235817577e-06, "loss": 3.389, "step": 179 }, { "epoch": 0.06010016694490818, "grad_norm": 99.15375747246617, "learning_rate": 2.002224694104561e-06, "loss": 3.3335, "step": 180 }, { "epoch": 0.060434056761268784, "grad_norm": 114.54284701065534, "learning_rate": 2.013348164627364e-06, "loss": 3.4242, "step": 181 }, { "epoch": 0.060767946577629385, "grad_norm": 70.05333657960676, "learning_rate": 2.024471635150167e-06, "loss": 3.4021, "step": 182 }, { "epoch": 0.061101836393989986, "grad_norm": 70.57485034855756, "learning_rate": 2.03559510567297e-06, "loss": 3.4195, "step": 183 }, { "epoch": 0.06143572621035059, "grad_norm": 74.27146409049645, "learning_rate": 2.0467185761957733e-06, "loss": 3.3861, "step": 184 }, { "epoch": 0.06176961602671119, "grad_norm": 65.58538151916181, "learning_rate": 2.0578420467185764e-06, "loss": 3.309, "step": 185 }, { "epoch": 0.06210350584307179, "grad_norm": 88.63229033796509, "learning_rate": 2.0689655172413796e-06, "loss": 3.3782, "step": 186 }, { "epoch": 0.06243739565943239, "grad_norm": 98.65150543772567, "learning_rate": 2.0800889877641823e-06, "loss": 3.3155, "step": 187 }, { "epoch": 0.06277128547579298, "grad_norm": 57.98327227382355, "learning_rate": 2.091212458286986e-06, "loss": 3.3817, "step": 188 }, { "epoch": 0.06310517529215359, "grad_norm": 134.22903574534777, "learning_rate": 2.102335928809789e-06, "loss": 3.3494, "step": 189 }, { "epoch": 0.06343906510851419, "grad_norm": 74.91594079790379, "learning_rate": 2.113459399332592e-06, "loss": 3.3277, "step": 190 }, { "epoch": 0.0637729549248748, "grad_norm": 71.61812609612664, "learning_rate": 2.124582869855395e-06, "loss": 3.268, "step": 191 }, { "epoch": 0.06410684474123539, "grad_norm": 92.73641579040745, "learning_rate": 2.1357063403781984e-06, "loss": 3.3447, "step": 192 }, { "epoch": 0.064440734557596, "grad_norm": 80.24320777272665, "learning_rate": 2.1468298109010015e-06, "loss": 3.2463, "step": 193 }, { "epoch": 0.06477462437395659, "grad_norm": 81.17937322213157, "learning_rate": 2.1579532814238042e-06, "loss": 3.1959, "step": 194 }, { "epoch": 0.0651085141903172, "grad_norm": 66.07660858551452, "learning_rate": 2.1690767519466073e-06, "loss": 3.2151, "step": 195 }, { "epoch": 0.06544240400667779, "grad_norm": 87.51445250432486, "learning_rate": 2.180200222469411e-06, "loss": 3.2457, "step": 196 }, { "epoch": 0.0657762938230384, "grad_norm": 65.05498631647397, "learning_rate": 2.1913236929922136e-06, "loss": 3.2514, "step": 197 }, { "epoch": 0.066110183639399, "grad_norm": 82.4412870022299, "learning_rate": 2.2024471635150167e-06, "loss": 3.2548, "step": 198 }, { "epoch": 0.0664440734557596, "grad_norm": 49.70574102263868, "learning_rate": 2.21357063403782e-06, "loss": 3.1154, "step": 199 }, { "epoch": 0.0667779632721202, "grad_norm": 73.59669611754823, "learning_rate": 2.224694104560623e-06, "loss": 3.0974, "step": 200 }, { "epoch": 0.0671118530884808, "grad_norm": 62.90399740687655, "learning_rate": 2.235817575083426e-06, "loss": 3.1553, "step": 201 }, { "epoch": 0.0674457429048414, "grad_norm": 69.62371879996189, "learning_rate": 2.2469410456062293e-06, "loss": 3.1572, "step": 202 }, { "epoch": 0.06777963272120201, "grad_norm": 75.2971990685756, "learning_rate": 2.2580645161290324e-06, "loss": 3.1062, "step": 203 }, { "epoch": 0.0681135225375626, "grad_norm": 64.3063336536269, "learning_rate": 2.2691879866518355e-06, "loss": 3.1155, "step": 204 }, { "epoch": 0.06844741235392321, "grad_norm": 67.52002560241402, "learning_rate": 2.2803114571746387e-06, "loss": 3.0101, "step": 205 }, { "epoch": 0.0687813021702838, "grad_norm": 88.17057719376429, "learning_rate": 2.291434927697442e-06, "loss": 3.0855, "step": 206 }, { "epoch": 0.06911519198664441, "grad_norm": 70.15531186379388, "learning_rate": 2.302558398220245e-06, "loss": 3.0148, "step": 207 }, { "epoch": 0.069449081803005, "grad_norm": 41.00291090981391, "learning_rate": 2.313681868743048e-06, "loss": 3.0015, "step": 208 }, { "epoch": 0.06978297161936561, "grad_norm": 58.657408641895614, "learning_rate": 2.324805339265851e-06, "loss": 3.0655, "step": 209 }, { "epoch": 0.07011686143572621, "grad_norm": 61.67998024066119, "learning_rate": 2.3359288097886543e-06, "loss": 3.0076, "step": 210 }, { "epoch": 0.07045075125208682, "grad_norm": 66.13896520682852, "learning_rate": 2.3470522803114575e-06, "loss": 3.0265, "step": 211 }, { "epoch": 0.07078464106844741, "grad_norm": 60.02142041181616, "learning_rate": 2.35817575083426e-06, "loss": 3.0338, "step": 212 }, { "epoch": 0.07111853088480802, "grad_norm": 70.79100299014138, "learning_rate": 2.3692992213570637e-06, "loss": 2.9995, "step": 213 }, { "epoch": 0.07145242070116861, "grad_norm": 50.36413268664836, "learning_rate": 2.380422691879867e-06, "loss": 3.0069, "step": 214 }, { "epoch": 0.07178631051752922, "grad_norm": 71.53297611189717, "learning_rate": 2.39154616240267e-06, "loss": 2.9139, "step": 215 }, { "epoch": 0.07212020033388981, "grad_norm": 59.43929774050386, "learning_rate": 2.4026696329254727e-06, "loss": 2.8915, "step": 216 }, { "epoch": 0.07245409015025042, "grad_norm": 59.65285017999686, "learning_rate": 2.4137931034482762e-06, "loss": 2.8274, "step": 217 }, { "epoch": 0.07278797996661102, "grad_norm": 54.93730304466695, "learning_rate": 2.4249165739710794e-06, "loss": 2.9065, "step": 218 }, { "epoch": 0.07312186978297162, "grad_norm": 42.85115898803734, "learning_rate": 2.436040044493882e-06, "loss": 2.9552, "step": 219 }, { "epoch": 0.07345575959933222, "grad_norm": 72.56838419523433, "learning_rate": 2.4471635150166852e-06, "loss": 3.0145, "step": 220 }, { "epoch": 0.07378964941569283, "grad_norm": 66.42284115184401, "learning_rate": 2.4582869855394888e-06, "loss": 3.042, "step": 221 }, { "epoch": 0.07412353923205342, "grad_norm": 71.06291638956603, "learning_rate": 2.4694104560622915e-06, "loss": 2.9803, "step": 222 }, { "epoch": 0.07445742904841403, "grad_norm": 44.339827896872784, "learning_rate": 2.4805339265850946e-06, "loss": 2.8887, "step": 223 }, { "epoch": 0.07479131886477462, "grad_norm": 76.02767154267947, "learning_rate": 2.4916573971078977e-06, "loss": 2.9057, "step": 224 }, { "epoch": 0.07512520868113523, "grad_norm": 57.591325800561705, "learning_rate": 2.502780867630701e-06, "loss": 2.9766, "step": 225 }, { "epoch": 0.07545909849749582, "grad_norm": 47.190894734501036, "learning_rate": 2.513904338153504e-06, "loss": 2.8198, "step": 226 }, { "epoch": 0.07579298831385643, "grad_norm": 52.024842744415295, "learning_rate": 2.5250278086763076e-06, "loss": 2.863, "step": 227 }, { "epoch": 0.07612687813021703, "grad_norm": 62.57034543674044, "learning_rate": 2.5361512791991107e-06, "loss": 2.9462, "step": 228 }, { "epoch": 0.07646076794657763, "grad_norm": 57.79699666342125, "learning_rate": 2.5472747497219134e-06, "loss": 2.9066, "step": 229 }, { "epoch": 0.07679465776293823, "grad_norm": 49.51137147360837, "learning_rate": 2.5583982202447165e-06, "loss": 2.7778, "step": 230 }, { "epoch": 0.07712854757929884, "grad_norm": 64.10058057174598, "learning_rate": 2.5695216907675197e-06, "loss": 2.8164, "step": 231 }, { "epoch": 0.07746243739565943, "grad_norm": 45.58295831034494, "learning_rate": 2.580645161290323e-06, "loss": 2.8476, "step": 232 }, { "epoch": 0.07779632721202004, "grad_norm": 49.78960210904009, "learning_rate": 2.591768631813126e-06, "loss": 2.7887, "step": 233 }, { "epoch": 0.07813021702838063, "grad_norm": 45.1161140094017, "learning_rate": 2.6028921023359286e-06, "loss": 2.7943, "step": 234 }, { "epoch": 0.07846410684474124, "grad_norm": 61.08975250842947, "learning_rate": 2.6140155728587318e-06, "loss": 2.7494, "step": 235 }, { "epoch": 0.07879799666110184, "grad_norm": 52.404806314762894, "learning_rate": 2.6251390433815353e-06, "loss": 2.7796, "step": 236 }, { "epoch": 0.07913188647746244, "grad_norm": 51.34589082969784, "learning_rate": 2.6362625139043385e-06, "loss": 2.764, "step": 237 }, { "epoch": 0.07946577629382304, "grad_norm": 51.07290884700787, "learning_rate": 2.6473859844271416e-06, "loss": 2.7691, "step": 238 }, { "epoch": 0.07979966611018365, "grad_norm": 58.19533163895181, "learning_rate": 2.6585094549499447e-06, "loss": 2.7692, "step": 239 }, { "epoch": 0.08013355592654424, "grad_norm": 40.843216618396966, "learning_rate": 2.669632925472748e-06, "loss": 2.6886, "step": 240 }, { "epoch": 0.08046744574290485, "grad_norm": 55.78303500514544, "learning_rate": 2.6807563959955506e-06, "loss": 2.8131, "step": 241 }, { "epoch": 0.08080133555926544, "grad_norm": 50.50292120854103, "learning_rate": 2.6918798665183537e-06, "loss": 2.7187, "step": 242 }, { "epoch": 0.08113522537562605, "grad_norm": 34.27280607377215, "learning_rate": 2.703003337041157e-06, "loss": 2.6669, "step": 243 }, { "epoch": 0.08146911519198664, "grad_norm": 63.7617703966154, "learning_rate": 2.7141268075639604e-06, "loss": 2.7954, "step": 244 }, { "epoch": 0.08180300500834725, "grad_norm": 46.90162169294651, "learning_rate": 2.7252502780867635e-06, "loss": 2.7044, "step": 245 }, { "epoch": 0.08213689482470785, "grad_norm": 43.82037639224682, "learning_rate": 2.7363737486095667e-06, "loss": 2.6281, "step": 246 }, { "epoch": 0.08247078464106845, "grad_norm": 56.7993707793771, "learning_rate": 2.7474972191323694e-06, "loss": 2.6912, "step": 247 }, { "epoch": 0.08280467445742905, "grad_norm": 49.27787875758428, "learning_rate": 2.7586206896551725e-06, "loss": 2.7491, "step": 248 }, { "epoch": 0.08313856427378966, "grad_norm": 36.14329812802357, "learning_rate": 2.7697441601779756e-06, "loss": 2.6756, "step": 249 }, { "epoch": 0.08347245409015025, "grad_norm": 60.138854895142444, "learning_rate": 2.7808676307007788e-06, "loss": 2.6142, "step": 250 }, { "epoch": 0.08380634390651086, "grad_norm": 52.59998321374202, "learning_rate": 2.791991101223582e-06, "loss": 2.6057, "step": 251 }, { "epoch": 0.08414023372287145, "grad_norm": 48.43451314680077, "learning_rate": 2.8031145717463854e-06, "loss": 2.6179, "step": 252 }, { "epoch": 0.08447412353923206, "grad_norm": 45.45644274039688, "learning_rate": 2.8142380422691886e-06, "loss": 2.6081, "step": 253 }, { "epoch": 0.08480801335559265, "grad_norm": 45.0493403135486, "learning_rate": 2.8253615127919913e-06, "loss": 2.5945, "step": 254 }, { "epoch": 0.08514190317195326, "grad_norm": 38.03747360584927, "learning_rate": 2.8364849833147944e-06, "loss": 2.5291, "step": 255 }, { "epoch": 0.08547579298831386, "grad_norm": 65.5445096801032, "learning_rate": 2.8476084538375975e-06, "loss": 2.6372, "step": 256 }, { "epoch": 0.08580968280467446, "grad_norm": 42.07563622001589, "learning_rate": 2.8587319243604007e-06, "loss": 2.559, "step": 257 }, { "epoch": 0.08614357262103506, "grad_norm": 47.99510749985989, "learning_rate": 2.869855394883204e-06, "loss": 2.4751, "step": 258 }, { "epoch": 0.08647746243739567, "grad_norm": 47.37628471880271, "learning_rate": 2.8809788654060065e-06, "loss": 2.5815, "step": 259 }, { "epoch": 0.08681135225375626, "grad_norm": 41.932557621701676, "learning_rate": 2.8921023359288105e-06, "loss": 2.5258, "step": 260 }, { "epoch": 0.08714524207011685, "grad_norm": 54.33948011433492, "learning_rate": 2.903225806451613e-06, "loss": 2.6574, "step": 261 }, { "epoch": 0.08747913188647746, "grad_norm": 38.50779136797486, "learning_rate": 2.9143492769744163e-06, "loss": 2.538, "step": 262 }, { "epoch": 0.08781302170283806, "grad_norm": 33.363548869647445, "learning_rate": 2.9254727474972195e-06, "loss": 2.4775, "step": 263 }, { "epoch": 0.08814691151919866, "grad_norm": 48.06298785601842, "learning_rate": 2.9365962180200226e-06, "loss": 2.5013, "step": 264 }, { "epoch": 0.08848080133555926, "grad_norm": 36.662652103341586, "learning_rate": 2.9477196885428257e-06, "loss": 2.4901, "step": 265 }, { "epoch": 0.08881469115191987, "grad_norm": 37.07059399281797, "learning_rate": 2.9588431590656284e-06, "loss": 2.4117, "step": 266 }, { "epoch": 0.08914858096828046, "grad_norm": 47.238996402939414, "learning_rate": 2.9699666295884316e-06, "loss": 2.5303, "step": 267 }, { "epoch": 0.08948247078464107, "grad_norm": 39.40417661971486, "learning_rate": 2.9810901001112347e-06, "loss": 2.4729, "step": 268 }, { "epoch": 0.08981636060100166, "grad_norm": 32.62788335284814, "learning_rate": 2.9922135706340383e-06, "loss": 2.4708, "step": 269 }, { "epoch": 0.09015025041736227, "grad_norm": 32.57637052276208, "learning_rate": 3.0033370411568414e-06, "loss": 2.421, "step": 270 }, { "epoch": 0.09048414023372287, "grad_norm": 41.77637080555397, "learning_rate": 3.0144605116796445e-06, "loss": 2.395, "step": 271 }, { "epoch": 0.09081803005008347, "grad_norm": 37.22846316661132, "learning_rate": 3.0255839822024472e-06, "loss": 2.4078, "step": 272 }, { "epoch": 0.09115191986644407, "grad_norm": 39.237943823068164, "learning_rate": 3.0367074527252504e-06, "loss": 2.3556, "step": 273 }, { "epoch": 0.09148580968280468, "grad_norm": 53.43624286358602, "learning_rate": 3.0478309232480535e-06, "loss": 2.4451, "step": 274 }, { "epoch": 0.09181969949916527, "grad_norm": 32.66784625686936, "learning_rate": 3.0589543937708566e-06, "loss": 2.33, "step": 275 }, { "epoch": 0.09215358931552588, "grad_norm": 30.494719927726358, "learning_rate": 3.0700778642936598e-06, "loss": 2.47, "step": 276 }, { "epoch": 0.09248747913188647, "grad_norm": 45.76340839364368, "learning_rate": 3.0812013348164633e-06, "loss": 2.4188, "step": 277 }, { "epoch": 0.09282136894824708, "grad_norm": 50.659100942103414, "learning_rate": 3.0923248053392665e-06, "loss": 2.4112, "step": 278 }, { "epoch": 0.09315525876460767, "grad_norm": 29.095150947911602, "learning_rate": 3.103448275862069e-06, "loss": 2.3702, "step": 279 }, { "epoch": 0.09348914858096828, "grad_norm": 47.83453849877859, "learning_rate": 3.1145717463848723e-06, "loss": 2.4377, "step": 280 }, { "epoch": 0.09382303839732888, "grad_norm": 39.39663394088428, "learning_rate": 3.1256952169076754e-06, "loss": 2.3097, "step": 281 }, { "epoch": 0.09415692821368948, "grad_norm": 39.693518747770504, "learning_rate": 3.1368186874304786e-06, "loss": 2.4115, "step": 282 }, { "epoch": 0.09449081803005008, "grad_norm": 45.35407646000697, "learning_rate": 3.1479421579532817e-06, "loss": 2.3656, "step": 283 }, { "epoch": 0.09482470784641069, "grad_norm": 31.537190313403233, "learning_rate": 3.1590656284760844e-06, "loss": 2.3433, "step": 284 }, { "epoch": 0.09515859766277128, "grad_norm": 38.00781535608329, "learning_rate": 3.170189098998888e-06, "loss": 2.3027, "step": 285 }, { "epoch": 0.09549248747913189, "grad_norm": 33.97062118960957, "learning_rate": 3.181312569521691e-06, "loss": 2.3513, "step": 286 }, { "epoch": 0.09582637729549248, "grad_norm": 35.07757941379229, "learning_rate": 3.1924360400444942e-06, "loss": 2.2953, "step": 287 }, { "epoch": 0.09616026711185309, "grad_norm": 35.97278188603787, "learning_rate": 3.2035595105672973e-06, "loss": 2.2797, "step": 288 }, { "epoch": 0.09649415692821368, "grad_norm": 28.555998261626467, "learning_rate": 3.2146829810901005e-06, "loss": 2.2648, "step": 289 }, { "epoch": 0.09682804674457429, "grad_norm": 45.338132777833465, "learning_rate": 3.225806451612903e-06, "loss": 2.3662, "step": 290 }, { "epoch": 0.09716193656093489, "grad_norm": 33.5215147324301, "learning_rate": 3.2369299221357063e-06, "loss": 2.293, "step": 291 }, { "epoch": 0.0974958263772955, "grad_norm": 26.23862474423416, "learning_rate": 3.2480533926585095e-06, "loss": 2.2308, "step": 292 }, { "epoch": 0.09782971619365609, "grad_norm": 32.285881706931164, "learning_rate": 3.259176863181313e-06, "loss": 2.3027, "step": 293 }, { "epoch": 0.0981636060100167, "grad_norm": 39.74529815582136, "learning_rate": 3.270300333704116e-06, "loss": 2.2907, "step": 294 }, { "epoch": 0.09849749582637729, "grad_norm": 33.581595692376354, "learning_rate": 3.2814238042269193e-06, "loss": 2.3027, "step": 295 }, { "epoch": 0.0988313856427379, "grad_norm": 30.226474623654436, "learning_rate": 3.2925472747497224e-06, "loss": 2.3046, "step": 296 }, { "epoch": 0.09916527545909849, "grad_norm": 28.42366764731016, "learning_rate": 3.303670745272525e-06, "loss": 2.1812, "step": 297 }, { "epoch": 0.0994991652754591, "grad_norm": 31.02510080135308, "learning_rate": 3.3147942157953282e-06, "loss": 2.2262, "step": 298 }, { "epoch": 0.0998330550918197, "grad_norm": 42.54155951384411, "learning_rate": 3.3259176863181314e-06, "loss": 2.273, "step": 299 }, { "epoch": 0.1001669449081803, "grad_norm": 28.7531063071307, "learning_rate": 3.3370411568409345e-06, "loss": 2.1349, "step": 300 }, { "epoch": 0.1005008347245409, "grad_norm": 23.431791842327076, "learning_rate": 3.3481646273637376e-06, "loss": 2.1687, "step": 301 }, { "epoch": 0.1008347245409015, "grad_norm": 25.003787318212233, "learning_rate": 3.359288097886541e-06, "loss": 2.1516, "step": 302 }, { "epoch": 0.1011686143572621, "grad_norm": 34.28055284732511, "learning_rate": 3.3704115684093443e-06, "loss": 2.1766, "step": 303 }, { "epoch": 0.1015025041736227, "grad_norm": 22.246981184594073, "learning_rate": 3.381535038932147e-06, "loss": 2.2311, "step": 304 }, { "epoch": 0.1018363939899833, "grad_norm": 40.47741858909045, "learning_rate": 3.39265850945495e-06, "loss": 2.2245, "step": 305 }, { "epoch": 0.10217028380634391, "grad_norm": 29.34633801848772, "learning_rate": 3.4037819799777533e-06, "loss": 2.2142, "step": 306 }, { "epoch": 0.1025041736227045, "grad_norm": 31.958513626395373, "learning_rate": 3.4149054505005564e-06, "loss": 2.1793, "step": 307 }, { "epoch": 0.10283806343906511, "grad_norm": 29.84659399592041, "learning_rate": 3.4260289210233596e-06, "loss": 2.186, "step": 308 }, { "epoch": 0.1031719532554257, "grad_norm": 28.73790283787379, "learning_rate": 3.4371523915461623e-06, "loss": 2.1447, "step": 309 }, { "epoch": 0.10350584307178631, "grad_norm": 33.444297688582935, "learning_rate": 3.448275862068966e-06, "loss": 2.1608, "step": 310 }, { "epoch": 0.10383973288814691, "grad_norm": 24.699240622118868, "learning_rate": 3.459399332591769e-06, "loss": 2.1286, "step": 311 }, { "epoch": 0.10417362270450752, "grad_norm": 24.172927781551728, "learning_rate": 3.470522803114572e-06, "loss": 2.1262, "step": 312 }, { "epoch": 0.10450751252086811, "grad_norm": 27.28451121639845, "learning_rate": 3.4816462736373752e-06, "loss": 2.1261, "step": 313 }, { "epoch": 0.10484140233722872, "grad_norm": 32.13566961397857, "learning_rate": 3.4927697441601784e-06, "loss": 2.1944, "step": 314 }, { "epoch": 0.10517529215358931, "grad_norm": 27.74852311889766, "learning_rate": 3.503893214682981e-06, "loss": 2.2424, "step": 315 }, { "epoch": 0.10550918196994992, "grad_norm": 25.946761982402776, "learning_rate": 3.515016685205784e-06, "loss": 2.165, "step": 316 }, { "epoch": 0.10584307178631051, "grad_norm": 30.279196215395533, "learning_rate": 3.5261401557285873e-06, "loss": 2.08, "step": 317 }, { "epoch": 0.10617696160267112, "grad_norm": 22.967893296471562, "learning_rate": 3.537263626251391e-06, "loss": 2.0506, "step": 318 }, { "epoch": 0.10651085141903172, "grad_norm": 22.455482328858164, "learning_rate": 3.548387096774194e-06, "loss": 2.0725, "step": 319 }, { "epoch": 0.10684474123539232, "grad_norm": 23.590528969178017, "learning_rate": 3.559510567296997e-06, "loss": 2.0851, "step": 320 }, { "epoch": 0.10717863105175292, "grad_norm": 24.57959989184421, "learning_rate": 3.5706340378198003e-06, "loss": 2.0737, "step": 321 }, { "epoch": 0.10751252086811353, "grad_norm": 25.2222328818636, "learning_rate": 3.581757508342603e-06, "loss": 2.0484, "step": 322 }, { "epoch": 0.10784641068447412, "grad_norm": 25.740502125940992, "learning_rate": 3.592880978865406e-06, "loss": 2.0882, "step": 323 }, { "epoch": 0.10818030050083473, "grad_norm": 36.96091487125776, "learning_rate": 3.6040044493882093e-06, "loss": 2.1277, "step": 324 }, { "epoch": 0.10851419031719532, "grad_norm": 31.613980883246207, "learning_rate": 3.6151279199110124e-06, "loss": 1.9999, "step": 325 }, { "epoch": 0.10884808013355593, "grad_norm": 26.380790933518856, "learning_rate": 3.626251390433816e-06, "loss": 2.1162, "step": 326 }, { "epoch": 0.10918196994991652, "grad_norm": 24.193462935675242, "learning_rate": 3.637374860956619e-06, "loss": 2.0028, "step": 327 }, { "epoch": 0.10951585976627713, "grad_norm": 22.058296325333085, "learning_rate": 3.648498331479422e-06, "loss": 1.9763, "step": 328 }, { "epoch": 0.10984974958263773, "grad_norm": 21.134726744081934, "learning_rate": 3.659621802002225e-06, "loss": 1.9465, "step": 329 }, { "epoch": 0.11018363939899833, "grad_norm": 24.157191546474927, "learning_rate": 3.670745272525028e-06, "loss": 2.0087, "step": 330 }, { "epoch": 0.11051752921535893, "grad_norm": 22.17180799207362, "learning_rate": 3.681868743047831e-06, "loss": 2.0786, "step": 331 }, { "epoch": 0.11085141903171954, "grad_norm": 20.974586205022412, "learning_rate": 3.6929922135706343e-06, "loss": 1.9888, "step": 332 }, { "epoch": 0.11118530884808013, "grad_norm": 24.514513908942536, "learning_rate": 3.7041156840934374e-06, "loss": 1.997, "step": 333 }, { "epoch": 0.11151919866444074, "grad_norm": 18.558152997701622, "learning_rate": 3.71523915461624e-06, "loss": 1.9559, "step": 334 }, { "epoch": 0.11185308848080133, "grad_norm": 35.94128995691721, "learning_rate": 3.7263626251390437e-06, "loss": 2.1814, "step": 335 }, { "epoch": 0.11218697829716194, "grad_norm": 21.101176085479164, "learning_rate": 3.737486095661847e-06, "loss": 1.952, "step": 336 }, { "epoch": 0.11252086811352253, "grad_norm": 31.206405622505528, "learning_rate": 3.74860956618465e-06, "loss": 2.0164, "step": 337 }, { "epoch": 0.11285475792988314, "grad_norm": 24.119566620812364, "learning_rate": 3.759733036707453e-06, "loss": 1.9313, "step": 338 }, { "epoch": 0.11318864774624374, "grad_norm": 19.129483358919856, "learning_rate": 3.7708565072302562e-06, "loss": 1.9579, "step": 339 }, { "epoch": 0.11352253756260434, "grad_norm": 23.67241740782234, "learning_rate": 3.781979977753059e-06, "loss": 1.9482, "step": 340 }, { "epoch": 0.11385642737896494, "grad_norm": 21.260143260655674, "learning_rate": 3.793103448275862e-06, "loss": 1.9086, "step": 341 }, { "epoch": 0.11419031719532555, "grad_norm": 17.462678593675733, "learning_rate": 3.804226918798665e-06, "loss": 1.935, "step": 342 }, { "epoch": 0.11452420701168614, "grad_norm": 25.742043222363893, "learning_rate": 3.815350389321469e-06, "loss": 1.963, "step": 343 }, { "epoch": 0.11485809682804675, "grad_norm": 22.332203425025305, "learning_rate": 3.8264738598442715e-06, "loss": 1.9793, "step": 344 }, { "epoch": 0.11519198664440734, "grad_norm": 23.209042391928495, "learning_rate": 3.837597330367075e-06, "loss": 1.9137, "step": 345 }, { "epoch": 0.11552587646076795, "grad_norm": 15.370457019169066, "learning_rate": 3.848720800889878e-06, "loss": 1.8228, "step": 346 }, { "epoch": 0.11585976627712855, "grad_norm": 20.476548236835743, "learning_rate": 3.859844271412681e-06, "loss": 1.9109, "step": 347 }, { "epoch": 0.11619365609348915, "grad_norm": 17.534010970399823, "learning_rate": 3.870967741935484e-06, "loss": 1.918, "step": 348 }, { "epoch": 0.11652754590984975, "grad_norm": 19.24836723800628, "learning_rate": 3.8820912124582876e-06, "loss": 1.9004, "step": 349 }, { "epoch": 0.11686143572621036, "grad_norm": 18.167536495029868, "learning_rate": 3.89321468298109e-06, "loss": 1.8493, "step": 350 }, { "epoch": 0.11719532554257095, "grad_norm": 19.386669062185664, "learning_rate": 3.904338153503894e-06, "loss": 1.8457, "step": 351 }, { "epoch": 0.11752921535893156, "grad_norm": 18.980267219047114, "learning_rate": 3.9154616240266965e-06, "loss": 1.8635, "step": 352 }, { "epoch": 0.11786310517529215, "grad_norm": 20.21579179278481, "learning_rate": 3.9265850945495e-06, "loss": 1.9044, "step": 353 }, { "epoch": 0.11819699499165276, "grad_norm": 19.25670252241022, "learning_rate": 3.937708565072303e-06, "loss": 1.8807, "step": 354 }, { "epoch": 0.11853088480801335, "grad_norm": 18.449747462219474, "learning_rate": 3.948832035595106e-06, "loss": 1.8538, "step": 355 }, { "epoch": 0.11886477462437396, "grad_norm": 21.548563905380014, "learning_rate": 3.959955506117909e-06, "loss": 1.7964, "step": 356 }, { "epoch": 0.11919866444073456, "grad_norm": 22.603159276579028, "learning_rate": 3.971078976640712e-06, "loss": 1.8848, "step": 357 }, { "epoch": 0.11953255425709516, "grad_norm": 16.57522624733454, "learning_rate": 3.982202447163515e-06, "loss": 1.7855, "step": 358 }, { "epoch": 0.11986644407345576, "grad_norm": 24.991153389637677, "learning_rate": 3.993325917686319e-06, "loss": 1.8508, "step": 359 }, { "epoch": 0.12020033388981637, "grad_norm": 21.072092033258052, "learning_rate": 4.004449388209122e-06, "loss": 1.8686, "step": 360 }, { "epoch": 0.12053422370617696, "grad_norm": 23.940226460289633, "learning_rate": 4.015572858731925e-06, "loss": 1.94, "step": 361 }, { "epoch": 0.12086811352253757, "grad_norm": 16.629068079968206, "learning_rate": 4.026696329254728e-06, "loss": 1.7881, "step": 362 }, { "epoch": 0.12120200333889816, "grad_norm": 17.836255311285885, "learning_rate": 4.0378197997775306e-06, "loss": 1.7985, "step": 363 }, { "epoch": 0.12153589315525877, "grad_norm": 13.709028046545729, "learning_rate": 4.048943270300334e-06, "loss": 1.8191, "step": 364 }, { "epoch": 0.12186978297161936, "grad_norm": 22.23029182761428, "learning_rate": 4.060066740823137e-06, "loss": 1.8532, "step": 365 }, { "epoch": 0.12220367278797997, "grad_norm": 16.173136030989177, "learning_rate": 4.07119021134594e-06, "loss": 1.8283, "step": 366 }, { "epoch": 0.12253756260434057, "grad_norm": 23.66572704027634, "learning_rate": 4.082313681868743e-06, "loss": 1.8226, "step": 367 }, { "epoch": 0.12287145242070117, "grad_norm": 19.83430337806497, "learning_rate": 4.093437152391547e-06, "loss": 1.8683, "step": 368 }, { "epoch": 0.12320534223706177, "grad_norm": 16.960524756053058, "learning_rate": 4.104560622914349e-06, "loss": 1.683, "step": 369 }, { "epoch": 0.12353923205342238, "grad_norm": 15.24816423779075, "learning_rate": 4.115684093437153e-06, "loss": 1.6942, "step": 370 }, { "epoch": 0.12387312186978297, "grad_norm": 16.03989879892715, "learning_rate": 4.126807563959956e-06, "loss": 1.7539, "step": 371 }, { "epoch": 0.12420701168614358, "grad_norm": 17.074120340191584, "learning_rate": 4.137931034482759e-06, "loss": 1.8026, "step": 372 }, { "epoch": 0.12454090150250417, "grad_norm": 19.402872197701274, "learning_rate": 4.149054505005562e-06, "loss": 1.8025, "step": 373 }, { "epoch": 0.12487479131886478, "grad_norm": 23.984319606473406, "learning_rate": 4.160177975528365e-06, "loss": 1.7522, "step": 374 }, { "epoch": 0.12520868113522537, "grad_norm": 14.521735575719143, "learning_rate": 4.171301446051168e-06, "loss": 1.8148, "step": 375 }, { "epoch": 0.12554257095158597, "grad_norm": 24.309186558026187, "learning_rate": 4.182424916573972e-06, "loss": 1.7674, "step": 376 }, { "epoch": 0.1258764607679466, "grad_norm": 17.67403717090658, "learning_rate": 4.193548387096774e-06, "loss": 1.8042, "step": 377 }, { "epoch": 0.12621035058430718, "grad_norm": 20.747321196520296, "learning_rate": 4.204671857619578e-06, "loss": 1.7457, "step": 378 }, { "epoch": 0.12654424040066778, "grad_norm": 16.180478497537248, "learning_rate": 4.215795328142381e-06, "loss": 1.7633, "step": 379 }, { "epoch": 0.12687813021702837, "grad_norm": 18.659013636696102, "learning_rate": 4.226918798665184e-06, "loss": 1.7813, "step": 380 }, { "epoch": 0.127212020033389, "grad_norm": 19.530396086861277, "learning_rate": 4.238042269187987e-06, "loss": 1.745, "step": 381 }, { "epoch": 0.1275459098497496, "grad_norm": 16.042053080234087, "learning_rate": 4.24916573971079e-06, "loss": 1.6292, "step": 382 }, { "epoch": 0.12787979966611018, "grad_norm": 15.81830617438071, "learning_rate": 4.260289210233593e-06, "loss": 1.6993, "step": 383 }, { "epoch": 0.12821368948247078, "grad_norm": 17.415975819376595, "learning_rate": 4.271412680756397e-06, "loss": 1.7138, "step": 384 }, { "epoch": 0.1285475792988314, "grad_norm": 12.506009830877971, "learning_rate": 4.2825361512791995e-06, "loss": 1.7417, "step": 385 }, { "epoch": 0.128881469115192, "grad_norm": 15.215251903076949, "learning_rate": 4.293659621802003e-06, "loss": 1.6995, "step": 386 }, { "epoch": 0.1292153589315526, "grad_norm": 15.1961141446809, "learning_rate": 4.304783092324806e-06, "loss": 1.7165, "step": 387 }, { "epoch": 0.12954924874791318, "grad_norm": 18.705945675944584, "learning_rate": 4.3159065628476084e-06, "loss": 1.739, "step": 388 }, { "epoch": 0.1298831385642738, "grad_norm": 15.806409095610187, "learning_rate": 4.327030033370412e-06, "loss": 1.7307, "step": 389 }, { "epoch": 0.1302170283806344, "grad_norm": 18.0480378723754, "learning_rate": 4.338153503893215e-06, "loss": 1.6783, "step": 390 }, { "epoch": 0.130550918196995, "grad_norm": 14.225398227769405, "learning_rate": 4.349276974416018e-06, "loss": 1.7143, "step": 391 }, { "epoch": 0.13088480801335559, "grad_norm": 19.84474361946555, "learning_rate": 4.360400444938822e-06, "loss": 1.7327, "step": 392 }, { "epoch": 0.13121869782971618, "grad_norm": 22.957221153164284, "learning_rate": 4.3715239154616245e-06, "loss": 1.6851, "step": 393 }, { "epoch": 0.1315525876460768, "grad_norm": 13.262471755973783, "learning_rate": 4.382647385984427e-06, "loss": 1.6009, "step": 394 }, { "epoch": 0.1318864774624374, "grad_norm": 22.14864384656617, "learning_rate": 4.393770856507231e-06, "loss": 1.775, "step": 395 }, { "epoch": 0.132220367278798, "grad_norm": 16.097214797833757, "learning_rate": 4.4048943270300335e-06, "loss": 1.7036, "step": 396 }, { "epoch": 0.13255425709515858, "grad_norm": 12.293573352252828, "learning_rate": 4.416017797552837e-06, "loss": 1.7182, "step": 397 }, { "epoch": 0.1328881469115192, "grad_norm": 13.37070658144906, "learning_rate": 4.42714126807564e-06, "loss": 1.6544, "step": 398 }, { "epoch": 0.1332220367278798, "grad_norm": 15.303878582659598, "learning_rate": 4.4382647385984425e-06, "loss": 1.6453, "step": 399 }, { "epoch": 0.1335559265442404, "grad_norm": 12.805770972259978, "learning_rate": 4.449388209121246e-06, "loss": 1.5922, "step": 400 }, { "epoch": 0.133889816360601, "grad_norm": 12.521978063357869, "learning_rate": 4.4605116796440496e-06, "loss": 1.624, "step": 401 }, { "epoch": 0.1342237061769616, "grad_norm": 15.693412051101722, "learning_rate": 4.471635150166852e-06, "loss": 1.5974, "step": 402 }, { "epoch": 0.1345575959933222, "grad_norm": 17.406200512628843, "learning_rate": 4.482758620689656e-06, "loss": 1.6253, "step": 403 }, { "epoch": 0.1348914858096828, "grad_norm": 14.272839832732938, "learning_rate": 4.4938820912124585e-06, "loss": 1.626, "step": 404 }, { "epoch": 0.1352253756260434, "grad_norm": 13.362789500878321, "learning_rate": 4.505005561735262e-06, "loss": 1.6106, "step": 405 }, { "epoch": 0.13555926544240401, "grad_norm": 12.730685982638537, "learning_rate": 4.516129032258065e-06, "loss": 1.5667, "step": 406 }, { "epoch": 0.1358931552587646, "grad_norm": 14.36871218618362, "learning_rate": 4.5272525027808675e-06, "loss": 1.599, "step": 407 }, { "epoch": 0.1362270450751252, "grad_norm": 13.225638696780388, "learning_rate": 4.538375973303671e-06, "loss": 1.6048, "step": 408 }, { "epoch": 0.1365609348914858, "grad_norm": 16.23474598774944, "learning_rate": 4.549499443826475e-06, "loss": 1.5832, "step": 409 }, { "epoch": 0.13689482470784642, "grad_norm": 12.75038907760738, "learning_rate": 4.560622914349277e-06, "loss": 1.5735, "step": 410 }, { "epoch": 0.137228714524207, "grad_norm": 11.283321859333173, "learning_rate": 4.571746384872081e-06, "loss": 1.5753, "step": 411 }, { "epoch": 0.1375626043405676, "grad_norm": 15.916741734384185, "learning_rate": 4.582869855394884e-06, "loss": 1.58, "step": 412 }, { "epoch": 0.1378964941569282, "grad_norm": 10.90166531001267, "learning_rate": 4.593993325917686e-06, "loss": 1.5563, "step": 413 }, { "epoch": 0.13823038397328882, "grad_norm": 18.728249592418653, "learning_rate": 4.60511679644049e-06, "loss": 1.6168, "step": 414 }, { "epoch": 0.13856427378964942, "grad_norm": 13.169592971497012, "learning_rate": 4.6162402669632926e-06, "loss": 1.6098, "step": 415 }, { "epoch": 0.13889816360601, "grad_norm": 14.032944627910833, "learning_rate": 4.627363737486096e-06, "loss": 1.5251, "step": 416 }, { "epoch": 0.1392320534223706, "grad_norm": 13.081948578276812, "learning_rate": 4.6384872080089e-06, "loss": 1.6176, "step": 417 }, { "epoch": 0.13956594323873123, "grad_norm": 15.19054111163826, "learning_rate": 4.649610678531702e-06, "loss": 1.5725, "step": 418 }, { "epoch": 0.13989983305509182, "grad_norm": 12.379950557206868, "learning_rate": 4.660734149054505e-06, "loss": 1.5461, "step": 419 }, { "epoch": 0.14023372287145242, "grad_norm": 13.638070706394918, "learning_rate": 4.671857619577309e-06, "loss": 1.5874, "step": 420 }, { "epoch": 0.140567612687813, "grad_norm": 12.209288061461917, "learning_rate": 4.682981090100111e-06, "loss": 1.5056, "step": 421 }, { "epoch": 0.14090150250417363, "grad_norm": 11.990821430062624, "learning_rate": 4.694104560622915e-06, "loss": 1.5692, "step": 422 }, { "epoch": 0.14123539232053423, "grad_norm": 14.238842328477482, "learning_rate": 4.705228031145718e-06, "loss": 1.5601, "step": 423 }, { "epoch": 0.14156928213689482, "grad_norm": 10.832503067994304, "learning_rate": 4.71635150166852e-06, "loss": 1.6088, "step": 424 }, { "epoch": 0.1419031719532554, "grad_norm": 13.418263958764314, "learning_rate": 4.727474972191325e-06, "loss": 1.4746, "step": 425 }, { "epoch": 0.14223706176961604, "grad_norm": 13.819303522280036, "learning_rate": 4.7385984427141274e-06, "loss": 1.6096, "step": 426 }, { "epoch": 0.14257095158597663, "grad_norm": 12.345343959511268, "learning_rate": 4.74972191323693e-06, "loss": 1.5396, "step": 427 }, { "epoch": 0.14290484140233722, "grad_norm": 13.095970425278386, "learning_rate": 4.760845383759734e-06, "loss": 1.5041, "step": 428 }, { "epoch": 0.14323873121869782, "grad_norm": 12.94972326768607, "learning_rate": 4.771968854282536e-06, "loss": 1.5033, "step": 429 }, { "epoch": 0.14357262103505844, "grad_norm": 11.801786362135307, "learning_rate": 4.78309232480534e-06, "loss": 1.4714, "step": 430 }, { "epoch": 0.14390651085141903, "grad_norm": 12.332893443312843, "learning_rate": 4.794215795328143e-06, "loss": 1.4802, "step": 431 }, { "epoch": 0.14424040066777963, "grad_norm": 11.354975199289292, "learning_rate": 4.805339265850945e-06, "loss": 1.4582, "step": 432 }, { "epoch": 0.14457429048414022, "grad_norm": 10.678465775588661, "learning_rate": 4.816462736373749e-06, "loss": 1.4702, "step": 433 }, { "epoch": 0.14490818030050084, "grad_norm": 10.052461987515102, "learning_rate": 4.8275862068965525e-06, "loss": 1.5165, "step": 434 }, { "epoch": 0.14524207011686144, "grad_norm": 12.446079550431573, "learning_rate": 4.838709677419355e-06, "loss": 1.4872, "step": 435 }, { "epoch": 0.14557595993322203, "grad_norm": 11.213977795457279, "learning_rate": 4.849833147942159e-06, "loss": 1.5121, "step": 436 }, { "epoch": 0.14590984974958263, "grad_norm": 12.45717282383454, "learning_rate": 4.8609566184649615e-06, "loss": 1.498, "step": 437 }, { "epoch": 0.14624373956594325, "grad_norm": 13.045542532124745, "learning_rate": 4.872080088987764e-06, "loss": 1.4157, "step": 438 }, { "epoch": 0.14657762938230384, "grad_norm": 9.148665156933124, "learning_rate": 4.883203559510568e-06, "loss": 1.4718, "step": 439 }, { "epoch": 0.14691151919866444, "grad_norm": 13.584427161188765, "learning_rate": 4.8943270300333704e-06, "loss": 1.4379, "step": 440 }, { "epoch": 0.14724540901502503, "grad_norm": 11.237040653109133, "learning_rate": 4.905450500556174e-06, "loss": 1.5051, "step": 441 }, { "epoch": 0.14757929883138565, "grad_norm": 14.347987216047361, "learning_rate": 4.9165739710789776e-06, "loss": 1.4475, "step": 442 }, { "epoch": 0.14791318864774625, "grad_norm": 13.259527288538616, "learning_rate": 4.92769744160178e-06, "loss": 1.4507, "step": 443 }, { "epoch": 0.14824707846410684, "grad_norm": 8.337767858776962, "learning_rate": 4.938820912124583e-06, "loss": 1.4471, "step": 444 }, { "epoch": 0.14858096828046743, "grad_norm": 14.143851828350975, "learning_rate": 4.9499443826473865e-06, "loss": 1.4979, "step": 445 }, { "epoch": 0.14891485809682806, "grad_norm": 13.041913430277969, "learning_rate": 4.961067853170189e-06, "loss": 1.3913, "step": 446 }, { "epoch": 0.14924874791318865, "grad_norm": 9.225775984701997, "learning_rate": 4.972191323692993e-06, "loss": 1.4737, "step": 447 }, { "epoch": 0.14958263772954924, "grad_norm": 12.343502854537459, "learning_rate": 4.9833147942157955e-06, "loss": 1.4615, "step": 448 }, { "epoch": 0.14991652754590984, "grad_norm": 9.507564411613576, "learning_rate": 4.994438264738598e-06, "loss": 1.4156, "step": 449 }, { "epoch": 0.15025041736227046, "grad_norm": 11.756331931020235, "learning_rate": 5.005561735261402e-06, "loss": 1.4807, "step": 450 }, { "epoch": 0.15058430717863105, "grad_norm": 13.317721014305539, "learning_rate": 5.016685205784205e-06, "loss": 1.4039, "step": 451 }, { "epoch": 0.15091819699499165, "grad_norm": 11.908360750076858, "learning_rate": 5.027808676307008e-06, "loss": 1.3633, "step": 452 }, { "epoch": 0.15125208681135224, "grad_norm": 10.439078164980312, "learning_rate": 5.038932146829812e-06, "loss": 1.3423, "step": 453 }, { "epoch": 0.15158597662771287, "grad_norm": 9.474673032054197, "learning_rate": 5.050055617352615e-06, "loss": 1.4033, "step": 454 }, { "epoch": 0.15191986644407346, "grad_norm": 12.38187863114966, "learning_rate": 5.061179087875418e-06, "loss": 1.3622, "step": 455 }, { "epoch": 0.15225375626043405, "grad_norm": 10.219251269686973, "learning_rate": 5.072302558398221e-06, "loss": 1.4188, "step": 456 }, { "epoch": 0.15258764607679465, "grad_norm": 10.677858335655632, "learning_rate": 5.083426028921023e-06, "loss": 1.4114, "step": 457 }, { "epoch": 0.15292153589315527, "grad_norm": 10.49625794047551, "learning_rate": 5.094549499443827e-06, "loss": 1.442, "step": 458 }, { "epoch": 0.15325542570951586, "grad_norm": 11.555646620292343, "learning_rate": 5.1056729699666295e-06, "loss": 1.4146, "step": 459 }, { "epoch": 0.15358931552587646, "grad_norm": 10.40053048667901, "learning_rate": 5.116796440489433e-06, "loss": 1.4043, "step": 460 }, { "epoch": 0.15392320534223705, "grad_norm": 9.461835623213954, "learning_rate": 5.127919911012236e-06, "loss": 1.2915, "step": 461 }, { "epoch": 0.15425709515859767, "grad_norm": 11.94405383509616, "learning_rate": 5.139043381535039e-06, "loss": 1.3486, "step": 462 }, { "epoch": 0.15459098497495827, "grad_norm": 10.95880145906255, "learning_rate": 5.150166852057843e-06, "loss": 1.3565, "step": 463 }, { "epoch": 0.15492487479131886, "grad_norm": 10.086603893155807, "learning_rate": 5.161290322580646e-06, "loss": 1.367, "step": 464 }, { "epoch": 0.15525876460767946, "grad_norm": 8.2858493146781, "learning_rate": 5.172413793103449e-06, "loss": 1.3208, "step": 465 }, { "epoch": 0.15559265442404008, "grad_norm": 10.167212270034707, "learning_rate": 5.183537263626252e-06, "loss": 1.3469, "step": 466 }, { "epoch": 0.15592654424040067, "grad_norm": 10.705411144498244, "learning_rate": 5.1946607341490554e-06, "loss": 1.3855, "step": 467 }, { "epoch": 0.15626043405676127, "grad_norm": 8.787848993451439, "learning_rate": 5.205784204671857e-06, "loss": 1.3197, "step": 468 }, { "epoch": 0.15659432387312186, "grad_norm": 8.252858363507476, "learning_rate": 5.216907675194661e-06, "loss": 1.374, "step": 469 }, { "epoch": 0.15692821368948248, "grad_norm": 9.09440651524691, "learning_rate": 5.2280311457174636e-06, "loss": 1.3169, "step": 470 }, { "epoch": 0.15726210350584308, "grad_norm": 10.247094493591598, "learning_rate": 5.239154616240267e-06, "loss": 1.3267, "step": 471 }, { "epoch": 0.15759599332220367, "grad_norm": 8.819675456180072, "learning_rate": 5.250278086763071e-06, "loss": 1.3477, "step": 472 }, { "epoch": 0.15792988313856426, "grad_norm": 8.009964447658414, "learning_rate": 5.261401557285873e-06, "loss": 1.2888, "step": 473 }, { "epoch": 0.1582637729549249, "grad_norm": 9.745865950273602, "learning_rate": 5.272525027808677e-06, "loss": 1.3469, "step": 474 }, { "epoch": 0.15859766277128548, "grad_norm": 9.434733846426171, "learning_rate": 5.28364849833148e-06, "loss": 1.3284, "step": 475 }, { "epoch": 0.15893155258764607, "grad_norm": 9.685721173846323, "learning_rate": 5.294771968854283e-06, "loss": 1.3152, "step": 476 }, { "epoch": 0.15926544240400667, "grad_norm": 7.808791017411983, "learning_rate": 5.305895439377086e-06, "loss": 1.3187, "step": 477 }, { "epoch": 0.1595993322203673, "grad_norm": 9.134769420257607, "learning_rate": 5.3170189098998895e-06, "loss": 1.2757, "step": 478 }, { "epoch": 0.15993322203672788, "grad_norm": 9.04806287628472, "learning_rate": 5.328142380422693e-06, "loss": 1.3006, "step": 479 }, { "epoch": 0.16026711185308848, "grad_norm": 9.205984446771422, "learning_rate": 5.339265850945496e-06, "loss": 1.3637, "step": 480 }, { "epoch": 0.16060100166944907, "grad_norm": 10.30403937046858, "learning_rate": 5.350389321468299e-06, "loss": 1.3415, "step": 481 }, { "epoch": 0.1609348914858097, "grad_norm": 9.656001188279841, "learning_rate": 5.361512791991101e-06, "loss": 1.357, "step": 482 }, { "epoch": 0.1612687813021703, "grad_norm": 8.255482921665799, "learning_rate": 5.372636262513905e-06, "loss": 1.3076, "step": 483 }, { "epoch": 0.16160267111853088, "grad_norm": 10.72367850145772, "learning_rate": 5.383759733036707e-06, "loss": 1.3308, "step": 484 }, { "epoch": 0.16193656093489148, "grad_norm": 7.496494786006748, "learning_rate": 5.394883203559511e-06, "loss": 1.2737, "step": 485 }, { "epoch": 0.1622704507512521, "grad_norm": 8.38561467616152, "learning_rate": 5.406006674082314e-06, "loss": 1.2956, "step": 486 }, { "epoch": 0.1626043405676127, "grad_norm": 9.212318916153489, "learning_rate": 5.417130144605117e-06, "loss": 1.2818, "step": 487 }, { "epoch": 0.1629382303839733, "grad_norm": 7.938964342969311, "learning_rate": 5.428253615127921e-06, "loss": 1.2613, "step": 488 }, { "epoch": 0.16327212020033388, "grad_norm": 8.72470917595111, "learning_rate": 5.4393770856507235e-06, "loss": 1.2466, "step": 489 }, { "epoch": 0.1636060100166945, "grad_norm": 10.613476765096456, "learning_rate": 5.450500556173527e-06, "loss": 1.3053, "step": 490 }, { "epoch": 0.1639398998330551, "grad_norm": 7.002559051109649, "learning_rate": 5.46162402669633e-06, "loss": 1.2766, "step": 491 }, { "epoch": 0.1642737896494157, "grad_norm": 9.02409096700985, "learning_rate": 5.472747497219133e-06, "loss": 1.1921, "step": 492 }, { "epoch": 0.16460767946577629, "grad_norm": 7.118785360971742, "learning_rate": 5.483870967741935e-06, "loss": 1.2652, "step": 493 }, { "epoch": 0.1649415692821369, "grad_norm": 9.670291013797014, "learning_rate": 5.494994438264739e-06, "loss": 1.2613, "step": 494 }, { "epoch": 0.1652754590984975, "grad_norm": 7.022604785298879, "learning_rate": 5.506117908787543e-06, "loss": 1.2872, "step": 495 }, { "epoch": 0.1656093489148581, "grad_norm": 9.804255188500221, "learning_rate": 5.517241379310345e-06, "loss": 1.2226, "step": 496 }, { "epoch": 0.1659432387312187, "grad_norm": 7.343149930849095, "learning_rate": 5.5283648498331485e-06, "loss": 1.2567, "step": 497 }, { "epoch": 0.1662771285475793, "grad_norm": 8.382375232671057, "learning_rate": 5.539488320355951e-06, "loss": 1.3072, "step": 498 }, { "epoch": 0.1666110183639399, "grad_norm": 8.6335964168934, "learning_rate": 5.550611790878755e-06, "loss": 1.2403, "step": 499 }, { "epoch": 0.1669449081803005, "grad_norm": 8.022550359982919, "learning_rate": 5.5617352614015575e-06, "loss": 1.2244, "step": 500 }, { "epoch": 0.1672787979966611, "grad_norm": 9.817649010319748, "learning_rate": 5.572858731924361e-06, "loss": 1.2324, "step": 501 }, { "epoch": 0.16761268781302172, "grad_norm": 7.4765437997579465, "learning_rate": 5.583982202447164e-06, "loss": 1.2578, "step": 502 }, { "epoch": 0.1679465776293823, "grad_norm": 10.520365074830861, "learning_rate": 5.595105672969967e-06, "loss": 1.1721, "step": 503 }, { "epoch": 0.1682804674457429, "grad_norm": 7.960896339152049, "learning_rate": 5.606229143492771e-06, "loss": 1.1952, "step": 504 }, { "epoch": 0.1686143572621035, "grad_norm": 8.005492780353904, "learning_rate": 5.617352614015574e-06, "loss": 1.2296, "step": 505 }, { "epoch": 0.16894824707846412, "grad_norm": 8.088540655533732, "learning_rate": 5.628476084538377e-06, "loss": 1.2521, "step": 506 }, { "epoch": 0.16928213689482471, "grad_norm": 7.928181114416385, "learning_rate": 5.639599555061179e-06, "loss": 1.2506, "step": 507 }, { "epoch": 0.1696160267111853, "grad_norm": 7.1409737363618975, "learning_rate": 5.6507230255839826e-06, "loss": 1.2721, "step": 508 }, { "epoch": 0.1699499165275459, "grad_norm": 8.537444597800638, "learning_rate": 5.661846496106785e-06, "loss": 1.248, "step": 509 }, { "epoch": 0.17028380634390652, "grad_norm": 7.769753739452895, "learning_rate": 5.672969966629589e-06, "loss": 1.1779, "step": 510 }, { "epoch": 0.17061769616026712, "grad_norm": 7.258499594307799, "learning_rate": 5.6840934371523915e-06, "loss": 1.1912, "step": 511 }, { "epoch": 0.1709515859766277, "grad_norm": 7.692882605520094, "learning_rate": 5.695216907675195e-06, "loss": 1.1414, "step": 512 }, { "epoch": 0.1712854757929883, "grad_norm": 6.905704806961085, "learning_rate": 5.706340378197999e-06, "loss": 1.1939, "step": 513 }, { "epoch": 0.17161936560934893, "grad_norm": 8.197331590690604, "learning_rate": 5.717463848720801e-06, "loss": 1.2164, "step": 514 }, { "epoch": 0.17195325542570952, "grad_norm": 8.595680527096729, "learning_rate": 5.728587319243605e-06, "loss": 1.1426, "step": 515 }, { "epoch": 0.17228714524207012, "grad_norm": 6.613888554549661, "learning_rate": 5.739710789766408e-06, "loss": 1.1508, "step": 516 }, { "epoch": 0.1726210350584307, "grad_norm": 6.928194341988576, "learning_rate": 5.750834260289211e-06, "loss": 1.1381, "step": 517 }, { "epoch": 0.17295492487479133, "grad_norm": 8.70538311035413, "learning_rate": 5.761957730812013e-06, "loss": 1.1559, "step": 518 }, { "epoch": 0.17328881469115193, "grad_norm": 6.6378015527592895, "learning_rate": 5.773081201334817e-06, "loss": 1.1928, "step": 519 }, { "epoch": 0.17362270450751252, "grad_norm": 7.838901698270906, "learning_rate": 5.784204671857621e-06, "loss": 1.146, "step": 520 }, { "epoch": 0.17395659432387311, "grad_norm": 6.542889950113477, "learning_rate": 5.795328142380423e-06, "loss": 1.1331, "step": 521 }, { "epoch": 0.1742904841402337, "grad_norm": 7.6333255473148025, "learning_rate": 5.806451612903226e-06, "loss": 1.1507, "step": 522 }, { "epoch": 0.17462437395659433, "grad_norm": 6.3822971506938435, "learning_rate": 5.817575083426029e-06, "loss": 1.1026, "step": 523 }, { "epoch": 0.17495826377295493, "grad_norm": 7.136164995192112, "learning_rate": 5.828698553948833e-06, "loss": 1.1914, "step": 524 }, { "epoch": 0.17529215358931552, "grad_norm": 8.936051776496438, "learning_rate": 5.839822024471635e-06, "loss": 1.1608, "step": 525 }, { "epoch": 0.1756260434056761, "grad_norm": 6.605720155485942, "learning_rate": 5.850945494994439e-06, "loss": 1.1965, "step": 526 }, { "epoch": 0.17595993322203674, "grad_norm": 8.277193190849815, "learning_rate": 5.862068965517242e-06, "loss": 1.1217, "step": 527 }, { "epoch": 0.17629382303839733, "grad_norm": 7.779841697081107, "learning_rate": 5.873192436040045e-06, "loss": 1.1457, "step": 528 }, { "epoch": 0.17662771285475792, "grad_norm": 7.701542074529115, "learning_rate": 5.884315906562849e-06, "loss": 1.1539, "step": 529 }, { "epoch": 0.17696160267111852, "grad_norm": 6.104972649688682, "learning_rate": 5.8954393770856515e-06, "loss": 1.1243, "step": 530 }, { "epoch": 0.17729549248747914, "grad_norm": 6.42837809783253, "learning_rate": 5.906562847608455e-06, "loss": 1.0973, "step": 531 }, { "epoch": 0.17762938230383973, "grad_norm": 6.194730204169662, "learning_rate": 5.917686318131257e-06, "loss": 1.0809, "step": 532 }, { "epoch": 0.17796327212020033, "grad_norm": 7.402419366895633, "learning_rate": 5.9288097886540604e-06, "loss": 1.1005, "step": 533 }, { "epoch": 0.17829716193656092, "grad_norm": 5.77047687563374, "learning_rate": 5.939933259176863e-06, "loss": 1.1149, "step": 534 }, { "epoch": 0.17863105175292154, "grad_norm": 7.2593321804624935, "learning_rate": 5.951056729699667e-06, "loss": 1.1033, "step": 535 }, { "epoch": 0.17896494156928214, "grad_norm": 6.649421047833579, "learning_rate": 5.962180200222469e-06, "loss": 1.1551, "step": 536 }, { "epoch": 0.17929883138564273, "grad_norm": 7.570140956879754, "learning_rate": 5.973303670745273e-06, "loss": 1.0905, "step": 537 }, { "epoch": 0.17963272120200333, "grad_norm": 6.311507696120994, "learning_rate": 5.9844271412680765e-06, "loss": 1.1509, "step": 538 }, { "epoch": 0.17996661101836395, "grad_norm": 8.474053875223088, "learning_rate": 5.995550611790879e-06, "loss": 1.1542, "step": 539 }, { "epoch": 0.18030050083472454, "grad_norm": 6.0262623268026045, "learning_rate": 6.006674082313683e-06, "loss": 1.0957, "step": 540 }, { "epoch": 0.18063439065108514, "grad_norm": 7.284144973276557, "learning_rate": 6.0177975528364855e-06, "loss": 1.1092, "step": 541 }, { "epoch": 0.18096828046744573, "grad_norm": 5.921975569639002, "learning_rate": 6.028921023359289e-06, "loss": 1.132, "step": 542 }, { "epoch": 0.18130217028380635, "grad_norm": 7.967706489322677, "learning_rate": 6.040044493882091e-06, "loss": 1.0225, "step": 543 }, { "epoch": 0.18163606010016695, "grad_norm": 6.169067547681182, "learning_rate": 6.0511679644048945e-06, "loss": 1.0895, "step": 544 }, { "epoch": 0.18196994991652754, "grad_norm": 5.6343202458308115, "learning_rate": 6.062291434927698e-06, "loss": 1.0419, "step": 545 }, { "epoch": 0.18230383973288813, "grad_norm": 7.16517532769145, "learning_rate": 6.073414905450501e-06, "loss": 1.0928, "step": 546 }, { "epoch": 0.18263772954924876, "grad_norm": 6.269692892034574, "learning_rate": 6.084538375973304e-06, "loss": 1.1234, "step": 547 }, { "epoch": 0.18297161936560935, "grad_norm": 6.725546312593826, "learning_rate": 6.095661846496107e-06, "loss": 1.1097, "step": 548 }, { "epoch": 0.18330550918196994, "grad_norm": 4.996057208045785, "learning_rate": 6.1067853170189106e-06, "loss": 1.0691, "step": 549 }, { "epoch": 0.18363939899833054, "grad_norm": 6.785949577595199, "learning_rate": 6.117908787541713e-06, "loss": 1.099, "step": 550 }, { "epoch": 0.18397328881469116, "grad_norm": 6.019096826074248, "learning_rate": 6.129032258064517e-06, "loss": 1.0975, "step": 551 }, { "epoch": 0.18430717863105175, "grad_norm": 5.665313403263225, "learning_rate": 6.1401557285873195e-06, "loss": 1.1184, "step": 552 }, { "epoch": 0.18464106844741235, "grad_norm": 5.9173746047377005, "learning_rate": 6.151279199110123e-06, "loss": 1.0439, "step": 553 }, { "epoch": 0.18497495826377294, "grad_norm": 4.724987897416044, "learning_rate": 6.162402669632927e-06, "loss": 1.0912, "step": 554 }, { "epoch": 0.18530884808013356, "grad_norm": 7.790763178146619, "learning_rate": 6.173526140155729e-06, "loss": 1.0934, "step": 555 }, { "epoch": 0.18564273789649416, "grad_norm": 6.774465931746398, "learning_rate": 6.184649610678533e-06, "loss": 1.0604, "step": 556 }, { "epoch": 0.18597662771285475, "grad_norm": 6.065706602151024, "learning_rate": 6.195773081201335e-06, "loss": 1.0562, "step": 557 }, { "epoch": 0.18631051752921535, "grad_norm": 6.391899797812053, "learning_rate": 6.206896551724138e-06, "loss": 1.053, "step": 558 }, { "epoch": 0.18664440734557597, "grad_norm": 5.534961164132003, "learning_rate": 6.218020022246941e-06, "loss": 1.0771, "step": 559 }, { "epoch": 0.18697829716193656, "grad_norm": 6.751980036897675, "learning_rate": 6.229143492769745e-06, "loss": 1.0809, "step": 560 }, { "epoch": 0.18731218697829716, "grad_norm": 5.4516936988492315, "learning_rate": 6.240266963292548e-06, "loss": 1.0848, "step": 561 }, { "epoch": 0.18764607679465775, "grad_norm": 5.865225405257197, "learning_rate": 6.251390433815351e-06, "loss": 1.0638, "step": 562 }, { "epoch": 0.18797996661101837, "grad_norm": 5.764627048284211, "learning_rate": 6.262513904338154e-06, "loss": 1.0905, "step": 563 }, { "epoch": 0.18831385642737897, "grad_norm": 5.611562956138211, "learning_rate": 6.273637374860957e-06, "loss": 1.046, "step": 564 }, { "epoch": 0.18864774624373956, "grad_norm": 5.4324772915349016, "learning_rate": 6.284760845383761e-06, "loss": 1.007, "step": 565 }, { "epoch": 0.18898163606010016, "grad_norm": 5.234464796942871, "learning_rate": 6.295884315906563e-06, "loss": 0.9954, "step": 566 }, { "epoch": 0.18931552587646078, "grad_norm": 5.1825827203786154, "learning_rate": 6.307007786429367e-06, "loss": 0.999, "step": 567 }, { "epoch": 0.18964941569282137, "grad_norm": 5.900263809662949, "learning_rate": 6.318131256952169e-06, "loss": 1.0279, "step": 568 }, { "epoch": 0.18998330550918197, "grad_norm": 5.245274844946697, "learning_rate": 6.329254727474972e-06, "loss": 1.0366, "step": 569 }, { "epoch": 0.19031719532554256, "grad_norm": 5.54795834369421, "learning_rate": 6.340378197997776e-06, "loss": 1.0254, "step": 570 }, { "epoch": 0.19065108514190318, "grad_norm": 6.09666436579459, "learning_rate": 6.351501668520579e-06, "loss": 1.0339, "step": 571 }, { "epoch": 0.19098497495826378, "grad_norm": 5.66583092311325, "learning_rate": 6.362625139043382e-06, "loss": 0.9477, "step": 572 }, { "epoch": 0.19131886477462437, "grad_norm": 5.36685091681801, "learning_rate": 6.373748609566185e-06, "loss": 0.9976, "step": 573 }, { "epoch": 0.19165275459098496, "grad_norm": 5.100368127279983, "learning_rate": 6.3848720800889884e-06, "loss": 0.9973, "step": 574 }, { "epoch": 0.19198664440734559, "grad_norm": 4.905026647337771, "learning_rate": 6.395995550611791e-06, "loss": 1.0419, "step": 575 }, { "epoch": 0.19232053422370618, "grad_norm": 6.3850890212477145, "learning_rate": 6.407119021134595e-06, "loss": 0.9946, "step": 576 }, { "epoch": 0.19265442404006677, "grad_norm": 5.83647812655898, "learning_rate": 6.418242491657397e-06, "loss": 1.0164, "step": 577 }, { "epoch": 0.19298831385642737, "grad_norm": 5.72101813177116, "learning_rate": 6.429365962180201e-06, "loss": 1.0019, "step": 578 }, { "epoch": 0.193322203672788, "grad_norm": 4.758366149901443, "learning_rate": 6.4404894327030045e-06, "loss": 1.0081, "step": 579 }, { "epoch": 0.19365609348914858, "grad_norm": 5.200299196565246, "learning_rate": 6.451612903225806e-06, "loss": 1.015, "step": 580 }, { "epoch": 0.19398998330550918, "grad_norm": 5.070666865801012, "learning_rate": 6.462736373748611e-06, "loss": 0.9342, "step": 581 }, { "epoch": 0.19432387312186977, "grad_norm": 4.715764258355364, "learning_rate": 6.473859844271413e-06, "loss": 1.0003, "step": 582 }, { "epoch": 0.1946577629382304, "grad_norm": 4.887623072624093, "learning_rate": 6.484983314794216e-06, "loss": 0.995, "step": 583 }, { "epoch": 0.194991652754591, "grad_norm": 5.426402287718064, "learning_rate": 6.496106785317019e-06, "loss": 0.9623, "step": 584 }, { "epoch": 0.19532554257095158, "grad_norm": 5.201091302355398, "learning_rate": 6.5072302558398225e-06, "loss": 1.0111, "step": 585 }, { "epoch": 0.19565943238731218, "grad_norm": 5.327576416923839, "learning_rate": 6.518353726362626e-06, "loss": 1.0154, "step": 586 }, { "epoch": 0.1959933222036728, "grad_norm": 4.867803553617983, "learning_rate": 6.529477196885429e-06, "loss": 1.0136, "step": 587 }, { "epoch": 0.1963272120200334, "grad_norm": 5.598584256227055, "learning_rate": 6.540600667408232e-06, "loss": 0.9787, "step": 588 }, { "epoch": 0.196661101836394, "grad_norm": 5.008486358405725, "learning_rate": 6.551724137931035e-06, "loss": 0.9862, "step": 589 }, { "epoch": 0.19699499165275458, "grad_norm": 4.930434973175076, "learning_rate": 6.5628476084538385e-06, "loss": 1.0134, "step": 590 }, { "epoch": 0.1973288814691152, "grad_norm": 4.407152763430384, "learning_rate": 6.573971078976641e-06, "loss": 0.9371, "step": 591 }, { "epoch": 0.1976627712854758, "grad_norm": 5.561097635364516, "learning_rate": 6.585094549499445e-06, "loss": 1.0114, "step": 592 }, { "epoch": 0.1979966611018364, "grad_norm": 5.225447262525904, "learning_rate": 6.596218020022247e-06, "loss": 1.0212, "step": 593 }, { "epoch": 0.19833055091819699, "grad_norm": 5.535488580063173, "learning_rate": 6.60734149054505e-06, "loss": 1.0374, "step": 594 }, { "epoch": 0.1986644407345576, "grad_norm": 4.201351049691699, "learning_rate": 6.618464961067854e-06, "loss": 0.9517, "step": 595 }, { "epoch": 0.1989983305509182, "grad_norm": 5.238817295009794, "learning_rate": 6.6295884315906565e-06, "loss": 0.9265, "step": 596 }, { "epoch": 0.1993322203672788, "grad_norm": 5.012577171619133, "learning_rate": 6.64071190211346e-06, "loss": 0.9831, "step": 597 }, { "epoch": 0.1996661101836394, "grad_norm": 5.240657470711978, "learning_rate": 6.651835372636263e-06, "loss": 0.9954, "step": 598 }, { "epoch": 0.2, "grad_norm": 5.344042139129603, "learning_rate": 6.662958843159066e-06, "loss": 0.9411, "step": 599 }, { "epoch": 0.2003338898163606, "grad_norm": 4.120717534768755, "learning_rate": 6.674082313681869e-06, "loss": 0.9914, "step": 600 }, { "epoch": 0.2006677796327212, "grad_norm": 4.44082638124549, "learning_rate": 6.6852057842046726e-06, "loss": 0.947, "step": 601 }, { "epoch": 0.2010016694490818, "grad_norm": 4.483926738229526, "learning_rate": 6.696329254727475e-06, "loss": 0.9409, "step": 602 }, { "epoch": 0.20133555926544242, "grad_norm": 4.536391421241219, "learning_rate": 6.707452725250279e-06, "loss": 0.9828, "step": 603 }, { "epoch": 0.201669449081803, "grad_norm": 3.861235380669573, "learning_rate": 6.718576195773082e-06, "loss": 0.9167, "step": 604 }, { "epoch": 0.2020033388981636, "grad_norm": 4.719743207687248, "learning_rate": 6.729699666295884e-06, "loss": 0.9416, "step": 605 }, { "epoch": 0.2023372287145242, "grad_norm": 4.592359013415216, "learning_rate": 6.740823136818689e-06, "loss": 0.9563, "step": 606 }, { "epoch": 0.20267111853088482, "grad_norm": 4.379447334177857, "learning_rate": 6.7519466073414905e-06, "loss": 0.9442, "step": 607 }, { "epoch": 0.2030050083472454, "grad_norm": 4.7771376966436145, "learning_rate": 6.763070077864294e-06, "loss": 0.9594, "step": 608 }, { "epoch": 0.203338898163606, "grad_norm": 3.941480876943667, "learning_rate": 6.774193548387097e-06, "loss": 0.9787, "step": 609 }, { "epoch": 0.2036727879799666, "grad_norm": 4.286413046676961, "learning_rate": 6.7853170189099e-06, "loss": 0.9455, "step": 610 }, { "epoch": 0.20400667779632722, "grad_norm": 5.119755169150936, "learning_rate": 6.796440489432704e-06, "loss": 1.0095, "step": 611 }, { "epoch": 0.20434056761268782, "grad_norm": 4.3106173998371595, "learning_rate": 6.807563959955507e-06, "loss": 0.927, "step": 612 }, { "epoch": 0.2046744574290484, "grad_norm": 4.3244293092102035, "learning_rate": 6.81868743047831e-06, "loss": 0.9347, "step": 613 }, { "epoch": 0.205008347245409, "grad_norm": 4.441900097013751, "learning_rate": 6.829810901001113e-06, "loss": 0.9173, "step": 614 }, { "epoch": 0.20534223706176963, "grad_norm": 5.6557645960923475, "learning_rate": 6.840934371523916e-06, "loss": 0.9726, "step": 615 }, { "epoch": 0.20567612687813022, "grad_norm": 3.9113733395341646, "learning_rate": 6.852057842046719e-06, "loss": 0.9385, "step": 616 }, { "epoch": 0.20601001669449082, "grad_norm": 4.899772853408734, "learning_rate": 6.863181312569523e-06, "loss": 0.9476, "step": 617 }, { "epoch": 0.2063439065108514, "grad_norm": 4.070075470883009, "learning_rate": 6.8743047830923245e-06, "loss": 0.9324, "step": 618 }, { "epoch": 0.20667779632721203, "grad_norm": 4.138231698686058, "learning_rate": 6.885428253615128e-06, "loss": 0.9557, "step": 619 }, { "epoch": 0.20701168614357263, "grad_norm": 3.7989828147054125, "learning_rate": 6.896551724137932e-06, "loss": 0.9169, "step": 620 }, { "epoch": 0.20734557595993322, "grad_norm": 3.77117782223477, "learning_rate": 6.907675194660734e-06, "loss": 0.8834, "step": 621 }, { "epoch": 0.20767946577629381, "grad_norm": 4.186312942356236, "learning_rate": 6.918798665183538e-06, "loss": 0.923, "step": 622 }, { "epoch": 0.20801335559265444, "grad_norm": 4.280316447077903, "learning_rate": 6.929922135706341e-06, "loss": 0.9284, "step": 623 }, { "epoch": 0.20834724540901503, "grad_norm": 3.7795311672979928, "learning_rate": 6.941045606229144e-06, "loss": 0.9078, "step": 624 }, { "epoch": 0.20868113522537562, "grad_norm": 4.369998315463343, "learning_rate": 6.952169076751947e-06, "loss": 0.9357, "step": 625 }, { "epoch": 0.20901502504173622, "grad_norm": 4.1835774932747665, "learning_rate": 6.9632925472747504e-06, "loss": 0.9128, "step": 626 }, { "epoch": 0.20934891485809684, "grad_norm": 4.175765165737357, "learning_rate": 6.974416017797554e-06, "loss": 0.889, "step": 627 }, { "epoch": 0.20968280467445743, "grad_norm": 4.14258299044525, "learning_rate": 6.985539488320357e-06, "loss": 0.8714, "step": 628 }, { "epoch": 0.21001669449081803, "grad_norm": 3.8551612238582913, "learning_rate": 6.99666295884316e-06, "loss": 0.8844, "step": 629 }, { "epoch": 0.21035058430717862, "grad_norm": 3.9029614974887092, "learning_rate": 7.007786429365962e-06, "loss": 0.8999, "step": 630 }, { "epoch": 0.21068447412353924, "grad_norm": 5.086966677174806, "learning_rate": 7.0189098998887665e-06, "loss": 0.9286, "step": 631 }, { "epoch": 0.21101836393989984, "grad_norm": 4.266407048408614, "learning_rate": 7.030033370411568e-06, "loss": 0.9454, "step": 632 }, { "epoch": 0.21135225375626043, "grad_norm": 4.6732202269005905, "learning_rate": 7.041156840934372e-06, "loss": 0.9804, "step": 633 }, { "epoch": 0.21168614357262103, "grad_norm": 3.6813968449842425, "learning_rate": 7.052280311457175e-06, "loss": 0.9235, "step": 634 }, { "epoch": 0.21202003338898165, "grad_norm": 4.163446268833577, "learning_rate": 7.063403781979978e-06, "loss": 0.8837, "step": 635 }, { "epoch": 0.21235392320534224, "grad_norm": 4.144216916144925, "learning_rate": 7.074527252502782e-06, "loss": 0.8929, "step": 636 }, { "epoch": 0.21268781302170284, "grad_norm": 3.726746382973073, "learning_rate": 7.0856507230255845e-06, "loss": 0.8841, "step": 637 }, { "epoch": 0.21302170283806343, "grad_norm": 4.4111462118464075, "learning_rate": 7.096774193548388e-06, "loss": 0.9224, "step": 638 }, { "epoch": 0.21335559265442405, "grad_norm": 3.9370899353631867, "learning_rate": 7.107897664071191e-06, "loss": 0.8696, "step": 639 }, { "epoch": 0.21368948247078465, "grad_norm": 4.013400942146667, "learning_rate": 7.119021134593994e-06, "loss": 0.8497, "step": 640 }, { "epoch": 0.21402337228714524, "grad_norm": 4.207711885018177, "learning_rate": 7.130144605116797e-06, "loss": 0.9296, "step": 641 }, { "epoch": 0.21435726210350584, "grad_norm": 3.830917942708965, "learning_rate": 7.1412680756396006e-06, "loss": 0.8208, "step": 642 }, { "epoch": 0.21469115191986646, "grad_norm": 4.834387930947786, "learning_rate": 7.152391546162402e-06, "loss": 0.8851, "step": 643 }, { "epoch": 0.21502504173622705, "grad_norm": 3.94813410096315, "learning_rate": 7.163515016685206e-06, "loss": 0.9043, "step": 644 }, { "epoch": 0.21535893155258765, "grad_norm": 4.369864916996113, "learning_rate": 7.1746384872080095e-06, "loss": 0.8572, "step": 645 }, { "epoch": 0.21569282136894824, "grad_norm": 4.02270163754953, "learning_rate": 7.185761957730812e-06, "loss": 0.8829, "step": 646 }, { "epoch": 0.21602671118530886, "grad_norm": 4.03057533439871, "learning_rate": 7.196885428253616e-06, "loss": 0.8906, "step": 647 }, { "epoch": 0.21636060100166946, "grad_norm": 3.591868783832409, "learning_rate": 7.2080088987764185e-06, "loss": 0.8535, "step": 648 }, { "epoch": 0.21669449081803005, "grad_norm": 3.871276731484219, "learning_rate": 7.219132369299222e-06, "loss": 0.91, "step": 649 }, { "epoch": 0.21702838063439064, "grad_norm": 3.768800102164548, "learning_rate": 7.230255839822025e-06, "loss": 0.8535, "step": 650 }, { "epoch": 0.21736227045075127, "grad_norm": 3.66262332412366, "learning_rate": 7.241379310344828e-06, "loss": 0.8463, "step": 651 }, { "epoch": 0.21769616026711186, "grad_norm": 3.636680254724362, "learning_rate": 7.252502780867632e-06, "loss": 0.881, "step": 652 }, { "epoch": 0.21803005008347245, "grad_norm": 3.7383698268687136, "learning_rate": 7.263626251390435e-06, "loss": 0.8322, "step": 653 }, { "epoch": 0.21836393989983305, "grad_norm": 4.516880520893684, "learning_rate": 7.274749721913238e-06, "loss": 0.8613, "step": 654 }, { "epoch": 0.21869782971619364, "grad_norm": 3.9985882422580366, "learning_rate": 7.28587319243604e-06, "loss": 0.9048, "step": 655 }, { "epoch": 0.21903171953255426, "grad_norm": 3.6223592373877276, "learning_rate": 7.296996662958844e-06, "loss": 0.8603, "step": 656 }, { "epoch": 0.21936560934891486, "grad_norm": 4.806470085870025, "learning_rate": 7.308120133481646e-06, "loss": 0.8709, "step": 657 }, { "epoch": 0.21969949916527545, "grad_norm": 3.5910773024139635, "learning_rate": 7.31924360400445e-06, "loss": 0.832, "step": 658 }, { "epoch": 0.22003338898163605, "grad_norm": 3.8248153505530254, "learning_rate": 7.3303670745272525e-06, "loss": 0.8719, "step": 659 }, { "epoch": 0.22036727879799667, "grad_norm": 3.7799558810948315, "learning_rate": 7.341490545050056e-06, "loss": 0.8411, "step": 660 }, { "epoch": 0.22070116861435726, "grad_norm": 3.5087987611068905, "learning_rate": 7.35261401557286e-06, "loss": 0.863, "step": 661 }, { "epoch": 0.22103505843071786, "grad_norm": 4.213731012185598, "learning_rate": 7.363737486095662e-06, "loss": 0.8647, "step": 662 }, { "epoch": 0.22136894824707845, "grad_norm": 3.2519142568021557, "learning_rate": 7.374860956618466e-06, "loss": 0.8505, "step": 663 }, { "epoch": 0.22170283806343907, "grad_norm": 3.6110492397464165, "learning_rate": 7.385984427141269e-06, "loss": 0.8262, "step": 664 }, { "epoch": 0.22203672787979967, "grad_norm": 3.8091601301183533, "learning_rate": 7.397107897664072e-06, "loss": 0.8599, "step": 665 }, { "epoch": 0.22237061769616026, "grad_norm": 3.382811033911828, "learning_rate": 7.408231368186875e-06, "loss": 0.8297, "step": 666 }, { "epoch": 0.22270450751252086, "grad_norm": 3.302261825767181, "learning_rate": 7.4193548387096784e-06, "loss": 0.8288, "step": 667 }, { "epoch": 0.22303839732888148, "grad_norm": 3.6216426526173406, "learning_rate": 7.43047830923248e-06, "loss": 0.8634, "step": 668 }, { "epoch": 0.22337228714524207, "grad_norm": 3.546020722588526, "learning_rate": 7.441601779755284e-06, "loss": 0.885, "step": 669 }, { "epoch": 0.22370617696160267, "grad_norm": 3.6613534114519997, "learning_rate": 7.452725250278087e-06, "loss": 0.8273, "step": 670 }, { "epoch": 0.22404006677796326, "grad_norm": 3.5019044685532377, "learning_rate": 7.46384872080089e-06, "loss": 0.8073, "step": 671 }, { "epoch": 0.22437395659432388, "grad_norm": 3.52777323941085, "learning_rate": 7.474972191323694e-06, "loss": 0.9027, "step": 672 }, { "epoch": 0.22470784641068448, "grad_norm": 3.5938486469042887, "learning_rate": 7.486095661846496e-06, "loss": 0.9068, "step": 673 }, { "epoch": 0.22504173622704507, "grad_norm": 3.3592859447018575, "learning_rate": 7.4972191323693e-06, "loss": 0.8436, "step": 674 }, { "epoch": 0.22537562604340566, "grad_norm": 3.5589739470977233, "learning_rate": 7.508342602892103e-06, "loss": 0.8227, "step": 675 }, { "epoch": 0.22570951585976629, "grad_norm": 3.5376398256594475, "learning_rate": 7.519466073414906e-06, "loss": 0.811, "step": 676 }, { "epoch": 0.22604340567612688, "grad_norm": 3.858249790435458, "learning_rate": 7.53058954393771e-06, "loss": 0.8721, "step": 677 }, { "epoch": 0.22637729549248747, "grad_norm": 3.4198640073918187, "learning_rate": 7.5417130144605125e-06, "loss": 0.8532, "step": 678 }, { "epoch": 0.22671118530884807, "grad_norm": 3.37353838318588, "learning_rate": 7.552836484983316e-06, "loss": 0.8208, "step": 679 }, { "epoch": 0.2270450751252087, "grad_norm": 3.4832680528056064, "learning_rate": 7.563959955506118e-06, "loss": 0.8372, "step": 680 }, { "epoch": 0.22737896494156928, "grad_norm": 3.0300875111910934, "learning_rate": 7.575083426028922e-06, "loss": 0.8155, "step": 681 }, { "epoch": 0.22771285475792988, "grad_norm": 3.5705408249233663, "learning_rate": 7.586206896551724e-06, "loss": 0.826, "step": 682 }, { "epoch": 0.22804674457429047, "grad_norm": 3.1688802839591337, "learning_rate": 7.597330367074528e-06, "loss": 0.8145, "step": 683 }, { "epoch": 0.2283806343906511, "grad_norm": 3.5277331310727433, "learning_rate": 7.60845383759733e-06, "loss": 0.8217, "step": 684 }, { "epoch": 0.2287145242070117, "grad_norm": 3.4936228043140027, "learning_rate": 7.619577308120134e-06, "loss": 0.8348, "step": 685 }, { "epoch": 0.22904841402337228, "grad_norm": 3.0787837033811467, "learning_rate": 7.630700778642938e-06, "loss": 0.7944, "step": 686 }, { "epoch": 0.22938230383973288, "grad_norm": 3.513217242906615, "learning_rate": 7.64182424916574e-06, "loss": 0.8414, "step": 687 }, { "epoch": 0.2297161936560935, "grad_norm": 3.744527110652556, "learning_rate": 7.652947719688543e-06, "loss": 0.8561, "step": 688 }, { "epoch": 0.2300500834724541, "grad_norm": 3.406773372929362, "learning_rate": 7.664071190211346e-06, "loss": 0.8314, "step": 689 }, { "epoch": 0.2303839732888147, "grad_norm": 3.058024488140952, "learning_rate": 7.67519466073415e-06, "loss": 0.7957, "step": 690 }, { "epoch": 0.23071786310517528, "grad_norm": 3.373337228000405, "learning_rate": 7.686318131256953e-06, "loss": 0.8161, "step": 691 }, { "epoch": 0.2310517529215359, "grad_norm": 3.2448235141097954, "learning_rate": 7.697441601779755e-06, "loss": 0.7637, "step": 692 }, { "epoch": 0.2313856427378965, "grad_norm": 3.3584983964340624, "learning_rate": 7.70856507230256e-06, "loss": 0.8246, "step": 693 }, { "epoch": 0.2317195325542571, "grad_norm": 3.194992928033707, "learning_rate": 7.719688542825363e-06, "loss": 0.819, "step": 694 }, { "epoch": 0.23205342237061768, "grad_norm": 3.4681922615732654, "learning_rate": 7.730812013348165e-06, "loss": 0.7923, "step": 695 }, { "epoch": 0.2323873121869783, "grad_norm": 3.2939897428899134, "learning_rate": 7.741935483870968e-06, "loss": 0.8193, "step": 696 }, { "epoch": 0.2327212020033389, "grad_norm": 3.3634231528536533, "learning_rate": 7.753058954393772e-06, "loss": 0.8146, "step": 697 }, { "epoch": 0.2330550918196995, "grad_norm": 3.1240532201711813, "learning_rate": 7.764182424916575e-06, "loss": 0.8443, "step": 698 }, { "epoch": 0.2333889816360601, "grad_norm": 3.043710768311105, "learning_rate": 7.775305895439378e-06, "loss": 0.8138, "step": 699 }, { "epoch": 0.2337228714524207, "grad_norm": 3.1227164345586003, "learning_rate": 7.78642936596218e-06, "loss": 0.8621, "step": 700 }, { "epoch": 0.2340567612687813, "grad_norm": 3.0578686229140484, "learning_rate": 7.797552836484983e-06, "loss": 0.8203, "step": 701 }, { "epoch": 0.2343906510851419, "grad_norm": 3.0917584055881826, "learning_rate": 7.808676307007788e-06, "loss": 0.7694, "step": 702 }, { "epoch": 0.2347245409015025, "grad_norm": 3.261285397172838, "learning_rate": 7.81979977753059e-06, "loss": 0.7714, "step": 703 }, { "epoch": 0.23505843071786311, "grad_norm": 2.8000852522803563, "learning_rate": 7.830923248053393e-06, "loss": 0.7752, "step": 704 }, { "epoch": 0.2353923205342237, "grad_norm": 3.2603021152997904, "learning_rate": 7.842046718576196e-06, "loss": 0.791, "step": 705 }, { "epoch": 0.2357262103505843, "grad_norm": 2.7970984614880847, "learning_rate": 7.853170189099e-06, "loss": 0.7964, "step": 706 }, { "epoch": 0.2360601001669449, "grad_norm": 3.1473420692325442, "learning_rate": 7.864293659621803e-06, "loss": 0.7983, "step": 707 }, { "epoch": 0.23639398998330552, "grad_norm": 3.1343337730993843, "learning_rate": 7.875417130144606e-06, "loss": 0.8027, "step": 708 }, { "epoch": 0.2367278797996661, "grad_norm": 3.0423512657837275, "learning_rate": 7.886540600667408e-06, "loss": 0.8348, "step": 709 }, { "epoch": 0.2370617696160267, "grad_norm": 3.2151410326773036, "learning_rate": 7.897664071190213e-06, "loss": 0.7491, "step": 710 }, { "epoch": 0.2373956594323873, "grad_norm": 3.235232391100134, "learning_rate": 7.908787541713015e-06, "loss": 0.7779, "step": 711 }, { "epoch": 0.23772954924874792, "grad_norm": 3.169094352860317, "learning_rate": 7.919911012235818e-06, "loss": 0.801, "step": 712 }, { "epoch": 0.23806343906510852, "grad_norm": 3.072433443340641, "learning_rate": 7.93103448275862e-06, "loss": 0.7634, "step": 713 }, { "epoch": 0.2383973288814691, "grad_norm": 3.1000483779275982, "learning_rate": 7.942157953281424e-06, "loss": 0.7951, "step": 714 }, { "epoch": 0.2387312186978297, "grad_norm": 3.1171750221477232, "learning_rate": 7.953281423804228e-06, "loss": 0.801, "step": 715 }, { "epoch": 0.23906510851419033, "grad_norm": 3.356618253754888, "learning_rate": 7.96440489432703e-06, "loss": 0.8254, "step": 716 }, { "epoch": 0.23939899833055092, "grad_norm": 3.098926285124785, "learning_rate": 7.975528364849833e-06, "loss": 0.7939, "step": 717 }, { "epoch": 0.23973288814691152, "grad_norm": 3.052608348077377, "learning_rate": 7.986651835372638e-06, "loss": 0.8076, "step": 718 }, { "epoch": 0.2400667779632721, "grad_norm": 3.26586450874081, "learning_rate": 7.99777530589544e-06, "loss": 0.7666, "step": 719 }, { "epoch": 0.24040066777963273, "grad_norm": 3.510970503644211, "learning_rate": 8.008898776418243e-06, "loss": 0.7985, "step": 720 }, { "epoch": 0.24073455759599333, "grad_norm": 2.900494580948869, "learning_rate": 8.020022246941046e-06, "loss": 0.7745, "step": 721 }, { "epoch": 0.24106844741235392, "grad_norm": 3.149247839887993, "learning_rate": 8.03114571746385e-06, "loss": 0.7978, "step": 722 }, { "epoch": 0.24140233722871451, "grad_norm": 2.895252873777829, "learning_rate": 8.042269187986651e-06, "loss": 0.76, "step": 723 }, { "epoch": 0.24173622704507514, "grad_norm": 2.888655810315481, "learning_rate": 8.053392658509456e-06, "loss": 0.7706, "step": 724 }, { "epoch": 0.24207011686143573, "grad_norm": 3.0782704179122606, "learning_rate": 8.064516129032258e-06, "loss": 0.7906, "step": 725 }, { "epoch": 0.24240400667779632, "grad_norm": 2.8851903281651112, "learning_rate": 8.075639599555061e-06, "loss": 0.7813, "step": 726 }, { "epoch": 0.24273789649415692, "grad_norm": 2.957929433060461, "learning_rate": 8.086763070077866e-06, "loss": 0.7811, "step": 727 }, { "epoch": 0.24307178631051754, "grad_norm": 2.884912971168197, "learning_rate": 8.097886540600668e-06, "loss": 0.8028, "step": 728 }, { "epoch": 0.24340567612687813, "grad_norm": 2.9652495140830806, "learning_rate": 8.109010011123471e-06, "loss": 0.7816, "step": 729 }, { "epoch": 0.24373956594323873, "grad_norm": 2.934433891775833, "learning_rate": 8.120133481646274e-06, "loss": 0.7985, "step": 730 }, { "epoch": 0.24407345575959932, "grad_norm": 2.880806881671892, "learning_rate": 8.131256952169078e-06, "loss": 0.7708, "step": 731 }, { "epoch": 0.24440734557595994, "grad_norm": 2.8588578379720957, "learning_rate": 8.14238042269188e-06, "loss": 0.7756, "step": 732 }, { "epoch": 0.24474123539232054, "grad_norm": 2.8757425627920368, "learning_rate": 8.153503893214683e-06, "loss": 0.8032, "step": 733 }, { "epoch": 0.24507512520868113, "grad_norm": 2.7356998408680395, "learning_rate": 8.164627363737486e-06, "loss": 0.7367, "step": 734 }, { "epoch": 0.24540901502504173, "grad_norm": 2.9698416764474853, "learning_rate": 8.17575083426029e-06, "loss": 0.7998, "step": 735 }, { "epoch": 0.24574290484140235, "grad_norm": 2.7813682735142966, "learning_rate": 8.186874304783093e-06, "loss": 0.7734, "step": 736 }, { "epoch": 0.24607679465776294, "grad_norm": 2.9150826370521683, "learning_rate": 8.197997775305896e-06, "loss": 0.7686, "step": 737 }, { "epoch": 0.24641068447412354, "grad_norm": 2.5555775409481845, "learning_rate": 8.209121245828699e-06, "loss": 0.7324, "step": 738 }, { "epoch": 0.24674457429048413, "grad_norm": 2.7978736718295716, "learning_rate": 8.220244716351501e-06, "loss": 0.7576, "step": 739 }, { "epoch": 0.24707846410684475, "grad_norm": 2.7730453419025154, "learning_rate": 8.231368186874306e-06, "loss": 0.7699, "step": 740 }, { "epoch": 0.24741235392320535, "grad_norm": 3.185307346436557, "learning_rate": 8.242491657397109e-06, "loss": 0.7513, "step": 741 }, { "epoch": 0.24774624373956594, "grad_norm": 2.545159188850958, "learning_rate": 8.253615127919911e-06, "loss": 0.7573, "step": 742 }, { "epoch": 0.24808013355592654, "grad_norm": 3.321075861183876, "learning_rate": 8.264738598442716e-06, "loss": 0.7945, "step": 743 }, { "epoch": 0.24841402337228716, "grad_norm": 2.7479231020441244, "learning_rate": 8.275862068965518e-06, "loss": 0.7492, "step": 744 }, { "epoch": 0.24874791318864775, "grad_norm": 2.6937743743247102, "learning_rate": 8.286985539488321e-06, "loss": 0.7442, "step": 745 }, { "epoch": 0.24908180300500835, "grad_norm": 2.827590177115561, "learning_rate": 8.298109010011124e-06, "loss": 0.7421, "step": 746 }, { "epoch": 0.24941569282136894, "grad_norm": 2.504159720312986, "learning_rate": 8.309232480533928e-06, "loss": 0.7659, "step": 747 }, { "epoch": 0.24974958263772956, "grad_norm": 3.0217916703909844, "learning_rate": 8.32035595105673e-06, "loss": 0.776, "step": 748 }, { "epoch": 0.2500834724540901, "grad_norm": 2.756133949746331, "learning_rate": 8.331479421579534e-06, "loss": 0.7275, "step": 749 }, { "epoch": 0.25041736227045075, "grad_norm": 2.7190941346082314, "learning_rate": 8.342602892102336e-06, "loss": 0.7126, "step": 750 }, { "epoch": 0.25075125208681137, "grad_norm": 2.8795575947429155, "learning_rate": 8.353726362625139e-06, "loss": 0.743, "step": 751 }, { "epoch": 0.25108514190317194, "grad_norm": 2.674489237321665, "learning_rate": 8.364849833147943e-06, "loss": 0.7129, "step": 752 }, { "epoch": 0.25141903171953256, "grad_norm": 3.041144666915435, "learning_rate": 8.375973303670746e-06, "loss": 0.7891, "step": 753 }, { "epoch": 0.2517529215358932, "grad_norm": 2.7626954175807685, "learning_rate": 8.387096774193549e-06, "loss": 0.7418, "step": 754 }, { "epoch": 0.25208681135225375, "grad_norm": 2.666885156219428, "learning_rate": 8.398220244716352e-06, "loss": 0.7691, "step": 755 }, { "epoch": 0.25242070116861437, "grad_norm": 3.0973699153579832, "learning_rate": 8.409343715239156e-06, "loss": 0.7539, "step": 756 }, { "epoch": 0.25275459098497494, "grad_norm": 2.6594906036622774, "learning_rate": 8.420467185761959e-06, "loss": 0.7806, "step": 757 }, { "epoch": 0.25308848080133556, "grad_norm": 2.964170696647356, "learning_rate": 8.431590656284761e-06, "loss": 0.7635, "step": 758 }, { "epoch": 0.2534223706176962, "grad_norm": 2.6227264784968107, "learning_rate": 8.442714126807566e-06, "loss": 0.7371, "step": 759 }, { "epoch": 0.25375626043405675, "grad_norm": 2.7484898896315415, "learning_rate": 8.453837597330368e-06, "loss": 0.7257, "step": 760 }, { "epoch": 0.25409015025041737, "grad_norm": 2.5319478747770168, "learning_rate": 8.464961067853171e-06, "loss": 0.7447, "step": 761 }, { "epoch": 0.254424040066778, "grad_norm": 2.636902104312925, "learning_rate": 8.476084538375974e-06, "loss": 0.7506, "step": 762 }, { "epoch": 0.25475792988313856, "grad_norm": 2.642622056814751, "learning_rate": 8.487208008898777e-06, "loss": 0.7603, "step": 763 }, { "epoch": 0.2550918196994992, "grad_norm": 2.7965409911881234, "learning_rate": 8.49833147942158e-06, "loss": 0.8283, "step": 764 }, { "epoch": 0.25542570951585974, "grad_norm": 2.8083214069631337, "learning_rate": 8.509454949944384e-06, "loss": 0.7571, "step": 765 }, { "epoch": 0.25575959933222037, "grad_norm": 2.5227465592207445, "learning_rate": 8.520578420467186e-06, "loss": 0.7389, "step": 766 }, { "epoch": 0.256093489148581, "grad_norm": 2.5848521600098753, "learning_rate": 8.531701890989989e-06, "loss": 0.7465, "step": 767 }, { "epoch": 0.25642737896494155, "grad_norm": 2.7638513260258084, "learning_rate": 8.542825361512793e-06, "loss": 0.7056, "step": 768 }, { "epoch": 0.2567612687813022, "grad_norm": 2.6263435022885675, "learning_rate": 8.553948832035596e-06, "loss": 0.733, "step": 769 }, { "epoch": 0.2570951585976628, "grad_norm": 2.933675912385845, "learning_rate": 8.565072302558399e-06, "loss": 0.7927, "step": 770 }, { "epoch": 0.25742904841402336, "grad_norm": 2.836111254873545, "learning_rate": 8.576195773081202e-06, "loss": 0.7949, "step": 771 }, { "epoch": 0.257762938230384, "grad_norm": 2.840659631210894, "learning_rate": 8.587319243604006e-06, "loss": 0.7629, "step": 772 }, { "epoch": 0.25809682804674455, "grad_norm": 2.577168200024518, "learning_rate": 8.598442714126807e-06, "loss": 0.7387, "step": 773 }, { "epoch": 0.2584307178631052, "grad_norm": 2.6557270932982133, "learning_rate": 8.609566184649611e-06, "loss": 0.7235, "step": 774 }, { "epoch": 0.2587646076794658, "grad_norm": 2.5978177623170255, "learning_rate": 8.620689655172414e-06, "loss": 0.7414, "step": 775 }, { "epoch": 0.25909849749582636, "grad_norm": 2.926851160346, "learning_rate": 8.631813125695217e-06, "loss": 0.7027, "step": 776 }, { "epoch": 0.259432387312187, "grad_norm": 2.78582252016108, "learning_rate": 8.642936596218021e-06, "loss": 0.761, "step": 777 }, { "epoch": 0.2597662771285476, "grad_norm": 2.4379567632138532, "learning_rate": 8.654060066740824e-06, "loss": 0.749, "step": 778 }, { "epoch": 0.2601001669449082, "grad_norm": 3.0286346197895004, "learning_rate": 8.665183537263627e-06, "loss": 0.7193, "step": 779 }, { "epoch": 0.2604340567612688, "grad_norm": 2.528130483808439, "learning_rate": 8.67630700778643e-06, "loss": 0.7471, "step": 780 }, { "epoch": 0.26076794657762936, "grad_norm": 2.535367312329148, "learning_rate": 8.687430478309234e-06, "loss": 0.6879, "step": 781 }, { "epoch": 0.26110183639399, "grad_norm": 2.5138214429898076, "learning_rate": 8.698553948832036e-06, "loss": 0.7207, "step": 782 }, { "epoch": 0.2614357262103506, "grad_norm": 2.5101695967740265, "learning_rate": 8.70967741935484e-06, "loss": 0.7325, "step": 783 }, { "epoch": 0.26176961602671117, "grad_norm": 2.5875987693204183, "learning_rate": 8.720800889877644e-06, "loss": 0.7325, "step": 784 }, { "epoch": 0.2621035058430718, "grad_norm": 2.4608459608229074, "learning_rate": 8.731924360400446e-06, "loss": 0.7481, "step": 785 }, { "epoch": 0.26243739565943236, "grad_norm": 2.3497240681406666, "learning_rate": 8.743047830923249e-06, "loss": 0.7529, "step": 786 }, { "epoch": 0.262771285475793, "grad_norm": 2.4699870009088225, "learning_rate": 8.754171301446052e-06, "loss": 0.6925, "step": 787 }, { "epoch": 0.2631051752921536, "grad_norm": 2.464983131636083, "learning_rate": 8.765294771968854e-06, "loss": 0.7471, "step": 788 }, { "epoch": 0.26343906510851417, "grad_norm": 2.4261954506947947, "learning_rate": 8.776418242491657e-06, "loss": 0.7422, "step": 789 }, { "epoch": 0.2637729549248748, "grad_norm": 2.549916504672808, "learning_rate": 8.787541713014462e-06, "loss": 0.7369, "step": 790 }, { "epoch": 0.2641068447412354, "grad_norm": 2.442277214735108, "learning_rate": 8.798665183537264e-06, "loss": 0.702, "step": 791 }, { "epoch": 0.264440734557596, "grad_norm": 2.664187264370996, "learning_rate": 8.809788654060067e-06, "loss": 0.7515, "step": 792 }, { "epoch": 0.2647746243739566, "grad_norm": 2.6174097261795986, "learning_rate": 8.820912124582871e-06, "loss": 0.7027, "step": 793 }, { "epoch": 0.26510851419031717, "grad_norm": 2.2448538690171063, "learning_rate": 8.832035595105674e-06, "loss": 0.7037, "step": 794 }, { "epoch": 0.2654424040066778, "grad_norm": 2.5011538822241413, "learning_rate": 8.843159065628477e-06, "loss": 0.6756, "step": 795 }, { "epoch": 0.2657762938230384, "grad_norm": 2.3864948422846086, "learning_rate": 8.85428253615128e-06, "loss": 0.6993, "step": 796 }, { "epoch": 0.266110183639399, "grad_norm": 2.4232511755918074, "learning_rate": 8.865406006674084e-06, "loss": 0.717, "step": 797 }, { "epoch": 0.2664440734557596, "grad_norm": 2.3909125953357178, "learning_rate": 8.876529477196885e-06, "loss": 0.7282, "step": 798 }, { "epoch": 0.2667779632721202, "grad_norm": 2.3390766102753187, "learning_rate": 8.88765294771969e-06, "loss": 0.6962, "step": 799 }, { "epoch": 0.2671118530884808, "grad_norm": 2.4071510778199996, "learning_rate": 8.898776418242492e-06, "loss": 0.6881, "step": 800 }, { "epoch": 0.2674457429048414, "grad_norm": 2.520911593001965, "learning_rate": 8.909899888765295e-06, "loss": 0.7278, "step": 801 }, { "epoch": 0.267779632721202, "grad_norm": 2.518572219569252, "learning_rate": 8.921023359288099e-06, "loss": 0.724, "step": 802 }, { "epoch": 0.2681135225375626, "grad_norm": 2.343294136174664, "learning_rate": 8.932146829810902e-06, "loss": 0.7045, "step": 803 }, { "epoch": 0.2684474123539232, "grad_norm": 2.4415640618465395, "learning_rate": 8.943270300333705e-06, "loss": 0.6838, "step": 804 }, { "epoch": 0.2687813021702838, "grad_norm": 2.451031373191094, "learning_rate": 8.954393770856507e-06, "loss": 0.7111, "step": 805 }, { "epoch": 0.2691151919866444, "grad_norm": 2.4574693338836147, "learning_rate": 8.965517241379312e-06, "loss": 0.7192, "step": 806 }, { "epoch": 0.26944908180300503, "grad_norm": 2.4159773856238513, "learning_rate": 8.976640711902114e-06, "loss": 0.699, "step": 807 }, { "epoch": 0.2697829716193656, "grad_norm": 2.338379168221267, "learning_rate": 8.987764182424917e-06, "loss": 0.753, "step": 808 }, { "epoch": 0.2701168614357262, "grad_norm": 2.3106998248845714, "learning_rate": 8.998887652947721e-06, "loss": 0.6872, "step": 809 }, { "epoch": 0.2704507512520868, "grad_norm": 2.3149578434137297, "learning_rate": 9.010011123470524e-06, "loss": 0.7277, "step": 810 }, { "epoch": 0.2707846410684474, "grad_norm": 2.472119664221603, "learning_rate": 9.021134593993327e-06, "loss": 0.7296, "step": 811 }, { "epoch": 0.27111853088480803, "grad_norm": 2.3632957875011824, "learning_rate": 9.03225806451613e-06, "loss": 0.7383, "step": 812 }, { "epoch": 0.2714524207011686, "grad_norm": 2.424094271177195, "learning_rate": 9.043381535038932e-06, "loss": 0.6744, "step": 813 }, { "epoch": 0.2717863105175292, "grad_norm": 2.7174871677507157, "learning_rate": 9.054505005561735e-06, "loss": 0.7049, "step": 814 }, { "epoch": 0.27212020033388984, "grad_norm": 2.171403160782245, "learning_rate": 9.06562847608454e-06, "loss": 0.6857, "step": 815 }, { "epoch": 0.2724540901502504, "grad_norm": 2.565876721202445, "learning_rate": 9.076751946607342e-06, "loss": 0.7041, "step": 816 }, { "epoch": 0.272787979966611, "grad_norm": 2.292264279671954, "learning_rate": 9.087875417130145e-06, "loss": 0.696, "step": 817 }, { "epoch": 0.2731218697829716, "grad_norm": 2.530275238237511, "learning_rate": 9.09899888765295e-06, "loss": 0.7116, "step": 818 }, { "epoch": 0.2734557595993322, "grad_norm": 2.5117559972692565, "learning_rate": 9.110122358175752e-06, "loss": 0.7299, "step": 819 }, { "epoch": 0.27378964941569284, "grad_norm": 2.404996740869113, "learning_rate": 9.121245828698555e-06, "loss": 0.6878, "step": 820 }, { "epoch": 0.2741235392320534, "grad_norm": 2.3560112766968317, "learning_rate": 9.132369299221357e-06, "loss": 0.7203, "step": 821 }, { "epoch": 0.274457429048414, "grad_norm": 2.340308421984246, "learning_rate": 9.143492769744162e-06, "loss": 0.6966, "step": 822 }, { "epoch": 0.27479131886477465, "grad_norm": 2.438592229880176, "learning_rate": 9.154616240266963e-06, "loss": 0.7063, "step": 823 }, { "epoch": 0.2751252086811352, "grad_norm": 2.386146614235925, "learning_rate": 9.165739710789767e-06, "loss": 0.7396, "step": 824 }, { "epoch": 0.27545909849749584, "grad_norm": 2.19058517623771, "learning_rate": 9.176863181312572e-06, "loss": 0.6856, "step": 825 }, { "epoch": 0.2757929883138564, "grad_norm": 2.3927360103542252, "learning_rate": 9.187986651835373e-06, "loss": 0.7165, "step": 826 }, { "epoch": 0.276126878130217, "grad_norm": 2.1552870595105147, "learning_rate": 9.199110122358177e-06, "loss": 0.671, "step": 827 }, { "epoch": 0.27646076794657765, "grad_norm": 2.429507424594572, "learning_rate": 9.21023359288098e-06, "loss": 0.6979, "step": 828 }, { "epoch": 0.2767946577629382, "grad_norm": 2.3227202131326528, "learning_rate": 9.221357063403782e-06, "loss": 0.7102, "step": 829 }, { "epoch": 0.27712854757929883, "grad_norm": 2.307077050810612, "learning_rate": 9.232480533926585e-06, "loss": 0.7391, "step": 830 }, { "epoch": 0.27746243739565946, "grad_norm": 2.255737161688295, "learning_rate": 9.24360400444939e-06, "loss": 0.7163, "step": 831 }, { "epoch": 0.27779632721202, "grad_norm": 2.281341793816911, "learning_rate": 9.254727474972192e-06, "loss": 0.712, "step": 832 }, { "epoch": 0.27813021702838064, "grad_norm": 2.3341878805073297, "learning_rate": 9.265850945494995e-06, "loss": 0.6567, "step": 833 }, { "epoch": 0.2784641068447412, "grad_norm": 2.2381652026047485, "learning_rate": 9.2769744160178e-06, "loss": 0.7042, "step": 834 }, { "epoch": 0.27879799666110183, "grad_norm": 2.5324189868793128, "learning_rate": 9.288097886540602e-06, "loss": 0.754, "step": 835 }, { "epoch": 0.27913188647746245, "grad_norm": 2.2518898509315224, "learning_rate": 9.299221357063405e-06, "loss": 0.6477, "step": 836 }, { "epoch": 0.279465776293823, "grad_norm": 2.4994986462298217, "learning_rate": 9.310344827586207e-06, "loss": 0.7097, "step": 837 }, { "epoch": 0.27979966611018364, "grad_norm": 2.2265653127790865, "learning_rate": 9.32146829810901e-06, "loss": 0.7021, "step": 838 }, { "epoch": 0.28013355592654426, "grad_norm": 2.3476908762804225, "learning_rate": 9.332591768631813e-06, "loss": 0.7209, "step": 839 }, { "epoch": 0.28046744574290483, "grad_norm": 2.174338414965359, "learning_rate": 9.343715239154617e-06, "loss": 0.6994, "step": 840 }, { "epoch": 0.28080133555926545, "grad_norm": 2.372415281100539, "learning_rate": 9.35483870967742e-06, "loss": 0.6848, "step": 841 }, { "epoch": 0.281135225375626, "grad_norm": 2.1859366667469753, "learning_rate": 9.365962180200223e-06, "loss": 0.6884, "step": 842 }, { "epoch": 0.28146911519198664, "grad_norm": 2.1349034968681995, "learning_rate": 9.377085650723027e-06, "loss": 0.6768, "step": 843 }, { "epoch": 0.28180300500834726, "grad_norm": 2.262707913835109, "learning_rate": 9.38820912124583e-06, "loss": 0.6865, "step": 844 }, { "epoch": 0.28213689482470783, "grad_norm": 2.324665392827428, "learning_rate": 9.399332591768633e-06, "loss": 0.6827, "step": 845 }, { "epoch": 0.28247078464106845, "grad_norm": 2.211117858249741, "learning_rate": 9.410456062291435e-06, "loss": 0.6709, "step": 846 }, { "epoch": 0.2828046744574291, "grad_norm": 2.293123416343112, "learning_rate": 9.42157953281424e-06, "loss": 0.6893, "step": 847 }, { "epoch": 0.28313856427378964, "grad_norm": 2.1449340258612373, "learning_rate": 9.43270300333704e-06, "loss": 0.6808, "step": 848 }, { "epoch": 0.28347245409015026, "grad_norm": 2.1260632212944506, "learning_rate": 9.443826473859845e-06, "loss": 0.6934, "step": 849 }, { "epoch": 0.2838063439065108, "grad_norm": 2.4098983478860925, "learning_rate": 9.45494994438265e-06, "loss": 0.6728, "step": 850 }, { "epoch": 0.28414023372287145, "grad_norm": 2.069202764331185, "learning_rate": 9.46607341490545e-06, "loss": 0.6779, "step": 851 }, { "epoch": 0.28447412353923207, "grad_norm": 3.271199454495248, "learning_rate": 9.477196885428255e-06, "loss": 0.6898, "step": 852 }, { "epoch": 0.28480801335559264, "grad_norm": 2.334156540244979, "learning_rate": 9.488320355951058e-06, "loss": 0.7044, "step": 853 }, { "epoch": 0.28514190317195326, "grad_norm": 2.3743777460496354, "learning_rate": 9.49944382647386e-06, "loss": 0.6769, "step": 854 }, { "epoch": 0.2854757929883139, "grad_norm": 2.49782632449432, "learning_rate": 9.510567296996663e-06, "loss": 0.7129, "step": 855 }, { "epoch": 0.28580968280467445, "grad_norm": 2.3891127504517193, "learning_rate": 9.521690767519467e-06, "loss": 0.6754, "step": 856 }, { "epoch": 0.28614357262103507, "grad_norm": 2.098349273206771, "learning_rate": 9.53281423804227e-06, "loss": 0.6526, "step": 857 }, { "epoch": 0.28647746243739564, "grad_norm": 2.2032377583810234, "learning_rate": 9.543937708565073e-06, "loss": 0.6977, "step": 858 }, { "epoch": 0.28681135225375626, "grad_norm": 2.2502924761748297, "learning_rate": 9.555061179087877e-06, "loss": 0.7225, "step": 859 }, { "epoch": 0.2871452420701169, "grad_norm": 2.460773564623362, "learning_rate": 9.56618464961068e-06, "loss": 0.6867, "step": 860 }, { "epoch": 0.28747913188647745, "grad_norm": 2.2753153702182636, "learning_rate": 9.577308120133483e-06, "loss": 0.6922, "step": 861 }, { "epoch": 0.28781302170283807, "grad_norm": 2.068964756098716, "learning_rate": 9.588431590656285e-06, "loss": 0.6378, "step": 862 }, { "epoch": 0.2881469115191987, "grad_norm": 2.496082345519915, "learning_rate": 9.599555061179088e-06, "loss": 0.6928, "step": 863 }, { "epoch": 0.28848080133555926, "grad_norm": 2.0460430452727296, "learning_rate": 9.61067853170189e-06, "loss": 0.6977, "step": 864 }, { "epoch": 0.2888146911519199, "grad_norm": 2.3728465438841253, "learning_rate": 9.621802002224695e-06, "loss": 0.6824, "step": 865 }, { "epoch": 0.28914858096828044, "grad_norm": 2.1033232263692265, "learning_rate": 9.632925472747498e-06, "loss": 0.704, "step": 866 }, { "epoch": 0.28948247078464107, "grad_norm": 2.0362402298215536, "learning_rate": 9.6440489432703e-06, "loss": 0.6903, "step": 867 }, { "epoch": 0.2898163606010017, "grad_norm": 2.283209384252515, "learning_rate": 9.655172413793105e-06, "loss": 0.7063, "step": 868 }, { "epoch": 0.29015025041736225, "grad_norm": 1.9461277499372618, "learning_rate": 9.666295884315908e-06, "loss": 0.6743, "step": 869 }, { "epoch": 0.2904841402337229, "grad_norm": 1.9607822814715243, "learning_rate": 9.67741935483871e-06, "loss": 0.65, "step": 870 }, { "epoch": 0.2908180300500835, "grad_norm": 2.151276852676313, "learning_rate": 9.688542825361513e-06, "loss": 0.6641, "step": 871 }, { "epoch": 0.29115191986644406, "grad_norm": 1.9748951085278192, "learning_rate": 9.699666295884318e-06, "loss": 0.6884, "step": 872 }, { "epoch": 0.2914858096828047, "grad_norm": 2.1160133045981606, "learning_rate": 9.710789766407119e-06, "loss": 0.658, "step": 873 }, { "epoch": 0.29181969949916525, "grad_norm": 2.0577488559598924, "learning_rate": 9.721913236929923e-06, "loss": 0.6953, "step": 874 }, { "epoch": 0.2921535893155259, "grad_norm": 2.0272295655294474, "learning_rate": 9.733036707452727e-06, "loss": 0.6442, "step": 875 }, { "epoch": 0.2924874791318865, "grad_norm": 1.998173460860901, "learning_rate": 9.744160177975528e-06, "loss": 0.636, "step": 876 }, { "epoch": 0.29282136894824706, "grad_norm": 2.2072402122495096, "learning_rate": 9.755283648498333e-06, "loss": 0.7005, "step": 877 }, { "epoch": 0.2931552587646077, "grad_norm": 2.0938152715422356, "learning_rate": 9.766407119021135e-06, "loss": 0.665, "step": 878 }, { "epoch": 0.2934891485809683, "grad_norm": 2.1210478510593935, "learning_rate": 9.777530589543938e-06, "loss": 0.6615, "step": 879 }, { "epoch": 0.2938230383973289, "grad_norm": 2.0814234599151415, "learning_rate": 9.788654060066741e-06, "loss": 0.6951, "step": 880 }, { "epoch": 0.2941569282136895, "grad_norm": 1.9624465102044069, "learning_rate": 9.799777530589545e-06, "loss": 0.68, "step": 881 }, { "epoch": 0.29449081803005006, "grad_norm": 2.046362934923562, "learning_rate": 9.810901001112348e-06, "loss": 0.6687, "step": 882 }, { "epoch": 0.2948247078464107, "grad_norm": 2.122663350767734, "learning_rate": 9.82202447163515e-06, "loss": 0.6717, "step": 883 }, { "epoch": 0.2951585976627713, "grad_norm": 2.1341822647297986, "learning_rate": 9.833147942157955e-06, "loss": 0.6725, "step": 884 }, { "epoch": 0.29549248747913187, "grad_norm": 2.0172415885898127, "learning_rate": 9.844271412680758e-06, "loss": 0.6578, "step": 885 }, { "epoch": 0.2958263772954925, "grad_norm": 2.065691518977551, "learning_rate": 9.85539488320356e-06, "loss": 0.659, "step": 886 }, { "epoch": 0.2961602671118531, "grad_norm": 1.9832312058836672, "learning_rate": 9.866518353726363e-06, "loss": 0.6522, "step": 887 }, { "epoch": 0.2964941569282137, "grad_norm": 2.14532408171536, "learning_rate": 9.877641824249166e-06, "loss": 0.702, "step": 888 }, { "epoch": 0.2968280467445743, "grad_norm": 2.0444594860819687, "learning_rate": 9.888765294771969e-06, "loss": 0.6629, "step": 889 }, { "epoch": 0.29716193656093487, "grad_norm": 2.0755446015934784, "learning_rate": 9.899888765294773e-06, "loss": 0.656, "step": 890 }, { "epoch": 0.2974958263772955, "grad_norm": 1.9542309614237288, "learning_rate": 9.911012235817576e-06, "loss": 0.6496, "step": 891 }, { "epoch": 0.2978297161936561, "grad_norm": 2.210136907874954, "learning_rate": 9.922135706340378e-06, "loss": 0.6666, "step": 892 }, { "epoch": 0.2981636060100167, "grad_norm": 2.033793693663427, "learning_rate": 9.933259176863183e-06, "loss": 0.668, "step": 893 }, { "epoch": 0.2984974958263773, "grad_norm": 2.006926664501037, "learning_rate": 9.944382647385986e-06, "loss": 0.659, "step": 894 }, { "epoch": 0.2988313856427379, "grad_norm": 2.004247793486411, "learning_rate": 9.955506117908788e-06, "loss": 0.655, "step": 895 }, { "epoch": 0.2991652754590985, "grad_norm": 1.9902436353064719, "learning_rate": 9.966629588431591e-06, "loss": 0.6598, "step": 896 }, { "epoch": 0.2994991652754591, "grad_norm": 2.0103564354313934, "learning_rate": 9.977753058954395e-06, "loss": 0.6312, "step": 897 }, { "epoch": 0.2998330550918197, "grad_norm": 2.009658903313541, "learning_rate": 9.988876529477196e-06, "loss": 0.6544, "step": 898 }, { "epoch": 0.3001669449081803, "grad_norm": 2.0070967952679712, "learning_rate": 1e-05, "loss": 0.6516, "step": 899 }, { "epoch": 0.3005008347245409, "grad_norm": 2.012564265563225, "learning_rate": 9.99999962262574e-06, "loss": 0.6775, "step": 900 }, { "epoch": 0.3008347245409015, "grad_norm": 2.0161702021103642, "learning_rate": 9.999998490503018e-06, "loss": 0.6551, "step": 901 }, { "epoch": 0.3011686143572621, "grad_norm": 1.9719002275755582, "learning_rate": 9.999996603632e-06, "loss": 0.6503, "step": 902 }, { "epoch": 0.30150250417362273, "grad_norm": 2.13518942111188, "learning_rate": 9.999993962012977e-06, "loss": 0.7052, "step": 903 }, { "epoch": 0.3018363939899833, "grad_norm": 1.9853203661674088, "learning_rate": 9.999990565646344e-06, "loss": 0.6684, "step": 904 }, { "epoch": 0.3021702838063439, "grad_norm": 1.9881402843626113, "learning_rate": 9.999986414532616e-06, "loss": 0.6743, "step": 905 }, { "epoch": 0.3025041736227045, "grad_norm": 1.9109029396322152, "learning_rate": 9.999981508672418e-06, "loss": 0.6525, "step": 906 }, { "epoch": 0.3028380634390651, "grad_norm": 1.903637671260634, "learning_rate": 9.999975848066489e-06, "loss": 0.6861, "step": 907 }, { "epoch": 0.30317195325542573, "grad_norm": 1.9579732248426094, "learning_rate": 9.999969432715688e-06, "loss": 0.6837, "step": 908 }, { "epoch": 0.3035058430717863, "grad_norm": 2.068891790021067, "learning_rate": 9.99996226262098e-06, "loss": 0.6909, "step": 909 }, { "epoch": 0.3038397328881469, "grad_norm": 1.8949419790749402, "learning_rate": 9.999954337783448e-06, "loss": 0.6856, "step": 910 }, { "epoch": 0.30417362270450754, "grad_norm": 2.0343878688121606, "learning_rate": 9.999945658204289e-06, "loss": 0.6813, "step": 911 }, { "epoch": 0.3045075125208681, "grad_norm": 1.9055121276661946, "learning_rate": 9.999936223884812e-06, "loss": 0.6617, "step": 912 }, { "epoch": 0.30484140233722873, "grad_norm": 2.000116166707744, "learning_rate": 9.99992603482644e-06, "loss": 0.6519, "step": 913 }, { "epoch": 0.3051752921535893, "grad_norm": 1.9041506677179467, "learning_rate": 9.999915091030715e-06, "loss": 0.645, "step": 914 }, { "epoch": 0.3055091819699499, "grad_norm": 2.0499283827877455, "learning_rate": 9.999903392499286e-06, "loss": 0.6511, "step": 915 }, { "epoch": 0.30584307178631054, "grad_norm": 1.9415254334362597, "learning_rate": 9.99989093923392e-06, "loss": 0.6731, "step": 916 }, { "epoch": 0.3061769616026711, "grad_norm": 1.9099845048841848, "learning_rate": 9.999877731236495e-06, "loss": 0.6624, "step": 917 }, { "epoch": 0.3065108514190317, "grad_norm": 1.8661595486283775, "learning_rate": 9.999863768509008e-06, "loss": 0.6649, "step": 918 }, { "epoch": 0.3068447412353923, "grad_norm": 1.8855647646833464, "learning_rate": 9.999849051053564e-06, "loss": 0.6201, "step": 919 }, { "epoch": 0.3071786310517529, "grad_norm": 1.8888706245265683, "learning_rate": 9.999833578872385e-06, "loss": 0.6237, "step": 920 }, { "epoch": 0.30751252086811354, "grad_norm": 1.954534241730217, "learning_rate": 9.999817351967806e-06, "loss": 0.6668, "step": 921 }, { "epoch": 0.3078464106844741, "grad_norm": 1.8641344348868596, "learning_rate": 9.999800370342278e-06, "loss": 0.6407, "step": 922 }, { "epoch": 0.3081803005008347, "grad_norm": 2.0504267231968134, "learning_rate": 9.999782633998365e-06, "loss": 0.6607, "step": 923 }, { "epoch": 0.30851419031719535, "grad_norm": 1.877087274613195, "learning_rate": 9.999764142938741e-06, "loss": 0.6436, "step": 924 }, { "epoch": 0.3088480801335559, "grad_norm": 1.9198345815282047, "learning_rate": 9.999744897166202e-06, "loss": 0.6153, "step": 925 }, { "epoch": 0.30918196994991654, "grad_norm": 1.9382212023840781, "learning_rate": 9.999724896683646e-06, "loss": 0.6584, "step": 926 }, { "epoch": 0.3095158597662771, "grad_norm": 1.9613080653262633, "learning_rate": 9.9997041414941e-06, "loss": 0.6381, "step": 927 }, { "epoch": 0.3098497495826377, "grad_norm": 1.8225868499062796, "learning_rate": 9.999682631600694e-06, "loss": 0.631, "step": 928 }, { "epoch": 0.31018363939899835, "grad_norm": 1.8582571934116852, "learning_rate": 9.999660367006671e-06, "loss": 0.6295, "step": 929 }, { "epoch": 0.3105175292153589, "grad_norm": 1.8652269402320913, "learning_rate": 9.999637347715398e-06, "loss": 0.6295, "step": 930 }, { "epoch": 0.31085141903171953, "grad_norm": 1.9549937879935968, "learning_rate": 9.999613573730346e-06, "loss": 0.6917, "step": 931 }, { "epoch": 0.31118530884808016, "grad_norm": 1.863215953177301, "learning_rate": 9.999589045055105e-06, "loss": 0.6194, "step": 932 }, { "epoch": 0.3115191986644407, "grad_norm": 1.9312762807212451, "learning_rate": 9.999563761693375e-06, "loss": 0.6411, "step": 933 }, { "epoch": 0.31185308848080134, "grad_norm": 1.8119926837909135, "learning_rate": 9.999537723648977e-06, "loss": 0.6435, "step": 934 }, { "epoch": 0.3121869782971619, "grad_norm": 1.9103699268081624, "learning_rate": 9.99951093092584e-06, "loss": 0.657, "step": 935 }, { "epoch": 0.31252086811352253, "grad_norm": 2.0246378724386616, "learning_rate": 9.999483383528006e-06, "loss": 0.6657, "step": 936 }, { "epoch": 0.31285475792988315, "grad_norm": 1.7255681203881545, "learning_rate": 9.999455081459635e-06, "loss": 0.6168, "step": 937 }, { "epoch": 0.3131886477462437, "grad_norm": 1.9455934729142803, "learning_rate": 9.999426024724999e-06, "loss": 0.661, "step": 938 }, { "epoch": 0.31352253756260434, "grad_norm": 1.973783741898996, "learning_rate": 9.999396213328482e-06, "loss": 0.6518, "step": 939 }, { "epoch": 0.31385642737896496, "grad_norm": 1.833735536808459, "learning_rate": 9.99936564727459e-06, "loss": 0.6285, "step": 940 }, { "epoch": 0.31419031719532553, "grad_norm": 2.0375261566564973, "learning_rate": 9.999334326567931e-06, "loss": 0.6507, "step": 941 }, { "epoch": 0.31452420701168615, "grad_norm": 1.828070504500008, "learning_rate": 9.999302251213235e-06, "loss": 0.652, "step": 942 }, { "epoch": 0.3148580968280467, "grad_norm": 1.8936199169156758, "learning_rate": 9.999269421215344e-06, "loss": 0.6196, "step": 943 }, { "epoch": 0.31519198664440734, "grad_norm": 1.8325729884301216, "learning_rate": 9.999235836579212e-06, "loss": 0.6634, "step": 944 }, { "epoch": 0.31552587646076796, "grad_norm": 1.8281415581112896, "learning_rate": 9.999201497309911e-06, "loss": 0.6073, "step": 945 }, { "epoch": 0.31585976627712853, "grad_norm": 1.7489169425440378, "learning_rate": 9.999166403412624e-06, "loss": 0.6067, "step": 946 }, { "epoch": 0.31619365609348915, "grad_norm": 1.8414482663540788, "learning_rate": 9.999130554892647e-06, "loss": 0.654, "step": 947 }, { "epoch": 0.3165275459098498, "grad_norm": 1.707059554937721, "learning_rate": 9.999093951755393e-06, "loss": 0.6073, "step": 948 }, { "epoch": 0.31686143572621034, "grad_norm": 1.929705771626915, "learning_rate": 9.999056594006385e-06, "loss": 0.6216, "step": 949 }, { "epoch": 0.31719532554257096, "grad_norm": 1.931602948495179, "learning_rate": 9.999018481651263e-06, "loss": 0.6496, "step": 950 }, { "epoch": 0.3175292153589315, "grad_norm": 2.0499829132611613, "learning_rate": 9.998979614695784e-06, "loss": 0.6263, "step": 951 }, { "epoch": 0.31786310517529215, "grad_norm": 1.8065140365732577, "learning_rate": 9.998939993145809e-06, "loss": 0.6433, "step": 952 }, { "epoch": 0.31819699499165277, "grad_norm": 1.9013733579513343, "learning_rate": 9.99889961700732e-06, "loss": 0.6352, "step": 953 }, { "epoch": 0.31853088480801334, "grad_norm": 1.8943671047357449, "learning_rate": 9.998858486286416e-06, "loss": 0.6231, "step": 954 }, { "epoch": 0.31886477462437396, "grad_norm": 1.8505227143575338, "learning_rate": 9.9988166009893e-06, "loss": 0.6477, "step": 955 }, { "epoch": 0.3191986644407346, "grad_norm": 1.7794782507659066, "learning_rate": 9.998773961122301e-06, "loss": 0.6565, "step": 956 }, { "epoch": 0.31953255425709515, "grad_norm": 1.8062318493225624, "learning_rate": 9.99873056669185e-06, "loss": 0.6458, "step": 957 }, { "epoch": 0.31986644407345577, "grad_norm": 1.8040301721045633, "learning_rate": 9.9986864177045e-06, "loss": 0.6756, "step": 958 }, { "epoch": 0.32020033388981634, "grad_norm": 1.72480925934717, "learning_rate": 9.998641514166914e-06, "loss": 0.6119, "step": 959 }, { "epoch": 0.32053422370617696, "grad_norm": 1.7545803803730733, "learning_rate": 9.998595856085868e-06, "loss": 0.6505, "step": 960 }, { "epoch": 0.3208681135225376, "grad_norm": 1.6548137824240872, "learning_rate": 9.99854944346826e-06, "loss": 0.6156, "step": 961 }, { "epoch": 0.32120200333889815, "grad_norm": 1.7128687352515635, "learning_rate": 9.998502276321092e-06, "loss": 0.6379, "step": 962 }, { "epoch": 0.32153589315525877, "grad_norm": 1.7277017183928796, "learning_rate": 9.998454354651486e-06, "loss": 0.6392, "step": 963 }, { "epoch": 0.3218697829716194, "grad_norm": 1.7889919409031678, "learning_rate": 9.998405678466673e-06, "loss": 0.6471, "step": 964 }, { "epoch": 0.32220367278797996, "grad_norm": 1.709221527017984, "learning_rate": 9.998356247774002e-06, "loss": 0.5958, "step": 965 }, { "epoch": 0.3225375626043406, "grad_norm": 1.738796174962692, "learning_rate": 9.998306062580934e-06, "loss": 0.6108, "step": 966 }, { "epoch": 0.32287145242070114, "grad_norm": 1.7020440821323082, "learning_rate": 9.998255122895047e-06, "loss": 0.6203, "step": 967 }, { "epoch": 0.32320534223706177, "grad_norm": 1.6908921146679416, "learning_rate": 9.998203428724027e-06, "loss": 0.6084, "step": 968 }, { "epoch": 0.3235392320534224, "grad_norm": 1.8286680505636335, "learning_rate": 9.998150980075679e-06, "loss": 0.6224, "step": 969 }, { "epoch": 0.32387312186978295, "grad_norm": 1.8067089445977937, "learning_rate": 9.998097776957918e-06, "loss": 0.6249, "step": 970 }, { "epoch": 0.3242070116861436, "grad_norm": 1.9951123612800237, "learning_rate": 9.99804381937878e-06, "loss": 0.6401, "step": 971 }, { "epoch": 0.3245409015025042, "grad_norm": 1.794546376518722, "learning_rate": 9.997989107346403e-06, "loss": 0.6262, "step": 972 }, { "epoch": 0.32487479131886476, "grad_norm": 1.7742507556445282, "learning_rate": 9.997933640869051e-06, "loss": 0.6416, "step": 973 }, { "epoch": 0.3252086811352254, "grad_norm": 1.9091398274504985, "learning_rate": 9.997877419955095e-06, "loss": 0.6278, "step": 974 }, { "epoch": 0.32554257095158595, "grad_norm": 1.6863719704897926, "learning_rate": 9.997820444613023e-06, "loss": 0.6234, "step": 975 }, { "epoch": 0.3258764607679466, "grad_norm": 2.0155420854498782, "learning_rate": 9.997762714851432e-06, "loss": 0.6163, "step": 976 }, { "epoch": 0.3262103505843072, "grad_norm": 1.8025282076384284, "learning_rate": 9.997704230679037e-06, "loss": 0.6107, "step": 977 }, { "epoch": 0.32654424040066776, "grad_norm": 1.9635039258548614, "learning_rate": 9.997644992104669e-06, "loss": 0.6458, "step": 978 }, { "epoch": 0.3268781302170284, "grad_norm": 1.7793614443857284, "learning_rate": 9.99758499913727e-06, "loss": 0.6044, "step": 979 }, { "epoch": 0.327212020033389, "grad_norm": 1.874814682810183, "learning_rate": 9.997524251785891e-06, "loss": 0.6158, "step": 980 }, { "epoch": 0.3275459098497496, "grad_norm": 1.6989474227004033, "learning_rate": 9.997462750059708e-06, "loss": 0.6221, "step": 981 }, { "epoch": 0.3278797996661102, "grad_norm": 1.7769617150710733, "learning_rate": 9.997400493967999e-06, "loss": 0.6297, "step": 982 }, { "epoch": 0.32821368948247076, "grad_norm": 1.8034748001216085, "learning_rate": 9.997337483520167e-06, "loss": 0.6412, "step": 983 }, { "epoch": 0.3285475792988314, "grad_norm": 1.653436879928246, "learning_rate": 9.997273718725719e-06, "loss": 0.629, "step": 984 }, { "epoch": 0.328881469115192, "grad_norm": 1.6357815171544372, "learning_rate": 9.997209199594284e-06, "loss": 0.6074, "step": 985 }, { "epoch": 0.32921535893155257, "grad_norm": 1.7392202893616766, "learning_rate": 9.997143926135598e-06, "loss": 0.6308, "step": 986 }, { "epoch": 0.3295492487479132, "grad_norm": 1.8036019645276526, "learning_rate": 9.997077898359515e-06, "loss": 0.6441, "step": 987 }, { "epoch": 0.3298831385642738, "grad_norm": 1.8992046641092883, "learning_rate": 9.997011116276002e-06, "loss": 0.6705, "step": 988 }, { "epoch": 0.3302170283806344, "grad_norm": 1.6208535231586858, "learning_rate": 9.996943579895139e-06, "loss": 0.5764, "step": 989 }, { "epoch": 0.330550918196995, "grad_norm": 1.8073766775905342, "learning_rate": 9.996875289227124e-06, "loss": 0.5951, "step": 990 }, { "epoch": 0.33088480801335557, "grad_norm": 1.7524411928535217, "learning_rate": 9.99680624428226e-06, "loss": 0.6366, "step": 991 }, { "epoch": 0.3312186978297162, "grad_norm": 1.7987788576316264, "learning_rate": 9.996736445070974e-06, "loss": 0.6261, "step": 992 }, { "epoch": 0.3315525876460768, "grad_norm": 1.8873403710255774, "learning_rate": 9.9966658916038e-06, "loss": 0.6515, "step": 993 }, { "epoch": 0.3318864774624374, "grad_norm": 1.7271024504278574, "learning_rate": 9.996594583891388e-06, "loss": 0.6097, "step": 994 }, { "epoch": 0.332220367278798, "grad_norm": 1.8180311835085652, "learning_rate": 9.996522521944502e-06, "loss": 0.6377, "step": 995 }, { "epoch": 0.3325542570951586, "grad_norm": 1.657906723535912, "learning_rate": 9.99644970577402e-06, "loss": 0.6116, "step": 996 }, { "epoch": 0.3328881469115192, "grad_norm": 1.7136576671663715, "learning_rate": 9.996376135390935e-06, "loss": 0.5967, "step": 997 }, { "epoch": 0.3332220367278798, "grad_norm": 1.646875560542699, "learning_rate": 9.996301810806348e-06, "loss": 0.6197, "step": 998 }, { "epoch": 0.3335559265442404, "grad_norm": 1.6760660940552565, "learning_rate": 9.996226732031483e-06, "loss": 0.6126, "step": 999 }, { "epoch": 0.333889816360601, "grad_norm": 1.7806132893176343, "learning_rate": 9.996150899077673e-06, "loss": 0.6639, "step": 1000 }, { "epoch": 0.3342237061769616, "grad_norm": 1.7580517139401248, "learning_rate": 9.99607431195636e-06, "loss": 0.624, "step": 1001 }, { "epoch": 0.3345575959933222, "grad_norm": 1.6752895075022125, "learning_rate": 9.99599697067911e-06, "loss": 0.5933, "step": 1002 }, { "epoch": 0.3348914858096828, "grad_norm": 1.744149856723612, "learning_rate": 9.995918875257596e-06, "loss": 0.6346, "step": 1003 }, { "epoch": 0.33522537562604343, "grad_norm": 1.8150583829006073, "learning_rate": 9.995840025703606e-06, "loss": 0.6096, "step": 1004 }, { "epoch": 0.335559265442404, "grad_norm": 1.6618357936087633, "learning_rate": 9.995760422029042e-06, "loss": 0.606, "step": 1005 }, { "epoch": 0.3358931552587646, "grad_norm": 1.682224689876123, "learning_rate": 9.995680064245921e-06, "loss": 0.6245, "step": 1006 }, { "epoch": 0.3362270450751252, "grad_norm": 1.7014965095044552, "learning_rate": 9.995598952366372e-06, "loss": 0.6316, "step": 1007 }, { "epoch": 0.3365609348914858, "grad_norm": 1.7088123847332966, "learning_rate": 9.995517086402642e-06, "loss": 0.5938, "step": 1008 }, { "epoch": 0.33689482470784643, "grad_norm": 1.702050744569486, "learning_rate": 9.995434466367085e-06, "loss": 0.6252, "step": 1009 }, { "epoch": 0.337228714524207, "grad_norm": 1.6836554561436212, "learning_rate": 9.995351092272171e-06, "loss": 0.6, "step": 1010 }, { "epoch": 0.3375626043405676, "grad_norm": 1.8358557868913326, "learning_rate": 9.99526696413049e-06, "loss": 0.6607, "step": 1011 }, { "epoch": 0.33789649415692824, "grad_norm": 1.6202913694891108, "learning_rate": 9.99518208195474e-06, "loss": 0.6172, "step": 1012 }, { "epoch": 0.3382303839732888, "grad_norm": 1.8097737454319847, "learning_rate": 9.995096445757732e-06, "loss": 0.5947, "step": 1013 }, { "epoch": 0.33856427378964943, "grad_norm": 1.6876741120948258, "learning_rate": 9.995010055552394e-06, "loss": 0.6004, "step": 1014 }, { "epoch": 0.33889816360601, "grad_norm": 1.6698199748257292, "learning_rate": 9.994922911351766e-06, "loss": 0.6112, "step": 1015 }, { "epoch": 0.3392320534223706, "grad_norm": 1.855185215729266, "learning_rate": 9.994835013169001e-06, "loss": 0.633, "step": 1016 }, { "epoch": 0.33956594323873124, "grad_norm": 1.721749080154185, "learning_rate": 9.994746361017372e-06, "loss": 0.5923, "step": 1017 }, { "epoch": 0.3398998330550918, "grad_norm": 1.7404208007931814, "learning_rate": 9.994656954910256e-06, "loss": 0.6326, "step": 1018 }, { "epoch": 0.3402337228714524, "grad_norm": 1.6985495698229234, "learning_rate": 9.994566794861152e-06, "loss": 0.6169, "step": 1019 }, { "epoch": 0.34056761268781305, "grad_norm": 1.690857983364654, "learning_rate": 9.994475880883667e-06, "loss": 0.6379, "step": 1020 }, { "epoch": 0.3409015025041736, "grad_norm": 1.6806768793473126, "learning_rate": 9.994384212991529e-06, "loss": 0.6134, "step": 1021 }, { "epoch": 0.34123539232053424, "grad_norm": 1.6372914042207263, "learning_rate": 9.994291791198569e-06, "loss": 0.592, "step": 1022 }, { "epoch": 0.3415692821368948, "grad_norm": 1.5944285892804353, "learning_rate": 9.99419861551874e-06, "loss": 0.5838, "step": 1023 }, { "epoch": 0.3419031719532554, "grad_norm": 1.5299581229306853, "learning_rate": 9.994104685966111e-06, "loss": 0.5779, "step": 1024 }, { "epoch": 0.34223706176961605, "grad_norm": 1.7154996934892768, "learning_rate": 9.994010002554857e-06, "loss": 0.6246, "step": 1025 }, { "epoch": 0.3425709515859766, "grad_norm": 1.5813401959521847, "learning_rate": 9.99391456529927e-06, "loss": 0.5872, "step": 1026 }, { "epoch": 0.34290484140233723, "grad_norm": 1.6029203285342453, "learning_rate": 9.993818374213757e-06, "loss": 0.5973, "step": 1027 }, { "epoch": 0.34323873121869786, "grad_norm": 1.7155299830493311, "learning_rate": 9.99372142931284e-06, "loss": 0.6234, "step": 1028 }, { "epoch": 0.3435726210350584, "grad_norm": 1.6597347504112248, "learning_rate": 9.993623730611148e-06, "loss": 0.6179, "step": 1029 }, { "epoch": 0.34390651085141904, "grad_norm": 1.6177133214830135, "learning_rate": 9.993525278123434e-06, "loss": 0.6136, "step": 1030 }, { "epoch": 0.3442404006677796, "grad_norm": 1.5895799853739265, "learning_rate": 9.993426071864556e-06, "loss": 0.6176, "step": 1031 }, { "epoch": 0.34457429048414023, "grad_norm": 1.6116169102539553, "learning_rate": 9.99332611184949e-06, "loss": 0.5837, "step": 1032 }, { "epoch": 0.34490818030050086, "grad_norm": 1.6833260503166279, "learning_rate": 9.993225398093326e-06, "loss": 0.6068, "step": 1033 }, { "epoch": 0.3452420701168614, "grad_norm": 1.7215201142511796, "learning_rate": 9.993123930611268e-06, "loss": 0.6141, "step": 1034 }, { "epoch": 0.34557595993322204, "grad_norm": 1.576767743193051, "learning_rate": 9.993021709418626e-06, "loss": 0.5862, "step": 1035 }, { "epoch": 0.34590984974958267, "grad_norm": 1.6429332851440865, "learning_rate": 9.992918734530837e-06, "loss": 0.6116, "step": 1036 }, { "epoch": 0.34624373956594323, "grad_norm": 1.589036747584031, "learning_rate": 9.992815005963444e-06, "loss": 0.5778, "step": 1037 }, { "epoch": 0.34657762938230385, "grad_norm": 1.5814555608281042, "learning_rate": 9.9927105237321e-06, "loss": 0.5911, "step": 1038 }, { "epoch": 0.3469115191986644, "grad_norm": 1.6306580342956063, "learning_rate": 9.992605287852582e-06, "loss": 0.5625, "step": 1039 }, { "epoch": 0.34724540901502504, "grad_norm": 1.686041988785331, "learning_rate": 9.992499298340773e-06, "loss": 0.5831, "step": 1040 }, { "epoch": 0.34757929883138566, "grad_norm": 1.5227569510260024, "learning_rate": 9.992392555212673e-06, "loss": 0.5843, "step": 1041 }, { "epoch": 0.34791318864774623, "grad_norm": 1.814988376560954, "learning_rate": 9.992285058484393e-06, "loss": 0.5905, "step": 1042 }, { "epoch": 0.34824707846410685, "grad_norm": 1.559316129382816, "learning_rate": 9.99217680817216e-06, "loss": 0.5882, "step": 1043 }, { "epoch": 0.3485809682804674, "grad_norm": 1.6733753226763108, "learning_rate": 9.992067804292317e-06, "loss": 0.6007, "step": 1044 }, { "epoch": 0.34891485809682804, "grad_norm": 1.7052277514456786, "learning_rate": 9.991958046861317e-06, "loss": 0.624, "step": 1045 }, { "epoch": 0.34924874791318866, "grad_norm": 1.6279348376802831, "learning_rate": 9.991847535895725e-06, "loss": 0.5815, "step": 1046 }, { "epoch": 0.34958263772954923, "grad_norm": 1.7913740981255313, "learning_rate": 9.991736271412224e-06, "loss": 0.6107, "step": 1047 }, { "epoch": 0.34991652754590985, "grad_norm": 1.6118579561020174, "learning_rate": 9.991624253427613e-06, "loss": 0.6045, "step": 1048 }, { "epoch": 0.35025041736227047, "grad_norm": 1.6010019694543038, "learning_rate": 9.991511481958797e-06, "loss": 0.5963, "step": 1049 }, { "epoch": 0.35058430717863104, "grad_norm": 1.5623181029726445, "learning_rate": 9.991397957022799e-06, "loss": 0.5739, "step": 1050 }, { "epoch": 0.35091819699499166, "grad_norm": 1.6386467549829928, "learning_rate": 9.991283678636757e-06, "loss": 0.6057, "step": 1051 }, { "epoch": 0.3512520868113522, "grad_norm": 1.628880250309172, "learning_rate": 9.991168646817922e-06, "loss": 0.6171, "step": 1052 }, { "epoch": 0.35158597662771285, "grad_norm": 1.7378482074719832, "learning_rate": 9.991052861583656e-06, "loss": 0.5749, "step": 1053 }, { "epoch": 0.35191986644407347, "grad_norm": 1.6186093617031736, "learning_rate": 9.990936322951436e-06, "loss": 0.6177, "step": 1054 }, { "epoch": 0.35225375626043404, "grad_norm": 1.6068474633903131, "learning_rate": 9.990819030938857e-06, "loss": 0.599, "step": 1055 }, { "epoch": 0.35258764607679466, "grad_norm": 1.688821823829651, "learning_rate": 9.990700985563621e-06, "loss": 0.627, "step": 1056 }, { "epoch": 0.3529215358931553, "grad_norm": 1.5676336433663922, "learning_rate": 9.99058218684355e-06, "loss": 0.5833, "step": 1057 }, { "epoch": 0.35325542570951585, "grad_norm": 1.4909268956620514, "learning_rate": 9.990462634796574e-06, "loss": 0.5865, "step": 1058 }, { "epoch": 0.35358931552587647, "grad_norm": 1.673600081741139, "learning_rate": 9.990342329440739e-06, "loss": 0.6459, "step": 1059 }, { "epoch": 0.35392320534223703, "grad_norm": 1.6331502830533362, "learning_rate": 9.990221270794208e-06, "loss": 0.5998, "step": 1060 }, { "epoch": 0.35425709515859766, "grad_norm": 1.7457731150132956, "learning_rate": 9.99009945887525e-06, "loss": 0.6144, "step": 1061 }, { "epoch": 0.3545909849749583, "grad_norm": 1.5521448473426305, "learning_rate": 9.989976893702257e-06, "loss": 0.5729, "step": 1062 }, { "epoch": 0.35492487479131885, "grad_norm": 1.7007908055476741, "learning_rate": 9.989853575293731e-06, "loss": 0.5851, "step": 1063 }, { "epoch": 0.35525876460767947, "grad_norm": 1.6332558891366342, "learning_rate": 9.989729503668282e-06, "loss": 0.5905, "step": 1064 }, { "epoch": 0.3555926544240401, "grad_norm": 1.7143633025652123, "learning_rate": 9.989604678844643e-06, "loss": 0.5949, "step": 1065 }, { "epoch": 0.35592654424040066, "grad_norm": 1.6269868265110297, "learning_rate": 9.989479100841652e-06, "loss": 0.5922, "step": 1066 }, { "epoch": 0.3562604340567613, "grad_norm": 1.694525476855903, "learning_rate": 9.989352769678266e-06, "loss": 0.592, "step": 1067 }, { "epoch": 0.35659432387312184, "grad_norm": 1.4950844390090348, "learning_rate": 9.989225685373561e-06, "loss": 0.5978, "step": 1068 }, { "epoch": 0.35692821368948247, "grad_norm": 1.659878925078338, "learning_rate": 9.989097847946712e-06, "loss": 0.5772, "step": 1069 }, { "epoch": 0.3572621035058431, "grad_norm": 1.5099233592531196, "learning_rate": 9.988969257417018e-06, "loss": 0.5798, "step": 1070 }, { "epoch": 0.35759599332220365, "grad_norm": 1.613063898889283, "learning_rate": 9.988839913803894e-06, "loss": 0.608, "step": 1071 }, { "epoch": 0.3579298831385643, "grad_norm": 1.6562605347074917, "learning_rate": 9.98870981712686e-06, "loss": 0.6109, "step": 1072 }, { "epoch": 0.3582637729549249, "grad_norm": 1.5957754816591654, "learning_rate": 9.988578967405554e-06, "loss": 0.6278, "step": 1073 }, { "epoch": 0.35859766277128546, "grad_norm": 1.5616779765525908, "learning_rate": 9.988447364659731e-06, "loss": 0.592, "step": 1074 }, { "epoch": 0.3589315525876461, "grad_norm": 1.6497184003372782, "learning_rate": 9.988315008909255e-06, "loss": 0.5761, "step": 1075 }, { "epoch": 0.35926544240400665, "grad_norm": 1.6134925644744835, "learning_rate": 9.988181900174104e-06, "loss": 0.5798, "step": 1076 }, { "epoch": 0.3595993322203673, "grad_norm": 1.5758500324523943, "learning_rate": 9.98804803847437e-06, "loss": 0.6131, "step": 1077 }, { "epoch": 0.3599332220367279, "grad_norm": 1.6748490097578295, "learning_rate": 9.987913423830262e-06, "loss": 0.6013, "step": 1078 }, { "epoch": 0.36026711185308846, "grad_norm": 1.5077390123080048, "learning_rate": 9.987778056262098e-06, "loss": 0.5818, "step": 1079 }, { "epoch": 0.3606010016694491, "grad_norm": 1.5384709170624595, "learning_rate": 9.987641935790314e-06, "loss": 0.5817, "step": 1080 }, { "epoch": 0.3609348914858097, "grad_norm": 1.5970761954704795, "learning_rate": 9.987505062435455e-06, "loss": 0.6268, "step": 1081 }, { "epoch": 0.36126878130217027, "grad_norm": 1.6539657519280686, "learning_rate": 9.987367436218183e-06, "loss": 0.6173, "step": 1082 }, { "epoch": 0.3616026711185309, "grad_norm": 1.539054136714416, "learning_rate": 9.987229057159272e-06, "loss": 0.604, "step": 1083 }, { "epoch": 0.36193656093489146, "grad_norm": 1.5085168316673134, "learning_rate": 9.987089925279611e-06, "loss": 0.56, "step": 1084 }, { "epoch": 0.3622704507512521, "grad_norm": 1.6531610735049247, "learning_rate": 9.986950040600201e-06, "loss": 0.6046, "step": 1085 }, { "epoch": 0.3626043405676127, "grad_norm": 1.5569851187752706, "learning_rate": 9.986809403142159e-06, "loss": 0.5899, "step": 1086 }, { "epoch": 0.36293823038397327, "grad_norm": 1.5494792180796852, "learning_rate": 9.986668012926715e-06, "loss": 0.596, "step": 1087 }, { "epoch": 0.3632721202003339, "grad_norm": 1.5074758828554813, "learning_rate": 9.986525869975207e-06, "loss": 0.5667, "step": 1088 }, { "epoch": 0.3636060100166945, "grad_norm": 1.6564590939383022, "learning_rate": 9.986382974309096e-06, "loss": 0.5941, "step": 1089 }, { "epoch": 0.3639398998330551, "grad_norm": 1.5235938046292024, "learning_rate": 9.986239325949951e-06, "loss": 0.5789, "step": 1090 }, { "epoch": 0.3642737896494157, "grad_norm": 1.5628117135990383, "learning_rate": 9.986094924919454e-06, "loss": 0.595, "step": 1091 }, { "epoch": 0.36460767946577627, "grad_norm": 1.5206529113988383, "learning_rate": 9.985949771239405e-06, "loss": 0.5872, "step": 1092 }, { "epoch": 0.3649415692821369, "grad_norm": 1.536494208790411, "learning_rate": 9.985803864931714e-06, "loss": 0.6024, "step": 1093 }, { "epoch": 0.3652754590984975, "grad_norm": 1.5291922264347129, "learning_rate": 9.985657206018403e-06, "loss": 0.5787, "step": 1094 }, { "epoch": 0.3656093489148581, "grad_norm": 1.5334501134163319, "learning_rate": 9.985509794521613e-06, "loss": 0.591, "step": 1095 }, { "epoch": 0.3659432387312187, "grad_norm": 1.505012023157528, "learning_rate": 9.985361630463596e-06, "loss": 0.588, "step": 1096 }, { "epoch": 0.3662771285475793, "grad_norm": 1.5059693319972962, "learning_rate": 9.985212713866717e-06, "loss": 0.5753, "step": 1097 }, { "epoch": 0.3666110183639399, "grad_norm": 1.5003113408647641, "learning_rate": 9.98506304475345e-06, "loss": 0.5644, "step": 1098 }, { "epoch": 0.3669449081803005, "grad_norm": 1.4903710475424676, "learning_rate": 9.984912623146397e-06, "loss": 0.5772, "step": 1099 }, { "epoch": 0.3672787979966611, "grad_norm": 1.5112133492904025, "learning_rate": 9.984761449068254e-06, "loss": 0.569, "step": 1100 }, { "epoch": 0.3676126878130217, "grad_norm": 1.5589497122944402, "learning_rate": 9.984609522541848e-06, "loss": 0.6066, "step": 1101 }, { "epoch": 0.3679465776293823, "grad_norm": 1.593501473526072, "learning_rate": 9.98445684359011e-06, "loss": 0.6118, "step": 1102 }, { "epoch": 0.3682804674457429, "grad_norm": 1.5214293920672484, "learning_rate": 9.984303412236085e-06, "loss": 0.6061, "step": 1103 }, { "epoch": 0.3686143572621035, "grad_norm": 1.6473531269054054, "learning_rate": 9.984149228502937e-06, "loss": 0.5643, "step": 1104 }, { "epoch": 0.36894824707846413, "grad_norm": 1.5083702957764127, "learning_rate": 9.983994292413937e-06, "loss": 0.5734, "step": 1105 }, { "epoch": 0.3692821368948247, "grad_norm": 1.5573677649681914, "learning_rate": 9.983838603992472e-06, "loss": 0.5858, "step": 1106 }, { "epoch": 0.3696160267111853, "grad_norm": 1.5533279413865004, "learning_rate": 9.983682163262044e-06, "loss": 0.5604, "step": 1107 }, { "epoch": 0.3699499165275459, "grad_norm": 1.4484149716796224, "learning_rate": 9.983524970246272e-06, "loss": 0.5716, "step": 1108 }, { "epoch": 0.3702838063439065, "grad_norm": 1.5062802081622604, "learning_rate": 9.98336702496888e-06, "loss": 0.5606, "step": 1109 }, { "epoch": 0.37061769616026713, "grad_norm": 1.5302637402601125, "learning_rate": 9.983208327453708e-06, "loss": 0.602, "step": 1110 }, { "epoch": 0.3709515859766277, "grad_norm": 1.4985679720342033, "learning_rate": 9.983048877724716e-06, "loss": 0.5737, "step": 1111 }, { "epoch": 0.3712854757929883, "grad_norm": 1.5685346316927862, "learning_rate": 9.982888675805967e-06, "loss": 0.5666, "step": 1112 }, { "epoch": 0.37161936560934894, "grad_norm": 1.5322188433056128, "learning_rate": 9.98272772172165e-06, "loss": 0.5886, "step": 1113 }, { "epoch": 0.3719532554257095, "grad_norm": 1.5975125916569402, "learning_rate": 9.982566015496056e-06, "loss": 0.5896, "step": 1114 }, { "epoch": 0.3722871452420701, "grad_norm": 1.4425773301699651, "learning_rate": 9.982403557153598e-06, "loss": 0.555, "step": 1115 }, { "epoch": 0.3726210350584307, "grad_norm": 1.4950265541722587, "learning_rate": 9.982240346718797e-06, "loss": 0.5496, "step": 1116 }, { "epoch": 0.3729549248747913, "grad_norm": 1.4452010518364269, "learning_rate": 9.982076384216291e-06, "loss": 0.6006, "step": 1117 }, { "epoch": 0.37328881469115194, "grad_norm": 1.4907082281310742, "learning_rate": 9.981911669670828e-06, "loss": 0.5711, "step": 1118 }, { "epoch": 0.3736227045075125, "grad_norm": 1.509847650623372, "learning_rate": 9.981746203107273e-06, "loss": 0.5932, "step": 1119 }, { "epoch": 0.3739565943238731, "grad_norm": 1.5075178127920215, "learning_rate": 9.981579984550603e-06, "loss": 0.5893, "step": 1120 }, { "epoch": 0.37429048414023375, "grad_norm": 1.5557411456130914, "learning_rate": 9.98141301402591e-06, "loss": 0.5683, "step": 1121 }, { "epoch": 0.3746243739565943, "grad_norm": 1.502415409026078, "learning_rate": 9.981245291558395e-06, "loss": 0.5537, "step": 1122 }, { "epoch": 0.37495826377295494, "grad_norm": 1.4886117773687997, "learning_rate": 9.981076817173377e-06, "loss": 0.583, "step": 1123 }, { "epoch": 0.3752921535893155, "grad_norm": 1.4537778734674567, "learning_rate": 9.980907590896288e-06, "loss": 0.5732, "step": 1124 }, { "epoch": 0.3756260434056761, "grad_norm": 1.4727619259520377, "learning_rate": 9.980737612752674e-06, "loss": 0.5824, "step": 1125 }, { "epoch": 0.37595993322203675, "grad_norm": 1.537165449912223, "learning_rate": 9.980566882768187e-06, "loss": 0.5804, "step": 1126 }, { "epoch": 0.3762938230383973, "grad_norm": 1.5012399748787446, "learning_rate": 9.980395400968607e-06, "loss": 0.5769, "step": 1127 }, { "epoch": 0.37662771285475793, "grad_norm": 1.4460883694195508, "learning_rate": 9.980223167379815e-06, "loss": 0.5617, "step": 1128 }, { "epoch": 0.37696160267111856, "grad_norm": 1.4786419323872086, "learning_rate": 9.980050182027807e-06, "loss": 0.5918, "step": 1129 }, { "epoch": 0.3772954924874791, "grad_norm": 1.484048268645675, "learning_rate": 9.979876444938701e-06, "loss": 0.5874, "step": 1130 }, { "epoch": 0.37762938230383974, "grad_norm": 1.485817139050113, "learning_rate": 9.979701956138717e-06, "loss": 0.5649, "step": 1131 }, { "epoch": 0.3779632721202003, "grad_norm": 1.3816956231395197, "learning_rate": 9.979526715654198e-06, "loss": 0.5707, "step": 1132 }, { "epoch": 0.37829716193656093, "grad_norm": 1.430021348835952, "learning_rate": 9.979350723511594e-06, "loss": 0.568, "step": 1133 }, { "epoch": 0.37863105175292155, "grad_norm": 1.4395297562235612, "learning_rate": 9.97917397973747e-06, "loss": 0.583, "step": 1134 }, { "epoch": 0.3789649415692821, "grad_norm": 1.4483810847065717, "learning_rate": 9.97899648435851e-06, "loss": 0.5777, "step": 1135 }, { "epoch": 0.37929883138564274, "grad_norm": 1.464437490925817, "learning_rate": 9.978818237401505e-06, "loss": 0.5731, "step": 1136 }, { "epoch": 0.37963272120200336, "grad_norm": 1.4233240474655162, "learning_rate": 9.978639238893357e-06, "loss": 0.5756, "step": 1137 }, { "epoch": 0.37996661101836393, "grad_norm": 1.4551866374182059, "learning_rate": 9.97845948886109e-06, "loss": 0.5732, "step": 1138 }, { "epoch": 0.38030050083472455, "grad_norm": 1.4849061546804325, "learning_rate": 9.978278987331838e-06, "loss": 0.5842, "step": 1139 }, { "epoch": 0.3806343906510851, "grad_norm": 1.4312832374138802, "learning_rate": 9.978097734332846e-06, "loss": 0.5632, "step": 1140 }, { "epoch": 0.38096828046744574, "grad_norm": 1.4626487717096923, "learning_rate": 9.977915729891475e-06, "loss": 0.5712, "step": 1141 }, { "epoch": 0.38130217028380636, "grad_norm": 1.5405371105392351, "learning_rate": 9.977732974035194e-06, "loss": 0.5636, "step": 1142 }, { "epoch": 0.38163606010016693, "grad_norm": 1.4510965112663556, "learning_rate": 9.977549466791597e-06, "loss": 0.5635, "step": 1143 }, { "epoch": 0.38196994991652755, "grad_norm": 1.4582212292379413, "learning_rate": 9.97736520818838e-06, "loss": 0.5591, "step": 1144 }, { "epoch": 0.3823038397328882, "grad_norm": 1.530086772967078, "learning_rate": 9.977180198253357e-06, "loss": 0.5792, "step": 1145 }, { "epoch": 0.38263772954924874, "grad_norm": 1.4375187693860085, "learning_rate": 9.976994437014458e-06, "loss": 0.581, "step": 1146 }, { "epoch": 0.38297161936560936, "grad_norm": 1.423803878133664, "learning_rate": 9.976807924499721e-06, "loss": 0.5619, "step": 1147 }, { "epoch": 0.38330550918196993, "grad_norm": 1.5160871934445457, "learning_rate": 9.9766206607373e-06, "loss": 0.5757, "step": 1148 }, { "epoch": 0.38363939899833055, "grad_norm": 1.42978634900331, "learning_rate": 9.976432645755463e-06, "loss": 0.5662, "step": 1149 }, { "epoch": 0.38397328881469117, "grad_norm": 1.4823518607635942, "learning_rate": 9.976243879582593e-06, "loss": 0.592, "step": 1150 }, { "epoch": 0.38430717863105174, "grad_norm": 1.4146825351368182, "learning_rate": 9.976054362247179e-06, "loss": 0.5276, "step": 1151 }, { "epoch": 0.38464106844741236, "grad_norm": 1.4742519553174147, "learning_rate": 9.975864093777832e-06, "loss": 0.5803, "step": 1152 }, { "epoch": 0.384974958263773, "grad_norm": 1.486986484986515, "learning_rate": 9.975673074203274e-06, "loss": 0.5865, "step": 1153 }, { "epoch": 0.38530884808013355, "grad_norm": 1.554020602301936, "learning_rate": 9.975481303552337e-06, "loss": 0.5822, "step": 1154 }, { "epoch": 0.38564273789649417, "grad_norm": 1.499996201137536, "learning_rate": 9.975288781853968e-06, "loss": 0.6015, "step": 1155 }, { "epoch": 0.38597662771285474, "grad_norm": 1.4077293798258388, "learning_rate": 9.975095509137234e-06, "loss": 0.5601, "step": 1156 }, { "epoch": 0.38631051752921536, "grad_norm": 1.41699448528267, "learning_rate": 9.9749014854313e-06, "loss": 0.5684, "step": 1157 }, { "epoch": 0.386644407345576, "grad_norm": 1.5103768120754244, "learning_rate": 9.974706710765462e-06, "loss": 0.5794, "step": 1158 }, { "epoch": 0.38697829716193655, "grad_norm": 1.501919917178444, "learning_rate": 9.97451118516912e-06, "loss": 0.5595, "step": 1159 }, { "epoch": 0.38731218697829717, "grad_norm": 1.4893668674325702, "learning_rate": 9.974314908671784e-06, "loss": 0.5947, "step": 1160 }, { "epoch": 0.3876460767946578, "grad_norm": 2.1417855199485545, "learning_rate": 9.974117881303085e-06, "loss": 0.5627, "step": 1161 }, { "epoch": 0.38797996661101836, "grad_norm": 1.500670744947681, "learning_rate": 9.973920103092764e-06, "loss": 0.5572, "step": 1162 }, { "epoch": 0.388313856427379, "grad_norm": 1.4635913827177858, "learning_rate": 9.973721574070676e-06, "loss": 0.586, "step": 1163 }, { "epoch": 0.38864774624373954, "grad_norm": 1.5330394789406947, "learning_rate": 9.973522294266788e-06, "loss": 0.5703, "step": 1164 }, { "epoch": 0.38898163606010017, "grad_norm": 1.3998013009884982, "learning_rate": 9.973322263711182e-06, "loss": 0.5617, "step": 1165 }, { "epoch": 0.3893155258764608, "grad_norm": 1.4876555195757466, "learning_rate": 9.973121482434051e-06, "loss": 0.5835, "step": 1166 }, { "epoch": 0.38964941569282135, "grad_norm": 1.4917660227315859, "learning_rate": 9.972919950465705e-06, "loss": 0.5751, "step": 1167 }, { "epoch": 0.389983305509182, "grad_norm": 1.7576696405349175, "learning_rate": 9.972717667836563e-06, "loss": 0.5733, "step": 1168 }, { "epoch": 0.3903171953255426, "grad_norm": 1.5108391075271557, "learning_rate": 9.972514634577162e-06, "loss": 0.5869, "step": 1169 }, { "epoch": 0.39065108514190316, "grad_norm": 1.5192051759700325, "learning_rate": 9.972310850718147e-06, "loss": 0.5806, "step": 1170 }, { "epoch": 0.3909849749582638, "grad_norm": 1.4833451479256636, "learning_rate": 9.97210631629028e-06, "loss": 0.5804, "step": 1171 }, { "epoch": 0.39131886477462435, "grad_norm": 1.5606840598103833, "learning_rate": 9.971901031324438e-06, "loss": 0.5914, "step": 1172 }, { "epoch": 0.391652754590985, "grad_norm": 1.4077251840595897, "learning_rate": 9.971694995851606e-06, "loss": 0.5334, "step": 1173 }, { "epoch": 0.3919866444073456, "grad_norm": 1.4652009927590186, "learning_rate": 9.971488209902886e-06, "loss": 0.5577, "step": 1174 }, { "epoch": 0.39232053422370616, "grad_norm": 1.5240652270722692, "learning_rate": 9.97128067350949e-06, "loss": 0.5496, "step": 1175 }, { "epoch": 0.3926544240400668, "grad_norm": 1.4872268199735068, "learning_rate": 9.97107238670275e-06, "loss": 0.5639, "step": 1176 }, { "epoch": 0.39298831385642735, "grad_norm": 1.5483486963637298, "learning_rate": 9.970863349514104e-06, "loss": 0.5957, "step": 1177 }, { "epoch": 0.393322203672788, "grad_norm": 1.3701890150801037, "learning_rate": 9.970653561975106e-06, "loss": 0.545, "step": 1178 }, { "epoch": 0.3936560934891486, "grad_norm": 1.4094778600500992, "learning_rate": 9.970443024117423e-06, "loss": 0.561, "step": 1179 }, { "epoch": 0.39398998330550916, "grad_norm": 1.5057485919261613, "learning_rate": 9.970231735972838e-06, "loss": 0.5729, "step": 1180 }, { "epoch": 0.3943238731218698, "grad_norm": 1.398859446874459, "learning_rate": 9.970019697573241e-06, "loss": 0.5612, "step": 1181 }, { "epoch": 0.3946577629382304, "grad_norm": 1.4379496763451567, "learning_rate": 9.969806908950643e-06, "loss": 0.5704, "step": 1182 }, { "epoch": 0.39499165275459097, "grad_norm": 1.4221745880600378, "learning_rate": 9.969593370137163e-06, "loss": 0.545, "step": 1183 }, { "epoch": 0.3953255425709516, "grad_norm": 1.480771059163126, "learning_rate": 9.969379081165034e-06, "loss": 0.583, "step": 1184 }, { "epoch": 0.39565943238731216, "grad_norm": 1.5117583295037724, "learning_rate": 9.969164042066603e-06, "loss": 0.5982, "step": 1185 }, { "epoch": 0.3959933222036728, "grad_norm": 1.448033096006544, "learning_rate": 9.96894825287433e-06, "loss": 0.5449, "step": 1186 }, { "epoch": 0.3963272120200334, "grad_norm": 1.3903888982711334, "learning_rate": 9.96873171362079e-06, "loss": 0.548, "step": 1187 }, { "epoch": 0.39666110183639397, "grad_norm": 1.400843178477807, "learning_rate": 9.96851442433867e-06, "loss": 0.564, "step": 1188 }, { "epoch": 0.3969949916527546, "grad_norm": 1.4428085681748135, "learning_rate": 9.968296385060764e-06, "loss": 0.5744, "step": 1189 }, { "epoch": 0.3973288814691152, "grad_norm": 1.4013817488817222, "learning_rate": 9.968077595819991e-06, "loss": 0.5421, "step": 1190 }, { "epoch": 0.3976627712854758, "grad_norm": 1.4582099518322407, "learning_rate": 9.967858056649375e-06, "loss": 0.592, "step": 1191 }, { "epoch": 0.3979966611018364, "grad_norm": 1.416493434592038, "learning_rate": 9.967637767582055e-06, "loss": 0.5579, "step": 1192 }, { "epoch": 0.39833055091819697, "grad_norm": 1.353831365910856, "learning_rate": 9.967416728651285e-06, "loss": 0.545, "step": 1193 }, { "epoch": 0.3986644407345576, "grad_norm": 1.3666964890840405, "learning_rate": 9.967194939890429e-06, "loss": 0.5832, "step": 1194 }, { "epoch": 0.3989983305509182, "grad_norm": 1.3279981855680956, "learning_rate": 9.966972401332967e-06, "loss": 0.5273, "step": 1195 }, { "epoch": 0.3993322203672788, "grad_norm": 1.470013945905513, "learning_rate": 9.96674911301249e-06, "loss": 0.5651, "step": 1196 }, { "epoch": 0.3996661101836394, "grad_norm": 1.3919176953179497, "learning_rate": 9.966525074962706e-06, "loss": 0.5744, "step": 1197 }, { "epoch": 0.4, "grad_norm": 1.395582133973002, "learning_rate": 9.966300287217432e-06, "loss": 0.5525, "step": 1198 }, { "epoch": 0.4003338898163606, "grad_norm": 1.376666171473179, "learning_rate": 9.966074749810599e-06, "loss": 0.544, "step": 1199 }, { "epoch": 0.4006677796327212, "grad_norm": 1.4124947585427752, "learning_rate": 9.96584846277625e-06, "loss": 0.551, "step": 1200 }, { "epoch": 0.4010016694490818, "grad_norm": 1.4256698730491273, "learning_rate": 9.965621426148546e-06, "loss": 0.5814, "step": 1201 }, { "epoch": 0.4013355592654424, "grad_norm": 1.518894660608799, "learning_rate": 9.965393639961759e-06, "loss": 0.5457, "step": 1202 }, { "epoch": 0.401669449081803, "grad_norm": 1.4403467680104618, "learning_rate": 9.965165104250269e-06, "loss": 0.5532, "step": 1203 }, { "epoch": 0.4020033388981636, "grad_norm": 1.359480857451518, "learning_rate": 9.964935819048579e-06, "loss": 0.5731, "step": 1204 }, { "epoch": 0.4023372287145242, "grad_norm": 1.4355351642276863, "learning_rate": 9.964705784391295e-06, "loss": 0.5406, "step": 1205 }, { "epoch": 0.40267111853088483, "grad_norm": 1.3990573185875232, "learning_rate": 9.96447500031314e-06, "loss": 0.557, "step": 1206 }, { "epoch": 0.4030050083472454, "grad_norm": 1.4484379728208825, "learning_rate": 9.964243466848956e-06, "loss": 0.5655, "step": 1207 }, { "epoch": 0.403338898163606, "grad_norm": 1.3929203931743022, "learning_rate": 9.964011184033688e-06, "loss": 0.5568, "step": 1208 }, { "epoch": 0.4036727879799666, "grad_norm": 1.4317144149484498, "learning_rate": 9.963778151902402e-06, "loss": 0.5719, "step": 1209 }, { "epoch": 0.4040066777963272, "grad_norm": 1.320585181831383, "learning_rate": 9.96354437049027e-06, "loss": 0.5441, "step": 1210 }, { "epoch": 0.40434056761268783, "grad_norm": 1.4542550361522186, "learning_rate": 9.963309839832587e-06, "loss": 0.5531, "step": 1211 }, { "epoch": 0.4046744574290484, "grad_norm": 1.4481298879104787, "learning_rate": 9.963074559964752e-06, "loss": 0.5725, "step": 1212 }, { "epoch": 0.405008347245409, "grad_norm": 1.4249805993109648, "learning_rate": 9.96283853092228e-06, "loss": 0.5563, "step": 1213 }, { "epoch": 0.40534223706176964, "grad_norm": 1.4081510284071699, "learning_rate": 9.962601752740802e-06, "loss": 0.5661, "step": 1214 }, { "epoch": 0.4056761268781302, "grad_norm": 1.3845241157389, "learning_rate": 9.962364225456057e-06, "loss": 0.5505, "step": 1215 }, { "epoch": 0.4060100166944908, "grad_norm": 1.4185448893690438, "learning_rate": 9.9621259491039e-06, "loss": 0.5738, "step": 1216 }, { "epoch": 0.4063439065108514, "grad_norm": 1.4093525345919722, "learning_rate": 9.9618869237203e-06, "loss": 0.5769, "step": 1217 }, { "epoch": 0.406677796327212, "grad_norm": 1.3237057724271735, "learning_rate": 9.961647149341338e-06, "loss": 0.5663, "step": 1218 }, { "epoch": 0.40701168614357264, "grad_norm": 1.3119044570861862, "learning_rate": 9.961406626003207e-06, "loss": 0.5731, "step": 1219 }, { "epoch": 0.4073455759599332, "grad_norm": 1.3112412076296762, "learning_rate": 9.961165353742214e-06, "loss": 0.5422, "step": 1220 }, { "epoch": 0.4076794657762938, "grad_norm": 1.352798147898093, "learning_rate": 9.960923332594779e-06, "loss": 0.5618, "step": 1221 }, { "epoch": 0.40801335559265445, "grad_norm": 1.3617283940468667, "learning_rate": 9.960680562597435e-06, "loss": 0.5688, "step": 1222 }, { "epoch": 0.408347245409015, "grad_norm": 1.371124612583457, "learning_rate": 9.960437043786828e-06, "loss": 0.5703, "step": 1223 }, { "epoch": 0.40868113522537564, "grad_norm": 1.341445420954613, "learning_rate": 9.960192776199717e-06, "loss": 0.5379, "step": 1224 }, { "epoch": 0.4090150250417362, "grad_norm": 1.4238512193111226, "learning_rate": 9.959947759872974e-06, "loss": 0.5419, "step": 1225 }, { "epoch": 0.4093489148580968, "grad_norm": 1.4028205285147093, "learning_rate": 9.959701994843585e-06, "loss": 0.5479, "step": 1226 }, { "epoch": 0.40968280467445745, "grad_norm": 1.2938830576694882, "learning_rate": 9.959455481148648e-06, "loss": 0.5353, "step": 1227 }, { "epoch": 0.410016694490818, "grad_norm": 1.4166585193651862, "learning_rate": 9.959208218825373e-06, "loss": 0.5885, "step": 1228 }, { "epoch": 0.41035058430717863, "grad_norm": 1.391194103268811, "learning_rate": 9.958960207911086e-06, "loss": 0.5606, "step": 1229 }, { "epoch": 0.41068447412353926, "grad_norm": 1.405636186564293, "learning_rate": 9.958711448443222e-06, "loss": 0.565, "step": 1230 }, { "epoch": 0.4110183639398998, "grad_norm": 1.364683430452903, "learning_rate": 9.958461940459332e-06, "loss": 0.5402, "step": 1231 }, { "epoch": 0.41135225375626044, "grad_norm": 1.3519105656650772, "learning_rate": 9.958211683997081e-06, "loss": 0.5509, "step": 1232 }, { "epoch": 0.411686143572621, "grad_norm": 1.3928118879341855, "learning_rate": 9.957960679094243e-06, "loss": 0.5821, "step": 1233 }, { "epoch": 0.41202003338898163, "grad_norm": 1.3494828343489949, "learning_rate": 9.957708925788707e-06, "loss": 0.5446, "step": 1234 }, { "epoch": 0.41235392320534225, "grad_norm": 1.3753638968016224, "learning_rate": 9.957456424118477e-06, "loss": 0.5629, "step": 1235 }, { "epoch": 0.4126878130217028, "grad_norm": 1.397366260216176, "learning_rate": 9.957203174121666e-06, "loss": 0.5819, "step": 1236 }, { "epoch": 0.41302170283806344, "grad_norm": 1.374206344139325, "learning_rate": 9.956949175836503e-06, "loss": 0.5684, "step": 1237 }, { "epoch": 0.41335559265442406, "grad_norm": 1.3953250627295657, "learning_rate": 9.95669442930133e-06, "loss": 0.5588, "step": 1238 }, { "epoch": 0.41368948247078463, "grad_norm": 1.4036059674761832, "learning_rate": 9.956438934554597e-06, "loss": 0.5451, "step": 1239 }, { "epoch": 0.41402337228714525, "grad_norm": 1.3214444740075273, "learning_rate": 9.956182691634877e-06, "loss": 0.554, "step": 1240 }, { "epoch": 0.4143572621035058, "grad_norm": 1.3471255083214253, "learning_rate": 9.955925700580845e-06, "loss": 0.5736, "step": 1241 }, { "epoch": 0.41469115191986644, "grad_norm": 1.3453848153455623, "learning_rate": 9.955667961431294e-06, "loss": 0.5622, "step": 1242 }, { "epoch": 0.41502504173622706, "grad_norm": 1.389742972900291, "learning_rate": 9.955409474225134e-06, "loss": 0.562, "step": 1243 }, { "epoch": 0.41535893155258763, "grad_norm": 1.3911805409805942, "learning_rate": 9.955150239001377e-06, "loss": 0.5734, "step": 1244 }, { "epoch": 0.41569282136894825, "grad_norm": 1.308949310505028, "learning_rate": 9.954890255799157e-06, "loss": 0.5459, "step": 1245 }, { "epoch": 0.4160267111853089, "grad_norm": 1.324291273996052, "learning_rate": 9.954629524657722e-06, "loss": 0.5334, "step": 1246 }, { "epoch": 0.41636060100166944, "grad_norm": 1.310390248231302, "learning_rate": 9.954368045616426e-06, "loss": 0.5343, "step": 1247 }, { "epoch": 0.41669449081803006, "grad_norm": 1.2685787377844657, "learning_rate": 9.954105818714739e-06, "loss": 0.5252, "step": 1248 }, { "epoch": 0.4170283806343906, "grad_norm": 1.4349750243449728, "learning_rate": 9.953842843992246e-06, "loss": 0.5308, "step": 1249 }, { "epoch": 0.41736227045075125, "grad_norm": 1.2965213719404518, "learning_rate": 9.95357912148864e-06, "loss": 0.5194, "step": 1250 }, { "epoch": 0.41769616026711187, "grad_norm": 1.4217947754866225, "learning_rate": 9.953314651243733e-06, "loss": 0.5654, "step": 1251 }, { "epoch": 0.41803005008347244, "grad_norm": 1.3346464841949088, "learning_rate": 9.953049433297444e-06, "loss": 0.5693, "step": 1252 }, { "epoch": 0.41836393989983306, "grad_norm": 1.3477447825622788, "learning_rate": 9.95278346768981e-06, "loss": 0.5665, "step": 1253 }, { "epoch": 0.4186978297161937, "grad_norm": 1.3286974024450438, "learning_rate": 9.952516754460976e-06, "loss": 0.5437, "step": 1254 }, { "epoch": 0.41903171953255425, "grad_norm": 1.296877798640672, "learning_rate": 9.952249293651203e-06, "loss": 0.5394, "step": 1255 }, { "epoch": 0.41936560934891487, "grad_norm": 1.2818666986107938, "learning_rate": 9.951981085300867e-06, "loss": 0.537, "step": 1256 }, { "epoch": 0.41969949916527544, "grad_norm": 1.5090293836909978, "learning_rate": 9.95171212945045e-06, "loss": 0.5537, "step": 1257 }, { "epoch": 0.42003338898163606, "grad_norm": 1.3744958483957699, "learning_rate": 9.951442426140554e-06, "loss": 0.5558, "step": 1258 }, { "epoch": 0.4203672787979967, "grad_norm": 1.389087976920171, "learning_rate": 9.951171975411888e-06, "loss": 0.5658, "step": 1259 }, { "epoch": 0.42070116861435725, "grad_norm": 1.3343805875688786, "learning_rate": 9.950900777305276e-06, "loss": 0.5445, "step": 1260 }, { "epoch": 0.42103505843071787, "grad_norm": 1.3093239310610583, "learning_rate": 9.95062883186166e-06, "loss": 0.5448, "step": 1261 }, { "epoch": 0.4213689482470785, "grad_norm": 1.3995278259248891, "learning_rate": 9.950356139122085e-06, "loss": 0.5651, "step": 1262 }, { "epoch": 0.42170283806343906, "grad_norm": 1.3703728665350965, "learning_rate": 9.950082699127717e-06, "loss": 0.5546, "step": 1263 }, { "epoch": 0.4220367278797997, "grad_norm": 1.3257859338573097, "learning_rate": 9.949808511919828e-06, "loss": 0.5534, "step": 1264 }, { "epoch": 0.42237061769616024, "grad_norm": 1.3166960964353571, "learning_rate": 9.949533577539812e-06, "loss": 0.5429, "step": 1265 }, { "epoch": 0.42270450751252087, "grad_norm": 1.3125453210188336, "learning_rate": 9.949257896029165e-06, "loss": 0.5237, "step": 1266 }, { "epoch": 0.4230383973288815, "grad_norm": 1.301909062150871, "learning_rate": 9.948981467429503e-06, "loss": 0.525, "step": 1267 }, { "epoch": 0.42337228714524205, "grad_norm": 1.3440281765418363, "learning_rate": 9.948704291782555e-06, "loss": 0.5595, "step": 1268 }, { "epoch": 0.4237061769616027, "grad_norm": 1.307365652098423, "learning_rate": 9.948426369130157e-06, "loss": 0.5341, "step": 1269 }, { "epoch": 0.4240400667779633, "grad_norm": 1.287049116655689, "learning_rate": 9.948147699514264e-06, "loss": 0.5404, "step": 1270 }, { "epoch": 0.42437395659432386, "grad_norm": 1.3412141437330223, "learning_rate": 9.94786828297694e-06, "loss": 0.5599, "step": 1271 }, { "epoch": 0.4247078464106845, "grad_norm": 1.3203370232175844, "learning_rate": 9.947588119560362e-06, "loss": 0.5586, "step": 1272 }, { "epoch": 0.42504173622704505, "grad_norm": 1.3391562895839262, "learning_rate": 9.947307209306823e-06, "loss": 0.541, "step": 1273 }, { "epoch": 0.4253756260434057, "grad_norm": 1.3094299614797802, "learning_rate": 9.947025552258724e-06, "loss": 0.5619, "step": 1274 }, { "epoch": 0.4257095158597663, "grad_norm": 1.3118921824256675, "learning_rate": 9.946743148458582e-06, "loss": 0.5284, "step": 1275 }, { "epoch": 0.42604340567612686, "grad_norm": 1.3284879016148472, "learning_rate": 9.946459997949026e-06, "loss": 0.5512, "step": 1276 }, { "epoch": 0.4263772954924875, "grad_norm": 1.3247893724209194, "learning_rate": 9.946176100772796e-06, "loss": 0.5349, "step": 1277 }, { "epoch": 0.4267111853088481, "grad_norm": 1.3330700755308236, "learning_rate": 9.945891456972748e-06, "loss": 0.5375, "step": 1278 }, { "epoch": 0.4270450751252087, "grad_norm": 1.297617946593221, "learning_rate": 9.945606066591848e-06, "loss": 0.5074, "step": 1279 }, { "epoch": 0.4273789649415693, "grad_norm": 1.2943311082644633, "learning_rate": 9.945319929673176e-06, "loss": 0.5357, "step": 1280 }, { "epoch": 0.42771285475792986, "grad_norm": 1.3114362602133505, "learning_rate": 9.945033046259924e-06, "loss": 0.5418, "step": 1281 }, { "epoch": 0.4280467445742905, "grad_norm": 1.2977184998052098, "learning_rate": 9.944745416395398e-06, "loss": 0.5599, "step": 1282 }, { "epoch": 0.4283806343906511, "grad_norm": 1.3410103671171056, "learning_rate": 9.944457040123014e-06, "loss": 0.548, "step": 1283 }, { "epoch": 0.42871452420701167, "grad_norm": 1.3151457607170296, "learning_rate": 9.944167917486304e-06, "loss": 0.5439, "step": 1284 }, { "epoch": 0.4290484140233723, "grad_norm": 1.3759493434694905, "learning_rate": 9.94387804852891e-06, "loss": 0.5923, "step": 1285 }, { "epoch": 0.4293823038397329, "grad_norm": 1.2657510800988852, "learning_rate": 9.943587433294587e-06, "loss": 0.5345, "step": 1286 }, { "epoch": 0.4297161936560935, "grad_norm": 1.3064208968253999, "learning_rate": 9.943296071827202e-06, "loss": 0.5545, "step": 1287 }, { "epoch": 0.4300500834724541, "grad_norm": 1.3718207017436912, "learning_rate": 9.94300396417074e-06, "loss": 0.5473, "step": 1288 }, { "epoch": 0.43038397328881467, "grad_norm": 1.4099785469534314, "learning_rate": 9.942711110369292e-06, "loss": 0.5793, "step": 1289 }, { "epoch": 0.4307178631051753, "grad_norm": 1.355939661662366, "learning_rate": 9.942417510467063e-06, "loss": 0.5517, "step": 1290 }, { "epoch": 0.4310517529215359, "grad_norm": 1.4004244703099775, "learning_rate": 9.942123164508376e-06, "loss": 0.5688, "step": 1291 }, { "epoch": 0.4313856427378965, "grad_norm": 1.3793718472094263, "learning_rate": 9.941828072537659e-06, "loss": 0.5525, "step": 1292 }, { "epoch": 0.4317195325542571, "grad_norm": 1.3678231893080135, "learning_rate": 9.941532234599457e-06, "loss": 0.5852, "step": 1293 }, { "epoch": 0.4320534223706177, "grad_norm": 1.2812904763639341, "learning_rate": 9.941235650738425e-06, "loss": 0.5329, "step": 1294 }, { "epoch": 0.4323873121869783, "grad_norm": 1.3161772184361786, "learning_rate": 9.940938320999336e-06, "loss": 0.518, "step": 1295 }, { "epoch": 0.4327212020033389, "grad_norm": 1.2264364865271313, "learning_rate": 9.940640245427068e-06, "loss": 0.5154, "step": 1296 }, { "epoch": 0.4330550918196995, "grad_norm": 1.385523403012093, "learning_rate": 9.940341424066619e-06, "loss": 0.5822, "step": 1297 }, { "epoch": 0.4333889816360601, "grad_norm": 1.3313348727526828, "learning_rate": 9.940041856963092e-06, "loss": 0.5334, "step": 1298 }, { "epoch": 0.4337228714524207, "grad_norm": 1.2421530787181936, "learning_rate": 9.93974154416171e-06, "loss": 0.5411, "step": 1299 }, { "epoch": 0.4340567612687813, "grad_norm": 1.2579836874120263, "learning_rate": 9.939440485707804e-06, "loss": 0.5364, "step": 1300 }, { "epoch": 0.4343906510851419, "grad_norm": 1.2335101142297484, "learning_rate": 9.939138681646817e-06, "loss": 0.5196, "step": 1301 }, { "epoch": 0.43472454090150253, "grad_norm": 1.2611448405393548, "learning_rate": 9.938836132024309e-06, "loss": 0.5499, "step": 1302 }, { "epoch": 0.4350584307178631, "grad_norm": 1.2633264343377568, "learning_rate": 9.938532836885947e-06, "loss": 0.5342, "step": 1303 }, { "epoch": 0.4353923205342237, "grad_norm": 1.284918050857036, "learning_rate": 9.938228796277516e-06, "loss": 0.5381, "step": 1304 }, { "epoch": 0.4357262103505843, "grad_norm": 1.4383213898985046, "learning_rate": 9.93792401024491e-06, "loss": 0.589, "step": 1305 }, { "epoch": 0.4360601001669449, "grad_norm": 1.2839139405687492, "learning_rate": 9.937618478834134e-06, "loss": 0.5306, "step": 1306 }, { "epoch": 0.43639398998330553, "grad_norm": 1.3313386776005545, "learning_rate": 9.93731220209131e-06, "loss": 0.5578, "step": 1307 }, { "epoch": 0.4367278797996661, "grad_norm": 1.3244485388167757, "learning_rate": 9.937005180062672e-06, "loss": 0.5529, "step": 1308 }, { "epoch": 0.4370617696160267, "grad_norm": 1.3283063497757315, "learning_rate": 9.936697412794562e-06, "loss": 0.5739, "step": 1309 }, { "epoch": 0.4373956594323873, "grad_norm": 1.2804988004648776, "learning_rate": 9.93638890033344e-06, "loss": 0.5389, "step": 1310 }, { "epoch": 0.4377295492487479, "grad_norm": 1.2670768809564563, "learning_rate": 9.936079642725873e-06, "loss": 0.5201, "step": 1311 }, { "epoch": 0.43806343906510853, "grad_norm": 1.3221200014492613, "learning_rate": 9.935769640018545e-06, "loss": 0.5607, "step": 1312 }, { "epoch": 0.4383973288814691, "grad_norm": 1.2643861617297127, "learning_rate": 9.935458892258249e-06, "loss": 0.5358, "step": 1313 }, { "epoch": 0.4387312186978297, "grad_norm": 1.281506926213057, "learning_rate": 9.935147399491896e-06, "loss": 0.5593, "step": 1314 }, { "epoch": 0.43906510851419034, "grad_norm": 1.292696699123711, "learning_rate": 9.934835161766502e-06, "loss": 0.5254, "step": 1315 }, { "epoch": 0.4393989983305509, "grad_norm": 1.4023262255416717, "learning_rate": 9.934522179129203e-06, "loss": 0.5566, "step": 1316 }, { "epoch": 0.4397328881469115, "grad_norm": 1.2962586641234513, "learning_rate": 9.934208451627238e-06, "loss": 0.5173, "step": 1317 }, { "epoch": 0.4400667779632721, "grad_norm": 1.2976485024460185, "learning_rate": 9.93389397930797e-06, "loss": 0.5601, "step": 1318 }, { "epoch": 0.4404006677796327, "grad_norm": 1.3095704857661228, "learning_rate": 9.933578762218865e-06, "loss": 0.5208, "step": 1319 }, { "epoch": 0.44073455759599334, "grad_norm": 1.3490722733068001, "learning_rate": 9.933262800407507e-06, "loss": 0.5443, "step": 1320 }, { "epoch": 0.4410684474123539, "grad_norm": 1.3433067398847827, "learning_rate": 9.932946093921587e-06, "loss": 0.5555, "step": 1321 }, { "epoch": 0.4414023372287145, "grad_norm": 1.4128258877636284, "learning_rate": 9.932628642808916e-06, "loss": 0.5452, "step": 1322 }, { "epoch": 0.44173622704507515, "grad_norm": 1.3024665301867886, "learning_rate": 9.93231044711741e-06, "loss": 0.5358, "step": 1323 }, { "epoch": 0.4420701168614357, "grad_norm": 1.2943473037455198, "learning_rate": 9.931991506895101e-06, "loss": 0.5461, "step": 1324 }, { "epoch": 0.44240400667779634, "grad_norm": 1.280222831462, "learning_rate": 9.931671822190136e-06, "loss": 0.5175, "step": 1325 }, { "epoch": 0.4427378964941569, "grad_norm": 1.3157273958448747, "learning_rate": 9.931351393050766e-06, "loss": 0.5141, "step": 1326 }, { "epoch": 0.4430717863105175, "grad_norm": 1.3366978827055016, "learning_rate": 9.931030219525365e-06, "loss": 0.5497, "step": 1327 }, { "epoch": 0.44340567612687815, "grad_norm": 1.3217204378987382, "learning_rate": 9.93070830166241e-06, "loss": 0.5637, "step": 1328 }, { "epoch": 0.4437395659432387, "grad_norm": 1.2588470726481165, "learning_rate": 9.930385639510497e-06, "loss": 0.5087, "step": 1329 }, { "epoch": 0.44407345575959933, "grad_norm": 1.2603778110693364, "learning_rate": 9.93006223311833e-06, "loss": 0.5087, "step": 1330 }, { "epoch": 0.44440734557595996, "grad_norm": 1.32689733197692, "learning_rate": 9.92973808253473e-06, "loss": 0.5467, "step": 1331 }, { "epoch": 0.4447412353923205, "grad_norm": 1.3238567933274692, "learning_rate": 9.929413187808623e-06, "loss": 0.5177, "step": 1332 }, { "epoch": 0.44507512520868114, "grad_norm": 1.232746281013181, "learning_rate": 9.929087548989054e-06, "loss": 0.5144, "step": 1333 }, { "epoch": 0.4454090150250417, "grad_norm": 1.2523911167470974, "learning_rate": 9.92876116612518e-06, "loss": 0.5138, "step": 1334 }, { "epoch": 0.44574290484140233, "grad_norm": 1.2975139260859674, "learning_rate": 9.928434039266265e-06, "loss": 0.5342, "step": 1335 }, { "epoch": 0.44607679465776295, "grad_norm": 1.250662642902049, "learning_rate": 9.92810616846169e-06, "loss": 0.5073, "step": 1336 }, { "epoch": 0.4464106844741235, "grad_norm": 1.3379958794223423, "learning_rate": 9.92777755376095e-06, "loss": 0.5392, "step": 1337 }, { "epoch": 0.44674457429048414, "grad_norm": 1.3464486039127266, "learning_rate": 9.927448195213643e-06, "loss": 0.549, "step": 1338 }, { "epoch": 0.44707846410684476, "grad_norm": 1.3389126456636564, "learning_rate": 9.92711809286949e-06, "loss": 0.5342, "step": 1339 }, { "epoch": 0.44741235392320533, "grad_norm": 1.3022338856631406, "learning_rate": 9.92678724677832e-06, "loss": 0.5437, "step": 1340 }, { "epoch": 0.44774624373956595, "grad_norm": 1.2791770617295626, "learning_rate": 9.926455656990073e-06, "loss": 0.5606, "step": 1341 }, { "epoch": 0.4480801335559265, "grad_norm": 1.330724005114067, "learning_rate": 9.926123323554803e-06, "loss": 0.5371, "step": 1342 }, { "epoch": 0.44841402337228714, "grad_norm": 1.2332007286265, "learning_rate": 9.925790246522675e-06, "loss": 0.534, "step": 1343 }, { "epoch": 0.44874791318864776, "grad_norm": 1.2258292777673874, "learning_rate": 9.925456425943965e-06, "loss": 0.5121, "step": 1344 }, { "epoch": 0.44908180300500833, "grad_norm": 1.2584802278223652, "learning_rate": 9.925121861869066e-06, "loss": 0.5047, "step": 1345 }, { "epoch": 0.44941569282136895, "grad_norm": 1.2281746712543589, "learning_rate": 9.924786554348482e-06, "loss": 0.5065, "step": 1346 }, { "epoch": 0.4497495826377296, "grad_norm": 1.2188329487030336, "learning_rate": 9.924450503432822e-06, "loss": 0.5315, "step": 1347 }, { "epoch": 0.45008347245409014, "grad_norm": 1.206523954402233, "learning_rate": 9.924113709172817e-06, "loss": 0.5328, "step": 1348 }, { "epoch": 0.45041736227045076, "grad_norm": 1.214792438252619, "learning_rate": 9.923776171619305e-06, "loss": 0.4953, "step": 1349 }, { "epoch": 0.4507512520868113, "grad_norm": 1.3103215870769815, "learning_rate": 9.923437890823236e-06, "loss": 0.5476, "step": 1350 }, { "epoch": 0.45108514190317195, "grad_norm": 1.2200104799023248, "learning_rate": 9.923098866835676e-06, "loss": 0.5261, "step": 1351 }, { "epoch": 0.45141903171953257, "grad_norm": 1.267002193297526, "learning_rate": 9.922759099707798e-06, "loss": 0.5575, "step": 1352 }, { "epoch": 0.45175292153589314, "grad_norm": 1.2724731472730877, "learning_rate": 9.92241858949089e-06, "loss": 0.5234, "step": 1353 }, { "epoch": 0.45208681135225376, "grad_norm": 1.2522396973960122, "learning_rate": 9.922077336236354e-06, "loss": 0.5221, "step": 1354 }, { "epoch": 0.4524207011686144, "grad_norm": 1.3180711553338351, "learning_rate": 9.921735339995699e-06, "loss": 0.543, "step": 1355 }, { "epoch": 0.45275459098497495, "grad_norm": 1.3117276165131373, "learning_rate": 9.921392600820554e-06, "loss": 0.532, "step": 1356 }, { "epoch": 0.45308848080133557, "grad_norm": 1.197335989195156, "learning_rate": 9.921049118762648e-06, "loss": 0.4989, "step": 1357 }, { "epoch": 0.45342237061769614, "grad_norm": 1.2510805562705247, "learning_rate": 9.920704893873838e-06, "loss": 0.5352, "step": 1358 }, { "epoch": 0.45375626043405676, "grad_norm": 1.2683574721314572, "learning_rate": 9.920359926206078e-06, "loss": 0.5356, "step": 1359 }, { "epoch": 0.4540901502504174, "grad_norm": 1.2052122862137309, "learning_rate": 9.920014215811443e-06, "loss": 0.5097, "step": 1360 }, { "epoch": 0.45442404006677795, "grad_norm": 1.264748399187728, "learning_rate": 9.91966776274212e-06, "loss": 0.5204, "step": 1361 }, { "epoch": 0.45475792988313857, "grad_norm": 1.2908355163072884, "learning_rate": 9.919320567050404e-06, "loss": 0.5144, "step": 1362 }, { "epoch": 0.4550918196994992, "grad_norm": 1.2714804244808542, "learning_rate": 9.918972628788704e-06, "loss": 0.5439, "step": 1363 }, { "epoch": 0.45542570951585976, "grad_norm": 1.2689762777428868, "learning_rate": 9.91862394800954e-06, "loss": 0.518, "step": 1364 }, { "epoch": 0.4557595993322204, "grad_norm": 1.1937325972181718, "learning_rate": 9.918274524765547e-06, "loss": 0.5204, "step": 1365 }, { "epoch": 0.45609348914858094, "grad_norm": 1.2873563436236595, "learning_rate": 9.917924359109472e-06, "loss": 0.525, "step": 1366 }, { "epoch": 0.45642737896494157, "grad_norm": 1.222034064583693, "learning_rate": 9.917573451094168e-06, "loss": 0.5453, "step": 1367 }, { "epoch": 0.4567612687813022, "grad_norm": 1.3149621336446888, "learning_rate": 9.917221800772607e-06, "loss": 0.5394, "step": 1368 }, { "epoch": 0.45709515859766275, "grad_norm": 1.3143148435231542, "learning_rate": 9.916869408197871e-06, "loss": 0.5416, "step": 1369 }, { "epoch": 0.4574290484140234, "grad_norm": 1.2278104534927703, "learning_rate": 9.916516273423153e-06, "loss": 0.5262, "step": 1370 }, { "epoch": 0.457762938230384, "grad_norm": 1.2710532587197503, "learning_rate": 9.916162396501758e-06, "loss": 0.5376, "step": 1371 }, { "epoch": 0.45809682804674456, "grad_norm": 1.2343781989820795, "learning_rate": 9.915807777487106e-06, "loss": 0.5137, "step": 1372 }, { "epoch": 0.4584307178631052, "grad_norm": 1.1974474275349531, "learning_rate": 9.915452416432722e-06, "loss": 0.5462, "step": 1373 }, { "epoch": 0.45876460767946575, "grad_norm": 1.2754052689234507, "learning_rate": 9.915096313392251e-06, "loss": 0.5548, "step": 1374 }, { "epoch": 0.4590984974958264, "grad_norm": 1.3757187388928707, "learning_rate": 9.914739468419447e-06, "loss": 0.5547, "step": 1375 }, { "epoch": 0.459432387312187, "grad_norm": 1.234322092016973, "learning_rate": 9.914381881568175e-06, "loss": 0.529, "step": 1376 }, { "epoch": 0.45976627712854756, "grad_norm": 1.2736752071884068, "learning_rate": 9.914023552892413e-06, "loss": 0.53, "step": 1377 }, { "epoch": 0.4601001669449082, "grad_norm": 1.2533223588628344, "learning_rate": 9.913664482446248e-06, "loss": 0.5281, "step": 1378 }, { "epoch": 0.4604340567612688, "grad_norm": 1.3183289946178562, "learning_rate": 9.913304670283885e-06, "loss": 0.5433, "step": 1379 }, { "epoch": 0.4607679465776294, "grad_norm": 1.256862591616209, "learning_rate": 9.912944116459634e-06, "loss": 0.5348, "step": 1380 }, { "epoch": 0.46110183639399, "grad_norm": 1.2748317334313923, "learning_rate": 9.912582821027924e-06, "loss": 0.5438, "step": 1381 }, { "epoch": 0.46143572621035056, "grad_norm": 1.2089549243356712, "learning_rate": 9.91222078404329e-06, "loss": 0.5119, "step": 1382 }, { "epoch": 0.4617696160267112, "grad_norm": 1.2153755626108704, "learning_rate": 9.911858005560384e-06, "loss": 0.5346, "step": 1383 }, { "epoch": 0.4621035058430718, "grad_norm": 1.286389353840713, "learning_rate": 9.911494485633965e-06, "loss": 0.5504, "step": 1384 }, { "epoch": 0.46243739565943237, "grad_norm": 1.2335080869730017, "learning_rate": 9.911130224318906e-06, "loss": 0.5105, "step": 1385 }, { "epoch": 0.462771285475793, "grad_norm": 1.201164114732561, "learning_rate": 9.910765221670194e-06, "loss": 0.5171, "step": 1386 }, { "epoch": 0.4631051752921536, "grad_norm": 1.289779807645435, "learning_rate": 9.910399477742925e-06, "loss": 0.5222, "step": 1387 }, { "epoch": 0.4634390651085142, "grad_norm": 1.3125605817561912, "learning_rate": 9.910032992592308e-06, "loss": 0.5599, "step": 1388 }, { "epoch": 0.4637729549248748, "grad_norm": 1.3050625902659871, "learning_rate": 9.909665766273662e-06, "loss": 0.5664, "step": 1389 }, { "epoch": 0.46410684474123537, "grad_norm": 1.2552734629889797, "learning_rate": 9.909297798842423e-06, "loss": 0.55, "step": 1390 }, { "epoch": 0.464440734557596, "grad_norm": 1.2006557668841185, "learning_rate": 9.908929090354135e-06, "loss": 0.5252, "step": 1391 }, { "epoch": 0.4647746243739566, "grad_norm": 1.5068283229981336, "learning_rate": 9.908559640864452e-06, "loss": 0.5739, "step": 1392 }, { "epoch": 0.4651085141903172, "grad_norm": 1.2660918542078299, "learning_rate": 9.908189450429144e-06, "loss": 0.5365, "step": 1393 }, { "epoch": 0.4654424040066778, "grad_norm": 1.2017952392787012, "learning_rate": 9.907818519104092e-06, "loss": 0.5301, "step": 1394 }, { "epoch": 0.4657762938230384, "grad_norm": 1.2195972579914163, "learning_rate": 9.907446846945286e-06, "loss": 0.52, "step": 1395 }, { "epoch": 0.466110183639399, "grad_norm": 1.1706058323926098, "learning_rate": 9.907074434008833e-06, "loss": 0.4941, "step": 1396 }, { "epoch": 0.4664440734557596, "grad_norm": 1.2401581741448127, "learning_rate": 9.906701280350943e-06, "loss": 0.5175, "step": 1397 }, { "epoch": 0.4667779632721202, "grad_norm": 1.2797660004815448, "learning_rate": 9.906327386027948e-06, "loss": 0.5515, "step": 1398 }, { "epoch": 0.4671118530884808, "grad_norm": 1.273864895854904, "learning_rate": 9.905952751096286e-06, "loss": 0.5377, "step": 1399 }, { "epoch": 0.4674457429048414, "grad_norm": 1.245734285360822, "learning_rate": 9.90557737561251e-06, "loss": 0.5293, "step": 1400 }, { "epoch": 0.467779632721202, "grad_norm": 1.3154761499539127, "learning_rate": 9.905201259633278e-06, "loss": 0.5431, "step": 1401 }, { "epoch": 0.4681135225375626, "grad_norm": 1.2784436756955058, "learning_rate": 9.90482440321537e-06, "loss": 0.534, "step": 1402 }, { "epoch": 0.46844741235392323, "grad_norm": 1.3112352959314606, "learning_rate": 9.904446806415668e-06, "loss": 0.5295, "step": 1403 }, { "epoch": 0.4687813021702838, "grad_norm": 1.2525927875561582, "learning_rate": 9.904068469291172e-06, "loss": 0.554, "step": 1404 }, { "epoch": 0.4691151919866444, "grad_norm": 1.200546447870523, "learning_rate": 9.903689391898992e-06, "loss": 0.5396, "step": 1405 }, { "epoch": 0.469449081803005, "grad_norm": 1.2372420991292015, "learning_rate": 9.90330957429635e-06, "loss": 0.5333, "step": 1406 }, { "epoch": 0.4697829716193656, "grad_norm": 1.201345238724241, "learning_rate": 9.902929016540579e-06, "loss": 0.5184, "step": 1407 }, { "epoch": 0.47011686143572623, "grad_norm": 1.1897430566180522, "learning_rate": 9.902547718689123e-06, "loss": 0.5094, "step": 1408 }, { "epoch": 0.4704507512520868, "grad_norm": 1.209041342196832, "learning_rate": 9.90216568079954e-06, "loss": 0.5078, "step": 1409 }, { "epoch": 0.4707846410684474, "grad_norm": 1.231229938849202, "learning_rate": 9.9017829029295e-06, "loss": 0.548, "step": 1410 }, { "epoch": 0.47111853088480804, "grad_norm": 1.1519639185144264, "learning_rate": 9.90139938513678e-06, "loss": 0.5118, "step": 1411 }, { "epoch": 0.4714524207011686, "grad_norm": 1.273940930518486, "learning_rate": 9.901015127479272e-06, "loss": 0.5252, "step": 1412 }, { "epoch": 0.47178631051752923, "grad_norm": 1.2172513101131486, "learning_rate": 9.900630130014984e-06, "loss": 0.4997, "step": 1413 }, { "epoch": 0.4721202003338898, "grad_norm": 1.191773596636484, "learning_rate": 9.900244392802025e-06, "loss": 0.5159, "step": 1414 }, { "epoch": 0.4724540901502504, "grad_norm": 1.2071443349341682, "learning_rate": 9.899857915898625e-06, "loss": 0.5079, "step": 1415 }, { "epoch": 0.47278797996661104, "grad_norm": 1.2725205723712025, "learning_rate": 9.899470699363126e-06, "loss": 0.5416, "step": 1416 }, { "epoch": 0.4731218697829716, "grad_norm": 1.2381535310608345, "learning_rate": 9.899082743253971e-06, "loss": 0.511, "step": 1417 }, { "epoch": 0.4734557595993322, "grad_norm": 1.2351594310068783, "learning_rate": 9.898694047629729e-06, "loss": 0.5279, "step": 1418 }, { "epoch": 0.47378964941569285, "grad_norm": 1.309754045427574, "learning_rate": 9.898304612549068e-06, "loss": 0.5239, "step": 1419 }, { "epoch": 0.4741235392320534, "grad_norm": 1.2426585923813722, "learning_rate": 9.897914438070777e-06, "loss": 0.5218, "step": 1420 }, { "epoch": 0.47445742904841404, "grad_norm": 1.2473483596057058, "learning_rate": 9.897523524253749e-06, "loss": 0.5165, "step": 1421 }, { "epoch": 0.4747913188647746, "grad_norm": 1.2670676249809094, "learning_rate": 9.897131871156996e-06, "loss": 0.5519, "step": 1422 }, { "epoch": 0.4751252086811352, "grad_norm": 1.2564355811323007, "learning_rate": 9.896739478839636e-06, "loss": 0.5075, "step": 1423 }, { "epoch": 0.47545909849749585, "grad_norm": 1.2566791040334837, "learning_rate": 9.896346347360901e-06, "loss": 0.5391, "step": 1424 }, { "epoch": 0.4757929883138564, "grad_norm": 1.2672015571880582, "learning_rate": 9.895952476780133e-06, "loss": 0.5546, "step": 1425 }, { "epoch": 0.47612687813021703, "grad_norm": 1.219985286626879, "learning_rate": 9.89555786715679e-06, "loss": 0.5485, "step": 1426 }, { "epoch": 0.47646076794657766, "grad_norm": 1.2477639696009368, "learning_rate": 9.895162518550432e-06, "loss": 0.518, "step": 1427 }, { "epoch": 0.4767946577629382, "grad_norm": 1.1707143835635192, "learning_rate": 9.894766431020741e-06, "loss": 0.5002, "step": 1428 }, { "epoch": 0.47712854757929885, "grad_norm": 1.200522123580089, "learning_rate": 9.894369604627507e-06, "loss": 0.4869, "step": 1429 }, { "epoch": 0.4774624373956594, "grad_norm": 1.2919315918488397, "learning_rate": 9.893972039430631e-06, "loss": 0.5295, "step": 1430 }, { "epoch": 0.47779632721202003, "grad_norm": 1.1676303134953538, "learning_rate": 9.89357373549012e-06, "loss": 0.5058, "step": 1431 }, { "epoch": 0.47813021702838066, "grad_norm": 1.2477304030023564, "learning_rate": 9.893174692866106e-06, "loss": 0.5213, "step": 1432 }, { "epoch": 0.4784641068447412, "grad_norm": 1.2527071543064967, "learning_rate": 9.892774911618818e-06, "loss": 0.5309, "step": 1433 }, { "epoch": 0.47879799666110184, "grad_norm": 1.2716809316377897, "learning_rate": 9.892374391808606e-06, "loss": 0.5467, "step": 1434 }, { "epoch": 0.4791318864774624, "grad_norm": 1.2021335492356013, "learning_rate": 9.891973133495927e-06, "loss": 0.504, "step": 1435 }, { "epoch": 0.47946577629382303, "grad_norm": 1.3066632958352125, "learning_rate": 9.891571136741351e-06, "loss": 0.5437, "step": 1436 }, { "epoch": 0.47979966611018365, "grad_norm": 1.235452646278823, "learning_rate": 9.89116840160556e-06, "loss": 0.5167, "step": 1437 }, { "epoch": 0.4801335559265442, "grad_norm": 1.2122405058181143, "learning_rate": 9.890764928149346e-06, "loss": 0.536, "step": 1438 }, { "epoch": 0.48046744574290484, "grad_norm": 1.2934568339618164, "learning_rate": 9.890360716433613e-06, "loss": 0.5399, "step": 1439 }, { "epoch": 0.48080133555926546, "grad_norm": 1.293526882930591, "learning_rate": 9.889955766519376e-06, "loss": 0.5436, "step": 1440 }, { "epoch": 0.48113522537562603, "grad_norm": 1.3095514378536395, "learning_rate": 9.889550078467764e-06, "loss": 0.5298, "step": 1441 }, { "epoch": 0.48146911519198665, "grad_norm": 1.2152082416730405, "learning_rate": 9.889143652340016e-06, "loss": 0.5138, "step": 1442 }, { "epoch": 0.4818030050083472, "grad_norm": 1.174020735441125, "learning_rate": 9.88873648819748e-06, "loss": 0.4998, "step": 1443 }, { "epoch": 0.48213689482470784, "grad_norm": 1.2578890982324569, "learning_rate": 9.888328586101617e-06, "loss": 0.5023, "step": 1444 }, { "epoch": 0.48247078464106846, "grad_norm": 1.245012819167611, "learning_rate": 9.887919946114002e-06, "loss": 0.5359, "step": 1445 }, { "epoch": 0.48280467445742903, "grad_norm": 1.2606355169330676, "learning_rate": 9.887510568296318e-06, "loss": 0.5456, "step": 1446 }, { "epoch": 0.48313856427378965, "grad_norm": 1.1460319104640586, "learning_rate": 9.887100452710356e-06, "loss": 0.5073, "step": 1447 }, { "epoch": 0.48347245409015027, "grad_norm": 1.1953020998602588, "learning_rate": 9.886689599418032e-06, "loss": 0.4963, "step": 1448 }, { "epoch": 0.48380634390651084, "grad_norm": 1.2704460718809898, "learning_rate": 9.886278008481356e-06, "loss": 0.5196, "step": 1449 }, { "epoch": 0.48414023372287146, "grad_norm": 1.1920514880629784, "learning_rate": 9.88586567996246e-06, "loss": 0.5095, "step": 1450 }, { "epoch": 0.484474123539232, "grad_norm": 1.2216989896795467, "learning_rate": 9.885452613923589e-06, "loss": 0.5285, "step": 1451 }, { "epoch": 0.48480801335559265, "grad_norm": 1.1693515570296216, "learning_rate": 9.885038810427089e-06, "loss": 0.5107, "step": 1452 }, { "epoch": 0.48514190317195327, "grad_norm": 1.2035468041736619, "learning_rate": 9.884624269535427e-06, "loss": 0.5174, "step": 1453 }, { "epoch": 0.48547579298831384, "grad_norm": 1.1855734154129784, "learning_rate": 9.884208991311177e-06, "loss": 0.4892, "step": 1454 }, { "epoch": 0.48580968280467446, "grad_norm": 1.2009170264482716, "learning_rate": 9.883792975817026e-06, "loss": 0.5099, "step": 1455 }, { "epoch": 0.4861435726210351, "grad_norm": 1.2497781246894477, "learning_rate": 9.88337622311577e-06, "loss": 0.5395, "step": 1456 }, { "epoch": 0.48647746243739565, "grad_norm": 1.265869691218867, "learning_rate": 9.882958733270317e-06, "loss": 0.516, "step": 1457 }, { "epoch": 0.48681135225375627, "grad_norm": 1.1874560954573317, "learning_rate": 9.88254050634369e-06, "loss": 0.5088, "step": 1458 }, { "epoch": 0.48714524207011684, "grad_norm": 1.1753716673508878, "learning_rate": 9.882121542399017e-06, "loss": 0.5151, "step": 1459 }, { "epoch": 0.48747913188647746, "grad_norm": 1.2412654701424313, "learning_rate": 9.881701841499542e-06, "loss": 0.5249, "step": 1460 }, { "epoch": 0.4878130217028381, "grad_norm": 1.188119611091381, "learning_rate": 9.88128140370862e-06, "loss": 0.5272, "step": 1461 }, { "epoch": 0.48814691151919865, "grad_norm": 1.2117301653975212, "learning_rate": 9.880860229089715e-06, "loss": 0.5401, "step": 1462 }, { "epoch": 0.48848080133555927, "grad_norm": 1.1651805314634875, "learning_rate": 9.880438317706402e-06, "loss": 0.5034, "step": 1463 }, { "epoch": 0.4888146911519199, "grad_norm": 1.2230402972822276, "learning_rate": 9.880015669622369e-06, "loss": 0.5419, "step": 1464 }, { "epoch": 0.48914858096828046, "grad_norm": 1.1619388029342892, "learning_rate": 9.879592284901415e-06, "loss": 0.5206, "step": 1465 }, { "epoch": 0.4894824707846411, "grad_norm": 1.2350020623900542, "learning_rate": 9.879168163607449e-06, "loss": 0.5372, "step": 1466 }, { "epoch": 0.48981636060100164, "grad_norm": 1.1647381821709422, "learning_rate": 9.878743305804493e-06, "loss": 0.4969, "step": 1467 }, { "epoch": 0.49015025041736227, "grad_norm": 1.1929819881660408, "learning_rate": 9.87831771155668e-06, "loss": 0.533, "step": 1468 }, { "epoch": 0.4904841402337229, "grad_norm": 1.2883272128567227, "learning_rate": 9.877891380928251e-06, "loss": 0.53, "step": 1469 }, { "epoch": 0.49081803005008345, "grad_norm": 1.2222701799318976, "learning_rate": 9.877464313983563e-06, "loss": 0.5079, "step": 1470 }, { "epoch": 0.4911519198664441, "grad_norm": 1.1916509930970784, "learning_rate": 9.877036510787077e-06, "loss": 0.519, "step": 1471 }, { "epoch": 0.4914858096828047, "grad_norm": 1.2679931025837856, "learning_rate": 9.876607971403376e-06, "loss": 0.5267, "step": 1472 }, { "epoch": 0.49181969949916526, "grad_norm": 1.2993851545660295, "learning_rate": 9.876178695897143e-06, "loss": 0.5502, "step": 1473 }, { "epoch": 0.4921535893155259, "grad_norm": 1.1733779871691237, "learning_rate": 9.875748684333179e-06, "loss": 0.5335, "step": 1474 }, { "epoch": 0.49248747913188645, "grad_norm": 1.1858616056376896, "learning_rate": 9.875317936776396e-06, "loss": 0.5163, "step": 1475 }, { "epoch": 0.4928213689482471, "grad_norm": 1.2139318438822633, "learning_rate": 9.874886453291812e-06, "loss": 0.5219, "step": 1476 }, { "epoch": 0.4931552587646077, "grad_norm": 1.2232870767475281, "learning_rate": 9.87445423394456e-06, "loss": 0.517, "step": 1477 }, { "epoch": 0.49348914858096826, "grad_norm": 1.2395738359898307, "learning_rate": 9.874021278799886e-06, "loss": 0.5389, "step": 1478 }, { "epoch": 0.4938230383973289, "grad_norm": 1.2200290494263846, "learning_rate": 9.873587587923139e-06, "loss": 0.5011, "step": 1479 }, { "epoch": 0.4941569282136895, "grad_norm": 1.1569711103378677, "learning_rate": 9.87315316137979e-06, "loss": 0.486, "step": 1480 }, { "epoch": 0.49449081803005007, "grad_norm": 1.157181271441347, "learning_rate": 9.872717999235413e-06, "loss": 0.4944, "step": 1481 }, { "epoch": 0.4948247078464107, "grad_norm": 1.2653937973329428, "learning_rate": 9.872282101555697e-06, "loss": 0.526, "step": 1482 }, { "epoch": 0.49515859766277126, "grad_norm": 1.2230122709197417, "learning_rate": 9.871845468406437e-06, "loss": 0.5087, "step": 1483 }, { "epoch": 0.4954924874791319, "grad_norm": 1.1822735727609657, "learning_rate": 9.871408099853548e-06, "loss": 0.4987, "step": 1484 }, { "epoch": 0.4958263772954925, "grad_norm": 1.2703209588706526, "learning_rate": 9.870969995963047e-06, "loss": 0.518, "step": 1485 }, { "epoch": 0.49616026711185307, "grad_norm": 1.2346403106781776, "learning_rate": 9.870531156801067e-06, "loss": 0.5296, "step": 1486 }, { "epoch": 0.4964941569282137, "grad_norm": 1.1845088822378413, "learning_rate": 9.87009158243385e-06, "loss": 0.4936, "step": 1487 }, { "epoch": 0.4968280467445743, "grad_norm": 1.189129112157157, "learning_rate": 9.86965127292775e-06, "loss": 0.5139, "step": 1488 }, { "epoch": 0.4971619365609349, "grad_norm": 1.223059879990408, "learning_rate": 9.869210228349231e-06, "loss": 0.525, "step": 1489 }, { "epoch": 0.4974958263772955, "grad_norm": 1.1697997393989135, "learning_rate": 9.86876844876487e-06, "loss": 0.5027, "step": 1490 }, { "epoch": 0.49782971619365607, "grad_norm": 1.1749503339722074, "learning_rate": 9.868325934241349e-06, "loss": 0.508, "step": 1491 }, { "epoch": 0.4981636060100167, "grad_norm": 1.2065173063234496, "learning_rate": 9.867882684845474e-06, "loss": 0.5168, "step": 1492 }, { "epoch": 0.4984974958263773, "grad_norm": 1.2226454516429897, "learning_rate": 9.867438700644145e-06, "loss": 0.5363, "step": 1493 }, { "epoch": 0.4988313856427379, "grad_norm": 1.3242992389831558, "learning_rate": 9.866993981704384e-06, "loss": 0.5555, "step": 1494 }, { "epoch": 0.4991652754590985, "grad_norm": 1.1872423823686489, "learning_rate": 9.866548528093325e-06, "loss": 0.5167, "step": 1495 }, { "epoch": 0.4994991652754591, "grad_norm": 1.197555041805572, "learning_rate": 9.866102339878203e-06, "loss": 0.5274, "step": 1496 }, { "epoch": 0.4998330550918197, "grad_norm": 1.2513232267450551, "learning_rate": 9.865655417126374e-06, "loss": 0.5408, "step": 1497 }, { "epoch": 0.5001669449081803, "grad_norm": 1.1607391294513594, "learning_rate": 9.8652077599053e-06, "loss": 0.5138, "step": 1498 }, { "epoch": 0.5005008347245409, "grad_norm": 1.1811145366072882, "learning_rate": 9.864759368282555e-06, "loss": 0.5249, "step": 1499 }, { "epoch": 0.5008347245409015, "grad_norm": 1.2170742255367157, "learning_rate": 9.864310242325822e-06, "loss": 0.5033, "step": 1500 }, { "epoch": 0.5011686143572621, "grad_norm": 1.247217308430905, "learning_rate": 9.863860382102896e-06, "loss": 0.5177, "step": 1501 }, { "epoch": 0.5015025041736227, "grad_norm": 1.2318430677596608, "learning_rate": 9.863409787681687e-06, "loss": 0.511, "step": 1502 }, { "epoch": 0.5018363939899833, "grad_norm": 1.1678577508421286, "learning_rate": 9.86295845913021e-06, "loss": 0.5106, "step": 1503 }, { "epoch": 0.5021702838063439, "grad_norm": 1.1736456516938079, "learning_rate": 9.862506396516591e-06, "loss": 0.5022, "step": 1504 }, { "epoch": 0.5025041736227045, "grad_norm": 1.1905261622797874, "learning_rate": 9.862053599909072e-06, "loss": 0.5258, "step": 1505 }, { "epoch": 0.5028380634390651, "grad_norm": 1.1836275381045578, "learning_rate": 9.861600069375999e-06, "loss": 0.52, "step": 1506 }, { "epoch": 0.5031719532554257, "grad_norm": 1.2029331955317626, "learning_rate": 9.861145804985836e-06, "loss": 0.5206, "step": 1507 }, { "epoch": 0.5035058430717864, "grad_norm": 1.1604977601283324, "learning_rate": 9.860690806807152e-06, "loss": 0.5031, "step": 1508 }, { "epoch": 0.5038397328881469, "grad_norm": 1.1855690046158056, "learning_rate": 9.86023507490863e-06, "loss": 0.5019, "step": 1509 }, { "epoch": 0.5041736227045075, "grad_norm": 1.2019746691425377, "learning_rate": 9.859778609359058e-06, "loss": 0.5097, "step": 1510 }, { "epoch": 0.5045075125208681, "grad_norm": 1.1998404963135563, "learning_rate": 9.859321410227346e-06, "loss": 0.5322, "step": 1511 }, { "epoch": 0.5048414023372287, "grad_norm": 1.2208934624631238, "learning_rate": 9.858863477582506e-06, "loss": 0.5156, "step": 1512 }, { "epoch": 0.5051752921535894, "grad_norm": 1.132416782449831, "learning_rate": 9.85840481149366e-06, "loss": 0.5125, "step": 1513 }, { "epoch": 0.5055091819699499, "grad_norm": 1.1595884141629096, "learning_rate": 9.857945412030049e-06, "loss": 0.5255, "step": 1514 }, { "epoch": 0.5058430717863105, "grad_norm": 1.2129904754090328, "learning_rate": 9.85748527926101e-06, "loss": 0.5282, "step": 1515 }, { "epoch": 0.5061769616026711, "grad_norm": 1.177674423090557, "learning_rate": 9.85702441325601e-06, "loss": 0.5235, "step": 1516 }, { "epoch": 0.5065108514190317, "grad_norm": 1.2067453080674757, "learning_rate": 9.856562814084612e-06, "loss": 0.5166, "step": 1517 }, { "epoch": 0.5068447412353924, "grad_norm": 1.1806790098155944, "learning_rate": 9.856100481816491e-06, "loss": 0.5182, "step": 1518 }, { "epoch": 0.5071786310517529, "grad_norm": 1.1778734096318775, "learning_rate": 9.855637416521442e-06, "loss": 0.4959, "step": 1519 }, { "epoch": 0.5075125208681135, "grad_norm": 1.18912460371927, "learning_rate": 9.855173618269363e-06, "loss": 0.518, "step": 1520 }, { "epoch": 0.5078464106844741, "grad_norm": 1.1029371649329927, "learning_rate": 9.854709087130261e-06, "loss": 0.4868, "step": 1521 }, { "epoch": 0.5081803005008347, "grad_norm": 1.1720051557438, "learning_rate": 9.854243823174259e-06, "loss": 0.5183, "step": 1522 }, { "epoch": 0.5085141903171954, "grad_norm": 1.2213026404869065, "learning_rate": 9.85377782647159e-06, "loss": 0.5034, "step": 1523 }, { "epoch": 0.508848080133556, "grad_norm": 1.1508450753657173, "learning_rate": 9.853311097092593e-06, "loss": 0.4958, "step": 1524 }, { "epoch": 0.5091819699499165, "grad_norm": 1.0876546185943305, "learning_rate": 9.852843635107722e-06, "loss": 0.4849, "step": 1525 }, { "epoch": 0.5095158597662771, "grad_norm": 1.2206286360747947, "learning_rate": 9.852375440587542e-06, "loss": 0.5328, "step": 1526 }, { "epoch": 0.5098497495826377, "grad_norm": 1.1608731892395587, "learning_rate": 9.851906513602725e-06, "loss": 0.5305, "step": 1527 }, { "epoch": 0.5101836393989984, "grad_norm": 1.2186264023430808, "learning_rate": 9.851436854224054e-06, "loss": 0.5024, "step": 1528 }, { "epoch": 0.510517529215359, "grad_norm": 1.1911821043179878, "learning_rate": 9.850966462522427e-06, "loss": 0.5038, "step": 1529 }, { "epoch": 0.5108514190317195, "grad_norm": 1.2211831029801905, "learning_rate": 9.850495338568848e-06, "loss": 0.4947, "step": 1530 }, { "epoch": 0.5111853088480801, "grad_norm": 1.1969434115947435, "learning_rate": 9.850023482434434e-06, "loss": 0.5286, "step": 1531 }, { "epoch": 0.5115191986644407, "grad_norm": 1.1957336680242014, "learning_rate": 9.849550894190408e-06, "loss": 0.5273, "step": 1532 }, { "epoch": 0.5118530884808014, "grad_norm": 1.2043698502466476, "learning_rate": 9.849077573908111e-06, "loss": 0.5344, "step": 1533 }, { "epoch": 0.512186978297162, "grad_norm": 1.3588979838040853, "learning_rate": 9.84860352165899e-06, "loss": 0.5011, "step": 1534 }, { "epoch": 0.5125208681135225, "grad_norm": 1.16015307704093, "learning_rate": 9.848128737514602e-06, "loss": 0.5136, "step": 1535 }, { "epoch": 0.5128547579298831, "grad_norm": 1.2420631699016398, "learning_rate": 9.847653221546614e-06, "loss": 0.5338, "step": 1536 }, { "epoch": 0.5131886477462437, "grad_norm": 1.2258031619218928, "learning_rate": 9.847176973826809e-06, "loss": 0.5319, "step": 1537 }, { "epoch": 0.5135225375626044, "grad_norm": 1.1162967078051873, "learning_rate": 9.846699994427074e-06, "loss": 0.4883, "step": 1538 }, { "epoch": 0.513856427378965, "grad_norm": 1.1269082931118861, "learning_rate": 9.846222283419407e-06, "loss": 0.482, "step": 1539 }, { "epoch": 0.5141903171953256, "grad_norm": 1.1003699550843316, "learning_rate": 9.845743840875923e-06, "loss": 0.4943, "step": 1540 }, { "epoch": 0.5145242070116861, "grad_norm": 1.1773301994679, "learning_rate": 9.84526466686884e-06, "loss": 0.5028, "step": 1541 }, { "epoch": 0.5148580968280467, "grad_norm": 1.2064900447364584, "learning_rate": 9.844784761470487e-06, "loss": 0.4862, "step": 1542 }, { "epoch": 0.5151919866444074, "grad_norm": 1.1508970478087286, "learning_rate": 9.84430412475331e-06, "loss": 0.4941, "step": 1543 }, { "epoch": 0.515525876460768, "grad_norm": 1.1712570722855642, "learning_rate": 9.84382275678986e-06, "loss": 0.5256, "step": 1544 }, { "epoch": 0.5158597662771286, "grad_norm": 1.0477128915583531, "learning_rate": 9.843340657652796e-06, "loss": 0.4818, "step": 1545 }, { "epoch": 0.5161936560934891, "grad_norm": 1.0898865081795397, "learning_rate": 9.842857827414894e-06, "loss": 0.5085, "step": 1546 }, { "epoch": 0.5165275459098497, "grad_norm": 1.1777290114864796, "learning_rate": 9.842374266149038e-06, "loss": 0.5143, "step": 1547 }, { "epoch": 0.5168614357262103, "grad_norm": 1.1084485173132486, "learning_rate": 9.841889973928217e-06, "loss": 0.512, "step": 1548 }, { "epoch": 0.517195325542571, "grad_norm": 1.1454829488006155, "learning_rate": 9.841404950825537e-06, "loss": 0.4879, "step": 1549 }, { "epoch": 0.5175292153589316, "grad_norm": 1.1202659789884906, "learning_rate": 9.840919196914214e-06, "loss": 0.4814, "step": 1550 }, { "epoch": 0.5178631051752921, "grad_norm": 1.1454420616545813, "learning_rate": 9.84043271226757e-06, "loss": 0.5143, "step": 1551 }, { "epoch": 0.5181969949916527, "grad_norm": 1.1566284996562202, "learning_rate": 9.83994549695904e-06, "loss": 0.5087, "step": 1552 }, { "epoch": 0.5185308848080133, "grad_norm": 1.1523252836988553, "learning_rate": 9.839457551062172e-06, "loss": 0.5137, "step": 1553 }, { "epoch": 0.518864774624374, "grad_norm": 1.2362931641417214, "learning_rate": 9.838968874650617e-06, "loss": 0.5167, "step": 1554 }, { "epoch": 0.5191986644407346, "grad_norm": 1.1408563487546144, "learning_rate": 9.838479467798141e-06, "loss": 0.4995, "step": 1555 }, { "epoch": 0.5195325542570952, "grad_norm": 1.1549394524847973, "learning_rate": 9.837989330578624e-06, "loss": 0.4959, "step": 1556 }, { "epoch": 0.5198664440734557, "grad_norm": 1.2634694255734729, "learning_rate": 9.83749846306605e-06, "loss": 0.5305, "step": 1557 }, { "epoch": 0.5202003338898163, "grad_norm": 1.146230934276364, "learning_rate": 9.837006865334511e-06, "loss": 0.4977, "step": 1558 }, { "epoch": 0.520534223706177, "grad_norm": 1.1296144743368222, "learning_rate": 9.836514537458219e-06, "loss": 0.5034, "step": 1559 }, { "epoch": 0.5208681135225376, "grad_norm": 1.1412907114084492, "learning_rate": 9.836021479511488e-06, "loss": 0.5007, "step": 1560 }, { "epoch": 0.5212020033388982, "grad_norm": 1.1660521059493751, "learning_rate": 9.835527691568747e-06, "loss": 0.5132, "step": 1561 }, { "epoch": 0.5215358931552587, "grad_norm": 1.194426837895975, "learning_rate": 9.835033173704531e-06, "loss": 0.5206, "step": 1562 }, { "epoch": 0.5218697829716193, "grad_norm": 1.2164337954537587, "learning_rate": 9.834537925993489e-06, "loss": 0.5157, "step": 1563 }, { "epoch": 0.52220367278798, "grad_norm": 1.2160069874504045, "learning_rate": 9.834041948510377e-06, "loss": 0.5122, "step": 1564 }, { "epoch": 0.5225375626043406, "grad_norm": 1.1335731956030073, "learning_rate": 9.833545241330064e-06, "loss": 0.5045, "step": 1565 }, { "epoch": 0.5228714524207012, "grad_norm": 1.1539224014939644, "learning_rate": 9.833047804527529e-06, "loss": 0.4794, "step": 1566 }, { "epoch": 0.5232053422370617, "grad_norm": 1.160223033591591, "learning_rate": 9.832549638177855e-06, "loss": 0.5146, "step": 1567 }, { "epoch": 0.5235392320534223, "grad_norm": 1.1122998279790592, "learning_rate": 9.832050742356245e-06, "loss": 0.503, "step": 1568 }, { "epoch": 0.523873121869783, "grad_norm": 1.1426051398615151, "learning_rate": 9.831551117138006e-06, "loss": 0.502, "step": 1569 }, { "epoch": 0.5242070116861436, "grad_norm": 1.1424054443227596, "learning_rate": 9.831050762598553e-06, "loss": 0.4886, "step": 1570 }, { "epoch": 0.5245409015025042, "grad_norm": 1.1376380733523546, "learning_rate": 9.83054967881342e-06, "loss": 0.4973, "step": 1571 }, { "epoch": 0.5248747913188647, "grad_norm": 1.1242581591660061, "learning_rate": 9.83004786585824e-06, "loss": 0.506, "step": 1572 }, { "epoch": 0.5252086811352253, "grad_norm": 1.1140816648990957, "learning_rate": 9.829545323808767e-06, "loss": 0.4811, "step": 1573 }, { "epoch": 0.525542570951586, "grad_norm": 1.091093836101034, "learning_rate": 9.829042052740854e-06, "loss": 0.4803, "step": 1574 }, { "epoch": 0.5258764607679466, "grad_norm": 1.1203456866906802, "learning_rate": 9.828538052730472e-06, "loss": 0.504, "step": 1575 }, { "epoch": 0.5262103505843072, "grad_norm": 1.1867236031529298, "learning_rate": 9.828033323853703e-06, "loss": 0.5212, "step": 1576 }, { "epoch": 0.5265442404006678, "grad_norm": 1.1585164949500208, "learning_rate": 9.82752786618673e-06, "loss": 0.494, "step": 1577 }, { "epoch": 0.5268781302170283, "grad_norm": 1.1979622866202821, "learning_rate": 9.827021679805855e-06, "loss": 0.5324, "step": 1578 }, { "epoch": 0.527212020033389, "grad_norm": 1.139649596485181, "learning_rate": 9.826514764787484e-06, "loss": 0.5098, "step": 1579 }, { "epoch": 0.5275459098497496, "grad_norm": 1.104048239513574, "learning_rate": 9.826007121208141e-06, "loss": 0.4809, "step": 1580 }, { "epoch": 0.5278797996661102, "grad_norm": 1.121448250052177, "learning_rate": 9.825498749144449e-06, "loss": 0.5115, "step": 1581 }, { "epoch": 0.5282136894824708, "grad_norm": 1.140566198851559, "learning_rate": 9.82498964867315e-06, "loss": 0.4976, "step": 1582 }, { "epoch": 0.5285475792988313, "grad_norm": 1.1694490837988103, "learning_rate": 9.82447981987109e-06, "loss": 0.5079, "step": 1583 }, { "epoch": 0.528881469115192, "grad_norm": 1.183683251097565, "learning_rate": 9.823969262815231e-06, "loss": 0.5172, "step": 1584 }, { "epoch": 0.5292153589315526, "grad_norm": 1.176452740882236, "learning_rate": 9.823457977582638e-06, "loss": 0.5165, "step": 1585 }, { "epoch": 0.5295492487479132, "grad_norm": 1.136963133376205, "learning_rate": 9.82294596425049e-06, "loss": 0.5135, "step": 1586 }, { "epoch": 0.5298831385642738, "grad_norm": 1.1204409568105878, "learning_rate": 9.822433222896078e-06, "loss": 0.505, "step": 1587 }, { "epoch": 0.5302170283806343, "grad_norm": 1.1643829702393294, "learning_rate": 9.821919753596796e-06, "loss": 0.511, "step": 1588 }, { "epoch": 0.530550918196995, "grad_norm": 1.1794967802911775, "learning_rate": 9.821405556430155e-06, "loss": 0.5125, "step": 1589 }, { "epoch": 0.5308848080133556, "grad_norm": 1.1274004979682806, "learning_rate": 9.820890631473772e-06, "loss": 0.4958, "step": 1590 }, { "epoch": 0.5312186978297162, "grad_norm": 1.0989396461627425, "learning_rate": 9.820374978805376e-06, "loss": 0.4946, "step": 1591 }, { "epoch": 0.5315525876460768, "grad_norm": 1.1282489123456256, "learning_rate": 9.819858598502804e-06, "loss": 0.5173, "step": 1592 }, { "epoch": 0.5318864774624374, "grad_norm": 1.1518447933401912, "learning_rate": 9.819341490644e-06, "loss": 0.5143, "step": 1593 }, { "epoch": 0.532220367278798, "grad_norm": 1.1070113086582074, "learning_rate": 9.818823655307026e-06, "loss": 0.4756, "step": 1594 }, { "epoch": 0.5325542570951586, "grad_norm": 1.159383895138509, "learning_rate": 9.818305092570046e-06, "loss": 0.5293, "step": 1595 }, { "epoch": 0.5328881469115192, "grad_norm": 1.1088246183332566, "learning_rate": 9.817785802511339e-06, "loss": 0.5007, "step": 1596 }, { "epoch": 0.5332220367278798, "grad_norm": 1.1062947494875541, "learning_rate": 9.81726578520929e-06, "loss": 0.5095, "step": 1597 }, { "epoch": 0.5335559265442404, "grad_norm": 1.1380779324776225, "learning_rate": 9.816745040742398e-06, "loss": 0.5002, "step": 1598 }, { "epoch": 0.533889816360601, "grad_norm": 1.12302460360104, "learning_rate": 9.816223569189267e-06, "loss": 0.4987, "step": 1599 }, { "epoch": 0.5342237061769616, "grad_norm": 1.1746884991107578, "learning_rate": 9.815701370628615e-06, "loss": 0.4927, "step": 1600 }, { "epoch": 0.5345575959933222, "grad_norm": 1.1882651597176719, "learning_rate": 9.815178445139263e-06, "loss": 0.4902, "step": 1601 }, { "epoch": 0.5348914858096828, "grad_norm": 1.1463436309648747, "learning_rate": 9.814654792800152e-06, "loss": 0.4876, "step": 1602 }, { "epoch": 0.5352253756260434, "grad_norm": 1.5194212695409708, "learning_rate": 9.814130413690324e-06, "loss": 0.5143, "step": 1603 }, { "epoch": 0.535559265442404, "grad_norm": 1.2174153637649583, "learning_rate": 9.813605307888935e-06, "loss": 0.503, "step": 1604 }, { "epoch": 0.5358931552587646, "grad_norm": 1.1837056152210095, "learning_rate": 9.813079475475248e-06, "loss": 0.5182, "step": 1605 }, { "epoch": 0.5362270450751252, "grad_norm": 1.1293441499542403, "learning_rate": 9.812552916528641e-06, "loss": 0.488, "step": 1606 }, { "epoch": 0.5365609348914858, "grad_norm": 1.152986331466244, "learning_rate": 9.812025631128595e-06, "loss": 0.4983, "step": 1607 }, { "epoch": 0.5368948247078464, "grad_norm": 1.1279211339119724, "learning_rate": 9.811497619354702e-06, "loss": 0.4819, "step": 1608 }, { "epoch": 0.5372287145242071, "grad_norm": 1.1317441790020022, "learning_rate": 9.810968881286669e-06, "loss": 0.4994, "step": 1609 }, { "epoch": 0.5375626043405676, "grad_norm": 1.185937800066566, "learning_rate": 9.810439417004307e-06, "loss": 0.5129, "step": 1610 }, { "epoch": 0.5378964941569282, "grad_norm": 1.1108674962125566, "learning_rate": 9.809909226587538e-06, "loss": 0.4979, "step": 1611 }, { "epoch": 0.5382303839732888, "grad_norm": 1.116591743186823, "learning_rate": 9.809378310116395e-06, "loss": 0.5035, "step": 1612 }, { "epoch": 0.5385642737896494, "grad_norm": 1.1698491668650177, "learning_rate": 9.80884666767102e-06, "loss": 0.5277, "step": 1613 }, { "epoch": 0.5388981636060101, "grad_norm": 1.1583820254794068, "learning_rate": 9.808314299331661e-06, "loss": 0.5052, "step": 1614 }, { "epoch": 0.5392320534223706, "grad_norm": 1.1853362070888884, "learning_rate": 9.807781205178684e-06, "loss": 0.5279, "step": 1615 }, { "epoch": 0.5395659432387312, "grad_norm": 1.0592821682574527, "learning_rate": 9.807247385292555e-06, "loss": 0.4637, "step": 1616 }, { "epoch": 0.5398998330550918, "grad_norm": 1.112972543419496, "learning_rate": 9.806712839753858e-06, "loss": 0.5119, "step": 1617 }, { "epoch": 0.5402337228714524, "grad_norm": 1.1281156231838707, "learning_rate": 9.806177568643279e-06, "loss": 0.5257, "step": 1618 }, { "epoch": 0.5405676126878131, "grad_norm": 1.0855825867511828, "learning_rate": 9.80564157204162e-06, "loss": 0.4985, "step": 1619 }, { "epoch": 0.5409015025041736, "grad_norm": 1.1896894405013476, "learning_rate": 9.805104850029785e-06, "loss": 0.527, "step": 1620 }, { "epoch": 0.5412353923205342, "grad_norm": 1.1133400456943712, "learning_rate": 9.804567402688796e-06, "loss": 0.5006, "step": 1621 }, { "epoch": 0.5415692821368948, "grad_norm": 1.176252489662721, "learning_rate": 9.80402923009978e-06, "loss": 0.4952, "step": 1622 }, { "epoch": 0.5419031719532554, "grad_norm": 1.150792444218577, "learning_rate": 9.803490332343973e-06, "loss": 0.5088, "step": 1623 }, { "epoch": 0.5422370617696161, "grad_norm": 1.1144503399251886, "learning_rate": 9.802950709502724e-06, "loss": 0.5002, "step": 1624 }, { "epoch": 0.5425709515859767, "grad_norm": 1.1289116653175952, "learning_rate": 9.802410361657483e-06, "loss": 0.4851, "step": 1625 }, { "epoch": 0.5429048414023372, "grad_norm": 1.111595604558496, "learning_rate": 9.80186928888982e-06, "loss": 0.4985, "step": 1626 }, { "epoch": 0.5432387312186978, "grad_norm": 1.148081383019076, "learning_rate": 9.801327491281411e-06, "loss": 0.5249, "step": 1627 }, { "epoch": 0.5435726210350584, "grad_norm": 1.0725998632176446, "learning_rate": 9.800784968914037e-06, "loss": 0.471, "step": 1628 }, { "epoch": 0.5439065108514191, "grad_norm": 1.1435417773032381, "learning_rate": 9.800241721869593e-06, "loss": 0.506, "step": 1629 }, { "epoch": 0.5442404006677797, "grad_norm": 1.1370706524955136, "learning_rate": 9.79969775023008e-06, "loss": 0.511, "step": 1630 }, { "epoch": 0.5445742904841402, "grad_norm": 1.1265925065947906, "learning_rate": 9.799153054077613e-06, "loss": 0.4946, "step": 1631 }, { "epoch": 0.5449081803005008, "grad_norm": 1.084051141461635, "learning_rate": 9.798607633494414e-06, "loss": 0.5026, "step": 1632 }, { "epoch": 0.5452420701168614, "grad_norm": 1.1587451113639906, "learning_rate": 9.79806148856281e-06, "loss": 0.501, "step": 1633 }, { "epoch": 0.545575959933222, "grad_norm": 1.109352580418426, "learning_rate": 9.797514619365247e-06, "loss": 0.4786, "step": 1634 }, { "epoch": 0.5459098497495827, "grad_norm": 1.1211411175405144, "learning_rate": 9.796967025984271e-06, "loss": 0.4947, "step": 1635 }, { "epoch": 0.5462437395659432, "grad_norm": 1.141371214127038, "learning_rate": 9.796418708502543e-06, "loss": 0.5114, "step": 1636 }, { "epoch": 0.5465776293823038, "grad_norm": 1.121726679990922, "learning_rate": 9.795869667002829e-06, "loss": 0.4998, "step": 1637 }, { "epoch": 0.5469115191986644, "grad_norm": 1.1194731445199755, "learning_rate": 9.795319901568008e-06, "loss": 0.4907, "step": 1638 }, { "epoch": 0.547245409015025, "grad_norm": 1.164219155015283, "learning_rate": 9.794769412281066e-06, "loss": 0.4889, "step": 1639 }, { "epoch": 0.5475792988313857, "grad_norm": 1.0896327396943544, "learning_rate": 9.794218199225103e-06, "loss": 0.5048, "step": 1640 }, { "epoch": 0.5479131886477463, "grad_norm": 1.1324218230790786, "learning_rate": 9.79366626248332e-06, "loss": 0.4909, "step": 1641 }, { "epoch": 0.5482470784641068, "grad_norm": 1.1842052302659372, "learning_rate": 9.793113602139032e-06, "loss": 0.5005, "step": 1642 }, { "epoch": 0.5485809682804674, "grad_norm": 1.1534381554767095, "learning_rate": 9.792560218275666e-06, "loss": 0.5046, "step": 1643 }, { "epoch": 0.548914858096828, "grad_norm": 1.0831297313812847, "learning_rate": 9.792006110976753e-06, "loss": 0.4994, "step": 1644 }, { "epoch": 0.5492487479131887, "grad_norm": 1.0739731759124747, "learning_rate": 9.791451280325935e-06, "loss": 0.4898, "step": 1645 }, { "epoch": 0.5495826377295493, "grad_norm": 1.1139267542417253, "learning_rate": 9.790895726406964e-06, "loss": 0.5038, "step": 1646 }, { "epoch": 0.5499165275459098, "grad_norm": 1.1322263340429137, "learning_rate": 9.790339449303702e-06, "loss": 0.5344, "step": 1647 }, { "epoch": 0.5502504173622704, "grad_norm": 1.1570459506575992, "learning_rate": 9.789782449100118e-06, "loss": 0.5091, "step": 1648 }, { "epoch": 0.550584307178631, "grad_norm": 1.1463909694142373, "learning_rate": 9.78922472588029e-06, "loss": 0.5015, "step": 1649 }, { "epoch": 0.5509181969949917, "grad_norm": 1.1676515552516509, "learning_rate": 9.788666279728408e-06, "loss": 0.5099, "step": 1650 }, { "epoch": 0.5512520868113523, "grad_norm": 1.0803513506644162, "learning_rate": 9.788107110728766e-06, "loss": 0.4773, "step": 1651 }, { "epoch": 0.5515859766277128, "grad_norm": 1.078002719382883, "learning_rate": 9.787547218965775e-06, "loss": 0.4929, "step": 1652 }, { "epoch": 0.5519198664440734, "grad_norm": 1.0940836154925637, "learning_rate": 9.786986604523946e-06, "loss": 0.4563, "step": 1653 }, { "epoch": 0.552253756260434, "grad_norm": 1.1199670046903292, "learning_rate": 9.786425267487907e-06, "loss": 0.4893, "step": 1654 }, { "epoch": 0.5525876460767947, "grad_norm": 1.1127051486500994, "learning_rate": 9.78586320794239e-06, "loss": 0.4964, "step": 1655 }, { "epoch": 0.5529215358931553, "grad_norm": 1.117837466318853, "learning_rate": 9.785300425972238e-06, "loss": 0.507, "step": 1656 }, { "epoch": 0.5532554257095159, "grad_norm": 1.1187121649490213, "learning_rate": 9.784736921662403e-06, "loss": 0.4955, "step": 1657 }, { "epoch": 0.5535893155258764, "grad_norm": 1.0339245368781995, "learning_rate": 9.784172695097946e-06, "loss": 0.4782, "step": 1658 }, { "epoch": 0.553923205342237, "grad_norm": 1.157884349292039, "learning_rate": 9.783607746364037e-06, "loss": 0.5263, "step": 1659 }, { "epoch": 0.5542570951585977, "grad_norm": 1.1313692966073734, "learning_rate": 9.783042075545951e-06, "loss": 0.5221, "step": 1660 }, { "epoch": 0.5545909849749583, "grad_norm": 1.0650882481988937, "learning_rate": 9.782475682729084e-06, "loss": 0.4822, "step": 1661 }, { "epoch": 0.5549248747913189, "grad_norm": 1.0515669551214382, "learning_rate": 9.781908567998924e-06, "loss": 0.4807, "step": 1662 }, { "epoch": 0.5552587646076794, "grad_norm": 1.0899821646269254, "learning_rate": 9.781340731441082e-06, "loss": 0.4793, "step": 1663 }, { "epoch": 0.55559265442404, "grad_norm": 1.0900955080588839, "learning_rate": 9.780772173141273e-06, "loss": 0.4867, "step": 1664 }, { "epoch": 0.5559265442404007, "grad_norm": 1.097437864099752, "learning_rate": 9.780202893185318e-06, "loss": 0.4828, "step": 1665 }, { "epoch": 0.5562604340567613, "grad_norm": 1.1201597580523117, "learning_rate": 9.779632891659152e-06, "loss": 0.4855, "step": 1666 }, { "epoch": 0.5565943238731219, "grad_norm": 1.081922583396706, "learning_rate": 9.779062168648814e-06, "loss": 0.489, "step": 1667 }, { "epoch": 0.5569282136894824, "grad_norm": 1.0678724526440255, "learning_rate": 9.778490724240457e-06, "loss": 0.4888, "step": 1668 }, { "epoch": 0.557262103505843, "grad_norm": 1.1017004823239551, "learning_rate": 9.777918558520338e-06, "loss": 0.4885, "step": 1669 }, { "epoch": 0.5575959933222037, "grad_norm": 1.1358553484964637, "learning_rate": 9.777345671574828e-06, "loss": 0.4991, "step": 1670 }, { "epoch": 0.5579298831385643, "grad_norm": 1.0691406474653344, "learning_rate": 9.776772063490403e-06, "loss": 0.4841, "step": 1671 }, { "epoch": 0.5582637729549249, "grad_norm": 1.1340714298882708, "learning_rate": 9.776197734353647e-06, "loss": 0.5017, "step": 1672 }, { "epoch": 0.5585976627712855, "grad_norm": 1.1253198001274445, "learning_rate": 9.775622684251255e-06, "loss": 0.5169, "step": 1673 }, { "epoch": 0.558931552587646, "grad_norm": 1.1034756947207174, "learning_rate": 9.775046913270036e-06, "loss": 0.4903, "step": 1674 }, { "epoch": 0.5592654424040067, "grad_norm": 1.0609939378376128, "learning_rate": 9.774470421496895e-06, "loss": 0.4937, "step": 1675 }, { "epoch": 0.5595993322203673, "grad_norm": 1.0477832600153054, "learning_rate": 9.77389320901886e-06, "loss": 0.485, "step": 1676 }, { "epoch": 0.5599332220367279, "grad_norm": 1.1771645148228573, "learning_rate": 9.773315275923057e-06, "loss": 0.5513, "step": 1677 }, { "epoch": 0.5602671118530885, "grad_norm": 1.2459902747923346, "learning_rate": 9.772736622296723e-06, "loss": 0.5014, "step": 1678 }, { "epoch": 0.560601001669449, "grad_norm": 1.052222468334516, "learning_rate": 9.772157248227212e-06, "loss": 0.4763, "step": 1679 }, { "epoch": 0.5609348914858097, "grad_norm": 1.0565988772218822, "learning_rate": 9.771577153801974e-06, "loss": 0.4993, "step": 1680 }, { "epoch": 0.5612687813021703, "grad_norm": 1.0802720549646267, "learning_rate": 9.770996339108577e-06, "loss": 0.4816, "step": 1681 }, { "epoch": 0.5616026711185309, "grad_norm": 1.0789715860080358, "learning_rate": 9.770414804234695e-06, "loss": 0.4845, "step": 1682 }, { "epoch": 0.5619365609348915, "grad_norm": 1.048011898617931, "learning_rate": 9.769832549268112e-06, "loss": 0.4906, "step": 1683 }, { "epoch": 0.562270450751252, "grad_norm": 1.0696485629771249, "learning_rate": 9.769249574296716e-06, "loss": 0.5043, "step": 1684 }, { "epoch": 0.5626043405676127, "grad_norm": 1.064055153770045, "learning_rate": 9.768665879408508e-06, "loss": 0.5048, "step": 1685 }, { "epoch": 0.5629382303839733, "grad_norm": 1.1052357472603054, "learning_rate": 9.768081464691596e-06, "loss": 0.4979, "step": 1686 }, { "epoch": 0.5632721202003339, "grad_norm": 1.0891007692432766, "learning_rate": 9.767496330234197e-06, "loss": 0.4788, "step": 1687 }, { "epoch": 0.5636060100166945, "grad_norm": 1.0679544789967181, "learning_rate": 9.76691047612464e-06, "loss": 0.4785, "step": 1688 }, { "epoch": 0.5639398998330551, "grad_norm": 1.2010158028511655, "learning_rate": 9.766323902451356e-06, "loss": 0.517, "step": 1689 }, { "epoch": 0.5642737896494157, "grad_norm": 1.0447186995947555, "learning_rate": 9.76573660930289e-06, "loss": 0.4737, "step": 1690 }, { "epoch": 0.5646076794657763, "grad_norm": 1.1218584970127723, "learning_rate": 9.765148596767893e-06, "loss": 0.5111, "step": 1691 }, { "epoch": 0.5649415692821369, "grad_norm": 1.1094624228138494, "learning_rate": 9.764559864935127e-06, "loss": 0.5213, "step": 1692 }, { "epoch": 0.5652754590984975, "grad_norm": 1.0084884508170766, "learning_rate": 9.763970413893457e-06, "loss": 0.4836, "step": 1693 }, { "epoch": 0.5656093489148581, "grad_norm": 1.0364752943931923, "learning_rate": 9.763380243731864e-06, "loss": 0.4828, "step": 1694 }, { "epoch": 0.5659432387312187, "grad_norm": 1.1509216804751718, "learning_rate": 9.762789354539432e-06, "loss": 0.5015, "step": 1695 }, { "epoch": 0.5662771285475793, "grad_norm": 1.1302319383520842, "learning_rate": 9.762197746405358e-06, "loss": 0.5075, "step": 1696 }, { "epoch": 0.5666110183639399, "grad_norm": 1.0641453020112348, "learning_rate": 9.761605419418942e-06, "loss": 0.4773, "step": 1697 }, { "epoch": 0.5669449081803005, "grad_norm": 1.0768641607468106, "learning_rate": 9.761012373669599e-06, "loss": 0.4933, "step": 1698 }, { "epoch": 0.5672787979966611, "grad_norm": 1.1116194144649127, "learning_rate": 9.760418609246845e-06, "loss": 0.4774, "step": 1699 }, { "epoch": 0.5676126878130217, "grad_norm": 1.1287033627495144, "learning_rate": 9.759824126240311e-06, "loss": 0.4926, "step": 1700 }, { "epoch": 0.5679465776293823, "grad_norm": 1.0650806568190854, "learning_rate": 9.759228924739734e-06, "loss": 0.4916, "step": 1701 }, { "epoch": 0.5682804674457429, "grad_norm": 1.1177213148828438, "learning_rate": 9.758633004834961e-06, "loss": 0.5133, "step": 1702 }, { "epoch": 0.5686143572621035, "grad_norm": 1.1117170332392186, "learning_rate": 9.758036366615942e-06, "loss": 0.4803, "step": 1703 }, { "epoch": 0.5689482470784641, "grad_norm": 1.060651654782984, "learning_rate": 9.757439010172743e-06, "loss": 0.4911, "step": 1704 }, { "epoch": 0.5692821368948247, "grad_norm": 1.0291851975734803, "learning_rate": 9.756840935595531e-06, "loss": 0.4708, "step": 1705 }, { "epoch": 0.5696160267111853, "grad_norm": 1.1019175320118504, "learning_rate": 9.75624214297459e-06, "loss": 0.5021, "step": 1706 }, { "epoch": 0.5699499165275459, "grad_norm": 1.09970539733342, "learning_rate": 9.755642632400304e-06, "loss": 0.5029, "step": 1707 }, { "epoch": 0.5702838063439065, "grad_norm": 1.061108753696846, "learning_rate": 9.75504240396317e-06, "loss": 0.4756, "step": 1708 }, { "epoch": 0.5706176961602671, "grad_norm": 1.1192048689987089, "learning_rate": 9.75444145775379e-06, "loss": 0.5009, "step": 1709 }, { "epoch": 0.5709515859766278, "grad_norm": 1.0510153125707007, "learning_rate": 9.753839793862883e-06, "loss": 0.49, "step": 1710 }, { "epoch": 0.5712854757929883, "grad_norm": 1.0903312894287407, "learning_rate": 9.753237412381265e-06, "loss": 0.4849, "step": 1711 }, { "epoch": 0.5716193656093489, "grad_norm": 1.109720501189613, "learning_rate": 9.752634313399865e-06, "loss": 0.4868, "step": 1712 }, { "epoch": 0.5719532554257095, "grad_norm": 1.068124262149034, "learning_rate": 9.752030497009722e-06, "loss": 0.4866, "step": 1713 }, { "epoch": 0.5722871452420701, "grad_norm": 1.1001552107314798, "learning_rate": 9.751425963301983e-06, "loss": 0.5034, "step": 1714 }, { "epoch": 0.5726210350584308, "grad_norm": 1.1371064583885087, "learning_rate": 9.7508207123679e-06, "loss": 0.4945, "step": 1715 }, { "epoch": 0.5729549248747913, "grad_norm": 1.0641095703955454, "learning_rate": 9.750214744298835e-06, "loss": 0.4868, "step": 1716 }, { "epoch": 0.5732888146911519, "grad_norm": 1.0865646677320513, "learning_rate": 9.749608059186264e-06, "loss": 0.4992, "step": 1717 }, { "epoch": 0.5736227045075125, "grad_norm": 1.0915098325700099, "learning_rate": 9.74900065712176e-06, "loss": 0.498, "step": 1718 }, { "epoch": 0.5739565943238731, "grad_norm": 1.0779517523968136, "learning_rate": 9.748392538197011e-06, "loss": 0.4714, "step": 1719 }, { "epoch": 0.5742904841402338, "grad_norm": 1.139650367948577, "learning_rate": 9.747783702503814e-06, "loss": 0.5089, "step": 1720 }, { "epoch": 0.5746243739565943, "grad_norm": 1.0968990131673415, "learning_rate": 9.747174150134074e-06, "loss": 0.4945, "step": 1721 }, { "epoch": 0.5749582637729549, "grad_norm": 1.0215603647077292, "learning_rate": 9.746563881179798e-06, "loss": 0.4867, "step": 1722 }, { "epoch": 0.5752921535893155, "grad_norm": 1.0520444349334948, "learning_rate": 9.74595289573311e-06, "loss": 0.4885, "step": 1723 }, { "epoch": 0.5756260434056761, "grad_norm": 1.0365464713326953, "learning_rate": 9.745341193886238e-06, "loss": 0.4648, "step": 1724 }, { "epoch": 0.5759599332220368, "grad_norm": 1.1223731124525298, "learning_rate": 9.744728775731516e-06, "loss": 0.5035, "step": 1725 }, { "epoch": 0.5762938230383974, "grad_norm": 1.0702989339158784, "learning_rate": 9.744115641361389e-06, "loss": 0.4944, "step": 1726 }, { "epoch": 0.5766277128547579, "grad_norm": 1.1380669842246487, "learning_rate": 9.74350179086841e-06, "loss": 0.4904, "step": 1727 }, { "epoch": 0.5769616026711185, "grad_norm": 1.057573109998935, "learning_rate": 9.74288722434524e-06, "loss": 0.4867, "step": 1728 }, { "epoch": 0.5772954924874791, "grad_norm": 1.0121408314117466, "learning_rate": 9.74227194188465e-06, "loss": 0.4529, "step": 1729 }, { "epoch": 0.5776293823038398, "grad_norm": 1.0885244175032365, "learning_rate": 9.74165594357951e-06, "loss": 0.5084, "step": 1730 }, { "epoch": 0.5779632721202004, "grad_norm": 1.1085210957077696, "learning_rate": 9.741039229522809e-06, "loss": 0.5019, "step": 1731 }, { "epoch": 0.5782971619365609, "grad_norm": 1.0692537969740339, "learning_rate": 9.74042179980764e-06, "loss": 0.4647, "step": 1732 }, { "epoch": 0.5786310517529215, "grad_norm": 1.1586043232305514, "learning_rate": 9.739803654527203e-06, "loss": 0.5217, "step": 1733 }, { "epoch": 0.5789649415692821, "grad_norm": 1.1471342852886253, "learning_rate": 9.739184793774807e-06, "loss": 0.5003, "step": 1734 }, { "epoch": 0.5792988313856428, "grad_norm": 1.104976182371055, "learning_rate": 9.738565217643868e-06, "loss": 0.5211, "step": 1735 }, { "epoch": 0.5796327212020034, "grad_norm": 1.1196440026021242, "learning_rate": 9.737944926227913e-06, "loss": 0.506, "step": 1736 }, { "epoch": 0.5799666110183639, "grad_norm": 1.0706690700124497, "learning_rate": 9.737323919620573e-06, "loss": 0.4791, "step": 1737 }, { "epoch": 0.5803005008347245, "grad_norm": 1.1094846614235343, "learning_rate": 9.736702197915589e-06, "loss": 0.4957, "step": 1738 }, { "epoch": 0.5806343906510851, "grad_norm": 1.0879686719582755, "learning_rate": 9.736079761206811e-06, "loss": 0.5035, "step": 1739 }, { "epoch": 0.5809682804674458, "grad_norm": 1.0691807185934192, "learning_rate": 9.735456609588194e-06, "loss": 0.4889, "step": 1740 }, { "epoch": 0.5813021702838064, "grad_norm": 1.0881755885584925, "learning_rate": 9.734832743153802e-06, "loss": 0.4907, "step": 1741 }, { "epoch": 0.581636060100167, "grad_norm": 1.0775496100317237, "learning_rate": 9.73420816199781e-06, "loss": 0.4694, "step": 1742 }, { "epoch": 0.5819699499165275, "grad_norm": 1.0668165644937464, "learning_rate": 9.733582866214496e-06, "loss": 0.4719, "step": 1743 }, { "epoch": 0.5823038397328881, "grad_norm": 1.101375478992394, "learning_rate": 9.732956855898251e-06, "loss": 0.4743, "step": 1744 }, { "epoch": 0.5826377295492488, "grad_norm": 1.0745462228841458, "learning_rate": 9.732330131143569e-06, "loss": 0.4861, "step": 1745 }, { "epoch": 0.5829716193656094, "grad_norm": 1.0960477214051472, "learning_rate": 9.731702692045053e-06, "loss": 0.4703, "step": 1746 }, { "epoch": 0.58330550918197, "grad_norm": 1.0727108031035937, "learning_rate": 9.731074538697416e-06, "loss": 0.4894, "step": 1747 }, { "epoch": 0.5836393989983305, "grad_norm": 1.0430660599032624, "learning_rate": 9.730445671195478e-06, "loss": 0.48, "step": 1748 }, { "epoch": 0.5839732888146911, "grad_norm": 1.1480634943704262, "learning_rate": 9.729816089634165e-06, "loss": 0.4662, "step": 1749 }, { "epoch": 0.5843071786310517, "grad_norm": 1.0919348501076713, "learning_rate": 9.729185794108513e-06, "loss": 0.4948, "step": 1750 }, { "epoch": 0.5846410684474124, "grad_norm": 1.03698662114105, "learning_rate": 9.728554784713667e-06, "loss": 0.4623, "step": 1751 }, { "epoch": 0.584974958263773, "grad_norm": 1.123863727978336, "learning_rate": 9.727923061544872e-06, "loss": 0.4849, "step": 1752 }, { "epoch": 0.5853088480801335, "grad_norm": 1.061432680572174, "learning_rate": 9.727290624697493e-06, "loss": 0.4716, "step": 1753 }, { "epoch": 0.5856427378964941, "grad_norm": 1.0687556655835824, "learning_rate": 9.726657474266992e-06, "loss": 0.4784, "step": 1754 }, { "epoch": 0.5859766277128547, "grad_norm": 1.0739857627472105, "learning_rate": 9.726023610348944e-06, "loss": 0.4724, "step": 1755 }, { "epoch": 0.5863105175292154, "grad_norm": 1.0980240499783582, "learning_rate": 9.725389033039032e-06, "loss": 0.4839, "step": 1756 }, { "epoch": 0.586644407345576, "grad_norm": 1.0415341549709245, "learning_rate": 9.724753742433042e-06, "loss": 0.4757, "step": 1757 }, { "epoch": 0.5869782971619366, "grad_norm": 1.089414774690047, "learning_rate": 9.724117738626874e-06, "loss": 0.4891, "step": 1758 }, { "epoch": 0.5873121869782971, "grad_norm": 1.1033414954702137, "learning_rate": 9.723481021716531e-06, "loss": 0.4874, "step": 1759 }, { "epoch": 0.5876460767946577, "grad_norm": 1.0604829889806608, "learning_rate": 9.722843591798126e-06, "loss": 0.4777, "step": 1760 }, { "epoch": 0.5879799666110184, "grad_norm": 1.0664161142130135, "learning_rate": 9.722205448967878e-06, "loss": 0.4828, "step": 1761 }, { "epoch": 0.588313856427379, "grad_norm": 1.0584910222032573, "learning_rate": 9.721566593322114e-06, "loss": 0.4859, "step": 1762 }, { "epoch": 0.5886477462437396, "grad_norm": 1.0730966576583751, "learning_rate": 9.720927024957271e-06, "loss": 0.4812, "step": 1763 }, { "epoch": 0.5889816360601001, "grad_norm": 1.0295281451423264, "learning_rate": 9.72028674396989e-06, "loss": 0.4588, "step": 1764 }, { "epoch": 0.5893155258764607, "grad_norm": 1.0732251731288356, "learning_rate": 9.719645750456623e-06, "loss": 0.4807, "step": 1765 }, { "epoch": 0.5896494156928214, "grad_norm": 1.1401714439702053, "learning_rate": 9.719004044514226e-06, "loss": 0.525, "step": 1766 }, { "epoch": 0.589983305509182, "grad_norm": 1.0742727450372036, "learning_rate": 9.718361626239566e-06, "loss": 0.4783, "step": 1767 }, { "epoch": 0.5903171953255426, "grad_norm": 1.1062913131443122, "learning_rate": 9.717718495729615e-06, "loss": 0.5235, "step": 1768 }, { "epoch": 0.5906510851419031, "grad_norm": 1.063305254920572, "learning_rate": 9.717074653081451e-06, "loss": 0.4743, "step": 1769 }, { "epoch": 0.5909849749582637, "grad_norm": 1.0846749773219018, "learning_rate": 9.716430098392266e-06, "loss": 0.4893, "step": 1770 }, { "epoch": 0.5913188647746244, "grad_norm": 1.0991687967926278, "learning_rate": 9.715784831759353e-06, "loss": 0.4829, "step": 1771 }, { "epoch": 0.591652754590985, "grad_norm": 1.1563894220519413, "learning_rate": 9.715138853280115e-06, "loss": 0.4825, "step": 1772 }, { "epoch": 0.5919866444073456, "grad_norm": 1.0893386002086132, "learning_rate": 9.714492163052063e-06, "loss": 0.5046, "step": 1773 }, { "epoch": 0.5923205342237062, "grad_norm": 1.0506773776577514, "learning_rate": 9.713844761172813e-06, "loss": 0.4974, "step": 1774 }, { "epoch": 0.5926544240400667, "grad_norm": 1.0558580255208232, "learning_rate": 9.713196647740092e-06, "loss": 0.4773, "step": 1775 }, { "epoch": 0.5929883138564274, "grad_norm": 1.0725393831844854, "learning_rate": 9.712547822851733e-06, "loss": 0.4892, "step": 1776 }, { "epoch": 0.593322203672788, "grad_norm": 1.0411128089093165, "learning_rate": 9.711898286605672e-06, "loss": 0.4727, "step": 1777 }, { "epoch": 0.5936560934891486, "grad_norm": 1.09279097236024, "learning_rate": 9.711248039099961e-06, "loss": 0.4894, "step": 1778 }, { "epoch": 0.5939899833055092, "grad_norm": 1.0936457310895966, "learning_rate": 9.710597080432753e-06, "loss": 0.4807, "step": 1779 }, { "epoch": 0.5943238731218697, "grad_norm": 1.110071604243898, "learning_rate": 9.709945410702308e-06, "loss": 0.4824, "step": 1780 }, { "epoch": 0.5946577629382304, "grad_norm": 1.0988607253988296, "learning_rate": 9.709293030006999e-06, "loss": 0.474, "step": 1781 }, { "epoch": 0.594991652754591, "grad_norm": 1.0633585069474352, "learning_rate": 9.708639938445299e-06, "loss": 0.4554, "step": 1782 }, { "epoch": 0.5953255425709516, "grad_norm": 1.1016273411329565, "learning_rate": 9.707986136115795e-06, "loss": 0.4991, "step": 1783 }, { "epoch": 0.5956594323873122, "grad_norm": 1.1299606616320912, "learning_rate": 9.707331623117176e-06, "loss": 0.4976, "step": 1784 }, { "epoch": 0.5959933222036727, "grad_norm": 1.0667518480061582, "learning_rate": 9.706676399548242e-06, "loss": 0.4757, "step": 1785 }, { "epoch": 0.5963272120200334, "grad_norm": 1.1376845601823284, "learning_rate": 9.7060204655079e-06, "loss": 0.4976, "step": 1786 }, { "epoch": 0.596661101836394, "grad_norm": 1.053842338957427, "learning_rate": 9.70536382109516e-06, "loss": 0.4796, "step": 1787 }, { "epoch": 0.5969949916527546, "grad_norm": 1.0795485134999705, "learning_rate": 9.704706466409143e-06, "loss": 0.4768, "step": 1788 }, { "epoch": 0.5973288814691152, "grad_norm": 1.088631337183465, "learning_rate": 9.704048401549078e-06, "loss": 0.4721, "step": 1789 }, { "epoch": 0.5976627712854758, "grad_norm": 1.035270955830066, "learning_rate": 9.7033896266143e-06, "loss": 0.4611, "step": 1790 }, { "epoch": 0.5979966611018364, "grad_norm": 1.0316882723668852, "learning_rate": 9.702730141704246e-06, "loss": 0.443, "step": 1791 }, { "epoch": 0.598330550918197, "grad_norm": 1.049684423941244, "learning_rate": 9.702069946918472e-06, "loss": 0.48, "step": 1792 }, { "epoch": 0.5986644407345576, "grad_norm": 1.08437747668516, "learning_rate": 9.701409042356631e-06, "loss": 0.4918, "step": 1793 }, { "epoch": 0.5989983305509182, "grad_norm": 1.0683237943827655, "learning_rate": 9.700747428118485e-06, "loss": 0.4982, "step": 1794 }, { "epoch": 0.5993322203672788, "grad_norm": 1.0963650054943255, "learning_rate": 9.700085104303908e-06, "loss": 0.4952, "step": 1795 }, { "epoch": 0.5996661101836394, "grad_norm": 1.0447800486282024, "learning_rate": 9.699422071012873e-06, "loss": 0.4791, "step": 1796 }, { "epoch": 0.6, "grad_norm": 1.040858038172078, "learning_rate": 9.698758328345467e-06, "loss": 0.4763, "step": 1797 }, { "epoch": 0.6003338898163606, "grad_norm": 1.1101818839768773, "learning_rate": 9.698093876401884e-06, "loss": 0.4944, "step": 1798 }, { "epoch": 0.6006677796327212, "grad_norm": 1.040607271883769, "learning_rate": 9.69742871528242e-06, "loss": 0.5017, "step": 1799 }, { "epoch": 0.6010016694490818, "grad_norm": 1.0542409819084273, "learning_rate": 9.696762845087481e-06, "loss": 0.489, "step": 1800 }, { "epoch": 0.6013355592654424, "grad_norm": 1.036053103248477, "learning_rate": 9.69609626591758e-06, "loss": 0.4705, "step": 1801 }, { "epoch": 0.601669449081803, "grad_norm": 1.0242910157574683, "learning_rate": 9.695428977873337e-06, "loss": 0.4835, "step": 1802 }, { "epoch": 0.6020033388981636, "grad_norm": 1.025731285876406, "learning_rate": 9.694760981055482e-06, "loss": 0.4865, "step": 1803 }, { "epoch": 0.6023372287145242, "grad_norm": 1.0572700929673646, "learning_rate": 9.694092275564845e-06, "loss": 0.4844, "step": 1804 }, { "epoch": 0.6026711185308848, "grad_norm": 1.0849607116310656, "learning_rate": 9.693422861502369e-06, "loss": 0.5022, "step": 1805 }, { "epoch": 0.6030050083472455, "grad_norm": 1.0110022986025722, "learning_rate": 9.692752738969097e-06, "loss": 0.4657, "step": 1806 }, { "epoch": 0.603338898163606, "grad_norm": 1.060828667292055, "learning_rate": 9.692081908066192e-06, "loss": 0.4757, "step": 1807 }, { "epoch": 0.6036727879799666, "grad_norm": 1.0608156374922408, "learning_rate": 9.69141036889491e-06, "loss": 0.4888, "step": 1808 }, { "epoch": 0.6040066777963272, "grad_norm": 1.09346923777322, "learning_rate": 9.690738121556622e-06, "loss": 0.497, "step": 1809 }, { "epoch": 0.6043405676126878, "grad_norm": 1.0861589720576013, "learning_rate": 9.690065166152802e-06, "loss": 0.4804, "step": 1810 }, { "epoch": 0.6046744574290485, "grad_norm": 1.0951704888816833, "learning_rate": 9.689391502785033e-06, "loss": 0.4874, "step": 1811 }, { "epoch": 0.605008347245409, "grad_norm": 1.052705689010054, "learning_rate": 9.688717131555004e-06, "loss": 0.4994, "step": 1812 }, { "epoch": 0.6053422370617696, "grad_norm": 0.9885603527035771, "learning_rate": 9.688042052564513e-06, "loss": 0.4717, "step": 1813 }, { "epoch": 0.6056761268781302, "grad_norm": 1.0638263684445708, "learning_rate": 9.68736626591546e-06, "loss": 0.484, "step": 1814 }, { "epoch": 0.6060100166944908, "grad_norm": 1.0711493174399649, "learning_rate": 9.686689771709856e-06, "loss": 0.491, "step": 1815 }, { "epoch": 0.6063439065108515, "grad_norm": 1.066488930242745, "learning_rate": 9.68601257004982e-06, "loss": 0.4856, "step": 1816 }, { "epoch": 0.606677796327212, "grad_norm": 1.0860259689436143, "learning_rate": 9.685334661037572e-06, "loss": 0.4896, "step": 1817 }, { "epoch": 0.6070116861435726, "grad_norm": 1.0726765051047789, "learning_rate": 9.684656044775444e-06, "loss": 0.4694, "step": 1818 }, { "epoch": 0.6073455759599332, "grad_norm": 1.0290496413912542, "learning_rate": 9.683976721365872e-06, "loss": 0.468, "step": 1819 }, { "epoch": 0.6076794657762938, "grad_norm": 1.0767044005232356, "learning_rate": 9.6832966909114e-06, "loss": 0.4739, "step": 1820 }, { "epoch": 0.6080133555926545, "grad_norm": 0.984378738425431, "learning_rate": 9.682615953514678e-06, "loss": 0.4147, "step": 1821 }, { "epoch": 0.6083472454090151, "grad_norm": 1.0601769011311022, "learning_rate": 9.681934509278464e-06, "loss": 0.4764, "step": 1822 }, { "epoch": 0.6086811352253756, "grad_norm": 1.0258164246977366, "learning_rate": 9.681252358305621e-06, "loss": 0.478, "step": 1823 }, { "epoch": 0.6090150250417362, "grad_norm": 1.1806477730360416, "learning_rate": 9.680569500699122e-06, "loss": 0.5314, "step": 1824 }, { "epoch": 0.6093489148580968, "grad_norm": 1.112482408138498, "learning_rate": 9.67988593656204e-06, "loss": 0.4915, "step": 1825 }, { "epoch": 0.6096828046744575, "grad_norm": 1.0718094009727077, "learning_rate": 9.679201665997563e-06, "loss": 0.4808, "step": 1826 }, { "epoch": 0.6100166944908181, "grad_norm": 1.0878164518841442, "learning_rate": 9.678516689108975e-06, "loss": 0.4809, "step": 1827 }, { "epoch": 0.6103505843071786, "grad_norm": 1.0170610405902207, "learning_rate": 9.67783100599968e-06, "loss": 0.4623, "step": 1828 }, { "epoch": 0.6106844741235392, "grad_norm": 1.0790854194137758, "learning_rate": 9.67714461677318e-06, "loss": 0.506, "step": 1829 }, { "epoch": 0.6110183639398998, "grad_norm": 1.0692628117593155, "learning_rate": 9.676457521533082e-06, "loss": 0.4964, "step": 1830 }, { "epoch": 0.6113522537562605, "grad_norm": 1.120677028009034, "learning_rate": 9.675769720383105e-06, "loss": 0.4839, "step": 1831 }, { "epoch": 0.6116861435726211, "grad_norm": 1.1205336882398589, "learning_rate": 9.675081213427076e-06, "loss": 0.4657, "step": 1832 }, { "epoch": 0.6120200333889816, "grad_norm": 1.032991181525537, "learning_rate": 9.674392000768918e-06, "loss": 0.4837, "step": 1833 }, { "epoch": 0.6123539232053422, "grad_norm": 1.0736466185441695, "learning_rate": 9.673702082512672e-06, "loss": 0.4859, "step": 1834 }, { "epoch": 0.6126878130217028, "grad_norm": 1.0622579770060723, "learning_rate": 9.673011458762479e-06, "loss": 0.4883, "step": 1835 }, { "epoch": 0.6130217028380635, "grad_norm": 1.0367178313326477, "learning_rate": 9.67232012962259e-06, "loss": 0.4706, "step": 1836 }, { "epoch": 0.6133555926544241, "grad_norm": 1.0148465212627689, "learning_rate": 9.67162809519736e-06, "loss": 0.4758, "step": 1837 }, { "epoch": 0.6136894824707846, "grad_norm": 1.0398847655051593, "learning_rate": 9.670935355591251e-06, "loss": 0.4741, "step": 1838 }, { "epoch": 0.6140233722871452, "grad_norm": 1.0948421098825447, "learning_rate": 9.670241910908833e-06, "loss": 0.4963, "step": 1839 }, { "epoch": 0.6143572621035058, "grad_norm": 1.019994340447896, "learning_rate": 9.66954776125478e-06, "loss": 0.4754, "step": 1840 }, { "epoch": 0.6146911519198665, "grad_norm": 1.0479415084201273, "learning_rate": 9.668852906733874e-06, "loss": 0.4896, "step": 1841 }, { "epoch": 0.6150250417362271, "grad_norm": 1.0542616085099896, "learning_rate": 9.668157347451003e-06, "loss": 0.4776, "step": 1842 }, { "epoch": 0.6153589315525877, "grad_norm": 1.0101833495686183, "learning_rate": 9.667461083511165e-06, "loss": 0.4648, "step": 1843 }, { "epoch": 0.6156928213689482, "grad_norm": 1.0281343984139264, "learning_rate": 9.666764115019455e-06, "loss": 0.4664, "step": 1844 }, { "epoch": 0.6160267111853088, "grad_norm": 1.0526395152136576, "learning_rate": 9.666066442081084e-06, "loss": 0.485, "step": 1845 }, { "epoch": 0.6163606010016695, "grad_norm": 1.1082466180501072, "learning_rate": 9.665368064801364e-06, "loss": 0.4936, "step": 1846 }, { "epoch": 0.6166944908180301, "grad_norm": 1.095126346327165, "learning_rate": 9.664668983285715e-06, "loss": 0.4754, "step": 1847 }, { "epoch": 0.6170283806343907, "grad_norm": 1.0611709825737237, "learning_rate": 9.663969197639662e-06, "loss": 0.4665, "step": 1848 }, { "epoch": 0.6173622704507512, "grad_norm": 1.0833059497410256, "learning_rate": 9.663268707968842e-06, "loss": 0.4936, "step": 1849 }, { "epoch": 0.6176961602671118, "grad_norm": 1.0465547494686473, "learning_rate": 9.662567514378988e-06, "loss": 0.4806, "step": 1850 }, { "epoch": 0.6180300500834724, "grad_norm": 1.1112483478257764, "learning_rate": 9.66186561697595e-06, "loss": 0.4984, "step": 1851 }, { "epoch": 0.6183639398998331, "grad_norm": 1.0281414857755775, "learning_rate": 9.661163015865675e-06, "loss": 0.4684, "step": 1852 }, { "epoch": 0.6186978297161937, "grad_norm": 1.048639755467341, "learning_rate": 9.660459711154222e-06, "loss": 0.4723, "step": 1853 }, { "epoch": 0.6190317195325542, "grad_norm": 1.0455917053018755, "learning_rate": 9.659755702947754e-06, "loss": 0.4824, "step": 1854 }, { "epoch": 0.6193656093489148, "grad_norm": 1.0361479926797734, "learning_rate": 9.659050991352544e-06, "loss": 0.4768, "step": 1855 }, { "epoch": 0.6196994991652754, "grad_norm": 1.0683081005093884, "learning_rate": 9.658345576474964e-06, "loss": 0.4892, "step": 1856 }, { "epoch": 0.6200333889816361, "grad_norm": 1.0365748094457832, "learning_rate": 9.657639458421499e-06, "loss": 0.4874, "step": 1857 }, { "epoch": 0.6203672787979967, "grad_norm": 1.0267586736441299, "learning_rate": 9.656932637298734e-06, "loss": 0.4697, "step": 1858 }, { "epoch": 0.6207011686143573, "grad_norm": 1.0184965396560146, "learning_rate": 9.656225113213368e-06, "loss": 0.4751, "step": 1859 }, { "epoch": 0.6210350584307178, "grad_norm": 0.9772072930657645, "learning_rate": 9.655516886272199e-06, "loss": 0.4621, "step": 1860 }, { "epoch": 0.6213689482470784, "grad_norm": 1.0128648957819923, "learning_rate": 9.654807956582131e-06, "loss": 0.4679, "step": 1861 }, { "epoch": 0.6217028380634391, "grad_norm": 1.0140427049234753, "learning_rate": 9.654098324250183e-06, "loss": 0.4812, "step": 1862 }, { "epoch": 0.6220367278797997, "grad_norm": 1.067719364360891, "learning_rate": 9.653387989383467e-06, "loss": 0.4658, "step": 1863 }, { "epoch": 0.6223706176961603, "grad_norm": 1.076377384888913, "learning_rate": 9.652676952089213e-06, "loss": 0.4887, "step": 1864 }, { "epoch": 0.6227045075125208, "grad_norm": 1.0572645755183192, "learning_rate": 9.65196521247475e-06, "loss": 0.489, "step": 1865 }, { "epoch": 0.6230383973288814, "grad_norm": 1.0268897451226011, "learning_rate": 9.651252770647515e-06, "loss": 0.4938, "step": 1866 }, { "epoch": 0.6233722871452421, "grad_norm": 1.0713850610772666, "learning_rate": 9.65053962671505e-06, "loss": 0.4816, "step": 1867 }, { "epoch": 0.6237061769616027, "grad_norm": 1.0289764786338471, "learning_rate": 9.649825780785005e-06, "loss": 0.4736, "step": 1868 }, { "epoch": 0.6240400667779633, "grad_norm": 1.062737085692753, "learning_rate": 9.649111232965134e-06, "loss": 0.4726, "step": 1869 }, { "epoch": 0.6243739565943238, "grad_norm": 1.0743775789238783, "learning_rate": 9.648395983363299e-06, "loss": 0.4671, "step": 1870 }, { "epoch": 0.6247078464106844, "grad_norm": 1.0912562717237058, "learning_rate": 9.647680032087466e-06, "loss": 0.4993, "step": 1871 }, { "epoch": 0.6250417362270451, "grad_norm": 1.0438987505926247, "learning_rate": 9.646963379245707e-06, "loss": 0.4767, "step": 1872 }, { "epoch": 0.6253756260434057, "grad_norm": 1.017641320054308, "learning_rate": 9.6462460249462e-06, "loss": 0.482, "step": 1873 }, { "epoch": 0.6257095158597663, "grad_norm": 1.14422412925512, "learning_rate": 9.645527969297232e-06, "loss": 0.4632, "step": 1874 }, { "epoch": 0.6260434056761269, "grad_norm": 0.9535481300285111, "learning_rate": 9.644809212407192e-06, "loss": 0.4313, "step": 1875 }, { "epoch": 0.6263772954924874, "grad_norm": 1.0510890678173206, "learning_rate": 9.644089754384575e-06, "loss": 0.4735, "step": 1876 }, { "epoch": 0.6267111853088481, "grad_norm": 0.9733235178780538, "learning_rate": 9.643369595337983e-06, "loss": 0.4657, "step": 1877 }, { "epoch": 0.6270450751252087, "grad_norm": 1.0340497009758167, "learning_rate": 9.642648735376127e-06, "loss": 0.4643, "step": 1878 }, { "epoch": 0.6273789649415693, "grad_norm": 1.089922704106879, "learning_rate": 9.641927174607816e-06, "loss": 0.4878, "step": 1879 }, { "epoch": 0.6277128547579299, "grad_norm": 1.1162371321258373, "learning_rate": 9.641204913141974e-06, "loss": 0.4956, "step": 1880 }, { "epoch": 0.6280467445742904, "grad_norm": 1.0694934444528206, "learning_rate": 9.640481951087623e-06, "loss": 0.4781, "step": 1881 }, { "epoch": 0.6283806343906511, "grad_norm": 1.040807813812242, "learning_rate": 9.639758288553895e-06, "loss": 0.453, "step": 1882 }, { "epoch": 0.6287145242070117, "grad_norm": 1.0807937333429445, "learning_rate": 9.639033925650025e-06, "loss": 0.4758, "step": 1883 }, { "epoch": 0.6290484140233723, "grad_norm": 1.0218741440412573, "learning_rate": 9.63830886248536e-06, "loss": 0.4892, "step": 1884 }, { "epoch": 0.6293823038397329, "grad_norm": 0.9999386082984913, "learning_rate": 9.637583099169341e-06, "loss": 0.4723, "step": 1885 }, { "epoch": 0.6297161936560934, "grad_norm": 1.0016184517443534, "learning_rate": 9.636856635811528e-06, "loss": 0.4691, "step": 1886 }, { "epoch": 0.6300500834724541, "grad_norm": 1.0192859396361786, "learning_rate": 9.636129472521577e-06, "loss": 0.4806, "step": 1887 }, { "epoch": 0.6303839732888147, "grad_norm": 1.049593094019945, "learning_rate": 9.635401609409254e-06, "loss": 0.4686, "step": 1888 }, { "epoch": 0.6307178631051753, "grad_norm": 1.0083330590779238, "learning_rate": 9.63467304658443e-06, "loss": 0.4827, "step": 1889 }, { "epoch": 0.6310517529215359, "grad_norm": 1.0297365308691164, "learning_rate": 9.633943784157082e-06, "loss": 0.4653, "step": 1890 }, { "epoch": 0.6313856427378965, "grad_norm": 1.0012313801937989, "learning_rate": 9.633213822237291e-06, "loss": 0.4803, "step": 1891 }, { "epoch": 0.6317195325542571, "grad_norm": 0.9898134432467482, "learning_rate": 9.632483160935245e-06, "loss": 0.4662, "step": 1892 }, { "epoch": 0.6320534223706177, "grad_norm": 0.9790989518764595, "learning_rate": 9.631751800361236e-06, "loss": 0.4601, "step": 1893 }, { "epoch": 0.6323873121869783, "grad_norm": 1.0453001794869596, "learning_rate": 9.631019740625662e-06, "loss": 0.4886, "step": 1894 }, { "epoch": 0.6327212020033389, "grad_norm": 0.9979924829968218, "learning_rate": 9.63028698183903e-06, "loss": 0.4741, "step": 1895 }, { "epoch": 0.6330550918196995, "grad_norm": 1.0419317528317142, "learning_rate": 9.62955352411195e-06, "loss": 0.4863, "step": 1896 }, { "epoch": 0.6333889816360601, "grad_norm": 1.023943008824261, "learning_rate": 9.628819367555133e-06, "loss": 0.4511, "step": 1897 }, { "epoch": 0.6337228714524207, "grad_norm": 1.03844550617973, "learning_rate": 9.628084512279404e-06, "loss": 0.4637, "step": 1898 }, { "epoch": 0.6340567612687813, "grad_norm": 1.0275251035338022, "learning_rate": 9.627348958395687e-06, "loss": 0.4901, "step": 1899 }, { "epoch": 0.6343906510851419, "grad_norm": 1.023442288770474, "learning_rate": 9.626612706015014e-06, "loss": 0.471, "step": 1900 }, { "epoch": 0.6347245409015025, "grad_norm": 1.0261749536117704, "learning_rate": 9.625875755248522e-06, "loss": 0.479, "step": 1901 }, { "epoch": 0.635058430717863, "grad_norm": 1.0239259620536365, "learning_rate": 9.625138106207455e-06, "loss": 0.4829, "step": 1902 }, { "epoch": 0.6353923205342237, "grad_norm": 1.041559132774179, "learning_rate": 9.624399759003157e-06, "loss": 0.4983, "step": 1903 }, { "epoch": 0.6357262103505843, "grad_norm": 0.9852418938728276, "learning_rate": 9.623660713747086e-06, "loss": 0.4645, "step": 1904 }, { "epoch": 0.6360601001669449, "grad_norm": 1.0265029461922248, "learning_rate": 9.622920970550797e-06, "loss": 0.4861, "step": 1905 }, { "epoch": 0.6363939899833055, "grad_norm": 1.0022011016313932, "learning_rate": 9.622180529525957e-06, "loss": 0.4591, "step": 1906 }, { "epoch": 0.6367278797996662, "grad_norm": 1.0523717769958119, "learning_rate": 9.621439390784335e-06, "loss": 0.4811, "step": 1907 }, { "epoch": 0.6370617696160267, "grad_norm": 1.0175236506390641, "learning_rate": 9.620697554437802e-06, "loss": 0.4593, "step": 1908 }, { "epoch": 0.6373956594323873, "grad_norm": 0.9891321266495015, "learning_rate": 9.619955020598342e-06, "loss": 0.466, "step": 1909 }, { "epoch": 0.6377295492487479, "grad_norm": 1.0244943897497634, "learning_rate": 9.619211789378038e-06, "loss": 0.4717, "step": 1910 }, { "epoch": 0.6380634390651085, "grad_norm": 0.9993929138265737, "learning_rate": 9.618467860889082e-06, "loss": 0.4904, "step": 1911 }, { "epoch": 0.6383973288814692, "grad_norm": 1.0840692214144336, "learning_rate": 9.61772323524377e-06, "loss": 0.4875, "step": 1912 }, { "epoch": 0.6387312186978297, "grad_norm": 1.0516804121285732, "learning_rate": 9.6169779125545e-06, "loss": 0.4897, "step": 1913 }, { "epoch": 0.6390651085141903, "grad_norm": 1.0787054348239749, "learning_rate": 9.616231892933782e-06, "loss": 0.4792, "step": 1914 }, { "epoch": 0.6393989983305509, "grad_norm": 1.015205800454623, "learning_rate": 9.615485176494226e-06, "loss": 0.4685, "step": 1915 }, { "epoch": 0.6397328881469115, "grad_norm": 1.0649483462615692, "learning_rate": 9.614737763348548e-06, "loss": 0.4765, "step": 1916 }, { "epoch": 0.6400667779632722, "grad_norm": 1.0508474088497892, "learning_rate": 9.61398965360957e-06, "loss": 0.48, "step": 1917 }, { "epoch": 0.6404006677796327, "grad_norm": 0.974859994573762, "learning_rate": 9.61324084739022e-06, "loss": 0.4749, "step": 1918 }, { "epoch": 0.6407345575959933, "grad_norm": 1.046039985559171, "learning_rate": 9.61249134480353e-06, "loss": 0.4954, "step": 1919 }, { "epoch": 0.6410684474123539, "grad_norm": 1.0366338022024761, "learning_rate": 9.611741145962634e-06, "loss": 0.494, "step": 1920 }, { "epoch": 0.6414023372287145, "grad_norm": 1.02427509179541, "learning_rate": 9.610990250980778e-06, "loss": 0.4574, "step": 1921 }, { "epoch": 0.6417362270450752, "grad_norm": 1.0054632046299503, "learning_rate": 9.61023865997131e-06, "loss": 0.4517, "step": 1922 }, { "epoch": 0.6420701168614358, "grad_norm": 1.0531211510454115, "learning_rate": 9.609486373047679e-06, "loss": 0.502, "step": 1923 }, { "epoch": 0.6424040066777963, "grad_norm": 1.094830040111955, "learning_rate": 9.608733390323443e-06, "loss": 0.506, "step": 1924 }, { "epoch": 0.6427378964941569, "grad_norm": 1.0034419315283936, "learning_rate": 9.607979711912267e-06, "loss": 0.458, "step": 1925 }, { "epoch": 0.6430717863105175, "grad_norm": 0.9538239628258768, "learning_rate": 9.607225337927916e-06, "loss": 0.4607, "step": 1926 }, { "epoch": 0.6434056761268782, "grad_norm": 1.0306470058553099, "learning_rate": 9.606470268484265e-06, "loss": 0.4781, "step": 1927 }, { "epoch": 0.6437395659432388, "grad_norm": 1.0876016353341158, "learning_rate": 9.60571450369529e-06, "loss": 0.4952, "step": 1928 }, { "epoch": 0.6440734557595993, "grad_norm": 0.9740015832695825, "learning_rate": 9.604958043675072e-06, "loss": 0.4617, "step": 1929 }, { "epoch": 0.6444073455759599, "grad_norm": 1.0042771042058314, "learning_rate": 9.604200888537803e-06, "loss": 0.4589, "step": 1930 }, { "epoch": 0.6447412353923205, "grad_norm": 1.0275462987442, "learning_rate": 9.60344303839777e-06, "loss": 0.4786, "step": 1931 }, { "epoch": 0.6450751252086812, "grad_norm": 1.0693119646034785, "learning_rate": 9.602684493369375e-06, "loss": 0.4992, "step": 1932 }, { "epoch": 0.6454090150250418, "grad_norm": 1.0027527151280091, "learning_rate": 9.601925253567114e-06, "loss": 0.4575, "step": 1933 }, { "epoch": 0.6457429048414023, "grad_norm": 1.034159304206499, "learning_rate": 9.6011653191056e-06, "loss": 0.4759, "step": 1934 }, { "epoch": 0.6460767946577629, "grad_norm": 1.061865701745986, "learning_rate": 9.600404690099544e-06, "loss": 0.4913, "step": 1935 }, { "epoch": 0.6464106844741235, "grad_norm": 1.0225399791199026, "learning_rate": 9.59964336666376e-06, "loss": 0.4748, "step": 1936 }, { "epoch": 0.6467445742904842, "grad_norm": 0.9800926391738627, "learning_rate": 9.598881348913171e-06, "loss": 0.4574, "step": 1937 }, { "epoch": 0.6470784641068448, "grad_norm": 0.9831212407597226, "learning_rate": 9.598118636962806e-06, "loss": 0.4761, "step": 1938 }, { "epoch": 0.6474123539232054, "grad_norm": 1.0019673661452113, "learning_rate": 9.59735523092779e-06, "loss": 0.4702, "step": 1939 }, { "epoch": 0.6477462437395659, "grad_norm": 1.0068358817219776, "learning_rate": 9.596591130923363e-06, "loss": 0.4659, "step": 1940 }, { "epoch": 0.6480801335559265, "grad_norm": 1.0341000324780913, "learning_rate": 9.595826337064866e-06, "loss": 0.4797, "step": 1941 }, { "epoch": 0.6484140233722872, "grad_norm": 1.0225253190056884, "learning_rate": 9.595060849467743e-06, "loss": 0.4688, "step": 1942 }, { "epoch": 0.6487479131886478, "grad_norm": 1.10480996325665, "learning_rate": 9.594294668247546e-06, "loss": 0.505, "step": 1943 }, { "epoch": 0.6490818030050084, "grad_norm": 1.0027106263341174, "learning_rate": 9.593527793519928e-06, "loss": 0.4721, "step": 1944 }, { "epoch": 0.6494156928213689, "grad_norm": 1.008192245612592, "learning_rate": 9.592760225400647e-06, "loss": 0.4632, "step": 1945 }, { "epoch": 0.6497495826377295, "grad_norm": 1.0208689202091912, "learning_rate": 9.59199196400557e-06, "loss": 0.4886, "step": 1946 }, { "epoch": 0.6500834724540901, "grad_norm": 1.0574766804869549, "learning_rate": 9.591223009450665e-06, "loss": 0.4828, "step": 1947 }, { "epoch": 0.6504173622704508, "grad_norm": 1.034349120369625, "learning_rate": 9.590453361852006e-06, "loss": 0.4795, "step": 1948 }, { "epoch": 0.6507512520868114, "grad_norm": 1.0479201586158156, "learning_rate": 9.589683021325771e-06, "loss": 0.4858, "step": 1949 }, { "epoch": 0.6510851419031719, "grad_norm": 1.0155639151822096, "learning_rate": 9.588911987988241e-06, "loss": 0.462, "step": 1950 }, { "epoch": 0.6514190317195325, "grad_norm": 1.0389126941252866, "learning_rate": 9.588140261955805e-06, "loss": 0.4891, "step": 1951 }, { "epoch": 0.6517529215358931, "grad_norm": 1.0607159942259328, "learning_rate": 9.587367843344954e-06, "loss": 0.4818, "step": 1952 }, { "epoch": 0.6520868113522538, "grad_norm": 1.0305902888946934, "learning_rate": 9.586594732272284e-06, "loss": 0.4764, "step": 1953 }, { "epoch": 0.6524207011686144, "grad_norm": 1.0123979699340335, "learning_rate": 9.585820928854497e-06, "loss": 0.4704, "step": 1954 }, { "epoch": 0.6527545909849749, "grad_norm": 0.9998993229488877, "learning_rate": 9.585046433208399e-06, "loss": 0.4773, "step": 1955 }, { "epoch": 0.6530884808013355, "grad_norm": 0.9984783833340973, "learning_rate": 9.584271245450898e-06, "loss": 0.4558, "step": 1956 }, { "epoch": 0.6534223706176961, "grad_norm": 1.0401387136548819, "learning_rate": 9.58349536569901e-06, "loss": 0.4773, "step": 1957 }, { "epoch": 0.6537562604340568, "grad_norm": 1.0218772516418435, "learning_rate": 9.582718794069852e-06, "loss": 0.4809, "step": 1958 }, { "epoch": 0.6540901502504174, "grad_norm": 1.0173105396472493, "learning_rate": 9.581941530680647e-06, "loss": 0.4842, "step": 1959 }, { "epoch": 0.654424040066778, "grad_norm": 1.0197596615610045, "learning_rate": 9.581163575648727e-06, "loss": 0.4919, "step": 1960 }, { "epoch": 0.6547579298831385, "grad_norm": 1.0022125118442275, "learning_rate": 9.580384929091517e-06, "loss": 0.4807, "step": 1961 }, { "epoch": 0.6550918196994991, "grad_norm": 1.0171890365605754, "learning_rate": 9.579605591126561e-06, "loss": 0.4889, "step": 1962 }, { "epoch": 0.6554257095158598, "grad_norm": 1.02646026861356, "learning_rate": 9.578825561871495e-06, "loss": 0.4737, "step": 1963 }, { "epoch": 0.6557595993322204, "grad_norm": 1.0748820302607898, "learning_rate": 9.578044841444065e-06, "loss": 0.4922, "step": 1964 }, { "epoch": 0.656093489148581, "grad_norm": 1.044495162194982, "learning_rate": 9.577263429962121e-06, "loss": 0.4816, "step": 1965 }, { "epoch": 0.6564273789649415, "grad_norm": 1.0791002592720016, "learning_rate": 9.576481327543618e-06, "loss": 0.4839, "step": 1966 }, { "epoch": 0.6567612687813021, "grad_norm": 1.0052022834276393, "learning_rate": 9.575698534306613e-06, "loss": 0.4716, "step": 1967 }, { "epoch": 0.6570951585976628, "grad_norm": 1.0300090047075592, "learning_rate": 9.574915050369266e-06, "loss": 0.4672, "step": 1968 }, { "epoch": 0.6574290484140234, "grad_norm": 1.0411845916504368, "learning_rate": 9.574130875849847e-06, "loss": 0.474, "step": 1969 }, { "epoch": 0.657762938230384, "grad_norm": 0.9858770851427541, "learning_rate": 9.573346010866726e-06, "loss": 0.4626, "step": 1970 }, { "epoch": 0.6580968280467445, "grad_norm": 1.0825642923256704, "learning_rate": 9.57256045553838e-06, "loss": 0.491, "step": 1971 }, { "epoch": 0.6584307178631051, "grad_norm": 0.9892212047435976, "learning_rate": 9.571774209983384e-06, "loss": 0.471, "step": 1972 }, { "epoch": 0.6587646076794658, "grad_norm": 1.050645276130837, "learning_rate": 9.570987274320424e-06, "loss": 0.4921, "step": 1973 }, { "epoch": 0.6590984974958264, "grad_norm": 0.9898473468106272, "learning_rate": 9.57019964866829e-06, "loss": 0.4456, "step": 1974 }, { "epoch": 0.659432387312187, "grad_norm": 0.9622822659298476, "learning_rate": 9.56941133314587e-06, "loss": 0.4519, "step": 1975 }, { "epoch": 0.6597662771285476, "grad_norm": 0.9644108351899163, "learning_rate": 9.568622327872161e-06, "loss": 0.4492, "step": 1976 }, { "epoch": 0.6601001669449081, "grad_norm": 0.999827669844851, "learning_rate": 9.567832632966265e-06, "loss": 0.4643, "step": 1977 }, { "epoch": 0.6604340567612688, "grad_norm": 1.0251289184092045, "learning_rate": 9.567042248547383e-06, "loss": 0.4469, "step": 1978 }, { "epoch": 0.6607679465776294, "grad_norm": 1.025355936431034, "learning_rate": 9.566251174734826e-06, "loss": 0.4798, "step": 1979 }, { "epoch": 0.66110183639399, "grad_norm": 1.008715791180879, "learning_rate": 9.565459411648005e-06, "loss": 0.4524, "step": 1980 }, { "epoch": 0.6614357262103506, "grad_norm": 1.004628616632578, "learning_rate": 9.564666959406439e-06, "loss": 0.4616, "step": 1981 }, { "epoch": 0.6617696160267111, "grad_norm": 1.0771697744963373, "learning_rate": 9.563873818129745e-06, "loss": 0.4673, "step": 1982 }, { "epoch": 0.6621035058430718, "grad_norm": 1.0002388038344816, "learning_rate": 9.56307998793765e-06, "loss": 0.4732, "step": 1983 }, { "epoch": 0.6624373956594324, "grad_norm": 1.0480659141388264, "learning_rate": 9.562285468949978e-06, "loss": 0.4737, "step": 1984 }, { "epoch": 0.662771285475793, "grad_norm": 1.048205649177646, "learning_rate": 9.561490261286666e-06, "loss": 0.4646, "step": 1985 }, { "epoch": 0.6631051752921536, "grad_norm": 0.9954902043373768, "learning_rate": 9.560694365067751e-06, "loss": 0.4628, "step": 1986 }, { "epoch": 0.6634390651085141, "grad_norm": 1.0189544992411987, "learning_rate": 9.559897780413369e-06, "loss": 0.4555, "step": 1987 }, { "epoch": 0.6637729549248748, "grad_norm": 1.028758330736898, "learning_rate": 9.559100507443767e-06, "loss": 0.4909, "step": 1988 }, { "epoch": 0.6641068447412354, "grad_norm": 0.991712335200119, "learning_rate": 9.558302546279291e-06, "loss": 0.4491, "step": 1989 }, { "epoch": 0.664440734557596, "grad_norm": 1.0228293171763214, "learning_rate": 9.557503897040398e-06, "loss": 0.4822, "step": 1990 }, { "epoch": 0.6647746243739566, "grad_norm": 1.0441219236523813, "learning_rate": 9.556704559847638e-06, "loss": 0.4879, "step": 1991 }, { "epoch": 0.6651085141903172, "grad_norm": 1.024075867307426, "learning_rate": 9.555904534821675e-06, "loss": 0.439, "step": 1992 }, { "epoch": 0.6654424040066778, "grad_norm": 0.9972050534473275, "learning_rate": 9.55510382208327e-06, "loss": 0.4605, "step": 1993 }, { "epoch": 0.6657762938230384, "grad_norm": 0.9733299154167371, "learning_rate": 9.55430242175329e-06, "loss": 0.4708, "step": 1994 }, { "epoch": 0.666110183639399, "grad_norm": 0.9813452290728532, "learning_rate": 9.553500333952708e-06, "loss": 0.4735, "step": 1995 }, { "epoch": 0.6664440734557596, "grad_norm": 1.0078833471163808, "learning_rate": 9.5526975588026e-06, "loss": 0.4589, "step": 1996 }, { "epoch": 0.6667779632721202, "grad_norm": 0.9997922017612151, "learning_rate": 9.55189409642414e-06, "loss": 0.4736, "step": 1997 }, { "epoch": 0.6671118530884808, "grad_norm": 1.0799982618378818, "learning_rate": 9.551089946938614e-06, "loss": 0.4749, "step": 1998 }, { "epoch": 0.6674457429048414, "grad_norm": 1.0000181842656115, "learning_rate": 9.550285110467407e-06, "loss": 0.4708, "step": 1999 }, { "epoch": 0.667779632721202, "grad_norm": 0.9962934014501261, "learning_rate": 9.54947958713201e-06, "loss": 0.4475, "step": 2000 }, { "epoch": 0.6681135225375626, "grad_norm": 1.04664972440087, "learning_rate": 9.548673377054014e-06, "loss": 0.4712, "step": 2001 }, { "epoch": 0.6684474123539232, "grad_norm": 1.0079235120267624, "learning_rate": 9.547866480355121e-06, "loss": 0.4625, "step": 2002 }, { "epoch": 0.6687813021702838, "grad_norm": 1.0226722216380868, "learning_rate": 9.547058897157127e-06, "loss": 0.4722, "step": 2003 }, { "epoch": 0.6691151919866444, "grad_norm": 1.012283417254806, "learning_rate": 9.546250627581937e-06, "loss": 0.47, "step": 2004 }, { "epoch": 0.669449081803005, "grad_norm": 1.0195923484767195, "learning_rate": 9.545441671751562e-06, "loss": 0.4729, "step": 2005 }, { "epoch": 0.6697829716193656, "grad_norm": 1.0455857687745156, "learning_rate": 9.54463202978811e-06, "loss": 0.4843, "step": 2006 }, { "epoch": 0.6701168614357262, "grad_norm": 1.0396336624530482, "learning_rate": 9.5438217018138e-06, "loss": 0.4865, "step": 2007 }, { "epoch": 0.6704507512520869, "grad_norm": 1.028478614292583, "learning_rate": 9.543010687950946e-06, "loss": 0.4517, "step": 2008 }, { "epoch": 0.6707846410684474, "grad_norm": 1.021773989557662, "learning_rate": 9.542198988321976e-06, "loss": 0.4859, "step": 2009 }, { "epoch": 0.671118530884808, "grad_norm": 1.0299724668172514, "learning_rate": 9.541386603049411e-06, "loss": 0.4596, "step": 2010 }, { "epoch": 0.6714524207011686, "grad_norm": 1.028834289508438, "learning_rate": 9.540573532255883e-06, "loss": 0.4763, "step": 2011 }, { "epoch": 0.6717863105175292, "grad_norm": 1.006355621795006, "learning_rate": 9.539759776064125e-06, "loss": 0.4676, "step": 2012 }, { "epoch": 0.6721202003338899, "grad_norm": 0.9855806257625108, "learning_rate": 9.53894533459697e-06, "loss": 0.4645, "step": 2013 }, { "epoch": 0.6724540901502504, "grad_norm": 1.0073134381193345, "learning_rate": 9.538130207977363e-06, "loss": 0.4493, "step": 2014 }, { "epoch": 0.672787979966611, "grad_norm": 1.0010643959828103, "learning_rate": 9.537314396328342e-06, "loss": 0.4545, "step": 2015 }, { "epoch": 0.6731218697829716, "grad_norm": 1.0372128127381275, "learning_rate": 9.536497899773057e-06, "loss": 0.4643, "step": 2016 }, { "epoch": 0.6734557595993322, "grad_norm": 1.005225478245749, "learning_rate": 9.535680718434755e-06, "loss": 0.4842, "step": 2017 }, { "epoch": 0.6737896494156929, "grad_norm": 1.0456683610269917, "learning_rate": 9.53486285243679e-06, "loss": 0.4819, "step": 2018 }, { "epoch": 0.6741235392320534, "grad_norm": 0.9722271360600316, "learning_rate": 9.534044301902621e-06, "loss": 0.4394, "step": 2019 }, { "epoch": 0.674457429048414, "grad_norm": 1.0516186528048614, "learning_rate": 9.533225066955807e-06, "loss": 0.4864, "step": 2020 }, { "epoch": 0.6747913188647746, "grad_norm": 0.983868870453483, "learning_rate": 9.532405147720009e-06, "loss": 0.4628, "step": 2021 }, { "epoch": 0.6751252086811352, "grad_norm": 1.0349969199414644, "learning_rate": 9.531584544318996e-06, "loss": 0.4655, "step": 2022 }, { "epoch": 0.6754590984974959, "grad_norm": 0.9929926836091452, "learning_rate": 9.530763256876638e-06, "loss": 0.4624, "step": 2023 }, { "epoch": 0.6757929883138565, "grad_norm": 1.0334302884140067, "learning_rate": 9.529941285516906e-06, "loss": 0.474, "step": 2024 }, { "epoch": 0.676126878130217, "grad_norm": 1.015610105648912, "learning_rate": 9.529118630363879e-06, "loss": 0.4819, "step": 2025 }, { "epoch": 0.6764607679465776, "grad_norm": 0.9612749709569808, "learning_rate": 9.528295291541733e-06, "loss": 0.4768, "step": 2026 }, { "epoch": 0.6767946577629382, "grad_norm": 0.9686017000805985, "learning_rate": 9.527471269174754e-06, "loss": 0.4654, "step": 2027 }, { "epoch": 0.6771285475792989, "grad_norm": 0.970824338906515, "learning_rate": 9.526646563387328e-06, "loss": 0.4559, "step": 2028 }, { "epoch": 0.6774624373956595, "grad_norm": 1.0495752427310114, "learning_rate": 9.52582117430394e-06, "loss": 0.4863, "step": 2029 }, { "epoch": 0.67779632721202, "grad_norm": 0.9785745023729849, "learning_rate": 9.524995102049185e-06, "loss": 0.4602, "step": 2030 }, { "epoch": 0.6781302170283806, "grad_norm": 1.0237344845411456, "learning_rate": 9.524168346747762e-06, "loss": 0.4986, "step": 2031 }, { "epoch": 0.6784641068447412, "grad_norm": 1.0113054654402485, "learning_rate": 9.523340908524463e-06, "loss": 0.4683, "step": 2032 }, { "epoch": 0.6787979966611019, "grad_norm": 0.9873892125286107, "learning_rate": 9.522512787504195e-06, "loss": 0.4786, "step": 2033 }, { "epoch": 0.6791318864774625, "grad_norm": 1.0336940840614997, "learning_rate": 9.521683983811957e-06, "loss": 0.4797, "step": 2034 }, { "epoch": 0.679465776293823, "grad_norm": 1.0316260899188083, "learning_rate": 9.52085449757286e-06, "loss": 0.461, "step": 2035 }, { "epoch": 0.6797996661101836, "grad_norm": 0.9969086380244632, "learning_rate": 9.520024328912117e-06, "loss": 0.4488, "step": 2036 }, { "epoch": 0.6801335559265442, "grad_norm": 1.0396831736360932, "learning_rate": 9.519193477955038e-06, "loss": 0.4754, "step": 2037 }, { "epoch": 0.6804674457429049, "grad_norm": 1.0386138471674236, "learning_rate": 9.518361944827042e-06, "loss": 0.4839, "step": 2038 }, { "epoch": 0.6808013355592655, "grad_norm": 1.0514528126812837, "learning_rate": 9.517529729653645e-06, "loss": 0.471, "step": 2039 }, { "epoch": 0.6811352253756261, "grad_norm": 1.0102890179404074, "learning_rate": 9.516696832560473e-06, "loss": 0.4649, "step": 2040 }, { "epoch": 0.6814691151919866, "grad_norm": 0.9867246977041393, "learning_rate": 9.515863253673253e-06, "loss": 0.4558, "step": 2041 }, { "epoch": 0.6818030050083472, "grad_norm": 0.9900149103023519, "learning_rate": 9.515028993117809e-06, "loss": 0.4572, "step": 2042 }, { "epoch": 0.6821368948247079, "grad_norm": 0.9477924489254734, "learning_rate": 9.514194051020076e-06, "loss": 0.4552, "step": 2043 }, { "epoch": 0.6824707846410685, "grad_norm": 0.9880858345440642, "learning_rate": 9.513358427506085e-06, "loss": 0.4528, "step": 2044 }, { "epoch": 0.6828046744574291, "grad_norm": 1.0912391867430262, "learning_rate": 9.512522122701976e-06, "loss": 0.4852, "step": 2045 }, { "epoch": 0.6831385642737896, "grad_norm": 0.9837071675420219, "learning_rate": 9.511685136733987e-06, "loss": 0.465, "step": 2046 }, { "epoch": 0.6834724540901502, "grad_norm": 0.9939071994388641, "learning_rate": 9.510847469728464e-06, "loss": 0.4554, "step": 2047 }, { "epoch": 0.6838063439065108, "grad_norm": 0.9852162831487279, "learning_rate": 9.510009121811849e-06, "loss": 0.442, "step": 2048 }, { "epoch": 0.6841402337228715, "grad_norm": 1.0235049861949528, "learning_rate": 9.509170093110693e-06, "loss": 0.4539, "step": 2049 }, { "epoch": 0.6844741235392321, "grad_norm": 1.0293221423929995, "learning_rate": 9.508330383751645e-06, "loss": 0.4669, "step": 2050 }, { "epoch": 0.6848080133555926, "grad_norm": 1.0034902730314448, "learning_rate": 9.50748999386146e-06, "loss": 0.4756, "step": 2051 }, { "epoch": 0.6851419031719532, "grad_norm": 0.951920768096253, "learning_rate": 9.506648923566993e-06, "loss": 0.4485, "step": 2052 }, { "epoch": 0.6854757929883138, "grad_norm": 0.9982282337451881, "learning_rate": 9.505807172995206e-06, "loss": 0.4583, "step": 2053 }, { "epoch": 0.6858096828046745, "grad_norm": 0.9366581868151616, "learning_rate": 9.504964742273159e-06, "loss": 0.4434, "step": 2054 }, { "epoch": 0.6861435726210351, "grad_norm": 1.0074597418718367, "learning_rate": 9.504121631528018e-06, "loss": 0.4676, "step": 2055 }, { "epoch": 0.6864774624373957, "grad_norm": 1.0666968722003263, "learning_rate": 9.50327784088705e-06, "loss": 0.4779, "step": 2056 }, { "epoch": 0.6868113522537562, "grad_norm": 0.9373892925751819, "learning_rate": 9.502433370477625e-06, "loss": 0.4645, "step": 2057 }, { "epoch": 0.6871452420701168, "grad_norm": 0.9654742266592217, "learning_rate": 9.501588220427214e-06, "loss": 0.4628, "step": 2058 }, { "epoch": 0.6874791318864775, "grad_norm": 0.9707672128430754, "learning_rate": 9.500742390863393e-06, "loss": 0.4519, "step": 2059 }, { "epoch": 0.6878130217028381, "grad_norm": 1.033494208271974, "learning_rate": 9.49989588191384e-06, "loss": 0.4691, "step": 2060 }, { "epoch": 0.6881469115191987, "grad_norm": 0.9724202559411813, "learning_rate": 9.499048693706336e-06, "loss": 0.4541, "step": 2061 }, { "epoch": 0.6884808013355592, "grad_norm": 0.9911956202823096, "learning_rate": 9.498200826368762e-06, "loss": 0.4716, "step": 2062 }, { "epoch": 0.6888146911519198, "grad_norm": 0.960197479099812, "learning_rate": 9.497352280029106e-06, "loss": 0.4597, "step": 2063 }, { "epoch": 0.6891485809682805, "grad_norm": 0.997197074508443, "learning_rate": 9.496503054815454e-06, "loss": 0.4675, "step": 2064 }, { "epoch": 0.6894824707846411, "grad_norm": 0.9685045042007395, "learning_rate": 9.495653150855995e-06, "loss": 0.4613, "step": 2065 }, { "epoch": 0.6898163606010017, "grad_norm": 0.9861960836147552, "learning_rate": 9.494802568279024e-06, "loss": 0.4574, "step": 2066 }, { "epoch": 0.6901502504173622, "grad_norm": 1.0127437742722616, "learning_rate": 9.493951307212936e-06, "loss": 0.47, "step": 2067 }, { "epoch": 0.6904841402337228, "grad_norm": 0.9537340894644947, "learning_rate": 9.493099367786228e-06, "loss": 0.4565, "step": 2068 }, { "epoch": 0.6908180300500835, "grad_norm": 0.9861865616708394, "learning_rate": 9.4922467501275e-06, "loss": 0.4761, "step": 2069 }, { "epoch": 0.6911519198664441, "grad_norm": 0.9395610059688548, "learning_rate": 9.491393454365455e-06, "loss": 0.4467, "step": 2070 }, { "epoch": 0.6914858096828047, "grad_norm": 1.0194386891709364, "learning_rate": 9.490539480628898e-06, "loss": 0.4705, "step": 2071 }, { "epoch": 0.6918196994991653, "grad_norm": 0.9731032270115949, "learning_rate": 9.489684829046732e-06, "loss": 0.448, "step": 2072 }, { "epoch": 0.6921535893155258, "grad_norm": 0.9934364566817743, "learning_rate": 9.488829499747973e-06, "loss": 0.4644, "step": 2073 }, { "epoch": 0.6924874791318865, "grad_norm": 0.9852212142794685, "learning_rate": 9.487973492861729e-06, "loss": 0.4723, "step": 2074 }, { "epoch": 0.6928213689482471, "grad_norm": 0.968006582109069, "learning_rate": 9.487116808517212e-06, "loss": 0.4609, "step": 2075 }, { "epoch": 0.6931552587646077, "grad_norm": 1.010227142620974, "learning_rate": 9.486259446843743e-06, "loss": 0.465, "step": 2076 }, { "epoch": 0.6934891485809683, "grad_norm": 1.007760901749721, "learning_rate": 9.485401407970737e-06, "loss": 0.488, "step": 2077 }, { "epoch": 0.6938230383973288, "grad_norm": 0.9378636126643762, "learning_rate": 9.484542692027716e-06, "loss": 0.4487, "step": 2078 }, { "epoch": 0.6941569282136895, "grad_norm": 0.9873533465154052, "learning_rate": 9.483683299144304e-06, "loss": 0.4726, "step": 2079 }, { "epoch": 0.6944908180300501, "grad_norm": 0.9755487690633222, "learning_rate": 9.482823229450224e-06, "loss": 0.457, "step": 2080 }, { "epoch": 0.6948247078464107, "grad_norm": 1.0107751572020718, "learning_rate": 9.481962483075305e-06, "loss": 0.4564, "step": 2081 }, { "epoch": 0.6951585976627713, "grad_norm": 0.9725835017907183, "learning_rate": 9.481101060149474e-06, "loss": 0.451, "step": 2082 }, { "epoch": 0.6954924874791318, "grad_norm": 1.018699206217049, "learning_rate": 9.480238960802764e-06, "loss": 0.4724, "step": 2083 }, { "epoch": 0.6958263772954925, "grad_norm": 1.0024456871453158, "learning_rate": 9.47937618516531e-06, "loss": 0.482, "step": 2084 }, { "epoch": 0.6961602671118531, "grad_norm": 1.00978831246322, "learning_rate": 9.478512733367345e-06, "loss": 0.4731, "step": 2085 }, { "epoch": 0.6964941569282137, "grad_norm": 0.98305309984543, "learning_rate": 9.477648605539209e-06, "loss": 0.4765, "step": 2086 }, { "epoch": 0.6968280467445743, "grad_norm": 0.9860942366714842, "learning_rate": 9.476783801811341e-06, "loss": 0.473, "step": 2087 }, { "epoch": 0.6971619365609348, "grad_norm": 0.9698283251713963, "learning_rate": 9.475918322314284e-06, "loss": 0.4606, "step": 2088 }, { "epoch": 0.6974958263772955, "grad_norm": 0.9610663189221141, "learning_rate": 9.47505216717868e-06, "loss": 0.4478, "step": 2089 }, { "epoch": 0.6978297161936561, "grad_norm": 0.932861915504336, "learning_rate": 9.474185336535276e-06, "loss": 0.4632, "step": 2090 }, { "epoch": 0.6981636060100167, "grad_norm": 0.9163114488096369, "learning_rate": 9.473317830514918e-06, "loss": 0.4476, "step": 2091 }, { "epoch": 0.6984974958263773, "grad_norm": 0.9422913481642725, "learning_rate": 9.472449649248559e-06, "loss": 0.4246, "step": 2092 }, { "epoch": 0.698831385642738, "grad_norm": 0.9837196307545167, "learning_rate": 9.471580792867248e-06, "loss": 0.4757, "step": 2093 }, { "epoch": 0.6991652754590985, "grad_norm": 0.948194955189659, "learning_rate": 9.470711261502141e-06, "loss": 0.4534, "step": 2094 }, { "epoch": 0.6994991652754591, "grad_norm": 0.9967178793212651, "learning_rate": 9.469841055284491e-06, "loss": 0.4763, "step": 2095 }, { "epoch": 0.6998330550918197, "grad_norm": 0.9547630586669271, "learning_rate": 9.468970174345657e-06, "loss": 0.4637, "step": 2096 }, { "epoch": 0.7001669449081803, "grad_norm": 1.0124910686914173, "learning_rate": 9.468098618817097e-06, "loss": 0.4566, "step": 2097 }, { "epoch": 0.7005008347245409, "grad_norm": 1.0463768837020289, "learning_rate": 9.467226388830372e-06, "loss": 0.4761, "step": 2098 }, { "epoch": 0.7008347245409015, "grad_norm": 0.9364951755635409, "learning_rate": 9.466353484517148e-06, "loss": 0.4389, "step": 2099 }, { "epoch": 0.7011686143572621, "grad_norm": 0.9632310409790996, "learning_rate": 9.465479906009187e-06, "loss": 0.4513, "step": 2100 }, { "epoch": 0.7015025041736227, "grad_norm": 0.9514195438132383, "learning_rate": 9.464605653438354e-06, "loss": 0.4422, "step": 2101 }, { "epoch": 0.7018363939899833, "grad_norm": 0.9452011333973411, "learning_rate": 9.463730726936619e-06, "loss": 0.4525, "step": 2102 }, { "epoch": 0.7021702838063439, "grad_norm": 0.968342094026541, "learning_rate": 9.462855126636053e-06, "loss": 0.4653, "step": 2103 }, { "epoch": 0.7025041736227045, "grad_norm": 0.9523873538325971, "learning_rate": 9.461978852668827e-06, "loss": 0.4582, "step": 2104 }, { "epoch": 0.7028380634390651, "grad_norm": 0.9997805475224059, "learning_rate": 9.461101905167211e-06, "loss": 0.4711, "step": 2105 }, { "epoch": 0.7031719532554257, "grad_norm": 0.9901219423686629, "learning_rate": 9.460224284263583e-06, "loss": 0.4552, "step": 2106 }, { "epoch": 0.7035058430717863, "grad_norm": 1.020937066020677, "learning_rate": 9.459345990090421e-06, "loss": 0.4595, "step": 2107 }, { "epoch": 0.7038397328881469, "grad_norm": 0.9613069665422157, "learning_rate": 9.458467022780301e-06, "loss": 0.456, "step": 2108 }, { "epoch": 0.7041736227045076, "grad_norm": 0.9318641977255749, "learning_rate": 9.457587382465903e-06, "loss": 0.4548, "step": 2109 }, { "epoch": 0.7045075125208681, "grad_norm": 1.019395136413758, "learning_rate": 9.456707069280008e-06, "loss": 0.4912, "step": 2110 }, { "epoch": 0.7048414023372287, "grad_norm": 0.9216811441377718, "learning_rate": 9.455826083355502e-06, "loss": 0.4382, "step": 2111 }, { "epoch": 0.7051752921535893, "grad_norm": 0.9615725815200324, "learning_rate": 9.454944424825365e-06, "loss": 0.4698, "step": 2112 }, { "epoch": 0.7055091819699499, "grad_norm": 0.9611986857705681, "learning_rate": 9.454062093822686e-06, "loss": 0.4385, "step": 2113 }, { "epoch": 0.7058430717863106, "grad_norm": 0.9297039296144134, "learning_rate": 9.453179090480653e-06, "loss": 0.4547, "step": 2114 }, { "epoch": 0.7061769616026711, "grad_norm": 0.957830219213674, "learning_rate": 9.452295414932553e-06, "loss": 0.4576, "step": 2115 }, { "epoch": 0.7065108514190317, "grad_norm": 0.99874719800253, "learning_rate": 9.45141106731178e-06, "loss": 0.467, "step": 2116 }, { "epoch": 0.7068447412353923, "grad_norm": 0.9456818828385138, "learning_rate": 9.45052604775182e-06, "loss": 0.4402, "step": 2117 }, { "epoch": 0.7071786310517529, "grad_norm": 0.9278174987999472, "learning_rate": 9.449640356386275e-06, "loss": 0.4344, "step": 2118 }, { "epoch": 0.7075125208681136, "grad_norm": 0.9685337434566481, "learning_rate": 9.44875399334883e-06, "loss": 0.4599, "step": 2119 }, { "epoch": 0.7078464106844741, "grad_norm": 0.9406195278916997, "learning_rate": 9.447866958773288e-06, "loss": 0.4577, "step": 2120 }, { "epoch": 0.7081803005008347, "grad_norm": 0.952916919098285, "learning_rate": 9.446979252793546e-06, "loss": 0.4621, "step": 2121 }, { "epoch": 0.7085141903171953, "grad_norm": 0.9385542883720763, "learning_rate": 9.4460908755436e-06, "loss": 0.4375, "step": 2122 }, { "epoch": 0.7088480801335559, "grad_norm": 0.9731551249649911, "learning_rate": 9.445201827157555e-06, "loss": 0.4721, "step": 2123 }, { "epoch": 0.7091819699499166, "grad_norm": 0.9835013765282958, "learning_rate": 9.444312107769606e-06, "loss": 0.4653, "step": 2124 }, { "epoch": 0.7095158597662772, "grad_norm": 0.9894569268842363, "learning_rate": 9.443421717514063e-06, "loss": 0.4565, "step": 2125 }, { "epoch": 0.7098497495826377, "grad_norm": 0.9816858422956128, "learning_rate": 9.442530656525326e-06, "loss": 0.457, "step": 2126 }, { "epoch": 0.7101836393989983, "grad_norm": 0.9641751865485925, "learning_rate": 9.4416389249379e-06, "loss": 0.4402, "step": 2127 }, { "epoch": 0.7105175292153589, "grad_norm": 0.988098350254109, "learning_rate": 9.440746522886393e-06, "loss": 0.453, "step": 2128 }, { "epoch": 0.7108514190317196, "grad_norm": 1.0600358946759072, "learning_rate": 9.439853450505512e-06, "loss": 0.4851, "step": 2129 }, { "epoch": 0.7111853088480802, "grad_norm": 0.9503885233073149, "learning_rate": 9.438959707930066e-06, "loss": 0.4659, "step": 2130 }, { "epoch": 0.7115191986644407, "grad_norm": 0.9612353833332808, "learning_rate": 9.438065295294967e-06, "loss": 0.4536, "step": 2131 }, { "epoch": 0.7118530884808013, "grad_norm": 0.9813008324926398, "learning_rate": 9.437170212735225e-06, "loss": 0.4442, "step": 2132 }, { "epoch": 0.7121869782971619, "grad_norm": 0.9581040325553482, "learning_rate": 9.436274460385953e-06, "loss": 0.4575, "step": 2133 }, { "epoch": 0.7125208681135226, "grad_norm": 0.9647876624960497, "learning_rate": 9.435378038382364e-06, "loss": 0.457, "step": 2134 }, { "epoch": 0.7128547579298832, "grad_norm": 0.9685785783142735, "learning_rate": 9.43448094685977e-06, "loss": 0.447, "step": 2135 }, { "epoch": 0.7131886477462437, "grad_norm": 0.9885264763978882, "learning_rate": 9.433583185953592e-06, "loss": 0.4625, "step": 2136 }, { "epoch": 0.7135225375626043, "grad_norm": 0.954294884665814, "learning_rate": 9.432684755799343e-06, "loss": 0.4595, "step": 2137 }, { "epoch": 0.7138564273789649, "grad_norm": 1.0462508160783828, "learning_rate": 9.431785656532645e-06, "loss": 0.4827, "step": 2138 }, { "epoch": 0.7141903171953256, "grad_norm": 1.006508100070899, "learning_rate": 9.43088588828921e-06, "loss": 0.4821, "step": 2139 }, { "epoch": 0.7145242070116862, "grad_norm": 0.9761259722629412, "learning_rate": 9.429985451204861e-06, "loss": 0.4489, "step": 2140 }, { "epoch": 0.7148580968280468, "grad_norm": 0.9208990142917647, "learning_rate": 9.429084345415522e-06, "loss": 0.4513, "step": 2141 }, { "epoch": 0.7151919866444073, "grad_norm": 0.9719508926027093, "learning_rate": 9.42818257105721e-06, "loss": 0.447, "step": 2142 }, { "epoch": 0.7155258764607679, "grad_norm": 0.984307038449781, "learning_rate": 9.427280128266049e-06, "loss": 0.4605, "step": 2143 }, { "epoch": 0.7158597662771286, "grad_norm": 0.990982323994699, "learning_rate": 9.426377017178266e-06, "loss": 0.4704, "step": 2144 }, { "epoch": 0.7161936560934892, "grad_norm": 0.9614418200477783, "learning_rate": 9.425473237930179e-06, "loss": 0.455, "step": 2145 }, { "epoch": 0.7165275459098498, "grad_norm": 1.0021238994243906, "learning_rate": 9.424568790658217e-06, "loss": 0.4615, "step": 2146 }, { "epoch": 0.7168614357262103, "grad_norm": 0.9943971016721667, "learning_rate": 9.423663675498907e-06, "loss": 0.4837, "step": 2147 }, { "epoch": 0.7171953255425709, "grad_norm": 0.9899047940289004, "learning_rate": 9.422757892588872e-06, "loss": 0.4675, "step": 2148 }, { "epoch": 0.7175292153589315, "grad_norm": 0.9693709829127448, "learning_rate": 9.421851442064841e-06, "loss": 0.4557, "step": 2149 }, { "epoch": 0.7178631051752922, "grad_norm": 0.9689510876224435, "learning_rate": 9.420944324063647e-06, "loss": 0.4569, "step": 2150 }, { "epoch": 0.7181969949916528, "grad_norm": 0.9013278237699848, "learning_rate": 9.420036538722214e-06, "loss": 0.4419, "step": 2151 }, { "epoch": 0.7185308848080133, "grad_norm": 0.9176704846174684, "learning_rate": 9.419128086177573e-06, "loss": 0.4433, "step": 2152 }, { "epoch": 0.7188647746243739, "grad_norm": 0.9255550866673512, "learning_rate": 9.418218966566857e-06, "loss": 0.4438, "step": 2153 }, { "epoch": 0.7191986644407345, "grad_norm": 0.907992258313924, "learning_rate": 9.417309180027293e-06, "loss": 0.4314, "step": 2154 }, { "epoch": 0.7195325542570952, "grad_norm": 0.9517166857988772, "learning_rate": 9.416398726696216e-06, "loss": 0.436, "step": 2155 }, { "epoch": 0.7198664440734558, "grad_norm": 0.9319252161413681, "learning_rate": 9.41548760671106e-06, "loss": 0.4528, "step": 2156 }, { "epoch": 0.7202003338898164, "grad_norm": 0.974440716802703, "learning_rate": 9.414575820209355e-06, "loss": 0.4494, "step": 2157 }, { "epoch": 0.7205342237061769, "grad_norm": 1.0401214659991926, "learning_rate": 9.413663367328738e-06, "loss": 0.4548, "step": 2158 }, { "epoch": 0.7208681135225375, "grad_norm": 0.9689356741672956, "learning_rate": 9.41275024820694e-06, "loss": 0.4437, "step": 2159 }, { "epoch": 0.7212020033388982, "grad_norm": 1.0247249752969112, "learning_rate": 9.411836462981799e-06, "loss": 0.4352, "step": 2160 }, { "epoch": 0.7215358931552588, "grad_norm": 0.9422393435237021, "learning_rate": 9.410922011791248e-06, "loss": 0.4625, "step": 2161 }, { "epoch": 0.7218697829716194, "grad_norm": 0.9271254137711082, "learning_rate": 9.410006894773326e-06, "loss": 0.4599, "step": 2162 }, { "epoch": 0.7222036727879799, "grad_norm": 0.9587343689526792, "learning_rate": 9.409091112066167e-06, "loss": 0.4755, "step": 2163 }, { "epoch": 0.7225375626043405, "grad_norm": 1.0062086773355277, "learning_rate": 9.408174663808011e-06, "loss": 0.4496, "step": 2164 }, { "epoch": 0.7228714524207012, "grad_norm": 0.9938978476649855, "learning_rate": 9.407257550137194e-06, "loss": 0.4581, "step": 2165 }, { "epoch": 0.7232053422370618, "grad_norm": 0.992332617156209, "learning_rate": 9.406339771192152e-06, "loss": 0.4668, "step": 2166 }, { "epoch": 0.7235392320534224, "grad_norm": 0.9708444849428016, "learning_rate": 9.405421327111428e-06, "loss": 0.4565, "step": 2167 }, { "epoch": 0.7238731218697829, "grad_norm": 0.9359017604242091, "learning_rate": 9.404502218033658e-06, "loss": 0.4489, "step": 2168 }, { "epoch": 0.7242070116861435, "grad_norm": 0.9707887340250945, "learning_rate": 9.40358244409758e-06, "loss": 0.4617, "step": 2169 }, { "epoch": 0.7245409015025042, "grad_norm": 1.0133843084285272, "learning_rate": 9.402662005442036e-06, "loss": 0.4566, "step": 2170 }, { "epoch": 0.7248747913188648, "grad_norm": 1.0333999499066793, "learning_rate": 9.401740902205968e-06, "loss": 0.475, "step": 2171 }, { "epoch": 0.7252086811352254, "grad_norm": 0.9388351654611098, "learning_rate": 9.400819134528409e-06, "loss": 0.453, "step": 2172 }, { "epoch": 0.725542570951586, "grad_norm": 0.9716237821066083, "learning_rate": 9.399896702548505e-06, "loss": 0.4375, "step": 2173 }, { "epoch": 0.7258764607679465, "grad_norm": 0.9818249399616656, "learning_rate": 9.398973606405499e-06, "loss": 0.4468, "step": 2174 }, { "epoch": 0.7262103505843072, "grad_norm": 0.948885292993422, "learning_rate": 9.398049846238726e-06, "loss": 0.4555, "step": 2175 }, { "epoch": 0.7265442404006678, "grad_norm": 0.9680957756147868, "learning_rate": 9.39712542218763e-06, "loss": 0.4717, "step": 2176 }, { "epoch": 0.7268781302170284, "grad_norm": 0.9546016629536873, "learning_rate": 9.396200334391753e-06, "loss": 0.4434, "step": 2177 }, { "epoch": 0.727212020033389, "grad_norm": 0.9035286133936762, "learning_rate": 9.395274582990738e-06, "loss": 0.4361, "step": 2178 }, { "epoch": 0.7275459098497495, "grad_norm": 0.9745297671692229, "learning_rate": 9.394348168124326e-06, "loss": 0.4402, "step": 2179 }, { "epoch": 0.7278797996661102, "grad_norm": 0.936668828507746, "learning_rate": 9.393421089932356e-06, "loss": 0.4587, "step": 2180 }, { "epoch": 0.7282136894824708, "grad_norm": 0.9740098863853213, "learning_rate": 9.392493348554774e-06, "loss": 0.4483, "step": 2181 }, { "epoch": 0.7285475792988314, "grad_norm": 1.0021164892772954, "learning_rate": 9.391564944131622e-06, "loss": 0.4808, "step": 2182 }, { "epoch": 0.728881469115192, "grad_norm": 0.9427407841975041, "learning_rate": 9.390635876803039e-06, "loss": 0.4412, "step": 2183 }, { "epoch": 0.7292153589315525, "grad_norm": 0.9446503539614529, "learning_rate": 9.389706146709272e-06, "loss": 0.4541, "step": 2184 }, { "epoch": 0.7295492487479132, "grad_norm": 0.9233084012021211, "learning_rate": 9.38877575399066e-06, "loss": 0.4597, "step": 2185 }, { "epoch": 0.7298831385642738, "grad_norm": 0.9468349488163326, "learning_rate": 9.387844698787645e-06, "loss": 0.4399, "step": 2186 }, { "epoch": 0.7302170283806344, "grad_norm": 0.9860002324007329, "learning_rate": 9.386912981240774e-06, "loss": 0.469, "step": 2187 }, { "epoch": 0.730550918196995, "grad_norm": 0.9809672039840776, "learning_rate": 9.385980601490684e-06, "loss": 0.45, "step": 2188 }, { "epoch": 0.7308848080133556, "grad_norm": 0.9282593314550316, "learning_rate": 9.385047559678122e-06, "loss": 0.46, "step": 2189 }, { "epoch": 0.7312186978297162, "grad_norm": 0.9274093840612164, "learning_rate": 9.38411385594393e-06, "loss": 0.4693, "step": 2190 }, { "epoch": 0.7315525876460768, "grad_norm": 0.9683899491280357, "learning_rate": 9.383179490429047e-06, "loss": 0.4575, "step": 2191 }, { "epoch": 0.7318864774624374, "grad_norm": 0.9434920533236342, "learning_rate": 9.382244463274516e-06, "loss": 0.4589, "step": 2192 }, { "epoch": 0.732220367278798, "grad_norm": 0.9447824511207382, "learning_rate": 9.381308774621483e-06, "loss": 0.4509, "step": 2193 }, { "epoch": 0.7325542570951586, "grad_norm": 0.9748716410408748, "learning_rate": 9.380372424611185e-06, "loss": 0.4639, "step": 2194 }, { "epoch": 0.7328881469115192, "grad_norm": 0.890407783104994, "learning_rate": 9.379435413384965e-06, "loss": 0.4293, "step": 2195 }, { "epoch": 0.7332220367278798, "grad_norm": 0.9360103510492683, "learning_rate": 9.378497741084267e-06, "loss": 0.4604, "step": 2196 }, { "epoch": 0.7335559265442404, "grad_norm": 0.9624296354106074, "learning_rate": 9.37755940785063e-06, "loss": 0.4568, "step": 2197 }, { "epoch": 0.733889816360601, "grad_norm": 0.9658405539281805, "learning_rate": 9.376620413825697e-06, "loss": 0.4371, "step": 2198 }, { "epoch": 0.7342237061769616, "grad_norm": 0.949453711971617, "learning_rate": 9.375680759151206e-06, "loss": 0.4642, "step": 2199 }, { "epoch": 0.7345575959933222, "grad_norm": 0.9755457077967965, "learning_rate": 9.374740443969002e-06, "loss": 0.466, "step": 2200 }, { "epoch": 0.7348914858096828, "grad_norm": 0.9566529030634526, "learning_rate": 9.373799468421021e-06, "loss": 0.4317, "step": 2201 }, { "epoch": 0.7352253756260434, "grad_norm": 0.973644711814818, "learning_rate": 9.372857832649305e-06, "loss": 0.4768, "step": 2202 }, { "epoch": 0.735559265442404, "grad_norm": 0.9715866813089785, "learning_rate": 9.371915536795992e-06, "loss": 0.4712, "step": 2203 }, { "epoch": 0.7358931552587646, "grad_norm": 0.9929129728386372, "learning_rate": 9.370972581003323e-06, "loss": 0.4544, "step": 2204 }, { "epoch": 0.7362270450751253, "grad_norm": 0.9661498965913974, "learning_rate": 9.370028965413638e-06, "loss": 0.4595, "step": 2205 }, { "epoch": 0.7365609348914858, "grad_norm": 0.9796045456960898, "learning_rate": 9.369084690169372e-06, "loss": 0.4642, "step": 2206 }, { "epoch": 0.7368948247078464, "grad_norm": 0.9117717452829489, "learning_rate": 9.368139755413065e-06, "loss": 0.4454, "step": 2207 }, { "epoch": 0.737228714524207, "grad_norm": 0.9829478146386957, "learning_rate": 9.367194161287356e-06, "loss": 0.4688, "step": 2208 }, { "epoch": 0.7375626043405676, "grad_norm": 0.9488569093169523, "learning_rate": 9.366247907934979e-06, "loss": 0.435, "step": 2209 }, { "epoch": 0.7378964941569283, "grad_norm": 0.9388153143650824, "learning_rate": 9.365300995498775e-06, "loss": 0.4615, "step": 2210 }, { "epoch": 0.7382303839732888, "grad_norm": 1.0189483726441946, "learning_rate": 9.364353424121675e-06, "loss": 0.4656, "step": 2211 }, { "epoch": 0.7385642737896494, "grad_norm": 0.9976699333501842, "learning_rate": 9.363405193946719e-06, "loss": 0.4602, "step": 2212 }, { "epoch": 0.73889816360601, "grad_norm": 0.959238210844959, "learning_rate": 9.36245630511704e-06, "loss": 0.4457, "step": 2213 }, { "epoch": 0.7392320534223706, "grad_norm": 0.9799835162534111, "learning_rate": 9.361506757775871e-06, "loss": 0.4705, "step": 2214 }, { "epoch": 0.7395659432387313, "grad_norm": 0.9889708462177033, "learning_rate": 9.360556552066549e-06, "loss": 0.4562, "step": 2215 }, { "epoch": 0.7398998330550918, "grad_norm": 0.984681543979978, "learning_rate": 9.359605688132505e-06, "loss": 0.4428, "step": 2216 }, { "epoch": 0.7402337228714524, "grad_norm": 1.0433294552407228, "learning_rate": 9.358654166117274e-06, "loss": 0.4812, "step": 2217 }, { "epoch": 0.740567612687813, "grad_norm": 0.9490961633434377, "learning_rate": 9.357701986164487e-06, "loss": 0.4592, "step": 2218 }, { "epoch": 0.7409015025041736, "grad_norm": 0.9917150042994081, "learning_rate": 9.356749148417872e-06, "loss": 0.4551, "step": 2219 }, { "epoch": 0.7412353923205343, "grad_norm": 0.9060411174814378, "learning_rate": 9.355795653021264e-06, "loss": 0.4375, "step": 2220 }, { "epoch": 0.7415692821368948, "grad_norm": 0.9500441746976167, "learning_rate": 9.354841500118592e-06, "loss": 0.4652, "step": 2221 }, { "epoch": 0.7419031719532554, "grad_norm": 0.943194307065151, "learning_rate": 9.353886689853884e-06, "loss": 0.4487, "step": 2222 }, { "epoch": 0.742237061769616, "grad_norm": 0.9592891171852873, "learning_rate": 9.352931222371268e-06, "loss": 0.4589, "step": 2223 }, { "epoch": 0.7425709515859766, "grad_norm": 0.9057781095569739, "learning_rate": 9.351975097814971e-06, "loss": 0.4386, "step": 2224 }, { "epoch": 0.7429048414023373, "grad_norm": 0.9270178480946147, "learning_rate": 9.351018316329323e-06, "loss": 0.4457, "step": 2225 }, { "epoch": 0.7432387312186979, "grad_norm": 0.950524780740956, "learning_rate": 9.350060878058747e-06, "loss": 0.4562, "step": 2226 }, { "epoch": 0.7435726210350584, "grad_norm": 0.9861977816858288, "learning_rate": 9.34910278314777e-06, "loss": 0.4791, "step": 2227 }, { "epoch": 0.743906510851419, "grad_norm": 0.9658938186653534, "learning_rate": 9.348144031741013e-06, "loss": 0.4595, "step": 2228 }, { "epoch": 0.7442404006677796, "grad_norm": 0.9344636926110538, "learning_rate": 9.347184623983201e-06, "loss": 0.458, "step": 2229 }, { "epoch": 0.7445742904841403, "grad_norm": 0.9768618748770214, "learning_rate": 9.346224560019156e-06, "loss": 0.436, "step": 2230 }, { "epoch": 0.7449081803005009, "grad_norm": 0.9398179104653842, "learning_rate": 9.345263839993804e-06, "loss": 0.4488, "step": 2231 }, { "epoch": 0.7452420701168614, "grad_norm": 0.9762242833154918, "learning_rate": 9.344302464052157e-06, "loss": 0.4597, "step": 2232 }, { "epoch": 0.745575959933222, "grad_norm": 0.9720725925599375, "learning_rate": 9.34334043233934e-06, "loss": 0.4716, "step": 2233 }, { "epoch": 0.7459098497495826, "grad_norm": 0.9229172482969085, "learning_rate": 9.34237774500057e-06, "loss": 0.4485, "step": 2234 }, { "epoch": 0.7462437395659433, "grad_norm": 0.9081817111563151, "learning_rate": 9.341414402181166e-06, "loss": 0.4465, "step": 2235 }, { "epoch": 0.7465776293823039, "grad_norm": 0.9257652617810918, "learning_rate": 9.340450404026542e-06, "loss": 0.4264, "step": 2236 }, { "epoch": 0.7469115191986644, "grad_norm": 0.8990059930228013, "learning_rate": 9.339485750682212e-06, "loss": 0.4297, "step": 2237 }, { "epoch": 0.747245409015025, "grad_norm": 0.9257932384668932, "learning_rate": 9.338520442293795e-06, "loss": 0.456, "step": 2238 }, { "epoch": 0.7475792988313856, "grad_norm": 0.9644333061508779, "learning_rate": 9.337554479006999e-06, "loss": 0.4753, "step": 2239 }, { "epoch": 0.7479131886477463, "grad_norm": 1.02017451791268, "learning_rate": 9.336587860967638e-06, "loss": 0.4757, "step": 2240 }, { "epoch": 0.7482470784641069, "grad_norm": 0.9233958820116359, "learning_rate": 9.335620588321625e-06, "loss": 0.4554, "step": 2241 }, { "epoch": 0.7485809682804675, "grad_norm": 0.9854816829802168, "learning_rate": 9.334652661214965e-06, "loss": 0.4529, "step": 2242 }, { "epoch": 0.748914858096828, "grad_norm": 0.9032922662847906, "learning_rate": 9.33368407979377e-06, "loss": 0.4452, "step": 2243 }, { "epoch": 0.7492487479131886, "grad_norm": 0.902656783531148, "learning_rate": 9.332714844204245e-06, "loss": 0.4449, "step": 2244 }, { "epoch": 0.7495826377295493, "grad_norm": 0.9040556080742019, "learning_rate": 9.331744954592697e-06, "loss": 0.4427, "step": 2245 }, { "epoch": 0.7499165275459099, "grad_norm": 0.9349096568162644, "learning_rate": 9.33077441110553e-06, "loss": 0.4577, "step": 2246 }, { "epoch": 0.7502504173622705, "grad_norm": 0.9251710232837531, "learning_rate": 9.329803213889246e-06, "loss": 0.437, "step": 2247 }, { "epoch": 0.750584307178631, "grad_norm": 0.9446308649744913, "learning_rate": 9.328831363090449e-06, "loss": 0.4637, "step": 2248 }, { "epoch": 0.7509181969949916, "grad_norm": 0.9967077561717859, "learning_rate": 9.327858858855838e-06, "loss": 0.4767, "step": 2249 }, { "epoch": 0.7512520868113522, "grad_norm": 0.9296192528852324, "learning_rate": 9.326885701332214e-06, "loss": 0.4547, "step": 2250 }, { "epoch": 0.7515859766277129, "grad_norm": 0.9467336127871148, "learning_rate": 9.325911890666474e-06, "loss": 0.4396, "step": 2251 }, { "epoch": 0.7519198664440735, "grad_norm": 0.990771341153774, "learning_rate": 9.324937427005613e-06, "loss": 0.4811, "step": 2252 }, { "epoch": 0.752253756260434, "grad_norm": 0.9375471285368707, "learning_rate": 9.323962310496729e-06, "loss": 0.4399, "step": 2253 }, { "epoch": 0.7525876460767946, "grad_norm": 0.9427818415246827, "learning_rate": 9.322986541287013e-06, "loss": 0.453, "step": 2254 }, { "epoch": 0.7529215358931552, "grad_norm": 0.9328720561979812, "learning_rate": 9.32201011952376e-06, "loss": 0.4403, "step": 2255 }, { "epoch": 0.7532554257095159, "grad_norm": 0.9749048968086684, "learning_rate": 9.321033045354355e-06, "loss": 0.4521, "step": 2256 }, { "epoch": 0.7535893155258765, "grad_norm": 0.9789548070333314, "learning_rate": 9.320055318926291e-06, "loss": 0.4537, "step": 2257 }, { "epoch": 0.7539232053422371, "grad_norm": 0.9510280004541176, "learning_rate": 9.319076940387157e-06, "loss": 0.4394, "step": 2258 }, { "epoch": 0.7542570951585976, "grad_norm": 0.9659198041986038, "learning_rate": 9.318097909884635e-06, "loss": 0.4458, "step": 2259 }, { "epoch": 0.7545909849749582, "grad_norm": 0.91838408044389, "learning_rate": 9.317118227566513e-06, "loss": 0.4254, "step": 2260 }, { "epoch": 0.7549248747913189, "grad_norm": 0.9646585706662439, "learning_rate": 9.316137893580672e-06, "loss": 0.4462, "step": 2261 }, { "epoch": 0.7552587646076795, "grad_norm": 0.969503238321221, "learning_rate": 9.315156908075093e-06, "loss": 0.4633, "step": 2262 }, { "epoch": 0.7555926544240401, "grad_norm": 0.961264081016938, "learning_rate": 9.314175271197855e-06, "loss": 0.4406, "step": 2263 }, { "epoch": 0.7559265442404006, "grad_norm": 0.9200806077142808, "learning_rate": 9.313192983097137e-06, "loss": 0.4585, "step": 2264 }, { "epoch": 0.7562604340567612, "grad_norm": 0.9713551807921798, "learning_rate": 9.312210043921216e-06, "loss": 0.4662, "step": 2265 }, { "epoch": 0.7565943238731219, "grad_norm": 0.9536435435838974, "learning_rate": 9.311226453818463e-06, "loss": 0.4431, "step": 2266 }, { "epoch": 0.7569282136894825, "grad_norm": 0.935555257923329, "learning_rate": 9.310242212937355e-06, "loss": 0.449, "step": 2267 }, { "epoch": 0.7572621035058431, "grad_norm": 0.9340907194721165, "learning_rate": 9.309257321426459e-06, "loss": 0.4611, "step": 2268 }, { "epoch": 0.7575959933222036, "grad_norm": 0.9229252324147351, "learning_rate": 9.308271779434445e-06, "loss": 0.4291, "step": 2269 }, { "epoch": 0.7579298831385642, "grad_norm": 1.0199186768258195, "learning_rate": 9.307285587110081e-06, "loss": 0.4765, "step": 2270 }, { "epoch": 0.7582637729549249, "grad_norm": 0.9034014383498333, "learning_rate": 9.306298744602233e-06, "loss": 0.4378, "step": 2271 }, { "epoch": 0.7585976627712855, "grad_norm": 0.9127792552501863, "learning_rate": 9.305311252059863e-06, "loss": 0.4382, "step": 2272 }, { "epoch": 0.7589315525876461, "grad_norm": 0.9458778845547429, "learning_rate": 9.304323109632035e-06, "loss": 0.4504, "step": 2273 }, { "epoch": 0.7592654424040067, "grad_norm": 1.021432190432254, "learning_rate": 9.303334317467907e-06, "loss": 0.4804, "step": 2274 }, { "epoch": 0.7595993322203672, "grad_norm": 0.9864596303008071, "learning_rate": 9.302344875716737e-06, "loss": 0.4588, "step": 2275 }, { "epoch": 0.7599332220367279, "grad_norm": 0.9242398819169123, "learning_rate": 9.30135478452788e-06, "loss": 0.423, "step": 2276 }, { "epoch": 0.7602671118530885, "grad_norm": 0.9549684059241658, "learning_rate": 9.300364044050794e-06, "loss": 0.4289, "step": 2277 }, { "epoch": 0.7606010016694491, "grad_norm": 0.9283966909305242, "learning_rate": 9.299372654435026e-06, "loss": 0.4333, "step": 2278 }, { "epoch": 0.7609348914858097, "grad_norm": 1.006503422584923, "learning_rate": 9.29838061583023e-06, "loss": 0.4434, "step": 2279 }, { "epoch": 0.7612687813021702, "grad_norm": 0.9540697749634214, "learning_rate": 9.297387928386148e-06, "loss": 0.4335, "step": 2280 }, { "epoch": 0.7616026711185309, "grad_norm": 0.8659488663260303, "learning_rate": 9.296394592252633e-06, "loss": 0.434, "step": 2281 }, { "epoch": 0.7619365609348915, "grad_norm": 0.9916914695465454, "learning_rate": 9.295400607579626e-06, "loss": 0.4624, "step": 2282 }, { "epoch": 0.7622704507512521, "grad_norm": 0.9159628695795489, "learning_rate": 9.29440597451717e-06, "loss": 0.4406, "step": 2283 }, { "epoch": 0.7626043405676127, "grad_norm": 0.9601360918386518, "learning_rate": 9.293410693215398e-06, "loss": 0.4786, "step": 2284 }, { "epoch": 0.7629382303839732, "grad_norm": 0.9028186738692219, "learning_rate": 9.292414763824557e-06, "loss": 0.4157, "step": 2285 }, { "epoch": 0.7632721202003339, "grad_norm": 0.928851276788754, "learning_rate": 9.291418186494976e-06, "loss": 0.4341, "step": 2286 }, { "epoch": 0.7636060100166945, "grad_norm": 1.0383758717417777, "learning_rate": 9.290420961377089e-06, "loss": 0.4591, "step": 2287 }, { "epoch": 0.7639398998330551, "grad_norm": 1.0239890125481141, "learning_rate": 9.28942308862143e-06, "loss": 0.4764, "step": 2288 }, { "epoch": 0.7642737896494157, "grad_norm": 0.9470933350422311, "learning_rate": 9.288424568378623e-06, "loss": 0.4643, "step": 2289 }, { "epoch": 0.7646076794657763, "grad_norm": 0.9802614684123083, "learning_rate": 9.287425400799396e-06, "loss": 0.4582, "step": 2290 }, { "epoch": 0.7649415692821369, "grad_norm": 1.0397615653964243, "learning_rate": 9.286425586034576e-06, "loss": 0.4578, "step": 2291 }, { "epoch": 0.7652754590984975, "grad_norm": 1.013572310388653, "learning_rate": 9.285425124235079e-06, "loss": 0.4666, "step": 2292 }, { "epoch": 0.7656093489148581, "grad_norm": 0.9551344330195806, "learning_rate": 9.28442401555193e-06, "loss": 0.4385, "step": 2293 }, { "epoch": 0.7659432387312187, "grad_norm": 0.9035672545083582, "learning_rate": 9.283422260136244e-06, "loss": 0.4395, "step": 2294 }, { "epoch": 0.7662771285475793, "grad_norm": 0.9612900690568693, "learning_rate": 9.282419858139233e-06, "loss": 0.4557, "step": 2295 }, { "epoch": 0.7666110183639399, "grad_norm": 0.9080933621732501, "learning_rate": 9.281416809712214e-06, "loss": 0.4356, "step": 2296 }, { "epoch": 0.7669449081803005, "grad_norm": 0.9592703096334942, "learning_rate": 9.280413115006595e-06, "loss": 0.4352, "step": 2297 }, { "epoch": 0.7672787979966611, "grad_norm": 0.9437536630410415, "learning_rate": 9.279408774173882e-06, "loss": 0.4488, "step": 2298 }, { "epoch": 0.7676126878130217, "grad_norm": 0.8806732402609715, "learning_rate": 9.27840378736568e-06, "loss": 0.4337, "step": 2299 }, { "epoch": 0.7679465776293823, "grad_norm": 0.9711500502767338, "learning_rate": 9.277398154733695e-06, "loss": 0.4359, "step": 2300 }, { "epoch": 0.7682804674457429, "grad_norm": 0.9322596436599139, "learning_rate": 9.276391876429722e-06, "loss": 0.433, "step": 2301 }, { "epoch": 0.7686143572621035, "grad_norm": 0.8909350121192724, "learning_rate": 9.275384952605662e-06, "loss": 0.4325, "step": 2302 }, { "epoch": 0.7689482470784641, "grad_norm": 0.9734615134310296, "learning_rate": 9.274377383413508e-06, "loss": 0.452, "step": 2303 }, { "epoch": 0.7692821368948247, "grad_norm": 0.9515539995833027, "learning_rate": 9.273369169005354e-06, "loss": 0.4595, "step": 2304 }, { "epoch": 0.7696160267111853, "grad_norm": 0.936380806281841, "learning_rate": 9.272360309533388e-06, "loss": 0.4586, "step": 2305 }, { "epoch": 0.769949916527546, "grad_norm": 0.9594443474536795, "learning_rate": 9.271350805149896e-06, "loss": 0.4541, "step": 2306 }, { "epoch": 0.7702838063439065, "grad_norm": 0.9418765611687544, "learning_rate": 9.270340656007266e-06, "loss": 0.4467, "step": 2307 }, { "epoch": 0.7706176961602671, "grad_norm": 0.912316110908783, "learning_rate": 9.269329862257977e-06, "loss": 0.441, "step": 2308 }, { "epoch": 0.7709515859766277, "grad_norm": 0.9085789452735822, "learning_rate": 9.26831842405461e-06, "loss": 0.4434, "step": 2309 }, { "epoch": 0.7712854757929883, "grad_norm": 0.9324483724511037, "learning_rate": 9.267306341549839e-06, "loss": 0.4526, "step": 2310 }, { "epoch": 0.771619365609349, "grad_norm": 0.9131717865544803, "learning_rate": 9.266293614896438e-06, "loss": 0.4588, "step": 2311 }, { "epoch": 0.7719532554257095, "grad_norm": 0.9066725260694537, "learning_rate": 9.26528024424728e-06, "loss": 0.4487, "step": 2312 }, { "epoch": 0.7722871452420701, "grad_norm": 0.9445210866815462, "learning_rate": 9.26426622975533e-06, "loss": 0.4432, "step": 2313 }, { "epoch": 0.7726210350584307, "grad_norm": 0.8882146555625315, "learning_rate": 9.263251571573654e-06, "loss": 0.4217, "step": 2314 }, { "epoch": 0.7729549248747913, "grad_norm": 0.9995615824195316, "learning_rate": 9.262236269855416e-06, "loss": 0.4896, "step": 2315 }, { "epoch": 0.773288814691152, "grad_norm": 0.8939748208560684, "learning_rate": 9.261220324753875e-06, "loss": 0.4299, "step": 2316 }, { "epoch": 0.7736227045075125, "grad_norm": 0.920991654193632, "learning_rate": 9.260203736422386e-06, "loss": 0.44, "step": 2317 }, { "epoch": 0.7739565943238731, "grad_norm": 0.9576037726245896, "learning_rate": 9.259186505014404e-06, "loss": 0.4588, "step": 2318 }, { "epoch": 0.7742904841402337, "grad_norm": 0.9123006356963164, "learning_rate": 9.258168630683478e-06, "loss": 0.4457, "step": 2319 }, { "epoch": 0.7746243739565943, "grad_norm": 0.9673254700084013, "learning_rate": 9.257150113583259e-06, "loss": 0.4541, "step": 2320 }, { "epoch": 0.774958263772955, "grad_norm": 0.9390981147712606, "learning_rate": 9.25613095386749e-06, "loss": 0.4298, "step": 2321 }, { "epoch": 0.7752921535893156, "grad_norm": 0.9433482789976507, "learning_rate": 9.255111151690013e-06, "loss": 0.4454, "step": 2322 }, { "epoch": 0.7756260434056761, "grad_norm": 0.9105478413496216, "learning_rate": 9.254090707204766e-06, "loss": 0.4561, "step": 2323 }, { "epoch": 0.7759599332220367, "grad_norm": 0.9338655166869504, "learning_rate": 9.253069620565788e-06, "loss": 0.4556, "step": 2324 }, { "epoch": 0.7762938230383973, "grad_norm": 0.9430831591789703, "learning_rate": 9.252047891927207e-06, "loss": 0.4394, "step": 2325 }, { "epoch": 0.776627712854758, "grad_norm": 0.9349609657891143, "learning_rate": 9.251025521443257e-06, "loss": 0.4486, "step": 2326 }, { "epoch": 0.7769616026711186, "grad_norm": 1.160323536868479, "learning_rate": 9.250002509268261e-06, "loss": 0.4464, "step": 2327 }, { "epoch": 0.7772954924874791, "grad_norm": 0.9184441034668277, "learning_rate": 9.248978855556645e-06, "loss": 0.4529, "step": 2328 }, { "epoch": 0.7776293823038397, "grad_norm": 0.8715837778313896, "learning_rate": 9.247954560462929e-06, "loss": 0.4363, "step": 2329 }, { "epoch": 0.7779632721202003, "grad_norm": 0.9274346391271003, "learning_rate": 9.246929624141727e-06, "loss": 0.4706, "step": 2330 }, { "epoch": 0.778297161936561, "grad_norm": 0.9187442440212126, "learning_rate": 9.245904046747755e-06, "loss": 0.4433, "step": 2331 }, { "epoch": 0.7786310517529216, "grad_norm": 0.9283021773766118, "learning_rate": 9.244877828435825e-06, "loss": 0.4403, "step": 2332 }, { "epoch": 0.7789649415692821, "grad_norm": 0.9605240357997943, "learning_rate": 9.243850969360844e-06, "loss": 0.4593, "step": 2333 }, { "epoch": 0.7792988313856427, "grad_norm": 0.9039007983125368, "learning_rate": 9.242823469677813e-06, "loss": 0.4323, "step": 2334 }, { "epoch": 0.7796327212020033, "grad_norm": 0.9600212213650361, "learning_rate": 9.241795329541836e-06, "loss": 0.4474, "step": 2335 }, { "epoch": 0.779966611018364, "grad_norm": 0.9240080815966144, "learning_rate": 9.240766549108109e-06, "loss": 0.4367, "step": 2336 }, { "epoch": 0.7803005008347246, "grad_norm": 0.9091374673018564, "learning_rate": 9.239737128531925e-06, "loss": 0.4478, "step": 2337 }, { "epoch": 0.7806343906510852, "grad_norm": 0.9212320877913792, "learning_rate": 9.23870706796868e-06, "loss": 0.4512, "step": 2338 }, { "epoch": 0.7809682804674457, "grad_norm": 0.9304096315569288, "learning_rate": 9.237676367573853e-06, "loss": 0.4304, "step": 2339 }, { "epoch": 0.7813021702838063, "grad_norm": 1.024133885054231, "learning_rate": 9.236645027503035e-06, "loss": 0.47, "step": 2340 }, { "epoch": 0.781636060100167, "grad_norm": 0.9868284386446196, "learning_rate": 9.235613047911903e-06, "loss": 0.459, "step": 2341 }, { "epoch": 0.7819699499165276, "grad_norm": 0.979190348074577, "learning_rate": 9.234580428956234e-06, "loss": 0.462, "step": 2342 }, { "epoch": 0.7823038397328882, "grad_norm": 0.9383771067090546, "learning_rate": 9.233547170791906e-06, "loss": 0.455, "step": 2343 }, { "epoch": 0.7826377295492487, "grad_norm": 0.9393997413214773, "learning_rate": 9.232513273574882e-06, "loss": 0.4378, "step": 2344 }, { "epoch": 0.7829716193656093, "grad_norm": 0.9067058354420047, "learning_rate": 9.231478737461232e-06, "loss": 0.4426, "step": 2345 }, { "epoch": 0.78330550918197, "grad_norm": 0.8893875986753055, "learning_rate": 9.23044356260712e-06, "loss": 0.4357, "step": 2346 }, { "epoch": 0.7836393989983306, "grad_norm": 0.9367986156312614, "learning_rate": 9.229407749168804e-06, "loss": 0.4543, "step": 2347 }, { "epoch": 0.7839732888146912, "grad_norm": 0.9616819821001988, "learning_rate": 9.228371297302639e-06, "loss": 0.4655, "step": 2348 }, { "epoch": 0.7843071786310517, "grad_norm": 0.9874227055484771, "learning_rate": 9.227334207165079e-06, "loss": 0.4623, "step": 2349 }, { "epoch": 0.7846410684474123, "grad_norm": 0.9829187118368898, "learning_rate": 9.226296478912671e-06, "loss": 0.4695, "step": 2350 }, { "epoch": 0.784974958263773, "grad_norm": 0.9448535477408812, "learning_rate": 9.22525811270206e-06, "loss": 0.4532, "step": 2351 }, { "epoch": 0.7853088480801336, "grad_norm": 0.9200578453342497, "learning_rate": 9.22421910868999e-06, "loss": 0.4422, "step": 2352 }, { "epoch": 0.7856427378964942, "grad_norm": 0.929206076067544, "learning_rate": 9.223179467033292e-06, "loss": 0.4654, "step": 2353 }, { "epoch": 0.7859766277128547, "grad_norm": 0.8826236632096727, "learning_rate": 9.222139187888905e-06, "loss": 0.4401, "step": 2354 }, { "epoch": 0.7863105175292153, "grad_norm": 0.9137709140359256, "learning_rate": 9.221098271413856e-06, "loss": 0.432, "step": 2355 }, { "epoch": 0.786644407345576, "grad_norm": 0.9309522539177945, "learning_rate": 9.220056717765274e-06, "loss": 0.4475, "step": 2356 }, { "epoch": 0.7869782971619366, "grad_norm": 0.9157251672454945, "learning_rate": 9.219014527100378e-06, "loss": 0.4323, "step": 2357 }, { "epoch": 0.7873121869782972, "grad_norm": 1.0211109071873175, "learning_rate": 9.217971699576486e-06, "loss": 0.4522, "step": 2358 }, { "epoch": 0.7876460767946578, "grad_norm": 0.9649710585701221, "learning_rate": 9.216928235351016e-06, "loss": 0.4448, "step": 2359 }, { "epoch": 0.7879799666110183, "grad_norm": 0.9185797222030962, "learning_rate": 9.215884134581476e-06, "loss": 0.4348, "step": 2360 }, { "epoch": 0.788313856427379, "grad_norm": 0.9615784231198572, "learning_rate": 9.214839397425476e-06, "loss": 0.4487, "step": 2361 }, { "epoch": 0.7886477462437396, "grad_norm": 0.8898348501007929, "learning_rate": 9.213794024040714e-06, "loss": 0.429, "step": 2362 }, { "epoch": 0.7889816360601002, "grad_norm": 0.9234142358319539, "learning_rate": 9.212748014584991e-06, "loss": 0.4608, "step": 2363 }, { "epoch": 0.7893155258764608, "grad_norm": 0.9372018526444938, "learning_rate": 9.211701369216203e-06, "loss": 0.4333, "step": 2364 }, { "epoch": 0.7896494156928213, "grad_norm": 0.9357272371403383, "learning_rate": 9.210654088092339e-06, "loss": 0.4554, "step": 2365 }, { "epoch": 0.7899833055091819, "grad_norm": 0.9255604177458167, "learning_rate": 9.209606171371488e-06, "loss": 0.4563, "step": 2366 }, { "epoch": 0.7903171953255426, "grad_norm": 0.9668143312143707, "learning_rate": 9.20855761921183e-06, "loss": 0.4365, "step": 2367 }, { "epoch": 0.7906510851419032, "grad_norm": 0.8974120135019661, "learning_rate": 9.207508431771643e-06, "loss": 0.4474, "step": 2368 }, { "epoch": 0.7909849749582638, "grad_norm": 0.8936478732346627, "learning_rate": 9.206458609209305e-06, "loss": 0.4091, "step": 2369 }, { "epoch": 0.7913188647746243, "grad_norm": 0.9087441379635313, "learning_rate": 9.205408151683286e-06, "loss": 0.4414, "step": 2370 }, { "epoch": 0.7916527545909849, "grad_norm": 0.9237435127490172, "learning_rate": 9.204357059352151e-06, "loss": 0.4526, "step": 2371 }, { "epoch": 0.7919866444073456, "grad_norm": 0.9358694826889685, "learning_rate": 9.20330533237456e-06, "loss": 0.4625, "step": 2372 }, { "epoch": 0.7923205342237062, "grad_norm": 0.9274160417165092, "learning_rate": 9.202252970909273e-06, "loss": 0.4608, "step": 2373 }, { "epoch": 0.7926544240400668, "grad_norm": 0.8608284197059816, "learning_rate": 9.201199975115145e-06, "loss": 0.4198, "step": 2374 }, { "epoch": 0.7929883138564274, "grad_norm": 0.9078978665264182, "learning_rate": 9.200146345151124e-06, "loss": 0.4494, "step": 2375 }, { "epoch": 0.7933222036727879, "grad_norm": 0.9315966231130529, "learning_rate": 9.199092081176256e-06, "loss": 0.4568, "step": 2376 }, { "epoch": 0.7936560934891486, "grad_norm": 0.8993461161229781, "learning_rate": 9.19803718334968e-06, "loss": 0.4217, "step": 2377 }, { "epoch": 0.7939899833055092, "grad_norm": 0.9198393645594196, "learning_rate": 9.196981651830632e-06, "loss": 0.4299, "step": 2378 }, { "epoch": 0.7943238731218698, "grad_norm": 0.9325906989403041, "learning_rate": 9.195925486778447e-06, "loss": 0.4484, "step": 2379 }, { "epoch": 0.7946577629382304, "grad_norm": 0.9528760067753898, "learning_rate": 9.194868688352551e-06, "loss": 0.4313, "step": 2380 }, { "epoch": 0.7949916527545909, "grad_norm": 0.9565929843520166, "learning_rate": 9.193811256712471e-06, "loss": 0.4468, "step": 2381 }, { "epoch": 0.7953255425709516, "grad_norm": 0.952514936822793, "learning_rate": 9.19275319201782e-06, "loss": 0.4387, "step": 2382 }, { "epoch": 0.7956594323873122, "grad_norm": 0.9252874273762661, "learning_rate": 9.191694494428314e-06, "loss": 0.4492, "step": 2383 }, { "epoch": 0.7959933222036728, "grad_norm": 0.933832246147722, "learning_rate": 9.190635164103767e-06, "loss": 0.4439, "step": 2384 }, { "epoch": 0.7963272120200334, "grad_norm": 0.9633425480495651, "learning_rate": 9.189575201204082e-06, "loss": 0.4483, "step": 2385 }, { "epoch": 0.7966611018363939, "grad_norm": 0.9200601393523323, "learning_rate": 9.18851460588926e-06, "loss": 0.4534, "step": 2386 }, { "epoch": 0.7969949916527546, "grad_norm": 0.9228489241202794, "learning_rate": 9.187453378319398e-06, "loss": 0.438, "step": 2387 }, { "epoch": 0.7973288814691152, "grad_norm": 0.9388476719024647, "learning_rate": 9.186391518654687e-06, "loss": 0.4289, "step": 2388 }, { "epoch": 0.7976627712854758, "grad_norm": 0.9550581330557993, "learning_rate": 9.185329027055416e-06, "loss": 0.4496, "step": 2389 }, { "epoch": 0.7979966611018364, "grad_norm": 0.9396037845263814, "learning_rate": 9.184265903681966e-06, "loss": 0.4438, "step": 2390 }, { "epoch": 0.798330550918197, "grad_norm": 0.9784747658747858, "learning_rate": 9.183202148694817e-06, "loss": 0.4719, "step": 2391 }, { "epoch": 0.7986644407345576, "grad_norm": 0.9253091609173298, "learning_rate": 9.182137762254539e-06, "loss": 0.4346, "step": 2392 }, { "epoch": 0.7989983305509182, "grad_norm": 0.9569652142735214, "learning_rate": 9.181072744521806e-06, "loss": 0.444, "step": 2393 }, { "epoch": 0.7993322203672788, "grad_norm": 0.9285632483160446, "learning_rate": 9.18000709565738e-06, "loss": 0.4469, "step": 2394 }, { "epoch": 0.7996661101836394, "grad_norm": 0.89048418057733, "learning_rate": 9.17894081582212e-06, "loss": 0.4622, "step": 2395 }, { "epoch": 0.8, "grad_norm": 0.9161398232617446, "learning_rate": 9.177873905176979e-06, "loss": 0.4379, "step": 2396 }, { "epoch": 0.8003338898163606, "grad_norm": 0.9665553288608024, "learning_rate": 9.17680636388301e-06, "loss": 0.4639, "step": 2397 }, { "epoch": 0.8006677796327212, "grad_norm": 0.9492806793577335, "learning_rate": 9.175738192101353e-06, "loss": 0.4459, "step": 2398 }, { "epoch": 0.8010016694490818, "grad_norm": 0.9339842379699855, "learning_rate": 9.174669389993255e-06, "loss": 0.4364, "step": 2399 }, { "epoch": 0.8013355592654424, "grad_norm": 0.9174960552268154, "learning_rate": 9.173599957720047e-06, "loss": 0.438, "step": 2400 }, { "epoch": 0.801669449081803, "grad_norm": 1.05953534710222, "learning_rate": 9.17252989544316e-06, "loss": 0.4392, "step": 2401 }, { "epoch": 0.8020033388981636, "grad_norm": 0.9329747103710307, "learning_rate": 9.17145920332412e-06, "loss": 0.4474, "step": 2402 }, { "epoch": 0.8023372287145242, "grad_norm": 0.9894927537361413, "learning_rate": 9.17038788152455e-06, "loss": 0.463, "step": 2403 }, { "epoch": 0.8026711185308848, "grad_norm": 0.9386364059639872, "learning_rate": 9.169315930206161e-06, "loss": 0.4489, "step": 2404 }, { "epoch": 0.8030050083472454, "grad_norm": 0.9888396468326437, "learning_rate": 9.168243349530767e-06, "loss": 0.4363, "step": 2405 }, { "epoch": 0.803338898163606, "grad_norm": 0.9537474467324023, "learning_rate": 9.167170139660273e-06, "loss": 0.4489, "step": 2406 }, { "epoch": 0.8036727879799667, "grad_norm": 0.9092648960759259, "learning_rate": 9.166096300756679e-06, "loss": 0.4502, "step": 2407 }, { "epoch": 0.8040066777963272, "grad_norm": 0.9607641579554362, "learning_rate": 9.165021832982082e-06, "loss": 0.4477, "step": 2408 }, { "epoch": 0.8043405676126878, "grad_norm": 1.0381444958314945, "learning_rate": 9.163946736498673e-06, "loss": 0.4486, "step": 2409 }, { "epoch": 0.8046744574290484, "grad_norm": 0.8726555149929118, "learning_rate": 9.162871011468735e-06, "loss": 0.4134, "step": 2410 }, { "epoch": 0.805008347245409, "grad_norm": 0.8803092146189985, "learning_rate": 9.16179465805465e-06, "loss": 0.4326, "step": 2411 }, { "epoch": 0.8053422370617697, "grad_norm": 0.9433115165958709, "learning_rate": 9.160717676418892e-06, "loss": 0.4507, "step": 2412 }, { "epoch": 0.8056761268781302, "grad_norm": 0.9648564302758145, "learning_rate": 9.159640066724033e-06, "loss": 0.4617, "step": 2413 }, { "epoch": 0.8060100166944908, "grad_norm": 1.3170284736299749, "learning_rate": 9.158561829132738e-06, "loss": 0.4295, "step": 2414 }, { "epoch": 0.8063439065108514, "grad_norm": 0.9562401133221572, "learning_rate": 9.157482963807763e-06, "loss": 0.4684, "step": 2415 }, { "epoch": 0.806677796327212, "grad_norm": 0.9479493300283173, "learning_rate": 9.156403470911966e-06, "loss": 0.4429, "step": 2416 }, { "epoch": 0.8070116861435727, "grad_norm": 0.9262557863615934, "learning_rate": 9.155323350608295e-06, "loss": 0.455, "step": 2417 }, { "epoch": 0.8073455759599332, "grad_norm": 0.8794986930525976, "learning_rate": 9.154242603059795e-06, "loss": 0.4627, "step": 2418 }, { "epoch": 0.8076794657762938, "grad_norm": 0.9047995660992606, "learning_rate": 9.153161228429603e-06, "loss": 0.458, "step": 2419 }, { "epoch": 0.8080133555926544, "grad_norm": 0.9365519408524693, "learning_rate": 9.152079226880952e-06, "loss": 0.4754, "step": 2420 }, { "epoch": 0.808347245409015, "grad_norm": 0.8895955195665303, "learning_rate": 9.15099659857717e-06, "loss": 0.4317, "step": 2421 }, { "epoch": 0.8086811352253757, "grad_norm": 0.8598105477552769, "learning_rate": 9.149913343681683e-06, "loss": 0.421, "step": 2422 }, { "epoch": 0.8090150250417363, "grad_norm": 0.8840108774200938, "learning_rate": 9.148829462358002e-06, "loss": 0.4511, "step": 2423 }, { "epoch": 0.8093489148580968, "grad_norm": 1.2006333588205598, "learning_rate": 9.147744954769742e-06, "loss": 0.4424, "step": 2424 }, { "epoch": 0.8096828046744574, "grad_norm": 0.9095857688872336, "learning_rate": 9.146659821080608e-06, "loss": 0.446, "step": 2425 }, { "epoch": 0.810016694490818, "grad_norm": 0.8694564637366966, "learning_rate": 9.145574061454404e-06, "loss": 0.4088, "step": 2426 }, { "epoch": 0.8103505843071787, "grad_norm": 0.9649449140718076, "learning_rate": 9.144487676055018e-06, "loss": 0.4488, "step": 2427 }, { "epoch": 0.8106844741235393, "grad_norm": 0.9470265346473284, "learning_rate": 9.143400665046448e-06, "loss": 0.4346, "step": 2428 }, { "epoch": 0.8110183639398998, "grad_norm": 0.9330584654047133, "learning_rate": 9.14231302859277e-06, "loss": 0.4487, "step": 2429 }, { "epoch": 0.8113522537562604, "grad_norm": 0.9008919034157215, "learning_rate": 9.141224766858167e-06, "loss": 0.4351, "step": 2430 }, { "epoch": 0.811686143572621, "grad_norm": 0.8986109478396762, "learning_rate": 9.140135880006913e-06, "loss": 0.4281, "step": 2431 }, { "epoch": 0.8120200333889817, "grad_norm": 0.9394623292687391, "learning_rate": 9.139046368203371e-06, "loss": 0.444, "step": 2432 }, { "epoch": 0.8123539232053423, "grad_norm": 0.8924095402226968, "learning_rate": 9.137956231612006e-06, "loss": 0.4438, "step": 2433 }, { "epoch": 0.8126878130217028, "grad_norm": 0.8776661404551427, "learning_rate": 9.136865470397372e-06, "loss": 0.4366, "step": 2434 }, { "epoch": 0.8130217028380634, "grad_norm": 0.9052459002538257, "learning_rate": 9.135774084724119e-06, "loss": 0.4348, "step": 2435 }, { "epoch": 0.813355592654424, "grad_norm": 0.9321475699168374, "learning_rate": 9.134682074756993e-06, "loss": 0.4517, "step": 2436 }, { "epoch": 0.8136894824707847, "grad_norm": 0.9443592372786701, "learning_rate": 9.13358944066083e-06, "loss": 0.4519, "step": 2437 }, { "epoch": 0.8140233722871453, "grad_norm": 0.903661530829237, "learning_rate": 9.132496182600566e-06, "loss": 0.4185, "step": 2438 }, { "epoch": 0.8143572621035059, "grad_norm": 0.9519744833651546, "learning_rate": 9.131402300741224e-06, "loss": 0.4637, "step": 2439 }, { "epoch": 0.8146911519198664, "grad_norm": 0.9734164141205267, "learning_rate": 9.130307795247928e-06, "loss": 0.4479, "step": 2440 }, { "epoch": 0.815025041736227, "grad_norm": 0.9892324449642432, "learning_rate": 9.129212666285895e-06, "loss": 0.4813, "step": 2441 }, { "epoch": 0.8153589315525877, "grad_norm": 0.9237401527271896, "learning_rate": 9.128116914020428e-06, "loss": 0.4559, "step": 2442 }, { "epoch": 0.8156928213689483, "grad_norm": 0.9179015514406167, "learning_rate": 9.127020538616937e-06, "loss": 0.4607, "step": 2443 }, { "epoch": 0.8160267111853089, "grad_norm": 0.8852457881898803, "learning_rate": 9.125923540240917e-06, "loss": 0.4367, "step": 2444 }, { "epoch": 0.8163606010016694, "grad_norm": 0.9500803662390883, "learning_rate": 9.124825919057959e-06, "loss": 0.4497, "step": 2445 }, { "epoch": 0.81669449081803, "grad_norm": 0.9172185686244025, "learning_rate": 9.123727675233748e-06, "loss": 0.438, "step": 2446 }, { "epoch": 0.8170283806343906, "grad_norm": 0.9141399145458131, "learning_rate": 9.122628808934067e-06, "loss": 0.4541, "step": 2447 }, { "epoch": 0.8173622704507513, "grad_norm": 0.9423848738194638, "learning_rate": 9.121529320324787e-06, "loss": 0.4535, "step": 2448 }, { "epoch": 0.8176961602671119, "grad_norm": 0.9611702616935572, "learning_rate": 9.120429209571875e-06, "loss": 0.4714, "step": 2449 }, { "epoch": 0.8180300500834724, "grad_norm": 0.9557899497171077, "learning_rate": 9.119328476841393e-06, "loss": 0.467, "step": 2450 }, { "epoch": 0.818363939899833, "grad_norm": 1.0546612752968827, "learning_rate": 9.118227122299498e-06, "loss": 0.4475, "step": 2451 }, { "epoch": 0.8186978297161936, "grad_norm": 0.9301240792834949, "learning_rate": 9.117125146112436e-06, "loss": 0.4634, "step": 2452 }, { "epoch": 0.8190317195325543, "grad_norm": 0.9234396560309909, "learning_rate": 9.116022548446553e-06, "loss": 0.4317, "step": 2453 }, { "epoch": 0.8193656093489149, "grad_norm": 0.9179349262920742, "learning_rate": 9.114919329468283e-06, "loss": 0.4495, "step": 2454 }, { "epoch": 0.8196994991652755, "grad_norm": 0.9329106851643484, "learning_rate": 9.113815489344158e-06, "loss": 0.4389, "step": 2455 }, { "epoch": 0.820033388981636, "grad_norm": 0.9226920672768804, "learning_rate": 9.112711028240804e-06, "loss": 0.4385, "step": 2456 }, { "epoch": 0.8203672787979966, "grad_norm": 0.9359162226312534, "learning_rate": 9.111605946324936e-06, "loss": 0.4612, "step": 2457 }, { "epoch": 0.8207011686143573, "grad_norm": 0.9638119759746036, "learning_rate": 9.110500243763367e-06, "loss": 0.4531, "step": 2458 }, { "epoch": 0.8210350584307179, "grad_norm": 0.909254964568198, "learning_rate": 9.109393920723001e-06, "loss": 0.4544, "step": 2459 }, { "epoch": 0.8213689482470785, "grad_norm": 0.917687479825964, "learning_rate": 9.108286977370842e-06, "loss": 0.4302, "step": 2460 }, { "epoch": 0.821702838063439, "grad_norm": 0.8835148639210916, "learning_rate": 9.107179413873978e-06, "loss": 0.4285, "step": 2461 }, { "epoch": 0.8220367278797996, "grad_norm": 0.9389900399416361, "learning_rate": 9.106071230399599e-06, "loss": 0.4547, "step": 2462 }, { "epoch": 0.8223706176961603, "grad_norm": 0.925554750144728, "learning_rate": 9.10496242711498e-06, "loss": 0.4361, "step": 2463 }, { "epoch": 0.8227045075125209, "grad_norm": 0.8999317007830407, "learning_rate": 9.1038530041875e-06, "loss": 0.4485, "step": 2464 }, { "epoch": 0.8230383973288815, "grad_norm": 0.9458842722247892, "learning_rate": 9.10274296178462e-06, "loss": 0.449, "step": 2465 }, { "epoch": 0.823372287145242, "grad_norm": 0.9345858660347529, "learning_rate": 9.101632300073907e-06, "loss": 0.4275, "step": 2466 }, { "epoch": 0.8237061769616026, "grad_norm": 0.9162409979440616, "learning_rate": 9.100521019223012e-06, "loss": 0.4642, "step": 2467 }, { "epoch": 0.8240400667779633, "grad_norm": 0.9130917106434153, "learning_rate": 9.099409119399681e-06, "loss": 0.4379, "step": 2468 }, { "epoch": 0.8243739565943239, "grad_norm": 0.8975139863927779, "learning_rate": 9.098296600771758e-06, "loss": 0.4544, "step": 2469 }, { "epoch": 0.8247078464106845, "grad_norm": 0.9081235294457464, "learning_rate": 9.097183463507176e-06, "loss": 0.4482, "step": 2470 }, { "epoch": 0.8250417362270451, "grad_norm": 0.8996089536810798, "learning_rate": 9.09606970777396e-06, "loss": 0.4549, "step": 2471 }, { "epoch": 0.8253756260434056, "grad_norm": 0.9080395282736542, "learning_rate": 9.094955333740238e-06, "loss": 0.4501, "step": 2472 }, { "epoch": 0.8257095158597663, "grad_norm": 0.8633903785828803, "learning_rate": 9.093840341574219e-06, "loss": 0.4399, "step": 2473 }, { "epoch": 0.8260434056761269, "grad_norm": 0.8845241877052109, "learning_rate": 9.09272473144421e-06, "loss": 0.4502, "step": 2474 }, { "epoch": 0.8263772954924875, "grad_norm": 0.8769669602941063, "learning_rate": 9.091608503518615e-06, "loss": 0.4318, "step": 2475 }, { "epoch": 0.8267111853088481, "grad_norm": 0.8885563745413152, "learning_rate": 9.090491657965928e-06, "loss": 0.4411, "step": 2476 }, { "epoch": 0.8270450751252086, "grad_norm": 0.8975653517962502, "learning_rate": 9.089374194954735e-06, "loss": 0.4491, "step": 2477 }, { "epoch": 0.8273789649415693, "grad_norm": 0.8827594500478251, "learning_rate": 9.088256114653718e-06, "loss": 0.4453, "step": 2478 }, { "epoch": 0.8277128547579299, "grad_norm": 0.9137640349737506, "learning_rate": 9.087137417231648e-06, "loss": 0.4418, "step": 2479 }, { "epoch": 0.8280467445742905, "grad_norm": 0.8862368008296858, "learning_rate": 9.086018102857396e-06, "loss": 0.4415, "step": 2480 }, { "epoch": 0.8283806343906511, "grad_norm": 0.9166492677074957, "learning_rate": 9.08489817169992e-06, "loss": 0.4476, "step": 2481 }, { "epoch": 0.8287145242070116, "grad_norm": 0.9532268728738692, "learning_rate": 9.083777623928275e-06, "loss": 0.4455, "step": 2482 }, { "epoch": 0.8290484140233723, "grad_norm": 0.9692674941062865, "learning_rate": 9.082656459711608e-06, "loss": 0.4646, "step": 2483 }, { "epoch": 0.8293823038397329, "grad_norm": 0.9078495369030468, "learning_rate": 9.081534679219153e-06, "loss": 0.4351, "step": 2484 }, { "epoch": 0.8297161936560935, "grad_norm": 0.9218307265037251, "learning_rate": 9.080412282620247e-06, "loss": 0.4459, "step": 2485 }, { "epoch": 0.8300500834724541, "grad_norm": 0.8884881789922595, "learning_rate": 9.079289270084315e-06, "loss": 0.4246, "step": 2486 }, { "epoch": 0.8303839732888146, "grad_norm": 0.9506072362827308, "learning_rate": 9.078165641780875e-06, "loss": 0.4527, "step": 2487 }, { "epoch": 0.8307178631051753, "grad_norm": 0.8962711636233276, "learning_rate": 9.07704139787954e-06, "loss": 0.4541, "step": 2488 }, { "epoch": 0.8310517529215359, "grad_norm": 0.8858176484745481, "learning_rate": 9.07591653855001e-06, "loss": 0.4364, "step": 2489 }, { "epoch": 0.8313856427378965, "grad_norm": 0.9406429259683662, "learning_rate": 9.074791063962086e-06, "loss": 0.4711, "step": 2490 }, { "epoch": 0.8317195325542571, "grad_norm": 0.9221949095964617, "learning_rate": 9.073664974285657e-06, "loss": 0.4454, "step": 2491 }, { "epoch": 0.8320534223706177, "grad_norm": 0.9237216113948711, "learning_rate": 9.072538269690703e-06, "loss": 0.4488, "step": 2492 }, { "epoch": 0.8323873121869783, "grad_norm": 0.9550410900949092, "learning_rate": 9.071410950347306e-06, "loss": 0.4471, "step": 2493 }, { "epoch": 0.8327212020033389, "grad_norm": 0.8904345164167541, "learning_rate": 9.070283016425628e-06, "loss": 0.4358, "step": 2494 }, { "epoch": 0.8330550918196995, "grad_norm": 0.8891751723364716, "learning_rate": 9.069154468095935e-06, "loss": 0.435, "step": 2495 }, { "epoch": 0.8333889816360601, "grad_norm": 0.9235581454129667, "learning_rate": 9.068025305528579e-06, "loss": 0.4392, "step": 2496 }, { "epoch": 0.8337228714524207, "grad_norm": 0.9235379706051238, "learning_rate": 9.066895528894007e-06, "loss": 0.4322, "step": 2497 }, { "epoch": 0.8340567612687813, "grad_norm": 0.9169694907723195, "learning_rate": 9.065765138362757e-06, "loss": 0.4376, "step": 2498 }, { "epoch": 0.8343906510851419, "grad_norm": 0.9279684493656729, "learning_rate": 9.064634134105464e-06, "loss": 0.4493, "step": 2499 }, { "epoch": 0.8347245409015025, "grad_norm": 0.9053119732087441, "learning_rate": 9.06350251629285e-06, "loss": 0.4455, "step": 2500 }, { "epoch": 0.8350584307178631, "grad_norm": 0.9272311361830234, "learning_rate": 9.062370285095735e-06, "loss": 0.4627, "step": 2501 }, { "epoch": 0.8353923205342237, "grad_norm": 0.9041141792890748, "learning_rate": 9.061237440685027e-06, "loss": 0.4545, "step": 2502 }, { "epoch": 0.8357262103505843, "grad_norm": 0.9070094977620858, "learning_rate": 9.06010398323173e-06, "loss": 0.4172, "step": 2503 }, { "epoch": 0.8360601001669449, "grad_norm": 0.9371413100449117, "learning_rate": 9.058969912906938e-06, "loss": 0.4408, "step": 2504 }, { "epoch": 0.8363939899833055, "grad_norm": 0.9166372981238119, "learning_rate": 9.057835229881839e-06, "loss": 0.4529, "step": 2505 }, { "epoch": 0.8367278797996661, "grad_norm": 0.909855200215244, "learning_rate": 9.056699934327712e-06, "loss": 0.4491, "step": 2506 }, { "epoch": 0.8370617696160267, "grad_norm": 0.9210789276387459, "learning_rate": 9.05556402641593e-06, "loss": 0.4632, "step": 2507 }, { "epoch": 0.8373956594323874, "grad_norm": 0.8797942346902152, "learning_rate": 9.05442750631796e-06, "loss": 0.4332, "step": 2508 }, { "epoch": 0.8377295492487479, "grad_norm": 0.8873187729027525, "learning_rate": 9.053290374205357e-06, "loss": 0.4348, "step": 2509 }, { "epoch": 0.8380634390651085, "grad_norm": 0.8891885343231097, "learning_rate": 9.052152630249772e-06, "loss": 0.4382, "step": 2510 }, { "epoch": 0.8383973288814691, "grad_norm": 0.9081939256975562, "learning_rate": 9.051014274622947e-06, "loss": 0.4484, "step": 2511 }, { "epoch": 0.8387312186978297, "grad_norm": 0.8821647037301213, "learning_rate": 9.049875307496714e-06, "loss": 0.4435, "step": 2512 }, { "epoch": 0.8390651085141904, "grad_norm": 0.896185066308062, "learning_rate": 9.048735729043004e-06, "loss": 0.4316, "step": 2513 }, { "epoch": 0.8393989983305509, "grad_norm": 0.947885639896305, "learning_rate": 9.047595539433833e-06, "loss": 0.4577, "step": 2514 }, { "epoch": 0.8397328881469115, "grad_norm": 0.8650689120046435, "learning_rate": 9.046454738841313e-06, "loss": 0.4315, "step": 2515 }, { "epoch": 0.8400667779632721, "grad_norm": 0.8802071838018491, "learning_rate": 9.045313327437649e-06, "loss": 0.4324, "step": 2516 }, { "epoch": 0.8404006677796327, "grad_norm": 0.8819385633227905, "learning_rate": 9.044171305395133e-06, "loss": 0.4361, "step": 2517 }, { "epoch": 0.8407345575959934, "grad_norm": 0.8884843927929081, "learning_rate": 9.043028672886156e-06, "loss": 0.4466, "step": 2518 }, { "epoch": 0.8410684474123539, "grad_norm": 0.9485451783124003, "learning_rate": 9.0418854300832e-06, "loss": 0.4552, "step": 2519 }, { "epoch": 0.8414023372287145, "grad_norm": 0.8954987324904915, "learning_rate": 9.040741577158831e-06, "loss": 0.4105, "step": 2520 }, { "epoch": 0.8417362270450751, "grad_norm": 0.9190437533425639, "learning_rate": 9.039597114285718e-06, "loss": 0.4659, "step": 2521 }, { "epoch": 0.8420701168614357, "grad_norm": 0.9070668635708665, "learning_rate": 9.038452041636616e-06, "loss": 0.4351, "step": 2522 }, { "epoch": 0.8424040066777964, "grad_norm": 0.8827204990225042, "learning_rate": 9.037306359384374e-06, "loss": 0.4373, "step": 2523 }, { "epoch": 0.842737896494157, "grad_norm": 0.9277463940698875, "learning_rate": 9.036160067701931e-06, "loss": 0.4304, "step": 2524 }, { "epoch": 0.8430717863105175, "grad_norm": 0.8963025789402009, "learning_rate": 9.035013166762322e-06, "loss": 0.4432, "step": 2525 }, { "epoch": 0.8434056761268781, "grad_norm": 0.8645128890160518, "learning_rate": 9.033865656738667e-06, "loss": 0.4296, "step": 2526 }, { "epoch": 0.8437395659432387, "grad_norm": 0.8450628848185954, "learning_rate": 9.032717537804185e-06, "loss": 0.4144, "step": 2527 }, { "epoch": 0.8440734557595994, "grad_norm": 0.8986742198478986, "learning_rate": 9.031568810132185e-06, "loss": 0.4447, "step": 2528 }, { "epoch": 0.84440734557596, "grad_norm": 0.9225881935354175, "learning_rate": 9.030419473896067e-06, "loss": 0.438, "step": 2529 }, { "epoch": 0.8447412353923205, "grad_norm": 0.8898957000199518, "learning_rate": 9.029269529269322e-06, "loss": 0.4307, "step": 2530 }, { "epoch": 0.8450751252086811, "grad_norm": 0.8725974862614237, "learning_rate": 9.028118976425533e-06, "loss": 0.4188, "step": 2531 }, { "epoch": 0.8454090150250417, "grad_norm": 0.8936659116655638, "learning_rate": 9.026967815538377e-06, "loss": 0.4434, "step": 2532 }, { "epoch": 0.8457429048414024, "grad_norm": 0.8963715978324962, "learning_rate": 9.025816046781621e-06, "loss": 0.4307, "step": 2533 }, { "epoch": 0.846076794657763, "grad_norm": 0.9331042250278112, "learning_rate": 9.024663670329124e-06, "loss": 0.4485, "step": 2534 }, { "epoch": 0.8464106844741235, "grad_norm": 0.9375873405770133, "learning_rate": 9.023510686354837e-06, "loss": 0.429, "step": 2535 }, { "epoch": 0.8467445742904841, "grad_norm": 0.9866912587882741, "learning_rate": 9.022357095032803e-06, "loss": 0.4656, "step": 2536 }, { "epoch": 0.8470784641068447, "grad_norm": 0.886989246432727, "learning_rate": 9.021202896537156e-06, "loss": 0.4502, "step": 2537 }, { "epoch": 0.8474123539232054, "grad_norm": 0.919143873635726, "learning_rate": 9.020048091042121e-06, "loss": 0.4343, "step": 2538 }, { "epoch": 0.847746243739566, "grad_norm": 0.9083283210342991, "learning_rate": 9.018892678722016e-06, "loss": 0.4292, "step": 2539 }, { "epoch": 0.8480801335559266, "grad_norm": 0.8877506262924333, "learning_rate": 9.017736659751252e-06, "loss": 0.4372, "step": 2540 }, { "epoch": 0.8484140233722871, "grad_norm": 1.034983418498316, "learning_rate": 9.016580034304327e-06, "loss": 0.4523, "step": 2541 }, { "epoch": 0.8487479131886477, "grad_norm": 0.8778372480302741, "learning_rate": 9.015422802555835e-06, "loss": 0.427, "step": 2542 }, { "epoch": 0.8490818030050084, "grad_norm": 0.8557829029081945, "learning_rate": 9.01426496468046e-06, "loss": 0.4241, "step": 2543 }, { "epoch": 0.849415692821369, "grad_norm": 0.8702542220508215, "learning_rate": 9.013106520852975e-06, "loss": 0.4492, "step": 2544 }, { "epoch": 0.8497495826377296, "grad_norm": 0.8520920530450126, "learning_rate": 9.011947471248248e-06, "loss": 0.4241, "step": 2545 }, { "epoch": 0.8500834724540901, "grad_norm": 0.9226300156808886, "learning_rate": 9.01078781604124e-06, "loss": 0.4351, "step": 2546 }, { "epoch": 0.8504173622704507, "grad_norm": 0.8820505621430332, "learning_rate": 9.009627555406996e-06, "loss": 0.4288, "step": 2547 }, { "epoch": 0.8507512520868113, "grad_norm": 0.8710001319872308, "learning_rate": 9.00846668952066e-06, "loss": 0.4133, "step": 2548 }, { "epoch": 0.851085141903172, "grad_norm": 0.8690715888468529, "learning_rate": 9.007305218557463e-06, "loss": 0.4343, "step": 2549 }, { "epoch": 0.8514190317195326, "grad_norm": 0.9097105447057472, "learning_rate": 9.006143142692728e-06, "loss": 0.445, "step": 2550 }, { "epoch": 0.8517529215358931, "grad_norm": 0.9099502151929894, "learning_rate": 9.004980462101873e-06, "loss": 0.4608, "step": 2551 }, { "epoch": 0.8520868113522537, "grad_norm": 0.8725800417236911, "learning_rate": 9.003817176960402e-06, "loss": 0.4296, "step": 2552 }, { "epoch": 0.8524207011686143, "grad_norm": 0.8854017885707383, "learning_rate": 9.002653287443913e-06, "loss": 0.4456, "step": 2553 }, { "epoch": 0.852754590984975, "grad_norm": 0.9118578304133799, "learning_rate": 9.001488793728096e-06, "loss": 0.4448, "step": 2554 }, { "epoch": 0.8530884808013356, "grad_norm": 0.8766760944922589, "learning_rate": 9.000323695988726e-06, "loss": 0.4266, "step": 2555 }, { "epoch": 0.8534223706176962, "grad_norm": 0.9315221559145412, "learning_rate": 8.999157994401682e-06, "loss": 0.4519, "step": 2556 }, { "epoch": 0.8537562604340567, "grad_norm": 0.8644486805591709, "learning_rate": 8.997991689142919e-06, "loss": 0.4163, "step": 2557 }, { "epoch": 0.8540901502504173, "grad_norm": 0.8532250097126132, "learning_rate": 8.996824780388497e-06, "loss": 0.4302, "step": 2558 }, { "epoch": 0.854424040066778, "grad_norm": 0.8893581324324027, "learning_rate": 8.995657268314556e-06, "loss": 0.4323, "step": 2559 }, { "epoch": 0.8547579298831386, "grad_norm": 0.9071544353118776, "learning_rate": 8.99448915309733e-06, "loss": 0.4491, "step": 2560 }, { "epoch": 0.8550918196994992, "grad_norm": 0.8831112207702853, "learning_rate": 8.993320434913152e-06, "loss": 0.4535, "step": 2561 }, { "epoch": 0.8554257095158597, "grad_norm": 0.9586650450380177, "learning_rate": 8.992151113938435e-06, "loss": 0.4547, "step": 2562 }, { "epoch": 0.8557595993322203, "grad_norm": 0.9171590785203464, "learning_rate": 8.990981190349688e-06, "loss": 0.4288, "step": 2563 }, { "epoch": 0.856093489148581, "grad_norm": 0.8999575310272011, "learning_rate": 8.989810664323513e-06, "loss": 0.4335, "step": 2564 }, { "epoch": 0.8564273789649416, "grad_norm": 0.855069360812408, "learning_rate": 8.988639536036597e-06, "loss": 0.4192, "step": 2565 }, { "epoch": 0.8567612687813022, "grad_norm": 0.8509684077755376, "learning_rate": 8.987467805665725e-06, "loss": 0.4293, "step": 2566 }, { "epoch": 0.8570951585976627, "grad_norm": 0.9024361313893352, "learning_rate": 8.986295473387766e-06, "loss": 0.4431, "step": 2567 }, { "epoch": 0.8574290484140233, "grad_norm": 0.9267995216145167, "learning_rate": 8.985122539379686e-06, "loss": 0.4343, "step": 2568 }, { "epoch": 0.857762938230384, "grad_norm": 0.8741253777660463, "learning_rate": 8.98394900381854e-06, "loss": 0.4339, "step": 2569 }, { "epoch": 0.8580968280467446, "grad_norm": 0.9554027458364311, "learning_rate": 8.982774866881467e-06, "loss": 0.4467, "step": 2570 }, { "epoch": 0.8584307178631052, "grad_norm": 0.8713629315457287, "learning_rate": 8.98160012874571e-06, "loss": 0.4206, "step": 2571 }, { "epoch": 0.8587646076794658, "grad_norm": 0.9243588949726832, "learning_rate": 8.98042478958859e-06, "loss": 0.4387, "step": 2572 }, { "epoch": 0.8590984974958263, "grad_norm": 0.9082326521763185, "learning_rate": 8.979248849587526e-06, "loss": 0.4284, "step": 2573 }, { "epoch": 0.859432387312187, "grad_norm": 0.9116411003853994, "learning_rate": 8.978072308920026e-06, "loss": 0.4361, "step": 2574 }, { "epoch": 0.8597662771285476, "grad_norm": 0.9168228120997479, "learning_rate": 8.976895167763688e-06, "loss": 0.4537, "step": 2575 }, { "epoch": 0.8601001669449082, "grad_norm": 0.9099822077538703, "learning_rate": 8.975717426296203e-06, "loss": 0.4531, "step": 2576 }, { "epoch": 0.8604340567612688, "grad_norm": 0.8542941660089832, "learning_rate": 8.974539084695348e-06, "loss": 0.4104, "step": 2577 }, { "epoch": 0.8607679465776293, "grad_norm": 0.8700726284272756, "learning_rate": 8.973360143138993e-06, "loss": 0.4176, "step": 2578 }, { "epoch": 0.86110183639399, "grad_norm": 0.8796851756737367, "learning_rate": 8.9721806018051e-06, "loss": 0.4435, "step": 2579 }, { "epoch": 0.8614357262103506, "grad_norm": 0.8782358807053029, "learning_rate": 8.971000460871724e-06, "loss": 0.4326, "step": 2580 }, { "epoch": 0.8617696160267112, "grad_norm": 0.8944005667069224, "learning_rate": 8.969819720517001e-06, "loss": 0.4396, "step": 2581 }, { "epoch": 0.8621035058430718, "grad_norm": 0.8845175817899639, "learning_rate": 8.968638380919166e-06, "loss": 0.4189, "step": 2582 }, { "epoch": 0.8624373956594323, "grad_norm": 0.94171526393676, "learning_rate": 8.967456442256544e-06, "loss": 0.4522, "step": 2583 }, { "epoch": 0.862771285475793, "grad_norm": 0.9182940261624618, "learning_rate": 8.966273904707543e-06, "loss": 0.4352, "step": 2584 }, { "epoch": 0.8631051752921536, "grad_norm": 0.9169957283506691, "learning_rate": 8.965090768450673e-06, "loss": 0.4365, "step": 2585 }, { "epoch": 0.8634390651085142, "grad_norm": 0.9465342227202277, "learning_rate": 8.963907033664523e-06, "loss": 0.4501, "step": 2586 }, { "epoch": 0.8637729549248748, "grad_norm": 0.9627927112282909, "learning_rate": 8.962722700527779e-06, "loss": 0.4521, "step": 2587 }, { "epoch": 0.8641068447412354, "grad_norm": 0.8829752104761261, "learning_rate": 8.961537769219217e-06, "loss": 0.4468, "step": 2588 }, { "epoch": 0.864440734557596, "grad_norm": 0.897400862832209, "learning_rate": 8.9603522399177e-06, "loss": 0.4456, "step": 2589 }, { "epoch": 0.8647746243739566, "grad_norm": 0.9157291412194258, "learning_rate": 8.959166112802187e-06, "loss": 0.4635, "step": 2590 }, { "epoch": 0.8651085141903172, "grad_norm": 0.9050353094256186, "learning_rate": 8.957979388051718e-06, "loss": 0.4464, "step": 2591 }, { "epoch": 0.8654424040066778, "grad_norm": 0.951224860802502, "learning_rate": 8.956792065845433e-06, "loss": 0.4498, "step": 2592 }, { "epoch": 0.8657762938230384, "grad_norm": 0.8589032296019774, "learning_rate": 8.955604146362557e-06, "loss": 0.4283, "step": 2593 }, { "epoch": 0.866110183639399, "grad_norm": 0.8572850092671688, "learning_rate": 8.954415629782404e-06, "loss": 0.4146, "step": 2594 }, { "epoch": 0.8664440734557596, "grad_norm": 0.9110103954338731, "learning_rate": 8.953226516284383e-06, "loss": 0.4486, "step": 2595 }, { "epoch": 0.8667779632721202, "grad_norm": 0.8870076813141773, "learning_rate": 8.95203680604799e-06, "loss": 0.4297, "step": 2596 }, { "epoch": 0.8671118530884808, "grad_norm": 0.9006229471427801, "learning_rate": 8.95084649925281e-06, "loss": 0.4476, "step": 2597 }, { "epoch": 0.8674457429048414, "grad_norm": 0.8611280945439428, "learning_rate": 8.94965559607852e-06, "loss": 0.4067, "step": 2598 }, { "epoch": 0.867779632721202, "grad_norm": 0.8672840847645924, "learning_rate": 8.948464096704888e-06, "loss": 0.4581, "step": 2599 }, { "epoch": 0.8681135225375626, "grad_norm": 0.9018634038070579, "learning_rate": 8.947272001311766e-06, "loss": 0.4488, "step": 2600 }, { "epoch": 0.8684474123539232, "grad_norm": 0.9354777614341907, "learning_rate": 8.946079310079106e-06, "loss": 0.4596, "step": 2601 }, { "epoch": 0.8687813021702838, "grad_norm": 0.9094565590935809, "learning_rate": 8.94488602318694e-06, "loss": 0.4629, "step": 2602 }, { "epoch": 0.8691151919866444, "grad_norm": 0.872523201578843, "learning_rate": 8.943692140815399e-06, "loss": 0.4329, "step": 2603 }, { "epoch": 0.8694490818030051, "grad_norm": 0.867011583200951, "learning_rate": 8.942497663144696e-06, "loss": 0.4093, "step": 2604 }, { "epoch": 0.8697829716193656, "grad_norm": 0.9180980562094909, "learning_rate": 8.941302590355136e-06, "loss": 0.4475, "step": 2605 }, { "epoch": 0.8701168614357262, "grad_norm": 0.8651345129428634, "learning_rate": 8.940106922627117e-06, "loss": 0.4285, "step": 2606 }, { "epoch": 0.8704507512520868, "grad_norm": 0.9110860043913084, "learning_rate": 8.938910660141122e-06, "loss": 0.4414, "step": 2607 }, { "epoch": 0.8707846410684474, "grad_norm": 0.901461161518182, "learning_rate": 8.937713803077732e-06, "loss": 0.4342, "step": 2608 }, { "epoch": 0.8711185308848081, "grad_norm": 0.8288319782291046, "learning_rate": 8.936516351617607e-06, "loss": 0.4255, "step": 2609 }, { "epoch": 0.8714524207011686, "grad_norm": 0.887428484306359, "learning_rate": 8.935318305941502e-06, "loss": 0.4201, "step": 2610 }, { "epoch": 0.8717863105175292, "grad_norm": 0.9264120581426981, "learning_rate": 8.934119666230266e-06, "loss": 0.4416, "step": 2611 }, { "epoch": 0.8721202003338898, "grad_norm": 0.9053473399355904, "learning_rate": 8.932920432664831e-06, "loss": 0.4519, "step": 2612 }, { "epoch": 0.8724540901502504, "grad_norm": 0.9026653764037564, "learning_rate": 8.931720605426219e-06, "loss": 0.4371, "step": 2613 }, { "epoch": 0.8727879799666111, "grad_norm": 0.8965529161841463, "learning_rate": 8.930520184695546e-06, "loss": 0.4255, "step": 2614 }, { "epoch": 0.8731218697829716, "grad_norm": 0.9163476854622896, "learning_rate": 8.929319170654013e-06, "loss": 0.4291, "step": 2615 }, { "epoch": 0.8734557595993322, "grad_norm": 0.8915898311320655, "learning_rate": 8.928117563482915e-06, "loss": 0.4196, "step": 2616 }, { "epoch": 0.8737896494156928, "grad_norm": 0.936707316554064, "learning_rate": 8.926915363363633e-06, "loss": 0.4527, "step": 2617 }, { "epoch": 0.8741235392320534, "grad_norm": 0.9343068330609197, "learning_rate": 8.92571257047764e-06, "loss": 0.4473, "step": 2618 }, { "epoch": 0.8744574290484141, "grad_norm": 0.8972595568062287, "learning_rate": 8.924509185006494e-06, "loss": 0.4396, "step": 2619 }, { "epoch": 0.8747913188647746, "grad_norm": 0.9263479502442729, "learning_rate": 8.92330520713185e-06, "loss": 0.4349, "step": 2620 }, { "epoch": 0.8751252086811352, "grad_norm": 0.8985989530138839, "learning_rate": 8.922100637035445e-06, "loss": 0.4294, "step": 2621 }, { "epoch": 0.8754590984974958, "grad_norm": 0.8990497037644154, "learning_rate": 8.92089547489911e-06, "loss": 0.434, "step": 2622 }, { "epoch": 0.8757929883138564, "grad_norm": 0.8611680619177737, "learning_rate": 8.919689720904763e-06, "loss": 0.4114, "step": 2623 }, { "epoch": 0.8761268781302171, "grad_norm": 0.8463435891212536, "learning_rate": 8.918483375234413e-06, "loss": 0.4186, "step": 2624 }, { "epoch": 0.8764607679465777, "grad_norm": 0.8810263228855197, "learning_rate": 8.917276438070158e-06, "loss": 0.4338, "step": 2625 }, { "epoch": 0.8767946577629382, "grad_norm": 0.9406626601345185, "learning_rate": 8.91606890959418e-06, "loss": 0.4289, "step": 2626 }, { "epoch": 0.8771285475792988, "grad_norm": 0.9107282147352365, "learning_rate": 8.914860789988764e-06, "loss": 0.4281, "step": 2627 }, { "epoch": 0.8774624373956594, "grad_norm": 0.8983035140280813, "learning_rate": 8.913652079436269e-06, "loss": 0.4235, "step": 2628 }, { "epoch": 0.8777963272120201, "grad_norm": 0.8390574785348393, "learning_rate": 8.912442778119147e-06, "loss": 0.4245, "step": 2629 }, { "epoch": 0.8781302170283807, "grad_norm": 0.8401757815584179, "learning_rate": 8.911232886219949e-06, "loss": 0.4074, "step": 2630 }, { "epoch": 0.8784641068447412, "grad_norm": 0.8857318472057116, "learning_rate": 8.910022403921303e-06, "loss": 0.4315, "step": 2631 }, { "epoch": 0.8787979966611018, "grad_norm": 0.8852699137822636, "learning_rate": 8.908811331405934e-06, "loss": 0.459, "step": 2632 }, { "epoch": 0.8791318864774624, "grad_norm": 0.9187114626522203, "learning_rate": 8.90759966885665e-06, "loss": 0.4479, "step": 2633 }, { "epoch": 0.879465776293823, "grad_norm": 0.8372833664202498, "learning_rate": 8.906387416456352e-06, "loss": 0.4232, "step": 2634 }, { "epoch": 0.8797996661101837, "grad_norm": 0.87226962529294, "learning_rate": 8.905174574388032e-06, "loss": 0.4421, "step": 2635 }, { "epoch": 0.8801335559265442, "grad_norm": 0.8438947870181643, "learning_rate": 8.903961142834762e-06, "loss": 0.4209, "step": 2636 }, { "epoch": 0.8804674457429048, "grad_norm": 0.9025009337576939, "learning_rate": 8.902747121979716e-06, "loss": 0.4218, "step": 2637 }, { "epoch": 0.8808013355592654, "grad_norm": 0.912666920723091, "learning_rate": 8.901532512006145e-06, "loss": 0.4595, "step": 2638 }, { "epoch": 0.881135225375626, "grad_norm": 0.8883111442259761, "learning_rate": 8.900317313097396e-06, "loss": 0.4526, "step": 2639 }, { "epoch": 0.8814691151919867, "grad_norm": 0.8621923974570158, "learning_rate": 8.899101525436904e-06, "loss": 0.4342, "step": 2640 }, { "epoch": 0.8818030050083473, "grad_norm": 0.8559344181835407, "learning_rate": 8.89788514920819e-06, "loss": 0.4181, "step": 2641 }, { "epoch": 0.8821368948247078, "grad_norm": 0.8715408099796064, "learning_rate": 8.896668184594866e-06, "loss": 0.4514, "step": 2642 }, { "epoch": 0.8824707846410684, "grad_norm": 0.9158060805645524, "learning_rate": 8.895450631780633e-06, "loss": 0.4485, "step": 2643 }, { "epoch": 0.882804674457429, "grad_norm": 0.8522835175820749, "learning_rate": 8.89423249094928e-06, "loss": 0.4186, "step": 2644 }, { "epoch": 0.8831385642737897, "grad_norm": 0.8891210350566975, "learning_rate": 8.893013762284685e-06, "loss": 0.4541, "step": 2645 }, { "epoch": 0.8834724540901503, "grad_norm": 0.8765262288576069, "learning_rate": 8.891794445970814e-06, "loss": 0.4284, "step": 2646 }, { "epoch": 0.8838063439065108, "grad_norm": 0.8763055757356052, "learning_rate": 8.890574542191724e-06, "loss": 0.4257, "step": 2647 }, { "epoch": 0.8841402337228714, "grad_norm": 0.8920205733214188, "learning_rate": 8.889354051131556e-06, "loss": 0.4219, "step": 2648 }, { "epoch": 0.884474123539232, "grad_norm": 0.9171101971534703, "learning_rate": 8.888132972974547e-06, "loss": 0.439, "step": 2649 }, { "epoch": 0.8848080133555927, "grad_norm": 0.9094240911933612, "learning_rate": 8.886911307905017e-06, "loss": 0.4337, "step": 2650 }, { "epoch": 0.8851419031719533, "grad_norm": 0.889742585463896, "learning_rate": 8.885689056107375e-06, "loss": 0.4387, "step": 2651 }, { "epoch": 0.8854757929883138, "grad_norm": 0.8599218163818061, "learning_rate": 8.884466217766118e-06, "loss": 0.4326, "step": 2652 }, { "epoch": 0.8858096828046744, "grad_norm": 1.0343645195269153, "learning_rate": 8.883242793065835e-06, "loss": 0.4448, "step": 2653 }, { "epoch": 0.886143572621035, "grad_norm": 0.8483178035544707, "learning_rate": 8.882018782191205e-06, "loss": 0.43, "step": 2654 }, { "epoch": 0.8864774624373957, "grad_norm": 0.8798560487019703, "learning_rate": 8.880794185326987e-06, "loss": 0.4428, "step": 2655 }, { "epoch": 0.8868113522537563, "grad_norm": 0.8770725516374984, "learning_rate": 8.879569002658032e-06, "loss": 0.4281, "step": 2656 }, { "epoch": 0.8871452420701169, "grad_norm": 0.8804718011126899, "learning_rate": 8.878343234369287e-06, "loss": 0.4323, "step": 2657 }, { "epoch": 0.8874791318864774, "grad_norm": 0.8398119417706017, "learning_rate": 8.87711688064578e-06, "loss": 0.4214, "step": 2658 }, { "epoch": 0.887813021702838, "grad_norm": 0.9508477800563611, "learning_rate": 8.875889941672625e-06, "loss": 0.4622, "step": 2659 }, { "epoch": 0.8881469115191987, "grad_norm": 0.9402907380463016, "learning_rate": 8.874662417635032e-06, "loss": 0.4401, "step": 2660 }, { "epoch": 0.8884808013355593, "grad_norm": 0.8990419822360964, "learning_rate": 8.873434308718292e-06, "loss": 0.4339, "step": 2661 }, { "epoch": 0.8888146911519199, "grad_norm": 0.9039115489022903, "learning_rate": 8.87220561510779e-06, "loss": 0.4364, "step": 2662 }, { "epoch": 0.8891485809682804, "grad_norm": 0.9000079614696426, "learning_rate": 8.870976336988995e-06, "loss": 0.4324, "step": 2663 }, { "epoch": 0.889482470784641, "grad_norm": 0.8302725020618356, "learning_rate": 8.86974647454747e-06, "loss": 0.4152, "step": 2664 }, { "epoch": 0.8898163606010017, "grad_norm": 0.8931544368065488, "learning_rate": 8.868516027968859e-06, "loss": 0.4416, "step": 2665 }, { "epoch": 0.8901502504173623, "grad_norm": 0.8574262385370206, "learning_rate": 8.867284997438897e-06, "loss": 0.4197, "step": 2666 }, { "epoch": 0.8904841402337229, "grad_norm": 0.8628503981213133, "learning_rate": 8.86605338314341e-06, "loss": 0.4147, "step": 2667 }, { "epoch": 0.8908180300500834, "grad_norm": 0.8663115708377856, "learning_rate": 8.864821185268309e-06, "loss": 0.4265, "step": 2668 }, { "epoch": 0.891151919866444, "grad_norm": 0.871599413187569, "learning_rate": 8.863588403999594e-06, "loss": 0.4397, "step": 2669 }, { "epoch": 0.8914858096828047, "grad_norm": 0.8947548396010802, "learning_rate": 8.862355039523354e-06, "loss": 0.4417, "step": 2670 }, { "epoch": 0.8918196994991653, "grad_norm": 0.8824294142192666, "learning_rate": 8.861121092025762e-06, "loss": 0.4511, "step": 2671 }, { "epoch": 0.8921535893155259, "grad_norm": 0.8366558395230824, "learning_rate": 8.859886561693083e-06, "loss": 0.4303, "step": 2672 }, { "epoch": 0.8924874791318865, "grad_norm": 0.8608952783260723, "learning_rate": 8.858651448711672e-06, "loss": 0.431, "step": 2673 }, { "epoch": 0.892821368948247, "grad_norm": 0.8691936500503159, "learning_rate": 8.857415753267966e-06, "loss": 0.4175, "step": 2674 }, { "epoch": 0.8931552587646077, "grad_norm": 0.8755841115979169, "learning_rate": 8.856179475548494e-06, "loss": 0.4326, "step": 2675 }, { "epoch": 0.8934891485809683, "grad_norm": 0.8305803798472904, "learning_rate": 8.85494261573987e-06, "loss": 0.4249, "step": 2676 }, { "epoch": 0.8938230383973289, "grad_norm": 0.947918320327318, "learning_rate": 8.853705174028799e-06, "loss": 0.4406, "step": 2677 }, { "epoch": 0.8941569282136895, "grad_norm": 0.8771913448277646, "learning_rate": 8.852467150602074e-06, "loss": 0.4041, "step": 2678 }, { "epoch": 0.89449081803005, "grad_norm": 0.9000485730830148, "learning_rate": 8.85122854564657e-06, "loss": 0.438, "step": 2679 }, { "epoch": 0.8948247078464107, "grad_norm": 0.8791018513037664, "learning_rate": 8.84998935934926e-06, "loss": 0.4334, "step": 2680 }, { "epoch": 0.8951585976627713, "grad_norm": 0.8518234101539034, "learning_rate": 8.848749591897194e-06, "loss": 0.4209, "step": 2681 }, { "epoch": 0.8954924874791319, "grad_norm": 0.8985601870041862, "learning_rate": 8.847509243477518e-06, "loss": 0.4207, "step": 2682 }, { "epoch": 0.8958263772954925, "grad_norm": 0.8759796560196552, "learning_rate": 8.846268314277458e-06, "loss": 0.4394, "step": 2683 }, { "epoch": 0.896160267111853, "grad_norm": 0.9461408761560569, "learning_rate": 8.845026804484333e-06, "loss": 0.4541, "step": 2684 }, { "epoch": 0.8964941569282137, "grad_norm": 0.8574107635474115, "learning_rate": 8.843784714285552e-06, "loss": 0.4241, "step": 2685 }, { "epoch": 0.8968280467445743, "grad_norm": 0.8121819915230722, "learning_rate": 8.842542043868604e-06, "loss": 0.4136, "step": 2686 }, { "epoch": 0.8971619365609349, "grad_norm": 0.9146957822664098, "learning_rate": 8.841298793421075e-06, "loss": 0.4558, "step": 2687 }, { "epoch": 0.8974958263772955, "grad_norm": 0.8361805336262271, "learning_rate": 8.840054963130628e-06, "loss": 0.4015, "step": 2688 }, { "epoch": 0.8978297161936561, "grad_norm": 0.8381804558278335, "learning_rate": 8.83881055318502e-06, "loss": 0.4116, "step": 2689 }, { "epoch": 0.8981636060100167, "grad_norm": 0.863182613541081, "learning_rate": 8.837565563772096e-06, "loss": 0.4144, "step": 2690 }, { "epoch": 0.8984974958263773, "grad_norm": 0.8650899158093195, "learning_rate": 8.836319995079786e-06, "loss": 0.4416, "step": 2691 }, { "epoch": 0.8988313856427379, "grad_norm": 0.9006045935748442, "learning_rate": 8.835073847296107e-06, "loss": 0.4604, "step": 2692 }, { "epoch": 0.8991652754590985, "grad_norm": 0.9355928709902603, "learning_rate": 8.833827120609166e-06, "loss": 0.4494, "step": 2693 }, { "epoch": 0.8994991652754591, "grad_norm": 0.8861542224104186, "learning_rate": 8.832579815207156e-06, "loss": 0.4539, "step": 2694 }, { "epoch": 0.8998330550918197, "grad_norm": 0.8373959713303635, "learning_rate": 8.831331931278357e-06, "loss": 0.4316, "step": 2695 }, { "epoch": 0.9001669449081803, "grad_norm": 0.873163448963221, "learning_rate": 8.830083469011137e-06, "loss": 0.4301, "step": 2696 }, { "epoch": 0.9005008347245409, "grad_norm": 0.9499069583508047, "learning_rate": 8.82883442859395e-06, "loss": 0.442, "step": 2697 }, { "epoch": 0.9008347245409015, "grad_norm": 0.8633435602719216, "learning_rate": 8.82758481021534e-06, "loss": 0.4265, "step": 2698 }, { "epoch": 0.9011686143572621, "grad_norm": 0.8698045021380919, "learning_rate": 8.826334614063935e-06, "loss": 0.4288, "step": 2699 }, { "epoch": 0.9015025041736227, "grad_norm": 0.8732843166761568, "learning_rate": 8.825083840328453e-06, "loss": 0.4213, "step": 2700 }, { "epoch": 0.9018363939899833, "grad_norm": 0.8451009607482431, "learning_rate": 8.823832489197696e-06, "loss": 0.4261, "step": 2701 }, { "epoch": 0.9021702838063439, "grad_norm": 0.8865701092523374, "learning_rate": 8.822580560860558e-06, "loss": 0.4311, "step": 2702 }, { "epoch": 0.9025041736227045, "grad_norm": 0.892810586602944, "learning_rate": 8.821328055506016e-06, "loss": 0.4483, "step": 2703 }, { "epoch": 0.9028380634390651, "grad_norm": 0.9022678012078308, "learning_rate": 8.820074973323133e-06, "loss": 0.4198, "step": 2704 }, { "epoch": 0.9031719532554258, "grad_norm": 0.9053647786533938, "learning_rate": 8.818821314501065e-06, "loss": 0.4304, "step": 2705 }, { "epoch": 0.9035058430717863, "grad_norm": 0.8782488149793608, "learning_rate": 8.817567079229048e-06, "loss": 0.4441, "step": 2706 }, { "epoch": 0.9038397328881469, "grad_norm": 0.8936714187245556, "learning_rate": 8.816312267696411e-06, "loss": 0.4385, "step": 2707 }, { "epoch": 0.9041736227045075, "grad_norm": 0.9787426573496121, "learning_rate": 8.815056880092566e-06, "loss": 0.4466, "step": 2708 }, { "epoch": 0.9045075125208681, "grad_norm": 0.95320182769173, "learning_rate": 8.813800916607014e-06, "loss": 0.4461, "step": 2709 }, { "epoch": 0.9048414023372288, "grad_norm": 0.9259610092295509, "learning_rate": 8.812544377429344e-06, "loss": 0.4514, "step": 2710 }, { "epoch": 0.9051752921535893, "grad_norm": 0.9328582176935485, "learning_rate": 8.811287262749226e-06, "loss": 0.449, "step": 2711 }, { "epoch": 0.9055091819699499, "grad_norm": 0.8861671587467047, "learning_rate": 8.810029572756426e-06, "loss": 0.4322, "step": 2712 }, { "epoch": 0.9058430717863105, "grad_norm": 0.9365010350350416, "learning_rate": 8.808771307640788e-06, "loss": 0.449, "step": 2713 }, { "epoch": 0.9061769616026711, "grad_norm": 0.8843458485028963, "learning_rate": 8.80751246759225e-06, "loss": 0.4261, "step": 2714 }, { "epoch": 0.9065108514190318, "grad_norm": 0.9032122053829788, "learning_rate": 8.806253052800828e-06, "loss": 0.4384, "step": 2715 }, { "epoch": 0.9068447412353923, "grad_norm": 1.0361753953995512, "learning_rate": 8.804993063456637e-06, "loss": 0.4514, "step": 2716 }, { "epoch": 0.9071786310517529, "grad_norm": 0.8842632569323312, "learning_rate": 8.803732499749868e-06, "loss": 0.4305, "step": 2717 }, { "epoch": 0.9075125208681135, "grad_norm": 0.8989769680062165, "learning_rate": 8.802471361870806e-06, "loss": 0.4395, "step": 2718 }, { "epoch": 0.9078464106844741, "grad_norm": 0.8616135647480325, "learning_rate": 8.801209650009813e-06, "loss": 0.4363, "step": 2719 }, { "epoch": 0.9081803005008348, "grad_norm": 0.8715910007144207, "learning_rate": 8.799947364357354e-06, "loss": 0.4126, "step": 2720 }, { "epoch": 0.9085141903171954, "grad_norm": 0.8981207906338136, "learning_rate": 8.79868450510396e-06, "loss": 0.4436, "step": 2721 }, { "epoch": 0.9088480801335559, "grad_norm": 0.8874787864069271, "learning_rate": 8.797421072440266e-06, "loss": 0.4538, "step": 2722 }, { "epoch": 0.9091819699499165, "grad_norm": 0.837172549248435, "learning_rate": 8.796157066556986e-06, "loss": 0.4126, "step": 2723 }, { "epoch": 0.9095158597662771, "grad_norm": 0.9328397300805902, "learning_rate": 8.794892487644917e-06, "loss": 0.4433, "step": 2724 }, { "epoch": 0.9098497495826378, "grad_norm": 0.8590210528125788, "learning_rate": 8.793627335894953e-06, "loss": 0.4365, "step": 2725 }, { "epoch": 0.9101836393989984, "grad_norm": 0.8901446730330053, "learning_rate": 8.792361611498064e-06, "loss": 0.4342, "step": 2726 }, { "epoch": 0.9105175292153589, "grad_norm": 0.8593902265575405, "learning_rate": 8.791095314645312e-06, "loss": 0.4206, "step": 2727 }, { "epoch": 0.9108514190317195, "grad_norm": 0.9042401615501107, "learning_rate": 8.789828445527845e-06, "loss": 0.4406, "step": 2728 }, { "epoch": 0.9111853088480801, "grad_norm": 0.8640981030958104, "learning_rate": 8.788561004336896e-06, "loss": 0.4412, "step": 2729 }, { "epoch": 0.9115191986644408, "grad_norm": 0.8864891442220034, "learning_rate": 8.787292991263782e-06, "loss": 0.4258, "step": 2730 }, { "epoch": 0.9118530884808014, "grad_norm": 0.8877752582842866, "learning_rate": 8.786024406499915e-06, "loss": 0.4336, "step": 2731 }, { "epoch": 0.9121869782971619, "grad_norm": 0.8380430607350212, "learning_rate": 8.784755250236783e-06, "loss": 0.4074, "step": 2732 }, { "epoch": 0.9125208681135225, "grad_norm": 0.8932359203301151, "learning_rate": 8.783485522665965e-06, "loss": 0.4341, "step": 2733 }, { "epoch": 0.9128547579298831, "grad_norm": 0.8736303331283028, "learning_rate": 8.782215223979128e-06, "loss": 0.4377, "step": 2734 }, { "epoch": 0.9131886477462438, "grad_norm": 0.8868840039625557, "learning_rate": 8.780944354368022e-06, "loss": 0.4474, "step": 2735 }, { "epoch": 0.9135225375626044, "grad_norm": 0.9422288731877948, "learning_rate": 8.779672914024483e-06, "loss": 0.4476, "step": 2736 }, { "epoch": 0.9138564273789649, "grad_norm": 0.854423410600763, "learning_rate": 8.77840090314044e-06, "loss": 0.4161, "step": 2737 }, { "epoch": 0.9141903171953255, "grad_norm": 0.8699944447107713, "learning_rate": 8.777128321907898e-06, "loss": 0.4378, "step": 2738 }, { "epoch": 0.9145242070116861, "grad_norm": 0.9106861762747811, "learning_rate": 8.77585517051895e-06, "loss": 0.414, "step": 2739 }, { "epoch": 0.9148580968280468, "grad_norm": 0.8689109266980727, "learning_rate": 8.774581449165784e-06, "loss": 0.4284, "step": 2740 }, { "epoch": 0.9151919866444074, "grad_norm": 0.8660963512590217, "learning_rate": 8.773307158040665e-06, "loss": 0.4329, "step": 2741 }, { "epoch": 0.915525876460768, "grad_norm": 0.8734895476153105, "learning_rate": 8.772032297335948e-06, "loss": 0.4291, "step": 2742 }, { "epoch": 0.9158597662771285, "grad_norm": 0.8542362909408495, "learning_rate": 8.77075686724407e-06, "loss": 0.4311, "step": 2743 }, { "epoch": 0.9161936560934891, "grad_norm": 0.8442500504932883, "learning_rate": 8.769480867957561e-06, "loss": 0.4131, "step": 2744 }, { "epoch": 0.9165275459098498, "grad_norm": 0.8831305806941406, "learning_rate": 8.76820429966903e-06, "loss": 0.4229, "step": 2745 }, { "epoch": 0.9168614357262104, "grad_norm": 0.8407623196130213, "learning_rate": 8.766927162571174e-06, "loss": 0.4096, "step": 2746 }, { "epoch": 0.917195325542571, "grad_norm": 0.9090172707358853, "learning_rate": 8.765649456856777e-06, "loss": 0.4305, "step": 2747 }, { "epoch": 0.9175292153589315, "grad_norm": 0.8635178403432037, "learning_rate": 8.764371182718711e-06, "loss": 0.4339, "step": 2748 }, { "epoch": 0.9178631051752921, "grad_norm": 0.8864960479249778, "learning_rate": 8.763092340349928e-06, "loss": 0.4379, "step": 2749 }, { "epoch": 0.9181969949916527, "grad_norm": 0.8776034126811842, "learning_rate": 8.761812929943469e-06, "loss": 0.4345, "step": 2750 }, { "epoch": 0.9185308848080134, "grad_norm": 0.8544330434982772, "learning_rate": 8.760532951692463e-06, "loss": 0.4053, "step": 2751 }, { "epoch": 0.918864774624374, "grad_norm": 0.9120446248659434, "learning_rate": 8.75925240579012e-06, "loss": 0.4414, "step": 2752 }, { "epoch": 0.9191986644407345, "grad_norm": 0.8746710888693818, "learning_rate": 8.757971292429738e-06, "loss": 0.4398, "step": 2753 }, { "epoch": 0.9195325542570951, "grad_norm": 0.9394237876831237, "learning_rate": 8.756689611804703e-06, "loss": 0.4573, "step": 2754 }, { "epoch": 0.9198664440734557, "grad_norm": 0.847139720891756, "learning_rate": 8.755407364108483e-06, "loss": 0.4268, "step": 2755 }, { "epoch": 0.9202003338898164, "grad_norm": 0.8431380339562318, "learning_rate": 8.754124549534631e-06, "loss": 0.4176, "step": 2756 }, { "epoch": 0.920534223706177, "grad_norm": 0.8625227793358436, "learning_rate": 8.75284116827679e-06, "loss": 0.4458, "step": 2757 }, { "epoch": 0.9208681135225376, "grad_norm": 0.8424232478256026, "learning_rate": 8.751557220528687e-06, "loss": 0.3971, "step": 2758 }, { "epoch": 0.9212020033388981, "grad_norm": 0.8647059672112389, "learning_rate": 8.75027270648413e-06, "loss": 0.4305, "step": 2759 }, { "epoch": 0.9215358931552587, "grad_norm": 0.8945563527715285, "learning_rate": 8.748987626337019e-06, "loss": 0.4361, "step": 2760 }, { "epoch": 0.9218697829716194, "grad_norm": 0.8686599289162573, "learning_rate": 8.747701980281334e-06, "loss": 0.4417, "step": 2761 }, { "epoch": 0.92220367278798, "grad_norm": 0.8947127581857692, "learning_rate": 8.746415768511144e-06, "loss": 0.4262, "step": 2762 }, { "epoch": 0.9225375626043406, "grad_norm": 0.8509694966642576, "learning_rate": 8.745128991220603e-06, "loss": 0.4147, "step": 2763 }, { "epoch": 0.9228714524207011, "grad_norm": 0.8737084333372821, "learning_rate": 8.74384164860395e-06, "loss": 0.4095, "step": 2764 }, { "epoch": 0.9232053422370617, "grad_norm": 0.8574616027342712, "learning_rate": 8.742553740855507e-06, "loss": 0.4123, "step": 2765 }, { "epoch": 0.9235392320534224, "grad_norm": 0.9160531027133239, "learning_rate": 8.741265268169684e-06, "loss": 0.449, "step": 2766 }, { "epoch": 0.923873121869783, "grad_norm": 0.8827862939757443, "learning_rate": 8.739976230740976e-06, "loss": 0.4333, "step": 2767 }, { "epoch": 0.9242070116861436, "grad_norm": 0.8963086922652989, "learning_rate": 8.738686628763964e-06, "loss": 0.4198, "step": 2768 }, { "epoch": 0.9245409015025041, "grad_norm": 0.9672482983382711, "learning_rate": 8.73739646243331e-06, "loss": 0.4453, "step": 2769 }, { "epoch": 0.9248747913188647, "grad_norm": 0.9031064651666159, "learning_rate": 8.736105731943766e-06, "loss": 0.4328, "step": 2770 }, { "epoch": 0.9252086811352254, "grad_norm": 0.8430255628449711, "learning_rate": 8.73481443749017e-06, "loss": 0.4041, "step": 2771 }, { "epoch": 0.925542570951586, "grad_norm": 0.8743445952561978, "learning_rate": 8.733522579267439e-06, "loss": 0.4434, "step": 2772 }, { "epoch": 0.9258764607679466, "grad_norm": 0.8264658979520376, "learning_rate": 8.732230157470579e-06, "loss": 0.4275, "step": 2773 }, { "epoch": 0.9262103505843072, "grad_norm": 0.957777983092935, "learning_rate": 8.73093717229468e-06, "loss": 0.4556, "step": 2774 }, { "epoch": 0.9265442404006677, "grad_norm": 0.8723520897929291, "learning_rate": 8.729643623934921e-06, "loss": 0.4423, "step": 2775 }, { "epoch": 0.9268781302170284, "grad_norm": 0.8837238119745643, "learning_rate": 8.72834951258656e-06, "loss": 0.4253, "step": 2776 }, { "epoch": 0.927212020033389, "grad_norm": 0.8959909314532875, "learning_rate": 8.727054838444942e-06, "loss": 0.436, "step": 2777 }, { "epoch": 0.9275459098497496, "grad_norm": 0.910286851074967, "learning_rate": 8.725759601705502e-06, "loss": 0.4432, "step": 2778 }, { "epoch": 0.9278797996661102, "grad_norm": 0.9059912939728966, "learning_rate": 8.72446380256375e-06, "loss": 0.4388, "step": 2779 }, { "epoch": 0.9282136894824707, "grad_norm": 0.9139582351644958, "learning_rate": 8.72316744121529e-06, "loss": 0.4448, "step": 2780 }, { "epoch": 0.9285475792988314, "grad_norm": 0.883027950641876, "learning_rate": 8.721870517855806e-06, "loss": 0.4135, "step": 2781 }, { "epoch": 0.928881469115192, "grad_norm": 0.9055586464539889, "learning_rate": 8.72057303268107e-06, "loss": 0.4501, "step": 2782 }, { "epoch": 0.9292153589315526, "grad_norm": 0.8842280442187852, "learning_rate": 8.719274985886937e-06, "loss": 0.4249, "step": 2783 }, { "epoch": 0.9295492487479132, "grad_norm": 0.8882825708556202, "learning_rate": 8.717976377669344e-06, "loss": 0.4174, "step": 2784 }, { "epoch": 0.9298831385642737, "grad_norm": 0.8582936434119848, "learning_rate": 8.716677208224315e-06, "loss": 0.4133, "step": 2785 }, { "epoch": 0.9302170283806344, "grad_norm": 0.8736599802199038, "learning_rate": 8.715377477747963e-06, "loss": 0.4215, "step": 2786 }, { "epoch": 0.930550918196995, "grad_norm": 0.9047347205345863, "learning_rate": 8.714077186436482e-06, "loss": 0.4224, "step": 2787 }, { "epoch": 0.9308848080133556, "grad_norm": 0.9037905525303344, "learning_rate": 8.712776334486147e-06, "loss": 0.428, "step": 2788 }, { "epoch": 0.9312186978297162, "grad_norm": 0.9635799695261539, "learning_rate": 8.711474922093324e-06, "loss": 0.4459, "step": 2789 }, { "epoch": 0.9315525876460768, "grad_norm": 0.893521767532625, "learning_rate": 8.710172949454459e-06, "loss": 0.4336, "step": 2790 }, { "epoch": 0.9318864774624374, "grad_norm": 0.862516862934859, "learning_rate": 8.708870416766084e-06, "loss": 0.4286, "step": 2791 }, { "epoch": 0.932220367278798, "grad_norm": 0.8858748782349943, "learning_rate": 8.70756732422482e-06, "loss": 0.4309, "step": 2792 }, { "epoch": 0.9325542570951586, "grad_norm": 0.838182370542333, "learning_rate": 8.706263672027364e-06, "loss": 0.4086, "step": 2793 }, { "epoch": 0.9328881469115192, "grad_norm": 0.8417724490707997, "learning_rate": 8.704959460370503e-06, "loss": 0.4247, "step": 2794 }, { "epoch": 0.9332220367278798, "grad_norm": 0.8683339252357013, "learning_rate": 8.703654689451109e-06, "loss": 0.4271, "step": 2795 }, { "epoch": 0.9335559265442404, "grad_norm": 0.8617438177617847, "learning_rate": 8.702349359466136e-06, "loss": 0.434, "step": 2796 }, { "epoch": 0.933889816360601, "grad_norm": 0.857498537609334, "learning_rate": 8.701043470612623e-06, "loss": 0.429, "step": 2797 }, { "epoch": 0.9342237061769616, "grad_norm": 0.8610600036444435, "learning_rate": 8.699737023087693e-06, "loss": 0.4322, "step": 2798 }, { "epoch": 0.9345575959933222, "grad_norm": 0.8422669337206138, "learning_rate": 8.698430017088553e-06, "loss": 0.425, "step": 2799 }, { "epoch": 0.9348914858096828, "grad_norm": 0.8498849630963655, "learning_rate": 8.697122452812498e-06, "loss": 0.4159, "step": 2800 }, { "epoch": 0.9352253756260434, "grad_norm": 0.8784059432477006, "learning_rate": 8.695814330456901e-06, "loss": 0.4557, "step": 2801 }, { "epoch": 0.935559265442404, "grad_norm": 0.868348909810428, "learning_rate": 8.694505650219226e-06, "loss": 0.4294, "step": 2802 }, { "epoch": 0.9358931552587646, "grad_norm": 0.8714807839192353, "learning_rate": 8.693196412297017e-06, "loss": 0.426, "step": 2803 }, { "epoch": 0.9362270450751252, "grad_norm": 0.8367566257803507, "learning_rate": 8.691886616887901e-06, "loss": 0.4163, "step": 2804 }, { "epoch": 0.9365609348914858, "grad_norm": 0.8729724831870221, "learning_rate": 8.690576264189594e-06, "loss": 0.4223, "step": 2805 }, { "epoch": 0.9368948247078465, "grad_norm": 0.8879956651711487, "learning_rate": 8.68926535439989e-06, "loss": 0.4309, "step": 2806 }, { "epoch": 0.937228714524207, "grad_norm": 0.8827724981011954, "learning_rate": 8.687953887716675e-06, "loss": 0.4549, "step": 2807 }, { "epoch": 0.9375626043405676, "grad_norm": 0.881011854108841, "learning_rate": 8.68664186433791e-06, "loss": 0.4274, "step": 2808 }, { "epoch": 0.9378964941569282, "grad_norm": 0.8698574280412269, "learning_rate": 8.685329284461647e-06, "loss": 0.4188, "step": 2809 }, { "epoch": 0.9382303839732888, "grad_norm": 0.8691503203731505, "learning_rate": 8.68401614828602e-06, "loss": 0.4279, "step": 2810 }, { "epoch": 0.9385642737896495, "grad_norm": 0.8530007725387114, "learning_rate": 8.682702456009247e-06, "loss": 0.4257, "step": 2811 }, { "epoch": 0.93889816360601, "grad_norm": 0.9094261081429491, "learning_rate": 8.681388207829627e-06, "loss": 0.4324, "step": 2812 }, { "epoch": 0.9392320534223706, "grad_norm": 0.9046420210799007, "learning_rate": 8.680073403945547e-06, "loss": 0.4427, "step": 2813 }, { "epoch": 0.9395659432387312, "grad_norm": 0.811116731063025, "learning_rate": 8.678758044555474e-06, "loss": 0.4077, "step": 2814 }, { "epoch": 0.9398998330550918, "grad_norm": 0.8728553244173934, "learning_rate": 8.677442129857963e-06, "loss": 0.4247, "step": 2815 }, { "epoch": 0.9402337228714525, "grad_norm": 0.8912541596319776, "learning_rate": 8.67612566005165e-06, "loss": 0.4312, "step": 2816 }, { "epoch": 0.940567612687813, "grad_norm": 0.8874612206075059, "learning_rate": 8.674808635335259e-06, "loss": 0.4482, "step": 2817 }, { "epoch": 0.9409015025041736, "grad_norm": 0.8622718414255929, "learning_rate": 8.673491055907591e-06, "loss": 0.4472, "step": 2818 }, { "epoch": 0.9412353923205342, "grad_norm": 0.827604790058232, "learning_rate": 8.672172921967534e-06, "loss": 0.4242, "step": 2819 }, { "epoch": 0.9415692821368948, "grad_norm": 0.8986143384745798, "learning_rate": 8.670854233714064e-06, "loss": 0.4566, "step": 2820 }, { "epoch": 0.9419031719532555, "grad_norm": 0.8566810294722924, "learning_rate": 8.669534991346232e-06, "loss": 0.4268, "step": 2821 }, { "epoch": 0.9422370617696161, "grad_norm": 0.8763617557439426, "learning_rate": 8.668215195063177e-06, "loss": 0.4593, "step": 2822 }, { "epoch": 0.9425709515859766, "grad_norm": 0.8819635203649782, "learning_rate": 8.666894845064126e-06, "loss": 0.4309, "step": 2823 }, { "epoch": 0.9429048414023372, "grad_norm": 0.8802479797930175, "learning_rate": 8.665573941548383e-06, "loss": 0.4348, "step": 2824 }, { "epoch": 0.9432387312186978, "grad_norm": 0.8722722085082891, "learning_rate": 8.664252484715339e-06, "loss": 0.4397, "step": 2825 }, { "epoch": 0.9435726210350585, "grad_norm": 0.8710428365378544, "learning_rate": 8.662930474764466e-06, "loss": 0.4298, "step": 2826 }, { "epoch": 0.9439065108514191, "grad_norm": 0.861837551836094, "learning_rate": 8.661607911895323e-06, "loss": 0.442, "step": 2827 }, { "epoch": 0.9442404006677796, "grad_norm": 0.8625404613412923, "learning_rate": 8.660284796307548e-06, "loss": 0.4193, "step": 2828 }, { "epoch": 0.9445742904841402, "grad_norm": 0.8801376979790573, "learning_rate": 8.658961128200867e-06, "loss": 0.4277, "step": 2829 }, { "epoch": 0.9449081803005008, "grad_norm": 0.9057609309195664, "learning_rate": 8.657636907775086e-06, "loss": 0.4481, "step": 2830 }, { "epoch": 0.9452420701168615, "grad_norm": 0.8764523607485417, "learning_rate": 8.656312135230096e-06, "loss": 0.4257, "step": 2831 }, { "epoch": 0.9455759599332221, "grad_norm": 0.8598024746669591, "learning_rate": 8.654986810765872e-06, "loss": 0.429, "step": 2832 }, { "epoch": 0.9459098497495826, "grad_norm": 0.8825337077668421, "learning_rate": 8.653660934582471e-06, "loss": 0.4031, "step": 2833 }, { "epoch": 0.9462437395659432, "grad_norm": 0.8934956557454719, "learning_rate": 8.652334506880031e-06, "loss": 0.4188, "step": 2834 }, { "epoch": 0.9465776293823038, "grad_norm": 0.9027369119286521, "learning_rate": 8.65100752785878e-06, "loss": 0.4511, "step": 2835 }, { "epoch": 0.9469115191986645, "grad_norm": 0.8591341781069072, "learning_rate": 8.649679997719023e-06, "loss": 0.4252, "step": 2836 }, { "epoch": 0.9472454090150251, "grad_norm": 0.839208677821064, "learning_rate": 8.648351916661149e-06, "loss": 0.4162, "step": 2837 }, { "epoch": 0.9475792988313857, "grad_norm": 0.9178467926440428, "learning_rate": 8.647023284885634e-06, "loss": 0.4324, "step": 2838 }, { "epoch": 0.9479131886477462, "grad_norm": 0.8267154082594539, "learning_rate": 8.645694102593033e-06, "loss": 0.4082, "step": 2839 }, { "epoch": 0.9482470784641068, "grad_norm": 0.8472631404188877, "learning_rate": 8.644364369983986e-06, "loss": 0.4322, "step": 2840 }, { "epoch": 0.9485809682804675, "grad_norm": 0.8365860786292307, "learning_rate": 8.643034087259216e-06, "loss": 0.4218, "step": 2841 }, { "epoch": 0.9489148580968281, "grad_norm": 0.8813974156970031, "learning_rate": 8.641703254619529e-06, "loss": 0.4308, "step": 2842 }, { "epoch": 0.9492487479131887, "grad_norm": 0.8920206212683989, "learning_rate": 8.640371872265813e-06, "loss": 0.4416, "step": 2843 }, { "epoch": 0.9495826377295492, "grad_norm": 0.840794961558012, "learning_rate": 8.639039940399041e-06, "loss": 0.4148, "step": 2844 }, { "epoch": 0.9499165275459098, "grad_norm": 0.8408306415031306, "learning_rate": 8.637707459220265e-06, "loss": 0.4159, "step": 2845 }, { "epoch": 0.9502504173622704, "grad_norm": 0.8499370413262075, "learning_rate": 8.636374428930627e-06, "loss": 0.4129, "step": 2846 }, { "epoch": 0.9505843071786311, "grad_norm": 0.8997284649912934, "learning_rate": 8.635040849731342e-06, "loss": 0.4442, "step": 2847 }, { "epoch": 0.9509181969949917, "grad_norm": 0.8302471868505594, "learning_rate": 8.63370672182372e-06, "loss": 0.416, "step": 2848 }, { "epoch": 0.9512520868113522, "grad_norm": 0.8308278762586732, "learning_rate": 8.632372045409142e-06, "loss": 0.4158, "step": 2849 }, { "epoch": 0.9515859766277128, "grad_norm": 0.8872480459676081, "learning_rate": 8.631036820689078e-06, "loss": 0.4307, "step": 2850 }, { "epoch": 0.9519198664440734, "grad_norm": 0.8389378839027479, "learning_rate": 8.629701047865082e-06, "loss": 0.43, "step": 2851 }, { "epoch": 0.9522537562604341, "grad_norm": 0.8746257125071801, "learning_rate": 8.628364727138785e-06, "loss": 0.4285, "step": 2852 }, { "epoch": 0.9525876460767947, "grad_norm": 0.8724445438202154, "learning_rate": 8.627027858711908e-06, "loss": 0.4172, "step": 2853 }, { "epoch": 0.9529215358931553, "grad_norm": 0.8937318415941259, "learning_rate": 8.625690442786248e-06, "loss": 0.4269, "step": 2854 }, { "epoch": 0.9532554257095158, "grad_norm": 0.8771384602857203, "learning_rate": 8.62435247956369e-06, "loss": 0.4291, "step": 2855 }, { "epoch": 0.9535893155258764, "grad_norm": 0.8472113490017191, "learning_rate": 8.623013969246197e-06, "loss": 0.4177, "step": 2856 }, { "epoch": 0.9539232053422371, "grad_norm": 0.8434967630360347, "learning_rate": 8.621674912035817e-06, "loss": 0.4198, "step": 2857 }, { "epoch": 0.9542570951585977, "grad_norm": 0.837572465484102, "learning_rate": 8.620335308134683e-06, "loss": 0.4115, "step": 2858 }, { "epoch": 0.9545909849749583, "grad_norm": 0.8709810979907251, "learning_rate": 8.618995157745002e-06, "loss": 0.4316, "step": 2859 }, { "epoch": 0.9549248747913188, "grad_norm": 0.8701644277967678, "learning_rate": 8.617654461069076e-06, "loss": 0.4107, "step": 2860 }, { "epoch": 0.9552587646076794, "grad_norm": 0.8505504010363715, "learning_rate": 8.616313218309277e-06, "loss": 0.4303, "step": 2861 }, { "epoch": 0.9555926544240401, "grad_norm": 0.8093297642750151, "learning_rate": 8.614971429668071e-06, "loss": 0.4079, "step": 2862 }, { "epoch": 0.9559265442404007, "grad_norm": 0.8456790227712729, "learning_rate": 8.613629095347994e-06, "loss": 0.3977, "step": 2863 }, { "epoch": 0.9562604340567613, "grad_norm": 0.8901727951304208, "learning_rate": 8.612286215551678e-06, "loss": 0.4514, "step": 2864 }, { "epoch": 0.9565943238731218, "grad_norm": 0.8908475833288325, "learning_rate": 8.610942790481825e-06, "loss": 0.4175, "step": 2865 }, { "epoch": 0.9569282136894824, "grad_norm": 0.9205498449637033, "learning_rate": 8.609598820341226e-06, "loss": 0.4218, "step": 2866 }, { "epoch": 0.9572621035058431, "grad_norm": 0.8638568359316476, "learning_rate": 8.608254305332754e-06, "loss": 0.4199, "step": 2867 }, { "epoch": 0.9575959933222037, "grad_norm": 0.843685976787602, "learning_rate": 8.606909245659364e-06, "loss": 0.411, "step": 2868 }, { "epoch": 0.9579298831385643, "grad_norm": 0.8371812507669436, "learning_rate": 8.605563641524087e-06, "loss": 0.4265, "step": 2869 }, { "epoch": 0.9582637729549248, "grad_norm": 0.8906216550942823, "learning_rate": 8.604217493130047e-06, "loss": 0.4413, "step": 2870 }, { "epoch": 0.9585976627712854, "grad_norm": 0.9633322161343022, "learning_rate": 8.602870800680443e-06, "loss": 0.4416, "step": 2871 }, { "epoch": 0.9589315525876461, "grad_norm": 0.8614156940518098, "learning_rate": 8.60152356437856e-06, "loss": 0.4258, "step": 2872 }, { "epoch": 0.9592654424040067, "grad_norm": 0.8844899865122653, "learning_rate": 8.600175784427757e-06, "loss": 0.4383, "step": 2873 }, { "epoch": 0.9595993322203673, "grad_norm": 0.8527251419033084, "learning_rate": 8.598827461031487e-06, "loss": 0.4267, "step": 2874 }, { "epoch": 0.9599332220367279, "grad_norm": 0.8340875221219151, "learning_rate": 8.597478594393275e-06, "loss": 0.431, "step": 2875 }, { "epoch": 0.9602671118530884, "grad_norm": 0.856398495613359, "learning_rate": 8.596129184716737e-06, "loss": 0.4157, "step": 2876 }, { "epoch": 0.9606010016694491, "grad_norm": 0.8729086394843304, "learning_rate": 8.594779232205562e-06, "loss": 0.434, "step": 2877 }, { "epoch": 0.9609348914858097, "grad_norm": 0.9192277980314156, "learning_rate": 8.593428737063523e-06, "loss": 0.4383, "step": 2878 }, { "epoch": 0.9612687813021703, "grad_norm": 0.9233637469455396, "learning_rate": 8.592077699494483e-06, "loss": 0.448, "step": 2879 }, { "epoch": 0.9616026711185309, "grad_norm": 0.8687790448374149, "learning_rate": 8.590726119702374e-06, "loss": 0.4212, "step": 2880 }, { "epoch": 0.9619365609348914, "grad_norm": 0.8550898161825881, "learning_rate": 8.589373997891222e-06, "loss": 0.4315, "step": 2881 }, { "epoch": 0.9622704507512521, "grad_norm": 0.8542776622918101, "learning_rate": 8.588021334265126e-06, "loss": 0.4447, "step": 2882 }, { "epoch": 0.9626043405676127, "grad_norm": 0.8745781636993786, "learning_rate": 8.586668129028272e-06, "loss": 0.4433, "step": 2883 }, { "epoch": 0.9629382303839733, "grad_norm": 0.8734955692242559, "learning_rate": 8.585314382384925e-06, "loss": 0.4484, "step": 2884 }, { "epoch": 0.9632721202003339, "grad_norm": 0.8326766622782098, "learning_rate": 8.583960094539434e-06, "loss": 0.4164, "step": 2885 }, { "epoch": 0.9636060100166944, "grad_norm": 0.8132600609012295, "learning_rate": 8.582605265696228e-06, "loss": 0.4107, "step": 2886 }, { "epoch": 0.9639398998330551, "grad_norm": 0.8713296787670436, "learning_rate": 8.581249896059816e-06, "loss": 0.4257, "step": 2887 }, { "epoch": 0.9642737896494157, "grad_norm": 0.8719787621832589, "learning_rate": 8.579893985834793e-06, "loss": 0.416, "step": 2888 }, { "epoch": 0.9646076794657763, "grad_norm": 0.8738810032783227, "learning_rate": 8.57853753522583e-06, "loss": 0.4273, "step": 2889 }, { "epoch": 0.9649415692821369, "grad_norm": 0.9008502117571867, "learning_rate": 8.577180544437688e-06, "loss": 0.4309, "step": 2890 }, { "epoch": 0.9652754590984975, "grad_norm": 0.9225631618564879, "learning_rate": 8.5758230136752e-06, "loss": 0.4358, "step": 2891 }, { "epoch": 0.9656093489148581, "grad_norm": 0.8503853620008174, "learning_rate": 8.574464943143286e-06, "loss": 0.4295, "step": 2892 }, { "epoch": 0.9659432387312187, "grad_norm": 0.8695053677931114, "learning_rate": 8.573106333046947e-06, "loss": 0.4159, "step": 2893 }, { "epoch": 0.9662771285475793, "grad_norm": 0.8513624273177613, "learning_rate": 8.571747183591264e-06, "loss": 0.4141, "step": 2894 }, { "epoch": 0.9666110183639399, "grad_norm": 0.8994305206477525, "learning_rate": 8.5703874949814e-06, "loss": 0.4313, "step": 2895 }, { "epoch": 0.9669449081803005, "grad_norm": 0.9389365891439532, "learning_rate": 8.569027267422602e-06, "loss": 0.4582, "step": 2896 }, { "epoch": 0.967278797996661, "grad_norm": 0.8785227668016559, "learning_rate": 8.567666501120194e-06, "loss": 0.4385, "step": 2897 }, { "epoch": 0.9676126878130217, "grad_norm": 0.8584102864470768, "learning_rate": 8.566305196279582e-06, "loss": 0.4362, "step": 2898 }, { "epoch": 0.9679465776293823, "grad_norm": 0.9102511167518768, "learning_rate": 8.564943353106256e-06, "loss": 0.4389, "step": 2899 }, { "epoch": 0.9682804674457429, "grad_norm": 0.8663353992974414, "learning_rate": 8.563580971805789e-06, "loss": 0.446, "step": 2900 }, { "epoch": 0.9686143572621035, "grad_norm": 0.8228491500493998, "learning_rate": 8.562218052583826e-06, "loss": 0.4223, "step": 2901 }, { "epoch": 0.968948247078464, "grad_norm": 0.8543615835574133, "learning_rate": 8.560854595646102e-06, "loss": 0.4285, "step": 2902 }, { "epoch": 0.9692821368948247, "grad_norm": 0.8201954810295664, "learning_rate": 8.559490601198432e-06, "loss": 0.4233, "step": 2903 }, { "epoch": 0.9696160267111853, "grad_norm": 0.882989734007228, "learning_rate": 8.558126069446708e-06, "loss": 0.4229, "step": 2904 }, { "epoch": 0.9699499165275459, "grad_norm": 0.9274484064215591, "learning_rate": 8.556761000596907e-06, "loss": 0.4634, "step": 2905 }, { "epoch": 0.9702838063439065, "grad_norm": 0.8993367001371142, "learning_rate": 8.555395394855087e-06, "loss": 0.4481, "step": 2906 }, { "epoch": 0.9706176961602672, "grad_norm": 0.8827850733928508, "learning_rate": 8.554029252427383e-06, "loss": 0.4274, "step": 2907 }, { "epoch": 0.9709515859766277, "grad_norm": 0.8154648351542897, "learning_rate": 8.552662573520015e-06, "loss": 0.3942, "step": 2908 }, { "epoch": 0.9712854757929883, "grad_norm": 0.8254064491087063, "learning_rate": 8.551295358339283e-06, "loss": 0.3952, "step": 2909 }, { "epoch": 0.9716193656093489, "grad_norm": 0.8756473897209546, "learning_rate": 8.549927607091568e-06, "loss": 0.442, "step": 2910 }, { "epoch": 0.9719532554257095, "grad_norm": 0.8390857906371954, "learning_rate": 8.54855931998333e-06, "loss": 0.4195, "step": 2911 }, { "epoch": 0.9722871452420702, "grad_norm": 0.8239381720069724, "learning_rate": 8.547190497221114e-06, "loss": 0.4173, "step": 2912 }, { "epoch": 0.9726210350584307, "grad_norm": 0.864844342560549, "learning_rate": 8.545821139011542e-06, "loss": 0.4214, "step": 2913 }, { "epoch": 0.9729549248747913, "grad_norm": 0.8393132195121552, "learning_rate": 8.544451245561318e-06, "loss": 0.4173, "step": 2914 }, { "epoch": 0.9732888146911519, "grad_norm": 0.8355698897394213, "learning_rate": 8.543080817077229e-06, "loss": 0.4271, "step": 2915 }, { "epoch": 0.9736227045075125, "grad_norm": 0.837380201765597, "learning_rate": 8.541709853766137e-06, "loss": 0.418, "step": 2916 }, { "epoch": 0.9739565943238732, "grad_norm": 0.8948175015294741, "learning_rate": 8.540338355834991e-06, "loss": 0.4546, "step": 2917 }, { "epoch": 0.9742904841402337, "grad_norm": 0.8327926350086284, "learning_rate": 8.53896632349082e-06, "loss": 0.4219, "step": 2918 }, { "epoch": 0.9746243739565943, "grad_norm": 0.8569895198124752, "learning_rate": 8.537593756940728e-06, "loss": 0.434, "step": 2919 }, { "epoch": 0.9749582637729549, "grad_norm": 0.8392021090046627, "learning_rate": 8.536220656391905e-06, "loss": 0.3998, "step": 2920 }, { "epoch": 0.9752921535893155, "grad_norm": 0.8605464124389827, "learning_rate": 8.53484702205162e-06, "loss": 0.4262, "step": 2921 }, { "epoch": 0.9756260434056762, "grad_norm": 0.8412842966033974, "learning_rate": 8.533472854127225e-06, "loss": 0.3983, "step": 2922 }, { "epoch": 0.9759599332220368, "grad_norm": 0.8747645235945124, "learning_rate": 8.532098152826149e-06, "loss": 0.421, "step": 2923 }, { "epoch": 0.9762938230383973, "grad_norm": 0.8449196723924498, "learning_rate": 8.5307229183559e-06, "loss": 0.419, "step": 2924 }, { "epoch": 0.9766277128547579, "grad_norm": 0.8386791360364003, "learning_rate": 8.529347150924071e-06, "loss": 0.4135, "step": 2925 }, { "epoch": 0.9769616026711185, "grad_norm": 0.8760017091889115, "learning_rate": 8.527970850738333e-06, "loss": 0.4375, "step": 2926 }, { "epoch": 0.9772954924874792, "grad_norm": 0.8395905077773972, "learning_rate": 8.526594018006442e-06, "loss": 0.4156, "step": 2927 }, { "epoch": 0.9776293823038398, "grad_norm": 0.8314833220782, "learning_rate": 8.525216652936228e-06, "loss": 0.4191, "step": 2928 }, { "epoch": 0.9779632721202003, "grad_norm": 0.8478750823532611, "learning_rate": 8.523838755735603e-06, "loss": 0.4001, "step": 2929 }, { "epoch": 0.9782971619365609, "grad_norm": 0.8510677512283685, "learning_rate": 8.522460326612558e-06, "loss": 0.4229, "step": 2930 }, { "epoch": 0.9786310517529215, "grad_norm": 0.9075700210619977, "learning_rate": 8.521081365775173e-06, "loss": 0.4275, "step": 2931 }, { "epoch": 0.9789649415692822, "grad_norm": 0.896969202556232, "learning_rate": 8.519701873431593e-06, "loss": 0.4063, "step": 2932 }, { "epoch": 0.9792988313856428, "grad_norm": 0.8510370976940711, "learning_rate": 8.518321849790062e-06, "loss": 0.4266, "step": 2933 }, { "epoch": 0.9796327212020033, "grad_norm": 0.8686603238684812, "learning_rate": 8.516941295058886e-06, "loss": 0.4181, "step": 2934 }, { "epoch": 0.9799666110183639, "grad_norm": 0.8652611958664432, "learning_rate": 8.515560209446463e-06, "loss": 0.4282, "step": 2935 }, { "epoch": 0.9803005008347245, "grad_norm": 0.8617221141858783, "learning_rate": 8.514178593161266e-06, "loss": 0.4251, "step": 2936 }, { "epoch": 0.9806343906510852, "grad_norm": 1.01305752895066, "learning_rate": 8.51279644641185e-06, "loss": 0.468, "step": 2937 }, { "epoch": 0.9809682804674458, "grad_norm": 0.869561101317859, "learning_rate": 8.511413769406853e-06, "loss": 0.4283, "step": 2938 }, { "epoch": 0.9813021702838064, "grad_norm": 0.8534590368137853, "learning_rate": 8.510030562354984e-06, "loss": 0.4315, "step": 2939 }, { "epoch": 0.9816360601001669, "grad_norm": 0.8223246252284468, "learning_rate": 8.508646825465041e-06, "loss": 0.4039, "step": 2940 }, { "epoch": 0.9819699499165275, "grad_norm": 0.825463957569363, "learning_rate": 8.507262558945898e-06, "loss": 0.4093, "step": 2941 }, { "epoch": 0.9823038397328882, "grad_norm": 0.8549005978662653, "learning_rate": 8.50587776300651e-06, "loss": 0.4234, "step": 2942 }, { "epoch": 0.9826377295492488, "grad_norm": 0.8551997406573525, "learning_rate": 8.50449243785591e-06, "loss": 0.4217, "step": 2943 }, { "epoch": 0.9829716193656094, "grad_norm": 0.8212984920278721, "learning_rate": 8.503106583703214e-06, "loss": 0.4114, "step": 2944 }, { "epoch": 0.9833055091819699, "grad_norm": 0.86155345482195, "learning_rate": 8.501720200757617e-06, "loss": 0.4288, "step": 2945 }, { "epoch": 0.9836393989983305, "grad_norm": 0.8586920171018674, "learning_rate": 8.500333289228391e-06, "loss": 0.4236, "step": 2946 }, { "epoch": 0.9839732888146911, "grad_norm": 0.8776145278666392, "learning_rate": 8.49894584932489e-06, "loss": 0.4354, "step": 2947 }, { "epoch": 0.9843071786310518, "grad_norm": 0.8603676440209601, "learning_rate": 8.49755788125655e-06, "loss": 0.4232, "step": 2948 }, { "epoch": 0.9846410684474124, "grad_norm": 0.8490833960722883, "learning_rate": 8.496169385232884e-06, "loss": 0.4289, "step": 2949 }, { "epoch": 0.9849749582637729, "grad_norm": 0.9011760238838834, "learning_rate": 8.494780361463481e-06, "loss": 0.4246, "step": 2950 }, { "epoch": 0.9853088480801335, "grad_norm": 0.8772577525553595, "learning_rate": 8.49339081015802e-06, "loss": 0.4234, "step": 2951 }, { "epoch": 0.9856427378964941, "grad_norm": 0.8935519373579061, "learning_rate": 8.492000731526248e-06, "loss": 0.4215, "step": 2952 }, { "epoch": 0.9859766277128548, "grad_norm": 0.8676874997639936, "learning_rate": 8.490610125777998e-06, "loss": 0.4222, "step": 2953 }, { "epoch": 0.9863105175292154, "grad_norm": 0.8337355924594598, "learning_rate": 8.489218993123185e-06, "loss": 0.4068, "step": 2954 }, { "epoch": 0.986644407345576, "grad_norm": 0.8897168121099436, "learning_rate": 8.487827333771795e-06, "loss": 0.4425, "step": 2955 }, { "epoch": 0.9869782971619365, "grad_norm": 0.85804960463875, "learning_rate": 8.486435147933902e-06, "loss": 0.4357, "step": 2956 }, { "epoch": 0.9873121869782971, "grad_norm": 0.8520343097620037, "learning_rate": 8.485042435819657e-06, "loss": 0.4222, "step": 2957 }, { "epoch": 0.9876460767946578, "grad_norm": 0.8588727650560304, "learning_rate": 8.483649197639285e-06, "loss": 0.3986, "step": 2958 }, { "epoch": 0.9879799666110184, "grad_norm": 0.8449144890581072, "learning_rate": 8.4822554336031e-06, "loss": 0.4001, "step": 2959 }, { "epoch": 0.988313856427379, "grad_norm": 0.8947719821221466, "learning_rate": 8.480861143921486e-06, "loss": 0.4277, "step": 2960 }, { "epoch": 0.9886477462437395, "grad_norm": 0.866165339194789, "learning_rate": 8.479466328804913e-06, "loss": 0.4235, "step": 2961 }, { "epoch": 0.9889816360601001, "grad_norm": 0.8508019718885551, "learning_rate": 8.478070988463926e-06, "loss": 0.4229, "step": 2962 }, { "epoch": 0.9893155258764608, "grad_norm": 0.8820145046871607, "learning_rate": 8.476675123109156e-06, "loss": 0.441, "step": 2963 }, { "epoch": 0.9896494156928214, "grad_norm": 0.849245191305165, "learning_rate": 8.475278732951302e-06, "loss": 0.4264, "step": 2964 }, { "epoch": 0.989983305509182, "grad_norm": 0.8557631507304861, "learning_rate": 8.473881818201154e-06, "loss": 0.4267, "step": 2965 }, { "epoch": 0.9903171953255425, "grad_norm": 0.8499291236639019, "learning_rate": 8.472484379069572e-06, "loss": 0.4174, "step": 2966 }, { "epoch": 0.9906510851419031, "grad_norm": 0.8838117881643501, "learning_rate": 8.4710864157675e-06, "loss": 0.4372, "step": 2967 }, { "epoch": 0.9909849749582638, "grad_norm": 0.8119304953229035, "learning_rate": 8.469687928505962e-06, "loss": 0.4098, "step": 2968 }, { "epoch": 0.9913188647746244, "grad_norm": 0.8592939999487994, "learning_rate": 8.468288917496058e-06, "loss": 0.4343, "step": 2969 }, { "epoch": 0.991652754590985, "grad_norm": 0.8626602365001347, "learning_rate": 8.466889382948967e-06, "loss": 0.4178, "step": 2970 }, { "epoch": 0.9919866444073456, "grad_norm": 0.8379785393726717, "learning_rate": 8.46548932507595e-06, "loss": 0.4235, "step": 2971 }, { "epoch": 0.9923205342237061, "grad_norm": 0.8671923830812354, "learning_rate": 8.464088744088345e-06, "loss": 0.4269, "step": 2972 }, { "epoch": 0.9926544240400668, "grad_norm": 0.8796898592154467, "learning_rate": 8.462687640197572e-06, "loss": 0.4307, "step": 2973 }, { "epoch": 0.9929883138564274, "grad_norm": 0.7992017927894045, "learning_rate": 8.461286013615123e-06, "loss": 0.3965, "step": 2974 }, { "epoch": 0.993322203672788, "grad_norm": 0.892621964033071, "learning_rate": 8.459883864552572e-06, "loss": 0.4319, "step": 2975 }, { "epoch": 0.9936560934891486, "grad_norm": 0.8576303255370749, "learning_rate": 8.458481193221576e-06, "loss": 0.4256, "step": 2976 }, { "epoch": 0.9939899833055091, "grad_norm": 0.8405490196532913, "learning_rate": 8.45707799983387e-06, "loss": 0.3959, "step": 2977 }, { "epoch": 0.9943238731218698, "grad_norm": 0.8452842796750313, "learning_rate": 8.455674284601263e-06, "loss": 0.4254, "step": 2978 }, { "epoch": 0.9946577629382304, "grad_norm": 0.8922044662362446, "learning_rate": 8.454270047735644e-06, "loss": 0.4492, "step": 2979 }, { "epoch": 0.994991652754591, "grad_norm": 0.8314167341119372, "learning_rate": 8.452865289448982e-06, "loss": 0.4188, "step": 2980 }, { "epoch": 0.9953255425709516, "grad_norm": 0.8649419937386499, "learning_rate": 8.45146000995333e-06, "loss": 0.4167, "step": 2981 }, { "epoch": 0.9956594323873121, "grad_norm": 0.8746089700593128, "learning_rate": 8.450054209460807e-06, "loss": 0.423, "step": 2982 }, { "epoch": 0.9959933222036728, "grad_norm": 0.8345116654302558, "learning_rate": 8.448647888183625e-06, "loss": 0.4241, "step": 2983 }, { "epoch": 0.9963272120200334, "grad_norm": 0.9361478488387039, "learning_rate": 8.447241046334062e-06, "loss": 0.4414, "step": 2984 }, { "epoch": 0.996661101836394, "grad_norm": 0.86872474297192, "learning_rate": 8.445833684124486e-06, "loss": 0.4229, "step": 2985 }, { "epoch": 0.9969949916527546, "grad_norm": 0.8786713971303383, "learning_rate": 8.444425801767334e-06, "loss": 0.4448, "step": 2986 }, { "epoch": 0.9973288814691152, "grad_norm": 0.8748893173656584, "learning_rate": 8.443017399475126e-06, "loss": 0.4204, "step": 2987 }, { "epoch": 0.9976627712854758, "grad_norm": 0.8447979434614988, "learning_rate": 8.44160847746046e-06, "loss": 0.4128, "step": 2988 }, { "epoch": 0.9979966611018364, "grad_norm": 0.8587510640333473, "learning_rate": 8.440199035936016e-06, "loss": 0.4233, "step": 2989 }, { "epoch": 0.998330550918197, "grad_norm": 0.8530328951981077, "learning_rate": 8.438789075114544e-06, "loss": 0.4269, "step": 2990 }, { "epoch": 0.9986644407345576, "grad_norm": 0.8797653834759264, "learning_rate": 8.437378595208878e-06, "loss": 0.4305, "step": 2991 }, { "epoch": 0.9989983305509182, "grad_norm": 0.8226029475842876, "learning_rate": 8.435967596431931e-06, "loss": 0.4044, "step": 2992 }, { "epoch": 0.9993322203672788, "grad_norm": 0.8662490586852071, "learning_rate": 8.434556078996692e-06, "loss": 0.4236, "step": 2993 }, { "epoch": 0.9996661101836394, "grad_norm": 0.8763251831895738, "learning_rate": 8.433144043116228e-06, "loss": 0.4435, "step": 2994 }, { "epoch": 1.0, "grad_norm": 0.8894107028636398, "learning_rate": 8.431731489003688e-06, "loss": 0.4395, "step": 2995 }, { "epoch": 1.0, "eval_loss": 0.4230945110321045, "eval_runtime": 833.2907, "eval_samples_per_second": 24.215, "eval_steps_per_second": 0.757, "step": 2995 }, { "epoch": 1.0003338898163605, "grad_norm": 0.8019817361811156, "learning_rate": 8.430318416872295e-06, "loss": 0.3783, "step": 2996 }, { "epoch": 1.0006677796327212, "grad_norm": 0.8301866294517517, "learning_rate": 8.428904826935352e-06, "loss": 0.4018, "step": 2997 }, { "epoch": 1.0010016694490818, "grad_norm": 0.8511045228889494, "learning_rate": 8.427490719406242e-06, "loss": 0.4125, "step": 2998 }, { "epoch": 1.0013355592654425, "grad_norm": 0.8574755035700116, "learning_rate": 8.42607609449842e-06, "loss": 0.4058, "step": 2999 }, { "epoch": 1.001669449081803, "grad_norm": 0.8422272632519447, "learning_rate": 8.424660952425425e-06, "loss": 0.3906, "step": 3000 }, { "epoch": 1.0020033388981635, "grad_norm": 0.8662545758754367, "learning_rate": 8.423245293400875e-06, "loss": 0.3991, "step": 3001 }, { "epoch": 1.0023372287145242, "grad_norm": 0.8029843861124352, "learning_rate": 8.421829117638459e-06, "loss": 0.3763, "step": 3002 }, { "epoch": 1.0026711185308848, "grad_norm": 0.8834323197399492, "learning_rate": 8.420412425351951e-06, "loss": 0.4197, "step": 3003 }, { "epoch": 1.0030050083472455, "grad_norm": 0.8135812722540209, "learning_rate": 8.418995216755199e-06, "loss": 0.3706, "step": 3004 }, { "epoch": 1.003338898163606, "grad_norm": 0.8550397560329256, "learning_rate": 8.417577492062133e-06, "loss": 0.3819, "step": 3005 }, { "epoch": 1.0036727879799665, "grad_norm": 0.8306901739998624, "learning_rate": 8.416159251486752e-06, "loss": 0.3793, "step": 3006 }, { "epoch": 1.0040066777963272, "grad_norm": 0.866223102895604, "learning_rate": 8.414740495243146e-06, "loss": 0.398, "step": 3007 }, { "epoch": 1.0043405676126878, "grad_norm": 0.8500806670154026, "learning_rate": 8.413321223545473e-06, "loss": 0.3918, "step": 3008 }, { "epoch": 1.0046744574290485, "grad_norm": 0.8925749446772798, "learning_rate": 8.41190143660797e-06, "loss": 0.3945, "step": 3009 }, { "epoch": 1.005008347245409, "grad_norm": 0.8292097182519681, "learning_rate": 8.410481134644957e-06, "loss": 0.3946, "step": 3010 }, { "epoch": 1.0053422370617695, "grad_norm": 0.8963273204376231, "learning_rate": 8.409060317870822e-06, "loss": 0.4036, "step": 3011 }, { "epoch": 1.0056761268781302, "grad_norm": 0.8841971214591823, "learning_rate": 8.407638986500045e-06, "loss": 0.4029, "step": 3012 }, { "epoch": 1.0060100166944907, "grad_norm": 0.8324327564762166, "learning_rate": 8.40621714074717e-06, "loss": 0.3656, "step": 3013 }, { "epoch": 1.0063439065108515, "grad_norm": 0.8559913757226315, "learning_rate": 8.404794780826824e-06, "loss": 0.3875, "step": 3014 }, { "epoch": 1.006677796327212, "grad_norm": 0.8664158751602891, "learning_rate": 8.403371906953715e-06, "loss": 0.3971, "step": 3015 }, { "epoch": 1.0070116861435727, "grad_norm": 0.8870807110085581, "learning_rate": 8.401948519342623e-06, "loss": 0.4037, "step": 3016 }, { "epoch": 1.0073455759599332, "grad_norm": 0.9315779781558998, "learning_rate": 8.40052461820841e-06, "loss": 0.3958, "step": 3017 }, { "epoch": 1.0076794657762937, "grad_norm": 0.8302672347355703, "learning_rate": 8.399100203766012e-06, "loss": 0.367, "step": 3018 }, { "epoch": 1.0080133555926545, "grad_norm": 0.8699562331989634, "learning_rate": 8.397675276230444e-06, "loss": 0.4002, "step": 3019 }, { "epoch": 1.008347245409015, "grad_norm": 0.8293295633992429, "learning_rate": 8.3962498358168e-06, "loss": 0.3836, "step": 3020 }, { "epoch": 1.0086811352253757, "grad_norm": 0.8173717597071527, "learning_rate": 8.394823882740244e-06, "loss": 0.374, "step": 3021 }, { "epoch": 1.0090150250417362, "grad_norm": 0.8532909513222806, "learning_rate": 8.393397417216031e-06, "loss": 0.3835, "step": 3022 }, { "epoch": 1.0093489148580967, "grad_norm": 0.864958194380293, "learning_rate": 8.391970439459482e-06, "loss": 0.3811, "step": 3023 }, { "epoch": 1.0096828046744575, "grad_norm": 0.8285299734831587, "learning_rate": 8.390542949685999e-06, "loss": 0.3712, "step": 3024 }, { "epoch": 1.010016694490818, "grad_norm": 0.905267016998081, "learning_rate": 8.389114948111058e-06, "loss": 0.3972, "step": 3025 }, { "epoch": 1.0103505843071787, "grad_norm": 0.8741623065739326, "learning_rate": 8.387686434950222e-06, "loss": 0.3925, "step": 3026 }, { "epoch": 1.0106844741235392, "grad_norm": 0.8627780525288371, "learning_rate": 8.38625741041912e-06, "loss": 0.3872, "step": 3027 }, { "epoch": 1.0110183639398997, "grad_norm": 0.8454927305983762, "learning_rate": 8.384827874733464e-06, "loss": 0.3692, "step": 3028 }, { "epoch": 1.0113522537562605, "grad_norm": 0.8648812260220751, "learning_rate": 8.383397828109044e-06, "loss": 0.3956, "step": 3029 }, { "epoch": 1.011686143572621, "grad_norm": 0.9006682994022066, "learning_rate": 8.38196727076172e-06, "loss": 0.3931, "step": 3030 }, { "epoch": 1.0120200333889817, "grad_norm": 0.890692596231704, "learning_rate": 8.38053620290744e-06, "loss": 0.3908, "step": 3031 }, { "epoch": 1.0123539232053422, "grad_norm": 0.8892477830608428, "learning_rate": 8.379104624762218e-06, "loss": 0.4065, "step": 3032 }, { "epoch": 1.0126878130217027, "grad_norm": 0.8765776783206715, "learning_rate": 8.377672536542154e-06, "loss": 0.3994, "step": 3033 }, { "epoch": 1.0130217028380635, "grad_norm": 0.8483768270034808, "learning_rate": 8.37623993846342e-06, "loss": 0.3708, "step": 3034 }, { "epoch": 1.013355592654424, "grad_norm": 0.9036079908539265, "learning_rate": 8.374806830742267e-06, "loss": 0.4025, "step": 3035 }, { "epoch": 1.0136894824707847, "grad_norm": 0.8692300614605536, "learning_rate": 8.37337321359502e-06, "loss": 0.3906, "step": 3036 }, { "epoch": 1.0140233722871452, "grad_norm": 0.9210636475892222, "learning_rate": 8.371939087238087e-06, "loss": 0.4044, "step": 3037 }, { "epoch": 1.0143572621035057, "grad_norm": 0.8687984208027372, "learning_rate": 8.370504451887945e-06, "loss": 0.3904, "step": 3038 }, { "epoch": 1.0146911519198665, "grad_norm": 0.8662964142139746, "learning_rate": 8.369069307761155e-06, "loss": 0.3936, "step": 3039 }, { "epoch": 1.015025041736227, "grad_norm": 0.8831848869504457, "learning_rate": 8.367633655074349e-06, "loss": 0.4066, "step": 3040 }, { "epoch": 1.0153589315525877, "grad_norm": 0.8878843109884577, "learning_rate": 8.36619749404424e-06, "loss": 0.3936, "step": 3041 }, { "epoch": 1.0156928213689482, "grad_norm": 0.8558554442869141, "learning_rate": 8.364760824887615e-06, "loss": 0.3916, "step": 3042 }, { "epoch": 1.0160267111853087, "grad_norm": 0.8818828880775983, "learning_rate": 8.363323647821338e-06, "loss": 0.3885, "step": 3043 }, { "epoch": 1.0163606010016695, "grad_norm": 0.8605974301215743, "learning_rate": 8.361885963062352e-06, "loss": 0.3904, "step": 3044 }, { "epoch": 1.01669449081803, "grad_norm": 0.8776127269959224, "learning_rate": 8.360447770827677e-06, "loss": 0.3964, "step": 3045 }, { "epoch": 1.0170283806343907, "grad_norm": 0.8951174538029094, "learning_rate": 8.359009071334403e-06, "loss": 0.3968, "step": 3046 }, { "epoch": 1.0173622704507512, "grad_norm": 0.9173694199173008, "learning_rate": 8.357569864799707e-06, "loss": 0.3941, "step": 3047 }, { "epoch": 1.017696160267112, "grad_norm": 0.901548742553632, "learning_rate": 8.356130151440829e-06, "loss": 0.3963, "step": 3048 }, { "epoch": 1.0180300500834725, "grad_norm": 0.912866127707925, "learning_rate": 8.354689931475101e-06, "loss": 0.4045, "step": 3049 }, { "epoch": 1.018363939899833, "grad_norm": 0.8771423795991797, "learning_rate": 8.35324920511992e-06, "loss": 0.4053, "step": 3050 }, { "epoch": 1.0186978297161937, "grad_norm": 0.885025103304134, "learning_rate": 8.351807972592764e-06, "loss": 0.4005, "step": 3051 }, { "epoch": 1.0190317195325542, "grad_norm": 0.9065865630553249, "learning_rate": 8.350366234111186e-06, "loss": 0.3989, "step": 3052 }, { "epoch": 1.019365609348915, "grad_norm": 0.8547097590481144, "learning_rate": 8.348923989892816e-06, "loss": 0.3967, "step": 3053 }, { "epoch": 1.0196994991652755, "grad_norm": 0.8885622908930214, "learning_rate": 8.347481240155361e-06, "loss": 0.3919, "step": 3054 }, { "epoch": 1.020033388981636, "grad_norm": 0.9149312102278694, "learning_rate": 8.346037985116605e-06, "loss": 0.3994, "step": 3055 }, { "epoch": 1.0203672787979967, "grad_norm": 0.9159520361337552, "learning_rate": 8.344594224994404e-06, "loss": 0.4047, "step": 3056 }, { "epoch": 1.0207011686143572, "grad_norm": 0.8642103926223605, "learning_rate": 8.343149960006694e-06, "loss": 0.3692, "step": 3057 }, { "epoch": 1.021035058430718, "grad_norm": 0.8776893577000907, "learning_rate": 8.341705190371487e-06, "loss": 0.3879, "step": 3058 }, { "epoch": 1.0213689482470785, "grad_norm": 0.9291093156346614, "learning_rate": 8.340259916306873e-06, "loss": 0.404, "step": 3059 }, { "epoch": 1.021702838063439, "grad_norm": 0.8736808970138222, "learning_rate": 8.33881413803101e-06, "loss": 0.4094, "step": 3060 }, { "epoch": 1.0220367278797997, "grad_norm": 0.8992659088203414, "learning_rate": 8.337367855762142e-06, "loss": 0.4074, "step": 3061 }, { "epoch": 1.0223706176961602, "grad_norm": 0.8819611452906267, "learning_rate": 8.335921069718584e-06, "loss": 0.4031, "step": 3062 }, { "epoch": 1.022704507512521, "grad_norm": 0.8444672657981916, "learning_rate": 8.334473780118728e-06, "loss": 0.3926, "step": 3063 }, { "epoch": 1.0230383973288815, "grad_norm": 0.845956973459417, "learning_rate": 8.33302598718104e-06, "loss": 0.3962, "step": 3064 }, { "epoch": 1.023372287145242, "grad_norm": 0.8690957221139648, "learning_rate": 8.331577691124067e-06, "loss": 0.3815, "step": 3065 }, { "epoch": 1.0237061769616027, "grad_norm": 0.8686427724030927, "learning_rate": 8.330128892166429e-06, "loss": 0.3894, "step": 3066 }, { "epoch": 1.0240400667779632, "grad_norm": 0.9119511677177891, "learning_rate": 8.328679590526818e-06, "loss": 0.3937, "step": 3067 }, { "epoch": 1.024373956594324, "grad_norm": 0.8824587447299307, "learning_rate": 8.32722978642401e-06, "loss": 0.3922, "step": 3068 }, { "epoch": 1.0247078464106845, "grad_norm": 0.8930840293597956, "learning_rate": 8.325779480076848e-06, "loss": 0.3907, "step": 3069 }, { "epoch": 1.025041736227045, "grad_norm": 0.9142418420625383, "learning_rate": 8.324328671704258e-06, "loss": 0.4009, "step": 3070 }, { "epoch": 1.0253756260434057, "grad_norm": 0.9118230365013947, "learning_rate": 8.322877361525239e-06, "loss": 0.3968, "step": 3071 }, { "epoch": 1.0257095158597662, "grad_norm": 0.9431326300475298, "learning_rate": 8.321425549758865e-06, "loss": 0.4127, "step": 3072 }, { "epoch": 1.026043405676127, "grad_norm": 0.8795118670129329, "learning_rate": 8.31997323662429e-06, "loss": 0.3854, "step": 3073 }, { "epoch": 1.0263772954924875, "grad_norm": 0.9237446001320372, "learning_rate": 8.318520422340734e-06, "loss": 0.4091, "step": 3074 }, { "epoch": 1.026711185308848, "grad_norm": 0.9309540199904498, "learning_rate": 8.317067107127506e-06, "loss": 0.4119, "step": 3075 }, { "epoch": 1.0270450751252087, "grad_norm": 0.8894299088108442, "learning_rate": 8.315613291203977e-06, "loss": 0.37, "step": 3076 }, { "epoch": 1.0273789649415692, "grad_norm": 0.8985997660078139, "learning_rate": 8.314158974789603e-06, "loss": 0.395, "step": 3077 }, { "epoch": 1.02771285475793, "grad_norm": 0.8921121476356616, "learning_rate": 8.312704158103914e-06, "loss": 0.3849, "step": 3078 }, { "epoch": 1.0280467445742905, "grad_norm": 0.9036052443459062, "learning_rate": 8.311248841366511e-06, "loss": 0.4047, "step": 3079 }, { "epoch": 1.0283806343906512, "grad_norm": 0.8705188690836737, "learning_rate": 8.309793024797076e-06, "loss": 0.4081, "step": 3080 }, { "epoch": 1.0287145242070117, "grad_norm": 0.847697324091338, "learning_rate": 8.308336708615363e-06, "loss": 0.3803, "step": 3081 }, { "epoch": 1.0290484140233722, "grad_norm": 0.8462856013682339, "learning_rate": 8.306879893041205e-06, "loss": 0.372, "step": 3082 }, { "epoch": 1.029382303839733, "grad_norm": 0.8827153039136337, "learning_rate": 8.305422578294505e-06, "loss": 0.3951, "step": 3083 }, { "epoch": 1.0297161936560935, "grad_norm": 0.8623931341216658, "learning_rate": 8.303964764595242e-06, "loss": 0.3939, "step": 3084 }, { "epoch": 1.0300500834724542, "grad_norm": 0.9039094714774385, "learning_rate": 8.302506452163479e-06, "loss": 0.4064, "step": 3085 }, { "epoch": 1.0303839732888147, "grad_norm": 0.9510171220283717, "learning_rate": 8.301047641219343e-06, "loss": 0.4006, "step": 3086 }, { "epoch": 1.0307178631051752, "grad_norm": 0.8640806649896802, "learning_rate": 8.299588331983042e-06, "loss": 0.3964, "step": 3087 }, { "epoch": 1.031051752921536, "grad_norm": 0.8857776466923751, "learning_rate": 8.29812852467486e-06, "loss": 0.3936, "step": 3088 }, { "epoch": 1.0313856427378965, "grad_norm": 0.907365872967301, "learning_rate": 8.296668219515152e-06, "loss": 0.4159, "step": 3089 }, { "epoch": 1.0317195325542572, "grad_norm": 0.8642778705696517, "learning_rate": 8.295207416724354e-06, "loss": 0.3807, "step": 3090 }, { "epoch": 1.0320534223706177, "grad_norm": 0.8797862682904953, "learning_rate": 8.293746116522969e-06, "loss": 0.3873, "step": 3091 }, { "epoch": 1.0323873121869782, "grad_norm": 1.051092299768401, "learning_rate": 8.292284319131584e-06, "loss": 0.4073, "step": 3092 }, { "epoch": 1.032721202003339, "grad_norm": 0.9336871935463733, "learning_rate": 8.290822024770856e-06, "loss": 0.4098, "step": 3093 }, { "epoch": 1.0330550918196995, "grad_norm": 0.8619936579599397, "learning_rate": 8.289359233661515e-06, "loss": 0.3942, "step": 3094 }, { "epoch": 1.0333889816360602, "grad_norm": 0.9101503754448582, "learning_rate": 8.287895946024372e-06, "loss": 0.4017, "step": 3095 }, { "epoch": 1.0337228714524207, "grad_norm": 0.9185106845853932, "learning_rate": 8.286432162080308e-06, "loss": 0.4084, "step": 3096 }, { "epoch": 1.0340567612687812, "grad_norm": 0.8960235033587034, "learning_rate": 8.284967882050283e-06, "loss": 0.3951, "step": 3097 }, { "epoch": 1.034390651085142, "grad_norm": 0.8669471236944498, "learning_rate": 8.283503106155328e-06, "loss": 0.3802, "step": 3098 }, { "epoch": 1.0347245409015025, "grad_norm": 0.8633670849925584, "learning_rate": 8.282037834616551e-06, "loss": 0.3764, "step": 3099 }, { "epoch": 1.0350584307178632, "grad_norm": 0.8905140114342419, "learning_rate": 8.280572067655135e-06, "loss": 0.3965, "step": 3100 }, { "epoch": 1.0353923205342237, "grad_norm": 0.8967896633204501, "learning_rate": 8.279105805492336e-06, "loss": 0.3967, "step": 3101 }, { "epoch": 1.0357262103505842, "grad_norm": 0.9330128974963361, "learning_rate": 8.277639048349484e-06, "loss": 0.4118, "step": 3102 }, { "epoch": 1.036060100166945, "grad_norm": 0.8611138371790268, "learning_rate": 8.27617179644799e-06, "loss": 0.3928, "step": 3103 }, { "epoch": 1.0363939899833055, "grad_norm": 0.9294821786832684, "learning_rate": 8.274704050009332e-06, "loss": 0.4172, "step": 3104 }, { "epoch": 1.0367278797996662, "grad_norm": 0.9199379491940507, "learning_rate": 8.273235809255067e-06, "loss": 0.4007, "step": 3105 }, { "epoch": 1.0370617696160267, "grad_norm": 0.8585889446637955, "learning_rate": 8.271767074406824e-06, "loss": 0.3822, "step": 3106 }, { "epoch": 1.0373956594323872, "grad_norm": 0.8586138948443255, "learning_rate": 8.27029784568631e-06, "loss": 0.3973, "step": 3107 }, { "epoch": 1.037729549248748, "grad_norm": 0.8686791680277933, "learning_rate": 8.268828123315305e-06, "loss": 0.4035, "step": 3108 }, { "epoch": 1.0380634390651085, "grad_norm": 0.8672447222253389, "learning_rate": 8.267357907515662e-06, "loss": 0.3839, "step": 3109 }, { "epoch": 1.0383973288814692, "grad_norm": 0.8485263386080519, "learning_rate": 8.265887198509308e-06, "loss": 0.3923, "step": 3110 }, { "epoch": 1.0387312186978297, "grad_norm": 0.8960257174001259, "learning_rate": 8.26441599651825e-06, "loss": 0.418, "step": 3111 }, { "epoch": 1.0390651085141904, "grad_norm": 0.9028117358371174, "learning_rate": 8.262944301764561e-06, "loss": 0.3965, "step": 3112 }, { "epoch": 1.039398998330551, "grad_norm": 0.879392632548917, "learning_rate": 8.261472114470398e-06, "loss": 0.3824, "step": 3113 }, { "epoch": 1.0397328881469114, "grad_norm": 0.928383289615137, "learning_rate": 8.259999434857983e-06, "loss": 0.3992, "step": 3114 }, { "epoch": 1.0400667779632722, "grad_norm": 0.918637706550834, "learning_rate": 8.258526263149618e-06, "loss": 0.3907, "step": 3115 }, { "epoch": 1.0404006677796327, "grad_norm": 0.9171988964939671, "learning_rate": 8.257052599567679e-06, "loss": 0.4149, "step": 3116 }, { "epoch": 1.0407345575959934, "grad_norm": 0.9195392794641568, "learning_rate": 8.255578444334613e-06, "loss": 0.4078, "step": 3117 }, { "epoch": 1.041068447412354, "grad_norm": 0.9078728513026286, "learning_rate": 8.254103797672944e-06, "loss": 0.3778, "step": 3118 }, { "epoch": 1.0414023372287144, "grad_norm": 0.9310823402426939, "learning_rate": 8.25262865980527e-06, "loss": 0.4047, "step": 3119 }, { "epoch": 1.0417362270450752, "grad_norm": 0.8561226951397582, "learning_rate": 8.251153030954263e-06, "loss": 0.3771, "step": 3120 }, { "epoch": 1.0420701168614357, "grad_norm": 0.9377595169338342, "learning_rate": 8.249676911342668e-06, "loss": 0.4162, "step": 3121 }, { "epoch": 1.0424040066777964, "grad_norm": 0.9593920134005892, "learning_rate": 8.248200301193305e-06, "loss": 0.3984, "step": 3122 }, { "epoch": 1.042737896494157, "grad_norm": 0.8910132801383792, "learning_rate": 8.246723200729068e-06, "loss": 0.3949, "step": 3123 }, { "epoch": 1.0430717863105174, "grad_norm": 0.877965200558971, "learning_rate": 8.245245610172923e-06, "loss": 0.3825, "step": 3124 }, { "epoch": 1.0434056761268782, "grad_norm": 0.8713582109800347, "learning_rate": 8.243767529747914e-06, "loss": 0.3932, "step": 3125 }, { "epoch": 1.0437395659432387, "grad_norm": 0.8408396584499739, "learning_rate": 8.242288959677158e-06, "loss": 0.3762, "step": 3126 }, { "epoch": 1.0440734557595994, "grad_norm": 0.9051712038781157, "learning_rate": 8.240809900183842e-06, "loss": 0.394, "step": 3127 }, { "epoch": 1.04440734557596, "grad_norm": 0.8577325996570899, "learning_rate": 8.23933035149123e-06, "loss": 0.3864, "step": 3128 }, { "epoch": 1.0447412353923204, "grad_norm": 0.8892209925241082, "learning_rate": 8.23785031382266e-06, "loss": 0.3941, "step": 3129 }, { "epoch": 1.0450751252086812, "grad_norm": 0.8959552179777884, "learning_rate": 8.236369787401545e-06, "loss": 0.3837, "step": 3130 }, { "epoch": 1.0454090150250417, "grad_norm": 0.8513077631051694, "learning_rate": 8.234888772451366e-06, "loss": 0.3734, "step": 3131 }, { "epoch": 1.0457429048414024, "grad_norm": 0.9103302985325497, "learning_rate": 8.233407269195687e-06, "loss": 0.403, "step": 3132 }, { "epoch": 1.046076794657763, "grad_norm": 0.85798145783765, "learning_rate": 8.231925277858135e-06, "loss": 0.3923, "step": 3133 }, { "epoch": 1.0464106844741234, "grad_norm": 0.8731528978211633, "learning_rate": 8.230442798662421e-06, "loss": 0.3907, "step": 3134 }, { "epoch": 1.0467445742904842, "grad_norm": 0.9006902114151525, "learning_rate": 8.22895983183232e-06, "loss": 0.3949, "step": 3135 }, { "epoch": 1.0470784641068447, "grad_norm": 0.8917716037759945, "learning_rate": 8.22747637759169e-06, "loss": 0.3843, "step": 3136 }, { "epoch": 1.0474123539232054, "grad_norm": 0.8670828851624409, "learning_rate": 8.225992436164455e-06, "loss": 0.3715, "step": 3137 }, { "epoch": 1.047746243739566, "grad_norm": 0.8605790289829728, "learning_rate": 8.224508007774616e-06, "loss": 0.3833, "step": 3138 }, { "epoch": 1.0480801335559264, "grad_norm": 0.900129430385847, "learning_rate": 8.22302309264625e-06, "loss": 0.393, "step": 3139 }, { "epoch": 1.0484140233722872, "grad_norm": 0.9079768080038657, "learning_rate": 8.221537691003499e-06, "loss": 0.3835, "step": 3140 }, { "epoch": 1.0487479131886477, "grad_norm": 0.8802125268653678, "learning_rate": 8.220051803070588e-06, "loss": 0.3733, "step": 3141 }, { "epoch": 1.0490818030050084, "grad_norm": 0.9242446159654144, "learning_rate": 8.218565429071809e-06, "loss": 0.3977, "step": 3142 }, { "epoch": 1.049415692821369, "grad_norm": 0.847776707694496, "learning_rate": 8.217078569231534e-06, "loss": 0.3741, "step": 3143 }, { "epoch": 1.0497495826377294, "grad_norm": 0.9088917253303966, "learning_rate": 8.215591223774199e-06, "loss": 0.4018, "step": 3144 }, { "epoch": 1.0500834724540902, "grad_norm": 0.8833623158175578, "learning_rate": 8.21410339292432e-06, "loss": 0.3805, "step": 3145 }, { "epoch": 1.0504173622704507, "grad_norm": 0.8979171513638683, "learning_rate": 8.212615076906486e-06, "loss": 0.4015, "step": 3146 }, { "epoch": 1.0507512520868114, "grad_norm": 0.8881263872062358, "learning_rate": 8.211126275945356e-06, "loss": 0.3911, "step": 3147 }, { "epoch": 1.051085141903172, "grad_norm": 0.8824826993042976, "learning_rate": 8.209636990265667e-06, "loss": 0.3953, "step": 3148 }, { "epoch": 1.0514190317195327, "grad_norm": 0.8602926467834873, "learning_rate": 8.208147220092222e-06, "loss": 0.382, "step": 3149 }, { "epoch": 1.0517529215358932, "grad_norm": 0.8995903258678679, "learning_rate": 8.206656965649903e-06, "loss": 0.3996, "step": 3150 }, { "epoch": 1.0520868113522537, "grad_norm": 0.9592675157138439, "learning_rate": 8.205166227163665e-06, "loss": 0.4059, "step": 3151 }, { "epoch": 1.0524207011686144, "grad_norm": 0.8650193955110482, "learning_rate": 8.203675004858536e-06, "loss": 0.3724, "step": 3152 }, { "epoch": 1.052754590984975, "grad_norm": 0.8839983508804412, "learning_rate": 8.202183298959609e-06, "loss": 0.393, "step": 3153 }, { "epoch": 1.0530884808013357, "grad_norm": 0.8842137440374771, "learning_rate": 8.200691109692062e-06, "loss": 0.4023, "step": 3154 }, { "epoch": 1.0534223706176962, "grad_norm": 0.8793038058089941, "learning_rate": 8.19919843728114e-06, "loss": 0.3866, "step": 3155 }, { "epoch": 1.0537562604340567, "grad_norm": 0.8791769276997273, "learning_rate": 8.19770528195216e-06, "loss": 0.3926, "step": 3156 }, { "epoch": 1.0540901502504174, "grad_norm": 0.8838237414779386, "learning_rate": 8.196211643930514e-06, "loss": 0.3679, "step": 3157 }, { "epoch": 1.054424040066778, "grad_norm": 0.8822198394644679, "learning_rate": 8.194717523441666e-06, "loss": 0.3927, "step": 3158 }, { "epoch": 1.0547579298831387, "grad_norm": 0.859854788908128, "learning_rate": 8.193222920711156e-06, "loss": 0.3719, "step": 3159 }, { "epoch": 1.0550918196994992, "grad_norm": 0.8797764294523429, "learning_rate": 8.191727835964589e-06, "loss": 0.3716, "step": 3160 }, { "epoch": 1.0554257095158597, "grad_norm": 0.8776277551752107, "learning_rate": 8.19023226942765e-06, "loss": 0.379, "step": 3161 }, { "epoch": 1.0557595993322204, "grad_norm": 0.902189259649189, "learning_rate": 8.188736221326092e-06, "loss": 0.3962, "step": 3162 }, { "epoch": 1.056093489148581, "grad_norm": 0.8916153954356842, "learning_rate": 8.187239691885746e-06, "loss": 0.3792, "step": 3163 }, { "epoch": 1.0564273789649417, "grad_norm": 0.9046986256506887, "learning_rate": 8.185742681332513e-06, "loss": 0.409, "step": 3164 }, { "epoch": 1.0567612687813022, "grad_norm": 0.9020696525421477, "learning_rate": 8.184245189892366e-06, "loss": 0.401, "step": 3165 }, { "epoch": 1.0570951585976627, "grad_norm": 0.8571256074687121, "learning_rate": 8.182747217791347e-06, "loss": 0.3728, "step": 3166 }, { "epoch": 1.0574290484140234, "grad_norm": 0.8788806310472191, "learning_rate": 8.181248765255579e-06, "loss": 0.3899, "step": 3167 }, { "epoch": 1.057762938230384, "grad_norm": 0.8597463324797313, "learning_rate": 8.17974983251125e-06, "loss": 0.3778, "step": 3168 }, { "epoch": 1.0580968280467447, "grad_norm": 0.8788192963127184, "learning_rate": 8.178250419784627e-06, "loss": 0.3954, "step": 3169 }, { "epoch": 1.0584307178631052, "grad_norm": 0.9121975524727213, "learning_rate": 8.176750527302043e-06, "loss": 0.407, "step": 3170 }, { "epoch": 1.0587646076794657, "grad_norm": 0.8868923780597968, "learning_rate": 8.175250155289906e-06, "loss": 0.3802, "step": 3171 }, { "epoch": 1.0590984974958264, "grad_norm": 0.8766036355862518, "learning_rate": 8.173749303974697e-06, "loss": 0.3973, "step": 3172 }, { "epoch": 1.059432387312187, "grad_norm": 0.8767333934354954, "learning_rate": 8.172247973582971e-06, "loss": 0.4003, "step": 3173 }, { "epoch": 1.0597662771285477, "grad_norm": 0.8890324282055533, "learning_rate": 8.170746164341351e-06, "loss": 0.3984, "step": 3174 }, { "epoch": 1.0601001669449082, "grad_norm": 0.8626627882976675, "learning_rate": 8.16924387647654e-06, "loss": 0.3971, "step": 3175 }, { "epoch": 1.060434056761269, "grad_norm": 0.9173157610433953, "learning_rate": 8.167741110215299e-06, "loss": 0.4014, "step": 3176 }, { "epoch": 1.0607679465776294, "grad_norm": 0.9310869220717732, "learning_rate": 8.166237865784477e-06, "loss": 0.3837, "step": 3177 }, { "epoch": 1.06110183639399, "grad_norm": 0.8923902959781792, "learning_rate": 8.164734143410987e-06, "loss": 0.3791, "step": 3178 }, { "epoch": 1.0614357262103506, "grad_norm": 0.92622887709934, "learning_rate": 8.163229943321814e-06, "loss": 0.3905, "step": 3179 }, { "epoch": 1.0617696160267112, "grad_norm": 0.8998871591837295, "learning_rate": 8.161725265744016e-06, "loss": 0.3672, "step": 3180 }, { "epoch": 1.062103505843072, "grad_norm": 0.9363180278793608, "learning_rate": 8.160220110904724e-06, "loss": 0.3932, "step": 3181 }, { "epoch": 1.0624373956594324, "grad_norm": 0.931289392652576, "learning_rate": 8.158714479031142e-06, "loss": 0.396, "step": 3182 }, { "epoch": 1.062771285475793, "grad_norm": 0.8547779799222022, "learning_rate": 8.157208370350547e-06, "loss": 0.3752, "step": 3183 }, { "epoch": 1.0631051752921536, "grad_norm": 0.8578123449038728, "learning_rate": 8.155701785090282e-06, "loss": 0.3801, "step": 3184 }, { "epoch": 1.0634390651085142, "grad_norm": 0.8911626368515142, "learning_rate": 8.154194723477763e-06, "loss": 0.3817, "step": 3185 }, { "epoch": 1.063772954924875, "grad_norm": 0.8841347195376554, "learning_rate": 8.152687185740487e-06, "loss": 0.3944, "step": 3186 }, { "epoch": 1.0641068447412354, "grad_norm": 0.9318434282100492, "learning_rate": 8.151179172106012e-06, "loss": 0.3929, "step": 3187 }, { "epoch": 1.064440734557596, "grad_norm": 0.8656718918051046, "learning_rate": 8.149670682801975e-06, "loss": 0.383, "step": 3188 }, { "epoch": 1.0647746243739566, "grad_norm": 0.9553345512385568, "learning_rate": 8.148161718056078e-06, "loss": 0.3987, "step": 3189 }, { "epoch": 1.0651085141903172, "grad_norm": 0.9159139999467111, "learning_rate": 8.146652278096104e-06, "loss": 0.3904, "step": 3190 }, { "epoch": 1.065442404006678, "grad_norm": 0.8649249502177451, "learning_rate": 8.145142363149898e-06, "loss": 0.3878, "step": 3191 }, { "epoch": 1.0657762938230384, "grad_norm": 0.8663615475291239, "learning_rate": 8.143631973445385e-06, "loss": 0.3835, "step": 3192 }, { "epoch": 1.066110183639399, "grad_norm": 0.8828729480283173, "learning_rate": 8.142121109210554e-06, "loss": 0.3703, "step": 3193 }, { "epoch": 1.0664440734557596, "grad_norm": 0.90773311618947, "learning_rate": 8.140609770673472e-06, "loss": 0.3927, "step": 3194 }, { "epoch": 1.0667779632721202, "grad_norm": 0.8543861316785322, "learning_rate": 8.139097958062275e-06, "loss": 0.3766, "step": 3195 }, { "epoch": 1.0671118530884809, "grad_norm": 0.9379840167193493, "learning_rate": 8.137585671605169e-06, "loss": 0.3948, "step": 3196 }, { "epoch": 1.0674457429048414, "grad_norm": 0.9066003459617071, "learning_rate": 8.136072911530435e-06, "loss": 0.388, "step": 3197 }, { "epoch": 1.067779632721202, "grad_norm": 0.9020851397538543, "learning_rate": 8.134559678066423e-06, "loss": 0.3846, "step": 3198 }, { "epoch": 1.0681135225375626, "grad_norm": 0.9021313423769842, "learning_rate": 8.133045971441555e-06, "loss": 0.3819, "step": 3199 }, { "epoch": 1.0684474123539232, "grad_norm": 0.961285827797377, "learning_rate": 8.131531791884326e-06, "loss": 0.4088, "step": 3200 }, { "epoch": 1.0687813021702839, "grad_norm": 0.8883558367989787, "learning_rate": 8.130017139623298e-06, "loss": 0.3747, "step": 3201 }, { "epoch": 1.0691151919866444, "grad_norm": 0.8795046790425848, "learning_rate": 8.128502014887111e-06, "loss": 0.3792, "step": 3202 }, { "epoch": 1.069449081803005, "grad_norm": 0.8321825018490889, "learning_rate": 8.12698641790447e-06, "loss": 0.3797, "step": 3203 }, { "epoch": 1.0697829716193656, "grad_norm": 0.8613351876932255, "learning_rate": 8.125470348904153e-06, "loss": 0.3942, "step": 3204 }, { "epoch": 1.0701168614357262, "grad_norm": 0.8952855335290385, "learning_rate": 8.123953808115014e-06, "loss": 0.3983, "step": 3205 }, { "epoch": 1.0704507512520869, "grad_norm": 0.9301039563176334, "learning_rate": 8.122436795765972e-06, "loss": 0.4125, "step": 3206 }, { "epoch": 1.0707846410684474, "grad_norm": 0.8679534334686382, "learning_rate": 8.120919312086019e-06, "loss": 0.4027, "step": 3207 }, { "epoch": 1.071118530884808, "grad_norm": 0.8581405550222333, "learning_rate": 8.11940135730422e-06, "loss": 0.3919, "step": 3208 }, { "epoch": 1.0714524207011686, "grad_norm": 0.8535338166691963, "learning_rate": 8.117882931649707e-06, "loss": 0.3761, "step": 3209 }, { "epoch": 1.0717863105175292, "grad_norm": 0.8688907950469874, "learning_rate": 8.116364035351691e-06, "loss": 0.3859, "step": 3210 }, { "epoch": 1.0721202003338899, "grad_norm": 0.9001058413904593, "learning_rate": 8.114844668639446e-06, "loss": 0.3956, "step": 3211 }, { "epoch": 1.0724540901502504, "grad_norm": 0.9066258906372988, "learning_rate": 8.113324831742318e-06, "loss": 0.4, "step": 3212 }, { "epoch": 1.072787979966611, "grad_norm": 0.8969215645447128, "learning_rate": 8.11180452488973e-06, "loss": 0.3928, "step": 3213 }, { "epoch": 1.0731218697829716, "grad_norm": 0.9400055181416533, "learning_rate": 8.110283748311167e-06, "loss": 0.4038, "step": 3214 }, { "epoch": 1.0734557595993321, "grad_norm": 0.8802665656772269, "learning_rate": 8.108762502236196e-06, "loss": 0.3818, "step": 3215 }, { "epoch": 1.0737896494156929, "grad_norm": 0.8669834452255005, "learning_rate": 8.107240786894443e-06, "loss": 0.3722, "step": 3216 }, { "epoch": 1.0741235392320534, "grad_norm": 0.8963277882355981, "learning_rate": 8.105718602515615e-06, "loss": 0.3957, "step": 3217 }, { "epoch": 1.0744574290484141, "grad_norm": 0.8948695326483893, "learning_rate": 8.104195949329481e-06, "loss": 0.3902, "step": 3218 }, { "epoch": 1.0747913188647746, "grad_norm": 0.8749340231612588, "learning_rate": 8.102672827565887e-06, "loss": 0.3831, "step": 3219 }, { "epoch": 1.0751252086811351, "grad_norm": 0.8919036477073579, "learning_rate": 8.101149237454748e-06, "loss": 0.3926, "step": 3220 }, { "epoch": 1.0754590984974959, "grad_norm": 0.8561343982069518, "learning_rate": 8.099625179226052e-06, "loss": 0.3875, "step": 3221 }, { "epoch": 1.0757929883138564, "grad_norm": 0.9176992332616131, "learning_rate": 8.09810065310985e-06, "loss": 0.3954, "step": 3222 }, { "epoch": 1.0761268781302171, "grad_norm": 0.8785953786739455, "learning_rate": 8.096575659336272e-06, "loss": 0.3939, "step": 3223 }, { "epoch": 1.0764607679465776, "grad_norm": 0.9613901350898422, "learning_rate": 8.095050198135512e-06, "loss": 0.4195, "step": 3224 }, { "epoch": 1.0767946577629381, "grad_norm": 0.900245580368975, "learning_rate": 8.093524269737844e-06, "loss": 0.3832, "step": 3225 }, { "epoch": 1.0771285475792989, "grad_norm": 0.8897300534925999, "learning_rate": 8.091997874373602e-06, "loss": 0.402, "step": 3226 }, { "epoch": 1.0774624373956594, "grad_norm": 0.8762414266643731, "learning_rate": 8.090471012273196e-06, "loss": 0.3986, "step": 3227 }, { "epoch": 1.0777963272120201, "grad_norm": 0.8759599243240119, "learning_rate": 8.088943683667103e-06, "loss": 0.385, "step": 3228 }, { "epoch": 1.0781302170283806, "grad_norm": 0.9057233147429664, "learning_rate": 8.087415888785879e-06, "loss": 0.3926, "step": 3229 }, { "epoch": 1.0784641068447411, "grad_norm": 0.8544252064585779, "learning_rate": 8.085887627860137e-06, "loss": 0.3879, "step": 3230 }, { "epoch": 1.0787979966611019, "grad_norm": 0.8737105345356283, "learning_rate": 8.084358901120572e-06, "loss": 0.3879, "step": 3231 }, { "epoch": 1.0791318864774624, "grad_norm": 0.8537395708600019, "learning_rate": 8.082829708797945e-06, "loss": 0.3713, "step": 3232 }, { "epoch": 1.0794657762938231, "grad_norm": 0.8876010699635731, "learning_rate": 8.081300051123084e-06, "loss": 0.3796, "step": 3233 }, { "epoch": 1.0797996661101836, "grad_norm": 0.890209458249904, "learning_rate": 8.079769928326892e-06, "loss": 0.3892, "step": 3234 }, { "epoch": 1.0801335559265441, "grad_norm": 0.906796187892199, "learning_rate": 8.07823934064034e-06, "loss": 0.3973, "step": 3235 }, { "epoch": 1.0804674457429049, "grad_norm": 0.8767019081866398, "learning_rate": 8.076708288294474e-06, "loss": 0.3697, "step": 3236 }, { "epoch": 1.0808013355592654, "grad_norm": 0.889985464865374, "learning_rate": 8.0751767715204e-06, "loss": 0.3957, "step": 3237 }, { "epoch": 1.0811352253756261, "grad_norm": 0.8658498399588316, "learning_rate": 8.073644790549303e-06, "loss": 0.3892, "step": 3238 }, { "epoch": 1.0814691151919866, "grad_norm": 0.8558657194404254, "learning_rate": 8.072112345612434e-06, "loss": 0.3713, "step": 3239 }, { "epoch": 1.0818030050083474, "grad_norm": 0.926016256066877, "learning_rate": 8.070579436941115e-06, "loss": 0.3911, "step": 3240 }, { "epoch": 1.0821368948247079, "grad_norm": 0.8928541185422916, "learning_rate": 8.069046064766739e-06, "loss": 0.3926, "step": 3241 }, { "epoch": 1.0824707846410684, "grad_norm": 0.8804577586823944, "learning_rate": 8.067512229320768e-06, "loss": 0.3962, "step": 3242 }, { "epoch": 1.0828046744574291, "grad_norm": 0.9032284193377653, "learning_rate": 8.065977930834734e-06, "loss": 0.3721, "step": 3243 }, { "epoch": 1.0831385642737896, "grad_norm": 0.9612745163138661, "learning_rate": 8.064443169540239e-06, "loss": 0.4086, "step": 3244 }, { "epoch": 1.0834724540901504, "grad_norm": 0.8588135647584106, "learning_rate": 8.062907945668954e-06, "loss": 0.3761, "step": 3245 }, { "epoch": 1.0838063439065109, "grad_norm": 0.8862100537922674, "learning_rate": 8.061372259452623e-06, "loss": 0.3825, "step": 3246 }, { "epoch": 1.0841402337228714, "grad_norm": 0.8940698213552815, "learning_rate": 8.059836111123052e-06, "loss": 0.4044, "step": 3247 }, { "epoch": 1.0844741235392321, "grad_norm": 0.9200149080220738, "learning_rate": 8.058299500912128e-06, "loss": 0.3991, "step": 3248 }, { "epoch": 1.0848080133555926, "grad_norm": 0.903570071239533, "learning_rate": 8.056762429051797e-06, "loss": 0.3828, "step": 3249 }, { "epoch": 1.0851419031719534, "grad_norm": 0.9544435392350319, "learning_rate": 8.055224895774084e-06, "loss": 0.411, "step": 3250 }, { "epoch": 1.0854757929883139, "grad_norm": 0.852859892157824, "learning_rate": 8.053686901311075e-06, "loss": 0.3759, "step": 3251 }, { "epoch": 1.0858096828046744, "grad_norm": 0.964997284923933, "learning_rate": 8.052148445894933e-06, "loss": 0.3936, "step": 3252 }, { "epoch": 1.0861435726210351, "grad_norm": 0.9128373244845401, "learning_rate": 8.050609529757888e-06, "loss": 0.396, "step": 3253 }, { "epoch": 1.0864774624373956, "grad_norm": 0.8599529547613759, "learning_rate": 8.049070153132237e-06, "loss": 0.3896, "step": 3254 }, { "epoch": 1.0868113522537564, "grad_norm": 0.9159647509525665, "learning_rate": 8.047530316250346e-06, "loss": 0.3926, "step": 3255 }, { "epoch": 1.0871452420701169, "grad_norm": 0.8610037995658492, "learning_rate": 8.045990019344659e-06, "loss": 0.3935, "step": 3256 }, { "epoch": 1.0874791318864774, "grad_norm": 0.842937436919229, "learning_rate": 8.044449262647678e-06, "loss": 0.3812, "step": 3257 }, { "epoch": 1.0878130217028381, "grad_norm": 0.8418474716847071, "learning_rate": 8.042908046391982e-06, "loss": 0.3672, "step": 3258 }, { "epoch": 1.0881469115191986, "grad_norm": 1.0519209242031085, "learning_rate": 8.041366370810217e-06, "loss": 0.4086, "step": 3259 }, { "epoch": 1.0884808013355594, "grad_norm": 0.8951925334112902, "learning_rate": 8.039824236135099e-06, "loss": 0.3868, "step": 3260 }, { "epoch": 1.0888146911519199, "grad_norm": 0.9210436989189595, "learning_rate": 8.03828164259941e-06, "loss": 0.4043, "step": 3261 }, { "epoch": 1.0891485809682804, "grad_norm": 0.898430400603667, "learning_rate": 8.036738590436008e-06, "loss": 0.3752, "step": 3262 }, { "epoch": 1.089482470784641, "grad_norm": 0.8908686026765505, "learning_rate": 8.035195079877813e-06, "loss": 0.3763, "step": 3263 }, { "epoch": 1.0898163606010016, "grad_norm": 0.9390690059956107, "learning_rate": 8.033651111157821e-06, "loss": 0.3961, "step": 3264 }, { "epoch": 1.0901502504173624, "grad_norm": 0.8762506828622931, "learning_rate": 8.03210668450909e-06, "loss": 0.3894, "step": 3265 }, { "epoch": 1.0904841402337229, "grad_norm": 0.8535213678771666, "learning_rate": 8.030561800164753e-06, "loss": 0.375, "step": 3266 }, { "epoch": 1.0908180300500834, "grad_norm": 0.8703211507424985, "learning_rate": 8.02901645835801e-06, "loss": 0.3855, "step": 3267 }, { "epoch": 1.091151919866444, "grad_norm": 0.943944226142991, "learning_rate": 8.027470659322127e-06, "loss": 0.3973, "step": 3268 }, { "epoch": 1.0914858096828046, "grad_norm": 0.8927250024190441, "learning_rate": 8.025924403290446e-06, "loss": 0.39, "step": 3269 }, { "epoch": 1.0918196994991654, "grad_norm": 0.908250052227963, "learning_rate": 8.02437769049637e-06, "loss": 0.3902, "step": 3270 }, { "epoch": 1.0921535893155259, "grad_norm": 0.9058335020854832, "learning_rate": 8.022830521173376e-06, "loss": 0.3828, "step": 3271 }, { "epoch": 1.0924874791318864, "grad_norm": 0.8977629897804512, "learning_rate": 8.021282895555012e-06, "loss": 0.3894, "step": 3272 }, { "epoch": 1.092821368948247, "grad_norm": 0.9208836924360758, "learning_rate": 8.019734813874889e-06, "loss": 0.4072, "step": 3273 }, { "epoch": 1.0931552587646076, "grad_norm": 0.9751771639349803, "learning_rate": 8.018186276366687e-06, "loss": 0.4003, "step": 3274 }, { "epoch": 1.0934891485809684, "grad_norm": 0.9454737011405774, "learning_rate": 8.016637283264162e-06, "loss": 0.3963, "step": 3275 }, { "epoch": 1.0938230383973289, "grad_norm": 0.8864620819134884, "learning_rate": 8.01508783480113e-06, "loss": 0.3976, "step": 3276 }, { "epoch": 1.0941569282136894, "grad_norm": 0.9407976562359657, "learning_rate": 8.013537931211483e-06, "loss": 0.4077, "step": 3277 }, { "epoch": 1.09449081803005, "grad_norm": 0.919206353336701, "learning_rate": 8.011987572729176e-06, "loss": 0.3909, "step": 3278 }, { "epoch": 1.0948247078464106, "grad_norm": 0.9305252693439695, "learning_rate": 8.010436759588238e-06, "loss": 0.4074, "step": 3279 }, { "epoch": 1.0951585976627713, "grad_norm": 0.885815662555408, "learning_rate": 8.00888549202276e-06, "loss": 0.3933, "step": 3280 }, { "epoch": 1.0954924874791319, "grad_norm": 0.8758783081358801, "learning_rate": 8.00733377026691e-06, "loss": 0.3871, "step": 3281 }, { "epoch": 1.0958263772954924, "grad_norm": 0.9029710128021723, "learning_rate": 8.005781594554913e-06, "loss": 0.3658, "step": 3282 }, { "epoch": 1.096160267111853, "grad_norm": 0.854238977298581, "learning_rate": 8.004228965121077e-06, "loss": 0.3647, "step": 3283 }, { "epoch": 1.0964941569282136, "grad_norm": 0.8717563172118248, "learning_rate": 8.002675882199767e-06, "loss": 0.3842, "step": 3284 }, { "epoch": 1.0968280467445743, "grad_norm": 0.8736200839608905, "learning_rate": 8.001122346025423e-06, "loss": 0.395, "step": 3285 }, { "epoch": 1.0971619365609349, "grad_norm": 0.8960848184099974, "learning_rate": 7.999568356832546e-06, "loss": 0.3773, "step": 3286 }, { "epoch": 1.0974958263772956, "grad_norm": 0.9698708057567024, "learning_rate": 7.998013914855715e-06, "loss": 0.4232, "step": 3287 }, { "epoch": 1.097829716193656, "grad_norm": 0.909394046575844, "learning_rate": 7.99645902032957e-06, "loss": 0.3904, "step": 3288 }, { "epoch": 1.0981636060100166, "grad_norm": 0.9152510477827754, "learning_rate": 7.994903673488822e-06, "loss": 0.3975, "step": 3289 }, { "epoch": 1.0984974958263773, "grad_norm": 0.8976382578547708, "learning_rate": 7.993347874568254e-06, "loss": 0.3775, "step": 3290 }, { "epoch": 1.0988313856427379, "grad_norm": 0.8949418158757023, "learning_rate": 7.991791623802707e-06, "loss": 0.3812, "step": 3291 }, { "epoch": 1.0991652754590986, "grad_norm": 0.9225934281446169, "learning_rate": 7.9902349214271e-06, "loss": 0.3871, "step": 3292 }, { "epoch": 1.099499165275459, "grad_norm": 0.9145805970669288, "learning_rate": 7.988677767676418e-06, "loss": 0.4021, "step": 3293 }, { "epoch": 1.0998330550918196, "grad_norm": 0.9058316569739226, "learning_rate": 7.98712016278571e-06, "loss": 0.3899, "step": 3294 }, { "epoch": 1.1001669449081803, "grad_norm": 0.8878165502090992, "learning_rate": 7.985562106990098e-06, "loss": 0.3862, "step": 3295 }, { "epoch": 1.1005008347245409, "grad_norm": 0.9215465343797, "learning_rate": 7.98400360052477e-06, "loss": 0.412, "step": 3296 }, { "epoch": 1.1008347245409016, "grad_norm": 0.9147924189348714, "learning_rate": 7.982444643624982e-06, "loss": 0.3947, "step": 3297 }, { "epoch": 1.101168614357262, "grad_norm": 0.8737345960372848, "learning_rate": 7.980885236526055e-06, "loss": 0.3877, "step": 3298 }, { "epoch": 1.1015025041736226, "grad_norm": 0.8879574800759141, "learning_rate": 7.979325379463388e-06, "loss": 0.3864, "step": 3299 }, { "epoch": 1.1018363939899833, "grad_norm": 0.8733301728282288, "learning_rate": 7.977765072672433e-06, "loss": 0.3952, "step": 3300 }, { "epoch": 1.1021702838063439, "grad_norm": 0.8785117385134673, "learning_rate": 7.976204316388723e-06, "loss": 0.3833, "step": 3301 }, { "epoch": 1.1025041736227046, "grad_norm": 0.8546207074063189, "learning_rate": 7.97464311084785e-06, "loss": 0.3895, "step": 3302 }, { "epoch": 1.102838063439065, "grad_norm": 0.9150801127493177, "learning_rate": 7.973081456285483e-06, "loss": 0.4038, "step": 3303 }, { "epoch": 1.1031719532554256, "grad_norm": 0.8926014614718639, "learning_rate": 7.971519352937348e-06, "loss": 0.3857, "step": 3304 }, { "epoch": 1.1035058430717863, "grad_norm": 0.8930043825085774, "learning_rate": 7.969956801039248e-06, "loss": 0.3806, "step": 3305 }, { "epoch": 1.1038397328881469, "grad_norm": 0.9417384188300841, "learning_rate": 7.968393800827045e-06, "loss": 0.41, "step": 3306 }, { "epoch": 1.1041736227045076, "grad_norm": 0.9026075292483013, "learning_rate": 7.966830352536678e-06, "loss": 0.4089, "step": 3307 }, { "epoch": 1.104507512520868, "grad_norm": 0.921536859195394, "learning_rate": 7.965266456404147e-06, "loss": 0.4056, "step": 3308 }, { "epoch": 1.1048414023372288, "grad_norm": 0.9115516328732307, "learning_rate": 7.963702112665523e-06, "loss": 0.3868, "step": 3309 }, { "epoch": 1.1051752921535893, "grad_norm": 0.8496154366872113, "learning_rate": 7.962137321556942e-06, "loss": 0.3874, "step": 3310 }, { "epoch": 1.1055091819699499, "grad_norm": 0.882931787440021, "learning_rate": 7.960572083314606e-06, "loss": 0.3886, "step": 3311 }, { "epoch": 1.1058430717863106, "grad_norm": 0.8778157649070525, "learning_rate": 7.959006398174794e-06, "loss": 0.3914, "step": 3312 }, { "epoch": 1.106176961602671, "grad_norm": 0.8632678953027152, "learning_rate": 7.95744026637384e-06, "loss": 0.3903, "step": 3313 }, { "epoch": 1.1065108514190318, "grad_norm": 0.8976025552004778, "learning_rate": 7.955873688148153e-06, "loss": 0.3945, "step": 3314 }, { "epoch": 1.1068447412353923, "grad_norm": 0.850891520681039, "learning_rate": 7.954306663734205e-06, "loss": 0.3624, "step": 3315 }, { "epoch": 1.1071786310517528, "grad_norm": 0.8536574269638779, "learning_rate": 7.952739193368544e-06, "loss": 0.3811, "step": 3316 }, { "epoch": 1.1075125208681136, "grad_norm": 0.8713823822726785, "learning_rate": 7.951171277287775e-06, "loss": 0.3838, "step": 3317 }, { "epoch": 1.107846410684474, "grad_norm": 0.9474043775930535, "learning_rate": 7.949602915728574e-06, "loss": 0.4032, "step": 3318 }, { "epoch": 1.1081803005008348, "grad_norm": 0.8969981726924782, "learning_rate": 7.948034108927687e-06, "loss": 0.4015, "step": 3319 }, { "epoch": 1.1085141903171953, "grad_norm": 0.8760722523189981, "learning_rate": 7.946464857121922e-06, "loss": 0.3731, "step": 3320 }, { "epoch": 1.1088480801335558, "grad_norm": 0.8706409748998168, "learning_rate": 7.94489516054816e-06, "loss": 0.3689, "step": 3321 }, { "epoch": 1.1091819699499166, "grad_norm": 0.8697898770971504, "learning_rate": 7.943325019443346e-06, "loss": 0.3741, "step": 3322 }, { "epoch": 1.109515859766277, "grad_norm": 0.8711732226652241, "learning_rate": 7.941754434044488e-06, "loss": 0.3723, "step": 3323 }, { "epoch": 1.1098497495826378, "grad_norm": 0.9413439766352728, "learning_rate": 7.940183404588673e-06, "loss": 0.4164, "step": 3324 }, { "epoch": 1.1101836393989983, "grad_norm": 0.864532725840221, "learning_rate": 7.938611931313041e-06, "loss": 0.382, "step": 3325 }, { "epoch": 1.1105175292153588, "grad_norm": 0.8717556976030796, "learning_rate": 7.937040014454807e-06, "loss": 0.3782, "step": 3326 }, { "epoch": 1.1108514190317196, "grad_norm": 0.897190949991112, "learning_rate": 7.935467654251252e-06, "loss": 0.3929, "step": 3327 }, { "epoch": 1.11118530884808, "grad_norm": 0.9066599570929701, "learning_rate": 7.933894850939725e-06, "loss": 0.3886, "step": 3328 }, { "epoch": 1.1115191986644408, "grad_norm": 0.9108961631467719, "learning_rate": 7.932321604757637e-06, "loss": 0.3959, "step": 3329 }, { "epoch": 1.1118530884808013, "grad_norm": 0.8846191350755419, "learning_rate": 7.930747915942471e-06, "loss": 0.3824, "step": 3330 }, { "epoch": 1.1121869782971618, "grad_norm": 0.8679526962209646, "learning_rate": 7.929173784731775e-06, "loss": 0.389, "step": 3331 }, { "epoch": 1.1125208681135226, "grad_norm": 0.8607808525774826, "learning_rate": 7.927599211363161e-06, "loss": 0.3788, "step": 3332 }, { "epoch": 1.112854757929883, "grad_norm": 0.8620184656919654, "learning_rate": 7.926024196074315e-06, "loss": 0.369, "step": 3333 }, { "epoch": 1.1131886477462438, "grad_norm": 0.8978306858573663, "learning_rate": 7.92444873910298e-06, "loss": 0.3997, "step": 3334 }, { "epoch": 1.1135225375626043, "grad_norm": 0.8875971690717467, "learning_rate": 7.922872840686975e-06, "loss": 0.3831, "step": 3335 }, { "epoch": 1.1138564273789648, "grad_norm": 0.9029970182822147, "learning_rate": 7.92129650106418e-06, "loss": 0.4001, "step": 3336 }, { "epoch": 1.1141903171953256, "grad_norm": 0.8879150045894979, "learning_rate": 7.919719720472542e-06, "loss": 0.3934, "step": 3337 }, { "epoch": 1.114524207011686, "grad_norm": 0.8786611874120868, "learning_rate": 7.918142499150076e-06, "loss": 0.3811, "step": 3338 }, { "epoch": 1.1148580968280468, "grad_norm": 0.9157095872769225, "learning_rate": 7.916564837334863e-06, "loss": 0.3774, "step": 3339 }, { "epoch": 1.1151919866444073, "grad_norm": 0.8969965982738932, "learning_rate": 7.914986735265052e-06, "loss": 0.3745, "step": 3340 }, { "epoch": 1.1155258764607678, "grad_norm": 0.9128994449647821, "learning_rate": 7.913408193178857e-06, "loss": 0.3848, "step": 3341 }, { "epoch": 1.1158597662771286, "grad_norm": 0.9012915635537314, "learning_rate": 7.911829211314555e-06, "loss": 0.3753, "step": 3342 }, { "epoch": 1.116193656093489, "grad_norm": 0.9092169578091577, "learning_rate": 7.910249789910495e-06, "loss": 0.4036, "step": 3343 }, { "epoch": 1.1165275459098498, "grad_norm": 0.9045712487260422, "learning_rate": 7.908669929205094e-06, "loss": 0.3808, "step": 3344 }, { "epoch": 1.1168614357262103, "grad_norm": 0.9427950790610413, "learning_rate": 7.907089629436823e-06, "loss": 0.4066, "step": 3345 }, { "epoch": 1.1171953255425708, "grad_norm": 0.8891218391325917, "learning_rate": 7.905508890844235e-06, "loss": 0.379, "step": 3346 }, { "epoch": 1.1175292153589316, "grad_norm": 0.8913615222414447, "learning_rate": 7.903927713665942e-06, "loss": 0.391, "step": 3347 }, { "epoch": 1.117863105175292, "grad_norm": 0.8973400577831224, "learning_rate": 7.902346098140617e-06, "loss": 0.396, "step": 3348 }, { "epoch": 1.1181969949916528, "grad_norm": 0.8669648274621712, "learning_rate": 7.90076404450701e-06, "loss": 0.3776, "step": 3349 }, { "epoch": 1.1185308848080133, "grad_norm": 0.8942881473549308, "learning_rate": 7.899181553003927e-06, "loss": 0.3943, "step": 3350 }, { "epoch": 1.118864774624374, "grad_norm": 0.8966784434171757, "learning_rate": 7.897598623870247e-06, "loss": 0.3795, "step": 3351 }, { "epoch": 1.1191986644407346, "grad_norm": 0.937515953824046, "learning_rate": 7.896015257344911e-06, "loss": 0.3807, "step": 3352 }, { "epoch": 1.119532554257095, "grad_norm": 1.0056770195220388, "learning_rate": 7.894431453666932e-06, "loss": 0.3778, "step": 3353 }, { "epoch": 1.1198664440734558, "grad_norm": 0.9329935845252656, "learning_rate": 7.892847213075378e-06, "loss": 0.4035, "step": 3354 }, { "epoch": 1.1202003338898163, "grad_norm": 0.9208956503656347, "learning_rate": 7.891262535809396e-06, "loss": 0.3692, "step": 3355 }, { "epoch": 1.120534223706177, "grad_norm": 0.8786977311693207, "learning_rate": 7.889677422108189e-06, "loss": 0.3791, "step": 3356 }, { "epoch": 1.1208681135225376, "grad_norm": 0.8503838107168304, "learning_rate": 7.88809187221103e-06, "loss": 0.372, "step": 3357 }, { "epoch": 1.121202003338898, "grad_norm": 0.8957468259338885, "learning_rate": 7.886505886357257e-06, "loss": 0.3978, "step": 3358 }, { "epoch": 1.1215358931552588, "grad_norm": 0.8508147070195511, "learning_rate": 7.884919464786275e-06, "loss": 0.3742, "step": 3359 }, { "epoch": 1.1218697829716193, "grad_norm": 0.8878882020610562, "learning_rate": 7.883332607737555e-06, "loss": 0.3721, "step": 3360 }, { "epoch": 1.12220367278798, "grad_norm": 0.9121615140004451, "learning_rate": 7.88174531545063e-06, "loss": 0.3902, "step": 3361 }, { "epoch": 1.1225375626043406, "grad_norm": 0.9263300663350341, "learning_rate": 7.880157588165103e-06, "loss": 0.3886, "step": 3362 }, { "epoch": 1.122871452420701, "grad_norm": 0.8892346926898792, "learning_rate": 7.878569426120639e-06, "loss": 0.3823, "step": 3363 }, { "epoch": 1.1232053422370618, "grad_norm": 0.8827895486199078, "learning_rate": 7.876980829556973e-06, "loss": 0.3832, "step": 3364 }, { "epoch": 1.1235392320534223, "grad_norm": 0.906270605616623, "learning_rate": 7.875391798713903e-06, "loss": 0.384, "step": 3365 }, { "epoch": 1.123873121869783, "grad_norm": 0.8821341835969027, "learning_rate": 7.873802333831291e-06, "loss": 0.3812, "step": 3366 }, { "epoch": 1.1242070116861436, "grad_norm": 0.8693238956779936, "learning_rate": 7.872212435149067e-06, "loss": 0.3918, "step": 3367 }, { "epoch": 1.124540901502504, "grad_norm": 0.8616407495196708, "learning_rate": 7.870622102907226e-06, "loss": 0.4009, "step": 3368 }, { "epoch": 1.1248747913188648, "grad_norm": 0.8407763189801958, "learning_rate": 7.869031337345828e-06, "loss": 0.373, "step": 3369 }, { "epoch": 1.1252086811352253, "grad_norm": 0.8537063970089491, "learning_rate": 7.867440138704999e-06, "loss": 0.3715, "step": 3370 }, { "epoch": 1.125542570951586, "grad_norm": 0.8703489841894991, "learning_rate": 7.86584850722493e-06, "loss": 0.393, "step": 3371 }, { "epoch": 1.1258764607679466, "grad_norm": 0.9091475529442411, "learning_rate": 7.864256443145878e-06, "loss": 0.3956, "step": 3372 }, { "epoch": 1.1262103505843073, "grad_norm": 0.9068434662856022, "learning_rate": 7.86266394670816e-06, "loss": 0.3979, "step": 3373 }, { "epoch": 1.1265442404006678, "grad_norm": 0.8909133835827635, "learning_rate": 7.86107101815217e-06, "loss": 0.3995, "step": 3374 }, { "epoch": 1.1268781302170283, "grad_norm": 0.8710599263244072, "learning_rate": 7.859477657718353e-06, "loss": 0.3824, "step": 3375 }, { "epoch": 1.127212020033389, "grad_norm": 0.9048993561930178, "learning_rate": 7.857883865647231e-06, "loss": 0.3958, "step": 3376 }, { "epoch": 1.1275459098497496, "grad_norm": 0.8698197541720978, "learning_rate": 7.856289642179387e-06, "loss": 0.3817, "step": 3377 }, { "epoch": 1.1278797996661103, "grad_norm": 0.8951332692088075, "learning_rate": 7.854694987555467e-06, "loss": 0.3893, "step": 3378 }, { "epoch": 1.1282136894824708, "grad_norm": 0.9057030074714768, "learning_rate": 7.853099902016182e-06, "loss": 0.3811, "step": 3379 }, { "epoch": 1.1285475792988313, "grad_norm": 0.9509068513727809, "learning_rate": 7.851504385802312e-06, "loss": 0.4029, "step": 3380 }, { "epoch": 1.128881469115192, "grad_norm": 0.8751302039875309, "learning_rate": 7.849908439154697e-06, "loss": 0.3903, "step": 3381 }, { "epoch": 1.1292153589315526, "grad_norm": 0.8987369320585317, "learning_rate": 7.848312062314247e-06, "loss": 0.4044, "step": 3382 }, { "epoch": 1.1295492487479133, "grad_norm": 0.8798297044372849, "learning_rate": 7.846715255521936e-06, "loss": 0.4038, "step": 3383 }, { "epoch": 1.1298831385642738, "grad_norm": 0.8241263592208597, "learning_rate": 7.845118019018798e-06, "loss": 0.3906, "step": 3384 }, { "epoch": 1.1302170283806343, "grad_norm": 0.8941643836030357, "learning_rate": 7.843520353045936e-06, "loss": 0.3969, "step": 3385 }, { "epoch": 1.130550918196995, "grad_norm": 0.8479772317202938, "learning_rate": 7.84192225784452e-06, "loss": 0.3631, "step": 3386 }, { "epoch": 1.1308848080133556, "grad_norm": 0.8772052840953287, "learning_rate": 7.84032373365578e-06, "loss": 0.3778, "step": 3387 }, { "epoch": 1.1312186978297163, "grad_norm": 0.9104483436564257, "learning_rate": 7.838724780721012e-06, "loss": 0.3973, "step": 3388 }, { "epoch": 1.1315525876460768, "grad_norm": 0.9215623688119763, "learning_rate": 7.83712539928158e-06, "loss": 0.4049, "step": 3389 }, { "epoch": 1.1318864774624373, "grad_norm": 0.8975383743654236, "learning_rate": 7.835525589578907e-06, "loss": 0.3834, "step": 3390 }, { "epoch": 1.132220367278798, "grad_norm": 0.9170678833038748, "learning_rate": 7.833925351854486e-06, "loss": 0.4197, "step": 3391 }, { "epoch": 1.1325542570951586, "grad_norm": 0.8458920905290959, "learning_rate": 7.832324686349872e-06, "loss": 0.3715, "step": 3392 }, { "epoch": 1.1328881469115193, "grad_norm": 1.1031252645231686, "learning_rate": 7.830723593306685e-06, "loss": 0.3758, "step": 3393 }, { "epoch": 1.1332220367278798, "grad_norm": 0.889307374048156, "learning_rate": 7.82912207296661e-06, "loss": 0.4051, "step": 3394 }, { "epoch": 1.1335559265442403, "grad_norm": 0.8188024094343765, "learning_rate": 7.827520125571393e-06, "loss": 0.3688, "step": 3395 }, { "epoch": 1.133889816360601, "grad_norm": 0.8486224983267039, "learning_rate": 7.825917751362852e-06, "loss": 0.3721, "step": 3396 }, { "epoch": 1.1342237061769616, "grad_norm": 0.8758789637063107, "learning_rate": 7.824314950582861e-06, "loss": 0.3993, "step": 3397 }, { "epoch": 1.1345575959933223, "grad_norm": 0.8830151464799383, "learning_rate": 7.822711723473365e-06, "loss": 0.3932, "step": 3398 }, { "epoch": 1.1348914858096828, "grad_norm": 0.883997832364035, "learning_rate": 7.82110807027637e-06, "loss": 0.4003, "step": 3399 }, { "epoch": 1.1352253756260433, "grad_norm": 0.9387103743262182, "learning_rate": 7.819503991233945e-06, "loss": 0.4024, "step": 3400 }, { "epoch": 1.135559265442404, "grad_norm": 0.8607685578828677, "learning_rate": 7.81789948658823e-06, "loss": 0.3739, "step": 3401 }, { "epoch": 1.1358931552587646, "grad_norm": 0.8916792751955052, "learning_rate": 7.816294556581418e-06, "loss": 0.398, "step": 3402 }, { "epoch": 1.1362270450751253, "grad_norm": 0.8802245218259264, "learning_rate": 7.814689201455778e-06, "loss": 0.3972, "step": 3403 }, { "epoch": 1.1365609348914858, "grad_norm": 0.8960798811304872, "learning_rate": 7.813083421453635e-06, "loss": 0.3883, "step": 3404 }, { "epoch": 1.1368948247078463, "grad_norm": 0.9014786825295702, "learning_rate": 7.811477216817385e-06, "loss": 0.3958, "step": 3405 }, { "epoch": 1.137228714524207, "grad_norm": 0.9327474072134697, "learning_rate": 7.809870587789478e-06, "loss": 0.3803, "step": 3406 }, { "epoch": 1.1375626043405676, "grad_norm": 0.8854237962901758, "learning_rate": 7.808263534612437e-06, "loss": 0.3801, "step": 3407 }, { "epoch": 1.1378964941569283, "grad_norm": 0.905049620100093, "learning_rate": 7.806656057528849e-06, "loss": 0.3966, "step": 3408 }, { "epoch": 1.1382303839732888, "grad_norm": 0.9473282102848825, "learning_rate": 7.805048156781359e-06, "loss": 0.4117, "step": 3409 }, { "epoch": 1.1385642737896493, "grad_norm": 0.8845584017938948, "learning_rate": 7.803439832612678e-06, "loss": 0.3891, "step": 3410 }, { "epoch": 1.13889816360601, "grad_norm": 0.8873460997682128, "learning_rate": 7.801831085265586e-06, "loss": 0.405, "step": 3411 }, { "epoch": 1.1392320534223705, "grad_norm": 0.8450935339766996, "learning_rate": 7.800221914982919e-06, "loss": 0.3687, "step": 3412 }, { "epoch": 1.1395659432387313, "grad_norm": 0.8700219305125408, "learning_rate": 7.798612322007584e-06, "loss": 0.391, "step": 3413 }, { "epoch": 1.1398998330550918, "grad_norm": 0.8950753406711247, "learning_rate": 7.797002306582546e-06, "loss": 0.3964, "step": 3414 }, { "epoch": 1.1402337228714523, "grad_norm": 0.9270595382481512, "learning_rate": 7.795391868950838e-06, "loss": 0.402, "step": 3415 }, { "epoch": 1.140567612687813, "grad_norm": 0.926205793733517, "learning_rate": 7.793781009355555e-06, "loss": 0.3912, "step": 3416 }, { "epoch": 1.1409015025041735, "grad_norm": 0.9155261775997389, "learning_rate": 7.792169728039856e-06, "loss": 0.387, "step": 3417 }, { "epoch": 1.1412353923205343, "grad_norm": 0.898751271748491, "learning_rate": 7.790558025246962e-06, "loss": 0.3879, "step": 3418 }, { "epoch": 1.1415692821368948, "grad_norm": 0.9205869790730605, "learning_rate": 7.78894590122016e-06, "loss": 0.3743, "step": 3419 }, { "epoch": 1.1419031719532553, "grad_norm": 0.8721159129819496, "learning_rate": 7.7873333562028e-06, "loss": 0.371, "step": 3420 }, { "epoch": 1.142237061769616, "grad_norm": 0.8893508473768338, "learning_rate": 7.785720390438294e-06, "loss": 0.3706, "step": 3421 }, { "epoch": 1.1425709515859765, "grad_norm": 0.9210401900794724, "learning_rate": 7.78410700417012e-06, "loss": 0.3965, "step": 3422 }, { "epoch": 1.1429048414023373, "grad_norm": 0.8976135708361815, "learning_rate": 7.782493197641819e-06, "loss": 0.3742, "step": 3423 }, { "epoch": 1.1432387312186978, "grad_norm": 0.8842218703594097, "learning_rate": 7.78087897109699e-06, "loss": 0.3919, "step": 3424 }, { "epoch": 1.1435726210350585, "grad_norm": 0.8867546738324211, "learning_rate": 7.779264324779305e-06, "loss": 0.3863, "step": 3425 }, { "epoch": 1.143906510851419, "grad_norm": 0.9081966898041095, "learning_rate": 7.777649258932494e-06, "loss": 0.3851, "step": 3426 }, { "epoch": 1.1442404006677795, "grad_norm": 0.8814613198615423, "learning_rate": 7.776033773800347e-06, "loss": 0.3826, "step": 3427 }, { "epoch": 1.1445742904841403, "grad_norm": 0.8887838569730199, "learning_rate": 7.774417869626726e-06, "loss": 0.3839, "step": 3428 }, { "epoch": 1.1449081803005008, "grad_norm": 0.868483937808584, "learning_rate": 7.772801546655546e-06, "loss": 0.3777, "step": 3429 }, { "epoch": 1.1452420701168615, "grad_norm": 0.9091080465027618, "learning_rate": 7.771184805130796e-06, "loss": 0.3927, "step": 3430 }, { "epoch": 1.145575959933222, "grad_norm": 0.8705714809369411, "learning_rate": 7.769567645296517e-06, "loss": 0.381, "step": 3431 }, { "epoch": 1.1459098497495825, "grad_norm": 0.8777215241538572, "learning_rate": 7.767950067396822e-06, "loss": 0.3874, "step": 3432 }, { "epoch": 1.1462437395659433, "grad_norm": 0.9392624355638086, "learning_rate": 7.766332071675884e-06, "loss": 0.389, "step": 3433 }, { "epoch": 1.1465776293823038, "grad_norm": 0.8947736897747212, "learning_rate": 7.764713658377938e-06, "loss": 0.3852, "step": 3434 }, { "epoch": 1.1469115191986645, "grad_norm": 0.889515449956335, "learning_rate": 7.763094827747283e-06, "loss": 0.3698, "step": 3435 }, { "epoch": 1.147245409015025, "grad_norm": 0.8985986066952434, "learning_rate": 7.761475580028284e-06, "loss": 0.3937, "step": 3436 }, { "epoch": 1.1475792988313858, "grad_norm": 0.9325784790447249, "learning_rate": 7.759855915465361e-06, "loss": 0.3882, "step": 3437 }, { "epoch": 1.1479131886477463, "grad_norm": 0.8462641617668015, "learning_rate": 7.758235834303005e-06, "loss": 0.3821, "step": 3438 }, { "epoch": 1.1482470784641068, "grad_norm": 0.8662209931530238, "learning_rate": 7.756615336785767e-06, "loss": 0.3851, "step": 3439 }, { "epoch": 1.1485809682804675, "grad_norm": 0.8836343627860141, "learning_rate": 7.754994423158258e-06, "loss": 0.3844, "step": 3440 }, { "epoch": 1.148914858096828, "grad_norm": 0.9018750926635396, "learning_rate": 7.753373093665156e-06, "loss": 0.4079, "step": 3441 }, { "epoch": 1.1492487479131888, "grad_norm": 0.9136578801392132, "learning_rate": 7.751751348551202e-06, "loss": 0.396, "step": 3442 }, { "epoch": 1.1495826377295493, "grad_norm": 0.9191503960384916, "learning_rate": 7.750129188061196e-06, "loss": 0.4067, "step": 3443 }, { "epoch": 1.1499165275459098, "grad_norm": 0.9189232421893122, "learning_rate": 7.748506612440003e-06, "loss": 0.4009, "step": 3444 }, { "epoch": 1.1502504173622705, "grad_norm": 0.889132771570418, "learning_rate": 7.746883621932548e-06, "loss": 0.3906, "step": 3445 }, { "epoch": 1.150584307178631, "grad_norm": 0.9146313845787105, "learning_rate": 7.745260216783825e-06, "loss": 0.3826, "step": 3446 }, { "epoch": 1.1509181969949918, "grad_norm": 0.9142246604561794, "learning_rate": 7.743636397238886e-06, "loss": 0.4002, "step": 3447 }, { "epoch": 1.1512520868113523, "grad_norm": 0.8809009653741896, "learning_rate": 7.742012163542842e-06, "loss": 0.3725, "step": 3448 }, { "epoch": 1.1515859766277128, "grad_norm": 0.8993107126122434, "learning_rate": 7.740387515940874e-06, "loss": 0.3835, "step": 3449 }, { "epoch": 1.1519198664440735, "grad_norm": 0.9256263198872726, "learning_rate": 7.738762454678223e-06, "loss": 0.4134, "step": 3450 }, { "epoch": 1.152253756260434, "grad_norm": 0.8987596596107241, "learning_rate": 7.737136980000189e-06, "loss": 0.4018, "step": 3451 }, { "epoch": 1.1525876460767948, "grad_norm": 0.8751115335101367, "learning_rate": 7.735511092152139e-06, "loss": 0.3763, "step": 3452 }, { "epoch": 1.1529215358931553, "grad_norm": 0.9305861829984258, "learning_rate": 7.733884791379498e-06, "loss": 0.3802, "step": 3453 }, { "epoch": 1.1532554257095158, "grad_norm": 0.8881845834160168, "learning_rate": 7.732258077927757e-06, "loss": 0.3819, "step": 3454 }, { "epoch": 1.1535893155258765, "grad_norm": 0.8466396082723717, "learning_rate": 7.730630952042467e-06, "loss": 0.366, "step": 3455 }, { "epoch": 1.153923205342237, "grad_norm": 0.8412124853069138, "learning_rate": 7.729003413969243e-06, "loss": 0.3791, "step": 3456 }, { "epoch": 1.1542570951585978, "grad_norm": 0.9000387173691691, "learning_rate": 7.727375463953762e-06, "loss": 0.384, "step": 3457 }, { "epoch": 1.1545909849749583, "grad_norm": 0.9281597891076173, "learning_rate": 7.725747102241762e-06, "loss": 0.4101, "step": 3458 }, { "epoch": 1.1549248747913188, "grad_norm": 0.8715536533128948, "learning_rate": 7.724118329079043e-06, "loss": 0.3663, "step": 3459 }, { "epoch": 1.1552587646076795, "grad_norm": 0.9078059838010168, "learning_rate": 7.722489144711468e-06, "loss": 0.4068, "step": 3460 }, { "epoch": 1.15559265442404, "grad_norm": 0.9066557146374493, "learning_rate": 7.720859549384965e-06, "loss": 0.3795, "step": 3461 }, { "epoch": 1.1559265442404008, "grad_norm": 0.9213178822981042, "learning_rate": 7.719229543345515e-06, "loss": 0.396, "step": 3462 }, { "epoch": 1.1562604340567613, "grad_norm": 0.9130630649385673, "learning_rate": 7.717599126839172e-06, "loss": 0.3897, "step": 3463 }, { "epoch": 1.1565943238731218, "grad_norm": 0.8899006842012813, "learning_rate": 7.715968300112043e-06, "loss": 0.3837, "step": 3464 }, { "epoch": 1.1569282136894825, "grad_norm": 0.9111561406785196, "learning_rate": 7.714337063410304e-06, "loss": 0.3986, "step": 3465 }, { "epoch": 1.157262103505843, "grad_norm": 0.8917427109427414, "learning_rate": 7.712705416980188e-06, "loss": 0.3924, "step": 3466 }, { "epoch": 1.1575959933222038, "grad_norm": 0.9751658532423912, "learning_rate": 7.711073361067995e-06, "loss": 0.4203, "step": 3467 }, { "epoch": 1.1579298831385643, "grad_norm": 0.8315177021490936, "learning_rate": 7.709440895920078e-06, "loss": 0.357, "step": 3468 }, { "epoch": 1.1582637729549248, "grad_norm": 0.9287362717187483, "learning_rate": 7.70780802178286e-06, "loss": 0.4001, "step": 3469 }, { "epoch": 1.1585976627712855, "grad_norm": 0.930871641789955, "learning_rate": 7.706174738902822e-06, "loss": 0.3853, "step": 3470 }, { "epoch": 1.158931552587646, "grad_norm": 0.8495238255588927, "learning_rate": 7.704541047526508e-06, "loss": 0.3754, "step": 3471 }, { "epoch": 1.1592654424040068, "grad_norm": 0.8515551186762538, "learning_rate": 7.702906947900524e-06, "loss": 0.3721, "step": 3472 }, { "epoch": 1.1595993322203673, "grad_norm": 0.9503551796853156, "learning_rate": 7.701272440271534e-06, "loss": 0.4083, "step": 3473 }, { "epoch": 1.1599332220367278, "grad_norm": 0.8823737383933125, "learning_rate": 7.69963752488627e-06, "loss": 0.3748, "step": 3474 }, { "epoch": 1.1602671118530885, "grad_norm": 0.896391815978995, "learning_rate": 7.698002201991522e-06, "loss": 0.3986, "step": 3475 }, { "epoch": 1.160601001669449, "grad_norm": 0.9079508378512477, "learning_rate": 7.696366471834138e-06, "loss": 0.3997, "step": 3476 }, { "epoch": 1.1609348914858098, "grad_norm": 0.8896841396775967, "learning_rate": 7.694730334661033e-06, "loss": 0.3829, "step": 3477 }, { "epoch": 1.1612687813021703, "grad_norm": 0.9064870468335365, "learning_rate": 7.693093790719183e-06, "loss": 0.3885, "step": 3478 }, { "epoch": 1.1616026711185308, "grad_norm": 0.8909575648290576, "learning_rate": 7.691456840255622e-06, "loss": 0.382, "step": 3479 }, { "epoch": 1.1619365609348915, "grad_norm": 0.8870186347648517, "learning_rate": 7.689819483517447e-06, "loss": 0.3713, "step": 3480 }, { "epoch": 1.162270450751252, "grad_norm": 0.9186802023057073, "learning_rate": 7.688181720751817e-06, "loss": 0.3899, "step": 3481 }, { "epoch": 1.1626043405676127, "grad_norm": 0.8950677445152659, "learning_rate": 7.686543552205951e-06, "loss": 0.382, "step": 3482 }, { "epoch": 1.1629382303839733, "grad_norm": 0.8900628901560402, "learning_rate": 7.684904978127133e-06, "loss": 0.3838, "step": 3483 }, { "epoch": 1.1632721202003338, "grad_norm": 0.8795367144862432, "learning_rate": 7.6832659987627e-06, "loss": 0.3994, "step": 3484 }, { "epoch": 1.1636060100166945, "grad_norm": 0.9058241227476985, "learning_rate": 7.68162661436006e-06, "loss": 0.4128, "step": 3485 }, { "epoch": 1.163939899833055, "grad_norm": 0.8883178109225873, "learning_rate": 7.679986825166676e-06, "loss": 0.3812, "step": 3486 }, { "epoch": 1.1642737896494157, "grad_norm": 0.881243265278376, "learning_rate": 7.678346631430075e-06, "loss": 0.3653, "step": 3487 }, { "epoch": 1.1646076794657763, "grad_norm": 0.870310476337495, "learning_rate": 7.676706033397842e-06, "loss": 0.3706, "step": 3488 }, { "epoch": 1.164941569282137, "grad_norm": 0.885274266580726, "learning_rate": 7.675065031317625e-06, "loss": 0.3982, "step": 3489 }, { "epoch": 1.1652754590984975, "grad_norm": 0.8729233689496902, "learning_rate": 7.673423625437134e-06, "loss": 0.3687, "step": 3490 }, { "epoch": 1.165609348914858, "grad_norm": 0.8749012670145552, "learning_rate": 7.671781816004138e-06, "loss": 0.3854, "step": 3491 }, { "epoch": 1.1659432387312187, "grad_norm": 0.8494949275634677, "learning_rate": 7.670139603266467e-06, "loss": 0.3761, "step": 3492 }, { "epoch": 1.1662771285475793, "grad_norm": 0.8466593161421234, "learning_rate": 7.668496987472011e-06, "loss": 0.3758, "step": 3493 }, { "epoch": 1.16661101836394, "grad_norm": 0.9209681687355434, "learning_rate": 7.666853968868729e-06, "loss": 0.3901, "step": 3494 }, { "epoch": 1.1669449081803005, "grad_norm": 0.9357386670205903, "learning_rate": 7.665210547704624e-06, "loss": 0.4124, "step": 3495 }, { "epoch": 1.167278797996661, "grad_norm": 0.8690489015706933, "learning_rate": 7.66356672422778e-06, "loss": 0.3763, "step": 3496 }, { "epoch": 1.1676126878130217, "grad_norm": 0.87225694749947, "learning_rate": 7.661922498686325e-06, "loss": 0.3757, "step": 3497 }, { "epoch": 1.1679465776293823, "grad_norm": 0.9174938083579325, "learning_rate": 7.660277871328458e-06, "loss": 0.3838, "step": 3498 }, { "epoch": 1.168280467445743, "grad_norm": 0.9032669505420464, "learning_rate": 7.658632842402432e-06, "loss": 0.3969, "step": 3499 }, { "epoch": 1.1686143572621035, "grad_norm": 0.8968933275367329, "learning_rate": 7.656987412156567e-06, "loss": 0.3831, "step": 3500 }, { "epoch": 1.1689482470784642, "grad_norm": 0.8886287511396233, "learning_rate": 7.655341580839237e-06, "loss": 0.3826, "step": 3501 }, { "epoch": 1.1692821368948247, "grad_norm": 0.8858627063136189, "learning_rate": 7.653695348698882e-06, "loss": 0.389, "step": 3502 }, { "epoch": 1.1696160267111853, "grad_norm": 0.8714257438490425, "learning_rate": 7.652048715984e-06, "loss": 0.3836, "step": 3503 }, { "epoch": 1.169949916527546, "grad_norm": 0.8649951852111216, "learning_rate": 7.650401682943148e-06, "loss": 0.3772, "step": 3504 }, { "epoch": 1.1702838063439065, "grad_norm": 0.8723800261170847, "learning_rate": 7.648754249824946e-06, "loss": 0.381, "step": 3505 }, { "epoch": 1.1706176961602672, "grad_norm": 0.8841963347111319, "learning_rate": 7.647106416878075e-06, "loss": 0.3748, "step": 3506 }, { "epoch": 1.1709515859766277, "grad_norm": 0.8932979770617235, "learning_rate": 7.645458184351273e-06, "loss": 0.404, "step": 3507 }, { "epoch": 1.1712854757929883, "grad_norm": 0.8971803199198826, "learning_rate": 7.643809552493341e-06, "loss": 0.3694, "step": 3508 }, { "epoch": 1.171619365609349, "grad_norm": 0.9089498678132277, "learning_rate": 7.64216052155314e-06, "loss": 0.4065, "step": 3509 }, { "epoch": 1.1719532554257095, "grad_norm": 0.8429194217331897, "learning_rate": 7.64051109177959e-06, "loss": 0.3682, "step": 3510 }, { "epoch": 1.1722871452420702, "grad_norm": 0.9311534838028298, "learning_rate": 7.63886126342167e-06, "loss": 0.3996, "step": 3511 }, { "epoch": 1.1726210350584307, "grad_norm": 0.8586492333498995, "learning_rate": 7.637211036728426e-06, "loss": 0.3693, "step": 3512 }, { "epoch": 1.1729549248747912, "grad_norm": 0.8820065439974455, "learning_rate": 7.635560411948957e-06, "loss": 0.385, "step": 3513 }, { "epoch": 1.173288814691152, "grad_norm": 0.9035160810034651, "learning_rate": 7.63390938933242e-06, "loss": 0.4011, "step": 3514 }, { "epoch": 1.1736227045075125, "grad_norm": 0.9076694878215521, "learning_rate": 7.632257969128043e-06, "loss": 0.3843, "step": 3515 }, { "epoch": 1.1739565943238732, "grad_norm": 0.8913721501467672, "learning_rate": 7.630606151585105e-06, "loss": 0.3769, "step": 3516 }, { "epoch": 1.1742904841402337, "grad_norm": 0.8488775368372088, "learning_rate": 7.628953936952945e-06, "loss": 0.3813, "step": 3517 }, { "epoch": 1.1746243739565942, "grad_norm": 0.9339151927754759, "learning_rate": 7.6273013254809684e-06, "loss": 0.3944, "step": 3518 }, { "epoch": 1.174958263772955, "grad_norm": 0.8489192487077234, "learning_rate": 7.625648317418633e-06, "loss": 0.3772, "step": 3519 }, { "epoch": 1.1752921535893155, "grad_norm": 0.894090601532894, "learning_rate": 7.623994913015461e-06, "loss": 0.401, "step": 3520 }, { "epoch": 1.1756260434056762, "grad_norm": 0.8855276766520395, "learning_rate": 7.6223411125210344e-06, "loss": 0.3971, "step": 3521 }, { "epoch": 1.1759599332220367, "grad_norm": 0.9121682726609304, "learning_rate": 7.620686916184991e-06, "loss": 0.3747, "step": 3522 }, { "epoch": 1.1762938230383972, "grad_norm": 0.8634163450558732, "learning_rate": 7.619032324257034e-06, "loss": 0.355, "step": 3523 }, { "epoch": 1.176627712854758, "grad_norm": 0.9088175981581377, "learning_rate": 7.617377336986924e-06, "loss": 0.3986, "step": 3524 }, { "epoch": 1.1769616026711185, "grad_norm": 0.8721926644499545, "learning_rate": 7.615721954624478e-06, "loss": 0.3926, "step": 3525 }, { "epoch": 1.1772954924874792, "grad_norm": 0.8978163028480789, "learning_rate": 7.614066177419578e-06, "loss": 0.4052, "step": 3526 }, { "epoch": 1.1776293823038397, "grad_norm": 0.8762267774940196, "learning_rate": 7.612410005622162e-06, "loss": 0.3985, "step": 3527 }, { "epoch": 1.1779632721202002, "grad_norm": 0.9155031581646795, "learning_rate": 7.610753439482227e-06, "loss": 0.4007, "step": 3528 }, { "epoch": 1.178297161936561, "grad_norm": 0.9172471896864912, "learning_rate": 7.609096479249836e-06, "loss": 0.3975, "step": 3529 }, { "epoch": 1.1786310517529215, "grad_norm": 0.8670566103221533, "learning_rate": 7.6074391251751e-06, "loss": 0.4016, "step": 3530 }, { "epoch": 1.1789649415692822, "grad_norm": 0.864015312371431, "learning_rate": 7.605781377508203e-06, "loss": 0.3722, "step": 3531 }, { "epoch": 1.1792988313856427, "grad_norm": 0.8818084366150372, "learning_rate": 7.6041232364993765e-06, "loss": 0.3784, "step": 3532 }, { "epoch": 1.1796327212020032, "grad_norm": 0.8914914180762773, "learning_rate": 7.6024647023989175e-06, "loss": 0.3854, "step": 3533 }, { "epoch": 1.179966611018364, "grad_norm": 0.8994835918195492, "learning_rate": 7.600805775457183e-06, "loss": 0.3882, "step": 3534 }, { "epoch": 1.1803005008347245, "grad_norm": 0.9011605311425904, "learning_rate": 7.599146455924587e-06, "loss": 0.3847, "step": 3535 }, { "epoch": 1.1806343906510852, "grad_norm": 0.8556995077142133, "learning_rate": 7.597486744051601e-06, "loss": 0.3668, "step": 3536 }, { "epoch": 1.1809682804674457, "grad_norm": 0.9261676781706476, "learning_rate": 7.595826640088762e-06, "loss": 0.4141, "step": 3537 }, { "epoch": 1.1813021702838062, "grad_norm": 0.8744160208307132, "learning_rate": 7.594166144286656e-06, "loss": 0.3762, "step": 3538 }, { "epoch": 1.181636060100167, "grad_norm": 0.9055384184984785, "learning_rate": 7.592505256895941e-06, "loss": 0.3991, "step": 3539 }, { "epoch": 1.1819699499165275, "grad_norm": 0.9031912466158448, "learning_rate": 7.590843978167324e-06, "loss": 0.3876, "step": 3540 }, { "epoch": 1.1823038397328882, "grad_norm": 0.8590579794547046, "learning_rate": 7.589182308351576e-06, "loss": 0.3859, "step": 3541 }, { "epoch": 1.1826377295492487, "grad_norm": 0.8898040048046162, "learning_rate": 7.587520247699525e-06, "loss": 0.3968, "step": 3542 }, { "epoch": 1.1829716193656092, "grad_norm": 0.8654705588719309, "learning_rate": 7.585857796462058e-06, "loss": 0.3763, "step": 3543 }, { "epoch": 1.18330550918197, "grad_norm": 0.8819077956101639, "learning_rate": 7.584194954890121e-06, "loss": 0.3924, "step": 3544 }, { "epoch": 1.1836393989983305, "grad_norm": 0.9012093954936227, "learning_rate": 7.58253172323472e-06, "loss": 0.3903, "step": 3545 }, { "epoch": 1.1839732888146912, "grad_norm": 0.8666578258413099, "learning_rate": 7.580868101746919e-06, "loss": 0.388, "step": 3546 }, { "epoch": 1.1843071786310517, "grad_norm": 0.8696843087941366, "learning_rate": 7.579204090677845e-06, "loss": 0.3885, "step": 3547 }, { "epoch": 1.1846410684474122, "grad_norm": 0.8780750189434439, "learning_rate": 7.577539690278674e-06, "loss": 0.3781, "step": 3548 }, { "epoch": 1.184974958263773, "grad_norm": 0.8654428332054215, "learning_rate": 7.575874900800652e-06, "loss": 0.3816, "step": 3549 }, { "epoch": 1.1853088480801335, "grad_norm": 0.9359380497109966, "learning_rate": 7.5742097224950736e-06, "loss": 0.4085, "step": 3550 }, { "epoch": 1.1856427378964942, "grad_norm": 0.8747572291026703, "learning_rate": 7.572544155613299e-06, "loss": 0.3792, "step": 3551 }, { "epoch": 1.1859766277128547, "grad_norm": 0.8960305446130059, "learning_rate": 7.570878200406748e-06, "loss": 0.4001, "step": 3552 }, { "epoch": 1.1863105175292152, "grad_norm": 0.881726037310473, "learning_rate": 7.569211857126893e-06, "loss": 0.3844, "step": 3553 }, { "epoch": 1.186644407345576, "grad_norm": 0.9224160606962024, "learning_rate": 7.5675451260252665e-06, "loss": 0.3976, "step": 3554 }, { "epoch": 1.1869782971619365, "grad_norm": 0.8833943838516775, "learning_rate": 7.5658780073534645e-06, "loss": 0.402, "step": 3555 }, { "epoch": 1.1873121869782972, "grad_norm": 0.8620828925157196, "learning_rate": 7.564210501363136e-06, "loss": 0.3764, "step": 3556 }, { "epoch": 1.1876460767946577, "grad_norm": 0.9049744300298611, "learning_rate": 7.562542608305992e-06, "loss": 0.3861, "step": 3557 }, { "epoch": 1.1879799666110185, "grad_norm": 0.8805088591995743, "learning_rate": 7.5608743284338e-06, "loss": 0.376, "step": 3558 }, { "epoch": 1.188313856427379, "grad_norm": 0.8499008936650815, "learning_rate": 7.559205661998383e-06, "loss": 0.3785, "step": 3559 }, { "epoch": 1.1886477462437395, "grad_norm": 0.8537116439041512, "learning_rate": 7.5575366092516325e-06, "loss": 0.3889, "step": 3560 }, { "epoch": 1.1889816360601002, "grad_norm": 0.8575346636897004, "learning_rate": 7.555867170445485e-06, "loss": 0.3941, "step": 3561 }, { "epoch": 1.1893155258764607, "grad_norm": 0.9034013619149195, "learning_rate": 7.554197345831946e-06, "loss": 0.3858, "step": 3562 }, { "epoch": 1.1896494156928215, "grad_norm": 0.8910836600103267, "learning_rate": 7.552527135663073e-06, "loss": 0.3947, "step": 3563 }, { "epoch": 1.189983305509182, "grad_norm": 0.8705109684761888, "learning_rate": 7.550856540190985e-06, "loss": 0.3787, "step": 3564 }, { "epoch": 1.1903171953255425, "grad_norm": 0.8858660248462512, "learning_rate": 7.549185559667855e-06, "loss": 0.3851, "step": 3565 }, { "epoch": 1.1906510851419032, "grad_norm": 0.873536224211359, "learning_rate": 7.547514194345922e-06, "loss": 0.415, "step": 3566 }, { "epoch": 1.1909849749582637, "grad_norm": 0.878294352855302, "learning_rate": 7.545842444477473e-06, "loss": 0.3925, "step": 3567 }, { "epoch": 1.1913188647746245, "grad_norm": 0.8429088855084562, "learning_rate": 7.544170310314862e-06, "loss": 0.3737, "step": 3568 }, { "epoch": 1.191652754590985, "grad_norm": 0.870435289367877, "learning_rate": 7.542497792110494e-06, "loss": 0.3892, "step": 3569 }, { "epoch": 1.1919866444073457, "grad_norm": 0.9399139597099297, "learning_rate": 7.540824890116838e-06, "loss": 0.4055, "step": 3570 }, { "epoch": 1.1923205342237062, "grad_norm": 0.8568757015329322, "learning_rate": 7.539151604586417e-06, "loss": 0.3686, "step": 3571 }, { "epoch": 1.1926544240400667, "grad_norm": 0.9162026790402166, "learning_rate": 7.537477935771812e-06, "loss": 0.3995, "step": 3572 }, { "epoch": 1.1929883138564275, "grad_norm": 0.9006430112151603, "learning_rate": 7.535803883925663e-06, "loss": 0.3704, "step": 3573 }, { "epoch": 1.193322203672788, "grad_norm": 0.8483363904561737, "learning_rate": 7.534129449300669e-06, "loss": 0.3724, "step": 3574 }, { "epoch": 1.1936560934891487, "grad_norm": 0.8507114158980514, "learning_rate": 7.5324546321495835e-06, "loss": 0.3791, "step": 3575 }, { "epoch": 1.1939899833055092, "grad_norm": 0.894314792013607, "learning_rate": 7.530779432725222e-06, "loss": 0.3891, "step": 3576 }, { "epoch": 1.1943238731218697, "grad_norm": 0.8822210205256393, "learning_rate": 7.529103851280454e-06, "loss": 0.3787, "step": 3577 }, { "epoch": 1.1946577629382304, "grad_norm": 0.8929651651316326, "learning_rate": 7.527427888068207e-06, "loss": 0.3977, "step": 3578 }, { "epoch": 1.194991652754591, "grad_norm": 0.9138299030685747, "learning_rate": 7.525751543341469e-06, "loss": 0.3956, "step": 3579 }, { "epoch": 1.1953255425709517, "grad_norm": 0.8766373529516591, "learning_rate": 7.524074817353284e-06, "loss": 0.3729, "step": 3580 }, { "epoch": 1.1956594323873122, "grad_norm": 0.9196337550305292, "learning_rate": 7.5223977103567505e-06, "loss": 0.4144, "step": 3581 }, { "epoch": 1.1959933222036727, "grad_norm": 0.8877273276404406, "learning_rate": 7.520720222605029e-06, "loss": 0.389, "step": 3582 }, { "epoch": 1.1963272120200334, "grad_norm": 0.8501249239132088, "learning_rate": 7.519042354351336e-06, "loss": 0.3965, "step": 3583 }, { "epoch": 1.196661101836394, "grad_norm": 0.896521997993778, "learning_rate": 7.517364105848946e-06, "loss": 0.3964, "step": 3584 }, { "epoch": 1.1969949916527547, "grad_norm": 0.9104829369778794, "learning_rate": 7.515685477351191e-06, "loss": 0.4036, "step": 3585 }, { "epoch": 1.1973288814691152, "grad_norm": 0.8902142517751311, "learning_rate": 7.514006469111454e-06, "loss": 0.3924, "step": 3586 }, { "epoch": 1.1976627712854757, "grad_norm": 0.8997458588117246, "learning_rate": 7.512327081383186e-06, "loss": 0.4025, "step": 3587 }, { "epoch": 1.1979966611018364, "grad_norm": 0.8882286990730652, "learning_rate": 7.510647314419889e-06, "loss": 0.3929, "step": 3588 }, { "epoch": 1.198330550918197, "grad_norm": 0.8663934863131162, "learning_rate": 7.508967168475123e-06, "loss": 0.3979, "step": 3589 }, { "epoch": 1.1986644407345577, "grad_norm": 0.8610487335798929, "learning_rate": 7.507286643802505e-06, "loss": 0.3724, "step": 3590 }, { "epoch": 1.1989983305509182, "grad_norm": 0.8373276463767139, "learning_rate": 7.50560574065571e-06, "loss": 0.3813, "step": 3591 }, { "epoch": 1.1993322203672787, "grad_norm": 0.914741908079231, "learning_rate": 7.503924459288468e-06, "loss": 0.4091, "step": 3592 }, { "epoch": 1.1996661101836394, "grad_norm": 0.8541948338113496, "learning_rate": 7.502242799954573e-06, "loss": 0.3829, "step": 3593 }, { "epoch": 1.2, "grad_norm": 0.8743948017529577, "learning_rate": 7.500560762907867e-06, "loss": 0.3865, "step": 3594 }, { "epoch": 1.2003338898163607, "grad_norm": 0.8949648451629582, "learning_rate": 7.498878348402254e-06, "loss": 0.3921, "step": 3595 }, { "epoch": 1.2006677796327212, "grad_norm": 0.8778052292400069, "learning_rate": 7.497195556691692e-06, "loss": 0.3985, "step": 3596 }, { "epoch": 1.2010016694490817, "grad_norm": 0.9008685116616606, "learning_rate": 7.495512388030202e-06, "loss": 0.3923, "step": 3597 }, { "epoch": 1.2013355592654424, "grad_norm": 0.8432863899624851, "learning_rate": 7.493828842671854e-06, "loss": 0.3521, "step": 3598 }, { "epoch": 1.201669449081803, "grad_norm": 0.8372495741009164, "learning_rate": 7.4921449208707805e-06, "loss": 0.3828, "step": 3599 }, { "epoch": 1.2020033388981637, "grad_norm": 0.864592468033126, "learning_rate": 7.49046062288117e-06, "loss": 0.3677, "step": 3600 }, { "epoch": 1.2023372287145242, "grad_norm": 0.8865904728670059, "learning_rate": 7.488775948957264e-06, "loss": 0.3939, "step": 3601 }, { "epoch": 1.2026711185308847, "grad_norm": 0.8603543178640856, "learning_rate": 7.487090899353365e-06, "loss": 0.3909, "step": 3602 }, { "epoch": 1.2030050083472454, "grad_norm": 0.8465867662327634, "learning_rate": 7.485405474323832e-06, "loss": 0.3947, "step": 3603 }, { "epoch": 1.203338898163606, "grad_norm": 0.8554962972163417, "learning_rate": 7.4837196741230765e-06, "loss": 0.3761, "step": 3604 }, { "epoch": 1.2036727879799667, "grad_norm": 0.8785282546734228, "learning_rate": 7.482033499005573e-06, "loss": 0.3887, "step": 3605 }, { "epoch": 1.2040066777963272, "grad_norm": 0.9177087878594723, "learning_rate": 7.480346949225846e-06, "loss": 0.3698, "step": 3606 }, { "epoch": 1.2043405676126877, "grad_norm": 0.922486302592974, "learning_rate": 7.478660025038483e-06, "loss": 0.3946, "step": 3607 }, { "epoch": 1.2046744574290484, "grad_norm": 0.8503494602938995, "learning_rate": 7.4769727266981204e-06, "loss": 0.3758, "step": 3608 }, { "epoch": 1.205008347245409, "grad_norm": 0.8658189118213173, "learning_rate": 7.475285054459459e-06, "loss": 0.3888, "step": 3609 }, { "epoch": 1.2053422370617697, "grad_norm": 0.8926027742992299, "learning_rate": 7.473597008577251e-06, "loss": 0.3869, "step": 3610 }, { "epoch": 1.2056761268781302, "grad_norm": 0.8771421948086169, "learning_rate": 7.471908589306306e-06, "loss": 0.3787, "step": 3611 }, { "epoch": 1.2060100166944907, "grad_norm": 0.8718146504898024, "learning_rate": 7.47021979690149e-06, "loss": 0.3795, "step": 3612 }, { "epoch": 1.2063439065108514, "grad_norm": 0.8637749660863454, "learning_rate": 7.46853063161773e-06, "loss": 0.3963, "step": 3613 }, { "epoch": 1.206677796327212, "grad_norm": 0.8471383635795399, "learning_rate": 7.466841093709999e-06, "loss": 0.3848, "step": 3614 }, { "epoch": 1.2070116861435727, "grad_norm": 0.819321818148692, "learning_rate": 7.465151183433334e-06, "loss": 0.378, "step": 3615 }, { "epoch": 1.2073455759599332, "grad_norm": 0.8839843218492417, "learning_rate": 7.463460901042831e-06, "loss": 0.3955, "step": 3616 }, { "epoch": 1.2076794657762937, "grad_norm": 0.8470400657335432, "learning_rate": 7.461770246793632e-06, "loss": 0.3884, "step": 3617 }, { "epoch": 1.2080133555926544, "grad_norm": 0.8609763752042522, "learning_rate": 7.460079220940943e-06, "loss": 0.383, "step": 3618 }, { "epoch": 1.208347245409015, "grad_norm": 0.8821940933500856, "learning_rate": 7.458387823740023e-06, "loss": 0.3879, "step": 3619 }, { "epoch": 1.2086811352253757, "grad_norm": 0.8395828335107828, "learning_rate": 7.456696055446189e-06, "loss": 0.3585, "step": 3620 }, { "epoch": 1.2090150250417362, "grad_norm": 0.9506763721445298, "learning_rate": 7.4550039163148125e-06, "loss": 0.3959, "step": 3621 }, { "epoch": 1.209348914858097, "grad_norm": 0.9098856202106034, "learning_rate": 7.453311406601321e-06, "loss": 0.4076, "step": 3622 }, { "epoch": 1.2096828046744574, "grad_norm": 0.8470030820382547, "learning_rate": 7.4516185265612e-06, "loss": 0.3608, "step": 3623 }, { "epoch": 1.210016694490818, "grad_norm": 0.874398758967945, "learning_rate": 7.4499252764499865e-06, "loss": 0.3718, "step": 3624 }, { "epoch": 1.2103505843071787, "grad_norm": 0.8742841321536938, "learning_rate": 7.448231656523277e-06, "loss": 0.3889, "step": 3625 }, { "epoch": 1.2106844741235392, "grad_norm": 0.8943659000193892, "learning_rate": 7.446537667036725e-06, "loss": 0.3756, "step": 3626 }, { "epoch": 1.2110183639399, "grad_norm": 0.8962245510375133, "learning_rate": 7.4448433082460345e-06, "loss": 0.3984, "step": 3627 }, { "epoch": 1.2113522537562604, "grad_norm": 0.9023491732160422, "learning_rate": 7.443148580406972e-06, "loss": 0.4146, "step": 3628 }, { "epoch": 1.211686143572621, "grad_norm": 0.8694770506769269, "learning_rate": 7.441453483775354e-06, "loss": 0.3698, "step": 3629 }, { "epoch": 1.2120200333889817, "grad_norm": 0.885575844317509, "learning_rate": 7.4397580186070536e-06, "loss": 0.384, "step": 3630 }, { "epoch": 1.2123539232053422, "grad_norm": 0.8705447888399833, "learning_rate": 7.438062185158003e-06, "loss": 0.3862, "step": 3631 }, { "epoch": 1.212687813021703, "grad_norm": 0.8977290633574755, "learning_rate": 7.436365983684189e-06, "loss": 0.3885, "step": 3632 }, { "epoch": 1.2130217028380634, "grad_norm": 0.8996864656093627, "learning_rate": 7.4346694144416484e-06, "loss": 0.402, "step": 3633 }, { "epoch": 1.2133555926544242, "grad_norm": 0.9533863502083326, "learning_rate": 7.432972477686482e-06, "loss": 0.4024, "step": 3634 }, { "epoch": 1.2136894824707847, "grad_norm": 0.9041776131142115, "learning_rate": 7.431275173674838e-06, "loss": 0.3844, "step": 3635 }, { "epoch": 1.2140233722871452, "grad_norm": 0.8584839858180651, "learning_rate": 7.429577502662928e-06, "loss": 0.3744, "step": 3636 }, { "epoch": 1.214357262103506, "grad_norm": 0.8499599141445187, "learning_rate": 7.427879464907011e-06, "loss": 0.3818, "step": 3637 }, { "epoch": 1.2146911519198664, "grad_norm": 0.8729581994868494, "learning_rate": 7.4261810606634085e-06, "loss": 0.3652, "step": 3638 }, { "epoch": 1.2150250417362272, "grad_norm": 0.8923363907600544, "learning_rate": 7.4244822901884925e-06, "loss": 0.374, "step": 3639 }, { "epoch": 1.2153589315525877, "grad_norm": 0.8416452546438226, "learning_rate": 7.422783153738693e-06, "loss": 0.3721, "step": 3640 }, { "epoch": 1.2156928213689482, "grad_norm": 0.903743350500553, "learning_rate": 7.421083651570493e-06, "loss": 0.4037, "step": 3641 }, { "epoch": 1.216026711185309, "grad_norm": 0.8880651421612132, "learning_rate": 7.4193837839404325e-06, "loss": 0.383, "step": 3642 }, { "epoch": 1.2163606010016694, "grad_norm": 0.8914766495227163, "learning_rate": 7.417683551105106e-06, "loss": 0.4033, "step": 3643 }, { "epoch": 1.2166944908180302, "grad_norm": 0.8853835058008486, "learning_rate": 7.415982953321161e-06, "loss": 0.3894, "step": 3644 }, { "epoch": 1.2170283806343907, "grad_norm": 0.9309553884625813, "learning_rate": 7.414281990845305e-06, "loss": 0.4035, "step": 3645 }, { "epoch": 1.2173622704507512, "grad_norm": 0.921681790647427, "learning_rate": 7.412580663934299e-06, "loss": 0.3817, "step": 3646 }, { "epoch": 1.217696160267112, "grad_norm": 0.9075942089607403, "learning_rate": 7.410878972844955e-06, "loss": 0.3971, "step": 3647 }, { "epoch": 1.2180300500834724, "grad_norm": 0.8701892593918206, "learning_rate": 7.409176917834142e-06, "loss": 0.3858, "step": 3648 }, { "epoch": 1.2183639398998332, "grad_norm": 0.8770900415417993, "learning_rate": 7.407474499158788e-06, "loss": 0.3654, "step": 3649 }, { "epoch": 1.2186978297161937, "grad_norm": 0.924596635170004, "learning_rate": 7.405771717075869e-06, "loss": 0.3769, "step": 3650 }, { "epoch": 1.2190317195325542, "grad_norm": 0.8861542877625522, "learning_rate": 7.404068571842422e-06, "loss": 0.37, "step": 3651 }, { "epoch": 1.219365609348915, "grad_norm": 0.8823578085788935, "learning_rate": 7.402365063715535e-06, "loss": 0.3727, "step": 3652 }, { "epoch": 1.2196994991652754, "grad_norm": 0.9154578919659491, "learning_rate": 7.400661192952354e-06, "loss": 0.3861, "step": 3653 }, { "epoch": 1.2200333889816362, "grad_norm": 0.8256932188615262, "learning_rate": 7.398956959810076e-06, "loss": 0.3622, "step": 3654 }, { "epoch": 1.2203672787979967, "grad_norm": 0.8883227058638596, "learning_rate": 7.397252364545955e-06, "loss": 0.3652, "step": 3655 }, { "epoch": 1.2207011686143572, "grad_norm": 0.8935845157572342, "learning_rate": 7.395547407417299e-06, "loss": 0.3729, "step": 3656 }, { "epoch": 1.221035058430718, "grad_norm": 0.8487242813470564, "learning_rate": 7.393842088681469e-06, "loss": 0.3809, "step": 3657 }, { "epoch": 1.2213689482470784, "grad_norm": 0.8967357783980614, "learning_rate": 7.392136408595885e-06, "loss": 0.3932, "step": 3658 }, { "epoch": 1.2217028380634392, "grad_norm": 0.863561564677957, "learning_rate": 7.39043036741802e-06, "loss": 0.389, "step": 3659 }, { "epoch": 1.2220367278797997, "grad_norm": 0.8856868498708941, "learning_rate": 7.3887239654053956e-06, "loss": 0.4017, "step": 3660 }, { "epoch": 1.2223706176961602, "grad_norm": 0.8647263203547022, "learning_rate": 7.387017202815596e-06, "loss": 0.3968, "step": 3661 }, { "epoch": 1.222704507512521, "grad_norm": 0.8859827524653817, "learning_rate": 7.385310079906258e-06, "loss": 0.3907, "step": 3662 }, { "epoch": 1.2230383973288814, "grad_norm": 0.8606757064809548, "learning_rate": 7.383602596935068e-06, "loss": 0.393, "step": 3663 }, { "epoch": 1.2233722871452422, "grad_norm": 0.9185049437540116, "learning_rate": 7.381894754159772e-06, "loss": 0.3967, "step": 3664 }, { "epoch": 1.2237061769616027, "grad_norm": 0.8582163051453323, "learning_rate": 7.380186551838166e-06, "loss": 0.3948, "step": 3665 }, { "epoch": 1.2240400667779632, "grad_norm": 0.8834788968077992, "learning_rate": 7.3784779902281066e-06, "loss": 0.3883, "step": 3666 }, { "epoch": 1.224373956594324, "grad_norm": 0.9101711410201611, "learning_rate": 7.3767690695874976e-06, "loss": 0.4067, "step": 3667 }, { "epoch": 1.2247078464106844, "grad_norm": 0.8828213140586118, "learning_rate": 7.375059790174301e-06, "loss": 0.3855, "step": 3668 }, { "epoch": 1.2250417362270452, "grad_norm": 0.8954609142773475, "learning_rate": 7.373350152246532e-06, "loss": 0.3867, "step": 3669 }, { "epoch": 1.2253756260434057, "grad_norm": 0.8901122203552299, "learning_rate": 7.371640156062259e-06, "loss": 0.4002, "step": 3670 }, { "epoch": 1.2257095158597662, "grad_norm": 0.8651917405110257, "learning_rate": 7.369929801879608e-06, "loss": 0.3701, "step": 3671 }, { "epoch": 1.226043405676127, "grad_norm": 0.8635075456882203, "learning_rate": 7.368219089956753e-06, "loss": 0.3818, "step": 3672 }, { "epoch": 1.2263772954924874, "grad_norm": 0.8904423603826941, "learning_rate": 7.366508020551928e-06, "loss": 0.3872, "step": 3673 }, { "epoch": 1.2267111853088482, "grad_norm": 0.8469038149419226, "learning_rate": 7.3647965939234175e-06, "loss": 0.3712, "step": 3674 }, { "epoch": 1.2270450751252087, "grad_norm": 0.8266676856589246, "learning_rate": 7.363084810329561e-06, "loss": 0.3837, "step": 3675 }, { "epoch": 1.2273789649415692, "grad_norm": 0.8862043011077164, "learning_rate": 7.3613726700287505e-06, "loss": 0.4002, "step": 3676 }, { "epoch": 1.22771285475793, "grad_norm": 0.8891893202396041, "learning_rate": 7.359660173279437e-06, "loss": 0.3895, "step": 3677 }, { "epoch": 1.2280467445742904, "grad_norm": 0.899253091498681, "learning_rate": 7.357947320340118e-06, "loss": 0.3944, "step": 3678 }, { "epoch": 1.2283806343906511, "grad_norm": 0.9060707636302263, "learning_rate": 7.356234111469347e-06, "loss": 0.3949, "step": 3679 }, { "epoch": 1.2287145242070117, "grad_norm": 0.8898760530793953, "learning_rate": 7.354520546925735e-06, "loss": 0.3932, "step": 3680 }, { "epoch": 1.2290484140233722, "grad_norm": 0.8735173811549901, "learning_rate": 7.352806626967943e-06, "loss": 0.3781, "step": 3681 }, { "epoch": 1.229382303839733, "grad_norm": 0.9064183279202435, "learning_rate": 7.351092351854688e-06, "loss": 0.3714, "step": 3682 }, { "epoch": 1.2297161936560934, "grad_norm": 0.8924731514932592, "learning_rate": 7.349377721844737e-06, "loss": 0.394, "step": 3683 }, { "epoch": 1.2300500834724541, "grad_norm": 0.8831853315748919, "learning_rate": 7.347662737196915e-06, "loss": 0.383, "step": 3684 }, { "epoch": 1.2303839732888147, "grad_norm": 0.8672422409602443, "learning_rate": 7.3459473981700965e-06, "loss": 0.3665, "step": 3685 }, { "epoch": 1.2307178631051752, "grad_norm": 0.8655731989528984, "learning_rate": 7.344231705023214e-06, "loss": 0.3718, "step": 3686 }, { "epoch": 1.231051752921536, "grad_norm": 0.8381576227958012, "learning_rate": 7.342515658015248e-06, "loss": 0.3684, "step": 3687 }, { "epoch": 1.2313856427378964, "grad_norm": 0.8707377394157306, "learning_rate": 7.340799257405238e-06, "loss": 0.3795, "step": 3688 }, { "epoch": 1.2317195325542571, "grad_norm": 0.8665947212538581, "learning_rate": 7.339082503452272e-06, "loss": 0.4117, "step": 3689 }, { "epoch": 1.2320534223706177, "grad_norm": 0.8867337819097758, "learning_rate": 7.337365396415495e-06, "loss": 0.3853, "step": 3690 }, { "epoch": 1.2323873121869784, "grad_norm": 0.8853068608653724, "learning_rate": 7.335647936554101e-06, "loss": 0.3963, "step": 3691 }, { "epoch": 1.232721202003339, "grad_norm": 0.8534292377283973, "learning_rate": 7.333930124127345e-06, "loss": 0.3774, "step": 3692 }, { "epoch": 1.2330550918196994, "grad_norm": 0.8951659759117098, "learning_rate": 7.332211959394525e-06, "loss": 0.406, "step": 3693 }, { "epoch": 1.2333889816360601, "grad_norm": 0.8193441815514898, "learning_rate": 7.330493442615001e-06, "loss": 0.3687, "step": 3694 }, { "epoch": 1.2337228714524207, "grad_norm": 0.8760344881705241, "learning_rate": 7.328774574048182e-06, "loss": 0.3958, "step": 3695 }, { "epoch": 1.2340567612687814, "grad_norm": 0.8802685104143123, "learning_rate": 7.32705535395353e-06, "loss": 0.3851, "step": 3696 }, { "epoch": 1.234390651085142, "grad_norm": 0.8925140077897173, "learning_rate": 7.325335782590561e-06, "loss": 0.3964, "step": 3697 }, { "epoch": 1.2347245409015024, "grad_norm": 0.9305118092949668, "learning_rate": 7.323615860218844e-06, "loss": 0.4083, "step": 3698 }, { "epoch": 1.2350584307178631, "grad_norm": 0.9154511258308309, "learning_rate": 7.321895587097999e-06, "loss": 0.4015, "step": 3699 }, { "epoch": 1.2353923205342237, "grad_norm": 0.8776322946092271, "learning_rate": 7.320174963487704e-06, "loss": 0.4009, "step": 3700 }, { "epoch": 1.2357262103505844, "grad_norm": 0.9384122925619883, "learning_rate": 7.318453989647684e-06, "loss": 0.4095, "step": 3701 }, { "epoch": 1.236060100166945, "grad_norm": 0.8387692821032526, "learning_rate": 7.3167326658377215e-06, "loss": 0.369, "step": 3702 }, { "epoch": 1.2363939899833056, "grad_norm": 0.8883122010716072, "learning_rate": 7.315010992317647e-06, "loss": 0.3891, "step": 3703 }, { "epoch": 1.2367278797996661, "grad_norm": 0.8254432799432648, "learning_rate": 7.31328896934735e-06, "loss": 0.368, "step": 3704 }, { "epoch": 1.2370617696160267, "grad_norm": 0.8844183527529846, "learning_rate": 7.311566597186765e-06, "loss": 0.3775, "step": 3705 }, { "epoch": 1.2373956594323874, "grad_norm": 0.8775290880216636, "learning_rate": 7.309843876095889e-06, "loss": 0.3699, "step": 3706 }, { "epoch": 1.237729549248748, "grad_norm": 0.8843697170836122, "learning_rate": 7.308120806334762e-06, "loss": 0.3965, "step": 3707 }, { "epoch": 1.2380634390651086, "grad_norm": 0.848165083035715, "learning_rate": 7.3063973881634844e-06, "loss": 0.3894, "step": 3708 }, { "epoch": 1.2383973288814691, "grad_norm": 0.878314636925748, "learning_rate": 7.304673621842201e-06, "loss": 0.387, "step": 3709 }, { "epoch": 1.2387312186978297, "grad_norm": 0.8670278385286776, "learning_rate": 7.302949507631118e-06, "loss": 0.3743, "step": 3710 }, { "epoch": 1.2390651085141904, "grad_norm": 0.878043543244099, "learning_rate": 7.301225045790487e-06, "loss": 0.3832, "step": 3711 }, { "epoch": 1.239398998330551, "grad_norm": 0.9060291479709773, "learning_rate": 7.299500236580618e-06, "loss": 0.3995, "step": 3712 }, { "epoch": 1.2397328881469116, "grad_norm": 0.8740564948890038, "learning_rate": 7.297775080261869e-06, "loss": 0.3693, "step": 3713 }, { "epoch": 1.2400667779632721, "grad_norm": 0.8866658528642974, "learning_rate": 7.296049577094648e-06, "loss": 0.3863, "step": 3714 }, { "epoch": 1.2404006677796326, "grad_norm": 0.8498782156141579, "learning_rate": 7.294323727339427e-06, "loss": 0.3778, "step": 3715 }, { "epoch": 1.2407345575959934, "grad_norm": 0.8870671485793818, "learning_rate": 7.292597531256715e-06, "loss": 0.3793, "step": 3716 }, { "epoch": 1.241068447412354, "grad_norm": 0.8699480590119748, "learning_rate": 7.290870989107086e-06, "loss": 0.3797, "step": 3717 }, { "epoch": 1.2414023372287146, "grad_norm": 0.8734558211256402, "learning_rate": 7.28914410115116e-06, "loss": 0.3905, "step": 3718 }, { "epoch": 1.2417362270450751, "grad_norm": 0.8482293165390139, "learning_rate": 7.287416867649608e-06, "loss": 0.3803, "step": 3719 }, { "epoch": 1.2420701168614356, "grad_norm": 0.8507331971926219, "learning_rate": 7.285689288863158e-06, "loss": 0.3756, "step": 3720 }, { "epoch": 1.2424040066777964, "grad_norm": 0.8779945348131357, "learning_rate": 7.283961365052585e-06, "loss": 0.39, "step": 3721 }, { "epoch": 1.242737896494157, "grad_norm": 0.8658825741687654, "learning_rate": 7.2822330964787194e-06, "loss": 0.3844, "step": 3722 }, { "epoch": 1.2430717863105176, "grad_norm": 0.8891691185066103, "learning_rate": 7.280504483402445e-06, "loss": 0.403, "step": 3723 }, { "epoch": 1.2434056761268781, "grad_norm": 0.8458933308529875, "learning_rate": 7.2787755260846915e-06, "loss": 0.3772, "step": 3724 }, { "epoch": 1.2437395659432386, "grad_norm": 0.8461488263503006, "learning_rate": 7.27704622478645e-06, "loss": 0.3871, "step": 3725 }, { "epoch": 1.2440734557595994, "grad_norm": 0.8461595483088253, "learning_rate": 7.275316579768752e-06, "loss": 0.3638, "step": 3726 }, { "epoch": 1.2444073455759599, "grad_norm": 0.8931138958067221, "learning_rate": 7.273586591292689e-06, "loss": 0.4025, "step": 3727 }, { "epoch": 1.2447412353923206, "grad_norm": 0.8600912379519996, "learning_rate": 7.271856259619405e-06, "loss": 0.3782, "step": 3728 }, { "epoch": 1.2450751252086811, "grad_norm": 0.9096901382014206, "learning_rate": 7.270125585010091e-06, "loss": 0.4031, "step": 3729 }, { "epoch": 1.2454090150250416, "grad_norm": 0.894596138198877, "learning_rate": 7.26839456772599e-06, "loss": 0.4037, "step": 3730 }, { "epoch": 1.2457429048414024, "grad_norm": 0.8760933658806943, "learning_rate": 7.266663208028402e-06, "loss": 0.3859, "step": 3731 }, { "epoch": 1.2460767946577629, "grad_norm": 0.840975552619695, "learning_rate": 7.264931506178672e-06, "loss": 0.3736, "step": 3732 }, { "epoch": 1.2464106844741236, "grad_norm": 0.8906616866438221, "learning_rate": 7.263199462438203e-06, "loss": 0.3896, "step": 3733 }, { "epoch": 1.2467445742904841, "grad_norm": 0.9013592183609582, "learning_rate": 7.2614670770684425e-06, "loss": 0.39, "step": 3734 }, { "epoch": 1.2470784641068446, "grad_norm": 0.8705300767045046, "learning_rate": 7.259734350330899e-06, "loss": 0.3858, "step": 3735 }, { "epoch": 1.2474123539232054, "grad_norm": 0.8419115037928535, "learning_rate": 7.258001282487122e-06, "loss": 0.3741, "step": 3736 }, { "epoch": 1.2477462437395659, "grad_norm": 0.8526874999318534, "learning_rate": 7.25626787379872e-06, "loss": 0.3763, "step": 3737 }, { "epoch": 1.2480801335559266, "grad_norm": 0.9432982306201548, "learning_rate": 7.25453412452735e-06, "loss": 0.4083, "step": 3738 }, { "epoch": 1.2484140233722871, "grad_norm": 0.8995206589349384, "learning_rate": 7.252800034934723e-06, "loss": 0.3874, "step": 3739 }, { "epoch": 1.2487479131886476, "grad_norm": 0.9361605826597544, "learning_rate": 7.251065605282596e-06, "loss": 0.3978, "step": 3740 }, { "epoch": 1.2490818030050084, "grad_norm": 0.8332297082118165, "learning_rate": 7.249330835832782e-06, "loss": 0.3808, "step": 3741 }, { "epoch": 1.2494156928213689, "grad_norm": 0.8977965499878474, "learning_rate": 7.247595726847143e-06, "loss": 0.4004, "step": 3742 }, { "epoch": 1.2497495826377296, "grad_norm": 0.9041285279854581, "learning_rate": 7.245860278587596e-06, "loss": 0.3682, "step": 3743 }, { "epoch": 1.2500834724540901, "grad_norm": 0.8699409213176155, "learning_rate": 7.244124491316103e-06, "loss": 0.3855, "step": 3744 }, { "epoch": 1.2504173622704506, "grad_norm": 0.8752861118033872, "learning_rate": 7.242388365294682e-06, "loss": 0.3882, "step": 3745 }, { "epoch": 1.2507512520868114, "grad_norm": 0.859962051672519, "learning_rate": 7.240651900785402e-06, "loss": 0.3809, "step": 3746 }, { "epoch": 1.2510851419031719, "grad_norm": 0.8699862538329867, "learning_rate": 7.2389150980503804e-06, "loss": 0.3734, "step": 3747 }, { "epoch": 1.2514190317195326, "grad_norm": 0.8112456228657743, "learning_rate": 7.237177957351788e-06, "loss": 0.3517, "step": 3748 }, { "epoch": 1.2517529215358931, "grad_norm": 0.8626076320193925, "learning_rate": 7.235440478951844e-06, "loss": 0.3839, "step": 3749 }, { "epoch": 1.2520868113522536, "grad_norm": 0.8651237251187028, "learning_rate": 7.233702663112821e-06, "loss": 0.371, "step": 3750 }, { "epoch": 1.2524207011686144, "grad_norm": 0.8781432529361627, "learning_rate": 7.2319645100970435e-06, "loss": 0.3866, "step": 3751 }, { "epoch": 1.2527545909849749, "grad_norm": 0.9206831527354854, "learning_rate": 7.230226020166883e-06, "loss": 0.3973, "step": 3752 }, { "epoch": 1.2530884808013356, "grad_norm": 0.8899450445343625, "learning_rate": 7.228487193584764e-06, "loss": 0.3948, "step": 3753 }, { "epoch": 1.2534223706176961, "grad_norm": 0.8334077875725444, "learning_rate": 7.226748030613164e-06, "loss": 0.3713, "step": 3754 }, { "epoch": 1.2537562604340566, "grad_norm": 0.9330731601512268, "learning_rate": 7.2250085315146055e-06, "loss": 0.3911, "step": 3755 }, { "epoch": 1.2540901502504174, "grad_norm": 0.9053634044939639, "learning_rate": 7.223268696551671e-06, "loss": 0.3943, "step": 3756 }, { "epoch": 1.254424040066778, "grad_norm": 0.9000015110239931, "learning_rate": 7.221528525986981e-06, "loss": 0.374, "step": 3757 }, { "epoch": 1.2547579298831386, "grad_norm": 0.8368757078068523, "learning_rate": 7.219788020083219e-06, "loss": 0.3715, "step": 3758 }, { "epoch": 1.2550918196994991, "grad_norm": 0.8856634240407512, "learning_rate": 7.218047179103112e-06, "loss": 0.3933, "step": 3759 }, { "epoch": 1.2554257095158596, "grad_norm": 0.8921488293355695, "learning_rate": 7.216306003309441e-06, "loss": 0.3846, "step": 3760 }, { "epoch": 1.2557595993322204, "grad_norm": 0.8920494457656929, "learning_rate": 7.214564492965035e-06, "loss": 0.3848, "step": 3761 }, { "epoch": 1.256093489148581, "grad_norm": 0.8950250169788114, "learning_rate": 7.212822648332773e-06, "loss": 0.3944, "step": 3762 }, { "epoch": 1.2564273789649416, "grad_norm": 0.8530915676866068, "learning_rate": 7.211080469675586e-06, "loss": 0.3734, "step": 3763 }, { "epoch": 1.2567612687813021, "grad_norm": 0.896147023681243, "learning_rate": 7.209337957256458e-06, "loss": 0.3859, "step": 3764 }, { "epoch": 1.2570951585976629, "grad_norm": 0.9344298538198884, "learning_rate": 7.207595111338418e-06, "loss": 0.4039, "step": 3765 }, { "epoch": 1.2574290484140234, "grad_norm": 0.8586180697779561, "learning_rate": 7.205851932184551e-06, "loss": 0.3737, "step": 3766 }, { "epoch": 1.257762938230384, "grad_norm": 0.8896739475156126, "learning_rate": 7.204108420057986e-06, "loss": 0.3803, "step": 3767 }, { "epoch": 1.2580968280467446, "grad_norm": 0.8207424813589618, "learning_rate": 7.202364575221907e-06, "loss": 0.3753, "step": 3768 }, { "epoch": 1.2584307178631051, "grad_norm": 0.8613197451607922, "learning_rate": 7.200620397939549e-06, "loss": 0.3777, "step": 3769 }, { "epoch": 1.2587646076794659, "grad_norm": 0.8546304176585952, "learning_rate": 7.1988758884741915e-06, "loss": 0.3852, "step": 3770 }, { "epoch": 1.2590984974958264, "grad_norm": 0.8914691259005703, "learning_rate": 7.197131047089172e-06, "loss": 0.3929, "step": 3771 }, { "epoch": 1.259432387312187, "grad_norm": 0.8761665270875117, "learning_rate": 7.1953858740478675e-06, "loss": 0.3876, "step": 3772 }, { "epoch": 1.2597662771285476, "grad_norm": 0.8668195287472334, "learning_rate": 7.193640369613717e-06, "loss": 0.3615, "step": 3773 }, { "epoch": 1.2601001669449081, "grad_norm": 0.8676500879171953, "learning_rate": 7.191894534050202e-06, "loss": 0.3709, "step": 3774 }, { "epoch": 1.2604340567612689, "grad_norm": 0.9503633267545276, "learning_rate": 7.190148367620855e-06, "loss": 0.4155, "step": 3775 }, { "epoch": 1.2607679465776294, "grad_norm": 0.8876740430664313, "learning_rate": 7.188401870589261e-06, "loss": 0.3871, "step": 3776 }, { "epoch": 1.26110183639399, "grad_norm": 0.8634569203563025, "learning_rate": 7.186655043219051e-06, "loss": 0.3754, "step": 3777 }, { "epoch": 1.2614357262103506, "grad_norm": 0.8345246890633969, "learning_rate": 7.18490788577391e-06, "loss": 0.3726, "step": 3778 }, { "epoch": 1.2617696160267111, "grad_norm": 0.8489324170222611, "learning_rate": 7.18316039851757e-06, "loss": 0.3774, "step": 3779 }, { "epoch": 1.2621035058430718, "grad_norm": 0.8858231931616587, "learning_rate": 7.1814125817138135e-06, "loss": 0.4013, "step": 3780 }, { "epoch": 1.2624373956594324, "grad_norm": 0.846121595490895, "learning_rate": 7.179664435626475e-06, "loss": 0.3652, "step": 3781 }, { "epoch": 1.262771285475793, "grad_norm": 0.8787459413186149, "learning_rate": 7.1779159605194334e-06, "loss": 0.3928, "step": 3782 }, { "epoch": 1.2631051752921536, "grad_norm": 0.8681828470714537, "learning_rate": 7.176167156656622e-06, "loss": 0.383, "step": 3783 }, { "epoch": 1.2634390651085141, "grad_norm": 0.8770066791693343, "learning_rate": 7.174418024302024e-06, "loss": 0.3848, "step": 3784 }, { "epoch": 1.2637729549248748, "grad_norm": 0.863667380028598, "learning_rate": 7.172668563719667e-06, "loss": 0.3927, "step": 3785 }, { "epoch": 1.2641068447412354, "grad_norm": 0.917124805868631, "learning_rate": 7.170918775173634e-06, "loss": 0.3905, "step": 3786 }, { "epoch": 1.264440734557596, "grad_norm": 0.8826581862926411, "learning_rate": 7.169168658928055e-06, "loss": 0.3862, "step": 3787 }, { "epoch": 1.2647746243739566, "grad_norm": 0.8550090314130755, "learning_rate": 7.167418215247107e-06, "loss": 0.3685, "step": 3788 }, { "epoch": 1.2651085141903171, "grad_norm": 0.8531865301478876, "learning_rate": 7.165667444395024e-06, "loss": 0.3754, "step": 3789 }, { "epoch": 1.2654424040066778, "grad_norm": 0.8781018913031613, "learning_rate": 7.163916346636078e-06, "loss": 0.3855, "step": 3790 }, { "epoch": 1.2657762938230384, "grad_norm": 0.8745846013420631, "learning_rate": 7.1621649222346e-06, "loss": 0.3727, "step": 3791 }, { "epoch": 1.266110183639399, "grad_norm": 0.8734644249908996, "learning_rate": 7.160413171454968e-06, "loss": 0.3938, "step": 3792 }, { "epoch": 1.2664440734557596, "grad_norm": 0.8686967458536256, "learning_rate": 7.158661094561608e-06, "loss": 0.3844, "step": 3793 }, { "epoch": 1.2667779632721201, "grad_norm": 0.8911272821269944, "learning_rate": 7.156908691818993e-06, "loss": 0.3909, "step": 3794 }, { "epoch": 1.2671118530884808, "grad_norm": 0.8677308979178208, "learning_rate": 7.15515596349165e-06, "loss": 0.3777, "step": 3795 }, { "epoch": 1.2674457429048414, "grad_norm": 0.8696613193488731, "learning_rate": 7.153402909844152e-06, "loss": 0.3871, "step": 3796 }, { "epoch": 1.267779632721202, "grad_norm": 0.8459585405608776, "learning_rate": 7.151649531141121e-06, "loss": 0.3736, "step": 3797 }, { "epoch": 1.2681135225375626, "grad_norm": 0.8701710132835699, "learning_rate": 7.149895827647231e-06, "loss": 0.3773, "step": 3798 }, { "epoch": 1.268447412353923, "grad_norm": 0.8470645161181268, "learning_rate": 7.148141799627203e-06, "loss": 0.3673, "step": 3799 }, { "epoch": 1.2687813021702838, "grad_norm": 0.8814696260135724, "learning_rate": 7.1463874473458035e-06, "loss": 0.3904, "step": 3800 }, { "epoch": 1.2691151919866444, "grad_norm": 0.8782740731056705, "learning_rate": 7.144632771067856e-06, "loss": 0.3637, "step": 3801 }, { "epoch": 1.269449081803005, "grad_norm": 0.9065385455723741, "learning_rate": 7.142877771058227e-06, "loss": 0.3937, "step": 3802 }, { "epoch": 1.2697829716193656, "grad_norm": 0.900565452098408, "learning_rate": 7.141122447581831e-06, "loss": 0.3977, "step": 3803 }, { "epoch": 1.270116861435726, "grad_norm": 0.8766478866071478, "learning_rate": 7.1393668009036355e-06, "loss": 0.3941, "step": 3804 }, { "epoch": 1.2704507512520868, "grad_norm": 0.9134559687391012, "learning_rate": 7.137610831288655e-06, "loss": 0.4076, "step": 3805 }, { "epoch": 1.2707846410684474, "grad_norm": 0.8892821293391326, "learning_rate": 7.135854539001952e-06, "loss": 0.3757, "step": 3806 }, { "epoch": 1.271118530884808, "grad_norm": 0.8840521941090842, "learning_rate": 7.134097924308638e-06, "loss": 0.3709, "step": 3807 }, { "epoch": 1.2714524207011686, "grad_norm": 0.883281365636721, "learning_rate": 7.132340987473874e-06, "loss": 0.3841, "step": 3808 }, { "epoch": 1.271786310517529, "grad_norm": 0.8605005838677049, "learning_rate": 7.1305837287628694e-06, "loss": 0.3546, "step": 3809 }, { "epoch": 1.2721202003338898, "grad_norm": 0.9052641861898881, "learning_rate": 7.128826148440881e-06, "loss": 0.3879, "step": 3810 }, { "epoch": 1.2724540901502503, "grad_norm": 0.8904558588837572, "learning_rate": 7.127068246773216e-06, "loss": 0.3878, "step": 3811 }, { "epoch": 1.272787979966611, "grad_norm": 0.9000221634293243, "learning_rate": 7.125310024025229e-06, "loss": 0.3707, "step": 3812 }, { "epoch": 1.2731218697829716, "grad_norm": 0.8417271530531211, "learning_rate": 7.123551480462321e-06, "loss": 0.3635, "step": 3813 }, { "epoch": 1.273455759599332, "grad_norm": 0.9325264932440127, "learning_rate": 7.121792616349947e-06, "loss": 0.4055, "step": 3814 }, { "epoch": 1.2737896494156928, "grad_norm": 0.8890593885929783, "learning_rate": 7.120033431953606e-06, "loss": 0.3883, "step": 3815 }, { "epoch": 1.2741235392320533, "grad_norm": 0.8897252655099409, "learning_rate": 7.1182739275388435e-06, "loss": 0.3839, "step": 3816 }, { "epoch": 1.274457429048414, "grad_norm": 0.8799109659548189, "learning_rate": 7.11651410337126e-06, "loss": 0.377, "step": 3817 }, { "epoch": 1.2747913188647746, "grad_norm": 0.867663140290672, "learning_rate": 7.1147539597165e-06, "loss": 0.3869, "step": 3818 }, { "epoch": 1.275125208681135, "grad_norm": 0.8776449841079145, "learning_rate": 7.112993496840255e-06, "loss": 0.3841, "step": 3819 }, { "epoch": 1.2754590984974958, "grad_norm": 0.8289471164998274, "learning_rate": 7.111232715008266e-06, "loss": 0.3577, "step": 3820 }, { "epoch": 1.2757929883138563, "grad_norm": 0.8750482955027648, "learning_rate": 7.109471614486323e-06, "loss": 0.3712, "step": 3821 }, { "epoch": 1.276126878130217, "grad_norm": 0.8807947307899354, "learning_rate": 7.107710195540266e-06, "loss": 0.3801, "step": 3822 }, { "epoch": 1.2764607679465776, "grad_norm": 0.9038743530542667, "learning_rate": 7.105948458435976e-06, "loss": 0.3888, "step": 3823 }, { "epoch": 1.276794657762938, "grad_norm": 0.8473174358350416, "learning_rate": 7.104186403439391e-06, "loss": 0.3696, "step": 3824 }, { "epoch": 1.2771285475792988, "grad_norm": 0.8307550620584245, "learning_rate": 7.1024240308164924e-06, "loss": 0.3814, "step": 3825 }, { "epoch": 1.2774624373956596, "grad_norm": 0.9157789431244192, "learning_rate": 7.100661340833307e-06, "loss": 0.3935, "step": 3826 }, { "epoch": 1.27779632721202, "grad_norm": 0.8279297830261375, "learning_rate": 7.098898333755915e-06, "loss": 0.3546, "step": 3827 }, { "epoch": 1.2781302170283806, "grad_norm": 0.8872439967264949, "learning_rate": 7.09713500985044e-06, "loss": 0.3791, "step": 3828 }, { "epoch": 1.278464106844741, "grad_norm": 0.8983359511756637, "learning_rate": 7.095371369383054e-06, "loss": 0.4036, "step": 3829 }, { "epoch": 1.2787979966611018, "grad_norm": 0.872934695911379, "learning_rate": 7.093607412619983e-06, "loss": 0.3877, "step": 3830 }, { "epoch": 1.2791318864774626, "grad_norm": 0.8784861353267939, "learning_rate": 7.091843139827491e-06, "loss": 0.384, "step": 3831 }, { "epoch": 1.279465776293823, "grad_norm": 0.8469119585412301, "learning_rate": 7.0900785512718975e-06, "loss": 0.3584, "step": 3832 }, { "epoch": 1.2797996661101836, "grad_norm": 0.8763422706440482, "learning_rate": 7.088313647219563e-06, "loss": 0.387, "step": 3833 }, { "epoch": 1.2801335559265443, "grad_norm": 0.8603816139327731, "learning_rate": 7.0865484279369026e-06, "loss": 0.3826, "step": 3834 }, { "epoch": 1.2804674457429048, "grad_norm": 0.9153771525936789, "learning_rate": 7.084782893690375e-06, "loss": 0.391, "step": 3835 }, { "epoch": 1.2808013355592656, "grad_norm": 0.8394716539248723, "learning_rate": 7.083017044746485e-06, "loss": 0.3793, "step": 3836 }, { "epoch": 1.281135225375626, "grad_norm": 0.8573946602734464, "learning_rate": 7.081250881371789e-06, "loss": 0.3807, "step": 3837 }, { "epoch": 1.2814691151919866, "grad_norm": 0.8788590656331157, "learning_rate": 7.079484403832889e-06, "loss": 0.3792, "step": 3838 }, { "epoch": 1.2818030050083473, "grad_norm": 0.8661210607639396, "learning_rate": 7.077717612396434e-06, "loss": 0.3799, "step": 3839 }, { "epoch": 1.2821368948247078, "grad_norm": 0.851234587874758, "learning_rate": 7.07595050732912e-06, "loss": 0.3743, "step": 3840 }, { "epoch": 1.2824707846410686, "grad_norm": 0.8613908428368866, "learning_rate": 7.074183088897691e-06, "loss": 0.3806, "step": 3841 }, { "epoch": 1.282804674457429, "grad_norm": 0.8882730445642679, "learning_rate": 7.0724153573689395e-06, "loss": 0.3782, "step": 3842 }, { "epoch": 1.2831385642737896, "grad_norm": 0.8549581632915851, "learning_rate": 7.070647313009702e-06, "loss": 0.3573, "step": 3843 }, { "epoch": 1.2834724540901503, "grad_norm": 0.873467664020485, "learning_rate": 7.068878956086865e-06, "loss": 0.4005, "step": 3844 }, { "epoch": 1.2838063439065108, "grad_norm": 0.8485760317521407, "learning_rate": 7.0671102868673625e-06, "loss": 0.3715, "step": 3845 }, { "epoch": 1.2841402337228716, "grad_norm": 0.8517954740812805, "learning_rate": 7.065341305618173e-06, "loss": 0.3746, "step": 3846 }, { "epoch": 1.284474123539232, "grad_norm": 0.8995724091579741, "learning_rate": 7.063572012606326e-06, "loss": 0.3843, "step": 3847 }, { "epoch": 1.2848080133555926, "grad_norm": 0.8884407847978913, "learning_rate": 7.061802408098895e-06, "loss": 0.4036, "step": 3848 }, { "epoch": 1.2851419031719533, "grad_norm": 0.8653269860785341, "learning_rate": 7.060032492362998e-06, "loss": 0.3815, "step": 3849 }, { "epoch": 1.2854757929883138, "grad_norm": 0.843295475128046, "learning_rate": 7.058262265665808e-06, "loss": 0.3626, "step": 3850 }, { "epoch": 1.2858096828046746, "grad_norm": 0.8466384439974827, "learning_rate": 7.056491728274537e-06, "loss": 0.3771, "step": 3851 }, { "epoch": 1.286143572621035, "grad_norm": 0.8316766814538445, "learning_rate": 7.054720880456448e-06, "loss": 0.377, "step": 3852 }, { "epoch": 1.2864774624373956, "grad_norm": 0.8581202654326745, "learning_rate": 7.052949722478853e-06, "loss": 0.3791, "step": 3853 }, { "epoch": 1.2868113522537563, "grad_norm": 0.8472542644665207, "learning_rate": 7.051178254609101e-06, "loss": 0.3842, "step": 3854 }, { "epoch": 1.2871452420701168, "grad_norm": 0.8902697010874452, "learning_rate": 7.049406477114602e-06, "loss": 0.3958, "step": 3855 }, { "epoch": 1.2874791318864776, "grad_norm": 0.890691393269806, "learning_rate": 7.0476343902627984e-06, "loss": 0.395, "step": 3856 }, { "epoch": 1.287813021702838, "grad_norm": 0.8589949404460621, "learning_rate": 7.045861994321193e-06, "loss": 0.3689, "step": 3857 }, { "epoch": 1.2881469115191986, "grad_norm": 0.9555628205046068, "learning_rate": 7.044089289557322e-06, "loss": 0.3898, "step": 3858 }, { "epoch": 1.2884808013355593, "grad_norm": 0.842963295439733, "learning_rate": 7.04231627623878e-06, "loss": 0.3642, "step": 3859 }, { "epoch": 1.2888146911519198, "grad_norm": 0.9027967339414857, "learning_rate": 7.040542954633199e-06, "loss": 0.3938, "step": 3860 }, { "epoch": 1.2891485809682806, "grad_norm": 0.8820902391896719, "learning_rate": 7.0387693250082635e-06, "loss": 0.3862, "step": 3861 }, { "epoch": 1.289482470784641, "grad_norm": 0.8742714264480484, "learning_rate": 7.036995387631702e-06, "loss": 0.381, "step": 3862 }, { "epoch": 1.2898163606010016, "grad_norm": 0.9204954284293078, "learning_rate": 7.035221142771289e-06, "loss": 0.3962, "step": 3863 }, { "epoch": 1.2901502504173623, "grad_norm": 0.8388941313927242, "learning_rate": 7.033446590694847e-06, "loss": 0.3712, "step": 3864 }, { "epoch": 1.2904841402337228, "grad_norm": 0.8482158727285725, "learning_rate": 7.031671731670244e-06, "loss": 0.3688, "step": 3865 }, { "epoch": 1.2908180300500836, "grad_norm": 0.8546856253641639, "learning_rate": 7.029896565965394e-06, "loss": 0.3788, "step": 3866 }, { "epoch": 1.291151919866444, "grad_norm": 0.8689401176684042, "learning_rate": 7.028121093848257e-06, "loss": 0.3852, "step": 3867 }, { "epoch": 1.2914858096828046, "grad_norm": 0.8644764453259974, "learning_rate": 7.026345315586843e-06, "loss": 0.3904, "step": 3868 }, { "epoch": 1.2918196994991653, "grad_norm": 0.8943658201951675, "learning_rate": 7.024569231449202e-06, "loss": 0.3826, "step": 3869 }, { "epoch": 1.2921535893155258, "grad_norm": 0.8911148426176091, "learning_rate": 7.022792841703435e-06, "loss": 0.383, "step": 3870 }, { "epoch": 1.2924874791318866, "grad_norm": 0.8565761707498275, "learning_rate": 7.021016146617687e-06, "loss": 0.3811, "step": 3871 }, { "epoch": 1.292821368948247, "grad_norm": 0.8902745650391761, "learning_rate": 7.019239146460151e-06, "loss": 0.3853, "step": 3872 }, { "epoch": 1.2931552587646076, "grad_norm": 0.8036185267151061, "learning_rate": 7.017461841499063e-06, "loss": 0.3589, "step": 3873 }, { "epoch": 1.2934891485809683, "grad_norm": 0.858215648450316, "learning_rate": 7.015684232002706e-06, "loss": 0.3745, "step": 3874 }, { "epoch": 1.2938230383973288, "grad_norm": 0.8626527950400539, "learning_rate": 7.013906318239411e-06, "loss": 0.3835, "step": 3875 }, { "epoch": 1.2941569282136896, "grad_norm": 0.8421017484060415, "learning_rate": 7.012128100477555e-06, "loss": 0.3724, "step": 3876 }, { "epoch": 1.29449081803005, "grad_norm": 0.9066384564432541, "learning_rate": 7.010349578985555e-06, "loss": 0.3871, "step": 3877 }, { "epoch": 1.2948247078464106, "grad_norm": 0.8593683184382133, "learning_rate": 7.008570754031883e-06, "loss": 0.3803, "step": 3878 }, { "epoch": 1.2951585976627713, "grad_norm": 0.8944185041268583, "learning_rate": 7.006791625885049e-06, "loss": 0.3847, "step": 3879 }, { "epoch": 1.2954924874791318, "grad_norm": 0.8368278506696795, "learning_rate": 7.005012194813613e-06, "loss": 0.3722, "step": 3880 }, { "epoch": 1.2958263772954925, "grad_norm": 0.8993623867543384, "learning_rate": 7.0032324610861806e-06, "loss": 0.3822, "step": 3881 }, { "epoch": 1.296160267111853, "grad_norm": 0.878208768451714, "learning_rate": 7.0014524249714e-06, "loss": 0.3807, "step": 3882 }, { "epoch": 1.2964941569282136, "grad_norm": 0.8923482790944715, "learning_rate": 6.999672086737967e-06, "loss": 0.3983, "step": 3883 }, { "epoch": 1.2968280467445743, "grad_norm": 0.859024304904816, "learning_rate": 6.997891446654627e-06, "loss": 0.3704, "step": 3884 }, { "epoch": 1.2971619365609348, "grad_norm": 0.8179653295768047, "learning_rate": 6.996110504990161e-06, "loss": 0.3542, "step": 3885 }, { "epoch": 1.2974958263772955, "grad_norm": 0.8877580103714349, "learning_rate": 6.994329262013408e-06, "loss": 0.375, "step": 3886 }, { "epoch": 1.297829716193656, "grad_norm": 0.8946443744471391, "learning_rate": 6.9925477179932424e-06, "loss": 0.3901, "step": 3887 }, { "epoch": 1.2981636060100166, "grad_norm": 0.8705613306132313, "learning_rate": 6.990765873198588e-06, "loss": 0.4031, "step": 3888 }, { "epoch": 1.2984974958263773, "grad_norm": 0.862145724628215, "learning_rate": 6.988983727898414e-06, "loss": 0.3918, "step": 3889 }, { "epoch": 1.298831385642738, "grad_norm": 0.8387997085404539, "learning_rate": 6.9872012823617356e-06, "loss": 0.3742, "step": 3890 }, { "epoch": 1.2991652754590985, "grad_norm": 0.8680999867266358, "learning_rate": 6.985418536857611e-06, "loss": 0.3858, "step": 3891 }, { "epoch": 1.299499165275459, "grad_norm": 0.8845567624992671, "learning_rate": 6.983635491655147e-06, "loss": 0.3859, "step": 3892 }, { "epoch": 1.2998330550918196, "grad_norm": 0.8347873265986612, "learning_rate": 6.981852147023491e-06, "loss": 0.3822, "step": 3893 }, { "epoch": 1.3001669449081803, "grad_norm": 0.8742619305264085, "learning_rate": 6.9800685032318415e-06, "loss": 0.388, "step": 3894 }, { "epoch": 1.300500834724541, "grad_norm": 0.8832902854551339, "learning_rate": 6.978284560549437e-06, "loss": 0.3873, "step": 3895 }, { "epoch": 1.3008347245409015, "grad_norm": 0.8626417177677663, "learning_rate": 6.9765003192455625e-06, "loss": 0.3914, "step": 3896 }, { "epoch": 1.301168614357262, "grad_norm": 0.8589007794334477, "learning_rate": 6.97471577958955e-06, "loss": 0.3881, "step": 3897 }, { "epoch": 1.3015025041736228, "grad_norm": 0.888532703479526, "learning_rate": 6.972930941850775e-06, "loss": 0.3878, "step": 3898 }, { "epoch": 1.3018363939899833, "grad_norm": 0.8828072471532727, "learning_rate": 6.971145806298659e-06, "loss": 0.3773, "step": 3899 }, { "epoch": 1.302170283806344, "grad_norm": 0.8567778793739964, "learning_rate": 6.969360373202666e-06, "loss": 0.3832, "step": 3900 }, { "epoch": 1.3025041736227045, "grad_norm": 0.8715271959614392, "learning_rate": 6.967574642832309e-06, "loss": 0.3748, "step": 3901 }, { "epoch": 1.302838063439065, "grad_norm": 0.8628331303468495, "learning_rate": 6.96578861545714e-06, "loss": 0.3891, "step": 3902 }, { "epoch": 1.3031719532554258, "grad_norm": 0.8812636653853843, "learning_rate": 6.9640022913467606e-06, "loss": 0.3847, "step": 3903 }, { "epoch": 1.3035058430717863, "grad_norm": 0.8986714987746092, "learning_rate": 6.962215670770819e-06, "loss": 0.3661, "step": 3904 }, { "epoch": 1.303839732888147, "grad_norm": 0.8938178798791662, "learning_rate": 6.9604287539990016e-06, "loss": 0.387, "step": 3905 }, { "epoch": 1.3041736227045075, "grad_norm": 0.8545673083415949, "learning_rate": 6.9586415413010435e-06, "loss": 0.3657, "step": 3906 }, { "epoch": 1.304507512520868, "grad_norm": 0.8284408582038825, "learning_rate": 6.956854032946725e-06, "loss": 0.3635, "step": 3907 }, { "epoch": 1.3048414023372288, "grad_norm": 0.8893359163707841, "learning_rate": 6.955066229205868e-06, "loss": 0.3776, "step": 3908 }, { "epoch": 1.3051752921535893, "grad_norm": 0.821477475966994, "learning_rate": 6.953278130348344e-06, "loss": 0.3501, "step": 3909 }, { "epoch": 1.30550918196995, "grad_norm": 0.8871247635716404, "learning_rate": 6.951489736644064e-06, "loss": 0.38, "step": 3910 }, { "epoch": 1.3058430717863105, "grad_norm": 0.887222664486688, "learning_rate": 6.949701048362986e-06, "loss": 0.3741, "step": 3911 }, { "epoch": 1.306176961602671, "grad_norm": 0.8683411670338697, "learning_rate": 6.94791206577511e-06, "loss": 0.3759, "step": 3912 }, { "epoch": 1.3065108514190318, "grad_norm": 0.8584668984817216, "learning_rate": 6.9461227891504846e-06, "loss": 0.3697, "step": 3913 }, { "epoch": 1.3068447412353923, "grad_norm": 0.8753439415955073, "learning_rate": 6.944333218759201e-06, "loss": 0.3854, "step": 3914 }, { "epoch": 1.307178631051753, "grad_norm": 0.8666768288958908, "learning_rate": 6.942543354871394e-06, "loss": 0.3824, "step": 3915 }, { "epoch": 1.3075125208681135, "grad_norm": 0.8557936270787855, "learning_rate": 6.940753197757242e-06, "loss": 0.3838, "step": 3916 }, { "epoch": 1.307846410684474, "grad_norm": 0.8937782042327899, "learning_rate": 6.938962747686968e-06, "loss": 0.3807, "step": 3917 }, { "epoch": 1.3081803005008348, "grad_norm": 0.8731474453843634, "learning_rate": 6.937172004930841e-06, "loss": 0.4041, "step": 3918 }, { "epoch": 1.3085141903171953, "grad_norm": 0.8805057156141401, "learning_rate": 6.935380969759175e-06, "loss": 0.3796, "step": 3919 }, { "epoch": 1.308848080133556, "grad_norm": 0.8992131320541439, "learning_rate": 6.933589642442322e-06, "loss": 0.3865, "step": 3920 }, { "epoch": 1.3091819699499165, "grad_norm": 0.8888548488166783, "learning_rate": 6.931798023250687e-06, "loss": 0.3844, "step": 3921 }, { "epoch": 1.309515859766277, "grad_norm": 0.8916449415059049, "learning_rate": 6.93000611245471e-06, "loss": 0.3751, "step": 3922 }, { "epoch": 1.3098497495826378, "grad_norm": 0.8873605552162681, "learning_rate": 6.928213910324884e-06, "loss": 0.3775, "step": 3923 }, { "epoch": 1.3101836393989983, "grad_norm": 0.8800315643702818, "learning_rate": 6.926421417131737e-06, "loss": 0.3808, "step": 3924 }, { "epoch": 1.310517529215359, "grad_norm": 0.8908038603953868, "learning_rate": 6.924628633145847e-06, "loss": 0.3759, "step": 3925 }, { "epoch": 1.3108514190317195, "grad_norm": 0.8568166715245934, "learning_rate": 6.922835558637835e-06, "loss": 0.364, "step": 3926 }, { "epoch": 1.31118530884808, "grad_norm": 0.8396609158647508, "learning_rate": 6.921042193878364e-06, "loss": 0.3754, "step": 3927 }, { "epoch": 1.3115191986644408, "grad_norm": 0.8649136616855071, "learning_rate": 6.919248539138144e-06, "loss": 0.3711, "step": 3928 }, { "epoch": 1.3118530884808013, "grad_norm": 0.8950700420850619, "learning_rate": 6.917454594687923e-06, "loss": 0.393, "step": 3929 }, { "epoch": 1.312186978297162, "grad_norm": 0.8507368153264177, "learning_rate": 6.9156603607984996e-06, "loss": 0.3692, "step": 3930 }, { "epoch": 1.3125208681135225, "grad_norm": 0.8634927541669042, "learning_rate": 6.91386583774071e-06, "loss": 0.3734, "step": 3931 }, { "epoch": 1.312854757929883, "grad_norm": 0.8448644300910301, "learning_rate": 6.912071025785441e-06, "loss": 0.3673, "step": 3932 }, { "epoch": 1.3131886477462438, "grad_norm": 0.9195201784521173, "learning_rate": 6.910275925203614e-06, "loss": 0.3837, "step": 3933 }, { "epoch": 1.3135225375626043, "grad_norm": 0.8508626958733463, "learning_rate": 6.908480536266204e-06, "loss": 0.3645, "step": 3934 }, { "epoch": 1.313856427378965, "grad_norm": 0.8901513098516732, "learning_rate": 6.90668485924422e-06, "loss": 0.3982, "step": 3935 }, { "epoch": 1.3141903171953255, "grad_norm": 0.8492570235362653, "learning_rate": 6.904888894408721e-06, "loss": 0.3698, "step": 3936 }, { "epoch": 1.314524207011686, "grad_norm": 0.8939726335598079, "learning_rate": 6.903092642030808e-06, "loss": 0.4006, "step": 3937 }, { "epoch": 1.3148580968280468, "grad_norm": 0.8542224206772626, "learning_rate": 6.901296102381623e-06, "loss": 0.3845, "step": 3938 }, { "epoch": 1.3151919866444073, "grad_norm": 0.8432325028659107, "learning_rate": 6.899499275732355e-06, "loss": 0.3838, "step": 3939 }, { "epoch": 1.315525876460768, "grad_norm": 0.8617435141900345, "learning_rate": 6.897702162354232e-06, "loss": 0.3754, "step": 3940 }, { "epoch": 1.3158597662771285, "grad_norm": 0.8560695926437251, "learning_rate": 6.895904762518529e-06, "loss": 0.3661, "step": 3941 }, { "epoch": 1.316193656093489, "grad_norm": 0.8691502133196474, "learning_rate": 6.894107076496564e-06, "loss": 0.3849, "step": 3942 }, { "epoch": 1.3165275459098498, "grad_norm": 0.882440278354119, "learning_rate": 6.892309104559696e-06, "loss": 0.3732, "step": 3943 }, { "epoch": 1.3168614357262103, "grad_norm": 0.8872061091918327, "learning_rate": 6.8905108469793294e-06, "loss": 0.3833, "step": 3944 }, { "epoch": 1.317195325542571, "grad_norm": 0.8630365787206778, "learning_rate": 6.88871230402691e-06, "loss": 0.3955, "step": 3945 }, { "epoch": 1.3175292153589315, "grad_norm": 0.8576732325797689, "learning_rate": 6.886913475973926e-06, "loss": 0.3861, "step": 3946 }, { "epoch": 1.317863105175292, "grad_norm": 0.8726767724262778, "learning_rate": 6.8851143630919125e-06, "loss": 0.3669, "step": 3947 }, { "epoch": 1.3181969949916528, "grad_norm": 0.8466555737617728, "learning_rate": 6.883314965652443e-06, "loss": 0.3756, "step": 3948 }, { "epoch": 1.3185308848080133, "grad_norm": 0.8150642456451986, "learning_rate": 6.881515283927138e-06, "loss": 0.3656, "step": 3949 }, { "epoch": 1.318864774624374, "grad_norm": 0.8716247567089951, "learning_rate": 6.879715318187656e-06, "loss": 0.3839, "step": 3950 }, { "epoch": 1.3191986644407345, "grad_norm": 0.836130556532165, "learning_rate": 6.877915068705703e-06, "loss": 0.3788, "step": 3951 }, { "epoch": 1.319532554257095, "grad_norm": 0.8609737962183535, "learning_rate": 6.876114535753029e-06, "loss": 0.3788, "step": 3952 }, { "epoch": 1.3198664440734558, "grad_norm": 0.8731807005345051, "learning_rate": 6.874313719601418e-06, "loss": 0.3757, "step": 3953 }, { "epoch": 1.3202003338898163, "grad_norm": 0.8743469063732635, "learning_rate": 6.872512620522707e-06, "loss": 0.3786, "step": 3954 }, { "epoch": 1.320534223706177, "grad_norm": 0.875888187605601, "learning_rate": 6.870711238788769e-06, "loss": 0.3719, "step": 3955 }, { "epoch": 1.3208681135225375, "grad_norm": 0.884076456735553, "learning_rate": 6.868909574671524e-06, "loss": 0.3837, "step": 3956 }, { "epoch": 1.321202003338898, "grad_norm": 0.8777487492903319, "learning_rate": 6.867107628442933e-06, "loss": 0.379, "step": 3957 }, { "epoch": 1.3215358931552588, "grad_norm": 0.8962964476262744, "learning_rate": 6.865305400374995e-06, "loss": 0.3946, "step": 3958 }, { "epoch": 1.3218697829716195, "grad_norm": 0.8817655384348437, "learning_rate": 6.86350289073976e-06, "loss": 0.398, "step": 3959 }, { "epoch": 1.32220367278798, "grad_norm": 0.859856360946415, "learning_rate": 6.861700099809317e-06, "loss": 0.3802, "step": 3960 }, { "epoch": 1.3225375626043405, "grad_norm": 0.8316814340625119, "learning_rate": 6.859897027855793e-06, "loss": 0.3681, "step": 3961 }, { "epoch": 1.322871452420701, "grad_norm": 0.8765915298637793, "learning_rate": 6.858093675151363e-06, "loss": 0.39, "step": 3962 }, { "epoch": 1.3232053422370618, "grad_norm": 0.8634779537126527, "learning_rate": 6.856290041968243e-06, "loss": 0.3806, "step": 3963 }, { "epoch": 1.3235392320534225, "grad_norm": 0.8642038974189272, "learning_rate": 6.85448612857869e-06, "loss": 0.3835, "step": 3964 }, { "epoch": 1.323873121869783, "grad_norm": 0.8889387457789055, "learning_rate": 6.852681935255007e-06, "loss": 0.3838, "step": 3965 }, { "epoch": 1.3242070116861435, "grad_norm": 0.8667406006681403, "learning_rate": 6.850877462269531e-06, "loss": 0.3897, "step": 3966 }, { "epoch": 1.3245409015025043, "grad_norm": 0.8554529465798284, "learning_rate": 6.849072709894651e-06, "loss": 0.3625, "step": 3967 }, { "epoch": 1.3248747913188648, "grad_norm": 0.8581041543284151, "learning_rate": 6.847267678402793e-06, "loss": 0.3887, "step": 3968 }, { "epoch": 1.3252086811352255, "grad_norm": 0.8448811224957059, "learning_rate": 6.845462368066426e-06, "loss": 0.3786, "step": 3969 }, { "epoch": 1.325542570951586, "grad_norm": 0.8923945778458191, "learning_rate": 6.84365677915806e-06, "loss": 0.3931, "step": 3970 }, { "epoch": 1.3258764607679465, "grad_norm": 0.8476894588380592, "learning_rate": 6.84185091195025e-06, "loss": 0.3571, "step": 3971 }, { "epoch": 1.3262103505843073, "grad_norm": 0.8876418911900106, "learning_rate": 6.8400447667155875e-06, "loss": 0.3957, "step": 3972 }, { "epoch": 1.3265442404006678, "grad_norm": 0.8805463522700633, "learning_rate": 6.838238343726714e-06, "loss": 0.3804, "step": 3973 }, { "epoch": 1.3268781302170285, "grad_norm": 0.8573698352928097, "learning_rate": 6.8364316432563046e-06, "loss": 0.3856, "step": 3974 }, { "epoch": 1.327212020033389, "grad_norm": 0.8959397111427385, "learning_rate": 6.834624665577085e-06, "loss": 0.3993, "step": 3975 }, { "epoch": 1.3275459098497495, "grad_norm": 0.8576878537034727, "learning_rate": 6.832817410961813e-06, "loss": 0.388, "step": 3976 }, { "epoch": 1.3278797996661102, "grad_norm": 0.8489135060769818, "learning_rate": 6.831009879683296e-06, "loss": 0.3669, "step": 3977 }, { "epoch": 1.3282136894824708, "grad_norm": 0.839718360857908, "learning_rate": 6.829202072014379e-06, "loss": 0.3873, "step": 3978 }, { "epoch": 1.3285475792988315, "grad_norm": 0.8396249887381968, "learning_rate": 6.827393988227951e-06, "loss": 0.3734, "step": 3979 }, { "epoch": 1.328881469115192, "grad_norm": 0.8567643222172687, "learning_rate": 6.825585628596942e-06, "loss": 0.3818, "step": 3980 }, { "epoch": 1.3292153589315525, "grad_norm": 0.8749692150615669, "learning_rate": 6.823776993394321e-06, "loss": 0.3744, "step": 3981 }, { "epoch": 1.3295492487479132, "grad_norm": 0.8279915978408737, "learning_rate": 6.821968082893102e-06, "loss": 0.3671, "step": 3982 }, { "epoch": 1.3298831385642738, "grad_norm": 0.8093532887873065, "learning_rate": 6.820158897366342e-06, "loss": 0.367, "step": 3983 }, { "epoch": 1.3302170283806345, "grad_norm": 0.8748237386845517, "learning_rate": 6.8183494370871335e-06, "loss": 0.3779, "step": 3984 }, { "epoch": 1.330550918196995, "grad_norm": 0.8615899823660492, "learning_rate": 6.8165397023286176e-06, "loss": 0.3718, "step": 3985 }, { "epoch": 1.3308848080133555, "grad_norm": 0.8482119402637547, "learning_rate": 6.81472969336397e-06, "loss": 0.3775, "step": 3986 }, { "epoch": 1.3312186978297162, "grad_norm": 0.8385383745505703, "learning_rate": 6.812919410466412e-06, "loss": 0.3528, "step": 3987 }, { "epoch": 1.3315525876460768, "grad_norm": 0.8860297433915152, "learning_rate": 6.811108853909207e-06, "loss": 0.3759, "step": 3988 }, { "epoch": 1.3318864774624375, "grad_norm": 0.8463061753966, "learning_rate": 6.8092980239656534e-06, "loss": 0.3699, "step": 3989 }, { "epoch": 1.332220367278798, "grad_norm": 0.8625016874859682, "learning_rate": 6.8074869209091014e-06, "loss": 0.3726, "step": 3990 }, { "epoch": 1.3325542570951585, "grad_norm": 0.8953454247847195, "learning_rate": 6.805675545012933e-06, "loss": 0.3942, "step": 3991 }, { "epoch": 1.3328881469115192, "grad_norm": 0.8420651630802977, "learning_rate": 6.8038638965505745e-06, "loss": 0.3589, "step": 3992 }, { "epoch": 1.3332220367278798, "grad_norm": 0.8016670611735178, "learning_rate": 6.8020519757954965e-06, "loss": 0.3525, "step": 3993 }, { "epoch": 1.3335559265442405, "grad_norm": 0.893064828740011, "learning_rate": 6.800239783021204e-06, "loss": 0.3879, "step": 3994 }, { "epoch": 1.333889816360601, "grad_norm": 0.8449693083320802, "learning_rate": 6.79842731850125e-06, "loss": 0.3729, "step": 3995 }, { "epoch": 1.3342237061769615, "grad_norm": 0.8658214066898399, "learning_rate": 6.796614582509224e-06, "loss": 0.3794, "step": 3996 }, { "epoch": 1.3345575959933222, "grad_norm": 0.8535469252940094, "learning_rate": 6.794801575318758e-06, "loss": 0.3728, "step": 3997 }, { "epoch": 1.3348914858096828, "grad_norm": 0.9093446580878108, "learning_rate": 6.792988297203527e-06, "loss": 0.4111, "step": 3998 }, { "epoch": 1.3352253756260435, "grad_norm": 0.8807619075467364, "learning_rate": 6.7911747484372416e-06, "loss": 0.3927, "step": 3999 }, { "epoch": 1.335559265442404, "grad_norm": 0.8427963197472929, "learning_rate": 6.789360929293658e-06, "loss": 0.374, "step": 4000 }, { "epoch": 1.3358931552587645, "grad_norm": 0.8254277628184616, "learning_rate": 6.787546840046573e-06, "loss": 0.3604, "step": 4001 }, { "epoch": 1.3362270450751252, "grad_norm": 0.878068275785974, "learning_rate": 6.78573248096982e-06, "loss": 0.3692, "step": 4002 }, { "epoch": 1.3365609348914858, "grad_norm": 0.8985183424771622, "learning_rate": 6.783917852337279e-06, "loss": 0.391, "step": 4003 }, { "epoch": 1.3368948247078465, "grad_norm": 0.8954805694812714, "learning_rate": 6.7821029544228644e-06, "loss": 0.3967, "step": 4004 }, { "epoch": 1.337228714524207, "grad_norm": 0.8413032249200882, "learning_rate": 6.780287787500537e-06, "loss": 0.3824, "step": 4005 }, { "epoch": 1.3375626043405675, "grad_norm": 0.8575042222419962, "learning_rate": 6.778472351844296e-06, "loss": 0.3746, "step": 4006 }, { "epoch": 1.3378964941569282, "grad_norm": 0.8572333712653251, "learning_rate": 6.776656647728178e-06, "loss": 0.3739, "step": 4007 }, { "epoch": 1.3382303839732888, "grad_norm": 0.9009798455364403, "learning_rate": 6.774840675426266e-06, "loss": 0.3885, "step": 4008 }, { "epoch": 1.3385642737896495, "grad_norm": 0.8586216893365269, "learning_rate": 6.773024435212678e-06, "loss": 0.3761, "step": 4009 }, { "epoch": 1.33889816360601, "grad_norm": 0.8259667278131994, "learning_rate": 6.771207927361578e-06, "loss": 0.3409, "step": 4010 }, { "epoch": 1.3392320534223705, "grad_norm": 0.8671574457979933, "learning_rate": 6.769391152147164e-06, "loss": 0.3738, "step": 4011 }, { "epoch": 1.3395659432387312, "grad_norm": 0.8782189068117594, "learning_rate": 6.76757410984368e-06, "loss": 0.3785, "step": 4012 }, { "epoch": 1.3398998330550917, "grad_norm": 0.8614056708659601, "learning_rate": 6.765756800725407e-06, "loss": 0.3685, "step": 4013 }, { "epoch": 1.3402337228714525, "grad_norm": 0.8958835236096396, "learning_rate": 6.763939225066668e-06, "loss": 0.3929, "step": 4014 }, { "epoch": 1.340567612687813, "grad_norm": 0.8832995271551406, "learning_rate": 6.762121383141824e-06, "loss": 0.3829, "step": 4015 }, { "epoch": 1.3409015025041735, "grad_norm": 0.8605596370508561, "learning_rate": 6.760303275225281e-06, "loss": 0.3723, "step": 4016 }, { "epoch": 1.3412353923205342, "grad_norm": 0.8627270477452664, "learning_rate": 6.758484901591478e-06, "loss": 0.3882, "step": 4017 }, { "epoch": 1.3415692821368947, "grad_norm": 0.854680103713029, "learning_rate": 6.7566662625148985e-06, "loss": 0.3819, "step": 4018 }, { "epoch": 1.3419031719532555, "grad_norm": 0.8939769221063326, "learning_rate": 6.754847358270067e-06, "loss": 0.3862, "step": 4019 }, { "epoch": 1.342237061769616, "grad_norm": 0.8869023400312002, "learning_rate": 6.753028189131545e-06, "loss": 0.3965, "step": 4020 }, { "epoch": 1.3425709515859765, "grad_norm": 0.8984581565006766, "learning_rate": 6.75120875537394e-06, "loss": 0.3843, "step": 4021 }, { "epoch": 1.3429048414023372, "grad_norm": 0.8613105203113272, "learning_rate": 6.749389057271889e-06, "loss": 0.3758, "step": 4022 }, { "epoch": 1.343238731218698, "grad_norm": 0.8873554229307132, "learning_rate": 6.7475690951000784e-06, "loss": 0.3882, "step": 4023 }, { "epoch": 1.3435726210350585, "grad_norm": 0.8535289766575623, "learning_rate": 6.74574886913323e-06, "loss": 0.3702, "step": 4024 }, { "epoch": 1.343906510851419, "grad_norm": 0.847381939223058, "learning_rate": 6.743928379646105e-06, "loss": 0.3633, "step": 4025 }, { "epoch": 1.3442404006677795, "grad_norm": 0.8756785367229691, "learning_rate": 6.74210762691351e-06, "loss": 0.3972, "step": 4026 }, { "epoch": 1.3445742904841402, "grad_norm": 0.8947015023243476, "learning_rate": 6.740286611210283e-06, "loss": 0.3794, "step": 4027 }, { "epoch": 1.344908180300501, "grad_norm": 0.8463091574819666, "learning_rate": 6.738465332811305e-06, "loss": 0.3737, "step": 4028 }, { "epoch": 1.3452420701168615, "grad_norm": 0.8523748273069683, "learning_rate": 6.736643791991501e-06, "loss": 0.3781, "step": 4029 }, { "epoch": 1.345575959933222, "grad_norm": 0.8573912586670822, "learning_rate": 6.7348219890258315e-06, "loss": 0.3692, "step": 4030 }, { "epoch": 1.3459098497495827, "grad_norm": 0.8634447316366279, "learning_rate": 6.7329999241892965e-06, "loss": 0.3768, "step": 4031 }, { "epoch": 1.3462437395659432, "grad_norm": 0.7816405247332506, "learning_rate": 6.731177597756934e-06, "loss": 0.3554, "step": 4032 }, { "epoch": 1.346577629382304, "grad_norm": 0.8349089427521723, "learning_rate": 6.7293550100038264e-06, "loss": 0.3666, "step": 4033 }, { "epoch": 1.3469115191986645, "grad_norm": 0.8714760644651907, "learning_rate": 6.727532161205092e-06, "loss": 0.3984, "step": 4034 }, { "epoch": 1.347245409015025, "grad_norm": 0.8795066591717327, "learning_rate": 6.725709051635888e-06, "loss": 0.404, "step": 4035 }, { "epoch": 1.3475792988313857, "grad_norm": 0.8740954571103982, "learning_rate": 6.723885681571414e-06, "loss": 0.3958, "step": 4036 }, { "epoch": 1.3479131886477462, "grad_norm": 0.8618713672597033, "learning_rate": 6.722062051286906e-06, "loss": 0.3792, "step": 4037 }, { "epoch": 1.348247078464107, "grad_norm": 0.8388122991554573, "learning_rate": 6.720238161057642e-06, "loss": 0.3628, "step": 4038 }, { "epoch": 1.3485809682804675, "grad_norm": 0.8699752064356225, "learning_rate": 6.718414011158937e-06, "loss": 0.3907, "step": 4039 }, { "epoch": 1.348914858096828, "grad_norm": 0.8703274657579507, "learning_rate": 6.716589601866144e-06, "loss": 0.3837, "step": 4040 }, { "epoch": 1.3492487479131887, "grad_norm": 0.8297891630243317, "learning_rate": 6.71476493345466e-06, "loss": 0.3684, "step": 4041 }, { "epoch": 1.3495826377295492, "grad_norm": 0.8742670113083418, "learning_rate": 6.712940006199915e-06, "loss": 0.3889, "step": 4042 }, { "epoch": 1.34991652754591, "grad_norm": 0.8688278328708887, "learning_rate": 6.7111148203773845e-06, "loss": 0.3819, "step": 4043 }, { "epoch": 1.3502504173622705, "grad_norm": 0.8543217588567006, "learning_rate": 6.7092893762625775e-06, "loss": 0.364, "step": 4044 }, { "epoch": 1.350584307178631, "grad_norm": 0.8669576494574048, "learning_rate": 6.707463674131045e-06, "loss": 0.3721, "step": 4045 }, { "epoch": 1.3509181969949917, "grad_norm": 0.8438918461486629, "learning_rate": 6.705637714258377e-06, "loss": 0.3715, "step": 4046 }, { "epoch": 1.3512520868113522, "grad_norm": 0.8936827163536376, "learning_rate": 6.703811496920201e-06, "loss": 0.4021, "step": 4047 }, { "epoch": 1.351585976627713, "grad_norm": 0.8446164659502546, "learning_rate": 6.7019850223921835e-06, "loss": 0.3787, "step": 4048 }, { "epoch": 1.3519198664440735, "grad_norm": 0.9201224884286889, "learning_rate": 6.7001582909500295e-06, "loss": 0.3978, "step": 4049 }, { "epoch": 1.352253756260434, "grad_norm": 0.821431035635867, "learning_rate": 6.698331302869485e-06, "loss": 0.3747, "step": 4050 }, { "epoch": 1.3525876460767947, "grad_norm": 0.8716035312459567, "learning_rate": 6.696504058426333e-06, "loss": 0.3686, "step": 4051 }, { "epoch": 1.3529215358931552, "grad_norm": 0.8321886933701182, "learning_rate": 6.6946765578963955e-06, "loss": 0.366, "step": 4052 }, { "epoch": 1.353255425709516, "grad_norm": 0.9185338225053796, "learning_rate": 6.692848801555533e-06, "loss": 0.3891, "step": 4053 }, { "epoch": 1.3535893155258765, "grad_norm": 0.8715312748282422, "learning_rate": 6.6910207896796464e-06, "loss": 0.3765, "step": 4054 }, { "epoch": 1.353923205342237, "grad_norm": 0.8452467558584225, "learning_rate": 6.689192522544669e-06, "loss": 0.374, "step": 4055 }, { "epoch": 1.3542570951585977, "grad_norm": 0.8261396000954917, "learning_rate": 6.687364000426583e-06, "loss": 0.3635, "step": 4056 }, { "epoch": 1.3545909849749582, "grad_norm": 0.8553688180938399, "learning_rate": 6.6855352236013995e-06, "loss": 0.3748, "step": 4057 }, { "epoch": 1.354924874791319, "grad_norm": 0.8394319866897155, "learning_rate": 6.683706192345173e-06, "loss": 0.3776, "step": 4058 }, { "epoch": 1.3552587646076795, "grad_norm": 0.8712414561132646, "learning_rate": 6.681876906933995e-06, "loss": 0.3797, "step": 4059 }, { "epoch": 1.35559265442404, "grad_norm": 0.8523193819194014, "learning_rate": 6.680047367643995e-06, "loss": 0.3707, "step": 4060 }, { "epoch": 1.3559265442404007, "grad_norm": 0.8848497016931828, "learning_rate": 6.678217574751341e-06, "loss": 0.4025, "step": 4061 }, { "epoch": 1.3562604340567612, "grad_norm": 0.8480347162631376, "learning_rate": 6.676387528532243e-06, "loss": 0.373, "step": 4062 }, { "epoch": 1.356594323873122, "grad_norm": 0.8549065042811639, "learning_rate": 6.674557229262942e-06, "loss": 0.3782, "step": 4063 }, { "epoch": 1.3569282136894825, "grad_norm": 0.8790367756933217, "learning_rate": 6.672726677219725e-06, "loss": 0.3993, "step": 4064 }, { "epoch": 1.357262103505843, "grad_norm": 0.8339299092511382, "learning_rate": 6.670895872678909e-06, "loss": 0.3641, "step": 4065 }, { "epoch": 1.3575959933222037, "grad_norm": 0.859918189597194, "learning_rate": 6.669064815916857e-06, "loss": 0.3832, "step": 4066 }, { "epoch": 1.3579298831385642, "grad_norm": 0.838130987801759, "learning_rate": 6.667233507209963e-06, "loss": 0.3632, "step": 4067 }, { "epoch": 1.358263772954925, "grad_norm": 0.8620085548774843, "learning_rate": 6.665401946834666e-06, "loss": 0.3709, "step": 4068 }, { "epoch": 1.3585976627712855, "grad_norm": 0.8623225260380293, "learning_rate": 6.663570135067439e-06, "loss": 0.3858, "step": 4069 }, { "epoch": 1.358931552587646, "grad_norm": 0.8861379430558695, "learning_rate": 6.6617380721847905e-06, "loss": 0.3827, "step": 4070 }, { "epoch": 1.3592654424040067, "grad_norm": 0.8569897947212134, "learning_rate": 6.659905758463271e-06, "loss": 0.3805, "step": 4071 }, { "epoch": 1.3595993322203672, "grad_norm": 0.8602231935550364, "learning_rate": 6.658073194179471e-06, "loss": 0.3618, "step": 4072 }, { "epoch": 1.359933222036728, "grad_norm": 0.8736974868697646, "learning_rate": 6.65624037961001e-06, "loss": 0.3761, "step": 4073 }, { "epoch": 1.3602671118530885, "grad_norm": 0.8916915126001473, "learning_rate": 6.654407315031557e-06, "loss": 0.3864, "step": 4074 }, { "epoch": 1.360601001669449, "grad_norm": 0.8462475047865492, "learning_rate": 6.652574000720808e-06, "loss": 0.3819, "step": 4075 }, { "epoch": 1.3609348914858097, "grad_norm": 0.8756255138874385, "learning_rate": 6.650740436954502e-06, "loss": 0.3742, "step": 4076 }, { "epoch": 1.3612687813021702, "grad_norm": 0.8640250231139371, "learning_rate": 6.648906624009417e-06, "loss": 0.3693, "step": 4077 }, { "epoch": 1.361602671118531, "grad_norm": 0.8264162837426313, "learning_rate": 6.647072562162364e-06, "loss": 0.3632, "step": 4078 }, { "epoch": 1.3619365609348915, "grad_norm": 0.8197031737818121, "learning_rate": 6.645238251690196e-06, "loss": 0.3742, "step": 4079 }, { "epoch": 1.362270450751252, "grad_norm": 0.8832030306006127, "learning_rate": 6.643403692869801e-06, "loss": 0.3788, "step": 4080 }, { "epoch": 1.3626043405676127, "grad_norm": 0.8651438627991271, "learning_rate": 6.641568885978104e-06, "loss": 0.3747, "step": 4081 }, { "epoch": 1.3629382303839732, "grad_norm": 0.8835275632223183, "learning_rate": 6.63973383129207e-06, "loss": 0.3906, "step": 4082 }, { "epoch": 1.363272120200334, "grad_norm": 0.8922862259785971, "learning_rate": 6.637898529088698e-06, "loss": 0.3954, "step": 4083 }, { "epoch": 1.3636060100166945, "grad_norm": 0.8548445391764176, "learning_rate": 6.6360629796450295e-06, "loss": 0.3826, "step": 4084 }, { "epoch": 1.363939899833055, "grad_norm": 0.8520880662977227, "learning_rate": 6.634227183238137e-06, "loss": 0.3829, "step": 4085 }, { "epoch": 1.3642737896494157, "grad_norm": 0.8569354135680819, "learning_rate": 6.6323911401451356e-06, "loss": 0.3651, "step": 4086 }, { "epoch": 1.3646076794657762, "grad_norm": 0.9067849931635064, "learning_rate": 6.630554850643176e-06, "loss": 0.3951, "step": 4087 }, { "epoch": 1.364941569282137, "grad_norm": 0.8761993914423261, "learning_rate": 6.628718315009441e-06, "loss": 0.3767, "step": 4088 }, { "epoch": 1.3652754590984975, "grad_norm": 0.882686021005594, "learning_rate": 6.6268815335211614e-06, "loss": 0.386, "step": 4089 }, { "epoch": 1.365609348914858, "grad_norm": 0.9152321409844729, "learning_rate": 6.625044506455595e-06, "loss": 0.3855, "step": 4090 }, { "epoch": 1.3659432387312187, "grad_norm": 0.8723266581397457, "learning_rate": 6.6232072340900415e-06, "loss": 0.3861, "step": 4091 }, { "epoch": 1.3662771285475794, "grad_norm": 0.9380650199211579, "learning_rate": 6.621369716701835e-06, "loss": 0.3933, "step": 4092 }, { "epoch": 1.36661101836394, "grad_norm": 0.890569414303012, "learning_rate": 6.61953195456835e-06, "loss": 0.3983, "step": 4093 }, { "epoch": 1.3669449081803005, "grad_norm": 0.8588348740057568, "learning_rate": 6.617693947966995e-06, "loss": 0.38, "step": 4094 }, { "epoch": 1.367278797996661, "grad_norm": 0.872169831256406, "learning_rate": 6.61585569717522e-06, "loss": 0.3667, "step": 4095 }, { "epoch": 1.3676126878130217, "grad_norm": 0.8931478708547308, "learning_rate": 6.614017202470503e-06, "loss": 0.3941, "step": 4096 }, { "epoch": 1.3679465776293824, "grad_norm": 0.8423625112963377, "learning_rate": 6.6121784641303675e-06, "loss": 0.3687, "step": 4097 }, { "epoch": 1.368280467445743, "grad_norm": 0.8869919713819244, "learning_rate": 6.61033948243237e-06, "loss": 0.4009, "step": 4098 }, { "epoch": 1.3686143572621035, "grad_norm": 0.8192331108143094, "learning_rate": 6.608500257654106e-06, "loss": 0.3601, "step": 4099 }, { "epoch": 1.3689482470784642, "grad_norm": 0.8769317559215289, "learning_rate": 6.606660790073202e-06, "loss": 0.3803, "step": 4100 }, { "epoch": 1.3692821368948247, "grad_norm": 0.8759618730781309, "learning_rate": 6.604821079967328e-06, "loss": 0.3675, "step": 4101 }, { "epoch": 1.3696160267111854, "grad_norm": 0.7954010821540142, "learning_rate": 6.602981127614188e-06, "loss": 0.3427, "step": 4102 }, { "epoch": 1.369949916527546, "grad_norm": 0.907719014461805, "learning_rate": 6.601140933291519e-06, "loss": 0.3875, "step": 4103 }, { "epoch": 1.3702838063439065, "grad_norm": 0.8556961568509611, "learning_rate": 6.5993004972771e-06, "loss": 0.3829, "step": 4104 }, { "epoch": 1.3706176961602672, "grad_norm": 1.484975243289392, "learning_rate": 6.5974598198487465e-06, "loss": 0.3685, "step": 4105 }, { "epoch": 1.3709515859766277, "grad_norm": 0.8372127866125063, "learning_rate": 6.595618901284304e-06, "loss": 0.3629, "step": 4106 }, { "epoch": 1.3712854757929884, "grad_norm": 0.8446557630965318, "learning_rate": 6.593777741861661e-06, "loss": 0.3831, "step": 4107 }, { "epoch": 1.371619365609349, "grad_norm": 0.8103522437713642, "learning_rate": 6.59193634185874e-06, "loss": 0.3683, "step": 4108 }, { "epoch": 1.3719532554257095, "grad_norm": 0.8871183278451862, "learning_rate": 6.590094701553499e-06, "loss": 0.3786, "step": 4109 }, { "epoch": 1.3722871452420702, "grad_norm": 0.860370533310057, "learning_rate": 6.588252821223935e-06, "loss": 0.3808, "step": 4110 }, { "epoch": 1.3726210350584307, "grad_norm": 0.8400945553040268, "learning_rate": 6.586410701148074e-06, "loss": 0.3731, "step": 4111 }, { "epoch": 1.3729549248747914, "grad_norm": 0.8787909837499593, "learning_rate": 6.584568341603989e-06, "loss": 0.3874, "step": 4112 }, { "epoch": 1.373288814691152, "grad_norm": 0.8922506702088684, "learning_rate": 6.582725742869782e-06, "loss": 0.3885, "step": 4113 }, { "epoch": 1.3736227045075124, "grad_norm": 0.8570049872320888, "learning_rate": 6.580882905223593e-06, "loss": 0.3758, "step": 4114 }, { "epoch": 1.3739565943238732, "grad_norm": 0.844859042205196, "learning_rate": 6.5790398289435965e-06, "loss": 0.3554, "step": 4115 }, { "epoch": 1.3742904841402337, "grad_norm": 0.8736107739152456, "learning_rate": 6.577196514308006e-06, "loss": 0.3688, "step": 4116 }, { "epoch": 1.3746243739565944, "grad_norm": 0.8974808469277533, "learning_rate": 6.575352961595067e-06, "loss": 0.3832, "step": 4117 }, { "epoch": 1.374958263772955, "grad_norm": 0.8870523395085737, "learning_rate": 6.573509171083065e-06, "loss": 0.3806, "step": 4118 }, { "epoch": 1.3752921535893154, "grad_norm": 0.8717996994889268, "learning_rate": 6.57166514305032e-06, "loss": 0.3703, "step": 4119 }, { "epoch": 1.3756260434056762, "grad_norm": 0.9218462778374623, "learning_rate": 6.569820877775186e-06, "loss": 0.3879, "step": 4120 }, { "epoch": 1.3759599332220367, "grad_norm": 0.8742440057334022, "learning_rate": 6.567976375536056e-06, "loss": 0.384, "step": 4121 }, { "epoch": 1.3762938230383974, "grad_norm": 0.8590627500029293, "learning_rate": 6.566131636611355e-06, "loss": 0.3825, "step": 4122 }, { "epoch": 1.376627712854758, "grad_norm": 0.8739893915688214, "learning_rate": 6.564286661279549e-06, "loss": 0.3732, "step": 4123 }, { "epoch": 1.3769616026711184, "grad_norm": 0.8963133030509811, "learning_rate": 6.5624414498191325e-06, "loss": 0.3759, "step": 4124 }, { "epoch": 1.3772954924874792, "grad_norm": 0.8361582492959769, "learning_rate": 6.560596002508642e-06, "loss": 0.3716, "step": 4125 }, { "epoch": 1.3776293823038397, "grad_norm": 0.8648968908850726, "learning_rate": 6.5587503196266465e-06, "loss": 0.3745, "step": 4126 }, { "epoch": 1.3779632721202004, "grad_norm": 0.8555592139277598, "learning_rate": 6.5569044014517515e-06, "loss": 0.3824, "step": 4127 }, { "epoch": 1.378297161936561, "grad_norm": 0.8435115121920387, "learning_rate": 6.555058248262598e-06, "loss": 0.3854, "step": 4128 }, { "epoch": 1.3786310517529214, "grad_norm": 0.8597304252432503, "learning_rate": 6.553211860337862e-06, "loss": 0.3575, "step": 4129 }, { "epoch": 1.3789649415692822, "grad_norm": 0.8487732889292452, "learning_rate": 6.551365237956255e-06, "loss": 0.3591, "step": 4130 }, { "epoch": 1.3792988313856427, "grad_norm": 0.9111576977303164, "learning_rate": 6.549518381396526e-06, "loss": 0.3968, "step": 4131 }, { "epoch": 1.3796327212020034, "grad_norm": 0.8847355282680602, "learning_rate": 6.547671290937454e-06, "loss": 0.3733, "step": 4132 }, { "epoch": 1.379966611018364, "grad_norm": 0.8062238058902417, "learning_rate": 6.5458239668578605e-06, "loss": 0.349, "step": 4133 }, { "epoch": 1.3803005008347244, "grad_norm": 0.8330771874034149, "learning_rate": 6.543976409436595e-06, "loss": 0.3611, "step": 4134 }, { "epoch": 1.3806343906510852, "grad_norm": 0.8345041724097703, "learning_rate": 6.542128618952549e-06, "loss": 0.3708, "step": 4135 }, { "epoch": 1.3809682804674457, "grad_norm": 0.8743493409683398, "learning_rate": 6.540280595684643e-06, "loss": 0.3712, "step": 4136 }, { "epoch": 1.3813021702838064, "grad_norm": 0.8816812472590074, "learning_rate": 6.5384323399118365e-06, "loss": 0.3854, "step": 4137 }, { "epoch": 1.381636060100167, "grad_norm": 0.8583275845340858, "learning_rate": 6.5365838519131254e-06, "loss": 0.4029, "step": 4138 }, { "epoch": 1.3819699499165274, "grad_norm": 0.8428785461631245, "learning_rate": 6.534735131967536e-06, "loss": 0.3572, "step": 4139 }, { "epoch": 1.3823038397328882, "grad_norm": 0.8791419378513735, "learning_rate": 6.532886180354131e-06, "loss": 0.3681, "step": 4140 }, { "epoch": 1.3826377295492487, "grad_norm": 0.8646579477737752, "learning_rate": 6.531036997352013e-06, "loss": 0.3772, "step": 4141 }, { "epoch": 1.3829716193656094, "grad_norm": 0.8863000140022503, "learning_rate": 6.529187583240311e-06, "loss": 0.3775, "step": 4142 }, { "epoch": 1.38330550918197, "grad_norm": 0.8424214632062578, "learning_rate": 6.527337938298198e-06, "loss": 0.3582, "step": 4143 }, { "epoch": 1.3836393989983304, "grad_norm": 0.8514685372011083, "learning_rate": 6.525488062804874e-06, "loss": 0.3668, "step": 4144 }, { "epoch": 1.3839732888146912, "grad_norm": 0.8670388135707529, "learning_rate": 6.523637957039579e-06, "loss": 0.37, "step": 4145 }, { "epoch": 1.3843071786310517, "grad_norm": 0.8455925732783363, "learning_rate": 6.521787621281586e-06, "loss": 0.3804, "step": 4146 }, { "epoch": 1.3846410684474124, "grad_norm": 0.8755608711686845, "learning_rate": 6.5199370558102e-06, "loss": 0.3927, "step": 4147 }, { "epoch": 1.384974958263773, "grad_norm": 0.8627453720139057, "learning_rate": 6.518086260904767e-06, "loss": 0.3787, "step": 4148 }, { "epoch": 1.3853088480801334, "grad_norm": 0.8213235663137751, "learning_rate": 6.516235236844661e-06, "loss": 0.3689, "step": 4149 }, { "epoch": 1.3856427378964942, "grad_norm": 0.877840569010103, "learning_rate": 6.514383983909294e-06, "loss": 0.3792, "step": 4150 }, { "epoch": 1.3859766277128547, "grad_norm": 0.860547593972009, "learning_rate": 6.512532502378115e-06, "loss": 0.3748, "step": 4151 }, { "epoch": 1.3863105175292154, "grad_norm": 0.8772368925189025, "learning_rate": 6.5106807925306e-06, "loss": 0.3812, "step": 4152 }, { "epoch": 1.386644407345576, "grad_norm": 0.8667865600278231, "learning_rate": 6.508828854646268e-06, "loss": 0.3928, "step": 4153 }, { "epoch": 1.3869782971619364, "grad_norm": 0.8893634071035489, "learning_rate": 6.506976689004667e-06, "loss": 0.3818, "step": 4154 }, { "epoch": 1.3873121869782972, "grad_norm": 0.8630927641487136, "learning_rate": 6.50512429588538e-06, "loss": 0.3876, "step": 4155 }, { "epoch": 1.387646076794658, "grad_norm": 0.8527441293738088, "learning_rate": 6.503271675568026e-06, "loss": 0.3677, "step": 4156 }, { "epoch": 1.3879799666110184, "grad_norm": 0.8578619595581377, "learning_rate": 6.501418828332257e-06, "loss": 0.3771, "step": 4157 }, { "epoch": 1.388313856427379, "grad_norm": 0.8474410092670147, "learning_rate": 6.499565754457762e-06, "loss": 0.3782, "step": 4158 }, { "epoch": 1.3886477462437394, "grad_norm": 0.8535007498295405, "learning_rate": 6.497712454224259e-06, "loss": 0.3701, "step": 4159 }, { "epoch": 1.3889816360601002, "grad_norm": 0.8657489048202013, "learning_rate": 6.495858927911504e-06, "loss": 0.3958, "step": 4160 }, { "epoch": 1.389315525876461, "grad_norm": 0.8876709801780296, "learning_rate": 6.494005175799287e-06, "loss": 0.3744, "step": 4161 }, { "epoch": 1.3896494156928214, "grad_norm": 0.8296835313117399, "learning_rate": 6.49215119816743e-06, "loss": 0.3763, "step": 4162 }, { "epoch": 1.389983305509182, "grad_norm": 0.8170505618152457, "learning_rate": 6.490296995295792e-06, "loss": 0.3709, "step": 4163 }, { "epoch": 1.3903171953255427, "grad_norm": 0.8336691454051012, "learning_rate": 6.488442567464263e-06, "loss": 0.3619, "step": 4164 }, { "epoch": 1.3906510851419032, "grad_norm": 0.8845949479105644, "learning_rate": 6.48658791495277e-06, "loss": 0.3812, "step": 4165 }, { "epoch": 1.390984974958264, "grad_norm": 0.8464054658096828, "learning_rate": 6.4847330380412705e-06, "loss": 0.3759, "step": 4166 }, { "epoch": 1.3913188647746244, "grad_norm": 0.8649466391441885, "learning_rate": 6.482877937009758e-06, "loss": 0.371, "step": 4167 }, { "epoch": 1.391652754590985, "grad_norm": 0.8959910233456333, "learning_rate": 6.481022612138259e-06, "loss": 0.3845, "step": 4168 }, { "epoch": 1.3919866444073457, "grad_norm": 0.8677311187742855, "learning_rate": 6.479167063706837e-06, "loss": 0.3723, "step": 4169 }, { "epoch": 1.3923205342237062, "grad_norm": 0.8784374349317485, "learning_rate": 6.477311291995582e-06, "loss": 0.3702, "step": 4170 }, { "epoch": 1.392654424040067, "grad_norm": 0.8933163424918511, "learning_rate": 6.4754552972846285e-06, "loss": 0.3828, "step": 4171 }, { "epoch": 1.3929883138564274, "grad_norm": 0.9008843516839573, "learning_rate": 6.473599079854132e-06, "loss": 0.4056, "step": 4172 }, { "epoch": 1.393322203672788, "grad_norm": 0.8334805656329123, "learning_rate": 6.47174263998429e-06, "loss": 0.3757, "step": 4173 }, { "epoch": 1.3936560934891487, "grad_norm": 0.8756957533674948, "learning_rate": 6.469885977955334e-06, "loss": 0.3777, "step": 4174 }, { "epoch": 1.3939899833055092, "grad_norm": 0.8437314194469459, "learning_rate": 6.468029094047522e-06, "loss": 0.3606, "step": 4175 }, { "epoch": 1.39432387312187, "grad_norm": 0.8461748949715033, "learning_rate": 6.466171988541155e-06, "loss": 0.3867, "step": 4176 }, { "epoch": 1.3946577629382304, "grad_norm": 0.8214793107233465, "learning_rate": 6.464314661716558e-06, "loss": 0.3845, "step": 4177 }, { "epoch": 1.394991652754591, "grad_norm": 0.8735311892083598, "learning_rate": 6.462457113854098e-06, "loss": 0.3912, "step": 4178 }, { "epoch": 1.3953255425709516, "grad_norm": 0.833926537070665, "learning_rate": 6.460599345234168e-06, "loss": 0.3733, "step": 4179 }, { "epoch": 1.3956594323873122, "grad_norm": 0.8389896301481493, "learning_rate": 6.458741356137202e-06, "loss": 0.3671, "step": 4180 }, { "epoch": 1.395993322203673, "grad_norm": 0.8802117028810652, "learning_rate": 6.456883146843658e-06, "loss": 0.3694, "step": 4181 }, { "epoch": 1.3963272120200334, "grad_norm": 0.8880043380771895, "learning_rate": 6.455024717634033e-06, "loss": 0.3851, "step": 4182 }, { "epoch": 1.396661101836394, "grad_norm": 0.8697273702224037, "learning_rate": 6.4531660687888575e-06, "loss": 0.384, "step": 4183 }, { "epoch": 1.3969949916527546, "grad_norm": 0.8630644818004227, "learning_rate": 6.451307200588697e-06, "loss": 0.3872, "step": 4184 }, { "epoch": 1.3973288814691152, "grad_norm": 0.8635249081396171, "learning_rate": 6.449448113314141e-06, "loss": 0.364, "step": 4185 }, { "epoch": 1.397662771285476, "grad_norm": 0.8219860604385852, "learning_rate": 6.447588807245821e-06, "loss": 0.3575, "step": 4186 }, { "epoch": 1.3979966611018364, "grad_norm": 0.8216105817556442, "learning_rate": 6.445729282664402e-06, "loss": 0.3781, "step": 4187 }, { "epoch": 1.398330550918197, "grad_norm": 0.9098755003643256, "learning_rate": 6.4438695398505735e-06, "loss": 0.3871, "step": 4188 }, { "epoch": 1.3986644407345576, "grad_norm": 0.8557872784816903, "learning_rate": 6.4420095790850654e-06, "loss": 0.3723, "step": 4189 }, { "epoch": 1.3989983305509182, "grad_norm": 0.8539781367171108, "learning_rate": 6.440149400648638e-06, "loss": 0.3806, "step": 4190 }, { "epoch": 1.399332220367279, "grad_norm": 0.8426075305055706, "learning_rate": 6.438289004822085e-06, "loss": 0.3605, "step": 4191 }, { "epoch": 1.3996661101836394, "grad_norm": 0.8432343548962923, "learning_rate": 6.436428391886233e-06, "loss": 0.3776, "step": 4192 }, { "epoch": 1.4, "grad_norm": 0.8421431526621722, "learning_rate": 6.434567562121938e-06, "loss": 0.3735, "step": 4193 }, { "epoch": 1.4003338898163606, "grad_norm": 0.8653250279857599, "learning_rate": 6.4327065158100975e-06, "loss": 0.3903, "step": 4194 }, { "epoch": 1.4006677796327212, "grad_norm": 0.8242022417889636, "learning_rate": 6.4308452532316305e-06, "loss": 0.3747, "step": 4195 }, { "epoch": 1.4010016694490819, "grad_norm": 0.84304904146776, "learning_rate": 6.428983774667495e-06, "loss": 0.3601, "step": 4196 }, { "epoch": 1.4013355592654424, "grad_norm": 0.8488624398793724, "learning_rate": 6.4271220803986835e-06, "loss": 0.372, "step": 4197 }, { "epoch": 1.401669449081803, "grad_norm": 0.8581479518748699, "learning_rate": 6.425260170706216e-06, "loss": 0.3779, "step": 4198 }, { "epoch": 1.4020033388981636, "grad_norm": 0.8940940129102263, "learning_rate": 6.423398045871146e-06, "loss": 0.4054, "step": 4199 }, { "epoch": 1.4023372287145242, "grad_norm": 0.8226218684271506, "learning_rate": 6.421535706174564e-06, "loss": 0.3628, "step": 4200 }, { "epoch": 1.4026711185308849, "grad_norm": 0.8762626935184308, "learning_rate": 6.419673151897587e-06, "loss": 0.3871, "step": 4201 }, { "epoch": 1.4030050083472454, "grad_norm": 0.8471273663004117, "learning_rate": 6.4178103833213675e-06, "loss": 0.3655, "step": 4202 }, { "epoch": 1.403338898163606, "grad_norm": 0.8742513528123518, "learning_rate": 6.415947400727092e-06, "loss": 0.4037, "step": 4203 }, { "epoch": 1.4036727879799666, "grad_norm": 0.8287268614888376, "learning_rate": 6.4140842043959735e-06, "loss": 0.3625, "step": 4204 }, { "epoch": 1.4040066777963272, "grad_norm": 0.8379520126948361, "learning_rate": 6.412220794609264e-06, "loss": 0.3809, "step": 4205 }, { "epoch": 1.4043405676126879, "grad_norm": 0.8412254476926662, "learning_rate": 6.410357171648241e-06, "loss": 0.3755, "step": 4206 }, { "epoch": 1.4046744574290484, "grad_norm": 0.848835971066931, "learning_rate": 6.408493335794224e-06, "loss": 0.364, "step": 4207 }, { "epoch": 1.405008347245409, "grad_norm": 0.8804578053430427, "learning_rate": 6.406629287328552e-06, "loss": 0.3756, "step": 4208 }, { "epoch": 1.4053422370617696, "grad_norm": 0.874258531364819, "learning_rate": 6.404765026532607e-06, "loss": 0.3802, "step": 4209 }, { "epoch": 1.4056761268781301, "grad_norm": 0.8104548048621387, "learning_rate": 6.402900553687797e-06, "loss": 0.3447, "step": 4210 }, { "epoch": 1.4060100166944909, "grad_norm": 0.8596374628645098, "learning_rate": 6.401035869075563e-06, "loss": 0.3741, "step": 4211 }, { "epoch": 1.4063439065108514, "grad_norm": 0.8530393351823236, "learning_rate": 6.3991709729773776e-06, "loss": 0.3862, "step": 4212 }, { "epoch": 1.406677796327212, "grad_norm": 0.8657137773219723, "learning_rate": 6.39730586567475e-06, "loss": 0.3853, "step": 4213 }, { "epoch": 1.4070116861435726, "grad_norm": 0.8610271371114558, "learning_rate": 6.395440547449214e-06, "loss": 0.3838, "step": 4214 }, { "epoch": 1.4073455759599331, "grad_norm": 0.8845418231862705, "learning_rate": 6.393575018582342e-06, "loss": 0.3806, "step": 4215 }, { "epoch": 1.4076794657762939, "grad_norm": 0.877604750073328, "learning_rate": 6.391709279355731e-06, "loss": 0.3833, "step": 4216 }, { "epoch": 1.4080133555926544, "grad_norm": 0.8426391729428405, "learning_rate": 6.38984333005102e-06, "loss": 0.3556, "step": 4217 }, { "epoch": 1.408347245409015, "grad_norm": 0.8680211501524379, "learning_rate": 6.387977170949866e-06, "loss": 0.3768, "step": 4218 }, { "epoch": 1.4086811352253756, "grad_norm": 0.8373386322658903, "learning_rate": 6.3861108023339704e-06, "loss": 0.3619, "step": 4219 }, { "epoch": 1.4090150250417361, "grad_norm": 0.8467192967636391, "learning_rate": 6.384244224485059e-06, "loss": 0.3659, "step": 4220 }, { "epoch": 1.4093489148580969, "grad_norm": 0.871539872307342, "learning_rate": 6.382377437684892e-06, "loss": 0.391, "step": 4221 }, { "epoch": 1.4096828046744574, "grad_norm": 0.8602778895758482, "learning_rate": 6.3805104422152596e-06, "loss": 0.3935, "step": 4222 }, { "epoch": 1.410016694490818, "grad_norm": 0.8642730331617753, "learning_rate": 6.378643238357985e-06, "loss": 0.3837, "step": 4223 }, { "epoch": 1.4103505843071786, "grad_norm": 0.9067069449834073, "learning_rate": 6.376775826394919e-06, "loss": 0.3811, "step": 4224 }, { "epoch": 1.4106844741235394, "grad_norm": 0.8787361642815061, "learning_rate": 6.374908206607953e-06, "loss": 0.3874, "step": 4225 }, { "epoch": 1.4110183639398999, "grad_norm": 0.8560058582609157, "learning_rate": 6.373040379278997e-06, "loss": 0.3673, "step": 4226 }, { "epoch": 1.4113522537562604, "grad_norm": 0.845648073727628, "learning_rate": 6.371172344690005e-06, "loss": 0.378, "step": 4227 }, { "epoch": 1.411686143572621, "grad_norm": 0.8794127933207583, "learning_rate": 6.3693041031229505e-06, "loss": 0.3995, "step": 4228 }, { "epoch": 1.4120200333889816, "grad_norm": 0.8324450915190106, "learning_rate": 6.367435654859848e-06, "loss": 0.3663, "step": 4229 }, { "epoch": 1.4123539232053424, "grad_norm": 0.8502446104148926, "learning_rate": 6.36556700018274e-06, "loss": 0.3699, "step": 4230 }, { "epoch": 1.4126878130217029, "grad_norm": 0.8175882310188961, "learning_rate": 6.3636981393736956e-06, "loss": 0.3703, "step": 4231 }, { "epoch": 1.4130217028380634, "grad_norm": 0.856011174928994, "learning_rate": 6.36182907271482e-06, "loss": 0.365, "step": 4232 }, { "epoch": 1.4133555926544241, "grad_norm": 0.8560664024482781, "learning_rate": 6.35995980048825e-06, "loss": 0.3736, "step": 4233 }, { "epoch": 1.4136894824707846, "grad_norm": 0.8359680127485074, "learning_rate": 6.35809032297615e-06, "loss": 0.3738, "step": 4234 }, { "epoch": 1.4140233722871454, "grad_norm": 0.8805209873806314, "learning_rate": 6.356220640460719e-06, "loss": 0.3813, "step": 4235 }, { "epoch": 1.4143572621035059, "grad_norm": 0.9045217522720914, "learning_rate": 6.354350753224182e-06, "loss": 0.3931, "step": 4236 }, { "epoch": 1.4146911519198664, "grad_norm": 0.8279777755832443, "learning_rate": 6.3524806615488e-06, "loss": 0.3702, "step": 4237 }, { "epoch": 1.4150250417362271, "grad_norm": 0.8441210489496125, "learning_rate": 6.3506103657168635e-06, "loss": 0.3774, "step": 4238 }, { "epoch": 1.4153589315525876, "grad_norm": 0.8603132640655822, "learning_rate": 6.34873986601069e-06, "loss": 0.3927, "step": 4239 }, { "epoch": 1.4156928213689484, "grad_norm": 0.8653154193667691, "learning_rate": 6.346869162712636e-06, "loss": 0.3883, "step": 4240 }, { "epoch": 1.4160267111853089, "grad_norm": 0.8959623652676451, "learning_rate": 6.344998256105077e-06, "loss": 0.3857, "step": 4241 }, { "epoch": 1.4163606010016694, "grad_norm": 0.8440215640107999, "learning_rate": 6.3431271464704306e-06, "loss": 0.372, "step": 4242 }, { "epoch": 1.4166944908180301, "grad_norm": 0.8370770128442371, "learning_rate": 6.341255834091139e-06, "loss": 0.3791, "step": 4243 }, { "epoch": 1.4170283806343906, "grad_norm": 0.8330323760779768, "learning_rate": 6.339384319249676e-06, "loss": 0.3688, "step": 4244 }, { "epoch": 1.4173622704507514, "grad_norm": 0.8772338289969384, "learning_rate": 6.337512602228547e-06, "loss": 0.3963, "step": 4245 }, { "epoch": 1.4176961602671119, "grad_norm": 0.8516671935375753, "learning_rate": 6.335640683310287e-06, "loss": 0.3714, "step": 4246 }, { "epoch": 1.4180300500834724, "grad_norm": 0.8757769800932088, "learning_rate": 6.333768562777458e-06, "loss": 0.3894, "step": 4247 }, { "epoch": 1.4183639398998331, "grad_norm": 0.8583268726518558, "learning_rate": 6.331896240912663e-06, "loss": 0.3754, "step": 4248 }, { "epoch": 1.4186978297161936, "grad_norm": 0.8314676797205736, "learning_rate": 6.330023717998521e-06, "loss": 0.3764, "step": 4249 }, { "epoch": 1.4190317195325544, "grad_norm": 0.8617523050341904, "learning_rate": 6.3281509943176955e-06, "loss": 0.3701, "step": 4250 }, { "epoch": 1.4193656093489149, "grad_norm": 0.8501860521651396, "learning_rate": 6.326278070152869e-06, "loss": 0.3518, "step": 4251 }, { "epoch": 1.4196994991652754, "grad_norm": 0.8553547473844789, "learning_rate": 6.324404945786761e-06, "loss": 0.3829, "step": 4252 }, { "epoch": 1.4200333889816361, "grad_norm": 0.9101017667689983, "learning_rate": 6.322531621502118e-06, "loss": 0.3956, "step": 4253 }, { "epoch": 1.4203672787979966, "grad_norm": 0.8498913341785956, "learning_rate": 6.320658097581718e-06, "loss": 0.381, "step": 4254 }, { "epoch": 1.4207011686143574, "grad_norm": 0.8948368937026883, "learning_rate": 6.318784374308369e-06, "loss": 0.3933, "step": 4255 }, { "epoch": 1.4210350584307179, "grad_norm": 0.8639427971847738, "learning_rate": 6.31691045196491e-06, "loss": 0.3616, "step": 4256 }, { "epoch": 1.4213689482470784, "grad_norm": 0.8479883112568131, "learning_rate": 6.3150363308342054e-06, "loss": 0.3941, "step": 4257 }, { "epoch": 1.4217028380634391, "grad_norm": 0.899426019644512, "learning_rate": 6.313162011199159e-06, "loss": 0.3885, "step": 4258 }, { "epoch": 1.4220367278797996, "grad_norm": 0.8260373294952674, "learning_rate": 6.311287493342693e-06, "loss": 0.385, "step": 4259 }, { "epoch": 1.4223706176961604, "grad_norm": 0.849771547833609, "learning_rate": 6.309412777547769e-06, "loss": 0.3794, "step": 4260 }, { "epoch": 1.4227045075125209, "grad_norm": 0.8583341521076244, "learning_rate": 6.3075378640973735e-06, "loss": 0.3809, "step": 4261 }, { "epoch": 1.4230383973288814, "grad_norm": 0.8144581095178602, "learning_rate": 6.305662753274525e-06, "loss": 0.3545, "step": 4262 }, { "epoch": 1.423372287145242, "grad_norm": 0.8340407916862653, "learning_rate": 6.303787445362271e-06, "loss": 0.3849, "step": 4263 }, { "epoch": 1.4237061769616026, "grad_norm": 0.8420423490245025, "learning_rate": 6.301911940643686e-06, "loss": 0.3792, "step": 4264 }, { "epoch": 1.4240400667779634, "grad_norm": 0.8419296159277585, "learning_rate": 6.3000362394018795e-06, "loss": 0.373, "step": 4265 }, { "epoch": 1.4243739565943239, "grad_norm": 0.86881277838224, "learning_rate": 6.298160341919988e-06, "loss": 0.3716, "step": 4266 }, { "epoch": 1.4247078464106844, "grad_norm": 0.8620008487155382, "learning_rate": 6.296284248481177e-06, "loss": 0.367, "step": 4267 }, { "epoch": 1.425041736227045, "grad_norm": 0.8455845752195092, "learning_rate": 6.294407959368642e-06, "loss": 0.3704, "step": 4268 }, { "epoch": 1.4253756260434056, "grad_norm": 0.8824313330253365, "learning_rate": 6.292531474865608e-06, "loss": 0.3784, "step": 4269 }, { "epoch": 1.4257095158597664, "grad_norm": 0.9102371450060824, "learning_rate": 6.290654795255331e-06, "loss": 0.3839, "step": 4270 }, { "epoch": 1.4260434056761269, "grad_norm": 0.8656480824240743, "learning_rate": 6.288777920821095e-06, "loss": 0.3843, "step": 4271 }, { "epoch": 1.4263772954924874, "grad_norm": 0.8785201462277689, "learning_rate": 6.286900851846212e-06, "loss": 0.378, "step": 4272 }, { "epoch": 1.426711185308848, "grad_norm": 0.8620375246074452, "learning_rate": 6.285023588614027e-06, "loss": 0.355, "step": 4273 }, { "epoch": 1.4270450751252086, "grad_norm": 0.8894138533629757, "learning_rate": 6.283146131407911e-06, "loss": 0.375, "step": 4274 }, { "epoch": 1.4273789649415694, "grad_norm": 0.8753921474734623, "learning_rate": 6.281268480511268e-06, "loss": 0.3872, "step": 4275 }, { "epoch": 1.4277128547579299, "grad_norm": 0.8125006998639943, "learning_rate": 6.2793906362075255e-06, "loss": 0.3735, "step": 4276 }, { "epoch": 1.4280467445742904, "grad_norm": 0.8622629776461171, "learning_rate": 6.277512598780146e-06, "loss": 0.3987, "step": 4277 }, { "epoch": 1.428380634390651, "grad_norm": 0.8478219880798277, "learning_rate": 6.275634368512617e-06, "loss": 0.3713, "step": 4278 }, { "epoch": 1.4287145242070116, "grad_norm": 0.8674409658083138, "learning_rate": 6.273755945688458e-06, "loss": 0.3799, "step": 4279 }, { "epoch": 1.4290484140233723, "grad_norm": 0.8526446330125443, "learning_rate": 6.271877330591214e-06, "loss": 0.3735, "step": 4280 }, { "epoch": 1.4293823038397329, "grad_norm": 0.8327086768624173, "learning_rate": 6.2699985235044665e-06, "loss": 0.355, "step": 4281 }, { "epoch": 1.4297161936560934, "grad_norm": 0.8454568400458572, "learning_rate": 6.268119524711815e-06, "loss": 0.3714, "step": 4282 }, { "epoch": 1.430050083472454, "grad_norm": 0.8320786712412025, "learning_rate": 6.266240334496898e-06, "loss": 0.3458, "step": 4283 }, { "epoch": 1.4303839732888146, "grad_norm": 0.8617445347640856, "learning_rate": 6.264360953143378e-06, "loss": 0.3652, "step": 4284 }, { "epoch": 1.4307178631051753, "grad_norm": 0.8897247559923178, "learning_rate": 6.262481380934945e-06, "loss": 0.3834, "step": 4285 }, { "epoch": 1.4310517529215359, "grad_norm": 0.8272201917084367, "learning_rate": 6.260601618155322e-06, "loss": 0.3612, "step": 4286 }, { "epoch": 1.4313856427378964, "grad_norm": 0.7927307114191059, "learning_rate": 6.258721665088257e-06, "loss": 0.3676, "step": 4287 }, { "epoch": 1.431719532554257, "grad_norm": 0.9315026938558869, "learning_rate": 6.256841522017529e-06, "loss": 0.3944, "step": 4288 }, { "epoch": 1.4320534223706178, "grad_norm": 0.8297953625206356, "learning_rate": 6.254961189226947e-06, "loss": 0.3827, "step": 4289 }, { "epoch": 1.4323873121869783, "grad_norm": 0.8459008750990797, "learning_rate": 6.253080667000342e-06, "loss": 0.3716, "step": 4290 }, { "epoch": 1.4327212020033389, "grad_norm": 0.8232084729638264, "learning_rate": 6.2511999556215844e-06, "loss": 0.3756, "step": 4291 }, { "epoch": 1.4330550918196994, "grad_norm": 0.7895172793915807, "learning_rate": 6.249319055374561e-06, "loss": 0.3493, "step": 4292 }, { "epoch": 1.43338898163606, "grad_norm": 0.8365143117317172, "learning_rate": 6.247437966543196e-06, "loss": 0.3821, "step": 4293 }, { "epoch": 1.4337228714524208, "grad_norm": 0.8140907992184409, "learning_rate": 6.245556689411442e-06, "loss": 0.3532, "step": 4294 }, { "epoch": 1.4340567612687813, "grad_norm": 0.8383644110951787, "learning_rate": 6.243675224263271e-06, "loss": 0.3585, "step": 4295 }, { "epoch": 1.4343906510851419, "grad_norm": 0.8401483447401382, "learning_rate": 6.241793571382695e-06, "loss": 0.3704, "step": 4296 }, { "epoch": 1.4347245409015026, "grad_norm": 0.8484960259486307, "learning_rate": 6.239911731053746e-06, "loss": 0.3722, "step": 4297 }, { "epoch": 1.435058430717863, "grad_norm": 0.8366208355509132, "learning_rate": 6.238029703560489e-06, "loss": 0.3764, "step": 4298 }, { "epoch": 1.4353923205342238, "grad_norm": 0.8584639538989484, "learning_rate": 6.236147489187013e-06, "loss": 0.3764, "step": 4299 }, { "epoch": 1.4357262103505843, "grad_norm": 0.8614224484898336, "learning_rate": 6.234265088217442e-06, "loss": 0.3867, "step": 4300 }, { "epoch": 1.4360601001669449, "grad_norm": 0.8137489613211276, "learning_rate": 6.232382500935918e-06, "loss": 0.379, "step": 4301 }, { "epoch": 1.4363939899833056, "grad_norm": 0.8637903110745215, "learning_rate": 6.2304997276266225e-06, "loss": 0.3666, "step": 4302 }, { "epoch": 1.436727879799666, "grad_norm": 0.8821626936071583, "learning_rate": 6.228616768573755e-06, "loss": 0.4035, "step": 4303 }, { "epoch": 1.4370617696160268, "grad_norm": 0.8561052038748423, "learning_rate": 6.226733624061553e-06, "loss": 0.3811, "step": 4304 }, { "epoch": 1.4373956594323873, "grad_norm": 0.8948242502601671, "learning_rate": 6.224850294374271e-06, "loss": 0.3911, "step": 4305 }, { "epoch": 1.4377295492487479, "grad_norm": 0.8507782478382961, "learning_rate": 6.2229667797962e-06, "loss": 0.373, "step": 4306 }, { "epoch": 1.4380634390651086, "grad_norm": 0.8827861349644157, "learning_rate": 6.221083080611656e-06, "loss": 0.379, "step": 4307 }, { "epoch": 1.438397328881469, "grad_norm": 0.8651206790927275, "learning_rate": 6.219199197104982e-06, "loss": 0.3714, "step": 4308 }, { "epoch": 1.4387312186978298, "grad_norm": 0.8154891254269715, "learning_rate": 6.21731512956055e-06, "loss": 0.3543, "step": 4309 }, { "epoch": 1.4390651085141903, "grad_norm": 0.8302236233049456, "learning_rate": 6.21543087826276e-06, "loss": 0.3611, "step": 4310 }, { "epoch": 1.4393989983305508, "grad_norm": 0.8516075493962764, "learning_rate": 6.213546443496039e-06, "loss": 0.3749, "step": 4311 }, { "epoch": 1.4397328881469116, "grad_norm": 0.8843007084518466, "learning_rate": 6.211661825544841e-06, "loss": 0.3907, "step": 4312 }, { "epoch": 1.440066777963272, "grad_norm": 0.8710868524502136, "learning_rate": 6.209777024693647e-06, "loss": 0.3916, "step": 4313 }, { "epoch": 1.4404006677796328, "grad_norm": 0.8216151686316355, "learning_rate": 6.207892041226972e-06, "loss": 0.3584, "step": 4314 }, { "epoch": 1.4407345575959933, "grad_norm": 0.865466206318285, "learning_rate": 6.206006875429349e-06, "loss": 0.3764, "step": 4315 }, { "epoch": 1.4410684474123538, "grad_norm": 0.8173688854628182, "learning_rate": 6.204121527585346e-06, "loss": 0.3561, "step": 4316 }, { "epoch": 1.4414023372287146, "grad_norm": 0.9091708389338625, "learning_rate": 6.202235997979555e-06, "loss": 0.3929, "step": 4317 }, { "epoch": 1.441736227045075, "grad_norm": 0.839930673430571, "learning_rate": 6.200350286896596e-06, "loss": 0.3759, "step": 4318 }, { "epoch": 1.4420701168614358, "grad_norm": 0.8989138422240059, "learning_rate": 6.198464394621115e-06, "loss": 0.398, "step": 4319 }, { "epoch": 1.4424040066777963, "grad_norm": 0.8584336426526071, "learning_rate": 6.1965783214377895e-06, "loss": 0.385, "step": 4320 }, { "epoch": 1.4427378964941568, "grad_norm": 0.8485777554941375, "learning_rate": 6.194692067631319e-06, "loss": 0.3864, "step": 4321 }, { "epoch": 1.4430717863105176, "grad_norm": 0.8658875489793977, "learning_rate": 6.192805633486437e-06, "loss": 0.3743, "step": 4322 }, { "epoch": 1.443405676126878, "grad_norm": 0.8649626721250849, "learning_rate": 6.1909190192878954e-06, "loss": 0.3814, "step": 4323 }, { "epoch": 1.4437395659432388, "grad_norm": 0.865653496009155, "learning_rate": 6.189032225320482e-06, "loss": 0.3694, "step": 4324 }, { "epoch": 1.4440734557595993, "grad_norm": 0.8397257856804546, "learning_rate": 6.187145251869005e-06, "loss": 0.377, "step": 4325 }, { "epoch": 1.4444073455759598, "grad_norm": 0.8722538201685984, "learning_rate": 6.185258099218303e-06, "loss": 0.3894, "step": 4326 }, { "epoch": 1.4447412353923206, "grad_norm": 0.8480430367141202, "learning_rate": 6.183370767653244e-06, "loss": 0.3807, "step": 4327 }, { "epoch": 1.445075125208681, "grad_norm": 0.8440434411201598, "learning_rate": 6.181483257458715e-06, "loss": 0.3811, "step": 4328 }, { "epoch": 1.4454090150250418, "grad_norm": 0.8282512498760265, "learning_rate": 6.179595568919638e-06, "loss": 0.3753, "step": 4329 }, { "epoch": 1.4457429048414023, "grad_norm": 0.8557969275609926, "learning_rate": 6.177707702320961e-06, "loss": 0.3826, "step": 4330 }, { "epoch": 1.4460767946577628, "grad_norm": 0.8174913429188719, "learning_rate": 6.175819657947654e-06, "loss": 0.3651, "step": 4331 }, { "epoch": 1.4464106844741236, "grad_norm": 0.8310232626661425, "learning_rate": 6.173931436084718e-06, "loss": 0.3765, "step": 4332 }, { "epoch": 1.446744574290484, "grad_norm": 0.836933527126702, "learning_rate": 6.172043037017179e-06, "loss": 0.3618, "step": 4333 }, { "epoch": 1.4470784641068448, "grad_norm": 0.865316615382753, "learning_rate": 6.17015446103009e-06, "loss": 0.3904, "step": 4334 }, { "epoch": 1.4474123539232053, "grad_norm": 0.8410599057787306, "learning_rate": 6.168265708408532e-06, "loss": 0.3663, "step": 4335 }, { "epoch": 1.4477462437395658, "grad_norm": 0.8487004875004489, "learning_rate": 6.166376779437609e-06, "loss": 0.368, "step": 4336 }, { "epoch": 1.4480801335559266, "grad_norm": 0.8025912546398061, "learning_rate": 6.164487674402459e-06, "loss": 0.3588, "step": 4337 }, { "epoch": 1.448414023372287, "grad_norm": 0.9106744772064063, "learning_rate": 6.162598393588237e-06, "loss": 0.3994, "step": 4338 }, { "epoch": 1.4487479131886478, "grad_norm": 0.8470397884526315, "learning_rate": 6.160708937280133e-06, "loss": 0.3713, "step": 4339 }, { "epoch": 1.4490818030050083, "grad_norm": 0.8339705866296473, "learning_rate": 6.158819305763358e-06, "loss": 0.3533, "step": 4340 }, { "epoch": 1.4494156928213688, "grad_norm": 0.8092236150991935, "learning_rate": 6.156929499323152e-06, "loss": 0.3539, "step": 4341 }, { "epoch": 1.4497495826377296, "grad_norm": 0.8499698297667925, "learning_rate": 6.15503951824478e-06, "loss": 0.383, "step": 4342 }, { "epoch": 1.45008347245409, "grad_norm": 0.8541033483586359, "learning_rate": 6.153149362813535e-06, "loss": 0.3744, "step": 4343 }, { "epoch": 1.4504173622704508, "grad_norm": 0.8745521804781788, "learning_rate": 6.1512590333147335e-06, "loss": 0.3866, "step": 4344 }, { "epoch": 1.4507512520868113, "grad_norm": 0.8666813199977818, "learning_rate": 6.149368530033724e-06, "loss": 0.3865, "step": 4345 }, { "epoch": 1.4510851419031718, "grad_norm": 0.8229807302360485, "learning_rate": 6.147477853255872e-06, "loss": 0.371, "step": 4346 }, { "epoch": 1.4514190317195326, "grad_norm": 0.8763979726969154, "learning_rate": 6.145587003266581e-06, "loss": 0.386, "step": 4347 }, { "epoch": 1.451752921535893, "grad_norm": 0.8703116399937548, "learning_rate": 6.143695980351268e-06, "loss": 0.3576, "step": 4348 }, { "epoch": 1.4520868113522538, "grad_norm": 0.8526119259090235, "learning_rate": 6.141804784795387e-06, "loss": 0.3744, "step": 4349 }, { "epoch": 1.4524207011686143, "grad_norm": 0.8724976994225971, "learning_rate": 6.139913416884411e-06, "loss": 0.3801, "step": 4350 }, { "epoch": 1.4527545909849748, "grad_norm": 0.8891720734527058, "learning_rate": 6.138021876903842e-06, "loss": 0.3878, "step": 4351 }, { "epoch": 1.4530884808013356, "grad_norm": 0.8536225942951086, "learning_rate": 6.136130165139208e-06, "loss": 0.3819, "step": 4352 }, { "epoch": 1.453422370617696, "grad_norm": 0.8528572065109636, "learning_rate": 6.1342382818760615e-06, "loss": 0.3745, "step": 4353 }, { "epoch": 1.4537562604340568, "grad_norm": 0.8680417376768778, "learning_rate": 6.1323462273999815e-06, "loss": 0.3901, "step": 4354 }, { "epoch": 1.4540901502504173, "grad_norm": 0.8553214725797517, "learning_rate": 6.130454001996575e-06, "loss": 0.3869, "step": 4355 }, { "epoch": 1.4544240400667778, "grad_norm": 0.8411932569053029, "learning_rate": 6.128561605951471e-06, "loss": 0.3751, "step": 4356 }, { "epoch": 1.4547579298831386, "grad_norm": 0.849518736245358, "learning_rate": 6.126669039550326e-06, "loss": 0.3801, "step": 4357 }, { "epoch": 1.4550918196994993, "grad_norm": 0.8204979459543917, "learning_rate": 6.1247763030788235e-06, "loss": 0.3568, "step": 4358 }, { "epoch": 1.4554257095158598, "grad_norm": 0.8684481836008537, "learning_rate": 6.122883396822669e-06, "loss": 0.3716, "step": 4359 }, { "epoch": 1.4557595993322203, "grad_norm": 0.8440671009602334, "learning_rate": 6.1209903210676e-06, "loss": 0.3726, "step": 4360 }, { "epoch": 1.4560934891485808, "grad_norm": 0.8662773893414951, "learning_rate": 6.119097076099373e-06, "loss": 0.3766, "step": 4361 }, { "epoch": 1.4564273789649416, "grad_norm": 0.840658807356322, "learning_rate": 6.117203662203774e-06, "loss": 0.3835, "step": 4362 }, { "epoch": 1.4567612687813023, "grad_norm": 0.8652839928347242, "learning_rate": 6.115310079666612e-06, "loss": 0.3897, "step": 4363 }, { "epoch": 1.4570951585976628, "grad_norm": 0.8560325240357892, "learning_rate": 6.1134163287737245e-06, "loss": 0.3673, "step": 4364 }, { "epoch": 1.4574290484140233, "grad_norm": 0.9191975147141891, "learning_rate": 6.1115224098109715e-06, "loss": 0.378, "step": 4365 }, { "epoch": 1.457762938230384, "grad_norm": 0.8595814530056446, "learning_rate": 6.109628323064239e-06, "loss": 0.3944, "step": 4366 }, { "epoch": 1.4580968280467446, "grad_norm": 0.8343117678470263, "learning_rate": 6.107734068819439e-06, "loss": 0.3567, "step": 4367 }, { "epoch": 1.4584307178631053, "grad_norm": 0.863237588303002, "learning_rate": 6.105839647362511e-06, "loss": 0.3695, "step": 4368 }, { "epoch": 1.4587646076794658, "grad_norm": 0.8421689505125292, "learning_rate": 6.103945058979414e-06, "loss": 0.3772, "step": 4369 }, { "epoch": 1.4590984974958263, "grad_norm": 0.8251971635561918, "learning_rate": 6.102050303956139e-06, "loss": 0.3522, "step": 4370 }, { "epoch": 1.459432387312187, "grad_norm": 0.832960262350679, "learning_rate": 6.100155382578694e-06, "loss": 0.3658, "step": 4371 }, { "epoch": 1.4597662771285476, "grad_norm": 0.8745342997175507, "learning_rate": 6.098260295133121e-06, "loss": 0.3794, "step": 4372 }, { "epoch": 1.4601001669449083, "grad_norm": 0.8507969416250929, "learning_rate": 6.096365041905481e-06, "loss": 0.3776, "step": 4373 }, { "epoch": 1.4604340567612688, "grad_norm": 0.8655414248300808, "learning_rate": 6.094469623181863e-06, "loss": 0.3873, "step": 4374 }, { "epoch": 1.4607679465776293, "grad_norm": 0.8684570382717015, "learning_rate": 6.09257403924838e-06, "loss": 0.3721, "step": 4375 }, { "epoch": 1.46110183639399, "grad_norm": 0.8271830668277742, "learning_rate": 6.090678290391167e-06, "loss": 0.366, "step": 4376 }, { "epoch": 1.4614357262103506, "grad_norm": 0.8615976922227114, "learning_rate": 6.088782376896389e-06, "loss": 0.3722, "step": 4377 }, { "epoch": 1.4617696160267113, "grad_norm": 0.9150045875038577, "learning_rate": 6.086886299050236e-06, "loss": 0.3832, "step": 4378 }, { "epoch": 1.4621035058430718, "grad_norm": 0.8362331937264665, "learning_rate": 6.084990057138915e-06, "loss": 0.3599, "step": 4379 }, { "epoch": 1.4624373956594323, "grad_norm": 0.8189020059456901, "learning_rate": 6.083093651448669e-06, "loss": 0.3611, "step": 4380 }, { "epoch": 1.462771285475793, "grad_norm": 0.8955684160914755, "learning_rate": 6.081197082265754e-06, "loss": 0.3835, "step": 4381 }, { "epoch": 1.4631051752921536, "grad_norm": 0.8403437989180711, "learning_rate": 6.0793003498764604e-06, "loss": 0.3917, "step": 4382 }, { "epoch": 1.4634390651085143, "grad_norm": 0.8551452381407931, "learning_rate": 6.077403454567099e-06, "loss": 0.3796, "step": 4383 }, { "epoch": 1.4637729549248748, "grad_norm": 0.8886988682622327, "learning_rate": 6.0755063966240045e-06, "loss": 0.3769, "step": 4384 }, { "epoch": 1.4641068447412353, "grad_norm": 0.8824862185974663, "learning_rate": 6.073609176333537e-06, "loss": 0.3835, "step": 4385 }, { "epoch": 1.464440734557596, "grad_norm": 0.8379719405385924, "learning_rate": 6.071711793982083e-06, "loss": 0.3656, "step": 4386 }, { "epoch": 1.4647746243739566, "grad_norm": 0.8389959160101909, "learning_rate": 6.069814249856051e-06, "loss": 0.375, "step": 4387 }, { "epoch": 1.4651085141903173, "grad_norm": 0.9046515168687554, "learning_rate": 6.067916544241874e-06, "loss": 0.3698, "step": 4388 }, { "epoch": 1.4654424040066778, "grad_norm": 0.8751932008373974, "learning_rate": 6.066018677426011e-06, "loss": 0.3646, "step": 4389 }, { "epoch": 1.4657762938230383, "grad_norm": 0.8206137136899226, "learning_rate": 6.064120649694942e-06, "loss": 0.378, "step": 4390 }, { "epoch": 1.466110183639399, "grad_norm": 0.8948431063148444, "learning_rate": 6.062222461335179e-06, "loss": 0.3847, "step": 4391 }, { "epoch": 1.4664440734557596, "grad_norm": 0.8822104307179258, "learning_rate": 6.060324112633248e-06, "loss": 0.3785, "step": 4392 }, { "epoch": 1.4667779632721203, "grad_norm": 0.8812092780806547, "learning_rate": 6.058425603875707e-06, "loss": 0.3788, "step": 4393 }, { "epoch": 1.4671118530884808, "grad_norm": 0.8704519221126829, "learning_rate": 6.056526935349133e-06, "loss": 0.3795, "step": 4394 }, { "epoch": 1.4674457429048413, "grad_norm": 0.8795979052965308, "learning_rate": 6.054628107340133e-06, "loss": 0.3867, "step": 4395 }, { "epoch": 1.467779632721202, "grad_norm": 0.8434302794054036, "learning_rate": 6.05272912013533e-06, "loss": 0.3666, "step": 4396 }, { "epoch": 1.4681135225375626, "grad_norm": 0.8173863625878508, "learning_rate": 6.050829974021379e-06, "loss": 0.3702, "step": 4397 }, { "epoch": 1.4684474123539233, "grad_norm": 0.8694686822267661, "learning_rate": 6.048930669284954e-06, "loss": 0.3897, "step": 4398 }, { "epoch": 1.4687813021702838, "grad_norm": 0.8617694408908941, "learning_rate": 6.047031206212756e-06, "loss": 0.3921, "step": 4399 }, { "epoch": 1.4691151919866443, "grad_norm": 0.8742050418716574, "learning_rate": 6.045131585091505e-06, "loss": 0.3884, "step": 4400 }, { "epoch": 1.469449081803005, "grad_norm": 0.8751708324180649, "learning_rate": 6.043231806207952e-06, "loss": 0.3879, "step": 4401 }, { "epoch": 1.4697829716193656, "grad_norm": 0.8735980728862459, "learning_rate": 6.041331869848866e-06, "loss": 0.3732, "step": 4402 }, { "epoch": 1.4701168614357263, "grad_norm": 0.8292291002749714, "learning_rate": 6.039431776301043e-06, "loss": 0.361, "step": 4403 }, { "epoch": 1.4704507512520868, "grad_norm": 0.819026894275791, "learning_rate": 6.037531525851301e-06, "loss": 0.3601, "step": 4404 }, { "epoch": 1.4707846410684473, "grad_norm": 0.8673206156099421, "learning_rate": 6.035631118786483e-06, "loss": 0.3865, "step": 4405 }, { "epoch": 1.471118530884808, "grad_norm": 0.850967481598563, "learning_rate": 6.033730555393452e-06, "loss": 0.3728, "step": 4406 }, { "epoch": 1.4714524207011686, "grad_norm": 0.8304513943387936, "learning_rate": 6.031829835959101e-06, "loss": 0.3664, "step": 4407 }, { "epoch": 1.4717863105175293, "grad_norm": 0.9026544709540003, "learning_rate": 6.029928960770341e-06, "loss": 0.3994, "step": 4408 }, { "epoch": 1.4721202003338898, "grad_norm": 0.8371914179838621, "learning_rate": 6.02802793011411e-06, "loss": 0.3701, "step": 4409 }, { "epoch": 1.4724540901502503, "grad_norm": 0.8016287473176831, "learning_rate": 6.026126744277365e-06, "loss": 0.3623, "step": 4410 }, { "epoch": 1.472787979966611, "grad_norm": 0.8365000252474453, "learning_rate": 6.024225403547095e-06, "loss": 0.3775, "step": 4411 }, { "epoch": 1.4731218697829715, "grad_norm": 0.8326670432476759, "learning_rate": 6.022323908210301e-06, "loss": 0.3626, "step": 4412 }, { "epoch": 1.4734557595993323, "grad_norm": 0.8188918240149197, "learning_rate": 6.020422258554016e-06, "loss": 0.3667, "step": 4413 }, { "epoch": 1.4737896494156928, "grad_norm": 0.8390434828150616, "learning_rate": 6.018520454865293e-06, "loss": 0.3684, "step": 4414 }, { "epoch": 1.4741235392320533, "grad_norm": 0.8606861015324025, "learning_rate": 6.016618497431211e-06, "loss": 0.3816, "step": 4415 }, { "epoch": 1.474457429048414, "grad_norm": 0.9028746486347071, "learning_rate": 6.014716386538865e-06, "loss": 0.3896, "step": 4416 }, { "epoch": 1.4747913188647745, "grad_norm": 0.8154672505569857, "learning_rate": 6.012814122475381e-06, "loss": 0.3698, "step": 4417 }, { "epoch": 1.4751252086811353, "grad_norm": 0.8887944424863284, "learning_rate": 6.010911705527906e-06, "loss": 0.3808, "step": 4418 }, { "epoch": 1.4754590984974958, "grad_norm": 0.8778427501781316, "learning_rate": 6.009009135983608e-06, "loss": 0.3845, "step": 4419 }, { "epoch": 1.4757929883138563, "grad_norm": 0.8236553243245011, "learning_rate": 6.0071064141296784e-06, "loss": 0.3638, "step": 4420 }, { "epoch": 1.476126878130217, "grad_norm": 0.821417717728372, "learning_rate": 6.005203540253335e-06, "loss": 0.362, "step": 4421 }, { "epoch": 1.4764607679465778, "grad_norm": 0.8618409152766905, "learning_rate": 6.003300514641814e-06, "loss": 0.3802, "step": 4422 }, { "epoch": 1.4767946577629383, "grad_norm": 0.8380963626951632, "learning_rate": 6.0013973375823765e-06, "loss": 0.3713, "step": 4423 }, { "epoch": 1.4771285475792988, "grad_norm": 0.8483932160888936, "learning_rate": 5.99949400936231e-06, "loss": 0.3755, "step": 4424 }, { "epoch": 1.4774624373956593, "grad_norm": 0.8478785615597424, "learning_rate": 5.997590530268915e-06, "loss": 0.3883, "step": 4425 }, { "epoch": 1.47779632721202, "grad_norm": 0.8195757374024742, "learning_rate": 5.995686900589528e-06, "loss": 0.3638, "step": 4426 }, { "epoch": 1.4781302170283808, "grad_norm": 0.8375907845564732, "learning_rate": 5.993783120611495e-06, "loss": 0.3702, "step": 4427 }, { "epoch": 1.4784641068447413, "grad_norm": 0.8600046044446323, "learning_rate": 5.991879190622195e-06, "loss": 0.3787, "step": 4428 }, { "epoch": 1.4787979966611018, "grad_norm": 0.8652480513080726, "learning_rate": 5.9899751109090256e-06, "loss": 0.3802, "step": 4429 }, { "epoch": 1.4791318864774623, "grad_norm": 0.826740331652704, "learning_rate": 5.988070881759406e-06, "loss": 0.3675, "step": 4430 }, { "epoch": 1.479465776293823, "grad_norm": 0.8395337598307105, "learning_rate": 5.986166503460779e-06, "loss": 0.3626, "step": 4431 }, { "epoch": 1.4797996661101838, "grad_norm": 0.8504688785843548, "learning_rate": 5.984261976300609e-06, "loss": 0.3631, "step": 4432 }, { "epoch": 1.4801335559265443, "grad_norm": 0.8560719316427406, "learning_rate": 5.982357300566386e-06, "loss": 0.3808, "step": 4433 }, { "epoch": 1.4804674457429048, "grad_norm": 0.8595324956414777, "learning_rate": 5.980452476545621e-06, "loss": 0.373, "step": 4434 }, { "epoch": 1.4808013355592655, "grad_norm": 0.8430440010318843, "learning_rate": 5.978547504525841e-06, "loss": 0.3822, "step": 4435 }, { "epoch": 1.481135225375626, "grad_norm": 0.8703855197890092, "learning_rate": 5.976642384794607e-06, "loss": 0.3806, "step": 4436 }, { "epoch": 1.4814691151919868, "grad_norm": 0.847738199403074, "learning_rate": 5.974737117639494e-06, "loss": 0.3537, "step": 4437 }, { "epoch": 1.4818030050083473, "grad_norm": 0.8857597363315751, "learning_rate": 5.972831703348101e-06, "loss": 0.3651, "step": 4438 }, { "epoch": 1.4821368948247078, "grad_norm": 0.8531425118513203, "learning_rate": 5.970926142208051e-06, "loss": 0.3761, "step": 4439 }, { "epoch": 1.4824707846410685, "grad_norm": 0.863988629079305, "learning_rate": 5.969020434506987e-06, "loss": 0.381, "step": 4440 }, { "epoch": 1.482804674457429, "grad_norm": 0.8539210773922074, "learning_rate": 5.9671145805325745e-06, "loss": 0.3728, "step": 4441 }, { "epoch": 1.4831385642737898, "grad_norm": 0.8722449779140465, "learning_rate": 5.965208580572504e-06, "loss": 0.3577, "step": 4442 }, { "epoch": 1.4834724540901503, "grad_norm": 0.8552322597633782, "learning_rate": 5.963302434914481e-06, "loss": 0.3691, "step": 4443 }, { "epoch": 1.4838063439065108, "grad_norm": 0.8240307441860861, "learning_rate": 5.961396143846243e-06, "loss": 0.3752, "step": 4444 }, { "epoch": 1.4841402337228715, "grad_norm": 0.8536046621258585, "learning_rate": 5.95948970765554e-06, "loss": 0.3766, "step": 4445 }, { "epoch": 1.484474123539232, "grad_norm": 0.8559145454582889, "learning_rate": 5.9575831266301505e-06, "loss": 0.3707, "step": 4446 }, { "epoch": 1.4848080133555928, "grad_norm": 0.8572596782144613, "learning_rate": 5.955676401057872e-06, "loss": 0.3941, "step": 4447 }, { "epoch": 1.4851419031719533, "grad_norm": 0.8258117459911831, "learning_rate": 5.953769531226522e-06, "loss": 0.3729, "step": 4448 }, { "epoch": 1.4854757929883138, "grad_norm": 0.8919863896710933, "learning_rate": 5.951862517423945e-06, "loss": 0.3934, "step": 4449 }, { "epoch": 1.4858096828046745, "grad_norm": 0.8842738372210167, "learning_rate": 5.949955359938002e-06, "loss": 0.3919, "step": 4450 }, { "epoch": 1.486143572621035, "grad_norm": 0.8599278717506064, "learning_rate": 5.948048059056578e-06, "loss": 0.3728, "step": 4451 }, { "epoch": 1.4864774624373958, "grad_norm": 0.8454884521317759, "learning_rate": 5.9461406150675815e-06, "loss": 0.3813, "step": 4452 }, { "epoch": 1.4868113522537563, "grad_norm": 0.8645582834406615, "learning_rate": 5.944233028258938e-06, "loss": 0.3786, "step": 4453 }, { "epoch": 1.4871452420701168, "grad_norm": 0.8319436492686045, "learning_rate": 5.942325298918599e-06, "loss": 0.3779, "step": 4454 }, { "epoch": 1.4874791318864775, "grad_norm": 0.8322326794068068, "learning_rate": 5.940417427334535e-06, "loss": 0.3774, "step": 4455 }, { "epoch": 1.487813021702838, "grad_norm": 0.8370788621477512, "learning_rate": 5.938509413794736e-06, "loss": 0.3407, "step": 4456 }, { "epoch": 1.4881469115191988, "grad_norm": 0.8415966239307846, "learning_rate": 5.936601258587223e-06, "loss": 0.381, "step": 4457 }, { "epoch": 1.4884808013355593, "grad_norm": 0.8774913503893063, "learning_rate": 5.9346929620000235e-06, "loss": 0.3947, "step": 4458 }, { "epoch": 1.4888146911519198, "grad_norm": 0.8211841192026984, "learning_rate": 5.9327845243212e-06, "loss": 0.3684, "step": 4459 }, { "epoch": 1.4891485809682805, "grad_norm": 0.8264272798956286, "learning_rate": 5.930875945838829e-06, "loss": 0.3721, "step": 4460 }, { "epoch": 1.489482470784641, "grad_norm": 0.8367285847652153, "learning_rate": 5.928967226841008e-06, "loss": 0.3754, "step": 4461 }, { "epoch": 1.4898163606010018, "grad_norm": 0.8702149434870533, "learning_rate": 5.92705836761586e-06, "loss": 0.3831, "step": 4462 }, { "epoch": 1.4901502504173623, "grad_norm": 0.8678259126838501, "learning_rate": 5.925149368451526e-06, "loss": 0.3804, "step": 4463 }, { "epoch": 1.4904841402337228, "grad_norm": 0.8319571435576331, "learning_rate": 5.923240229636168e-06, "loss": 0.3617, "step": 4464 }, { "epoch": 1.4908180300500835, "grad_norm": 0.8417824030182655, "learning_rate": 5.921330951457971e-06, "loss": 0.3708, "step": 4465 }, { "epoch": 1.491151919866444, "grad_norm": 0.8138319005620198, "learning_rate": 5.919421534205138e-06, "loss": 0.3614, "step": 4466 }, { "epoch": 1.4914858096828048, "grad_norm": 0.8909609719124522, "learning_rate": 5.917511978165899e-06, "loss": 0.3952, "step": 4467 }, { "epoch": 1.4918196994991653, "grad_norm": 0.8842239345224885, "learning_rate": 5.915602283628496e-06, "loss": 0.3904, "step": 4468 }, { "epoch": 1.4921535893155258, "grad_norm": 0.8487096665970462, "learning_rate": 5.9136924508812e-06, "loss": 0.3848, "step": 4469 }, { "epoch": 1.4924874791318865, "grad_norm": 0.8709185506737593, "learning_rate": 5.9117824802123e-06, "loss": 0.3763, "step": 4470 }, { "epoch": 1.492821368948247, "grad_norm": 0.8658415102381603, "learning_rate": 5.909872371910104e-06, "loss": 0.3797, "step": 4471 }, { "epoch": 1.4931552587646078, "grad_norm": 0.8660361536970715, "learning_rate": 5.907962126262942e-06, "loss": 0.3873, "step": 4472 }, { "epoch": 1.4934891485809683, "grad_norm": 0.8251673845433907, "learning_rate": 5.906051743559167e-06, "loss": 0.3621, "step": 4473 }, { "epoch": 1.4938230383973288, "grad_norm": 0.8547418373505719, "learning_rate": 5.904141224087147e-06, "loss": 0.3739, "step": 4474 }, { "epoch": 1.4941569282136895, "grad_norm": 0.8486309713227614, "learning_rate": 5.9022305681352796e-06, "loss": 0.3736, "step": 4475 }, { "epoch": 1.49449081803005, "grad_norm": 0.8572518096571338, "learning_rate": 5.9003197759919725e-06, "loss": 0.3737, "step": 4476 }, { "epoch": 1.4948247078464107, "grad_norm": 0.8795922244054633, "learning_rate": 5.898408847945665e-06, "loss": 0.379, "step": 4477 }, { "epoch": 1.4951585976627713, "grad_norm": 0.850916581094115, "learning_rate": 5.896497784284804e-06, "loss": 0.386, "step": 4478 }, { "epoch": 1.4954924874791318, "grad_norm": 0.8761490216154507, "learning_rate": 5.89458658529787e-06, "loss": 0.3856, "step": 4479 }, { "epoch": 1.4958263772954925, "grad_norm": 0.8592305733231634, "learning_rate": 5.892675251273357e-06, "loss": 0.3745, "step": 4480 }, { "epoch": 1.496160267111853, "grad_norm": 0.8157338442247694, "learning_rate": 5.890763782499775e-06, "loss": 0.3623, "step": 4481 }, { "epoch": 1.4964941569282137, "grad_norm": 0.8627199889857627, "learning_rate": 5.888852179265666e-06, "loss": 0.3735, "step": 4482 }, { "epoch": 1.4968280467445743, "grad_norm": 0.8390266104536471, "learning_rate": 5.886940441859583e-06, "loss": 0.3593, "step": 4483 }, { "epoch": 1.4971619365609348, "grad_norm": 0.8680372971113917, "learning_rate": 5.885028570570104e-06, "loss": 0.3807, "step": 4484 }, { "epoch": 1.4974958263772955, "grad_norm": 0.8538572727182295, "learning_rate": 5.8831165656858226e-06, "loss": 0.3833, "step": 4485 }, { "epoch": 1.497829716193656, "grad_norm": 0.8348721137498762, "learning_rate": 5.881204427495358e-06, "loss": 0.3737, "step": 4486 }, { "epoch": 1.4981636060100167, "grad_norm": 0.8547798270901471, "learning_rate": 5.879292156287346e-06, "loss": 0.3717, "step": 4487 }, { "epoch": 1.4984974958263773, "grad_norm": 0.8094934408110567, "learning_rate": 5.877379752350443e-06, "loss": 0.3614, "step": 4488 }, { "epoch": 1.4988313856427378, "grad_norm": 0.8252816040155003, "learning_rate": 5.875467215973325e-06, "loss": 0.3637, "step": 4489 }, { "epoch": 1.4991652754590985, "grad_norm": 0.8279711344012135, "learning_rate": 5.873554547444691e-06, "loss": 0.3701, "step": 4490 }, { "epoch": 1.4994991652754592, "grad_norm": 0.8513647512805367, "learning_rate": 5.871641747053256e-06, "loss": 0.3622, "step": 4491 }, { "epoch": 1.4998330550918197, "grad_norm": 0.8289217733769987, "learning_rate": 5.869728815087758e-06, "loss": 0.366, "step": 4492 }, { "epoch": 1.5001669449081803, "grad_norm": 0.8607319933407821, "learning_rate": 5.867815751836952e-06, "loss": 0.3712, "step": 4493 }, { "epoch": 1.5005008347245408, "grad_norm": 0.8848100903594333, "learning_rate": 5.8659025575896144e-06, "loss": 0.3886, "step": 4494 }, { "epoch": 1.5008347245409015, "grad_norm": 0.8680659043688147, "learning_rate": 5.863989232634543e-06, "loss": 0.3789, "step": 4495 }, { "epoch": 1.5011686143572622, "grad_norm": 0.8891777785265819, "learning_rate": 5.862075777260553e-06, "loss": 0.3825, "step": 4496 }, { "epoch": 1.5015025041736227, "grad_norm": 0.8730465365127712, "learning_rate": 5.8601621917564775e-06, "loss": 0.3934, "step": 4497 }, { "epoch": 1.5018363939899833, "grad_norm": 0.8213904917327407, "learning_rate": 5.858248476411177e-06, "loss": 0.3677, "step": 4498 }, { "epoch": 1.5021702838063438, "grad_norm": 0.8126922369501806, "learning_rate": 5.85633463151352e-06, "loss": 0.3607, "step": 4499 }, { "epoch": 1.5025041736227045, "grad_norm": 0.8924722523325763, "learning_rate": 5.854420657352406e-06, "loss": 0.3837, "step": 4500 }, { "epoch": 1.5028380634390652, "grad_norm": 0.8745223521073894, "learning_rate": 5.852506554216745e-06, "loss": 0.3729, "step": 4501 }, { "epoch": 1.5031719532554257, "grad_norm": 0.8707622818710163, "learning_rate": 5.850592322395472e-06, "loss": 0.3756, "step": 4502 }, { "epoch": 1.5035058430717863, "grad_norm": 0.8371716337821535, "learning_rate": 5.8486779621775405e-06, "loss": 0.3705, "step": 4503 }, { "epoch": 1.5038397328881468, "grad_norm": 0.848409410748853, "learning_rate": 5.846763473851922e-06, "loss": 0.3694, "step": 4504 }, { "epoch": 1.5041736227045075, "grad_norm": 0.8300475170366788, "learning_rate": 5.844848857707607e-06, "loss": 0.3654, "step": 4505 }, { "epoch": 1.5045075125208682, "grad_norm": 0.8506618777058952, "learning_rate": 5.842934114033607e-06, "loss": 0.3713, "step": 4506 }, { "epoch": 1.5048414023372287, "grad_norm": 0.8780162349907373, "learning_rate": 5.841019243118951e-06, "loss": 0.3699, "step": 4507 }, { "epoch": 1.5051752921535893, "grad_norm": 0.880763030927952, "learning_rate": 5.839104245252692e-06, "loss": 0.3871, "step": 4508 }, { "epoch": 1.5055091819699498, "grad_norm": 0.8613560206367908, "learning_rate": 5.837189120723891e-06, "loss": 0.3769, "step": 4509 }, { "epoch": 1.5058430717863105, "grad_norm": 0.8250027387732943, "learning_rate": 5.835273869821645e-06, "loss": 0.3602, "step": 4510 }, { "epoch": 1.5061769616026712, "grad_norm": 0.8863407536013396, "learning_rate": 5.833358492835053e-06, "loss": 0.3858, "step": 4511 }, { "epoch": 1.5065108514190317, "grad_norm": 0.8540721077788762, "learning_rate": 5.831442990053242e-06, "loss": 0.3651, "step": 4512 }, { "epoch": 1.5068447412353922, "grad_norm": 0.875671441413765, "learning_rate": 5.829527361765361e-06, "loss": 0.3625, "step": 4513 }, { "epoch": 1.5071786310517528, "grad_norm": 0.8641011980298494, "learning_rate": 5.827611608260566e-06, "loss": 0.3635, "step": 4514 }, { "epoch": 1.5075125208681135, "grad_norm": 0.8143524365817753, "learning_rate": 5.825695729828047e-06, "loss": 0.367, "step": 4515 }, { "epoch": 1.5078464106844742, "grad_norm": 0.827336587416146, "learning_rate": 5.8237797267570005e-06, "loss": 0.3593, "step": 4516 }, { "epoch": 1.5081803005008347, "grad_norm": 0.8174218668760639, "learning_rate": 5.821863599336649e-06, "loss": 0.3653, "step": 4517 }, { "epoch": 1.5085141903171952, "grad_norm": 0.8799580271475302, "learning_rate": 5.81994734785623e-06, "loss": 0.3722, "step": 4518 }, { "epoch": 1.508848080133556, "grad_norm": 0.8559109459205979, "learning_rate": 5.818030972605001e-06, "loss": 0.3785, "step": 4519 }, { "epoch": 1.5091819699499165, "grad_norm": 0.8316531701543554, "learning_rate": 5.816114473872238e-06, "loss": 0.3731, "step": 4520 }, { "epoch": 1.5095158597662772, "grad_norm": 0.8218835359688067, "learning_rate": 5.814197851947239e-06, "loss": 0.3708, "step": 4521 }, { "epoch": 1.5098497495826377, "grad_norm": 0.8705046080671365, "learning_rate": 5.812281107119313e-06, "loss": 0.3925, "step": 4522 }, { "epoch": 1.5101836393989982, "grad_norm": 0.846132559964902, "learning_rate": 5.810364239677796e-06, "loss": 0.3787, "step": 4523 }, { "epoch": 1.510517529215359, "grad_norm": 0.8414849586389868, "learning_rate": 5.808447249912035e-06, "loss": 0.3574, "step": 4524 }, { "epoch": 1.5108514190317195, "grad_norm": 0.8253095643070755, "learning_rate": 5.806530138111402e-06, "loss": 0.3758, "step": 4525 }, { "epoch": 1.5111853088480802, "grad_norm": 0.8685614609016075, "learning_rate": 5.804612904565283e-06, "loss": 0.3832, "step": 4526 }, { "epoch": 1.5115191986644407, "grad_norm": 0.8058833037098095, "learning_rate": 5.802695549563084e-06, "loss": 0.3676, "step": 4527 }, { "epoch": 1.5118530884808012, "grad_norm": 0.8246034317438565, "learning_rate": 5.800778073394231e-06, "loss": 0.3609, "step": 4528 }, { "epoch": 1.512186978297162, "grad_norm": 0.8451240418246786, "learning_rate": 5.798860476348163e-06, "loss": 0.3784, "step": 4529 }, { "epoch": 1.5125208681135225, "grad_norm": 0.8203430188222166, "learning_rate": 5.7969427587143425e-06, "loss": 0.3647, "step": 4530 }, { "epoch": 1.5128547579298832, "grad_norm": 0.8124634389621261, "learning_rate": 5.7950249207822495e-06, "loss": 0.3608, "step": 4531 }, { "epoch": 1.5131886477462437, "grad_norm": 0.852442058450493, "learning_rate": 5.793106962841378e-06, "loss": 0.3743, "step": 4532 }, { "epoch": 1.5135225375626042, "grad_norm": 0.8312335088142787, "learning_rate": 5.791188885181248e-06, "loss": 0.3539, "step": 4533 }, { "epoch": 1.513856427378965, "grad_norm": 0.8514808039048893, "learning_rate": 5.789270688091388e-06, "loss": 0.3743, "step": 4534 }, { "epoch": 1.5141903171953257, "grad_norm": 0.9129828395233109, "learning_rate": 5.787352371861352e-06, "loss": 0.3931, "step": 4535 }, { "epoch": 1.5145242070116862, "grad_norm": 0.8450611016063306, "learning_rate": 5.785433936780708e-06, "loss": 0.3744, "step": 4536 }, { "epoch": 1.5148580968280467, "grad_norm": 0.8357780163286855, "learning_rate": 5.783515383139045e-06, "loss": 0.3707, "step": 4537 }, { "epoch": 1.5151919866444072, "grad_norm": 0.8559760021294867, "learning_rate": 5.781596711225965e-06, "loss": 0.3825, "step": 4538 }, { "epoch": 1.515525876460768, "grad_norm": 0.8446409277845742, "learning_rate": 5.779677921331094e-06, "loss": 0.3636, "step": 4539 }, { "epoch": 1.5158597662771287, "grad_norm": 0.847058965908876, "learning_rate": 5.77775901374407e-06, "loss": 0.3657, "step": 4540 }, { "epoch": 1.5161936560934892, "grad_norm": 0.844479452538891, "learning_rate": 5.775839988754554e-06, "loss": 0.3647, "step": 4541 }, { "epoch": 1.5165275459098497, "grad_norm": 0.83715028429231, "learning_rate": 5.7739208466522225e-06, "loss": 0.3731, "step": 4542 }, { "epoch": 1.5168614357262102, "grad_norm": 0.8285158974280024, "learning_rate": 5.772001587726765e-06, "loss": 0.3593, "step": 4543 }, { "epoch": 1.517195325542571, "grad_norm": 0.8218988948255029, "learning_rate": 5.7700822122679e-06, "loss": 0.3601, "step": 4544 }, { "epoch": 1.5175292153589317, "grad_norm": 0.8887935240270896, "learning_rate": 5.76816272056535e-06, "loss": 0.3942, "step": 4545 }, { "epoch": 1.5178631051752922, "grad_norm": 0.834265691357487, "learning_rate": 5.766243112908867e-06, "loss": 0.3774, "step": 4546 }, { "epoch": 1.5181969949916527, "grad_norm": 0.8386535034727971, "learning_rate": 5.764323389588211e-06, "loss": 0.3661, "step": 4547 }, { "epoch": 1.5185308848080132, "grad_norm": 0.8190451714081459, "learning_rate": 5.762403550893166e-06, "loss": 0.3565, "step": 4548 }, { "epoch": 1.518864774624374, "grad_norm": 0.8391976513927918, "learning_rate": 5.760483597113531e-06, "loss": 0.3681, "step": 4549 }, { "epoch": 1.5191986644407347, "grad_norm": 0.8162267715431115, "learning_rate": 5.758563528539123e-06, "loss": 0.3591, "step": 4550 }, { "epoch": 1.5195325542570952, "grad_norm": 0.8684803269419071, "learning_rate": 5.756643345459774e-06, "loss": 0.3782, "step": 4551 }, { "epoch": 1.5198664440734557, "grad_norm": 0.8266860347055878, "learning_rate": 5.754723048165335e-06, "loss": 0.3626, "step": 4552 }, { "epoch": 1.5202003338898162, "grad_norm": 0.8440639029658428, "learning_rate": 5.752802636945675e-06, "loss": 0.3778, "step": 4553 }, { "epoch": 1.520534223706177, "grad_norm": 0.8404829648586764, "learning_rate": 5.750882112090682e-06, "loss": 0.3883, "step": 4554 }, { "epoch": 1.5208681135225377, "grad_norm": 0.8721676785289848, "learning_rate": 5.748961473890254e-06, "loss": 0.392, "step": 4555 }, { "epoch": 1.5212020033388982, "grad_norm": 0.844961619637175, "learning_rate": 5.747040722634316e-06, "loss": 0.3719, "step": 4556 }, { "epoch": 1.5215358931552587, "grad_norm": 0.8437060987980429, "learning_rate": 5.745119858612799e-06, "loss": 0.3642, "step": 4557 }, { "epoch": 1.5218697829716192, "grad_norm": 0.8468448452413193, "learning_rate": 5.743198882115662e-06, "loss": 0.3714, "step": 4558 }, { "epoch": 1.52220367278798, "grad_norm": 0.8420390655498292, "learning_rate": 5.741277793432873e-06, "loss": 0.3815, "step": 4559 }, { "epoch": 1.5225375626043407, "grad_norm": 0.8487592817934352, "learning_rate": 5.73935659285442e-06, "loss": 0.3718, "step": 4560 }, { "epoch": 1.5228714524207012, "grad_norm": 0.8419314970655593, "learning_rate": 5.737435280670307e-06, "loss": 0.3717, "step": 4561 }, { "epoch": 1.5232053422370617, "grad_norm": 0.8426096185725606, "learning_rate": 5.735513857170558e-06, "loss": 0.3644, "step": 4562 }, { "epoch": 1.5235392320534222, "grad_norm": 0.8339959205478115, "learning_rate": 5.733592322645209e-06, "loss": 0.3747, "step": 4563 }, { "epoch": 1.523873121869783, "grad_norm": 0.8662745418972684, "learning_rate": 5.731670677384319e-06, "loss": 0.38, "step": 4564 }, { "epoch": 1.5242070116861437, "grad_norm": 0.8582665466901738, "learning_rate": 5.729748921677954e-06, "loss": 0.3762, "step": 4565 }, { "epoch": 1.5245409015025042, "grad_norm": 0.828750997061731, "learning_rate": 5.727827055816205e-06, "loss": 0.3652, "step": 4566 }, { "epoch": 1.5248747913188647, "grad_norm": 0.831792366482075, "learning_rate": 5.725905080089178e-06, "loss": 0.3724, "step": 4567 }, { "epoch": 1.5252086811352252, "grad_norm": 0.8189716990933287, "learning_rate": 5.723982994786995e-06, "loss": 0.3728, "step": 4568 }, { "epoch": 1.525542570951586, "grad_norm": 0.8095329344847768, "learning_rate": 5.722060800199792e-06, "loss": 0.3572, "step": 4569 }, { "epoch": 1.5258764607679467, "grad_norm": 0.8564627055401911, "learning_rate": 5.7201384966177254e-06, "loss": 0.3819, "step": 4570 }, { "epoch": 1.5262103505843072, "grad_norm": 0.8332657122202003, "learning_rate": 5.718216084330966e-06, "loss": 0.3694, "step": 4571 }, { "epoch": 1.5265442404006677, "grad_norm": 0.8064506899861974, "learning_rate": 5.7162935636297015e-06, "loss": 0.3527, "step": 4572 }, { "epoch": 1.5268781302170282, "grad_norm": 0.8545465843440885, "learning_rate": 5.714370934804135e-06, "loss": 0.3851, "step": 4573 }, { "epoch": 1.527212020033389, "grad_norm": 0.8367893735550378, "learning_rate": 5.712448198144487e-06, "loss": 0.3759, "step": 4574 }, { "epoch": 1.5275459098497497, "grad_norm": 0.8463317239574732, "learning_rate": 5.710525353940994e-06, "loss": 0.3708, "step": 4575 }, { "epoch": 1.5278797996661102, "grad_norm": 0.8520059602686577, "learning_rate": 5.708602402483909e-06, "loss": 0.3699, "step": 4576 }, { "epoch": 1.5282136894824707, "grad_norm": 0.8476616266191257, "learning_rate": 5.706679344063504e-06, "loss": 0.3797, "step": 4577 }, { "epoch": 1.5285475792988312, "grad_norm": 0.814502802232499, "learning_rate": 5.704756178970057e-06, "loss": 0.3612, "step": 4578 }, { "epoch": 1.528881469115192, "grad_norm": 0.8566127737866792, "learning_rate": 5.702832907493877e-06, "loss": 0.3767, "step": 4579 }, { "epoch": 1.5292153589315527, "grad_norm": 0.8528881208465566, "learning_rate": 5.700909529925275e-06, "loss": 0.366, "step": 4580 }, { "epoch": 1.5295492487479132, "grad_norm": 0.8242911148055958, "learning_rate": 5.698986046554588e-06, "loss": 0.3747, "step": 4581 }, { "epoch": 1.5298831385642737, "grad_norm": 0.8440717195305949, "learning_rate": 5.6970624576721636e-06, "loss": 0.3785, "step": 4582 }, { "epoch": 1.5302170283806342, "grad_norm": 0.8310758330588992, "learning_rate": 5.695138763568368e-06, "loss": 0.3646, "step": 4583 }, { "epoch": 1.530550918196995, "grad_norm": 0.8265350663031803, "learning_rate": 5.693214964533581e-06, "loss": 0.3687, "step": 4584 }, { "epoch": 1.5308848080133557, "grad_norm": 0.8001503309469279, "learning_rate": 5.6912910608582005e-06, "loss": 0.3538, "step": 4585 }, { "epoch": 1.5312186978297162, "grad_norm": 0.8716019698635983, "learning_rate": 5.689367052832638e-06, "loss": 0.377, "step": 4586 }, { "epoch": 1.5315525876460767, "grad_norm": 0.8289047033168443, "learning_rate": 5.6874429407473265e-06, "loss": 0.3647, "step": 4587 }, { "epoch": 1.5318864774624374, "grad_norm": 0.8837745020487852, "learning_rate": 5.685518724892702e-06, "loss": 0.3867, "step": 4588 }, { "epoch": 1.532220367278798, "grad_norm": 0.8084122589542443, "learning_rate": 5.6835944055592315e-06, "loss": 0.3539, "step": 4589 }, { "epoch": 1.5325542570951587, "grad_norm": 0.8834134325324803, "learning_rate": 5.681669983037387e-06, "loss": 0.3849, "step": 4590 }, { "epoch": 1.5328881469115192, "grad_norm": 0.8650896517361542, "learning_rate": 5.6797454576176616e-06, "loss": 0.3762, "step": 4591 }, { "epoch": 1.5332220367278797, "grad_norm": 0.8523964670043567, "learning_rate": 5.677820829590559e-06, "loss": 0.3751, "step": 4592 }, { "epoch": 1.5335559265442404, "grad_norm": 0.8232540287371503, "learning_rate": 5.675896099246604e-06, "loss": 0.3632, "step": 4593 }, { "epoch": 1.533889816360601, "grad_norm": 0.8386385621400887, "learning_rate": 5.673971266876332e-06, "loss": 0.3732, "step": 4594 }, { "epoch": 1.5342237061769617, "grad_norm": 0.8775710937510162, "learning_rate": 5.672046332770297e-06, "loss": 0.392, "step": 4595 }, { "epoch": 1.5345575959933222, "grad_norm": 0.8075330512168302, "learning_rate": 5.670121297219065e-06, "loss": 0.3714, "step": 4596 }, { "epoch": 1.5348914858096827, "grad_norm": 0.8397217964144337, "learning_rate": 5.6681961605132256e-06, "loss": 0.3688, "step": 4597 }, { "epoch": 1.5352253756260434, "grad_norm": 0.848487909383754, "learning_rate": 5.666270922943369e-06, "loss": 0.3832, "step": 4598 }, { "epoch": 1.535559265442404, "grad_norm": 0.8316003704617575, "learning_rate": 5.664345584800116e-06, "loss": 0.3633, "step": 4599 }, { "epoch": 1.5358931552587647, "grad_norm": 0.8251057281513459, "learning_rate": 5.662420146374093e-06, "loss": 0.3763, "step": 4600 }, { "epoch": 1.5362270450751252, "grad_norm": 0.8160197570974659, "learning_rate": 5.660494607955946e-06, "loss": 0.3585, "step": 4601 }, { "epoch": 1.5365609348914857, "grad_norm": 0.8341670220835372, "learning_rate": 5.658568969836333e-06, "loss": 0.3536, "step": 4602 }, { "epoch": 1.5368948247078464, "grad_norm": 0.8453058874458753, "learning_rate": 5.656643232305929e-06, "loss": 0.3775, "step": 4603 }, { "epoch": 1.5372287145242072, "grad_norm": 0.7879529873213548, "learning_rate": 5.654717395655424e-06, "loss": 0.3442, "step": 4604 }, { "epoch": 1.5375626043405677, "grad_norm": 0.8881546328795527, "learning_rate": 5.652791460175521e-06, "loss": 0.3889, "step": 4605 }, { "epoch": 1.5378964941569282, "grad_norm": 0.858759854631037, "learning_rate": 5.650865426156939e-06, "loss": 0.3644, "step": 4606 }, { "epoch": 1.5382303839732887, "grad_norm": 0.8833396354591703, "learning_rate": 5.648939293890415e-06, "loss": 0.3858, "step": 4607 }, { "epoch": 1.5385642737896494, "grad_norm": 0.862963672843676, "learning_rate": 5.647013063666698e-06, "loss": 0.3824, "step": 4608 }, { "epoch": 1.5388981636060102, "grad_norm": 0.8444915906783751, "learning_rate": 5.645086735776547e-06, "loss": 0.375, "step": 4609 }, { "epoch": 1.5392320534223707, "grad_norm": 0.8652676142802171, "learning_rate": 5.643160310510747e-06, "loss": 0.376, "step": 4610 }, { "epoch": 1.5395659432387312, "grad_norm": 0.8709039637001478, "learning_rate": 5.641233788160086e-06, "loss": 0.373, "step": 4611 }, { "epoch": 1.5398998330550917, "grad_norm": 0.8372371464498981, "learning_rate": 5.639307169015377e-06, "loss": 0.387, "step": 4612 }, { "epoch": 1.5402337228714524, "grad_norm": 0.8673602368111638, "learning_rate": 5.637380453367439e-06, "loss": 0.3779, "step": 4613 }, { "epoch": 1.5405676126878132, "grad_norm": 0.8909988085098335, "learning_rate": 5.6354536415071105e-06, "loss": 0.3913, "step": 4614 }, { "epoch": 1.5409015025041737, "grad_norm": 0.7986947692378141, "learning_rate": 5.633526733725243e-06, "loss": 0.3746, "step": 4615 }, { "epoch": 1.5412353923205342, "grad_norm": 0.8412499413118899, "learning_rate": 5.631599730312703e-06, "loss": 0.364, "step": 4616 }, { "epoch": 1.5415692821368947, "grad_norm": 0.8531197277610687, "learning_rate": 5.629672631560369e-06, "loss": 0.3814, "step": 4617 }, { "epoch": 1.5419031719532554, "grad_norm": 0.8318879992434413, "learning_rate": 5.627745437759138e-06, "loss": 0.3518, "step": 4618 }, { "epoch": 1.5422370617696162, "grad_norm": 0.83686466340434, "learning_rate": 5.6258181491999185e-06, "loss": 0.3664, "step": 4619 }, { "epoch": 1.5425709515859767, "grad_norm": 0.837108471319402, "learning_rate": 5.623890766173636e-06, "loss": 0.3743, "step": 4620 }, { "epoch": 1.5429048414023372, "grad_norm": 0.8495781589027019, "learning_rate": 5.621963288971226e-06, "loss": 0.3834, "step": 4621 }, { "epoch": 1.5432387312186977, "grad_norm": 0.8753821559882154, "learning_rate": 5.620035717883642e-06, "loss": 0.3835, "step": 4622 }, { "epoch": 1.5435726210350584, "grad_norm": 0.8923764953396041, "learning_rate": 5.61810805320185e-06, "loss": 0.3944, "step": 4623 }, { "epoch": 1.5439065108514192, "grad_norm": 0.8546541484910651, "learning_rate": 5.61618029521683e-06, "loss": 0.3744, "step": 4624 }, { "epoch": 1.5442404006677797, "grad_norm": 0.8405622809662, "learning_rate": 5.614252444219577e-06, "loss": 0.3755, "step": 4625 }, { "epoch": 1.5445742904841402, "grad_norm": 0.8130350789351539, "learning_rate": 5.6123245005011e-06, "loss": 0.3614, "step": 4626 }, { "epoch": 1.5449081803005007, "grad_norm": 0.8503548730598703, "learning_rate": 5.610396464352419e-06, "loss": 0.3742, "step": 4627 }, { "epoch": 1.5452420701168614, "grad_norm": 0.9226327582008876, "learning_rate": 5.6084683360645745e-06, "loss": 0.3721, "step": 4628 }, { "epoch": 1.5455759599332222, "grad_norm": 0.817603217937531, "learning_rate": 5.606540115928611e-06, "loss": 0.3549, "step": 4629 }, { "epoch": 1.5459098497495827, "grad_norm": 0.8376720942271959, "learning_rate": 5.604611804235601e-06, "loss": 0.3692, "step": 4630 }, { "epoch": 1.5462437395659432, "grad_norm": 0.8314871934121616, "learning_rate": 5.6026834012766155e-06, "loss": 0.3644, "step": 4631 }, { "epoch": 1.5465776293823037, "grad_norm": 0.8307927639392737, "learning_rate": 5.600754907342749e-06, "loss": 0.3754, "step": 4632 }, { "epoch": 1.5469115191986644, "grad_norm": 0.8677030612941866, "learning_rate": 5.5988263227251085e-06, "loss": 0.3801, "step": 4633 }, { "epoch": 1.5472454090150252, "grad_norm": 0.8286899467934559, "learning_rate": 5.5968976477148104e-06, "loss": 0.3688, "step": 4634 }, { "epoch": 1.5475792988313857, "grad_norm": 0.8088498251993087, "learning_rate": 5.59496888260299e-06, "loss": 0.3636, "step": 4635 }, { "epoch": 1.5479131886477462, "grad_norm": 0.8298123311270329, "learning_rate": 5.593040027680793e-06, "loss": 0.3664, "step": 4636 }, { "epoch": 1.5482470784641067, "grad_norm": 0.8606906918409066, "learning_rate": 5.5911110832393785e-06, "loss": 0.3785, "step": 4637 }, { "epoch": 1.5485809682804674, "grad_norm": 0.8186384896510772, "learning_rate": 5.589182049569921e-06, "loss": 0.3582, "step": 4638 }, { "epoch": 1.5489148580968282, "grad_norm": 0.8942259067890642, "learning_rate": 5.587252926963608e-06, "loss": 0.3708, "step": 4639 }, { "epoch": 1.5492487479131887, "grad_norm": 0.8439825400587206, "learning_rate": 5.585323715711639e-06, "loss": 0.3676, "step": 4640 }, { "epoch": 1.5495826377295492, "grad_norm": 0.8618787197831388, "learning_rate": 5.583394416105229e-06, "loss": 0.3667, "step": 4641 }, { "epoch": 1.5499165275459097, "grad_norm": 0.8612027429441096, "learning_rate": 5.581465028435603e-06, "loss": 0.3678, "step": 4642 }, { "epoch": 1.5502504173622704, "grad_norm": 0.8543767638935126, "learning_rate": 5.579535552994006e-06, "loss": 0.3828, "step": 4643 }, { "epoch": 1.5505843071786312, "grad_norm": 0.8118909115072624, "learning_rate": 5.577605990071685e-06, "loss": 0.3674, "step": 4644 }, { "epoch": 1.5509181969949917, "grad_norm": 0.8670338511849125, "learning_rate": 5.5756763399599124e-06, "loss": 0.3759, "step": 4645 }, { "epoch": 1.5512520868113522, "grad_norm": 0.8473681753925495, "learning_rate": 5.573746602949968e-06, "loss": 0.3835, "step": 4646 }, { "epoch": 1.5515859766277127, "grad_norm": 0.8219248457983265, "learning_rate": 5.571816779333141e-06, "loss": 0.3677, "step": 4647 }, { "epoch": 1.5519198664440734, "grad_norm": 0.8663893132599488, "learning_rate": 5.569886869400743e-06, "loss": 0.3832, "step": 4648 }, { "epoch": 1.5522537562604342, "grad_norm": 0.7963071044144713, "learning_rate": 5.567956873444089e-06, "loss": 0.3743, "step": 4649 }, { "epoch": 1.5525876460767947, "grad_norm": 0.8551296279568785, "learning_rate": 5.566026791754514e-06, "loss": 0.3689, "step": 4650 }, { "epoch": 1.5529215358931552, "grad_norm": 0.8702495739319698, "learning_rate": 5.5640966246233615e-06, "loss": 0.3808, "step": 4651 }, { "epoch": 1.553255425709516, "grad_norm": 0.8822278202564868, "learning_rate": 5.562166372341991e-06, "loss": 0.3979, "step": 4652 }, { "epoch": 1.5535893155258764, "grad_norm": 0.7899223318118294, "learning_rate": 5.5602360352017735e-06, "loss": 0.3657, "step": 4653 }, { "epoch": 1.5539232053422372, "grad_norm": 0.8419618696225292, "learning_rate": 5.55830561349409e-06, "loss": 0.3796, "step": 4654 }, { "epoch": 1.5542570951585977, "grad_norm": 0.8242240677977208, "learning_rate": 5.556375107510341e-06, "loss": 0.3756, "step": 4655 }, { "epoch": 1.5545909849749582, "grad_norm": 0.8631088232272038, "learning_rate": 5.554444517541935e-06, "loss": 0.3778, "step": 4656 }, { "epoch": 1.554924874791319, "grad_norm": 0.8392896104851216, "learning_rate": 5.552513843880292e-06, "loss": 0.3529, "step": 4657 }, { "epoch": 1.5552587646076794, "grad_norm": 0.8633350498524849, "learning_rate": 5.550583086816848e-06, "loss": 0.3715, "step": 4658 }, { "epoch": 1.5555926544240402, "grad_norm": 0.8327215135995499, "learning_rate": 5.54865224664305e-06, "loss": 0.3726, "step": 4659 }, { "epoch": 1.5559265442404007, "grad_norm": 0.8486751268782865, "learning_rate": 5.546721323650357e-06, "loss": 0.3638, "step": 4660 }, { "epoch": 1.5562604340567612, "grad_norm": 0.8649113328047423, "learning_rate": 5.544790318130243e-06, "loss": 0.3669, "step": 4661 }, { "epoch": 1.556594323873122, "grad_norm": 0.8350563490196505, "learning_rate": 5.542859230374192e-06, "loss": 0.3688, "step": 4662 }, { "epoch": 1.5569282136894824, "grad_norm": 0.8727300836411366, "learning_rate": 5.540928060673701e-06, "loss": 0.3752, "step": 4663 }, { "epoch": 1.5572621035058432, "grad_norm": 0.834563182801128, "learning_rate": 5.538996809320277e-06, "loss": 0.3633, "step": 4664 }, { "epoch": 1.5575959933222037, "grad_norm": 0.844437805350316, "learning_rate": 5.537065476605445e-06, "loss": 0.3642, "step": 4665 }, { "epoch": 1.5579298831385642, "grad_norm": 0.8060379135208736, "learning_rate": 5.535134062820741e-06, "loss": 0.3461, "step": 4666 }, { "epoch": 1.558263772954925, "grad_norm": 0.8745890422048247, "learning_rate": 5.533202568257705e-06, "loss": 0.3797, "step": 4667 }, { "epoch": 1.5585976627712856, "grad_norm": 0.8326568605677381, "learning_rate": 5.531270993207901e-06, "loss": 0.3743, "step": 4668 }, { "epoch": 1.5589315525876462, "grad_norm": 0.8458792882907076, "learning_rate": 5.529339337962898e-06, "loss": 0.3758, "step": 4669 }, { "epoch": 1.5592654424040067, "grad_norm": 0.8662321243244201, "learning_rate": 5.527407602814277e-06, "loss": 0.3757, "step": 4670 }, { "epoch": 1.5595993322203672, "grad_norm": 0.8821652456644898, "learning_rate": 5.525475788053637e-06, "loss": 0.3871, "step": 4671 }, { "epoch": 1.559933222036728, "grad_norm": 0.8220061420300301, "learning_rate": 5.523543893972581e-06, "loss": 0.3669, "step": 4672 }, { "epoch": 1.5602671118530886, "grad_norm": 0.8068053214643155, "learning_rate": 5.521611920862728e-06, "loss": 0.352, "step": 4673 }, { "epoch": 1.5606010016694492, "grad_norm": 0.8412316432002273, "learning_rate": 5.519679869015712e-06, "loss": 0.3678, "step": 4674 }, { "epoch": 1.5609348914858097, "grad_norm": 0.8407949092785038, "learning_rate": 5.51774773872317e-06, "loss": 0.3809, "step": 4675 }, { "epoch": 1.5612687813021702, "grad_norm": 0.8472894583051875, "learning_rate": 5.515815530276765e-06, "loss": 0.3763, "step": 4676 }, { "epoch": 1.561602671118531, "grad_norm": 0.8803072271783369, "learning_rate": 5.5138832439681546e-06, "loss": 0.3892, "step": 4677 }, { "epoch": 1.5619365609348916, "grad_norm": 0.8283086300212832, "learning_rate": 5.511950880089022e-06, "loss": 0.3559, "step": 4678 }, { "epoch": 1.5622704507512521, "grad_norm": 0.884836096563403, "learning_rate": 5.510018438931055e-06, "loss": 0.3616, "step": 4679 }, { "epoch": 1.5626043405676127, "grad_norm": 0.8342700649274848, "learning_rate": 5.508085920785956e-06, "loss": 0.3679, "step": 4680 }, { "epoch": 1.5629382303839732, "grad_norm": 0.8464870550926202, "learning_rate": 5.506153325945438e-06, "loss": 0.379, "step": 4681 }, { "epoch": 1.563272120200334, "grad_norm": 0.8871767164241731, "learning_rate": 5.504220654701224e-06, "loss": 0.385, "step": 4682 }, { "epoch": 1.5636060100166946, "grad_norm": 0.8061542524129185, "learning_rate": 5.502287907345052e-06, "loss": 0.3657, "step": 4683 }, { "epoch": 1.5639398998330551, "grad_norm": 0.8721837989344724, "learning_rate": 5.5003550841686705e-06, "loss": 0.3752, "step": 4684 }, { "epoch": 1.5642737896494157, "grad_norm": 0.8903385943039924, "learning_rate": 5.4984221854638345e-06, "loss": 0.381, "step": 4685 }, { "epoch": 1.5646076794657762, "grad_norm": 0.8435905747298711, "learning_rate": 5.49648921152232e-06, "loss": 0.3778, "step": 4686 }, { "epoch": 1.564941569282137, "grad_norm": 0.8213633237026012, "learning_rate": 5.494556162635903e-06, "loss": 0.3563, "step": 4687 }, { "epoch": 1.5652754590984976, "grad_norm": 0.84030342416887, "learning_rate": 5.4926230390963805e-06, "loss": 0.3682, "step": 4688 }, { "epoch": 1.5656093489148581, "grad_norm": 0.8584962062651341, "learning_rate": 5.4906898411955576e-06, "loss": 0.3636, "step": 4689 }, { "epoch": 1.5659432387312187, "grad_norm": 0.8460976450213196, "learning_rate": 5.4887565692252466e-06, "loss": 0.3733, "step": 4690 }, { "epoch": 1.5662771285475792, "grad_norm": 0.8465256608204751, "learning_rate": 5.4868232234772774e-06, "loss": 0.3805, "step": 4691 }, { "epoch": 1.56661101836394, "grad_norm": 0.8509016769324597, "learning_rate": 5.484889804243486e-06, "loss": 0.3655, "step": 4692 }, { "epoch": 1.5669449081803006, "grad_norm": 0.8690009584610198, "learning_rate": 5.482956311815722e-06, "loss": 0.3761, "step": 4693 }, { "epoch": 1.5672787979966611, "grad_norm": 0.8557734630828415, "learning_rate": 5.481022746485847e-06, "loss": 0.3749, "step": 4694 }, { "epoch": 1.5676126878130217, "grad_norm": 0.8556898639391988, "learning_rate": 5.479089108545728e-06, "loss": 0.3665, "step": 4695 }, { "epoch": 1.5679465776293822, "grad_norm": 0.8611478027287763, "learning_rate": 5.477155398287253e-06, "loss": 0.389, "step": 4696 }, { "epoch": 1.568280467445743, "grad_norm": 0.8115610472595713, "learning_rate": 5.475221616002311e-06, "loss": 0.355, "step": 4697 }, { "epoch": 1.5686143572621036, "grad_norm": 0.9095656790418029, "learning_rate": 5.473287761982804e-06, "loss": 0.4018, "step": 4698 }, { "epoch": 1.5689482470784641, "grad_norm": 0.8187443954883665, "learning_rate": 5.471353836520653e-06, "loss": 0.3538, "step": 4699 }, { "epoch": 1.5692821368948247, "grad_norm": 0.8376266789014236, "learning_rate": 5.469419839907777e-06, "loss": 0.3843, "step": 4700 }, { "epoch": 1.5696160267111852, "grad_norm": 0.8252713188768002, "learning_rate": 5.467485772436116e-06, "loss": 0.3692, "step": 4701 }, { "epoch": 1.569949916527546, "grad_norm": 0.8348409085216223, "learning_rate": 5.465551634397618e-06, "loss": 0.3703, "step": 4702 }, { "epoch": 1.5702838063439066, "grad_norm": 0.8718129054900104, "learning_rate": 5.4636174260842366e-06, "loss": 0.3817, "step": 4703 }, { "epoch": 1.5706176961602671, "grad_norm": 0.791769084105282, "learning_rate": 5.461683147787942e-06, "loss": 0.3599, "step": 4704 }, { "epoch": 1.5709515859766277, "grad_norm": 0.8899411133950356, "learning_rate": 5.459748799800714e-06, "loss": 0.378, "step": 4705 }, { "epoch": 1.5712854757929882, "grad_norm": 0.9047422433308694, "learning_rate": 5.457814382414538e-06, "loss": 0.3866, "step": 4706 }, { "epoch": 1.571619365609349, "grad_norm": 0.8673170900484725, "learning_rate": 5.4558798959214195e-06, "loss": 0.3774, "step": 4707 }, { "epoch": 1.5719532554257096, "grad_norm": 0.8404285113623038, "learning_rate": 5.453945340613364e-06, "loss": 0.3623, "step": 4708 }, { "epoch": 1.5722871452420701, "grad_norm": 0.8204364058292412, "learning_rate": 5.452010716782396e-06, "loss": 0.3659, "step": 4709 }, { "epoch": 1.5726210350584306, "grad_norm": 0.8708474836911899, "learning_rate": 5.450076024720541e-06, "loss": 0.3818, "step": 4710 }, { "epoch": 1.5729549248747912, "grad_norm": 0.8882282275625685, "learning_rate": 5.448141264719844e-06, "loss": 0.3859, "step": 4711 }, { "epoch": 1.573288814691152, "grad_norm": 0.8427285201541228, "learning_rate": 5.446206437072357e-06, "loss": 0.3853, "step": 4712 }, { "epoch": 1.5736227045075126, "grad_norm": 0.8871440430687846, "learning_rate": 5.444271542070139e-06, "loss": 0.3668, "step": 4713 }, { "epoch": 1.5739565943238731, "grad_norm": 0.8160368052811744, "learning_rate": 5.442336580005264e-06, "loss": 0.3726, "step": 4714 }, { "epoch": 1.5742904841402336, "grad_norm": 0.8451238775420626, "learning_rate": 5.440401551169813e-06, "loss": 0.3743, "step": 4715 }, { "epoch": 1.5746243739565942, "grad_norm": 0.8200771617290292, "learning_rate": 5.4384664558558765e-06, "loss": 0.3558, "step": 4716 }, { "epoch": 1.574958263772955, "grad_norm": 0.8286630472618757, "learning_rate": 5.436531294355561e-06, "loss": 0.3617, "step": 4717 }, { "epoch": 1.5752921535893156, "grad_norm": 0.8231124987309704, "learning_rate": 5.4345960669609745e-06, "loss": 0.3637, "step": 4718 }, { "epoch": 1.5756260434056761, "grad_norm": 0.805746224967024, "learning_rate": 5.43266077396424e-06, "loss": 0.3378, "step": 4719 }, { "epoch": 1.5759599332220366, "grad_norm": 0.8491338325002805, "learning_rate": 5.430725415657491e-06, "loss": 0.3627, "step": 4720 }, { "epoch": 1.5762938230383974, "grad_norm": 0.8052836259632493, "learning_rate": 5.428789992332867e-06, "loss": 0.3689, "step": 4721 }, { "epoch": 1.576627712854758, "grad_norm": 0.8654111768710204, "learning_rate": 5.426854504282521e-06, "loss": 0.3636, "step": 4722 }, { "epoch": 1.5769616026711186, "grad_norm": 0.8981134007659112, "learning_rate": 5.4249189517986125e-06, "loss": 0.3797, "step": 4723 }, { "epoch": 1.5772954924874791, "grad_norm": 0.8388823513715419, "learning_rate": 5.4229833351733154e-06, "loss": 0.3695, "step": 4724 }, { "epoch": 1.5776293823038396, "grad_norm": 0.8888588189051352, "learning_rate": 5.421047654698809e-06, "loss": 0.3947, "step": 4725 }, { "epoch": 1.5779632721202004, "grad_norm": 0.8506685380123549, "learning_rate": 5.419111910667283e-06, "loss": 0.3676, "step": 4726 }, { "epoch": 1.5782971619365609, "grad_norm": 0.8052970421230617, "learning_rate": 5.417176103370939e-06, "loss": 0.3619, "step": 4727 }, { "epoch": 1.5786310517529216, "grad_norm": 0.8067012612872889, "learning_rate": 5.415240233101985e-06, "loss": 0.3649, "step": 4728 }, { "epoch": 1.5789649415692821, "grad_norm": 0.8125474662940604, "learning_rate": 5.41330430015264e-06, "loss": 0.3607, "step": 4729 }, { "epoch": 1.5792988313856426, "grad_norm": 0.8502889356528823, "learning_rate": 5.411368304815135e-06, "loss": 0.3699, "step": 4730 }, { "epoch": 1.5796327212020034, "grad_norm": 0.8114937614211326, "learning_rate": 5.409432247381705e-06, "loss": 0.3573, "step": 4731 }, { "epoch": 1.5799666110183639, "grad_norm": 0.8199004217749751, "learning_rate": 5.407496128144599e-06, "loss": 0.3588, "step": 4732 }, { "epoch": 1.5803005008347246, "grad_norm": 0.8866363832300106, "learning_rate": 5.405559947396072e-06, "loss": 0.3777, "step": 4733 }, { "epoch": 1.5806343906510851, "grad_norm": 0.8579030163178626, "learning_rate": 5.403623705428391e-06, "loss": 0.3844, "step": 4734 }, { "epoch": 1.5809682804674456, "grad_norm": 0.8642722279215914, "learning_rate": 5.4016874025338325e-06, "loss": 0.3728, "step": 4735 }, { "epoch": 1.5813021702838064, "grad_norm": 0.8328563821484761, "learning_rate": 5.399751039004679e-06, "loss": 0.3653, "step": 4736 }, { "epoch": 1.581636060100167, "grad_norm": 0.8610857464744904, "learning_rate": 5.397814615133224e-06, "loss": 0.377, "step": 4737 }, { "epoch": 1.5819699499165276, "grad_norm": 0.8572641806585894, "learning_rate": 5.395878131211772e-06, "loss": 0.3662, "step": 4738 }, { "epoch": 1.5823038397328881, "grad_norm": 0.8439445519601297, "learning_rate": 5.39394158753263e-06, "loss": 0.3615, "step": 4739 }, { "epoch": 1.5826377295492486, "grad_norm": 0.8064659097759334, "learning_rate": 5.392004984388125e-06, "loss": 0.3539, "step": 4740 }, { "epoch": 1.5829716193656094, "grad_norm": 0.8417906907168456, "learning_rate": 5.390068322070582e-06, "loss": 0.3751, "step": 4741 }, { "epoch": 1.58330550918197, "grad_norm": 0.8659675151806434, "learning_rate": 5.388131600872342e-06, "loss": 0.3828, "step": 4742 }, { "epoch": 1.5836393989983306, "grad_norm": 0.8544103551036113, "learning_rate": 5.386194821085751e-06, "loss": 0.3903, "step": 4743 }, { "epoch": 1.5839732888146911, "grad_norm": 0.8345938269819708, "learning_rate": 5.384257983003167e-06, "loss": 0.37, "step": 4744 }, { "epoch": 1.5843071786310516, "grad_norm": 0.7971905781325904, "learning_rate": 5.3823210869169536e-06, "loss": 0.3618, "step": 4745 }, { "epoch": 1.5846410684474124, "grad_norm": 0.8459076141959699, "learning_rate": 5.380384133119485e-06, "loss": 0.3736, "step": 4746 }, { "epoch": 1.584974958263773, "grad_norm": 0.8471795009011593, "learning_rate": 5.378447121903146e-06, "loss": 0.3728, "step": 4747 }, { "epoch": 1.5853088480801336, "grad_norm": 0.8357313749000993, "learning_rate": 5.376510053560323e-06, "loss": 0.3654, "step": 4748 }, { "epoch": 1.5856427378964941, "grad_norm": 0.82211916664702, "learning_rate": 5.374572928383419e-06, "loss": 0.3718, "step": 4749 }, { "epoch": 1.5859766277128546, "grad_norm": 0.8392236086899758, "learning_rate": 5.3726357466648445e-06, "loss": 0.3758, "step": 4750 }, { "epoch": 1.5863105175292154, "grad_norm": 0.8174208023742019, "learning_rate": 5.3706985086970134e-06, "loss": 0.3754, "step": 4751 }, { "epoch": 1.586644407345576, "grad_norm": 0.8997670170669695, "learning_rate": 5.368761214772352e-06, "loss": 0.3912, "step": 4752 }, { "epoch": 1.5869782971619366, "grad_norm": 0.843084953781644, "learning_rate": 5.366823865183295e-06, "loss": 0.3724, "step": 4753 }, { "epoch": 1.5873121869782971, "grad_norm": 0.8209153763678145, "learning_rate": 5.364886460222283e-06, "loss": 0.36, "step": 4754 }, { "epoch": 1.5876460767946576, "grad_norm": 0.8174470085473404, "learning_rate": 5.362949000181769e-06, "loss": 0.3625, "step": 4755 }, { "epoch": 1.5879799666110184, "grad_norm": 0.8746689208062027, "learning_rate": 5.36101148535421e-06, "loss": 0.3966, "step": 4756 }, { "epoch": 1.588313856427379, "grad_norm": 0.8241391967787879, "learning_rate": 5.3590739160320745e-06, "loss": 0.355, "step": 4757 }, { "epoch": 1.5886477462437396, "grad_norm": 0.8088676031854173, "learning_rate": 5.357136292507838e-06, "loss": 0.3611, "step": 4758 }, { "epoch": 1.5889816360601001, "grad_norm": 0.8212073335726022, "learning_rate": 5.355198615073983e-06, "loss": 0.3616, "step": 4759 }, { "epoch": 1.5893155258764606, "grad_norm": 0.842097370002555, "learning_rate": 5.353260884023003e-06, "loss": 0.3675, "step": 4760 }, { "epoch": 1.5896494156928214, "grad_norm": 0.8480060950509275, "learning_rate": 5.3513230996473965e-06, "loss": 0.372, "step": 4761 }, { "epoch": 1.589983305509182, "grad_norm": 0.8576867934648627, "learning_rate": 5.349385262239671e-06, "loss": 0.3702, "step": 4762 }, { "epoch": 1.5903171953255426, "grad_norm": 0.816677742609679, "learning_rate": 5.347447372092346e-06, "loss": 0.3491, "step": 4763 }, { "epoch": 1.5906510851419031, "grad_norm": 0.8354348926808749, "learning_rate": 5.34550942949794e-06, "loss": 0.3613, "step": 4764 }, { "epoch": 1.5909849749582636, "grad_norm": 0.8682174267377847, "learning_rate": 5.34357143474899e-06, "loss": 0.3722, "step": 4765 }, { "epoch": 1.5913188647746244, "grad_norm": 0.8620959995171902, "learning_rate": 5.3416333881380336e-06, "loss": 0.3632, "step": 4766 }, { "epoch": 1.591652754590985, "grad_norm": 0.8507778088946338, "learning_rate": 5.339695289957617e-06, "loss": 0.3649, "step": 4767 }, { "epoch": 1.5919866444073456, "grad_norm": 0.8908105463851795, "learning_rate": 5.337757140500298e-06, "loss": 0.3762, "step": 4768 }, { "epoch": 1.5923205342237061, "grad_norm": 0.8078847280256638, "learning_rate": 5.335818940058639e-06, "loss": 0.3526, "step": 4769 }, { "epoch": 1.5926544240400666, "grad_norm": 0.8092590997435708, "learning_rate": 5.333880688925209e-06, "loss": 0.356, "step": 4770 }, { "epoch": 1.5929883138564274, "grad_norm": 0.8728410762486825, "learning_rate": 5.331942387392589e-06, "loss": 0.3732, "step": 4771 }, { "epoch": 1.593322203672788, "grad_norm": 0.8188603847482195, "learning_rate": 5.330004035753363e-06, "loss": 0.3363, "step": 4772 }, { "epoch": 1.5936560934891486, "grad_norm": 0.8662146617926758, "learning_rate": 5.328065634300127e-06, "loss": 0.3683, "step": 4773 }, { "epoch": 1.5939899833055091, "grad_norm": 0.8666911632352965, "learning_rate": 5.326127183325479e-06, "loss": 0.3754, "step": 4774 }, { "epoch": 1.5943238731218696, "grad_norm": 0.815999007849376, "learning_rate": 5.324188683122029e-06, "loss": 0.3678, "step": 4775 }, { "epoch": 1.5946577629382304, "grad_norm": 0.810416025806422, "learning_rate": 5.322250133982394e-06, "loss": 0.3469, "step": 4776 }, { "epoch": 1.594991652754591, "grad_norm": 0.8162865077469083, "learning_rate": 5.320311536199198e-06, "loss": 0.3805, "step": 4777 }, { "epoch": 1.5953255425709516, "grad_norm": 0.8167106150602038, "learning_rate": 5.318372890065068e-06, "loss": 0.357, "step": 4778 }, { "epoch": 1.5956594323873121, "grad_norm": 0.8652696319165198, "learning_rate": 5.3164341958726465e-06, "loss": 0.3705, "step": 4779 }, { "epoch": 1.5959933222036726, "grad_norm": 0.861285088006653, "learning_rate": 5.314495453914575e-06, "loss": 0.3721, "step": 4780 }, { "epoch": 1.5963272120200334, "grad_norm": 0.8588157494517206, "learning_rate": 5.312556664483509e-06, "loss": 0.3821, "step": 4781 }, { "epoch": 1.596661101836394, "grad_norm": 0.8398699227255694, "learning_rate": 5.310617827872105e-06, "loss": 0.3671, "step": 4782 }, { "epoch": 1.5969949916527546, "grad_norm": 0.829689209394999, "learning_rate": 5.3086789443730345e-06, "loss": 0.3506, "step": 4783 }, { "epoch": 1.5973288814691151, "grad_norm": 0.8232405711247712, "learning_rate": 5.306740014278967e-06, "loss": 0.3595, "step": 4784 }, { "epoch": 1.5976627712854758, "grad_norm": 0.8363328669738963, "learning_rate": 5.304801037882585e-06, "loss": 0.3574, "step": 4785 }, { "epoch": 1.5979966611018364, "grad_norm": 0.8679396434363364, "learning_rate": 5.302862015476578e-06, "loss": 0.3656, "step": 4786 }, { "epoch": 1.598330550918197, "grad_norm": 0.8755120932726997, "learning_rate": 5.300922947353639e-06, "loss": 0.3745, "step": 4787 }, { "epoch": 1.5986644407345576, "grad_norm": 0.8640634158366307, "learning_rate": 5.298983833806469e-06, "loss": 0.3701, "step": 4788 }, { "epoch": 1.5989983305509181, "grad_norm": 0.8177771677111677, "learning_rate": 5.297044675127778e-06, "loss": 0.352, "step": 4789 }, { "epoch": 1.5993322203672788, "grad_norm": 0.833325841279846, "learning_rate": 5.2951054716102825e-06, "loss": 0.3555, "step": 4790 }, { "epoch": 1.5996661101836394, "grad_norm": 0.8678858180348941, "learning_rate": 5.2931662235467026e-06, "loss": 0.3816, "step": 4791 }, { "epoch": 1.6, "grad_norm": 0.8555683216478825, "learning_rate": 5.2912269312297685e-06, "loss": 0.3851, "step": 4792 }, { "epoch": 1.6003338898163606, "grad_norm": 0.8349198030900942, "learning_rate": 5.289287594952215e-06, "loss": 0.355, "step": 4793 }, { "epoch": 1.600667779632721, "grad_norm": 0.842634610446948, "learning_rate": 5.287348215006784e-06, "loss": 0.3741, "step": 4794 }, { "epoch": 1.6010016694490818, "grad_norm": 0.8309831580343346, "learning_rate": 5.285408791686224e-06, "loss": 0.3679, "step": 4795 }, { "epoch": 1.6013355592654424, "grad_norm": 0.8167292366723968, "learning_rate": 5.283469325283294e-06, "loss": 0.3802, "step": 4796 }, { "epoch": 1.601669449081803, "grad_norm": 0.8029255241156277, "learning_rate": 5.281529816090751e-06, "loss": 0.3759, "step": 4797 }, { "epoch": 1.6020033388981636, "grad_norm": 0.8261189423649998, "learning_rate": 5.279590264401367e-06, "loss": 0.378, "step": 4798 }, { "epoch": 1.602337228714524, "grad_norm": 0.83553412004015, "learning_rate": 5.277650670507916e-06, "loss": 0.3617, "step": 4799 }, { "epoch": 1.6026711185308848, "grad_norm": 0.832661408041557, "learning_rate": 5.275711034703177e-06, "loss": 0.3631, "step": 4800 }, { "epoch": 1.6030050083472456, "grad_norm": 0.8742862254007748, "learning_rate": 5.273771357279939e-06, "loss": 0.3716, "step": 4801 }, { "epoch": 1.603338898163606, "grad_norm": 0.8589756464592267, "learning_rate": 5.271831638530996e-06, "loss": 0.3689, "step": 4802 }, { "epoch": 1.6036727879799666, "grad_norm": 0.8706964253226589, "learning_rate": 5.269891878749147e-06, "loss": 0.3639, "step": 4803 }, { "epoch": 1.604006677796327, "grad_norm": 0.8472946176206526, "learning_rate": 5.2679520782272e-06, "loss": 0.3855, "step": 4804 }, { "epoch": 1.6043405676126878, "grad_norm": 0.8175575140657638, "learning_rate": 5.266012237257965e-06, "loss": 0.3815, "step": 4805 }, { "epoch": 1.6046744574290486, "grad_norm": 0.8188199452374001, "learning_rate": 5.2640723561342626e-06, "loss": 0.3528, "step": 4806 }, { "epoch": 1.605008347245409, "grad_norm": 0.9205220426612801, "learning_rate": 5.262132435148914e-06, "loss": 0.3945, "step": 4807 }, { "epoch": 1.6053422370617696, "grad_norm": 0.8726325357730149, "learning_rate": 5.260192474594754e-06, "loss": 0.3886, "step": 4808 }, { "epoch": 1.60567612687813, "grad_norm": 0.8265906206719456, "learning_rate": 5.258252474764616e-06, "loss": 0.37, "step": 4809 }, { "epoch": 1.6060100166944908, "grad_norm": 0.7766864841820094, "learning_rate": 5.256312435951344e-06, "loss": 0.3505, "step": 4810 }, { "epoch": 1.6063439065108516, "grad_norm": 0.85868881384738, "learning_rate": 5.254372358447786e-06, "loss": 0.3895, "step": 4811 }, { "epoch": 1.606677796327212, "grad_norm": 0.8759020956266392, "learning_rate": 5.252432242546794e-06, "loss": 0.3768, "step": 4812 }, { "epoch": 1.6070116861435726, "grad_norm": 0.8496840374665948, "learning_rate": 5.25049208854123e-06, "loss": 0.381, "step": 4813 }, { "epoch": 1.607345575959933, "grad_norm": 0.8754843007201013, "learning_rate": 5.2485518967239625e-06, "loss": 0.3859, "step": 4814 }, { "epoch": 1.6076794657762938, "grad_norm": 0.9436139500243994, "learning_rate": 5.2466116673878555e-06, "loss": 0.391, "step": 4815 }, { "epoch": 1.6080133555926546, "grad_norm": 0.8678177111065968, "learning_rate": 5.244671400825794e-06, "loss": 0.3657, "step": 4816 }, { "epoch": 1.608347245409015, "grad_norm": 0.8463492983759023, "learning_rate": 5.242731097330653e-06, "loss": 0.3699, "step": 4817 }, { "epoch": 1.6086811352253756, "grad_norm": 0.8076244140011215, "learning_rate": 5.240790757195328e-06, "loss": 0.3591, "step": 4818 }, { "epoch": 1.609015025041736, "grad_norm": 0.830867373482969, "learning_rate": 5.238850380712709e-06, "loss": 0.364, "step": 4819 }, { "epoch": 1.6093489148580968, "grad_norm": 0.8296934249106451, "learning_rate": 5.2369099681756945e-06, "loss": 0.3596, "step": 4820 }, { "epoch": 1.6096828046744576, "grad_norm": 0.857202880661064, "learning_rate": 5.234969519877191e-06, "loss": 0.3873, "step": 4821 }, { "epoch": 1.610016694490818, "grad_norm": 0.8629106883533055, "learning_rate": 5.233029036110108e-06, "loss": 0.383, "step": 4822 }, { "epoch": 1.6103505843071786, "grad_norm": 0.857818361035063, "learning_rate": 5.2310885171673605e-06, "loss": 0.3621, "step": 4823 }, { "epoch": 1.610684474123539, "grad_norm": 0.8773057650182201, "learning_rate": 5.229147963341871e-06, "loss": 0.3843, "step": 4824 }, { "epoch": 1.6110183639398998, "grad_norm": 0.8317783491843667, "learning_rate": 5.227207374926563e-06, "loss": 0.3583, "step": 4825 }, { "epoch": 1.6113522537562606, "grad_norm": 0.8310774008342322, "learning_rate": 5.22526675221437e-06, "loss": 0.3596, "step": 4826 }, { "epoch": 1.611686143572621, "grad_norm": 0.8106684307810604, "learning_rate": 5.223326095498226e-06, "loss": 0.3594, "step": 4827 }, { "epoch": 1.6120200333889816, "grad_norm": 0.8328265776660528, "learning_rate": 5.2213854050710745e-06, "loss": 0.3535, "step": 4828 }, { "epoch": 1.612353923205342, "grad_norm": 0.8312399124199655, "learning_rate": 5.219444681225862e-06, "loss": 0.366, "step": 4829 }, { "epoch": 1.6126878130217028, "grad_norm": 0.8307865267986433, "learning_rate": 5.2175039242555395e-06, "loss": 0.3653, "step": 4830 }, { "epoch": 1.6130217028380636, "grad_norm": 0.8655529265079591, "learning_rate": 5.215563134453064e-06, "loss": 0.3693, "step": 4831 }, { "epoch": 1.613355592654424, "grad_norm": 0.8340696237879075, "learning_rate": 5.2136223121113975e-06, "loss": 0.3687, "step": 4832 }, { "epoch": 1.6136894824707846, "grad_norm": 0.8432592370254091, "learning_rate": 5.2116814575235066e-06, "loss": 0.3666, "step": 4833 }, { "epoch": 1.614023372287145, "grad_norm": 0.8445678959873947, "learning_rate": 5.2097405709823616e-06, "loss": 0.3686, "step": 4834 }, { "epoch": 1.6143572621035058, "grad_norm": 0.7925263011175375, "learning_rate": 5.207799652780939e-06, "loss": 0.3575, "step": 4835 }, { "epoch": 1.6146911519198666, "grad_norm": 0.8474897324643095, "learning_rate": 5.205858703212221e-06, "loss": 0.3658, "step": 4836 }, { "epoch": 1.615025041736227, "grad_norm": 0.8539221522724607, "learning_rate": 5.203917722569194e-06, "loss": 0.3674, "step": 4837 }, { "epoch": 1.6153589315525876, "grad_norm": 0.8937191886635781, "learning_rate": 5.201976711144844e-06, "loss": 0.3746, "step": 4838 }, { "epoch": 1.615692821368948, "grad_norm": 0.859513037852171, "learning_rate": 5.200035669232173e-06, "loss": 0.3821, "step": 4839 }, { "epoch": 1.6160267111853088, "grad_norm": 0.7951133582119061, "learning_rate": 5.198094597124173e-06, "loss": 0.3608, "step": 4840 }, { "epoch": 1.6163606010016696, "grad_norm": 0.8268945517533635, "learning_rate": 5.196153495113855e-06, "loss": 0.3642, "step": 4841 }, { "epoch": 1.61669449081803, "grad_norm": 0.825824658349619, "learning_rate": 5.194212363494223e-06, "loss": 0.3661, "step": 4842 }, { "epoch": 1.6170283806343906, "grad_norm": 0.8007621354678469, "learning_rate": 5.192271202558294e-06, "loss": 0.3534, "step": 4843 }, { "epoch": 1.617362270450751, "grad_norm": 0.807732520614521, "learning_rate": 5.190330012599083e-06, "loss": 0.3658, "step": 4844 }, { "epoch": 1.6176961602671118, "grad_norm": 0.8333513043183854, "learning_rate": 5.188388793909613e-06, "loss": 0.3622, "step": 4845 }, { "epoch": 1.6180300500834726, "grad_norm": 0.8697377207873621, "learning_rate": 5.18644754678291e-06, "loss": 0.3624, "step": 4846 }, { "epoch": 1.618363939899833, "grad_norm": 0.8261997126720202, "learning_rate": 5.1845062715120065e-06, "loss": 0.3587, "step": 4847 }, { "epoch": 1.6186978297161936, "grad_norm": 0.8431620827866987, "learning_rate": 5.1825649683899334e-06, "loss": 0.3694, "step": 4848 }, { "epoch": 1.619031719532554, "grad_norm": 0.843455950187753, "learning_rate": 5.180623637709735e-06, "loss": 0.3729, "step": 4849 }, { "epoch": 1.6193656093489148, "grad_norm": 0.8876661887344155, "learning_rate": 5.178682279764451e-06, "loss": 0.3858, "step": 4850 }, { "epoch": 1.6196994991652756, "grad_norm": 0.9008630368697345, "learning_rate": 5.176740894847129e-06, "loss": 0.3742, "step": 4851 }, { "epoch": 1.620033388981636, "grad_norm": 0.9196886154995442, "learning_rate": 5.174799483250822e-06, "loss": 0.385, "step": 4852 }, { "epoch": 1.6203672787979966, "grad_norm": 0.8657777702088386, "learning_rate": 5.172858045268584e-06, "loss": 0.3762, "step": 4853 }, { "epoch": 1.6207011686143573, "grad_norm": 0.842902695486518, "learning_rate": 5.170916581193475e-06, "loss": 0.369, "step": 4854 }, { "epoch": 1.6210350584307178, "grad_norm": 0.8181783747538925, "learning_rate": 5.16897509131856e-06, "loss": 0.3616, "step": 4855 }, { "epoch": 1.6213689482470786, "grad_norm": 0.8291149788355224, "learning_rate": 5.167033575936904e-06, "loss": 0.3634, "step": 4856 }, { "epoch": 1.621702838063439, "grad_norm": 0.8309153931625717, "learning_rate": 5.165092035341579e-06, "loss": 0.3695, "step": 4857 }, { "epoch": 1.6220367278797996, "grad_norm": 0.8298087263413041, "learning_rate": 5.16315046982566e-06, "loss": 0.371, "step": 4858 }, { "epoch": 1.6223706176961603, "grad_norm": 0.7883115638573348, "learning_rate": 5.161208879682226e-06, "loss": 0.3461, "step": 4859 }, { "epoch": 1.6227045075125208, "grad_norm": 0.847335674972883, "learning_rate": 5.159267265204361e-06, "loss": 0.3622, "step": 4860 }, { "epoch": 1.6230383973288816, "grad_norm": 0.8567708379883844, "learning_rate": 5.1573256266851465e-06, "loss": 0.3657, "step": 4861 }, { "epoch": 1.623372287145242, "grad_norm": 0.7935369651274559, "learning_rate": 5.155383964417678e-06, "loss": 0.3431, "step": 4862 }, { "epoch": 1.6237061769616026, "grad_norm": 0.8412862303245983, "learning_rate": 5.153442278695045e-06, "loss": 0.3734, "step": 4863 }, { "epoch": 1.6240400667779633, "grad_norm": 0.8228777108888687, "learning_rate": 5.151500569810345e-06, "loss": 0.3665, "step": 4864 }, { "epoch": 1.6243739565943238, "grad_norm": 0.8866077606158662, "learning_rate": 5.14955883805668e-06, "loss": 0.3641, "step": 4865 }, { "epoch": 1.6247078464106846, "grad_norm": 0.8148466121225045, "learning_rate": 5.147617083727151e-06, "loss": 0.3636, "step": 4866 }, { "epoch": 1.625041736227045, "grad_norm": 0.8348112686665111, "learning_rate": 5.145675307114868e-06, "loss": 0.3718, "step": 4867 }, { "epoch": 1.6253756260434056, "grad_norm": 0.8350724188176432, "learning_rate": 5.143733508512941e-06, "loss": 0.3702, "step": 4868 }, { "epoch": 1.6257095158597663, "grad_norm": 0.8040803787066425, "learning_rate": 5.1417916882144806e-06, "loss": 0.3671, "step": 4869 }, { "epoch": 1.626043405676127, "grad_norm": 0.9133275348421807, "learning_rate": 5.13984984651261e-06, "loss": 0.384, "step": 4870 }, { "epoch": 1.6263772954924876, "grad_norm": 0.8142454023319023, "learning_rate": 5.137907983700444e-06, "loss": 0.3787, "step": 4871 }, { "epoch": 1.626711185308848, "grad_norm": 0.8408950878051604, "learning_rate": 5.135966100071112e-06, "loss": 0.366, "step": 4872 }, { "epoch": 1.6270450751252086, "grad_norm": 0.8245307587261237, "learning_rate": 5.134024195917734e-06, "loss": 0.3749, "step": 4873 }, { "epoch": 1.6273789649415693, "grad_norm": 0.8323144022238508, "learning_rate": 5.132082271533445e-06, "loss": 0.3603, "step": 4874 }, { "epoch": 1.62771285475793, "grad_norm": 0.8671825639984271, "learning_rate": 5.130140327211376e-06, "loss": 0.365, "step": 4875 }, { "epoch": 1.6280467445742905, "grad_norm": 0.8345542891672131, "learning_rate": 5.128198363244663e-06, "loss": 0.3795, "step": 4876 }, { "epoch": 1.628380634390651, "grad_norm": 0.843905031483186, "learning_rate": 5.1262563799264455e-06, "loss": 0.361, "step": 4877 }, { "epoch": 1.6287145242070116, "grad_norm": 0.813806229323046, "learning_rate": 5.1243143775498635e-06, "loss": 0.3697, "step": 4878 }, { "epoch": 1.6290484140233723, "grad_norm": 0.8077628577061765, "learning_rate": 5.122372356408063e-06, "loss": 0.36, "step": 4879 }, { "epoch": 1.629382303839733, "grad_norm": 0.852027665516943, "learning_rate": 5.120430316794194e-06, "loss": 0.3801, "step": 4880 }, { "epoch": 1.6297161936560935, "grad_norm": 0.8545582840178899, "learning_rate": 5.118488259001403e-06, "loss": 0.3652, "step": 4881 }, { "epoch": 1.630050083472454, "grad_norm": 0.848886014654246, "learning_rate": 5.116546183322843e-06, "loss": 0.3679, "step": 4882 }, { "epoch": 1.6303839732888146, "grad_norm": 0.8532673191532679, "learning_rate": 5.114604090051674e-06, "loss": 0.375, "step": 4883 }, { "epoch": 1.6307178631051753, "grad_norm": 0.8251166116236136, "learning_rate": 5.112661979481047e-06, "loss": 0.3547, "step": 4884 }, { "epoch": 1.631051752921536, "grad_norm": 0.8528787105538578, "learning_rate": 5.1107198519041324e-06, "loss": 0.3695, "step": 4885 }, { "epoch": 1.6313856427378965, "grad_norm": 1.518669958253283, "learning_rate": 5.108777707614085e-06, "loss": 0.3661, "step": 4886 }, { "epoch": 1.631719532554257, "grad_norm": 0.8601373948511205, "learning_rate": 5.106835546904077e-06, "loss": 0.3726, "step": 4887 }, { "epoch": 1.6320534223706176, "grad_norm": 0.820259811805761, "learning_rate": 5.104893370067274e-06, "loss": 0.3654, "step": 4888 }, { "epoch": 1.6323873121869783, "grad_norm": 0.8651146330101496, "learning_rate": 5.102951177396849e-06, "loss": 0.371, "step": 4889 }, { "epoch": 1.632721202003339, "grad_norm": 0.8346494960411639, "learning_rate": 5.1010089691859735e-06, "loss": 0.3607, "step": 4890 }, { "epoch": 1.6330550918196995, "grad_norm": 0.8869372168586424, "learning_rate": 5.099066745727824e-06, "loss": 0.3777, "step": 4891 }, { "epoch": 1.63338898163606, "grad_norm": 0.8380409683808917, "learning_rate": 5.097124507315577e-06, "loss": 0.3819, "step": 4892 }, { "epoch": 1.6337228714524206, "grad_norm": 0.8806397632373189, "learning_rate": 5.095182254242418e-06, "loss": 0.3683, "step": 4893 }, { "epoch": 1.6340567612687813, "grad_norm": 0.8334669374075899, "learning_rate": 5.0932399868015225e-06, "loss": 0.374, "step": 4894 }, { "epoch": 1.634390651085142, "grad_norm": 0.857104430855134, "learning_rate": 5.09129770528608e-06, "loss": 0.3725, "step": 4895 }, { "epoch": 1.6347245409015025, "grad_norm": 0.8433373591645837, "learning_rate": 5.089355409989275e-06, "loss": 0.3503, "step": 4896 }, { "epoch": 1.635058430717863, "grad_norm": 0.8658309746724933, "learning_rate": 5.087413101204298e-06, "loss": 0.3789, "step": 4897 }, { "epoch": 1.6353923205342236, "grad_norm": 0.838950750526386, "learning_rate": 5.08547077922434e-06, "loss": 0.3795, "step": 4898 }, { "epoch": 1.6357262103505843, "grad_norm": 0.8622356872688876, "learning_rate": 5.083528444342593e-06, "loss": 0.3484, "step": 4899 }, { "epoch": 1.636060100166945, "grad_norm": 0.9188691245602659, "learning_rate": 5.081586096852251e-06, "loss": 0.3529, "step": 4900 }, { "epoch": 1.6363939899833055, "grad_norm": 0.8441224357148613, "learning_rate": 5.0796437370465125e-06, "loss": 0.3633, "step": 4901 }, { "epoch": 1.636727879799666, "grad_norm": 0.8904542663214188, "learning_rate": 5.077701365218574e-06, "loss": 0.3648, "step": 4902 }, { "epoch": 1.6370617696160266, "grad_norm": 0.8434370045523742, "learning_rate": 5.07575898166164e-06, "loss": 0.3624, "step": 4903 }, { "epoch": 1.6373956594323873, "grad_norm": 0.8579603722234157, "learning_rate": 5.073816586668908e-06, "loss": 0.3695, "step": 4904 }, { "epoch": 1.637729549248748, "grad_norm": 0.8514122804019335, "learning_rate": 5.071874180533585e-06, "loss": 0.3812, "step": 4905 }, { "epoch": 1.6380634390651085, "grad_norm": 0.8663476049758349, "learning_rate": 5.069931763548875e-06, "loss": 0.3748, "step": 4906 }, { "epoch": 1.638397328881469, "grad_norm": 0.8545151182394541, "learning_rate": 5.0679893360079865e-06, "loss": 0.3754, "step": 4907 }, { "epoch": 1.6387312186978296, "grad_norm": 0.8261840071342469, "learning_rate": 5.066046898204129e-06, "loss": 0.374, "step": 4908 }, { "epoch": 1.6390651085141903, "grad_norm": 0.8262449628218808, "learning_rate": 5.064104450430511e-06, "loss": 0.365, "step": 4909 }, { "epoch": 1.639398998330551, "grad_norm": 0.8016828439834943, "learning_rate": 5.062161992980345e-06, "loss": 0.3591, "step": 4910 }, { "epoch": 1.6397328881469115, "grad_norm": 0.8343433938962515, "learning_rate": 5.060219526146844e-06, "loss": 0.3659, "step": 4911 }, { "epoch": 1.640066777963272, "grad_norm": 0.8218319290557085, "learning_rate": 5.0582770502232236e-06, "loss": 0.3616, "step": 4912 }, { "epoch": 1.6404006677796326, "grad_norm": 0.8625601260332859, "learning_rate": 5.056334565502701e-06, "loss": 0.3662, "step": 4913 }, { "epoch": 1.6407345575959933, "grad_norm": 0.8319427701894473, "learning_rate": 5.054392072278491e-06, "loss": 0.3608, "step": 4914 }, { "epoch": 1.641068447412354, "grad_norm": 0.8598927448720335, "learning_rate": 5.0524495708438135e-06, "loss": 0.3608, "step": 4915 }, { "epoch": 1.6414023372287145, "grad_norm": 0.8205016287872425, "learning_rate": 5.05050706149189e-06, "loss": 0.3683, "step": 4916 }, { "epoch": 1.641736227045075, "grad_norm": 0.8040030433338281, "learning_rate": 5.048564544515939e-06, "loss": 0.3608, "step": 4917 }, { "epoch": 1.6420701168614358, "grad_norm": 0.8541526500018054, "learning_rate": 5.0466220202091866e-06, "loss": 0.3762, "step": 4918 }, { "epoch": 1.6424040066777963, "grad_norm": 0.8596074424712867, "learning_rate": 5.044679488864852e-06, "loss": 0.365, "step": 4919 }, { "epoch": 1.642737896494157, "grad_norm": 0.8198296483808423, "learning_rate": 5.042736950776162e-06, "loss": 0.3545, "step": 4920 }, { "epoch": 1.6430717863105175, "grad_norm": 0.8194274948716783, "learning_rate": 5.040794406236344e-06, "loss": 0.3504, "step": 4921 }, { "epoch": 1.643405676126878, "grad_norm": 0.8135998394151756, "learning_rate": 5.03885185553862e-06, "loss": 0.3603, "step": 4922 }, { "epoch": 1.6437395659432388, "grad_norm": 0.7887053712873404, "learning_rate": 5.036909298976221e-06, "loss": 0.3566, "step": 4923 }, { "epoch": 1.6440734557595993, "grad_norm": 0.8565876637492129, "learning_rate": 5.034966736842374e-06, "loss": 0.3885, "step": 4924 }, { "epoch": 1.64440734557596, "grad_norm": 0.8261636325317759, "learning_rate": 5.033024169430307e-06, "loss": 0.3663, "step": 4925 }, { "epoch": 1.6447412353923205, "grad_norm": 0.7868178976536163, "learning_rate": 5.0310815970332546e-06, "loss": 0.368, "step": 4926 }, { "epoch": 1.645075125208681, "grad_norm": 0.8172182363344174, "learning_rate": 5.029139019944443e-06, "loss": 0.3613, "step": 4927 }, { "epoch": 1.6454090150250418, "grad_norm": 0.8075879481483511, "learning_rate": 5.027196438457104e-06, "loss": 0.3469, "step": 4928 }, { "epoch": 1.6457429048414023, "grad_norm": 0.8590100316445424, "learning_rate": 5.0252538528644715e-06, "loss": 0.384, "step": 4929 }, { "epoch": 1.646076794657763, "grad_norm": 0.8476956876457243, "learning_rate": 5.023311263459777e-06, "loss": 0.3754, "step": 4930 }, { "epoch": 1.6464106844741235, "grad_norm": 0.7901464126267812, "learning_rate": 5.021368670536254e-06, "loss": 0.347, "step": 4931 }, { "epoch": 1.646744574290484, "grad_norm": 0.8837190281781069, "learning_rate": 5.019426074387137e-06, "loss": 0.3655, "step": 4932 }, { "epoch": 1.6470784641068448, "grad_norm": 0.8145266129536676, "learning_rate": 5.0174834753056604e-06, "loss": 0.3637, "step": 4933 }, { "epoch": 1.6474123539232055, "grad_norm": 0.8381076656478599, "learning_rate": 5.015540873585057e-06, "loss": 0.3635, "step": 4934 }, { "epoch": 1.647746243739566, "grad_norm": 0.8284571754709285, "learning_rate": 5.013598269518562e-06, "loss": 0.358, "step": 4935 }, { "epoch": 1.6480801335559265, "grad_norm": 0.9048956727384538, "learning_rate": 5.011655663399416e-06, "loss": 0.3865, "step": 4936 }, { "epoch": 1.648414023372287, "grad_norm": 0.8353143923875466, "learning_rate": 5.009713055520848e-06, "loss": 0.3515, "step": 4937 }, { "epoch": 1.6487479131886478, "grad_norm": 0.8278267955078684, "learning_rate": 5.007770446176099e-06, "loss": 0.3521, "step": 4938 }, { "epoch": 1.6490818030050085, "grad_norm": 0.8157722167147483, "learning_rate": 5.005827835658402e-06, "loss": 0.363, "step": 4939 }, { "epoch": 1.649415692821369, "grad_norm": 0.8226077369380588, "learning_rate": 5.003885224260997e-06, "loss": 0.3574, "step": 4940 }, { "epoch": 1.6497495826377295, "grad_norm": 0.8440550567144807, "learning_rate": 5.001942612277117e-06, "loss": 0.3607, "step": 4941 }, { "epoch": 1.65008347245409, "grad_norm": 0.8100344100206879, "learning_rate": 5e-06, "loss": 0.3598, "step": 4942 }, { "epoch": 1.6504173622704508, "grad_norm": 0.8656764655433411, "learning_rate": 4.998057387722884e-06, "loss": 0.3646, "step": 4943 }, { "epoch": 1.6507512520868115, "grad_norm": 0.9038200542970058, "learning_rate": 4.996114775739006e-06, "loss": 0.3854, "step": 4944 }, { "epoch": 1.651085141903172, "grad_norm": 0.8248390977579931, "learning_rate": 4.994172164341597e-06, "loss": 0.3733, "step": 4945 }, { "epoch": 1.6514190317195325, "grad_norm": 0.8256639186037504, "learning_rate": 4.992229553823902e-06, "loss": 0.3621, "step": 4946 }, { "epoch": 1.651752921535893, "grad_norm": 0.7767210436091394, "learning_rate": 4.990286944479153e-06, "loss": 0.3426, "step": 4947 }, { "epoch": 1.6520868113522538, "grad_norm": 0.8471735134052545, "learning_rate": 4.988344336600586e-06, "loss": 0.3703, "step": 4948 }, { "epoch": 1.6524207011686145, "grad_norm": 0.8186435075149142, "learning_rate": 4.986401730481438e-06, "loss": 0.3513, "step": 4949 }, { "epoch": 1.652754590984975, "grad_norm": 0.8244935448535715, "learning_rate": 4.984459126414944e-06, "loss": 0.3653, "step": 4950 }, { "epoch": 1.6530884808013355, "grad_norm": 0.8436646932238253, "learning_rate": 4.982516524694342e-06, "loss": 0.358, "step": 4951 }, { "epoch": 1.653422370617696, "grad_norm": 0.8479857335263005, "learning_rate": 4.980573925612865e-06, "loss": 0.3755, "step": 4952 }, { "epoch": 1.6537562604340568, "grad_norm": 0.8452036179397793, "learning_rate": 4.978631329463746e-06, "loss": 0.3592, "step": 4953 }, { "epoch": 1.6540901502504175, "grad_norm": 0.8161028464091397, "learning_rate": 4.976688736540225e-06, "loss": 0.3595, "step": 4954 }, { "epoch": 1.654424040066778, "grad_norm": 0.8355254268921103, "learning_rate": 4.974746147135531e-06, "loss": 0.3623, "step": 4955 }, { "epoch": 1.6547579298831385, "grad_norm": 0.8476393975874061, "learning_rate": 4.972803561542898e-06, "loss": 0.3682, "step": 4956 }, { "epoch": 1.655091819699499, "grad_norm": 0.8184737233684668, "learning_rate": 4.970860980055558e-06, "loss": 0.3618, "step": 4957 }, { "epoch": 1.6554257095158598, "grad_norm": 0.8495151967967788, "learning_rate": 4.968918402966746e-06, "loss": 0.3847, "step": 4958 }, { "epoch": 1.6557595993322205, "grad_norm": 0.8261302004026215, "learning_rate": 4.966975830569694e-06, "loss": 0.3512, "step": 4959 }, { "epoch": 1.656093489148581, "grad_norm": 0.8533464095464358, "learning_rate": 4.965033263157628e-06, "loss": 0.3862, "step": 4960 }, { "epoch": 1.6564273789649415, "grad_norm": 0.8488312603928495, "learning_rate": 4.9630907010237805e-06, "loss": 0.376, "step": 4961 }, { "epoch": 1.656761268781302, "grad_norm": 0.8141553539489573, "learning_rate": 4.961148144461381e-06, "loss": 0.3548, "step": 4962 }, { "epoch": 1.6570951585976628, "grad_norm": 0.8082803942226707, "learning_rate": 4.959205593763659e-06, "loss": 0.357, "step": 4963 }, { "epoch": 1.6574290484140235, "grad_norm": 0.8247022754937015, "learning_rate": 4.957263049223839e-06, "loss": 0.3648, "step": 4964 }, { "epoch": 1.657762938230384, "grad_norm": 0.8193822277766386, "learning_rate": 4.9553205111351485e-06, "loss": 0.369, "step": 4965 }, { "epoch": 1.6580968280467445, "grad_norm": 0.823887231739732, "learning_rate": 4.953377979790815e-06, "loss": 0.3621, "step": 4966 }, { "epoch": 1.658430717863105, "grad_norm": 0.8720852265503193, "learning_rate": 4.9514354554840625e-06, "loss": 0.3787, "step": 4967 }, { "epoch": 1.6587646076794658, "grad_norm": 0.8265986641243749, "learning_rate": 4.949492938508112e-06, "loss": 0.3533, "step": 4968 }, { "epoch": 1.6590984974958265, "grad_norm": 0.8521178209565349, "learning_rate": 4.947550429156187e-06, "loss": 0.3726, "step": 4969 }, { "epoch": 1.659432387312187, "grad_norm": 0.8084409552735783, "learning_rate": 4.9456079277215105e-06, "loss": 0.3619, "step": 4970 }, { "epoch": 1.6597662771285475, "grad_norm": 0.8130465078261078, "learning_rate": 4.943665434497301e-06, "loss": 0.3463, "step": 4971 }, { "epoch": 1.660100166944908, "grad_norm": 0.8417687369551693, "learning_rate": 4.941722949776778e-06, "loss": 0.3637, "step": 4972 }, { "epoch": 1.6604340567612688, "grad_norm": 0.8208021191239563, "learning_rate": 4.939780473853158e-06, "loss": 0.3603, "step": 4973 }, { "epoch": 1.6607679465776295, "grad_norm": 0.8334138203804675, "learning_rate": 4.937838007019657e-06, "loss": 0.3722, "step": 4974 }, { "epoch": 1.66110183639399, "grad_norm": 0.8148544696748345, "learning_rate": 4.93589554956949e-06, "loss": 0.3656, "step": 4975 }, { "epoch": 1.6614357262103505, "grad_norm": 0.8240968922452412, "learning_rate": 4.933953101795873e-06, "loss": 0.3763, "step": 4976 }, { "epoch": 1.661769616026711, "grad_norm": 0.8272984668602724, "learning_rate": 4.932010663992015e-06, "loss": 0.3454, "step": 4977 }, { "epoch": 1.6621035058430718, "grad_norm": 0.8141735007631529, "learning_rate": 4.930068236451125e-06, "loss": 0.3557, "step": 4978 }, { "epoch": 1.6624373956594325, "grad_norm": 0.8223159355418511, "learning_rate": 4.928125819466417e-06, "loss": 0.3568, "step": 4979 }, { "epoch": 1.662771285475793, "grad_norm": 0.8865753267124565, "learning_rate": 4.9261834133310936e-06, "loss": 0.3858, "step": 4980 }, { "epoch": 1.6631051752921535, "grad_norm": 0.8248196616602507, "learning_rate": 4.924241018338362e-06, "loss": 0.355, "step": 4981 }, { "epoch": 1.663439065108514, "grad_norm": 0.8494188560340998, "learning_rate": 4.922298634781426e-06, "loss": 0.3573, "step": 4982 }, { "epoch": 1.6637729549248748, "grad_norm": 0.8712037446125709, "learning_rate": 4.920356262953489e-06, "loss": 0.3704, "step": 4983 }, { "epoch": 1.6641068447412355, "grad_norm": 0.8345859487298971, "learning_rate": 4.918413903147751e-06, "loss": 0.3624, "step": 4984 }, { "epoch": 1.664440734557596, "grad_norm": 0.8869743370103433, "learning_rate": 4.91647155565741e-06, "loss": 0.3819, "step": 4985 }, { "epoch": 1.6647746243739565, "grad_norm": 0.8652097160354599, "learning_rate": 4.914529220775661e-06, "loss": 0.3662, "step": 4986 }, { "epoch": 1.6651085141903172, "grad_norm": 0.8642564307641153, "learning_rate": 4.9125868987957035e-06, "loss": 0.3738, "step": 4987 }, { "epoch": 1.6654424040066778, "grad_norm": 0.8586025260773672, "learning_rate": 4.910644590010726e-06, "loss": 0.3774, "step": 4988 }, { "epoch": 1.6657762938230385, "grad_norm": 0.7919699387453666, "learning_rate": 4.908702294713922e-06, "loss": 0.3493, "step": 4989 }, { "epoch": 1.666110183639399, "grad_norm": 0.8319343710728205, "learning_rate": 4.906760013198478e-06, "loss": 0.3694, "step": 4990 }, { "epoch": 1.6664440734557595, "grad_norm": 0.8593407365093187, "learning_rate": 4.904817745757584e-06, "loss": 0.3795, "step": 4991 }, { "epoch": 1.6667779632721202, "grad_norm": 0.8446389467291071, "learning_rate": 4.902875492684424e-06, "loss": 0.3723, "step": 4992 }, { "epoch": 1.6671118530884808, "grad_norm": 0.8441745696046902, "learning_rate": 4.9009332542721785e-06, "loss": 0.3625, "step": 4993 }, { "epoch": 1.6674457429048415, "grad_norm": 0.8593293167186676, "learning_rate": 4.898991030814028e-06, "loss": 0.3742, "step": 4994 }, { "epoch": 1.667779632721202, "grad_norm": 0.8163653927278032, "learning_rate": 4.897048822603153e-06, "loss": 0.3524, "step": 4995 }, { "epoch": 1.6681135225375625, "grad_norm": 0.8565636877628114, "learning_rate": 4.8951066299327274e-06, "loss": 0.3633, "step": 4996 }, { "epoch": 1.6684474123539232, "grad_norm": 0.8204026571764718, "learning_rate": 4.8931644530959245e-06, "loss": 0.3679, "step": 4997 }, { "epoch": 1.6687813021702838, "grad_norm": 0.8248193617220293, "learning_rate": 4.891222292385915e-06, "loss": 0.3602, "step": 4998 }, { "epoch": 1.6691151919866445, "grad_norm": 0.8358339236451555, "learning_rate": 4.889280148095869e-06, "loss": 0.3703, "step": 4999 }, { "epoch": 1.669449081803005, "grad_norm": 0.8813819302814989, "learning_rate": 4.887338020518953e-06, "loss": 0.3839, "step": 5000 }, { "epoch": 1.6697829716193655, "grad_norm": 0.8652583978559514, "learning_rate": 4.88539590994833e-06, "loss": 0.3699, "step": 5001 }, { "epoch": 1.6701168614357262, "grad_norm": 0.856130992672627, "learning_rate": 4.883453816677158e-06, "loss": 0.3741, "step": 5002 }, { "epoch": 1.670450751252087, "grad_norm": 0.848638243616635, "learning_rate": 4.881511740998599e-06, "loss": 0.3734, "step": 5003 }, { "epoch": 1.6707846410684475, "grad_norm": 0.8618597025566784, "learning_rate": 4.879569683205808e-06, "loss": 0.3828, "step": 5004 }, { "epoch": 1.671118530884808, "grad_norm": 0.8044229066262293, "learning_rate": 4.877627643591938e-06, "loss": 0.364, "step": 5005 }, { "epoch": 1.6714524207011685, "grad_norm": 0.8227903677337837, "learning_rate": 4.875685622450139e-06, "loss": 0.3646, "step": 5006 }, { "epoch": 1.6717863105175292, "grad_norm": 0.819279710480042, "learning_rate": 4.873743620073556e-06, "loss": 0.3502, "step": 5007 }, { "epoch": 1.67212020033389, "grad_norm": 0.8457138049162214, "learning_rate": 4.871801636755338e-06, "loss": 0.3754, "step": 5008 }, { "epoch": 1.6724540901502505, "grad_norm": 0.8159007564583156, "learning_rate": 4.869859672788626e-06, "loss": 0.3553, "step": 5009 }, { "epoch": 1.672787979966611, "grad_norm": 0.8664715709212104, "learning_rate": 4.867917728466556e-06, "loss": 0.3563, "step": 5010 }, { "epoch": 1.6731218697829715, "grad_norm": 0.8523739937677386, "learning_rate": 4.865975804082266e-06, "loss": 0.3679, "step": 5011 }, { "epoch": 1.6734557595993322, "grad_norm": 0.8188532448129069, "learning_rate": 4.86403389992889e-06, "loss": 0.3626, "step": 5012 }, { "epoch": 1.673789649415693, "grad_norm": 0.8208184385911473, "learning_rate": 4.862092016299557e-06, "loss": 0.3554, "step": 5013 }, { "epoch": 1.6741235392320535, "grad_norm": 0.8159068553312032, "learning_rate": 4.8601501534873925e-06, "loss": 0.358, "step": 5014 }, { "epoch": 1.674457429048414, "grad_norm": 0.8608858694096122, "learning_rate": 4.858208311785519e-06, "loss": 0.3666, "step": 5015 }, { "epoch": 1.6747913188647745, "grad_norm": 0.8319668511135356, "learning_rate": 4.856266491487061e-06, "loss": 0.3554, "step": 5016 }, { "epoch": 1.6751252086811352, "grad_norm": 0.8638045613639033, "learning_rate": 4.854324692885134e-06, "loss": 0.3925, "step": 5017 }, { "epoch": 1.675459098497496, "grad_norm": 0.8402018774090297, "learning_rate": 4.852382916272851e-06, "loss": 0.366, "step": 5018 }, { "epoch": 1.6757929883138565, "grad_norm": 0.8607465006059608, "learning_rate": 4.850441161943321e-06, "loss": 0.3923, "step": 5019 }, { "epoch": 1.676126878130217, "grad_norm": 0.8439594512147476, "learning_rate": 4.848499430189657e-06, "loss": 0.3819, "step": 5020 }, { "epoch": 1.6764607679465775, "grad_norm": 0.8406646447696602, "learning_rate": 4.846557721304957e-06, "loss": 0.3719, "step": 5021 }, { "epoch": 1.6767946577629382, "grad_norm": 0.8097885974034065, "learning_rate": 4.844616035582324e-06, "loss": 0.3562, "step": 5022 }, { "epoch": 1.677128547579299, "grad_norm": 0.8296137019417364, "learning_rate": 4.842674373314853e-06, "loss": 0.367, "step": 5023 }, { "epoch": 1.6774624373956595, "grad_norm": 0.7815541668297832, "learning_rate": 4.84073273479564e-06, "loss": 0.349, "step": 5024 }, { "epoch": 1.67779632721202, "grad_norm": 0.8259129006835219, "learning_rate": 4.838791120317775e-06, "loss": 0.3602, "step": 5025 }, { "epoch": 1.6781302170283805, "grad_norm": 0.8246342441796811, "learning_rate": 4.836849530174342e-06, "loss": 0.3654, "step": 5026 }, { "epoch": 1.6784641068447412, "grad_norm": 0.8239698202843148, "learning_rate": 4.834907964658422e-06, "loss": 0.3626, "step": 5027 }, { "epoch": 1.678797996661102, "grad_norm": 0.8925134595509594, "learning_rate": 4.8329664240630975e-06, "loss": 0.3847, "step": 5028 }, { "epoch": 1.6791318864774625, "grad_norm": 0.8291206297382246, "learning_rate": 4.831024908681442e-06, "loss": 0.3612, "step": 5029 }, { "epoch": 1.679465776293823, "grad_norm": 0.8016205298132668, "learning_rate": 4.829083418806526e-06, "loss": 0.353, "step": 5030 }, { "epoch": 1.6797996661101835, "grad_norm": 0.8453229119072551, "learning_rate": 4.827141954731417e-06, "loss": 0.375, "step": 5031 }, { "epoch": 1.6801335559265442, "grad_norm": 0.8746299012033395, "learning_rate": 4.825200516749179e-06, "loss": 0.3771, "step": 5032 }, { "epoch": 1.680467445742905, "grad_norm": 0.864736140483068, "learning_rate": 4.823259105152874e-06, "loss": 0.3519, "step": 5033 }, { "epoch": 1.6808013355592655, "grad_norm": 0.8642770301126106, "learning_rate": 4.821317720235551e-06, "loss": 0.3806, "step": 5034 }, { "epoch": 1.681135225375626, "grad_norm": 0.8520391940361279, "learning_rate": 4.819376362290268e-06, "loss": 0.3773, "step": 5035 }, { "epoch": 1.6814691151919865, "grad_norm": 0.830280122666306, "learning_rate": 4.8174350316100665e-06, "loss": 0.3757, "step": 5036 }, { "epoch": 1.6818030050083472, "grad_norm": 0.8167409165569786, "learning_rate": 4.815493728487995e-06, "loss": 0.3483, "step": 5037 }, { "epoch": 1.682136894824708, "grad_norm": 0.8779312132186801, "learning_rate": 4.813552453217092e-06, "loss": 0.383, "step": 5038 }, { "epoch": 1.6824707846410685, "grad_norm": 0.8091333540659589, "learning_rate": 4.8116112060903894e-06, "loss": 0.3489, "step": 5039 }, { "epoch": 1.682804674457429, "grad_norm": 0.808452719375461, "learning_rate": 4.809669987400918e-06, "loss": 0.3481, "step": 5040 }, { "epoch": 1.6831385642737895, "grad_norm": 0.8374447963176206, "learning_rate": 4.807728797441707e-06, "loss": 0.3725, "step": 5041 }, { "epoch": 1.6834724540901502, "grad_norm": 0.8192513621718864, "learning_rate": 4.805787636505778e-06, "loss": 0.3581, "step": 5042 }, { "epoch": 1.683806343906511, "grad_norm": 0.9078973733369831, "learning_rate": 4.803846504886147e-06, "loss": 0.3812, "step": 5043 }, { "epoch": 1.6841402337228715, "grad_norm": 0.7986700384321094, "learning_rate": 4.8019054028758274e-06, "loss": 0.3454, "step": 5044 }, { "epoch": 1.684474123539232, "grad_norm": 0.852156675768388, "learning_rate": 4.799964330767829e-06, "loss": 0.3631, "step": 5045 }, { "epoch": 1.6848080133555925, "grad_norm": 0.8397426608226151, "learning_rate": 4.798023288855157e-06, "loss": 0.351, "step": 5046 }, { "epoch": 1.6851419031719532, "grad_norm": 0.9222048117523457, "learning_rate": 4.796082277430809e-06, "loss": 0.3789, "step": 5047 }, { "epoch": 1.685475792988314, "grad_norm": 0.7839043249985541, "learning_rate": 4.79414129678778e-06, "loss": 0.3497, "step": 5048 }, { "epoch": 1.6858096828046745, "grad_norm": 0.841487354172595, "learning_rate": 4.7922003472190615e-06, "loss": 0.3664, "step": 5049 }, { "epoch": 1.686143572621035, "grad_norm": 0.8140678965813253, "learning_rate": 4.790259429017639e-06, "loss": 0.3658, "step": 5050 }, { "epoch": 1.6864774624373957, "grad_norm": 0.8250114057467892, "learning_rate": 4.788318542476496e-06, "loss": 0.3651, "step": 5051 }, { "epoch": 1.6868113522537562, "grad_norm": 0.8426683282793316, "learning_rate": 4.7863776878886024e-06, "loss": 0.3681, "step": 5052 }, { "epoch": 1.687145242070117, "grad_norm": 0.7900426844509609, "learning_rate": 4.784436865546937e-06, "loss": 0.3528, "step": 5053 }, { "epoch": 1.6874791318864775, "grad_norm": 0.8224567732666027, "learning_rate": 4.782496075744462e-06, "loss": 0.3712, "step": 5054 }, { "epoch": 1.687813021702838, "grad_norm": 0.8472371748540259, "learning_rate": 4.78055531877414e-06, "loss": 0.3749, "step": 5055 }, { "epoch": 1.6881469115191987, "grad_norm": 0.8270515509297995, "learning_rate": 4.778614594928926e-06, "loss": 0.3399, "step": 5056 }, { "epoch": 1.6884808013355592, "grad_norm": 0.8291113519247449, "learning_rate": 4.776673904501775e-06, "loss": 0.375, "step": 5057 }, { "epoch": 1.68881469115192, "grad_norm": 0.8129740112989613, "learning_rate": 4.774733247785632e-06, "loss": 0.3679, "step": 5058 }, { "epoch": 1.6891485809682805, "grad_norm": 0.817176964045583, "learning_rate": 4.7727926250734396e-06, "loss": 0.3546, "step": 5059 }, { "epoch": 1.689482470784641, "grad_norm": 0.8678369615570495, "learning_rate": 4.77085203665813e-06, "loss": 0.3673, "step": 5060 }, { "epoch": 1.6898163606010017, "grad_norm": 0.8025642063942361, "learning_rate": 4.76891148283264e-06, "loss": 0.3497, "step": 5061 }, { "epoch": 1.6901502504173622, "grad_norm": 0.8585689035870321, "learning_rate": 4.766970963889894e-06, "loss": 0.3765, "step": 5062 }, { "epoch": 1.690484140233723, "grad_norm": 0.8371959408386547, "learning_rate": 4.76503048012281e-06, "loss": 0.3585, "step": 5063 }, { "epoch": 1.6908180300500835, "grad_norm": 0.8288295437342798, "learning_rate": 4.763090031824306e-06, "loss": 0.3622, "step": 5064 }, { "epoch": 1.691151919866444, "grad_norm": 0.8311444733363044, "learning_rate": 4.761149619287292e-06, "loss": 0.3611, "step": 5065 }, { "epoch": 1.6914858096828047, "grad_norm": 0.828714078069269, "learning_rate": 4.759209242804674e-06, "loss": 0.3611, "step": 5066 }, { "epoch": 1.6918196994991654, "grad_norm": 0.8170920799167433, "learning_rate": 4.757268902669348e-06, "loss": 0.3712, "step": 5067 }, { "epoch": 1.692153589315526, "grad_norm": 0.8667230351299027, "learning_rate": 4.7553285991742085e-06, "loss": 0.3695, "step": 5068 }, { "epoch": 1.6924874791318865, "grad_norm": 0.8197377443404248, "learning_rate": 4.7533883326121445e-06, "loss": 0.353, "step": 5069 }, { "epoch": 1.692821368948247, "grad_norm": 0.8007100560872069, "learning_rate": 4.751448103276039e-06, "loss": 0.3505, "step": 5070 }, { "epoch": 1.6931552587646077, "grad_norm": 0.8578360411614776, "learning_rate": 4.749507911458771e-06, "loss": 0.3822, "step": 5071 }, { "epoch": 1.6934891485809684, "grad_norm": 0.8137488002412732, "learning_rate": 4.7475677574532074e-06, "loss": 0.3618, "step": 5072 }, { "epoch": 1.693823038397329, "grad_norm": 0.8251277007079508, "learning_rate": 4.745627641552216e-06, "loss": 0.3608, "step": 5073 }, { "epoch": 1.6941569282136895, "grad_norm": 0.8187317865437518, "learning_rate": 4.743687564048657e-06, "loss": 0.3632, "step": 5074 }, { "epoch": 1.69449081803005, "grad_norm": 0.8366530486673542, "learning_rate": 4.741747525235385e-06, "loss": 0.3672, "step": 5075 }, { "epoch": 1.6948247078464107, "grad_norm": 0.8400075104404288, "learning_rate": 4.739807525405248e-06, "loss": 0.3705, "step": 5076 }, { "epoch": 1.6951585976627714, "grad_norm": 0.8504387991649781, "learning_rate": 4.737867564851086e-06, "loss": 0.3737, "step": 5077 }, { "epoch": 1.695492487479132, "grad_norm": 0.8437564359011326, "learning_rate": 4.735927643865738e-06, "loss": 0.3702, "step": 5078 }, { "epoch": 1.6958263772954925, "grad_norm": 0.9156593385680168, "learning_rate": 4.7339877627420375e-06, "loss": 0.371, "step": 5079 }, { "epoch": 1.696160267111853, "grad_norm": 0.8624793124444722, "learning_rate": 4.732047921772803e-06, "loss": 0.372, "step": 5080 }, { "epoch": 1.6964941569282137, "grad_norm": 0.8426586501699824, "learning_rate": 4.730108121250854e-06, "loss": 0.3812, "step": 5081 }, { "epoch": 1.6968280467445744, "grad_norm": 0.8387425960911483, "learning_rate": 4.728168361469005e-06, "loss": 0.3623, "step": 5082 }, { "epoch": 1.697161936560935, "grad_norm": 0.8358931626469136, "learning_rate": 4.726228642720063e-06, "loss": 0.3681, "step": 5083 }, { "epoch": 1.6974958263772955, "grad_norm": 0.8528076404233177, "learning_rate": 4.724288965296826e-06, "loss": 0.3679, "step": 5084 }, { "epoch": 1.697829716193656, "grad_norm": 0.8619384594315543, "learning_rate": 4.722349329492085e-06, "loss": 0.3755, "step": 5085 }, { "epoch": 1.6981636060100167, "grad_norm": 0.8140696516956203, "learning_rate": 4.720409735598635e-06, "loss": 0.3529, "step": 5086 }, { "epoch": 1.6984974958263774, "grad_norm": 0.8546270110095853, "learning_rate": 4.71847018390925e-06, "loss": 0.3682, "step": 5087 }, { "epoch": 1.698831385642738, "grad_norm": 0.8686875566804657, "learning_rate": 4.716530674716708e-06, "loss": 0.3846, "step": 5088 }, { "epoch": 1.6991652754590985, "grad_norm": 0.825763479761658, "learning_rate": 4.714591208313776e-06, "loss": 0.3692, "step": 5089 }, { "epoch": 1.699499165275459, "grad_norm": 0.8329129184962153, "learning_rate": 4.7126517849932175e-06, "loss": 0.3696, "step": 5090 }, { "epoch": 1.6998330550918197, "grad_norm": 0.8472030427386045, "learning_rate": 4.710712405047787e-06, "loss": 0.3768, "step": 5091 }, { "epoch": 1.7001669449081804, "grad_norm": 0.8197126500739307, "learning_rate": 4.708773068770234e-06, "loss": 0.3663, "step": 5092 }, { "epoch": 1.700500834724541, "grad_norm": 0.837378111563886, "learning_rate": 4.706833776453298e-06, "loss": 0.3606, "step": 5093 }, { "epoch": 1.7008347245409015, "grad_norm": 0.8001647101303432, "learning_rate": 4.704894528389719e-06, "loss": 0.3575, "step": 5094 }, { "epoch": 1.701168614357262, "grad_norm": 0.8300449079326353, "learning_rate": 4.702955324872223e-06, "loss": 0.3522, "step": 5095 }, { "epoch": 1.7015025041736227, "grad_norm": 0.8168208131995319, "learning_rate": 4.701016166193533e-06, "loss": 0.3508, "step": 5096 }, { "epoch": 1.7018363939899834, "grad_norm": 0.8355847371017023, "learning_rate": 4.699077052646364e-06, "loss": 0.3688, "step": 5097 }, { "epoch": 1.702170283806344, "grad_norm": 0.8041158747212038, "learning_rate": 4.697137984523422e-06, "loss": 0.3566, "step": 5098 }, { "epoch": 1.7025041736227045, "grad_norm": 0.819618169045727, "learning_rate": 4.695198962117416e-06, "loss": 0.356, "step": 5099 }, { "epoch": 1.702838063439065, "grad_norm": 0.8375973426433833, "learning_rate": 4.693259985721035e-06, "loss": 0.3672, "step": 5100 }, { "epoch": 1.7031719532554257, "grad_norm": 0.8174243832732584, "learning_rate": 4.691321055626968e-06, "loss": 0.3766, "step": 5101 }, { "epoch": 1.7035058430717864, "grad_norm": 0.8599016680686723, "learning_rate": 4.689382172127896e-06, "loss": 0.3811, "step": 5102 }, { "epoch": 1.703839732888147, "grad_norm": 0.9279533557261785, "learning_rate": 4.687443335516493e-06, "loss": 0.385, "step": 5103 }, { "epoch": 1.7041736227045075, "grad_norm": 0.8385641904729851, "learning_rate": 4.685504546085426e-06, "loss": 0.3649, "step": 5104 }, { "epoch": 1.704507512520868, "grad_norm": 0.83072126431368, "learning_rate": 4.683565804127357e-06, "loss": 0.37, "step": 5105 }, { "epoch": 1.7048414023372287, "grad_norm": 0.8367249619085589, "learning_rate": 4.681627109934932e-06, "loss": 0.3584, "step": 5106 }, { "epoch": 1.7051752921535894, "grad_norm": 0.7982750074589402, "learning_rate": 4.679688463800804e-06, "loss": 0.3524, "step": 5107 }, { "epoch": 1.70550918196995, "grad_norm": 0.8171138762294455, "learning_rate": 4.677749866017607e-06, "loss": 0.3644, "step": 5108 }, { "epoch": 1.7058430717863104, "grad_norm": 0.8915606186296338, "learning_rate": 4.675811316877972e-06, "loss": 0.3616, "step": 5109 }, { "epoch": 1.706176961602671, "grad_norm": 0.8720020122759814, "learning_rate": 4.673872816674522e-06, "loss": 0.3543, "step": 5110 }, { "epoch": 1.7065108514190317, "grad_norm": 0.8581640151368288, "learning_rate": 4.671934365699875e-06, "loss": 0.3551, "step": 5111 }, { "epoch": 1.7068447412353924, "grad_norm": 0.8006704588972517, "learning_rate": 4.6699959642466386e-06, "loss": 0.351, "step": 5112 }, { "epoch": 1.707178631051753, "grad_norm": 0.7885308629405903, "learning_rate": 4.668057612607413e-06, "loss": 0.3649, "step": 5113 }, { "epoch": 1.7075125208681134, "grad_norm": 0.8214451229115709, "learning_rate": 4.666119311074792e-06, "loss": 0.3502, "step": 5114 }, { "epoch": 1.707846410684474, "grad_norm": 0.8077705419078743, "learning_rate": 4.664181059941363e-06, "loss": 0.3602, "step": 5115 }, { "epoch": 1.7081803005008347, "grad_norm": 0.8426472467086255, "learning_rate": 4.6622428594997034e-06, "loss": 0.3561, "step": 5116 }, { "epoch": 1.7085141903171954, "grad_norm": 0.8623370349330006, "learning_rate": 4.660304710042385e-06, "loss": 0.3624, "step": 5117 }, { "epoch": 1.708848080133556, "grad_norm": 0.8566649377524486, "learning_rate": 4.658366611861967e-06, "loss": 0.3717, "step": 5118 }, { "epoch": 1.7091819699499164, "grad_norm": 0.8434853319301764, "learning_rate": 4.656428565251012e-06, "loss": 0.3675, "step": 5119 }, { "epoch": 1.7095158597662772, "grad_norm": 0.8471535431828383, "learning_rate": 4.654490570502061e-06, "loss": 0.3616, "step": 5120 }, { "epoch": 1.7098497495826377, "grad_norm": 0.8472351289612946, "learning_rate": 4.652552627907657e-06, "loss": 0.366, "step": 5121 }, { "epoch": 1.7101836393989984, "grad_norm": 0.8404625767580742, "learning_rate": 4.65061473776033e-06, "loss": 0.374, "step": 5122 }, { "epoch": 1.710517529215359, "grad_norm": 0.846149414276694, "learning_rate": 4.648676900352606e-06, "loss": 0.3616, "step": 5123 }, { "epoch": 1.7108514190317194, "grad_norm": 0.8650974606169785, "learning_rate": 4.646739115977e-06, "loss": 0.3803, "step": 5124 }, { "epoch": 1.7111853088480802, "grad_norm": 0.8545248834713705, "learning_rate": 4.6448013849260195e-06, "loss": 0.3701, "step": 5125 }, { "epoch": 1.7115191986644407, "grad_norm": 0.8166294629949481, "learning_rate": 4.642863707492163e-06, "loss": 0.3619, "step": 5126 }, { "epoch": 1.7118530884808014, "grad_norm": 0.8342267091957137, "learning_rate": 4.640926083967927e-06, "loss": 0.3767, "step": 5127 }, { "epoch": 1.712186978297162, "grad_norm": 0.8292201275285388, "learning_rate": 4.638988514645791e-06, "loss": 0.3424, "step": 5128 }, { "epoch": 1.7125208681135224, "grad_norm": 0.7833848688683744, "learning_rate": 4.637050999818233e-06, "loss": 0.346, "step": 5129 }, { "epoch": 1.7128547579298832, "grad_norm": 0.8720520248038115, "learning_rate": 4.635113539777719e-06, "loss": 0.3684, "step": 5130 }, { "epoch": 1.7131886477462437, "grad_norm": 0.8600840889421961, "learning_rate": 4.633176134816706e-06, "loss": 0.379, "step": 5131 }, { "epoch": 1.7135225375626044, "grad_norm": 0.87418551859426, "learning_rate": 4.63123878522765e-06, "loss": 0.3861, "step": 5132 }, { "epoch": 1.713856427378965, "grad_norm": 0.7861053470473252, "learning_rate": 4.629301491302988e-06, "loss": 0.3498, "step": 5133 }, { "epoch": 1.7141903171953254, "grad_norm": 0.8157337121977356, "learning_rate": 4.627364253335157e-06, "loss": 0.3539, "step": 5134 }, { "epoch": 1.7145242070116862, "grad_norm": 0.8299751543749194, "learning_rate": 4.625427071616581e-06, "loss": 0.3702, "step": 5135 }, { "epoch": 1.714858096828047, "grad_norm": 0.803912649105203, "learning_rate": 4.623489946439678e-06, "loss": 0.3384, "step": 5136 }, { "epoch": 1.7151919866444074, "grad_norm": 0.825948724419121, "learning_rate": 4.621552878096857e-06, "loss": 0.3611, "step": 5137 }, { "epoch": 1.715525876460768, "grad_norm": 0.8122862732774327, "learning_rate": 4.6196158668805165e-06, "loss": 0.3467, "step": 5138 }, { "epoch": 1.7158597662771284, "grad_norm": 0.8366511943382423, "learning_rate": 4.617678913083047e-06, "loss": 0.3744, "step": 5139 }, { "epoch": 1.7161936560934892, "grad_norm": 0.8234347914737968, "learning_rate": 4.615742016996835e-06, "loss": 0.3581, "step": 5140 }, { "epoch": 1.71652754590985, "grad_norm": 0.8570850968101998, "learning_rate": 4.61380517891425e-06, "loss": 0.3804, "step": 5141 }, { "epoch": 1.7168614357262104, "grad_norm": 0.8200235006022403, "learning_rate": 4.61186839912766e-06, "loss": 0.3524, "step": 5142 }, { "epoch": 1.717195325542571, "grad_norm": 0.8865111196955301, "learning_rate": 4.609931677929418e-06, "loss": 0.3663, "step": 5143 }, { "epoch": 1.7175292153589314, "grad_norm": 0.8728324160097769, "learning_rate": 4.607995015611876e-06, "loss": 0.3675, "step": 5144 }, { "epoch": 1.7178631051752922, "grad_norm": 0.8493227823009363, "learning_rate": 4.606058412467371e-06, "loss": 0.3614, "step": 5145 }, { "epoch": 1.718196994991653, "grad_norm": 0.8196057694691836, "learning_rate": 4.604121868788232e-06, "loss": 0.3629, "step": 5146 }, { "epoch": 1.7185308848080134, "grad_norm": 0.7968627153733574, "learning_rate": 4.602185384866777e-06, "loss": 0.3466, "step": 5147 }, { "epoch": 1.718864774624374, "grad_norm": 0.8104526795971757, "learning_rate": 4.600248960995322e-06, "loss": 0.3574, "step": 5148 }, { "epoch": 1.7191986644407344, "grad_norm": 0.841765643898728, "learning_rate": 4.598312597466169e-06, "loss": 0.3709, "step": 5149 }, { "epoch": 1.7195325542570952, "grad_norm": 0.8240411468423731, "learning_rate": 4.5963762945716095e-06, "loss": 0.3671, "step": 5150 }, { "epoch": 1.719866444073456, "grad_norm": 0.8391913037683816, "learning_rate": 4.594440052603928e-06, "loss": 0.3624, "step": 5151 }, { "epoch": 1.7202003338898164, "grad_norm": 0.8299597051031344, "learning_rate": 4.592503871855402e-06, "loss": 0.3714, "step": 5152 }, { "epoch": 1.720534223706177, "grad_norm": 0.8455275199139178, "learning_rate": 4.590567752618296e-06, "loss": 0.3644, "step": 5153 }, { "epoch": 1.7208681135225374, "grad_norm": 0.8018948627710544, "learning_rate": 4.588631695184867e-06, "loss": 0.3409, "step": 5154 }, { "epoch": 1.7212020033388982, "grad_norm": 0.8318871641569087, "learning_rate": 4.58669569984736e-06, "loss": 0.3675, "step": 5155 }, { "epoch": 1.721535893155259, "grad_norm": 0.8381205478798033, "learning_rate": 4.5847597668980155e-06, "loss": 0.3493, "step": 5156 }, { "epoch": 1.7218697829716194, "grad_norm": 0.8047139750515294, "learning_rate": 4.582823896629062e-06, "loss": 0.3549, "step": 5157 }, { "epoch": 1.72220367278798, "grad_norm": 0.8392563633215503, "learning_rate": 4.580888089332719e-06, "loss": 0.3716, "step": 5158 }, { "epoch": 1.7225375626043404, "grad_norm": 0.8265174802195686, "learning_rate": 4.578952345301194e-06, "loss": 0.3533, "step": 5159 }, { "epoch": 1.7228714524207012, "grad_norm": 0.8282304570179608, "learning_rate": 4.577016664826686e-06, "loss": 0.3628, "step": 5160 }, { "epoch": 1.723205342237062, "grad_norm": 0.8121738425627295, "learning_rate": 4.575081048201388e-06, "loss": 0.3569, "step": 5161 }, { "epoch": 1.7235392320534224, "grad_norm": 0.833609928833467, "learning_rate": 4.573145495717482e-06, "loss": 0.3556, "step": 5162 }, { "epoch": 1.723873121869783, "grad_norm": 0.8174472424900046, "learning_rate": 4.571210007667135e-06, "loss": 0.3551, "step": 5163 }, { "epoch": 1.7242070116861434, "grad_norm": 0.8594062556848462, "learning_rate": 4.56927458434251e-06, "loss": 0.3605, "step": 5164 }, { "epoch": 1.7245409015025042, "grad_norm": 0.8391592681244218, "learning_rate": 4.567339226035761e-06, "loss": 0.3589, "step": 5165 }, { "epoch": 1.724874791318865, "grad_norm": 0.8185289174508174, "learning_rate": 4.565403933039028e-06, "loss": 0.3572, "step": 5166 }, { "epoch": 1.7252086811352254, "grad_norm": 0.8683066865049017, "learning_rate": 4.563468705644441e-06, "loss": 0.3643, "step": 5167 }, { "epoch": 1.725542570951586, "grad_norm": 0.8250449847958179, "learning_rate": 4.5615335441441235e-06, "loss": 0.3674, "step": 5168 }, { "epoch": 1.7258764607679464, "grad_norm": 0.8458519983382715, "learning_rate": 4.559598448830189e-06, "loss": 0.3687, "step": 5169 }, { "epoch": 1.7262103505843072, "grad_norm": 0.8165293483463401, "learning_rate": 4.557663419994738e-06, "loss": 0.3541, "step": 5170 }, { "epoch": 1.726544240400668, "grad_norm": 0.8584864915488122, "learning_rate": 4.555728457929864e-06, "loss": 0.3548, "step": 5171 }, { "epoch": 1.7268781302170284, "grad_norm": 0.8481022928306267, "learning_rate": 4.553793562927644e-06, "loss": 0.3939, "step": 5172 }, { "epoch": 1.727212020033389, "grad_norm": 0.8294495777826917, "learning_rate": 4.551858735280157e-06, "loss": 0.3662, "step": 5173 }, { "epoch": 1.7275459098497494, "grad_norm": 0.8126345103225611, "learning_rate": 4.549923975279461e-06, "loss": 0.3609, "step": 5174 }, { "epoch": 1.7278797996661102, "grad_norm": 0.8196265518155186, "learning_rate": 4.5479892832176075e-06, "loss": 0.3656, "step": 5175 }, { "epoch": 1.728213689482471, "grad_norm": 0.8667864275147268, "learning_rate": 4.546054659386637e-06, "loss": 0.3699, "step": 5176 }, { "epoch": 1.7285475792988314, "grad_norm": 0.8421374235587512, "learning_rate": 4.544120104078581e-06, "loss": 0.3708, "step": 5177 }, { "epoch": 1.728881469115192, "grad_norm": 0.8527171737129287, "learning_rate": 4.5421856175854625e-06, "loss": 0.3693, "step": 5178 }, { "epoch": 1.7292153589315524, "grad_norm": 0.8958501153054519, "learning_rate": 4.5402512001992895e-06, "loss": 0.3965, "step": 5179 }, { "epoch": 1.7295492487479132, "grad_norm": 0.8315266872693963, "learning_rate": 4.538316852212059e-06, "loss": 0.3598, "step": 5180 }, { "epoch": 1.729883138564274, "grad_norm": 0.8364406071715508, "learning_rate": 4.536382573915765e-06, "loss": 0.3543, "step": 5181 }, { "epoch": 1.7302170283806344, "grad_norm": 0.8235313745095434, "learning_rate": 4.534448365602384e-06, "loss": 0.3518, "step": 5182 }, { "epoch": 1.730550918196995, "grad_norm": 0.8369702863714138, "learning_rate": 4.5325142275638845e-06, "loss": 0.3658, "step": 5183 }, { "epoch": 1.7308848080133556, "grad_norm": 0.8754252440826181, "learning_rate": 4.530580160092223e-06, "loss": 0.3674, "step": 5184 }, { "epoch": 1.7312186978297162, "grad_norm": 0.844224026678017, "learning_rate": 4.528646163479349e-06, "loss": 0.3638, "step": 5185 }, { "epoch": 1.731552587646077, "grad_norm": 0.8488486066530925, "learning_rate": 4.526712238017197e-06, "loss": 0.3837, "step": 5186 }, { "epoch": 1.7318864774624374, "grad_norm": 0.8200974581118766, "learning_rate": 4.5247783839976925e-06, "loss": 0.3549, "step": 5187 }, { "epoch": 1.732220367278798, "grad_norm": 0.8219354532153366, "learning_rate": 4.522844601712749e-06, "loss": 0.3665, "step": 5188 }, { "epoch": 1.7325542570951586, "grad_norm": 0.8825289564714754, "learning_rate": 4.520910891454272e-06, "loss": 0.3794, "step": 5189 }, { "epoch": 1.7328881469115192, "grad_norm": 0.8420002293631231, "learning_rate": 4.518977253514156e-06, "loss": 0.3662, "step": 5190 }, { "epoch": 1.7332220367278799, "grad_norm": 0.8401065276570243, "learning_rate": 4.51704368818428e-06, "loss": 0.3742, "step": 5191 }, { "epoch": 1.7335559265442404, "grad_norm": 0.835913879804604, "learning_rate": 4.5151101957565165e-06, "loss": 0.3723, "step": 5192 }, { "epoch": 1.733889816360601, "grad_norm": 0.801823799430202, "learning_rate": 4.513176776522724e-06, "loss": 0.3636, "step": 5193 }, { "epoch": 1.7342237061769616, "grad_norm": 0.8740105060694361, "learning_rate": 4.511243430774754e-06, "loss": 0.3705, "step": 5194 }, { "epoch": 1.7345575959933222, "grad_norm": 0.8323267257370318, "learning_rate": 4.509310158804444e-06, "loss": 0.3702, "step": 5195 }, { "epoch": 1.7348914858096829, "grad_norm": 0.8109866455374036, "learning_rate": 4.50737696090362e-06, "loss": 0.3434, "step": 5196 }, { "epoch": 1.7352253756260434, "grad_norm": 0.8044195094194084, "learning_rate": 4.505443837364098e-06, "loss": 0.3513, "step": 5197 }, { "epoch": 1.735559265442404, "grad_norm": 0.8216782659398284, "learning_rate": 4.503510788477682e-06, "loss": 0.3592, "step": 5198 }, { "epoch": 1.7358931552587646, "grad_norm": 0.8097476823202788, "learning_rate": 4.501577814536166e-06, "loss": 0.3556, "step": 5199 }, { "epoch": 1.7362270450751254, "grad_norm": 0.8505587159349987, "learning_rate": 4.499644915831332e-06, "loss": 0.3639, "step": 5200 }, { "epoch": 1.7365609348914859, "grad_norm": 0.8217240927899006, "learning_rate": 4.497712092654948e-06, "loss": 0.364, "step": 5201 }, { "epoch": 1.7368948247078464, "grad_norm": 0.819481477843947, "learning_rate": 4.495779345298777e-06, "loss": 0.3653, "step": 5202 }, { "epoch": 1.737228714524207, "grad_norm": 0.8328572048240377, "learning_rate": 4.493846674054564e-06, "loss": 0.3677, "step": 5203 }, { "epoch": 1.7375626043405676, "grad_norm": 0.8348322883845412, "learning_rate": 4.491914079214046e-06, "loss": 0.3681, "step": 5204 }, { "epoch": 1.7378964941569284, "grad_norm": 0.8924605095244256, "learning_rate": 4.489981561068945e-06, "loss": 0.3893, "step": 5205 }, { "epoch": 1.7382303839732889, "grad_norm": 0.8294559282157058, "learning_rate": 4.48804911991098e-06, "loss": 0.3597, "step": 5206 }, { "epoch": 1.7385642737896494, "grad_norm": 0.8498278815042397, "learning_rate": 4.486116756031847e-06, "loss": 0.3652, "step": 5207 }, { "epoch": 1.73889816360601, "grad_norm": 0.8524676270746291, "learning_rate": 4.484184469723238e-06, "loss": 0.3536, "step": 5208 }, { "epoch": 1.7392320534223706, "grad_norm": 0.8838092872924582, "learning_rate": 4.48225226127683e-06, "loss": 0.3741, "step": 5209 }, { "epoch": 1.7395659432387314, "grad_norm": 0.8205738921965781, "learning_rate": 4.48032013098429e-06, "loss": 0.3603, "step": 5210 }, { "epoch": 1.7398998330550919, "grad_norm": 0.8527321444087071, "learning_rate": 4.478388079137273e-06, "loss": 0.3593, "step": 5211 }, { "epoch": 1.7402337228714524, "grad_norm": 0.8635475005984952, "learning_rate": 4.476456106027422e-06, "loss": 0.3762, "step": 5212 }, { "epoch": 1.740567612687813, "grad_norm": 0.8163788308185235, "learning_rate": 4.474524211946364e-06, "loss": 0.3625, "step": 5213 }, { "epoch": 1.7409015025041736, "grad_norm": 0.8058345120843834, "learning_rate": 4.4725923971857234e-06, "loss": 0.3475, "step": 5214 }, { "epoch": 1.7412353923205344, "grad_norm": 0.8478665841339514, "learning_rate": 4.4706606620371035e-06, "loss": 0.3699, "step": 5215 }, { "epoch": 1.7415692821368949, "grad_norm": 0.8749719578559391, "learning_rate": 4.4687290067921e-06, "loss": 0.3798, "step": 5216 }, { "epoch": 1.7419031719532554, "grad_norm": 0.8180962361228327, "learning_rate": 4.466797431742295e-06, "loss": 0.358, "step": 5217 }, { "epoch": 1.742237061769616, "grad_norm": 0.861241542980661, "learning_rate": 4.46486593717926e-06, "loss": 0.3786, "step": 5218 }, { "epoch": 1.7425709515859766, "grad_norm": 0.871621369951994, "learning_rate": 4.462934523394556e-06, "loss": 0.377, "step": 5219 }, { "epoch": 1.7429048414023374, "grad_norm": 0.833165564740338, "learning_rate": 4.4610031906797245e-06, "loss": 0.3715, "step": 5220 }, { "epoch": 1.7432387312186979, "grad_norm": 0.8181139155733862, "learning_rate": 4.459071939326302e-06, "loss": 0.3654, "step": 5221 }, { "epoch": 1.7435726210350584, "grad_norm": 0.8263669457715125, "learning_rate": 4.457140769625809e-06, "loss": 0.3639, "step": 5222 }, { "epoch": 1.743906510851419, "grad_norm": 0.8277804655610369, "learning_rate": 4.455209681869758e-06, "loss": 0.3471, "step": 5223 }, { "epoch": 1.7442404006677796, "grad_norm": 0.8692086711588207, "learning_rate": 4.4532786763496445e-06, "loss": 0.3781, "step": 5224 }, { "epoch": 1.7445742904841404, "grad_norm": 0.8552464965671694, "learning_rate": 4.451347753356952e-06, "loss": 0.3691, "step": 5225 }, { "epoch": 1.7449081803005009, "grad_norm": 0.8174513298882218, "learning_rate": 4.449416913183153e-06, "loss": 0.3665, "step": 5226 }, { "epoch": 1.7452420701168614, "grad_norm": 0.8422622547397652, "learning_rate": 4.44748615611971e-06, "loss": 0.353, "step": 5227 }, { "epoch": 1.745575959933222, "grad_norm": 0.811922121098496, "learning_rate": 4.445555482458067e-06, "loss": 0.3603, "step": 5228 }, { "epoch": 1.7459098497495826, "grad_norm": 0.8629466502662372, "learning_rate": 4.443624892489661e-06, "loss": 0.3688, "step": 5229 }, { "epoch": 1.7462437395659434, "grad_norm": 0.8424980119948483, "learning_rate": 4.441694386505911e-06, "loss": 0.3742, "step": 5230 }, { "epoch": 1.7465776293823039, "grad_norm": 0.8160811290079278, "learning_rate": 4.439763964798228e-06, "loss": 0.3518, "step": 5231 }, { "epoch": 1.7469115191986644, "grad_norm": 0.7969239259401402, "learning_rate": 4.437833627658011e-06, "loss": 0.3544, "step": 5232 }, { "epoch": 1.747245409015025, "grad_norm": 0.8464830650233565, "learning_rate": 4.43590337537664e-06, "loss": 0.3635, "step": 5233 }, { "epoch": 1.7475792988313856, "grad_norm": 0.8350182875108959, "learning_rate": 4.4339732082454875e-06, "loss": 0.3657, "step": 5234 }, { "epoch": 1.7479131886477464, "grad_norm": 0.8089042047748952, "learning_rate": 4.432043126555912e-06, "loss": 0.343, "step": 5235 }, { "epoch": 1.7482470784641069, "grad_norm": 0.8385085518977731, "learning_rate": 4.430113130599259e-06, "loss": 0.3716, "step": 5236 }, { "epoch": 1.7485809682804674, "grad_norm": 0.8172269195601712, "learning_rate": 4.4281832206668605e-06, "loss": 0.3701, "step": 5237 }, { "epoch": 1.748914858096828, "grad_norm": 0.7660552473958899, "learning_rate": 4.426253397050033e-06, "loss": 0.3418, "step": 5238 }, { "epoch": 1.7492487479131886, "grad_norm": 0.8122847203341456, "learning_rate": 4.424323660040089e-06, "loss": 0.3507, "step": 5239 }, { "epoch": 1.7495826377295494, "grad_norm": 0.81324004631439, "learning_rate": 4.422394009928317e-06, "loss": 0.3737, "step": 5240 }, { "epoch": 1.7499165275459099, "grad_norm": 0.8477306980455267, "learning_rate": 4.420464447005998e-06, "loss": 0.3685, "step": 5241 }, { "epoch": 1.7502504173622704, "grad_norm": 0.8079418655430027, "learning_rate": 4.418534971564397e-06, "loss": 0.3477, "step": 5242 }, { "epoch": 1.750584307178631, "grad_norm": 0.9002625735982693, "learning_rate": 4.416605583894772e-06, "loss": 0.3628, "step": 5243 }, { "epoch": 1.7509181969949916, "grad_norm": 0.8661027345746574, "learning_rate": 4.4146762842883616e-06, "loss": 0.3592, "step": 5244 }, { "epoch": 1.7512520868113524, "grad_norm": 0.8160717060977347, "learning_rate": 4.4127470730363946e-06, "loss": 0.3562, "step": 5245 }, { "epoch": 1.7515859766277129, "grad_norm": 0.8082938968555029, "learning_rate": 4.410817950430079e-06, "loss": 0.3558, "step": 5246 }, { "epoch": 1.7519198664440734, "grad_norm": 0.8586426697984909, "learning_rate": 4.408888916760622e-06, "loss": 0.3819, "step": 5247 }, { "epoch": 1.752253756260434, "grad_norm": 0.8564123351636318, "learning_rate": 4.406959972319209e-06, "loss": 0.3797, "step": 5248 }, { "epoch": 1.7525876460767946, "grad_norm": 0.9056012300425004, "learning_rate": 4.405031117397011e-06, "loss": 0.3726, "step": 5249 }, { "epoch": 1.7529215358931554, "grad_norm": 0.8126718037940599, "learning_rate": 4.403102352285189e-06, "loss": 0.3492, "step": 5250 }, { "epoch": 1.7532554257095159, "grad_norm": 0.8234122111871665, "learning_rate": 4.401173677274892e-06, "loss": 0.3618, "step": 5251 }, { "epoch": 1.7535893155258764, "grad_norm": 0.8726096919613281, "learning_rate": 4.399245092657252e-06, "loss": 0.378, "step": 5252 }, { "epoch": 1.7539232053422371, "grad_norm": 0.8402538995550808, "learning_rate": 4.397316598723385e-06, "loss": 0.3622, "step": 5253 }, { "epoch": 1.7542570951585976, "grad_norm": 0.8391488728459496, "learning_rate": 4.3953881957644014e-06, "loss": 0.3645, "step": 5254 }, { "epoch": 1.7545909849749584, "grad_norm": 0.8360590143059211, "learning_rate": 4.393459884071388e-06, "loss": 0.3683, "step": 5255 }, { "epoch": 1.7549248747913189, "grad_norm": 0.8475437930745455, "learning_rate": 4.391531663935428e-06, "loss": 0.3597, "step": 5256 }, { "epoch": 1.7552587646076794, "grad_norm": 0.8676164366011083, "learning_rate": 4.389603535647582e-06, "loss": 0.3774, "step": 5257 }, { "epoch": 1.7555926544240401, "grad_norm": 0.8059471489314588, "learning_rate": 4.387675499498904e-06, "loss": 0.3537, "step": 5258 }, { "epoch": 1.7559265442404006, "grad_norm": 0.8396305981211171, "learning_rate": 4.385747555780423e-06, "loss": 0.3517, "step": 5259 }, { "epoch": 1.7562604340567614, "grad_norm": 0.8145240683401095, "learning_rate": 4.3838197047831715e-06, "loss": 0.3668, "step": 5260 }, { "epoch": 1.7565943238731219, "grad_norm": 0.8243627635567632, "learning_rate": 4.381891946798152e-06, "loss": 0.3466, "step": 5261 }, { "epoch": 1.7569282136894824, "grad_norm": 0.8330885746751898, "learning_rate": 4.37996428211636e-06, "loss": 0.3759, "step": 5262 }, { "epoch": 1.757262103505843, "grad_norm": 0.7809205593365494, "learning_rate": 4.378036711028775e-06, "loss": 0.3448, "step": 5263 }, { "epoch": 1.7575959933222036, "grad_norm": 0.8366819526110404, "learning_rate": 4.376109233826365e-06, "loss": 0.3569, "step": 5264 }, { "epoch": 1.7579298831385644, "grad_norm": 0.8040859728217691, "learning_rate": 4.374181850800083e-06, "loss": 0.3546, "step": 5265 }, { "epoch": 1.7582637729549249, "grad_norm": 0.8102759017088004, "learning_rate": 4.372254562240864e-06, "loss": 0.3388, "step": 5266 }, { "epoch": 1.7585976627712854, "grad_norm": 1.0675549449428805, "learning_rate": 4.370327368439633e-06, "loss": 0.367, "step": 5267 }, { "epoch": 1.758931552587646, "grad_norm": 0.8295451365984505, "learning_rate": 4.3684002696873e-06, "loss": 0.3633, "step": 5268 }, { "epoch": 1.7592654424040068, "grad_norm": 0.8382699226989095, "learning_rate": 4.366473266274759e-06, "loss": 0.3726, "step": 5269 }, { "epoch": 1.7595993322203674, "grad_norm": 0.8285019285450067, "learning_rate": 4.364546358492892e-06, "loss": 0.3585, "step": 5270 }, { "epoch": 1.7599332220367279, "grad_norm": 0.8247453537357342, "learning_rate": 4.362619546632561e-06, "loss": 0.3529, "step": 5271 }, { "epoch": 1.7602671118530884, "grad_norm": 0.8591610605064361, "learning_rate": 4.360692830984624e-06, "loss": 0.3626, "step": 5272 }, { "epoch": 1.760601001669449, "grad_norm": 0.8382024417479536, "learning_rate": 4.3587662118399145e-06, "loss": 0.3699, "step": 5273 }, { "epoch": 1.7609348914858098, "grad_norm": 0.81994776388511, "learning_rate": 4.356839689489255e-06, "loss": 0.3564, "step": 5274 }, { "epoch": 1.7612687813021703, "grad_norm": 0.806923605022444, "learning_rate": 4.3549132642234535e-06, "loss": 0.3614, "step": 5275 }, { "epoch": 1.7616026711185309, "grad_norm": 0.8230466287868925, "learning_rate": 4.352986936333305e-06, "loss": 0.3671, "step": 5276 }, { "epoch": 1.7619365609348914, "grad_norm": 0.8055322673068349, "learning_rate": 4.351060706109586e-06, "loss": 0.3505, "step": 5277 }, { "epoch": 1.762270450751252, "grad_norm": 0.8484538104369105, "learning_rate": 4.349134573843063e-06, "loss": 0.3749, "step": 5278 }, { "epoch": 1.7626043405676128, "grad_norm": 0.8437583167420776, "learning_rate": 4.34720853982448e-06, "loss": 0.375, "step": 5279 }, { "epoch": 1.7629382303839733, "grad_norm": 0.8318724202532485, "learning_rate": 4.345282604344579e-06, "loss": 0.3607, "step": 5280 }, { "epoch": 1.7632721202003339, "grad_norm": 0.8120171179335607, "learning_rate": 4.3433567676940725e-06, "loss": 0.3529, "step": 5281 }, { "epoch": 1.7636060100166944, "grad_norm": 0.8411428954278904, "learning_rate": 4.341431030163668e-06, "loss": 0.3618, "step": 5282 }, { "epoch": 1.763939899833055, "grad_norm": 0.8448872342923456, "learning_rate": 4.339505392044056e-06, "loss": 0.3579, "step": 5283 }, { "epoch": 1.7642737896494158, "grad_norm": 0.8052259766254823, "learning_rate": 4.337579853625906e-06, "loss": 0.3584, "step": 5284 }, { "epoch": 1.7646076794657763, "grad_norm": 0.8466832872236475, "learning_rate": 4.3356544151998856e-06, "loss": 0.3646, "step": 5285 }, { "epoch": 1.7649415692821369, "grad_norm": 0.8190211345508966, "learning_rate": 4.333729077056632e-06, "loss": 0.3572, "step": 5286 }, { "epoch": 1.7652754590984974, "grad_norm": 0.820457043184686, "learning_rate": 4.331803839486778e-06, "loss": 0.3681, "step": 5287 }, { "epoch": 1.765609348914858, "grad_norm": 0.8678814023220636, "learning_rate": 4.3298787027809354e-06, "loss": 0.3712, "step": 5288 }, { "epoch": 1.7659432387312188, "grad_norm": 0.8409996332493214, "learning_rate": 4.327953667229705e-06, "loss": 0.3714, "step": 5289 }, { "epoch": 1.7662771285475793, "grad_norm": 0.8552675706180386, "learning_rate": 4.326028733123669e-06, "loss": 0.3797, "step": 5290 }, { "epoch": 1.7666110183639399, "grad_norm": 0.8369960386285439, "learning_rate": 4.324103900753399e-06, "loss": 0.3672, "step": 5291 }, { "epoch": 1.7669449081803004, "grad_norm": 0.820301526546641, "learning_rate": 4.322179170409441e-06, "loss": 0.3418, "step": 5292 }, { "epoch": 1.767278797996661, "grad_norm": 0.8203344984770051, "learning_rate": 4.320254542382341e-06, "loss": 0.3722, "step": 5293 }, { "epoch": 1.7676126878130218, "grad_norm": 0.8164486149901454, "learning_rate": 4.318330016962614e-06, "loss": 0.3489, "step": 5294 }, { "epoch": 1.7679465776293823, "grad_norm": 0.8531386332003701, "learning_rate": 4.316405594440769e-06, "loss": 0.3663, "step": 5295 }, { "epoch": 1.7682804674457429, "grad_norm": 0.7812980987067437, "learning_rate": 4.314481275107298e-06, "loss": 0.3397, "step": 5296 }, { "epoch": 1.7686143572621034, "grad_norm": 0.8183919640797415, "learning_rate": 4.312557059252675e-06, "loss": 0.3502, "step": 5297 }, { "epoch": 1.768948247078464, "grad_norm": 0.810344175361874, "learning_rate": 4.310632947167363e-06, "loss": 0.3659, "step": 5298 }, { "epoch": 1.7692821368948248, "grad_norm": 0.8085833696512188, "learning_rate": 4.308708939141801e-06, "loss": 0.344, "step": 5299 }, { "epoch": 1.7696160267111853, "grad_norm": 0.8227091319912443, "learning_rate": 4.30678503546642e-06, "loss": 0.3591, "step": 5300 }, { "epoch": 1.7699499165275459, "grad_norm": 0.8187487709296116, "learning_rate": 4.304861236431634e-06, "loss": 0.3603, "step": 5301 }, { "epoch": 1.7702838063439064, "grad_norm": 0.8392265880126929, "learning_rate": 4.302937542327838e-06, "loss": 0.3612, "step": 5302 }, { "epoch": 1.770617696160267, "grad_norm": 0.8336017464275374, "learning_rate": 4.301013953445414e-06, "loss": 0.3618, "step": 5303 }, { "epoch": 1.7709515859766278, "grad_norm": 0.828787534611039, "learning_rate": 4.299090470074726e-06, "loss": 0.3719, "step": 5304 }, { "epoch": 1.7712854757929883, "grad_norm": 0.8336640331317637, "learning_rate": 4.297167092506125e-06, "loss": 0.3591, "step": 5305 }, { "epoch": 1.7716193656093489, "grad_norm": 0.879590604793455, "learning_rate": 4.295243821029944e-06, "loss": 0.3776, "step": 5306 }, { "epoch": 1.7719532554257094, "grad_norm": 0.8200701083796806, "learning_rate": 4.293320655936499e-06, "loss": 0.3578, "step": 5307 }, { "epoch": 1.77228714524207, "grad_norm": 0.8242260115224451, "learning_rate": 4.291397597516091e-06, "loss": 0.3485, "step": 5308 }, { "epoch": 1.7726210350584308, "grad_norm": 0.8071259727835967, "learning_rate": 4.289474646059007e-06, "loss": 0.3566, "step": 5309 }, { "epoch": 1.7729549248747913, "grad_norm": 0.8154270834247624, "learning_rate": 4.287551801855514e-06, "loss": 0.3646, "step": 5310 }, { "epoch": 1.7732888146911518, "grad_norm": 0.7991316421562071, "learning_rate": 4.285629065195868e-06, "loss": 0.3625, "step": 5311 }, { "epoch": 1.7736227045075124, "grad_norm": 0.8667093045864978, "learning_rate": 4.283706436370299e-06, "loss": 0.376, "step": 5312 }, { "epoch": 1.773956594323873, "grad_norm": 0.8532804801368675, "learning_rate": 4.281783915669035e-06, "loss": 0.3818, "step": 5313 }, { "epoch": 1.7742904841402338, "grad_norm": 0.7992732205436825, "learning_rate": 4.279861503382275e-06, "loss": 0.363, "step": 5314 }, { "epoch": 1.7746243739565943, "grad_norm": 0.7875745081874655, "learning_rate": 4.277939199800209e-06, "loss": 0.3465, "step": 5315 }, { "epoch": 1.7749582637729548, "grad_norm": 0.8248251903361853, "learning_rate": 4.276017005213008e-06, "loss": 0.3582, "step": 5316 }, { "epoch": 1.7752921535893156, "grad_norm": 0.8411053914467003, "learning_rate": 4.2740949199108215e-06, "loss": 0.3557, "step": 5317 }, { "epoch": 1.775626043405676, "grad_norm": 0.8347583976125161, "learning_rate": 4.2721729441837965e-06, "loss": 0.351, "step": 5318 }, { "epoch": 1.7759599332220368, "grad_norm": 0.7831737026041709, "learning_rate": 4.270251078322048e-06, "loss": 0.3424, "step": 5319 }, { "epoch": 1.7762938230383973, "grad_norm": 0.8325561955613071, "learning_rate": 4.268329322615684e-06, "loss": 0.3527, "step": 5320 }, { "epoch": 1.7766277128547578, "grad_norm": 0.8732157798510899, "learning_rate": 4.266407677354791e-06, "loss": 0.3641, "step": 5321 }, { "epoch": 1.7769616026711186, "grad_norm": 0.807587857115205, "learning_rate": 4.264486142829443e-06, "loss": 0.3462, "step": 5322 }, { "epoch": 1.777295492487479, "grad_norm": 0.8701357570950253, "learning_rate": 4.262564719329694e-06, "loss": 0.3835, "step": 5323 }, { "epoch": 1.7776293823038398, "grad_norm": 0.8697017346686915, "learning_rate": 4.260643407145583e-06, "loss": 0.3708, "step": 5324 }, { "epoch": 1.7779632721202003, "grad_norm": 0.815319107605148, "learning_rate": 4.258722206567128e-06, "loss": 0.3613, "step": 5325 }, { "epoch": 1.7782971619365608, "grad_norm": 0.8204549114587928, "learning_rate": 4.256801117884341e-06, "loss": 0.35, "step": 5326 }, { "epoch": 1.7786310517529216, "grad_norm": 0.7995730479761738, "learning_rate": 4.254880141387203e-06, "loss": 0.3543, "step": 5327 }, { "epoch": 1.778964941569282, "grad_norm": 0.8134491656201497, "learning_rate": 4.252959277365687e-06, "loss": 0.3515, "step": 5328 }, { "epoch": 1.7792988313856428, "grad_norm": 0.8328973655821612, "learning_rate": 4.2510385261097465e-06, "loss": 0.3606, "step": 5329 }, { "epoch": 1.7796327212020033, "grad_norm": 0.8278210490446104, "learning_rate": 4.249117887909319e-06, "loss": 0.3687, "step": 5330 }, { "epoch": 1.7799666110183638, "grad_norm": 0.8738178779413514, "learning_rate": 4.247197363054326e-06, "loss": 0.3494, "step": 5331 }, { "epoch": 1.7803005008347246, "grad_norm": 0.8624701964832947, "learning_rate": 4.245276951834668e-06, "loss": 0.3678, "step": 5332 }, { "epoch": 1.7806343906510853, "grad_norm": 0.8386455393500971, "learning_rate": 4.243356654540228e-06, "loss": 0.3496, "step": 5333 }, { "epoch": 1.7809682804674458, "grad_norm": 0.854806670945387, "learning_rate": 4.2414364714608795e-06, "loss": 0.3761, "step": 5334 }, { "epoch": 1.7813021702838063, "grad_norm": 0.7820855235381142, "learning_rate": 4.23951640288647e-06, "loss": 0.349, "step": 5335 }, { "epoch": 1.7816360601001668, "grad_norm": 0.8168859526977447, "learning_rate": 4.237596449106835e-06, "loss": 0.353, "step": 5336 }, { "epoch": 1.7819699499165276, "grad_norm": 0.8164350514668589, "learning_rate": 4.23567661041179e-06, "loss": 0.3448, "step": 5337 }, { "epoch": 1.7823038397328883, "grad_norm": 0.8237774518988704, "learning_rate": 4.233756887091135e-06, "loss": 0.3525, "step": 5338 }, { "epoch": 1.7826377295492488, "grad_norm": 0.8901531301922102, "learning_rate": 4.231837279434652e-06, "loss": 0.3699, "step": 5339 }, { "epoch": 1.7829716193656093, "grad_norm": 0.8309013138852736, "learning_rate": 4.229917787732103e-06, "loss": 0.3779, "step": 5340 }, { "epoch": 1.7833055091819698, "grad_norm": 0.8290182315274353, "learning_rate": 4.227998412273235e-06, "loss": 0.369, "step": 5341 }, { "epoch": 1.7836393989983306, "grad_norm": 0.8114963713104119, "learning_rate": 4.22607915334778e-06, "loss": 0.3672, "step": 5342 }, { "epoch": 1.7839732888146913, "grad_norm": 0.8659556679613264, "learning_rate": 4.2241600112454465e-06, "loss": 0.3714, "step": 5343 }, { "epoch": 1.7843071786310518, "grad_norm": 0.8163107730687078, "learning_rate": 4.222240986255932e-06, "loss": 0.3482, "step": 5344 }, { "epoch": 1.7846410684474123, "grad_norm": 0.8634998667754544, "learning_rate": 4.2203220786689094e-06, "loss": 0.3663, "step": 5345 }, { "epoch": 1.7849749582637728, "grad_norm": 0.8339759961634791, "learning_rate": 4.218403288774037e-06, "loss": 0.3532, "step": 5346 }, { "epoch": 1.7853088480801336, "grad_norm": 0.8460475613102214, "learning_rate": 4.216484616860958e-06, "loss": 0.363, "step": 5347 }, { "epoch": 1.7856427378964943, "grad_norm": 0.8708112099473075, "learning_rate": 4.214566063219293e-06, "loss": 0.3693, "step": 5348 }, { "epoch": 1.7859766277128548, "grad_norm": 0.8354883787358781, "learning_rate": 4.21264762813865e-06, "loss": 0.3628, "step": 5349 }, { "epoch": 1.7863105175292153, "grad_norm": 0.8407953675812655, "learning_rate": 4.2107293119086126e-06, "loss": 0.3664, "step": 5350 }, { "epoch": 1.7866444073455758, "grad_norm": 0.827202047957341, "learning_rate": 4.208811114818753e-06, "loss": 0.3716, "step": 5351 }, { "epoch": 1.7869782971619366, "grad_norm": 0.8600382526483904, "learning_rate": 4.2068930371586225e-06, "loss": 0.3749, "step": 5352 }, { "epoch": 1.7873121869782973, "grad_norm": 0.8283590567081887, "learning_rate": 4.204975079217752e-06, "loss": 0.3584, "step": 5353 }, { "epoch": 1.7876460767946578, "grad_norm": 0.7739146937952517, "learning_rate": 4.203057241285658e-06, "loss": 0.3424, "step": 5354 }, { "epoch": 1.7879799666110183, "grad_norm": 0.8083425534731341, "learning_rate": 4.201139523651838e-06, "loss": 0.3537, "step": 5355 }, { "epoch": 1.7883138564273788, "grad_norm": 0.7961168850822419, "learning_rate": 4.199221926605771e-06, "loss": 0.3608, "step": 5356 }, { "epoch": 1.7886477462437396, "grad_norm": 0.8270202701855596, "learning_rate": 4.197304450436918e-06, "loss": 0.3608, "step": 5357 }, { "epoch": 1.7889816360601003, "grad_norm": 0.8564199590794612, "learning_rate": 4.195387095434717e-06, "loss": 0.3665, "step": 5358 }, { "epoch": 1.7893155258764608, "grad_norm": 0.8220391486818918, "learning_rate": 4.193469861888599e-06, "loss": 0.3707, "step": 5359 }, { "epoch": 1.7896494156928213, "grad_norm": 0.7929316131463927, "learning_rate": 4.191552750087967e-06, "loss": 0.3464, "step": 5360 }, { "epoch": 1.7899833055091818, "grad_norm": 0.8096278938662006, "learning_rate": 4.189635760322207e-06, "loss": 0.3426, "step": 5361 }, { "epoch": 1.7903171953255426, "grad_norm": 0.7769554929896875, "learning_rate": 4.187718892880688e-06, "loss": 0.3515, "step": 5362 }, { "epoch": 1.7906510851419033, "grad_norm": 0.861037446501701, "learning_rate": 4.185802148052763e-06, "loss": 0.3669, "step": 5363 }, { "epoch": 1.7909849749582638, "grad_norm": 0.8655112795413537, "learning_rate": 4.183885526127763e-06, "loss": 0.3733, "step": 5364 }, { "epoch": 1.7913188647746243, "grad_norm": 0.9389540920711993, "learning_rate": 4.181969027395002e-06, "loss": 0.3566, "step": 5365 }, { "epoch": 1.7916527545909848, "grad_norm": 0.8112504189959923, "learning_rate": 4.180052652143773e-06, "loss": 0.3554, "step": 5366 }, { "epoch": 1.7919866444073456, "grad_norm": 0.8219465599434639, "learning_rate": 4.178136400663353e-06, "loss": 0.3563, "step": 5367 }, { "epoch": 1.7923205342237063, "grad_norm": 0.8291357608347752, "learning_rate": 4.176220273243001e-06, "loss": 0.367, "step": 5368 }, { "epoch": 1.7926544240400668, "grad_norm": 0.8083544413454706, "learning_rate": 4.174304270171955e-06, "loss": 0.3659, "step": 5369 }, { "epoch": 1.7929883138564273, "grad_norm": 0.8721555304381928, "learning_rate": 4.1723883917394335e-06, "loss": 0.3785, "step": 5370 }, { "epoch": 1.7933222036727878, "grad_norm": 0.8676955227094274, "learning_rate": 4.170472638234641e-06, "loss": 0.3656, "step": 5371 }, { "epoch": 1.7936560934891486, "grad_norm": 0.8224779373182908, "learning_rate": 4.16855700994676e-06, "loss": 0.3544, "step": 5372 }, { "epoch": 1.7939899833055093, "grad_norm": 0.8421205604429578, "learning_rate": 4.1666415071649495e-06, "loss": 0.353, "step": 5373 }, { "epoch": 1.7943238731218698, "grad_norm": 0.8193825199735396, "learning_rate": 4.164726130178357e-06, "loss": 0.3557, "step": 5374 }, { "epoch": 1.7946577629382303, "grad_norm": 0.8317736908765533, "learning_rate": 4.1628108792761086e-06, "loss": 0.3718, "step": 5375 }, { "epoch": 1.7949916527545908, "grad_norm": 0.8191185771011571, "learning_rate": 4.160895754747311e-06, "loss": 0.3667, "step": 5376 }, { "epoch": 1.7953255425709516, "grad_norm": 0.8599076280491128, "learning_rate": 4.158980756881051e-06, "loss": 0.3806, "step": 5377 }, { "epoch": 1.7956594323873123, "grad_norm": 0.8179320437488484, "learning_rate": 4.157065885966396e-06, "loss": 0.3681, "step": 5378 }, { "epoch": 1.7959933222036728, "grad_norm": 0.8299316710178517, "learning_rate": 4.155151142292395e-06, "loss": 0.3613, "step": 5379 }, { "epoch": 1.7963272120200333, "grad_norm": 0.8339261720672152, "learning_rate": 4.15323652614808e-06, "loss": 0.3746, "step": 5380 }, { "epoch": 1.7966611018363938, "grad_norm": 0.7710904656803148, "learning_rate": 4.151322037822461e-06, "loss": 0.3415, "step": 5381 }, { "epoch": 1.7969949916527546, "grad_norm": 0.8318049477245383, "learning_rate": 4.149407677604529e-06, "loss": 0.3519, "step": 5382 }, { "epoch": 1.7973288814691153, "grad_norm": 0.8013124145239012, "learning_rate": 4.147493445783256e-06, "loss": 0.3565, "step": 5383 }, { "epoch": 1.7976627712854758, "grad_norm": 0.8332724983190032, "learning_rate": 4.145579342647595e-06, "loss": 0.3589, "step": 5384 }, { "epoch": 1.7979966611018363, "grad_norm": 0.8482587175383741, "learning_rate": 4.143665368486482e-06, "loss": 0.3599, "step": 5385 }, { "epoch": 1.798330550918197, "grad_norm": 0.8628076575380135, "learning_rate": 4.141751523588826e-06, "loss": 0.3764, "step": 5386 }, { "epoch": 1.7986644407345576, "grad_norm": 0.8557574600704917, "learning_rate": 4.1398378082435225e-06, "loss": 0.3808, "step": 5387 }, { "epoch": 1.7989983305509183, "grad_norm": 0.8610503309807406, "learning_rate": 4.1379242227394485e-06, "loss": 0.3529, "step": 5388 }, { "epoch": 1.7993322203672788, "grad_norm": 0.7943134593738741, "learning_rate": 4.136010767365458e-06, "loss": 0.358, "step": 5389 }, { "epoch": 1.7996661101836393, "grad_norm": 0.8199839411216499, "learning_rate": 4.134097442410388e-06, "loss": 0.3535, "step": 5390 }, { "epoch": 1.8, "grad_norm": 0.8542486043246028, "learning_rate": 4.132184248163049e-06, "loss": 0.3676, "step": 5391 }, { "epoch": 1.8003338898163606, "grad_norm": 0.8347708821712715, "learning_rate": 4.130271184912245e-06, "loss": 0.3722, "step": 5392 }, { "epoch": 1.8006677796327213, "grad_norm": 0.8127291825655308, "learning_rate": 4.1283582529467456e-06, "loss": 0.3677, "step": 5393 }, { "epoch": 1.8010016694490818, "grad_norm": 0.8182237801287638, "learning_rate": 4.126445452555311e-06, "loss": 0.3778, "step": 5394 }, { "epoch": 1.8013355592654423, "grad_norm": 0.8509115651444996, "learning_rate": 4.124532784026676e-06, "loss": 0.3732, "step": 5395 }, { "epoch": 1.801669449081803, "grad_norm": 0.8481375061170815, "learning_rate": 4.122620247649559e-06, "loss": 0.366, "step": 5396 }, { "epoch": 1.8020033388981636, "grad_norm": 0.8485660886245617, "learning_rate": 4.120707843712655e-06, "loss": 0.3639, "step": 5397 }, { "epoch": 1.8023372287145243, "grad_norm": 0.7986120415150482, "learning_rate": 4.118795572504644e-06, "loss": 0.3553, "step": 5398 }, { "epoch": 1.8026711185308848, "grad_norm": 0.8245385411718475, "learning_rate": 4.116883434314177e-06, "loss": 0.3545, "step": 5399 }, { "epoch": 1.8030050083472453, "grad_norm": 0.8023520258528456, "learning_rate": 4.114971429429897e-06, "loss": 0.3531, "step": 5400 }, { "epoch": 1.803338898163606, "grad_norm": 0.8221984604989145, "learning_rate": 4.113059558140418e-06, "loss": 0.3515, "step": 5401 }, { "epoch": 1.8036727879799668, "grad_norm": 0.8339465475474408, "learning_rate": 4.111147820734335e-06, "loss": 0.3615, "step": 5402 }, { "epoch": 1.8040066777963273, "grad_norm": 0.8511335922685537, "learning_rate": 4.1092362175002254e-06, "loss": 0.3587, "step": 5403 }, { "epoch": 1.8043405676126878, "grad_norm": 0.8149931806407322, "learning_rate": 4.107324748726645e-06, "loss": 0.3555, "step": 5404 }, { "epoch": 1.8046744574290483, "grad_norm": 0.8009721367942682, "learning_rate": 4.105413414702132e-06, "loss": 0.3508, "step": 5405 }, { "epoch": 1.805008347245409, "grad_norm": 0.8135811290652104, "learning_rate": 4.103502215715197e-06, "loss": 0.3514, "step": 5406 }, { "epoch": 1.8053422370617698, "grad_norm": 0.8352504387992116, "learning_rate": 4.101591152054338e-06, "loss": 0.3769, "step": 5407 }, { "epoch": 1.8056761268781303, "grad_norm": 0.8384427403818906, "learning_rate": 4.0996802240080275e-06, "loss": 0.3703, "step": 5408 }, { "epoch": 1.8060100166944908, "grad_norm": 0.8022650846055362, "learning_rate": 4.097769431864722e-06, "loss": 0.3431, "step": 5409 }, { "epoch": 1.8063439065108513, "grad_norm": 0.818251546035806, "learning_rate": 4.0958587759128534e-06, "loss": 0.3546, "step": 5410 }, { "epoch": 1.806677796327212, "grad_norm": 0.8707148320616468, "learning_rate": 4.093948256440836e-06, "loss": 0.3633, "step": 5411 }, { "epoch": 1.8070116861435728, "grad_norm": 0.859504051440694, "learning_rate": 4.092037873737059e-06, "loss": 0.3681, "step": 5412 }, { "epoch": 1.8073455759599333, "grad_norm": 0.8023740910025187, "learning_rate": 4.090127628089898e-06, "loss": 0.3479, "step": 5413 }, { "epoch": 1.8076794657762938, "grad_norm": 0.847264344765341, "learning_rate": 4.0882175197877015e-06, "loss": 0.3643, "step": 5414 }, { "epoch": 1.8080133555926543, "grad_norm": 0.9006545802212551, "learning_rate": 4.086307549118801e-06, "loss": 0.3881, "step": 5415 }, { "epoch": 1.808347245409015, "grad_norm": 0.8461306161861614, "learning_rate": 4.084397716371504e-06, "loss": 0.3724, "step": 5416 }, { "epoch": 1.8086811352253758, "grad_norm": 0.8255098325380172, "learning_rate": 4.082488021834102e-06, "loss": 0.3606, "step": 5417 }, { "epoch": 1.8090150250417363, "grad_norm": 0.8179868376832581, "learning_rate": 4.080578465794863e-06, "loss": 0.358, "step": 5418 }, { "epoch": 1.8093489148580968, "grad_norm": 0.8442393770349848, "learning_rate": 4.078669048542032e-06, "loss": 0.3685, "step": 5419 }, { "epoch": 1.8096828046744573, "grad_norm": 0.8177222192699413, "learning_rate": 4.076759770363834e-06, "loss": 0.3795, "step": 5420 }, { "epoch": 1.810016694490818, "grad_norm": 0.8004486354403672, "learning_rate": 4.074850631548476e-06, "loss": 0.3552, "step": 5421 }, { "epoch": 1.8103505843071788, "grad_norm": 0.7989678439684289, "learning_rate": 4.072941632384141e-06, "loss": 0.3442, "step": 5422 }, { "epoch": 1.8106844741235393, "grad_norm": 0.8391206596100912, "learning_rate": 4.071032773158994e-06, "loss": 0.3718, "step": 5423 }, { "epoch": 1.8110183639398998, "grad_norm": 0.8721937661255065, "learning_rate": 4.069124054161172e-06, "loss": 0.3624, "step": 5424 }, { "epoch": 1.8113522537562603, "grad_norm": 0.8355739185615685, "learning_rate": 4.067215475678801e-06, "loss": 0.37, "step": 5425 }, { "epoch": 1.811686143572621, "grad_norm": 0.8113341106051752, "learning_rate": 4.065307037999977e-06, "loss": 0.3683, "step": 5426 }, { "epoch": 1.8120200333889818, "grad_norm": 0.8457753658804829, "learning_rate": 4.06339874141278e-06, "loss": 0.3629, "step": 5427 }, { "epoch": 1.8123539232053423, "grad_norm": 0.8081077309079684, "learning_rate": 4.0614905862052635e-06, "loss": 0.3592, "step": 5428 }, { "epoch": 1.8126878130217028, "grad_norm": 0.8002479480153025, "learning_rate": 4.059582572665467e-06, "loss": 0.3412, "step": 5429 }, { "epoch": 1.8130217028380633, "grad_norm": 0.8362859742043438, "learning_rate": 4.057674701081403e-06, "loss": 0.3638, "step": 5430 }, { "epoch": 1.813355592654424, "grad_norm": 0.8666215590122148, "learning_rate": 4.055766971741064e-06, "loss": 0.379, "step": 5431 }, { "epoch": 1.8136894824707848, "grad_norm": 0.8603470930650143, "learning_rate": 4.053859384932419e-06, "loss": 0.373, "step": 5432 }, { "epoch": 1.8140233722871453, "grad_norm": 0.8478570866884758, "learning_rate": 4.051951940943423e-06, "loss": 0.3675, "step": 5433 }, { "epoch": 1.8143572621035058, "grad_norm": 0.810472171198967, "learning_rate": 4.050044640062e-06, "loss": 0.3615, "step": 5434 }, { "epoch": 1.8146911519198663, "grad_norm": 0.8064644545753067, "learning_rate": 4.048137482576057e-06, "loss": 0.3699, "step": 5435 }, { "epoch": 1.815025041736227, "grad_norm": 0.8291851999718692, "learning_rate": 4.04623046877348e-06, "loss": 0.3727, "step": 5436 }, { "epoch": 1.8153589315525878, "grad_norm": 0.8462492289049635, "learning_rate": 4.044323598942129e-06, "loss": 0.376, "step": 5437 }, { "epoch": 1.8156928213689483, "grad_norm": 0.780158909707121, "learning_rate": 4.042416873369851e-06, "loss": 0.3523, "step": 5438 }, { "epoch": 1.8160267111853088, "grad_norm": 0.8160921776481993, "learning_rate": 4.040510292344461e-06, "loss": 0.3769, "step": 5439 }, { "epoch": 1.8163606010016693, "grad_norm": 0.8135539300097957, "learning_rate": 4.038603856153759e-06, "loss": 0.3619, "step": 5440 }, { "epoch": 1.81669449081803, "grad_norm": 0.8173662378198964, "learning_rate": 4.036697565085519e-06, "loss": 0.3579, "step": 5441 }, { "epoch": 1.8170283806343908, "grad_norm": 0.7877486727074766, "learning_rate": 4.034791419427498e-06, "loss": 0.358, "step": 5442 }, { "epoch": 1.8173622704507513, "grad_norm": 0.8181360012997675, "learning_rate": 4.032885419467426e-06, "loss": 0.3703, "step": 5443 }, { "epoch": 1.8176961602671118, "grad_norm": 0.8074772872626457, "learning_rate": 4.030979565493016e-06, "loss": 0.3292, "step": 5444 }, { "epoch": 1.8180300500834723, "grad_norm": 0.8341695651784312, "learning_rate": 4.029073857791949e-06, "loss": 0.3624, "step": 5445 }, { "epoch": 1.818363939899833, "grad_norm": 0.8732599028550577, "learning_rate": 4.0271682966519e-06, "loss": 0.3654, "step": 5446 }, { "epoch": 1.8186978297161938, "grad_norm": 0.8459636073726915, "learning_rate": 4.0252628823605074e-06, "loss": 0.373, "step": 5447 }, { "epoch": 1.8190317195325543, "grad_norm": 0.8036361677957531, "learning_rate": 4.023357615205394e-06, "loss": 0.3538, "step": 5448 }, { "epoch": 1.8193656093489148, "grad_norm": 0.8399925927265764, "learning_rate": 4.021452495474159e-06, "loss": 0.3629, "step": 5449 }, { "epoch": 1.8196994991652755, "grad_norm": 0.8998915623195911, "learning_rate": 4.0195475234543814e-06, "loss": 0.3865, "step": 5450 }, { "epoch": 1.820033388981636, "grad_norm": 0.8280972452406269, "learning_rate": 4.017642699433615e-06, "loss": 0.3555, "step": 5451 }, { "epoch": 1.8203672787979968, "grad_norm": 0.8126692509187187, "learning_rate": 4.0157380236993925e-06, "loss": 0.3509, "step": 5452 }, { "epoch": 1.8207011686143573, "grad_norm": 0.8243982267971195, "learning_rate": 4.013833496539223e-06, "loss": 0.3557, "step": 5453 }, { "epoch": 1.8210350584307178, "grad_norm": 0.8217052805615619, "learning_rate": 4.011929118240596e-06, "loss": 0.3528, "step": 5454 }, { "epoch": 1.8213689482470785, "grad_norm": 0.8026784660600069, "learning_rate": 4.010024889090975e-06, "loss": 0.3547, "step": 5455 }, { "epoch": 1.821702838063439, "grad_norm": 0.8340784846853071, "learning_rate": 4.008120809377806e-06, "loss": 0.3539, "step": 5456 }, { "epoch": 1.8220367278797998, "grad_norm": 0.8177435813667315, "learning_rate": 4.006216879388505e-06, "loss": 0.3531, "step": 5457 }, { "epoch": 1.8223706176961603, "grad_norm": 0.8398488169331699, "learning_rate": 4.004313099410474e-06, "loss": 0.3665, "step": 5458 }, { "epoch": 1.8227045075125208, "grad_norm": 0.8274366222120395, "learning_rate": 4.002409469731086e-06, "loss": 0.3591, "step": 5459 }, { "epoch": 1.8230383973288815, "grad_norm": 0.8694517908907721, "learning_rate": 4.000505990637693e-06, "loss": 0.3783, "step": 5460 }, { "epoch": 1.823372287145242, "grad_norm": 0.822458412294384, "learning_rate": 3.9986026624176235e-06, "loss": 0.3582, "step": 5461 }, { "epoch": 1.8237061769616028, "grad_norm": 0.8234145537374075, "learning_rate": 3.996699485358187e-06, "loss": 0.3528, "step": 5462 }, { "epoch": 1.8240400667779633, "grad_norm": 0.8487602362779987, "learning_rate": 3.994796459746666e-06, "loss": 0.35, "step": 5463 }, { "epoch": 1.8243739565943238, "grad_norm": 0.813996334273787, "learning_rate": 3.992893585870323e-06, "loss": 0.3522, "step": 5464 }, { "epoch": 1.8247078464106845, "grad_norm": 0.84871723955274, "learning_rate": 3.990990864016392e-06, "loss": 0.3741, "step": 5465 }, { "epoch": 1.8250417362270452, "grad_norm": 0.7958587956993218, "learning_rate": 3.989088294472095e-06, "loss": 0.3512, "step": 5466 }, { "epoch": 1.8253756260434058, "grad_norm": 0.8352889288964238, "learning_rate": 3.98718587752462e-06, "loss": 0.3655, "step": 5467 }, { "epoch": 1.8257095158597663, "grad_norm": 0.8687020198046358, "learning_rate": 3.985283613461137e-06, "loss": 0.3852, "step": 5468 }, { "epoch": 1.8260434056761268, "grad_norm": 0.7962537363523639, "learning_rate": 3.983381502568793e-06, "loss": 0.3465, "step": 5469 }, { "epoch": 1.8263772954924875, "grad_norm": 0.8416916049978966, "learning_rate": 3.981479545134707e-06, "loss": 0.3688, "step": 5470 }, { "epoch": 1.8267111853088482, "grad_norm": 0.8515033891758949, "learning_rate": 3.979577741445985e-06, "loss": 0.3764, "step": 5471 }, { "epoch": 1.8270450751252088, "grad_norm": 0.7805678202542902, "learning_rate": 3.9776760917897005e-06, "loss": 0.3535, "step": 5472 }, { "epoch": 1.8273789649415693, "grad_norm": 0.8396628470558596, "learning_rate": 3.975774596452907e-06, "loss": 0.3722, "step": 5473 }, { "epoch": 1.8277128547579298, "grad_norm": 0.8346622562753402, "learning_rate": 3.9738732557226346e-06, "loss": 0.3679, "step": 5474 }, { "epoch": 1.8280467445742905, "grad_norm": 0.8291378071070393, "learning_rate": 3.971972069885892e-06, "loss": 0.3657, "step": 5475 }, { "epoch": 1.8283806343906512, "grad_norm": 0.8192806474450941, "learning_rate": 3.97007103922966e-06, "loss": 0.3571, "step": 5476 }, { "epoch": 1.8287145242070117, "grad_norm": 0.835196005634283, "learning_rate": 3.968170164040902e-06, "loss": 0.3674, "step": 5477 }, { "epoch": 1.8290484140233723, "grad_norm": 0.840060613290096, "learning_rate": 3.966269444606548e-06, "loss": 0.3616, "step": 5478 }, { "epoch": 1.8293823038397328, "grad_norm": 0.8236676479380389, "learning_rate": 3.96436888121352e-06, "loss": 0.3708, "step": 5479 }, { "epoch": 1.8297161936560935, "grad_norm": 0.8194555075740733, "learning_rate": 3.9624684741487e-06, "loss": 0.3717, "step": 5480 }, { "epoch": 1.8300500834724542, "grad_norm": 0.7974530672635936, "learning_rate": 3.960568223698958e-06, "loss": 0.3513, "step": 5481 }, { "epoch": 1.8303839732888147, "grad_norm": 0.8328874740660019, "learning_rate": 3.9586681301511345e-06, "loss": 0.3555, "step": 5482 }, { "epoch": 1.8307178631051753, "grad_norm": 0.8132949774789194, "learning_rate": 3.956768193792049e-06, "loss": 0.3656, "step": 5483 }, { "epoch": 1.8310517529215358, "grad_norm": 0.7959138531641682, "learning_rate": 3.954868414908497e-06, "loss": 0.35, "step": 5484 }, { "epoch": 1.8313856427378965, "grad_norm": 0.8434269115512025, "learning_rate": 3.9529687937872475e-06, "loss": 0.3698, "step": 5485 }, { "epoch": 1.8317195325542572, "grad_norm": 0.8394540658916583, "learning_rate": 3.951069330715047e-06, "loss": 0.3781, "step": 5486 }, { "epoch": 1.8320534223706177, "grad_norm": 0.8024872397458072, "learning_rate": 3.949170025978622e-06, "loss": 0.3553, "step": 5487 }, { "epoch": 1.8323873121869783, "grad_norm": 0.795737444474444, "learning_rate": 3.947270879864672e-06, "loss": 0.3462, "step": 5488 }, { "epoch": 1.8327212020033388, "grad_norm": 0.8165527087345149, "learning_rate": 3.94537189265987e-06, "loss": 0.3605, "step": 5489 }, { "epoch": 1.8330550918196995, "grad_norm": 0.7880145144683619, "learning_rate": 3.943473064650867e-06, "loss": 0.3436, "step": 5490 }, { "epoch": 1.8333889816360602, "grad_norm": 0.8165350744986715, "learning_rate": 3.941574396124294e-06, "loss": 0.3586, "step": 5491 }, { "epoch": 1.8337228714524207, "grad_norm": 0.7532992837790958, "learning_rate": 3.9396758873667534e-06, "loss": 0.3438, "step": 5492 }, { "epoch": 1.8340567612687813, "grad_norm": 0.8308953471335638, "learning_rate": 3.937777538664823e-06, "loss": 0.3673, "step": 5493 }, { "epoch": 1.8343906510851418, "grad_norm": 0.8816750669061959, "learning_rate": 3.935879350305058e-06, "loss": 0.3778, "step": 5494 }, { "epoch": 1.8347245409015025, "grad_norm": 0.8103788589127382, "learning_rate": 3.933981322573991e-06, "loss": 0.3349, "step": 5495 }, { "epoch": 1.8350584307178632, "grad_norm": 0.8547828965992741, "learning_rate": 3.932083455758128e-06, "loss": 0.356, "step": 5496 }, { "epoch": 1.8353923205342237, "grad_norm": 0.8862943437113068, "learning_rate": 3.930185750143952e-06, "loss": 0.3584, "step": 5497 }, { "epoch": 1.8357262103505843, "grad_norm": 0.8450577239373314, "learning_rate": 3.92828820601792e-06, "loss": 0.3642, "step": 5498 }, { "epoch": 1.8360601001669448, "grad_norm": 0.8365873679181008, "learning_rate": 3.926390823666464e-06, "loss": 0.3623, "step": 5499 }, { "epoch": 1.8363939899833055, "grad_norm": 0.820651185324556, "learning_rate": 3.924493603375997e-06, "loss": 0.3595, "step": 5500 }, { "epoch": 1.8367278797996662, "grad_norm": 0.8003174035890295, "learning_rate": 3.922596545432903e-06, "loss": 0.3563, "step": 5501 }, { "epoch": 1.8370617696160267, "grad_norm": 0.8303182322521717, "learning_rate": 3.92069965012354e-06, "loss": 0.359, "step": 5502 }, { "epoch": 1.8373956594323873, "grad_norm": 0.8514124802241532, "learning_rate": 3.918802917734246e-06, "loss": 0.3724, "step": 5503 }, { "epoch": 1.8377295492487478, "grad_norm": 0.8619993843513146, "learning_rate": 3.916906348551332e-06, "loss": 0.3598, "step": 5504 }, { "epoch": 1.8380634390651085, "grad_norm": 0.8461214479619321, "learning_rate": 3.915009942861086e-06, "loss": 0.373, "step": 5505 }, { "epoch": 1.8383973288814692, "grad_norm": 0.8642008096502433, "learning_rate": 3.913113700949766e-06, "loss": 0.3647, "step": 5506 }, { "epoch": 1.8387312186978297, "grad_norm": 0.8325361706551858, "learning_rate": 3.91121762310361e-06, "loss": 0.3703, "step": 5507 }, { "epoch": 1.8390651085141902, "grad_norm": 0.8363986854480002, "learning_rate": 3.909321709608834e-06, "loss": 0.3565, "step": 5508 }, { "epoch": 1.8393989983305508, "grad_norm": 0.8194326363859813, "learning_rate": 3.907425960751622e-06, "loss": 0.3569, "step": 5509 }, { "epoch": 1.8397328881469115, "grad_norm": 0.8657505825490692, "learning_rate": 3.905530376818139e-06, "loss": 0.3644, "step": 5510 }, { "epoch": 1.8400667779632722, "grad_norm": 0.842179872792804, "learning_rate": 3.903634958094519e-06, "loss": 0.3615, "step": 5511 }, { "epoch": 1.8404006677796327, "grad_norm": 0.8495726838894196, "learning_rate": 3.90173970486688e-06, "loss": 0.371, "step": 5512 }, { "epoch": 1.8407345575959932, "grad_norm": 0.8231298394843983, "learning_rate": 3.899844617421307e-06, "loss": 0.3611, "step": 5513 }, { "epoch": 1.8410684474123538, "grad_norm": 0.782871782260295, "learning_rate": 3.897949696043864e-06, "loss": 0.3621, "step": 5514 }, { "epoch": 1.8414023372287145, "grad_norm": 0.8598616912126152, "learning_rate": 3.896054941020586e-06, "loss": 0.3599, "step": 5515 }, { "epoch": 1.8417362270450752, "grad_norm": 0.8059969885671715, "learning_rate": 3.89416035263749e-06, "loss": 0.3604, "step": 5516 }, { "epoch": 1.8420701168614357, "grad_norm": 0.8737808610409076, "learning_rate": 3.892265931180562e-06, "loss": 0.3695, "step": 5517 }, { "epoch": 1.8424040066777962, "grad_norm": 0.8324424598761125, "learning_rate": 3.890371676935763e-06, "loss": 0.3549, "step": 5518 }, { "epoch": 1.842737896494157, "grad_norm": 0.8688713561671979, "learning_rate": 3.88847759018903e-06, "loss": 0.3551, "step": 5519 }, { "epoch": 1.8430717863105175, "grad_norm": 0.7902925407059451, "learning_rate": 3.886583671226277e-06, "loss": 0.3567, "step": 5520 }, { "epoch": 1.8434056761268782, "grad_norm": 0.831998323451826, "learning_rate": 3.884689920333389e-06, "loss": 0.3568, "step": 5521 }, { "epoch": 1.8437395659432387, "grad_norm": 0.7968467695718977, "learning_rate": 3.882796337796228e-06, "loss": 0.3456, "step": 5522 }, { "epoch": 1.8440734557595992, "grad_norm": 0.8207813912303329, "learning_rate": 3.880902923900628e-06, "loss": 0.3694, "step": 5523 }, { "epoch": 1.84440734557596, "grad_norm": 0.854346637357205, "learning_rate": 3.879009678932401e-06, "loss": 0.3486, "step": 5524 }, { "epoch": 1.8447412353923205, "grad_norm": 0.8691293712094177, "learning_rate": 3.877116603177333e-06, "loss": 0.3533, "step": 5525 }, { "epoch": 1.8450751252086812, "grad_norm": 0.8554780883254625, "learning_rate": 3.87522369692118e-06, "loss": 0.3625, "step": 5526 }, { "epoch": 1.8454090150250417, "grad_norm": 0.8054491257967767, "learning_rate": 3.873330960449676e-06, "loss": 0.3406, "step": 5527 }, { "epoch": 1.8457429048414022, "grad_norm": 0.816695188178835, "learning_rate": 3.871438394048531e-06, "loss": 0.3444, "step": 5528 }, { "epoch": 1.846076794657763, "grad_norm": 0.8072331915827342, "learning_rate": 3.869545998003426e-06, "loss": 0.3461, "step": 5529 }, { "epoch": 1.8464106844741235, "grad_norm": 0.7815725269346372, "learning_rate": 3.86765377260002e-06, "loss": 0.3492, "step": 5530 }, { "epoch": 1.8467445742904842, "grad_norm": 0.8103903560837057, "learning_rate": 3.86576171812394e-06, "loss": 0.3533, "step": 5531 }, { "epoch": 1.8470784641068447, "grad_norm": 0.823360386483386, "learning_rate": 3.863869834860793e-06, "loss": 0.3626, "step": 5532 }, { "epoch": 1.8474123539232052, "grad_norm": 0.85590024152013, "learning_rate": 3.8619781230961586e-06, "loss": 0.3886, "step": 5533 }, { "epoch": 1.847746243739566, "grad_norm": 0.7882818462078464, "learning_rate": 3.86008658311559e-06, "loss": 0.3316, "step": 5534 }, { "epoch": 1.8480801335559267, "grad_norm": 0.831492017594595, "learning_rate": 3.858195215204614e-06, "loss": 0.3662, "step": 5535 }, { "epoch": 1.8484140233722872, "grad_norm": 0.8028974045908648, "learning_rate": 3.8563040196487325e-06, "loss": 0.3549, "step": 5536 }, { "epoch": 1.8487479131886477, "grad_norm": 0.8526366278408573, "learning_rate": 3.8544129967334204e-06, "loss": 0.3584, "step": 5537 }, { "epoch": 1.8490818030050082, "grad_norm": 0.8017023151219842, "learning_rate": 3.852522146744129e-06, "loss": 0.3603, "step": 5538 }, { "epoch": 1.849415692821369, "grad_norm": 0.8064890845951561, "learning_rate": 3.850631469966279e-06, "loss": 0.3437, "step": 5539 }, { "epoch": 1.8497495826377297, "grad_norm": 0.7935600771000739, "learning_rate": 3.848740966685267e-06, "loss": 0.3584, "step": 5540 }, { "epoch": 1.8500834724540902, "grad_norm": 0.8360630986926455, "learning_rate": 3.846850637186467e-06, "loss": 0.3501, "step": 5541 }, { "epoch": 1.8504173622704507, "grad_norm": 0.8200986338399513, "learning_rate": 3.844960481755222e-06, "loss": 0.3573, "step": 5542 }, { "epoch": 1.8507512520868112, "grad_norm": 0.8220671417589014, "learning_rate": 3.843070500676851e-06, "loss": 0.3564, "step": 5543 }, { "epoch": 1.851085141903172, "grad_norm": 0.8864632674644196, "learning_rate": 3.841180694236643e-06, "loss": 0.3613, "step": 5544 }, { "epoch": 1.8514190317195327, "grad_norm": 0.8285929501610719, "learning_rate": 3.8392910627198685e-06, "loss": 0.3637, "step": 5545 }, { "epoch": 1.8517529215358932, "grad_norm": 0.819790964399723, "learning_rate": 3.837401606411764e-06, "loss": 0.3392, "step": 5546 }, { "epoch": 1.8520868113522537, "grad_norm": 0.8287241723039862, "learning_rate": 3.835512325597543e-06, "loss": 0.3676, "step": 5547 }, { "epoch": 1.8524207011686142, "grad_norm": 0.8354592119832116, "learning_rate": 3.833623220562392e-06, "loss": 0.3444, "step": 5548 }, { "epoch": 1.852754590984975, "grad_norm": 0.8256994414582386, "learning_rate": 3.8317342915914705e-06, "loss": 0.3672, "step": 5549 }, { "epoch": 1.8530884808013357, "grad_norm": 0.817629547516496, "learning_rate": 3.829845538969911e-06, "loss": 0.3551, "step": 5550 }, { "epoch": 1.8534223706176962, "grad_norm": 0.8312582243500495, "learning_rate": 3.827956962982824e-06, "loss": 0.3612, "step": 5551 }, { "epoch": 1.8537562604340567, "grad_norm": 0.8363734968750857, "learning_rate": 3.826068563915282e-06, "loss": 0.359, "step": 5552 }, { "epoch": 1.8540901502504172, "grad_norm": 0.799525950409362, "learning_rate": 3.824180342052347e-06, "loss": 0.3452, "step": 5553 }, { "epoch": 1.854424040066778, "grad_norm": 0.8226433935604175, "learning_rate": 3.82229229767904e-06, "loss": 0.3554, "step": 5554 }, { "epoch": 1.8547579298831387, "grad_norm": 0.8262522137687682, "learning_rate": 3.820404431080363e-06, "loss": 0.3615, "step": 5555 }, { "epoch": 1.8550918196994992, "grad_norm": 0.7951495608225562, "learning_rate": 3.818516742541285e-06, "loss": 0.3518, "step": 5556 }, { "epoch": 1.8554257095158597, "grad_norm": 0.8509075130743068, "learning_rate": 3.816629232346758e-06, "loss": 0.3706, "step": 5557 }, { "epoch": 1.8557595993322202, "grad_norm": 0.8308770059455912, "learning_rate": 3.814741900781698e-06, "loss": 0.3496, "step": 5558 }, { "epoch": 1.856093489148581, "grad_norm": 0.8529877436246663, "learning_rate": 3.8128547481309967e-06, "loss": 0.38, "step": 5559 }, { "epoch": 1.8564273789649417, "grad_norm": 0.7977853109619331, "learning_rate": 3.8109677746795205e-06, "loss": 0.3558, "step": 5560 }, { "epoch": 1.8567612687813022, "grad_norm": 0.7937637417147043, "learning_rate": 3.809080980712105e-06, "loss": 0.3548, "step": 5561 }, { "epoch": 1.8570951585976627, "grad_norm": 0.8221578614802393, "learning_rate": 3.807194366513564e-06, "loss": 0.3569, "step": 5562 }, { "epoch": 1.8574290484140232, "grad_norm": 0.7883176213539507, "learning_rate": 3.805307932368682e-06, "loss": 0.3472, "step": 5563 }, { "epoch": 1.857762938230384, "grad_norm": 0.8083147888046285, "learning_rate": 3.803421678562213e-06, "loss": 0.3497, "step": 5564 }, { "epoch": 1.8580968280467447, "grad_norm": 0.8131048306733937, "learning_rate": 3.801535605378886e-06, "loss": 0.3627, "step": 5565 }, { "epoch": 1.8584307178631052, "grad_norm": 0.8228613378179233, "learning_rate": 3.799649713103406e-06, "loss": 0.3563, "step": 5566 }, { "epoch": 1.8587646076794657, "grad_norm": 0.8304341379639009, "learning_rate": 3.7977640020204465e-06, "loss": 0.3607, "step": 5567 }, { "epoch": 1.8590984974958262, "grad_norm": 0.814027087671281, "learning_rate": 3.7958784724146554e-06, "loss": 0.3663, "step": 5568 }, { "epoch": 1.859432387312187, "grad_norm": 0.8521265025694801, "learning_rate": 3.793993124570651e-06, "loss": 0.3788, "step": 5569 }, { "epoch": 1.8597662771285477, "grad_norm": 0.8198479260122858, "learning_rate": 3.7921079587730285e-06, "loss": 0.3568, "step": 5570 }, { "epoch": 1.8601001669449082, "grad_norm": 0.8186009206897902, "learning_rate": 3.7902229753063533e-06, "loss": 0.3491, "step": 5571 }, { "epoch": 1.8604340567612687, "grad_norm": 0.8151289024399739, "learning_rate": 3.7883381744551616e-06, "loss": 0.3455, "step": 5572 }, { "epoch": 1.8607679465776292, "grad_norm": 0.8262247653617278, "learning_rate": 3.7864535565039623e-06, "loss": 0.3735, "step": 5573 }, { "epoch": 1.86110183639399, "grad_norm": 0.8229621823943059, "learning_rate": 3.784569121737241e-06, "loss": 0.3638, "step": 5574 }, { "epoch": 1.8614357262103507, "grad_norm": 0.8340496076229742, "learning_rate": 3.78268487043945e-06, "loss": 0.3657, "step": 5575 }, { "epoch": 1.8617696160267112, "grad_norm": 0.8333543973514498, "learning_rate": 3.78080080289502e-06, "loss": 0.3621, "step": 5576 }, { "epoch": 1.8621035058430717, "grad_norm": 0.8299306317314652, "learning_rate": 3.778916919388344e-06, "loss": 0.3684, "step": 5577 }, { "epoch": 1.8624373956594322, "grad_norm": 0.8127159660610079, "learning_rate": 3.7770332202038014e-06, "loss": 0.3602, "step": 5578 }, { "epoch": 1.862771285475793, "grad_norm": 0.7787851298731557, "learning_rate": 3.7751497056257306e-06, "loss": 0.3448, "step": 5579 }, { "epoch": 1.8631051752921537, "grad_norm": 0.8359179985682281, "learning_rate": 3.77326637593845e-06, "loss": 0.3679, "step": 5580 }, { "epoch": 1.8634390651085142, "grad_norm": 0.8235029527360798, "learning_rate": 3.771383231426245e-06, "loss": 0.3654, "step": 5581 }, { "epoch": 1.8637729549248747, "grad_norm": 0.8341449941462742, "learning_rate": 3.769500272373379e-06, "loss": 0.3542, "step": 5582 }, { "epoch": 1.8641068447412354, "grad_norm": 0.8276433431423621, "learning_rate": 3.767617499064083e-06, "loss": 0.3554, "step": 5583 }, { "epoch": 1.864440734557596, "grad_norm": 0.8049872466296796, "learning_rate": 3.7657349117825614e-06, "loss": 0.3386, "step": 5584 }, { "epoch": 1.8647746243739567, "grad_norm": 0.8159037518505309, "learning_rate": 3.763852510812987e-06, "loss": 0.3544, "step": 5585 }, { "epoch": 1.8651085141903172, "grad_norm": 0.8569440664841681, "learning_rate": 3.761970296439513e-06, "loss": 0.3589, "step": 5586 }, { "epoch": 1.8654424040066777, "grad_norm": 0.8079452828103791, "learning_rate": 3.7600882689462552e-06, "loss": 0.358, "step": 5587 }, { "epoch": 1.8657762938230384, "grad_norm": 0.8467968681423333, "learning_rate": 3.758206428617307e-06, "loss": 0.347, "step": 5588 }, { "epoch": 1.866110183639399, "grad_norm": 0.818730434403776, "learning_rate": 3.7563247757367295e-06, "loss": 0.3619, "step": 5589 }, { "epoch": 1.8664440734557597, "grad_norm": 0.7982182652057649, "learning_rate": 3.7544433105885598e-06, "loss": 0.3558, "step": 5590 }, { "epoch": 1.8667779632721202, "grad_norm": 0.7773722535692514, "learning_rate": 3.7525620334568045e-06, "loss": 0.342, "step": 5591 }, { "epoch": 1.8671118530884807, "grad_norm": 0.7885895377150635, "learning_rate": 3.750680944625441e-06, "loss": 0.3508, "step": 5592 }, { "epoch": 1.8674457429048414, "grad_norm": 0.8093557578011326, "learning_rate": 3.748800044378419e-06, "loss": 0.3565, "step": 5593 }, { "epoch": 1.867779632721202, "grad_norm": 0.8186228769119945, "learning_rate": 3.7469193329996584e-06, "loss": 0.3502, "step": 5594 }, { "epoch": 1.8681135225375627, "grad_norm": 0.8255787178962882, "learning_rate": 3.745038810773055e-06, "loss": 0.3471, "step": 5595 }, { "epoch": 1.8684474123539232, "grad_norm": 0.805411231907931, "learning_rate": 3.7431584779824714e-06, "loss": 0.3525, "step": 5596 }, { "epoch": 1.8687813021702837, "grad_norm": 0.7696394845368737, "learning_rate": 3.7412783349117455e-06, "loss": 0.3393, "step": 5597 }, { "epoch": 1.8691151919866444, "grad_norm": 0.8257924008749631, "learning_rate": 3.7393983818446787e-06, "loss": 0.3602, "step": 5598 }, { "epoch": 1.8694490818030052, "grad_norm": 0.8080092902722226, "learning_rate": 3.737518619065056e-06, "loss": 0.3543, "step": 5599 }, { "epoch": 1.8697829716193657, "grad_norm": 0.8645505546896716, "learning_rate": 3.7356390468566237e-06, "loss": 0.3757, "step": 5600 }, { "epoch": 1.8701168614357262, "grad_norm": 0.8452865996987992, "learning_rate": 3.7337596655031027e-06, "loss": 0.3662, "step": 5601 }, { "epoch": 1.8704507512520867, "grad_norm": 0.8335737044684991, "learning_rate": 3.731880475288184e-06, "loss": 0.3624, "step": 5602 }, { "epoch": 1.8707846410684474, "grad_norm": 0.8316945328843985, "learning_rate": 3.7300014764955343e-06, "loss": 0.3702, "step": 5603 }, { "epoch": 1.8711185308848082, "grad_norm": 0.8325555579684163, "learning_rate": 3.728122669408787e-06, "loss": 0.3639, "step": 5604 }, { "epoch": 1.8714524207011687, "grad_norm": 0.8205181889152273, "learning_rate": 3.7262440543115446e-06, "loss": 0.3595, "step": 5605 }, { "epoch": 1.8717863105175292, "grad_norm": 0.8117911625990765, "learning_rate": 3.7243656314873843e-06, "loss": 0.358, "step": 5606 }, { "epoch": 1.8721202003338897, "grad_norm": 0.8325502029581078, "learning_rate": 3.7224874012198557e-06, "loss": 0.3608, "step": 5607 }, { "epoch": 1.8724540901502504, "grad_norm": 0.7969231117834202, "learning_rate": 3.7206093637924757e-06, "loss": 0.3541, "step": 5608 }, { "epoch": 1.8727879799666112, "grad_norm": 0.8412801661496387, "learning_rate": 3.7187315194887348e-06, "loss": 0.3614, "step": 5609 }, { "epoch": 1.8731218697829717, "grad_norm": 0.81714317485168, "learning_rate": 3.7168538685920886e-06, "loss": 0.3576, "step": 5610 }, { "epoch": 1.8734557595993322, "grad_norm": 0.8148326099754727, "learning_rate": 3.7149764113859744e-06, "loss": 0.3534, "step": 5611 }, { "epoch": 1.8737896494156927, "grad_norm": 0.8031161596047037, "learning_rate": 3.7130991481537894e-06, "loss": 0.3392, "step": 5612 }, { "epoch": 1.8741235392320534, "grad_norm": 0.8149579081509548, "learning_rate": 3.7112220791789076e-06, "loss": 0.3645, "step": 5613 }, { "epoch": 1.8744574290484142, "grad_norm": 0.8298977534227328, "learning_rate": 3.7093452047446697e-06, "loss": 0.351, "step": 5614 }, { "epoch": 1.8747913188647747, "grad_norm": 0.8581124465619033, "learning_rate": 3.707468525134393e-06, "loss": 0.3627, "step": 5615 }, { "epoch": 1.8751252086811352, "grad_norm": 0.8579438781309955, "learning_rate": 3.7055920406313594e-06, "loss": 0.3641, "step": 5616 }, { "epoch": 1.8754590984974957, "grad_norm": 0.813872909649455, "learning_rate": 3.703715751518826e-06, "loss": 0.3481, "step": 5617 }, { "epoch": 1.8757929883138564, "grad_norm": 0.8467003434858511, "learning_rate": 3.701839658080012e-06, "loss": 0.3647, "step": 5618 }, { "epoch": 1.8761268781302172, "grad_norm": 0.8392041274251212, "learning_rate": 3.699963760598122e-06, "loss": 0.3471, "step": 5619 }, { "epoch": 1.8764607679465777, "grad_norm": 0.8098220010040509, "learning_rate": 3.698088059356315e-06, "loss": 0.3367, "step": 5620 }, { "epoch": 1.8767946577629382, "grad_norm": 0.8621049208161661, "learning_rate": 3.6962125546377314e-06, "loss": 0.3701, "step": 5621 }, { "epoch": 1.8771285475792987, "grad_norm": 0.8266962964935018, "learning_rate": 3.6943372467254775e-06, "loss": 0.3553, "step": 5622 }, { "epoch": 1.8774624373956594, "grad_norm": 0.8538814231746146, "learning_rate": 3.6924621359026265e-06, "loss": 0.3603, "step": 5623 }, { "epoch": 1.8777963272120202, "grad_norm": 0.839085887446298, "learning_rate": 3.690587222452232e-06, "loss": 0.3618, "step": 5624 }, { "epoch": 1.8781302170283807, "grad_norm": 0.8384722372094808, "learning_rate": 3.6887125066573084e-06, "loss": 0.37, "step": 5625 }, { "epoch": 1.8784641068447412, "grad_norm": 0.8347134487100869, "learning_rate": 3.6868379888008436e-06, "loss": 0.3666, "step": 5626 }, { "epoch": 1.8787979966611017, "grad_norm": 0.8936608967013228, "learning_rate": 3.6849636691657945e-06, "loss": 0.3604, "step": 5627 }, { "epoch": 1.8791318864774624, "grad_norm": 0.8330756767193869, "learning_rate": 3.683089548035092e-06, "loss": 0.3677, "step": 5628 }, { "epoch": 1.8794657762938232, "grad_norm": 0.8767330797383575, "learning_rate": 3.681215625691632e-06, "loss": 0.3724, "step": 5629 }, { "epoch": 1.8797996661101837, "grad_norm": 0.8248322304206107, "learning_rate": 3.679341902418284e-06, "loss": 0.3576, "step": 5630 }, { "epoch": 1.8801335559265442, "grad_norm": 0.893166301069804, "learning_rate": 3.677468378497883e-06, "loss": 0.3762, "step": 5631 }, { "epoch": 1.8804674457429047, "grad_norm": 0.8315498598384379, "learning_rate": 3.67559505421324e-06, "loss": 0.3495, "step": 5632 }, { "epoch": 1.8808013355592654, "grad_norm": 0.8128392341565689, "learning_rate": 3.673721929847132e-06, "loss": 0.3458, "step": 5633 }, { "epoch": 1.8811352253756262, "grad_norm": 0.803186160872085, "learning_rate": 3.6718490056823066e-06, "loss": 0.3579, "step": 5634 }, { "epoch": 1.8814691151919867, "grad_norm": 0.7826989726507455, "learning_rate": 3.6699762820014782e-06, "loss": 0.3417, "step": 5635 }, { "epoch": 1.8818030050083472, "grad_norm": 0.794688977013589, "learning_rate": 3.6681037590873383e-06, "loss": 0.3437, "step": 5636 }, { "epoch": 1.8821368948247077, "grad_norm": 0.8245508578389129, "learning_rate": 3.666231437222543e-06, "loss": 0.3514, "step": 5637 }, { "epoch": 1.8824707846410684, "grad_norm": 0.8487134975619675, "learning_rate": 3.6643593166897163e-06, "loss": 0.3802, "step": 5638 }, { "epoch": 1.8828046744574292, "grad_norm": 0.8291905778451175, "learning_rate": 3.6624873977714547e-06, "loss": 0.3647, "step": 5639 }, { "epoch": 1.8831385642737897, "grad_norm": 0.7953915898817457, "learning_rate": 3.660615680750325e-06, "loss": 0.3596, "step": 5640 }, { "epoch": 1.8834724540901502, "grad_norm": 0.8315690567034646, "learning_rate": 3.6587441659088624e-06, "loss": 0.3534, "step": 5641 }, { "epoch": 1.8838063439065107, "grad_norm": 0.8179905583000353, "learning_rate": 3.6568728535295707e-06, "loss": 0.3616, "step": 5642 }, { "epoch": 1.8841402337228714, "grad_norm": 0.8249684192886022, "learning_rate": 3.6550017438949236e-06, "loss": 0.3503, "step": 5643 }, { "epoch": 1.8844741235392322, "grad_norm": 0.8387745491824357, "learning_rate": 3.6531308372873663e-06, "loss": 0.3582, "step": 5644 }, { "epoch": 1.8848080133555927, "grad_norm": 0.8211207771280785, "learning_rate": 3.651260133989312e-06, "loss": 0.3619, "step": 5645 }, { "epoch": 1.8851419031719532, "grad_norm": 0.8500633268313421, "learning_rate": 3.649389634283139e-06, "loss": 0.3586, "step": 5646 }, { "epoch": 1.8854757929883137, "grad_norm": 0.7859804169132227, "learning_rate": 3.6475193384512008e-06, "loss": 0.3418, "step": 5647 }, { "epoch": 1.8858096828046744, "grad_norm": 0.8388072875162154, "learning_rate": 3.6456492467758187e-06, "loss": 0.36, "step": 5648 }, { "epoch": 1.8861435726210352, "grad_norm": 0.841491746498949, "learning_rate": 3.643779359539282e-06, "loss": 0.3608, "step": 5649 }, { "epoch": 1.8864774624373957, "grad_norm": 0.8209812380802343, "learning_rate": 3.6419096770238517e-06, "loss": 0.355, "step": 5650 }, { "epoch": 1.8868113522537562, "grad_norm": 0.8247528437178614, "learning_rate": 3.6400401995117503e-06, "loss": 0.3502, "step": 5651 }, { "epoch": 1.887145242070117, "grad_norm": 0.8297989911723169, "learning_rate": 3.6381709272851806e-06, "loss": 0.3581, "step": 5652 }, { "epoch": 1.8874791318864774, "grad_norm": 0.8061087583923343, "learning_rate": 3.636301860626306e-06, "loss": 0.3519, "step": 5653 }, { "epoch": 1.8878130217028382, "grad_norm": 0.8468243709012325, "learning_rate": 3.634432999817262e-06, "loss": 0.3661, "step": 5654 }, { "epoch": 1.8881469115191987, "grad_norm": 0.8088016788633935, "learning_rate": 3.6325643451401525e-06, "loss": 0.3672, "step": 5655 }, { "epoch": 1.8884808013355592, "grad_norm": 0.8430339762182774, "learning_rate": 3.6306958968770494e-06, "loss": 0.3637, "step": 5656 }, { "epoch": 1.88881469115192, "grad_norm": 0.8103038796019031, "learning_rate": 3.628827655309998e-06, "loss": 0.3599, "step": 5657 }, { "epoch": 1.8891485809682804, "grad_norm": 0.8069311657340555, "learning_rate": 3.6269596207210044e-06, "loss": 0.3541, "step": 5658 }, { "epoch": 1.8894824707846412, "grad_norm": 0.8444466953279819, "learning_rate": 3.62509179339205e-06, "loss": 0.3819, "step": 5659 }, { "epoch": 1.8898163606010017, "grad_norm": 0.8380149331283914, "learning_rate": 3.623224173605081e-06, "loss": 0.3554, "step": 5660 }, { "epoch": 1.8901502504173622, "grad_norm": 0.8007151560950794, "learning_rate": 3.6213567616420174e-06, "loss": 0.3581, "step": 5661 }, { "epoch": 1.890484140233723, "grad_norm": 0.7870817263551507, "learning_rate": 3.619489557784742e-06, "loss": 0.3503, "step": 5662 }, { "epoch": 1.8908180300500834, "grad_norm": 0.8268427074589316, "learning_rate": 3.617622562315111e-06, "loss": 0.3599, "step": 5663 }, { "epoch": 1.8911519198664442, "grad_norm": 0.8357991536808882, "learning_rate": 3.6157557755149413e-06, "loss": 0.3686, "step": 5664 }, { "epoch": 1.8914858096828047, "grad_norm": 0.8353328710596767, "learning_rate": 3.6138891976660312e-06, "loss": 0.345, "step": 5665 }, { "epoch": 1.8918196994991652, "grad_norm": 0.7878921800058679, "learning_rate": 3.612022829050136e-06, "loss": 0.3418, "step": 5666 }, { "epoch": 1.892153589315526, "grad_norm": 0.8019314977624483, "learning_rate": 3.610156669948983e-06, "loss": 0.35, "step": 5667 }, { "epoch": 1.8924874791318866, "grad_norm": 0.8254922631116027, "learning_rate": 3.6082907206442685e-06, "loss": 0.3619, "step": 5668 }, { "epoch": 1.8928213689482472, "grad_norm": 0.869193936950006, "learning_rate": 3.606424981417659e-06, "loss": 0.3848, "step": 5669 }, { "epoch": 1.8931552587646077, "grad_norm": 0.813762398459982, "learning_rate": 3.6045594525507875e-06, "loss": 0.3514, "step": 5670 }, { "epoch": 1.8934891485809682, "grad_norm": 0.7910472599848374, "learning_rate": 3.6026941343252525e-06, "loss": 0.3554, "step": 5671 }, { "epoch": 1.893823038397329, "grad_norm": 0.759704302564898, "learning_rate": 3.6008290270226232e-06, "loss": 0.3176, "step": 5672 }, { "epoch": 1.8941569282136896, "grad_norm": 0.7913906623676564, "learning_rate": 3.5989641309244394e-06, "loss": 0.3409, "step": 5673 }, { "epoch": 1.8944908180300501, "grad_norm": 0.8072686075620373, "learning_rate": 3.5970994463122055e-06, "loss": 0.3415, "step": 5674 }, { "epoch": 1.8948247078464107, "grad_norm": 0.7994463897644223, "learning_rate": 3.5952349734673945e-06, "loss": 0.351, "step": 5675 }, { "epoch": 1.8951585976627712, "grad_norm": 0.8023114858222959, "learning_rate": 3.593370712671448e-06, "loss": 0.3527, "step": 5676 }, { "epoch": 1.895492487479132, "grad_norm": 0.783037294101263, "learning_rate": 3.591506664205777e-06, "loss": 0.3504, "step": 5677 }, { "epoch": 1.8958263772954926, "grad_norm": 0.7797047857408376, "learning_rate": 3.589642828351759e-06, "loss": 0.345, "step": 5678 }, { "epoch": 1.8961602671118531, "grad_norm": 0.8492172587583554, "learning_rate": 3.587779205390739e-06, "loss": 0.3582, "step": 5679 }, { "epoch": 1.8964941569282137, "grad_norm": 0.8503083003141451, "learning_rate": 3.5859157956040273e-06, "loss": 0.3615, "step": 5680 }, { "epoch": 1.8968280467445742, "grad_norm": 0.798887789363038, "learning_rate": 3.5840525992729096e-06, "loss": 0.3388, "step": 5681 }, { "epoch": 1.897161936560935, "grad_norm": 0.8355174256472319, "learning_rate": 3.582189616678633e-06, "loss": 0.3696, "step": 5682 }, { "epoch": 1.8974958263772956, "grad_norm": 0.8443702764323747, "learning_rate": 3.580326848102415e-06, "loss": 0.3775, "step": 5683 }, { "epoch": 1.8978297161936561, "grad_norm": 0.8346869696673174, "learning_rate": 3.5784642938254384e-06, "loss": 0.3611, "step": 5684 }, { "epoch": 1.8981636060100167, "grad_norm": 0.7755194708321255, "learning_rate": 3.5766019541288543e-06, "loss": 0.3333, "step": 5685 }, { "epoch": 1.8984974958263772, "grad_norm": 0.8090969828840372, "learning_rate": 3.5747398292937856e-06, "loss": 0.3548, "step": 5686 }, { "epoch": 1.898831385642738, "grad_norm": 0.8127668855844612, "learning_rate": 3.572877919601318e-06, "loss": 0.3475, "step": 5687 }, { "epoch": 1.8991652754590986, "grad_norm": 0.8281996615326505, "learning_rate": 3.571016225332506e-06, "loss": 0.3532, "step": 5688 }, { "epoch": 1.8994991652754591, "grad_norm": 0.83946894235761, "learning_rate": 3.5691547467683707e-06, "loss": 0.3729, "step": 5689 }, { "epoch": 1.8998330550918197, "grad_norm": 0.8033688153954808, "learning_rate": 3.5672934841899037e-06, "loss": 0.3448, "step": 5690 }, { "epoch": 1.9001669449081802, "grad_norm": 0.8439979230292454, "learning_rate": 3.5654324378780623e-06, "loss": 0.3566, "step": 5691 }, { "epoch": 1.900500834724541, "grad_norm": 0.8029857875390748, "learning_rate": 3.5635716081137696e-06, "loss": 0.3502, "step": 5692 }, { "epoch": 1.9008347245409016, "grad_norm": 0.8102073068013912, "learning_rate": 3.561710995177916e-06, "loss": 0.3577, "step": 5693 }, { "epoch": 1.9011686143572621, "grad_norm": 0.8098954529938804, "learning_rate": 3.5598505993513633e-06, "loss": 0.3555, "step": 5694 }, { "epoch": 1.9015025041736227, "grad_norm": 0.8367691389826785, "learning_rate": 3.5579904209149362e-06, "loss": 0.3678, "step": 5695 }, { "epoch": 1.9018363939899832, "grad_norm": 0.8270422625837147, "learning_rate": 3.5561304601494294e-06, "loss": 0.3694, "step": 5696 }, { "epoch": 1.902170283806344, "grad_norm": 0.7678649246013824, "learning_rate": 3.5542707173355995e-06, "loss": 0.3401, "step": 5697 }, { "epoch": 1.9025041736227046, "grad_norm": 0.8153049947997487, "learning_rate": 3.5524111927541795e-06, "loss": 0.3552, "step": 5698 }, { "epoch": 1.9028380634390651, "grad_norm": 0.8473751104370951, "learning_rate": 3.5505518866858613e-06, "loss": 0.3719, "step": 5699 }, { "epoch": 1.9031719532554257, "grad_norm": 0.8359627789175823, "learning_rate": 3.5486927994113063e-06, "loss": 0.3752, "step": 5700 }, { "epoch": 1.9035058430717862, "grad_norm": 0.803620821888279, "learning_rate": 3.546833931211142e-06, "loss": 0.3404, "step": 5701 }, { "epoch": 1.903839732888147, "grad_norm": 0.8147573995863419, "learning_rate": 3.5449752823659676e-06, "loss": 0.35, "step": 5702 }, { "epoch": 1.9041736227045076, "grad_norm": 0.8390956263511308, "learning_rate": 3.543116853156344e-06, "loss": 0.355, "step": 5703 }, { "epoch": 1.9045075125208681, "grad_norm": 0.8295885576984944, "learning_rate": 3.5412586438628016e-06, "loss": 0.357, "step": 5704 }, { "epoch": 1.9048414023372287, "grad_norm": 0.8119457425946908, "learning_rate": 3.539400654765831e-06, "loss": 0.3637, "step": 5705 }, { "epoch": 1.9051752921535892, "grad_norm": 0.8259195973840355, "learning_rate": 3.537542886145903e-06, "loss": 0.3614, "step": 5706 }, { "epoch": 1.90550918196995, "grad_norm": 0.7803784509087878, "learning_rate": 3.535685338283443e-06, "loss": 0.3431, "step": 5707 }, { "epoch": 1.9058430717863106, "grad_norm": 0.8297368307151318, "learning_rate": 3.5338280114588475e-06, "loss": 0.3632, "step": 5708 }, { "epoch": 1.9061769616026711, "grad_norm": 0.8361146293888414, "learning_rate": 3.531970905952478e-06, "loss": 0.3569, "step": 5709 }, { "epoch": 1.9065108514190316, "grad_norm": 0.8252045457909511, "learning_rate": 3.530114022044667e-06, "loss": 0.354, "step": 5710 }, { "epoch": 1.9068447412353922, "grad_norm": 0.8038881379292194, "learning_rate": 3.5282573600157117e-06, "loss": 0.353, "step": 5711 }, { "epoch": 1.907178631051753, "grad_norm": 0.8000332516806256, "learning_rate": 3.5264009201458703e-06, "loss": 0.3609, "step": 5712 }, { "epoch": 1.9075125208681136, "grad_norm": 0.8123636916897037, "learning_rate": 3.5245447027153727e-06, "loss": 0.354, "step": 5713 }, { "epoch": 1.9078464106844741, "grad_norm": 0.8155353834318562, "learning_rate": 3.522688708004417e-06, "loss": 0.3612, "step": 5714 }, { "epoch": 1.9081803005008346, "grad_norm": 0.8395667913405549, "learning_rate": 3.520832936293164e-06, "loss": 0.3632, "step": 5715 }, { "epoch": 1.9085141903171954, "grad_norm": 0.8333789380328281, "learning_rate": 3.5189773878617416e-06, "loss": 0.3629, "step": 5716 }, { "epoch": 1.908848080133556, "grad_norm": 0.8553229750737872, "learning_rate": 3.517122062990244e-06, "loss": 0.3696, "step": 5717 }, { "epoch": 1.9091819699499166, "grad_norm": 0.8631413772316757, "learning_rate": 3.515266961958731e-06, "loss": 0.3673, "step": 5718 }, { "epoch": 1.9095158597662771, "grad_norm": 0.809777007459505, "learning_rate": 3.513412085047232e-06, "loss": 0.3675, "step": 5719 }, { "epoch": 1.9098497495826376, "grad_norm": 0.8206786578507183, "learning_rate": 3.5115574325357383e-06, "loss": 0.3526, "step": 5720 }, { "epoch": 1.9101836393989984, "grad_norm": 0.8330406647631238, "learning_rate": 3.5097030047042096e-06, "loss": 0.3567, "step": 5721 }, { "epoch": 1.910517529215359, "grad_norm": 0.8584804224251333, "learning_rate": 3.5078488018325707e-06, "loss": 0.3773, "step": 5722 }, { "epoch": 1.9108514190317196, "grad_norm": 0.7821660940660795, "learning_rate": 3.505994824200714e-06, "loss": 0.3397, "step": 5723 }, { "epoch": 1.9111853088480801, "grad_norm": 0.8352635757908133, "learning_rate": 3.5041410720884984e-06, "loss": 0.3706, "step": 5724 }, { "epoch": 1.9115191986644406, "grad_norm": 0.8540967827028713, "learning_rate": 3.502287545775743e-06, "loss": 0.3617, "step": 5725 }, { "epoch": 1.9118530884808014, "grad_norm": 0.7995419212882754, "learning_rate": 3.5004342455422396e-06, "loss": 0.3589, "step": 5726 }, { "epoch": 1.9121869782971619, "grad_norm": 0.8472637293664296, "learning_rate": 3.498581171667743e-06, "loss": 0.3502, "step": 5727 }, { "epoch": 1.9125208681135226, "grad_norm": 0.7919469397729876, "learning_rate": 3.496728324431975e-06, "loss": 0.3534, "step": 5728 }, { "epoch": 1.9128547579298831, "grad_norm": 0.7911817919747861, "learning_rate": 3.4948757041146223e-06, "loss": 0.3448, "step": 5729 }, { "epoch": 1.9131886477462436, "grad_norm": 0.830582633125892, "learning_rate": 3.493023310995334e-06, "loss": 0.3712, "step": 5730 }, { "epoch": 1.9135225375626044, "grad_norm": 0.8255161144636163, "learning_rate": 3.491171145353733e-06, "loss": 0.3527, "step": 5731 }, { "epoch": 1.9138564273789649, "grad_norm": 0.8690217945728742, "learning_rate": 3.4893192074694016e-06, "loss": 0.3652, "step": 5732 }, { "epoch": 1.9141903171953256, "grad_norm": 0.8621422620370376, "learning_rate": 3.4874674976218876e-06, "loss": 0.3707, "step": 5733 }, { "epoch": 1.9145242070116861, "grad_norm": 0.8335098324378472, "learning_rate": 3.485616016090706e-06, "loss": 0.3719, "step": 5734 }, { "epoch": 1.9148580968280466, "grad_norm": 0.7845852985063082, "learning_rate": 3.4837647631553405e-06, "loss": 0.3398, "step": 5735 }, { "epoch": 1.9151919866444074, "grad_norm": 0.8634443047875761, "learning_rate": 3.481913739095235e-06, "loss": 0.3498, "step": 5736 }, { "epoch": 1.915525876460768, "grad_norm": 0.7817132702902657, "learning_rate": 3.480062944189802e-06, "loss": 0.3437, "step": 5737 }, { "epoch": 1.9158597662771286, "grad_norm": 0.8031487716190363, "learning_rate": 3.478212378718415e-06, "loss": 0.3467, "step": 5738 }, { "epoch": 1.9161936560934891, "grad_norm": 0.8418022591302757, "learning_rate": 3.4763620429604223e-06, "loss": 0.3527, "step": 5739 }, { "epoch": 1.9165275459098496, "grad_norm": 0.8377063581789935, "learning_rate": 3.474511937195127e-06, "loss": 0.3623, "step": 5740 }, { "epoch": 1.9168614357262104, "grad_norm": 0.8005001651503724, "learning_rate": 3.4726620617018038e-06, "loss": 0.3486, "step": 5741 }, { "epoch": 1.917195325542571, "grad_norm": 0.8426453456982776, "learning_rate": 3.4708124167596886e-06, "loss": 0.373, "step": 5742 }, { "epoch": 1.9175292153589316, "grad_norm": 0.8065874233690473, "learning_rate": 3.4689630026479882e-06, "loss": 0.3487, "step": 5743 }, { "epoch": 1.9178631051752921, "grad_norm": 0.8149655478595319, "learning_rate": 3.46711381964587e-06, "loss": 0.3657, "step": 5744 }, { "epoch": 1.9181969949916526, "grad_norm": 0.8797087229998152, "learning_rate": 3.4652648680324665e-06, "loss": 0.364, "step": 5745 }, { "epoch": 1.9185308848080134, "grad_norm": 0.8313172433988838, "learning_rate": 3.4634161480868767e-06, "loss": 0.3663, "step": 5746 }, { "epoch": 1.918864774624374, "grad_norm": 0.8074447680376882, "learning_rate": 3.461567660088164e-06, "loss": 0.3558, "step": 5747 }, { "epoch": 1.9191986644407346, "grad_norm": 0.8185364619082136, "learning_rate": 3.459719404315358e-06, "loss": 0.3552, "step": 5748 }, { "epoch": 1.9195325542570951, "grad_norm": 0.8242234003099538, "learning_rate": 3.4578713810474527e-06, "loss": 0.3613, "step": 5749 }, { "epoch": 1.9198664440734556, "grad_norm": 0.7832089312902711, "learning_rate": 3.4560235905634063e-06, "loss": 0.3455, "step": 5750 }, { "epoch": 1.9202003338898164, "grad_norm": 0.8294757120602683, "learning_rate": 3.4541760331421403e-06, "loss": 0.3518, "step": 5751 }, { "epoch": 1.920534223706177, "grad_norm": 0.7945614307457969, "learning_rate": 3.452328709062547e-06, "loss": 0.3464, "step": 5752 }, { "epoch": 1.9208681135225376, "grad_norm": 0.8118295679139947, "learning_rate": 3.4504816186034757e-06, "loss": 0.3523, "step": 5753 }, { "epoch": 1.9212020033388981, "grad_norm": 0.9125484972432052, "learning_rate": 3.4486347620437454e-06, "loss": 0.3678, "step": 5754 }, { "epoch": 1.9215358931552586, "grad_norm": 0.8161020061556595, "learning_rate": 3.4467881396621384e-06, "loss": 0.3434, "step": 5755 }, { "epoch": 1.9218697829716194, "grad_norm": 0.7939325940122594, "learning_rate": 3.444941751737403e-06, "loss": 0.344, "step": 5756 }, { "epoch": 1.92220367278798, "grad_norm": 0.8344808873563224, "learning_rate": 3.4430955985482506e-06, "loss": 0.3686, "step": 5757 }, { "epoch": 1.9225375626043406, "grad_norm": 0.8302799732158045, "learning_rate": 3.441249680373356e-06, "loss": 0.3734, "step": 5758 }, { "epoch": 1.9228714524207011, "grad_norm": 0.8241233186607744, "learning_rate": 3.4394039974913596e-06, "loss": 0.3454, "step": 5759 }, { "epoch": 1.9232053422370616, "grad_norm": 0.8744797419330094, "learning_rate": 3.4375585501808696e-06, "loss": 0.3749, "step": 5760 }, { "epoch": 1.9235392320534224, "grad_norm": 0.83224159127041, "learning_rate": 3.4357133387204535e-06, "loss": 0.3504, "step": 5761 }, { "epoch": 1.923873121869783, "grad_norm": 0.7960968718082346, "learning_rate": 3.433868363388647e-06, "loss": 0.3464, "step": 5762 }, { "epoch": 1.9242070116861436, "grad_norm": 0.8653734177357538, "learning_rate": 3.432023624463945e-06, "loss": 0.3729, "step": 5763 }, { "epoch": 1.9245409015025041, "grad_norm": 0.8120549615695549, "learning_rate": 3.430179122224815e-06, "loss": 0.354, "step": 5764 }, { "epoch": 1.9248747913188646, "grad_norm": 0.8190051056539243, "learning_rate": 3.428334856949682e-06, "loss": 0.3432, "step": 5765 }, { "epoch": 1.9252086811352254, "grad_norm": 0.8403662777568821, "learning_rate": 3.4264908289169364e-06, "loss": 0.3571, "step": 5766 }, { "epoch": 1.925542570951586, "grad_norm": 0.7904911718995312, "learning_rate": 3.4246470384049334e-06, "loss": 0.3598, "step": 5767 }, { "epoch": 1.9258764607679466, "grad_norm": 0.8173883177984826, "learning_rate": 3.4228034856919958e-06, "loss": 0.3603, "step": 5768 }, { "epoch": 1.9262103505843071, "grad_norm": 0.8277941907223624, "learning_rate": 3.4209601710564043e-06, "loss": 0.3641, "step": 5769 }, { "epoch": 1.9265442404006676, "grad_norm": 0.7975702607283499, "learning_rate": 3.4191170947764094e-06, "loss": 0.3614, "step": 5770 }, { "epoch": 1.9268781302170284, "grad_norm": 0.8037441574457913, "learning_rate": 3.4172742571302177e-06, "loss": 0.3431, "step": 5771 }, { "epoch": 1.927212020033389, "grad_norm": 0.799967489667277, "learning_rate": 3.415431658396012e-06, "loss": 0.3573, "step": 5772 }, { "epoch": 1.9275459098497496, "grad_norm": 0.8540046250945832, "learning_rate": 3.413589298851927e-06, "loss": 0.3595, "step": 5773 }, { "epoch": 1.9278797996661101, "grad_norm": 0.8369094151788874, "learning_rate": 3.4117471787760682e-06, "loss": 0.3514, "step": 5774 }, { "epoch": 1.9282136894824706, "grad_norm": 0.8660412227450985, "learning_rate": 3.409905298446502e-06, "loss": 0.3628, "step": 5775 }, { "epoch": 1.9285475792988314, "grad_norm": 0.8226399066341954, "learning_rate": 3.4080636581412607e-06, "loss": 0.3445, "step": 5776 }, { "epoch": 1.928881469115192, "grad_norm": 0.8516267141254387, "learning_rate": 3.406222258138341e-06, "loss": 0.3544, "step": 5777 }, { "epoch": 1.9292153589315526, "grad_norm": 0.8509436286431739, "learning_rate": 3.4043810987156977e-06, "loss": 0.376, "step": 5778 }, { "epoch": 1.9295492487479131, "grad_norm": 0.8511038043308681, "learning_rate": 3.402540180151256e-06, "loss": 0.3596, "step": 5779 }, { "epoch": 1.9298831385642736, "grad_norm": 0.8275639459841364, "learning_rate": 3.4006995027229003e-06, "loss": 0.3469, "step": 5780 }, { "epoch": 1.9302170283806344, "grad_norm": 0.8396917708407821, "learning_rate": 3.398859066708482e-06, "loss": 0.3596, "step": 5781 }, { "epoch": 1.930550918196995, "grad_norm": 0.8565808905395857, "learning_rate": 3.3970188723858143e-06, "loss": 0.3628, "step": 5782 }, { "epoch": 1.9308848080133556, "grad_norm": 0.8525323312751999, "learning_rate": 3.395178920032674e-06, "loss": 0.3842, "step": 5783 }, { "epoch": 1.9312186978297161, "grad_norm": 0.8178257110129662, "learning_rate": 3.393339209926798e-06, "loss": 0.361, "step": 5784 }, { "epoch": 1.9315525876460768, "grad_norm": 0.7860552550466882, "learning_rate": 3.3914997423458962e-06, "loss": 0.3532, "step": 5785 }, { "epoch": 1.9318864774624374, "grad_norm": 0.8398851612737229, "learning_rate": 3.3896605175676304e-06, "loss": 0.3583, "step": 5786 }, { "epoch": 1.932220367278798, "grad_norm": 0.8019271556198024, "learning_rate": 3.3878215358696333e-06, "loss": 0.3447, "step": 5787 }, { "epoch": 1.9325542570951586, "grad_norm": 0.8880988799106525, "learning_rate": 3.3859827975294977e-06, "loss": 0.3701, "step": 5788 }, { "epoch": 1.9328881469115191, "grad_norm": 0.8443228544578223, "learning_rate": 3.3841443028247817e-06, "loss": 0.3781, "step": 5789 }, { "epoch": 1.9332220367278798, "grad_norm": 0.7992724275148192, "learning_rate": 3.3823060520330065e-06, "loss": 0.3567, "step": 5790 }, { "epoch": 1.9335559265442404, "grad_norm": 0.8149041835437664, "learning_rate": 3.380468045431652e-06, "loss": 0.3578, "step": 5791 }, { "epoch": 1.933889816360601, "grad_norm": 0.8402958974055992, "learning_rate": 3.3786302832981666e-06, "loss": 0.3487, "step": 5792 }, { "epoch": 1.9342237061769616, "grad_norm": 0.8137683355511803, "learning_rate": 3.376792765909961e-06, "loss": 0.3577, "step": 5793 }, { "epoch": 1.934557595993322, "grad_norm": 0.833624651553317, "learning_rate": 3.374955493544407e-06, "loss": 0.378, "step": 5794 }, { "epoch": 1.9348914858096828, "grad_norm": 0.8397381612534269, "learning_rate": 3.37311846647884e-06, "loss": 0.3649, "step": 5795 }, { "epoch": 1.9352253756260434, "grad_norm": 0.8397732973558204, "learning_rate": 3.3712816849905583e-06, "loss": 0.3645, "step": 5796 }, { "epoch": 1.935559265442404, "grad_norm": 0.7923042084996909, "learning_rate": 3.3694451493568254e-06, "loss": 0.3488, "step": 5797 }, { "epoch": 1.9358931552587646, "grad_norm": 0.85900120215943, "learning_rate": 3.3676088598548653e-06, "loss": 0.3774, "step": 5798 }, { "epoch": 1.936227045075125, "grad_norm": 0.8429736915673555, "learning_rate": 3.3657728167618643e-06, "loss": 0.3653, "step": 5799 }, { "epoch": 1.9365609348914858, "grad_norm": 0.858913609809066, "learning_rate": 3.3639370203549713e-06, "loss": 0.3718, "step": 5800 }, { "epoch": 1.9368948247078466, "grad_norm": 0.863932521712262, "learning_rate": 3.362101470911302e-06, "loss": 0.368, "step": 5801 }, { "epoch": 1.937228714524207, "grad_norm": 0.8353448384120618, "learning_rate": 3.360266168707932e-06, "loss": 0.3446, "step": 5802 }, { "epoch": 1.9375626043405676, "grad_norm": 0.8470419977214755, "learning_rate": 3.3584311140218982e-06, "loss": 0.3716, "step": 5803 }, { "epoch": 1.937896494156928, "grad_norm": 0.9057996723357838, "learning_rate": 3.3565963071302e-06, "loss": 0.3618, "step": 5804 }, { "epoch": 1.9382303839732888, "grad_norm": 0.8352240280765082, "learning_rate": 3.354761748309805e-06, "loss": 0.3551, "step": 5805 }, { "epoch": 1.9385642737896496, "grad_norm": 0.8120069041537858, "learning_rate": 3.3529274378376364e-06, "loss": 0.3535, "step": 5806 }, { "epoch": 1.93889816360601, "grad_norm": 0.8258279062937723, "learning_rate": 3.351093375990584e-06, "loss": 0.3645, "step": 5807 }, { "epoch": 1.9392320534223706, "grad_norm": 0.8385291641970227, "learning_rate": 3.3492595630454995e-06, "loss": 0.3636, "step": 5808 }, { "epoch": 1.939565943238731, "grad_norm": 0.7960163200371578, "learning_rate": 3.3474259992791925e-06, "loss": 0.344, "step": 5809 }, { "epoch": 1.9398998330550918, "grad_norm": 0.8314939900186499, "learning_rate": 3.3455926849684445e-06, "loss": 0.3647, "step": 5810 }, { "epoch": 1.9402337228714526, "grad_norm": 0.8503926899503934, "learning_rate": 3.34375962038999e-06, "loss": 0.3708, "step": 5811 }, { "epoch": 1.940567612687813, "grad_norm": 0.8140517963967883, "learning_rate": 3.3419268058205313e-06, "loss": 0.3495, "step": 5812 }, { "epoch": 1.9409015025041736, "grad_norm": 0.8289859970078496, "learning_rate": 3.340094241536729e-06, "loss": 0.3615, "step": 5813 }, { "epoch": 1.941235392320534, "grad_norm": 0.8625384545406791, "learning_rate": 3.3382619278152107e-06, "loss": 0.3732, "step": 5814 }, { "epoch": 1.9415692821368948, "grad_norm": 0.8493809843024972, "learning_rate": 3.3364298649325632e-06, "loss": 0.3653, "step": 5815 }, { "epoch": 1.9419031719532556, "grad_norm": 0.7966307789048713, "learning_rate": 3.334598053165336e-06, "loss": 0.3363, "step": 5816 }, { "epoch": 1.942237061769616, "grad_norm": 0.84137028259461, "learning_rate": 3.3327664927900366e-06, "loss": 0.354, "step": 5817 }, { "epoch": 1.9425709515859766, "grad_norm": 0.9070781289336887, "learning_rate": 3.330935184083145e-06, "loss": 0.3851, "step": 5818 }, { "epoch": 1.942904841402337, "grad_norm": 0.813118537291205, "learning_rate": 3.3291041273210923e-06, "loss": 0.3476, "step": 5819 }, { "epoch": 1.9432387312186978, "grad_norm": 0.811614355461036, "learning_rate": 3.3272733227802773e-06, "loss": 0.3395, "step": 5820 }, { "epoch": 1.9435726210350586, "grad_norm": 0.8212212030427528, "learning_rate": 3.325442770737058e-06, "loss": 0.3583, "step": 5821 }, { "epoch": 1.943906510851419, "grad_norm": 0.8340286862585479, "learning_rate": 3.3236124714677577e-06, "loss": 0.3534, "step": 5822 }, { "epoch": 1.9442404006677796, "grad_norm": 0.8171434456839948, "learning_rate": 3.32178242524866e-06, "loss": 0.3545, "step": 5823 }, { "epoch": 1.94457429048414, "grad_norm": 0.8285296749693957, "learning_rate": 3.319952632356007e-06, "loss": 0.3651, "step": 5824 }, { "epoch": 1.9449081803005008, "grad_norm": 0.8180316720251858, "learning_rate": 3.318123093066007e-06, "loss": 0.3493, "step": 5825 }, { "epoch": 1.9452420701168616, "grad_norm": 0.8009293492984415, "learning_rate": 3.316293807654829e-06, "loss": 0.3434, "step": 5826 }, { "epoch": 1.945575959933222, "grad_norm": 0.8250683241283429, "learning_rate": 3.314464776398602e-06, "loss": 0.3468, "step": 5827 }, { "epoch": 1.9459098497495826, "grad_norm": 0.836706129033858, "learning_rate": 3.3126359995734185e-06, "loss": 0.35, "step": 5828 }, { "epoch": 1.946243739565943, "grad_norm": 0.7954543674231068, "learning_rate": 3.3108074774553307e-06, "loss": 0.3533, "step": 5829 }, { "epoch": 1.9465776293823038, "grad_norm": 0.8350279758238379, "learning_rate": 3.308979210320355e-06, "loss": 0.3586, "step": 5830 }, { "epoch": 1.9469115191986646, "grad_norm": 0.812759289600001, "learning_rate": 3.3071511984444683e-06, "loss": 0.3513, "step": 5831 }, { "epoch": 1.947245409015025, "grad_norm": 0.83703244751204, "learning_rate": 3.305323442103606e-06, "loss": 0.3525, "step": 5832 }, { "epoch": 1.9475792988313856, "grad_norm": 0.8330336254199341, "learning_rate": 3.3034959415736677e-06, "loss": 0.3673, "step": 5833 }, { "epoch": 1.947913188647746, "grad_norm": 0.7924343166249996, "learning_rate": 3.3016686971305154e-06, "loss": 0.3366, "step": 5834 }, { "epoch": 1.9482470784641068, "grad_norm": 0.8384842836468088, "learning_rate": 3.2998417090499717e-06, "loss": 0.3683, "step": 5835 }, { "epoch": 1.9485809682804676, "grad_norm": 0.8922619381049837, "learning_rate": 3.2980149776078194e-06, "loss": 0.3876, "step": 5836 }, { "epoch": 1.948914858096828, "grad_norm": 0.8583833416742328, "learning_rate": 3.2961885030797996e-06, "loss": 0.3653, "step": 5837 }, { "epoch": 1.9492487479131886, "grad_norm": 0.8187655844416489, "learning_rate": 3.294362285741624e-06, "loss": 0.3646, "step": 5838 }, { "epoch": 1.949582637729549, "grad_norm": 0.8052376198540327, "learning_rate": 3.2925363258689556e-06, "loss": 0.3445, "step": 5839 }, { "epoch": 1.9499165275459098, "grad_norm": 0.8532670665863206, "learning_rate": 3.2907106237374237e-06, "loss": 0.3607, "step": 5840 }, { "epoch": 1.9502504173622706, "grad_norm": 0.8230493714391749, "learning_rate": 3.288885179622617e-06, "loss": 0.3477, "step": 5841 }, { "epoch": 1.950584307178631, "grad_norm": 0.8009700081727384, "learning_rate": 3.287059993800085e-06, "loss": 0.3524, "step": 5842 }, { "epoch": 1.9509181969949916, "grad_norm": 0.8209264830438724, "learning_rate": 3.2852350665453412e-06, "loss": 0.3573, "step": 5843 }, { "epoch": 1.951252086811352, "grad_norm": 0.8693511239297361, "learning_rate": 3.2834103981338583e-06, "loss": 0.3694, "step": 5844 }, { "epoch": 1.9515859766277128, "grad_norm": 0.8257553243499042, "learning_rate": 3.281585988841066e-06, "loss": 0.3486, "step": 5845 }, { "epoch": 1.9519198664440736, "grad_norm": 0.7984133650355288, "learning_rate": 3.279761838942359e-06, "loss": 0.3504, "step": 5846 }, { "epoch": 1.952253756260434, "grad_norm": 0.7914718568741695, "learning_rate": 3.2779379487130946e-06, "loss": 0.3584, "step": 5847 }, { "epoch": 1.9525876460767946, "grad_norm": 0.8271565021005021, "learning_rate": 3.276114318428587e-06, "loss": 0.3646, "step": 5848 }, { "epoch": 1.9529215358931553, "grad_norm": 0.7984104142132794, "learning_rate": 3.274290948364115e-06, "loss": 0.3619, "step": 5849 }, { "epoch": 1.9532554257095158, "grad_norm": 0.8207083169862176, "learning_rate": 3.2724678387949095e-06, "loss": 0.3519, "step": 5850 }, { "epoch": 1.9535893155258766, "grad_norm": 0.8304409278288053, "learning_rate": 3.2706449899961757e-06, "loss": 0.3566, "step": 5851 }, { "epoch": 1.953923205342237, "grad_norm": 0.8130049527851261, "learning_rate": 3.268822402243068e-06, "loss": 0.3547, "step": 5852 }, { "epoch": 1.9542570951585976, "grad_norm": 0.8581717693220827, "learning_rate": 3.267000075810706e-06, "loss": 0.3607, "step": 5853 }, { "epoch": 1.9545909849749583, "grad_norm": 0.831328998583368, "learning_rate": 3.2651780109741694e-06, "loss": 0.362, "step": 5854 }, { "epoch": 1.9549248747913188, "grad_norm": 0.8405192274523985, "learning_rate": 3.263356208008499e-06, "loss": 0.3476, "step": 5855 }, { "epoch": 1.9552587646076796, "grad_norm": 0.8170751615839487, "learning_rate": 3.261534667188696e-06, "loss": 0.3654, "step": 5856 }, { "epoch": 1.95559265442404, "grad_norm": 0.8178029831419593, "learning_rate": 3.2597133887897203e-06, "loss": 0.3446, "step": 5857 }, { "epoch": 1.9559265442404006, "grad_norm": 0.8443618687689063, "learning_rate": 3.257892373086492e-06, "loss": 0.3594, "step": 5858 }, { "epoch": 1.9562604340567613, "grad_norm": 0.8089479034202621, "learning_rate": 3.2560716203538955e-06, "loss": 0.3492, "step": 5859 }, { "epoch": 1.9565943238731218, "grad_norm": 0.8079338213616951, "learning_rate": 3.2542511308667725e-06, "loss": 0.3535, "step": 5860 }, { "epoch": 1.9569282136894826, "grad_norm": 0.7946355231385352, "learning_rate": 3.2524309048999237e-06, "loss": 0.3498, "step": 5861 }, { "epoch": 1.957262103505843, "grad_norm": 0.8771215624429581, "learning_rate": 3.250610942728112e-06, "loss": 0.3633, "step": 5862 }, { "epoch": 1.9575959933222036, "grad_norm": 0.818399987766962, "learning_rate": 3.2487912446260617e-06, "loss": 0.3564, "step": 5863 }, { "epoch": 1.9579298831385643, "grad_norm": 0.8273938980873374, "learning_rate": 3.2469718108684557e-06, "loss": 0.3466, "step": 5864 }, { "epoch": 1.9582637729549248, "grad_norm": 0.8321054462891021, "learning_rate": 3.245152641729935e-06, "loss": 0.3474, "step": 5865 }, { "epoch": 1.9585976627712856, "grad_norm": 0.8089408840744917, "learning_rate": 3.2433337374851028e-06, "loss": 0.3655, "step": 5866 }, { "epoch": 1.958931552587646, "grad_norm": 0.7747682723006929, "learning_rate": 3.241515098408524e-06, "loss": 0.3451, "step": 5867 }, { "epoch": 1.9592654424040066, "grad_norm": 0.835859007823097, "learning_rate": 3.2396967247747214e-06, "loss": 0.3599, "step": 5868 }, { "epoch": 1.9595993322203673, "grad_norm": 0.82540070088762, "learning_rate": 3.2378786168581777e-06, "loss": 0.3571, "step": 5869 }, { "epoch": 1.959933222036728, "grad_norm": 0.8178907009050964, "learning_rate": 3.2360607749333338e-06, "loss": 0.3441, "step": 5870 }, { "epoch": 1.9602671118530886, "grad_norm": 0.7939624418439984, "learning_rate": 3.2342431992745937e-06, "loss": 0.3594, "step": 5871 }, { "epoch": 1.960601001669449, "grad_norm": 0.7958096480865757, "learning_rate": 3.232425890156321e-06, "loss": 0.362, "step": 5872 }, { "epoch": 1.9609348914858096, "grad_norm": 0.8383977274341619, "learning_rate": 3.230608847852837e-06, "loss": 0.366, "step": 5873 }, { "epoch": 1.9612687813021703, "grad_norm": 0.8043619757039485, "learning_rate": 3.2287920726384235e-06, "loss": 0.3588, "step": 5874 }, { "epoch": 1.961602671118531, "grad_norm": 0.8380265231228227, "learning_rate": 3.226975564787322e-06, "loss": 0.3482, "step": 5875 }, { "epoch": 1.9619365609348915, "grad_norm": 0.8173864744025983, "learning_rate": 3.2251593245737347e-06, "loss": 0.354, "step": 5876 }, { "epoch": 1.962270450751252, "grad_norm": 0.8451181476684015, "learning_rate": 3.2233433522718237e-06, "loss": 0.3703, "step": 5877 }, { "epoch": 1.9626043405676126, "grad_norm": 0.8293946882116047, "learning_rate": 3.2215276481557066e-06, "loss": 0.3704, "step": 5878 }, { "epoch": 1.9629382303839733, "grad_norm": 0.8397862375333691, "learning_rate": 3.219712212499464e-06, "loss": 0.3592, "step": 5879 }, { "epoch": 1.963272120200334, "grad_norm": 0.8232284237527814, "learning_rate": 3.2178970455771364e-06, "loss": 0.36, "step": 5880 }, { "epoch": 1.9636060100166945, "grad_norm": 0.8169237808801766, "learning_rate": 3.216082147662723e-06, "loss": 0.365, "step": 5881 }, { "epoch": 1.963939899833055, "grad_norm": 0.8159982299645991, "learning_rate": 3.214267519030182e-06, "loss": 0.3563, "step": 5882 }, { "epoch": 1.9642737896494156, "grad_norm": 0.8539550388284792, "learning_rate": 3.212453159953428e-06, "loss": 0.3571, "step": 5883 }, { "epoch": 1.9646076794657763, "grad_norm": 0.8094805250381276, "learning_rate": 3.2106390707063436e-06, "loss": 0.3626, "step": 5884 }, { "epoch": 1.964941569282137, "grad_norm": 0.8163894393989689, "learning_rate": 3.2088252515627605e-06, "loss": 0.3576, "step": 5885 }, { "epoch": 1.9652754590984975, "grad_norm": 0.8010998087944081, "learning_rate": 3.2070117027964755e-06, "loss": 0.3386, "step": 5886 }, { "epoch": 1.965609348914858, "grad_norm": 0.7897408816849426, "learning_rate": 3.205198424681243e-06, "loss": 0.3204, "step": 5887 }, { "epoch": 1.9659432387312186, "grad_norm": 0.8231348394571332, "learning_rate": 3.2033854174907773e-06, "loss": 0.3611, "step": 5888 }, { "epoch": 1.9662771285475793, "grad_norm": 0.8096114457947656, "learning_rate": 3.201572681498752e-06, "loss": 0.3566, "step": 5889 }, { "epoch": 1.96661101836394, "grad_norm": 0.8524949865605961, "learning_rate": 3.199760216978799e-06, "loss": 0.3469, "step": 5890 }, { "epoch": 1.9669449081803005, "grad_norm": 0.8299434212166691, "learning_rate": 3.1979480242045047e-06, "loss": 0.3722, "step": 5891 }, { "epoch": 1.967278797996661, "grad_norm": 0.8128779549012032, "learning_rate": 3.1961361034494263e-06, "loss": 0.3634, "step": 5892 }, { "epoch": 1.9676126878130216, "grad_norm": 0.7831334708006008, "learning_rate": 3.1943244549870687e-06, "loss": 0.3381, "step": 5893 }, { "epoch": 1.9679465776293823, "grad_norm": 0.8359761102015119, "learning_rate": 3.1925130790909e-06, "loss": 0.346, "step": 5894 }, { "epoch": 1.968280467445743, "grad_norm": 0.8265804953447826, "learning_rate": 3.190701976034346e-06, "loss": 0.3621, "step": 5895 }, { "epoch": 1.9686143572621035, "grad_norm": 0.807240375683292, "learning_rate": 3.1888911460907947e-06, "loss": 0.3464, "step": 5896 }, { "epoch": 1.968948247078464, "grad_norm": 0.8492011710080206, "learning_rate": 3.18708058953359e-06, "loss": 0.3398, "step": 5897 }, { "epoch": 1.9692821368948246, "grad_norm": 0.8024888235413643, "learning_rate": 3.185270306636032e-06, "loss": 0.3385, "step": 5898 }, { "epoch": 1.9696160267111853, "grad_norm": 0.8064907854946577, "learning_rate": 3.1834602976713845e-06, "loss": 0.3486, "step": 5899 }, { "epoch": 1.969949916527546, "grad_norm": 0.8131825381258101, "learning_rate": 3.181650562912867e-06, "loss": 0.3514, "step": 5900 }, { "epoch": 1.9702838063439065, "grad_norm": 0.8656050478548691, "learning_rate": 3.179841102633659e-06, "loss": 0.3674, "step": 5901 }, { "epoch": 1.970617696160267, "grad_norm": 0.8545561196261541, "learning_rate": 3.1780319171068985e-06, "loss": 0.3651, "step": 5902 }, { "epoch": 1.9709515859766276, "grad_norm": 0.803914961468934, "learning_rate": 3.176223006605682e-06, "loss": 0.3378, "step": 5903 }, { "epoch": 1.9712854757929883, "grad_norm": 0.8070964235184831, "learning_rate": 3.174414371403061e-06, "loss": 0.3535, "step": 5904 }, { "epoch": 1.971619365609349, "grad_norm": 0.8392396349152491, "learning_rate": 3.1726060117720504e-06, "loss": 0.367, "step": 5905 }, { "epoch": 1.9719532554257095, "grad_norm": 0.8450016572233618, "learning_rate": 3.1707979279856228e-06, "loss": 0.3667, "step": 5906 }, { "epoch": 1.97228714524207, "grad_norm": 0.8457755385702546, "learning_rate": 3.1689901203167063e-06, "loss": 0.3602, "step": 5907 }, { "epoch": 1.9726210350584306, "grad_norm": 0.8703396093091696, "learning_rate": 3.1671825890381874e-06, "loss": 0.3699, "step": 5908 }, { "epoch": 1.9729549248747913, "grad_norm": 0.8903958516800375, "learning_rate": 3.165375334422916e-06, "loss": 0.3894, "step": 5909 }, { "epoch": 1.973288814691152, "grad_norm": 0.786231290525852, "learning_rate": 3.163568356743696e-06, "loss": 0.3383, "step": 5910 }, { "epoch": 1.9736227045075125, "grad_norm": 0.8284494173993735, "learning_rate": 3.1617616562732878e-06, "loss": 0.3484, "step": 5911 }, { "epoch": 1.973956594323873, "grad_norm": 0.8446715715215671, "learning_rate": 3.159955233284413e-06, "loss": 0.3612, "step": 5912 }, { "epoch": 1.9742904841402336, "grad_norm": 0.8560188769336105, "learning_rate": 3.1581490880497522e-06, "loss": 0.3782, "step": 5913 }, { "epoch": 1.9746243739565943, "grad_norm": 0.8066740670115496, "learning_rate": 3.156343220841941e-06, "loss": 0.3573, "step": 5914 }, { "epoch": 1.974958263772955, "grad_norm": 0.8244096633451595, "learning_rate": 3.1545376319335763e-06, "loss": 0.3681, "step": 5915 }, { "epoch": 1.9752921535893155, "grad_norm": 0.7738241507255761, "learning_rate": 3.152732321597207e-06, "loss": 0.3316, "step": 5916 }, { "epoch": 1.975626043405676, "grad_norm": 0.8266086678012698, "learning_rate": 3.15092729010535e-06, "loss": 0.3752, "step": 5917 }, { "epoch": 1.9759599332220368, "grad_norm": 0.8094188375584778, "learning_rate": 3.14912253773047e-06, "loss": 0.3512, "step": 5918 }, { "epoch": 1.9762938230383973, "grad_norm": 0.86359039630872, "learning_rate": 3.1473180647449958e-06, "loss": 0.3582, "step": 5919 }, { "epoch": 1.976627712854758, "grad_norm": 0.810039342865362, "learning_rate": 3.14551387142131e-06, "loss": 0.3519, "step": 5920 }, { "epoch": 1.9769616026711185, "grad_norm": 0.8512045150433452, "learning_rate": 3.143709958031758e-06, "loss": 0.3584, "step": 5921 }, { "epoch": 1.977295492487479, "grad_norm": 0.8173569945019444, "learning_rate": 3.141906324848638e-06, "loss": 0.3426, "step": 5922 }, { "epoch": 1.9776293823038398, "grad_norm": 0.8044537153272259, "learning_rate": 3.14010297214421e-06, "loss": 0.3593, "step": 5923 }, { "epoch": 1.9779632721202003, "grad_norm": 0.8216347657983286, "learning_rate": 3.1382999001906846e-06, "loss": 0.3611, "step": 5924 }, { "epoch": 1.978297161936561, "grad_norm": 0.8226513378968507, "learning_rate": 3.1364971092602407e-06, "loss": 0.3386, "step": 5925 }, { "epoch": 1.9786310517529215, "grad_norm": 0.8052417702165398, "learning_rate": 3.134694599625007e-06, "loss": 0.3388, "step": 5926 }, { "epoch": 1.978964941569282, "grad_norm": 0.8029676085098828, "learning_rate": 3.1328923715570703e-06, "loss": 0.3592, "step": 5927 }, { "epoch": 1.9792988313856428, "grad_norm": 0.842507803397055, "learning_rate": 3.1310904253284767e-06, "loss": 0.3684, "step": 5928 }, { "epoch": 1.9796327212020033, "grad_norm": 0.802664469445994, "learning_rate": 3.1292887612112314e-06, "loss": 0.345, "step": 5929 }, { "epoch": 1.979966611018364, "grad_norm": 0.7810406613355639, "learning_rate": 3.127487379477295e-06, "loss": 0.3486, "step": 5930 }, { "epoch": 1.9803005008347245, "grad_norm": 0.8104771564570512, "learning_rate": 3.1256862803985843e-06, "loss": 0.3383, "step": 5931 }, { "epoch": 1.980634390651085, "grad_norm": 0.8194803897482213, "learning_rate": 3.123885464246974e-06, "loss": 0.3388, "step": 5932 }, { "epoch": 1.9809682804674458, "grad_norm": 0.8595006420842146, "learning_rate": 3.1220849312942967e-06, "loss": 0.3723, "step": 5933 }, { "epoch": 1.9813021702838065, "grad_norm": 0.8283866161559019, "learning_rate": 3.1202846818123446e-06, "loss": 0.3645, "step": 5934 }, { "epoch": 1.981636060100167, "grad_norm": 0.8852443171172917, "learning_rate": 3.118484716072864e-06, "loss": 0.3565, "step": 5935 }, { "epoch": 1.9819699499165275, "grad_norm": 0.832086550705656, "learning_rate": 3.1166850343475587e-06, "loss": 0.3627, "step": 5936 }, { "epoch": 1.982303839732888, "grad_norm": 0.8595882319518344, "learning_rate": 3.1148856369080875e-06, "loss": 0.3515, "step": 5937 }, { "epoch": 1.9826377295492488, "grad_norm": 0.7993881704862432, "learning_rate": 3.1130865240260748e-06, "loss": 0.3518, "step": 5938 }, { "epoch": 1.9829716193656095, "grad_norm": 0.8208405511810558, "learning_rate": 3.111287695973092e-06, "loss": 0.3598, "step": 5939 }, { "epoch": 1.98330550918197, "grad_norm": 0.831500594461491, "learning_rate": 3.109489153020672e-06, "loss": 0.3607, "step": 5940 }, { "epoch": 1.9836393989983305, "grad_norm": 0.823833687155506, "learning_rate": 3.107690895440304e-06, "loss": 0.372, "step": 5941 }, { "epoch": 1.983973288814691, "grad_norm": 0.8292616769052236, "learning_rate": 3.105892923503436e-06, "loss": 0.3542, "step": 5942 }, { "epoch": 1.9843071786310518, "grad_norm": 0.8177364689446491, "learning_rate": 3.1040952374814726e-06, "loss": 0.3646, "step": 5943 }, { "epoch": 1.9846410684474125, "grad_norm": 0.8167792134137317, "learning_rate": 3.1022978376457707e-06, "loss": 0.3587, "step": 5944 }, { "epoch": 1.984974958263773, "grad_norm": 0.8225405747594562, "learning_rate": 3.100500724267648e-06, "loss": 0.3672, "step": 5945 }, { "epoch": 1.9853088480801335, "grad_norm": 0.8013663823576604, "learning_rate": 3.0987038976183787e-06, "loss": 0.3507, "step": 5946 }, { "epoch": 1.985642737896494, "grad_norm": 0.8187981038512357, "learning_rate": 3.0969073579691944e-06, "loss": 0.367, "step": 5947 }, { "epoch": 1.9859766277128548, "grad_norm": 0.8264304682500815, "learning_rate": 3.095111105591281e-06, "loss": 0.3591, "step": 5948 }, { "epoch": 1.9863105175292155, "grad_norm": 0.8142032094673683, "learning_rate": 3.0933151407557804e-06, "loss": 0.3544, "step": 5949 }, { "epoch": 1.986644407345576, "grad_norm": 0.8471523051283592, "learning_rate": 3.0915194637337977e-06, "loss": 0.3718, "step": 5950 }, { "epoch": 1.9869782971619365, "grad_norm": 0.814335652253569, "learning_rate": 3.0897240747963873e-06, "loss": 0.3532, "step": 5951 }, { "epoch": 1.987312186978297, "grad_norm": 0.8083431298480863, "learning_rate": 3.0879289742145615e-06, "loss": 0.3378, "step": 5952 }, { "epoch": 1.9876460767946578, "grad_norm": 0.8221484718929176, "learning_rate": 3.08613416225929e-06, "loss": 0.3539, "step": 5953 }, { "epoch": 1.9879799666110185, "grad_norm": 0.7963050224475402, "learning_rate": 3.0843396392015017e-06, "loss": 0.3512, "step": 5954 }, { "epoch": 1.988313856427379, "grad_norm": 0.8385478331220035, "learning_rate": 3.0825454053120783e-06, "loss": 0.3587, "step": 5955 }, { "epoch": 1.9886477462437395, "grad_norm": 0.8524078825120832, "learning_rate": 3.0807514608618593e-06, "loss": 0.3437, "step": 5956 }, { "epoch": 1.9889816360601, "grad_norm": 0.809856643530708, "learning_rate": 3.0789578061216357e-06, "loss": 0.3546, "step": 5957 }, { "epoch": 1.9893155258764608, "grad_norm": 0.8269018530154636, "learning_rate": 3.077164441362166e-06, "loss": 0.3633, "step": 5958 }, { "epoch": 1.9896494156928215, "grad_norm": 0.8850112890972909, "learning_rate": 3.075371366854154e-06, "loss": 0.3734, "step": 5959 }, { "epoch": 1.989983305509182, "grad_norm": 0.8065329123213854, "learning_rate": 3.0735785828682652e-06, "loss": 0.3512, "step": 5960 }, { "epoch": 1.9903171953255425, "grad_norm": 0.8205075641551626, "learning_rate": 3.0717860896751194e-06, "loss": 0.3629, "step": 5961 }, { "epoch": 1.990651085141903, "grad_norm": 0.8062033460088368, "learning_rate": 3.06999388754529e-06, "loss": 0.3536, "step": 5962 }, { "epoch": 1.9909849749582638, "grad_norm": 0.8282408218838405, "learning_rate": 3.0682019767493155e-06, "loss": 0.3484, "step": 5963 }, { "epoch": 1.9913188647746245, "grad_norm": 0.8218477870577908, "learning_rate": 3.066410357557679e-06, "loss": 0.3478, "step": 5964 }, { "epoch": 1.991652754590985, "grad_norm": 0.7892259452301649, "learning_rate": 3.0646190302408276e-06, "loss": 0.3456, "step": 5965 }, { "epoch": 1.9919866444073455, "grad_norm": 0.8205696283436119, "learning_rate": 3.0628279950691595e-06, "loss": 0.3443, "step": 5966 }, { "epoch": 1.992320534223706, "grad_norm": 0.816816519419226, "learning_rate": 3.0610372523130333e-06, "loss": 0.3567, "step": 5967 }, { "epoch": 1.9926544240400668, "grad_norm": 0.7992181863560586, "learning_rate": 3.05924680224276e-06, "loss": 0.345, "step": 5968 }, { "epoch": 1.9929883138564275, "grad_norm": 0.8453576915088399, "learning_rate": 3.0574566451286094e-06, "loss": 0.3571, "step": 5969 }, { "epoch": 1.993322203672788, "grad_norm": 0.8556543476842773, "learning_rate": 3.055666781240799e-06, "loss": 0.3698, "step": 5970 }, { "epoch": 1.9936560934891485, "grad_norm": 0.8033609770330361, "learning_rate": 3.0538772108495163e-06, "loss": 0.3442, "step": 5971 }, { "epoch": 1.993989983305509, "grad_norm": 0.8250367258415386, "learning_rate": 3.0520879342248915e-06, "loss": 0.3538, "step": 5972 }, { "epoch": 1.9943238731218698, "grad_norm": 0.8477626154055208, "learning_rate": 3.050298951637017e-06, "loss": 0.3696, "step": 5973 }, { "epoch": 1.9946577629382305, "grad_norm": 0.8519719604208028, "learning_rate": 3.048510263355937e-06, "loss": 0.3478, "step": 5974 }, { "epoch": 1.994991652754591, "grad_norm": 0.8592004454188488, "learning_rate": 3.0467218696516563e-06, "loss": 0.3753, "step": 5975 }, { "epoch": 1.9953255425709515, "grad_norm": 0.8627956673366862, "learning_rate": 3.044933770794133e-06, "loss": 0.362, "step": 5976 }, { "epoch": 1.995659432387312, "grad_norm": 0.8046269922292765, "learning_rate": 3.0431459670532772e-06, "loss": 0.3497, "step": 5977 }, { "epoch": 1.9959933222036728, "grad_norm": 0.8308738024893031, "learning_rate": 3.0413584586989577e-06, "loss": 0.343, "step": 5978 }, { "epoch": 1.9963272120200335, "grad_norm": 0.8052908494705175, "learning_rate": 3.0395712460009997e-06, "loss": 0.3388, "step": 5979 }, { "epoch": 1.996661101836394, "grad_norm": 0.8101794554556351, "learning_rate": 3.0377843292291827e-06, "loss": 0.3468, "step": 5980 }, { "epoch": 1.9969949916527545, "grad_norm": 0.8163619154204581, "learning_rate": 3.03599770865324e-06, "loss": 0.3551, "step": 5981 }, { "epoch": 1.9973288814691152, "grad_norm": 0.8287089439060625, "learning_rate": 3.034211384542861e-06, "loss": 0.3588, "step": 5982 }, { "epoch": 1.9976627712854758, "grad_norm": 0.7784833113900437, "learning_rate": 3.032425357167693e-06, "loss": 0.3461, "step": 5983 }, { "epoch": 1.9979966611018365, "grad_norm": 0.8136394763305684, "learning_rate": 3.030639626797336e-06, "loss": 0.359, "step": 5984 }, { "epoch": 1.998330550918197, "grad_norm": 0.8249975176596209, "learning_rate": 3.0288541937013425e-06, "loss": 0.3559, "step": 5985 }, { "epoch": 1.9986644407345575, "grad_norm": 0.8099214596240686, "learning_rate": 3.0270690581492256e-06, "loss": 0.3633, "step": 5986 }, { "epoch": 1.9989983305509182, "grad_norm": 0.8363166999935583, "learning_rate": 3.025284220410451e-06, "loss": 0.3632, "step": 5987 }, { "epoch": 1.9993322203672788, "grad_norm": 0.8006561437834531, "learning_rate": 3.0234996807544387e-06, "loss": 0.3593, "step": 5988 }, { "epoch": 1.9996661101836395, "grad_norm": 0.8485254134937027, "learning_rate": 3.021715439450566e-06, "loss": 0.3758, "step": 5989 }, { "epoch": 2.0, "grad_norm": 0.8363237063464748, "learning_rate": 3.0199314967681593e-06, "loss": 0.3529, "step": 5990 }, { "epoch": 2.0, "eval_loss": 0.3732949197292328, "eval_runtime": 821.9269, "eval_samples_per_second": 24.55, "eval_steps_per_second": 0.768, "step": 5990 }, { "epoch": 2.0003338898163605, "grad_norm": 0.7848532910559022, "learning_rate": 3.0181478529765095e-06, "loss": 0.3098, "step": 5991 }, { "epoch": 2.000667779632721, "grad_norm": 0.8457100524616675, "learning_rate": 3.0163645083448547e-06, "loss": 0.3104, "step": 5992 }, { "epoch": 2.001001669449082, "grad_norm": 0.8145610946236922, "learning_rate": 3.0145814631423897e-06, "loss": 0.3147, "step": 5993 }, { "epoch": 2.0013355592654425, "grad_norm": 0.7787794968116113, "learning_rate": 3.0127987176382657e-06, "loss": 0.3112, "step": 5994 }, { "epoch": 2.001669449081803, "grad_norm": 0.8113422914814536, "learning_rate": 3.0110162721015857e-06, "loss": 0.3105, "step": 5995 }, { "epoch": 2.0020033388981635, "grad_norm": 0.8094753734863851, "learning_rate": 3.0092341268014124e-06, "loss": 0.315, "step": 5996 }, { "epoch": 2.002337228714524, "grad_norm": 0.8262753254223906, "learning_rate": 3.0074522820067592e-06, "loss": 0.3058, "step": 5997 }, { "epoch": 2.002671118530885, "grad_norm": 0.8519506558969417, "learning_rate": 3.0056707379865934e-06, "loss": 0.3244, "step": 5998 }, { "epoch": 2.0030050083472455, "grad_norm": 0.8788827089269111, "learning_rate": 3.0038894950098386e-06, "loss": 0.3119, "step": 5999 }, { "epoch": 2.003338898163606, "grad_norm": 0.8683061824460336, "learning_rate": 3.0021085533453747e-06, "loss": 0.2998, "step": 6000 }, { "epoch": 2.0036727879799665, "grad_norm": 0.9015634248753164, "learning_rate": 3.0003279132620333e-06, "loss": 0.3046, "step": 6001 }, { "epoch": 2.004006677796327, "grad_norm": 0.8731100987373474, "learning_rate": 2.998547575028603e-06, "loss": 0.3164, "step": 6002 }, { "epoch": 2.004340567612688, "grad_norm": 0.8501572848345164, "learning_rate": 2.9967675389138207e-06, "loss": 0.3062, "step": 6003 }, { "epoch": 2.0046744574290485, "grad_norm": 0.8197529613870068, "learning_rate": 2.994987805186389e-06, "loss": 0.2978, "step": 6004 }, { "epoch": 2.005008347245409, "grad_norm": 0.8505455549404203, "learning_rate": 2.993208374114953e-06, "loss": 0.3037, "step": 6005 }, { "epoch": 2.0053422370617695, "grad_norm": 0.8481833051354402, "learning_rate": 2.9914292459681192e-06, "loss": 0.3064, "step": 6006 }, { "epoch": 2.00567612687813, "grad_norm": 0.8846521052656703, "learning_rate": 2.9896504210144457e-06, "loss": 0.3223, "step": 6007 }, { "epoch": 2.006010016694491, "grad_norm": 0.8408085345962582, "learning_rate": 2.987871899522447e-06, "loss": 0.2999, "step": 6008 }, { "epoch": 2.0063439065108515, "grad_norm": 0.8441659163536025, "learning_rate": 2.9860936817605906e-06, "loss": 0.3034, "step": 6009 }, { "epoch": 2.006677796327212, "grad_norm": 0.7847971201863565, "learning_rate": 2.9843157679972954e-06, "loss": 0.2811, "step": 6010 }, { "epoch": 2.0070116861435725, "grad_norm": 0.8063025986654098, "learning_rate": 2.982538158500938e-06, "loss": 0.3038, "step": 6011 }, { "epoch": 2.007345575959933, "grad_norm": 0.8843898673106426, "learning_rate": 2.98076085353985e-06, "loss": 0.3263, "step": 6012 }, { "epoch": 2.007679465776294, "grad_norm": 0.8320855551075828, "learning_rate": 2.9789838533823134e-06, "loss": 0.2912, "step": 6013 }, { "epoch": 2.0080133555926545, "grad_norm": 0.8299743480748608, "learning_rate": 2.977207158296566e-06, "loss": 0.2932, "step": 6014 }, { "epoch": 2.008347245409015, "grad_norm": 0.8985675954346791, "learning_rate": 2.975430768550798e-06, "loss": 0.3249, "step": 6015 }, { "epoch": 2.0086811352253755, "grad_norm": 0.8605405950804292, "learning_rate": 2.973654684413158e-06, "loss": 0.2993, "step": 6016 }, { "epoch": 2.009015025041736, "grad_norm": 0.8741726685063562, "learning_rate": 2.9718789061517445e-06, "loss": 0.3081, "step": 6017 }, { "epoch": 2.009348914858097, "grad_norm": 0.822457825560732, "learning_rate": 2.9701034340346086e-06, "loss": 0.2827, "step": 6018 }, { "epoch": 2.0096828046744575, "grad_norm": 0.8861981124064852, "learning_rate": 2.9683282683297576e-06, "loss": 0.3165, "step": 6019 }, { "epoch": 2.010016694490818, "grad_norm": 0.8358399100513539, "learning_rate": 2.966553409305155e-06, "loss": 0.3002, "step": 6020 }, { "epoch": 2.0103505843071785, "grad_norm": 0.8748948526196673, "learning_rate": 2.9647788572287127e-06, "loss": 0.3051, "step": 6021 }, { "epoch": 2.010684474123539, "grad_norm": 0.8874617988307131, "learning_rate": 2.963004612368301e-06, "loss": 0.309, "step": 6022 }, { "epoch": 2.0110183639399, "grad_norm": 0.8211116625220729, "learning_rate": 2.961230674991739e-06, "loss": 0.292, "step": 6023 }, { "epoch": 2.0113522537562605, "grad_norm": 0.9131687136731658, "learning_rate": 2.9594570453668027e-06, "loss": 0.3223, "step": 6024 }, { "epoch": 2.011686143572621, "grad_norm": 0.8751430800368234, "learning_rate": 2.957683723761222e-06, "loss": 0.3093, "step": 6025 }, { "epoch": 2.0120200333889815, "grad_norm": 0.8711648774395326, "learning_rate": 2.955910710442679e-06, "loss": 0.3098, "step": 6026 }, { "epoch": 2.0123539232053425, "grad_norm": 0.8586067025342091, "learning_rate": 2.9541380056788096e-06, "loss": 0.2919, "step": 6027 }, { "epoch": 2.012687813021703, "grad_norm": 0.8230222993165354, "learning_rate": 2.952365609737201e-06, "loss": 0.2867, "step": 6028 }, { "epoch": 2.0130217028380635, "grad_norm": 0.8652365094079125, "learning_rate": 2.9505935228853995e-06, "loss": 0.306, "step": 6029 }, { "epoch": 2.013355592654424, "grad_norm": 0.8821409821156423, "learning_rate": 2.9488217453908994e-06, "loss": 0.3013, "step": 6030 }, { "epoch": 2.0136894824707845, "grad_norm": 0.8528526121147632, "learning_rate": 2.94705027752115e-06, "loss": 0.2924, "step": 6031 }, { "epoch": 2.0140233722871455, "grad_norm": 0.8330334502238076, "learning_rate": 2.9452791195435514e-06, "loss": 0.2862, "step": 6032 }, { "epoch": 2.014357262103506, "grad_norm": 0.8624572218025218, "learning_rate": 2.9435082717254637e-06, "loss": 0.2963, "step": 6033 }, { "epoch": 2.0146911519198665, "grad_norm": 0.854268363710126, "learning_rate": 2.9417377343341935e-06, "loss": 0.2971, "step": 6034 }, { "epoch": 2.015025041736227, "grad_norm": 0.8901978720939168, "learning_rate": 2.9399675076370037e-06, "loss": 0.3024, "step": 6035 }, { "epoch": 2.0153589315525875, "grad_norm": 0.8543547269483228, "learning_rate": 2.938197591901106e-06, "loss": 0.3038, "step": 6036 }, { "epoch": 2.0156928213689485, "grad_norm": 0.8841834818397643, "learning_rate": 2.936427987393675e-06, "loss": 0.3078, "step": 6037 }, { "epoch": 2.016026711185309, "grad_norm": 0.8445018020764885, "learning_rate": 2.934658694381828e-06, "loss": 0.3024, "step": 6038 }, { "epoch": 2.0163606010016695, "grad_norm": 0.8973176352781066, "learning_rate": 2.9328897131326396e-06, "loss": 0.3076, "step": 6039 }, { "epoch": 2.01669449081803, "grad_norm": 0.8754470450920693, "learning_rate": 2.9311210439131362e-06, "loss": 0.3099, "step": 6040 }, { "epoch": 2.0170283806343905, "grad_norm": 0.8505374634620131, "learning_rate": 2.9293526869903e-06, "loss": 0.296, "step": 6041 }, { "epoch": 2.0173622704507514, "grad_norm": 0.88048439512787, "learning_rate": 2.927584642631063e-06, "loss": 0.3112, "step": 6042 }, { "epoch": 2.017696160267112, "grad_norm": 0.8764294254955454, "learning_rate": 2.9258169111023115e-06, "loss": 0.2997, "step": 6043 }, { "epoch": 2.0180300500834725, "grad_norm": 0.8627698674379704, "learning_rate": 2.924049492670881e-06, "loss": 0.2999, "step": 6044 }, { "epoch": 2.018363939899833, "grad_norm": 0.8821985440691257, "learning_rate": 2.922282387603567e-06, "loss": 0.3096, "step": 6045 }, { "epoch": 2.0186978297161935, "grad_norm": 0.9107886345981654, "learning_rate": 2.9205155961671116e-06, "loss": 0.3148, "step": 6046 }, { "epoch": 2.0190317195325544, "grad_norm": 0.8871660930325789, "learning_rate": 2.9187491186282128e-06, "loss": 0.3023, "step": 6047 }, { "epoch": 2.019365609348915, "grad_norm": 0.8806594351096336, "learning_rate": 2.9169829552535156e-06, "loss": 0.3112, "step": 6048 }, { "epoch": 2.0196994991652755, "grad_norm": 0.863216696748605, "learning_rate": 2.915217106309627e-06, "loss": 0.3041, "step": 6049 }, { "epoch": 2.020033388981636, "grad_norm": 0.9310957196602695, "learning_rate": 2.9134515720630987e-06, "loss": 0.3124, "step": 6050 }, { "epoch": 2.0203672787979965, "grad_norm": 0.8829552826555003, "learning_rate": 2.9116863527804384e-06, "loss": 0.3157, "step": 6051 }, { "epoch": 2.0207011686143574, "grad_norm": 0.8938372433781169, "learning_rate": 2.9099214487281046e-06, "loss": 0.3019, "step": 6052 }, { "epoch": 2.021035058430718, "grad_norm": 0.8767753976089553, "learning_rate": 2.9081568601725103e-06, "loss": 0.308, "step": 6053 }, { "epoch": 2.0213689482470785, "grad_norm": 0.8847258245602964, "learning_rate": 2.9063925873800185e-06, "loss": 0.3147, "step": 6054 }, { "epoch": 2.021702838063439, "grad_norm": 0.8782258965883235, "learning_rate": 2.904628630616946e-06, "loss": 0.3072, "step": 6055 }, { "epoch": 2.0220367278797995, "grad_norm": 0.8627732780532511, "learning_rate": 2.902864990149562e-06, "loss": 0.2909, "step": 6056 }, { "epoch": 2.0223706176961604, "grad_norm": 0.8628916614463139, "learning_rate": 2.9011016662440873e-06, "loss": 0.2913, "step": 6057 }, { "epoch": 2.022704507512521, "grad_norm": 0.8896974369761678, "learning_rate": 2.8993386591666945e-06, "loss": 0.3022, "step": 6058 }, { "epoch": 2.0230383973288815, "grad_norm": 0.8937199492094896, "learning_rate": 2.8975759691835092e-06, "loss": 0.3119, "step": 6059 }, { "epoch": 2.023372287145242, "grad_norm": 0.8560110715676792, "learning_rate": 2.8958135965606092e-06, "loss": 0.3056, "step": 6060 }, { "epoch": 2.0237061769616025, "grad_norm": 0.8990791872673177, "learning_rate": 2.8940515415640247e-06, "loss": 0.3011, "step": 6061 }, { "epoch": 2.0240400667779634, "grad_norm": 0.8862670230209715, "learning_rate": 2.892289804459737e-06, "loss": 0.3095, "step": 6062 }, { "epoch": 2.024373956594324, "grad_norm": 0.8787990503191326, "learning_rate": 2.890528385513679e-06, "loss": 0.2949, "step": 6063 }, { "epoch": 2.0247078464106845, "grad_norm": 0.8633817999047065, "learning_rate": 2.8887672849917368e-06, "loss": 0.3007, "step": 6064 }, { "epoch": 2.025041736227045, "grad_norm": 0.8980758564129271, "learning_rate": 2.8870065031597462e-06, "loss": 0.3139, "step": 6065 }, { "epoch": 2.0253756260434055, "grad_norm": 0.908058270107476, "learning_rate": 2.8852460402835007e-06, "loss": 0.3075, "step": 6066 }, { "epoch": 2.0257095158597664, "grad_norm": 0.8408269598198526, "learning_rate": 2.8834858966287416e-06, "loss": 0.2898, "step": 6067 }, { "epoch": 2.026043405676127, "grad_norm": 0.8977297371316372, "learning_rate": 2.8817260724611586e-06, "loss": 0.3123, "step": 6068 }, { "epoch": 2.0263772954924875, "grad_norm": 0.8818884522458847, "learning_rate": 2.879966568046395e-06, "loss": 0.3109, "step": 6069 }, { "epoch": 2.026711185308848, "grad_norm": 0.9018864910985623, "learning_rate": 2.8782073836500533e-06, "loss": 0.3054, "step": 6070 }, { "epoch": 2.0270450751252085, "grad_norm": 0.8985444051211138, "learning_rate": 2.876448519537679e-06, "loss": 0.3071, "step": 6071 }, { "epoch": 2.0273789649415694, "grad_norm": 0.8856200986288895, "learning_rate": 2.8746899759747743e-06, "loss": 0.301, "step": 6072 }, { "epoch": 2.02771285475793, "grad_norm": 0.839487760844401, "learning_rate": 2.8729317532267845e-06, "loss": 0.2917, "step": 6073 }, { "epoch": 2.0280467445742905, "grad_norm": 0.8516957126430627, "learning_rate": 2.871173851559119e-06, "loss": 0.3037, "step": 6074 }, { "epoch": 2.028380634390651, "grad_norm": 0.9092358322933295, "learning_rate": 2.8694162712371314e-06, "loss": 0.2989, "step": 6075 }, { "epoch": 2.0287145242070115, "grad_norm": 0.8939065059255759, "learning_rate": 2.8676590125261265e-06, "loss": 0.3149, "step": 6076 }, { "epoch": 2.0290484140233724, "grad_norm": 0.8740408718372424, "learning_rate": 2.8659020756913627e-06, "loss": 0.2976, "step": 6077 }, { "epoch": 2.029382303839733, "grad_norm": 0.9233960535508103, "learning_rate": 2.864145460998049e-06, "loss": 0.3029, "step": 6078 }, { "epoch": 2.0297161936560935, "grad_norm": 0.900337967787825, "learning_rate": 2.862389168711346e-06, "loss": 0.2998, "step": 6079 }, { "epoch": 2.030050083472454, "grad_norm": 0.8742191964428508, "learning_rate": 2.8606331990963653e-06, "loss": 0.3008, "step": 6080 }, { "epoch": 2.0303839732888145, "grad_norm": 0.9509203118455295, "learning_rate": 2.85887755241817e-06, "loss": 0.3236, "step": 6081 }, { "epoch": 2.0307178631051754, "grad_norm": 0.8944853354734937, "learning_rate": 2.8571222289417747e-06, "loss": 0.3059, "step": 6082 }, { "epoch": 2.031051752921536, "grad_norm": 0.9259873527446001, "learning_rate": 2.8553672289321453e-06, "loss": 0.3087, "step": 6083 }, { "epoch": 2.0313856427378965, "grad_norm": 0.9198990217548848, "learning_rate": 2.853612552654197e-06, "loss": 0.3089, "step": 6084 }, { "epoch": 2.031719532554257, "grad_norm": 0.9117144453772911, "learning_rate": 2.851858200372799e-06, "loss": 0.3, "step": 6085 }, { "epoch": 2.0320534223706175, "grad_norm": 0.9310025665439872, "learning_rate": 2.8501041723527684e-06, "loss": 0.321, "step": 6086 }, { "epoch": 2.0323873121869784, "grad_norm": 0.895665937956854, "learning_rate": 2.8483504688588804e-06, "loss": 0.3017, "step": 6087 }, { "epoch": 2.032721202003339, "grad_norm": 0.8537900674869353, "learning_rate": 2.8465970901558505e-06, "loss": 0.2912, "step": 6088 }, { "epoch": 2.0330550918196995, "grad_norm": 0.8907654201001168, "learning_rate": 2.844844036508353e-06, "loss": 0.2943, "step": 6089 }, { "epoch": 2.03338898163606, "grad_norm": 0.9126592991779545, "learning_rate": 2.8430913081810073e-06, "loss": 0.3018, "step": 6090 }, { "epoch": 2.0337228714524205, "grad_norm": 0.9030180843962567, "learning_rate": 2.841338905438393e-06, "loss": 0.3024, "step": 6091 }, { "epoch": 2.0340567612687814, "grad_norm": 0.9013044234603512, "learning_rate": 2.839586828545034e-06, "loss": 0.2928, "step": 6092 }, { "epoch": 2.034390651085142, "grad_norm": 0.892741070597258, "learning_rate": 2.8378350777654017e-06, "loss": 0.3041, "step": 6093 }, { "epoch": 2.0347245409015025, "grad_norm": 0.9299282497988917, "learning_rate": 2.836083653363923e-06, "loss": 0.2964, "step": 6094 }, { "epoch": 2.035058430717863, "grad_norm": 0.8684981961885014, "learning_rate": 2.834332555604978e-06, "loss": 0.295, "step": 6095 }, { "epoch": 2.035392320534224, "grad_norm": 0.942154139506695, "learning_rate": 2.8325817847528935e-06, "loss": 0.3211, "step": 6096 }, { "epoch": 2.0357262103505844, "grad_norm": 0.9285758549930144, "learning_rate": 2.8308313410719467e-06, "loss": 0.2982, "step": 6097 }, { "epoch": 2.036060100166945, "grad_norm": 0.9230353965701052, "learning_rate": 2.8290812248263664e-06, "loss": 0.3022, "step": 6098 }, { "epoch": 2.0363939899833055, "grad_norm": 0.9473897961063948, "learning_rate": 2.8273314362803337e-06, "loss": 0.3101, "step": 6099 }, { "epoch": 2.036727879799666, "grad_norm": 0.8742405512064644, "learning_rate": 2.8255819756979776e-06, "loss": 0.2885, "step": 6100 }, { "epoch": 2.037061769616027, "grad_norm": 0.9277319400577305, "learning_rate": 2.823832843343379e-06, "loss": 0.3126, "step": 6101 }, { "epoch": 2.0373956594323874, "grad_norm": 0.9186382334322268, "learning_rate": 2.8220840394805682e-06, "loss": 0.2985, "step": 6102 }, { "epoch": 2.037729549248748, "grad_norm": 0.9791643716067238, "learning_rate": 2.820335564373527e-06, "loss": 0.3153, "step": 6103 }, { "epoch": 2.0380634390651085, "grad_norm": 0.9178435052568527, "learning_rate": 2.8185874182861873e-06, "loss": 0.3026, "step": 6104 }, { "epoch": 2.038397328881469, "grad_norm": 0.9974072216348574, "learning_rate": 2.8168396014824317e-06, "loss": 0.3193, "step": 6105 }, { "epoch": 2.03873121869783, "grad_norm": 0.8893850147610624, "learning_rate": 2.81509211422609e-06, "loss": 0.2857, "step": 6106 }, { "epoch": 2.0390651085141904, "grad_norm": 0.9186748762747039, "learning_rate": 2.813344956780951e-06, "loss": 0.3056, "step": 6107 }, { "epoch": 2.039398998330551, "grad_norm": 0.9175695508831823, "learning_rate": 2.811598129410742e-06, "loss": 0.3106, "step": 6108 }, { "epoch": 2.0397328881469114, "grad_norm": 0.9109601652333937, "learning_rate": 2.809851632379147e-06, "loss": 0.306, "step": 6109 }, { "epoch": 2.040066777963272, "grad_norm": 0.9219848255141732, "learning_rate": 2.8081054659497985e-06, "loss": 0.3107, "step": 6110 }, { "epoch": 2.040400667779633, "grad_norm": 0.8801504617528297, "learning_rate": 2.8063596303862833e-06, "loss": 0.3016, "step": 6111 }, { "epoch": 2.0407345575959934, "grad_norm": 0.8612100427277061, "learning_rate": 2.8046141259521325e-06, "loss": 0.2813, "step": 6112 }, { "epoch": 2.041068447412354, "grad_norm": 0.8912607778192202, "learning_rate": 2.802868952910832e-06, "loss": 0.2941, "step": 6113 }, { "epoch": 2.0414023372287144, "grad_norm": 0.8965072965330166, "learning_rate": 2.801124111525808e-06, "loss": 0.2982, "step": 6114 }, { "epoch": 2.041736227045075, "grad_norm": 0.8571913318436815, "learning_rate": 2.799379602060451e-06, "loss": 0.292, "step": 6115 }, { "epoch": 2.042070116861436, "grad_norm": 0.9019923729358932, "learning_rate": 2.7976354247780924e-06, "loss": 0.3114, "step": 6116 }, { "epoch": 2.0424040066777964, "grad_norm": 0.9540070269354693, "learning_rate": 2.795891579942015e-06, "loss": 0.3206, "step": 6117 }, { "epoch": 2.042737896494157, "grad_norm": 0.8776237131301357, "learning_rate": 2.794148067815452e-06, "loss": 0.2921, "step": 6118 }, { "epoch": 2.0430717863105174, "grad_norm": 0.8862703323328042, "learning_rate": 2.7924048886615824e-06, "loss": 0.2985, "step": 6119 }, { "epoch": 2.043405676126878, "grad_norm": 0.8957994077026007, "learning_rate": 2.7906620427435433e-06, "loss": 0.2867, "step": 6120 }, { "epoch": 2.043739565943239, "grad_norm": 0.8972642483432659, "learning_rate": 2.788919530324415e-06, "loss": 0.2891, "step": 6121 }, { "epoch": 2.0440734557595994, "grad_norm": 0.9174116748408879, "learning_rate": 2.787177351667229e-06, "loss": 0.3041, "step": 6122 }, { "epoch": 2.04440734557596, "grad_norm": 0.8864986001630716, "learning_rate": 2.785435507034967e-06, "loss": 0.3001, "step": 6123 }, { "epoch": 2.0447412353923204, "grad_norm": 0.8972280490112473, "learning_rate": 2.78369399669056e-06, "loss": 0.2911, "step": 6124 }, { "epoch": 2.045075125208681, "grad_norm": 0.9284307914507481, "learning_rate": 2.781952820896888e-06, "loss": 0.3077, "step": 6125 }, { "epoch": 2.045409015025042, "grad_norm": 0.9286871421655104, "learning_rate": 2.780211979916782e-06, "loss": 0.3105, "step": 6126 }, { "epoch": 2.0457429048414024, "grad_norm": 0.8932128337846331, "learning_rate": 2.7784714740130204e-06, "loss": 0.2844, "step": 6127 }, { "epoch": 2.046076794657763, "grad_norm": 0.9106355678260022, "learning_rate": 2.776731303448332e-06, "loss": 0.3006, "step": 6128 }, { "epoch": 2.0464106844741234, "grad_norm": 0.9365424620807442, "learning_rate": 2.7749914684853957e-06, "loss": 0.3092, "step": 6129 }, { "epoch": 2.046744574290484, "grad_norm": 0.9001865400850891, "learning_rate": 2.7732519693868386e-06, "loss": 0.297, "step": 6130 }, { "epoch": 2.047078464106845, "grad_norm": 0.9458342123800203, "learning_rate": 2.7715128064152364e-06, "loss": 0.3087, "step": 6131 }, { "epoch": 2.0474123539232054, "grad_norm": 0.9043511689231029, "learning_rate": 2.769773979833118e-06, "loss": 0.298, "step": 6132 }, { "epoch": 2.047746243739566, "grad_norm": 0.9023110019902255, "learning_rate": 2.7680354899029595e-06, "loss": 0.2965, "step": 6133 }, { "epoch": 2.0480801335559264, "grad_norm": 0.9271926741311198, "learning_rate": 2.7662973368871804e-06, "loss": 0.3074, "step": 6134 }, { "epoch": 2.048414023372287, "grad_norm": 0.9167690301276182, "learning_rate": 2.764559521048156e-06, "loss": 0.2932, "step": 6135 }, { "epoch": 2.048747913188648, "grad_norm": 0.885962782681357, "learning_rate": 2.762822042648213e-06, "loss": 0.2877, "step": 6136 }, { "epoch": 2.0490818030050084, "grad_norm": 0.9251971394141806, "learning_rate": 2.76108490194962e-06, "loss": 0.2988, "step": 6137 }, { "epoch": 2.049415692821369, "grad_norm": 0.9844947267651075, "learning_rate": 2.7593480992146005e-06, "loss": 0.2997, "step": 6138 }, { "epoch": 2.0497495826377294, "grad_norm": 0.9068258934432247, "learning_rate": 2.7576116347053185e-06, "loss": 0.2921, "step": 6139 }, { "epoch": 2.05008347245409, "grad_norm": 0.9252914993321799, "learning_rate": 2.755875508683898e-06, "loss": 0.3126, "step": 6140 }, { "epoch": 2.050417362270451, "grad_norm": 0.9237829999707304, "learning_rate": 2.7541397214124055e-06, "loss": 0.304, "step": 6141 }, { "epoch": 2.0507512520868114, "grad_norm": 0.888283727862224, "learning_rate": 2.752404273152858e-06, "loss": 0.2858, "step": 6142 }, { "epoch": 2.051085141903172, "grad_norm": 0.9502272140062522, "learning_rate": 2.75066916416722e-06, "loss": 0.3131, "step": 6143 }, { "epoch": 2.0514190317195324, "grad_norm": 0.9159488431299028, "learning_rate": 2.7489343947174065e-06, "loss": 0.3103, "step": 6144 }, { "epoch": 2.051752921535893, "grad_norm": 0.8808413696447205, "learning_rate": 2.747199965065279e-06, "loss": 0.2902, "step": 6145 }, { "epoch": 2.052086811352254, "grad_norm": 0.8982851765976576, "learning_rate": 2.7454658754726505e-06, "loss": 0.2988, "step": 6146 }, { "epoch": 2.0524207011686144, "grad_norm": 0.9091395413175332, "learning_rate": 2.7437321262012812e-06, "loss": 0.2969, "step": 6147 }, { "epoch": 2.052754590984975, "grad_norm": 0.9166334375222887, "learning_rate": 2.7419987175128794e-06, "loss": 0.2958, "step": 6148 }, { "epoch": 2.0530884808013354, "grad_norm": 0.9271989195817926, "learning_rate": 2.740265649669103e-06, "loss": 0.3065, "step": 6149 }, { "epoch": 2.053422370617696, "grad_norm": 0.9318554558745652, "learning_rate": 2.7385329229315583e-06, "loss": 0.3076, "step": 6150 }, { "epoch": 2.053756260434057, "grad_norm": 0.9291878363088848, "learning_rate": 2.7368005375617996e-06, "loss": 0.2971, "step": 6151 }, { "epoch": 2.0540901502504174, "grad_norm": 0.9127264752235371, "learning_rate": 2.735068493821328e-06, "loss": 0.3107, "step": 6152 }, { "epoch": 2.054424040066778, "grad_norm": 0.9423815021353811, "learning_rate": 2.7333367919716005e-06, "loss": 0.2994, "step": 6153 }, { "epoch": 2.0547579298831384, "grad_norm": 0.9360050437574613, "learning_rate": 2.7316054322740114e-06, "loss": 0.3055, "step": 6154 }, { "epoch": 2.0550918196994994, "grad_norm": 0.9622081764782652, "learning_rate": 2.729874414989912e-06, "loss": 0.3102, "step": 6155 }, { "epoch": 2.05542570951586, "grad_norm": 0.9514163782353051, "learning_rate": 2.7281437403805957e-06, "loss": 0.2955, "step": 6156 }, { "epoch": 2.0557595993322204, "grad_norm": 0.9099471217862447, "learning_rate": 2.72641340870731e-06, "loss": 0.3017, "step": 6157 }, { "epoch": 2.056093489148581, "grad_norm": 0.8959030011977821, "learning_rate": 2.724683420231249e-06, "loss": 0.2983, "step": 6158 }, { "epoch": 2.0564273789649414, "grad_norm": 0.9431743517364113, "learning_rate": 2.7229537752135537e-06, "loss": 0.3007, "step": 6159 }, { "epoch": 2.0567612687813024, "grad_norm": 0.9224330032449368, "learning_rate": 2.721224473915308e-06, "loss": 0.3065, "step": 6160 }, { "epoch": 2.057095158597663, "grad_norm": 0.9138011428090909, "learning_rate": 2.7194955165975566e-06, "loss": 0.3019, "step": 6161 }, { "epoch": 2.0574290484140234, "grad_norm": 0.9222685657262313, "learning_rate": 2.717766903521281e-06, "loss": 0.3049, "step": 6162 }, { "epoch": 2.057762938230384, "grad_norm": 0.9153781017442935, "learning_rate": 2.7160386349474166e-06, "loss": 0.305, "step": 6163 }, { "epoch": 2.0580968280467444, "grad_norm": 0.9317214450953735, "learning_rate": 2.7143107111368437e-06, "loss": 0.2959, "step": 6164 }, { "epoch": 2.0584307178631054, "grad_norm": 0.9059832891016255, "learning_rate": 2.7125831323503933e-06, "loss": 0.295, "step": 6165 }, { "epoch": 2.058764607679466, "grad_norm": 0.8870840473703229, "learning_rate": 2.7108558988488414e-06, "loss": 0.298, "step": 6166 }, { "epoch": 2.0590984974958264, "grad_norm": 0.8956862421814162, "learning_rate": 2.7091290108929146e-06, "loss": 0.3088, "step": 6167 }, { "epoch": 2.059432387312187, "grad_norm": 0.9169818041464938, "learning_rate": 2.7074024687432856e-06, "loss": 0.3032, "step": 6168 }, { "epoch": 2.0597662771285474, "grad_norm": 0.9598212134901193, "learning_rate": 2.7056762726605757e-06, "loss": 0.3093, "step": 6169 }, { "epoch": 2.0601001669449084, "grad_norm": 0.9370529290078886, "learning_rate": 2.7039504229053527e-06, "loss": 0.3047, "step": 6170 }, { "epoch": 2.060434056761269, "grad_norm": 0.9069663356107912, "learning_rate": 2.7022249197381346e-06, "loss": 0.2884, "step": 6171 }, { "epoch": 2.0607679465776294, "grad_norm": 0.903239391511068, "learning_rate": 2.7004997634193824e-06, "loss": 0.2748, "step": 6172 }, { "epoch": 2.06110183639399, "grad_norm": 0.9436723641649212, "learning_rate": 2.6987749542095126e-06, "loss": 0.3081, "step": 6173 }, { "epoch": 2.0614357262103504, "grad_norm": 0.918040514341989, "learning_rate": 2.6970504923688845e-06, "loss": 0.3008, "step": 6174 }, { "epoch": 2.0617696160267114, "grad_norm": 0.9640889660113496, "learning_rate": 2.695326378157801e-06, "loss": 0.3138, "step": 6175 }, { "epoch": 2.062103505843072, "grad_norm": 0.9332143500799606, "learning_rate": 2.693602611836517e-06, "loss": 0.3088, "step": 6176 }, { "epoch": 2.0624373956594324, "grad_norm": 0.9237607240181694, "learning_rate": 2.691879193665238e-06, "loss": 0.2949, "step": 6177 }, { "epoch": 2.062771285475793, "grad_norm": 0.9351126330040396, "learning_rate": 2.6901561239041114e-06, "loss": 0.301, "step": 6178 }, { "epoch": 2.0631051752921534, "grad_norm": 0.9061033657158604, "learning_rate": 2.6884334028132365e-06, "loss": 0.2921, "step": 6179 }, { "epoch": 2.0634390651085144, "grad_norm": 0.9262101390015232, "learning_rate": 2.6867110306526533e-06, "loss": 0.307, "step": 6180 }, { "epoch": 2.063772954924875, "grad_norm": 0.9334021302906379, "learning_rate": 2.684989007682354e-06, "loss": 0.2968, "step": 6181 }, { "epoch": 2.0641068447412354, "grad_norm": 0.9409944471997339, "learning_rate": 2.68326733416228e-06, "loss": 0.2917, "step": 6182 }, { "epoch": 2.064440734557596, "grad_norm": 0.9109577335463553, "learning_rate": 2.6815460103523165e-06, "loss": 0.2942, "step": 6183 }, { "epoch": 2.0647746243739564, "grad_norm": 0.9390816981396498, "learning_rate": 2.679825036512299e-06, "loss": 0.3068, "step": 6184 }, { "epoch": 2.0651085141903174, "grad_norm": 0.9432849196494686, "learning_rate": 2.6781044129020017e-06, "loss": 0.2997, "step": 6185 }, { "epoch": 2.065442404006678, "grad_norm": 0.9497149349450977, "learning_rate": 2.6763841397811576e-06, "loss": 0.304, "step": 6186 }, { "epoch": 2.0657762938230384, "grad_norm": 0.9037844531853061, "learning_rate": 2.6746642174094406e-06, "loss": 0.2983, "step": 6187 }, { "epoch": 2.066110183639399, "grad_norm": 0.9315513220018175, "learning_rate": 2.6729446460464714e-06, "loss": 0.303, "step": 6188 }, { "epoch": 2.0664440734557594, "grad_norm": 0.9620364366587075, "learning_rate": 2.6712254259518187e-06, "loss": 0.306, "step": 6189 }, { "epoch": 2.0667779632721204, "grad_norm": 0.955408976640722, "learning_rate": 2.6695065573849998e-06, "loss": 0.3098, "step": 6190 }, { "epoch": 2.067111853088481, "grad_norm": 0.9284713533180206, "learning_rate": 2.6677880406054756e-06, "loss": 0.2914, "step": 6191 }, { "epoch": 2.0674457429048414, "grad_norm": 0.9501757018817535, "learning_rate": 2.6660698758726572e-06, "loss": 0.2885, "step": 6192 }, { "epoch": 2.067779632721202, "grad_norm": 0.9171980517209408, "learning_rate": 2.6643520634458986e-06, "loss": 0.2965, "step": 6193 }, { "epoch": 2.0681135225375624, "grad_norm": 0.9359371050676308, "learning_rate": 2.6626346035845073e-06, "loss": 0.3, "step": 6194 }, { "epoch": 2.0684474123539234, "grad_norm": 0.9580556993326885, "learning_rate": 2.66091749654773e-06, "loss": 0.2938, "step": 6195 }, { "epoch": 2.068781302170284, "grad_norm": 0.9208957315875768, "learning_rate": 2.6592007425947643e-06, "loss": 0.2867, "step": 6196 }, { "epoch": 2.0691151919866444, "grad_norm": 0.8941816379467974, "learning_rate": 2.657484341984752e-06, "loss": 0.2791, "step": 6197 }, { "epoch": 2.069449081803005, "grad_norm": 0.9298225529824364, "learning_rate": 2.6557682949767866e-06, "loss": 0.2934, "step": 6198 }, { "epoch": 2.0697829716193654, "grad_norm": 0.9153022295294334, "learning_rate": 2.654052601829905e-06, "loss": 0.2958, "step": 6199 }, { "epoch": 2.0701168614357264, "grad_norm": 0.9622087031402827, "learning_rate": 2.6523372628030875e-06, "loss": 0.3106, "step": 6200 }, { "epoch": 2.070450751252087, "grad_norm": 0.9303988847665822, "learning_rate": 2.650622278155264e-06, "loss": 0.3074, "step": 6201 }, { "epoch": 2.0707846410684474, "grad_norm": 0.9025091704350755, "learning_rate": 2.6489076481453135e-06, "loss": 0.2908, "step": 6202 }, { "epoch": 2.071118530884808, "grad_norm": 0.9533859836506484, "learning_rate": 2.6471933730320575e-06, "loss": 0.3054, "step": 6203 }, { "epoch": 2.0714524207011684, "grad_norm": 0.9658755682579199, "learning_rate": 2.645479453074266e-06, "loss": 0.3148, "step": 6204 }, { "epoch": 2.0717863105175294, "grad_norm": 0.9306245719857099, "learning_rate": 2.643765888530654e-06, "loss": 0.2912, "step": 6205 }, { "epoch": 2.07212020033389, "grad_norm": 0.9142404150525782, "learning_rate": 2.6420526796598845e-06, "loss": 0.2944, "step": 6206 }, { "epoch": 2.0724540901502504, "grad_norm": 0.9223358942453307, "learning_rate": 2.6403398267205648e-06, "loss": 0.2945, "step": 6207 }, { "epoch": 2.072787979966611, "grad_norm": 0.9387782107978546, "learning_rate": 2.6386273299712494e-06, "loss": 0.2964, "step": 6208 }, { "epoch": 2.0731218697829714, "grad_norm": 0.9517180197145116, "learning_rate": 2.63691518967044e-06, "loss": 0.2958, "step": 6209 }, { "epoch": 2.0734557595993324, "grad_norm": 0.9196476742267959, "learning_rate": 2.635203406076584e-06, "loss": 0.2959, "step": 6210 }, { "epoch": 2.073789649415693, "grad_norm": 0.9714987480526193, "learning_rate": 2.633491979448074e-06, "loss": 0.31, "step": 6211 }, { "epoch": 2.0741235392320534, "grad_norm": 0.9223090853216567, "learning_rate": 2.631780910043249e-06, "loss": 0.2908, "step": 6212 }, { "epoch": 2.074457429048414, "grad_norm": 0.9617484687454958, "learning_rate": 2.630070198120395e-06, "loss": 0.312, "step": 6213 }, { "epoch": 2.0747913188647744, "grad_norm": 0.9108835596680767, "learning_rate": 2.6283598439377424e-06, "loss": 0.2904, "step": 6214 }, { "epoch": 2.0751252086811354, "grad_norm": 0.9956900248025534, "learning_rate": 2.6266498477534706e-06, "loss": 0.3111, "step": 6215 }, { "epoch": 2.075459098497496, "grad_norm": 0.8976825504787707, "learning_rate": 2.6249402098257014e-06, "loss": 0.2895, "step": 6216 }, { "epoch": 2.0757929883138564, "grad_norm": 0.9525063014217718, "learning_rate": 2.623230930412505e-06, "loss": 0.3022, "step": 6217 }, { "epoch": 2.076126878130217, "grad_norm": 0.882377925879649, "learning_rate": 2.6215220097718943e-06, "loss": 0.2835, "step": 6218 }, { "epoch": 2.0764607679465774, "grad_norm": 0.9550312903364309, "learning_rate": 2.6198134481618332e-06, "loss": 0.3004, "step": 6219 }, { "epoch": 2.0767946577629384, "grad_norm": 0.904942773982881, "learning_rate": 2.618105245840231e-06, "loss": 0.2859, "step": 6220 }, { "epoch": 2.077128547579299, "grad_norm": 0.923595551827087, "learning_rate": 2.616397403064934e-06, "loss": 0.2983, "step": 6221 }, { "epoch": 2.0774624373956594, "grad_norm": 0.8963063512034115, "learning_rate": 2.6146899200937424e-06, "loss": 0.2944, "step": 6222 }, { "epoch": 2.07779632721202, "grad_norm": 0.9050054360167216, "learning_rate": 2.6129827971844037e-06, "loss": 0.2894, "step": 6223 }, { "epoch": 2.078130217028381, "grad_norm": 0.9415941134712086, "learning_rate": 2.6112760345946053e-06, "loss": 0.2954, "step": 6224 }, { "epoch": 2.0784641068447414, "grad_norm": 0.926354300215061, "learning_rate": 2.609569632581984e-06, "loss": 0.2967, "step": 6225 }, { "epoch": 2.078797996661102, "grad_norm": 0.9448042760234358, "learning_rate": 2.6078635914041154e-06, "loss": 0.3072, "step": 6226 }, { "epoch": 2.0791318864774624, "grad_norm": 0.9210667183468507, "learning_rate": 2.606157911318532e-06, "loss": 0.2862, "step": 6227 }, { "epoch": 2.079465776293823, "grad_norm": 0.8979900750719202, "learning_rate": 2.6044525925827034e-06, "loss": 0.296, "step": 6228 }, { "epoch": 2.079799666110184, "grad_norm": 0.9492302925426777, "learning_rate": 2.602747635454047e-06, "loss": 0.3107, "step": 6229 }, { "epoch": 2.0801335559265444, "grad_norm": 0.950966895784862, "learning_rate": 2.6010430401899256e-06, "loss": 0.2994, "step": 6230 }, { "epoch": 2.080467445742905, "grad_norm": 0.9520838860254242, "learning_rate": 2.599338807047647e-06, "loss": 0.3047, "step": 6231 }, { "epoch": 2.0808013355592654, "grad_norm": 0.9019076015014108, "learning_rate": 2.5976349362844654e-06, "loss": 0.2862, "step": 6232 }, { "epoch": 2.081135225375626, "grad_norm": 0.9283104247820623, "learning_rate": 2.595931428157579e-06, "loss": 0.297, "step": 6233 }, { "epoch": 2.081469115191987, "grad_norm": 0.9623421942789122, "learning_rate": 2.594228282924133e-06, "loss": 0.3119, "step": 6234 }, { "epoch": 2.0818030050083474, "grad_norm": 0.9352748566088824, "learning_rate": 2.592525500841214e-06, "loss": 0.3041, "step": 6235 }, { "epoch": 2.082136894824708, "grad_norm": 0.964449932890926, "learning_rate": 2.5908230821658596e-06, "loss": 0.3067, "step": 6236 }, { "epoch": 2.0824707846410684, "grad_norm": 0.9873116487469759, "learning_rate": 2.589121027155047e-06, "loss": 0.3182, "step": 6237 }, { "epoch": 2.082804674457429, "grad_norm": 0.9413639587937626, "learning_rate": 2.587419336065701e-06, "loss": 0.306, "step": 6238 }, { "epoch": 2.08313856427379, "grad_norm": 0.9633392555199173, "learning_rate": 2.5857180091546937e-06, "loss": 0.3254, "step": 6239 }, { "epoch": 2.0834724540901504, "grad_norm": 0.9206258052351918, "learning_rate": 2.5840170466788405e-06, "loss": 0.2958, "step": 6240 }, { "epoch": 2.083806343906511, "grad_norm": 0.9713414404412568, "learning_rate": 2.5823164488948973e-06, "loss": 0.3045, "step": 6241 }, { "epoch": 2.0841402337228714, "grad_norm": 0.9054690084574053, "learning_rate": 2.58061621605957e-06, "loss": 0.2894, "step": 6242 }, { "epoch": 2.084474123539232, "grad_norm": 0.9477558866156982, "learning_rate": 2.5789163484295075e-06, "loss": 0.2988, "step": 6243 }, { "epoch": 2.084808013355593, "grad_norm": 0.9276293571177424, "learning_rate": 2.577216846261307e-06, "loss": 0.2962, "step": 6244 }, { "epoch": 2.0851419031719534, "grad_norm": 0.9346018865588723, "learning_rate": 2.575517709811509e-06, "loss": 0.3003, "step": 6245 }, { "epoch": 2.085475792988314, "grad_norm": 0.9010495539127035, "learning_rate": 2.5738189393365944e-06, "loss": 0.2838, "step": 6246 }, { "epoch": 2.0858096828046744, "grad_norm": 0.9173226882593293, "learning_rate": 2.572120535092989e-06, "loss": 0.2932, "step": 6247 }, { "epoch": 2.086143572621035, "grad_norm": 0.9535016890055643, "learning_rate": 2.5704224973370735e-06, "loss": 0.3046, "step": 6248 }, { "epoch": 2.086477462437396, "grad_norm": 0.9387161202659372, "learning_rate": 2.568724826325163e-06, "loss": 0.2925, "step": 6249 }, { "epoch": 2.0868113522537564, "grad_norm": 0.9293324486150987, "learning_rate": 2.5670275223135198e-06, "loss": 0.2883, "step": 6250 }, { "epoch": 2.087145242070117, "grad_norm": 0.9659694760039935, "learning_rate": 2.565330585558352e-06, "loss": 0.3047, "step": 6251 }, { "epoch": 2.0874791318864774, "grad_norm": 0.9421383408306739, "learning_rate": 2.5636340163158125e-06, "loss": 0.2935, "step": 6252 }, { "epoch": 2.087813021702838, "grad_norm": 1.001648918587462, "learning_rate": 2.561937814841997e-06, "loss": 0.3149, "step": 6253 }, { "epoch": 2.088146911519199, "grad_norm": 0.9521819714852806, "learning_rate": 2.560241981392947e-06, "loss": 0.3015, "step": 6254 }, { "epoch": 2.0884808013355594, "grad_norm": 0.9276159987208352, "learning_rate": 2.5585465162246483e-06, "loss": 0.3063, "step": 6255 }, { "epoch": 2.08881469115192, "grad_norm": 1.0200678328719903, "learning_rate": 2.5568514195930295e-06, "loss": 0.3001, "step": 6256 }, { "epoch": 2.0891485809682804, "grad_norm": 0.9783275512270582, "learning_rate": 2.555156691753966e-06, "loss": 0.3098, "step": 6257 }, { "epoch": 2.089482470784641, "grad_norm": 0.9609754986643381, "learning_rate": 2.5534623329632767e-06, "loss": 0.3009, "step": 6258 }, { "epoch": 2.089816360601002, "grad_norm": 0.9656962686761528, "learning_rate": 2.5517683434767228e-06, "loss": 0.3145, "step": 6259 }, { "epoch": 2.0901502504173624, "grad_norm": 1.0051979385643692, "learning_rate": 2.5500747235500156e-06, "loss": 0.3176, "step": 6260 }, { "epoch": 2.090484140233723, "grad_norm": 0.9518905342877971, "learning_rate": 2.548381473438803e-06, "loss": 0.3029, "step": 6261 }, { "epoch": 2.0908180300500834, "grad_norm": 0.9507385322393627, "learning_rate": 2.5466885933986808e-06, "loss": 0.2944, "step": 6262 }, { "epoch": 2.091151919866444, "grad_norm": 0.9324951121083083, "learning_rate": 2.5449960836851883e-06, "loss": 0.3042, "step": 6263 }, { "epoch": 2.091485809682805, "grad_norm": 0.9770252542916315, "learning_rate": 2.5433039445538115e-06, "loss": 0.3015, "step": 6264 }, { "epoch": 2.0918196994991654, "grad_norm": 0.9716262987087404, "learning_rate": 2.5416121762599778e-06, "loss": 0.3108, "step": 6265 }, { "epoch": 2.092153589315526, "grad_norm": 0.9321155597624836, "learning_rate": 2.5399207790590595e-06, "loss": 0.2925, "step": 6266 }, { "epoch": 2.0924874791318864, "grad_norm": 0.9604318007495267, "learning_rate": 2.5382297532063693e-06, "loss": 0.3044, "step": 6267 }, { "epoch": 2.092821368948247, "grad_norm": 0.951502965591456, "learning_rate": 2.5365390989571705e-06, "loss": 0.3096, "step": 6268 }, { "epoch": 2.093155258764608, "grad_norm": 0.9518894397612555, "learning_rate": 2.534848816566665e-06, "loss": 0.3019, "step": 6269 }, { "epoch": 2.0934891485809684, "grad_norm": 0.9539769641709666, "learning_rate": 2.533158906290002e-06, "loss": 0.3043, "step": 6270 }, { "epoch": 2.093823038397329, "grad_norm": 0.974464537567827, "learning_rate": 2.5314693683822733e-06, "loss": 0.3095, "step": 6271 }, { "epoch": 2.0941569282136894, "grad_norm": 0.9526654552218853, "learning_rate": 2.5297802030985097e-06, "loss": 0.305, "step": 6272 }, { "epoch": 2.09449081803005, "grad_norm": 0.9891500936243189, "learning_rate": 2.528091410693695e-06, "loss": 0.3096, "step": 6273 }, { "epoch": 2.094824707846411, "grad_norm": 0.947460805267302, "learning_rate": 2.5264029914227508e-06, "loss": 0.3021, "step": 6274 }, { "epoch": 2.0951585976627713, "grad_norm": 0.9195359607594538, "learning_rate": 2.524714945540543e-06, "loss": 0.2964, "step": 6275 }, { "epoch": 2.095492487479132, "grad_norm": 0.9659648515772737, "learning_rate": 2.5230272733018812e-06, "loss": 0.3029, "step": 6276 }, { "epoch": 2.0958263772954924, "grad_norm": 0.9153318858856885, "learning_rate": 2.521339974961519e-06, "loss": 0.2813, "step": 6277 }, { "epoch": 2.096160267111853, "grad_norm": 0.9696805560683637, "learning_rate": 2.5196530507741546e-06, "loss": 0.2998, "step": 6278 }, { "epoch": 2.096494156928214, "grad_norm": 0.9400063060104319, "learning_rate": 2.517966500994429e-06, "loss": 0.2928, "step": 6279 }, { "epoch": 2.0968280467445743, "grad_norm": 0.9255882778672176, "learning_rate": 2.5162803258769244e-06, "loss": 0.2875, "step": 6280 }, { "epoch": 2.097161936560935, "grad_norm": 0.9541666813836694, "learning_rate": 2.5145945256761694e-06, "loss": 0.2995, "step": 6281 }, { "epoch": 2.0974958263772954, "grad_norm": 0.9201092071898836, "learning_rate": 2.5129091006466365e-06, "loss": 0.2924, "step": 6282 }, { "epoch": 2.097829716193656, "grad_norm": 0.986493012180953, "learning_rate": 2.5112240510427378e-06, "loss": 0.3065, "step": 6283 }, { "epoch": 2.098163606010017, "grad_norm": 0.9316283683450329, "learning_rate": 2.5095393771188305e-06, "loss": 0.2874, "step": 6284 }, { "epoch": 2.0984974958263773, "grad_norm": 0.9496628442424245, "learning_rate": 2.507855079129219e-06, "loss": 0.309, "step": 6285 }, { "epoch": 2.098831385642738, "grad_norm": 0.9156094181650508, "learning_rate": 2.506171157328148e-06, "loss": 0.2894, "step": 6286 }, { "epoch": 2.0991652754590984, "grad_norm": 0.9579531389511972, "learning_rate": 2.5044876119698e-06, "loss": 0.3082, "step": 6287 }, { "epoch": 2.099499165275459, "grad_norm": 0.9851757073286532, "learning_rate": 2.502804443308308e-06, "loss": 0.3225, "step": 6288 }, { "epoch": 2.09983305509182, "grad_norm": 0.9484612248716469, "learning_rate": 2.5011216515977477e-06, "loss": 0.2947, "step": 6289 }, { "epoch": 2.1001669449081803, "grad_norm": 0.914263308335105, "learning_rate": 2.499439237092134e-06, "loss": 0.2916, "step": 6290 }, { "epoch": 2.100500834724541, "grad_norm": 0.9514927623786664, "learning_rate": 2.4977572000454296e-06, "loss": 0.3127, "step": 6291 }, { "epoch": 2.1008347245409014, "grad_norm": 0.9419310630448199, "learning_rate": 2.4960755407115317e-06, "loss": 0.2957, "step": 6292 }, { "epoch": 2.1011686143572623, "grad_norm": 0.9723121139601274, "learning_rate": 2.4943942593442916e-06, "loss": 0.2993, "step": 6293 }, { "epoch": 2.101502504173623, "grad_norm": 1.0087967809265348, "learning_rate": 2.492713356197497e-06, "loss": 0.3178, "step": 6294 }, { "epoch": 2.1018363939899833, "grad_norm": 0.9515363104881381, "learning_rate": 2.491032831524879e-06, "loss": 0.3006, "step": 6295 }, { "epoch": 2.102170283806344, "grad_norm": 0.9541337562384781, "learning_rate": 2.4893526855801126e-06, "loss": 0.3008, "step": 6296 }, { "epoch": 2.1025041736227044, "grad_norm": 1.0129024252526106, "learning_rate": 2.4876729186168155e-06, "loss": 0.3089, "step": 6297 }, { "epoch": 2.1028380634390653, "grad_norm": 0.9727205244571002, "learning_rate": 2.4859935308885473e-06, "loss": 0.3069, "step": 6298 }, { "epoch": 2.103171953255426, "grad_norm": 0.967199263915196, "learning_rate": 2.484314522648812e-06, "loss": 0.2997, "step": 6299 }, { "epoch": 2.1035058430717863, "grad_norm": 0.9765239691019303, "learning_rate": 2.482635894151055e-06, "loss": 0.3013, "step": 6300 }, { "epoch": 2.103839732888147, "grad_norm": 0.9846563394971878, "learning_rate": 2.4809576456486646e-06, "loss": 0.3142, "step": 6301 }, { "epoch": 2.1041736227045074, "grad_norm": 0.9652723645283859, "learning_rate": 2.4792797773949722e-06, "loss": 0.2974, "step": 6302 }, { "epoch": 2.1045075125208683, "grad_norm": 0.9710557553048687, "learning_rate": 2.4776022896432516e-06, "loss": 0.312, "step": 6303 }, { "epoch": 2.104841402337229, "grad_norm": 0.9689101081843609, "learning_rate": 2.475925182646719e-06, "loss": 0.2929, "step": 6304 }, { "epoch": 2.1051752921535893, "grad_norm": 1.0322940369946936, "learning_rate": 2.4742484566585312e-06, "loss": 0.3041, "step": 6305 }, { "epoch": 2.10550918196995, "grad_norm": 0.9708404942019667, "learning_rate": 2.472572111931795e-06, "loss": 0.2888, "step": 6306 }, { "epoch": 2.1058430717863104, "grad_norm": 0.913247681755574, "learning_rate": 2.470896148719549e-06, "loss": 0.2868, "step": 6307 }, { "epoch": 2.1061769616026713, "grad_norm": 0.9209732115166338, "learning_rate": 2.4692205672747806e-06, "loss": 0.2878, "step": 6308 }, { "epoch": 2.106510851419032, "grad_norm": 0.9559420758794697, "learning_rate": 2.4675453678504164e-06, "loss": 0.2894, "step": 6309 }, { "epoch": 2.1068447412353923, "grad_norm": 0.9973715271034674, "learning_rate": 2.465870550699332e-06, "loss": 0.3065, "step": 6310 }, { "epoch": 2.107178631051753, "grad_norm": 0.9950693883291182, "learning_rate": 2.4641961160743377e-06, "loss": 0.3085, "step": 6311 }, { "epoch": 2.1075125208681134, "grad_norm": 0.9663632585763505, "learning_rate": 2.4625220642281912e-06, "loss": 0.3055, "step": 6312 }, { "epoch": 2.1078464106844743, "grad_norm": 0.9577308550365112, "learning_rate": 2.4608483954135843e-06, "loss": 0.2993, "step": 6313 }, { "epoch": 2.108180300500835, "grad_norm": 0.9228871794413954, "learning_rate": 2.459175109883163e-06, "loss": 0.2851, "step": 6314 }, { "epoch": 2.1085141903171953, "grad_norm": 0.9676983459600137, "learning_rate": 2.457502207889507e-06, "loss": 0.2981, "step": 6315 }, { "epoch": 2.108848080133556, "grad_norm": 0.9285533982701345, "learning_rate": 2.455829689685139e-06, "loss": 0.2902, "step": 6316 }, { "epoch": 2.1091819699499164, "grad_norm": 0.9702991532208608, "learning_rate": 2.454157555522528e-06, "loss": 0.3112, "step": 6317 }, { "epoch": 2.1095158597662773, "grad_norm": 0.9751789210477685, "learning_rate": 2.4524858056540796e-06, "loss": 0.3109, "step": 6318 }, { "epoch": 2.109849749582638, "grad_norm": 0.9734459223702192, "learning_rate": 2.4508144403321455e-06, "loss": 0.3042, "step": 6319 }, { "epoch": 2.1101836393989983, "grad_norm": 0.9572408302019308, "learning_rate": 2.449143459809017e-06, "loss": 0.3006, "step": 6320 }, { "epoch": 2.110517529215359, "grad_norm": 0.9983210050320837, "learning_rate": 2.447472864336928e-06, "loss": 0.3044, "step": 6321 }, { "epoch": 2.1108514190317194, "grad_norm": 0.9199701442538394, "learning_rate": 2.445802654168055e-06, "loss": 0.2828, "step": 6322 }, { "epoch": 2.1111853088480803, "grad_norm": 0.945217381017864, "learning_rate": 2.4441328295545156e-06, "loss": 0.2971, "step": 6323 }, { "epoch": 2.111519198664441, "grad_norm": 0.9207820699041023, "learning_rate": 2.4424633907483696e-06, "loss": 0.2832, "step": 6324 }, { "epoch": 2.1118530884808013, "grad_norm": 0.9483321188687195, "learning_rate": 2.4407943380016157e-06, "loss": 0.2988, "step": 6325 }, { "epoch": 2.112186978297162, "grad_norm": 0.9546667040523646, "learning_rate": 2.4391256715662027e-06, "loss": 0.2975, "step": 6326 }, { "epoch": 2.1125208681135224, "grad_norm": 0.9479309635495051, "learning_rate": 2.4374573916940098e-06, "loss": 0.297, "step": 6327 }, { "epoch": 2.1128547579298833, "grad_norm": 0.981932950665203, "learning_rate": 2.4357894986368657e-06, "loss": 0.3054, "step": 6328 }, { "epoch": 2.113188647746244, "grad_norm": 0.9656099367655432, "learning_rate": 2.4341219926465354e-06, "loss": 0.2994, "step": 6329 }, { "epoch": 2.1135225375626043, "grad_norm": 0.985347578926952, "learning_rate": 2.4324548739747334e-06, "loss": 0.3118, "step": 6330 }, { "epoch": 2.113856427378965, "grad_norm": 0.9510444357889594, "learning_rate": 2.4307881428731085e-06, "loss": 0.2867, "step": 6331 }, { "epoch": 2.1141903171953254, "grad_norm": 0.963780373938356, "learning_rate": 2.4291217995932544e-06, "loss": 0.2947, "step": 6332 }, { "epoch": 2.1145242070116863, "grad_norm": 0.9256871116424736, "learning_rate": 2.4274558443867023e-06, "loss": 0.2922, "step": 6333 }, { "epoch": 2.114858096828047, "grad_norm": 0.9637512772858859, "learning_rate": 2.4257902775049273e-06, "loss": 0.3081, "step": 6334 }, { "epoch": 2.1151919866444073, "grad_norm": 1.0042234324791892, "learning_rate": 2.42412509919935e-06, "loss": 0.2929, "step": 6335 }, { "epoch": 2.115525876460768, "grad_norm": 0.9731201897251859, "learning_rate": 2.422460309721326e-06, "loss": 0.3045, "step": 6336 }, { "epoch": 2.1158597662771284, "grad_norm": 1.000276130246984, "learning_rate": 2.4207959093221585e-06, "loss": 0.307, "step": 6337 }, { "epoch": 2.1161936560934893, "grad_norm": 1.0001562056310898, "learning_rate": 2.419131898253081e-06, "loss": 0.303, "step": 6338 }, { "epoch": 2.11652754590985, "grad_norm": 0.9751638826705192, "learning_rate": 2.4174682767652814e-06, "loss": 0.3006, "step": 6339 }, { "epoch": 2.1168614357262103, "grad_norm": 0.949888896362011, "learning_rate": 2.415805045109881e-06, "loss": 0.3014, "step": 6340 }, { "epoch": 2.117195325542571, "grad_norm": 0.9522854917150063, "learning_rate": 2.414142203537945e-06, "loss": 0.2926, "step": 6341 }, { "epoch": 2.1175292153589313, "grad_norm": 0.9633358609780692, "learning_rate": 2.4124797523004773e-06, "loss": 0.2901, "step": 6342 }, { "epoch": 2.1178631051752923, "grad_norm": 0.965065962621782, "learning_rate": 2.4108176916484255e-06, "loss": 0.3015, "step": 6343 }, { "epoch": 2.118196994991653, "grad_norm": 0.9355640128059504, "learning_rate": 2.409156021832677e-06, "loss": 0.2874, "step": 6344 }, { "epoch": 2.1185308848080133, "grad_norm": 0.9380427213424088, "learning_rate": 2.40749474310406e-06, "loss": 0.2985, "step": 6345 }, { "epoch": 2.118864774624374, "grad_norm": 0.9501737042911225, "learning_rate": 2.405833855713343e-06, "loss": 0.2928, "step": 6346 }, { "epoch": 2.1191986644407343, "grad_norm": 1.2483539146271074, "learning_rate": 2.4041733599112417e-06, "loss": 0.3129, "step": 6347 }, { "epoch": 2.1195325542570953, "grad_norm": 0.9287061593931768, "learning_rate": 2.402513255948401e-06, "loss": 0.2826, "step": 6348 }, { "epoch": 2.119866444073456, "grad_norm": 0.9938152039654279, "learning_rate": 2.4008535440754154e-06, "loss": 0.305, "step": 6349 }, { "epoch": 2.1202003338898163, "grad_norm": 0.9547611462934974, "learning_rate": 2.399194224542817e-06, "loss": 0.2986, "step": 6350 }, { "epoch": 2.120534223706177, "grad_norm": 0.9320885235955152, "learning_rate": 2.3975352976010825e-06, "loss": 0.2925, "step": 6351 }, { "epoch": 2.120868113522538, "grad_norm": 0.9513344958572402, "learning_rate": 2.3958767635006264e-06, "loss": 0.2877, "step": 6352 }, { "epoch": 2.1212020033388983, "grad_norm": 0.9952006160972763, "learning_rate": 2.3942186224918003e-06, "loss": 0.3239, "step": 6353 }, { "epoch": 2.121535893155259, "grad_norm": 0.9692378662445612, "learning_rate": 2.3925608748248995e-06, "loss": 0.3092, "step": 6354 }, { "epoch": 2.1218697829716193, "grad_norm": 0.9715060895505515, "learning_rate": 2.3909035207501657e-06, "loss": 0.3115, "step": 6355 }, { "epoch": 2.12220367278798, "grad_norm": 0.9457433546790045, "learning_rate": 2.3892465605177733e-06, "loss": 0.297, "step": 6356 }, { "epoch": 2.1225375626043403, "grad_norm": 0.9594970266099548, "learning_rate": 2.3875899943778395e-06, "loss": 0.3019, "step": 6357 }, { "epoch": 2.1228714524207013, "grad_norm": 0.9879584488125953, "learning_rate": 2.3859338225804234e-06, "loss": 0.3014, "step": 6358 }, { "epoch": 2.123205342237062, "grad_norm": 0.9865458380805734, "learning_rate": 2.3842780453755232e-06, "loss": 0.3033, "step": 6359 }, { "epoch": 2.1235392320534223, "grad_norm": 0.9949889834337594, "learning_rate": 2.3826226630130778e-06, "loss": 0.307, "step": 6360 }, { "epoch": 2.123873121869783, "grad_norm": 0.9530982632137394, "learning_rate": 2.380967675742967e-06, "loss": 0.304, "step": 6361 }, { "epoch": 2.124207011686144, "grad_norm": 0.9430075861739836, "learning_rate": 2.3793130838150095e-06, "loss": 0.2979, "step": 6362 }, { "epoch": 2.1245409015025043, "grad_norm": 0.9581934423667985, "learning_rate": 2.3776588874789677e-06, "loss": 0.3073, "step": 6363 }, { "epoch": 2.124874791318865, "grad_norm": 0.9383630449279227, "learning_rate": 2.37600508698454e-06, "loss": 0.288, "step": 6364 }, { "epoch": 2.1252086811352253, "grad_norm": 0.9643657411741914, "learning_rate": 2.374351682581368e-06, "loss": 0.289, "step": 6365 }, { "epoch": 2.125542570951586, "grad_norm": 0.9723993733162329, "learning_rate": 2.3726986745190332e-06, "loss": 0.3036, "step": 6366 }, { "epoch": 2.125876460767947, "grad_norm": 0.9622725937669385, "learning_rate": 2.3710460630470554e-06, "loss": 0.2961, "step": 6367 }, { "epoch": 2.1262103505843073, "grad_norm": 0.9779872568821083, "learning_rate": 2.3693938484148965e-06, "loss": 0.3011, "step": 6368 }, { "epoch": 2.126544240400668, "grad_norm": 0.9545745457460667, "learning_rate": 2.3677420308719577e-06, "loss": 0.2942, "step": 6369 }, { "epoch": 2.1268781302170283, "grad_norm": 0.9727969713136001, "learning_rate": 2.3660906106675803e-06, "loss": 0.3034, "step": 6370 }, { "epoch": 2.127212020033389, "grad_norm": 0.9601714924870981, "learning_rate": 2.364439588051044e-06, "loss": 0.3009, "step": 6371 }, { "epoch": 2.12754590984975, "grad_norm": 0.9716342510335819, "learning_rate": 2.3627889632715756e-06, "loss": 0.2938, "step": 6372 }, { "epoch": 2.1278797996661103, "grad_norm": 0.9732146129584138, "learning_rate": 2.3611387365783304e-06, "loss": 0.2961, "step": 6373 }, { "epoch": 2.128213689482471, "grad_norm": 0.9776416111516394, "learning_rate": 2.3594889082204124e-06, "loss": 0.2992, "step": 6374 }, { "epoch": 2.1285475792988313, "grad_norm": 0.9516578862935087, "learning_rate": 2.3578394784468607e-06, "loss": 0.3063, "step": 6375 }, { "epoch": 2.128881469115192, "grad_norm": 0.9898703718001677, "learning_rate": 2.3561904475066593e-06, "loss": 0.2995, "step": 6376 }, { "epoch": 2.129215358931553, "grad_norm": 0.9557732804374715, "learning_rate": 2.3545418156487275e-06, "loss": 0.2926, "step": 6377 }, { "epoch": 2.1295492487479133, "grad_norm": 0.9160306849342709, "learning_rate": 2.352893583121928e-06, "loss": 0.2835, "step": 6378 }, { "epoch": 2.129883138564274, "grad_norm": 0.9413138447239944, "learning_rate": 2.351245750175055e-06, "loss": 0.2976, "step": 6379 }, { "epoch": 2.1302170283806343, "grad_norm": 0.9561134444315846, "learning_rate": 2.3495983170568538e-06, "loss": 0.3005, "step": 6380 }, { "epoch": 2.130550918196995, "grad_norm": 0.9569877962749908, "learning_rate": 2.347951284016002e-06, "loss": 0.3064, "step": 6381 }, { "epoch": 2.130884808013356, "grad_norm": 0.9863202553262229, "learning_rate": 2.346304651301119e-06, "loss": 0.3044, "step": 6382 }, { "epoch": 2.1312186978297163, "grad_norm": 0.9898306494388555, "learning_rate": 2.344658419160764e-06, "loss": 0.3004, "step": 6383 }, { "epoch": 2.131552587646077, "grad_norm": 1.0260522577464, "learning_rate": 2.3430125878434346e-06, "loss": 0.3071, "step": 6384 }, { "epoch": 2.1318864774624373, "grad_norm": 0.9625564533402923, "learning_rate": 2.341367157597569e-06, "loss": 0.3, "step": 6385 }, { "epoch": 2.132220367278798, "grad_norm": 0.9639924688248648, "learning_rate": 2.339722128671544e-06, "loss": 0.285, "step": 6386 }, { "epoch": 2.132554257095159, "grad_norm": 0.9978423415693195, "learning_rate": 2.3380775013136763e-06, "loss": 0.3062, "step": 6387 }, { "epoch": 2.1328881469115193, "grad_norm": 0.9806980476116095, "learning_rate": 2.336433275772222e-06, "loss": 0.3018, "step": 6388 }, { "epoch": 2.13322203672788, "grad_norm": 0.947592113123143, "learning_rate": 2.334789452295377e-06, "loss": 0.2937, "step": 6389 }, { "epoch": 2.1335559265442403, "grad_norm": 0.9906178123443928, "learning_rate": 2.333146031131275e-06, "loss": 0.3181, "step": 6390 }, { "epoch": 2.133889816360601, "grad_norm": 1.0207313115582533, "learning_rate": 2.331503012527988e-06, "loss": 0.3136, "step": 6391 }, { "epoch": 2.1342237061769618, "grad_norm": 0.9580852915445941, "learning_rate": 2.329860396733534e-06, "loss": 0.2903, "step": 6392 }, { "epoch": 2.1345575959933223, "grad_norm": 0.9599133923537017, "learning_rate": 2.328218183995865e-06, "loss": 0.3045, "step": 6393 }, { "epoch": 2.134891485809683, "grad_norm": 0.9619398741867933, "learning_rate": 2.326576374562868e-06, "loss": 0.3063, "step": 6394 }, { "epoch": 2.1352253756260433, "grad_norm": 0.9436209728722118, "learning_rate": 2.3249349686823767e-06, "loss": 0.2942, "step": 6395 }, { "epoch": 2.135559265442404, "grad_norm": 0.9406127986789138, "learning_rate": 2.323293966602158e-06, "loss": 0.3055, "step": 6396 }, { "epoch": 2.1358931552587648, "grad_norm": 0.9863918181151416, "learning_rate": 2.321653368569925e-06, "loss": 0.3154, "step": 6397 }, { "epoch": 2.1362270450751253, "grad_norm": 0.9359121600602665, "learning_rate": 2.3200131748333254e-06, "loss": 0.3017, "step": 6398 }, { "epoch": 2.136560934891486, "grad_norm": 0.958231419204102, "learning_rate": 2.3183733856399415e-06, "loss": 0.2985, "step": 6399 }, { "epoch": 2.1368948247078463, "grad_norm": 0.9527884910420023, "learning_rate": 2.3167340012373003e-06, "loss": 0.2872, "step": 6400 }, { "epoch": 2.137228714524207, "grad_norm": 0.959140073466506, "learning_rate": 2.315095021872869e-06, "loss": 0.3015, "step": 6401 }, { "epoch": 2.1375626043405678, "grad_norm": 0.9659034983892882, "learning_rate": 2.31345644779405e-06, "loss": 0.299, "step": 6402 }, { "epoch": 2.1378964941569283, "grad_norm": 0.9790760295862692, "learning_rate": 2.311818279248184e-06, "loss": 0.3043, "step": 6403 }, { "epoch": 2.138230383973289, "grad_norm": 0.9475826145408548, "learning_rate": 2.3101805164825542e-06, "loss": 0.2933, "step": 6404 }, { "epoch": 2.1385642737896493, "grad_norm": 0.9663794057558707, "learning_rate": 2.3085431597443797e-06, "loss": 0.3078, "step": 6405 }, { "epoch": 2.13889816360601, "grad_norm": 0.9946851319230906, "learning_rate": 2.306906209280818e-06, "loss": 0.3012, "step": 6406 }, { "epoch": 2.1392320534223708, "grad_norm": 0.9905492376455969, "learning_rate": 2.305269665338967e-06, "loss": 0.3016, "step": 6407 }, { "epoch": 2.1395659432387313, "grad_norm": 0.9915696873658156, "learning_rate": 2.3036335281658634e-06, "loss": 0.3076, "step": 6408 }, { "epoch": 2.139899833055092, "grad_norm": 0.966776875028706, "learning_rate": 2.30199779800848e-06, "loss": 0.2963, "step": 6409 }, { "epoch": 2.1402337228714523, "grad_norm": 0.9472196697359453, "learning_rate": 2.300362475113731e-06, "loss": 0.303, "step": 6410 }, { "epoch": 2.140567612687813, "grad_norm": 0.9609308554960251, "learning_rate": 2.2987275597284668e-06, "loss": 0.2955, "step": 6411 }, { "epoch": 2.1409015025041738, "grad_norm": 0.9697628434128674, "learning_rate": 2.2970930520994766e-06, "loss": 0.3018, "step": 6412 }, { "epoch": 2.1412353923205343, "grad_norm": 0.9787492406346877, "learning_rate": 2.2954589524734944e-06, "loss": 0.2988, "step": 6413 }, { "epoch": 2.141569282136895, "grad_norm": 0.9850929119611562, "learning_rate": 2.2938252610971805e-06, "loss": 0.3008, "step": 6414 }, { "epoch": 2.1419031719532553, "grad_norm": 0.9369001688788258, "learning_rate": 2.292191978217143e-06, "loss": 0.2832, "step": 6415 }, { "epoch": 2.142237061769616, "grad_norm": 0.9640117620469298, "learning_rate": 2.290559104079923e-06, "loss": 0.2943, "step": 6416 }, { "epoch": 2.1425709515859768, "grad_norm": 0.9527103958957395, "learning_rate": 2.2889266389320065e-06, "loss": 0.2973, "step": 6417 }, { "epoch": 2.1429048414023373, "grad_norm": 0.9540775437975441, "learning_rate": 2.2872945830198113e-06, "loss": 0.2988, "step": 6418 }, { "epoch": 2.143238731218698, "grad_norm": 0.9741252152072153, "learning_rate": 2.2856629365896978e-06, "loss": 0.2978, "step": 6419 }, { "epoch": 2.1435726210350583, "grad_norm": 0.9832015136496343, "learning_rate": 2.2840316998879573e-06, "loss": 0.2915, "step": 6420 }, { "epoch": 2.1439065108514193, "grad_norm": 0.957916788196669, "learning_rate": 2.282400873160829e-06, "loss": 0.2854, "step": 6421 }, { "epoch": 2.1442404006677798, "grad_norm": 0.9611469879083196, "learning_rate": 2.2807704566544857e-06, "loss": 0.3077, "step": 6422 }, { "epoch": 2.1445742904841403, "grad_norm": 0.9928972109086586, "learning_rate": 2.2791404506150373e-06, "loss": 0.3142, "step": 6423 }, { "epoch": 2.144908180300501, "grad_norm": 0.965976472788614, "learning_rate": 2.277510855288534e-06, "loss": 0.3019, "step": 6424 }, { "epoch": 2.1452420701168613, "grad_norm": 0.9942835212470457, "learning_rate": 2.275881670920958e-06, "loss": 0.295, "step": 6425 }, { "epoch": 2.145575959933222, "grad_norm": 0.9481332439370097, "learning_rate": 2.274252897758239e-06, "loss": 0.2886, "step": 6426 }, { "epoch": 2.1459098497495828, "grad_norm": 0.9666593214508001, "learning_rate": 2.272624536046239e-06, "loss": 0.305, "step": 6427 }, { "epoch": 2.1462437395659433, "grad_norm": 0.9358953388276595, "learning_rate": 2.2709965860307575e-06, "loss": 0.2824, "step": 6428 }, { "epoch": 2.146577629382304, "grad_norm": 1.0057250971296738, "learning_rate": 2.269369047957534e-06, "loss": 0.3118, "step": 6429 }, { "epoch": 2.1469115191986643, "grad_norm": 0.9687044482873574, "learning_rate": 2.2677419220722453e-06, "loss": 0.2939, "step": 6430 }, { "epoch": 2.1472454090150253, "grad_norm": 1.0329226627657961, "learning_rate": 2.266115208620504e-06, "loss": 0.3204, "step": 6431 }, { "epoch": 2.1475792988313858, "grad_norm": 1.0003475520001588, "learning_rate": 2.2644889078478632e-06, "loss": 0.3067, "step": 6432 }, { "epoch": 2.1479131886477463, "grad_norm": 0.9826977782713572, "learning_rate": 2.2628630199998125e-06, "loss": 0.2997, "step": 6433 }, { "epoch": 2.148247078464107, "grad_norm": 0.9801782075164932, "learning_rate": 2.2612375453217787e-06, "loss": 0.3174, "step": 6434 }, { "epoch": 2.1485809682804673, "grad_norm": 0.9899140000865488, "learning_rate": 2.2596124840591266e-06, "loss": 0.3037, "step": 6435 }, { "epoch": 2.1489148580968283, "grad_norm": 0.9894802927359038, "learning_rate": 2.2579878364571595e-06, "loss": 0.3051, "step": 6436 }, { "epoch": 2.1492487479131888, "grad_norm": 0.9293123541242109, "learning_rate": 2.256363602761115e-06, "loss": 0.2781, "step": 6437 }, { "epoch": 2.1495826377295493, "grad_norm": 0.9828964412343448, "learning_rate": 2.254739783216175e-06, "loss": 0.2948, "step": 6438 }, { "epoch": 2.14991652754591, "grad_norm": 0.9658946849784302, "learning_rate": 2.2531163780674535e-06, "loss": 0.3034, "step": 6439 }, { "epoch": 2.1502504173622703, "grad_norm": 0.9519349247560178, "learning_rate": 2.2514933875600004e-06, "loss": 0.2988, "step": 6440 }, { "epoch": 2.1505843071786312, "grad_norm": 1.0021554052611725, "learning_rate": 2.249870811938805e-06, "loss": 0.3103, "step": 6441 }, { "epoch": 2.1509181969949918, "grad_norm": 0.9415765457942498, "learning_rate": 2.248248651448798e-06, "loss": 0.2919, "step": 6442 }, { "epoch": 2.1512520868113523, "grad_norm": 0.9795864394955281, "learning_rate": 2.2466269063348438e-06, "loss": 0.2948, "step": 6443 }, { "epoch": 2.151585976627713, "grad_norm": 0.9918074645393713, "learning_rate": 2.2450055768417445e-06, "loss": 0.295, "step": 6444 }, { "epoch": 2.1519198664440733, "grad_norm": 0.95007826272883, "learning_rate": 2.243384663214235e-06, "loss": 0.29, "step": 6445 }, { "epoch": 2.1522537562604342, "grad_norm": 0.9665694306708199, "learning_rate": 2.2417641656969957e-06, "loss": 0.3011, "step": 6446 }, { "epoch": 2.1525876460767948, "grad_norm": 0.9969853958085835, "learning_rate": 2.24014408453464e-06, "loss": 0.3065, "step": 6447 }, { "epoch": 2.1529215358931553, "grad_norm": 0.9818690134831286, "learning_rate": 2.2385244199717175e-06, "loss": 0.2984, "step": 6448 }, { "epoch": 2.153255425709516, "grad_norm": 1.0080674194965216, "learning_rate": 2.2369051722527176e-06, "loss": 0.296, "step": 6449 }, { "epoch": 2.1535893155258763, "grad_norm": 0.9746103503370922, "learning_rate": 2.2352863416220634e-06, "loss": 0.297, "step": 6450 }, { "epoch": 2.1539232053422372, "grad_norm": 1.0039621705449213, "learning_rate": 2.2336679283241176e-06, "loss": 0.3014, "step": 6451 }, { "epoch": 2.1542570951585978, "grad_norm": 0.9783951140714325, "learning_rate": 2.2320499326031796e-06, "loss": 0.2961, "step": 6452 }, { "epoch": 2.1545909849749583, "grad_norm": 0.9939024204185829, "learning_rate": 2.2304323547034834e-06, "loss": 0.2977, "step": 6453 }, { "epoch": 2.154924874791319, "grad_norm": 0.9625368965940335, "learning_rate": 2.228815194869207e-06, "loss": 0.2942, "step": 6454 }, { "epoch": 2.1552587646076793, "grad_norm": 1.0152079438039205, "learning_rate": 2.2271984533444555e-06, "loss": 0.2955, "step": 6455 }, { "epoch": 2.1555926544240402, "grad_norm": 0.9968078405044848, "learning_rate": 2.2255821303732765e-06, "loss": 0.2935, "step": 6456 }, { "epoch": 2.1559265442404008, "grad_norm": 1.0093124049535973, "learning_rate": 2.223966226199654e-06, "loss": 0.3065, "step": 6457 }, { "epoch": 2.1562604340567613, "grad_norm": 1.0364193040168577, "learning_rate": 2.2223507410675064e-06, "loss": 0.3149, "step": 6458 }, { "epoch": 2.1565943238731218, "grad_norm": 0.9779230987104492, "learning_rate": 2.2207356752206967e-06, "loss": 0.2937, "step": 6459 }, { "epoch": 2.1569282136894823, "grad_norm": 0.9811161692005987, "learning_rate": 2.2191210289030118e-06, "loss": 0.3016, "step": 6460 }, { "epoch": 2.1572621035058432, "grad_norm": 1.0188251029046005, "learning_rate": 2.2175068023581847e-06, "loss": 0.3026, "step": 6461 }, { "epoch": 2.1575959933222038, "grad_norm": 0.961198437841407, "learning_rate": 2.2158929958298805e-06, "loss": 0.2975, "step": 6462 }, { "epoch": 2.1579298831385643, "grad_norm": 1.0049718794564184, "learning_rate": 2.2142796095617063e-06, "loss": 0.3238, "step": 6463 }, { "epoch": 2.1582637729549248, "grad_norm": 0.9802059935564276, "learning_rate": 2.2126666437972005e-06, "loss": 0.2915, "step": 6464 }, { "epoch": 2.1585976627712853, "grad_norm": 1.0260235925768921, "learning_rate": 2.2110540987798425e-06, "loss": 0.3246, "step": 6465 }, { "epoch": 2.1589315525876462, "grad_norm": 0.9775286688427407, "learning_rate": 2.209441974753039e-06, "loss": 0.2991, "step": 6466 }, { "epoch": 2.1592654424040068, "grad_norm": 1.0019689266446965, "learning_rate": 2.2078302719601454e-06, "loss": 0.3074, "step": 6467 }, { "epoch": 2.1595993322203673, "grad_norm": 1.020552877572038, "learning_rate": 2.2062189906444453e-06, "loss": 0.3126, "step": 6468 }, { "epoch": 2.1599332220367278, "grad_norm": 1.027827712607566, "learning_rate": 2.204608131049162e-06, "loss": 0.3088, "step": 6469 }, { "epoch": 2.1602671118530883, "grad_norm": 0.9443094753409272, "learning_rate": 2.202997693417455e-06, "loss": 0.289, "step": 6470 }, { "epoch": 2.1606010016694492, "grad_norm": 1.0239949609027401, "learning_rate": 2.2013876779924176e-06, "loss": 0.3072, "step": 6471 }, { "epoch": 2.1609348914858098, "grad_norm": 0.974753083455808, "learning_rate": 2.1997780850170827e-06, "loss": 0.2978, "step": 6472 }, { "epoch": 2.1612687813021703, "grad_norm": 0.982925430561306, "learning_rate": 2.1981689147344166e-06, "loss": 0.2952, "step": 6473 }, { "epoch": 2.1616026711185308, "grad_norm": 0.9859591095129034, "learning_rate": 2.196560167387324e-06, "loss": 0.3004, "step": 6474 }, { "epoch": 2.1619365609348913, "grad_norm": 0.9532660028547529, "learning_rate": 2.194951843218644e-06, "loss": 0.2849, "step": 6475 }, { "epoch": 2.1622704507512522, "grad_norm": 0.9921891019556409, "learning_rate": 2.1933439424711534e-06, "loss": 0.2989, "step": 6476 }, { "epoch": 2.1626043405676127, "grad_norm": 1.0051961817947923, "learning_rate": 2.1917364653875643e-06, "loss": 0.2968, "step": 6477 }, { "epoch": 2.1629382303839733, "grad_norm": 0.9376528508599871, "learning_rate": 2.190129412210523e-06, "loss": 0.2853, "step": 6478 }, { "epoch": 2.1632721202003338, "grad_norm": 0.9588913681204648, "learning_rate": 2.1885227831826182e-06, "loss": 0.2941, "step": 6479 }, { "epoch": 2.1636060100166947, "grad_norm": 1.0251395768743714, "learning_rate": 2.186916578546366e-06, "loss": 0.3116, "step": 6480 }, { "epoch": 2.1639398998330552, "grad_norm": 0.9909698067502922, "learning_rate": 2.185310798544224e-06, "loss": 0.2983, "step": 6481 }, { "epoch": 2.1642737896494157, "grad_norm": 0.9940936866881012, "learning_rate": 2.1837054434185816e-06, "loss": 0.3046, "step": 6482 }, { "epoch": 2.1646076794657763, "grad_norm": 0.9854451930918061, "learning_rate": 2.1821005134117714e-06, "loss": 0.2977, "step": 6483 }, { "epoch": 2.1649415692821368, "grad_norm": 0.9813563876421556, "learning_rate": 2.180496008766055e-06, "loss": 0.2971, "step": 6484 }, { "epoch": 2.1652754590984973, "grad_norm": 0.9882576922368429, "learning_rate": 2.178891929723633e-06, "loss": 0.3118, "step": 6485 }, { "epoch": 2.1656093489148582, "grad_norm": 0.9983192404108642, "learning_rate": 2.1772882765266374e-06, "loss": 0.3163, "step": 6486 }, { "epoch": 2.1659432387312187, "grad_norm": 0.9990413965147923, "learning_rate": 2.1756850494171392e-06, "loss": 0.3116, "step": 6487 }, { "epoch": 2.1662771285475793, "grad_norm": 0.9801538724463756, "learning_rate": 2.1740822486371492e-06, "loss": 0.3115, "step": 6488 }, { "epoch": 2.1666110183639398, "grad_norm": 0.985480054949782, "learning_rate": 2.1724798744286076e-06, "loss": 0.2896, "step": 6489 }, { "epoch": 2.1669449081803007, "grad_norm": 1.013272314274242, "learning_rate": 2.170877927033394e-06, "loss": 0.3154, "step": 6490 }, { "epoch": 2.1672787979966612, "grad_norm": 0.9905187170020571, "learning_rate": 2.169276406693316e-06, "loss": 0.3029, "step": 6491 }, { "epoch": 2.1676126878130217, "grad_norm": 0.9921581847422817, "learning_rate": 2.167675313650129e-06, "loss": 0.3163, "step": 6492 }, { "epoch": 2.1679465776293823, "grad_norm": 0.9936367807778101, "learning_rate": 2.1660746481455146e-06, "loss": 0.3042, "step": 6493 }, { "epoch": 2.1682804674457428, "grad_norm": 0.9667124534485079, "learning_rate": 2.164474410421094e-06, "loss": 0.3006, "step": 6494 }, { "epoch": 2.1686143572621033, "grad_norm": 1.0127697494251273, "learning_rate": 2.162874600718422e-06, "loss": 0.2946, "step": 6495 }, { "epoch": 2.1689482470784642, "grad_norm": 1.0154905237531504, "learning_rate": 2.161275219278989e-06, "loss": 0.2991, "step": 6496 }, { "epoch": 2.1692821368948247, "grad_norm": 1.0289204068133646, "learning_rate": 2.159676266344222e-06, "loss": 0.3177, "step": 6497 }, { "epoch": 2.1696160267111853, "grad_norm": 0.9716174662094176, "learning_rate": 2.1580777421554814e-06, "loss": 0.3022, "step": 6498 }, { "epoch": 2.1699499165275458, "grad_norm": 1.0104514755089797, "learning_rate": 2.1564796469540633e-06, "loss": 0.3146, "step": 6499 }, { "epoch": 2.1702838063439067, "grad_norm": 1.0011834621904634, "learning_rate": 2.154881980981205e-06, "loss": 0.2955, "step": 6500 }, { "epoch": 2.1706176961602672, "grad_norm": 0.9939181858805098, "learning_rate": 2.153284744478067e-06, "loss": 0.2954, "step": 6501 }, { "epoch": 2.1709515859766277, "grad_norm": 1.0538353788988317, "learning_rate": 2.1516879376857548e-06, "loss": 0.3287, "step": 6502 }, { "epoch": 2.1712854757929883, "grad_norm": 0.9921728270772827, "learning_rate": 2.150091560845304e-06, "loss": 0.2918, "step": 6503 }, { "epoch": 2.1716193656093488, "grad_norm": 0.9843970705579259, "learning_rate": 2.1484956141976897e-06, "loss": 0.2897, "step": 6504 }, { "epoch": 2.1719532554257097, "grad_norm": 0.9982402757745586, "learning_rate": 2.146900097983821e-06, "loss": 0.3007, "step": 6505 }, { "epoch": 2.1722871452420702, "grad_norm": 0.9684981403177357, "learning_rate": 2.1453050124445364e-06, "loss": 0.3005, "step": 6506 }, { "epoch": 2.1726210350584307, "grad_norm": 1.0233563061939346, "learning_rate": 2.1437103578206137e-06, "loss": 0.2968, "step": 6507 }, { "epoch": 2.1729549248747912, "grad_norm": 0.9814967226064166, "learning_rate": 2.1421161343527685e-06, "loss": 0.3033, "step": 6508 }, { "epoch": 2.1732888146911518, "grad_norm": 0.9578929202818213, "learning_rate": 2.1405223422816474e-06, "loss": 0.3047, "step": 6509 }, { "epoch": 2.1736227045075127, "grad_norm": 0.9642174350368188, "learning_rate": 2.138928981847832e-06, "loss": 0.2987, "step": 6510 }, { "epoch": 2.1739565943238732, "grad_norm": 1.0195038829357541, "learning_rate": 2.1373360532918406e-06, "loss": 0.3069, "step": 6511 }, { "epoch": 2.1742904841402337, "grad_norm": 0.9779315864167817, "learning_rate": 2.1357435568541245e-06, "loss": 0.3012, "step": 6512 }, { "epoch": 2.1746243739565942, "grad_norm": 1.0069252246551916, "learning_rate": 2.134151492775071e-06, "loss": 0.291, "step": 6513 }, { "epoch": 2.1749582637729548, "grad_norm": 1.004348873752671, "learning_rate": 2.132559861295002e-06, "loss": 0.3149, "step": 6514 }, { "epoch": 2.1752921535893157, "grad_norm": 0.990831811160622, "learning_rate": 2.1309686626541733e-06, "loss": 0.2983, "step": 6515 }, { "epoch": 2.1756260434056762, "grad_norm": 0.9504600398029875, "learning_rate": 2.129377897092775e-06, "loss": 0.288, "step": 6516 }, { "epoch": 2.1759599332220367, "grad_norm": 0.9601280602407725, "learning_rate": 2.1277875648509344e-06, "loss": 0.2812, "step": 6517 }, { "epoch": 2.1762938230383972, "grad_norm": 1.0103370450380946, "learning_rate": 2.1261976661687106e-06, "loss": 0.3001, "step": 6518 }, { "epoch": 2.1766277128547578, "grad_norm": 0.9809619673889175, "learning_rate": 2.1246082012860993e-06, "loss": 0.2968, "step": 6519 }, { "epoch": 2.1769616026711187, "grad_norm": 1.0552165435793492, "learning_rate": 2.123019170443028e-06, "loss": 0.3133, "step": 6520 }, { "epoch": 2.1772954924874792, "grad_norm": 1.0054144733065469, "learning_rate": 2.121430573879362e-06, "loss": 0.3021, "step": 6521 }, { "epoch": 2.1776293823038397, "grad_norm": 1.0094201950346715, "learning_rate": 2.1198424118348994e-06, "loss": 0.2948, "step": 6522 }, { "epoch": 2.1779632721202002, "grad_norm": 0.9604354868053689, "learning_rate": 2.1182546845493716e-06, "loss": 0.2895, "step": 6523 }, { "epoch": 2.1782971619365608, "grad_norm": 0.9695977812138888, "learning_rate": 2.1166673922624454e-06, "loss": 0.2949, "step": 6524 }, { "epoch": 2.1786310517529217, "grad_norm": 0.9972552724220501, "learning_rate": 2.115080535213726e-06, "loss": 0.315, "step": 6525 }, { "epoch": 2.178964941569282, "grad_norm": 0.9392855184807244, "learning_rate": 2.1134941136427446e-06, "loss": 0.2784, "step": 6526 }, { "epoch": 2.1792988313856427, "grad_norm": 0.977224328805791, "learning_rate": 2.1119081277889726e-06, "loss": 0.3032, "step": 6527 }, { "epoch": 2.1796327212020032, "grad_norm": 0.9727727899748981, "learning_rate": 2.1103225778918118e-06, "loss": 0.2979, "step": 6528 }, { "epoch": 2.1799666110183638, "grad_norm": 0.9860143835513098, "learning_rate": 2.108737464190605e-06, "loss": 0.3021, "step": 6529 }, { "epoch": 2.1803005008347247, "grad_norm": 1.02981907413306, "learning_rate": 2.1071527869246224e-06, "loss": 0.3035, "step": 6530 }, { "epoch": 2.180634390651085, "grad_norm": 1.0113905350192685, "learning_rate": 2.105568546333072e-06, "loss": 0.3063, "step": 6531 }, { "epoch": 2.1809682804674457, "grad_norm": 1.0108249875694113, "learning_rate": 2.103984742655089e-06, "loss": 0.3096, "step": 6532 }, { "epoch": 2.1813021702838062, "grad_norm": 0.972115828090503, "learning_rate": 2.1024013761297547e-06, "loss": 0.3058, "step": 6533 }, { "epoch": 2.1816360601001668, "grad_norm": 0.987034678911052, "learning_rate": 2.100818446996075e-06, "loss": 0.3004, "step": 6534 }, { "epoch": 2.1819699499165277, "grad_norm": 0.9765180997554043, "learning_rate": 2.099235955492992e-06, "loss": 0.2942, "step": 6535 }, { "epoch": 2.182303839732888, "grad_norm": 1.0013282661939553, "learning_rate": 2.0976539018593833e-06, "loss": 0.2997, "step": 6536 }, { "epoch": 2.1826377295492487, "grad_norm": 0.9330016327283838, "learning_rate": 2.09607228633406e-06, "loss": 0.2882, "step": 6537 }, { "epoch": 2.1829716193656092, "grad_norm": 0.9840995231970457, "learning_rate": 2.0944911091557646e-06, "loss": 0.2924, "step": 6538 }, { "epoch": 2.1833055091819698, "grad_norm": 0.9533614058486878, "learning_rate": 2.092910370563177e-06, "loss": 0.2903, "step": 6539 }, { "epoch": 2.1836393989983307, "grad_norm": 0.9592409062875049, "learning_rate": 2.0913300707949093e-06, "loss": 0.2958, "step": 6540 }, { "epoch": 2.183973288814691, "grad_norm": 0.9759285843974199, "learning_rate": 2.0897502100895054e-06, "loss": 0.3053, "step": 6541 }, { "epoch": 2.1843071786310517, "grad_norm": 0.9656229654405849, "learning_rate": 2.0881707886854467e-06, "loss": 0.3048, "step": 6542 }, { "epoch": 2.1846410684474122, "grad_norm": 1.021457587380571, "learning_rate": 2.086591806821146e-06, "loss": 0.3076, "step": 6543 }, { "epoch": 2.1849749582637727, "grad_norm": 0.9871175607724831, "learning_rate": 2.0850132647349474e-06, "loss": 0.3075, "step": 6544 }, { "epoch": 2.1853088480801337, "grad_norm": 1.0133243600318196, "learning_rate": 2.083435162665136e-06, "loss": 0.2976, "step": 6545 }, { "epoch": 2.185642737896494, "grad_norm": 1.0140996168203593, "learning_rate": 2.081857500849926e-06, "loss": 0.3018, "step": 6546 }, { "epoch": 2.1859766277128547, "grad_norm": 1.0203391371451436, "learning_rate": 2.08028027952746e-06, "loss": 0.2838, "step": 6547 }, { "epoch": 2.1863105175292152, "grad_norm": 1.012309020487552, "learning_rate": 2.078703498935822e-06, "loss": 0.3189, "step": 6548 }, { "epoch": 2.186644407345576, "grad_norm": 0.9718882580182518, "learning_rate": 2.0771271593130248e-06, "loss": 0.3042, "step": 6549 }, { "epoch": 2.1869782971619367, "grad_norm": 1.0035896641859536, "learning_rate": 2.0755512608970196e-06, "loss": 0.2943, "step": 6550 }, { "epoch": 2.187312186978297, "grad_norm": 0.9953263811498162, "learning_rate": 2.073975803925688e-06, "loss": 0.305, "step": 6551 }, { "epoch": 2.1876460767946577, "grad_norm": 1.003335774913937, "learning_rate": 2.0724007886368406e-06, "loss": 0.3012, "step": 6552 }, { "epoch": 2.1879799666110182, "grad_norm": 1.017788318119562, "learning_rate": 2.070826215268226e-06, "loss": 0.3128, "step": 6553 }, { "epoch": 2.1883138564273787, "grad_norm": 1.0275254208551388, "learning_rate": 2.0692520840575297e-06, "loss": 0.3114, "step": 6554 }, { "epoch": 2.1886477462437397, "grad_norm": 0.954448071570975, "learning_rate": 2.067678395242364e-06, "loss": 0.2804, "step": 6555 }, { "epoch": 2.1889816360601, "grad_norm": 1.0096380755532661, "learning_rate": 2.066105149060277e-06, "loss": 0.3014, "step": 6556 }, { "epoch": 2.1893155258764607, "grad_norm": 0.9567962940210086, "learning_rate": 2.0645323457487485e-06, "loss": 0.2858, "step": 6557 }, { "epoch": 2.1896494156928212, "grad_norm": 1.0069387667744023, "learning_rate": 2.0629599855451944e-06, "loss": 0.3016, "step": 6558 }, { "epoch": 2.189983305509182, "grad_norm": 0.9844345813586944, "learning_rate": 2.0613880686869613e-06, "loss": 0.299, "step": 6559 }, { "epoch": 2.1903171953255427, "grad_norm": 0.9472770508674359, "learning_rate": 2.0598165954113293e-06, "loss": 0.2892, "step": 6560 }, { "epoch": 2.190651085141903, "grad_norm": 0.9793498162377183, "learning_rate": 2.058245565955512e-06, "loss": 0.3166, "step": 6561 }, { "epoch": 2.1909849749582637, "grad_norm": 0.9874080489970778, "learning_rate": 2.0566749805566565e-06, "loss": 0.3025, "step": 6562 }, { "epoch": 2.1913188647746242, "grad_norm": 0.9856448289885948, "learning_rate": 2.055104839451841e-06, "loss": 0.295, "step": 6563 }, { "epoch": 2.1916527545909847, "grad_norm": 0.9941225241172199, "learning_rate": 2.0535351428780794e-06, "loss": 0.3017, "step": 6564 }, { "epoch": 2.1919866444073457, "grad_norm": 0.9663366597063141, "learning_rate": 2.0519658910723134e-06, "loss": 0.2966, "step": 6565 }, { "epoch": 2.192320534223706, "grad_norm": 1.0132110535721974, "learning_rate": 2.050397084271428e-06, "loss": 0.3039, "step": 6566 }, { "epoch": 2.1926544240400667, "grad_norm": 0.96239103555714, "learning_rate": 2.048828722712227e-06, "loss": 0.2961, "step": 6567 }, { "epoch": 2.1929883138564272, "grad_norm": 1.0034960550805634, "learning_rate": 2.047260806631458e-06, "loss": 0.3005, "step": 6568 }, { "epoch": 2.193322203672788, "grad_norm": 0.9929134322803279, "learning_rate": 2.045693336265794e-06, "loss": 0.2868, "step": 6569 }, { "epoch": 2.1936560934891487, "grad_norm": 0.9856780192165729, "learning_rate": 2.044126311851848e-06, "loss": 0.2872, "step": 6570 }, { "epoch": 2.193989983305509, "grad_norm": 0.983287453616207, "learning_rate": 2.0425597336261637e-06, "loss": 0.2997, "step": 6571 }, { "epoch": 2.1943238731218697, "grad_norm": 0.993746637082134, "learning_rate": 2.0409936018252096e-06, "loss": 0.2928, "step": 6572 }, { "epoch": 2.1946577629382302, "grad_norm": 0.9891911485239112, "learning_rate": 2.039427916685394e-06, "loss": 0.2925, "step": 6573 }, { "epoch": 2.194991652754591, "grad_norm": 1.053769410107879, "learning_rate": 2.03786267844306e-06, "loss": 0.3145, "step": 6574 }, { "epoch": 2.1953255425709517, "grad_norm": 1.0170620149792577, "learning_rate": 2.036297887334478e-06, "loss": 0.3028, "step": 6575 }, { "epoch": 2.195659432387312, "grad_norm": 0.9654062650139006, "learning_rate": 2.0347335435958534e-06, "loss": 0.2924, "step": 6576 }, { "epoch": 2.1959933222036727, "grad_norm": 1.027224300789521, "learning_rate": 2.0331696474633223e-06, "loss": 0.2997, "step": 6577 }, { "epoch": 2.1963272120200332, "grad_norm": 1.0139068734930798, "learning_rate": 2.031606199172955e-06, "loss": 0.3023, "step": 6578 }, { "epoch": 2.196661101836394, "grad_norm": 0.9921727721045436, "learning_rate": 2.0300431989607533e-06, "loss": 0.2991, "step": 6579 }, { "epoch": 2.1969949916527547, "grad_norm": 1.002618588793801, "learning_rate": 2.0284806470626524e-06, "loss": 0.2886, "step": 6580 }, { "epoch": 2.197328881469115, "grad_norm": 0.9550973839131992, "learning_rate": 2.0269185437145177e-06, "loss": 0.2938, "step": 6581 }, { "epoch": 2.1976627712854757, "grad_norm": 0.9938179193852864, "learning_rate": 2.0253568891521495e-06, "loss": 0.2982, "step": 6582 }, { "epoch": 2.1979966611018362, "grad_norm": 0.990573647071265, "learning_rate": 2.023795683611279e-06, "loss": 0.3024, "step": 6583 }, { "epoch": 2.198330550918197, "grad_norm": 1.0176778991995834, "learning_rate": 2.022234927327568e-06, "loss": 0.3128, "step": 6584 }, { "epoch": 2.1986644407345577, "grad_norm": 0.9936070247491846, "learning_rate": 2.0206746205366147e-06, "loss": 0.2942, "step": 6585 }, { "epoch": 2.198998330550918, "grad_norm": 0.985577882770341, "learning_rate": 2.0191147634739456e-06, "loss": 0.2989, "step": 6586 }, { "epoch": 2.1993322203672787, "grad_norm": 1.01139114286328, "learning_rate": 2.01755535637502e-06, "loss": 0.2976, "step": 6587 }, { "epoch": 2.1996661101836392, "grad_norm": 0.9693398053834745, "learning_rate": 2.0159963994752312e-06, "loss": 0.2899, "step": 6588 }, { "epoch": 2.2, "grad_norm": 1.0395565623276932, "learning_rate": 2.014437893009903e-06, "loss": 0.3055, "step": 6589 }, { "epoch": 2.2003338898163607, "grad_norm": 1.0283119407192993, "learning_rate": 2.0128798372142894e-06, "loss": 0.2991, "step": 6590 }, { "epoch": 2.200667779632721, "grad_norm": 0.9674858139316094, "learning_rate": 2.011322232323582e-06, "loss": 0.2915, "step": 6591 }, { "epoch": 2.2010016694490817, "grad_norm": 0.978499114020337, "learning_rate": 2.0097650785729013e-06, "loss": 0.2936, "step": 6592 }, { "epoch": 2.201335559265442, "grad_norm": 1.0141492928117555, "learning_rate": 2.0082083761972956e-06, "loss": 0.3034, "step": 6593 }, { "epoch": 2.201669449081803, "grad_norm": 0.9661132440381672, "learning_rate": 2.006652125431747e-06, "loss": 0.291, "step": 6594 }, { "epoch": 2.2020033388981637, "grad_norm": 0.9668936827690023, "learning_rate": 2.0050963265111773e-06, "loss": 0.2953, "step": 6595 }, { "epoch": 2.202337228714524, "grad_norm": 0.984819892647706, "learning_rate": 2.0035409796704305e-06, "loss": 0.3012, "step": 6596 }, { "epoch": 2.2026711185308847, "grad_norm": 1.0189403234577108, "learning_rate": 2.0019860851442873e-06, "loss": 0.3016, "step": 6597 }, { "epoch": 2.203005008347245, "grad_norm": 0.9842552997948671, "learning_rate": 2.0004316431674547e-06, "loss": 0.2955, "step": 6598 }, { "epoch": 2.203338898163606, "grad_norm": 0.9635903381696931, "learning_rate": 1.998877653974579e-06, "loss": 0.2934, "step": 6599 }, { "epoch": 2.2036727879799667, "grad_norm": 0.9888970646926134, "learning_rate": 1.9973241178002336e-06, "loss": 0.2922, "step": 6600 }, { "epoch": 2.204006677796327, "grad_norm": 0.9942657693301857, "learning_rate": 1.995771034878924e-06, "loss": 0.3045, "step": 6601 }, { "epoch": 2.2043405676126877, "grad_norm": 1.0515705374977842, "learning_rate": 1.9942184054450875e-06, "loss": 0.3167, "step": 6602 }, { "epoch": 2.204674457429048, "grad_norm": 1.004492723865505, "learning_rate": 1.992666229733093e-06, "loss": 0.3, "step": 6603 }, { "epoch": 2.205008347245409, "grad_norm": 1.020925134569206, "learning_rate": 1.9911145079772415e-06, "loss": 0.3131, "step": 6604 }, { "epoch": 2.2053422370617697, "grad_norm": 0.9776090573057956, "learning_rate": 1.9895632404117644e-06, "loss": 0.2982, "step": 6605 }, { "epoch": 2.20567612687813, "grad_norm": 1.0119808432358757, "learning_rate": 1.9880124272708233e-06, "loss": 0.3057, "step": 6606 }, { "epoch": 2.2060100166944907, "grad_norm": 1.0285847180215397, "learning_rate": 1.986462068788519e-06, "loss": 0.3186, "step": 6607 }, { "epoch": 2.206343906510851, "grad_norm": 1.0173792749387407, "learning_rate": 1.9849121651988713e-06, "loss": 0.3061, "step": 6608 }, { "epoch": 2.206677796327212, "grad_norm": 1.011455011003675, "learning_rate": 1.9833627167358406e-06, "loss": 0.3033, "step": 6609 }, { "epoch": 2.2070116861435727, "grad_norm": 1.048185147499326, "learning_rate": 1.9818137236333144e-06, "loss": 0.3278, "step": 6610 }, { "epoch": 2.207345575959933, "grad_norm": 1.011114244482361, "learning_rate": 1.980265186125112e-06, "loss": 0.2936, "step": 6611 }, { "epoch": 2.2076794657762937, "grad_norm": 0.9681442398546803, "learning_rate": 1.978717104444989e-06, "loss": 0.2944, "step": 6612 }, { "epoch": 2.208013355592654, "grad_norm": 1.0044575445437205, "learning_rate": 1.9771694788266245e-06, "loss": 0.3057, "step": 6613 }, { "epoch": 2.208347245409015, "grad_norm": 1.0350966997365545, "learning_rate": 1.975622309503632e-06, "loss": 0.3166, "step": 6614 }, { "epoch": 2.2086811352253757, "grad_norm": 0.9921573007425543, "learning_rate": 1.9740755967095547e-06, "loss": 0.291, "step": 6615 }, { "epoch": 2.209015025041736, "grad_norm": 0.980441725702422, "learning_rate": 1.9725293406778727e-06, "loss": 0.3017, "step": 6616 }, { "epoch": 2.2093489148580967, "grad_norm": 1.0023740052268635, "learning_rate": 1.970983541641991e-06, "loss": 0.3023, "step": 6617 }, { "epoch": 2.2096828046744577, "grad_norm": 1.025712945582843, "learning_rate": 1.9694381998352485e-06, "loss": 0.3055, "step": 6618 }, { "epoch": 2.210016694490818, "grad_norm": 0.9903096394410431, "learning_rate": 1.9678933154909096e-06, "loss": 0.2985, "step": 6619 }, { "epoch": 2.2103505843071787, "grad_norm": 1.0013742969914579, "learning_rate": 1.9663488888421795e-06, "loss": 0.3026, "step": 6620 }, { "epoch": 2.210684474123539, "grad_norm": 0.9716215838467406, "learning_rate": 1.964804920122187e-06, "loss": 0.2879, "step": 6621 }, { "epoch": 2.2110183639398997, "grad_norm": 0.9674095732576853, "learning_rate": 1.963261409563993e-06, "loss": 0.2947, "step": 6622 }, { "epoch": 2.21135225375626, "grad_norm": 0.962282345916058, "learning_rate": 1.961718357400591e-06, "loss": 0.2938, "step": 6623 }, { "epoch": 2.211686143572621, "grad_norm": 0.9728074027107977, "learning_rate": 1.9601757638649034e-06, "loss": 0.2997, "step": 6624 }, { "epoch": 2.2120200333889817, "grad_norm": 0.9946456008323445, "learning_rate": 1.958633629189785e-06, "loss": 0.2878, "step": 6625 }, { "epoch": 2.212353923205342, "grad_norm": 1.0138901800516023, "learning_rate": 1.9570919536080195e-06, "loss": 0.3036, "step": 6626 }, { "epoch": 2.2126878130217027, "grad_norm": 0.9601331922416074, "learning_rate": 1.955550737352324e-06, "loss": 0.2931, "step": 6627 }, { "epoch": 2.2130217028380637, "grad_norm": 0.996690384721316, "learning_rate": 1.9540099806553435e-06, "loss": 0.2952, "step": 6628 }, { "epoch": 2.213355592654424, "grad_norm": 1.0057697984898228, "learning_rate": 1.952469683749655e-06, "loss": 0.294, "step": 6629 }, { "epoch": 2.2136894824707847, "grad_norm": 1.0308615967462773, "learning_rate": 1.9509298468677657e-06, "loss": 0.2946, "step": 6630 }, { "epoch": 2.214023372287145, "grad_norm": 1.036199351919529, "learning_rate": 1.949390470242112e-06, "loss": 0.3078, "step": 6631 }, { "epoch": 2.2143572621035057, "grad_norm": 0.9602790328341727, "learning_rate": 1.9478515541050675e-06, "loss": 0.2921, "step": 6632 }, { "epoch": 2.2146911519198667, "grad_norm": 1.0206171603562704, "learning_rate": 1.946313098688926e-06, "loss": 0.303, "step": 6633 }, { "epoch": 2.215025041736227, "grad_norm": 0.9670321688417065, "learning_rate": 1.9447751042259184e-06, "loss": 0.2949, "step": 6634 }, { "epoch": 2.2153589315525877, "grad_norm": 0.9831669065149272, "learning_rate": 1.9432375709482034e-06, "loss": 0.307, "step": 6635 }, { "epoch": 2.215692821368948, "grad_norm": 0.9714533747497015, "learning_rate": 1.9417004990878735e-06, "loss": 0.2825, "step": 6636 }, { "epoch": 2.2160267111853087, "grad_norm": 1.0170897763797964, "learning_rate": 1.940163888876949e-06, "loss": 0.2994, "step": 6637 }, { "epoch": 2.2163606010016697, "grad_norm": 1.009184942719877, "learning_rate": 1.938627740547381e-06, "loss": 0.2989, "step": 6638 }, { "epoch": 2.21669449081803, "grad_norm": 0.9276740177196464, "learning_rate": 1.937092054331046e-06, "loss": 0.2725, "step": 6639 }, { "epoch": 2.2170283806343907, "grad_norm": 1.004867184621084, "learning_rate": 1.9355568304597615e-06, "loss": 0.2941, "step": 6640 }, { "epoch": 2.217362270450751, "grad_norm": 0.9672948698447964, "learning_rate": 1.9340220691652662e-06, "loss": 0.2773, "step": 6641 }, { "epoch": 2.2176961602671117, "grad_norm": 1.020787974097339, "learning_rate": 1.9324877706792324e-06, "loss": 0.3065, "step": 6642 }, { "epoch": 2.2180300500834726, "grad_norm": 0.951227907936627, "learning_rate": 1.930953935233263e-06, "loss": 0.2827, "step": 6643 }, { "epoch": 2.218363939899833, "grad_norm": 1.0085940163610088, "learning_rate": 1.929420563058886e-06, "loss": 0.3062, "step": 6644 }, { "epoch": 2.2186978297161937, "grad_norm": 0.9798175897667551, "learning_rate": 1.9278876543875672e-06, "loss": 0.2922, "step": 6645 }, { "epoch": 2.219031719532554, "grad_norm": 1.0215099618552006, "learning_rate": 1.926355209450699e-06, "loss": 0.3092, "step": 6646 }, { "epoch": 2.2193656093489147, "grad_norm": 0.9564145054352705, "learning_rate": 1.9248232284796017e-06, "loss": 0.2839, "step": 6647 }, { "epoch": 2.2196994991652756, "grad_norm": 0.997769043349474, "learning_rate": 1.9232917117055276e-06, "loss": 0.3014, "step": 6648 }, { "epoch": 2.220033388981636, "grad_norm": 0.9831134848193884, "learning_rate": 1.92176065935966e-06, "loss": 0.2847, "step": 6649 }, { "epoch": 2.2203672787979967, "grad_norm": 1.0051322588346725, "learning_rate": 1.9202300716731093e-06, "loss": 0.3036, "step": 6650 }, { "epoch": 2.220701168614357, "grad_norm": 0.9902888246423525, "learning_rate": 1.9186999488769177e-06, "loss": 0.3048, "step": 6651 }, { "epoch": 2.2210350584307177, "grad_norm": 0.9605744732888827, "learning_rate": 1.917170291202056e-06, "loss": 0.2796, "step": 6652 }, { "epoch": 2.2213689482470786, "grad_norm": 1.0198683630595486, "learning_rate": 1.9156410988794296e-06, "loss": 0.3019, "step": 6653 }, { "epoch": 2.221702838063439, "grad_norm": 1.03770905626133, "learning_rate": 1.914112372139864e-06, "loss": 0.3002, "step": 6654 }, { "epoch": 2.2220367278797997, "grad_norm": 1.022027504004167, "learning_rate": 1.9125841112141238e-06, "loss": 0.3081, "step": 6655 }, { "epoch": 2.22237061769616, "grad_norm": 0.9621751079183225, "learning_rate": 1.911056316332896e-06, "loss": 0.2769, "step": 6656 }, { "epoch": 2.2227045075125207, "grad_norm": 0.9921945703911255, "learning_rate": 1.9095289877268054e-06, "loss": 0.2956, "step": 6657 }, { "epoch": 2.2230383973288816, "grad_norm": 0.9910115327069879, "learning_rate": 1.9080021256264e-06, "loss": 0.3001, "step": 6658 }, { "epoch": 2.223372287145242, "grad_norm": 0.9746984744724667, "learning_rate": 1.9064757302621583e-06, "loss": 0.2881, "step": 6659 }, { "epoch": 2.2237061769616027, "grad_norm": 1.0213570567536392, "learning_rate": 1.9049498018644874e-06, "loss": 0.2937, "step": 6660 }, { "epoch": 2.224040066777963, "grad_norm": 0.9769365854318264, "learning_rate": 1.9034243406637298e-06, "loss": 0.2866, "step": 6661 }, { "epoch": 2.2243739565943237, "grad_norm": 1.0439750133994257, "learning_rate": 1.9018993468901515e-06, "loss": 0.3069, "step": 6662 }, { "epoch": 2.2247078464106846, "grad_norm": 0.9905274943360536, "learning_rate": 1.9003748207739503e-06, "loss": 0.3023, "step": 6663 }, { "epoch": 2.225041736227045, "grad_norm": 0.9897376282030641, "learning_rate": 1.8988507625452519e-06, "loss": 0.292, "step": 6664 }, { "epoch": 2.2253756260434057, "grad_norm": 0.9481996246001216, "learning_rate": 1.8973271724341136e-06, "loss": 0.2824, "step": 6665 }, { "epoch": 2.225709515859766, "grad_norm": 1.0156620868642037, "learning_rate": 1.8958040506705206e-06, "loss": 0.2981, "step": 6666 }, { "epoch": 2.2260434056761267, "grad_norm": 1.006898455781841, "learning_rate": 1.8942813974843872e-06, "loss": 0.3011, "step": 6667 }, { "epoch": 2.2263772954924876, "grad_norm": 1.0076328205063956, "learning_rate": 1.8927592131055577e-06, "loss": 0.2938, "step": 6668 }, { "epoch": 2.226711185308848, "grad_norm": 1.0457868710578773, "learning_rate": 1.891237497763806e-06, "loss": 0.3127, "step": 6669 }, { "epoch": 2.2270450751252087, "grad_norm": 1.0292118256789209, "learning_rate": 1.8897162516888335e-06, "loss": 0.3011, "step": 6670 }, { "epoch": 2.227378964941569, "grad_norm": 1.0318333419942842, "learning_rate": 1.888195475110272e-06, "loss": 0.301, "step": 6671 }, { "epoch": 2.2277128547579297, "grad_norm": 1.018669075668509, "learning_rate": 1.8866751682576833e-06, "loss": 0.3007, "step": 6672 }, { "epoch": 2.2280467445742906, "grad_norm": 1.0096523165021332, "learning_rate": 1.8851553313605563e-06, "loss": 0.3002, "step": 6673 }, { "epoch": 2.228380634390651, "grad_norm": 1.0007163529462437, "learning_rate": 1.8836359646483104e-06, "loss": 0.2839, "step": 6674 }, { "epoch": 2.2287145242070117, "grad_norm": 1.0167250498513625, "learning_rate": 1.8821170683502937e-06, "loss": 0.3079, "step": 6675 }, { "epoch": 2.229048414023372, "grad_norm": 0.9896081232212529, "learning_rate": 1.8805986426957823e-06, "loss": 0.3024, "step": 6676 }, { "epoch": 2.2293823038397327, "grad_norm": 1.0191017191232585, "learning_rate": 1.8790806879139812e-06, "loss": 0.307, "step": 6677 }, { "epoch": 2.2297161936560936, "grad_norm": 1.0348982754508047, "learning_rate": 1.8775632042340297e-06, "loss": 0.3052, "step": 6678 }, { "epoch": 2.230050083472454, "grad_norm": 0.963163210449662, "learning_rate": 1.876046191884987e-06, "loss": 0.2925, "step": 6679 }, { "epoch": 2.2303839732888147, "grad_norm": 1.0137742510986252, "learning_rate": 1.8745296510958478e-06, "loss": 0.2875, "step": 6680 }, { "epoch": 2.230717863105175, "grad_norm": 1.0002081516146328, "learning_rate": 1.8730135820955303e-06, "loss": 0.3012, "step": 6681 }, { "epoch": 2.2310517529215357, "grad_norm": 1.0342618566258317, "learning_rate": 1.871497985112889e-06, "loss": 0.3034, "step": 6682 }, { "epoch": 2.2313856427378966, "grad_norm": 1.0324700960588262, "learning_rate": 1.8699828603767011e-06, "loss": 0.3144, "step": 6683 }, { "epoch": 2.231719532554257, "grad_norm": 1.012229731055848, "learning_rate": 1.8684682081156764e-06, "loss": 0.3026, "step": 6684 }, { "epoch": 2.2320534223706177, "grad_norm": 0.9781069849663645, "learning_rate": 1.8669540285584448e-06, "loss": 0.2921, "step": 6685 }, { "epoch": 2.232387312186978, "grad_norm": 0.9927403990602288, "learning_rate": 1.8654403219335777e-06, "loss": 0.2957, "step": 6686 }, { "epoch": 2.232721202003339, "grad_norm": 1.0119549518661226, "learning_rate": 1.863927088469566e-06, "loss": 0.2914, "step": 6687 }, { "epoch": 2.2330550918196996, "grad_norm": 1.0119482600342475, "learning_rate": 1.862414328394832e-06, "loss": 0.2986, "step": 6688 }, { "epoch": 2.23338898163606, "grad_norm": 1.0221349959916586, "learning_rate": 1.8609020419377266e-06, "loss": 0.3084, "step": 6689 }, { "epoch": 2.2337228714524207, "grad_norm": 1.0063509430946163, "learning_rate": 1.8593902293265299e-06, "loss": 0.2915, "step": 6690 }, { "epoch": 2.234056761268781, "grad_norm": 1.0188607042464322, "learning_rate": 1.857878890789448e-06, "loss": 0.3025, "step": 6691 }, { "epoch": 2.2343906510851417, "grad_norm": 1.0562655636835303, "learning_rate": 1.8563680265546175e-06, "loss": 0.3129, "step": 6692 }, { "epoch": 2.2347245409015026, "grad_norm": 1.01293322407798, "learning_rate": 1.854857636850103e-06, "loss": 0.2946, "step": 6693 }, { "epoch": 2.235058430717863, "grad_norm": 1.0230440118493629, "learning_rate": 1.8533477219038982e-06, "loss": 0.3025, "step": 6694 }, { "epoch": 2.2353923205342237, "grad_norm": 1.0035156659502118, "learning_rate": 1.851838281943923e-06, "loss": 0.3009, "step": 6695 }, { "epoch": 2.235726210350584, "grad_norm": 0.9965330566816942, "learning_rate": 1.8503293171980274e-06, "loss": 0.3014, "step": 6696 }, { "epoch": 2.236060100166945, "grad_norm": 1.042982523123675, "learning_rate": 1.8488208278939878e-06, "loss": 0.2967, "step": 6697 }, { "epoch": 2.2363939899833056, "grad_norm": 0.9953286985399882, "learning_rate": 1.8473128142595137e-06, "loss": 0.2973, "step": 6698 }, { "epoch": 2.236727879799666, "grad_norm": 0.9834804024743824, "learning_rate": 1.8458052765222384e-06, "loss": 0.2991, "step": 6699 }, { "epoch": 2.2370617696160267, "grad_norm": 1.0176808914747477, "learning_rate": 1.8442982149097216e-06, "loss": 0.3033, "step": 6700 }, { "epoch": 2.237395659432387, "grad_norm": 0.9655682659141246, "learning_rate": 1.8427916296494542e-06, "loss": 0.2955, "step": 6701 }, { "epoch": 2.237729549248748, "grad_norm": 1.0110357788362627, "learning_rate": 1.8412855209688569e-06, "loss": 0.2987, "step": 6702 }, { "epoch": 2.2380634390651086, "grad_norm": 1.0582616251293613, "learning_rate": 1.8397798890952761e-06, "loss": 0.3159, "step": 6703 }, { "epoch": 2.238397328881469, "grad_norm": 1.0238668873303605, "learning_rate": 1.838274734255987e-06, "loss": 0.3083, "step": 6704 }, { "epoch": 2.2387312186978297, "grad_norm": 1.0293426071351235, "learning_rate": 1.8367700566781898e-06, "loss": 0.3086, "step": 6705 }, { "epoch": 2.23906510851419, "grad_norm": 1.018929735389319, "learning_rate": 1.8352658565890147e-06, "loss": 0.3003, "step": 6706 }, { "epoch": 2.239398998330551, "grad_norm": 0.9563744594218139, "learning_rate": 1.8337621342155233e-06, "loss": 0.2886, "step": 6707 }, { "epoch": 2.2397328881469116, "grad_norm": 1.034562717940207, "learning_rate": 1.832258889784701e-06, "loss": 0.3082, "step": 6708 }, { "epoch": 2.240066777963272, "grad_norm": 1.0096805963916125, "learning_rate": 1.8307561235234616e-06, "loss": 0.2924, "step": 6709 }, { "epoch": 2.2404006677796326, "grad_norm": 0.9718892666277693, "learning_rate": 1.829253835658648e-06, "loss": 0.2927, "step": 6710 }, { "epoch": 2.240734557595993, "grad_norm": 0.9996960412490768, "learning_rate": 1.8277520264170296e-06, "loss": 0.2999, "step": 6711 }, { "epoch": 2.241068447412354, "grad_norm": 1.023562895495038, "learning_rate": 1.8262506960253035e-06, "loss": 0.3044, "step": 6712 }, { "epoch": 2.2414023372287146, "grad_norm": 1.019810043027696, "learning_rate": 1.824749844710096e-06, "loss": 0.2939, "step": 6713 }, { "epoch": 2.241736227045075, "grad_norm": 1.0442907454705883, "learning_rate": 1.8232494726979595e-06, "loss": 0.3085, "step": 6714 }, { "epoch": 2.2420701168614356, "grad_norm": 1.0124810583297597, "learning_rate": 1.821749580215375e-06, "loss": 0.2998, "step": 6715 }, { "epoch": 2.242404006677796, "grad_norm": 1.014709859395324, "learning_rate": 1.8202501674887506e-06, "loss": 0.2859, "step": 6716 }, { "epoch": 2.242737896494157, "grad_norm": 1.1992231000969222, "learning_rate": 1.8187512347444231e-06, "loss": 0.3142, "step": 6717 }, { "epoch": 2.2430717863105176, "grad_norm": 1.0049190577603537, "learning_rate": 1.8172527822086528e-06, "loss": 0.2924, "step": 6718 }, { "epoch": 2.243405676126878, "grad_norm": 1.0101177773146437, "learning_rate": 1.8157548101076372e-06, "loss": 0.2943, "step": 6719 }, { "epoch": 2.2437395659432386, "grad_norm": 1.0130430398643064, "learning_rate": 1.8142573186674883e-06, "loss": 0.2916, "step": 6720 }, { "epoch": 2.244073455759599, "grad_norm": 1.062338905843943, "learning_rate": 1.8127603081142548e-06, "loss": 0.3019, "step": 6721 }, { "epoch": 2.24440734557596, "grad_norm": 1.0428722598339057, "learning_rate": 1.811263778673908e-06, "loss": 0.2981, "step": 6722 }, { "epoch": 2.2447412353923206, "grad_norm": 0.9823504160142681, "learning_rate": 1.8097677305723517e-06, "loss": 0.2918, "step": 6723 }, { "epoch": 2.245075125208681, "grad_norm": 1.0402572817594906, "learning_rate": 1.808272164035414e-06, "loss": 0.2977, "step": 6724 }, { "epoch": 2.2454090150250416, "grad_norm": 1.0133227311648627, "learning_rate": 1.8067770792888473e-06, "loss": 0.3018, "step": 6725 }, { "epoch": 2.245742904841402, "grad_norm": 1.0348399372073418, "learning_rate": 1.8052824765583333e-06, "loss": 0.3141, "step": 6726 }, { "epoch": 2.246076794657763, "grad_norm": 1.0448124194595254, "learning_rate": 1.8037883560694858e-06, "loss": 0.2918, "step": 6727 }, { "epoch": 2.2464106844741236, "grad_norm": 0.9925639516154335, "learning_rate": 1.80229471804784e-06, "loss": 0.2981, "step": 6728 }, { "epoch": 2.246744574290484, "grad_norm": 0.9809719052358229, "learning_rate": 1.8008015627188603e-06, "loss": 0.3022, "step": 6729 }, { "epoch": 2.2470784641068446, "grad_norm": 1.0069449540329218, "learning_rate": 1.7993088903079381e-06, "loss": 0.3028, "step": 6730 }, { "epoch": 2.247412353923205, "grad_norm": 0.9770683179290591, "learning_rate": 1.797816701040392e-06, "loss": 0.2914, "step": 6731 }, { "epoch": 2.247746243739566, "grad_norm": 0.9844996308256891, "learning_rate": 1.7963249951414664e-06, "loss": 0.2838, "step": 6732 }, { "epoch": 2.2480801335559266, "grad_norm": 1.0086556179998714, "learning_rate": 1.7948337728363352e-06, "loss": 0.2977, "step": 6733 }, { "epoch": 2.248414023372287, "grad_norm": 0.9847519587808771, "learning_rate": 1.7933430343500974e-06, "loss": 0.2985, "step": 6734 }, { "epoch": 2.2487479131886476, "grad_norm": 1.019326071552922, "learning_rate": 1.7918527799077795e-06, "loss": 0.3077, "step": 6735 }, { "epoch": 2.249081803005008, "grad_norm": 1.009327529956881, "learning_rate": 1.7903630097343354e-06, "loss": 0.301, "step": 6736 }, { "epoch": 2.249415692821369, "grad_norm": 0.98238047613929, "learning_rate": 1.788873724054645e-06, "loss": 0.291, "step": 6737 }, { "epoch": 2.2497495826377296, "grad_norm": 0.9633827752845281, "learning_rate": 1.7873849230935153e-06, "loss": 0.2929, "step": 6738 }, { "epoch": 2.25008347245409, "grad_norm": 1.0003883690912425, "learning_rate": 1.7858966070756811e-06, "loss": 0.3001, "step": 6739 }, { "epoch": 2.2504173622704506, "grad_norm": 1.0078645732122475, "learning_rate": 1.7844087762258028e-06, "loss": 0.3092, "step": 6740 }, { "epoch": 2.250751252086811, "grad_norm": 0.9573333571151323, "learning_rate": 1.7829214307684685e-06, "loss": 0.28, "step": 6741 }, { "epoch": 2.251085141903172, "grad_norm": 1.000003180305658, "learning_rate": 1.7814345709281917e-06, "loss": 0.288, "step": 6742 }, { "epoch": 2.2514190317195326, "grad_norm": 1.0062776488076346, "learning_rate": 1.7799481969294124e-06, "loss": 0.2981, "step": 6743 }, { "epoch": 2.251752921535893, "grad_norm": 1.0057912599610135, "learning_rate": 1.7784623089965015e-06, "loss": 0.2971, "step": 6744 }, { "epoch": 2.2520868113522536, "grad_norm": 1.0614920087274706, "learning_rate": 1.7769769073537534e-06, "loss": 0.313, "step": 6745 }, { "epoch": 2.2524207011686146, "grad_norm": 1.0000454221092805, "learning_rate": 1.7754919922253855e-06, "loss": 0.2968, "step": 6746 }, { "epoch": 2.252754590984975, "grad_norm": 0.9920097720045261, "learning_rate": 1.7740075638355454e-06, "loss": 0.2887, "step": 6747 }, { "epoch": 2.2530884808013356, "grad_norm": 1.0115103047425595, "learning_rate": 1.7725236224083109e-06, "loss": 0.3022, "step": 6748 }, { "epoch": 2.253422370617696, "grad_norm": 0.9751786143726543, "learning_rate": 1.7710401681676802e-06, "loss": 0.2774, "step": 6749 }, { "epoch": 2.2537562604340566, "grad_norm": 1.0284361850802342, "learning_rate": 1.7695572013375823e-06, "loss": 0.3093, "step": 6750 }, { "epoch": 2.254090150250417, "grad_norm": 1.0470535998051145, "learning_rate": 1.7680747221418654e-06, "loss": 0.3028, "step": 6751 }, { "epoch": 2.254424040066778, "grad_norm": 1.0259149320469962, "learning_rate": 1.7665927308043146e-06, "loss": 0.2989, "step": 6752 }, { "epoch": 2.2547579298831386, "grad_norm": 0.9828722107324054, "learning_rate": 1.7651112275486343e-06, "loss": 0.2941, "step": 6753 }, { "epoch": 2.255091819699499, "grad_norm": 1.01842649865441, "learning_rate": 1.7636302125984568e-06, "loss": 0.31, "step": 6754 }, { "epoch": 2.2554257095158596, "grad_norm": 1.0315011816593018, "learning_rate": 1.7621496861773408e-06, "loss": 0.3027, "step": 6755 }, { "epoch": 2.2557595993322206, "grad_norm": 1.0551792117928405, "learning_rate": 1.7606696485087716e-06, "loss": 0.3099, "step": 6756 }, { "epoch": 2.256093489148581, "grad_norm": 0.9817507219936985, "learning_rate": 1.7591900998161599e-06, "loss": 0.2829, "step": 6757 }, { "epoch": 2.2564273789649416, "grad_norm": 0.9925198648848842, "learning_rate": 1.7577110403228442e-06, "loss": 0.3089, "step": 6758 }, { "epoch": 2.256761268781302, "grad_norm": 0.9774481473653126, "learning_rate": 1.7562324702520855e-06, "loss": 0.2984, "step": 6759 }, { "epoch": 2.2570951585976626, "grad_norm": 0.9765426669860938, "learning_rate": 1.7547543898270785e-06, "loss": 0.2952, "step": 6760 }, { "epoch": 2.257429048414023, "grad_norm": 0.9957771694213501, "learning_rate": 1.753276799270935e-06, "loss": 0.3034, "step": 6761 }, { "epoch": 2.257762938230384, "grad_norm": 0.9939954904618685, "learning_rate": 1.751799698806697e-06, "loss": 0.302, "step": 6762 }, { "epoch": 2.2580968280467446, "grad_norm": 0.9635329622439585, "learning_rate": 1.750323088657334e-06, "loss": 0.2923, "step": 6763 }, { "epoch": 2.258430717863105, "grad_norm": 0.9939196993449485, "learning_rate": 1.7488469690457371e-06, "loss": 0.2868, "step": 6764 }, { "epoch": 2.2587646076794656, "grad_norm": 1.0031948991711261, "learning_rate": 1.7473713401947312e-06, "loss": 0.2966, "step": 6765 }, { "epoch": 2.2590984974958266, "grad_norm": 0.9585070843541302, "learning_rate": 1.7458962023270575e-06, "loss": 0.2849, "step": 6766 }, { "epoch": 2.259432387312187, "grad_norm": 1.02320253940884, "learning_rate": 1.7444215556653892e-06, "loss": 0.2979, "step": 6767 }, { "epoch": 2.2597662771285476, "grad_norm": 1.004031553150205, "learning_rate": 1.7429474004323217e-06, "loss": 0.2962, "step": 6768 }, { "epoch": 2.260100166944908, "grad_norm": 1.0164772298429097, "learning_rate": 1.741473736850382e-06, "loss": 0.2949, "step": 6769 }, { "epoch": 2.2604340567612686, "grad_norm": 1.0390038541550601, "learning_rate": 1.7400005651420187e-06, "loss": 0.3031, "step": 6770 }, { "epoch": 2.260767946577629, "grad_norm": 0.9995599902637589, "learning_rate": 1.7385278855296046e-06, "loss": 0.2951, "step": 6771 }, { "epoch": 2.26110183639399, "grad_norm": 0.9876638398446176, "learning_rate": 1.7370556982354386e-06, "loss": 0.2972, "step": 6772 }, { "epoch": 2.2614357262103506, "grad_norm": 1.0001867738192507, "learning_rate": 1.735584003481751e-06, "loss": 0.3043, "step": 6773 }, { "epoch": 2.261769616026711, "grad_norm": 1.0090444854859135, "learning_rate": 1.7341128014906921e-06, "loss": 0.3025, "step": 6774 }, { "epoch": 2.2621035058430716, "grad_norm": 1.0225130282058668, "learning_rate": 1.7326420924843395e-06, "loss": 0.3019, "step": 6775 }, { "epoch": 2.2624373956594326, "grad_norm": 0.9966298316524558, "learning_rate": 1.7311718766846958e-06, "loss": 0.3004, "step": 6776 }, { "epoch": 2.262771285475793, "grad_norm": 1.0507769264352533, "learning_rate": 1.7297021543136905e-06, "loss": 0.3097, "step": 6777 }, { "epoch": 2.2631051752921536, "grad_norm": 1.0130733803081948, "learning_rate": 1.7282329255931769e-06, "loss": 0.2986, "step": 6778 }, { "epoch": 2.263439065108514, "grad_norm": 0.9934134001578672, "learning_rate": 1.7267641907449346e-06, "loss": 0.3081, "step": 6779 }, { "epoch": 2.2637729549248746, "grad_norm": 0.9953490096899263, "learning_rate": 1.7252959499906697e-06, "loss": 0.2904, "step": 6780 }, { "epoch": 2.2641068447412356, "grad_norm": 1.006637674755375, "learning_rate": 1.7238282035520116e-06, "loss": 0.2939, "step": 6781 }, { "epoch": 2.264440734557596, "grad_norm": 1.0046771046950387, "learning_rate": 1.7223609516505168e-06, "loss": 0.2981, "step": 6782 }, { "epoch": 2.2647746243739566, "grad_norm": 0.9883978041883952, "learning_rate": 1.7208941945076663e-06, "loss": 0.2919, "step": 6783 }, { "epoch": 2.265108514190317, "grad_norm": 0.9884630922596922, "learning_rate": 1.719427932344865e-06, "loss": 0.2983, "step": 6784 }, { "epoch": 2.2654424040066776, "grad_norm": 1.0367339731302243, "learning_rate": 1.7179621653834499e-06, "loss": 0.2961, "step": 6785 }, { "epoch": 2.2657762938230386, "grad_norm": 1.05137667288406, "learning_rate": 1.716496893844673e-06, "loss": 0.3158, "step": 6786 }, { "epoch": 2.266110183639399, "grad_norm": 0.9831286908249927, "learning_rate": 1.7150321179497181e-06, "loss": 0.2913, "step": 6787 }, { "epoch": 2.2664440734557596, "grad_norm": 1.030888594605132, "learning_rate": 1.7135678379196913e-06, "loss": 0.2968, "step": 6788 }, { "epoch": 2.26677796327212, "grad_norm": 1.0142749384006575, "learning_rate": 1.712104053975629e-06, "loss": 0.3036, "step": 6789 }, { "epoch": 2.2671118530884806, "grad_norm": 1.0056434653487552, "learning_rate": 1.710640766338486e-06, "loss": 0.2968, "step": 6790 }, { "epoch": 2.2674457429048416, "grad_norm": 0.9835866671162337, "learning_rate": 1.7091779752291477e-06, "loss": 0.2858, "step": 6791 }, { "epoch": 2.267779632721202, "grad_norm": 1.0505913627008507, "learning_rate": 1.707715680868417e-06, "loss": 0.2994, "step": 6792 }, { "epoch": 2.2681135225375626, "grad_norm": 1.0142359265647214, "learning_rate": 1.7062538834770316e-06, "loss": 0.3083, "step": 6793 }, { "epoch": 2.268447412353923, "grad_norm": 1.006975103392873, "learning_rate": 1.7047925832756479e-06, "loss": 0.3023, "step": 6794 }, { "epoch": 2.2687813021702836, "grad_norm": 0.9666270596061173, "learning_rate": 1.7033317804848482e-06, "loss": 0.2929, "step": 6795 }, { "epoch": 2.2691151919866446, "grad_norm": 1.0311819450212687, "learning_rate": 1.7018714753251424e-06, "loss": 0.3034, "step": 6796 }, { "epoch": 2.269449081803005, "grad_norm": 1.0122726386892271, "learning_rate": 1.7004116680169585e-06, "loss": 0.2874, "step": 6797 }, { "epoch": 2.2697829716193656, "grad_norm": 1.0072217375659775, "learning_rate": 1.6989523587806577e-06, "loss": 0.2966, "step": 6798 }, { "epoch": 2.270116861435726, "grad_norm": 0.9845413537100693, "learning_rate": 1.6974935478365224e-06, "loss": 0.2878, "step": 6799 }, { "epoch": 2.2704507512520866, "grad_norm": 0.9891732131226884, "learning_rate": 1.6960352354047583e-06, "loss": 0.2925, "step": 6800 }, { "epoch": 2.2707846410684476, "grad_norm": 1.039672267266816, "learning_rate": 1.6945774217054971e-06, "loss": 0.2995, "step": 6801 }, { "epoch": 2.271118530884808, "grad_norm": 0.9953925123966876, "learning_rate": 1.6931201069587966e-06, "loss": 0.2852, "step": 6802 }, { "epoch": 2.2714524207011686, "grad_norm": 1.010073781707654, "learning_rate": 1.691663291384637e-06, "loss": 0.304, "step": 6803 }, { "epoch": 2.271786310517529, "grad_norm": 1.0111918655862426, "learning_rate": 1.6902069752029249e-06, "loss": 0.2865, "step": 6804 }, { "epoch": 2.27212020033389, "grad_norm": 1.0039484272186916, "learning_rate": 1.6887511586334886e-06, "loss": 0.284, "step": 6805 }, { "epoch": 2.2724540901502506, "grad_norm": 0.964641579330258, "learning_rate": 1.6872958418960882e-06, "loss": 0.2816, "step": 6806 }, { "epoch": 2.272787979966611, "grad_norm": 1.006792447482226, "learning_rate": 1.6858410252103984e-06, "loss": 0.3018, "step": 6807 }, { "epoch": 2.2731218697829716, "grad_norm": 0.9884973626278073, "learning_rate": 1.6843867087960252e-06, "loss": 0.2803, "step": 6808 }, { "epoch": 2.273455759599332, "grad_norm": 1.0680302775919306, "learning_rate": 1.6829328928724953e-06, "loss": 0.307, "step": 6809 }, { "epoch": 2.2737896494156926, "grad_norm": 1.0498598630057223, "learning_rate": 1.6814795776592653e-06, "loss": 0.312, "step": 6810 }, { "epoch": 2.2741235392320536, "grad_norm": 1.0033510440402855, "learning_rate": 1.680026763375713e-06, "loss": 0.2967, "step": 6811 }, { "epoch": 2.274457429048414, "grad_norm": 1.0170501221456767, "learning_rate": 1.678574450241136e-06, "loss": 0.308, "step": 6812 }, { "epoch": 2.2747913188647746, "grad_norm": 0.9756023881460518, "learning_rate": 1.6771226384747613e-06, "loss": 0.3001, "step": 6813 }, { "epoch": 2.275125208681135, "grad_norm": 1.0120811185604548, "learning_rate": 1.6756713282957427e-06, "loss": 0.315, "step": 6814 }, { "epoch": 2.275459098497496, "grad_norm": 1.041686816717626, "learning_rate": 1.6742205199231533e-06, "loss": 0.2989, "step": 6815 }, { "epoch": 2.2757929883138566, "grad_norm": 0.9920224048585141, "learning_rate": 1.6727702135759938e-06, "loss": 0.2896, "step": 6816 }, { "epoch": 2.276126878130217, "grad_norm": 0.9957825644106076, "learning_rate": 1.6713204094731827e-06, "loss": 0.2869, "step": 6817 }, { "epoch": 2.2764607679465776, "grad_norm": 1.1202171271648713, "learning_rate": 1.6698711078335727e-06, "loss": 0.2904, "step": 6818 }, { "epoch": 2.276794657762938, "grad_norm": 1.0248009898181296, "learning_rate": 1.6684223088759333e-06, "loss": 0.3054, "step": 6819 }, { "epoch": 2.2771285475792986, "grad_norm": 0.9606242967052332, "learning_rate": 1.6669740128189598e-06, "loss": 0.2785, "step": 6820 }, { "epoch": 2.2774624373956596, "grad_norm": 1.0395492265798507, "learning_rate": 1.6655262198812733e-06, "loss": 0.2982, "step": 6821 }, { "epoch": 2.27779632721202, "grad_norm": 1.047918137240749, "learning_rate": 1.664078930281417e-06, "loss": 0.3009, "step": 6822 }, { "epoch": 2.2781302170283806, "grad_norm": 0.9905388922691086, "learning_rate": 1.662632144237859e-06, "loss": 0.2773, "step": 6823 }, { "epoch": 2.278464106844741, "grad_norm": 1.0051383210411764, "learning_rate": 1.6611858619689914e-06, "loss": 0.293, "step": 6824 }, { "epoch": 2.278797996661102, "grad_norm": 1.0023424638545082, "learning_rate": 1.6597400836931294e-06, "loss": 0.2913, "step": 6825 }, { "epoch": 2.2791318864774626, "grad_norm": 1.0713388766019463, "learning_rate": 1.6582948096285135e-06, "loss": 0.2982, "step": 6826 }, { "epoch": 2.279465776293823, "grad_norm": 1.033962887243687, "learning_rate": 1.6568500399933074e-06, "loss": 0.3085, "step": 6827 }, { "epoch": 2.2797996661101836, "grad_norm": 1.0214267144120739, "learning_rate": 1.655405775005598e-06, "loss": 0.2961, "step": 6828 }, { "epoch": 2.280133555926544, "grad_norm": 1.024574732657261, "learning_rate": 1.6539620148833974e-06, "loss": 0.3074, "step": 6829 }, { "epoch": 2.2804674457429046, "grad_norm": 1.0074050814701292, "learning_rate": 1.6525187598446384e-06, "loss": 0.3052, "step": 6830 }, { "epoch": 2.2808013355592656, "grad_norm": 0.9994417428729907, "learning_rate": 1.6510760101071854e-06, "loss": 0.2899, "step": 6831 }, { "epoch": 2.281135225375626, "grad_norm": 1.0231211546985923, "learning_rate": 1.6496337658888163e-06, "loss": 0.2964, "step": 6832 }, { "epoch": 2.2814691151919866, "grad_norm": 1.0468989565432099, "learning_rate": 1.6481920274072383e-06, "loss": 0.3085, "step": 6833 }, { "epoch": 2.281803005008347, "grad_norm": 1.026654692406792, "learning_rate": 1.6467507948800804e-06, "loss": 0.299, "step": 6834 }, { "epoch": 2.282136894824708, "grad_norm": 1.0057537090287538, "learning_rate": 1.6453100685248996e-06, "loss": 0.2944, "step": 6835 }, { "epoch": 2.2824707846410686, "grad_norm": 1.0114024068908822, "learning_rate": 1.6438698485591709e-06, "loss": 0.2892, "step": 6836 }, { "epoch": 2.282804674457429, "grad_norm": 1.0109237437573655, "learning_rate": 1.642430135200297e-06, "loss": 0.3012, "step": 6837 }, { "epoch": 2.2831385642737896, "grad_norm": 1.0006137172858551, "learning_rate": 1.6409909286655974e-06, "loss": 0.2899, "step": 6838 }, { "epoch": 2.28347245409015, "grad_norm": 1.0046861522408082, "learning_rate": 1.6395522291723242e-06, "loss": 0.3016, "step": 6839 }, { "epoch": 2.2838063439065106, "grad_norm": 0.9710603856335244, "learning_rate": 1.638114036937648e-06, "loss": 0.2895, "step": 6840 }, { "epoch": 2.2841402337228716, "grad_norm": 1.0065529846877403, "learning_rate": 1.6366763521786627e-06, "loss": 0.3059, "step": 6841 }, { "epoch": 2.284474123539232, "grad_norm": 0.9595643212489927, "learning_rate": 1.635239175112387e-06, "loss": 0.2846, "step": 6842 }, { "epoch": 2.2848080133555926, "grad_norm": 0.9860396428885518, "learning_rate": 1.6338025059557621e-06, "loss": 0.2958, "step": 6843 }, { "epoch": 2.285141903171953, "grad_norm": 0.9966485165285424, "learning_rate": 1.6323663449256528e-06, "loss": 0.2977, "step": 6844 }, { "epoch": 2.285475792988314, "grad_norm": 0.9880136336326439, "learning_rate": 1.6309306922388469e-06, "loss": 0.2892, "step": 6845 }, { "epoch": 2.2858096828046746, "grad_norm": 1.0050407852932919, "learning_rate": 1.629495548112056e-06, "loss": 0.2875, "step": 6846 }, { "epoch": 2.286143572621035, "grad_norm": 0.9779981820262289, "learning_rate": 1.6280609127619146e-06, "loss": 0.288, "step": 6847 }, { "epoch": 2.2864774624373956, "grad_norm": 1.1066636869142323, "learning_rate": 1.6266267864049813e-06, "loss": 0.3042, "step": 6848 }, { "epoch": 2.286811352253756, "grad_norm": 1.0425531955317409, "learning_rate": 1.6251931692577354e-06, "loss": 0.3105, "step": 6849 }, { "epoch": 2.287145242070117, "grad_norm": 1.0246452660900656, "learning_rate": 1.6237600615365806e-06, "loss": 0.2894, "step": 6850 }, { "epoch": 2.2874791318864776, "grad_norm": 1.0384020305593893, "learning_rate": 1.6223274634578468e-06, "loss": 0.3037, "step": 6851 }, { "epoch": 2.287813021702838, "grad_norm": 1.0175233146419218, "learning_rate": 1.6208953752377842e-06, "loss": 0.2968, "step": 6852 }, { "epoch": 2.2881469115191986, "grad_norm": 1.0561142949077313, "learning_rate": 1.6194637970925635e-06, "loss": 0.3047, "step": 6853 }, { "epoch": 2.288480801335559, "grad_norm": 1.053684693220085, "learning_rate": 1.6180327292382808e-06, "loss": 0.3039, "step": 6854 }, { "epoch": 2.28881469115192, "grad_norm": 1.0332231454165766, "learning_rate": 1.616602171890958e-06, "loss": 0.3009, "step": 6855 }, { "epoch": 2.2891485809682806, "grad_norm": 1.0372941338445172, "learning_rate": 1.6151721252665365e-06, "loss": 0.3018, "step": 6856 }, { "epoch": 2.289482470784641, "grad_norm": 1.0459140880309765, "learning_rate": 1.6137425895808822e-06, "loss": 0.3036, "step": 6857 }, { "epoch": 2.2898163606010016, "grad_norm": 1.0322199984054705, "learning_rate": 1.61231356504978e-06, "loss": 0.2964, "step": 6858 }, { "epoch": 2.290150250417362, "grad_norm": 1.061001523620483, "learning_rate": 1.6108850518889418e-06, "loss": 0.3043, "step": 6859 }, { "epoch": 2.290484140233723, "grad_norm": 1.0292655736560858, "learning_rate": 1.6094570503140034e-06, "loss": 0.2954, "step": 6860 }, { "epoch": 2.2908180300500836, "grad_norm": 1.0285469726509129, "learning_rate": 1.6080295605405194e-06, "loss": 0.2922, "step": 6861 }, { "epoch": 2.291151919866444, "grad_norm": 0.9778145631905613, "learning_rate": 1.60660258278397e-06, "loss": 0.2884, "step": 6862 }, { "epoch": 2.2914858096828046, "grad_norm": 1.0085369151620607, "learning_rate": 1.6051761172597564e-06, "loss": 0.2935, "step": 6863 }, { "epoch": 2.291819699499165, "grad_norm": 0.9873196503936674, "learning_rate": 1.6037501641832031e-06, "loss": 0.2872, "step": 6864 }, { "epoch": 2.292153589315526, "grad_norm": 0.9866498899214348, "learning_rate": 1.602324723769557e-06, "loss": 0.2877, "step": 6865 }, { "epoch": 2.2924874791318866, "grad_norm": 0.9999353502045183, "learning_rate": 1.600899796233989e-06, "loss": 0.3037, "step": 6866 }, { "epoch": 2.292821368948247, "grad_norm": 1.045092080489952, "learning_rate": 1.5994753817915909e-06, "loss": 0.3129, "step": 6867 }, { "epoch": 2.2931552587646076, "grad_norm": 0.9928441634666192, "learning_rate": 1.5980514806573771e-06, "loss": 0.2987, "step": 6868 }, { "epoch": 2.293489148580968, "grad_norm": 1.0041741103774788, "learning_rate": 1.5966280930462864e-06, "loss": 0.2896, "step": 6869 }, { "epoch": 2.293823038397329, "grad_norm": 0.9856374630835688, "learning_rate": 1.5952052191731771e-06, "loss": 0.2878, "step": 6870 }, { "epoch": 2.2941569282136896, "grad_norm": 0.9652697392345145, "learning_rate": 1.593782859252831e-06, "loss": 0.2793, "step": 6871 }, { "epoch": 2.29449081803005, "grad_norm": 1.0445234638411078, "learning_rate": 1.5923610134999573e-06, "loss": 0.3035, "step": 6872 }, { "epoch": 2.2948247078464106, "grad_norm": 1.0040028179329101, "learning_rate": 1.5909396821291783e-06, "loss": 0.2896, "step": 6873 }, { "epoch": 2.2951585976627715, "grad_norm": 0.9883463814099147, "learning_rate": 1.5895188653550458e-06, "loss": 0.2776, "step": 6874 }, { "epoch": 2.295492487479132, "grad_norm": 1.0166097704574613, "learning_rate": 1.5880985633920298e-06, "loss": 0.2866, "step": 6875 }, { "epoch": 2.2958263772954925, "grad_norm": 0.9906000885304829, "learning_rate": 1.5866787764545277e-06, "loss": 0.297, "step": 6876 }, { "epoch": 2.296160267111853, "grad_norm": 1.0259309684315352, "learning_rate": 1.5852595047568552e-06, "loss": 0.2942, "step": 6877 }, { "epoch": 2.2964941569282136, "grad_norm": 1.0378225022734262, "learning_rate": 1.583840748513249e-06, "loss": 0.2953, "step": 6878 }, { "epoch": 2.296828046744574, "grad_norm": 1.0322032325561044, "learning_rate": 1.5824225079378686e-06, "loss": 0.3102, "step": 6879 }, { "epoch": 2.297161936560935, "grad_norm": 1.0764378131901617, "learning_rate": 1.581004783244801e-06, "loss": 0.3079, "step": 6880 }, { "epoch": 2.2974958263772955, "grad_norm": 0.9724140153472084, "learning_rate": 1.57958757464805e-06, "loss": 0.2876, "step": 6881 }, { "epoch": 2.297829716193656, "grad_norm": 1.0258608082706773, "learning_rate": 1.578170882361542e-06, "loss": 0.3082, "step": 6882 }, { "epoch": 2.2981636060100166, "grad_norm": 0.9458283404041692, "learning_rate": 1.5767547065991268e-06, "loss": 0.2745, "step": 6883 }, { "epoch": 2.2984974958263775, "grad_norm": 1.0443729106455086, "learning_rate": 1.5753390475745755e-06, "loss": 0.3077, "step": 6884 }, { "epoch": 2.298831385642738, "grad_norm": 1.0249089059427077, "learning_rate": 1.5739239055015814e-06, "loss": 0.3051, "step": 6885 }, { "epoch": 2.2991652754590985, "grad_norm": 0.9884697617071877, "learning_rate": 1.5725092805937603e-06, "loss": 0.2912, "step": 6886 }, { "epoch": 2.299499165275459, "grad_norm": 1.0016517128791667, "learning_rate": 1.5710951730646484e-06, "loss": 0.2871, "step": 6887 }, { "epoch": 2.2998330550918196, "grad_norm": 0.9938794993283184, "learning_rate": 1.5696815831277057e-06, "loss": 0.2959, "step": 6888 }, { "epoch": 2.30016694490818, "grad_norm": 1.0305583227167958, "learning_rate": 1.568268510996313e-06, "loss": 0.306, "step": 6889 }, { "epoch": 2.300500834724541, "grad_norm": 1.035668935149242, "learning_rate": 1.5668559568837727e-06, "loss": 0.3058, "step": 6890 }, { "epoch": 2.3008347245409015, "grad_norm": 1.011782252497904, "learning_rate": 1.56544392100331e-06, "loss": 0.2977, "step": 6891 }, { "epoch": 2.301168614357262, "grad_norm": 1.007076804125209, "learning_rate": 1.564032403568071e-06, "loss": 0.2921, "step": 6892 }, { "epoch": 2.3015025041736226, "grad_norm": 1.0377094052205793, "learning_rate": 1.5626214047911237e-06, "loss": 0.2932, "step": 6893 }, { "epoch": 2.3018363939899835, "grad_norm": 0.9878955042071118, "learning_rate": 1.561210924885458e-06, "loss": 0.2914, "step": 6894 }, { "epoch": 2.302170283806344, "grad_norm": 1.0219040218373132, "learning_rate": 1.5598009640639855e-06, "loss": 0.3113, "step": 6895 }, { "epoch": 2.3025041736227045, "grad_norm": 1.0232107425397512, "learning_rate": 1.558391522539538e-06, "loss": 0.3011, "step": 6896 }, { "epoch": 2.302838063439065, "grad_norm": 1.0032591586200728, "learning_rate": 1.5569826005248734e-06, "loss": 0.2952, "step": 6897 }, { "epoch": 2.3031719532554256, "grad_norm": 0.995611962858238, "learning_rate": 1.5555741982326676e-06, "loss": 0.2994, "step": 6898 }, { "epoch": 2.303505843071786, "grad_norm": 1.028420248360942, "learning_rate": 1.554166315875516e-06, "loss": 0.2973, "step": 6899 }, { "epoch": 2.303839732888147, "grad_norm": 1.0396007201081727, "learning_rate": 1.5527589536659376e-06, "loss": 0.2987, "step": 6900 }, { "epoch": 2.3041736227045075, "grad_norm": 0.9685423117006475, "learning_rate": 1.5513521118163766e-06, "loss": 0.2879, "step": 6901 }, { "epoch": 2.304507512520868, "grad_norm": 0.9313552199436108, "learning_rate": 1.5499457905391935e-06, "loss": 0.284, "step": 6902 }, { "epoch": 2.3048414023372286, "grad_norm": 0.976017848785838, "learning_rate": 1.5485399900466741e-06, "loss": 0.2957, "step": 6903 }, { "epoch": 2.3051752921535895, "grad_norm": 0.9527208901713422, "learning_rate": 1.5471347105510188e-06, "loss": 0.2869, "step": 6904 }, { "epoch": 2.30550918196995, "grad_norm": 1.027956801210855, "learning_rate": 1.5457299522643582e-06, "loss": 0.297, "step": 6905 }, { "epoch": 2.3058430717863105, "grad_norm": 0.9955301560432649, "learning_rate": 1.544325715398739e-06, "loss": 0.2912, "step": 6906 }, { "epoch": 2.306176961602671, "grad_norm": 0.980951865223609, "learning_rate": 1.542922000166131e-06, "loss": 0.2877, "step": 6907 }, { "epoch": 2.3065108514190316, "grad_norm": 1.0134202398956098, "learning_rate": 1.5415188067784237e-06, "loss": 0.3015, "step": 6908 }, { "epoch": 2.306844741235392, "grad_norm": 1.0509333076605452, "learning_rate": 1.5401161354474291e-06, "loss": 0.3145, "step": 6909 }, { "epoch": 2.307178631051753, "grad_norm": 1.0210153110275708, "learning_rate": 1.5387139863848798e-06, "loss": 0.2988, "step": 6910 }, { "epoch": 2.3075125208681135, "grad_norm": 1.0122458619428505, "learning_rate": 1.5373123598024302e-06, "loss": 0.3075, "step": 6911 }, { "epoch": 2.307846410684474, "grad_norm": 0.9976677697767276, "learning_rate": 1.5359112559116536e-06, "loss": 0.2981, "step": 6912 }, { "epoch": 2.3081803005008346, "grad_norm": 1.0339331346784788, "learning_rate": 1.5345106749240507e-06, "loss": 0.3094, "step": 6913 }, { "epoch": 2.3085141903171955, "grad_norm": 1.0418162382914202, "learning_rate": 1.533110617051034e-06, "loss": 0.3076, "step": 6914 }, { "epoch": 2.308848080133556, "grad_norm": 0.9714853204703278, "learning_rate": 1.5317110825039444e-06, "loss": 0.2811, "step": 6915 }, { "epoch": 2.3091819699499165, "grad_norm": 1.0326925077324756, "learning_rate": 1.5303120714940384e-06, "loss": 0.3127, "step": 6916 }, { "epoch": 2.309515859766277, "grad_norm": 1.0344522565988263, "learning_rate": 1.5289135842325004e-06, "loss": 0.299, "step": 6917 }, { "epoch": 2.3098497495826376, "grad_norm": 1.022453312153779, "learning_rate": 1.5275156209304309e-06, "loss": 0.2998, "step": 6918 }, { "epoch": 2.3101836393989985, "grad_norm": 0.9914548896406286, "learning_rate": 1.5261181817988491e-06, "loss": 0.2804, "step": 6919 }, { "epoch": 2.310517529215359, "grad_norm": 1.0172732433711902, "learning_rate": 1.5247212670487e-06, "loss": 0.2957, "step": 6920 }, { "epoch": 2.3108514190317195, "grad_norm": 1.0029243574300162, "learning_rate": 1.5233248768908454e-06, "loss": 0.2871, "step": 6921 }, { "epoch": 2.31118530884808, "grad_norm": 1.0349953821395177, "learning_rate": 1.5219290115360735e-06, "loss": 0.2973, "step": 6922 }, { "epoch": 2.3115191986644406, "grad_norm": 0.9681252662638463, "learning_rate": 1.5205336711950896e-06, "loss": 0.2831, "step": 6923 }, { "epoch": 2.3118530884808015, "grad_norm": 1.0079232831462575, "learning_rate": 1.5191388560785165e-06, "loss": 0.2994, "step": 6924 }, { "epoch": 2.312186978297162, "grad_norm": 1.0332738364224447, "learning_rate": 1.5177445663969014e-06, "loss": 0.2966, "step": 6925 }, { "epoch": 2.3125208681135225, "grad_norm": 1.0129738740859324, "learning_rate": 1.5163508023607154e-06, "loss": 0.2955, "step": 6926 }, { "epoch": 2.312854757929883, "grad_norm": 1.0546563476011237, "learning_rate": 1.5149575641803449e-06, "loss": 0.3039, "step": 6927 }, { "epoch": 2.3131886477462436, "grad_norm": 1.0291544843707296, "learning_rate": 1.5135648520660984e-06, "loss": 0.3031, "step": 6928 }, { "epoch": 2.3135225375626045, "grad_norm": 1.0203799197349712, "learning_rate": 1.5121726662282056e-06, "loss": 0.2942, "step": 6929 }, { "epoch": 2.313856427378965, "grad_norm": 1.043794776523671, "learning_rate": 1.5107810068768169e-06, "loss": 0.307, "step": 6930 }, { "epoch": 2.3141903171953255, "grad_norm": 1.0099155143601726, "learning_rate": 1.5093898742220025e-06, "loss": 0.2856, "step": 6931 }, { "epoch": 2.314524207011686, "grad_norm": 0.9797289078262366, "learning_rate": 1.5079992684737537e-06, "loss": 0.2858, "step": 6932 }, { "epoch": 2.3148580968280466, "grad_norm": 1.0563115729271693, "learning_rate": 1.5066091898419822e-06, "loss": 0.309, "step": 6933 }, { "epoch": 2.3151919866444075, "grad_norm": 0.9464594994454318, "learning_rate": 1.5052196385365192e-06, "loss": 0.2794, "step": 6934 }, { "epoch": 2.315525876460768, "grad_norm": 1.0103250072201415, "learning_rate": 1.5038306147671177e-06, "loss": 0.2941, "step": 6935 }, { "epoch": 2.3158597662771285, "grad_norm": 1.0315848678390036, "learning_rate": 1.5024421187434507e-06, "loss": 0.3003, "step": 6936 }, { "epoch": 2.316193656093489, "grad_norm": 1.0597402964141107, "learning_rate": 1.5010541506751086e-06, "loss": 0.3064, "step": 6937 }, { "epoch": 2.3165275459098496, "grad_norm": 0.9869108389313984, "learning_rate": 1.4996667107716106e-06, "loss": 0.2821, "step": 6938 }, { "epoch": 2.3168614357262105, "grad_norm": 1.0058687553000052, "learning_rate": 1.4982797992423848e-06, "loss": 0.2927, "step": 6939 }, { "epoch": 2.317195325542571, "grad_norm": 1.0469415372405557, "learning_rate": 1.4968934162967875e-06, "loss": 0.301, "step": 6940 }, { "epoch": 2.3175292153589315, "grad_norm": 0.988160484961115, "learning_rate": 1.49550756214409e-06, "loss": 0.2878, "step": 6941 }, { "epoch": 2.317863105175292, "grad_norm": 0.9852604848754745, "learning_rate": 1.494122236993491e-06, "loss": 0.2902, "step": 6942 }, { "epoch": 2.318196994991653, "grad_norm": 1.0514222212534736, "learning_rate": 1.4927374410541028e-06, "loss": 0.3105, "step": 6943 }, { "epoch": 2.3185308848080135, "grad_norm": 1.0651706207782219, "learning_rate": 1.4913531745349612e-06, "loss": 0.3155, "step": 6944 }, { "epoch": 2.318864774624374, "grad_norm": 1.027643100730548, "learning_rate": 1.489969437645017e-06, "loss": 0.3092, "step": 6945 }, { "epoch": 2.3191986644407345, "grad_norm": 1.0103768234531698, "learning_rate": 1.4885862305931486e-06, "loss": 0.2934, "step": 6946 }, { "epoch": 2.319532554257095, "grad_norm": 1.0712259936829744, "learning_rate": 1.4872035535881496e-06, "loss": 0.3026, "step": 6947 }, { "epoch": 2.3198664440734555, "grad_norm": 1.0346286150834378, "learning_rate": 1.4858214068387345e-06, "loss": 0.3143, "step": 6948 }, { "epoch": 2.3202003338898165, "grad_norm": 1.0207415041326506, "learning_rate": 1.4844397905535397e-06, "loss": 0.2917, "step": 6949 }, { "epoch": 2.320534223706177, "grad_norm": 1.0427825088432765, "learning_rate": 1.4830587049411154e-06, "loss": 0.2985, "step": 6950 }, { "epoch": 2.3208681135225375, "grad_norm": 1.0253381048855053, "learning_rate": 1.4816781502099397e-06, "loss": 0.294, "step": 6951 }, { "epoch": 2.321202003338898, "grad_norm": 1.0094325526169312, "learning_rate": 1.4802981265684063e-06, "loss": 0.2971, "step": 6952 }, { "epoch": 2.321535893155259, "grad_norm": 0.9670778289418662, "learning_rate": 1.4789186342248296e-06, "loss": 0.2824, "step": 6953 }, { "epoch": 2.3218697829716195, "grad_norm": 1.0188598836787577, "learning_rate": 1.4775396733874425e-06, "loss": 0.2856, "step": 6954 }, { "epoch": 2.32220367278798, "grad_norm": 1.023201495455853, "learning_rate": 1.4761612442643992e-06, "loss": 0.2951, "step": 6955 }, { "epoch": 2.3225375626043405, "grad_norm": 0.9761895286272341, "learning_rate": 1.4747833470637734e-06, "loss": 0.2864, "step": 6956 }, { "epoch": 2.322871452420701, "grad_norm": 1.0139355031531418, "learning_rate": 1.4734059819935581e-06, "loss": 0.287, "step": 6957 }, { "epoch": 2.3232053422370615, "grad_norm": 1.0257182324297678, "learning_rate": 1.472029149261665e-06, "loss": 0.2978, "step": 6958 }, { "epoch": 2.3235392320534225, "grad_norm": 1.0183266454469937, "learning_rate": 1.4706528490759303e-06, "loss": 0.2982, "step": 6959 }, { "epoch": 2.323873121869783, "grad_norm": 1.0190391869362065, "learning_rate": 1.4692770816441027e-06, "loss": 0.2938, "step": 6960 }, { "epoch": 2.3242070116861435, "grad_norm": 1.024860504246835, "learning_rate": 1.4679018471738543e-06, "loss": 0.2887, "step": 6961 }, { "epoch": 2.324540901502504, "grad_norm": 1.0128031157823902, "learning_rate": 1.4665271458727749e-06, "loss": 0.293, "step": 6962 }, { "epoch": 2.324874791318865, "grad_norm": 1.0322245430682133, "learning_rate": 1.4651529779483791e-06, "loss": 0.2988, "step": 6963 }, { "epoch": 2.3252086811352255, "grad_norm": 1.0111852864840083, "learning_rate": 1.4637793436080972e-06, "loss": 0.2784, "step": 6964 }, { "epoch": 2.325542570951586, "grad_norm": 1.0237676907655286, "learning_rate": 1.4624062430592746e-06, "loss": 0.2965, "step": 6965 }, { "epoch": 2.3258764607679465, "grad_norm": 1.0594862747810452, "learning_rate": 1.4610336765091814e-06, "loss": 0.3072, "step": 6966 }, { "epoch": 2.326210350584307, "grad_norm": 1.029129190281934, "learning_rate": 1.4596616441650092e-06, "loss": 0.303, "step": 6967 }, { "epoch": 2.3265442404006675, "grad_norm": 1.0423803540560324, "learning_rate": 1.4582901462338638e-06, "loss": 0.2973, "step": 6968 }, { "epoch": 2.3268781302170285, "grad_norm": 1.0205829780975086, "learning_rate": 1.456919182922774e-06, "loss": 0.2992, "step": 6969 }, { "epoch": 2.327212020033389, "grad_norm": 1.0202896607477943, "learning_rate": 1.4555487544386825e-06, "loss": 0.2832, "step": 6970 }, { "epoch": 2.3275459098497495, "grad_norm": 1.0793504854741645, "learning_rate": 1.4541788609884588e-06, "loss": 0.3071, "step": 6971 }, { "epoch": 2.32787979966611, "grad_norm": 1.0452909197214435, "learning_rate": 1.4528095027788869e-06, "loss": 0.3066, "step": 6972 }, { "epoch": 2.328213689482471, "grad_norm": 1.0082677693868765, "learning_rate": 1.4514406800166708e-06, "loss": 0.2835, "step": 6973 }, { "epoch": 2.3285475792988315, "grad_norm": 1.0212421329297314, "learning_rate": 1.4500723929084336e-06, "loss": 0.3069, "step": 6974 }, { "epoch": 2.328881469115192, "grad_norm": 1.0392963447672625, "learning_rate": 1.4487046416607187e-06, "loss": 0.2986, "step": 6975 }, { "epoch": 2.3292153589315525, "grad_norm": 1.0441493919545024, "learning_rate": 1.4473374264799866e-06, "loss": 0.2983, "step": 6976 }, { "epoch": 2.329549248747913, "grad_norm": 1.0323705159316952, "learning_rate": 1.4459707475726191e-06, "loss": 0.3017, "step": 6977 }, { "epoch": 2.329883138564274, "grad_norm": 1.0091225756146773, "learning_rate": 1.4446046051449136e-06, "loss": 0.2911, "step": 6978 }, { "epoch": 2.3302170283806345, "grad_norm": 1.0440817821439274, "learning_rate": 1.4432389994030942e-06, "loss": 0.2842, "step": 6979 }, { "epoch": 2.330550918196995, "grad_norm": 1.050791840053477, "learning_rate": 1.4418739305532936e-06, "loss": 0.2995, "step": 6980 }, { "epoch": 2.3308848080133555, "grad_norm": 0.9806120078550052, "learning_rate": 1.44050939880157e-06, "loss": 0.2907, "step": 6981 }, { "epoch": 2.331218697829716, "grad_norm": 0.9814633272813278, "learning_rate": 1.4391454043538993e-06, "loss": 0.2819, "step": 6982 }, { "epoch": 2.331552587646077, "grad_norm": 1.0468827574264823, "learning_rate": 1.4377819474161747e-06, "loss": 0.3068, "step": 6983 }, { "epoch": 2.3318864774624375, "grad_norm": 1.0340637593614355, "learning_rate": 1.4364190281942142e-06, "loss": 0.3042, "step": 6984 }, { "epoch": 2.332220367278798, "grad_norm": 1.0103867550351542, "learning_rate": 1.435056646893745e-06, "loss": 0.2871, "step": 6985 }, { "epoch": 2.3325542570951585, "grad_norm": 1.0257272122893655, "learning_rate": 1.4336948037204195e-06, "loss": 0.3003, "step": 6986 }, { "epoch": 2.332888146911519, "grad_norm": 0.9971857346303683, "learning_rate": 1.4323334988798072e-06, "loss": 0.2938, "step": 6987 }, { "epoch": 2.33322203672788, "grad_norm": 0.9963201987462658, "learning_rate": 1.4309727325773986e-06, "loss": 0.2907, "step": 6988 }, { "epoch": 2.3335559265442405, "grad_norm": 1.0548400103291378, "learning_rate": 1.4296125050186e-06, "loss": 0.2955, "step": 6989 }, { "epoch": 2.333889816360601, "grad_norm": 1.0131848690436178, "learning_rate": 1.4282528164087384e-06, "loss": 0.2918, "step": 6990 }, { "epoch": 2.3342237061769615, "grad_norm": 1.0232886438669089, "learning_rate": 1.426893666953054e-06, "loss": 0.2994, "step": 6991 }, { "epoch": 2.334557595993322, "grad_norm": 0.9798147612420771, "learning_rate": 1.4255350568567156e-06, "loss": 0.274, "step": 6992 }, { "epoch": 2.334891485809683, "grad_norm": 0.9834261046946662, "learning_rate": 1.4241769863248018e-06, "loss": 0.294, "step": 6993 }, { "epoch": 2.3352253756260435, "grad_norm": 0.9880670454763489, "learning_rate": 1.4228194555623137e-06, "loss": 0.2985, "step": 6994 }, { "epoch": 2.335559265442404, "grad_norm": 1.0332269542294854, "learning_rate": 1.4214624647741704e-06, "loss": 0.2999, "step": 6995 }, { "epoch": 2.3358931552587645, "grad_norm": 1.0140151918087206, "learning_rate": 1.420106014165209e-06, "loss": 0.2954, "step": 6996 }, { "epoch": 2.336227045075125, "grad_norm": 1.026983969624807, "learning_rate": 1.4187501039401852e-06, "loss": 0.2878, "step": 6997 }, { "epoch": 2.336560934891486, "grad_norm": 0.9982307829633116, "learning_rate": 1.4173947343037737e-06, "loss": 0.2903, "step": 6998 }, { "epoch": 2.3368948247078465, "grad_norm": 1.0545350629298063, "learning_rate": 1.4160399054605662e-06, "loss": 0.3072, "step": 6999 }, { "epoch": 2.337228714524207, "grad_norm": 1.0445049646191764, "learning_rate": 1.4146856176150747e-06, "loss": 0.3017, "step": 7000 }, { "epoch": 2.3375626043405675, "grad_norm": 1.0005104036836208, "learning_rate": 1.4133318709717286e-06, "loss": 0.292, "step": 7001 }, { "epoch": 2.3378964941569285, "grad_norm": 1.0673128341589404, "learning_rate": 1.4119786657348743e-06, "loss": 0.3048, "step": 7002 }, { "epoch": 2.338230383973289, "grad_norm": 0.9754193738756042, "learning_rate": 1.410626002108778e-06, "loss": 0.282, "step": 7003 }, { "epoch": 2.3385642737896495, "grad_norm": 1.0315042988001435, "learning_rate": 1.4092738802976253e-06, "loss": 0.304, "step": 7004 }, { "epoch": 2.33889816360601, "grad_norm": 1.0571528630170632, "learning_rate": 1.4079223005055197e-06, "loss": 0.3074, "step": 7005 }, { "epoch": 2.3392320534223705, "grad_norm": 0.9864241096251909, "learning_rate": 1.406571262936478e-06, "loss": 0.2959, "step": 7006 }, { "epoch": 2.339565943238731, "grad_norm": 1.011550394886038, "learning_rate": 1.4052207677944396e-06, "loss": 0.2926, "step": 7007 }, { "epoch": 2.339899833055092, "grad_norm": 1.0351303222027233, "learning_rate": 1.4038708152832636e-06, "loss": 0.3125, "step": 7008 }, { "epoch": 2.3402337228714525, "grad_norm": 1.032284586897697, "learning_rate": 1.4025214056067237e-06, "loss": 0.2995, "step": 7009 }, { "epoch": 2.340567612687813, "grad_norm": 1.003982965178435, "learning_rate": 1.401172538968515e-06, "loss": 0.3008, "step": 7010 }, { "epoch": 2.3409015025041735, "grad_norm": 1.0370876133009117, "learning_rate": 1.3998242155722446e-06, "loss": 0.3105, "step": 7011 }, { "epoch": 2.3412353923205345, "grad_norm": 1.019242547404917, "learning_rate": 1.398476435621442e-06, "loss": 0.2984, "step": 7012 }, { "epoch": 2.341569282136895, "grad_norm": 1.0055322418665138, "learning_rate": 1.3971291993195574e-06, "loss": 0.2873, "step": 7013 }, { "epoch": 2.3419031719532555, "grad_norm": 1.0166989296668496, "learning_rate": 1.3957825068699538e-06, "loss": 0.2922, "step": 7014 }, { "epoch": 2.342237061769616, "grad_norm": 1.0539586471780167, "learning_rate": 1.3944363584759157e-06, "loss": 0.2927, "step": 7015 }, { "epoch": 2.3425709515859765, "grad_norm": 1.0352747458520004, "learning_rate": 1.3930907543406391e-06, "loss": 0.2946, "step": 7016 }, { "epoch": 2.342904841402337, "grad_norm": 1.0456463611231934, "learning_rate": 1.3917456946672476e-06, "loss": 0.3008, "step": 7017 }, { "epoch": 2.343238731218698, "grad_norm": 1.0382500350843074, "learning_rate": 1.3904011796587752e-06, "loss": 0.3069, "step": 7018 }, { "epoch": 2.3435726210350585, "grad_norm": 0.9984565323065435, "learning_rate": 1.389057209518177e-06, "loss": 0.2971, "step": 7019 }, { "epoch": 2.343906510851419, "grad_norm": 0.9841011170094467, "learning_rate": 1.387713784448324e-06, "loss": 0.3043, "step": 7020 }, { "epoch": 2.3442404006677795, "grad_norm": 1.0036997496718112, "learning_rate": 1.3863709046520062e-06, "loss": 0.2901, "step": 7021 }, { "epoch": 2.3445742904841405, "grad_norm": 1.0080235283348837, "learning_rate": 1.3850285703319316e-06, "loss": 0.2928, "step": 7022 }, { "epoch": 2.344908180300501, "grad_norm": 1.0547927365405867, "learning_rate": 1.383686781690724e-06, "loss": 0.2919, "step": 7023 }, { "epoch": 2.3452420701168615, "grad_norm": 1.033612664806664, "learning_rate": 1.3823455389309249e-06, "loss": 0.3072, "step": 7024 }, { "epoch": 2.345575959933222, "grad_norm": 1.0181467526265033, "learning_rate": 1.3810048422549992e-06, "loss": 0.2898, "step": 7025 }, { "epoch": 2.3459098497495825, "grad_norm": 1.0395395095646665, "learning_rate": 1.37966469186532e-06, "loss": 0.3005, "step": 7026 }, { "epoch": 2.346243739565943, "grad_norm": 1.0390585387264353, "learning_rate": 1.3783250879641846e-06, "loss": 0.3043, "step": 7027 }, { "epoch": 2.346577629382304, "grad_norm": 1.0369396111515568, "learning_rate": 1.3769860307538035e-06, "loss": 0.3019, "step": 7028 }, { "epoch": 2.3469115191986645, "grad_norm": 1.0023316295364635, "learning_rate": 1.3756475204363107e-06, "loss": 0.2879, "step": 7029 }, { "epoch": 2.347245409015025, "grad_norm": 1.0246896409747461, "learning_rate": 1.3743095572137532e-06, "loss": 0.2936, "step": 7030 }, { "epoch": 2.3475792988313855, "grad_norm": 0.99669302598877, "learning_rate": 1.3729721412880936e-06, "loss": 0.287, "step": 7031 }, { "epoch": 2.3479131886477465, "grad_norm": 1.0018768172691448, "learning_rate": 1.3716352728612148e-06, "loss": 0.2941, "step": 7032 }, { "epoch": 2.348247078464107, "grad_norm": 1.0370955252890695, "learning_rate": 1.370298952134919e-06, "loss": 0.3062, "step": 7033 }, { "epoch": 2.3485809682804675, "grad_norm": 1.0481116911684227, "learning_rate": 1.368963179310922e-06, "loss": 0.3163, "step": 7034 }, { "epoch": 2.348914858096828, "grad_norm": 1.022432745703983, "learning_rate": 1.3676279545908595e-06, "loss": 0.3016, "step": 7035 }, { "epoch": 2.3492487479131885, "grad_norm": 1.0363113118946627, "learning_rate": 1.366293278176281e-06, "loss": 0.3154, "step": 7036 }, { "epoch": 2.349582637729549, "grad_norm": 1.0148287475869946, "learning_rate": 1.3649591502686576e-06, "loss": 0.2929, "step": 7037 }, { "epoch": 2.34991652754591, "grad_norm": 0.9872924879369482, "learning_rate": 1.3636255710693746e-06, "loss": 0.2928, "step": 7038 }, { "epoch": 2.3502504173622705, "grad_norm": 1.0075350891461954, "learning_rate": 1.3622925407797354e-06, "loss": 0.2912, "step": 7039 }, { "epoch": 2.350584307178631, "grad_norm": 1.0090530517959577, "learning_rate": 1.3609600596009608e-06, "loss": 0.295, "step": 7040 }, { "epoch": 2.3509181969949915, "grad_norm": 1.0593966096780925, "learning_rate": 1.3596281277341882e-06, "loss": 0.2953, "step": 7041 }, { "epoch": 2.3512520868113524, "grad_norm": 1.0111950016291658, "learning_rate": 1.3582967453804723e-06, "loss": 0.2961, "step": 7042 }, { "epoch": 2.351585976627713, "grad_norm": 0.9779971465690248, "learning_rate": 1.3569659127407853e-06, "loss": 0.2836, "step": 7043 }, { "epoch": 2.3519198664440735, "grad_norm": 0.9885274424331769, "learning_rate": 1.3556356300160156e-06, "loss": 0.2862, "step": 7044 }, { "epoch": 2.352253756260434, "grad_norm": 1.0092576147916117, "learning_rate": 1.3543058974069688e-06, "loss": 0.2915, "step": 7045 }, { "epoch": 2.3525876460767945, "grad_norm": 1.0091896924373873, "learning_rate": 1.352976715114368e-06, "loss": 0.2819, "step": 7046 }, { "epoch": 2.3529215358931554, "grad_norm": 1.0089606690354962, "learning_rate": 1.351648083338853e-06, "loss": 0.2925, "step": 7047 }, { "epoch": 2.353255425709516, "grad_norm": 1.01689765362674, "learning_rate": 1.3503200022809798e-06, "loss": 0.3055, "step": 7048 }, { "epoch": 2.3535893155258765, "grad_norm": 1.009589559121288, "learning_rate": 1.3489924721412206e-06, "loss": 0.2937, "step": 7049 }, { "epoch": 2.353923205342237, "grad_norm": 1.0271486265228875, "learning_rate": 1.347665493119969e-06, "loss": 0.3022, "step": 7050 }, { "epoch": 2.3542570951585975, "grad_norm": 1.0168236088401081, "learning_rate": 1.3463390654175318e-06, "loss": 0.2985, "step": 7051 }, { "epoch": 2.3545909849749584, "grad_norm": 1.0267216695723445, "learning_rate": 1.34501318923413e-06, "loss": 0.3051, "step": 7052 }, { "epoch": 2.354924874791319, "grad_norm": 1.0132197008650032, "learning_rate": 1.3436878647699042e-06, "loss": 0.288, "step": 7053 }, { "epoch": 2.3552587646076795, "grad_norm": 1.0567964497540137, "learning_rate": 1.3423630922249148e-06, "loss": 0.3004, "step": 7054 }, { "epoch": 2.35559265442404, "grad_norm": 1.0331082244715182, "learning_rate": 1.3410388717991341e-06, "loss": 0.2906, "step": 7055 }, { "epoch": 2.3559265442404005, "grad_norm": 1.0383037916713584, "learning_rate": 1.3397152036924544e-06, "loss": 0.3036, "step": 7056 }, { "epoch": 2.3562604340567614, "grad_norm": 1.0224824064167586, "learning_rate": 1.338392088104678e-06, "loss": 0.2939, "step": 7057 }, { "epoch": 2.356594323873122, "grad_norm": 1.0058349933771922, "learning_rate": 1.3370695252355342e-06, "loss": 0.2877, "step": 7058 }, { "epoch": 2.3569282136894825, "grad_norm": 1.011441427091464, "learning_rate": 1.3357475152846616e-06, "loss": 0.2835, "step": 7059 }, { "epoch": 2.357262103505843, "grad_norm": 0.9869308684464189, "learning_rate": 1.3344260584516172e-06, "loss": 0.2899, "step": 7060 }, { "epoch": 2.3575959933222035, "grad_norm": 0.9918619126691538, "learning_rate": 1.3331051549358743e-06, "loss": 0.2957, "step": 7061 }, { "epoch": 2.3579298831385644, "grad_norm": 1.032741882442344, "learning_rate": 1.3317848049368231e-06, "loss": 0.307, "step": 7062 }, { "epoch": 2.358263772954925, "grad_norm": 1.0562018124376666, "learning_rate": 1.3304650086537707e-06, "loss": 0.3012, "step": 7063 }, { "epoch": 2.3585976627712855, "grad_norm": 1.0277290072074636, "learning_rate": 1.3291457662859385e-06, "loss": 0.2908, "step": 7064 }, { "epoch": 2.358931552587646, "grad_norm": 1.0532275512449059, "learning_rate": 1.327827078032467e-06, "loss": 0.3084, "step": 7065 }, { "epoch": 2.3592654424040065, "grad_norm": 0.9984251782045086, "learning_rate": 1.3265089440924113e-06, "loss": 0.2866, "step": 7066 }, { "epoch": 2.3595993322203674, "grad_norm": 1.0229418893105682, "learning_rate": 1.325191364664743e-06, "loss": 0.3094, "step": 7067 }, { "epoch": 2.359933222036728, "grad_norm": 1.0105939316929806, "learning_rate": 1.3238743399483511e-06, "loss": 0.2849, "step": 7068 }, { "epoch": 2.3602671118530885, "grad_norm": 0.9895133976189897, "learning_rate": 1.3225578701420378e-06, "loss": 0.2988, "step": 7069 }, { "epoch": 2.360601001669449, "grad_norm": 1.0421238597536706, "learning_rate": 1.3212419554445278e-06, "loss": 0.2977, "step": 7070 }, { "epoch": 2.36093489148581, "grad_norm": 1.0509894258432986, "learning_rate": 1.3199265960544566e-06, "loss": 0.3004, "step": 7071 }, { "epoch": 2.3612687813021704, "grad_norm": 0.9866042539011429, "learning_rate": 1.3186117921703762e-06, "loss": 0.2918, "step": 7072 }, { "epoch": 2.361602671118531, "grad_norm": 1.0432018228701994, "learning_rate": 1.3172975439907553e-06, "loss": 0.2985, "step": 7073 }, { "epoch": 2.3619365609348915, "grad_norm": 1.008266217059214, "learning_rate": 1.3159838517139795e-06, "loss": 0.2811, "step": 7074 }, { "epoch": 2.362270450751252, "grad_norm": 1.0239055963727914, "learning_rate": 1.314670715538352e-06, "loss": 0.2882, "step": 7075 }, { "epoch": 2.3626043405676125, "grad_norm": 1.0362885065666008, "learning_rate": 1.3133581356620916e-06, "loss": 0.3055, "step": 7076 }, { "epoch": 2.3629382303839734, "grad_norm": 1.0018453031873717, "learning_rate": 1.3120461122833277e-06, "loss": 0.2941, "step": 7077 }, { "epoch": 2.363272120200334, "grad_norm": 1.0300187324036383, "learning_rate": 1.3107346456001101e-06, "loss": 0.3041, "step": 7078 }, { "epoch": 2.3636060100166945, "grad_norm": 1.0679948122359348, "learning_rate": 1.3094237358104078e-06, "loss": 0.3025, "step": 7079 }, { "epoch": 2.363939899833055, "grad_norm": 1.0363528335869643, "learning_rate": 1.3081133831121001e-06, "loss": 0.3099, "step": 7080 }, { "epoch": 2.364273789649416, "grad_norm": 1.0238240056323593, "learning_rate": 1.306803587702985e-06, "loss": 0.2938, "step": 7081 }, { "epoch": 2.3646076794657764, "grad_norm": 0.9905507053133181, "learning_rate": 1.3054943497807748e-06, "loss": 0.2834, "step": 7082 }, { "epoch": 2.364941569282137, "grad_norm": 1.0354675587377766, "learning_rate": 1.3041856695430999e-06, "loss": 0.2965, "step": 7083 }, { "epoch": 2.3652754590984975, "grad_norm": 1.0528845704490497, "learning_rate": 1.302877547187504e-06, "loss": 0.2987, "step": 7084 }, { "epoch": 2.365609348914858, "grad_norm": 1.0299255129542302, "learning_rate": 1.301569982911448e-06, "loss": 0.297, "step": 7085 }, { "epoch": 2.3659432387312185, "grad_norm": 1.0467054337714015, "learning_rate": 1.3002629769123093e-06, "loss": 0.3057, "step": 7086 }, { "epoch": 2.3662771285475794, "grad_norm": 1.0583294061245667, "learning_rate": 1.298956529387379e-06, "loss": 0.295, "step": 7087 }, { "epoch": 2.36661101836394, "grad_norm": 1.0205834006008112, "learning_rate": 1.2976506405338651e-06, "loss": 0.3013, "step": 7088 }, { "epoch": 2.3669449081803005, "grad_norm": 1.006355784159531, "learning_rate": 1.2963453105488916e-06, "loss": 0.3021, "step": 7089 }, { "epoch": 2.367278797996661, "grad_norm": 1.0382178135907818, "learning_rate": 1.2950405396294957e-06, "loss": 0.2887, "step": 7090 }, { "epoch": 2.367612687813022, "grad_norm": 1.0157823475728538, "learning_rate": 1.2937363279726373e-06, "loss": 0.2894, "step": 7091 }, { "epoch": 2.3679465776293824, "grad_norm": 1.0331327033562194, "learning_rate": 1.2924326757751814e-06, "loss": 0.2976, "step": 7092 }, { "epoch": 2.368280467445743, "grad_norm": 1.0115561070871735, "learning_rate": 1.2911295832339165e-06, "loss": 0.283, "step": 7093 }, { "epoch": 2.3686143572621035, "grad_norm": 1.0310905070616423, "learning_rate": 1.2898270505455417e-06, "loss": 0.2998, "step": 7094 }, { "epoch": 2.368948247078464, "grad_norm": 1.0247484456147262, "learning_rate": 1.288525077906677e-06, "loss": 0.302, "step": 7095 }, { "epoch": 2.3692821368948245, "grad_norm": 1.0009614933153101, "learning_rate": 1.2872236655138537e-06, "loss": 0.2969, "step": 7096 }, { "epoch": 2.3696160267111854, "grad_norm": 1.0047826472838242, "learning_rate": 1.2859228135635205e-06, "loss": 0.2966, "step": 7097 }, { "epoch": 2.369949916527546, "grad_norm": 1.0058587708058742, "learning_rate": 1.2846225222520365e-06, "loss": 0.2929, "step": 7098 }, { "epoch": 2.3702838063439065, "grad_norm": 1.073333168580625, "learning_rate": 1.2833227917756847e-06, "loss": 0.311, "step": 7099 }, { "epoch": 2.370617696160267, "grad_norm": 1.0140527513655597, "learning_rate": 1.282023622330658e-06, "loss": 0.2935, "step": 7100 }, { "epoch": 2.370951585976628, "grad_norm": 1.0110766223027907, "learning_rate": 1.2807250141130651e-06, "loss": 0.2864, "step": 7101 }, { "epoch": 2.3712854757929884, "grad_norm": 1.0281036104785062, "learning_rate": 1.2794269673189302e-06, "loss": 0.2964, "step": 7102 }, { "epoch": 2.371619365609349, "grad_norm": 1.078352447170794, "learning_rate": 1.2781294821441937e-06, "loss": 0.291, "step": 7103 }, { "epoch": 2.3719532554257095, "grad_norm": 0.9919599395246345, "learning_rate": 1.2768325587847107e-06, "loss": 0.296, "step": 7104 }, { "epoch": 2.37228714524207, "grad_norm": 1.0716546139432925, "learning_rate": 1.2755361974362512e-06, "loss": 0.3197, "step": 7105 }, { "epoch": 2.3726210350584305, "grad_norm": 1.0100747514591828, "learning_rate": 1.2742403982945e-06, "loss": 0.2974, "step": 7106 }, { "epoch": 2.3729549248747914, "grad_norm": 1.0244223845944957, "learning_rate": 1.2729451615550587e-06, "loss": 0.2913, "step": 7107 }, { "epoch": 2.373288814691152, "grad_norm": 1.013030031715388, "learning_rate": 1.2716504874134422e-06, "loss": 0.2929, "step": 7108 }, { "epoch": 2.3736227045075124, "grad_norm": 1.0691123677834966, "learning_rate": 1.2703563760650806e-06, "loss": 0.3072, "step": 7109 }, { "epoch": 2.373956594323873, "grad_norm": 1.0108477110123355, "learning_rate": 1.2690628277053207e-06, "loss": 0.286, "step": 7110 }, { "epoch": 2.374290484140234, "grad_norm": 1.0131801858547433, "learning_rate": 1.2677698425294217e-06, "loss": 0.2885, "step": 7111 }, { "epoch": 2.3746243739565944, "grad_norm": 1.0406512158821242, "learning_rate": 1.2664774207325631e-06, "loss": 0.3022, "step": 7112 }, { "epoch": 2.374958263772955, "grad_norm": 1.013854210575489, "learning_rate": 1.265185562509832e-06, "loss": 0.2818, "step": 7113 }, { "epoch": 2.3752921535893154, "grad_norm": 1.0407892173053366, "learning_rate": 1.2638942680562344e-06, "loss": 0.3059, "step": 7114 }, { "epoch": 2.375626043405676, "grad_norm": 1.0657360453843787, "learning_rate": 1.26260353756669e-06, "loss": 0.3084, "step": 7115 }, { "epoch": 2.375959933222037, "grad_norm": 1.0357569008908114, "learning_rate": 1.261313371236037e-06, "loss": 0.2962, "step": 7116 }, { "epoch": 2.3762938230383974, "grad_norm": 1.0266978441255745, "learning_rate": 1.2600237692590256e-06, "loss": 0.2923, "step": 7117 }, { "epoch": 2.376627712854758, "grad_norm": 0.9978348344900116, "learning_rate": 1.2587347318303185e-06, "loss": 0.2879, "step": 7118 }, { "epoch": 2.3769616026711184, "grad_norm": 1.0246137245655147, "learning_rate": 1.257446259144494e-06, "loss": 0.2877, "step": 7119 }, { "epoch": 2.377295492487479, "grad_norm": 1.0560507225279634, "learning_rate": 1.2561583513960518e-06, "loss": 0.3047, "step": 7120 }, { "epoch": 2.37762938230384, "grad_norm": 1.060634028197039, "learning_rate": 1.2548710087793975e-06, "loss": 0.3072, "step": 7121 }, { "epoch": 2.3779632721202004, "grad_norm": 1.0405381065983617, "learning_rate": 1.2535842314888579e-06, "loss": 0.3011, "step": 7122 }, { "epoch": 2.378297161936561, "grad_norm": 1.0113435407729972, "learning_rate": 1.2522980197186674e-06, "loss": 0.2957, "step": 7123 }, { "epoch": 2.3786310517529214, "grad_norm": 1.0603851793675476, "learning_rate": 1.2510123736629825e-06, "loss": 0.313, "step": 7124 }, { "epoch": 2.378964941569282, "grad_norm": 1.0144101988959104, "learning_rate": 1.249727293515871e-06, "loss": 0.2968, "step": 7125 }, { "epoch": 2.379298831385643, "grad_norm": 1.0388585140097502, "learning_rate": 1.2484427794713144e-06, "loss": 0.3047, "step": 7126 }, { "epoch": 2.3796327212020034, "grad_norm": 1.0478194472395383, "learning_rate": 1.2471588317232102e-06, "loss": 0.3003, "step": 7127 }, { "epoch": 2.379966611018364, "grad_norm": 1.0144466205834857, "learning_rate": 1.2458754504653691e-06, "loss": 0.2936, "step": 7128 }, { "epoch": 2.3803005008347244, "grad_norm": 1.0349928319387451, "learning_rate": 1.2445926358915182e-06, "loss": 0.302, "step": 7129 }, { "epoch": 2.380634390651085, "grad_norm": 0.9836985766221924, "learning_rate": 1.2433103881952979e-06, "loss": 0.2822, "step": 7130 }, { "epoch": 2.380968280467446, "grad_norm": 1.0159428239638806, "learning_rate": 1.2420287075702609e-06, "loss": 0.2991, "step": 7131 }, { "epoch": 2.3813021702838064, "grad_norm": 0.9835253239023483, "learning_rate": 1.2407475942098817e-06, "loss": 0.282, "step": 7132 }, { "epoch": 2.381636060100167, "grad_norm": 1.019031653234652, "learning_rate": 1.2394670483075389e-06, "loss": 0.312, "step": 7133 }, { "epoch": 2.3819699499165274, "grad_norm": 1.0055694170215856, "learning_rate": 1.2381870700565324e-06, "loss": 0.2921, "step": 7134 }, { "epoch": 2.382303839732888, "grad_norm": 0.999949648855721, "learning_rate": 1.236907659650074e-06, "loss": 0.2802, "step": 7135 }, { "epoch": 2.382637729549249, "grad_norm": 0.9716891373212939, "learning_rate": 1.2356288172812898e-06, "loss": 0.2929, "step": 7136 }, { "epoch": 2.3829716193656094, "grad_norm": 1.05960288681483, "learning_rate": 1.234350543143224e-06, "loss": 0.3029, "step": 7137 }, { "epoch": 2.38330550918197, "grad_norm": 1.0116944525500604, "learning_rate": 1.2330728374288281e-06, "loss": 0.2922, "step": 7138 }, { "epoch": 2.3836393989983304, "grad_norm": 1.0284745483144002, "learning_rate": 1.2317957003309727e-06, "loss": 0.2954, "step": 7139 }, { "epoch": 2.3839732888146914, "grad_norm": 0.9947680234353183, "learning_rate": 1.2305191320424398e-06, "loss": 0.2854, "step": 7140 }, { "epoch": 2.384307178631052, "grad_norm": 1.0257548418472255, "learning_rate": 1.22924313275593e-06, "loss": 0.2901, "step": 7141 }, { "epoch": 2.3846410684474124, "grad_norm": 1.0076657290304776, "learning_rate": 1.227967702664053e-06, "loss": 0.2827, "step": 7142 }, { "epoch": 2.384974958263773, "grad_norm": 1.0200120563682262, "learning_rate": 1.226692841959337e-06, "loss": 0.2849, "step": 7143 }, { "epoch": 2.3853088480801334, "grad_norm": 0.9856246082673485, "learning_rate": 1.2254185508342164e-06, "loss": 0.2757, "step": 7144 }, { "epoch": 2.385642737896494, "grad_norm": 1.0127706586181624, "learning_rate": 1.2241448294810505e-06, "loss": 0.2876, "step": 7145 }, { "epoch": 2.385976627712855, "grad_norm": 0.9872197713486176, "learning_rate": 1.2228716780921046e-06, "loss": 0.2917, "step": 7146 }, { "epoch": 2.3863105175292154, "grad_norm": 1.0251650011661153, "learning_rate": 1.2215990968595614e-06, "loss": 0.2852, "step": 7147 }, { "epoch": 2.386644407345576, "grad_norm": 1.0272150894705698, "learning_rate": 1.2203270859755163e-06, "loss": 0.3017, "step": 7148 }, { "epoch": 2.3869782971619364, "grad_norm": 1.0742217109369516, "learning_rate": 1.2190556456319792e-06, "loss": 0.3108, "step": 7149 }, { "epoch": 2.3873121869782974, "grad_norm": 0.9810444859801363, "learning_rate": 1.2177847760208733e-06, "loss": 0.2859, "step": 7150 }, { "epoch": 2.387646076794658, "grad_norm": 1.018917333830258, "learning_rate": 1.216514477334036e-06, "loss": 0.287, "step": 7151 }, { "epoch": 2.3879799666110184, "grad_norm": 1.042466215249886, "learning_rate": 1.215244749763219e-06, "loss": 0.2949, "step": 7152 }, { "epoch": 2.388313856427379, "grad_norm": 0.9988682482545747, "learning_rate": 1.2139755935000868e-06, "loss": 0.2835, "step": 7153 }, { "epoch": 2.3886477462437394, "grad_norm": 0.9998172292984464, "learning_rate": 1.2127070087362187e-06, "loss": 0.2849, "step": 7154 }, { "epoch": 2.3889816360601, "grad_norm": 1.006607172496108, "learning_rate": 1.2114389956631062e-06, "loss": 0.2854, "step": 7155 }, { "epoch": 2.389315525876461, "grad_norm": 1.033451958573905, "learning_rate": 1.2101715544721555e-06, "loss": 0.3102, "step": 7156 }, { "epoch": 2.3896494156928214, "grad_norm": 1.0531141735871727, "learning_rate": 1.2089046853546876e-06, "loss": 0.3055, "step": 7157 }, { "epoch": 2.389983305509182, "grad_norm": 1.0374977839513109, "learning_rate": 1.2076383885019378e-06, "loss": 0.2927, "step": 7158 }, { "epoch": 2.3903171953255424, "grad_norm": 0.9828360188172279, "learning_rate": 1.2063726641050488e-06, "loss": 0.2876, "step": 7159 }, { "epoch": 2.3906510851419034, "grad_norm": 0.9779932136473238, "learning_rate": 1.2051075123550826e-06, "loss": 0.2829, "step": 7160 }, { "epoch": 2.390984974958264, "grad_norm": 1.0025754270910527, "learning_rate": 1.2038429334430158e-06, "loss": 0.2931, "step": 7161 }, { "epoch": 2.3913188647746244, "grad_norm": 1.0101649668493196, "learning_rate": 1.2025789275597344e-06, "loss": 0.2841, "step": 7162 }, { "epoch": 2.391652754590985, "grad_norm": 1.086908732868904, "learning_rate": 1.201315494896042e-06, "loss": 0.3096, "step": 7163 }, { "epoch": 2.3919866444073454, "grad_norm": 0.9799048327875909, "learning_rate": 1.2000526356426483e-06, "loss": 0.2798, "step": 7164 }, { "epoch": 2.392320534223706, "grad_norm": 1.053277467085049, "learning_rate": 1.1987903499901859e-06, "loss": 0.3067, "step": 7165 }, { "epoch": 2.392654424040067, "grad_norm": 1.0776480670439204, "learning_rate": 1.1975286381291957e-06, "loss": 0.3078, "step": 7166 }, { "epoch": 2.3929883138564274, "grad_norm": 1.0008383933974694, "learning_rate": 1.1962675002501324e-06, "loss": 0.2922, "step": 7167 }, { "epoch": 2.393322203672788, "grad_norm": 1.0624659838834847, "learning_rate": 1.1950069365433652e-06, "loss": 0.3014, "step": 7168 }, { "epoch": 2.3936560934891484, "grad_norm": 1.0834803393726486, "learning_rate": 1.1937469471991724e-06, "loss": 0.31, "step": 7169 }, { "epoch": 2.3939899833055094, "grad_norm": 1.0086493922444821, "learning_rate": 1.1924875324077529e-06, "loss": 0.29, "step": 7170 }, { "epoch": 2.39432387312187, "grad_norm": 0.9982797531935065, "learning_rate": 1.1912286923592138e-06, "loss": 0.2772, "step": 7171 }, { "epoch": 2.3946577629382304, "grad_norm": 0.9875824025358658, "learning_rate": 1.1899704272435758e-06, "loss": 0.2774, "step": 7172 }, { "epoch": 2.394991652754591, "grad_norm": 1.0671253675548373, "learning_rate": 1.1887127372507746e-06, "loss": 0.3011, "step": 7173 }, { "epoch": 2.3953255425709514, "grad_norm": 1.0662119780133323, "learning_rate": 1.187455622570658e-06, "loss": 0.3105, "step": 7174 }, { "epoch": 2.395659432387312, "grad_norm": 1.0208535169874795, "learning_rate": 1.1861990833929866e-06, "loss": 0.3013, "step": 7175 }, { "epoch": 2.395993322203673, "grad_norm": 1.0381444794632715, "learning_rate": 1.1849431199074352e-06, "loss": 0.2991, "step": 7176 }, { "epoch": 2.3963272120200334, "grad_norm": 1.0443765506796538, "learning_rate": 1.1836877323035895e-06, "loss": 0.2958, "step": 7177 }, { "epoch": 2.396661101836394, "grad_norm": 1.0190548679051032, "learning_rate": 1.182432920770954e-06, "loss": 0.2888, "step": 7178 }, { "epoch": 2.3969949916527544, "grad_norm": 1.0140912794461148, "learning_rate": 1.1811786854989377e-06, "loss": 0.301, "step": 7179 }, { "epoch": 2.3973288814691154, "grad_norm": 1.041533669651129, "learning_rate": 1.1799250266768685e-06, "loss": 0.2946, "step": 7180 }, { "epoch": 2.397662771285476, "grad_norm": 0.9999762106752265, "learning_rate": 1.1786719444939853e-06, "loss": 0.2823, "step": 7181 }, { "epoch": 2.3979966611018364, "grad_norm": 1.0892285064318765, "learning_rate": 1.1774194391394422e-06, "loss": 0.3053, "step": 7182 }, { "epoch": 2.398330550918197, "grad_norm": 1.1074005691732538, "learning_rate": 1.176167510802305e-06, "loss": 0.3031, "step": 7183 }, { "epoch": 2.3986644407345574, "grad_norm": 1.057848012378002, "learning_rate": 1.1749161596715491e-06, "loss": 0.2952, "step": 7184 }, { "epoch": 2.3989983305509184, "grad_norm": 1.008968775759639, "learning_rate": 1.1736653859360652e-06, "loss": 0.2929, "step": 7185 }, { "epoch": 2.399332220367279, "grad_norm": 1.0287239699131736, "learning_rate": 1.1724151897846608e-06, "loss": 0.2992, "step": 7186 }, { "epoch": 2.3996661101836394, "grad_norm": 1.0156177889886482, "learning_rate": 1.1711655714060505e-06, "loss": 0.2967, "step": 7187 }, { "epoch": 2.4, "grad_norm": 1.0566373284015245, "learning_rate": 1.1699165309888644e-06, "loss": 0.3026, "step": 7188 }, { "epoch": 2.4003338898163604, "grad_norm": 1.0250395193124402, "learning_rate": 1.168668068721644e-06, "loss": 0.2901, "step": 7189 }, { "epoch": 2.4006677796327214, "grad_norm": 1.0449032241662382, "learning_rate": 1.167420184792845e-06, "loss": 0.3068, "step": 7190 }, { "epoch": 2.401001669449082, "grad_norm": 1.0345907399934853, "learning_rate": 1.166172879390835e-06, "loss": 0.2994, "step": 7191 }, { "epoch": 2.4013355592654424, "grad_norm": 1.0702421212738338, "learning_rate": 1.1649261527038946e-06, "loss": 0.312, "step": 7192 }, { "epoch": 2.401669449081803, "grad_norm": 1.045149853612451, "learning_rate": 1.1636800049202162e-06, "loss": 0.2998, "step": 7193 }, { "epoch": 2.4020033388981634, "grad_norm": 1.0355430355908217, "learning_rate": 1.1624344362279055e-06, "loss": 0.3059, "step": 7194 }, { "epoch": 2.4023372287145244, "grad_norm": 1.0363596974345834, "learning_rate": 1.1611894468149815e-06, "loss": 0.2987, "step": 7195 }, { "epoch": 2.402671118530885, "grad_norm": 1.0141531637702483, "learning_rate": 1.1599450368693739e-06, "loss": 0.2911, "step": 7196 }, { "epoch": 2.4030050083472454, "grad_norm": 1.0319518329032675, "learning_rate": 1.158701206578927e-06, "loss": 0.3119, "step": 7197 }, { "epoch": 2.403338898163606, "grad_norm": 1.0137388804237453, "learning_rate": 1.1574579561313959e-06, "loss": 0.2938, "step": 7198 }, { "epoch": 2.4036727879799664, "grad_norm": 1.0571819911711806, "learning_rate": 1.1562152857144493e-06, "loss": 0.3086, "step": 7199 }, { "epoch": 2.4040066777963274, "grad_norm": 1.0537669999297503, "learning_rate": 1.154973195515668e-06, "loss": 0.2972, "step": 7200 }, { "epoch": 2.404340567612688, "grad_norm": 1.0409525841313843, "learning_rate": 1.153731685722545e-06, "loss": 0.2957, "step": 7201 }, { "epoch": 2.4046744574290484, "grad_norm": 1.0513195797585613, "learning_rate": 1.1524907565224841e-06, "loss": 0.2925, "step": 7202 }, { "epoch": 2.405008347245409, "grad_norm": 1.006847087307969, "learning_rate": 1.1512504081028065e-06, "loss": 0.288, "step": 7203 }, { "epoch": 2.4053422370617694, "grad_norm": 1.0150316089902514, "learning_rate": 1.1500106406507416e-06, "loss": 0.3005, "step": 7204 }, { "epoch": 2.4056761268781304, "grad_norm": 1.0147535494376787, "learning_rate": 1.1487714543534305e-06, "loss": 0.3025, "step": 7205 }, { "epoch": 2.406010016694491, "grad_norm": 1.0132252813800378, "learning_rate": 1.147532849397927e-06, "loss": 0.2967, "step": 7206 }, { "epoch": 2.4063439065108514, "grad_norm": 1.0266460169775729, "learning_rate": 1.1462948259712015e-06, "loss": 0.2942, "step": 7207 }, { "epoch": 2.406677796327212, "grad_norm": 1.0269015071561367, "learning_rate": 1.1450573842601309e-06, "loss": 0.2864, "step": 7208 }, { "epoch": 2.407011686143573, "grad_norm": 1.0359027936022827, "learning_rate": 1.1438205244515093e-06, "loss": 0.2972, "step": 7209 }, { "epoch": 2.4073455759599334, "grad_norm": 1.008114176634516, "learning_rate": 1.1425842467320352e-06, "loss": 0.2899, "step": 7210 }, { "epoch": 2.407679465776294, "grad_norm": 1.0257999883529445, "learning_rate": 1.1413485512883294e-06, "loss": 0.2862, "step": 7211 }, { "epoch": 2.4080133555926544, "grad_norm": 0.985606840794364, "learning_rate": 1.1401134383069172e-06, "loss": 0.2763, "step": 7212 }, { "epoch": 2.408347245409015, "grad_norm": 1.025305461086894, "learning_rate": 1.1388789079742396e-06, "loss": 0.2919, "step": 7213 }, { "epoch": 2.4086811352253754, "grad_norm": 1.0186162698117975, "learning_rate": 1.1376449604766483e-06, "loss": 0.2784, "step": 7214 }, { "epoch": 2.4090150250417364, "grad_norm": 1.05464564336414, "learning_rate": 1.1364115960004068e-06, "loss": 0.3051, "step": 7215 }, { "epoch": 2.409348914858097, "grad_norm": 1.093885822032371, "learning_rate": 1.1351788147316916e-06, "loss": 0.3082, "step": 7216 }, { "epoch": 2.4096828046744574, "grad_norm": 1.024935710461584, "learning_rate": 1.133946616856591e-06, "loss": 0.2865, "step": 7217 }, { "epoch": 2.410016694490818, "grad_norm": 1.0247799169593, "learning_rate": 1.1327150025611045e-06, "loss": 0.2904, "step": 7218 }, { "epoch": 2.410350584307179, "grad_norm": 1.028401669182662, "learning_rate": 1.1314839720311433e-06, "loss": 0.3, "step": 7219 }, { "epoch": 2.4106844741235394, "grad_norm": 1.0034907065315009, "learning_rate": 1.130253525452532e-06, "loss": 0.2925, "step": 7220 }, { "epoch": 2.4110183639399, "grad_norm": 1.0181203061555584, "learning_rate": 1.1290236630110058e-06, "loss": 0.3034, "step": 7221 }, { "epoch": 2.4113522537562604, "grad_norm": 1.0080547275459888, "learning_rate": 1.1277943848922107e-06, "loss": 0.288, "step": 7222 }, { "epoch": 2.411686143572621, "grad_norm": 1.032419105729212, "learning_rate": 1.126565691281709e-06, "loss": 0.2962, "step": 7223 }, { "epoch": 2.4120200333889814, "grad_norm": 1.0449922721243599, "learning_rate": 1.1253375823649708e-06, "loss": 0.3091, "step": 7224 }, { "epoch": 2.4123539232053424, "grad_norm": 1.0019331828497129, "learning_rate": 1.1241100583273768e-06, "loss": 0.2865, "step": 7225 }, { "epoch": 2.412687813021703, "grad_norm": 1.0812858178120257, "learning_rate": 1.1228831193542228e-06, "loss": 0.318, "step": 7226 }, { "epoch": 2.4130217028380634, "grad_norm": 1.0254461360765117, "learning_rate": 1.1216567656307126e-06, "loss": 0.2918, "step": 7227 }, { "epoch": 2.413355592654424, "grad_norm": 1.076849320151403, "learning_rate": 1.1204309973419675e-06, "loss": 0.2988, "step": 7228 }, { "epoch": 2.413689482470785, "grad_norm": 1.0503233636053333, "learning_rate": 1.1192058146730167e-06, "loss": 0.2952, "step": 7229 }, { "epoch": 2.4140233722871454, "grad_norm": 1.0165162227828828, "learning_rate": 1.117981217808798e-06, "loss": 0.2782, "step": 7230 }, { "epoch": 2.414357262103506, "grad_norm": 1.035841072594066, "learning_rate": 1.1167572069341647e-06, "loss": 0.2926, "step": 7231 }, { "epoch": 2.4146911519198664, "grad_norm": 1.0194501154803794, "learning_rate": 1.115533782233883e-06, "loss": 0.2946, "step": 7232 }, { "epoch": 2.415025041736227, "grad_norm": 0.9940137373320256, "learning_rate": 1.1143109438926275e-06, "loss": 0.2955, "step": 7233 }, { "epoch": 2.4153589315525874, "grad_norm": 1.0482041727786144, "learning_rate": 1.1130886920949846e-06, "loss": 0.3004, "step": 7234 }, { "epoch": 2.4156928213689484, "grad_norm": 1.0499625194288622, "learning_rate": 1.111867027025454e-06, "loss": 0.2887, "step": 7235 }, { "epoch": 2.416026711185309, "grad_norm": 0.98758718702501, "learning_rate": 1.1106459488684445e-06, "loss": 0.2808, "step": 7236 }, { "epoch": 2.4163606010016694, "grad_norm": 1.0176438756732578, "learning_rate": 1.109425457808278e-06, "loss": 0.2976, "step": 7237 }, { "epoch": 2.41669449081803, "grad_norm": 1.0220704586856848, "learning_rate": 1.1082055540291876e-06, "loss": 0.2874, "step": 7238 }, { "epoch": 2.417028380634391, "grad_norm": 1.022560622989897, "learning_rate": 1.1069862377153173e-06, "loss": 0.2968, "step": 7239 }, { "epoch": 2.4173622704507514, "grad_norm": 1.0523547454326108, "learning_rate": 1.1057675090507219e-06, "loss": 0.2903, "step": 7240 }, { "epoch": 2.417696160267112, "grad_norm": 1.081522705542665, "learning_rate": 1.1045493682193687e-06, "loss": 0.3111, "step": 7241 }, { "epoch": 2.4180300500834724, "grad_norm": 0.9849717222117045, "learning_rate": 1.1033318154051358e-06, "loss": 0.288, "step": 7242 }, { "epoch": 2.418363939899833, "grad_norm": 1.0260311753044074, "learning_rate": 1.1021148507918105e-06, "loss": 0.2873, "step": 7243 }, { "epoch": 2.418697829716194, "grad_norm": 1.0566954947678717, "learning_rate": 1.1008984745630979e-06, "loss": 0.3006, "step": 7244 }, { "epoch": 2.4190317195325544, "grad_norm": 1.016587528294866, "learning_rate": 1.099682686902605e-06, "loss": 0.2877, "step": 7245 }, { "epoch": 2.419365609348915, "grad_norm": 1.030889898849633, "learning_rate": 1.0984674879938566e-06, "loss": 0.2819, "step": 7246 }, { "epoch": 2.4196994991652754, "grad_norm": 1.042601634917814, "learning_rate": 1.0972528780202852e-06, "loss": 0.3004, "step": 7247 }, { "epoch": 2.420033388981636, "grad_norm": 1.060648543344774, "learning_rate": 1.0960388571652375e-06, "loss": 0.3077, "step": 7248 }, { "epoch": 2.420367278797997, "grad_norm": 1.1354074927070092, "learning_rate": 1.0948254256119695e-06, "loss": 0.306, "step": 7249 }, { "epoch": 2.4207011686143574, "grad_norm": 1.097889682779125, "learning_rate": 1.093612583543649e-06, "loss": 0.2964, "step": 7250 }, { "epoch": 2.421035058430718, "grad_norm": 1.017360430632109, "learning_rate": 1.0924003311433501e-06, "loss": 0.2902, "step": 7251 }, { "epoch": 2.4213689482470784, "grad_norm": 1.009140445972597, "learning_rate": 1.0911886685940666e-06, "loss": 0.29, "step": 7252 }, { "epoch": 2.421702838063439, "grad_norm": 1.0201424189693338, "learning_rate": 1.0899775960786968e-06, "loss": 0.2862, "step": 7253 }, { "epoch": 2.4220367278798, "grad_norm": 1.0094432076165767, "learning_rate": 1.0887671137800516e-06, "loss": 0.2972, "step": 7254 }, { "epoch": 2.4223706176961604, "grad_norm": 1.0315393162277697, "learning_rate": 1.0875572218808528e-06, "loss": 0.2853, "step": 7255 }, { "epoch": 2.422704507512521, "grad_norm": 1.0357562386457255, "learning_rate": 1.086347920563734e-06, "loss": 0.2824, "step": 7256 }, { "epoch": 2.4230383973288814, "grad_norm": 1.0493620484828778, "learning_rate": 1.085139210011238e-06, "loss": 0.3091, "step": 7257 }, { "epoch": 2.423372287145242, "grad_norm": 1.0122478105015762, "learning_rate": 1.0839310904058199e-06, "loss": 0.2891, "step": 7258 }, { "epoch": 2.423706176961603, "grad_norm": 1.0652774861880911, "learning_rate": 1.0827235619298449e-06, "loss": 0.2992, "step": 7259 }, { "epoch": 2.4240400667779634, "grad_norm": 0.9953200007638652, "learning_rate": 1.081516624765589e-06, "loss": 0.2888, "step": 7260 }, { "epoch": 2.424373956594324, "grad_norm": 1.0376501693077818, "learning_rate": 1.0803102790952387e-06, "loss": 0.2875, "step": 7261 }, { "epoch": 2.4247078464106844, "grad_norm": 0.998364883732308, "learning_rate": 1.079104525100892e-06, "loss": 0.2839, "step": 7262 }, { "epoch": 2.425041736227045, "grad_norm": 1.0132480836631086, "learning_rate": 1.0778993629645568e-06, "loss": 0.2842, "step": 7263 }, { "epoch": 2.425375626043406, "grad_norm": 1.0360662117453958, "learning_rate": 1.076694792868152e-06, "loss": 0.2937, "step": 7264 }, { "epoch": 2.4257095158597664, "grad_norm": 0.9996059779579555, "learning_rate": 1.0754908149935072e-06, "loss": 0.2863, "step": 7265 }, { "epoch": 2.426043405676127, "grad_norm": 1.0574068079631267, "learning_rate": 1.0742874295223627e-06, "loss": 0.3081, "step": 7266 }, { "epoch": 2.4263772954924874, "grad_norm": 0.9961956625604754, "learning_rate": 1.073084636636369e-06, "loss": 0.2904, "step": 7267 }, { "epoch": 2.4267111853088483, "grad_norm": 1.0990383366797385, "learning_rate": 1.0718824365170855e-06, "loss": 0.3102, "step": 7268 }, { "epoch": 2.427045075125209, "grad_norm": 1.0069371851870577, "learning_rate": 1.0706808293459875e-06, "loss": 0.2821, "step": 7269 }, { "epoch": 2.4273789649415694, "grad_norm": 1.013677293277814, "learning_rate": 1.0694798153044567e-06, "loss": 0.2905, "step": 7270 }, { "epoch": 2.42771285475793, "grad_norm": 1.0455667737572591, "learning_rate": 1.0682793945737835e-06, "loss": 0.3001, "step": 7271 }, { "epoch": 2.4280467445742904, "grad_norm": 1.0513590164431517, "learning_rate": 1.0670795673351703e-06, "loss": 0.2964, "step": 7272 }, { "epoch": 2.428380634390651, "grad_norm": 1.0621158003355287, "learning_rate": 1.0658803337697344e-06, "loss": 0.2918, "step": 7273 }, { "epoch": 2.428714524207012, "grad_norm": 1.1011805266154366, "learning_rate": 1.0646816940584976e-06, "loss": 0.289, "step": 7274 }, { "epoch": 2.4290484140233723, "grad_norm": 1.0147435844901387, "learning_rate": 1.0634836483823956e-06, "loss": 0.2921, "step": 7275 }, { "epoch": 2.429382303839733, "grad_norm": 1.050366057183726, "learning_rate": 1.0622861969222693e-06, "loss": 0.2963, "step": 7276 }, { "epoch": 2.4297161936560934, "grad_norm": 1.0246027416571837, "learning_rate": 1.061089339858878e-06, "loss": 0.2896, "step": 7277 }, { "epoch": 2.4300500834724543, "grad_norm": 1.0434331846848786, "learning_rate": 1.0598930773728849e-06, "loss": 0.289, "step": 7278 }, { "epoch": 2.430383973288815, "grad_norm": 1.0358551517668373, "learning_rate": 1.0586974096448654e-06, "loss": 0.2974, "step": 7279 }, { "epoch": 2.4307178631051753, "grad_norm": 1.0540929700950046, "learning_rate": 1.0575023368553062e-06, "loss": 0.303, "step": 7280 }, { "epoch": 2.431051752921536, "grad_norm": 1.0421875093235649, "learning_rate": 1.0563078591846021e-06, "loss": 0.2962, "step": 7281 }, { "epoch": 2.4313856427378964, "grad_norm": 0.9972560596162149, "learning_rate": 1.0551139768130597e-06, "loss": 0.2798, "step": 7282 }, { "epoch": 2.431719532554257, "grad_norm": 1.024249938821832, "learning_rate": 1.0539206899208955e-06, "loss": 0.2915, "step": 7283 }, { "epoch": 2.432053422370618, "grad_norm": 1.005303410811709, "learning_rate": 1.0527279986882333e-06, "loss": 0.2953, "step": 7284 }, { "epoch": 2.4323873121869783, "grad_norm": 1.0432921304043814, "learning_rate": 1.0515359032951145e-06, "loss": 0.2926, "step": 7285 }, { "epoch": 2.432721202003339, "grad_norm": 1.0059047821530662, "learning_rate": 1.0503444039214816e-06, "loss": 0.2895, "step": 7286 }, { "epoch": 2.4330550918196994, "grad_norm": 1.0423743656404918, "learning_rate": 1.0491535007471914e-06, "loss": 0.3037, "step": 7287 }, { "epoch": 2.4333889816360603, "grad_norm": 1.0311732222286425, "learning_rate": 1.0479631939520118e-06, "loss": 0.2859, "step": 7288 }, { "epoch": 2.433722871452421, "grad_norm": 1.0080589343747204, "learning_rate": 1.0467734837156168e-06, "loss": 0.2871, "step": 7289 }, { "epoch": 2.4340567612687813, "grad_norm": 1.045337549425462, "learning_rate": 1.045584370217597e-06, "loss": 0.3, "step": 7290 }, { "epoch": 2.434390651085142, "grad_norm": 1.0242421352239832, "learning_rate": 1.0443958536374454e-06, "loss": 0.3033, "step": 7291 }, { "epoch": 2.4347245409015024, "grad_norm": 1.013568763869641, "learning_rate": 1.0432079341545691e-06, "loss": 0.2981, "step": 7292 }, { "epoch": 2.435058430717863, "grad_norm": 0.9969584558702402, "learning_rate": 1.0420206119482829e-06, "loss": 0.2924, "step": 7293 }, { "epoch": 2.435392320534224, "grad_norm": 1.017430920902796, "learning_rate": 1.0408338871978146e-06, "loss": 0.3077, "step": 7294 }, { "epoch": 2.4357262103505843, "grad_norm": 1.0175130040729272, "learning_rate": 1.0396477600823003e-06, "loss": 0.2847, "step": 7295 }, { "epoch": 2.436060100166945, "grad_norm": 1.0534467119873445, "learning_rate": 1.0384622307807857e-06, "loss": 0.3136, "step": 7296 }, { "epoch": 2.4363939899833054, "grad_norm": 0.9854591262817628, "learning_rate": 1.0372772994722219e-06, "loss": 0.2974, "step": 7297 }, { "epoch": 2.4367278797996663, "grad_norm": 1.0151135402944658, "learning_rate": 1.0360929663354784e-06, "loss": 0.2882, "step": 7298 }, { "epoch": 2.437061769616027, "grad_norm": 1.0510143840790247, "learning_rate": 1.0349092315493292e-06, "loss": 0.2987, "step": 7299 }, { "epoch": 2.4373956594323873, "grad_norm": 1.0573561033588412, "learning_rate": 1.0337260952924577e-06, "loss": 0.3018, "step": 7300 }, { "epoch": 2.437729549248748, "grad_norm": 1.0377993132570176, "learning_rate": 1.0325435577434584e-06, "loss": 0.2996, "step": 7301 }, { "epoch": 2.4380634390651084, "grad_norm": 1.0070124491391632, "learning_rate": 1.0313616190808345e-06, "loss": 0.2781, "step": 7302 }, { "epoch": 2.438397328881469, "grad_norm": 1.0305889329843005, "learning_rate": 1.0301802794830002e-06, "loss": 0.2994, "step": 7303 }, { "epoch": 2.43873121869783, "grad_norm": 1.0119165226210876, "learning_rate": 1.0289995391282782e-06, "loss": 0.2961, "step": 7304 }, { "epoch": 2.4390651085141903, "grad_norm": 0.9918591653608012, "learning_rate": 1.0278193981948997e-06, "loss": 0.2915, "step": 7305 }, { "epoch": 2.439398998330551, "grad_norm": 1.0161856886926846, "learning_rate": 1.0266398568610081e-06, "loss": 0.2967, "step": 7306 }, { "epoch": 2.4397328881469114, "grad_norm": 0.9912491295190737, "learning_rate": 1.0254609153046546e-06, "loss": 0.2902, "step": 7307 }, { "epoch": 2.4400667779632723, "grad_norm": 1.0463234368218028, "learning_rate": 1.0242825737037993e-06, "loss": 0.3021, "step": 7308 }, { "epoch": 2.440400667779633, "grad_norm": 1.0392967802584727, "learning_rate": 1.0231048322363113e-06, "loss": 0.2977, "step": 7309 }, { "epoch": 2.4407345575959933, "grad_norm": 0.9826462649865689, "learning_rate": 1.0219276910799752e-06, "loss": 0.2754, "step": 7310 }, { "epoch": 2.441068447412354, "grad_norm": 0.9961216024921247, "learning_rate": 1.020751150412475e-06, "loss": 0.2855, "step": 7311 }, { "epoch": 2.4414023372287144, "grad_norm": 1.0740846167804372, "learning_rate": 1.0195752104114115e-06, "loss": 0.2983, "step": 7312 }, { "epoch": 2.4417362270450753, "grad_norm": 1.0041282323471115, "learning_rate": 1.0183998712542909e-06, "loss": 0.2742, "step": 7313 }, { "epoch": 2.442070116861436, "grad_norm": 1.0077065638653082, "learning_rate": 1.0172251331185323e-06, "loss": 0.2882, "step": 7314 }, { "epoch": 2.4424040066777963, "grad_norm": 1.0191019736757971, "learning_rate": 1.0160509961814614e-06, "loss": 0.2877, "step": 7315 }, { "epoch": 2.442737896494157, "grad_norm": 1.012306070524245, "learning_rate": 1.0148774606203148e-06, "loss": 0.2969, "step": 7316 }, { "epoch": 2.4430717863105174, "grad_norm": 1.0807430524418662, "learning_rate": 1.0137045266122335e-06, "loss": 0.3078, "step": 7317 }, { "epoch": 2.4434056761268783, "grad_norm": 1.0335956726644742, "learning_rate": 1.0125321943342758e-06, "loss": 0.2909, "step": 7318 }, { "epoch": 2.443739565943239, "grad_norm": 0.9924375950022783, "learning_rate": 1.0113604639634034e-06, "loss": 0.2757, "step": 7319 }, { "epoch": 2.4440734557595993, "grad_norm": 1.0736555846255584, "learning_rate": 1.0101893356764886e-06, "loss": 0.3037, "step": 7320 }, { "epoch": 2.44440734557596, "grad_norm": 1.0618628664176648, "learning_rate": 1.009018809650314e-06, "loss": 0.2938, "step": 7321 }, { "epoch": 2.4447412353923204, "grad_norm": 1.01085414693435, "learning_rate": 1.0078488860615666e-06, "loss": 0.2907, "step": 7322 }, { "epoch": 2.4450751252086813, "grad_norm": 0.9974061909553825, "learning_rate": 1.0066795650868494e-06, "loss": 0.2822, "step": 7323 }, { "epoch": 2.445409015025042, "grad_norm": 1.0707101353226858, "learning_rate": 1.0055108469026702e-06, "loss": 0.2942, "step": 7324 }, { "epoch": 2.4457429048414023, "grad_norm": 1.0228919186226848, "learning_rate": 1.0043427316854465e-06, "loss": 0.2921, "step": 7325 }, { "epoch": 2.446076794657763, "grad_norm": 1.0397799752484027, "learning_rate": 1.0031752196115053e-06, "loss": 0.3011, "step": 7326 }, { "epoch": 2.4464106844741234, "grad_norm": 1.0275001331912725, "learning_rate": 1.0020083108570817e-06, "loss": 0.2946, "step": 7327 }, { "epoch": 2.4467445742904843, "grad_norm": 1.0140493942205213, "learning_rate": 1.0008420055983197e-06, "loss": 0.2866, "step": 7328 }, { "epoch": 2.447078464106845, "grad_norm": 1.061079453228242, "learning_rate": 9.996763040112744e-07, "loss": 0.299, "step": 7329 }, { "epoch": 2.4474123539232053, "grad_norm": 1.0409365150920475, "learning_rate": 9.985112062719055e-07, "loss": 0.2826, "step": 7330 }, { "epoch": 2.447746243739566, "grad_norm": 1.035463422736658, "learning_rate": 9.973467125560888e-07, "loss": 0.2869, "step": 7331 }, { "epoch": 2.4480801335559264, "grad_norm": 1.00910931119262, "learning_rate": 9.961828230395998e-07, "loss": 0.28, "step": 7332 }, { "epoch": 2.4484140233722873, "grad_norm": 1.0413244625259819, "learning_rate": 9.950195378981288e-07, "loss": 0.3001, "step": 7333 }, { "epoch": 2.448747913188648, "grad_norm": 1.036291704456385, "learning_rate": 9.938568573072716e-07, "loss": 0.2784, "step": 7334 }, { "epoch": 2.4490818030050083, "grad_norm": 1.0119254831546785, "learning_rate": 9.926947814425381e-07, "loss": 0.2887, "step": 7335 }, { "epoch": 2.449415692821369, "grad_norm": 1.0391508404584744, "learning_rate": 9.915333104793428e-07, "loss": 0.2913, "step": 7336 }, { "epoch": 2.44974958263773, "grad_norm": 1.045349269031623, "learning_rate": 9.903724445930064e-07, "loss": 0.3017, "step": 7337 }, { "epoch": 2.4500834724540903, "grad_norm": 1.0302517558520021, "learning_rate": 9.892121839587615e-07, "loss": 0.2904, "step": 7338 }, { "epoch": 2.450417362270451, "grad_norm": 1.0414376180829168, "learning_rate": 9.880525287517522e-07, "loss": 0.3006, "step": 7339 }, { "epoch": 2.4507512520868113, "grad_norm": 1.0366602093579653, "learning_rate": 9.868934791470258e-07, "loss": 0.2779, "step": 7340 }, { "epoch": 2.451085141903172, "grad_norm": 1.0441619392188477, "learning_rate": 9.857350353195416e-07, "loss": 0.3094, "step": 7341 }, { "epoch": 2.4514190317195323, "grad_norm": 1.0662309103218064, "learning_rate": 9.84577197444166e-07, "loss": 0.2901, "step": 7342 }, { "epoch": 2.4517529215358933, "grad_norm": 1.0038360891969627, "learning_rate": 9.83419965695674e-07, "loss": 0.295, "step": 7343 }, { "epoch": 2.452086811352254, "grad_norm": 1.0503147818931053, "learning_rate": 9.822633402487491e-07, "loss": 0.2977, "step": 7344 }, { "epoch": 2.4524207011686143, "grad_norm": 1.0083000263412403, "learning_rate": 9.811073212779844e-07, "loss": 0.2886, "step": 7345 }, { "epoch": 2.452754590984975, "grad_norm": 0.9781019403413678, "learning_rate": 9.7995190895788e-07, "loss": 0.2791, "step": 7346 }, { "epoch": 2.453088480801336, "grad_norm": 1.0319924972617447, "learning_rate": 9.787971034628457e-07, "loss": 0.2895, "step": 7347 }, { "epoch": 2.4534223706176963, "grad_norm": 1.0741782199448306, "learning_rate": 9.77642904967198e-07, "loss": 0.3106, "step": 7348 }, { "epoch": 2.453756260434057, "grad_norm": 1.026260412210811, "learning_rate": 9.764893136451642e-07, "loss": 0.2954, "step": 7349 }, { "epoch": 2.4540901502504173, "grad_norm": 1.050662483560632, "learning_rate": 9.753363296708774e-07, "loss": 0.3101, "step": 7350 }, { "epoch": 2.454424040066778, "grad_norm": 1.026238508875115, "learning_rate": 9.741839532183806e-07, "loss": 0.3, "step": 7351 }, { "epoch": 2.4547579298831383, "grad_norm": 1.0293766988349289, "learning_rate": 9.730321844616242e-07, "loss": 0.2812, "step": 7352 }, { "epoch": 2.4550918196994993, "grad_norm": 1.0637951406602162, "learning_rate": 9.718810235744686e-07, "loss": 0.3095, "step": 7353 }, { "epoch": 2.45542570951586, "grad_norm": 1.0447361105050657, "learning_rate": 9.707304707306797e-07, "loss": 0.2915, "step": 7354 }, { "epoch": 2.4557595993322203, "grad_norm": 1.0284516135046537, "learning_rate": 9.695805261039331e-07, "loss": 0.2871, "step": 7355 }, { "epoch": 2.456093489148581, "grad_norm": 1.0314968506508053, "learning_rate": 9.684311898678146e-07, "loss": 0.2888, "step": 7356 }, { "epoch": 2.456427378964942, "grad_norm": 1.0758975343312915, "learning_rate": 9.67282462195816e-07, "loss": 0.3077, "step": 7357 }, { "epoch": 2.4567612687813023, "grad_norm": 1.0558214468156517, "learning_rate": 9.661343432613352e-07, "loss": 0.3142, "step": 7358 }, { "epoch": 2.457095158597663, "grad_norm": 1.0518287978798666, "learning_rate": 9.649868332376798e-07, "loss": 0.297, "step": 7359 }, { "epoch": 2.4574290484140233, "grad_norm": 1.0367345164194728, "learning_rate": 9.638399322980696e-07, "loss": 0.2993, "step": 7360 }, { "epoch": 2.457762938230384, "grad_norm": 1.0132232293198684, "learning_rate": 9.626936406156273e-07, "loss": 0.2941, "step": 7361 }, { "epoch": 2.4580968280467443, "grad_norm": 1.0130865921421177, "learning_rate": 9.615479583633864e-07, "loss": 0.2869, "step": 7362 }, { "epoch": 2.4584307178631053, "grad_norm": 1.0374212005034682, "learning_rate": 9.604028857142827e-07, "loss": 0.2954, "step": 7363 }, { "epoch": 2.458764607679466, "grad_norm": 1.0442774668440742, "learning_rate": 9.5925842284117e-07, "loss": 0.2913, "step": 7364 }, { "epoch": 2.4590984974958263, "grad_norm": 1.0518436118105532, "learning_rate": 9.581145699168026e-07, "loss": 0.3036, "step": 7365 }, { "epoch": 2.459432387312187, "grad_norm": 1.0306939639164678, "learning_rate": 9.569713271138442e-07, "loss": 0.2847, "step": 7366 }, { "epoch": 2.459766277128548, "grad_norm": 1.0084830918684828, "learning_rate": 9.558286946048678e-07, "loss": 0.2836, "step": 7367 }, { "epoch": 2.4601001669449083, "grad_norm": 1.0203301925137793, "learning_rate": 9.546866725623533e-07, "loss": 0.2959, "step": 7368 }, { "epoch": 2.460434056761269, "grad_norm": 1.0323437363570258, "learning_rate": 9.535452611586882e-07, "loss": 0.2959, "step": 7369 }, { "epoch": 2.4607679465776293, "grad_norm": 1.0246329383035515, "learning_rate": 9.524044605661686e-07, "loss": 0.2977, "step": 7370 }, { "epoch": 2.46110183639399, "grad_norm": 1.0074210644208255, "learning_rate": 9.512642709569975e-07, "loss": 0.2819, "step": 7371 }, { "epoch": 2.4614357262103503, "grad_norm": 1.0221226199770226, "learning_rate": 9.50124692503287e-07, "loss": 0.2934, "step": 7372 }, { "epoch": 2.4617696160267113, "grad_norm": 1.024212671843343, "learning_rate": 9.489857253770552e-07, "loss": 0.2998, "step": 7373 }, { "epoch": 2.462103505843072, "grad_norm": 1.0630249229461701, "learning_rate": 9.478473697502299e-07, "loss": 0.303, "step": 7374 }, { "epoch": 2.4624373956594323, "grad_norm": 1.0216909546277504, "learning_rate": 9.467096257946428e-07, "loss": 0.2923, "step": 7375 }, { "epoch": 2.462771285475793, "grad_norm": 1.0161719650634633, "learning_rate": 9.455724936820404e-07, "loss": 0.2842, "step": 7376 }, { "epoch": 2.463105175292154, "grad_norm": 1.0953332677851304, "learning_rate": 9.444359735840708e-07, "loss": 0.3152, "step": 7377 }, { "epoch": 2.4634390651085143, "grad_norm": 1.0217615351628315, "learning_rate": 9.433000656722901e-07, "loss": 0.2959, "step": 7378 }, { "epoch": 2.463772954924875, "grad_norm": 1.003359091215792, "learning_rate": 9.421647701181624e-07, "loss": 0.2838, "step": 7379 }, { "epoch": 2.4641068447412353, "grad_norm": 0.9861714874248708, "learning_rate": 9.410300870930634e-07, "loss": 0.2727, "step": 7380 }, { "epoch": 2.464440734557596, "grad_norm": 1.0485408106151712, "learning_rate": 9.398960167682708e-07, "loss": 0.3016, "step": 7381 }, { "epoch": 2.464774624373957, "grad_norm": 1.0447808002477454, "learning_rate": 9.387625593149746e-07, "loss": 0.2886, "step": 7382 }, { "epoch": 2.4651085141903173, "grad_norm": 1.0477160390261184, "learning_rate": 9.376297149042673e-07, "loss": 0.3036, "step": 7383 }, { "epoch": 2.465442404006678, "grad_norm": 1.0378383347909301, "learning_rate": 9.364974837071506e-07, "loss": 0.2925, "step": 7384 }, { "epoch": 2.4657762938230383, "grad_norm": 1.0354772899144653, "learning_rate": 9.353658658945374e-07, "loss": 0.2865, "step": 7385 }, { "epoch": 2.466110183639399, "grad_norm": 1.0340352122474477, "learning_rate": 9.342348616372443e-07, "loss": 0.2936, "step": 7386 }, { "epoch": 2.4664440734557598, "grad_norm": 1.0523343923861708, "learning_rate": 9.331044711059955e-07, "loss": 0.295, "step": 7387 }, { "epoch": 2.4667779632721203, "grad_norm": 1.0301735849897335, "learning_rate": 9.319746944714231e-07, "loss": 0.2881, "step": 7388 }, { "epoch": 2.467111853088481, "grad_norm": 1.0354059150995991, "learning_rate": 9.308455319040671e-07, "loss": 0.2824, "step": 7389 }, { "epoch": 2.4674457429048413, "grad_norm": 1.0125228655678997, "learning_rate": 9.297169835743735e-07, "loss": 0.2981, "step": 7390 }, { "epoch": 2.467779632721202, "grad_norm": 0.979710502171214, "learning_rate": 9.285890496526967e-07, "loss": 0.2869, "step": 7391 }, { "epoch": 2.4681135225375628, "grad_norm": 0.9988610699065047, "learning_rate": 9.274617303092981e-07, "loss": 0.2887, "step": 7392 }, { "epoch": 2.4684474123539233, "grad_norm": 1.0454486579531883, "learning_rate": 9.263350257143455e-07, "loss": 0.298, "step": 7393 }, { "epoch": 2.468781302170284, "grad_norm": 1.0653328568652944, "learning_rate": 9.252089360379157e-07, "loss": 0.3045, "step": 7394 }, { "epoch": 2.4691151919866443, "grad_norm": 1.0658477325686515, "learning_rate": 9.240834614499917e-07, "loss": 0.2996, "step": 7395 }, { "epoch": 2.469449081803005, "grad_norm": 1.0477099121359932, "learning_rate": 9.229586021204612e-07, "loss": 0.3044, "step": 7396 }, { "epoch": 2.4697829716193658, "grad_norm": 1.0665387872538596, "learning_rate": 9.218343582191252e-07, "loss": 0.3013, "step": 7397 }, { "epoch": 2.4701168614357263, "grad_norm": 1.0093050537458204, "learning_rate": 9.207107299156854e-07, "loss": 0.2838, "step": 7398 }, { "epoch": 2.470450751252087, "grad_norm": 1.08868843268084, "learning_rate": 9.195877173797535e-07, "loss": 0.3047, "step": 7399 }, { "epoch": 2.4707846410684473, "grad_norm": 1.0155535439754944, "learning_rate": 9.184653207808469e-07, "loss": 0.2896, "step": 7400 }, { "epoch": 2.471118530884808, "grad_norm": 1.0742043365250344, "learning_rate": 9.173435402883935e-07, "loss": 0.3014, "step": 7401 }, { "epoch": 2.4714524207011688, "grad_norm": 1.0198001736456095, "learning_rate": 9.162223760717243e-07, "loss": 0.2994, "step": 7402 }, { "epoch": 2.4717863105175293, "grad_norm": 0.9880988706612103, "learning_rate": 9.1510182830008e-07, "loss": 0.2736, "step": 7403 }, { "epoch": 2.47212020033389, "grad_norm": 1.046351821192754, "learning_rate": 9.13981897142604e-07, "loss": 0.2856, "step": 7404 }, { "epoch": 2.4724540901502503, "grad_norm": 0.998922405951892, "learning_rate": 9.128625827683524e-07, "loss": 0.2863, "step": 7405 }, { "epoch": 2.4727879799666113, "grad_norm": 1.0585763738509275, "learning_rate": 9.11743885346284e-07, "loss": 0.3049, "step": 7406 }, { "epoch": 2.4731218697829718, "grad_norm": 1.04807811097251, "learning_rate": 9.106258050452671e-07, "loss": 0.3036, "step": 7407 }, { "epoch": 2.4734557595993323, "grad_norm": 1.0338334608812378, "learning_rate": 9.095083420340739e-07, "loss": 0.2966, "step": 7408 }, { "epoch": 2.473789649415693, "grad_norm": 1.0341938865241498, "learning_rate": 9.083914964813861e-07, "loss": 0.3029, "step": 7409 }, { "epoch": 2.4741235392320533, "grad_norm": 1.0508973547115128, "learning_rate": 9.072752685557912e-07, "loss": 0.2969, "step": 7410 }, { "epoch": 2.474457429048414, "grad_norm": 1.0035570110767214, "learning_rate": 9.061596584257836e-07, "loss": 0.2973, "step": 7411 }, { "epoch": 2.4747913188647748, "grad_norm": 1.034462961348887, "learning_rate": 9.05044666259764e-07, "loss": 0.2923, "step": 7412 }, { "epoch": 2.4751252086811353, "grad_norm": 1.051414102382115, "learning_rate": 9.039302922260396e-07, "loss": 0.3046, "step": 7413 }, { "epoch": 2.475459098497496, "grad_norm": 1.0657875079503758, "learning_rate": 9.02816536492826e-07, "loss": 0.3056, "step": 7414 }, { "epoch": 2.4757929883138563, "grad_norm": 1.020931822385234, "learning_rate": 9.017033992282436e-07, "loss": 0.2972, "step": 7415 }, { "epoch": 2.4761268781302173, "grad_norm": 0.9966031308803687, "learning_rate": 9.005908806003199e-07, "loss": 0.2861, "step": 7416 }, { "epoch": 2.4764607679465778, "grad_norm": 1.0205745182322894, "learning_rate": 8.994789807769899e-07, "loss": 0.2964, "step": 7417 }, { "epoch": 2.4767946577629383, "grad_norm": 1.048858553930697, "learning_rate": 8.983676999260942e-07, "loss": 0.2978, "step": 7418 }, { "epoch": 2.477128547579299, "grad_norm": 1.0666686167166133, "learning_rate": 8.972570382153806e-07, "loss": 0.2979, "step": 7419 }, { "epoch": 2.4774624373956593, "grad_norm": 1.084114533144994, "learning_rate": 8.96146995812503e-07, "loss": 0.3064, "step": 7420 }, { "epoch": 2.47779632721202, "grad_norm": 1.0342214458313361, "learning_rate": 8.950375728850203e-07, "loss": 0.2954, "step": 7421 }, { "epoch": 2.4781302170283808, "grad_norm": 1.0268203606134436, "learning_rate": 8.939287696004024e-07, "loss": 0.286, "step": 7422 }, { "epoch": 2.4784641068447413, "grad_norm": 1.0234351919089992, "learning_rate": 8.92820586126023e-07, "loss": 0.2895, "step": 7423 }, { "epoch": 2.478797996661102, "grad_norm": 1.0292486315603053, "learning_rate": 8.917130226291592e-07, "loss": 0.2935, "step": 7424 }, { "epoch": 2.4791318864774623, "grad_norm": 1.0795217610981305, "learning_rate": 8.906060792769982e-07, "loss": 0.297, "step": 7425 }, { "epoch": 2.4794657762938233, "grad_norm": 1.0251818621888, "learning_rate": 8.894997562366342e-07, "loss": 0.2824, "step": 7426 }, { "epoch": 2.4797996661101838, "grad_norm": 1.0184336122783988, "learning_rate": 8.883940536750657e-07, "loss": 0.2876, "step": 7427 }, { "epoch": 2.4801335559265443, "grad_norm": 1.049639274427695, "learning_rate": 8.872889717591993e-07, "loss": 0.3042, "step": 7428 }, { "epoch": 2.480467445742905, "grad_norm": 1.0429064346281234, "learning_rate": 8.861845106558426e-07, "loss": 0.2926, "step": 7429 }, { "epoch": 2.4808013355592653, "grad_norm": 1.0532859614989714, "learning_rate": 8.850806705317183e-07, "loss": 0.298, "step": 7430 }, { "epoch": 2.481135225375626, "grad_norm": 1.033373356846954, "learning_rate": 8.839774515534489e-07, "loss": 0.2893, "step": 7431 }, { "epoch": 2.4814691151919868, "grad_norm": 1.0243475661153452, "learning_rate": 8.828748538875647e-07, "loss": 0.2862, "step": 7432 }, { "epoch": 2.4818030050083473, "grad_norm": 1.0419999340224104, "learning_rate": 8.817728777005036e-07, "loss": 0.2968, "step": 7433 }, { "epoch": 2.482136894824708, "grad_norm": 1.0279523394821055, "learning_rate": 8.806715231586077e-07, "loss": 0.2969, "step": 7434 }, { "epoch": 2.4824707846410683, "grad_norm": 1.0454533504579286, "learning_rate": 8.795707904281259e-07, "loss": 0.3002, "step": 7435 }, { "epoch": 2.4828046744574293, "grad_norm": 1.0366238714522575, "learning_rate": 8.784706796752146e-07, "loss": 0.2959, "step": 7436 }, { "epoch": 2.4831385642737898, "grad_norm": 1.0288390907903433, "learning_rate": 8.773711910659327e-07, "loss": 0.2891, "step": 7437 }, { "epoch": 2.4834724540901503, "grad_norm": 1.0102973502494876, "learning_rate": 8.762723247662519e-07, "loss": 0.2755, "step": 7438 }, { "epoch": 2.483806343906511, "grad_norm": 1.0313095461578987, "learning_rate": 8.751740809420423e-07, "loss": 0.2935, "step": 7439 }, { "epoch": 2.4841402337228713, "grad_norm": 1.0950843454596713, "learning_rate": 8.740764597590845e-07, "loss": 0.2938, "step": 7440 }, { "epoch": 2.484474123539232, "grad_norm": 1.0290419320786157, "learning_rate": 8.729794613830628e-07, "loss": 0.2884, "step": 7441 }, { "epoch": 2.4848080133555928, "grad_norm": 1.0510814226796938, "learning_rate": 8.718830859795718e-07, "loss": 0.2936, "step": 7442 }, { "epoch": 2.4851419031719533, "grad_norm": 1.0411008701943856, "learning_rate": 8.707873337141082e-07, "loss": 0.2933, "step": 7443 }, { "epoch": 2.485475792988314, "grad_norm": 1.0381955731149082, "learning_rate": 8.696922047520728e-07, "loss": 0.3042, "step": 7444 }, { "epoch": 2.4858096828046743, "grad_norm": 1.0370033076399134, "learning_rate": 8.685976992587775e-07, "loss": 0.2896, "step": 7445 }, { "epoch": 2.4861435726210352, "grad_norm": 1.0979327422838248, "learning_rate": 8.675038173994355e-07, "loss": 0.303, "step": 7446 }, { "epoch": 2.4864774624373958, "grad_norm": 1.038096500696042, "learning_rate": 8.664105593391703e-07, "loss": 0.2859, "step": 7447 }, { "epoch": 2.4868113522537563, "grad_norm": 1.0462797133413086, "learning_rate": 8.653179252430083e-07, "loss": 0.292, "step": 7448 }, { "epoch": 2.487145242070117, "grad_norm": 0.989306325271042, "learning_rate": 8.642259152758826e-07, "loss": 0.2806, "step": 7449 }, { "epoch": 2.4874791318864773, "grad_norm": 1.0480499068479696, "learning_rate": 8.631345296026289e-07, "loss": 0.2902, "step": 7450 }, { "epoch": 2.4878130217028382, "grad_norm": 1.0485513978558307, "learning_rate": 8.620437683879951e-07, "loss": 0.3085, "step": 7451 }, { "epoch": 2.4881469115191988, "grad_norm": 1.0241046960133884, "learning_rate": 8.609536317966294e-07, "loss": 0.2826, "step": 7452 }, { "epoch": 2.4884808013355593, "grad_norm": 1.049383021052221, "learning_rate": 8.598641199930885e-07, "loss": 0.2934, "step": 7453 }, { "epoch": 2.4888146911519198, "grad_norm": 1.011827211765563, "learning_rate": 8.587752331418331e-07, "loss": 0.2816, "step": 7454 }, { "epoch": 2.4891485809682803, "grad_norm": 1.040029860665243, "learning_rate": 8.576869714072311e-07, "loss": 0.2901, "step": 7455 }, { "epoch": 2.4894824707846412, "grad_norm": 1.0465072743130734, "learning_rate": 8.565993349535551e-07, "loss": 0.2981, "step": 7456 }, { "epoch": 2.4898163606010018, "grad_norm": 1.071483019539431, "learning_rate": 8.555123239449831e-07, "loss": 0.311, "step": 7457 }, { "epoch": 2.4901502504173623, "grad_norm": 1.052559019114628, "learning_rate": 8.544259385455989e-07, "loss": 0.305, "step": 7458 }, { "epoch": 2.4904841402337228, "grad_norm": 1.061181215990169, "learning_rate": 8.53340178919393e-07, "loss": 0.2947, "step": 7459 }, { "epoch": 2.4908180300500833, "grad_norm": 1.0357334402866372, "learning_rate": 8.522550452302597e-07, "loss": 0.2899, "step": 7460 }, { "epoch": 2.4911519198664442, "grad_norm": 1.0622052131948752, "learning_rate": 8.511705376419999e-07, "loss": 0.2887, "step": 7461 }, { "epoch": 2.4914858096828048, "grad_norm": 1.0174562755592296, "learning_rate": 8.50086656318318e-07, "loss": 0.2916, "step": 7462 }, { "epoch": 2.4918196994991653, "grad_norm": 1.0221270691045656, "learning_rate": 8.490034014228305e-07, "loss": 0.2948, "step": 7463 }, { "epoch": 2.4921535893155258, "grad_norm": 1.0817538063647776, "learning_rate": 8.479207731190491e-07, "loss": 0.3159, "step": 7464 }, { "epoch": 2.4924874791318863, "grad_norm": 1.0674344396137794, "learning_rate": 8.468387715703985e-07, "loss": 0.2995, "step": 7465 }, { "epoch": 2.4928213689482472, "grad_norm": 1.0269783361753442, "learning_rate": 8.457573969402056e-07, "loss": 0.2911, "step": 7466 }, { "epoch": 2.4931552587646078, "grad_norm": 1.0363895469412068, "learning_rate": 8.446766493917047e-07, "loss": 0.2879, "step": 7467 }, { "epoch": 2.4934891485809683, "grad_norm": 1.0305462101001666, "learning_rate": 8.435965290880343e-07, "loss": 0.2935, "step": 7468 }, { "epoch": 2.4938230383973288, "grad_norm": 1.0417470252347567, "learning_rate": 8.425170361922392e-07, "loss": 0.2919, "step": 7469 }, { "epoch": 2.4941569282136893, "grad_norm": 1.0193855041987399, "learning_rate": 8.414381708672642e-07, "loss": 0.2895, "step": 7470 }, { "epoch": 2.4944908180300502, "grad_norm": 1.019537248381321, "learning_rate": 8.403599332759682e-07, "loss": 0.2988, "step": 7471 }, { "epoch": 2.4948247078464107, "grad_norm": 1.0528132105144137, "learning_rate": 8.392823235811087e-07, "loss": 0.3054, "step": 7472 }, { "epoch": 2.4951585976627713, "grad_norm": 1.0283343183369136, "learning_rate": 8.382053419453517e-07, "loss": 0.2865, "step": 7473 }, { "epoch": 2.4954924874791318, "grad_norm": 1.030710517046452, "learning_rate": 8.371289885312683e-07, "loss": 0.2873, "step": 7474 }, { "epoch": 2.4958263772954927, "grad_norm": 1.0221640078278054, "learning_rate": 8.36053263501329e-07, "loss": 0.2884, "step": 7475 }, { "epoch": 2.4961602671118532, "grad_norm": 1.0246943289062758, "learning_rate": 8.349781670179186e-07, "loss": 0.291, "step": 7476 }, { "epoch": 2.4964941569282137, "grad_norm": 1.0216591872262715, "learning_rate": 8.339036992433219e-07, "loss": 0.2901, "step": 7477 }, { "epoch": 2.4968280467445743, "grad_norm": 1.069490127974, "learning_rate": 8.328298603397284e-07, "loss": 0.3041, "step": 7478 }, { "epoch": 2.4971619365609348, "grad_norm": 1.0150237829501312, "learning_rate": 8.317566504692342e-07, "loss": 0.2906, "step": 7479 }, { "epoch": 2.4974958263772953, "grad_norm": 1.0063029193306785, "learning_rate": 8.306840697938406e-07, "loss": 0.2748, "step": 7480 }, { "epoch": 2.4978297161936562, "grad_norm": 1.023239826135765, "learning_rate": 8.296121184754524e-07, "loss": 0.2856, "step": 7481 }, { "epoch": 2.4981636060100167, "grad_norm": 1.023270816172108, "learning_rate": 8.285407966758807e-07, "loss": 0.2918, "step": 7482 }, { "epoch": 2.4984974958263773, "grad_norm": 1.0186925856638989, "learning_rate": 8.274701045568401e-07, "loss": 0.2931, "step": 7483 }, { "epoch": 2.4988313856427378, "grad_norm": 1.0436624893603128, "learning_rate": 8.264000422799551e-07, "loss": 0.2989, "step": 7484 }, { "epoch": 2.4991652754590987, "grad_norm": 1.0410842480451012, "learning_rate": 8.253306100067471e-07, "loss": 0.2915, "step": 7485 }, { "epoch": 2.4994991652754592, "grad_norm": 1.0672394708726376, "learning_rate": 8.242618078986481e-07, "loss": 0.2986, "step": 7486 }, { "epoch": 2.4998330550918197, "grad_norm": 1.0176542159284128, "learning_rate": 8.231936361169923e-07, "loss": 0.2912, "step": 7487 }, { "epoch": 2.5001669449081803, "grad_norm": 1.007239835591803, "learning_rate": 8.221260948230226e-07, "loss": 0.2861, "step": 7488 }, { "epoch": 2.5005008347245408, "grad_norm": 1.0706495540719152, "learning_rate": 8.210591841778831e-07, "loss": 0.3033, "step": 7489 }, { "epoch": 2.5008347245409013, "grad_norm": 1.0335277679537884, "learning_rate": 8.199929043426225e-07, "loss": 0.3001, "step": 7490 }, { "epoch": 2.5011686143572622, "grad_norm": 1.015489511658933, "learning_rate": 8.18927255478194e-07, "loss": 0.286, "step": 7491 }, { "epoch": 2.5015025041736227, "grad_norm": 1.0230430080385458, "learning_rate": 8.178622377454609e-07, "loss": 0.2888, "step": 7492 }, { "epoch": 2.5018363939899833, "grad_norm": 1.0189988276109068, "learning_rate": 8.16797851305185e-07, "loss": 0.2852, "step": 7493 }, { "epoch": 2.5021702838063438, "grad_norm": 0.9706129235228216, "learning_rate": 8.157340963180355e-07, "loss": 0.2758, "step": 7494 }, { "epoch": 2.5025041736227047, "grad_norm": 1.0073434578200944, "learning_rate": 8.146709729445862e-07, "loss": 0.294, "step": 7495 }, { "epoch": 2.5028380634390652, "grad_norm": 1.035081709347745, "learning_rate": 8.136084813453144e-07, "loss": 0.2974, "step": 7496 }, { "epoch": 2.5031719532554257, "grad_norm": 1.0215654603983484, "learning_rate": 8.125466216806039e-07, "loss": 0.2987, "step": 7497 }, { "epoch": 2.5035058430717863, "grad_norm": 1.0174369063665651, "learning_rate": 8.114853941107414e-07, "loss": 0.2995, "step": 7498 }, { "epoch": 2.5038397328881468, "grad_norm": 1.0389417443829299, "learning_rate": 8.10424798795919e-07, "loss": 0.2868, "step": 7499 }, { "epoch": 2.5041736227045073, "grad_norm": 1.06436909116066, "learning_rate": 8.093648358962337e-07, "loss": 0.2986, "step": 7500 }, { "epoch": 2.5045075125208682, "grad_norm": 1.0189698143852592, "learning_rate": 8.08305505571686e-07, "loss": 0.2863, "step": 7501 }, { "epoch": 2.5048414023372287, "grad_norm": 1.0594309861591498, "learning_rate": 8.072468079821822e-07, "loss": 0.2997, "step": 7502 }, { "epoch": 2.5051752921535893, "grad_norm": 1.0367665670659694, "learning_rate": 8.061887432875304e-07, "loss": 0.2973, "step": 7503 }, { "epoch": 2.5055091819699498, "grad_norm": 1.0099339373696756, "learning_rate": 8.051313116474491e-07, "loss": 0.2882, "step": 7504 }, { "epoch": 2.5058430717863107, "grad_norm": 1.0145679326671915, "learning_rate": 8.040745132215538e-07, "loss": 0.284, "step": 7505 }, { "epoch": 2.5061769616026712, "grad_norm": 1.042714845455895, "learning_rate": 8.030183481693687e-07, "loss": 0.2954, "step": 7506 }, { "epoch": 2.5065108514190317, "grad_norm": 1.0345211387234905, "learning_rate": 8.019628166503224e-07, "loss": 0.2894, "step": 7507 }, { "epoch": 2.5068447412353922, "grad_norm": 1.0470896185827399, "learning_rate": 8.009079188237451e-07, "loss": 0.2958, "step": 7508 }, { "epoch": 2.5071786310517528, "grad_norm": 1.0208881008242128, "learning_rate": 7.998536548488772e-07, "loss": 0.2933, "step": 7509 }, { "epoch": 2.5075125208681133, "grad_norm": 1.0330871984880161, "learning_rate": 7.98800024884856e-07, "loss": 0.2821, "step": 7510 }, { "epoch": 2.5078464106844742, "grad_norm": 1.0430717286923354, "learning_rate": 7.97747029090728e-07, "loss": 0.2984, "step": 7511 }, { "epoch": 2.5081803005008347, "grad_norm": 1.0471688309054332, "learning_rate": 7.966946676254406e-07, "loss": 0.2888, "step": 7512 }, { "epoch": 2.5085141903171952, "grad_norm": 1.091542776121613, "learning_rate": 7.956429406478506e-07, "loss": 0.3052, "step": 7513 }, { "epoch": 2.508848080133556, "grad_norm": 1.0427132792551226, "learning_rate": 7.945918483167148e-07, "loss": 0.3027, "step": 7514 }, { "epoch": 2.5091819699499167, "grad_norm": 1.1026708396662757, "learning_rate": 7.935413907906958e-07, "loss": 0.3097, "step": 7515 }, { "epoch": 2.5095158597662772, "grad_norm": 1.029300622329293, "learning_rate": 7.924915682283568e-07, "loss": 0.2947, "step": 7516 }, { "epoch": 2.5098497495826377, "grad_norm": 1.0592499357015006, "learning_rate": 7.914423807881716e-07, "loss": 0.2996, "step": 7517 }, { "epoch": 2.5101836393989982, "grad_norm": 1.0737765114629387, "learning_rate": 7.903938286285134e-07, "loss": 0.2945, "step": 7518 }, { "epoch": 2.5105175292153588, "grad_norm": 1.0149371311656465, "learning_rate": 7.893459119076613e-07, "loss": 0.286, "step": 7519 }, { "epoch": 2.5108514190317193, "grad_norm": 1.0721532170351453, "learning_rate": 7.882986307837975e-07, "loss": 0.2919, "step": 7520 }, { "epoch": 2.5111853088480802, "grad_norm": 1.0980229525767948, "learning_rate": 7.87251985415009e-07, "loss": 0.2941, "step": 7521 }, { "epoch": 2.5115191986644407, "grad_norm": 1.048854107249793, "learning_rate": 7.862059759592871e-07, "loss": 0.2954, "step": 7522 }, { "epoch": 2.5118530884808012, "grad_norm": 1.072757821154118, "learning_rate": 7.851606025745257e-07, "loss": 0.3046, "step": 7523 }, { "epoch": 2.512186978297162, "grad_norm": 1.0576080123834348, "learning_rate": 7.841158654185238e-07, "loss": 0.2942, "step": 7524 }, { "epoch": 2.5125208681135227, "grad_norm": 1.0684512549148784, "learning_rate": 7.830717646489849e-07, "loss": 0.3076, "step": 7525 }, { "epoch": 2.512854757929883, "grad_norm": 1.045083650697392, "learning_rate": 7.820283004235146e-07, "loss": 0.2999, "step": 7526 }, { "epoch": 2.5131886477462437, "grad_norm": 1.0618633865089804, "learning_rate": 7.809854728996246e-07, "loss": 0.3054, "step": 7527 }, { "epoch": 2.5135225375626042, "grad_norm": 1.0593242444484305, "learning_rate": 7.799432822347275e-07, "loss": 0.2915, "step": 7528 }, { "epoch": 2.5138564273789648, "grad_norm": 1.0229600874843352, "learning_rate": 7.789017285861439e-07, "loss": 0.29, "step": 7529 }, { "epoch": 2.5141903171953257, "grad_norm": 1.0057317817340565, "learning_rate": 7.77860812111097e-07, "loss": 0.284, "step": 7530 }, { "epoch": 2.514524207011686, "grad_norm": 1.023544165077721, "learning_rate": 7.768205329667095e-07, "loss": 0.2907, "step": 7531 }, { "epoch": 2.5148580968280467, "grad_norm": 1.017087308132316, "learning_rate": 7.757808913100118e-07, "loss": 0.2878, "step": 7532 }, { "epoch": 2.5151919866444072, "grad_norm": 1.0123841602837478, "learning_rate": 7.747418872979396e-07, "loss": 0.2919, "step": 7533 }, { "epoch": 2.515525876460768, "grad_norm": 1.0396847000257443, "learning_rate": 7.73703521087329e-07, "loss": 0.2964, "step": 7534 }, { "epoch": 2.5158597662771287, "grad_norm": 1.052269595869164, "learning_rate": 7.726657928349224e-07, "loss": 0.3027, "step": 7535 }, { "epoch": 2.516193656093489, "grad_norm": 1.0458358046902185, "learning_rate": 7.716287026973623e-07, "loss": 0.2934, "step": 7536 }, { "epoch": 2.5165275459098497, "grad_norm": 1.0416333054345295, "learning_rate": 7.70592250831197e-07, "loss": 0.2929, "step": 7537 }, { "epoch": 2.5168614357262102, "grad_norm": 1.0655749824336966, "learning_rate": 7.695564373928804e-07, "loss": 0.3008, "step": 7538 }, { "epoch": 2.5171953255425707, "grad_norm": 1.0083454264316796, "learning_rate": 7.685212625387683e-07, "loss": 0.283, "step": 7539 }, { "epoch": 2.5175292153589317, "grad_norm": 1.1169375668470072, "learning_rate": 7.674867264251196e-07, "loss": 0.2971, "step": 7540 }, { "epoch": 2.517863105175292, "grad_norm": 1.0478557665054409, "learning_rate": 7.664528292080964e-07, "loss": 0.2994, "step": 7541 }, { "epoch": 2.5181969949916527, "grad_norm": 1.018524365922003, "learning_rate": 7.654195710437656e-07, "loss": 0.2913, "step": 7542 }, { "epoch": 2.5185308848080132, "grad_norm": 1.0569456025096668, "learning_rate": 7.643869520880981e-07, "loss": 0.2986, "step": 7543 }, { "epoch": 2.518864774624374, "grad_norm": 1.0554749772133196, "learning_rate": 7.633549724969663e-07, "loss": 0.2967, "step": 7544 }, { "epoch": 2.5191986644407347, "grad_norm": 1.0286543970757256, "learning_rate": 7.623236324261479e-07, "loss": 0.2868, "step": 7545 }, { "epoch": 2.519532554257095, "grad_norm": 1.0650601557338424, "learning_rate": 7.612929320313229e-07, "loss": 0.2994, "step": 7546 }, { "epoch": 2.5198664440734557, "grad_norm": 1.0421080217723244, "learning_rate": 7.602628714680754e-07, "loss": 0.2875, "step": 7547 }, { "epoch": 2.5202003338898162, "grad_norm": 1.0653120936632765, "learning_rate": 7.592334508918925e-07, "loss": 0.304, "step": 7548 }, { "epoch": 2.5205342237061767, "grad_norm": 1.0190886555924068, "learning_rate": 7.582046704581647e-07, "loss": 0.2899, "step": 7549 }, { "epoch": 2.5208681135225377, "grad_norm": 1.038702288230132, "learning_rate": 7.571765303221884e-07, "loss": 0.2871, "step": 7550 }, { "epoch": 2.521202003338898, "grad_norm": 1.013797199030358, "learning_rate": 7.561490306391584e-07, "loss": 0.2853, "step": 7551 }, { "epoch": 2.5215358931552587, "grad_norm": 1.022562744269093, "learning_rate": 7.55122171564176e-07, "loss": 0.2871, "step": 7552 }, { "epoch": 2.5218697829716192, "grad_norm": 1.033941172286923, "learning_rate": 7.540959532522447e-07, "loss": 0.2893, "step": 7553 }, { "epoch": 2.52220367278798, "grad_norm": 1.017870538270455, "learning_rate": 7.530703758582735e-07, "loss": 0.2766, "step": 7554 }, { "epoch": 2.5225375626043407, "grad_norm": 1.063199992886846, "learning_rate": 7.520454395370724e-07, "loss": 0.293, "step": 7555 }, { "epoch": 2.522871452420701, "grad_norm": 1.031431875782973, "learning_rate": 7.510211444433569e-07, "loss": 0.2948, "step": 7556 }, { "epoch": 2.5232053422370617, "grad_norm": 1.0128429059386803, "learning_rate": 7.499974907317392e-07, "loss": 0.2736, "step": 7557 }, { "epoch": 2.5235392320534222, "grad_norm": 1.0268847699113501, "learning_rate": 7.489744785567438e-07, "loss": 0.2861, "step": 7558 }, { "epoch": 2.5238731218697827, "grad_norm": 1.0239979367453085, "learning_rate": 7.47952108072793e-07, "loss": 0.2916, "step": 7559 }, { "epoch": 2.5242070116861437, "grad_norm": 1.0454065448312828, "learning_rate": 7.469303794342131e-07, "loss": 0.2931, "step": 7560 }, { "epoch": 2.524540901502504, "grad_norm": 1.0475670307315266, "learning_rate": 7.459092927952339e-07, "loss": 0.2976, "step": 7561 }, { "epoch": 2.5248747913188647, "grad_norm": 1.040752404637158, "learning_rate": 7.448888483099875e-07, "loss": 0.2897, "step": 7562 }, { "epoch": 2.5252086811352252, "grad_norm": 1.052688941870016, "learning_rate": 7.438690461325104e-07, "loss": 0.3017, "step": 7563 }, { "epoch": 2.525542570951586, "grad_norm": 1.0478733371304176, "learning_rate": 7.428498864167416e-07, "loss": 0.2972, "step": 7564 }, { "epoch": 2.5258764607679467, "grad_norm": 1.0624782853787056, "learning_rate": 7.418313693165225e-07, "loss": 0.3023, "step": 7565 }, { "epoch": 2.526210350584307, "grad_norm": 1.0513073727796565, "learning_rate": 7.408134949855983e-07, "loss": 0.3008, "step": 7566 }, { "epoch": 2.5265442404006677, "grad_norm": 1.009491073207904, "learning_rate": 7.397962635776162e-07, "loss": 0.2824, "step": 7567 }, { "epoch": 2.5268781302170282, "grad_norm": 1.0123540482919768, "learning_rate": 7.387796752461273e-07, "loss": 0.2909, "step": 7568 }, { "epoch": 2.5272120200333887, "grad_norm": 0.9983343171268109, "learning_rate": 7.377637301445855e-07, "loss": 0.2768, "step": 7569 }, { "epoch": 2.5275459098497497, "grad_norm": 1.065173310304401, "learning_rate": 7.367484284263477e-07, "loss": 0.3034, "step": 7570 }, { "epoch": 2.52787979966611, "grad_norm": 1.0156586820377986, "learning_rate": 7.35733770244672e-07, "loss": 0.284, "step": 7571 }, { "epoch": 2.5282136894824707, "grad_norm": 1.0091895567920965, "learning_rate": 7.347197557527225e-07, "loss": 0.2911, "step": 7572 }, { "epoch": 2.5285475792988312, "grad_norm": 1.006564117340525, "learning_rate": 7.337063851035636e-07, "loss": 0.2794, "step": 7573 }, { "epoch": 2.528881469115192, "grad_norm": 1.0083352127070915, "learning_rate": 7.326936584501621e-07, "loss": 0.2845, "step": 7574 }, { "epoch": 2.5292153589315527, "grad_norm": 1.0089232871025173, "learning_rate": 7.31681575945391e-07, "loss": 0.2878, "step": 7575 }, { "epoch": 2.529549248747913, "grad_norm": 1.015750436248381, "learning_rate": 7.306701377420239e-07, "loss": 0.2917, "step": 7576 }, { "epoch": 2.5298831385642737, "grad_norm": 1.0156391871924406, "learning_rate": 7.296593439927352e-07, "loss": 0.2906, "step": 7577 }, { "epoch": 2.5302170283806342, "grad_norm": 1.0521401363217273, "learning_rate": 7.286491948501035e-07, "loss": 0.2949, "step": 7578 }, { "epoch": 2.5305509181969947, "grad_norm": 1.0259118064001436, "learning_rate": 7.276396904666133e-07, "loss": 0.2991, "step": 7579 }, { "epoch": 2.5308848080133557, "grad_norm": 1.0590603481057919, "learning_rate": 7.266308309946468e-07, "loss": 0.286, "step": 7580 }, { "epoch": 2.531218697829716, "grad_norm": 1.023118258623417, "learning_rate": 7.256226165864932e-07, "loss": 0.2914, "step": 7581 }, { "epoch": 2.5315525876460767, "grad_norm": 1.0026220730056603, "learning_rate": 7.246150473943386e-07, "loss": 0.2788, "step": 7582 }, { "epoch": 2.5318864774624377, "grad_norm": 1.0670481714319813, "learning_rate": 7.236081235702786e-07, "loss": 0.2986, "step": 7583 }, { "epoch": 2.532220367278798, "grad_norm": 0.9303386862292812, "learning_rate": 7.226018452663064e-07, "loss": 0.2656, "step": 7584 }, { "epoch": 2.5325542570951587, "grad_norm": 1.0575921828880956, "learning_rate": 7.215962126343201e-07, "loss": 0.294, "step": 7585 }, { "epoch": 2.532888146911519, "grad_norm": 1.0916517989367767, "learning_rate": 7.205912258261188e-07, "loss": 0.3127, "step": 7586 }, { "epoch": 2.5332220367278797, "grad_norm": 1.0289666736584127, "learning_rate": 7.195868849934062e-07, "loss": 0.2944, "step": 7587 }, { "epoch": 2.5335559265442402, "grad_norm": 1.0189719668496395, "learning_rate": 7.185831902877865e-07, "loss": 0.2843, "step": 7588 }, { "epoch": 2.5338898163606007, "grad_norm": 1.0098087191089589, "learning_rate": 7.175801418607669e-07, "loss": 0.2819, "step": 7589 }, { "epoch": 2.5342237061769617, "grad_norm": 0.9952292154389444, "learning_rate": 7.165777398637569e-07, "loss": 0.2832, "step": 7590 }, { "epoch": 2.534557595993322, "grad_norm": 1.0812294683210213, "learning_rate": 7.155759844480714e-07, "loss": 0.3011, "step": 7591 }, { "epoch": 2.5348914858096827, "grad_norm": 1.0534941068977255, "learning_rate": 7.145748757649218e-07, "loss": 0.3028, "step": 7592 }, { "epoch": 2.5352253756260437, "grad_norm": 1.0310918118266663, "learning_rate": 7.135744139654266e-07, "loss": 0.2944, "step": 7593 }, { "epoch": 2.535559265442404, "grad_norm": 1.040765110982464, "learning_rate": 7.125745992006044e-07, "loss": 0.2865, "step": 7594 }, { "epoch": 2.5358931552587647, "grad_norm": 1.0406707050603965, "learning_rate": 7.115754316213785e-07, "loss": 0.2941, "step": 7595 }, { "epoch": 2.536227045075125, "grad_norm": 0.9862187472048609, "learning_rate": 7.105769113785732e-07, "loss": 0.285, "step": 7596 }, { "epoch": 2.5365609348914857, "grad_norm": 1.0143652597276098, "learning_rate": 7.095790386229123e-07, "loss": 0.2898, "step": 7597 }, { "epoch": 2.536894824707846, "grad_norm": 1.0408428689917524, "learning_rate": 7.08581813505026e-07, "loss": 0.2963, "step": 7598 }, { "epoch": 2.537228714524207, "grad_norm": 1.060102329913134, "learning_rate": 7.075852361754443e-07, "loss": 0.3045, "step": 7599 }, { "epoch": 2.5375626043405677, "grad_norm": 1.0391241959667157, "learning_rate": 7.065893067846013e-07, "loss": 0.2955, "step": 7600 }, { "epoch": 2.537896494156928, "grad_norm": 1.0170997467872187, "learning_rate": 7.055940254828325e-07, "loss": 0.2854, "step": 7601 }, { "epoch": 2.5382303839732887, "grad_norm": 1.0216365691285505, "learning_rate": 7.045993924203759e-07, "loss": 0.2925, "step": 7602 }, { "epoch": 2.5385642737896497, "grad_norm": 1.0644859342883062, "learning_rate": 7.03605407747367e-07, "loss": 0.2972, "step": 7603 }, { "epoch": 2.53889816360601, "grad_norm": 1.0484763726834656, "learning_rate": 7.026120716138518e-07, "loss": 0.2936, "step": 7604 }, { "epoch": 2.5392320534223707, "grad_norm": 1.02923529691805, "learning_rate": 7.016193841697727e-07, "loss": 0.2882, "step": 7605 }, { "epoch": 2.539565943238731, "grad_norm": 1.0434686211505786, "learning_rate": 7.006273455649753e-07, "loss": 0.2958, "step": 7606 }, { "epoch": 2.5398998330550917, "grad_norm": 1.0195897756549894, "learning_rate": 6.996359559492083e-07, "loss": 0.2816, "step": 7607 }, { "epoch": 2.540233722871452, "grad_norm": 1.0266330759578968, "learning_rate": 6.986452154721207e-07, "loss": 0.2933, "step": 7608 }, { "epoch": 2.540567612687813, "grad_norm": 1.049427792785477, "learning_rate": 6.976551242832646e-07, "loss": 0.2961, "step": 7609 }, { "epoch": 2.5409015025041737, "grad_norm": 1.0527487156674475, "learning_rate": 6.966656825320944e-07, "loss": 0.2935, "step": 7610 }, { "epoch": 2.541235392320534, "grad_norm": 1.0666038635642012, "learning_rate": 6.956768903679661e-07, "loss": 0.2938, "step": 7611 }, { "epoch": 2.5415692821368947, "grad_norm": 1.0775630656782786, "learning_rate": 6.946887479401376e-07, "loss": 0.2899, "step": 7612 }, { "epoch": 2.5419031719532557, "grad_norm": 1.0327681693858934, "learning_rate": 6.937012553977685e-07, "loss": 0.2897, "step": 7613 }, { "epoch": 2.542237061769616, "grad_norm": 1.0116093654373401, "learning_rate": 6.9271441288992e-07, "loss": 0.2957, "step": 7614 }, { "epoch": 2.5425709515859767, "grad_norm": 1.0434631234511487, "learning_rate": 6.917282205655557e-07, "loss": 0.2938, "step": 7615 }, { "epoch": 2.542904841402337, "grad_norm": 1.1401849722245727, "learning_rate": 6.907426785735433e-07, "loss": 0.3132, "step": 7616 }, { "epoch": 2.5432387312186977, "grad_norm": 1.0403643271014744, "learning_rate": 6.897577870626471e-07, "loss": 0.2851, "step": 7617 }, { "epoch": 2.543572621035058, "grad_norm": 1.0286462218306327, "learning_rate": 6.887735461815376e-07, "loss": 0.2964, "step": 7618 }, { "epoch": 2.543906510851419, "grad_norm": 1.0165545140278573, "learning_rate": 6.877899560787849e-07, "loss": 0.2797, "step": 7619 }, { "epoch": 2.5442404006677797, "grad_norm": 1.017100140635932, "learning_rate": 6.868070169028623e-07, "loss": 0.2887, "step": 7620 }, { "epoch": 2.54457429048414, "grad_norm": 0.9982572853945884, "learning_rate": 6.858247288021446e-07, "loss": 0.2843, "step": 7621 }, { "epoch": 2.5449081803005007, "grad_norm": 1.1164160168123263, "learning_rate": 6.848430919249089e-07, "loss": 0.2967, "step": 7622 }, { "epoch": 2.5452420701168617, "grad_norm": 1.039070437684246, "learning_rate": 6.838621064193285e-07, "loss": 0.2866, "step": 7623 }, { "epoch": 2.545575959933222, "grad_norm": 1.0840773266474801, "learning_rate": 6.828817724334874e-07, "loss": 0.3117, "step": 7624 }, { "epoch": 2.5459098497495827, "grad_norm": 1.0547545664352607, "learning_rate": 6.81902090115365e-07, "loss": 0.3104, "step": 7625 }, { "epoch": 2.546243739565943, "grad_norm": 1.057325071593861, "learning_rate": 6.809230596128441e-07, "loss": 0.2922, "step": 7626 }, { "epoch": 2.5465776293823037, "grad_norm": 1.0484741720367394, "learning_rate": 6.799446810737093e-07, "loss": 0.2882, "step": 7627 }, { "epoch": 2.546911519198664, "grad_norm": 1.049449395520004, "learning_rate": 6.789669546456462e-07, "loss": 0.2928, "step": 7628 }, { "epoch": 2.547245409015025, "grad_norm": 1.0116701892213715, "learning_rate": 6.77989880476243e-07, "loss": 0.2785, "step": 7629 }, { "epoch": 2.5475792988313857, "grad_norm": 1.070349816816993, "learning_rate": 6.770134587129878e-07, "loss": 0.2931, "step": 7630 }, { "epoch": 2.547913188647746, "grad_norm": 1.0687085596741697, "learning_rate": 6.760376895032722e-07, "loss": 0.3057, "step": 7631 }, { "epoch": 2.5482470784641067, "grad_norm": 0.9992606935460435, "learning_rate": 6.750625729943877e-07, "loss": 0.2851, "step": 7632 }, { "epoch": 2.5485809682804677, "grad_norm": 1.054770730551728, "learning_rate": 6.740881093335278e-07, "loss": 0.294, "step": 7633 }, { "epoch": 2.548914858096828, "grad_norm": 1.0254729527376496, "learning_rate": 6.731142986677875e-07, "loss": 0.2843, "step": 7634 }, { "epoch": 2.5492487479131887, "grad_norm": 1.0493736476011386, "learning_rate": 6.72141141144163e-07, "loss": 0.3009, "step": 7635 }, { "epoch": 2.549582637729549, "grad_norm": 1.0583141357932175, "learning_rate": 6.711686369095521e-07, "loss": 0.2974, "step": 7636 }, { "epoch": 2.5499165275459097, "grad_norm": 1.060127494328977, "learning_rate": 6.701967861107561e-07, "loss": 0.3033, "step": 7637 }, { "epoch": 2.55025041736227, "grad_norm": 1.0937284133368514, "learning_rate": 6.692255888944732e-07, "loss": 0.3112, "step": 7638 }, { "epoch": 2.550584307178631, "grad_norm": 1.0655013731521277, "learning_rate": 6.682550454073055e-07, "loss": 0.2916, "step": 7639 }, { "epoch": 2.5509181969949917, "grad_norm": 1.0355528513064833, "learning_rate": 6.67285155795756e-07, "loss": 0.2873, "step": 7640 }, { "epoch": 2.551252086811352, "grad_norm": 1.0372371750532527, "learning_rate": 6.663159202062308e-07, "loss": 0.2867, "step": 7641 }, { "epoch": 2.5515859766277127, "grad_norm": 1.0448715697802449, "learning_rate": 6.653473387850368e-07, "loss": 0.295, "step": 7642 }, { "epoch": 2.5519198664440736, "grad_norm": 1.0353002409229242, "learning_rate": 6.643794116783775e-07, "loss": 0.2873, "step": 7643 }, { "epoch": 2.552253756260434, "grad_norm": 0.9930685027878855, "learning_rate": 6.634121390323622e-07, "loss": 0.2822, "step": 7644 }, { "epoch": 2.5525876460767947, "grad_norm": 1.014203952379326, "learning_rate": 6.62445520993002e-07, "loss": 0.2807, "step": 7645 }, { "epoch": 2.552921535893155, "grad_norm": 1.039959188038764, "learning_rate": 6.614795577062066e-07, "loss": 0.2972, "step": 7646 }, { "epoch": 2.5532554257095157, "grad_norm": 0.9801263653480643, "learning_rate": 6.605142493177885e-07, "loss": 0.2739, "step": 7647 }, { "epoch": 2.553589315525876, "grad_norm": 1.0090843477556317, "learning_rate": 6.595495959734599e-07, "loss": 0.2933, "step": 7648 }, { "epoch": 2.553923205342237, "grad_norm": 1.0189359262544262, "learning_rate": 6.585855978188354e-07, "loss": 0.2966, "step": 7649 }, { "epoch": 2.5542570951585977, "grad_norm": 1.0103001064809567, "learning_rate": 6.576222549994305e-07, "loss": 0.2847, "step": 7650 }, { "epoch": 2.554590984974958, "grad_norm": 1.0763501976218526, "learning_rate": 6.566595676606608e-07, "loss": 0.2896, "step": 7651 }, { "epoch": 2.554924874791319, "grad_norm": 1.074762033916645, "learning_rate": 6.55697535947844e-07, "loss": 0.2988, "step": 7652 }, { "epoch": 2.5552587646076796, "grad_norm": 1.0771438792328405, "learning_rate": 6.547361600061985e-07, "loss": 0.3104, "step": 7653 }, { "epoch": 2.55559265442404, "grad_norm": 1.0654635182036354, "learning_rate": 6.537754399808438e-07, "loss": 0.2929, "step": 7654 }, { "epoch": 2.5559265442404007, "grad_norm": 1.0399667899699334, "learning_rate": 6.528153760167999e-07, "loss": 0.2816, "step": 7655 }, { "epoch": 2.556260434056761, "grad_norm": 1.0217269528190724, "learning_rate": 6.518559682589882e-07, "loss": 0.2936, "step": 7656 }, { "epoch": 2.5565943238731217, "grad_norm": 1.0484590775140854, "learning_rate": 6.508972168522326e-07, "loss": 0.2923, "step": 7657 }, { "epoch": 2.556928213689482, "grad_norm": 1.0320906938748529, "learning_rate": 6.499391219412543e-07, "loss": 0.2944, "step": 7658 }, { "epoch": 2.557262103505843, "grad_norm": 1.0259146214534862, "learning_rate": 6.489816836706786e-07, "loss": 0.2916, "step": 7659 }, { "epoch": 2.5575959933222037, "grad_norm": 1.040939771032334, "learning_rate": 6.480249021850299e-07, "loss": 0.2952, "step": 7660 }, { "epoch": 2.557929883138564, "grad_norm": 1.0269966481781776, "learning_rate": 6.470687776287332e-07, "loss": 0.2843, "step": 7661 }, { "epoch": 2.558263772954925, "grad_norm": 1.047901583071936, "learning_rate": 6.461133101461181e-07, "loss": 0.2959, "step": 7662 }, { "epoch": 2.5585976627712856, "grad_norm": 1.0267747996675418, "learning_rate": 6.451584998814098e-07, "loss": 0.2858, "step": 7663 }, { "epoch": 2.558931552587646, "grad_norm": 1.0687921784722367, "learning_rate": 6.442043469787374e-07, "loss": 0.3085, "step": 7664 }, { "epoch": 2.5592654424040067, "grad_norm": 1.0334018287108508, "learning_rate": 6.432508515821279e-07, "loss": 0.2889, "step": 7665 }, { "epoch": 2.559599332220367, "grad_norm": 1.035152064240782, "learning_rate": 6.422980138355145e-07, "loss": 0.2971, "step": 7666 }, { "epoch": 2.5599332220367277, "grad_norm": 1.0680654285416988, "learning_rate": 6.413458338827266e-07, "loss": 0.2851, "step": 7667 }, { "epoch": 2.5602671118530886, "grad_norm": 1.079235201113566, "learning_rate": 6.40394311867496e-07, "loss": 0.3003, "step": 7668 }, { "epoch": 2.560601001669449, "grad_norm": 1.072096955061961, "learning_rate": 6.394434479334516e-07, "loss": 0.306, "step": 7669 }, { "epoch": 2.5609348914858097, "grad_norm": 1.0049034477358176, "learning_rate": 6.384932422241296e-07, "loss": 0.2784, "step": 7670 }, { "epoch": 2.56126878130217, "grad_norm": 1.0854432113881154, "learning_rate": 6.375436948829622e-07, "loss": 0.3056, "step": 7671 }, { "epoch": 2.561602671118531, "grad_norm": 1.0376000375605072, "learning_rate": 6.365948060532823e-07, "loss": 0.2971, "step": 7672 }, { "epoch": 2.5619365609348916, "grad_norm": 1.0708748308456446, "learning_rate": 6.356465758783259e-07, "loss": 0.2923, "step": 7673 }, { "epoch": 2.562270450751252, "grad_norm": 1.0263679689918095, "learning_rate": 6.346990045012269e-07, "loss": 0.2884, "step": 7674 }, { "epoch": 2.5626043405676127, "grad_norm": 1.0341348477802976, "learning_rate": 6.337520920650215e-07, "loss": 0.2887, "step": 7675 }, { "epoch": 2.562938230383973, "grad_norm": 1.0369687693539344, "learning_rate": 6.328058387126457e-07, "loss": 0.3065, "step": 7676 }, { "epoch": 2.5632721202003337, "grad_norm": 0.9920338994474781, "learning_rate": 6.31860244586936e-07, "loss": 0.2752, "step": 7677 }, { "epoch": 2.5636060100166946, "grad_norm": 0.9962727577293662, "learning_rate": 6.309153098306297e-07, "loss": 0.2834, "step": 7678 }, { "epoch": 2.563939899833055, "grad_norm": 1.0453369740188603, "learning_rate": 6.299710345863641e-07, "loss": 0.2934, "step": 7679 }, { "epoch": 2.5642737896494157, "grad_norm": 1.0462459318225352, "learning_rate": 6.290274189966783e-07, "loss": 0.3001, "step": 7680 }, { "epoch": 2.564607679465776, "grad_norm": 1.0049877907294384, "learning_rate": 6.280844632040083e-07, "loss": 0.2958, "step": 7681 }, { "epoch": 2.564941569282137, "grad_norm": 1.036267408118579, "learning_rate": 6.271421673506961e-07, "loss": 0.2865, "step": 7682 }, { "epoch": 2.5652754590984976, "grad_norm": 1.031152070013243, "learning_rate": 6.26200531578981e-07, "loss": 0.2917, "step": 7683 }, { "epoch": 2.565609348914858, "grad_norm": 0.9996463215663226, "learning_rate": 6.252595560309998e-07, "loss": 0.2809, "step": 7684 }, { "epoch": 2.5659432387312187, "grad_norm": 1.0479914000839814, "learning_rate": 6.24319240848793e-07, "loss": 0.2938, "step": 7685 }, { "epoch": 2.566277128547579, "grad_norm": 1.070888724052955, "learning_rate": 6.233795861743031e-07, "loss": 0.3001, "step": 7686 }, { "epoch": 2.5666110183639397, "grad_norm": 1.04131381575835, "learning_rate": 6.224405921493699e-07, "loss": 0.2911, "step": 7687 }, { "epoch": 2.5669449081803006, "grad_norm": 1.0073266349493795, "learning_rate": 6.215022589157343e-07, "loss": 0.2885, "step": 7688 }, { "epoch": 2.567278797996661, "grad_norm": 1.0651219442573445, "learning_rate": 6.205645866150361e-07, "loss": 0.2984, "step": 7689 }, { "epoch": 2.5676126878130217, "grad_norm": 1.0116628272064372, "learning_rate": 6.196275753888159e-07, "loss": 0.2859, "step": 7690 }, { "epoch": 2.567946577629382, "grad_norm": 1.0608819894348596, "learning_rate": 6.186912253785187e-07, "loss": 0.2993, "step": 7691 }, { "epoch": 2.568280467445743, "grad_norm": 1.0658272562425477, "learning_rate": 6.177555367254839e-07, "loss": 0.2944, "step": 7692 }, { "epoch": 2.5686143572621036, "grad_norm": 1.056079915966241, "learning_rate": 6.168205095709546e-07, "loss": 0.2979, "step": 7693 }, { "epoch": 2.568948247078464, "grad_norm": 1.0631411907473993, "learning_rate": 6.15886144056072e-07, "loss": 0.2902, "step": 7694 }, { "epoch": 2.5692821368948247, "grad_norm": 1.058239717465227, "learning_rate": 6.149524403218781e-07, "loss": 0.3029, "step": 7695 }, { "epoch": 2.569616026711185, "grad_norm": 1.0361774865111901, "learning_rate": 6.140193985093163e-07, "loss": 0.2851, "step": 7696 }, { "epoch": 2.5699499165275457, "grad_norm": 1.0283115712262163, "learning_rate": 6.130870187592281e-07, "loss": 0.2829, "step": 7697 }, { "epoch": 2.5702838063439066, "grad_norm": 1.0817049926811324, "learning_rate": 6.12155301212356e-07, "loss": 0.3062, "step": 7698 }, { "epoch": 2.570617696160267, "grad_norm": 1.064562514389507, "learning_rate": 6.112242460093426e-07, "loss": 0.297, "step": 7699 }, { "epoch": 2.5709515859766277, "grad_norm": 1.0354283723912883, "learning_rate": 6.102938532907304e-07, "loss": 0.2869, "step": 7700 }, { "epoch": 2.571285475792988, "grad_norm": 1.0497984010357388, "learning_rate": 6.093641231969627e-07, "loss": 0.3052, "step": 7701 }, { "epoch": 2.571619365609349, "grad_norm": 1.0311614311722062, "learning_rate": 6.084350558683794e-07, "loss": 0.3024, "step": 7702 }, { "epoch": 2.5719532554257096, "grad_norm": 1.013752881730349, "learning_rate": 6.075066514452271e-07, "loss": 0.2799, "step": 7703 }, { "epoch": 2.57228714524207, "grad_norm": 1.0415731638266414, "learning_rate": 6.06578910067645e-07, "loss": 0.2942, "step": 7704 }, { "epoch": 2.5726210350584306, "grad_norm": 1.0429609311570172, "learning_rate": 6.056518318756766e-07, "loss": 0.2996, "step": 7705 }, { "epoch": 2.572954924874791, "grad_norm": 1.0176589617493406, "learning_rate": 6.04725417009262e-07, "loss": 0.2813, "step": 7706 }, { "epoch": 2.5732888146911517, "grad_norm": 1.026215472519921, "learning_rate": 6.037996656082462e-07, "loss": 0.2898, "step": 7707 }, { "epoch": 2.5736227045075126, "grad_norm": 1.0254367795110086, "learning_rate": 6.028745778123713e-07, "loss": 0.2893, "step": 7708 }, { "epoch": 2.573956594323873, "grad_norm": 1.0038639825272302, "learning_rate": 6.01950153761276e-07, "loss": 0.2833, "step": 7709 }, { "epoch": 2.5742904841402336, "grad_norm": 1.1427299272495726, "learning_rate": 6.010263935945026e-07, "loss": 0.3054, "step": 7710 }, { "epoch": 2.574624373956594, "grad_norm": 1.0484970714110782, "learning_rate": 6.001032974514948e-07, "loss": 0.2897, "step": 7711 }, { "epoch": 2.574958263772955, "grad_norm": 1.0082394113060023, "learning_rate": 5.991808654715914e-07, "loss": 0.2783, "step": 7712 }, { "epoch": 2.5752921535893156, "grad_norm": 1.0420525876705278, "learning_rate": 5.982590977940344e-07, "loss": 0.2938, "step": 7713 }, { "epoch": 2.575626043405676, "grad_norm": 1.048928499747524, "learning_rate": 5.973379945579644e-07, "loss": 0.2894, "step": 7714 }, { "epoch": 2.5759599332220366, "grad_norm": 1.0358855925165162, "learning_rate": 5.964175559024205e-07, "loss": 0.2913, "step": 7715 }, { "epoch": 2.576293823038397, "grad_norm": 1.0406525885933429, "learning_rate": 5.954977819663437e-07, "loss": 0.299, "step": 7716 }, { "epoch": 2.5766277128547577, "grad_norm": 1.047492228823581, "learning_rate": 5.945786728885733e-07, "loss": 0.2883, "step": 7717 }, { "epoch": 2.5769616026711186, "grad_norm": 1.0340130264374991, "learning_rate": 5.93660228807848e-07, "loss": 0.2924, "step": 7718 }, { "epoch": 2.577295492487479, "grad_norm": 1.021804001288988, "learning_rate": 5.927424498628075e-07, "loss": 0.2894, "step": 7719 }, { "epoch": 2.5776293823038396, "grad_norm": 1.0567057781932498, "learning_rate": 5.918253361919901e-07, "loss": 0.3022, "step": 7720 }, { "epoch": 2.5779632721202006, "grad_norm": 1.0374202705415856, "learning_rate": 5.909088879338337e-07, "loss": 0.2859, "step": 7721 }, { "epoch": 2.578297161936561, "grad_norm": 1.023085902942936, "learning_rate": 5.899931052266755e-07, "loss": 0.2931, "step": 7722 }, { "epoch": 2.5786310517529216, "grad_norm": 1.0357418607544828, "learning_rate": 5.890779882087533e-07, "loss": 0.2967, "step": 7723 }, { "epoch": 2.578964941569282, "grad_norm": 1.0165087377943873, "learning_rate": 5.881635370182037e-07, "loss": 0.2833, "step": 7724 }, { "epoch": 2.5792988313856426, "grad_norm": 0.9958567509224753, "learning_rate": 5.872497517930619e-07, "loss": 0.2737, "step": 7725 }, { "epoch": 2.579632721202003, "grad_norm": 0.9950640339365784, "learning_rate": 5.863366326712644e-07, "loss": 0.2763, "step": 7726 }, { "epoch": 2.5799666110183637, "grad_norm": 1.0608671695668135, "learning_rate": 5.854241797906451e-07, "loss": 0.2997, "step": 7727 }, { "epoch": 2.5803005008347246, "grad_norm": 1.0433731150568282, "learning_rate": 5.845123932889408e-07, "loss": 0.2921, "step": 7728 }, { "epoch": 2.580634390651085, "grad_norm": 0.9962512297839642, "learning_rate": 5.836012733037843e-07, "loss": 0.2758, "step": 7729 }, { "epoch": 2.5809682804674456, "grad_norm": 1.0151261380494945, "learning_rate": 5.826908199727083e-07, "loss": 0.287, "step": 7730 }, { "epoch": 2.5813021702838066, "grad_norm": 1.0426017009020732, "learning_rate": 5.817810334331447e-07, "loss": 0.303, "step": 7731 }, { "epoch": 2.581636060100167, "grad_norm": 1.027147840487573, "learning_rate": 5.808719138224273e-07, "loss": 0.2915, "step": 7732 }, { "epoch": 2.5819699499165276, "grad_norm": 1.0216524838864809, "learning_rate": 5.799634612777865e-07, "loss": 0.2941, "step": 7733 }, { "epoch": 2.582303839732888, "grad_norm": 1.0106446483739613, "learning_rate": 5.790556759363547e-07, "loss": 0.2891, "step": 7734 }, { "epoch": 2.5826377295492486, "grad_norm": 1.0257558457639022, "learning_rate": 5.781485579351581e-07, "loss": 0.2936, "step": 7735 }, { "epoch": 2.582971619365609, "grad_norm": 1.0525589576751542, "learning_rate": 5.77242107411129e-07, "loss": 0.2867, "step": 7736 }, { "epoch": 2.58330550918197, "grad_norm": 1.0264356819924703, "learning_rate": 5.76336324501095e-07, "loss": 0.2951, "step": 7737 }, { "epoch": 2.5836393989983306, "grad_norm": 1.0601316846353301, "learning_rate": 5.754312093417841e-07, "loss": 0.2934, "step": 7738 }, { "epoch": 2.583973288814691, "grad_norm": 1.0618407170642434, "learning_rate": 5.745267620698225e-07, "loss": 0.3069, "step": 7739 }, { "epoch": 2.5843071786310516, "grad_norm": 1.086785210110501, "learning_rate": 5.73622982821736e-07, "loss": 0.3054, "step": 7740 }, { "epoch": 2.5846410684474126, "grad_norm": 1.0603021644906836, "learning_rate": 5.727198717339511e-07, "loss": 0.2956, "step": 7741 }, { "epoch": 2.584974958263773, "grad_norm": 1.0470231064653175, "learning_rate": 5.718174289427907e-07, "loss": 0.3024, "step": 7742 }, { "epoch": 2.5853088480801336, "grad_norm": 1.052723894529576, "learning_rate": 5.709156545844785e-07, "loss": 0.2957, "step": 7743 }, { "epoch": 2.585642737896494, "grad_norm": 1.0084700676730176, "learning_rate": 5.700145487951391e-07, "loss": 0.2879, "step": 7744 }, { "epoch": 2.5859766277128546, "grad_norm": 1.0269086257652376, "learning_rate": 5.691141117107917e-07, "loss": 0.2788, "step": 7745 }, { "epoch": 2.586310517529215, "grad_norm": 0.9877541531717093, "learning_rate": 5.682143434673581e-07, "loss": 0.2883, "step": 7746 }, { "epoch": 2.586644407345576, "grad_norm": 1.0651242835201173, "learning_rate": 5.673152442006563e-07, "loss": 0.3057, "step": 7747 }, { "epoch": 2.5869782971619366, "grad_norm": 1.087937754062138, "learning_rate": 5.664168140464083e-07, "loss": 0.3098, "step": 7748 }, { "epoch": 2.587312186978297, "grad_norm": 1.0099880632033236, "learning_rate": 5.655190531402304e-07, "loss": 0.2884, "step": 7749 }, { "epoch": 2.5876460767946576, "grad_norm": 1.0725840101228385, "learning_rate": 5.646219616176391e-07, "loss": 0.2997, "step": 7750 }, { "epoch": 2.5879799666110186, "grad_norm": 1.064650576625791, "learning_rate": 5.637255396140495e-07, "loss": 0.3036, "step": 7751 }, { "epoch": 2.588313856427379, "grad_norm": 1.0374482723406648, "learning_rate": 5.628297872647759e-07, "loss": 0.2892, "step": 7752 }, { "epoch": 2.5886477462437396, "grad_norm": 1.006965048448473, "learning_rate": 5.619347047050333e-07, "loss": 0.2828, "step": 7753 }, { "epoch": 2.5889816360601, "grad_norm": 1.0458811527343528, "learning_rate": 5.610402920699353e-07, "loss": 0.2868, "step": 7754 }, { "epoch": 2.5893155258764606, "grad_norm": 1.0126032121050865, "learning_rate": 5.601465494944902e-07, "loss": 0.2891, "step": 7755 }, { "epoch": 2.589649415692821, "grad_norm": 1.027198160591828, "learning_rate": 5.592534771136088e-07, "loss": 0.2745, "step": 7756 }, { "epoch": 2.589983305509182, "grad_norm": 1.0952632789857921, "learning_rate": 5.583610750621021e-07, "loss": 0.3039, "step": 7757 }, { "epoch": 2.5903171953255426, "grad_norm": 1.01991421621831, "learning_rate": 5.574693434746764e-07, "loss": 0.2761, "step": 7758 }, { "epoch": 2.590651085141903, "grad_norm": 1.0392156489064244, "learning_rate": 5.565782824859383e-07, "loss": 0.2808, "step": 7759 }, { "epoch": 2.5909849749582636, "grad_norm": 1.0632266979730405, "learning_rate": 5.556878922303938e-07, "loss": 0.2968, "step": 7760 }, { "epoch": 2.5913188647746246, "grad_norm": 1.0305351978589254, "learning_rate": 5.547981728424468e-07, "loss": 0.2862, "step": 7761 }, { "epoch": 2.591652754590985, "grad_norm": 1.0207829513736144, "learning_rate": 5.539091244563998e-07, "loss": 0.2931, "step": 7762 }, { "epoch": 2.5919866444073456, "grad_norm": 1.0095187682823545, "learning_rate": 5.530207472064553e-07, "loss": 0.2843, "step": 7763 }, { "epoch": 2.592320534223706, "grad_norm": 1.581370408346598, "learning_rate": 5.521330412267123e-07, "loss": 0.3024, "step": 7764 }, { "epoch": 2.5926544240400666, "grad_norm": 1.0703743207201424, "learning_rate": 5.51246006651171e-07, "loss": 0.2907, "step": 7765 }, { "epoch": 2.592988313856427, "grad_norm": 1.00877706167094, "learning_rate": 5.503596436137282e-07, "loss": 0.2766, "step": 7766 }, { "epoch": 2.593322203672788, "grad_norm": 1.062418605090258, "learning_rate": 5.494739522481801e-07, "loss": 0.2893, "step": 7767 }, { "epoch": 2.5936560934891486, "grad_norm": 1.0136201110969665, "learning_rate": 5.485889326882216e-07, "loss": 0.2825, "step": 7768 }, { "epoch": 2.593989983305509, "grad_norm": 1.0368153802103206, "learning_rate": 5.477045850674478e-07, "loss": 0.2915, "step": 7769 }, { "epoch": 2.5943238731218696, "grad_norm": 1.0709526414288537, "learning_rate": 5.468209095193483e-07, "loss": 0.3002, "step": 7770 }, { "epoch": 2.5946577629382306, "grad_norm": 1.0710778191083439, "learning_rate": 5.459379061773146e-07, "loss": 0.3063, "step": 7771 }, { "epoch": 2.594991652754591, "grad_norm": 1.0458529116925988, "learning_rate": 5.45055575174635e-07, "loss": 0.2937, "step": 7772 }, { "epoch": 2.5953255425709516, "grad_norm": 1.120618771233275, "learning_rate": 5.441739166444993e-07, "loss": 0.3076, "step": 7773 }, { "epoch": 2.595659432387312, "grad_norm": 1.0190481516930228, "learning_rate": 5.432929307199919e-07, "loss": 0.2807, "step": 7774 }, { "epoch": 2.5959933222036726, "grad_norm": 1.069819800252674, "learning_rate": 5.424126175340988e-07, "loss": 0.2962, "step": 7775 }, { "epoch": 2.596327212020033, "grad_norm": 1.063480540612793, "learning_rate": 5.415329772196998e-07, "loss": 0.3014, "step": 7776 }, { "epoch": 2.596661101836394, "grad_norm": 1.0749308673936082, "learning_rate": 5.406540099095791e-07, "loss": 0.294, "step": 7777 }, { "epoch": 2.5969949916527546, "grad_norm": 1.0324318680972522, "learning_rate": 5.397757157364164e-07, "loss": 0.2923, "step": 7778 }, { "epoch": 2.597328881469115, "grad_norm": 1.088377651638241, "learning_rate": 5.388980948327899e-07, "loss": 0.3006, "step": 7779 }, { "epoch": 2.597662771285476, "grad_norm": 1.0278915869860414, "learning_rate": 5.380211473311753e-07, "loss": 0.2897, "step": 7780 }, { "epoch": 2.5979966611018366, "grad_norm": 1.0698586534788603, "learning_rate": 5.371448733639479e-07, "loss": 0.3012, "step": 7781 }, { "epoch": 2.598330550918197, "grad_norm": 1.040933224191412, "learning_rate": 5.362692730633811e-07, "loss": 0.2993, "step": 7782 }, { "epoch": 2.5986644407345576, "grad_norm": 1.0301158925754106, "learning_rate": 5.353943465616468e-07, "loss": 0.2881, "step": 7783 }, { "epoch": 2.598998330550918, "grad_norm": 1.0414297983165786, "learning_rate": 5.345200939908146e-07, "loss": 0.2934, "step": 7784 }, { "epoch": 2.5993322203672786, "grad_norm": 1.0364765674035352, "learning_rate": 5.33646515482853e-07, "loss": 0.2857, "step": 7785 }, { "epoch": 2.599666110183639, "grad_norm": 1.051616433556568, "learning_rate": 5.32773611169628e-07, "loss": 0.2936, "step": 7786 }, { "epoch": 2.6, "grad_norm": 1.1274832895442028, "learning_rate": 5.319013811829038e-07, "loss": 0.3051, "step": 7787 }, { "epoch": 2.6003338898163606, "grad_norm": 1.0547041570694589, "learning_rate": 5.310298256543445e-07, "loss": 0.2896, "step": 7788 }, { "epoch": 2.600667779632721, "grad_norm": 1.0439225512316779, "learning_rate": 5.301589447155092e-07, "loss": 0.283, "step": 7789 }, { "epoch": 2.601001669449082, "grad_norm": 1.0531035281249563, "learning_rate": 5.292887384978606e-07, "loss": 0.2866, "step": 7790 }, { "epoch": 2.6013355592654426, "grad_norm": 1.090597889852536, "learning_rate": 5.284192071327526e-07, "loss": 0.3057, "step": 7791 }, { "epoch": 2.601669449081803, "grad_norm": 1.046143246340875, "learning_rate": 5.275503507514423e-07, "loss": 0.2932, "step": 7792 }, { "epoch": 2.6020033388981636, "grad_norm": 1.058733689605035, "learning_rate": 5.266821694850821e-07, "loss": 0.292, "step": 7793 }, { "epoch": 2.602337228714524, "grad_norm": 1.0857001718305999, "learning_rate": 5.258146634647249e-07, "loss": 0.3049, "step": 7794 }, { "epoch": 2.6026711185308846, "grad_norm": 1.024019528649445, "learning_rate": 5.24947832821322e-07, "loss": 0.294, "step": 7795 }, { "epoch": 2.6030050083472456, "grad_norm": 1.0723970660424889, "learning_rate": 5.240816776857178e-07, "loss": 0.303, "step": 7796 }, { "epoch": 2.603338898163606, "grad_norm": 1.0251838470971077, "learning_rate": 5.232161981886591e-07, "loss": 0.2939, "step": 7797 }, { "epoch": 2.6036727879799666, "grad_norm": 1.02774006012565, "learning_rate": 5.223513944607917e-07, "loss": 0.278, "step": 7798 }, { "epoch": 2.604006677796327, "grad_norm": 1.0289820873043742, "learning_rate": 5.214872666326553e-07, "loss": 0.2973, "step": 7799 }, { "epoch": 2.604340567612688, "grad_norm": 1.0461499494706132, "learning_rate": 5.206238148346915e-07, "loss": 0.2973, "step": 7800 }, { "epoch": 2.6046744574290486, "grad_norm": 1.0296536686537492, "learning_rate": 5.197610391972368e-07, "loss": 0.2872, "step": 7801 }, { "epoch": 2.605008347245409, "grad_norm": 1.066698421945756, "learning_rate": 5.188989398505279e-07, "loss": 0.2913, "step": 7802 }, { "epoch": 2.6053422370617696, "grad_norm": 1.0263453711186272, "learning_rate": 5.180375169246976e-07, "loss": 0.2913, "step": 7803 }, { "epoch": 2.60567612687813, "grad_norm": 1.0257108595633837, "learning_rate": 5.171767705497777e-07, "loss": 0.2966, "step": 7804 }, { "epoch": 2.6060100166944906, "grad_norm": 1.0217381953897613, "learning_rate": 5.163167008556974e-07, "loss": 0.2904, "step": 7805 }, { "epoch": 2.6063439065108516, "grad_norm": 1.042924899571974, "learning_rate": 5.154573079722847e-07, "loss": 0.2876, "step": 7806 }, { "epoch": 2.606677796327212, "grad_norm": 1.0592313389007015, "learning_rate": 5.14598592029264e-07, "loss": 0.2989, "step": 7807 }, { "epoch": 2.6070116861435726, "grad_norm": 1.0461471435843155, "learning_rate": 5.137405531562584e-07, "loss": 0.2859, "step": 7808 }, { "epoch": 2.607345575959933, "grad_norm": 1.0290165194577583, "learning_rate": 5.128831914827875e-07, "loss": 0.2895, "step": 7809 }, { "epoch": 2.607679465776294, "grad_norm": 1.0523614211443373, "learning_rate": 5.120265071382735e-07, "loss": 0.2936, "step": 7810 }, { "epoch": 2.6080133555926546, "grad_norm": 1.0228414324971968, "learning_rate": 5.111705002520284e-07, "loss": 0.2887, "step": 7811 }, { "epoch": 2.608347245409015, "grad_norm": 1.0446075528401448, "learning_rate": 5.103151709532683e-07, "loss": 0.279, "step": 7812 }, { "epoch": 2.6086811352253756, "grad_norm": 1.042558798159906, "learning_rate": 5.094605193711044e-07, "loss": 0.2914, "step": 7813 }, { "epoch": 2.609015025041736, "grad_norm": 1.0278918986469785, "learning_rate": 5.086065456345457e-07, "loss": 0.2901, "step": 7814 }, { "epoch": 2.6093489148580966, "grad_norm": 0.9743946790123446, "learning_rate": 5.077532498725013e-07, "loss": 0.2828, "step": 7815 }, { "epoch": 2.6096828046744576, "grad_norm": 1.0846632206750917, "learning_rate": 5.069006322137732e-07, "loss": 0.3069, "step": 7816 }, { "epoch": 2.610016694490818, "grad_norm": 1.0004390711257252, "learning_rate": 5.060486927870651e-07, "loss": 0.2792, "step": 7817 }, { "epoch": 2.6103505843071786, "grad_norm": 1.0409538490915826, "learning_rate": 5.051974317209763e-07, "loss": 0.2896, "step": 7818 }, { "epoch": 2.610684474123539, "grad_norm": 1.0771465227001455, "learning_rate": 5.043468491440057e-07, "loss": 0.2985, "step": 7819 }, { "epoch": 2.6110183639399, "grad_norm": 1.073103021350486, "learning_rate": 5.034969451845478e-07, "loss": 0.304, "step": 7820 }, { "epoch": 2.6113522537562606, "grad_norm": 1.0585911483137924, "learning_rate": 5.026477199708962e-07, "loss": 0.2889, "step": 7821 }, { "epoch": 2.611686143572621, "grad_norm": 1.0510123179228386, "learning_rate": 5.017991736312389e-07, "loss": 0.3007, "step": 7822 }, { "epoch": 2.6120200333889816, "grad_norm": 1.0302893668640483, "learning_rate": 5.009513062936655e-07, "loss": 0.2965, "step": 7823 }, { "epoch": 2.612353923205342, "grad_norm": 1.0557480945086477, "learning_rate": 5.001041180861616e-07, "loss": 0.3015, "step": 7824 }, { "epoch": 2.6126878130217026, "grad_norm": 1.0113776812863937, "learning_rate": 4.992576091366086e-07, "loss": 0.2885, "step": 7825 }, { "epoch": 2.6130217028380636, "grad_norm": 1.039224644134077, "learning_rate": 4.984117795727883e-07, "loss": 0.2886, "step": 7826 }, { "epoch": 2.613355592654424, "grad_norm": 1.0522190732982846, "learning_rate": 4.975666295223775e-07, "loss": 0.2948, "step": 7827 }, { "epoch": 2.6136894824707846, "grad_norm": 1.0227024129312248, "learning_rate": 4.967221591129512e-07, "loss": 0.2817, "step": 7828 }, { "epoch": 2.614023372287145, "grad_norm": 1.0231099921842854, "learning_rate": 4.958783684719826e-07, "loss": 0.2848, "step": 7829 }, { "epoch": 2.614357262103506, "grad_norm": 1.0149672882237202, "learning_rate": 4.950352577268413e-07, "loss": 0.2818, "step": 7830 }, { "epoch": 2.6146911519198666, "grad_norm": 1.048859269589007, "learning_rate": 4.941928270047953e-07, "loss": 0.2953, "step": 7831 }, { "epoch": 2.615025041736227, "grad_norm": 1.0230922395219573, "learning_rate": 4.93351076433008e-07, "loss": 0.2851, "step": 7832 }, { "epoch": 2.6153589315525876, "grad_norm": 1.0841094973626422, "learning_rate": 4.925100061385418e-07, "loss": 0.2996, "step": 7833 }, { "epoch": 2.615692821368948, "grad_norm": 1.0235462410618965, "learning_rate": 4.916696162483553e-07, "loss": 0.2851, "step": 7834 }, { "epoch": 2.6160267111853086, "grad_norm": 1.035115234091588, "learning_rate": 4.908299068893074e-07, "loss": 0.2885, "step": 7835 }, { "epoch": 2.6163606010016696, "grad_norm": 1.0445814707302512, "learning_rate": 4.899908781881519e-07, "loss": 0.2897, "step": 7836 }, { "epoch": 2.61669449081803, "grad_norm": 0.9986138512750865, "learning_rate": 4.891525302715372e-07, "loss": 0.2777, "step": 7837 }, { "epoch": 2.6170283806343906, "grad_norm": 1.077421990936209, "learning_rate": 4.883148632660117e-07, "loss": 0.3028, "step": 7838 }, { "epoch": 2.617362270450751, "grad_norm": 1.0195350815683992, "learning_rate": 4.874778772980243e-07, "loss": 0.3008, "step": 7839 }, { "epoch": 2.617696160267112, "grad_norm": 1.014501002277978, "learning_rate": 4.866415724939156e-07, "loss": 0.288, "step": 7840 }, { "epoch": 2.6180300500834726, "grad_norm": 1.0309079540456145, "learning_rate": 4.858059489799266e-07, "loss": 0.2918, "step": 7841 }, { "epoch": 2.618363939899833, "grad_norm": 1.0266795813190492, "learning_rate": 4.849710068821922e-07, "loss": 0.2896, "step": 7842 }, { "epoch": 2.6186978297161936, "grad_norm": 1.035991049482111, "learning_rate": 4.841367463267488e-07, "loss": 0.2882, "step": 7843 }, { "epoch": 2.619031719532554, "grad_norm": 1.0233643961995809, "learning_rate": 4.833031674395272e-07, "loss": 0.2862, "step": 7844 }, { "epoch": 2.6193656093489146, "grad_norm": 0.9795489482876962, "learning_rate": 4.824702703463558e-07, "loss": 0.274, "step": 7845 }, { "epoch": 2.6196994991652756, "grad_norm": 0.9953904931535269, "learning_rate": 4.8163805517296e-07, "loss": 0.2754, "step": 7846 }, { "epoch": 2.620033388981636, "grad_norm": 1.0304515290508287, "learning_rate": 4.808065220449631e-07, "loss": 0.2876, "step": 7847 }, { "epoch": 2.6203672787979966, "grad_norm": 1.072245902094772, "learning_rate": 4.799756710878839e-07, "loss": 0.311, "step": 7848 }, { "epoch": 2.6207011686143575, "grad_norm": 1.0383310514095654, "learning_rate": 4.791455024271396e-07, "loss": 0.29, "step": 7849 }, { "epoch": 2.621035058430718, "grad_norm": 1.0563970986566327, "learning_rate": 4.78316016188044e-07, "loss": 0.3028, "step": 7850 }, { "epoch": 2.6213689482470786, "grad_norm": 1.0221650100703896, "learning_rate": 4.774872124958074e-07, "loss": 0.2801, "step": 7851 }, { "epoch": 2.621702838063439, "grad_norm": 1.0581042116969486, "learning_rate": 4.7665909147553813e-07, "loss": 0.2914, "step": 7852 }, { "epoch": 2.6220367278797996, "grad_norm": 1.049431680720123, "learning_rate": 4.7583165325224e-07, "loss": 0.2934, "step": 7853 }, { "epoch": 2.62237061769616, "grad_norm": 1.0062416709995106, "learning_rate": 4.7500489795081485e-07, "loss": 0.2852, "step": 7854 }, { "epoch": 2.6227045075125206, "grad_norm": 0.9974751834668154, "learning_rate": 4.7417882569606055e-07, "loss": 0.286, "step": 7855 }, { "epoch": 2.6230383973288816, "grad_norm": 1.0973637952875808, "learning_rate": 4.733534366126752e-07, "loss": 0.2968, "step": 7856 }, { "epoch": 2.623372287145242, "grad_norm": 1.0558574816502382, "learning_rate": 4.7252873082524743e-07, "loss": 0.2863, "step": 7857 }, { "epoch": 2.6237061769616026, "grad_norm": 1.0662904442645127, "learning_rate": 4.717047084582682e-07, "loss": 0.2924, "step": 7858 }, { "epoch": 2.6240400667779635, "grad_norm": 1.061185532999553, "learning_rate": 4.708813696361225e-07, "loss": 0.2893, "step": 7859 }, { "epoch": 2.624373956594324, "grad_norm": 1.0622347542166068, "learning_rate": 4.700587144830948e-07, "loss": 0.2909, "step": 7860 }, { "epoch": 2.6247078464106846, "grad_norm": 1.0271793229548318, "learning_rate": 4.692367431233641e-07, "loss": 0.2834, "step": 7861 }, { "epoch": 2.625041736227045, "grad_norm": 1.06164667466163, "learning_rate": 4.68415455681005e-07, "loss": 0.2959, "step": 7862 }, { "epoch": 2.6253756260434056, "grad_norm": 1.0197115263318184, "learning_rate": 4.675948522799917e-07, "loss": 0.2844, "step": 7863 }, { "epoch": 2.625709515859766, "grad_norm": 1.0543046171046877, "learning_rate": 4.667749330441945e-07, "loss": 0.296, "step": 7864 }, { "epoch": 2.626043405676127, "grad_norm": 1.0601517173597312, "learning_rate": 4.659556980973795e-07, "loss": 0.2984, "step": 7865 }, { "epoch": 2.6263772954924876, "grad_norm": 1.0314897386789126, "learning_rate": 4.651371475632105e-07, "loss": 0.2902, "step": 7866 }, { "epoch": 2.626711185308848, "grad_norm": 1.0428171896586278, "learning_rate": 4.6431928156524695e-07, "loss": 0.289, "step": 7867 }, { "epoch": 2.6270450751252086, "grad_norm": 1.075853607396684, "learning_rate": 4.635021002269452e-07, "loss": 0.2911, "step": 7868 }, { "epoch": 2.6273789649415695, "grad_norm": 1.053724513048987, "learning_rate": 4.626856036716593e-07, "loss": 0.2958, "step": 7869 }, { "epoch": 2.62771285475793, "grad_norm": 1.0427332987547449, "learning_rate": 4.6186979202263895e-07, "loss": 0.2956, "step": 7870 }, { "epoch": 2.6280467445742905, "grad_norm": 1.0307497113367776, "learning_rate": 4.6105466540303013e-07, "loss": 0.2807, "step": 7871 }, { "epoch": 2.628380634390651, "grad_norm": 0.994754725019409, "learning_rate": 4.6024022393587666e-07, "loss": 0.2821, "step": 7872 }, { "epoch": 2.6287145242070116, "grad_norm": 1.0161641809038429, "learning_rate": 4.5942646774411736e-07, "loss": 0.2921, "step": 7873 }, { "epoch": 2.629048414023372, "grad_norm": 1.0222754086127523, "learning_rate": 4.586133969505896e-07, "loss": 0.2852, "step": 7874 }, { "epoch": 2.629382303839733, "grad_norm": 1.0627226355231245, "learning_rate": 4.5780101167802515e-07, "loss": 0.3022, "step": 7875 }, { "epoch": 2.6297161936560935, "grad_norm": 1.0453778854845843, "learning_rate": 4.569893120490543e-07, "loss": 0.3044, "step": 7876 }, { "epoch": 2.630050083472454, "grad_norm": 1.0238199804535735, "learning_rate": 4.5617829818620174e-07, "loss": 0.3028, "step": 7877 }, { "epoch": 2.6303839732888146, "grad_norm": 1.02634859736905, "learning_rate": 4.553679702118907e-07, "loss": 0.2924, "step": 7878 }, { "epoch": 2.6307178631051755, "grad_norm": 1.0318938568274911, "learning_rate": 4.5455832824844004e-07, "loss": 0.2971, "step": 7879 }, { "epoch": 2.631051752921536, "grad_norm": 1.074319332500655, "learning_rate": 4.5374937241806306e-07, "loss": 0.2992, "step": 7880 }, { "epoch": 2.6313856427378965, "grad_norm": 1.0801721294833062, "learning_rate": 4.529411028428743e-07, "loss": 0.2927, "step": 7881 }, { "epoch": 2.631719532554257, "grad_norm": 1.0357161350495807, "learning_rate": 4.521335196448812e-07, "loss": 0.2951, "step": 7882 }, { "epoch": 2.6320534223706176, "grad_norm": 1.0148473083743854, "learning_rate": 4.513266229459862e-07, "loss": 0.2752, "step": 7883 }, { "epoch": 2.632387312186978, "grad_norm": 1.0062998211929939, "learning_rate": 4.5052041286799076e-07, "loss": 0.2835, "step": 7884 }, { "epoch": 2.632721202003339, "grad_norm": 1.0513075763981088, "learning_rate": 4.497148895325937e-07, "loss": 0.2961, "step": 7885 }, { "epoch": 2.6330550918196995, "grad_norm": 1.0077569828881487, "learning_rate": 4.489100530613871e-07, "loss": 0.2869, "step": 7886 }, { "epoch": 2.63338898163606, "grad_norm": 1.0382386882897467, "learning_rate": 4.4810590357586225e-07, "loss": 0.2849, "step": 7887 }, { "epoch": 2.6337228714524206, "grad_norm": 1.0340671372041523, "learning_rate": 4.47302441197402e-07, "loss": 0.2894, "step": 7888 }, { "epoch": 2.6340567612687815, "grad_norm": 1.0335017619884708, "learning_rate": 4.4649966604729254e-07, "loss": 0.2917, "step": 7889 }, { "epoch": 2.634390651085142, "grad_norm": 1.0117782725643154, "learning_rate": 4.4569757824671045e-07, "loss": 0.2731, "step": 7890 }, { "epoch": 2.6347245409015025, "grad_norm": 1.0507469068016224, "learning_rate": 4.4489617791673155e-07, "loss": 0.2954, "step": 7891 }, { "epoch": 2.635058430717863, "grad_norm": 1.0740197484955536, "learning_rate": 4.440954651783258e-07, "loss": 0.2995, "step": 7892 }, { "epoch": 2.6353923205342236, "grad_norm": 1.055946988734644, "learning_rate": 4.4329544015236204e-07, "loss": 0.3052, "step": 7893 }, { "epoch": 2.635726210350584, "grad_norm": 1.0663970613650176, "learning_rate": 4.424961029596031e-07, "loss": 0.2937, "step": 7894 }, { "epoch": 2.636060100166945, "grad_norm": 1.0901279180435868, "learning_rate": 4.416974537207086e-07, "loss": 0.304, "step": 7895 }, { "epoch": 2.6363939899833055, "grad_norm": 1.047868651471082, "learning_rate": 4.408994925562332e-07, "loss": 0.2974, "step": 7896 }, { "epoch": 2.636727879799666, "grad_norm": 1.0148396140542355, "learning_rate": 4.4010221958663223e-07, "loss": 0.2759, "step": 7897 }, { "epoch": 2.6370617696160266, "grad_norm": 1.039274445059621, "learning_rate": 4.393056349322511e-07, "loss": 0.2893, "step": 7898 }, { "epoch": 2.6373956594323875, "grad_norm": 1.0065350182434973, "learning_rate": 4.385097387133347e-07, "loss": 0.2844, "step": 7899 }, { "epoch": 2.637729549248748, "grad_norm": 1.0818630945995142, "learning_rate": 4.3771453105002195e-07, "loss": 0.3008, "step": 7900 }, { "epoch": 2.6380634390651085, "grad_norm": 0.9980668845308547, "learning_rate": 4.369200120623518e-07, "loss": 0.285, "step": 7901 }, { "epoch": 2.638397328881469, "grad_norm": 1.037341504916512, "learning_rate": 4.361261818702567e-07, "loss": 0.2878, "step": 7902 }, { "epoch": 2.6387312186978296, "grad_norm": 1.0248559709530995, "learning_rate": 4.353330405935624e-07, "loss": 0.293, "step": 7903 }, { "epoch": 2.63906510851419, "grad_norm": 1.0414342120870883, "learning_rate": 4.345405883519943e-07, "loss": 0.2917, "step": 7904 }, { "epoch": 2.639398998330551, "grad_norm": 1.023514092447343, "learning_rate": 4.3374882526517394e-07, "loss": 0.2968, "step": 7905 }, { "epoch": 2.6397328881469115, "grad_norm": 1.0662154039503622, "learning_rate": 4.329577514526173e-07, "loss": 0.2896, "step": 7906 }, { "epoch": 2.640066777963272, "grad_norm": 0.9967234228515153, "learning_rate": 4.321673670337373e-07, "loss": 0.2801, "step": 7907 }, { "epoch": 2.6404006677796326, "grad_norm": 1.072415504009564, "learning_rate": 4.313776721278401e-07, "loss": 0.3, "step": 7908 }, { "epoch": 2.6407345575959935, "grad_norm": 1.0475402780549254, "learning_rate": 4.305886668541309e-07, "loss": 0.2911, "step": 7909 }, { "epoch": 2.641068447412354, "grad_norm": 1.0271472839508011, "learning_rate": 4.298003513317112e-07, "loss": 0.2881, "step": 7910 }, { "epoch": 2.6414023372287145, "grad_norm": 1.0102712453115799, "learning_rate": 4.2901272567957563e-07, "loss": 0.2885, "step": 7911 }, { "epoch": 2.641736227045075, "grad_norm": 1.0515639799045962, "learning_rate": 4.2822579001661645e-07, "loss": 0.2954, "step": 7912 }, { "epoch": 2.6420701168614356, "grad_norm": 1.088939151471615, "learning_rate": 4.2743954446162195e-07, "loss": 0.2969, "step": 7913 }, { "epoch": 2.642404006677796, "grad_norm": 1.09489035003961, "learning_rate": 4.266539891332744e-07, "loss": 0.3047, "step": 7914 }, { "epoch": 2.642737896494157, "grad_norm": 1.0321246007895186, "learning_rate": 4.258691241501539e-07, "loss": 0.2891, "step": 7915 }, { "epoch": 2.6430717863105175, "grad_norm": 1.0374818188291914, "learning_rate": 4.250849496307352e-07, "loss": 0.2894, "step": 7916 }, { "epoch": 2.643405676126878, "grad_norm": 1.0515977620297279, "learning_rate": 4.243014656933897e-07, "loss": 0.2867, "step": 7917 }, { "epoch": 2.643739565943239, "grad_norm": 1.01511407438961, "learning_rate": 4.235186724563839e-07, "loss": 0.2831, "step": 7918 }, { "epoch": 2.6440734557595995, "grad_norm": 1.0100222042490983, "learning_rate": 4.2273657003787993e-07, "loss": 0.2831, "step": 7919 }, { "epoch": 2.64440734557596, "grad_norm": 1.0560058538006414, "learning_rate": 4.219551585559367e-07, "loss": 0.2946, "step": 7920 }, { "epoch": 2.6447412353923205, "grad_norm": 1.0782068962945888, "learning_rate": 4.21174438128506e-07, "loss": 0.3054, "step": 7921 }, { "epoch": 2.645075125208681, "grad_norm": 1.030957600050845, "learning_rate": 4.2039440887344076e-07, "loss": 0.2906, "step": 7922 }, { "epoch": 2.6454090150250416, "grad_norm": 1.0377356425178759, "learning_rate": 4.196150709084834e-07, "loss": 0.2856, "step": 7923 }, { "epoch": 2.645742904841402, "grad_norm": 1.0673773342605695, "learning_rate": 4.18836424351276e-07, "loss": 0.3008, "step": 7924 }, { "epoch": 2.646076794657763, "grad_norm": 1.0784509764071195, "learning_rate": 4.180584693193529e-07, "loss": 0.3033, "step": 7925 }, { "epoch": 2.6464106844741235, "grad_norm": 1.0550299231958304, "learning_rate": 4.1728120593014956e-07, "loss": 0.2971, "step": 7926 }, { "epoch": 2.646744574290484, "grad_norm": 1.0558260478254817, "learning_rate": 4.1650463430099155e-07, "loss": 0.2944, "step": 7927 }, { "epoch": 2.647078464106845, "grad_norm": 1.0620579417525966, "learning_rate": 4.1572875454910347e-07, "loss": 0.2949, "step": 7928 }, { "epoch": 2.6474123539232055, "grad_norm": 1.022448056066889, "learning_rate": 4.149535667916016e-07, "loss": 0.2871, "step": 7929 }, { "epoch": 2.647746243739566, "grad_norm": 1.0231400223086273, "learning_rate": 4.1417907114550293e-07, "loss": 0.2917, "step": 7930 }, { "epoch": 2.6480801335559265, "grad_norm": 1.0268718694383416, "learning_rate": 4.1340526772771616e-07, "loss": 0.2892, "step": 7931 }, { "epoch": 2.648414023372287, "grad_norm": 1.0042977768162593, "learning_rate": 4.1263215665504673e-07, "loss": 0.279, "step": 7932 }, { "epoch": 2.6487479131886476, "grad_norm": 1.038000772791649, "learning_rate": 4.118597380441958e-07, "loss": 0.287, "step": 7933 }, { "epoch": 2.6490818030050085, "grad_norm": 1.0115390941262468, "learning_rate": 4.1108801201175994e-07, "loss": 0.2769, "step": 7934 }, { "epoch": 2.649415692821369, "grad_norm": 1.0652767148714257, "learning_rate": 4.103169786742306e-07, "loss": 0.2964, "step": 7935 }, { "epoch": 2.6497495826377295, "grad_norm": 1.0438522647664141, "learning_rate": 4.0954663814799454e-07, "loss": 0.2965, "step": 7936 }, { "epoch": 2.65008347245409, "grad_norm": 1.0562318702857305, "learning_rate": 4.087769905493355e-07, "loss": 0.2965, "step": 7937 }, { "epoch": 2.650417362270451, "grad_norm": 1.0146661314310297, "learning_rate": 4.0800803599443106e-07, "loss": 0.2815, "step": 7938 }, { "epoch": 2.6507512520868115, "grad_norm": 1.0377552262697973, "learning_rate": 4.072397745993545e-07, "loss": 0.2951, "step": 7939 }, { "epoch": 2.651085141903172, "grad_norm": 1.0102333675965671, "learning_rate": 4.064722064800747e-07, "loss": 0.2787, "step": 7940 }, { "epoch": 2.6514190317195325, "grad_norm": 1.0616772797644185, "learning_rate": 4.0570533175245563e-07, "loss": 0.2987, "step": 7941 }, { "epoch": 2.651752921535893, "grad_norm": 1.0366981282153902, "learning_rate": 4.04939150532257e-07, "loss": 0.2905, "step": 7942 }, { "epoch": 2.6520868113522535, "grad_norm": 1.024116674262969, "learning_rate": 4.0417366293513506e-07, "loss": 0.277, "step": 7943 }, { "epoch": 2.6524207011686145, "grad_norm": 1.0313143489460974, "learning_rate": 4.03408869076638e-07, "loss": 0.2886, "step": 7944 }, { "epoch": 2.652754590984975, "grad_norm": 1.0631401574656365, "learning_rate": 4.026447690722113e-07, "loss": 0.2953, "step": 7945 }, { "epoch": 2.6530884808013355, "grad_norm": 1.0432748587613478, "learning_rate": 4.0188136303719593e-07, "loss": 0.2899, "step": 7946 }, { "epoch": 2.653422370617696, "grad_norm": 1.057962868019285, "learning_rate": 4.0111865108682867e-07, "loss": 0.2957, "step": 7947 }, { "epoch": 2.653756260434057, "grad_norm": 1.075723257099615, "learning_rate": 4.003566333362408e-07, "loss": 0.3101, "step": 7948 }, { "epoch": 2.6540901502504175, "grad_norm": 1.0113647732527307, "learning_rate": 3.9959530990045736e-07, "loss": 0.2859, "step": 7949 }, { "epoch": 2.654424040066778, "grad_norm": 1.0250560518946468, "learning_rate": 3.9883468089439936e-07, "loss": 0.2946, "step": 7950 }, { "epoch": 2.6547579298831385, "grad_norm": 1.032585157515085, "learning_rate": 3.98074746432886e-07, "loss": 0.2777, "step": 7951 }, { "epoch": 2.655091819699499, "grad_norm": 1.000950389225794, "learning_rate": 3.973155066306272e-07, "loss": 0.2818, "step": 7952 }, { "epoch": 2.6554257095158595, "grad_norm": 1.0091687012110062, "learning_rate": 3.9655696160223136e-07, "loss": 0.2903, "step": 7953 }, { "epoch": 2.6557595993322205, "grad_norm": 1.045801720503042, "learning_rate": 3.957991114621984e-07, "loss": 0.2904, "step": 7954 }, { "epoch": 2.656093489148581, "grad_norm": 1.080251444080728, "learning_rate": 3.9504195632492806e-07, "loss": 0.2833, "step": 7955 }, { "epoch": 2.6564273789649415, "grad_norm": 1.1133334145339915, "learning_rate": 3.9428549630471157e-07, "loss": 0.3011, "step": 7956 }, { "epoch": 2.656761268781302, "grad_norm": 1.0202546901762586, "learning_rate": 3.9352973151573606e-07, "loss": 0.2797, "step": 7957 }, { "epoch": 2.657095158597663, "grad_norm": 1.0527539176687157, "learning_rate": 3.927746620720846e-07, "loss": 0.2874, "step": 7958 }, { "epoch": 2.6574290484140235, "grad_norm": 1.0112153723356863, "learning_rate": 3.9202028808773496e-07, "loss": 0.286, "step": 7959 }, { "epoch": 2.657762938230384, "grad_norm": 1.0300834737869444, "learning_rate": 3.9126660967655827e-07, "loss": 0.2881, "step": 7960 }, { "epoch": 2.6580968280467445, "grad_norm": 1.0256128955526624, "learning_rate": 3.9051362695232353e-07, "loss": 0.2839, "step": 7961 }, { "epoch": 2.658430717863105, "grad_norm": 1.0444033172839435, "learning_rate": 3.897613400286915e-07, "loss": 0.296, "step": 7962 }, { "epoch": 2.6587646076794655, "grad_norm": 1.0338827097524477, "learning_rate": 3.8900974901922294e-07, "loss": 0.2901, "step": 7963 }, { "epoch": 2.6590984974958265, "grad_norm": 0.9905995906380533, "learning_rate": 3.8825885403736716e-07, "loss": 0.2827, "step": 7964 }, { "epoch": 2.659432387312187, "grad_norm": 1.056957202400536, "learning_rate": 3.875086551964724e-07, "loss": 0.2991, "step": 7965 }, { "epoch": 2.6597662771285475, "grad_norm": 1.0345686812345947, "learning_rate": 3.867591526097808e-07, "loss": 0.2969, "step": 7966 }, { "epoch": 2.660100166944908, "grad_norm": 1.0508335449298574, "learning_rate": 3.8601034639043024e-07, "loss": 0.2901, "step": 7967 }, { "epoch": 2.660434056761269, "grad_norm": 1.0200576132017296, "learning_rate": 3.852622366514536e-07, "loss": 0.2868, "step": 7968 }, { "epoch": 2.6607679465776295, "grad_norm": 1.07962331536299, "learning_rate": 3.845148235057755e-07, "loss": 0.2993, "step": 7969 }, { "epoch": 2.66110183639399, "grad_norm": 1.0361719668530025, "learning_rate": 3.8376810706621915e-07, "loss": 0.2877, "step": 7970 }, { "epoch": 2.6614357262103505, "grad_norm": 1.0693461498333698, "learning_rate": 3.8302208744549984e-07, "loss": 0.2977, "step": 7971 }, { "epoch": 2.661769616026711, "grad_norm": 1.0472589889708819, "learning_rate": 3.8227676475623146e-07, "loss": 0.2868, "step": 7972 }, { "epoch": 2.6621035058430715, "grad_norm": 1.0793977286854557, "learning_rate": 3.8153213911091836e-07, "loss": 0.2994, "step": 7973 }, { "epoch": 2.6624373956594325, "grad_norm": 1.0395896120520725, "learning_rate": 3.8078821062196346e-07, "loss": 0.2922, "step": 7974 }, { "epoch": 2.662771285475793, "grad_norm": 1.025382317710998, "learning_rate": 3.800449794016592e-07, "loss": 0.2864, "step": 7975 }, { "epoch": 2.6631051752921535, "grad_norm": 1.0352054271186384, "learning_rate": 3.793024455621991e-07, "loss": 0.2888, "step": 7976 }, { "epoch": 2.663439065108514, "grad_norm": 1.0072423719674797, "learning_rate": 3.785606092156668e-07, "loss": 0.2848, "step": 7977 }, { "epoch": 2.663772954924875, "grad_norm": 1.0221823152666163, "learning_rate": 3.7781947047404343e-07, "loss": 0.2877, "step": 7978 }, { "epoch": 2.6641068447412355, "grad_norm": 1.0580362858525132, "learning_rate": 3.770790294492033e-07, "loss": 0.2994, "step": 7979 }, { "epoch": 2.664440734557596, "grad_norm": 1.0482584027926924, "learning_rate": 3.7633928625291525e-07, "loss": 0.2853, "step": 7980 }, { "epoch": 2.6647746243739565, "grad_norm": 0.9890814729051999, "learning_rate": 3.7560024099684345e-07, "loss": 0.2663, "step": 7981 }, { "epoch": 2.665108514190317, "grad_norm": 1.0354922868484107, "learning_rate": 3.748618937925469e-07, "loss": 0.2809, "step": 7982 }, { "epoch": 2.6654424040066775, "grad_norm": 1.0666820241242323, "learning_rate": 3.741242447514792e-07, "loss": 0.2881, "step": 7983 }, { "epoch": 2.6657762938230385, "grad_norm": 1.0176268643494948, "learning_rate": 3.733872939849875e-07, "loss": 0.2835, "step": 7984 }, { "epoch": 2.666110183639399, "grad_norm": 1.0364651758053676, "learning_rate": 3.7265104160431445e-07, "loss": 0.2945, "step": 7985 }, { "epoch": 2.6664440734557595, "grad_norm": 1.0294433267310945, "learning_rate": 3.7191548772059713e-07, "loss": 0.2915, "step": 7986 }, { "epoch": 2.6667779632721205, "grad_norm": 1.0136556512990207, "learning_rate": 3.7118063244486693e-07, "loss": 0.2887, "step": 7987 }, { "epoch": 2.667111853088481, "grad_norm": 1.0418653592645624, "learning_rate": 3.7044647588805106e-07, "loss": 0.298, "step": 7988 }, { "epoch": 2.6674457429048415, "grad_norm": 1.0470078012788089, "learning_rate": 3.697130181609704e-07, "loss": 0.2882, "step": 7989 }, { "epoch": 2.667779632721202, "grad_norm": 1.087849579352661, "learning_rate": 3.689802593743386e-07, "loss": 0.3007, "step": 7990 }, { "epoch": 2.6681135225375625, "grad_norm": 1.0638007770816116, "learning_rate": 3.6824819963876545e-07, "loss": 0.2901, "step": 7991 }, { "epoch": 2.668447412353923, "grad_norm": 1.08620131540407, "learning_rate": 3.6751683906475654e-07, "loss": 0.2976, "step": 7992 }, { "epoch": 2.6687813021702835, "grad_norm": 1.0503440894633358, "learning_rate": 3.667861777627096e-07, "loss": 0.3005, "step": 7993 }, { "epoch": 2.6691151919866445, "grad_norm": 1.0463686058377217, "learning_rate": 3.660562158429193e-07, "loss": 0.2698, "step": 7994 }, { "epoch": 2.669449081803005, "grad_norm": 1.0948466751973678, "learning_rate": 3.6532695341556957e-07, "loss": 0.3072, "step": 7995 }, { "epoch": 2.6697829716193655, "grad_norm": 1.0540814101697635, "learning_rate": 3.645983905907463e-07, "loss": 0.3013, "step": 7996 }, { "epoch": 2.6701168614357265, "grad_norm": 1.0390599607623503, "learning_rate": 3.638705274784238e-07, "loss": 0.2895, "step": 7997 }, { "epoch": 2.670450751252087, "grad_norm": 1.0460365033586116, "learning_rate": 3.6314336418847297e-07, "loss": 0.3012, "step": 7998 }, { "epoch": 2.6707846410684475, "grad_norm": 1.022765958521049, "learning_rate": 3.624169008306594e-07, "loss": 0.292, "step": 7999 }, { "epoch": 2.671118530884808, "grad_norm": 1.0331274899965237, "learning_rate": 3.61691137514642e-07, "loss": 0.2838, "step": 8000 }, { "epoch": 2.6714524207011685, "grad_norm": 1.017618639431118, "learning_rate": 3.609660743499749e-07, "loss": 0.2805, "step": 8001 }, { "epoch": 2.671786310517529, "grad_norm": 1.036215731170446, "learning_rate": 3.6024171144610654e-07, "loss": 0.2885, "step": 8002 }, { "epoch": 2.67212020033389, "grad_norm": 1.0517523912370303, "learning_rate": 3.595180489123778e-07, "loss": 0.2909, "step": 8003 }, { "epoch": 2.6724540901502505, "grad_norm": 1.0586473506065959, "learning_rate": 3.58795086858027e-07, "loss": 0.2869, "step": 8004 }, { "epoch": 2.672787979966611, "grad_norm": 1.0597071624993286, "learning_rate": 3.580728253921839e-07, "loss": 0.3013, "step": 8005 }, { "epoch": 2.6731218697829715, "grad_norm": 1.0473424109390475, "learning_rate": 3.573512646238747e-07, "loss": 0.286, "step": 8006 }, { "epoch": 2.6734557595993325, "grad_norm": 1.0657395568890566, "learning_rate": 3.5663040466201784e-07, "loss": 0.3057, "step": 8007 }, { "epoch": 2.673789649415693, "grad_norm": 1.086610358571906, "learning_rate": 3.559102456154262e-07, "loss": 0.3042, "step": 8008 }, { "epoch": 2.6741235392320535, "grad_norm": 0.970033456941136, "learning_rate": 3.5519078759281013e-07, "loss": 0.2676, "step": 8009 }, { "epoch": 2.674457429048414, "grad_norm": 1.0587971056907886, "learning_rate": 3.544720307027699e-07, "loss": 0.2918, "step": 8010 }, { "epoch": 2.6747913188647745, "grad_norm": 1.0155079658442625, "learning_rate": 3.537539750538016e-07, "loss": 0.2768, "step": 8011 }, { "epoch": 2.675125208681135, "grad_norm": 1.0372058244609126, "learning_rate": 3.5303662075429455e-07, "loss": 0.2903, "step": 8012 }, { "epoch": 2.675459098497496, "grad_norm": 1.0688247990552768, "learning_rate": 3.523199679125361e-07, "loss": 0.2927, "step": 8013 }, { "epoch": 2.6757929883138565, "grad_norm": 1.0624460360504135, "learning_rate": 3.51604016636703e-07, "loss": 0.2962, "step": 8014 }, { "epoch": 2.676126878130217, "grad_norm": 1.037634676628197, "learning_rate": 3.5088876703486765e-07, "loss": 0.2919, "step": 8015 }, { "epoch": 2.6764607679465775, "grad_norm": 1.0689489749712644, "learning_rate": 3.5017421921499594e-07, "loss": 0.2946, "step": 8016 }, { "epoch": 2.6767946577629385, "grad_norm": 1.0588247136692246, "learning_rate": 3.494603732849511e-07, "loss": 0.3093, "step": 8017 }, { "epoch": 2.677128547579299, "grad_norm": 1.0043475649931508, "learning_rate": 3.487472293524863e-07, "loss": 0.2915, "step": 8018 }, { "epoch": 2.6774624373956595, "grad_norm": 1.0335746095902765, "learning_rate": 3.48034787525251e-07, "loss": 0.2793, "step": 8019 }, { "epoch": 2.67779632721202, "grad_norm": 1.0481210937273844, "learning_rate": 3.4732304791078753e-07, "loss": 0.2825, "step": 8020 }, { "epoch": 2.6781302170283805, "grad_norm": 1.01638177017067, "learning_rate": 3.4661201061653325e-07, "loss": 0.2827, "step": 8021 }, { "epoch": 2.678464106844741, "grad_norm": 1.060493303309888, "learning_rate": 3.45901675749819e-07, "loss": 0.2966, "step": 8022 }, { "epoch": 2.678797996661102, "grad_norm": 0.9876229283470654, "learning_rate": 3.451920434178691e-07, "loss": 0.2755, "step": 8023 }, { "epoch": 2.6791318864774625, "grad_norm": 1.0292772722344081, "learning_rate": 3.444831137278032e-07, "loss": 0.2899, "step": 8024 }, { "epoch": 2.679465776293823, "grad_norm": 0.9925388894988951, "learning_rate": 3.4377488678663304e-07, "loss": 0.281, "step": 8025 }, { "epoch": 2.6797996661101835, "grad_norm": 1.0101200844934357, "learning_rate": 3.4306736270126596e-07, "loss": 0.2864, "step": 8026 }, { "epoch": 2.6801335559265445, "grad_norm": 1.0240208087722409, "learning_rate": 3.423605415785025e-07, "loss": 0.2945, "step": 8027 }, { "epoch": 2.680467445742905, "grad_norm": 1.0570218291356246, "learning_rate": 3.416544235250363e-07, "loss": 0.3003, "step": 8028 }, { "epoch": 2.6808013355592655, "grad_norm": 0.9983058638563429, "learning_rate": 3.4094900864745763e-07, "loss": 0.2895, "step": 8029 }, { "epoch": 2.681135225375626, "grad_norm": 1.058512776983295, "learning_rate": 3.4024429705224627e-07, "loss": 0.2901, "step": 8030 }, { "epoch": 2.6814691151919865, "grad_norm": 1.0720774376892583, "learning_rate": 3.395402888457794e-07, "loss": 0.3074, "step": 8031 }, { "epoch": 2.681803005008347, "grad_norm": 1.0382127902586162, "learning_rate": 3.38836984134327e-07, "loss": 0.2975, "step": 8032 }, { "epoch": 2.682136894824708, "grad_norm": 1.0283356607117247, "learning_rate": 3.3813438302405133e-07, "loss": 0.2821, "step": 8033 }, { "epoch": 2.6824707846410685, "grad_norm": 1.0425865928177667, "learning_rate": 3.3743248562101204e-07, "loss": 0.2856, "step": 8034 }, { "epoch": 2.682804674457429, "grad_norm": 1.0685269621627782, "learning_rate": 3.3673129203115996e-07, "loss": 0.3052, "step": 8035 }, { "epoch": 2.6831385642737895, "grad_norm": 1.1169938385954974, "learning_rate": 3.360308023603387e-07, "loss": 0.2987, "step": 8036 }, { "epoch": 2.6834724540901504, "grad_norm": 1.0332407781940365, "learning_rate": 3.353310167142865e-07, "loss": 0.2989, "step": 8037 }, { "epoch": 2.683806343906511, "grad_norm": 0.9865656963166956, "learning_rate": 3.346319351986377e-07, "loss": 0.2848, "step": 8038 }, { "epoch": 2.6841402337228715, "grad_norm": 1.044525656325323, "learning_rate": 3.3393355791891736e-07, "loss": 0.294, "step": 8039 }, { "epoch": 2.684474123539232, "grad_norm": 1.0586608006981093, "learning_rate": 3.332358849805467e-07, "loss": 0.3074, "step": 8040 }, { "epoch": 2.6848080133555925, "grad_norm": 1.0234647163043529, "learning_rate": 3.325389164888365e-07, "loss": 0.2796, "step": 8041 }, { "epoch": 2.685141903171953, "grad_norm": 1.0339385496936666, "learning_rate": 3.318426525489965e-07, "loss": 0.2838, "step": 8042 }, { "epoch": 2.685475792988314, "grad_norm": 1.0523265117828173, "learning_rate": 3.3114709326612705e-07, "loss": 0.2836, "step": 8043 }, { "epoch": 2.6858096828046745, "grad_norm": 1.0302756245327171, "learning_rate": 3.3045223874522135e-07, "loss": 0.2892, "step": 8044 }, { "epoch": 2.686143572621035, "grad_norm": 1.0871608905580736, "learning_rate": 3.297580890911689e-07, "loss": 0.2994, "step": 8045 }, { "epoch": 2.686477462437396, "grad_norm": 1.09673199940856, "learning_rate": 3.290646444087503e-07, "loss": 0.3001, "step": 8046 }, { "epoch": 2.6868113522537564, "grad_norm": 1.05432702991934, "learning_rate": 3.283719048026418e-07, "loss": 0.2877, "step": 8047 }, { "epoch": 2.687145242070117, "grad_norm": 1.0318379999405356, "learning_rate": 3.2767987037741145e-07, "loss": 0.2967, "step": 8048 }, { "epoch": 2.6874791318864775, "grad_norm": 1.0238399181774334, "learning_rate": 3.269885412375223e-07, "loss": 0.2858, "step": 8049 }, { "epoch": 2.687813021702838, "grad_norm": 1.0773039527559436, "learning_rate": 3.262979174873299e-07, "loss": 0.3036, "step": 8050 }, { "epoch": 2.6881469115191985, "grad_norm": 1.0274485647157792, "learning_rate": 3.2560799923108346e-07, "loss": 0.2941, "step": 8051 }, { "epoch": 2.688480801335559, "grad_norm": 1.0575716389755627, "learning_rate": 3.2491878657292643e-07, "loss": 0.2959, "step": 8052 }, { "epoch": 2.68881469115192, "grad_norm": 1.024953546758388, "learning_rate": 3.2423027961689393e-07, "loss": 0.281, "step": 8053 }, { "epoch": 2.6891485809682805, "grad_norm": 1.0690224366213195, "learning_rate": 3.235424784669183e-07, "loss": 0.2908, "step": 8054 }, { "epoch": 2.689482470784641, "grad_norm": 1.0557465356949023, "learning_rate": 3.228553832268222e-07, "loss": 0.2989, "step": 8055 }, { "epoch": 2.689816360601002, "grad_norm": 1.030403302992674, "learning_rate": 3.2216899400032085e-07, "loss": 0.2808, "step": 8056 }, { "epoch": 2.6901502504173624, "grad_norm": 1.0541340450060412, "learning_rate": 3.214833108910248e-07, "loss": 0.2903, "step": 8057 }, { "epoch": 2.690484140233723, "grad_norm": 0.9836273333514779, "learning_rate": 3.207983340024395e-07, "loss": 0.2769, "step": 8058 }, { "epoch": 2.6908180300500835, "grad_norm": 1.0627808515201151, "learning_rate": 3.201140634379607e-07, "loss": 0.2948, "step": 8059 }, { "epoch": 2.691151919866444, "grad_norm": 1.01856957037347, "learning_rate": 3.194304993008801e-07, "loss": 0.2893, "step": 8060 }, { "epoch": 2.6914858096828045, "grad_norm": 1.026430325934716, "learning_rate": 3.187476416943791e-07, "loss": 0.2842, "step": 8061 }, { "epoch": 2.6918196994991654, "grad_norm": 1.069571772621405, "learning_rate": 3.180654907215364e-07, "loss": 0.2893, "step": 8062 }, { "epoch": 2.692153589315526, "grad_norm": 1.0162783058848248, "learning_rate": 3.1738404648532226e-07, "loss": 0.2798, "step": 8063 }, { "epoch": 2.6924874791318865, "grad_norm": 1.0269916940457735, "learning_rate": 3.167033090886007e-07, "loss": 0.2831, "step": 8064 }, { "epoch": 2.692821368948247, "grad_norm": 1.051353972786801, "learning_rate": 3.1602327863412886e-07, "loss": 0.2961, "step": 8065 }, { "epoch": 2.693155258764608, "grad_norm": 1.0545127348045906, "learning_rate": 3.153439552245574e-07, "loss": 0.2895, "step": 8066 }, { "epoch": 2.6934891485809684, "grad_norm": 1.066077653476318, "learning_rate": 3.1466533896242877e-07, "loss": 0.2938, "step": 8067 }, { "epoch": 2.693823038397329, "grad_norm": 1.055140195946302, "learning_rate": 3.1398742995018103e-07, "loss": 0.2882, "step": 8068 }, { "epoch": 2.6941569282136895, "grad_norm": 1.0450647460813305, "learning_rate": 3.13310228290144e-07, "loss": 0.2911, "step": 8069 }, { "epoch": 2.69449081803005, "grad_norm": 1.0342794941921976, "learning_rate": 3.126337340845409e-07, "loss": 0.2845, "step": 8070 }, { "epoch": 2.6948247078464105, "grad_norm": 1.0530143928821105, "learning_rate": 3.1195794743548893e-07, "loss": 0.2854, "step": 8071 }, { "epoch": 2.6951585976627714, "grad_norm": 1.0061723278241896, "learning_rate": 3.11282868444997e-07, "loss": 0.2714, "step": 8072 }, { "epoch": 2.695492487479132, "grad_norm": 1.074631261470898, "learning_rate": 3.1060849721496865e-07, "loss": 0.3038, "step": 8073 }, { "epoch": 2.6958263772954925, "grad_norm": 1.0265630475755283, "learning_rate": 3.0993483384719916e-07, "loss": 0.2804, "step": 8074 }, { "epoch": 2.696160267111853, "grad_norm": 1.0615252140981768, "learning_rate": 3.0926187844337986e-07, "loss": 0.2856, "step": 8075 }, { "epoch": 2.696494156928214, "grad_norm": 1.0445824953942562, "learning_rate": 3.085896311050912e-07, "loss": 0.2759, "step": 8076 }, { "epoch": 2.6968280467445744, "grad_norm": 1.055934338903995, "learning_rate": 3.079180919338093e-07, "loss": 0.293, "step": 8077 }, { "epoch": 2.697161936560935, "grad_norm": 1.0573426550763776, "learning_rate": 3.0724726103090243e-07, "loss": 0.29, "step": 8078 }, { "epoch": 2.6974958263772955, "grad_norm": 1.0556473188131292, "learning_rate": 3.0657713849763295e-07, "loss": 0.2881, "step": 8079 }, { "epoch": 2.697829716193656, "grad_norm": 1.0261212892857303, "learning_rate": 3.0590772443515605e-07, "loss": 0.2867, "step": 8080 }, { "epoch": 2.6981636060100165, "grad_norm": 1.0231575388335663, "learning_rate": 3.052390189445192e-07, "loss": 0.2853, "step": 8081 }, { "epoch": 2.6984974958263774, "grad_norm": 1.052562119480772, "learning_rate": 3.045710221266623e-07, "loss": 0.2864, "step": 8082 }, { "epoch": 2.698831385642738, "grad_norm": 1.0239263557467388, "learning_rate": 3.0390373408242023e-07, "loss": 0.2843, "step": 8083 }, { "epoch": 2.6991652754590985, "grad_norm": 1.0320649698081323, "learning_rate": 3.0323715491252014e-07, "loss": 0.2798, "step": 8084 }, { "epoch": 2.699499165275459, "grad_norm": 1.0706228986519715, "learning_rate": 3.02571284717581e-07, "loss": 0.2969, "step": 8085 }, { "epoch": 2.69983305509182, "grad_norm": 1.0307911723476433, "learning_rate": 3.019061235981169e-07, "loss": 0.2886, "step": 8086 }, { "epoch": 2.7001669449081804, "grad_norm": 1.0351692560975325, "learning_rate": 3.01241671654533e-07, "loss": 0.2899, "step": 8087 }, { "epoch": 2.700500834724541, "grad_norm": 1.0472841544075906, "learning_rate": 3.00577928987128e-07, "loss": 0.2966, "step": 8088 }, { "epoch": 2.7008347245409015, "grad_norm": 1.055242791091283, "learning_rate": 2.99914895696094e-07, "loss": 0.2962, "step": 8089 }, { "epoch": 2.701168614357262, "grad_norm": 1.1194637224157913, "learning_rate": 2.9925257188151533e-07, "loss": 0.2997, "step": 8090 }, { "epoch": 2.7015025041736225, "grad_norm": 1.0716150578047965, "learning_rate": 2.9859095764337045e-07, "loss": 0.2869, "step": 8091 }, { "epoch": 2.7018363939899834, "grad_norm": 1.0240409132814676, "learning_rate": 2.979300530815288e-07, "loss": 0.274, "step": 8092 }, { "epoch": 2.702170283806344, "grad_norm": 1.0484290269246161, "learning_rate": 2.9726985829575397e-07, "loss": 0.2902, "step": 8093 }, { "epoch": 2.7025041736227045, "grad_norm": 1.006774271563574, "learning_rate": 2.9661037338570284e-07, "loss": 0.2896, "step": 8094 }, { "epoch": 2.702838063439065, "grad_norm": 1.0202850251975117, "learning_rate": 2.9595159845092257e-07, "loss": 0.289, "step": 8095 }, { "epoch": 2.703171953255426, "grad_norm": 1.0552732325481937, "learning_rate": 2.952935335908586e-07, "loss": 0.2864, "step": 8096 }, { "epoch": 2.7035058430717864, "grad_norm": 1.070847708298994, "learning_rate": 2.9463617890484184e-07, "loss": 0.2984, "step": 8097 }, { "epoch": 2.703839732888147, "grad_norm": 1.0319664452608788, "learning_rate": 2.9397953449210205e-07, "loss": 0.2979, "step": 8098 }, { "epoch": 2.7041736227045075, "grad_norm": 1.0343913490068466, "learning_rate": 2.933236004517581e-07, "loss": 0.2948, "step": 8099 }, { "epoch": 2.704507512520868, "grad_norm": 1.0760343102894656, "learning_rate": 2.9266837688282424e-07, "loss": 0.3056, "step": 8100 }, { "epoch": 2.7048414023372285, "grad_norm": 1.0451895909974813, "learning_rate": 2.920138638842068e-07, "loss": 0.2911, "step": 8101 }, { "epoch": 2.7051752921535894, "grad_norm": 1.0199989322771301, "learning_rate": 2.9136006155470244e-07, "loss": 0.2846, "step": 8102 }, { "epoch": 2.70550918196995, "grad_norm": 1.086694406049633, "learning_rate": 2.907069699930026e-07, "loss": 0.3056, "step": 8103 }, { "epoch": 2.7058430717863104, "grad_norm": 1.045734909373347, "learning_rate": 2.900545892976925e-07, "loss": 0.2857, "step": 8104 }, { "epoch": 2.706176961602671, "grad_norm": 1.008394063831006, "learning_rate": 2.8940291956724874e-07, "loss": 0.2691, "step": 8105 }, { "epoch": 2.706510851419032, "grad_norm": 1.0614693895771643, "learning_rate": 2.887519609000411e-07, "loss": 0.2972, "step": 8106 }, { "epoch": 2.7068447412353924, "grad_norm": 1.118019932237365, "learning_rate": 2.881017133943287e-07, "loss": 0.305, "step": 8107 }, { "epoch": 2.707178631051753, "grad_norm": 0.9979233412979195, "learning_rate": 2.874521771482691e-07, "loss": 0.2768, "step": 8108 }, { "epoch": 2.7075125208681134, "grad_norm": 1.0549329215869405, "learning_rate": 2.8680335225990897e-07, "loss": 0.2979, "step": 8109 }, { "epoch": 2.707846410684474, "grad_norm": 1.0313223983985913, "learning_rate": 2.861552388271882e-07, "loss": 0.2903, "step": 8110 }, { "epoch": 2.7081803005008345, "grad_norm": 0.9860457829548057, "learning_rate": 2.855078369479386e-07, "loss": 0.2717, "step": 8111 }, { "epoch": 2.7085141903171954, "grad_norm": 1.0533215841501011, "learning_rate": 2.848611467198864e-07, "loss": 0.289, "step": 8112 }, { "epoch": 2.708848080133556, "grad_norm": 1.0724839937793609, "learning_rate": 2.8421516824064855e-07, "loss": 0.2997, "step": 8113 }, { "epoch": 2.7091819699499164, "grad_norm": 0.995490167828089, "learning_rate": 2.8356990160773534e-07, "loss": 0.2789, "step": 8114 }, { "epoch": 2.7095158597662774, "grad_norm": 1.0197557945197127, "learning_rate": 2.829253469185489e-07, "loss": 0.2746, "step": 8115 }, { "epoch": 2.709849749582638, "grad_norm": 1.0422145855288767, "learning_rate": 2.822815042703869e-07, "loss": 0.29, "step": 8116 }, { "epoch": 2.7101836393989984, "grad_norm": 1.0579695666098787, "learning_rate": 2.81638373760435e-07, "loss": 0.2967, "step": 8117 }, { "epoch": 2.710517529215359, "grad_norm": 1.0340014899204821, "learning_rate": 2.8099595548577443e-07, "loss": 0.2919, "step": 8118 }, { "epoch": 2.7108514190317194, "grad_norm": 1.029555889389638, "learning_rate": 2.80354249543377e-07, "loss": 0.2896, "step": 8119 }, { "epoch": 2.71118530884808, "grad_norm": 1.0349603161011007, "learning_rate": 2.797132560301097e-07, "loss": 0.283, "step": 8120 }, { "epoch": 2.7115191986644405, "grad_norm": 1.039960086414792, "learning_rate": 2.7907297504273013e-07, "loss": 0.287, "step": 8121 }, { "epoch": 2.7118530884808014, "grad_norm": 1.048785413297547, "learning_rate": 2.784334066778871e-07, "loss": 0.2842, "step": 8122 }, { "epoch": 2.712186978297162, "grad_norm": 1.0220236172331412, "learning_rate": 2.7779455103212393e-07, "loss": 0.2806, "step": 8123 }, { "epoch": 2.7125208681135224, "grad_norm": 1.025281212620819, "learning_rate": 2.771564082018752e-07, "loss": 0.2709, "step": 8124 }, { "epoch": 2.7128547579298834, "grad_norm": 1.0572797447338922, "learning_rate": 2.7651897828346984e-07, "loss": 0.2995, "step": 8125 }, { "epoch": 2.713188647746244, "grad_norm": 1.0411806642027572, "learning_rate": 2.758822613731271e-07, "loss": 0.292, "step": 8126 }, { "epoch": 2.7135225375626044, "grad_norm": 1.065153572209205, "learning_rate": 2.7524625756695955e-07, "loss": 0.2891, "step": 8127 }, { "epoch": 2.713856427378965, "grad_norm": 1.0071416422191732, "learning_rate": 2.746109669609692e-07, "loss": 0.2787, "step": 8128 }, { "epoch": 2.7141903171953254, "grad_norm": 1.0719803486354262, "learning_rate": 2.739763896510561e-07, "loss": 0.298, "step": 8129 }, { "epoch": 2.714524207011686, "grad_norm": 1.0967647915518774, "learning_rate": 2.733425257330086e-07, "loss": 0.2902, "step": 8130 }, { "epoch": 2.714858096828047, "grad_norm": 1.0379331274381496, "learning_rate": 2.7270937530250783e-07, "loss": 0.2818, "step": 8131 }, { "epoch": 2.7151919866444074, "grad_norm": 1.0108214182362203, "learning_rate": 2.7207693845512806e-07, "loss": 0.2751, "step": 8132 }, { "epoch": 2.715525876460768, "grad_norm": 1.1034050774184814, "learning_rate": 2.7144521528633506e-07, "loss": 0.3031, "step": 8133 }, { "epoch": 2.7158597662771284, "grad_norm": 1.0326418881196973, "learning_rate": 2.7081420589148755e-07, "loss": 0.2818, "step": 8134 }, { "epoch": 2.7161936560934894, "grad_norm": 1.0375291148491832, "learning_rate": 2.70183910365836e-07, "loss": 0.2862, "step": 8135 }, { "epoch": 2.71652754590985, "grad_norm": 1.052488400794386, "learning_rate": 2.695543288045238e-07, "loss": 0.2923, "step": 8136 }, { "epoch": 2.7168614357262104, "grad_norm": 1.0541784798254252, "learning_rate": 2.6892546130258543e-07, "loss": 0.3018, "step": 8137 }, { "epoch": 2.717195325542571, "grad_norm": 1.04290852017828, "learning_rate": 2.6829730795494893e-07, "loss": 0.2909, "step": 8138 }, { "epoch": 2.7175292153589314, "grad_norm": 1.0427141619349498, "learning_rate": 2.6766986885643333e-07, "loss": 0.2928, "step": 8139 }, { "epoch": 2.717863105175292, "grad_norm": 1.056979731926259, "learning_rate": 2.670431441017496e-07, "loss": 0.2974, "step": 8140 }, { "epoch": 2.718196994991653, "grad_norm": 1.0434608412561324, "learning_rate": 2.6641713378550373e-07, "loss": 0.2904, "step": 8141 }, { "epoch": 2.7185308848080134, "grad_norm": 1.0626823619802481, "learning_rate": 2.657918380021912e-07, "loss": 0.2913, "step": 8142 }, { "epoch": 2.718864774624374, "grad_norm": 1.0628796859967942, "learning_rate": 2.651672568461988e-07, "loss": 0.3029, "step": 8143 }, { "epoch": 2.7191986644407344, "grad_norm": 1.0074718920540349, "learning_rate": 2.645433904118072e-07, "loss": 0.2888, "step": 8144 }, { "epoch": 2.7195325542570954, "grad_norm": 1.0273936913420194, "learning_rate": 2.6392023879318993e-07, "loss": 0.2831, "step": 8145 }, { "epoch": 2.719866444073456, "grad_norm": 1.0040086935890131, "learning_rate": 2.632978020844118e-07, "loss": 0.2774, "step": 8146 }, { "epoch": 2.7202003338898164, "grad_norm": 1.0346249477368885, "learning_rate": 2.626760803794287e-07, "loss": 0.2965, "step": 8147 }, { "epoch": 2.720534223706177, "grad_norm": 1.0203095296408928, "learning_rate": 2.620550737720878e-07, "loss": 0.2774, "step": 8148 }, { "epoch": 2.7208681135225374, "grad_norm": 1.0671385461216285, "learning_rate": 2.6143478235613253e-07, "loss": 0.2916, "step": 8149 }, { "epoch": 2.721202003338898, "grad_norm": 1.049156786244822, "learning_rate": 2.608152062251945e-07, "loss": 0.2831, "step": 8150 }, { "epoch": 2.721535893155259, "grad_norm": 1.0586039827104985, "learning_rate": 2.601963454727985e-07, "loss": 0.2933, "step": 8151 }, { "epoch": 2.7218697829716194, "grad_norm": 1.0376865054814386, "learning_rate": 2.595782001923625e-07, "loss": 0.2933, "step": 8152 }, { "epoch": 2.72220367278798, "grad_norm": 1.0418416026895931, "learning_rate": 2.589607704771924e-07, "loss": 0.2881, "step": 8153 }, { "epoch": 2.7225375626043404, "grad_norm": 1.0544556639510505, "learning_rate": 2.58344056420492e-07, "loss": 0.2939, "step": 8154 }, { "epoch": 2.7228714524207014, "grad_norm": 1.0678547951943855, "learning_rate": 2.5772805811535293e-07, "loss": 0.2937, "step": 8155 }, { "epoch": 2.723205342237062, "grad_norm": 1.0476712795815226, "learning_rate": 2.5711277565475976e-07, "loss": 0.2952, "step": 8156 }, { "epoch": 2.7235392320534224, "grad_norm": 1.0528089278684465, "learning_rate": 2.5649820913158987e-07, "loss": 0.2888, "step": 8157 }, { "epoch": 2.723873121869783, "grad_norm": 1.0649360740311002, "learning_rate": 2.558843586386117e-07, "loss": 0.2928, "step": 8158 }, { "epoch": 2.7242070116861434, "grad_norm": 1.0627535903872332, "learning_rate": 2.5527122426848517e-07, "loss": 0.2949, "step": 8159 }, { "epoch": 2.724540901502504, "grad_norm": 1.0653542807535483, "learning_rate": 2.546588061137639e-07, "loss": 0.2937, "step": 8160 }, { "epoch": 2.724874791318865, "grad_norm": 1.0272929543880407, "learning_rate": 2.540471042668902e-07, "loss": 0.2991, "step": 8161 }, { "epoch": 2.7252086811352254, "grad_norm": 1.0470314968008796, "learning_rate": 2.5343611882020336e-07, "loss": 0.2917, "step": 8162 }, { "epoch": 2.725542570951586, "grad_norm": 1.0226892140221555, "learning_rate": 2.528258498659286e-07, "loss": 0.2788, "step": 8163 }, { "epoch": 2.7258764607679464, "grad_norm": 1.0680646435047922, "learning_rate": 2.522162974961873e-07, "loss": 0.2918, "step": 8164 }, { "epoch": 2.7262103505843074, "grad_norm": 1.0233517921872464, "learning_rate": 2.516074618029901e-07, "loss": 0.2778, "step": 8165 }, { "epoch": 2.726544240400668, "grad_norm": 1.025565743525879, "learning_rate": 2.50999342878242e-07, "loss": 0.2824, "step": 8166 }, { "epoch": 2.7268781302170284, "grad_norm": 1.0518401071460797, "learning_rate": 2.503919408137384e-07, "loss": 0.2942, "step": 8167 }, { "epoch": 2.727212020033389, "grad_norm": 1.0745896831253143, "learning_rate": 2.497852557011654e-07, "loss": 0.294, "step": 8168 }, { "epoch": 2.7275459098497494, "grad_norm": 1.0388985287576173, "learning_rate": 2.4917928763210084e-07, "loss": 0.2882, "step": 8169 }, { "epoch": 2.72787979966611, "grad_norm": 1.0726739852772116, "learning_rate": 2.485740366980183e-07, "loss": 0.3005, "step": 8170 }, { "epoch": 2.728213689482471, "grad_norm": 1.0127850589114946, "learning_rate": 2.4796950299027846e-07, "loss": 0.2787, "step": 8171 }, { "epoch": 2.7285475792988314, "grad_norm": 1.0284705772694498, "learning_rate": 2.473656866001356e-07, "loss": 0.2838, "step": 8172 }, { "epoch": 2.728881469115192, "grad_norm": 1.0525216328787776, "learning_rate": 2.4676258761873627e-07, "loss": 0.2803, "step": 8173 }, { "epoch": 2.7292153589315524, "grad_norm": 1.0512760741690437, "learning_rate": 2.461602061371177e-07, "loss": 0.2913, "step": 8174 }, { "epoch": 2.7295492487479134, "grad_norm": 1.0641895404986708, "learning_rate": 2.455585422462092e-07, "loss": 0.3113, "step": 8175 }, { "epoch": 2.729883138564274, "grad_norm": 1.0536458313328991, "learning_rate": 2.4495759603683165e-07, "loss": 0.2958, "step": 8176 }, { "epoch": 2.7302170283806344, "grad_norm": 1.036935230257796, "learning_rate": 2.443573675996974e-07, "loss": 0.2915, "step": 8177 }, { "epoch": 2.730550918196995, "grad_norm": 1.082473037780468, "learning_rate": 2.4375785702541174e-07, "loss": 0.3083, "step": 8178 }, { "epoch": 2.7308848080133554, "grad_norm": 1.0404557767264282, "learning_rate": 2.4315906440446957e-07, "loss": 0.2942, "step": 8179 }, { "epoch": 2.731218697829716, "grad_norm": 1.091500568122865, "learning_rate": 2.425609898272591e-07, "loss": 0.2894, "step": 8180 }, { "epoch": 2.731552587646077, "grad_norm": 1.0894156797194587, "learning_rate": 2.419636333840586e-07, "loss": 0.2945, "step": 8181 }, { "epoch": 2.7318864774624374, "grad_norm": 1.0659065837156536, "learning_rate": 2.4136699516504104e-07, "loss": 0.2932, "step": 8182 }, { "epoch": 2.732220367278798, "grad_norm": 1.074731489310348, "learning_rate": 2.4077107526026653e-07, "loss": 0.2945, "step": 8183 }, { "epoch": 2.732554257095159, "grad_norm": 1.0691072570403217, "learning_rate": 2.4017587375968986e-07, "loss": 0.299, "step": 8184 }, { "epoch": 2.7328881469115194, "grad_norm": 1.049341370373078, "learning_rate": 2.3958139075315633e-07, "loss": 0.2864, "step": 8185 }, { "epoch": 2.73322203672788, "grad_norm": 0.9903655337745176, "learning_rate": 2.3898762633040253e-07, "loss": 0.272, "step": 8186 }, { "epoch": 2.7335559265442404, "grad_norm": 1.083643836842429, "learning_rate": 2.3839458058105835e-07, "loss": 0.301, "step": 8187 }, { "epoch": 2.733889816360601, "grad_norm": 1.0434773652569262, "learning_rate": 2.3780225359464393e-07, "loss": 0.3012, "step": 8188 }, { "epoch": 2.7342237061769614, "grad_norm": 1.098665666604828, "learning_rate": 2.3721064546056882e-07, "loss": 0.2997, "step": 8189 }, { "epoch": 2.734557595993322, "grad_norm": 1.0449163995585256, "learning_rate": 2.366197562681366e-07, "loss": 0.2814, "step": 8190 }, { "epoch": 2.734891485809683, "grad_norm": 1.0005857759621821, "learning_rate": 2.360295861065437e-07, "loss": 0.273, "step": 8191 }, { "epoch": 2.7352253756260434, "grad_norm": 1.070697180776863, "learning_rate": 2.3544013506487495e-07, "loss": 0.2935, "step": 8192 }, { "epoch": 2.735559265442404, "grad_norm": 1.0390831850067037, "learning_rate": 2.348514032321081e-07, "loss": 0.2896, "step": 8193 }, { "epoch": 2.735893155258765, "grad_norm": 1.0663900790175243, "learning_rate": 2.3426339069711034e-07, "loss": 0.3057, "step": 8194 }, { "epoch": 2.7362270450751254, "grad_norm": 1.0745963602797413, "learning_rate": 2.3367609754864462e-07, "loss": 0.2862, "step": 8195 }, { "epoch": 2.736560934891486, "grad_norm": 1.0741595602575038, "learning_rate": 2.330895238753611e-07, "loss": 0.3004, "step": 8196 }, { "epoch": 2.7368948247078464, "grad_norm": 1.0968932767999156, "learning_rate": 2.325036697658034e-07, "loss": 0.3117, "step": 8197 }, { "epoch": 2.737228714524207, "grad_norm": 1.03422753802392, "learning_rate": 2.319185353084058e-07, "loss": 0.2834, "step": 8198 }, { "epoch": 2.7375626043405674, "grad_norm": 1.0881011321731628, "learning_rate": 2.313341205914943e-07, "loss": 0.302, "step": 8199 }, { "epoch": 2.7378964941569284, "grad_norm": 1.0587607741845289, "learning_rate": 2.3075042570328553e-07, "loss": 0.2842, "step": 8200 }, { "epoch": 2.738230383973289, "grad_norm": 1.0300118776302598, "learning_rate": 2.3016745073188905e-07, "loss": 0.2847, "step": 8201 }, { "epoch": 2.7385642737896494, "grad_norm": 1.0579021831746407, "learning_rate": 2.2958519576530448e-07, "loss": 0.2922, "step": 8202 }, { "epoch": 2.73889816360601, "grad_norm": 1.0343990097180515, "learning_rate": 2.2900366089142257e-07, "loss": 0.2865, "step": 8203 }, { "epoch": 2.739232053422371, "grad_norm": 1.0428414298002937, "learning_rate": 2.2842284619802646e-07, "loss": 0.2851, "step": 8204 }, { "epoch": 2.7395659432387314, "grad_norm": 1.0596076049328134, "learning_rate": 2.2784275177278935e-07, "loss": 0.3001, "step": 8205 }, { "epoch": 2.739899833055092, "grad_norm": 1.067201489758032, "learning_rate": 2.2726337770327622e-07, "loss": 0.2912, "step": 8206 }, { "epoch": 2.7402337228714524, "grad_norm": 1.0586629985585854, "learning_rate": 2.2668472407694431e-07, "loss": 0.284, "step": 8207 }, { "epoch": 2.740567612687813, "grad_norm": 0.9640214437436981, "learning_rate": 2.261067909811415e-07, "loss": 0.2687, "step": 8208 }, { "epoch": 2.7409015025041734, "grad_norm": 1.0442344310998404, "learning_rate": 2.2552957850310532e-07, "loss": 0.2901, "step": 8209 }, { "epoch": 2.7412353923205344, "grad_norm": 1.0540902269755534, "learning_rate": 2.249530867299654e-07, "loss": 0.2983, "step": 8210 }, { "epoch": 2.741569282136895, "grad_norm": 1.0353819306037209, "learning_rate": 2.2437731574874444e-07, "loss": 0.294, "step": 8211 }, { "epoch": 2.7419031719532554, "grad_norm": 1.011600278948207, "learning_rate": 2.2380226564635455e-07, "loss": 0.2888, "step": 8212 }, { "epoch": 2.742237061769616, "grad_norm": 1.0630540011946312, "learning_rate": 2.232279365095996e-07, "loss": 0.3049, "step": 8213 }, { "epoch": 2.742570951585977, "grad_norm": 1.0348975441082786, "learning_rate": 2.2265432842517364e-07, "loss": 0.2913, "step": 8214 }, { "epoch": 2.7429048414023374, "grad_norm": 1.0181243116433234, "learning_rate": 2.2208144147966237e-07, "loss": 0.2775, "step": 8215 }, { "epoch": 2.743238731218698, "grad_norm": 1.0372883432170865, "learning_rate": 2.215092757595444e-07, "loss": 0.2927, "step": 8216 }, { "epoch": 2.7435726210350584, "grad_norm": 1.011075696489029, "learning_rate": 2.2093783135118673e-07, "loss": 0.278, "step": 8217 }, { "epoch": 2.743906510851419, "grad_norm": 1.0614776263408874, "learning_rate": 2.2036710834084984e-07, "loss": 0.2881, "step": 8218 }, { "epoch": 2.7442404006677794, "grad_norm": 1.0614842104359186, "learning_rate": 2.1979710681468257e-07, "loss": 0.2946, "step": 8219 }, { "epoch": 2.7445742904841404, "grad_norm": 1.0696884514033607, "learning_rate": 2.1922782685872779e-07, "loss": 0.2982, "step": 8220 }, { "epoch": 2.744908180300501, "grad_norm": 1.0094250306995518, "learning_rate": 2.1865926855891783e-07, "loss": 0.2891, "step": 8221 }, { "epoch": 2.7452420701168614, "grad_norm": 1.0661568095047693, "learning_rate": 2.180914320010763e-07, "loss": 0.2989, "step": 8222 }, { "epoch": 2.745575959933222, "grad_norm": 1.0447274223753915, "learning_rate": 2.1752431727091795e-07, "loss": 0.2921, "step": 8223 }, { "epoch": 2.745909849749583, "grad_norm": 1.0509781246845604, "learning_rate": 2.1695792445404872e-07, "loss": 0.3004, "step": 8224 }, { "epoch": 2.7462437395659434, "grad_norm": 1.0420816679784684, "learning_rate": 2.163922536359647e-07, "loss": 0.2868, "step": 8225 }, { "epoch": 2.746577629382304, "grad_norm": 1.0559550183501738, "learning_rate": 2.1582730490205484e-07, "loss": 0.2912, "step": 8226 }, { "epoch": 2.7469115191986644, "grad_norm": 1.0466697857148046, "learning_rate": 2.1526307833759698e-07, "loss": 0.2936, "step": 8227 }, { "epoch": 2.747245409015025, "grad_norm": 1.045878934950752, "learning_rate": 2.1469957402776243e-07, "loss": 0.2904, "step": 8228 }, { "epoch": 2.7475792988313854, "grad_norm": 1.0658389391787904, "learning_rate": 2.1413679205761097e-07, "loss": 0.293, "step": 8229 }, { "epoch": 2.7479131886477464, "grad_norm": 1.021793362487575, "learning_rate": 2.1357473251209403e-07, "loss": 0.2822, "step": 8230 }, { "epoch": 2.748247078464107, "grad_norm": 1.0766014585167594, "learning_rate": 2.1301339547605383e-07, "loss": 0.3011, "step": 8231 }, { "epoch": 2.7485809682804674, "grad_norm": 1.0114937484624444, "learning_rate": 2.1245278103422585e-07, "loss": 0.284, "step": 8232 }, { "epoch": 2.748914858096828, "grad_norm": 1.0269682763325365, "learning_rate": 2.1189288927123408e-07, "loss": 0.2873, "step": 8233 }, { "epoch": 2.749248747913189, "grad_norm": 1.002202620195892, "learning_rate": 2.1133372027159372e-07, "loss": 0.2709, "step": 8234 }, { "epoch": 2.7495826377295494, "grad_norm": 1.0394861605298142, "learning_rate": 2.1077527411971055e-07, "loss": 0.2946, "step": 8235 }, { "epoch": 2.74991652754591, "grad_norm": 1.0305962643074684, "learning_rate": 2.1021755089988271e-07, "loss": 0.2916, "step": 8236 }, { "epoch": 2.7502504173622704, "grad_norm": 1.0051366239790156, "learning_rate": 2.0966055069629787e-07, "loss": 0.2812, "step": 8237 }, { "epoch": 2.750584307178631, "grad_norm": 1.023507143939962, "learning_rate": 2.0910427359303599e-07, "loss": 0.2827, "step": 8238 }, { "epoch": 2.7509181969949914, "grad_norm": 1.0853208921631563, "learning_rate": 2.0854871967406541e-07, "loss": 0.3019, "step": 8239 }, { "epoch": 2.7512520868113524, "grad_norm": 1.030274411067501, "learning_rate": 2.07993889023248e-07, "loss": 0.2884, "step": 8240 }, { "epoch": 2.751585976627713, "grad_norm": 1.0381007123550285, "learning_rate": 2.074397817243351e-07, "loss": 0.2933, "step": 8241 }, { "epoch": 2.7519198664440734, "grad_norm": 1.055198365115739, "learning_rate": 2.0688639786096865e-07, "loss": 0.2916, "step": 8242 }, { "epoch": 2.752253756260434, "grad_norm": 1.0795258105508587, "learning_rate": 2.0633373751668184e-07, "loss": 0.2965, "step": 8243 }, { "epoch": 2.752587646076795, "grad_norm": 1.0370250254014708, "learning_rate": 2.0578180077489906e-07, "loss": 0.2821, "step": 8244 }, { "epoch": 2.7529215358931554, "grad_norm": 1.0537014723645202, "learning_rate": 2.0523058771893423e-07, "loss": 0.2882, "step": 8245 }, { "epoch": 2.753255425709516, "grad_norm": 1.0527744830915629, "learning_rate": 2.0468009843199354e-07, "loss": 0.2844, "step": 8246 }, { "epoch": 2.7535893155258764, "grad_norm": 1.0264203211692609, "learning_rate": 2.0413033299717332e-07, "loss": 0.2909, "step": 8247 }, { "epoch": 2.753923205342237, "grad_norm": 1.0145859019476997, "learning_rate": 2.0358129149745943e-07, "loss": 0.2741, "step": 8248 }, { "epoch": 2.7542570951585974, "grad_norm": 1.0512350407588613, "learning_rate": 2.0303297401573051e-07, "loss": 0.2936, "step": 8249 }, { "epoch": 2.7545909849749584, "grad_norm": 1.0309804172349486, "learning_rate": 2.0248538063475432e-07, "loss": 0.2842, "step": 8250 }, { "epoch": 2.754924874791319, "grad_norm": 1.0348728179196662, "learning_rate": 2.0193851143719022e-07, "loss": 0.2751, "step": 8251 }, { "epoch": 2.7552587646076794, "grad_norm": 1.0642369392122162, "learning_rate": 2.0139236650558725e-07, "loss": 0.2981, "step": 8252 }, { "epoch": 2.7555926544240403, "grad_norm": 1.0468246280953195, "learning_rate": 2.0084694592238718e-07, "loss": 0.2771, "step": 8253 }, { "epoch": 2.755926544240401, "grad_norm": 1.024914229671854, "learning_rate": 2.0030224976992086e-07, "loss": 0.285, "step": 8254 }, { "epoch": 2.7562604340567614, "grad_norm": 1.0282142927833866, "learning_rate": 1.9975827813040915e-07, "loss": 0.2866, "step": 8255 }, { "epoch": 2.756594323873122, "grad_norm": 1.0410390833849286, "learning_rate": 1.992150310859642e-07, "loss": 0.2928, "step": 8256 }, { "epoch": 2.7569282136894824, "grad_norm": 1.0151884414728523, "learning_rate": 1.9867250871858978e-07, "loss": 0.2836, "step": 8257 }, { "epoch": 2.757262103505843, "grad_norm": 1.019221165249127, "learning_rate": 1.981307111101799e-07, "loss": 0.2772, "step": 8258 }, { "epoch": 2.7575959933222034, "grad_norm": 1.112474182775316, "learning_rate": 1.9758963834251855e-07, "loss": 0.3175, "step": 8259 }, { "epoch": 2.7579298831385644, "grad_norm": 1.0461453812156265, "learning_rate": 1.9704929049727872e-07, "loss": 0.2799, "step": 8260 }, { "epoch": 2.758263772954925, "grad_norm": 0.9987805312789918, "learning_rate": 1.965096676560274e-07, "loss": 0.2882, "step": 8261 }, { "epoch": 2.7585976627712854, "grad_norm": 1.0309261307144144, "learning_rate": 1.9597076990022057e-07, "loss": 0.2912, "step": 8262 }, { "epoch": 2.7589315525876463, "grad_norm": 1.069139067222486, "learning_rate": 1.9543259731120424e-07, "loss": 0.2965, "step": 8263 }, { "epoch": 2.759265442404007, "grad_norm": 1.037734438023575, "learning_rate": 1.9489514997021564e-07, "loss": 0.296, "step": 8264 }, { "epoch": 2.7595993322203674, "grad_norm": 1.0630527943563393, "learning_rate": 1.943584279583821e-07, "loss": 0.2918, "step": 8265 }, { "epoch": 2.759933222036728, "grad_norm": 1.0034559614913554, "learning_rate": 1.9382243135672218e-07, "loss": 0.2788, "step": 8266 }, { "epoch": 2.7602671118530884, "grad_norm": 1.0086907608408149, "learning_rate": 1.9328716024614335e-07, "loss": 0.2863, "step": 8267 }, { "epoch": 2.760601001669449, "grad_norm": 1.0805179662053275, "learning_rate": 1.927526147074449e-07, "loss": 0.2868, "step": 8268 }, { "epoch": 2.76093489148581, "grad_norm": 1.059274498036667, "learning_rate": 1.9221879482131721e-07, "loss": 0.3011, "step": 8269 }, { "epoch": 2.7612687813021703, "grad_norm": 1.0526010082269797, "learning_rate": 1.916857006683398e-07, "loss": 0.3007, "step": 8270 }, { "epoch": 2.761602671118531, "grad_norm": 1.0163617732245163, "learning_rate": 1.911533323289827e-07, "loss": 0.2823, "step": 8271 }, { "epoch": 2.7619365609348914, "grad_norm": 1.0701065059917876, "learning_rate": 1.9062168988360606e-07, "loss": 0.2984, "step": 8272 }, { "epoch": 2.7622704507512523, "grad_norm": 1.0471959547175906, "learning_rate": 1.9009077341246296e-07, "loss": 0.2873, "step": 8273 }, { "epoch": 2.762604340567613, "grad_norm": 1.057965069741275, "learning_rate": 1.895605829956948e-07, "loss": 0.2949, "step": 8274 }, { "epoch": 2.7629382303839733, "grad_norm": 1.0127140281556022, "learning_rate": 1.8903111871333258e-07, "loss": 0.2796, "step": 8275 }, { "epoch": 2.763272120200334, "grad_norm": 1.03004462916947, "learning_rate": 1.8850238064529903e-07, "loss": 0.2788, "step": 8276 }, { "epoch": 2.7636060100166944, "grad_norm": 1.0266658684098187, "learning_rate": 1.879743688714064e-07, "loss": 0.2903, "step": 8277 }, { "epoch": 2.763939899833055, "grad_norm": 1.0580962130038707, "learning_rate": 1.8744708347135987e-07, "loss": 0.2941, "step": 8278 }, { "epoch": 2.764273789649416, "grad_norm": 1.011874716079794, "learning_rate": 1.869205245247513e-07, "loss": 0.2849, "step": 8279 }, { "epoch": 2.7646076794657763, "grad_norm": 1.0228361232080516, "learning_rate": 1.8639469211106652e-07, "loss": 0.2855, "step": 8280 }, { "epoch": 2.764941569282137, "grad_norm": 1.0504175223979635, "learning_rate": 1.8586958630967656e-07, "loss": 0.294, "step": 8281 }, { "epoch": 2.7652754590984974, "grad_norm": 1.051410520291784, "learning_rate": 1.8534520719984907e-07, "loss": 0.2948, "step": 8282 }, { "epoch": 2.7656093489148583, "grad_norm": 1.0478524650626975, "learning_rate": 1.848215548607374e-07, "loss": 0.2952, "step": 8283 }, { "epoch": 2.765943238731219, "grad_norm": 0.986582790469324, "learning_rate": 1.8429862937138666e-07, "loss": 0.2757, "step": 8284 }, { "epoch": 2.7662771285475793, "grad_norm": 1.0351292066727074, "learning_rate": 1.8377643081073315e-07, "loss": 0.2945, "step": 8285 }, { "epoch": 2.76661101836394, "grad_norm": 1.0352441636192362, "learning_rate": 1.8325495925760217e-07, "loss": 0.2877, "step": 8286 }, { "epoch": 2.7669449081803004, "grad_norm": 1.055328449469977, "learning_rate": 1.8273421479070963e-07, "loss": 0.2845, "step": 8287 }, { "epoch": 2.767278797996661, "grad_norm": 1.0071919540136287, "learning_rate": 1.8221419748866153e-07, "loss": 0.2795, "step": 8288 }, { "epoch": 2.767612687813022, "grad_norm": 1.0345865617999577, "learning_rate": 1.8169490742995455e-07, "loss": 0.2928, "step": 8289 }, { "epoch": 2.7679465776293823, "grad_norm": 1.076027714841874, "learning_rate": 1.8117634469297596e-07, "loss": 0.2973, "step": 8290 }, { "epoch": 2.768280467445743, "grad_norm": 1.0447844469824263, "learning_rate": 1.8065850935600148e-07, "loss": 0.2903, "step": 8291 }, { "epoch": 2.7686143572621034, "grad_norm": 1.0920910430295583, "learning_rate": 1.8014140149719915e-07, "loss": 0.3031, "step": 8292 }, { "epoch": 2.7689482470784643, "grad_norm": 1.007331460203463, "learning_rate": 1.7962502119462487e-07, "loss": 0.272, "step": 8293 }, { "epoch": 2.769282136894825, "grad_norm": 1.0727442785950683, "learning_rate": 1.7910936852622796e-07, "loss": 0.2994, "step": 8294 }, { "epoch": 2.7696160267111853, "grad_norm": 1.0634884937440587, "learning_rate": 1.7859444356984555e-07, "loss": 0.3001, "step": 8295 }, { "epoch": 2.769949916527546, "grad_norm": 1.0295747003847726, "learning_rate": 1.7808024640320498e-07, "loss": 0.2759, "step": 8296 }, { "epoch": 2.7702838063439064, "grad_norm": 1.039634837117919, "learning_rate": 1.7756677710392356e-07, "loss": 0.287, "step": 8297 }, { "epoch": 2.770617696160267, "grad_norm": 1.0348960636507232, "learning_rate": 1.770540357495104e-07, "loss": 0.2937, "step": 8298 }, { "epoch": 2.770951585976628, "grad_norm": 1.0114757895482995, "learning_rate": 1.7654202241736306e-07, "loss": 0.2835, "step": 8299 }, { "epoch": 2.7712854757929883, "grad_norm": 1.054758030441863, "learning_rate": 1.760307371847708e-07, "loss": 0.298, "step": 8300 }, { "epoch": 2.771619365609349, "grad_norm": 1.0375875258246894, "learning_rate": 1.7552018012891025e-07, "loss": 0.2768, "step": 8301 }, { "epoch": 2.7719532554257094, "grad_norm": 1.0798172979539948, "learning_rate": 1.7501035132685086e-07, "loss": 0.2955, "step": 8302 }, { "epoch": 2.7722871452420703, "grad_norm": 1.0261117613487727, "learning_rate": 1.7450125085555157e-07, "loss": 0.2906, "step": 8303 }, { "epoch": 2.772621035058431, "grad_norm": 1.0213882582786575, "learning_rate": 1.739928787918599e-07, "loss": 0.2789, "step": 8304 }, { "epoch": 2.7729549248747913, "grad_norm": 1.0594134922380765, "learning_rate": 1.7348523521251548e-07, "loss": 0.2836, "step": 8305 }, { "epoch": 2.773288814691152, "grad_norm": 1.0501668391124626, "learning_rate": 1.7297832019414651e-07, "loss": 0.2931, "step": 8306 }, { "epoch": 2.7736227045075124, "grad_norm": 1.102211562803664, "learning_rate": 1.724721338132712e-07, "loss": 0.3131, "step": 8307 }, { "epoch": 2.773956594323873, "grad_norm": 1.022353163383906, "learning_rate": 1.7196667614629848e-07, "loss": 0.2834, "step": 8308 }, { "epoch": 2.774290484140234, "grad_norm": 1.0397056485313954, "learning_rate": 1.714619472695278e-07, "loss": 0.2869, "step": 8309 }, { "epoch": 2.7746243739565943, "grad_norm": 1.0766936123976296, "learning_rate": 1.7095794725914717e-07, "loss": 0.3032, "step": 8310 }, { "epoch": 2.774958263772955, "grad_norm": 1.0667164828004783, "learning_rate": 1.7045467619123512e-07, "loss": 0.2906, "step": 8311 }, { "epoch": 2.775292153589316, "grad_norm": 1.042232632800517, "learning_rate": 1.6995213414176038e-07, "loss": 0.2855, "step": 8312 }, { "epoch": 2.7756260434056763, "grad_norm": 1.0398602761279045, "learning_rate": 1.6945032118658166e-07, "loss": 0.2807, "step": 8313 }, { "epoch": 2.775959933222037, "grad_norm": 1.0649490666297097, "learning_rate": 1.6894923740144676e-07, "loss": 0.2989, "step": 8314 }, { "epoch": 2.7762938230383973, "grad_norm": 1.0603631197812902, "learning_rate": 1.6844888286199623e-07, "loss": 0.285, "step": 8315 }, { "epoch": 2.776627712854758, "grad_norm": 1.0310497670709635, "learning_rate": 1.6794925764375637e-07, "loss": 0.2942, "step": 8316 }, { "epoch": 2.7769616026711184, "grad_norm": 1.0717178925248256, "learning_rate": 1.6745036182214624e-07, "loss": 0.2972, "step": 8317 }, { "epoch": 2.777295492487479, "grad_norm": 1.061239713892443, "learning_rate": 1.6695219547247288e-07, "loss": 0.3064, "step": 8318 }, { "epoch": 2.77762938230384, "grad_norm": 1.0196779655100126, "learning_rate": 1.664547586699361e-07, "loss": 0.2881, "step": 8319 }, { "epoch": 2.7779632721202003, "grad_norm": 1.0756714468188895, "learning_rate": 1.6595805148962362e-07, "loss": 0.2994, "step": 8320 }, { "epoch": 2.778297161936561, "grad_norm": 1.0434754068557321, "learning_rate": 1.6546207400651215e-07, "loss": 0.2899, "step": 8321 }, { "epoch": 2.778631051752922, "grad_norm": 1.1091188158082055, "learning_rate": 1.6496682629546955e-07, "loss": 0.2905, "step": 8322 }, { "epoch": 2.7789649415692823, "grad_norm": 1.0360522842272302, "learning_rate": 1.6447230843125383e-07, "loss": 0.2786, "step": 8323 }, { "epoch": 2.779298831385643, "grad_norm": 1.0224135062566984, "learning_rate": 1.6397852048851248e-07, "loss": 0.2775, "step": 8324 }, { "epoch": 2.7796327212020033, "grad_norm": 1.0426938259362653, "learning_rate": 1.6348546254178199e-07, "loss": 0.3011, "step": 8325 }, { "epoch": 2.779966611018364, "grad_norm": 1.0822874599750714, "learning_rate": 1.6299313466549006e-07, "loss": 0.3114, "step": 8326 }, { "epoch": 2.7803005008347244, "grad_norm": 1.041297385546152, "learning_rate": 1.6250153693395276e-07, "loss": 0.2914, "step": 8327 }, { "epoch": 2.7806343906510853, "grad_norm": 1.0284699099938113, "learning_rate": 1.620106694213769e-07, "loss": 0.2843, "step": 8328 }, { "epoch": 2.780968280467446, "grad_norm": 1.0397484531111931, "learning_rate": 1.615205322018587e-07, "loss": 0.2947, "step": 8329 }, { "epoch": 2.7813021702838063, "grad_norm": 1.012273921694819, "learning_rate": 1.6103112534938458e-07, "loss": 0.2822, "step": 8330 }, { "epoch": 2.781636060100167, "grad_norm": 1.0475329274998286, "learning_rate": 1.6054244893782934e-07, "loss": 0.2921, "step": 8331 }, { "epoch": 2.781969949916528, "grad_norm": 1.0098629257635685, "learning_rate": 1.6005450304096004e-07, "loss": 0.2872, "step": 8332 }, { "epoch": 2.7823038397328883, "grad_norm": 1.039284049622168, "learning_rate": 1.5956728773243113e-07, "loss": 0.3003, "step": 8333 }, { "epoch": 2.782637729549249, "grad_norm": 1.039057318332795, "learning_rate": 1.5908080308578654e-07, "loss": 0.3043, "step": 8334 }, { "epoch": 2.7829716193656093, "grad_norm": 1.0710635788855, "learning_rate": 1.5859504917446366e-07, "loss": 0.2939, "step": 8335 }, { "epoch": 2.78330550918197, "grad_norm": 1.0328049098853527, "learning_rate": 1.5811002607178495e-07, "loss": 0.2819, "step": 8336 }, { "epoch": 2.7836393989983303, "grad_norm": 1.0308154044254363, "learning_rate": 1.5762573385096458e-07, "loss": 0.2926, "step": 8337 }, { "epoch": 2.7839732888146913, "grad_norm": 1.01031536793299, "learning_rate": 1.571421725851069e-07, "loss": 0.2856, "step": 8338 }, { "epoch": 2.784307178631052, "grad_norm": 1.0900955166092188, "learning_rate": 1.5665934234720404e-07, "loss": 0.301, "step": 8339 }, { "epoch": 2.7846410684474123, "grad_norm": 1.0418597187876222, "learning_rate": 1.561772432101416e-07, "loss": 0.2827, "step": 8340 }, { "epoch": 2.784974958263773, "grad_norm": 1.0762385826566834, "learning_rate": 1.5569587524669082e-07, "loss": 0.2877, "step": 8341 }, { "epoch": 2.785308848080134, "grad_norm": 1.0061419262097393, "learning_rate": 1.552152385295136e-07, "loss": 0.2754, "step": 8342 }, { "epoch": 2.7856427378964943, "grad_norm": 1.0299984052797764, "learning_rate": 1.5473533313116184e-07, "loss": 0.2847, "step": 8343 }, { "epoch": 2.785976627712855, "grad_norm": 1.0316082048864523, "learning_rate": 1.5425615912407764e-07, "loss": 0.2901, "step": 8344 }, { "epoch": 2.7863105175292153, "grad_norm": 1.0487184663592874, "learning_rate": 1.5377771658059314e-07, "loss": 0.2977, "step": 8345 }, { "epoch": 2.786644407345576, "grad_norm": 1.0481438119653517, "learning_rate": 1.5330000557292834e-07, "loss": 0.2967, "step": 8346 }, { "epoch": 2.7869782971619363, "grad_norm": 1.0547764731179816, "learning_rate": 1.5282302617319222e-07, "loss": 0.293, "step": 8347 }, { "epoch": 2.7873121869782973, "grad_norm": 1.088919617123745, "learning_rate": 1.5234677845338608e-07, "loss": 0.3015, "step": 8348 }, { "epoch": 2.787646076794658, "grad_norm": 1.0416622052319595, "learning_rate": 1.5187126248539964e-07, "loss": 0.2894, "step": 8349 }, { "epoch": 2.7879799666110183, "grad_norm": 1.0373559257068035, "learning_rate": 1.5139647834101102e-07, "loss": 0.2848, "step": 8350 }, { "epoch": 2.788313856427379, "grad_norm": 1.0232460829806898, "learning_rate": 1.5092242609188957e-07, "loss": 0.2887, "step": 8351 }, { "epoch": 2.78864774624374, "grad_norm": 1.0829327133276336, "learning_rate": 1.5044910580959249e-07, "loss": 0.2997, "step": 8352 }, { "epoch": 2.7889816360601003, "grad_norm": 0.9969699702872105, "learning_rate": 1.499765175655682e-07, "loss": 0.2763, "step": 8353 }, { "epoch": 2.789315525876461, "grad_norm": 1.051861329686417, "learning_rate": 1.4950466143115295e-07, "loss": 0.2952, "step": 8354 }, { "epoch": 2.7896494156928213, "grad_norm": 1.0168744042035316, "learning_rate": 1.4903353747757366e-07, "loss": 0.2919, "step": 8355 }, { "epoch": 2.789983305509182, "grad_norm": 1.0673334685915097, "learning_rate": 1.485631457759462e-07, "loss": 0.2999, "step": 8356 }, { "epoch": 2.7903171953255423, "grad_norm": 1.038422016150817, "learning_rate": 1.4809348639727605e-07, "loss": 0.2872, "step": 8357 }, { "epoch": 2.7906510851419033, "grad_norm": 1.0431874668784658, "learning_rate": 1.4762455941245869e-07, "loss": 0.2977, "step": 8358 }, { "epoch": 2.790984974958264, "grad_norm": 1.0481582604437667, "learning_rate": 1.4715636489227747e-07, "loss": 0.2907, "step": 8359 }, { "epoch": 2.7913188647746243, "grad_norm": 1.0048480249991225, "learning_rate": 1.46688902907407e-07, "loss": 0.2804, "step": 8360 }, { "epoch": 2.791652754590985, "grad_norm": 1.090008130604244, "learning_rate": 1.4622217352841138e-07, "loss": 0.2985, "step": 8361 }, { "epoch": 2.791986644407346, "grad_norm": 1.075398563714214, "learning_rate": 1.457561768257415e-07, "loss": 0.2935, "step": 8362 }, { "epoch": 2.7923205342237063, "grad_norm": 1.0367105261965484, "learning_rate": 1.4529091286973994e-07, "loss": 0.292, "step": 8363 }, { "epoch": 2.792654424040067, "grad_norm": 1.0423256707633437, "learning_rate": 1.4482638173063835e-07, "loss": 0.2916, "step": 8364 }, { "epoch": 2.7929883138564273, "grad_norm": 1.0339134416818745, "learning_rate": 1.4436258347855837e-07, "loss": 0.2849, "step": 8365 }, { "epoch": 2.793322203672788, "grad_norm": 1.0545519254386828, "learning_rate": 1.4389951818350957e-07, "loss": 0.2875, "step": 8366 }, { "epoch": 2.7936560934891483, "grad_norm": 1.0574827691259954, "learning_rate": 1.434371859153899e-07, "loss": 0.2822, "step": 8367 }, { "epoch": 2.7939899833055093, "grad_norm": 1.0596999026667693, "learning_rate": 1.429755867439908e-07, "loss": 0.2956, "step": 8368 }, { "epoch": 2.79432387312187, "grad_norm": 1.0583769794277997, "learning_rate": 1.4251472073898986e-07, "loss": 0.2904, "step": 8369 }, { "epoch": 2.7946577629382303, "grad_norm": 1.021434504469648, "learning_rate": 1.4205458796995365e-07, "loss": 0.2832, "step": 8370 }, { "epoch": 2.794991652754591, "grad_norm": 1.0103081954986228, "learning_rate": 1.415951885063399e-07, "loss": 0.2908, "step": 8371 }, { "epoch": 2.795325542570952, "grad_norm": 1.068317500211332, "learning_rate": 1.4113652241749488e-07, "loss": 0.3019, "step": 8372 }, { "epoch": 2.7956594323873123, "grad_norm": 1.0563276461341353, "learning_rate": 1.4067858977265425e-07, "loss": 0.288, "step": 8373 }, { "epoch": 2.795993322203673, "grad_norm": 1.0358986984830771, "learning_rate": 1.4022139064094163e-07, "loss": 0.282, "step": 8374 }, { "epoch": 2.7963272120200333, "grad_norm": 1.0185559788776049, "learning_rate": 1.397649250913724e-07, "loss": 0.2867, "step": 8375 }, { "epoch": 2.796661101836394, "grad_norm": 0.9955044359098889, "learning_rate": 1.3930919319284918e-07, "loss": 0.271, "step": 8376 }, { "epoch": 2.7969949916527543, "grad_norm": 1.0478156312811722, "learning_rate": 1.388541950141653e-07, "loss": 0.2826, "step": 8377 }, { "epoch": 2.7973288814691153, "grad_norm": 0.9953886029816734, "learning_rate": 1.3839993062400137e-07, "loss": 0.272, "step": 8378 }, { "epoch": 2.797662771285476, "grad_norm": 1.0417642193838417, "learning_rate": 1.3794640009092973e-07, "loss": 0.2882, "step": 8379 }, { "epoch": 2.7979966611018363, "grad_norm": 1.1008068611864814, "learning_rate": 1.3749360348340955e-07, "loss": 0.308, "step": 8380 }, { "epoch": 2.7983305509181973, "grad_norm": 1.0375114773071854, "learning_rate": 1.370415408697917e-07, "loss": 0.2803, "step": 8381 }, { "epoch": 2.798664440734558, "grad_norm": 1.01866653814001, "learning_rate": 1.3659021231831383e-07, "loss": 0.2809, "step": 8382 }, { "epoch": 2.7989983305509183, "grad_norm": 1.0408775610708327, "learning_rate": 1.3613961789710418e-07, "loss": 0.3021, "step": 8383 }, { "epoch": 2.799332220367279, "grad_norm": 1.071309271062015, "learning_rate": 1.3568975767417947e-07, "loss": 0.3052, "step": 8384 }, { "epoch": 2.7996661101836393, "grad_norm": 1.063032023837812, "learning_rate": 1.352406317174465e-07, "loss": 0.2928, "step": 8385 }, { "epoch": 2.8, "grad_norm": 1.0470345621130033, "learning_rate": 1.3479224009470048e-07, "loss": 0.291, "step": 8386 }, { "epoch": 2.8003338898163603, "grad_norm": 1.0083804340828078, "learning_rate": 1.3434458287362673e-07, "loss": 0.2868, "step": 8387 }, { "epoch": 2.8006677796327213, "grad_norm": 1.0358092177514975, "learning_rate": 1.3389766012179729e-07, "loss": 0.2817, "step": 8388 }, { "epoch": 2.801001669449082, "grad_norm": 1.3307589080005864, "learning_rate": 1.3345147190667652e-07, "loss": 0.2997, "step": 8389 }, { "epoch": 2.8013355592654423, "grad_norm": 1.0516295043653021, "learning_rate": 1.3300601829561554e-07, "loss": 0.2806, "step": 8390 }, { "epoch": 2.8016694490818033, "grad_norm": 1.0500797273455118, "learning_rate": 1.3256129935585616e-07, "loss": 0.293, "step": 8391 }, { "epoch": 2.8020033388981638, "grad_norm": 1.0250264349677913, "learning_rate": 1.3211731515452797e-07, "loss": 0.2842, "step": 8392 }, { "epoch": 2.8023372287145243, "grad_norm": 1.0554578048730157, "learning_rate": 1.316740657586507e-07, "loss": 0.2866, "step": 8393 }, { "epoch": 2.802671118530885, "grad_norm": 1.0529541871262043, "learning_rate": 1.3123155123513254e-07, "loss": 0.3021, "step": 8394 }, { "epoch": 2.8030050083472453, "grad_norm": 1.0807393690966347, "learning_rate": 1.3078977165077055e-07, "loss": 0.2979, "step": 8395 }, { "epoch": 2.803338898163606, "grad_norm": 1.06438279276971, "learning_rate": 1.3034872707225145e-07, "loss": 0.2915, "step": 8396 }, { "epoch": 2.8036727879799668, "grad_norm": 1.0436840811145771, "learning_rate": 1.2990841756615135e-07, "loss": 0.2956, "step": 8397 }, { "epoch": 2.8040066777963273, "grad_norm": 1.0713344669508773, "learning_rate": 1.294688431989344e-07, "loss": 0.293, "step": 8398 }, { "epoch": 2.804340567612688, "grad_norm": 1.0631491143360723, "learning_rate": 1.290300040369541e-07, "loss": 0.2971, "step": 8399 }, { "epoch": 2.8046744574290483, "grad_norm": 1.0460438882665497, "learning_rate": 1.2859190014645305e-07, "loss": 0.2872, "step": 8400 }, { "epoch": 2.8050083472454093, "grad_norm": 1.0390751612473395, "learning_rate": 1.2815453159356283e-07, "loss": 0.2937, "step": 8401 }, { "epoch": 2.8053422370617698, "grad_norm": 1.0300508049245731, "learning_rate": 1.277178984443045e-07, "loss": 0.2707, "step": 8402 }, { "epoch": 2.8056761268781303, "grad_norm": 1.0581992346547546, "learning_rate": 1.272820007645875e-07, "loss": 0.2971, "step": 8403 }, { "epoch": 2.806010016694491, "grad_norm": 1.0641420926349763, "learning_rate": 1.2684683862021096e-07, "loss": 0.2946, "step": 8404 }, { "epoch": 2.8063439065108513, "grad_norm": 1.0407462320482426, "learning_rate": 1.2641241207686117e-07, "loss": 0.284, "step": 8405 }, { "epoch": 2.806677796327212, "grad_norm": 1.0542781741652876, "learning_rate": 1.2597872120011568e-07, "loss": 0.2977, "step": 8406 }, { "epoch": 2.8070116861435728, "grad_norm": 1.054142507495388, "learning_rate": 1.25545766055441e-07, "loss": 0.2865, "step": 8407 }, { "epoch": 2.8073455759599333, "grad_norm": 1.0377958745446534, "learning_rate": 1.251135467081893e-07, "loss": 0.2926, "step": 8408 }, { "epoch": 2.807679465776294, "grad_norm": 1.0880085557975872, "learning_rate": 1.2468206322360454e-07, "loss": 0.3008, "step": 8409 }, { "epoch": 2.8080133555926543, "grad_norm": 1.041638645695463, "learning_rate": 1.242513156668207e-07, "loss": 0.2864, "step": 8410 }, { "epoch": 2.8083472454090153, "grad_norm": 1.081759167849389, "learning_rate": 1.2382130410285743e-07, "loss": 0.288, "step": 8411 }, { "epoch": 2.8086811352253758, "grad_norm": 1.0124260149025663, "learning_rate": 1.2339202859662557e-07, "loss": 0.2798, "step": 8412 }, { "epoch": 2.8090150250417363, "grad_norm": 1.050074504581051, "learning_rate": 1.2296348921292334e-07, "loss": 0.2863, "step": 8413 }, { "epoch": 2.809348914858097, "grad_norm": 1.059918957050258, "learning_rate": 1.2253568601643896e-07, "loss": 0.2984, "step": 8414 }, { "epoch": 2.8096828046744573, "grad_norm": 1.0216919980034351, "learning_rate": 1.221086190717502e-07, "loss": 0.2827, "step": 8415 }, { "epoch": 2.810016694490818, "grad_norm": 1.0095810577493303, "learning_rate": 1.2168228844332109e-07, "loss": 0.2773, "step": 8416 }, { "epoch": 2.8103505843071788, "grad_norm": 1.0557328656324947, "learning_rate": 1.2125669419550734e-07, "loss": 0.3007, "step": 8417 }, { "epoch": 2.8106844741235393, "grad_norm": 1.0609344331801622, "learning_rate": 1.2083183639255148e-07, "loss": 0.29, "step": 8418 }, { "epoch": 2.8110183639399, "grad_norm": 1.0425415580182131, "learning_rate": 1.2040771509858606e-07, "loss": 0.2923, "step": 8419 }, { "epoch": 2.8113522537562603, "grad_norm": 1.017166639296635, "learning_rate": 1.1998433037763212e-07, "loss": 0.2931, "step": 8420 }, { "epoch": 2.8116861435726213, "grad_norm": 1.0676586950527798, "learning_rate": 1.195616822935991e-07, "loss": 0.2862, "step": 8421 }, { "epoch": 2.8120200333889818, "grad_norm": 1.0525428338934366, "learning_rate": 1.191397709102865e-07, "loss": 0.2935, "step": 8422 }, { "epoch": 2.8123539232053423, "grad_norm": 1.031392485138554, "learning_rate": 1.187185962913806e-07, "loss": 0.2915, "step": 8423 }, { "epoch": 2.812687813021703, "grad_norm": 1.026741389747238, "learning_rate": 1.1829815850045833e-07, "loss": 0.2846, "step": 8424 }, { "epoch": 2.8130217028380633, "grad_norm": 1.0573079420877771, "learning_rate": 1.1787845760098337e-07, "loss": 0.2956, "step": 8425 }, { "epoch": 2.813355592654424, "grad_norm": 1.0861163630637192, "learning_rate": 1.1745949365631115e-07, "loss": 0.3036, "step": 8426 }, { "epoch": 2.8136894824707848, "grad_norm": 0.9897871224388475, "learning_rate": 1.1704126672968386e-07, "loss": 0.273, "step": 8427 }, { "epoch": 2.8140233722871453, "grad_norm": 1.0093306361739318, "learning_rate": 1.166237768842321e-07, "loss": 0.2935, "step": 8428 }, { "epoch": 2.814357262103506, "grad_norm": 1.0742331773149605, "learning_rate": 1.1620702418297547e-07, "loss": 0.3106, "step": 8429 }, { "epoch": 2.8146911519198663, "grad_norm": 1.0298049486353176, "learning_rate": 1.1579100868882365e-07, "loss": 0.2849, "step": 8430 }, { "epoch": 2.8150250417362273, "grad_norm": 1.0125905099334793, "learning_rate": 1.1537573046457362e-07, "loss": 0.2811, "step": 8431 }, { "epoch": 2.8153589315525878, "grad_norm": 1.0468921529152582, "learning_rate": 1.1496118957291191e-07, "loss": 0.3009, "step": 8432 }, { "epoch": 2.8156928213689483, "grad_norm": 1.033723441364988, "learning_rate": 1.1454738607641292e-07, "loss": 0.2817, "step": 8433 }, { "epoch": 2.816026711185309, "grad_norm": 1.0180801418352954, "learning_rate": 1.1413432003753943e-07, "loss": 0.282, "step": 8434 }, { "epoch": 2.8163606010016693, "grad_norm": 1.0197146454969308, "learning_rate": 1.1372199151864494e-07, "loss": 0.2837, "step": 8435 }, { "epoch": 2.81669449081803, "grad_norm": 1.0352179116771694, "learning_rate": 1.1331040058197018e-07, "loss": 0.2886, "step": 8436 }, { "epoch": 2.8170283806343908, "grad_norm": 1.0554439670736047, "learning_rate": 1.1289954728964381e-07, "loss": 0.2858, "step": 8437 }, { "epoch": 2.8173622704507513, "grad_norm": 1.0641181355037184, "learning_rate": 1.1248943170368454e-07, "loss": 0.3015, "step": 8438 }, { "epoch": 2.817696160267112, "grad_norm": 1.0717581072394098, "learning_rate": 1.1208005388599952e-07, "loss": 0.3009, "step": 8439 }, { "epoch": 2.8180300500834723, "grad_norm": 1.039049295263868, "learning_rate": 1.1167141389838376e-07, "loss": 0.2852, "step": 8440 }, { "epoch": 2.8183639398998332, "grad_norm": 1.0541522729591963, "learning_rate": 1.1126351180252126e-07, "loss": 0.2903, "step": 8441 }, { "epoch": 2.8186978297161938, "grad_norm": 1.0215240224910087, "learning_rate": 1.1085634765998499e-07, "loss": 0.2856, "step": 8442 }, { "epoch": 2.8190317195325543, "grad_norm": 1.053034955324995, "learning_rate": 1.1044992153223578e-07, "loss": 0.2808, "step": 8443 }, { "epoch": 2.819365609348915, "grad_norm": 1.0387982209136275, "learning_rate": 1.1004423348062453e-07, "loss": 0.2834, "step": 8444 }, { "epoch": 2.8196994991652753, "grad_norm": 1.0467146004777308, "learning_rate": 1.0963928356638898e-07, "loss": 0.2867, "step": 8445 }, { "epoch": 2.820033388981636, "grad_norm": 1.0676589361401319, "learning_rate": 1.0923507185065574e-07, "loss": 0.3004, "step": 8446 }, { "epoch": 2.8203672787979968, "grad_norm": 1.0558029156918198, "learning_rate": 1.0883159839444212e-07, "loss": 0.2999, "step": 8447 }, { "epoch": 2.8207011686143573, "grad_norm": 1.0492197954230793, "learning_rate": 1.0842886325865054e-07, "loss": 0.2899, "step": 8448 }, { "epoch": 2.821035058430718, "grad_norm": 1.0372918199447148, "learning_rate": 1.0802686650407457e-07, "loss": 0.2902, "step": 8449 }, { "epoch": 2.8213689482470787, "grad_norm": 1.0371748189583065, "learning_rate": 1.0762560819139511e-07, "loss": 0.2967, "step": 8450 }, { "epoch": 2.8217028380634392, "grad_norm": 1.0638441315453209, "learning_rate": 1.0722508838118262e-07, "loss": 0.2842, "step": 8451 }, { "epoch": 2.8220367278797998, "grad_norm": 1.0421586093502502, "learning_rate": 1.0682530713389483e-07, "loss": 0.2896, "step": 8452 }, { "epoch": 2.8223706176961603, "grad_norm": 1.027813100892739, "learning_rate": 1.0642626450987959e-07, "loss": 0.2838, "step": 8453 }, { "epoch": 2.8227045075125208, "grad_norm": 1.049395395465698, "learning_rate": 1.0602796056937093e-07, "loss": 0.2852, "step": 8454 }, { "epoch": 2.8230383973288813, "grad_norm": 1.0203352708259967, "learning_rate": 1.0563039537249353e-07, "loss": 0.2871, "step": 8455 }, { "epoch": 2.823372287145242, "grad_norm": 1.070143054633835, "learning_rate": 1.052335689792594e-07, "loss": 0.2947, "step": 8456 }, { "epoch": 2.8237061769616028, "grad_norm": 1.0411524008108637, "learning_rate": 1.0483748144956951e-07, "loss": 0.2915, "step": 8457 }, { "epoch": 2.8240400667779633, "grad_norm": 1.035136334912848, "learning_rate": 1.0444213284321324e-07, "loss": 0.2861, "step": 8458 }, { "epoch": 2.8243739565943238, "grad_norm": 1.0720647124365696, "learning_rate": 1.0404752321986789e-07, "loss": 0.2864, "step": 8459 }, { "epoch": 2.8247078464106847, "grad_norm": 1.0525061198401267, "learning_rate": 1.0365365263910021e-07, "loss": 0.2929, "step": 8460 }, { "epoch": 2.8250417362270452, "grad_norm": 1.047801975104618, "learning_rate": 1.032605211603649e-07, "loss": 0.2825, "step": 8461 }, { "epoch": 2.8253756260434058, "grad_norm": 1.0720605724728787, "learning_rate": 1.0286812884300501e-07, "loss": 0.2931, "step": 8462 }, { "epoch": 2.8257095158597663, "grad_norm": 1.0216841689002183, "learning_rate": 1.0247647574625097e-07, "loss": 0.284, "step": 8463 }, { "epoch": 2.8260434056761268, "grad_norm": 1.0372620141086937, "learning_rate": 1.0208556192922437e-07, "loss": 0.288, "step": 8464 }, { "epoch": 2.8263772954924873, "grad_norm": 1.0482085921681032, "learning_rate": 1.0169538745093244e-07, "loss": 0.2906, "step": 8465 }, { "epoch": 2.8267111853088482, "grad_norm": 1.050583425541171, "learning_rate": 1.0130595237027196e-07, "loss": 0.2929, "step": 8466 }, { "epoch": 2.8270450751252088, "grad_norm": 1.0702122261348228, "learning_rate": 1.0091725674602815e-07, "loss": 0.2919, "step": 8467 }, { "epoch": 2.8273789649415693, "grad_norm": 1.0557261350376785, "learning_rate": 1.0052930063687571e-07, "loss": 0.2908, "step": 8468 }, { "epoch": 2.8277128547579298, "grad_norm": 1.0304515865802426, "learning_rate": 1.0014208410137449e-07, "loss": 0.2829, "step": 8469 }, { "epoch": 2.8280467445742907, "grad_norm": 1.0428037827377954, "learning_rate": 9.975560719797606e-08, "loss": 0.3034, "step": 8470 }, { "epoch": 2.8283806343906512, "grad_norm": 1.012267297458373, "learning_rate": 9.936986998501763e-08, "loss": 0.286, "step": 8471 }, { "epoch": 2.8287145242070117, "grad_norm": 1.068383887656218, "learning_rate": 9.898487252072819e-08, "loss": 0.2984, "step": 8472 }, { "epoch": 2.8290484140233723, "grad_norm": 1.0039442536964898, "learning_rate": 9.860061486322181e-08, "loss": 0.2805, "step": 8473 }, { "epoch": 2.8293823038397328, "grad_norm": 1.0246067548594417, "learning_rate": 9.821709707050154e-08, "loss": 0.2894, "step": 8474 }, { "epoch": 2.8297161936560933, "grad_norm": 1.0398710154444974, "learning_rate": 9.783431920045994e-08, "loss": 0.282, "step": 8475 }, { "epoch": 2.8300500834724542, "grad_norm": 1.058769315664985, "learning_rate": 9.745228131087691e-08, "loss": 0.2976, "step": 8476 }, { "epoch": 2.8303839732888147, "grad_norm": 1.0234032249957226, "learning_rate": 9.70709834594219e-08, "loss": 0.2857, "step": 8477 }, { "epoch": 2.8307178631051753, "grad_norm": 1.0242952667098948, "learning_rate": 9.66904257036505e-08, "loss": 0.2835, "step": 8478 }, { "epoch": 2.8310517529215358, "grad_norm": 1.0619221497131992, "learning_rate": 9.631060810100846e-08, "loss": 0.2968, "step": 8479 }, { "epoch": 2.8313856427378967, "grad_norm": 1.0251024981964372, "learning_rate": 9.593153070882877e-08, "loss": 0.276, "step": 8480 }, { "epoch": 2.8317195325542572, "grad_norm": 1.044744513127203, "learning_rate": 9.555319358433345e-08, "loss": 0.2854, "step": 8481 }, { "epoch": 2.8320534223706177, "grad_norm": 1.0046837886442004, "learning_rate": 9.517559678463184e-08, "loss": 0.2811, "step": 8482 }, { "epoch": 2.8323873121869783, "grad_norm": 1.0029144179170417, "learning_rate": 9.479874036672277e-08, "loss": 0.2687, "step": 8483 }, { "epoch": 2.8327212020033388, "grad_norm": 1.0520307487825995, "learning_rate": 9.442262438749183e-08, "loss": 0.2954, "step": 8484 }, { "epoch": 2.8330550918196993, "grad_norm": 1.0588773307630317, "learning_rate": 9.404724890371419e-08, "loss": 0.2941, "step": 8485 }, { "epoch": 2.8333889816360602, "grad_norm": 1.0605342797636288, "learning_rate": 9.367261397205285e-08, "loss": 0.2954, "step": 8486 }, { "epoch": 2.8337228714524207, "grad_norm": 1.0568639516461293, "learning_rate": 9.329871964905757e-08, "loss": 0.2987, "step": 8487 }, { "epoch": 2.8340567612687813, "grad_norm": 1.0223464169231633, "learning_rate": 9.29255659911693e-08, "loss": 0.2876, "step": 8488 }, { "epoch": 2.8343906510851418, "grad_norm": 1.0579091571117145, "learning_rate": 9.255315305471468e-08, "loss": 0.3034, "step": 8489 }, { "epoch": 2.8347245409015027, "grad_norm": 1.0202721133255999, "learning_rate": 9.218148089590928e-08, "loss": 0.2776, "step": 8490 }, { "epoch": 2.8350584307178632, "grad_norm": 1.040528425909886, "learning_rate": 9.1810549570856e-08, "loss": 0.2819, "step": 8491 }, { "epoch": 2.8353923205342237, "grad_norm": 1.0176087774186382, "learning_rate": 9.144035913554839e-08, "loss": 0.2831, "step": 8492 }, { "epoch": 2.8357262103505843, "grad_norm": 1.0181920355109386, "learning_rate": 9.107090964586673e-08, "loss": 0.278, "step": 8493 }, { "epoch": 2.8360601001669448, "grad_norm": 1.0928121037730156, "learning_rate": 9.070220115757755e-08, "loss": 0.2997, "step": 8494 }, { "epoch": 2.8363939899833053, "grad_norm": 1.0640278226770548, "learning_rate": 9.033423372633854e-08, "loss": 0.2849, "step": 8495 }, { "epoch": 2.8367278797996662, "grad_norm": 1.04328489944088, "learning_rate": 8.99670074076936e-08, "loss": 0.2941, "step": 8496 }, { "epoch": 2.8370617696160267, "grad_norm": 1.0371436957377154, "learning_rate": 8.960052225707672e-08, "loss": 0.287, "step": 8497 }, { "epoch": 2.8373956594323873, "grad_norm": 1.0324123072145466, "learning_rate": 8.923477832980698e-08, "loss": 0.2906, "step": 8498 }, { "epoch": 2.8377295492487478, "grad_norm": 1.057071866798569, "learning_rate": 8.886977568109523e-08, "loss": 0.297, "step": 8499 }, { "epoch": 2.8380634390651087, "grad_norm": 0.9800809189348638, "learning_rate": 8.850551436603627e-08, "loss": 0.2854, "step": 8500 }, { "epoch": 2.8383973288814692, "grad_norm": 1.0495841224724447, "learning_rate": 8.814199443961723e-08, "loss": 0.2855, "step": 8501 }, { "epoch": 2.8387312186978297, "grad_norm": 1.0344150376065753, "learning_rate": 8.777921595671035e-08, "loss": 0.286, "step": 8502 }, { "epoch": 2.8390651085141902, "grad_norm": 1.0024986635952415, "learning_rate": 8.74171789720768e-08, "loss": 0.281, "step": 8503 }, { "epoch": 2.8393989983305508, "grad_norm": 1.0678451095634107, "learning_rate": 8.705588354036675e-08, "loss": 0.291, "step": 8504 }, { "epoch": 2.8397328881469113, "grad_norm": 1.017732192595583, "learning_rate": 8.669532971611716e-08, "loss": 0.2871, "step": 8505 }, { "epoch": 2.8400667779632722, "grad_norm": 1.067338858045696, "learning_rate": 8.633551755375336e-08, "loss": 0.2956, "step": 8506 }, { "epoch": 2.8404006677796327, "grad_norm": 1.04746148700522, "learning_rate": 8.597644710758857e-08, "loss": 0.2821, "step": 8507 }, { "epoch": 2.8407345575959932, "grad_norm": 1.070541229552556, "learning_rate": 8.561811843182555e-08, "loss": 0.2968, "step": 8508 }, { "epoch": 2.8410684474123538, "grad_norm": 1.0550566872378435, "learning_rate": 8.526053158055325e-08, "loss": 0.2908, "step": 8509 }, { "epoch": 2.8414023372287147, "grad_norm": 1.0664024060959711, "learning_rate": 8.49036866077485e-08, "loss": 0.2944, "step": 8510 }, { "epoch": 2.8417362270450752, "grad_norm": 1.0450093181959108, "learning_rate": 8.454758356727821e-08, "loss": 0.2883, "step": 8511 }, { "epoch": 2.8420701168614357, "grad_norm": 1.0549763761677344, "learning_rate": 8.41922225128955e-08, "loss": 0.2932, "step": 8512 }, { "epoch": 2.8424040066777962, "grad_norm": 1.0514232429152846, "learning_rate": 8.383760349824188e-08, "loss": 0.2963, "step": 8513 }, { "epoch": 2.8427378964941568, "grad_norm": 1.0878138898284726, "learning_rate": 8.34837265768479e-08, "loss": 0.2958, "step": 8514 }, { "epoch": 2.8430717863105173, "grad_norm": 1.0497686287598993, "learning_rate": 8.313059180212968e-08, "loss": 0.2909, "step": 8515 }, { "epoch": 2.8434056761268782, "grad_norm": 1.008833911332039, "learning_rate": 8.27781992273935e-08, "loss": 0.2763, "step": 8516 }, { "epoch": 2.8437395659432387, "grad_norm": 1.0854520444935754, "learning_rate": 8.24265489058329e-08, "loss": 0.2993, "step": 8517 }, { "epoch": 2.8440734557595992, "grad_norm": 1.0325876252148882, "learning_rate": 8.207564089052988e-08, "loss": 0.2939, "step": 8518 }, { "epoch": 2.84440734557596, "grad_norm": 1.076538274044235, "learning_rate": 8.172547523445373e-08, "loss": 0.2931, "step": 8519 }, { "epoch": 2.8447412353923207, "grad_norm": 1.04959037183115, "learning_rate": 8.137605199046106e-08, "loss": 0.3083, "step": 8520 }, { "epoch": 2.845075125208681, "grad_norm": 1.0586562969129125, "learning_rate": 8.102737121129745e-08, "loss": 0.2944, "step": 8521 }, { "epoch": 2.8454090150250417, "grad_norm": 1.0936325102102065, "learning_rate": 8.067943294959691e-08, "loss": 0.299, "step": 8522 }, { "epoch": 2.8457429048414022, "grad_norm": 1.0270157042216066, "learning_rate": 8.03322372578802e-08, "loss": 0.2824, "step": 8523 }, { "epoch": 2.8460767946577628, "grad_norm": 1.0744696315186055, "learning_rate": 7.998578418855652e-08, "loss": 0.2949, "step": 8524 }, { "epoch": 2.8464106844741233, "grad_norm": 1.0735192839592058, "learning_rate": 7.964007379392236e-08, "loss": 0.2953, "step": 8525 }, { "epoch": 2.846744574290484, "grad_norm": 1.0101276187018888, "learning_rate": 7.92951061261632e-08, "loss": 0.2873, "step": 8526 }, { "epoch": 2.8470784641068447, "grad_norm": 1.04624134303508, "learning_rate": 7.895088123735128e-08, "loss": 0.2906, "step": 8527 }, { "epoch": 2.8474123539232052, "grad_norm": 1.050298434488303, "learning_rate": 7.860739917944782e-08, "loss": 0.2859, "step": 8528 }, { "epoch": 2.847746243739566, "grad_norm": 1.042680383756553, "learning_rate": 7.826466000430133e-08, "loss": 0.2836, "step": 8529 }, { "epoch": 2.8480801335559267, "grad_norm": 1.062676604593724, "learning_rate": 7.79226637636471e-08, "loss": 0.3019, "step": 8530 }, { "epoch": 2.848414023372287, "grad_norm": 1.0203325433841741, "learning_rate": 7.75814105091105e-08, "loss": 0.2817, "step": 8531 }, { "epoch": 2.8487479131886477, "grad_norm": 1.061175543785908, "learning_rate": 7.724090029220366e-08, "loss": 0.3022, "step": 8532 }, { "epoch": 2.8490818030050082, "grad_norm": 1.1413117103966814, "learning_rate": 7.690113316432546e-08, "loss": 0.2967, "step": 8533 }, { "epoch": 2.8494156928213688, "grad_norm": 1.032260360008817, "learning_rate": 7.656210917676488e-08, "loss": 0.2857, "step": 8534 }, { "epoch": 2.8497495826377297, "grad_norm": 1.0353434142955094, "learning_rate": 7.622382838069653e-08, "loss": 0.292, "step": 8535 }, { "epoch": 2.85008347245409, "grad_norm": 1.0286337439231066, "learning_rate": 7.588629082718402e-08, "loss": 0.2908, "step": 8536 }, { "epoch": 2.8504173622704507, "grad_norm": 1.0428592585114367, "learning_rate": 7.55494965671788e-08, "loss": 0.287, "step": 8537 }, { "epoch": 2.8507512520868112, "grad_norm": 1.0467045426615116, "learning_rate": 7.521344565151966e-08, "loss": 0.2975, "step": 8538 }, { "epoch": 2.851085141903172, "grad_norm": 1.04666546567971, "learning_rate": 7.487813813093381e-08, "loss": 0.281, "step": 8539 }, { "epoch": 2.8514190317195327, "grad_norm": 0.9986278645357816, "learning_rate": 7.454357405603574e-08, "loss": 0.2857, "step": 8540 }, { "epoch": 2.851752921535893, "grad_norm": 1.0469890615112452, "learning_rate": 7.420975347732673e-08, "loss": 0.2833, "step": 8541 }, { "epoch": 2.8520868113522537, "grad_norm": 1.06728934924794, "learning_rate": 7.387667644519813e-08, "loss": 0.2932, "step": 8542 }, { "epoch": 2.8524207011686142, "grad_norm": 1.031250512653934, "learning_rate": 7.354434300992752e-08, "loss": 0.2804, "step": 8543 }, { "epoch": 2.8527545909849747, "grad_norm": 0.9850195582374301, "learning_rate": 7.321275322168031e-08, "loss": 0.2691, "step": 8544 }, { "epoch": 2.8530884808013357, "grad_norm": 1.0344261823526482, "learning_rate": 7.288190713051036e-08, "loss": 0.2868, "step": 8545 }, { "epoch": 2.853422370617696, "grad_norm": 1.0692079977050701, "learning_rate": 7.255180478635826e-08, "loss": 0.2957, "step": 8546 }, { "epoch": 2.8537562604340567, "grad_norm": 1.1210058304300186, "learning_rate": 7.22224462390525e-08, "loss": 0.3016, "step": 8547 }, { "epoch": 2.8540901502504172, "grad_norm": 1.037470496389637, "learning_rate": 7.189383153831054e-08, "loss": 0.2903, "step": 8548 }, { "epoch": 2.854424040066778, "grad_norm": 1.0448615321700483, "learning_rate": 7.156596073373657e-08, "loss": 0.2906, "step": 8549 }, { "epoch": 2.8547579298831387, "grad_norm": 1.0447828286466625, "learning_rate": 7.123883387482211e-08, "loss": 0.2873, "step": 8550 }, { "epoch": 2.855091819699499, "grad_norm": 1.0294098302457446, "learning_rate": 7.091245101094657e-08, "loss": 0.2933, "step": 8551 }, { "epoch": 2.8554257095158597, "grad_norm": 1.0661918670307884, "learning_rate": 7.058681219137831e-08, "loss": 0.2963, "step": 8552 }, { "epoch": 2.8557595993322202, "grad_norm": 1.0678327805252452, "learning_rate": 7.026191746527245e-08, "loss": 0.2995, "step": 8553 }, { "epoch": 2.8560934891485807, "grad_norm": 1.0511427973740837, "learning_rate": 6.993776688167031e-08, "loss": 0.2929, "step": 8554 }, { "epoch": 2.8564273789649417, "grad_norm": 1.0058483174960717, "learning_rate": 6.961436048950387e-08, "loss": 0.2715, "step": 8555 }, { "epoch": 2.856761268781302, "grad_norm": 1.0272083033656347, "learning_rate": 6.929169833759075e-08, "loss": 0.2797, "step": 8556 }, { "epoch": 2.8570951585976627, "grad_norm": 1.0308382657807995, "learning_rate": 6.896978047463643e-08, "loss": 0.2813, "step": 8557 }, { "epoch": 2.8574290484140232, "grad_norm": 1.01834917154102, "learning_rate": 6.86486069492337e-08, "loss": 0.2839, "step": 8558 }, { "epoch": 2.857762938230384, "grad_norm": 1.058113213028561, "learning_rate": 6.832817780986545e-08, "loss": 0.2899, "step": 8559 }, { "epoch": 2.8580968280467447, "grad_norm": 0.9856405541760024, "learning_rate": 6.80084931048991e-08, "loss": 0.2734, "step": 8560 }, { "epoch": 2.858430717863105, "grad_norm": 1.0598961129489533, "learning_rate": 6.768955288259104e-08, "loss": 0.2916, "step": 8561 }, { "epoch": 2.8587646076794657, "grad_norm": 1.0546155774959651, "learning_rate": 6.737135719108501e-08, "loss": 0.2882, "step": 8562 }, { "epoch": 2.8590984974958262, "grad_norm": 1.022193906209883, "learning_rate": 6.705390607841311e-08, "loss": 0.2796, "step": 8563 }, { "epoch": 2.8594323873121867, "grad_norm": 1.054580991296506, "learning_rate": 6.673719959249426e-08, "loss": 0.2906, "step": 8564 }, { "epoch": 2.8597662771285477, "grad_norm": 1.0838419028762676, "learning_rate": 6.642123778113574e-08, "loss": 0.285, "step": 8565 }, { "epoch": 2.860100166944908, "grad_norm": 1.0615222875403731, "learning_rate": 6.610602069203054e-08, "loss": 0.3032, "step": 8566 }, { "epoch": 2.8604340567612687, "grad_norm": 1.0300498676207999, "learning_rate": 6.579154837276169e-08, "loss": 0.2894, "step": 8567 }, { "epoch": 2.8607679465776292, "grad_norm": 1.0457620320589556, "learning_rate": 6.547782087079846e-08, "loss": 0.2768, "step": 8568 }, { "epoch": 2.86110183639399, "grad_norm": 1.0766200027758075, "learning_rate": 6.516483823349796e-08, "loss": 0.2944, "step": 8569 }, { "epoch": 2.8614357262103507, "grad_norm": 1.0273754585977033, "learning_rate": 6.485260050810461e-08, "loss": 0.2834, "step": 8570 }, { "epoch": 2.861769616026711, "grad_norm": 1.0744126803577048, "learning_rate": 6.454110774175071e-08, "loss": 0.2939, "step": 8571 }, { "epoch": 2.8621035058430717, "grad_norm": 1.0671760172760287, "learning_rate": 6.423035998145588e-08, "loss": 0.2878, "step": 8572 }, { "epoch": 2.8624373956594322, "grad_norm": 1.0260868594569819, "learning_rate": 6.392035727412815e-08, "loss": 0.2748, "step": 8573 }, { "epoch": 2.8627712854757927, "grad_norm": 1.0385733533059192, "learning_rate": 6.361109966656065e-08, "loss": 0.2892, "step": 8574 }, { "epoch": 2.8631051752921537, "grad_norm": 1.0123748642379262, "learning_rate": 6.330258720543825e-08, "loss": 0.2816, "step": 8575 }, { "epoch": 2.863439065108514, "grad_norm": 1.0488318016862024, "learning_rate": 6.299481993732815e-08, "loss": 0.2896, "step": 8576 }, { "epoch": 2.8637729549248747, "grad_norm": 1.0460881804401678, "learning_rate": 6.268779790868929e-08, "loss": 0.2819, "step": 8577 }, { "epoch": 2.8641068447412357, "grad_norm": 1.0433252512581865, "learning_rate": 6.238152116586626e-08, "loss": 0.2897, "step": 8578 }, { "epoch": 2.864440734557596, "grad_norm": 1.0181738699046177, "learning_rate": 6.207598975509155e-08, "loss": 0.2801, "step": 8579 }, { "epoch": 2.8647746243739567, "grad_norm": 1.013788005079009, "learning_rate": 6.17712037224849e-08, "loss": 0.2796, "step": 8580 }, { "epoch": 2.865108514190317, "grad_norm": 1.0952341494250304, "learning_rate": 6.146716311405344e-08, "loss": 0.3049, "step": 8581 }, { "epoch": 2.8654424040066777, "grad_norm": 1.0730671946295185, "learning_rate": 6.116386797569208e-08, "loss": 0.3026, "step": 8582 }, { "epoch": 2.8657762938230382, "grad_norm": 1.0628424175569224, "learning_rate": 6.086131835318366e-08, "loss": 0.2799, "step": 8583 }, { "epoch": 2.8661101836393987, "grad_norm": 1.103222848149039, "learning_rate": 6.05595142921972e-08, "loss": 0.2957, "step": 8584 }, { "epoch": 2.8664440734557597, "grad_norm": 1.0552880720171252, "learning_rate": 6.025845583829071e-08, "loss": 0.2925, "step": 8585 }, { "epoch": 2.86677796327212, "grad_norm": 1.0420501981924073, "learning_rate": 5.995814303690839e-08, "loss": 0.2884, "step": 8586 }, { "epoch": 2.8671118530884807, "grad_norm": 1.08881351940285, "learning_rate": 5.965857593338176e-08, "loss": 0.2903, "step": 8587 }, { "epoch": 2.8674457429048417, "grad_norm": 1.0330738183263601, "learning_rate": 5.935975457293186e-08, "loss": 0.2974, "step": 8588 }, { "epoch": 2.867779632721202, "grad_norm": 1.0471279716803925, "learning_rate": 5.906167900066484e-08, "loss": 0.2946, "step": 8589 }, { "epoch": 2.8681135225375627, "grad_norm": 1.092681295630568, "learning_rate": 5.8764349261574704e-08, "loss": 0.2909, "step": 8590 }, { "epoch": 2.868447412353923, "grad_norm": 1.0287763399887035, "learning_rate": 5.846776540054389e-08, "loss": 0.2732, "step": 8591 }, { "epoch": 2.8687813021702837, "grad_norm": 1.044076372060207, "learning_rate": 5.817192746234213e-08, "loss": 0.2962, "step": 8592 }, { "epoch": 2.869115191986644, "grad_norm": 1.031711766584784, "learning_rate": 5.787683549162482e-08, "loss": 0.2833, "step": 8593 }, { "epoch": 2.869449081803005, "grad_norm": 1.0838589910338796, "learning_rate": 5.758248953293688e-08, "loss": 0.2992, "step": 8594 }, { "epoch": 2.8697829716193657, "grad_norm": 1.0473976292993787, "learning_rate": 5.7288889630709446e-08, "loss": 0.2903, "step": 8595 }, { "epoch": 2.870116861435726, "grad_norm": 1.0495677573061972, "learning_rate": 5.69960358292615e-08, "loss": 0.2895, "step": 8596 }, { "epoch": 2.8704507512520867, "grad_norm": 1.0873848497710603, "learning_rate": 5.67039281727988e-08, "loss": 0.2927, "step": 8597 }, { "epoch": 2.8707846410684477, "grad_norm": 1.0996419432603999, "learning_rate": 5.641256670541551e-08, "loss": 0.2969, "step": 8598 }, { "epoch": 2.871118530884808, "grad_norm": 1.0632929234569404, "learning_rate": 5.6121951471092004e-08, "loss": 0.3031, "step": 8599 }, { "epoch": 2.8714524207011687, "grad_norm": 1.0009446751839624, "learning_rate": 5.583208251369765e-08, "loss": 0.2842, "step": 8600 }, { "epoch": 2.871786310517529, "grad_norm": 1.0377223557955784, "learning_rate": 5.554295987698688e-08, "loss": 0.2963, "step": 8601 }, { "epoch": 2.8721202003338897, "grad_norm": 1.0360814054828946, "learning_rate": 5.5254583604602566e-08, "loss": 0.2978, "step": 8602 }, { "epoch": 2.87245409015025, "grad_norm": 1.0516575593740272, "learning_rate": 5.496695374007599e-08, "loss": 0.2916, "step": 8603 }, { "epoch": 2.872787979966611, "grad_norm": 1.0611614185478997, "learning_rate": 5.4680070326824096e-08, "loss": 0.2981, "step": 8604 }, { "epoch": 2.8731218697829717, "grad_norm": 1.0226547846465823, "learning_rate": 5.439393340815224e-08, "loss": 0.2949, "step": 8605 }, { "epoch": 2.873455759599332, "grad_norm": 1.0508776965541673, "learning_rate": 5.41085430272531e-08, "loss": 0.2847, "step": 8606 }, { "epoch": 2.8737896494156927, "grad_norm": 1.053715337846924, "learning_rate": 5.3823899227204435e-08, "loss": 0.2852, "step": 8607 }, { "epoch": 2.8741235392320537, "grad_norm": 1.0405238598720825, "learning_rate": 5.3540002050975206e-08, "loss": 0.285, "step": 8608 }, { "epoch": 2.874457429048414, "grad_norm": 1.0704922421029806, "learning_rate": 5.325685154141891e-08, "loss": 0.2929, "step": 8609 }, { "epoch": 2.8747913188647747, "grad_norm": 1.0980915271997382, "learning_rate": 5.2974447741276915e-08, "loss": 0.2954, "step": 8610 }, { "epoch": 2.875125208681135, "grad_norm": 1.030808054743, "learning_rate": 5.2692790693177896e-08, "loss": 0.2912, "step": 8611 }, { "epoch": 2.8754590984974957, "grad_norm": 1.0745902054379977, "learning_rate": 5.241188043963841e-08, "loss": 0.3012, "step": 8612 }, { "epoch": 2.875792988313856, "grad_norm": 1.0490824040447555, "learning_rate": 5.213171702306119e-08, "loss": 0.3032, "step": 8613 }, { "epoch": 2.876126878130217, "grad_norm": 1.0756871218294752, "learning_rate": 5.1852300485736863e-08, "loss": 0.3041, "step": 8614 }, { "epoch": 2.8764607679465777, "grad_norm": 1.0474852856898835, "learning_rate": 5.157363086984335e-08, "loss": 0.3008, "step": 8615 }, { "epoch": 2.876794657762938, "grad_norm": 1.0208376082829582, "learning_rate": 5.129570821744645e-08, "loss": 0.2921, "step": 8616 }, { "epoch": 2.8771285475792987, "grad_norm": 1.0343413502092211, "learning_rate": 5.101853257049705e-08, "loss": 0.2761, "step": 8617 }, { "epoch": 2.8774624373956597, "grad_norm": 1.0455875007874564, "learning_rate": 5.074210397083612e-08, "loss": 0.2944, "step": 8618 }, { "epoch": 2.87779632721202, "grad_norm": 1.0557914848445578, "learning_rate": 5.046642246019029e-08, "loss": 0.2854, "step": 8619 }, { "epoch": 2.8781302170283807, "grad_norm": 1.0796042539976913, "learning_rate": 5.019148808017238e-08, "loss": 0.2848, "step": 8620 }, { "epoch": 2.878464106844741, "grad_norm": 1.0479072725194787, "learning_rate": 4.99173008722853e-08, "loss": 0.2941, "step": 8621 }, { "epoch": 2.8787979966611017, "grad_norm": 1.0409800490888306, "learning_rate": 4.964386087791595e-08, "loss": 0.3034, "step": 8622 }, { "epoch": 2.879131886477462, "grad_norm": 1.077272961120177, "learning_rate": 4.9371168138341305e-08, "loss": 0.302, "step": 8623 }, { "epoch": 2.879465776293823, "grad_norm": 1.0576198942170842, "learning_rate": 4.9099222694723446e-08, "loss": 0.2972, "step": 8624 }, { "epoch": 2.8797996661101837, "grad_norm": 1.038981221261224, "learning_rate": 4.882802458811342e-08, "loss": 0.2926, "step": 8625 }, { "epoch": 2.880133555926544, "grad_norm": 1.029500149114208, "learning_rate": 4.855757385944737e-08, "loss": 0.2893, "step": 8626 }, { "epoch": 2.8804674457429047, "grad_norm": 1.0967069910119145, "learning_rate": 4.828787054955042e-08, "loss": 0.2944, "step": 8627 }, { "epoch": 2.8808013355592657, "grad_norm": 1.0318270243896732, "learning_rate": 4.801891469913389e-08, "loss": 0.2841, "step": 8628 }, { "epoch": 2.881135225375626, "grad_norm": 1.0598557809287874, "learning_rate": 4.775070634879697e-08, "loss": 0.2841, "step": 8629 }, { "epoch": 2.8814691151919867, "grad_norm": 1.033446638539234, "learning_rate": 4.748324553902506e-08, "loss": 0.2831, "step": 8630 }, { "epoch": 2.881803005008347, "grad_norm": 1.0322580463394666, "learning_rate": 4.721653231019141e-08, "loss": 0.2842, "step": 8631 }, { "epoch": 2.8821368948247077, "grad_norm": 1.0469729294422083, "learning_rate": 4.695056670255715e-08, "loss": 0.2865, "step": 8632 }, { "epoch": 2.882470784641068, "grad_norm": 1.0238797571139442, "learning_rate": 4.668534875626851e-08, "loss": 0.2785, "step": 8633 }, { "epoch": 2.882804674457429, "grad_norm": 1.046280721342975, "learning_rate": 4.642087851136123e-08, "loss": 0.2816, "step": 8634 }, { "epoch": 2.8831385642737897, "grad_norm": 1.0575640575407463, "learning_rate": 4.615715600775561e-08, "loss": 0.2958, "step": 8635 }, { "epoch": 2.88347245409015, "grad_norm": 1.0256269697371168, "learning_rate": 4.589418128526202e-08, "loss": 0.2798, "step": 8636 }, { "epoch": 2.8838063439065107, "grad_norm": 1.0384804979074251, "learning_rate": 4.563195438357537e-08, "loss": 0.287, "step": 8637 }, { "epoch": 2.8841402337228716, "grad_norm": 1.0893330308239857, "learning_rate": 4.537047534227901e-08, "loss": 0.2983, "step": 8638 }, { "epoch": 2.884474123539232, "grad_norm": 1.041798961465777, "learning_rate": 4.510974420084302e-08, "loss": 0.2831, "step": 8639 }, { "epoch": 2.8848080133555927, "grad_norm": 1.0517896651143555, "learning_rate": 4.484976099862482e-08, "loss": 0.3044, "step": 8640 }, { "epoch": 2.885141903171953, "grad_norm": 1.0652729471835451, "learning_rate": 4.459052577486911e-08, "loss": 0.3017, "step": 8641 }, { "epoch": 2.8854757929883137, "grad_norm": 1.0267192103311404, "learning_rate": 4.433203856870682e-08, "loss": 0.2873, "step": 8642 }, { "epoch": 2.885809682804674, "grad_norm": 1.0571045902579266, "learning_rate": 4.407429941915675e-08, "loss": 0.2896, "step": 8643 }, { "epoch": 2.886143572621035, "grad_norm": 1.081057077096679, "learning_rate": 4.381730836512443e-08, "loss": 0.2909, "step": 8644 }, { "epoch": 2.8864774624373957, "grad_norm": 1.0499780924562196, "learning_rate": 4.3561065445402726e-08, "loss": 0.2936, "step": 8645 }, { "epoch": 2.886811352253756, "grad_norm": 1.0166995098641811, "learning_rate": 4.3305570698671804e-08, "loss": 0.269, "step": 8646 }, { "epoch": 2.887145242070117, "grad_norm": 1.0119132619585882, "learning_rate": 4.305082416349804e-08, "loss": 0.282, "step": 8647 }, { "epoch": 2.8874791318864776, "grad_norm": 1.0645887405803092, "learning_rate": 4.279682587833511e-08, "loss": 0.2971, "step": 8648 }, { "epoch": 2.887813021702838, "grad_norm": 1.0229960716594222, "learning_rate": 4.2543575881524023e-08, "loss": 0.2884, "step": 8649 }, { "epoch": 2.8881469115191987, "grad_norm": 0.9983822766394446, "learning_rate": 4.2291074211293634e-08, "loss": 0.2817, "step": 8650 }, { "epoch": 2.888480801335559, "grad_norm": 1.0346638912307902, "learning_rate": 4.2039320905757906e-08, "loss": 0.2835, "step": 8651 }, { "epoch": 2.8888146911519197, "grad_norm": 1.0686118724317593, "learning_rate": 4.178831600292032e-08, "loss": 0.2994, "step": 8652 }, { "epoch": 2.88914858096828, "grad_norm": 1.036877371859949, "learning_rate": 4.153805954066836e-08, "loss": 0.2848, "step": 8653 }, { "epoch": 2.889482470784641, "grad_norm": 1.036388257482719, "learning_rate": 4.128855155677902e-08, "loss": 0.2964, "step": 8654 }, { "epoch": 2.8898163606010017, "grad_norm": 1.0111342990391794, "learning_rate": 4.1039792088915506e-08, "loss": 0.2823, "step": 8655 }, { "epoch": 2.890150250417362, "grad_norm": 1.0664511833807715, "learning_rate": 4.079178117462834e-08, "loss": 0.3037, "step": 8656 }, { "epoch": 2.890484140233723, "grad_norm": 1.0467359564932912, "learning_rate": 4.0544518851353684e-08, "loss": 0.2911, "step": 8657 }, { "epoch": 2.8908180300500836, "grad_norm": 1.04867777808513, "learning_rate": 4.0298005156416134e-08, "loss": 0.2737, "step": 8658 }, { "epoch": 2.891151919866444, "grad_norm": 1.0299406397878632, "learning_rate": 4.005224012702702e-08, "loss": 0.2807, "step": 8659 }, { "epoch": 2.8914858096828047, "grad_norm": 1.0602617335708602, "learning_rate": 3.980722380028445e-08, "loss": 0.2883, "step": 8660 }, { "epoch": 2.891819699499165, "grad_norm": 1.028310357013216, "learning_rate": 3.9562956213173855e-08, "loss": 0.2978, "step": 8661 }, { "epoch": 2.8921535893155257, "grad_norm": 1.0299214421970988, "learning_rate": 3.931943740256683e-08, "loss": 0.2894, "step": 8662 }, { "epoch": 2.8924874791318866, "grad_norm": 1.063160221282751, "learning_rate": 3.907666740522231e-08, "loss": 0.3024, "step": 8663 }, { "epoch": 2.892821368948247, "grad_norm": 1.0405116756581296, "learning_rate": 3.88346462577871e-08, "loss": 0.291, "step": 8664 }, { "epoch": 2.8931552587646077, "grad_norm": 1.040947976818487, "learning_rate": 3.8593373996793637e-08, "loss": 0.2819, "step": 8665 }, { "epoch": 2.893489148580968, "grad_norm": 1.0518713102329078, "learning_rate": 3.8352850658662234e-08, "loss": 0.2862, "step": 8666 }, { "epoch": 2.893823038397329, "grad_norm": 1.0949246628702507, "learning_rate": 3.811307627970051e-08, "loss": 0.3015, "step": 8667 }, { "epoch": 2.8941569282136896, "grad_norm": 1.076071465443938, "learning_rate": 3.7874050896100634e-08, "loss": 0.2884, "step": 8668 }, { "epoch": 2.89449081803005, "grad_norm": 1.0437392545037, "learning_rate": 3.763577454394429e-08, "loss": 0.2796, "step": 8669 }, { "epoch": 2.8948247078464107, "grad_norm": 1.0477571409063056, "learning_rate": 3.7398247259199385e-08, "loss": 0.2866, "step": 8670 }, { "epoch": 2.895158597662771, "grad_norm": 1.0180653780808424, "learning_rate": 3.716146907772056e-08, "loss": 0.2817, "step": 8671 }, { "epoch": 2.8954924874791317, "grad_norm": 1.0396024101950243, "learning_rate": 3.6925440035249226e-08, "loss": 0.2828, "step": 8672 }, { "epoch": 2.8958263772954926, "grad_norm": 1.0391003012256133, "learning_rate": 3.669016016741356e-08, "loss": 0.2875, "step": 8673 }, { "epoch": 2.896160267111853, "grad_norm": 1.0474319448390397, "learning_rate": 3.645562950973014e-08, "loss": 0.3009, "step": 8674 }, { "epoch": 2.8964941569282137, "grad_norm": 1.025465801160501, "learning_rate": 3.6221848097600096e-08, "loss": 0.2885, "step": 8675 }, { "epoch": 2.896828046744574, "grad_norm": 1.0854973026884218, "learning_rate": 3.598881596631354e-08, "loss": 0.2877, "step": 8676 }, { "epoch": 2.897161936560935, "grad_norm": 1.0380540002123657, "learning_rate": 3.5756533151045655e-08, "loss": 0.2916, "step": 8677 }, { "epoch": 2.8974958263772956, "grad_norm": 1.0367588359863722, "learning_rate": 3.5524999686860074e-08, "loss": 0.2844, "step": 8678 }, { "epoch": 2.897829716193656, "grad_norm": 1.055590507775639, "learning_rate": 3.529421560870716e-08, "loss": 0.2939, "step": 8679 }, { "epoch": 2.8981636060100167, "grad_norm": 1.0115631369544689, "learning_rate": 3.5064180951422386e-08, "loss": 0.281, "step": 8680 }, { "epoch": 2.898497495826377, "grad_norm": 1.0262464776864244, "learning_rate": 3.4834895749730756e-08, "loss": 0.2806, "step": 8681 }, { "epoch": 2.8988313856427377, "grad_norm": 1.1013778716668219, "learning_rate": 3.460636003824236e-08, "loss": 0.3016, "step": 8682 }, { "epoch": 2.8991652754590986, "grad_norm": 1.0311658375384885, "learning_rate": 3.437857385145404e-08, "loss": 0.2881, "step": 8683 }, { "epoch": 2.899499165275459, "grad_norm": 1.0145880006951276, "learning_rate": 3.415153722375053e-08, "loss": 0.2848, "step": 8684 }, { "epoch": 2.8998330550918197, "grad_norm": 1.0417612792855584, "learning_rate": 3.392525018940329e-08, "loss": 0.2937, "step": 8685 }, { "epoch": 2.90016694490818, "grad_norm": 1.0309328260652477, "learning_rate": 3.369971278256945e-08, "loss": 0.295, "step": 8686 }, { "epoch": 2.900500834724541, "grad_norm": 1.0185907324719288, "learning_rate": 3.347492503729455e-08, "loss": 0.2783, "step": 8687 }, { "epoch": 2.9008347245409016, "grad_norm": 1.0308662856601325, "learning_rate": 3.3250886987509776e-08, "loss": 0.2864, "step": 8688 }, { "epoch": 2.901168614357262, "grad_norm": 1.0715831847381172, "learning_rate": 3.302759866703364e-08, "loss": 0.3074, "step": 8689 }, { "epoch": 2.9015025041736227, "grad_norm": 0.9835372376297423, "learning_rate": 3.2805060109571965e-08, "loss": 0.2713, "step": 8690 }, { "epoch": 2.901836393989983, "grad_norm": 1.0669344016892366, "learning_rate": 3.25832713487162e-08, "loss": 0.2828, "step": 8691 }, { "epoch": 2.9021702838063437, "grad_norm": 1.0336061349480008, "learning_rate": 3.2362232417945674e-08, "loss": 0.2901, "step": 8692 }, { "epoch": 2.9025041736227046, "grad_norm": 1.0357629143961131, "learning_rate": 3.214194335062648e-08, "loss": 0.3026, "step": 8693 }, { "epoch": 2.902838063439065, "grad_norm": 1.0196704146761062, "learning_rate": 3.1922404180009803e-08, "loss": 0.2809, "step": 8694 }, { "epoch": 2.9031719532554257, "grad_norm": 1.0559175231348936, "learning_rate": 3.1703614939236904e-08, "loss": 0.2921, "step": 8695 }, { "epoch": 2.903505843071786, "grad_norm": 1.0363762443313205, "learning_rate": 3.1485575661332476e-08, "loss": 0.2776, "step": 8696 }, { "epoch": 2.903839732888147, "grad_norm": 1.0520144963035873, "learning_rate": 3.12682863792102e-08, "loss": 0.2886, "step": 8697 }, { "epoch": 2.9041736227045076, "grad_norm": 1.0135085543752596, "learning_rate": 3.105174712566994e-08, "loss": 0.2823, "step": 8698 }, { "epoch": 2.904507512520868, "grad_norm": 1.0804875090284634, "learning_rate": 3.083595793339778e-08, "loss": 0.2986, "step": 8699 }, { "epoch": 2.9048414023372287, "grad_norm": 1.0371404636800365, "learning_rate": 3.0620918834967096e-08, "loss": 0.2851, "step": 8700 }, { "epoch": 2.905175292153589, "grad_norm": 1.0512164228314158, "learning_rate": 3.04066298628386e-08, "loss": 0.3008, "step": 8701 }, { "epoch": 2.9055091819699497, "grad_norm": 1.0691817769761258, "learning_rate": 3.019309104935808e-08, "loss": 0.2938, "step": 8702 }, { "epoch": 2.9058430717863106, "grad_norm": 1.050488321919771, "learning_rate": 2.9980302426759754e-08, "loss": 0.2974, "step": 8703 }, { "epoch": 2.906176961602671, "grad_norm": 1.0470617471142822, "learning_rate": 2.976826402716404e-08, "loss": 0.302, "step": 8704 }, { "epoch": 2.9065108514190316, "grad_norm": 1.0300158878993255, "learning_rate": 2.955697588257811e-08, "loss": 0.2904, "step": 8705 }, { "epoch": 2.906844741235392, "grad_norm": 1.0214262691966296, "learning_rate": 2.9346438024895897e-08, "loss": 0.2893, "step": 8706 }, { "epoch": 2.907178631051753, "grad_norm": 1.041805328616401, "learning_rate": 2.9136650485897534e-08, "loss": 0.288, "step": 8707 }, { "epoch": 2.9075125208681136, "grad_norm": 1.0521581819437302, "learning_rate": 2.8927613297251024e-08, "loss": 0.2902, "step": 8708 }, { "epoch": 2.907846410684474, "grad_norm": 1.053193143736269, "learning_rate": 2.8719326490510012e-08, "loss": 0.2917, "step": 8709 }, { "epoch": 2.9081803005008346, "grad_norm": 1.0274986837200701, "learning_rate": 2.8511790097115467e-08, "loss": 0.2812, "step": 8710 }, { "epoch": 2.908514190317195, "grad_norm": 1.0482842580772989, "learning_rate": 2.83050041483951e-08, "loss": 0.2917, "step": 8711 }, { "epoch": 2.9088480801335557, "grad_norm": 1.1140254425344813, "learning_rate": 2.8098968675562832e-08, "loss": 0.2946, "step": 8712 }, { "epoch": 2.9091819699499166, "grad_norm": 1.0043496092760547, "learning_rate": 2.7893683709720452e-08, "loss": 0.2701, "step": 8713 }, { "epoch": 2.909515859766277, "grad_norm": 0.9879537522561813, "learning_rate": 2.7689149281854288e-08, "loss": 0.2743, "step": 8714 }, { "epoch": 2.9098497495826376, "grad_norm": 1.0766543961930317, "learning_rate": 2.748536542283964e-08, "loss": 0.2966, "step": 8715 }, { "epoch": 2.9101836393989986, "grad_norm": 1.028089142843578, "learning_rate": 2.7282332163438007e-08, "loss": 0.2838, "step": 8716 }, { "epoch": 2.910517529215359, "grad_norm": 1.0513050912975281, "learning_rate": 2.708004953429655e-08, "loss": 0.2876, "step": 8717 }, { "epoch": 2.9108514190317196, "grad_norm": 1.0260500550815435, "learning_rate": 2.687851756595028e-08, "loss": 0.2775, "step": 8718 }, { "epoch": 2.91118530884808, "grad_norm": 1.0529555488552846, "learning_rate": 2.667773628881931e-08, "loss": 0.2886, "step": 8719 }, { "epoch": 2.9115191986644406, "grad_norm": 1.007859396065133, "learning_rate": 2.6477705733212732e-08, "loss": 0.2665, "step": 8720 }, { "epoch": 2.911853088480801, "grad_norm": 1.033476415731519, "learning_rate": 2.6278425929324725e-08, "loss": 0.2756, "step": 8721 }, { "epoch": 2.9121869782971617, "grad_norm": 1.0559181742508936, "learning_rate": 2.6079896907236223e-08, "loss": 0.3021, "step": 8722 }, { "epoch": 2.9125208681135226, "grad_norm": 1.0302323632637493, "learning_rate": 2.5882118696915483e-08, "loss": 0.2831, "step": 8723 }, { "epoch": 2.912854757929883, "grad_norm": 1.040451462314783, "learning_rate": 2.568509132821695e-08, "loss": 0.2962, "step": 8724 }, { "epoch": 2.9131886477462436, "grad_norm": 1.0406280861297885, "learning_rate": 2.548881483088128e-08, "loss": 0.2934, "step": 8725 }, { "epoch": 2.9135225375626046, "grad_norm": 1.014427599977292, "learning_rate": 2.5293289234537553e-08, "loss": 0.281, "step": 8726 }, { "epoch": 2.913856427378965, "grad_norm": 1.040941150137598, "learning_rate": 2.509851456869883e-08, "loss": 0.2898, "step": 8727 }, { "epoch": 2.9141903171953256, "grad_norm": 1.075955354581328, "learning_rate": 2.490449086276825e-08, "loss": 0.2876, "step": 8728 }, { "epoch": 2.914524207011686, "grad_norm": 1.0392470757181427, "learning_rate": 2.4711218146031278e-08, "loss": 0.2848, "step": 8729 }, { "epoch": 2.9148580968280466, "grad_norm": 1.0593101252175674, "learning_rate": 2.451869644766458e-08, "loss": 0.2877, "step": 8730 }, { "epoch": 2.915191986644407, "grad_norm": 1.0506834359533057, "learning_rate": 2.432692579672713e-08, "loss": 0.2842, "step": 8731 }, { "epoch": 2.915525876460768, "grad_norm": 1.0635278038251188, "learning_rate": 2.4135906222168548e-08, "loss": 0.2943, "step": 8732 }, { "epoch": 2.9158597662771286, "grad_norm": 0.9950221208943294, "learning_rate": 2.3945637752822437e-08, "loss": 0.2869, "step": 8733 }, { "epoch": 2.916193656093489, "grad_norm": 1.0624710914413273, "learning_rate": 2.3756120417409713e-08, "loss": 0.2937, "step": 8734 }, { "epoch": 2.9165275459098496, "grad_norm": 0.9866332077396213, "learning_rate": 2.3567354244537488e-08, "loss": 0.2656, "step": 8735 }, { "epoch": 2.9168614357262106, "grad_norm": 1.0467630139118538, "learning_rate": 2.3379339262700197e-08, "loss": 0.2913, "step": 8736 }, { "epoch": 2.917195325542571, "grad_norm": 1.0056494279009138, "learning_rate": 2.3192075500279577e-08, "loss": 0.2766, "step": 8737 }, { "epoch": 2.9175292153589316, "grad_norm": 1.0859380427484568, "learning_rate": 2.3005562985542462e-08, "loss": 0.3008, "step": 8738 }, { "epoch": 2.917863105175292, "grad_norm": 1.054705616820702, "learning_rate": 2.2819801746642445e-08, "loss": 0.3127, "step": 8739 }, { "epoch": 2.9181969949916526, "grad_norm": 1.0469487621252098, "learning_rate": 2.2634791811620425e-08, "loss": 0.2833, "step": 8740 }, { "epoch": 2.918530884808013, "grad_norm": 1.0429448114278714, "learning_rate": 2.2450533208403514e-08, "loss": 0.2964, "step": 8741 }, { "epoch": 2.918864774624374, "grad_norm": 1.0421889332326608, "learning_rate": 2.2267025964805568e-08, "loss": 0.2871, "step": 8742 }, { "epoch": 2.9191986644407346, "grad_norm": 1.0563351893670931, "learning_rate": 2.2084270108527206e-08, "loss": 0.2901, "step": 8743 }, { "epoch": 2.919532554257095, "grad_norm": 1.065218584153953, "learning_rate": 2.19022656671547e-08, "loss": 0.2879, "step": 8744 }, { "epoch": 2.9198664440734556, "grad_norm": 1.0195073351732138, "learning_rate": 2.1721012668162178e-08, "loss": 0.281, "step": 8745 }, { "epoch": 2.9202003338898166, "grad_norm": 1.0435141345842012, "learning_rate": 2.154051113890998e-08, "loss": 0.2758, "step": 8746 }, { "epoch": 2.920534223706177, "grad_norm": 1.0006102307547373, "learning_rate": 2.1360761106643534e-08, "loss": 0.2818, "step": 8747 }, { "epoch": 2.9208681135225376, "grad_norm": 1.0694131294542202, "learning_rate": 2.11817625984978e-08, "loss": 0.2902, "step": 8748 }, { "epoch": 2.921202003338898, "grad_norm": 0.9777176104999565, "learning_rate": 2.1003515641490614e-08, "loss": 0.269, "step": 8749 }, { "epoch": 2.9215358931552586, "grad_norm": 1.0881244003093689, "learning_rate": 2.082602026252989e-08, "loss": 0.3028, "step": 8750 }, { "epoch": 2.921869782971619, "grad_norm": 1.0788133277786438, "learning_rate": 2.064927648840809e-08, "loss": 0.2922, "step": 8751 }, { "epoch": 2.92220367278798, "grad_norm": 1.0867105259368923, "learning_rate": 2.0473284345803866e-08, "loss": 0.2996, "step": 8752 }, { "epoch": 2.9225375626043406, "grad_norm": 1.0325924625299998, "learning_rate": 2.0298043861283755e-08, "loss": 0.2885, "step": 8753 }, { "epoch": 2.922871452420701, "grad_norm": 1.0275647813225326, "learning_rate": 2.012355506130048e-08, "loss": 0.2837, "step": 8754 }, { "epoch": 2.9232053422370616, "grad_norm": 1.044984940158239, "learning_rate": 1.9949817972192975e-08, "loss": 0.292, "step": 8755 }, { "epoch": 2.9235392320534226, "grad_norm": 1.0108435157831253, "learning_rate": 1.977683262018637e-08, "loss": 0.2798, "step": 8756 }, { "epoch": 2.923873121869783, "grad_norm": 1.0310871923396054, "learning_rate": 1.9604599031393113e-08, "loss": 0.295, "step": 8757 }, { "epoch": 2.9242070116861436, "grad_norm": 1.0548401119569664, "learning_rate": 1.9433117231812404e-08, "loss": 0.2935, "step": 8758 }, { "epoch": 2.924540901502504, "grad_norm": 1.0327132545409536, "learning_rate": 1.9262387247327984e-08, "loss": 0.2806, "step": 8759 }, { "epoch": 2.9248747913188646, "grad_norm": 1.0410304169645255, "learning_rate": 1.909240910371257e-08, "loss": 0.2858, "step": 8760 }, { "epoch": 2.925208681135225, "grad_norm": 1.039532346734735, "learning_rate": 1.8923182826623975e-08, "loss": 0.2917, "step": 8761 }, { "epoch": 2.925542570951586, "grad_norm": 1.0033551216612402, "learning_rate": 1.8754708441606207e-08, "loss": 0.2775, "step": 8762 }, { "epoch": 2.9258764607679466, "grad_norm": 1.0945483708401784, "learning_rate": 1.85869859740917e-08, "loss": 0.2909, "step": 8763 }, { "epoch": 2.926210350584307, "grad_norm": 1.0602432071666401, "learning_rate": 1.842001544939742e-08, "loss": 0.2952, "step": 8764 }, { "epoch": 2.9265442404006676, "grad_norm": 1.05482157093055, "learning_rate": 1.825379689272766e-08, "loss": 0.2821, "step": 8765 }, { "epoch": 2.9268781302170286, "grad_norm": 1.0184222277819042, "learning_rate": 1.8088330329172897e-08, "loss": 0.2716, "step": 8766 }, { "epoch": 2.927212020033389, "grad_norm": 1.0679962947521737, "learning_rate": 1.792361578371038e-08, "loss": 0.2953, "step": 8767 }, { "epoch": 2.9275459098497496, "grad_norm": 1.0605697518965551, "learning_rate": 1.7759653281203547e-08, "loss": 0.2962, "step": 8768 }, { "epoch": 2.92787979966611, "grad_norm": 1.0377433375707237, "learning_rate": 1.7596442846402605e-08, "loss": 0.2932, "step": 8769 }, { "epoch": 2.9282136894824706, "grad_norm": 1.052815351981369, "learning_rate": 1.7433984503944512e-08, "loss": 0.2948, "step": 8770 }, { "epoch": 2.928547579298831, "grad_norm": 1.081020564973521, "learning_rate": 1.727227827835132e-08, "loss": 0.2874, "step": 8771 }, { "epoch": 2.928881469115192, "grad_norm": 1.0591533398924358, "learning_rate": 1.711132419403405e-08, "loss": 0.2833, "step": 8772 }, { "epoch": 2.9292153589315526, "grad_norm": 1.030041508170386, "learning_rate": 1.6951122275286614e-08, "loss": 0.2763, "step": 8773 }, { "epoch": 2.929549248747913, "grad_norm": 1.0626995604211575, "learning_rate": 1.6791672546293546e-08, "loss": 0.2913, "step": 8774 }, { "epoch": 2.9298831385642736, "grad_norm": 1.037711741419059, "learning_rate": 1.6632975031122822e-08, "loss": 0.2811, "step": 8775 }, { "epoch": 2.9302170283806346, "grad_norm": 1.0256913028641352, "learning_rate": 1.6475029753729167e-08, "loss": 0.2765, "step": 8776 }, { "epoch": 2.930550918196995, "grad_norm": 1.0241285936917035, "learning_rate": 1.6317836737955173e-08, "loss": 0.2909, "step": 8777 }, { "epoch": 2.9308848080133556, "grad_norm": 1.0343382395569047, "learning_rate": 1.6161396007529083e-08, "loss": 0.2809, "step": 8778 }, { "epoch": 2.931218697829716, "grad_norm": 1.0389067850045046, "learning_rate": 1.6005707586065346e-08, "loss": 0.2789, "step": 8779 }, { "epoch": 2.9315525876460766, "grad_norm": 1.0380126743351468, "learning_rate": 1.58507714970646e-08, "loss": 0.292, "step": 8780 }, { "epoch": 2.931886477462437, "grad_norm": 1.0458144664612021, "learning_rate": 1.569658776391536e-08, "loss": 0.2918, "step": 8781 }, { "epoch": 2.932220367278798, "grad_norm": 1.0087273183088898, "learning_rate": 1.5543156409891236e-08, "loss": 0.2797, "step": 8782 }, { "epoch": 2.9325542570951586, "grad_norm": 1.05961765168612, "learning_rate": 1.5390477458152585e-08, "loss": 0.2819, "step": 8783 }, { "epoch": 2.932888146911519, "grad_norm": 1.054455489022684, "learning_rate": 1.5238550931745977e-08, "loss": 0.294, "step": 8784 }, { "epoch": 2.93322203672788, "grad_norm": 1.0235237611312729, "learning_rate": 1.508737685360473e-08, "loss": 0.2855, "step": 8785 }, { "epoch": 2.9335559265442406, "grad_norm": 1.0437127208077415, "learning_rate": 1.493695524654948e-08, "loss": 0.2911, "step": 8786 }, { "epoch": 2.933889816360601, "grad_norm": 1.0652424900481468, "learning_rate": 1.478728613328484e-08, "loss": 0.2946, "step": 8787 }, { "epoch": 2.9342237061769616, "grad_norm": 1.0299071089530285, "learning_rate": 1.4638369536404406e-08, "loss": 0.2869, "step": 8788 }, { "epoch": 2.934557595993322, "grad_norm": 1.052161032820968, "learning_rate": 1.4490205478386865e-08, "loss": 0.293, "step": 8789 }, { "epoch": 2.9348914858096826, "grad_norm": 1.049816608422078, "learning_rate": 1.4342793981597103e-08, "loss": 0.2869, "step": 8790 }, { "epoch": 2.935225375626043, "grad_norm": 1.0481668126742612, "learning_rate": 1.419613506828732e-08, "loss": 0.2898, "step": 8791 }, { "epoch": 2.935559265442404, "grad_norm": 1.0393237151917631, "learning_rate": 1.4050228760595364e-08, "loss": 0.2881, "step": 8792 }, { "epoch": 2.9358931552587646, "grad_norm": 1.068956477469084, "learning_rate": 1.3905075080545837e-08, "loss": 0.2983, "step": 8793 }, { "epoch": 2.936227045075125, "grad_norm": 1.0361068417956574, "learning_rate": 1.3760674050050105e-08, "loss": 0.2806, "step": 8794 }, { "epoch": 2.936560934891486, "grad_norm": 1.0629858927678149, "learning_rate": 1.3617025690904617e-08, "loss": 0.2871, "step": 8795 }, { "epoch": 2.9368948247078466, "grad_norm": 1.0489673585421202, "learning_rate": 1.3474130024793697e-08, "loss": 0.2959, "step": 8796 }, { "epoch": 2.937228714524207, "grad_norm": 1.0260919447554842, "learning_rate": 1.3331987073286757e-08, "loss": 0.2963, "step": 8797 }, { "epoch": 2.9375626043405676, "grad_norm": 1.0134118832856513, "learning_rate": 1.3190596857841075e-08, "loss": 0.2807, "step": 8798 }, { "epoch": 2.937896494156928, "grad_norm": 1.0144012400072417, "learning_rate": 1.3049959399799028e-08, "loss": 0.276, "step": 8799 }, { "epoch": 2.9382303839732886, "grad_norm": 1.086267266170929, "learning_rate": 1.2910074720389742e-08, "loss": 0.2874, "step": 8800 }, { "epoch": 2.9385642737896496, "grad_norm": 1.06912118579758, "learning_rate": 1.2770942840728551e-08, "loss": 0.304, "step": 8801 }, { "epoch": 2.93889816360601, "grad_norm": 1.017648534531197, "learning_rate": 1.2632563781817542e-08, "loss": 0.2789, "step": 8802 }, { "epoch": 2.9392320534223706, "grad_norm": 1.029969384849711, "learning_rate": 1.2494937564545562e-08, "loss": 0.2858, "step": 8803 }, { "epoch": 2.939565943238731, "grad_norm": 1.0191506662444798, "learning_rate": 1.235806420968655e-08, "loss": 0.294, "step": 8804 }, { "epoch": 2.939899833055092, "grad_norm": 1.0261475122695842, "learning_rate": 1.2221943737901754e-08, "loss": 0.2841, "step": 8805 }, { "epoch": 2.9402337228714526, "grad_norm": 1.0440251993513094, "learning_rate": 1.2086576169738074e-08, "loss": 0.2955, "step": 8806 }, { "epoch": 2.940567612687813, "grad_norm": 1.0346695265402357, "learning_rate": 1.1951961525630273e-08, "loss": 0.2844, "step": 8807 }, { "epoch": 2.9409015025041736, "grad_norm": 1.0546673274477139, "learning_rate": 1.1818099825897656e-08, "loss": 0.278, "step": 8808 }, { "epoch": 2.941235392320534, "grad_norm": 1.0252095461648374, "learning_rate": 1.1684991090746278e-08, "loss": 0.2753, "step": 8809 }, { "epoch": 2.9415692821368946, "grad_norm": 1.0431331061167475, "learning_rate": 1.1552635340269513e-08, "loss": 0.288, "step": 8810 }, { "epoch": 2.9419031719532556, "grad_norm": 1.0232907566033713, "learning_rate": 1.142103259444638e-08, "loss": 0.2905, "step": 8811 }, { "epoch": 2.942237061769616, "grad_norm": 0.9990398797151788, "learning_rate": 1.1290182873141542e-08, "loss": 0.2736, "step": 8812 }, { "epoch": 2.9425709515859766, "grad_norm": 1.0174539413335137, "learning_rate": 1.1160086196107534e-08, "loss": 0.2782, "step": 8813 }, { "epoch": 2.942904841402337, "grad_norm": 1.0650625755068606, "learning_rate": 1.1030742582982534e-08, "loss": 0.2918, "step": 8814 }, { "epoch": 2.943238731218698, "grad_norm": 1.035780514028576, "learning_rate": 1.0902152053289817e-08, "loss": 0.2765, "step": 8815 }, { "epoch": 2.9435726210350586, "grad_norm": 1.0537753050522995, "learning_rate": 1.077431462644163e-08, "loss": 0.2825, "step": 8816 }, { "epoch": 2.943906510851419, "grad_norm": 1.0157339028739412, "learning_rate": 1.0647230321733648e-08, "loss": 0.2796, "step": 8817 }, { "epoch": 2.9442404006677796, "grad_norm": 1.0326555500564274, "learning_rate": 1.0520899158349418e-08, "loss": 0.2897, "step": 8818 }, { "epoch": 2.94457429048414, "grad_norm": 1.0291201901914726, "learning_rate": 1.0395321155359239e-08, "loss": 0.2818, "step": 8819 }, { "epoch": 2.9449081803005006, "grad_norm": 1.0307559428778141, "learning_rate": 1.027049633171906e-08, "loss": 0.2766, "step": 8820 }, { "epoch": 2.9452420701168616, "grad_norm": 1.0328610962096307, "learning_rate": 1.0146424706270474e-08, "loss": 0.2942, "step": 8821 }, { "epoch": 2.945575959933222, "grad_norm": 1.0345181107079766, "learning_rate": 1.0023106297742391e-08, "loss": 0.2934, "step": 8822 }, { "epoch": 2.9459098497495826, "grad_norm": 1.0277070871555838, "learning_rate": 9.900541124749919e-09, "loss": 0.2827, "step": 8823 }, { "epoch": 2.946243739565943, "grad_norm": 1.0730887739958272, "learning_rate": 9.778729205793814e-09, "loss": 0.2856, "step": 8824 }, { "epoch": 2.946577629382304, "grad_norm": 1.044380937873468, "learning_rate": 9.65767055926159e-09, "loss": 0.2858, "step": 8825 }, { "epoch": 2.9469115191986646, "grad_norm": 1.0324203085678543, "learning_rate": 9.537365203427517e-09, "loss": 0.2826, "step": 8826 }, { "epoch": 2.947245409015025, "grad_norm": 1.0341101592290547, "learning_rate": 9.417813156450961e-09, "loss": 0.2958, "step": 8827 }, { "epoch": 2.9475792988313856, "grad_norm": 1.0282483971792988, "learning_rate": 9.299014436379151e-09, "loss": 0.2801, "step": 8828 }, { "epoch": 2.947913188647746, "grad_norm": 1.0634512490437773, "learning_rate": 9.180969061143852e-09, "loss": 0.2926, "step": 8829 }, { "epoch": 2.9482470784641066, "grad_norm": 1.0354938369293933, "learning_rate": 9.063677048564145e-09, "loss": 0.287, "step": 8830 }, { "epoch": 2.9485809682804676, "grad_norm": 1.0613933429893798, "learning_rate": 8.947138416345314e-09, "loss": 0.2973, "step": 8831 }, { "epoch": 2.948914858096828, "grad_norm": 1.096041014542482, "learning_rate": 8.831353182079393e-09, "loss": 0.2975, "step": 8832 }, { "epoch": 2.9492487479131886, "grad_norm": 1.0231370484450535, "learning_rate": 8.716321363243518e-09, "loss": 0.2819, "step": 8833 }, { "epoch": 2.949582637729549, "grad_norm": 1.0070826816831768, "learning_rate": 8.602042977201574e-09, "loss": 0.2866, "step": 8834 }, { "epoch": 2.94991652754591, "grad_norm": 1.064372013057939, "learning_rate": 8.488518041204208e-09, "loss": 0.2871, "step": 8835 }, { "epoch": 2.9502504173622706, "grad_norm": 1.0432853946930443, "learning_rate": 8.375746572388266e-09, "loss": 0.2961, "step": 8836 }, { "epoch": 2.950584307178631, "grad_norm": 0.9984081388501276, "learning_rate": 8.263728587775688e-09, "loss": 0.2718, "step": 8837 }, { "epoch": 2.9509181969949916, "grad_norm": 1.0459581831702323, "learning_rate": 8.15246410427628e-09, "loss": 0.2978, "step": 8838 }, { "epoch": 2.951252086811352, "grad_norm": 1.0503965494742453, "learning_rate": 8.041953138684944e-09, "loss": 0.2859, "step": 8839 }, { "epoch": 2.9515859766277126, "grad_norm": 1.0581402311929478, "learning_rate": 7.932195707683888e-09, "loss": 0.2959, "step": 8840 }, { "epoch": 2.9519198664440736, "grad_norm": 1.0438713390138976, "learning_rate": 7.823191827840414e-09, "loss": 0.2896, "step": 8841 }, { "epoch": 2.952253756260434, "grad_norm": 1.0225663400749943, "learning_rate": 7.714941515608587e-09, "loss": 0.2918, "step": 8842 }, { "epoch": 2.9525876460767946, "grad_norm": 1.0846978935268448, "learning_rate": 7.607444787328666e-09, "loss": 0.2947, "step": 8843 }, { "epoch": 2.9529215358931555, "grad_norm": 1.056922712578444, "learning_rate": 7.500701659228226e-09, "loss": 0.3013, "step": 8844 }, { "epoch": 2.953255425709516, "grad_norm": 1.0614908508624035, "learning_rate": 7.394712147418825e-09, "loss": 0.2991, "step": 8845 }, { "epoch": 2.9535893155258766, "grad_norm": 1.0635388814799913, "learning_rate": 7.289476267900442e-09, "loss": 0.2894, "step": 8846 }, { "epoch": 2.953923205342237, "grad_norm": 1.0434242123336481, "learning_rate": 7.184994036558146e-09, "loss": 0.2835, "step": 8847 }, { "epoch": 2.9542570951585976, "grad_norm": 1.0670028342640476, "learning_rate": 7.081265469163212e-09, "loss": 0.295, "step": 8848 }, { "epoch": 2.954590984974958, "grad_norm": 1.092473646581287, "learning_rate": 6.97829058137367e-09, "loss": 0.3093, "step": 8849 }, { "epoch": 2.9549248747913186, "grad_norm": 1.0641969950734236, "learning_rate": 6.8760693887337525e-09, "loss": 0.2999, "step": 8850 }, { "epoch": 2.9552587646076796, "grad_norm": 1.0209351596398692, "learning_rate": 6.774601906673339e-09, "loss": 0.289, "step": 8851 }, { "epoch": 2.95559265442404, "grad_norm": 1.0436792379567823, "learning_rate": 6.673888150509622e-09, "loss": 0.2869, "step": 8852 }, { "epoch": 2.9559265442404006, "grad_norm": 1.04877709069664, "learning_rate": 6.5739281354443295e-09, "loss": 0.3041, "step": 8853 }, { "epoch": 2.9562604340567615, "grad_norm": 1.0375547815252397, "learning_rate": 6.474721876566503e-09, "loss": 0.2863, "step": 8854 }, { "epoch": 2.956594323873122, "grad_norm": 0.9794868789971017, "learning_rate": 6.3762693888524965e-09, "loss": 0.2692, "step": 8855 }, { "epoch": 2.9569282136894826, "grad_norm": 1.0346415407346194, "learning_rate": 6.278570687162089e-09, "loss": 0.2924, "step": 8856 }, { "epoch": 2.957262103505843, "grad_norm": 1.0581193331145042, "learning_rate": 6.181625786244039e-09, "loss": 0.2802, "step": 8857 }, { "epoch": 2.9575959933222036, "grad_norm": 1.0317202442523385, "learning_rate": 6.0854347007316404e-09, "loss": 0.2822, "step": 8858 }, { "epoch": 2.957929883138564, "grad_norm": 1.0291405134249338, "learning_rate": 5.989997445144946e-09, "loss": 0.2813, "step": 8859 }, { "epoch": 2.9582637729549246, "grad_norm": 1.0567144979333043, "learning_rate": 5.895314033890209e-09, "loss": 0.2965, "step": 8860 }, { "epoch": 2.9585976627712856, "grad_norm": 1.0429244975632166, "learning_rate": 5.801384481259886e-09, "loss": 0.2878, "step": 8861 }, { "epoch": 2.958931552587646, "grad_norm": 1.0464139672424533, "learning_rate": 5.7082088014326356e-09, "loss": 0.2895, "step": 8862 }, { "epoch": 2.9592654424040066, "grad_norm": 0.9960063610424555, "learning_rate": 5.615787008473317e-09, "loss": 0.2855, "step": 8863 }, { "epoch": 2.9595993322203675, "grad_norm": 1.0502584743570773, "learning_rate": 5.524119116332993e-09, "loss": 0.2931, "step": 8864 }, { "epoch": 2.959933222036728, "grad_norm": 1.0683401801854029, "learning_rate": 5.433205138848374e-09, "loss": 0.2971, "step": 8865 }, { "epoch": 2.9602671118530886, "grad_norm": 1.0362564856533336, "learning_rate": 5.343045089744036e-09, "loss": 0.2858, "step": 8866 }, { "epoch": 2.960601001669449, "grad_norm": 1.016450419237181, "learning_rate": 5.25363898262854e-09, "loss": 0.2854, "step": 8867 }, { "epoch": 2.9609348914858096, "grad_norm": 1.061153671385597, "learning_rate": 5.164986830998308e-09, "loss": 0.2872, "step": 8868 }, { "epoch": 2.96126878130217, "grad_norm": 1.006717641167584, "learning_rate": 5.077088648235418e-09, "loss": 0.2786, "step": 8869 }, { "epoch": 2.961602671118531, "grad_norm": 1.0530615501113463, "learning_rate": 4.989944447607031e-09, "loss": 0.2951, "step": 8870 }, { "epoch": 2.9619365609348915, "grad_norm": 1.009886007024124, "learning_rate": 4.903554242269293e-09, "loss": 0.2766, "step": 8871 }, { "epoch": 2.962270450751252, "grad_norm": 1.095676597288118, "learning_rate": 4.817918045261216e-09, "loss": 0.3065, "step": 8872 }, { "epoch": 2.9626043405676126, "grad_norm": 1.023911232057721, "learning_rate": 4.733035869510238e-09, "loss": 0.2936, "step": 8873 }, { "epoch": 2.9629382303839735, "grad_norm": 1.0866784681629076, "learning_rate": 4.648907727829444e-09, "loss": 0.2885, "step": 8874 }, { "epoch": 2.963272120200334, "grad_norm": 1.0421354538395597, "learning_rate": 4.565533632917008e-09, "loss": 0.2932, "step": 8875 }, { "epoch": 2.9636060100166945, "grad_norm": 1.051335563736659, "learning_rate": 4.482913597359529e-09, "loss": 0.2958, "step": 8876 }, { "epoch": 2.963939899833055, "grad_norm": 1.0452228425227104, "learning_rate": 4.401047633628141e-09, "loss": 0.2794, "step": 8877 }, { "epoch": 2.9642737896494156, "grad_norm": 1.0464922241847237, "learning_rate": 4.319935754079629e-09, "loss": 0.3008, "step": 8878 }, { "epoch": 2.964607679465776, "grad_norm": 1.0877126280432268, "learning_rate": 4.239577970959196e-09, "loss": 0.302, "step": 8879 }, { "epoch": 2.964941569282137, "grad_norm": 1.0603390091685143, "learning_rate": 4.159974296395475e-09, "loss": 0.3006, "step": 8880 }, { "epoch": 2.9652754590984975, "grad_norm": 1.001937310456153, "learning_rate": 4.081124742404963e-09, "loss": 0.2781, "step": 8881 }, { "epoch": 2.965609348914858, "grad_norm": 1.0164577869295075, "learning_rate": 4.003029320890917e-09, "loss": 0.2893, "step": 8882 }, { "epoch": 2.9659432387312186, "grad_norm": 1.0261838380808739, "learning_rate": 3.925688043640574e-09, "loss": 0.275, "step": 8883 }, { "epoch": 2.9662771285475795, "grad_norm": 1.0554302920267087, "learning_rate": 3.849100922329041e-09, "loss": 0.292, "step": 8884 }, { "epoch": 2.96661101836394, "grad_norm": 1.0745187773445366, "learning_rate": 3.7732679685176245e-09, "loss": 0.2999, "step": 8885 }, { "epoch": 2.9669449081803005, "grad_norm": 1.0244202519119783, "learning_rate": 3.6981891936527236e-09, "loss": 0.2946, "step": 8886 }, { "epoch": 2.967278797996661, "grad_norm": 1.0591255320365123, "learning_rate": 3.6238646090674957e-09, "loss": 0.2942, "step": 8887 }, { "epoch": 2.9676126878130216, "grad_norm": 1.0381000372728109, "learning_rate": 3.5502942259807436e-09, "loss": 0.2876, "step": 8888 }, { "epoch": 2.967946577629382, "grad_norm": 1.049696551364272, "learning_rate": 3.4774780554991394e-09, "loss": 0.2805, "step": 8889 }, { "epoch": 2.968280467445743, "grad_norm": 1.0613172139823681, "learning_rate": 3.4054161086133354e-09, "loss": 0.3016, "step": 8890 }, { "epoch": 2.9686143572621035, "grad_norm": 1.0611506962348434, "learning_rate": 3.334108396201852e-09, "loss": 0.2943, "step": 8891 }, { "epoch": 2.968948247078464, "grad_norm": 1.0372727067802916, "learning_rate": 3.2635549290271907e-09, "loss": 0.2916, "step": 8892 }, { "epoch": 2.9692821368948246, "grad_norm": 1.0354362233442973, "learning_rate": 3.193755717740832e-09, "loss": 0.286, "step": 8893 }, { "epoch": 2.9696160267111855, "grad_norm": 1.0785444229111567, "learning_rate": 3.1247107728776815e-09, "loss": 0.3038, "step": 8894 }, { "epoch": 2.969949916527546, "grad_norm": 1.049854495744381, "learning_rate": 3.056420104861069e-09, "loss": 0.294, "step": 8895 }, { "epoch": 2.9702838063439065, "grad_norm": 1.04638776186452, "learning_rate": 2.988883723998859e-09, "loss": 0.2971, "step": 8896 }, { "epoch": 2.970617696160267, "grad_norm": 1.0059661465674183, "learning_rate": 2.92210164048623e-09, "loss": 0.2896, "step": 8897 }, { "epoch": 2.9709515859766276, "grad_norm": 1.0469748902238367, "learning_rate": 2.8560738644034524e-09, "loss": 0.2848, "step": 8898 }, { "epoch": 2.971285475792988, "grad_norm": 1.0591227786271729, "learning_rate": 2.790800405717553e-09, "loss": 0.2974, "step": 8899 }, { "epoch": 2.971619365609349, "grad_norm": 1.0504180692485496, "learning_rate": 2.7262812742812063e-09, "loss": 0.2938, "step": 8900 }, { "epoch": 2.9719532554257095, "grad_norm": 1.044996161694012, "learning_rate": 2.662516479833843e-09, "loss": 0.2894, "step": 8901 }, { "epoch": 2.97228714524207, "grad_norm": 1.0423599159074004, "learning_rate": 2.5995060320010977e-09, "loss": 0.2853, "step": 8902 }, { "epoch": 2.9726210350584306, "grad_norm": 1.0484356652764273, "learning_rate": 2.5372499402936957e-09, "loss": 0.2942, "step": 8903 }, { "epoch": 2.9729549248747915, "grad_norm": 1.008968219981436, "learning_rate": 2.4757482141096743e-09, "loss": 0.2772, "step": 8904 }, { "epoch": 2.973288814691152, "grad_norm": 1.0536867193180381, "learning_rate": 2.415000862732164e-09, "loss": 0.2918, "step": 8905 }, { "epoch": 2.9736227045075125, "grad_norm": 1.034346272122005, "learning_rate": 2.3550078953316067e-09, "loss": 0.288, "step": 8906 }, { "epoch": 2.973956594323873, "grad_norm": 1.0497311062623587, "learning_rate": 2.295769320963537e-09, "loss": 0.2974, "step": 8907 }, { "epoch": 2.9742904841402336, "grad_norm": 1.020726391307306, "learning_rate": 2.237285148570245e-09, "loss": 0.2933, "step": 8908 }, { "epoch": 2.974624373956594, "grad_norm": 1.048912548934363, "learning_rate": 2.179555386979115e-09, "loss": 0.2864, "step": 8909 }, { "epoch": 2.974958263772955, "grad_norm": 1.0256651443468474, "learning_rate": 2.122580044905953e-09, "loss": 0.2898, "step": 8910 }, { "epoch": 2.9752921535893155, "grad_norm": 1.0160786873538243, "learning_rate": 2.066359130949991e-09, "loss": 0.2876, "step": 8911 }, { "epoch": 2.975626043405676, "grad_norm": 1.0530220336352152, "learning_rate": 2.0108926535977736e-09, "loss": 0.2896, "step": 8912 }, { "epoch": 2.975959933222037, "grad_norm": 1.0209969400971417, "learning_rate": 1.956180621222048e-09, "loss": 0.2869, "step": 8913 }, { "epoch": 2.9762938230383975, "grad_norm": 1.0508989069401526, "learning_rate": 1.9022230420823184e-09, "loss": 0.3099, "step": 8914 }, { "epoch": 2.976627712854758, "grad_norm": 1.0372601306621336, "learning_rate": 1.8490199243226258e-09, "loss": 0.2807, "step": 8915 }, { "epoch": 2.9769616026711185, "grad_norm": 1.0587174030149615, "learning_rate": 1.7965712759743238e-09, "loss": 0.3025, "step": 8916 }, { "epoch": 2.977295492487479, "grad_norm": 1.104409437126018, "learning_rate": 1.7448771049544121e-09, "loss": 0.3073, "step": 8917 }, { "epoch": 2.9776293823038396, "grad_norm": 1.0374899136933606, "learning_rate": 1.693937419066094e-09, "loss": 0.2901, "step": 8918 }, { "epoch": 2.9779632721202, "grad_norm": 1.0370498540783628, "learning_rate": 1.6437522259993288e-09, "loss": 0.2896, "step": 8919 }, { "epoch": 2.978297161936561, "grad_norm": 0.9960184939051908, "learning_rate": 1.5943215333280581e-09, "loss": 0.2685, "step": 8920 }, { "epoch": 2.9786310517529215, "grad_norm": 1.0333470167899674, "learning_rate": 1.5456453485152012e-09, "loss": 0.2938, "step": 8921 }, { "epoch": 2.978964941569282, "grad_norm": 1.0194614726172253, "learning_rate": 1.497723678908214e-09, "loss": 0.2807, "step": 8922 }, { "epoch": 2.979298831385643, "grad_norm": 1.0390457272859974, "learning_rate": 1.4505565317401993e-09, "loss": 0.2863, "step": 8923 }, { "epoch": 2.9796327212020035, "grad_norm": 0.997589757099141, "learning_rate": 1.4041439141315727e-09, "loss": 0.2741, "step": 8924 }, { "epoch": 2.979966611018364, "grad_norm": 1.0367559292052404, "learning_rate": 1.3584858330878414e-09, "loss": 0.2868, "step": 8925 }, { "epoch": 2.9803005008347245, "grad_norm": 1.0615130370933843, "learning_rate": 1.3135822955018252e-09, "loss": 0.3061, "step": 8926 }, { "epoch": 2.980634390651085, "grad_norm": 1.0252484635504244, "learning_rate": 1.2694333081514355e-09, "loss": 0.2849, "step": 8927 }, { "epoch": 2.9809682804674456, "grad_norm": 1.047544390919415, "learning_rate": 1.2260388777002307e-09, "loss": 0.2962, "step": 8928 }, { "epoch": 2.9813021702838065, "grad_norm": 1.0686472695640195, "learning_rate": 1.1833990106996374e-09, "loss": 0.3071, "step": 8929 }, { "epoch": 2.981636060100167, "grad_norm": 1.0424256591344347, "learning_rate": 1.141513713585063e-09, "loss": 0.2911, "step": 8930 }, { "epoch": 2.9819699499165275, "grad_norm": 1.026879459362956, "learning_rate": 1.1003829926797827e-09, "loss": 0.2904, "step": 8931 }, { "epoch": 2.982303839732888, "grad_norm": 1.0663099512504766, "learning_rate": 1.060006854192719e-09, "loss": 0.3024, "step": 8932 }, { "epoch": 2.982637729549249, "grad_norm": 1.0419979629954716, "learning_rate": 1.0203853042184408e-09, "loss": 0.2821, "step": 8933 }, { "epoch": 2.9829716193656095, "grad_norm": 1.0299564483742536, "learning_rate": 9.815183487371649e-10, "loss": 0.2918, "step": 8934 }, { "epoch": 2.98330550918197, "grad_norm": 1.0527611791458389, "learning_rate": 9.434059936164197e-10, "loss": 0.291, "step": 8935 }, { "epoch": 2.9836393989983305, "grad_norm": 1.013478233385601, "learning_rate": 9.060482446088259e-10, "loss": 0.2798, "step": 8936 }, { "epoch": 2.983973288814691, "grad_norm": 1.0265706400729597, "learning_rate": 8.694451073543164e-10, "loss": 0.2805, "step": 8937 }, { "epoch": 2.9843071786310515, "grad_norm": 1.0688128298403519, "learning_rate": 8.33596587377361e-10, "loss": 0.2905, "step": 8938 }, { "epoch": 2.9846410684474125, "grad_norm": 1.028224232905745, "learning_rate": 7.985026900897419e-10, "loss": 0.2779, "step": 8939 }, { "epoch": 2.984974958263773, "grad_norm": 1.0086773179378121, "learning_rate": 7.641634207888882e-10, "loss": 0.2817, "step": 8940 }, { "epoch": 2.9853088480801335, "grad_norm": 1.0023724262471312, "learning_rate": 7.30578784657876e-10, "loss": 0.2801, "step": 8941 }, { "epoch": 2.985642737896494, "grad_norm": 1.0244343047201703, "learning_rate": 6.977487867665389e-10, "loss": 0.2781, "step": 8942 }, { "epoch": 2.985976627712855, "grad_norm": 1.041313565257544, "learning_rate": 6.656734320703573e-10, "loss": 0.2947, "step": 8943 }, { "epoch": 2.9863105175292155, "grad_norm": 1.029107638172874, "learning_rate": 6.343527254115689e-10, "loss": 0.2937, "step": 8944 }, { "epoch": 2.986644407345576, "grad_norm": 1.0505563228632768, "learning_rate": 6.037866715175034e-10, "loss": 0.2955, "step": 8945 }, { "epoch": 2.9869782971619365, "grad_norm": 1.0479675084707685, "learning_rate": 5.739752750022476e-10, "loss": 0.2876, "step": 8946 }, { "epoch": 2.987312186978297, "grad_norm": 1.0905108436760322, "learning_rate": 5.449185403666457e-10, "loss": 0.3008, "step": 8947 }, { "epoch": 2.9876460767946575, "grad_norm": 1.0388864943446634, "learning_rate": 5.166164719955236e-10, "loss": 0.2846, "step": 8948 }, { "epoch": 2.9879799666110185, "grad_norm": 1.0166305334406873, "learning_rate": 4.890690741615744e-10, "loss": 0.2844, "step": 8949 }, { "epoch": 2.988313856427379, "grad_norm": 1.0519240039427074, "learning_rate": 4.622763510231387e-10, "loss": 0.3003, "step": 8950 }, { "epoch": 2.9886477462437395, "grad_norm": 1.017695830597539, "learning_rate": 4.3623830662475884e-10, "loss": 0.2855, "step": 8951 }, { "epoch": 2.9889816360601, "grad_norm": 1.052496651013131, "learning_rate": 4.1095494489662436e-10, "loss": 0.3037, "step": 8952 }, { "epoch": 2.989315525876461, "grad_norm": 1.0439097240451984, "learning_rate": 3.8642626965512685e-10, "loss": 0.2874, "step": 8953 }, { "epoch": 2.9896494156928215, "grad_norm": 1.0397939014727147, "learning_rate": 3.6265228460286017e-10, "loss": 0.2853, "step": 8954 }, { "epoch": 2.989983305509182, "grad_norm": 1.032964006167351, "learning_rate": 3.3963299332917535e-10, "loss": 0.2894, "step": 8955 }, { "epoch": 2.9903171953255425, "grad_norm": 1.040592316817682, "learning_rate": 3.1736839930796014e-10, "loss": 0.297, "step": 8956 }, { "epoch": 2.990651085141903, "grad_norm": 1.0419799606647044, "learning_rate": 2.9585850590041485e-10, "loss": 0.2978, "step": 8957 }, { "epoch": 2.9909849749582635, "grad_norm": 1.045403096899493, "learning_rate": 2.751033163533867e-10, "loss": 0.2835, "step": 8958 }, { "epoch": 2.9913188647746245, "grad_norm": 1.0422916550449886, "learning_rate": 2.5510283379992507e-10, "loss": 0.2953, "step": 8959 }, { "epoch": 2.991652754590985, "grad_norm": 1.0424650692700892, "learning_rate": 2.3585706125928145e-10, "loss": 0.2953, "step": 8960 }, { "epoch": 2.9919866444073455, "grad_norm": 0.9999920755561816, "learning_rate": 2.1736600163635434e-10, "loss": 0.2708, "step": 8961 }, { "epoch": 2.992320534223706, "grad_norm": 1.0656451109689575, "learning_rate": 1.9962965772224452e-10, "loss": 0.2857, "step": 8962 }, { "epoch": 2.992654424040067, "grad_norm": 1.0451008649388698, "learning_rate": 1.8264803219480986e-10, "loss": 0.2929, "step": 8963 }, { "epoch": 2.9929883138564275, "grad_norm": 1.0598052636978834, "learning_rate": 1.6642112761700024e-10, "loss": 0.2983, "step": 8964 }, { "epoch": 2.993322203672788, "grad_norm": 1.0246443172370638, "learning_rate": 1.5094894643796766e-10, "loss": 0.2898, "step": 8965 }, { "epoch": 2.9936560934891485, "grad_norm": 1.0785822608933269, "learning_rate": 1.3623149099362132e-10, "loss": 0.3065, "step": 8966 }, { "epoch": 2.993989983305509, "grad_norm": 1.0460178079787452, "learning_rate": 1.2226876350551752e-10, "loss": 0.2815, "step": 8967 }, { "epoch": 2.9943238731218695, "grad_norm": 1.040214643599977, "learning_rate": 1.090607660814147e-10, "loss": 0.2943, "step": 8968 }, { "epoch": 2.9946577629382305, "grad_norm": 1.0377846711027776, "learning_rate": 9.660750071471826e-11, "loss": 0.2822, "step": 8969 }, { "epoch": 2.994991652754591, "grad_norm": 1.070298256870568, "learning_rate": 8.490896928559089e-11, "loss": 0.2998, "step": 8970 }, { "epoch": 2.9953255425709515, "grad_norm": 1.0438247210557432, "learning_rate": 7.396517355984234e-11, "loss": 0.2866, "step": 8971 }, { "epoch": 2.995659432387312, "grad_norm": 1.0445544975500198, "learning_rate": 6.377611518948446e-11, "loss": 0.2935, "step": 8972 }, { "epoch": 2.995993322203673, "grad_norm": 1.013553135179039, "learning_rate": 5.434179571217613e-11, "loss": 0.2788, "step": 8973 }, { "epoch": 2.9963272120200335, "grad_norm": 1.0667038991691054, "learning_rate": 4.566221655233349e-11, "loss": 0.3015, "step": 8974 }, { "epoch": 2.996661101836394, "grad_norm": 1.0230840332048852, "learning_rate": 3.773737902057484e-11, "loss": 0.2727, "step": 8975 }, { "epoch": 2.9969949916527545, "grad_norm": 1.0415353518803463, "learning_rate": 3.0567284312055243e-11, "loss": 0.2875, "step": 8976 }, { "epoch": 2.997328881469115, "grad_norm": 0.9971612078863324, "learning_rate": 2.4151933510352388e-11, "loss": 0.2781, "step": 8977 }, { "epoch": 2.9976627712854755, "grad_norm": 1.0513629127626376, "learning_rate": 1.849132758302563e-11, "loss": 0.2853, "step": 8978 }, { "epoch": 2.9979966611018365, "grad_norm": 1.011833564418466, "learning_rate": 1.3585467384391592e-11, "loss": 0.2883, "step": 8979 }, { "epoch": 2.998330550918197, "grad_norm": 2.668166480829407, "learning_rate": 9.434353656079253e-12, "loss": 0.2952, "step": 8980 }, { "epoch": 2.9986644407345575, "grad_norm": 1.0353822110142428, "learning_rate": 6.037987023144176e-12, "loss": 0.2833, "step": 8981 }, { "epoch": 2.9989983305509185, "grad_norm": 1.055114774255733, "learning_rate": 3.396367999619621e-12, "loss": 0.286, "step": 8982 }, { "epoch": 2.999332220367279, "grad_norm": 1.0632476476477566, "learning_rate": 1.509496983520542e-12, "loss": 0.2947, "step": 8983 }, { "epoch": 2.9996661101836395, "grad_norm": 1.0530331520806548, "learning_rate": 3.7737426017425693e-13, "loss": 0.2867, "step": 8984 }, { "epoch": 3.0, "grad_norm": 1.0246903863850174, "learning_rate": 0.0, "loss": 0.2858, "step": 8985 }, { "epoch": 3.0, "eval_loss": 0.37112197279930115, "eval_runtime": 822.1932, "eval_samples_per_second": 24.542, "eval_steps_per_second": 0.767, "step": 8985 }, { "epoch": 3.0, "step": 8985, "total_flos": 3.068536882104107e+18, "train_loss": 0.5731845734878321, "train_runtime": 174288.5534, "train_samples_per_second": 6.598, "train_steps_per_second": 0.052 } ], "logging_steps": 1, "max_steps": 8985, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.068536882104107e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }