{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998560778331078, "eval_steps": 500, "global_step": 2171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00046055093405486313, "grad_norm": 21.45334747189511, "learning_rate": 3.0303030303030305e-08, "loss": 1.3256, "step": 1 }, { "epoch": 0.0009211018681097263, "grad_norm": 19.580095776154405, "learning_rate": 6.060606060606061e-08, "loss": 1.2555, "step": 2 }, { "epoch": 0.0013816528021645895, "grad_norm": 20.88684668345603, "learning_rate": 9.09090909090909e-08, "loss": 1.2588, "step": 3 }, { "epoch": 0.0018422037362194525, "grad_norm": 19.685319022864512, "learning_rate": 1.2121212121212122e-07, "loss": 1.2589, "step": 4 }, { "epoch": 0.002302754670274316, "grad_norm": 23.030634143530854, "learning_rate": 1.5151515151515152e-07, "loss": 1.2032, "step": 5 }, { "epoch": 0.002763305604329179, "grad_norm": 22.575725827527073, "learning_rate": 1.818181818181818e-07, "loss": 1.1612, "step": 6 }, { "epoch": 0.003223856538384042, "grad_norm": 21.264901994735776, "learning_rate": 2.121212121212121e-07, "loss": 1.2157, "step": 7 }, { "epoch": 0.003684407472438905, "grad_norm": 19.071512870789324, "learning_rate": 2.4242424242424244e-07, "loss": 1.2748, "step": 8 }, { "epoch": 0.004144958406493768, "grad_norm": 20.368951163158684, "learning_rate": 2.727272727272727e-07, "loss": 1.2934, "step": 9 }, { "epoch": 0.004605509340548632, "grad_norm": 19.127898017786986, "learning_rate": 3.0303030303030305e-07, "loss": 1.4955, "step": 10 }, { "epoch": 0.005066060274603494, "grad_norm": 19.90155995072908, "learning_rate": 3.333333333333333e-07, "loss": 1.3619, "step": 11 }, { "epoch": 0.005526611208658358, "grad_norm": 18.99510926121404, "learning_rate": 3.636363636363636e-07, "loss": 1.4517, "step": 12 }, { "epoch": 0.0059871621427132204, "grad_norm": 18.213673184141232, "learning_rate": 3.939393939393939e-07, "loss": 1.47, "step": 13 }, { "epoch": 0.006447713076768084, "grad_norm": 20.18031895758793, "learning_rate": 4.242424242424242e-07, "loss": 1.3055, "step": 14 }, { "epoch": 0.006908264010822947, "grad_norm": 21.02552691036899, "learning_rate": 4.545454545454545e-07, "loss": 1.252, "step": 15 }, { "epoch": 0.00736881494487781, "grad_norm": 18.561221405147762, "learning_rate": 4.848484848484849e-07, "loss": 1.2293, "step": 16 }, { "epoch": 0.007829365878932673, "grad_norm": 15.730570274397426, "learning_rate": 5.151515151515151e-07, "loss": 1.4934, "step": 17 }, { "epoch": 0.008289916812987536, "grad_norm": 16.33418879128859, "learning_rate": 5.454545454545454e-07, "loss": 1.4092, "step": 18 }, { "epoch": 0.0087504677470424, "grad_norm": 18.648783705573848, "learning_rate": 5.757575757575758e-07, "loss": 1.1603, "step": 19 }, { "epoch": 0.009211018681097263, "grad_norm": 16.5450620829572, "learning_rate": 6.060606060606061e-07, "loss": 1.258, "step": 20 }, { "epoch": 0.009671569615152125, "grad_norm": 15.115835661716348, "learning_rate": 6.363636363636363e-07, "loss": 1.4325, "step": 21 }, { "epoch": 0.010132120549206989, "grad_norm": 19.5184665716698, "learning_rate": 6.666666666666666e-07, "loss": 1.1265, "step": 22 }, { "epoch": 0.010592671483261852, "grad_norm": 16.056804590673597, "learning_rate": 6.96969696969697e-07, "loss": 1.4436, "step": 23 }, { "epoch": 0.011053222417316716, "grad_norm": 13.975716180751283, "learning_rate": 7.272727272727272e-07, "loss": 1.1745, "step": 24 }, { "epoch": 0.011513773351371579, "grad_norm": 13.327454910243086, "learning_rate": 7.575757575757575e-07, "loss": 1.2678, "step": 25 }, { "epoch": 0.011974324285426441, "grad_norm": 11.83190176620112, "learning_rate": 7.878787878787878e-07, "loss": 1.2034, "step": 26 }, { "epoch": 0.012434875219481304, "grad_norm": 11.458888081171626, "learning_rate": 8.181818181818182e-07, "loss": 1.2103, "step": 27 }, { "epoch": 0.012895426153536168, "grad_norm": 11.919507011224344, "learning_rate": 8.484848484848484e-07, "loss": 1.2206, "step": 28 }, { "epoch": 0.013355977087591031, "grad_norm": 10.938850998307037, "learning_rate": 8.787878787878787e-07, "loss": 1.0822, "step": 29 }, { "epoch": 0.013816528021645893, "grad_norm": 10.569725956458699, "learning_rate": 9.09090909090909e-07, "loss": 1.1344, "step": 30 }, { "epoch": 0.014277078955700757, "grad_norm": 8.729707563991274, "learning_rate": 9.393939393939395e-07, "loss": 1.2329, "step": 31 }, { "epoch": 0.01473762988975562, "grad_norm": 9.67339257380593, "learning_rate": 9.696969696969698e-07, "loss": 1.3485, "step": 32 }, { "epoch": 0.015198180823810484, "grad_norm": 8.883133586162915, "learning_rate": 1e-06, "loss": 1.2696, "step": 33 }, { "epoch": 0.015658731757865346, "grad_norm": 7.54855226917559, "learning_rate": 1.0303030303030302e-06, "loss": 1.1104, "step": 34 }, { "epoch": 0.01611928269192021, "grad_norm": 6.585721210653854, "learning_rate": 1.0606060606060606e-06, "loss": 0.8758, "step": 35 }, { "epoch": 0.016579833625975073, "grad_norm": 6.947988444942179, "learning_rate": 1.0909090909090908e-06, "loss": 1.0534, "step": 36 }, { "epoch": 0.017040384560029934, "grad_norm": 7.151187512323246, "learning_rate": 1.121212121212121e-06, "loss": 1.0542, "step": 37 }, { "epoch": 0.0175009354940848, "grad_norm": 8.191403913341114, "learning_rate": 1.1515151515151516e-06, "loss": 0.9007, "step": 38 }, { "epoch": 0.01796148642813966, "grad_norm": 6.127478182672473, "learning_rate": 1.1818181818181818e-06, "loss": 1.0101, "step": 39 }, { "epoch": 0.018422037362194527, "grad_norm": 5.974155924894252, "learning_rate": 1.2121212121212122e-06, "loss": 1.1028, "step": 40 }, { "epoch": 0.01888258829624939, "grad_norm": 5.743454831875933, "learning_rate": 1.2424242424242424e-06, "loss": 1.0467, "step": 41 }, { "epoch": 0.01934313923030425, "grad_norm": 6.5041582565842235, "learning_rate": 1.2727272727272726e-06, "loss": 1.159, "step": 42 }, { "epoch": 0.019803690164359115, "grad_norm": 6.823854474650457, "learning_rate": 1.303030303030303e-06, "loss": 1.1179, "step": 43 }, { "epoch": 0.020264241098413977, "grad_norm": 5.70954928742195, "learning_rate": 1.3333333333333332e-06, "loss": 1.1165, "step": 44 }, { "epoch": 0.020724792032468842, "grad_norm": 5.36763933087953, "learning_rate": 1.3636363636363634e-06, "loss": 1.0304, "step": 45 }, { "epoch": 0.021185342966523704, "grad_norm": 5.598106639215977, "learning_rate": 1.393939393939394e-06, "loss": 1.1187, "step": 46 }, { "epoch": 0.021645893900578566, "grad_norm": 5.60323582573283, "learning_rate": 1.4242424242424242e-06, "loss": 1.1855, "step": 47 }, { "epoch": 0.02210644483463343, "grad_norm": 6.452315864601203, "learning_rate": 1.4545454545454544e-06, "loss": 1.0131, "step": 48 }, { "epoch": 0.022566995768688293, "grad_norm": 4.735978691644224, "learning_rate": 1.4848484848484848e-06, "loss": 0.82, "step": 49 }, { "epoch": 0.023027546702743158, "grad_norm": 5.319830703344703, "learning_rate": 1.515151515151515e-06, "loss": 0.84, "step": 50 }, { "epoch": 0.02348809763679802, "grad_norm": 5.616798643689776, "learning_rate": 1.5454545454545454e-06, "loss": 1.2087, "step": 51 }, { "epoch": 0.023948648570852882, "grad_norm": 4.908913076585616, "learning_rate": 1.5757575757575756e-06, "loss": 0.9678, "step": 52 }, { "epoch": 0.024409199504907747, "grad_norm": 4.791467507278751, "learning_rate": 1.6060606060606058e-06, "loss": 1.0368, "step": 53 }, { "epoch": 0.02486975043896261, "grad_norm": 4.889868911111545, "learning_rate": 1.6363636363636365e-06, "loss": 0.8456, "step": 54 }, { "epoch": 0.02533030137301747, "grad_norm": 4.910202096186145, "learning_rate": 1.6666666666666667e-06, "loss": 0.8929, "step": 55 }, { "epoch": 0.025790852307072336, "grad_norm": 4.700122762945805, "learning_rate": 1.6969696969696969e-06, "loss": 0.955, "step": 56 }, { "epoch": 0.026251403241127198, "grad_norm": 5.476381138627308, "learning_rate": 1.7272727272727273e-06, "loss": 0.9445, "step": 57 }, { "epoch": 0.026711954175182063, "grad_norm": 4.6083667643642325, "learning_rate": 1.7575757575757575e-06, "loss": 0.8648, "step": 58 }, { "epoch": 0.027172505109236925, "grad_norm": 4.250698308262483, "learning_rate": 1.7878787878787877e-06, "loss": 0.9157, "step": 59 }, { "epoch": 0.027633056043291786, "grad_norm": 4.999976810678289, "learning_rate": 1.818181818181818e-06, "loss": 1.1341, "step": 60 }, { "epoch": 0.02809360697734665, "grad_norm": 4.522497229279195, "learning_rate": 1.8484848484848483e-06, "loss": 1.117, "step": 61 }, { "epoch": 0.028554157911401513, "grad_norm": 5.1279215517265815, "learning_rate": 1.878787878787879e-06, "loss": 1.0545, "step": 62 }, { "epoch": 0.02901470884545638, "grad_norm": 4.943951560385474, "learning_rate": 1.909090909090909e-06, "loss": 0.9157, "step": 63 }, { "epoch": 0.02947525977951124, "grad_norm": 5.690484639927253, "learning_rate": 1.9393939393939395e-06, "loss": 1.0542, "step": 64 }, { "epoch": 0.029935810713566102, "grad_norm": 4.814150383295266, "learning_rate": 1.9696969696969695e-06, "loss": 1.0494, "step": 65 }, { "epoch": 0.030396361647620967, "grad_norm": 4.57899294439187, "learning_rate": 2e-06, "loss": 0.8373, "step": 66 }, { "epoch": 0.03085691258167583, "grad_norm": 4.435895901557883, "learning_rate": 1.9999988863070544e-06, "loss": 0.8165, "step": 67 }, { "epoch": 0.03131746351573069, "grad_norm": 4.598477090733282, "learning_rate": 1.999995545230698e-06, "loss": 1.0677, "step": 68 }, { "epoch": 0.031778014449785556, "grad_norm": 5.7135916003192175, "learning_rate": 1.9999899767783724e-06, "loss": 1.0097, "step": 69 }, { "epoch": 0.03223856538384042, "grad_norm": 5.16421078717936, "learning_rate": 1.999982180962482e-06, "loss": 0.9817, "step": 70 }, { "epoch": 0.03269911631789528, "grad_norm": 4.637231786603895, "learning_rate": 1.999972157800389e-06, "loss": 0.8308, "step": 71 }, { "epoch": 0.033159667251950145, "grad_norm": 4.59288209573882, "learning_rate": 1.999959907314421e-06, "loss": 0.8153, "step": 72 }, { "epoch": 0.03362021818600501, "grad_norm": 4.934552067939579, "learning_rate": 1.999945429531863e-06, "loss": 1.0857, "step": 73 }, { "epoch": 0.03408076912005987, "grad_norm": 4.4408317102077515, "learning_rate": 1.9999287244849633e-06, "loss": 0.9306, "step": 74 }, { "epoch": 0.034541320054114734, "grad_norm": 5.003959006589021, "learning_rate": 1.9999097922109303e-06, "loss": 1.0462, "step": 75 }, { "epoch": 0.0350018709881696, "grad_norm": 4.846960224876457, "learning_rate": 1.9998886327519336e-06, "loss": 0.9484, "step": 76 }, { "epoch": 0.035462421922224464, "grad_norm": 4.470757808126205, "learning_rate": 1.999865246155103e-06, "loss": 0.9723, "step": 77 }, { "epoch": 0.03592297285627932, "grad_norm": 4.207595287427176, "learning_rate": 1.9998396324725305e-06, "loss": 0.789, "step": 78 }, { "epoch": 0.03638352379033419, "grad_norm": 4.809676197913923, "learning_rate": 1.999811791761267e-06, "loss": 0.9338, "step": 79 }, { "epoch": 0.03684407472438905, "grad_norm": 4.652700872860048, "learning_rate": 1.999781724083324e-06, "loss": 0.9216, "step": 80 }, { "epoch": 0.03730462565844391, "grad_norm": 4.596251122838455, "learning_rate": 1.9997494295056746e-06, "loss": 0.9111, "step": 81 }, { "epoch": 0.03776517659249878, "grad_norm": 4.227792496891715, "learning_rate": 1.9997149081002514e-06, "loss": 0.8401, "step": 82 }, { "epoch": 0.03822572752655364, "grad_norm": 4.178649520550145, "learning_rate": 1.9996781599439464e-06, "loss": 0.9825, "step": 83 }, { "epoch": 0.0386862784606085, "grad_norm": 4.251021827723842, "learning_rate": 1.9996391851186118e-06, "loss": 0.9281, "step": 84 }, { "epoch": 0.039146829394663366, "grad_norm": 5.233035685458836, "learning_rate": 1.99959798371106e-06, "loss": 0.794, "step": 85 }, { "epoch": 0.03960738032871823, "grad_norm": 4.320798353650191, "learning_rate": 1.999554555813062e-06, "loss": 0.9005, "step": 86 }, { "epoch": 0.04006793126277309, "grad_norm": 4.427037207999404, "learning_rate": 1.9995089015213493e-06, "loss": 1.0936, "step": 87 }, { "epoch": 0.040528482196827954, "grad_norm": 4.331964356078285, "learning_rate": 1.999461020937611e-06, "loss": 0.7822, "step": 88 }, { "epoch": 0.04098903313088282, "grad_norm": 4.66453184904006, "learning_rate": 1.999410914168495e-06, "loss": 1.0616, "step": 89 }, { "epoch": 0.041449584064937685, "grad_norm": 4.266070028677423, "learning_rate": 1.99935858132561e-06, "loss": 0.8662, "step": 90 }, { "epoch": 0.04191013499899254, "grad_norm": 4.84120877032618, "learning_rate": 1.99930402252552e-06, "loss": 0.8339, "step": 91 }, { "epoch": 0.04237068593304741, "grad_norm": 4.144895073097025, "learning_rate": 1.9992472378897497e-06, "loss": 0.8032, "step": 92 }, { "epoch": 0.042831236867102274, "grad_norm": 4.51841620791687, "learning_rate": 1.9991882275447794e-06, "loss": 0.822, "step": 93 }, { "epoch": 0.04329178780115713, "grad_norm": 4.224157916827472, "learning_rate": 1.9991269916220485e-06, "loss": 0.9154, "step": 94 }, { "epoch": 0.043752338735212, "grad_norm": 4.211120105950228, "learning_rate": 1.999063530257952e-06, "loss": 0.8124, "step": 95 }, { "epoch": 0.04421288966926686, "grad_norm": 4.326625423970908, "learning_rate": 1.998997843593845e-06, "loss": 0.7527, "step": 96 }, { "epoch": 0.04467344060332172, "grad_norm": 4.5973326946216835, "learning_rate": 1.9989299317760345e-06, "loss": 0.8709, "step": 97 }, { "epoch": 0.045133991537376586, "grad_norm": 4.930344036646002, "learning_rate": 1.9988597949557883e-06, "loss": 0.9289, "step": 98 }, { "epoch": 0.04559454247143145, "grad_norm": 4.574340272138858, "learning_rate": 1.998787433289327e-06, "loss": 0.8849, "step": 99 }, { "epoch": 0.046055093405486316, "grad_norm": 4.144628028425758, "learning_rate": 1.9987128469378284e-06, "loss": 0.7667, "step": 100 }, { "epoch": 0.046515644339541175, "grad_norm": 3.7709818624172584, "learning_rate": 1.998636036067425e-06, "loss": 0.789, "step": 101 }, { "epoch": 0.04697619527359604, "grad_norm": 4.045004894525086, "learning_rate": 1.9985570008492044e-06, "loss": 0.7691, "step": 102 }, { "epoch": 0.047436746207650905, "grad_norm": 4.06166254171897, "learning_rate": 1.9984757414592083e-06, "loss": 1.0352, "step": 103 }, { "epoch": 0.047897297141705764, "grad_norm": 4.0608430521591306, "learning_rate": 1.998392258078433e-06, "loss": 0.8233, "step": 104 }, { "epoch": 0.04835784807576063, "grad_norm": 5.823247042958998, "learning_rate": 1.998306550892828e-06, "loss": 0.813, "step": 105 }, { "epoch": 0.048818399009815494, "grad_norm": 4.696281685694475, "learning_rate": 1.9982186200932964e-06, "loss": 0.8591, "step": 106 }, { "epoch": 0.04927894994387035, "grad_norm": 4.437101080524055, "learning_rate": 1.998128465875694e-06, "loss": 0.8358, "step": 107 }, { "epoch": 0.04973950087792522, "grad_norm": 4.076497245294541, "learning_rate": 1.9980360884408288e-06, "loss": 0.8754, "step": 108 }, { "epoch": 0.05020005181198008, "grad_norm": 4.105676237033945, "learning_rate": 1.997941487994461e-06, "loss": 0.8985, "step": 109 }, { "epoch": 0.05066060274603494, "grad_norm": 4.571891004448879, "learning_rate": 1.9978446647473024e-06, "loss": 0.9675, "step": 110 }, { "epoch": 0.051121153680089806, "grad_norm": 4.610424652588345, "learning_rate": 1.9977456189150163e-06, "loss": 1.02, "step": 111 }, { "epoch": 0.05158170461414467, "grad_norm": 4.28574088213822, "learning_rate": 1.9976443507182152e-06, "loss": 0.8326, "step": 112 }, { "epoch": 0.05204225554819954, "grad_norm": 4.396979919367839, "learning_rate": 1.997540860382463e-06, "loss": 1.0093, "step": 113 }, { "epoch": 0.052502806482254395, "grad_norm": 3.9647977291434344, "learning_rate": 1.997435148138272e-06, "loss": 0.7961, "step": 114 }, { "epoch": 0.05296335741630926, "grad_norm": 4.236292798120858, "learning_rate": 1.9973272142211046e-06, "loss": 0.8513, "step": 115 }, { "epoch": 0.053423908350364126, "grad_norm": 4.517811052464861, "learning_rate": 1.997217058871371e-06, "loss": 0.7646, "step": 116 }, { "epoch": 0.053884459284418984, "grad_norm": 4.543710006518788, "learning_rate": 1.9971046823344304e-06, "loss": 0.7397, "step": 117 }, { "epoch": 0.05434501021847385, "grad_norm": 4.941222980729467, "learning_rate": 1.9969900848605877e-06, "loss": 0.9873, "step": 118 }, { "epoch": 0.054805561152528715, "grad_norm": 4.345465168929934, "learning_rate": 1.9968732667050966e-06, "loss": 0.8196, "step": 119 }, { "epoch": 0.05526611208658357, "grad_norm": 4.396955990813849, "learning_rate": 1.9967542281281557e-06, "loss": 0.9636, "step": 120 }, { "epoch": 0.05572666302063844, "grad_norm": 4.186347539540102, "learning_rate": 1.9966329693949093e-06, "loss": 0.9839, "step": 121 }, { "epoch": 0.0561872139546933, "grad_norm": 4.672883164820037, "learning_rate": 1.996509490775449e-06, "loss": 0.863, "step": 122 }, { "epoch": 0.05664776488874816, "grad_norm": 4.045454181835613, "learning_rate": 1.996383792544808e-06, "loss": 0.8478, "step": 123 }, { "epoch": 0.05710831582280303, "grad_norm": 4.245826525450453, "learning_rate": 1.996255874982965e-06, "loss": 0.795, "step": 124 }, { "epoch": 0.05756886675685789, "grad_norm": 4.063360737403201, "learning_rate": 1.996125738374842e-06, "loss": 0.7381, "step": 125 }, { "epoch": 0.05802941769091276, "grad_norm": 4.6001202563009125, "learning_rate": 1.995993383010303e-06, "loss": 0.8197, "step": 126 }, { "epoch": 0.058489968624967616, "grad_norm": 4.3572960956134645, "learning_rate": 1.9958588091841553e-06, "loss": 0.885, "step": 127 }, { "epoch": 0.05895051955902248, "grad_norm": 4.529582513351954, "learning_rate": 1.9957220171961465e-06, "loss": 0.8511, "step": 128 }, { "epoch": 0.059411070493077346, "grad_norm": 4.639837798664132, "learning_rate": 1.995583007350964e-06, "loss": 0.9577, "step": 129 }, { "epoch": 0.059871621427132204, "grad_norm": 4.434619114955021, "learning_rate": 1.9954417799582382e-06, "loss": 0.7616, "step": 130 }, { "epoch": 0.06033217236118707, "grad_norm": 4.915749757513064, "learning_rate": 1.9952983353325356e-06, "loss": 0.7644, "step": 131 }, { "epoch": 0.060792723295241935, "grad_norm": 4.661010308603537, "learning_rate": 1.9951526737933634e-06, "loss": 0.7593, "step": 132 }, { "epoch": 0.06125327422929679, "grad_norm": 4.448121597336482, "learning_rate": 1.9950047956651657e-06, "loss": 0.7689, "step": 133 }, { "epoch": 0.06171382516335166, "grad_norm": 4.522674747885371, "learning_rate": 1.9948547012773246e-06, "loss": 0.8117, "step": 134 }, { "epoch": 0.062174376097406524, "grad_norm": 4.406397713315307, "learning_rate": 1.9947023909641574e-06, "loss": 0.9077, "step": 135 }, { "epoch": 0.06263492703146138, "grad_norm": 5.124137042580522, "learning_rate": 1.994547865064919e-06, "loss": 1.0187, "step": 136 }, { "epoch": 0.06309547796551625, "grad_norm": 4.201946675442459, "learning_rate": 1.9943911239237974e-06, "loss": 0.7254, "step": 137 }, { "epoch": 0.06355602889957111, "grad_norm": 4.275188257218188, "learning_rate": 1.9942321678899163e-06, "loss": 0.7436, "step": 138 }, { "epoch": 0.06401657983362598, "grad_norm": 4.381900439416534, "learning_rate": 1.9940709973173314e-06, "loss": 0.7357, "step": 139 }, { "epoch": 0.06447713076768084, "grad_norm": 3.9780787969876203, "learning_rate": 1.993907612565032e-06, "loss": 0.782, "step": 140 }, { "epoch": 0.0649376817017357, "grad_norm": 4.490574866486918, "learning_rate": 1.9937420139969395e-06, "loss": 0.9729, "step": 141 }, { "epoch": 0.06539823263579056, "grad_norm": 4.347751558881718, "learning_rate": 1.993574201981905e-06, "loss": 0.8677, "step": 142 }, { "epoch": 0.06585878356984542, "grad_norm": 4.242105216397751, "learning_rate": 1.9934041768937114e-06, "loss": 0.8068, "step": 143 }, { "epoch": 0.06631933450390029, "grad_norm": 5.06767925907119, "learning_rate": 1.9932319391110695e-06, "loss": 0.9671, "step": 144 }, { "epoch": 0.06677988543795516, "grad_norm": 4.72331586804004, "learning_rate": 1.99305748901762e-06, "loss": 0.9432, "step": 145 }, { "epoch": 0.06724043637201002, "grad_norm": 4.0870518776790465, "learning_rate": 1.9928808270019296e-06, "loss": 0.7749, "step": 146 }, { "epoch": 0.06770098730606489, "grad_norm": 4.3908449538960825, "learning_rate": 1.9927019534574937e-06, "loss": 0.9355, "step": 147 }, { "epoch": 0.06816153824011974, "grad_norm": 4.0013692855754135, "learning_rate": 1.992520868782732e-06, "loss": 0.7393, "step": 148 }, { "epoch": 0.0686220891741746, "grad_norm": 4.098350803994796, "learning_rate": 1.9923375733809905e-06, "loss": 0.9536, "step": 149 }, { "epoch": 0.06908264010822947, "grad_norm": 4.769415905979551, "learning_rate": 1.992152067660539e-06, "loss": 0.7989, "step": 150 }, { "epoch": 0.06954319104228433, "grad_norm": 3.920398264277416, "learning_rate": 1.9919643520345695e-06, "loss": 0.7356, "step": 151 }, { "epoch": 0.0700037419763392, "grad_norm": 4.708310457059996, "learning_rate": 1.991774426921198e-06, "loss": 0.8233, "step": 152 }, { "epoch": 0.07046429291039406, "grad_norm": 4.44906528530574, "learning_rate": 1.99158229274346e-06, "loss": 0.8449, "step": 153 }, { "epoch": 0.07092484384444893, "grad_norm": 4.745426516683283, "learning_rate": 1.9913879499293136e-06, "loss": 0.8554, "step": 154 }, { "epoch": 0.07138539477850378, "grad_norm": 4.786550860807339, "learning_rate": 1.9911913989116345e-06, "loss": 0.8547, "step": 155 }, { "epoch": 0.07184594571255865, "grad_norm": 3.698772513259385, "learning_rate": 1.990992640128218e-06, "loss": 0.7386, "step": 156 }, { "epoch": 0.07230649664661351, "grad_norm": 4.030793701902804, "learning_rate": 1.990791674021776e-06, "loss": 0.7488, "step": 157 }, { "epoch": 0.07276704758066838, "grad_norm": 3.7563936251632826, "learning_rate": 1.9905885010399386e-06, "loss": 0.7332, "step": 158 }, { "epoch": 0.07322759851472324, "grad_norm": 4.250874895215243, "learning_rate": 1.9903831216352494e-06, "loss": 0.7266, "step": 159 }, { "epoch": 0.0736881494487781, "grad_norm": 4.522701632866992, "learning_rate": 1.9901755362651685e-06, "loss": 0.8244, "step": 160 }, { "epoch": 0.07414870038283296, "grad_norm": 5.177419644782726, "learning_rate": 1.9899657453920676e-06, "loss": 0.9211, "step": 161 }, { "epoch": 0.07460925131688782, "grad_norm": 4.255873603369332, "learning_rate": 1.989753749483233e-06, "loss": 0.8841, "step": 162 }, { "epoch": 0.07506980225094269, "grad_norm": 4.915340250065574, "learning_rate": 1.989539549010861e-06, "loss": 0.8094, "step": 163 }, { "epoch": 0.07553035318499755, "grad_norm": 4.357946034809001, "learning_rate": 1.9893231444520584e-06, "loss": 0.9189, "step": 164 }, { "epoch": 0.07599090411905242, "grad_norm": 4.659198788694097, "learning_rate": 1.9891045362888413e-06, "loss": 0.9028, "step": 165 }, { "epoch": 0.07645145505310728, "grad_norm": 4.574986004183622, "learning_rate": 1.988883725008136e-06, "loss": 1.0211, "step": 166 }, { "epoch": 0.07691200598716215, "grad_norm": 4.4643266271689255, "learning_rate": 1.9886607111017727e-06, "loss": 0.7172, "step": 167 }, { "epoch": 0.077372556921217, "grad_norm": 5.058586608040258, "learning_rate": 1.988435495066491e-06, "loss": 0.8611, "step": 168 }, { "epoch": 0.07783310785527187, "grad_norm": 3.748500471393698, "learning_rate": 1.988208077403932e-06, "loss": 0.6844, "step": 169 }, { "epoch": 0.07829365878932673, "grad_norm": 4.3748832968972655, "learning_rate": 1.9879784586206446e-06, "loss": 0.7836, "step": 170 }, { "epoch": 0.0787542097233816, "grad_norm": 4.172920564010643, "learning_rate": 1.987746639228077e-06, "loss": 0.5776, "step": 171 }, { "epoch": 0.07921476065743646, "grad_norm": 4.159267576934922, "learning_rate": 1.9875126197425812e-06, "loss": 0.7184, "step": 172 }, { "epoch": 0.07967531159149133, "grad_norm": 4.505689230335674, "learning_rate": 1.987276400685409e-06, "loss": 0.6918, "step": 173 }, { "epoch": 0.08013586252554618, "grad_norm": 4.519012737295589, "learning_rate": 1.9870379825827105e-06, "loss": 0.9395, "step": 174 }, { "epoch": 0.08059641345960104, "grad_norm": 4.1695351389198425, "learning_rate": 1.9867973659655357e-06, "loss": 0.7781, "step": 175 }, { "epoch": 0.08105696439365591, "grad_norm": 4.15627139998625, "learning_rate": 1.9865545513698304e-06, "loss": 1.0332, "step": 176 }, { "epoch": 0.08151751532771077, "grad_norm": 3.8253711076749712, "learning_rate": 1.9863095393364363e-06, "loss": 0.7669, "step": 177 }, { "epoch": 0.08197806626176564, "grad_norm": 4.7487429261996805, "learning_rate": 1.9860623304110895e-06, "loss": 1.1474, "step": 178 }, { "epoch": 0.0824386171958205, "grad_norm": 4.3003353650615965, "learning_rate": 1.9858129251444203e-06, "loss": 0.7946, "step": 179 }, { "epoch": 0.08289916812987537, "grad_norm": 4.065928742500816, "learning_rate": 1.9855613240919496e-06, "loss": 0.762, "step": 180 }, { "epoch": 0.08335971906393022, "grad_norm": 4.649889255988966, "learning_rate": 1.985307527814091e-06, "loss": 0.9952, "step": 181 }, { "epoch": 0.08382026999798509, "grad_norm": 4.746336859002219, "learning_rate": 1.9850515368761465e-06, "loss": 0.9683, "step": 182 }, { "epoch": 0.08428082093203995, "grad_norm": 4.821335252219909, "learning_rate": 1.9847933518483066e-06, "loss": 0.7904, "step": 183 }, { "epoch": 0.08474137186609482, "grad_norm": 4.3850129247838865, "learning_rate": 1.9845329733056488e-06, "loss": 0.8574, "step": 184 }, { "epoch": 0.08520192280014968, "grad_norm": 4.216639149080323, "learning_rate": 1.9842704018281364e-06, "loss": 0.8613, "step": 185 }, { "epoch": 0.08566247373420455, "grad_norm": 4.170385902014758, "learning_rate": 1.984005638000618e-06, "loss": 0.7891, "step": 186 }, { "epoch": 0.0861230246682594, "grad_norm": 4.95771863106949, "learning_rate": 1.983738682412824e-06, "loss": 0.8338, "step": 187 }, { "epoch": 0.08658357560231426, "grad_norm": 4.088259220781835, "learning_rate": 1.983469535659369e-06, "loss": 0.7117, "step": 188 }, { "epoch": 0.08704412653636913, "grad_norm": 4.420560242221758, "learning_rate": 1.983198198339745e-06, "loss": 0.8087, "step": 189 }, { "epoch": 0.087504677470424, "grad_norm": 3.8271838534460403, "learning_rate": 1.9829246710583258e-06, "loss": 0.8647, "step": 190 }, { "epoch": 0.08796522840447886, "grad_norm": 4.020814508062301, "learning_rate": 1.982648954424362e-06, "loss": 0.7712, "step": 191 }, { "epoch": 0.08842577933853372, "grad_norm": 4.753311217903237, "learning_rate": 1.982371049051981e-06, "loss": 0.84, "step": 192 }, { "epoch": 0.08888633027258859, "grad_norm": 4.200157457100054, "learning_rate": 1.982090955560185e-06, "loss": 0.7731, "step": 193 }, { "epoch": 0.08934688120664344, "grad_norm": 3.9182484514888465, "learning_rate": 1.981808674572851e-06, "loss": 0.829, "step": 194 }, { "epoch": 0.0898074321406983, "grad_norm": 4.2201896154655545, "learning_rate": 1.9815242067187273e-06, "loss": 0.9247, "step": 195 }, { "epoch": 0.09026798307475317, "grad_norm": 4.109287481014169, "learning_rate": 1.9812375526314335e-06, "loss": 0.8146, "step": 196 }, { "epoch": 0.09072853400880804, "grad_norm": 4.4825503124323385, "learning_rate": 1.9809487129494588e-06, "loss": 0.7407, "step": 197 }, { "epoch": 0.0911890849428629, "grad_norm": 4.473757840046989, "learning_rate": 1.9806576883161607e-06, "loss": 0.9692, "step": 198 }, { "epoch": 0.09164963587691777, "grad_norm": 5.190489328553833, "learning_rate": 1.9803644793797635e-06, "loss": 0.8647, "step": 199 }, { "epoch": 0.09211018681097263, "grad_norm": 4.2008848311866505, "learning_rate": 1.9800690867933567e-06, "loss": 0.7702, "step": 200 }, { "epoch": 0.09257073774502748, "grad_norm": 4.663675212875854, "learning_rate": 1.9797715112148933e-06, "loss": 0.8459, "step": 201 }, { "epoch": 0.09303128867908235, "grad_norm": 4.652613876445913, "learning_rate": 1.979471753307189e-06, "loss": 0.7597, "step": 202 }, { "epoch": 0.09349183961313721, "grad_norm": 3.7290109203710813, "learning_rate": 1.979169813737921e-06, "loss": 0.7179, "step": 203 }, { "epoch": 0.09395239054719208, "grad_norm": 4.501109657049456, "learning_rate": 1.9788656931796237e-06, "loss": 0.6973, "step": 204 }, { "epoch": 0.09441294148124695, "grad_norm": 4.2508533905120505, "learning_rate": 1.9785593923096927e-06, "loss": 0.7691, "step": 205 }, { "epoch": 0.09487349241530181, "grad_norm": 4.855836874305918, "learning_rate": 1.978250911810377e-06, "loss": 0.9953, "step": 206 }, { "epoch": 0.09533404334935666, "grad_norm": 4.898939817915782, "learning_rate": 1.9779402523687825e-06, "loss": 0.9503, "step": 207 }, { "epoch": 0.09579459428341153, "grad_norm": 4.747803253920184, "learning_rate": 1.977627414676867e-06, "loss": 0.8502, "step": 208 }, { "epoch": 0.09625514521746639, "grad_norm": 4.228981222607201, "learning_rate": 1.977312399431441e-06, "loss": 0.8798, "step": 209 }, { "epoch": 0.09671569615152126, "grad_norm": 4.037894026487023, "learning_rate": 1.9769952073341655e-06, "loss": 0.728, "step": 210 }, { "epoch": 0.09717624708557612, "grad_norm": 4.2470083828954275, "learning_rate": 1.976675839091549e-06, "loss": 0.9108, "step": 211 }, { "epoch": 0.09763679801963099, "grad_norm": 5.2238103707415195, "learning_rate": 1.976354295414948e-06, "loss": 0.9388, "step": 212 }, { "epoch": 0.09809734895368585, "grad_norm": 4.330664521185689, "learning_rate": 1.9760305770205648e-06, "loss": 0.8079, "step": 213 }, { "epoch": 0.0985578998877407, "grad_norm": 4.262064704864086, "learning_rate": 1.9757046846294446e-06, "loss": 0.7511, "step": 214 }, { "epoch": 0.09901845082179557, "grad_norm": 3.8961249229022665, "learning_rate": 1.975376618967476e-06, "loss": 0.7945, "step": 215 }, { "epoch": 0.09947900175585044, "grad_norm": 4.360160703751904, "learning_rate": 1.975046380765387e-06, "loss": 0.6704, "step": 216 }, { "epoch": 0.0999395526899053, "grad_norm": 4.351774318183123, "learning_rate": 1.9747139707587467e-06, "loss": 0.6588, "step": 217 }, { "epoch": 0.10040010362396017, "grad_norm": 3.8409225180675404, "learning_rate": 1.9743793896879595e-06, "loss": 0.8798, "step": 218 }, { "epoch": 0.10086065455801503, "grad_norm": 3.852652192449355, "learning_rate": 1.974042638298267e-06, "loss": 0.6984, "step": 219 }, { "epoch": 0.10132120549206988, "grad_norm": 3.8154544501749506, "learning_rate": 1.9737037173397446e-06, "loss": 0.7716, "step": 220 }, { "epoch": 0.10178175642612475, "grad_norm": 4.529757091125926, "learning_rate": 1.9733626275672996e-06, "loss": 0.9577, "step": 221 }, { "epoch": 0.10224230736017961, "grad_norm": 4.677125552818579, "learning_rate": 1.973019369740671e-06, "loss": 0.7441, "step": 222 }, { "epoch": 0.10270285829423448, "grad_norm": 4.51884924127569, "learning_rate": 1.972673944624426e-06, "loss": 0.8766, "step": 223 }, { "epoch": 0.10316340922828934, "grad_norm": 3.8583276565135427, "learning_rate": 1.9723263529879598e-06, "loss": 1.0781, "step": 224 }, { "epoch": 0.10362396016234421, "grad_norm": 4.278075830909717, "learning_rate": 1.9719765956054933e-06, "loss": 0.8362, "step": 225 }, { "epoch": 0.10408451109639907, "grad_norm": 3.9077379945647186, "learning_rate": 1.971624673256071e-06, "loss": 0.7102, "step": 226 }, { "epoch": 0.10454506203045393, "grad_norm": 3.5420758114193585, "learning_rate": 1.9712705867235604e-06, "loss": 0.5952, "step": 227 }, { "epoch": 0.10500561296450879, "grad_norm": 4.78260071293026, "learning_rate": 1.970914336796648e-06, "loss": 0.7009, "step": 228 }, { "epoch": 0.10546616389856366, "grad_norm": 4.358993060438873, "learning_rate": 1.97055592426884e-06, "loss": 0.8754, "step": 229 }, { "epoch": 0.10592671483261852, "grad_norm": 4.431446340602275, "learning_rate": 1.9701953499384593e-06, "loss": 0.899, "step": 230 }, { "epoch": 0.10638726576667339, "grad_norm": 4.209205046758923, "learning_rate": 1.9698326146086445e-06, "loss": 0.7829, "step": 231 }, { "epoch": 0.10684781670072825, "grad_norm": 4.573726256701481, "learning_rate": 1.9694677190873467e-06, "loss": 0.8623, "step": 232 }, { "epoch": 0.1073083676347831, "grad_norm": 3.875616804677292, "learning_rate": 1.9691006641873296e-06, "loss": 0.8645, "step": 233 }, { "epoch": 0.10776891856883797, "grad_norm": 4.092615975636817, "learning_rate": 1.968731450726166e-06, "loss": 0.7946, "step": 234 }, { "epoch": 0.10822946950289283, "grad_norm": 4.247327566170609, "learning_rate": 1.9683600795262364e-06, "loss": 0.851, "step": 235 }, { "epoch": 0.1086900204369477, "grad_norm": 4.829416029992188, "learning_rate": 1.9679865514147277e-06, "loss": 0.9661, "step": 236 }, { "epoch": 0.10915057137100256, "grad_norm": 3.758367463279879, "learning_rate": 1.9676108672236317e-06, "loss": 0.7398, "step": 237 }, { "epoch": 0.10961112230505743, "grad_norm": 4.315769250485394, "learning_rate": 1.9672330277897414e-06, "loss": 0.7113, "step": 238 }, { "epoch": 0.1100716732391123, "grad_norm": 4.294373502509573, "learning_rate": 1.9668530339546514e-06, "loss": 0.8176, "step": 239 }, { "epoch": 0.11053222417316715, "grad_norm": 4.303627297877947, "learning_rate": 1.966470886564755e-06, "loss": 0.8932, "step": 240 }, { "epoch": 0.11099277510722201, "grad_norm": 3.687193811388138, "learning_rate": 1.9660865864712412e-06, "loss": 0.629, "step": 241 }, { "epoch": 0.11145332604127688, "grad_norm": 4.25784352382271, "learning_rate": 1.965700134530095e-06, "loss": 0.6582, "step": 242 }, { "epoch": 0.11191387697533174, "grad_norm": 4.1719879031219795, "learning_rate": 1.9653115316020935e-06, "loss": 0.8589, "step": 243 }, { "epoch": 0.1123744279093866, "grad_norm": 4.450668614592456, "learning_rate": 1.9649207785528065e-06, "loss": 0.8563, "step": 244 }, { "epoch": 0.11283497884344147, "grad_norm": 3.899167108612543, "learning_rate": 1.96452787625259e-06, "loss": 0.8878, "step": 245 }, { "epoch": 0.11329552977749632, "grad_norm": 3.897951147294836, "learning_rate": 1.9641328255765913e-06, "loss": 0.7245, "step": 246 }, { "epoch": 0.11375608071155119, "grad_norm": 4.735995144055128, "learning_rate": 1.963735627404739e-06, "loss": 0.852, "step": 247 }, { "epoch": 0.11421663164560605, "grad_norm": 4.6857408806623075, "learning_rate": 1.963336282621747e-06, "loss": 0.7829, "step": 248 }, { "epoch": 0.11467718257966092, "grad_norm": 5.00388863906722, "learning_rate": 1.962934792117111e-06, "loss": 0.715, "step": 249 }, { "epoch": 0.11513773351371578, "grad_norm": 3.9884869716753157, "learning_rate": 1.9625311567851045e-06, "loss": 0.7981, "step": 250 }, { "epoch": 0.11559828444777065, "grad_norm": 4.7476817555071245, "learning_rate": 1.9621253775247795e-06, "loss": 0.9195, "step": 251 }, { "epoch": 0.11605883538182551, "grad_norm": 4.551564425357827, "learning_rate": 1.9617174552399633e-06, "loss": 0.9688, "step": 252 }, { "epoch": 0.11651938631588037, "grad_norm": 4.008168636629303, "learning_rate": 1.961307390839255e-06, "loss": 0.8306, "step": 253 }, { "epoch": 0.11697993724993523, "grad_norm": 4.159734113311041, "learning_rate": 1.960895185236028e-06, "loss": 0.8021, "step": 254 }, { "epoch": 0.1174404881839901, "grad_norm": 4.296722744535187, "learning_rate": 1.9604808393484217e-06, "loss": 0.7712, "step": 255 }, { "epoch": 0.11790103911804496, "grad_norm": 4.436792322248848, "learning_rate": 1.960064354099345e-06, "loss": 0.8384, "step": 256 }, { "epoch": 0.11836159005209983, "grad_norm": 3.8838808202373123, "learning_rate": 1.959645730416471e-06, "loss": 0.5416, "step": 257 }, { "epoch": 0.11882214098615469, "grad_norm": 4.410338719179471, "learning_rate": 1.959224969232237e-06, "loss": 0.7943, "step": 258 }, { "epoch": 0.11928269192020956, "grad_norm": 3.8380761661203295, "learning_rate": 1.9588020714838394e-06, "loss": 0.7273, "step": 259 }, { "epoch": 0.11974324285426441, "grad_norm": 4.13224063167812, "learning_rate": 1.9583770381132357e-06, "loss": 0.7573, "step": 260 }, { "epoch": 0.12020379378831927, "grad_norm": 4.5327297001190505, "learning_rate": 1.9579498700671386e-06, "loss": 0.8338, "step": 261 }, { "epoch": 0.12066434472237414, "grad_norm": 4.198676892965317, "learning_rate": 1.9575205682970163e-06, "loss": 0.8196, "step": 262 }, { "epoch": 0.121124895656429, "grad_norm": 4.358195020938178, "learning_rate": 1.9570891337590895e-06, "loss": 0.725, "step": 263 }, { "epoch": 0.12158544659048387, "grad_norm": 4.58984489005725, "learning_rate": 1.956655567414329e-06, "loss": 0.9412, "step": 264 }, { "epoch": 0.12204599752453874, "grad_norm": 4.642202579074306, "learning_rate": 1.9562198702284552e-06, "loss": 0.998, "step": 265 }, { "epoch": 0.12250654845859359, "grad_norm": 4.219588034377072, "learning_rate": 1.955782043171933e-06, "loss": 0.7324, "step": 266 }, { "epoch": 0.12296709939264845, "grad_norm": 4.295416292926601, "learning_rate": 1.9553420872199732e-06, "loss": 0.7907, "step": 267 }, { "epoch": 0.12342765032670332, "grad_norm": 4.329179679925182, "learning_rate": 1.954900003352527e-06, "loss": 0.6968, "step": 268 }, { "epoch": 0.12388820126075818, "grad_norm": 4.227737323861334, "learning_rate": 1.954455792554285e-06, "loss": 0.9123, "step": 269 }, { "epoch": 0.12434875219481305, "grad_norm": 3.7102075257130562, "learning_rate": 1.9540094558146775e-06, "loss": 0.5998, "step": 270 }, { "epoch": 0.12480930312886791, "grad_norm": 4.600002037369864, "learning_rate": 1.9535609941278677e-06, "loss": 0.8487, "step": 271 }, { "epoch": 0.12526985406292276, "grad_norm": 3.7088387666869864, "learning_rate": 1.9531104084927526e-06, "loss": 0.6891, "step": 272 }, { "epoch": 0.12573040499697763, "grad_norm": 4.503571399554216, "learning_rate": 1.9526576999129613e-06, "loss": 0.8089, "step": 273 }, { "epoch": 0.1261909559310325, "grad_norm": 3.9678400335749053, "learning_rate": 1.9522028693968496e-06, "loss": 0.8514, "step": 274 }, { "epoch": 0.12665150686508736, "grad_norm": 4.89579226315999, "learning_rate": 1.951745917957501e-06, "loss": 0.8972, "step": 275 }, { "epoch": 0.12711205779914223, "grad_norm": 5.086241683978352, "learning_rate": 1.951286846612723e-06, "loss": 0.7791, "step": 276 }, { "epoch": 0.1275726087331971, "grad_norm": 4.564809465147914, "learning_rate": 1.9508256563850437e-06, "loss": 0.6901, "step": 277 }, { "epoch": 0.12803315966725196, "grad_norm": 4.418147372958734, "learning_rate": 1.9503623483017125e-06, "loss": 0.8073, "step": 278 }, { "epoch": 0.12849371060130682, "grad_norm": 4.666493858867394, "learning_rate": 1.949896923394695e-06, "loss": 0.8376, "step": 279 }, { "epoch": 0.12895426153536169, "grad_norm": 4.34267784599555, "learning_rate": 1.9494293827006724e-06, "loss": 0.8378, "step": 280 }, { "epoch": 0.12941481246941655, "grad_norm": 3.9512493288315276, "learning_rate": 1.9489597272610374e-06, "loss": 0.9685, "step": 281 }, { "epoch": 0.1298753634034714, "grad_norm": 4.427329713346436, "learning_rate": 1.948487958121895e-06, "loss": 0.873, "step": 282 }, { "epoch": 0.13033591433752625, "grad_norm": 4.240528109540963, "learning_rate": 1.9480140763340563e-06, "loss": 0.7996, "step": 283 }, { "epoch": 0.13079646527158112, "grad_norm": 4.317729657439711, "learning_rate": 1.9475380829530394e-06, "loss": 0.7201, "step": 284 }, { "epoch": 0.13125701620563598, "grad_norm": 4.860423932028537, "learning_rate": 1.947059979039065e-06, "loss": 0.9474, "step": 285 }, { "epoch": 0.13171756713969085, "grad_norm": 4.740334803503774, "learning_rate": 1.9465797656570544e-06, "loss": 0.7562, "step": 286 }, { "epoch": 0.13217811807374572, "grad_norm": 4.134901709248649, "learning_rate": 1.946097443876629e-06, "loss": 0.7436, "step": 287 }, { "epoch": 0.13263866900780058, "grad_norm": 3.9229024460054998, "learning_rate": 1.9456130147721057e-06, "loss": 0.8336, "step": 288 }, { "epoch": 0.13309921994185545, "grad_norm": 3.93608922156453, "learning_rate": 1.9451264794224948e-06, "loss": 0.5832, "step": 289 }, { "epoch": 0.1335597708759103, "grad_norm": 4.032970774693436, "learning_rate": 1.944637838911498e-06, "loss": 0.5932, "step": 290 }, { "epoch": 0.13402032180996518, "grad_norm": 4.104769729835206, "learning_rate": 1.944147094327506e-06, "loss": 0.7631, "step": 291 }, { "epoch": 0.13448087274402004, "grad_norm": 3.849715127801897, "learning_rate": 1.9436542467635968e-06, "loss": 0.5067, "step": 292 }, { "epoch": 0.1349414236780749, "grad_norm": 4.753360776549398, "learning_rate": 1.943159297317532e-06, "loss": 0.7707, "step": 293 }, { "epoch": 0.13540197461212977, "grad_norm": 3.9920775650471554, "learning_rate": 1.9426622470917553e-06, "loss": 0.79, "step": 294 }, { "epoch": 0.13586252554618464, "grad_norm": 4.4550649186852675, "learning_rate": 1.942163097193389e-06, "loss": 0.8822, "step": 295 }, { "epoch": 0.13632307648023947, "grad_norm": 3.901014540521738, "learning_rate": 1.941661848734233e-06, "loss": 0.8589, "step": 296 }, { "epoch": 0.13678362741429434, "grad_norm": 4.245457712302958, "learning_rate": 1.9411585028307604e-06, "loss": 0.7626, "step": 297 }, { "epoch": 0.1372441783483492, "grad_norm": 3.562734012442427, "learning_rate": 1.9406530606041173e-06, "loss": 0.6786, "step": 298 }, { "epoch": 0.13770472928240407, "grad_norm": 4.010523604960292, "learning_rate": 1.940145523180118e-06, "loss": 0.7643, "step": 299 }, { "epoch": 0.13816528021645894, "grad_norm": 4.005196576866189, "learning_rate": 1.939635891689245e-06, "loss": 0.6738, "step": 300 }, { "epoch": 0.1386258311505138, "grad_norm": 4.005702483561272, "learning_rate": 1.9391241672666437e-06, "loss": 0.8744, "step": 301 }, { "epoch": 0.13908638208456867, "grad_norm": 4.028845907187332, "learning_rate": 1.938610351052122e-06, "loss": 0.732, "step": 302 }, { "epoch": 0.13954693301862353, "grad_norm": 4.242157728053936, "learning_rate": 1.938094444190147e-06, "loss": 0.807, "step": 303 }, { "epoch": 0.1400074839526784, "grad_norm": 4.772605405551662, "learning_rate": 1.937576447829842e-06, "loss": 0.7235, "step": 304 }, { "epoch": 0.14046803488673326, "grad_norm": 4.259790715080789, "learning_rate": 1.937056363124985e-06, "loss": 0.7188, "step": 305 }, { "epoch": 0.14092858582078813, "grad_norm": 3.8028053351626765, "learning_rate": 1.936534191234006e-06, "loss": 0.876, "step": 306 }, { "epoch": 0.141389136754843, "grad_norm": 4.491827775058677, "learning_rate": 1.9360099333199825e-06, "loss": 0.799, "step": 307 }, { "epoch": 0.14184968768889786, "grad_norm": 4.7735817649664485, "learning_rate": 1.935483590550639e-06, "loss": 0.947, "step": 308 }, { "epoch": 0.1423102386229527, "grad_norm": 4.67656503326881, "learning_rate": 1.9349551640983444e-06, "loss": 0.7009, "step": 309 }, { "epoch": 0.14277078955700756, "grad_norm": 4.201453854604124, "learning_rate": 1.934424655140109e-06, "loss": 0.8097, "step": 310 }, { "epoch": 0.14323134049106243, "grad_norm": 4.8311090437366335, "learning_rate": 1.933892064857579e-06, "loss": 0.7444, "step": 311 }, { "epoch": 0.1436918914251173, "grad_norm": 4.648733387787015, "learning_rate": 1.933357394437041e-06, "loss": 0.8561, "step": 312 }, { "epoch": 0.14415244235917216, "grad_norm": 4.234925514729082, "learning_rate": 1.93282064506941e-06, "loss": 0.7065, "step": 313 }, { "epoch": 0.14461299329322702, "grad_norm": 4.169230155349272, "learning_rate": 1.9322818179502356e-06, "loss": 0.7548, "step": 314 }, { "epoch": 0.1450735442272819, "grad_norm": 4.062247427430783, "learning_rate": 1.931740914279693e-06, "loss": 0.7145, "step": 315 }, { "epoch": 0.14553409516133675, "grad_norm": 4.013627249533899, "learning_rate": 1.9311979352625832e-06, "loss": 0.8251, "step": 316 }, { "epoch": 0.14599464609539162, "grad_norm": 4.2111518012644895, "learning_rate": 1.930652882108331e-06, "loss": 0.8628, "step": 317 }, { "epoch": 0.14645519702944648, "grad_norm": 3.9943870113048896, "learning_rate": 1.930105756030979e-06, "loss": 0.7935, "step": 318 }, { "epoch": 0.14691574796350135, "grad_norm": 4.463273773595843, "learning_rate": 1.929556558249189e-06, "loss": 0.7582, "step": 319 }, { "epoch": 0.1473762988975562, "grad_norm": 4.597314987282698, "learning_rate": 1.9290052899862353e-06, "loss": 0.8646, "step": 320 }, { "epoch": 0.14783684983161108, "grad_norm": 3.7323458415204875, "learning_rate": 1.9284519524700063e-06, "loss": 0.6209, "step": 321 }, { "epoch": 0.14829740076566592, "grad_norm": 3.9712764952322934, "learning_rate": 1.9278965469329976e-06, "loss": 0.6612, "step": 322 }, { "epoch": 0.14875795169972078, "grad_norm": 4.267856786186064, "learning_rate": 1.9273390746123115e-06, "loss": 0.7496, "step": 323 }, { "epoch": 0.14921850263377565, "grad_norm": 3.577837156385955, "learning_rate": 1.926779536749654e-06, "loss": 0.5674, "step": 324 }, { "epoch": 0.1496790535678305, "grad_norm": 4.050727736338075, "learning_rate": 1.9262179345913323e-06, "loss": 0.8076, "step": 325 }, { "epoch": 0.15013960450188538, "grad_norm": 4.351607267200179, "learning_rate": 1.9256542693882503e-06, "loss": 0.8437, "step": 326 }, { "epoch": 0.15060015543594024, "grad_norm": 4.338661852856848, "learning_rate": 1.925088542395909e-06, "loss": 0.7202, "step": 327 }, { "epoch": 0.1510607063699951, "grad_norm": 4.094622669232897, "learning_rate": 1.9245207548743994e-06, "loss": 0.6534, "step": 328 }, { "epoch": 0.15152125730404997, "grad_norm": 4.252788906414486, "learning_rate": 1.9239509080884043e-06, "loss": 0.6903, "step": 329 }, { "epoch": 0.15198180823810484, "grad_norm": 4.355070936247698, "learning_rate": 1.923379003307193e-06, "loss": 0.6722, "step": 330 }, { "epoch": 0.1524423591721597, "grad_norm": 3.8647701548257465, "learning_rate": 1.9228050418046165e-06, "loss": 0.5505, "step": 331 }, { "epoch": 0.15290291010621457, "grad_norm": 4.42906534029131, "learning_rate": 1.92222902485911e-06, "loss": 0.7296, "step": 332 }, { "epoch": 0.15336346104026943, "grad_norm": 4.532290715442148, "learning_rate": 1.921650953753685e-06, "loss": 0.5357, "step": 333 }, { "epoch": 0.1538240119743243, "grad_norm": 4.490709312811716, "learning_rate": 1.9210708297759284e-06, "loss": 0.82, "step": 334 }, { "epoch": 0.15428456290837914, "grad_norm": 3.9557959122199873, "learning_rate": 1.9204886542180007e-06, "loss": 0.7572, "step": 335 }, { "epoch": 0.154745113842434, "grad_norm": 4.0061370373897915, "learning_rate": 1.9199044283766315e-06, "loss": 0.7713, "step": 336 }, { "epoch": 0.15520566477648887, "grad_norm": 4.377451526069874, "learning_rate": 1.9193181535531177e-06, "loss": 0.7999, "step": 337 }, { "epoch": 0.15566621571054373, "grad_norm": 4.248785249084672, "learning_rate": 1.9187298310533184e-06, "loss": 0.8005, "step": 338 }, { "epoch": 0.1561267666445986, "grad_norm": 4.547097865600838, "learning_rate": 1.9181394621876556e-06, "loss": 0.7829, "step": 339 }, { "epoch": 0.15658731757865346, "grad_norm": 3.7303468901249124, "learning_rate": 1.917547048271109e-06, "loss": 0.825, "step": 340 }, { "epoch": 0.15704786851270833, "grad_norm": 3.686799316569441, "learning_rate": 1.916952590623212e-06, "loss": 0.5809, "step": 341 }, { "epoch": 0.1575084194467632, "grad_norm": 4.309857633609644, "learning_rate": 1.9163560905680514e-06, "loss": 0.6742, "step": 342 }, { "epoch": 0.15796897038081806, "grad_norm": 4.08374207766144, "learning_rate": 1.9157575494342636e-06, "loss": 0.6583, "step": 343 }, { "epoch": 0.15842952131487292, "grad_norm": 4.223611488659376, "learning_rate": 1.91515696855503e-06, "loss": 0.8102, "step": 344 }, { "epoch": 0.1588900722489278, "grad_norm": 3.7882348684901013, "learning_rate": 1.9145543492680763e-06, "loss": 0.7947, "step": 345 }, { "epoch": 0.15935062318298265, "grad_norm": 4.219981132739743, "learning_rate": 1.9139496929156683e-06, "loss": 0.8231, "step": 346 }, { "epoch": 0.15981117411703752, "grad_norm": 3.9216788494452706, "learning_rate": 1.913343000844609e-06, "loss": 0.7847, "step": 347 }, { "epoch": 0.16027172505109236, "grad_norm": 3.816620123866442, "learning_rate": 1.9127342744062357e-06, "loss": 0.7388, "step": 348 }, { "epoch": 0.16073227598514722, "grad_norm": 3.8108758437792942, "learning_rate": 1.912123514956417e-06, "loss": 0.7041, "step": 349 }, { "epoch": 0.1611928269192021, "grad_norm": 3.745138606070341, "learning_rate": 1.9115107238555497e-06, "loss": 0.6777, "step": 350 }, { "epoch": 0.16165337785325695, "grad_norm": 4.7299586136125775, "learning_rate": 1.9108959024685566e-06, "loss": 0.8283, "step": 351 }, { "epoch": 0.16211392878731182, "grad_norm": 3.9027029372850626, "learning_rate": 1.9102790521648817e-06, "loss": 0.6629, "step": 352 }, { "epoch": 0.16257447972136668, "grad_norm": 4.734607009009984, "learning_rate": 1.909660174318489e-06, "loss": 0.7579, "step": 353 }, { "epoch": 0.16303503065542155, "grad_norm": 4.193557142999425, "learning_rate": 1.909039270307858e-06, "loss": 0.7217, "step": 354 }, { "epoch": 0.1634955815894764, "grad_norm": 3.8891616435996346, "learning_rate": 1.9084163415159817e-06, "loss": 0.7571, "step": 355 }, { "epoch": 0.16395613252353128, "grad_norm": 3.9671585174983135, "learning_rate": 1.907791389330363e-06, "loss": 0.6706, "step": 356 }, { "epoch": 0.16441668345758614, "grad_norm": 3.854343506759972, "learning_rate": 1.9071644151430108e-06, "loss": 0.7525, "step": 357 }, { "epoch": 0.164877234391641, "grad_norm": 4.220647909614812, "learning_rate": 1.9065354203504398e-06, "loss": 0.8702, "step": 358 }, { "epoch": 0.16533778532569587, "grad_norm": 4.218543342136645, "learning_rate": 1.9059044063536633e-06, "loss": 0.7815, "step": 359 }, { "epoch": 0.16579833625975074, "grad_norm": 3.967431895620317, "learning_rate": 1.9052713745581931e-06, "loss": 0.9263, "step": 360 }, { "epoch": 0.16625888719380558, "grad_norm": 4.3176506443769265, "learning_rate": 1.9046363263740358e-06, "loss": 0.6538, "step": 361 }, { "epoch": 0.16671943812786044, "grad_norm": 4.372992737762752, "learning_rate": 1.9039992632156881e-06, "loss": 0.7697, "step": 362 }, { "epoch": 0.1671799890619153, "grad_norm": 3.611524330111492, "learning_rate": 1.9033601865021356e-06, "loss": 0.8046, "step": 363 }, { "epoch": 0.16764053999597017, "grad_norm": 5.100335629172945, "learning_rate": 1.902719097656849e-06, "loss": 0.7744, "step": 364 }, { "epoch": 0.16810109093002504, "grad_norm": 4.2910820311503945, "learning_rate": 1.9020759981077804e-06, "loss": 0.7453, "step": 365 }, { "epoch": 0.1685616418640799, "grad_norm": 3.9934847455412403, "learning_rate": 1.9014308892873608e-06, "loss": 0.7091, "step": 366 }, { "epoch": 0.16902219279813477, "grad_norm": 4.79113910750631, "learning_rate": 1.9007837726324965e-06, "loss": 0.7628, "step": 367 }, { "epoch": 0.16948274373218963, "grad_norm": 4.2389305396597035, "learning_rate": 1.9001346495845656e-06, "loss": 0.8035, "step": 368 }, { "epoch": 0.1699432946662445, "grad_norm": 5.218556711182811, "learning_rate": 1.899483521589416e-06, "loss": 0.8928, "step": 369 }, { "epoch": 0.17040384560029936, "grad_norm": 4.561899577262396, "learning_rate": 1.8988303900973612e-06, "loss": 0.7821, "step": 370 }, { "epoch": 0.17086439653435423, "grad_norm": 4.971302255234122, "learning_rate": 1.8981752565631767e-06, "loss": 0.8053, "step": 371 }, { "epoch": 0.1713249474684091, "grad_norm": 4.0274153130568955, "learning_rate": 1.8975181224460974e-06, "loss": 0.8333, "step": 372 }, { "epoch": 0.17178549840246396, "grad_norm": 4.241030604714798, "learning_rate": 1.8968589892098153e-06, "loss": 0.5961, "step": 373 }, { "epoch": 0.1722460493365188, "grad_norm": 4.21652163528252, "learning_rate": 1.8961978583224743e-06, "loss": 0.7489, "step": 374 }, { "epoch": 0.17270660027057366, "grad_norm": 4.287775539212254, "learning_rate": 1.8955347312566675e-06, "loss": 0.7364, "step": 375 }, { "epoch": 0.17316715120462853, "grad_norm": 4.655656736507351, "learning_rate": 1.8948696094894352e-06, "loss": 0.7033, "step": 376 }, { "epoch": 0.1736277021386834, "grad_norm": 4.799640615863611, "learning_rate": 1.8942024945022598e-06, "loss": 0.9007, "step": 377 }, { "epoch": 0.17408825307273826, "grad_norm": 4.02120671671727, "learning_rate": 1.8935333877810646e-06, "loss": 0.7226, "step": 378 }, { "epoch": 0.17454880400679312, "grad_norm": 4.205974563779713, "learning_rate": 1.892862290816208e-06, "loss": 0.7478, "step": 379 }, { "epoch": 0.175009354940848, "grad_norm": 3.9668768632583835, "learning_rate": 1.8921892051024816e-06, "loss": 0.7211, "step": 380 }, { "epoch": 0.17546990587490285, "grad_norm": 4.341089743114455, "learning_rate": 1.8915141321391078e-06, "loss": 0.8684, "step": 381 }, { "epoch": 0.17593045680895772, "grad_norm": 4.263383813430479, "learning_rate": 1.8908370734297338e-06, "loss": 0.9709, "step": 382 }, { "epoch": 0.17639100774301258, "grad_norm": 3.960368905469365, "learning_rate": 1.8901580304824311e-06, "loss": 0.5726, "step": 383 }, { "epoch": 0.17685155867706745, "grad_norm": 4.694760790072716, "learning_rate": 1.8894770048096903e-06, "loss": 0.7174, "step": 384 }, { "epoch": 0.17731210961112231, "grad_norm": 3.644487366899629, "learning_rate": 1.8887939979284182e-06, "loss": 0.8053, "step": 385 }, { "epoch": 0.17777266054517718, "grad_norm": 3.8511319821174794, "learning_rate": 1.8881090113599352e-06, "loss": 0.7401, "step": 386 }, { "epoch": 0.17823321147923202, "grad_norm": 4.289949436533432, "learning_rate": 1.88742204662997e-06, "loss": 0.7908, "step": 387 }, { "epoch": 0.17869376241328688, "grad_norm": 4.337554369302425, "learning_rate": 1.8867331052686583e-06, "loss": 0.8067, "step": 388 }, { "epoch": 0.17915431334734175, "grad_norm": 4.43244343165439, "learning_rate": 1.886042188810539e-06, "loss": 0.6545, "step": 389 }, { "epoch": 0.1796148642813966, "grad_norm": 4.095125069819511, "learning_rate": 1.8853492987945487e-06, "loss": 0.7014, "step": 390 }, { "epoch": 0.18007541521545148, "grad_norm": 4.306078449480186, "learning_rate": 1.8846544367640216e-06, "loss": 0.8056, "step": 391 }, { "epoch": 0.18053596614950634, "grad_norm": 4.3208989296150575, "learning_rate": 1.8839576042666833e-06, "loss": 0.677, "step": 392 }, { "epoch": 0.1809965170835612, "grad_norm": 4.119904121783205, "learning_rate": 1.883258802854649e-06, "loss": 0.8309, "step": 393 }, { "epoch": 0.18145706801761607, "grad_norm": 4.171443622541779, "learning_rate": 1.8825580340844187e-06, "loss": 0.737, "step": 394 }, { "epoch": 0.18191761895167094, "grad_norm": 4.311252354148097, "learning_rate": 1.8818552995168748e-06, "loss": 0.8269, "step": 395 }, { "epoch": 0.1823781698857258, "grad_norm": 4.202347614880689, "learning_rate": 1.8811506007172788e-06, "loss": 0.878, "step": 396 }, { "epoch": 0.18283872081978067, "grad_norm": 4.066402828422022, "learning_rate": 1.8804439392552664e-06, "loss": 0.7662, "step": 397 }, { "epoch": 0.18329927175383554, "grad_norm": 4.350660749953921, "learning_rate": 1.8797353167048457e-06, "loss": 0.6834, "step": 398 }, { "epoch": 0.1837598226878904, "grad_norm": 4.0936393027915745, "learning_rate": 1.8790247346443927e-06, "loss": 0.7483, "step": 399 }, { "epoch": 0.18422037362194527, "grad_norm": 3.991658835077223, "learning_rate": 1.8783121946566473e-06, "loss": 0.7503, "step": 400 }, { "epoch": 0.1846809245560001, "grad_norm": 4.174947538174928, "learning_rate": 1.8775976983287114e-06, "loss": 0.7258, "step": 401 }, { "epoch": 0.18514147549005497, "grad_norm": 3.9600953054025942, "learning_rate": 1.876881247252044e-06, "loss": 0.8638, "step": 402 }, { "epoch": 0.18560202642410983, "grad_norm": 4.288010214056422, "learning_rate": 1.8761628430224582e-06, "loss": 0.8995, "step": 403 }, { "epoch": 0.1860625773581647, "grad_norm": 3.898209264004386, "learning_rate": 1.875442487240117e-06, "loss": 0.7765, "step": 404 }, { "epoch": 0.18652312829221956, "grad_norm": 4.353101552505601, "learning_rate": 1.8747201815095313e-06, "loss": 0.7748, "step": 405 }, { "epoch": 0.18698367922627443, "grad_norm": 4.510950944280057, "learning_rate": 1.8739959274395547e-06, "loss": 0.702, "step": 406 }, { "epoch": 0.1874442301603293, "grad_norm": 4.057059804875028, "learning_rate": 1.87326972664338e-06, "loss": 0.7043, "step": 407 }, { "epoch": 0.18790478109438416, "grad_norm": 4.030996697845687, "learning_rate": 1.8725415807385368e-06, "loss": 0.7625, "step": 408 }, { "epoch": 0.18836533202843903, "grad_norm": 4.532473687619107, "learning_rate": 1.871811491346887e-06, "loss": 0.9238, "step": 409 }, { "epoch": 0.1888258829624939, "grad_norm": 4.036030240594292, "learning_rate": 1.8710794600946216e-06, "loss": 0.6368, "step": 410 }, { "epoch": 0.18928643389654876, "grad_norm": 4.077592780716509, "learning_rate": 1.8703454886122565e-06, "loss": 0.7384, "step": 411 }, { "epoch": 0.18974698483060362, "grad_norm": 4.58834568171342, "learning_rate": 1.8696095785346295e-06, "loss": 0.7173, "step": 412 }, { "epoch": 0.1902075357646585, "grad_norm": 4.554377471449734, "learning_rate": 1.8688717315008962e-06, "loss": 0.6627, "step": 413 }, { "epoch": 0.19066808669871332, "grad_norm": 3.956349812546939, "learning_rate": 1.8681319491545269e-06, "loss": 0.7935, "step": 414 }, { "epoch": 0.1911286376327682, "grad_norm": 4.865756737714845, "learning_rate": 1.8673902331433022e-06, "loss": 0.7569, "step": 415 }, { "epoch": 0.19158918856682305, "grad_norm": 3.8773212739896357, "learning_rate": 1.86664658511931e-06, "loss": 0.7788, "step": 416 }, { "epoch": 0.19204973950087792, "grad_norm": 3.8958634780338435, "learning_rate": 1.8659010067389414e-06, "loss": 0.7262, "step": 417 }, { "epoch": 0.19251029043493278, "grad_norm": 4.082107895191221, "learning_rate": 1.8651534996628869e-06, "loss": 0.8081, "step": 418 }, { "epoch": 0.19297084136898765, "grad_norm": 4.303884175939858, "learning_rate": 1.8644040655561334e-06, "loss": 0.8014, "step": 419 }, { "epoch": 0.19343139230304252, "grad_norm": 4.199467213351864, "learning_rate": 1.8636527060879601e-06, "loss": 0.6907, "step": 420 }, { "epoch": 0.19389194323709738, "grad_norm": 4.331645272471318, "learning_rate": 1.8628994229319338e-06, "loss": 0.7295, "step": 421 }, { "epoch": 0.19435249417115225, "grad_norm": 4.0043504788779245, "learning_rate": 1.8621442177659076e-06, "loss": 0.9281, "step": 422 }, { "epoch": 0.1948130451052071, "grad_norm": 3.986461499978195, "learning_rate": 1.8613870922720145e-06, "loss": 0.8836, "step": 423 }, { "epoch": 0.19527359603926198, "grad_norm": 3.959536452853786, "learning_rate": 1.8606280481366649e-06, "loss": 0.8382, "step": 424 }, { "epoch": 0.19573414697331684, "grad_norm": 4.038672269193263, "learning_rate": 1.8598670870505434e-06, "loss": 0.7431, "step": 425 }, { "epoch": 0.1961946979073717, "grad_norm": 4.123219826169265, "learning_rate": 1.8591042107086038e-06, "loss": 0.8146, "step": 426 }, { "epoch": 0.19665524884142654, "grad_norm": 4.333700465651219, "learning_rate": 1.8583394208100658e-06, "loss": 0.8862, "step": 427 }, { "epoch": 0.1971157997754814, "grad_norm": 4.854835553513849, "learning_rate": 1.857572719058412e-06, "loss": 0.7585, "step": 428 }, { "epoch": 0.19757635070953627, "grad_norm": 4.339276359078161, "learning_rate": 1.8568041071613832e-06, "loss": 0.7574, "step": 429 }, { "epoch": 0.19803690164359114, "grad_norm": 4.491458779620567, "learning_rate": 1.8560335868309742e-06, "loss": 0.7823, "step": 430 }, { "epoch": 0.198497452577646, "grad_norm": 3.8966365616054603, "learning_rate": 1.8552611597834317e-06, "loss": 0.6117, "step": 431 }, { "epoch": 0.19895800351170087, "grad_norm": 4.6373473800788, "learning_rate": 1.8544868277392482e-06, "loss": 0.7858, "step": 432 }, { "epoch": 0.19941855444575574, "grad_norm": 4.272084669683394, "learning_rate": 1.8537105924231601e-06, "loss": 0.7599, "step": 433 }, { "epoch": 0.1998791053798106, "grad_norm": 3.667658325768108, "learning_rate": 1.8529324555641436e-06, "loss": 0.8154, "step": 434 }, { "epoch": 0.20033965631386547, "grad_norm": 3.4506284954813737, "learning_rate": 1.8521524188954091e-06, "loss": 0.728, "step": 435 }, { "epoch": 0.20080020724792033, "grad_norm": 3.753997893517419, "learning_rate": 1.8513704841543995e-06, "loss": 0.7156, "step": 436 }, { "epoch": 0.2012607581819752, "grad_norm": 4.60938099265099, "learning_rate": 1.8505866530827855e-06, "loss": 0.8059, "step": 437 }, { "epoch": 0.20172130911603006, "grad_norm": 4.009768746469466, "learning_rate": 1.8498009274264605e-06, "loss": 0.7185, "step": 438 }, { "epoch": 0.20218186005008493, "grad_norm": 4.178806891815072, "learning_rate": 1.8490133089355398e-06, "loss": 0.9194, "step": 439 }, { "epoch": 0.20264241098413976, "grad_norm": 3.918196021344088, "learning_rate": 1.848223799364353e-06, "loss": 0.7982, "step": 440 }, { "epoch": 0.20310296191819463, "grad_norm": 3.9229175721305776, "learning_rate": 1.8474324004714428e-06, "loss": 0.8489, "step": 441 }, { "epoch": 0.2035635128522495, "grad_norm": 4.510206562778889, "learning_rate": 1.8466391140195601e-06, "loss": 0.7828, "step": 442 }, { "epoch": 0.20402406378630436, "grad_norm": 4.23905788891001, "learning_rate": 1.8458439417756594e-06, "loss": 0.9603, "step": 443 }, { "epoch": 0.20448461472035923, "grad_norm": 4.29318027075286, "learning_rate": 1.8450468855108969e-06, "loss": 0.9125, "step": 444 }, { "epoch": 0.2049451656544141, "grad_norm": 4.453043819593493, "learning_rate": 1.8442479470006239e-06, "loss": 0.7764, "step": 445 }, { "epoch": 0.20540571658846896, "grad_norm": 4.039380026274038, "learning_rate": 1.843447128024385e-06, "loss": 0.839, "step": 446 }, { "epoch": 0.20586626752252382, "grad_norm": 4.523247140314352, "learning_rate": 1.842644430365913e-06, "loss": 0.732, "step": 447 }, { "epoch": 0.2063268184565787, "grad_norm": 3.924040307705112, "learning_rate": 1.8418398558131257e-06, "loss": 0.7296, "step": 448 }, { "epoch": 0.20678736939063355, "grad_norm": 4.009543913565817, "learning_rate": 1.8410334061581206e-06, "loss": 0.6765, "step": 449 }, { "epoch": 0.20724792032468842, "grad_norm": 4.9873168288301315, "learning_rate": 1.8402250831971723e-06, "loss": 0.7453, "step": 450 }, { "epoch": 0.20770847125874328, "grad_norm": 4.293718107015198, "learning_rate": 1.8394148887307285e-06, "loss": 0.7417, "step": 451 }, { "epoch": 0.20816902219279815, "grad_norm": 4.1765862788364565, "learning_rate": 1.8386028245634041e-06, "loss": 0.7811, "step": 452 }, { "epoch": 0.20862957312685299, "grad_norm": 4.514493406730088, "learning_rate": 1.83778889250398e-06, "loss": 0.7785, "step": 453 }, { "epoch": 0.20909012406090785, "grad_norm": 3.606118369551966, "learning_rate": 1.836973094365397e-06, "loss": 0.622, "step": 454 }, { "epoch": 0.20955067499496272, "grad_norm": 4.61789772401644, "learning_rate": 1.8361554319647522e-06, "loss": 0.8444, "step": 455 }, { "epoch": 0.21001122592901758, "grad_norm": 4.658780186607878, "learning_rate": 1.8353359071232951e-06, "loss": 0.7155, "step": 456 }, { "epoch": 0.21047177686307245, "grad_norm": 4.955425770822849, "learning_rate": 1.8345145216664242e-06, "loss": 0.9587, "step": 457 }, { "epoch": 0.2109323277971273, "grad_norm": 4.832151921417503, "learning_rate": 1.8336912774236818e-06, "loss": 0.8321, "step": 458 }, { "epoch": 0.21139287873118218, "grad_norm": 4.248965110846628, "learning_rate": 1.83286617622875e-06, "loss": 0.691, "step": 459 }, { "epoch": 0.21185342966523704, "grad_norm": 4.311009691954827, "learning_rate": 1.8320392199194476e-06, "loss": 0.7364, "step": 460 }, { "epoch": 0.2123139805992919, "grad_norm": 4.342086421859098, "learning_rate": 1.8312104103377262e-06, "loss": 0.796, "step": 461 }, { "epoch": 0.21277453153334677, "grad_norm": 4.293062909335649, "learning_rate": 1.8303797493296637e-06, "loss": 0.8273, "step": 462 }, { "epoch": 0.21323508246740164, "grad_norm": 5.131733695290907, "learning_rate": 1.8295472387454636e-06, "loss": 0.6167, "step": 463 }, { "epoch": 0.2136956334014565, "grad_norm": 4.132120508159019, "learning_rate": 1.8287128804394474e-06, "loss": 0.8183, "step": 464 }, { "epoch": 0.21415618433551137, "grad_norm": 4.20872316476642, "learning_rate": 1.8278766762700534e-06, "loss": 0.6631, "step": 465 }, { "epoch": 0.2146167352695662, "grad_norm": 4.283460724193411, "learning_rate": 1.8270386280998309e-06, "loss": 0.7694, "step": 466 }, { "epoch": 0.21507728620362107, "grad_norm": 4.3188053838726255, "learning_rate": 1.8261987377954365e-06, "loss": 0.832, "step": 467 }, { "epoch": 0.21553783713767594, "grad_norm": 4.001363036060869, "learning_rate": 1.8253570072276303e-06, "loss": 0.7658, "step": 468 }, { "epoch": 0.2159983880717308, "grad_norm": 4.477385882178747, "learning_rate": 1.8245134382712709e-06, "loss": 0.9259, "step": 469 }, { "epoch": 0.21645893900578567, "grad_norm": 4.799335876533736, "learning_rate": 1.8236680328053116e-06, "loss": 0.7359, "step": 470 }, { "epoch": 0.21691948993984053, "grad_norm": 4.24042472141132, "learning_rate": 1.822820792712797e-06, "loss": 0.7165, "step": 471 }, { "epoch": 0.2173800408738954, "grad_norm": 4.92347972149165, "learning_rate": 1.8219717198808578e-06, "loss": 0.8094, "step": 472 }, { "epoch": 0.21784059180795026, "grad_norm": 4.443081951004624, "learning_rate": 1.8211208162007065e-06, "loss": 0.6582, "step": 473 }, { "epoch": 0.21830114274200513, "grad_norm": 4.0523145485075425, "learning_rate": 1.820268083567634e-06, "loss": 0.6396, "step": 474 }, { "epoch": 0.21876169367606, "grad_norm": 4.91688056541697, "learning_rate": 1.819413523881005e-06, "loss": 0.7918, "step": 475 }, { "epoch": 0.21922224461011486, "grad_norm": 3.770458724583453, "learning_rate": 1.818557139044254e-06, "loss": 0.6731, "step": 476 }, { "epoch": 0.21968279554416972, "grad_norm": 4.362331992352855, "learning_rate": 1.8176989309648803e-06, "loss": 0.6457, "step": 477 }, { "epoch": 0.2201433464782246, "grad_norm": 3.9042632380817115, "learning_rate": 1.8168389015544444e-06, "loss": 0.6979, "step": 478 }, { "epoch": 0.22060389741227943, "grad_norm": 4.323573544838259, "learning_rate": 1.8159770527285634e-06, "loss": 0.7519, "step": 479 }, { "epoch": 0.2210644483463343, "grad_norm": 4.640457822402732, "learning_rate": 1.815113386406908e-06, "loss": 0.7032, "step": 480 }, { "epoch": 0.22152499928038916, "grad_norm": 3.792561706019914, "learning_rate": 1.8142479045131953e-06, "loss": 0.8834, "step": 481 }, { "epoch": 0.22198555021444402, "grad_norm": 4.1433609936585, "learning_rate": 1.8133806089751884e-06, "loss": 0.6621, "step": 482 }, { "epoch": 0.2224461011484989, "grad_norm": 4.440245257937701, "learning_rate": 1.8125115017246887e-06, "loss": 0.5946, "step": 483 }, { "epoch": 0.22290665208255375, "grad_norm": 4.515568475591344, "learning_rate": 1.8116405846975335e-06, "loss": 0.7345, "step": 484 }, { "epoch": 0.22336720301660862, "grad_norm": 3.978580307728638, "learning_rate": 1.8107678598335912e-06, "loss": 0.8354, "step": 485 }, { "epoch": 0.22382775395066348, "grad_norm": 4.07899177697823, "learning_rate": 1.8098933290767567e-06, "loss": 0.7274, "step": 486 }, { "epoch": 0.22428830488471835, "grad_norm": 4.231901256957745, "learning_rate": 1.8090169943749474e-06, "loss": 0.8194, "step": 487 }, { "epoch": 0.2247488558187732, "grad_norm": 4.439213546870366, "learning_rate": 1.808138857680099e-06, "loss": 0.6056, "step": 488 }, { "epoch": 0.22520940675282808, "grad_norm": 4.312563223799548, "learning_rate": 1.8072589209481607e-06, "loss": 0.6744, "step": 489 }, { "epoch": 0.22566995768688294, "grad_norm": 4.437157326534839, "learning_rate": 1.8063771861390915e-06, "loss": 0.8277, "step": 490 }, { "epoch": 0.2261305086209378, "grad_norm": 4.095101419308749, "learning_rate": 1.8054936552168547e-06, "loss": 0.6334, "step": 491 }, { "epoch": 0.22659105955499265, "grad_norm": 4.481674697748022, "learning_rate": 1.804608330149415e-06, "loss": 0.7628, "step": 492 }, { "epoch": 0.2270516104890475, "grad_norm": 4.2036532918090215, "learning_rate": 1.8037212129087322e-06, "loss": 0.6994, "step": 493 }, { "epoch": 0.22751216142310238, "grad_norm": 3.9815790488122653, "learning_rate": 1.8028323054707592e-06, "loss": 0.8116, "step": 494 }, { "epoch": 0.22797271235715724, "grad_norm": 4.195328959396714, "learning_rate": 1.801941609815436e-06, "loss": 0.7945, "step": 495 }, { "epoch": 0.2284332632912121, "grad_norm": 4.488093103725238, "learning_rate": 1.8010491279266858e-06, "loss": 0.7601, "step": 496 }, { "epoch": 0.22889381422526697, "grad_norm": 4.120125996626268, "learning_rate": 1.8001548617924095e-06, "loss": 0.5489, "step": 497 }, { "epoch": 0.22935436515932184, "grad_norm": 4.36470554253384, "learning_rate": 1.799258813404483e-06, "loss": 0.8274, "step": 498 }, { "epoch": 0.2298149160933767, "grad_norm": 4.578831116611269, "learning_rate": 1.7983609847587521e-06, "loss": 0.7223, "step": 499 }, { "epoch": 0.23027546702743157, "grad_norm": 4.465479785476238, "learning_rate": 1.7974613778550278e-06, "loss": 0.8502, "step": 500 }, { "epoch": 0.23073601796148643, "grad_norm": 4.337945580371207, "learning_rate": 1.7965599946970812e-06, "loss": 0.7506, "step": 501 }, { "epoch": 0.2311965688955413, "grad_norm": 4.193839229272682, "learning_rate": 1.795656837292641e-06, "loss": 0.8613, "step": 502 }, { "epoch": 0.23165711982959616, "grad_norm": 4.390376192729833, "learning_rate": 1.7947519076533873e-06, "loss": 0.6651, "step": 503 }, { "epoch": 0.23211767076365103, "grad_norm": 4.276031019219444, "learning_rate": 1.793845207794947e-06, "loss": 0.7875, "step": 504 }, { "epoch": 0.23257822169770587, "grad_norm": 4.296664337009562, "learning_rate": 1.7929367397368913e-06, "loss": 0.7622, "step": 505 }, { "epoch": 0.23303877263176073, "grad_norm": 4.147182468508663, "learning_rate": 1.7920265055027283e-06, "loss": 0.7013, "step": 506 }, { "epoch": 0.2334993235658156, "grad_norm": 4.453337455985819, "learning_rate": 1.7911145071199018e-06, "loss": 0.8487, "step": 507 }, { "epoch": 0.23395987449987046, "grad_norm": 4.295689250945765, "learning_rate": 1.7902007466197837e-06, "loss": 0.7384, "step": 508 }, { "epoch": 0.23442042543392533, "grad_norm": 4.345750994316164, "learning_rate": 1.789285226037671e-06, "loss": 0.7762, "step": 509 }, { "epoch": 0.2348809763679802, "grad_norm": 4.245769814083758, "learning_rate": 1.788367947412782e-06, "loss": 0.6858, "step": 510 }, { "epoch": 0.23534152730203506, "grad_norm": 4.582655102991562, "learning_rate": 1.7874489127882493e-06, "loss": 0.6846, "step": 511 }, { "epoch": 0.23580207823608992, "grad_norm": 4.839702113033688, "learning_rate": 1.7865281242111182e-06, "loss": 0.8621, "step": 512 }, { "epoch": 0.2362626291701448, "grad_norm": 3.959216250513401, "learning_rate": 1.7856055837323406e-06, "loss": 0.8295, "step": 513 }, { "epoch": 0.23672318010419965, "grad_norm": 4.53937914601787, "learning_rate": 1.784681293406769e-06, "loss": 0.8029, "step": 514 }, { "epoch": 0.23718373103825452, "grad_norm": 4.7922853400866225, "learning_rate": 1.7837552552931555e-06, "loss": 0.6642, "step": 515 }, { "epoch": 0.23764428197230938, "grad_norm": 3.992603364370258, "learning_rate": 1.7828274714541443e-06, "loss": 0.8123, "step": 516 }, { "epoch": 0.23810483290636425, "grad_norm": 3.634053515857122, "learning_rate": 1.7818979439562677e-06, "loss": 0.5655, "step": 517 }, { "epoch": 0.23856538384041912, "grad_norm": 4.509882882964582, "learning_rate": 1.7809666748699424e-06, "loss": 0.8376, "step": 518 }, { "epoch": 0.23902593477447395, "grad_norm": 4.546206778782706, "learning_rate": 1.7800336662694635e-06, "loss": 0.7181, "step": 519 }, { "epoch": 0.23948648570852882, "grad_norm": 4.315355537576437, "learning_rate": 1.7790989202330018e-06, "loss": 0.7531, "step": 520 }, { "epoch": 0.23994703664258368, "grad_norm": 4.588559484880985, "learning_rate": 1.7781624388425973e-06, "loss": 0.9547, "step": 521 }, { "epoch": 0.24040758757663855, "grad_norm": 4.427350828828884, "learning_rate": 1.7772242241841552e-06, "loss": 0.6995, "step": 522 }, { "epoch": 0.2408681385106934, "grad_norm": 3.6209682675988377, "learning_rate": 1.7762842783474419e-06, "loss": 0.7027, "step": 523 }, { "epoch": 0.24132868944474828, "grad_norm": 3.8928430336662205, "learning_rate": 1.7753426034260794e-06, "loss": 0.774, "step": 524 }, { "epoch": 0.24178924037880314, "grad_norm": 4.130111515588415, "learning_rate": 1.774399201517541e-06, "loss": 0.7759, "step": 525 }, { "epoch": 0.242249791312858, "grad_norm": 3.9925185017293914, "learning_rate": 1.7734540747231469e-06, "loss": 0.7783, "step": 526 }, { "epoch": 0.24271034224691287, "grad_norm": 4.261414891338643, "learning_rate": 1.772507225148059e-06, "loss": 0.5416, "step": 527 }, { "epoch": 0.24317089318096774, "grad_norm": 3.8216817037977817, "learning_rate": 1.7715586549012768e-06, "loss": 0.7298, "step": 528 }, { "epoch": 0.2436314441150226, "grad_norm": 4.345769588872297, "learning_rate": 1.7706083660956327e-06, "loss": 0.8006, "step": 529 }, { "epoch": 0.24409199504907747, "grad_norm": 4.221863918866884, "learning_rate": 1.7696563608477862e-06, "loss": 0.809, "step": 530 }, { "epoch": 0.24455254598313234, "grad_norm": 4.195718260885216, "learning_rate": 1.76870264127822e-06, "loss": 0.8129, "step": 531 }, { "epoch": 0.24501309691718717, "grad_norm": 4.927446573422902, "learning_rate": 1.7677472095112363e-06, "loss": 0.6351, "step": 532 }, { "epoch": 0.24547364785124204, "grad_norm": 4.450860974974582, "learning_rate": 1.7667900676749498e-06, "loss": 0.6949, "step": 533 }, { "epoch": 0.2459341987852969, "grad_norm": 3.8033331384295863, "learning_rate": 1.7658312179012854e-06, "loss": 0.6301, "step": 534 }, { "epoch": 0.24639474971935177, "grad_norm": 4.257403389764273, "learning_rate": 1.7648706623259706e-06, "loss": 0.886, "step": 535 }, { "epoch": 0.24685530065340663, "grad_norm": 4.301041346693013, "learning_rate": 1.7639084030885338e-06, "loss": 0.7574, "step": 536 }, { "epoch": 0.2473158515874615, "grad_norm": 4.114808933094132, "learning_rate": 1.7629444423322982e-06, "loss": 0.9541, "step": 537 }, { "epoch": 0.24777640252151636, "grad_norm": 4.052767542785926, "learning_rate": 1.7619787822043754e-06, "loss": 0.59, "step": 538 }, { "epoch": 0.24823695345557123, "grad_norm": 4.235070302836609, "learning_rate": 1.7610114248556639e-06, "loss": 0.7553, "step": 539 }, { "epoch": 0.2486975043896261, "grad_norm": 4.2983744575084755, "learning_rate": 1.7600423724408415e-06, "loss": 0.6846, "step": 540 }, { "epoch": 0.24915805532368096, "grad_norm": 4.397187105269342, "learning_rate": 1.759071627118362e-06, "loss": 0.7333, "step": 541 }, { "epoch": 0.24961860625773583, "grad_norm": 4.730011496535825, "learning_rate": 1.7580991910504494e-06, "loss": 0.7424, "step": 542 }, { "epoch": 0.2500791571917907, "grad_norm": 4.174178151614478, "learning_rate": 1.7571250664030947e-06, "loss": 0.7691, "step": 543 }, { "epoch": 0.25053970812584553, "grad_norm": 3.9376371694116385, "learning_rate": 1.7561492553460488e-06, "loss": 0.7201, "step": 544 }, { "epoch": 0.2510002590599004, "grad_norm": 4.088858896803994, "learning_rate": 1.7551717600528203e-06, "loss": 0.7066, "step": 545 }, { "epoch": 0.25146080999395526, "grad_norm": 4.029030814271541, "learning_rate": 1.7541925827006678e-06, "loss": 0.727, "step": 546 }, { "epoch": 0.25192136092801015, "grad_norm": 3.8637937381088583, "learning_rate": 1.7532117254705972e-06, "loss": 0.7623, "step": 547 }, { "epoch": 0.252381911862065, "grad_norm": 3.788618968746217, "learning_rate": 1.7522291905473558e-06, "loss": 0.5311, "step": 548 }, { "epoch": 0.2528424627961199, "grad_norm": 4.228916893996322, "learning_rate": 1.7512449801194286e-06, "loss": 0.7298, "step": 549 }, { "epoch": 0.2533030137301747, "grad_norm": 3.957046423649985, "learning_rate": 1.7502590963790316e-06, "loss": 0.7728, "step": 550 }, { "epoch": 0.25376356466422956, "grad_norm": 4.5269939227670575, "learning_rate": 1.7492715415221087e-06, "loss": 0.8525, "step": 551 }, { "epoch": 0.25422411559828445, "grad_norm": 3.8322173620002995, "learning_rate": 1.7482823177483252e-06, "loss": 0.8282, "step": 552 }, { "epoch": 0.2546846665323393, "grad_norm": 4.2166292321615115, "learning_rate": 1.7472914272610646e-06, "loss": 0.6476, "step": 553 }, { "epoch": 0.2551452174663942, "grad_norm": 4.198686126203914, "learning_rate": 1.7462988722674221e-06, "loss": 0.7519, "step": 554 }, { "epoch": 0.255605768400449, "grad_norm": 4.346497616490723, "learning_rate": 1.7453046549782009e-06, "loss": 0.6421, "step": 555 }, { "epoch": 0.2560663193345039, "grad_norm": 4.413236984974349, "learning_rate": 1.7443087776079064e-06, "loss": 0.7459, "step": 556 }, { "epoch": 0.25652687026855875, "grad_norm": 4.473360574178836, "learning_rate": 1.7433112423747418e-06, "loss": 0.8505, "step": 557 }, { "epoch": 0.25698742120261364, "grad_norm": 4.025590590868836, "learning_rate": 1.742312051500603e-06, "loss": 0.7609, "step": 558 }, { "epoch": 0.2574479721366685, "grad_norm": 4.225065430560797, "learning_rate": 1.741311207211074e-06, "loss": 0.8148, "step": 559 }, { "epoch": 0.25790852307072337, "grad_norm": 4.581252607599293, "learning_rate": 1.740308711735421e-06, "loss": 0.72, "step": 560 }, { "epoch": 0.2583690740047782, "grad_norm": 4.158924621446202, "learning_rate": 1.7393045673065878e-06, "loss": 0.6667, "step": 561 }, { "epoch": 0.2588296249388331, "grad_norm": 4.332445827424479, "learning_rate": 1.7382987761611924e-06, "loss": 0.7357, "step": 562 }, { "epoch": 0.25929017587288794, "grad_norm": 4.515867700808527, "learning_rate": 1.7372913405395191e-06, "loss": 0.7637, "step": 563 }, { "epoch": 0.2597507268069428, "grad_norm": 4.3355804305140975, "learning_rate": 1.7362822626855165e-06, "loss": 0.7094, "step": 564 }, { "epoch": 0.26021127774099767, "grad_norm": 3.984490764171951, "learning_rate": 1.7352715448467895e-06, "loss": 0.736, "step": 565 }, { "epoch": 0.2606718286750525, "grad_norm": 4.002256310186809, "learning_rate": 1.7342591892745974e-06, "loss": 0.8424, "step": 566 }, { "epoch": 0.2611323796091074, "grad_norm": 4.068967423696258, "learning_rate": 1.7332451982238465e-06, "loss": 0.6914, "step": 567 }, { "epoch": 0.26159293054316224, "grad_norm": 4.04510958923272, "learning_rate": 1.732229573953086e-06, "loss": 0.7699, "step": 568 }, { "epoch": 0.26205348147721713, "grad_norm": 3.814017301766935, "learning_rate": 1.7312123187245037e-06, "loss": 0.7837, "step": 569 }, { "epoch": 0.26251403241127197, "grad_norm": 4.08644206941439, "learning_rate": 1.7301934348039188e-06, "loss": 0.6936, "step": 570 }, { "epoch": 0.26297458334532686, "grad_norm": 3.788350244356071, "learning_rate": 1.7291729244607795e-06, "loss": 0.6982, "step": 571 }, { "epoch": 0.2634351342793817, "grad_norm": 4.009147026470945, "learning_rate": 1.7281507899681556e-06, "loss": 0.7708, "step": 572 }, { "epoch": 0.2638956852134366, "grad_norm": 4.363727316777757, "learning_rate": 1.7271270336027358e-06, "loss": 0.7508, "step": 573 }, { "epoch": 0.26435623614749143, "grad_norm": 4.0260385741154066, "learning_rate": 1.7261016576448198e-06, "loss": 0.7602, "step": 574 }, { "epoch": 0.2648167870815463, "grad_norm": 4.532724930828355, "learning_rate": 1.7250746643783158e-06, "loss": 0.9159, "step": 575 }, { "epoch": 0.26527733801560116, "grad_norm": 4.277413415852311, "learning_rate": 1.7240460560907343e-06, "loss": 0.5962, "step": 576 }, { "epoch": 0.26573788894965605, "grad_norm": 4.023544494730847, "learning_rate": 1.7230158350731832e-06, "loss": 0.7031, "step": 577 }, { "epoch": 0.2661984398837109, "grad_norm": 4.394502913346497, "learning_rate": 1.7219840036203613e-06, "loss": 0.8688, "step": 578 }, { "epoch": 0.26665899081776573, "grad_norm": 4.2135455600282, "learning_rate": 1.7209505640305562e-06, "loss": 0.6824, "step": 579 }, { "epoch": 0.2671195417518206, "grad_norm": 4.166716129675478, "learning_rate": 1.7199155186056364e-06, "loss": 0.9158, "step": 580 }, { "epoch": 0.26758009268587546, "grad_norm": 4.257419128994967, "learning_rate": 1.7188788696510474e-06, "loss": 0.7032, "step": 581 }, { "epoch": 0.26804064361993035, "grad_norm": 5.245712690030778, "learning_rate": 1.7178406194758067e-06, "loss": 0.7513, "step": 582 }, { "epoch": 0.2685011945539852, "grad_norm": 3.6565153793924066, "learning_rate": 1.7168007703924978e-06, "loss": 0.8362, "step": 583 }, { "epoch": 0.2689617454880401, "grad_norm": 3.988355159868799, "learning_rate": 1.7157593247172664e-06, "loss": 0.6912, "step": 584 }, { "epoch": 0.2694222964220949, "grad_norm": 4.859998726292773, "learning_rate": 1.714716284769813e-06, "loss": 0.8122, "step": 585 }, { "epoch": 0.2698828473561498, "grad_norm": 4.449587603712418, "learning_rate": 1.7136716528733912e-06, "loss": 0.5227, "step": 586 }, { "epoch": 0.27034339829020465, "grad_norm": 4.757104115116771, "learning_rate": 1.7126254313547985e-06, "loss": 0.8368, "step": 587 }, { "epoch": 0.27080394922425954, "grad_norm": 4.375911000588365, "learning_rate": 1.7115776225443739e-06, "loss": 1.0057, "step": 588 }, { "epoch": 0.2712645001583144, "grad_norm": 4.119109719334851, "learning_rate": 1.7105282287759926e-06, "loss": 0.6735, "step": 589 }, { "epoch": 0.2717250510923693, "grad_norm": 4.22411371120936, "learning_rate": 1.7094772523870589e-06, "loss": 0.7689, "step": 590 }, { "epoch": 0.2721856020264241, "grad_norm": 4.077309442314833, "learning_rate": 1.7084246957185032e-06, "loss": 0.7915, "step": 591 }, { "epoch": 0.27264615296047895, "grad_norm": 4.467850072522287, "learning_rate": 1.707370561114775e-06, "loss": 0.78, "step": 592 }, { "epoch": 0.27310670389453384, "grad_norm": 4.036360465678462, "learning_rate": 1.7063148509238393e-06, "loss": 0.5921, "step": 593 }, { "epoch": 0.2735672548285887, "grad_norm": 4.169786800516592, "learning_rate": 1.70525756749717e-06, "loss": 0.7458, "step": 594 }, { "epoch": 0.2740278057626436, "grad_norm": 4.658440993968708, "learning_rate": 1.7041987131897445e-06, "loss": 0.7242, "step": 595 }, { "epoch": 0.2744883566966984, "grad_norm": 3.7385100042587176, "learning_rate": 1.703138290360041e-06, "loss": 0.6831, "step": 596 }, { "epoch": 0.2749489076307533, "grad_norm": 4.205125903795114, "learning_rate": 1.7020763013700296e-06, "loss": 0.6982, "step": 597 }, { "epoch": 0.27540945856480814, "grad_norm": 3.7871758076978463, "learning_rate": 1.70101274858517e-06, "loss": 0.7298, "step": 598 }, { "epoch": 0.27587000949886303, "grad_norm": 4.654275215064341, "learning_rate": 1.6999476343744047e-06, "loss": 0.6886, "step": 599 }, { "epoch": 0.27633056043291787, "grad_norm": 3.9111251582957927, "learning_rate": 1.6988809611101535e-06, "loss": 0.8106, "step": 600 }, { "epoch": 0.27679111136697276, "grad_norm": 4.356651519020497, "learning_rate": 1.69781273116831e-06, "loss": 0.6649, "step": 601 }, { "epoch": 0.2772516623010276, "grad_norm": 3.6310423929984177, "learning_rate": 1.6967429469282345e-06, "loss": 0.7905, "step": 602 }, { "epoch": 0.2777122132350825, "grad_norm": 4.016121263918193, "learning_rate": 1.695671610772749e-06, "loss": 0.689, "step": 603 }, { "epoch": 0.27817276416913733, "grad_norm": 4.560976835572483, "learning_rate": 1.694598725088133e-06, "loss": 0.6644, "step": 604 }, { "epoch": 0.27863331510319217, "grad_norm": 4.015835654795746, "learning_rate": 1.6935242922641159e-06, "loss": 0.7877, "step": 605 }, { "epoch": 0.27909386603724706, "grad_norm": 4.11514343582174, "learning_rate": 1.6924483146938754e-06, "loss": 0.7355, "step": 606 }, { "epoch": 0.2795544169713019, "grad_norm": 3.8277660047452473, "learning_rate": 1.6913707947740284e-06, "loss": 0.6868, "step": 607 }, { "epoch": 0.2800149679053568, "grad_norm": 4.159176256433197, "learning_rate": 1.690291734904627e-06, "loss": 0.7049, "step": 608 }, { "epoch": 0.28047551883941163, "grad_norm": 4.016074394227972, "learning_rate": 1.6892111374891547e-06, "loss": 0.7511, "step": 609 }, { "epoch": 0.2809360697734665, "grad_norm": 4.346396616132792, "learning_rate": 1.6881290049345185e-06, "loss": 0.6382, "step": 610 }, { "epoch": 0.28139662070752136, "grad_norm": 4.658124592167597, "learning_rate": 1.6870453396510453e-06, "loss": 0.7902, "step": 611 }, { "epoch": 0.28185717164157625, "grad_norm": 4.317042284267942, "learning_rate": 1.6859601440524757e-06, "loss": 0.6758, "step": 612 }, { "epoch": 0.2823177225756311, "grad_norm": 4.382911039726369, "learning_rate": 1.6848734205559593e-06, "loss": 0.77, "step": 613 }, { "epoch": 0.282778273509686, "grad_norm": 4.460098968188679, "learning_rate": 1.6837851715820488e-06, "loss": 0.7776, "step": 614 }, { "epoch": 0.2832388244437408, "grad_norm": 4.201446926372252, "learning_rate": 1.6826953995546945e-06, "loss": 0.7108, "step": 615 }, { "epoch": 0.2836993753777957, "grad_norm": 4.301676245433856, "learning_rate": 1.6816041069012388e-06, "loss": 0.7584, "step": 616 }, { "epoch": 0.28415992631185055, "grad_norm": 4.24177365906166, "learning_rate": 1.680511296052412e-06, "loss": 0.7573, "step": 617 }, { "epoch": 0.2846204772459054, "grad_norm": 5.097084620773646, "learning_rate": 1.6794169694423257e-06, "loss": 0.7926, "step": 618 }, { "epoch": 0.2850810281799603, "grad_norm": 3.8725765849923923, "learning_rate": 1.6783211295084669e-06, "loss": 0.7723, "step": 619 }, { "epoch": 0.2855415791140151, "grad_norm": 3.6801759334144846, "learning_rate": 1.677223778691695e-06, "loss": 0.7673, "step": 620 }, { "epoch": 0.28600213004807, "grad_norm": 4.157850420913044, "learning_rate": 1.6761249194362328e-06, "loss": 0.8196, "step": 621 }, { "epoch": 0.28646268098212485, "grad_norm": 4.768943377538657, "learning_rate": 1.6750245541896644e-06, "loss": 0.9362, "step": 622 }, { "epoch": 0.28692323191617974, "grad_norm": 4.643301378908142, "learning_rate": 1.6739226854029276e-06, "loss": 0.6367, "step": 623 }, { "epoch": 0.2873837828502346, "grad_norm": 3.5849610659144115, "learning_rate": 1.6728193155303097e-06, "loss": 0.6891, "step": 624 }, { "epoch": 0.2878443337842895, "grad_norm": 4.532107681265048, "learning_rate": 1.6717144470294406e-06, "loss": 0.8196, "step": 625 }, { "epoch": 0.2883048847183443, "grad_norm": 4.1979322612660335, "learning_rate": 1.6706080823612894e-06, "loss": 0.7381, "step": 626 }, { "epoch": 0.2887654356523992, "grad_norm": 3.772989230670649, "learning_rate": 1.6695002239901569e-06, "loss": 0.8644, "step": 627 }, { "epoch": 0.28922598658645404, "grad_norm": 4.000414148219418, "learning_rate": 1.6683908743836711e-06, "loss": 0.7666, "step": 628 }, { "epoch": 0.28968653752050894, "grad_norm": 4.006520756409899, "learning_rate": 1.6672800360127823e-06, "loss": 0.703, "step": 629 }, { "epoch": 0.2901470884545638, "grad_norm": 3.966412235701031, "learning_rate": 1.6661677113517553e-06, "loss": 0.8114, "step": 630 }, { "epoch": 0.2906076393886186, "grad_norm": 3.95249380975532, "learning_rate": 1.6650539028781667e-06, "loss": 0.9015, "step": 631 }, { "epoch": 0.2910681903226735, "grad_norm": 4.341655284836586, "learning_rate": 1.663938613072898e-06, "loss": 0.7267, "step": 632 }, { "epoch": 0.29152874125672834, "grad_norm": 3.603966019863015, "learning_rate": 1.6628218444201299e-06, "loss": 0.75, "step": 633 }, { "epoch": 0.29198929219078323, "grad_norm": 3.462156165973966, "learning_rate": 1.6617035994073372e-06, "loss": 0.6169, "step": 634 }, { "epoch": 0.29244984312483807, "grad_norm": 4.116805246202923, "learning_rate": 1.6605838805252828e-06, "loss": 0.8683, "step": 635 }, { "epoch": 0.29291039405889296, "grad_norm": 4.135915617090618, "learning_rate": 1.6594626902680126e-06, "loss": 0.6505, "step": 636 }, { "epoch": 0.2933709449929478, "grad_norm": 4.270785952001011, "learning_rate": 1.6583400311328505e-06, "loss": 0.6733, "step": 637 }, { "epoch": 0.2938314959270027, "grad_norm": 3.82172458598364, "learning_rate": 1.6572159056203915e-06, "loss": 0.8082, "step": 638 }, { "epoch": 0.29429204686105753, "grad_norm": 4.2489659446126, "learning_rate": 1.6560903162344966e-06, "loss": 0.7488, "step": 639 }, { "epoch": 0.2947525977951124, "grad_norm": 4.4487224510156915, "learning_rate": 1.6549632654822875e-06, "loss": 0.8191, "step": 640 }, { "epoch": 0.29521314872916726, "grad_norm": 3.6396604209620786, "learning_rate": 1.6538347558741422e-06, "loss": 0.6289, "step": 641 }, { "epoch": 0.29567369966322216, "grad_norm": 4.032224213368084, "learning_rate": 1.652704789923686e-06, "loss": 0.9269, "step": 642 }, { "epoch": 0.296134250597277, "grad_norm": 4.516571282736476, "learning_rate": 1.6515733701477896e-06, "loss": 0.8316, "step": 643 }, { "epoch": 0.29659480153133183, "grad_norm": 4.218676271712179, "learning_rate": 1.6504404990665615e-06, "loss": 0.7312, "step": 644 }, { "epoch": 0.2970553524653867, "grad_norm": 4.121918861041974, "learning_rate": 1.6493061792033424e-06, "loss": 0.7367, "step": 645 }, { "epoch": 0.29751590339944156, "grad_norm": 3.7363634666100976, "learning_rate": 1.648170413084701e-06, "loss": 0.8503, "step": 646 }, { "epoch": 0.29797645433349645, "grad_norm": 3.762541270796297, "learning_rate": 1.6470332032404258e-06, "loss": 0.7573, "step": 647 }, { "epoch": 0.2984370052675513, "grad_norm": 3.7210263441518454, "learning_rate": 1.6458945522035227e-06, "loss": 0.6499, "step": 648 }, { "epoch": 0.2988975562016062, "grad_norm": 3.65142457460231, "learning_rate": 1.6447544625102068e-06, "loss": 0.7038, "step": 649 }, { "epoch": 0.299358107135661, "grad_norm": 4.081774612148493, "learning_rate": 1.6436129366998973e-06, "loss": 0.8311, "step": 650 }, { "epoch": 0.2998186580697159, "grad_norm": 3.9507535400648544, "learning_rate": 1.6424699773152138e-06, "loss": 0.7965, "step": 651 }, { "epoch": 0.30027920900377075, "grad_norm": 4.373000501506878, "learning_rate": 1.6413255869019666e-06, "loss": 0.7474, "step": 652 }, { "epoch": 0.30073975993782565, "grad_norm": 3.7854532892193866, "learning_rate": 1.640179768009156e-06, "loss": 0.7439, "step": 653 }, { "epoch": 0.3012003108718805, "grad_norm": 4.0102756045459635, "learning_rate": 1.6390325231889616e-06, "loss": 0.5621, "step": 654 }, { "epoch": 0.3016608618059354, "grad_norm": 3.523046074954149, "learning_rate": 1.6378838549967415e-06, "loss": 0.7927, "step": 655 }, { "epoch": 0.3021214127399902, "grad_norm": 4.481872930905736, "learning_rate": 1.6367337659910221e-06, "loss": 0.7755, "step": 656 }, { "epoch": 0.30258196367404505, "grad_norm": 4.205137682998843, "learning_rate": 1.6355822587334959e-06, "loss": 0.6824, "step": 657 }, { "epoch": 0.30304251460809994, "grad_norm": 4.657560239756234, "learning_rate": 1.6344293357890137e-06, "loss": 0.728, "step": 658 }, { "epoch": 0.3035030655421548, "grad_norm": 3.8882301744606673, "learning_rate": 1.6332749997255804e-06, "loss": 0.7955, "step": 659 }, { "epoch": 0.3039636164762097, "grad_norm": 4.5222766175225235, "learning_rate": 1.632119253114347e-06, "loss": 0.5952, "step": 660 }, { "epoch": 0.3044241674102645, "grad_norm": 4.419899200547831, "learning_rate": 1.6309620985296072e-06, "loss": 0.9106, "step": 661 }, { "epoch": 0.3048847183443194, "grad_norm": 3.6451565323135644, "learning_rate": 1.6298035385487918e-06, "loss": 0.8166, "step": 662 }, { "epoch": 0.30534526927837424, "grad_norm": 4.449032395860612, "learning_rate": 1.6286435757524602e-06, "loss": 0.719, "step": 663 }, { "epoch": 0.30580582021242914, "grad_norm": 4.076360059628079, "learning_rate": 1.6274822127242974e-06, "loss": 0.5708, "step": 664 }, { "epoch": 0.306266371146484, "grad_norm": 4.058088448528635, "learning_rate": 1.6263194520511064e-06, "loss": 0.7202, "step": 665 }, { "epoch": 0.30672692208053887, "grad_norm": 3.7437569144731104, "learning_rate": 1.6251552963228048e-06, "loss": 0.6594, "step": 666 }, { "epoch": 0.3071874730145937, "grad_norm": 3.9108733866307825, "learning_rate": 1.6239897481324164e-06, "loss": 0.5626, "step": 667 }, { "epoch": 0.3076480239486486, "grad_norm": 4.10281494934095, "learning_rate": 1.6228228100760664e-06, "loss": 0.7095, "step": 668 }, { "epoch": 0.30810857488270343, "grad_norm": 3.9315081424052507, "learning_rate": 1.6216544847529764e-06, "loss": 0.8348, "step": 669 }, { "epoch": 0.30856912581675827, "grad_norm": 3.492302008407148, "learning_rate": 1.620484774765458e-06, "loss": 0.6437, "step": 670 }, { "epoch": 0.30902967675081316, "grad_norm": 4.388500997518393, "learning_rate": 1.6193136827189065e-06, "loss": 0.8731, "step": 671 }, { "epoch": 0.309490227684868, "grad_norm": 4.6167653930370935, "learning_rate": 1.6181412112217957e-06, "loss": 0.7627, "step": 672 }, { "epoch": 0.3099507786189229, "grad_norm": 4.467752345412684, "learning_rate": 1.6169673628856722e-06, "loss": 0.7976, "step": 673 }, { "epoch": 0.31041132955297773, "grad_norm": 4.705832025351113, "learning_rate": 1.6157921403251492e-06, "loss": 0.7098, "step": 674 }, { "epoch": 0.3108718804870326, "grad_norm": 4.207918349309328, "learning_rate": 1.6146155461579007e-06, "loss": 0.9319, "step": 675 }, { "epoch": 0.31133243142108746, "grad_norm": 4.488216110872569, "learning_rate": 1.6134375830046563e-06, "loss": 0.7625, "step": 676 }, { "epoch": 0.31179298235514236, "grad_norm": 4.068023864138209, "learning_rate": 1.6122582534891942e-06, "loss": 0.8362, "step": 677 }, { "epoch": 0.3122535332891972, "grad_norm": 4.07407187135257, "learning_rate": 1.6110775602383365e-06, "loss": 0.834, "step": 678 }, { "epoch": 0.3127140842232521, "grad_norm": 4.183459494222109, "learning_rate": 1.6098955058819423e-06, "loss": 0.6472, "step": 679 }, { "epoch": 0.3131746351573069, "grad_norm": 4.384513121587818, "learning_rate": 1.6087120930529036e-06, "loss": 0.7421, "step": 680 }, { "epoch": 0.3136351860913618, "grad_norm": 4.156249289942091, "learning_rate": 1.6075273243871367e-06, "loss": 0.6781, "step": 681 }, { "epoch": 0.31409573702541665, "grad_norm": 4.674310254023724, "learning_rate": 1.606341202523579e-06, "loss": 0.644, "step": 682 }, { "epoch": 0.3145562879594715, "grad_norm": 3.4466678084320588, "learning_rate": 1.6051537301041812e-06, "loss": 0.671, "step": 683 }, { "epoch": 0.3150168388935264, "grad_norm": 4.209956929215634, "learning_rate": 1.6039649097739032e-06, "loss": 0.8545, "step": 684 }, { "epoch": 0.3154773898275812, "grad_norm": 4.104712525861724, "learning_rate": 1.602774744180706e-06, "loss": 0.8387, "step": 685 }, { "epoch": 0.3159379407616361, "grad_norm": 4.0506458768068105, "learning_rate": 1.601583235975548e-06, "loss": 0.6038, "step": 686 }, { "epoch": 0.31639849169569095, "grad_norm": 4.144914896917376, "learning_rate": 1.6003903878123782e-06, "loss": 0.64, "step": 687 }, { "epoch": 0.31685904262974585, "grad_norm": 4.6354574597967595, "learning_rate": 1.599196202348129e-06, "loss": 0.7363, "step": 688 }, { "epoch": 0.3173195935638007, "grad_norm": 4.125784855289517, "learning_rate": 1.5980006822427123e-06, "loss": 0.7128, "step": 689 }, { "epoch": 0.3177801444978556, "grad_norm": 4.2368611177702915, "learning_rate": 1.5968038301590133e-06, "loss": 0.8132, "step": 690 }, { "epoch": 0.3182406954319104, "grad_norm": 4.362017701126882, "learning_rate": 1.5956056487628827e-06, "loss": 0.8274, "step": 691 }, { "epoch": 0.3187012463659653, "grad_norm": 3.879865947138015, "learning_rate": 1.5944061407231338e-06, "loss": 0.8622, "step": 692 }, { "epoch": 0.31916179730002014, "grad_norm": 3.925712652745689, "learning_rate": 1.593205308711533e-06, "loss": 0.6824, "step": 693 }, { "epoch": 0.31962234823407504, "grad_norm": 3.9554190339834174, "learning_rate": 1.5920031554027969e-06, "loss": 0.7145, "step": 694 }, { "epoch": 0.3200828991681299, "grad_norm": 4.305721105884675, "learning_rate": 1.590799683474585e-06, "loss": 0.8595, "step": 695 }, { "epoch": 0.3205434501021847, "grad_norm": 4.128602626439069, "learning_rate": 1.5895948956074933e-06, "loss": 0.8568, "step": 696 }, { "epoch": 0.3210040010362396, "grad_norm": 4.403719204555602, "learning_rate": 1.5883887944850495e-06, "loss": 0.8569, "step": 697 }, { "epoch": 0.32146455197029444, "grad_norm": 4.242952411258247, "learning_rate": 1.5871813827937063e-06, "loss": 0.6951, "step": 698 }, { "epoch": 0.32192510290434934, "grad_norm": 3.8613776353047613, "learning_rate": 1.5859726632228357e-06, "loss": 0.7366, "step": 699 }, { "epoch": 0.3223856538384042, "grad_norm": 3.9800245509376393, "learning_rate": 1.5847626384647221e-06, "loss": 0.6496, "step": 700 }, { "epoch": 0.32284620477245907, "grad_norm": 4.312536404228704, "learning_rate": 1.583551311214558e-06, "loss": 0.7772, "step": 701 }, { "epoch": 0.3233067557065139, "grad_norm": 4.841186210643986, "learning_rate": 1.5823386841704362e-06, "loss": 0.5912, "step": 702 }, { "epoch": 0.3237673066405688, "grad_norm": 3.8798262958874985, "learning_rate": 1.5811247600333456e-06, "loss": 0.7343, "step": 703 }, { "epoch": 0.32422785757462363, "grad_norm": 4.066243467427467, "learning_rate": 1.5799095415071628e-06, "loss": 0.7617, "step": 704 }, { "epoch": 0.3246884085086785, "grad_norm": 3.597360646840306, "learning_rate": 1.5786930312986495e-06, "loss": 0.545, "step": 705 }, { "epoch": 0.32514895944273337, "grad_norm": 4.017644158758088, "learning_rate": 1.5774752321174427e-06, "loss": 0.6916, "step": 706 }, { "epoch": 0.32560951037678826, "grad_norm": 4.199725481419527, "learning_rate": 1.576256146676051e-06, "loss": 0.7117, "step": 707 }, { "epoch": 0.3260700613108431, "grad_norm": 3.5517322225843846, "learning_rate": 1.575035777689849e-06, "loss": 0.5882, "step": 708 }, { "epoch": 0.32653061224489793, "grad_norm": 4.317259265038379, "learning_rate": 1.5738141278770685e-06, "loss": 0.8165, "step": 709 }, { "epoch": 0.3269911631789528, "grad_norm": 3.9972113996434886, "learning_rate": 1.5725911999587953e-06, "loss": 0.7654, "step": 710 }, { "epoch": 0.32745171411300766, "grad_norm": 4.179141760605785, "learning_rate": 1.5713669966589618e-06, "loss": 0.7672, "step": 711 }, { "epoch": 0.32791226504706256, "grad_norm": 4.143366627467984, "learning_rate": 1.5701415207043414e-06, "loss": 0.6803, "step": 712 }, { "epoch": 0.3283728159811174, "grad_norm": 4.106206000196388, "learning_rate": 1.5689147748245412e-06, "loss": 0.702, "step": 713 }, { "epoch": 0.3288333669151723, "grad_norm": 3.7262402149200886, "learning_rate": 1.5676867617519986e-06, "loss": 0.7456, "step": 714 }, { "epoch": 0.3292939178492271, "grad_norm": 4.384394069328469, "learning_rate": 1.566457484221972e-06, "loss": 0.7596, "step": 715 }, { "epoch": 0.329754468783282, "grad_norm": 4.500247025356155, "learning_rate": 1.5652269449725374e-06, "loss": 0.8032, "step": 716 }, { "epoch": 0.33021501971733686, "grad_norm": 4.476883415129445, "learning_rate": 1.5639951467445798e-06, "loss": 0.897, "step": 717 }, { "epoch": 0.33067557065139175, "grad_norm": 3.5573958389212823, "learning_rate": 1.5627620922817895e-06, "loss": 0.6286, "step": 718 }, { "epoch": 0.3311361215854466, "grad_norm": 4.10297947033326, "learning_rate": 1.561527784330655e-06, "loss": 0.5745, "step": 719 }, { "epoch": 0.3315966725195015, "grad_norm": 4.083709547635969, "learning_rate": 1.5602922256404556e-06, "loss": 0.7926, "step": 720 }, { "epoch": 0.3320572234535563, "grad_norm": 4.487058653907245, "learning_rate": 1.559055418963258e-06, "loss": 0.646, "step": 721 }, { "epoch": 0.33251777438761115, "grad_norm": 3.8886240339102645, "learning_rate": 1.557817367053908e-06, "loss": 0.586, "step": 722 }, { "epoch": 0.33297832532166605, "grad_norm": 3.6488138814249576, "learning_rate": 1.5565780726700244e-06, "loss": 0.5751, "step": 723 }, { "epoch": 0.3334388762557209, "grad_norm": 3.9963922131217435, "learning_rate": 1.5553375385719943e-06, "loss": 0.6547, "step": 724 }, { "epoch": 0.3338994271897758, "grad_norm": 4.422140333030559, "learning_rate": 1.5540957675229663e-06, "loss": 0.6622, "step": 725 }, { "epoch": 0.3343599781238306, "grad_norm": 4.488783854390157, "learning_rate": 1.5528527622888428e-06, "loss": 0.7432, "step": 726 }, { "epoch": 0.3348205290578855, "grad_norm": 3.828705560936496, "learning_rate": 1.5516085256382764e-06, "loss": 0.8012, "step": 727 }, { "epoch": 0.33528107999194035, "grad_norm": 4.018699541956054, "learning_rate": 1.5503630603426628e-06, "loss": 0.7593, "step": 728 }, { "epoch": 0.33574163092599524, "grad_norm": 4.341218043642786, "learning_rate": 1.5491163691761334e-06, "loss": 0.6925, "step": 729 }, { "epoch": 0.3362021818600501, "grad_norm": 4.734000572832922, "learning_rate": 1.5478684549155507e-06, "loss": 0.6499, "step": 730 }, { "epoch": 0.33666273279410497, "grad_norm": 4.10185323581989, "learning_rate": 1.5466193203405015e-06, "loss": 0.881, "step": 731 }, { "epoch": 0.3371232837281598, "grad_norm": 4.385923026265603, "learning_rate": 1.5453689682332898e-06, "loss": 0.8318, "step": 732 }, { "epoch": 0.3375838346622147, "grad_norm": 4.319849103952587, "learning_rate": 1.5441174013789326e-06, "loss": 0.7699, "step": 733 }, { "epoch": 0.33804438559626954, "grad_norm": 4.052212277841386, "learning_rate": 1.5428646225651525e-06, "loss": 0.7167, "step": 734 }, { "epoch": 0.3385049365303244, "grad_norm": 4.009673399640246, "learning_rate": 1.5416106345823714e-06, "loss": 0.7194, "step": 735 }, { "epoch": 0.33896548746437927, "grad_norm": 4.267302462661928, "learning_rate": 1.540355440223704e-06, "loss": 0.7619, "step": 736 }, { "epoch": 0.3394260383984341, "grad_norm": 4.489606915743702, "learning_rate": 1.5390990422849528e-06, "loss": 0.6769, "step": 737 }, { "epoch": 0.339886589332489, "grad_norm": 4.1696436692986785, "learning_rate": 1.5378414435646008e-06, "loss": 0.7858, "step": 738 }, { "epoch": 0.34034714026654384, "grad_norm": 4.210298300958352, "learning_rate": 1.5365826468638057e-06, "loss": 0.6308, "step": 739 }, { "epoch": 0.34080769120059873, "grad_norm": 4.0145594564386196, "learning_rate": 1.5353226549863933e-06, "loss": 0.665, "step": 740 }, { "epoch": 0.34126824213465357, "grad_norm": 4.0689863190420095, "learning_rate": 1.5340614707388516e-06, "loss": 0.6986, "step": 741 }, { "epoch": 0.34172879306870846, "grad_norm": 3.9692855744433033, "learning_rate": 1.5327990969303256e-06, "loss": 0.7962, "step": 742 }, { "epoch": 0.3421893440027633, "grad_norm": 3.5247863563319317, "learning_rate": 1.531535536372608e-06, "loss": 0.6751, "step": 743 }, { "epoch": 0.3426498949368182, "grad_norm": 4.439546413918242, "learning_rate": 1.5302707918801354e-06, "loss": 0.7568, "step": 744 }, { "epoch": 0.343110445870873, "grad_norm": 4.432833644664294, "learning_rate": 1.5290048662699828e-06, "loss": 0.7079, "step": 745 }, { "epoch": 0.3435709968049279, "grad_norm": 4.620874394892662, "learning_rate": 1.5277377623618546e-06, "loss": 0.746, "step": 746 }, { "epoch": 0.34403154773898276, "grad_norm": 4.068868772583429, "learning_rate": 1.5264694829780801e-06, "loss": 0.7807, "step": 747 }, { "epoch": 0.3444920986730376, "grad_norm": 4.614618001943191, "learning_rate": 1.5252000309436071e-06, "loss": 0.6657, "step": 748 }, { "epoch": 0.3449526496070925, "grad_norm": 4.154751642108749, "learning_rate": 1.5239294090859948e-06, "loss": 0.752, "step": 749 }, { "epoch": 0.3454132005411473, "grad_norm": 4.257545169176656, "learning_rate": 1.522657620235409e-06, "loss": 0.7923, "step": 750 }, { "epoch": 0.3458737514752022, "grad_norm": 4.14571765042948, "learning_rate": 1.5213846672246138e-06, "loss": 0.7288, "step": 751 }, { "epoch": 0.34633430240925706, "grad_norm": 4.2365755894329835, "learning_rate": 1.5201105528889666e-06, "loss": 0.7055, "step": 752 }, { "epoch": 0.34679485334331195, "grad_norm": 3.8611455200067697, "learning_rate": 1.5188352800664119e-06, "loss": 0.683, "step": 753 }, { "epoch": 0.3472554042773668, "grad_norm": 4.562787134869739, "learning_rate": 1.5175588515974748e-06, "loss": 0.829, "step": 754 }, { "epoch": 0.3477159552114217, "grad_norm": 3.742971034682943, "learning_rate": 1.5162812703252537e-06, "loss": 0.573, "step": 755 }, { "epoch": 0.3481765061454765, "grad_norm": 4.301269832928734, "learning_rate": 1.5150025390954152e-06, "loss": 0.7167, "step": 756 }, { "epoch": 0.3486370570795314, "grad_norm": 4.14437196081642, "learning_rate": 1.513722660756187e-06, "loss": 0.6334, "step": 757 }, { "epoch": 0.34909760801358625, "grad_norm": 3.8167407786618046, "learning_rate": 1.5124416381583517e-06, "loss": 0.5738, "step": 758 }, { "epoch": 0.34955815894764114, "grad_norm": 4.436091177012611, "learning_rate": 1.5111594741552423e-06, "loss": 0.6753, "step": 759 }, { "epoch": 0.350018709881696, "grad_norm": 4.114689154017256, "learning_rate": 1.5098761716027315e-06, "loss": 0.6555, "step": 760 }, { "epoch": 0.3504792608157508, "grad_norm": 4.151722574358113, "learning_rate": 1.5085917333592297e-06, "loss": 0.8806, "step": 761 }, { "epoch": 0.3509398117498057, "grad_norm": 3.9560967351585816, "learning_rate": 1.5073061622856765e-06, "loss": 0.5352, "step": 762 }, { "epoch": 0.35140036268386055, "grad_norm": 4.221088969470446, "learning_rate": 1.506019461245535e-06, "loss": 0.6162, "step": 763 }, { "epoch": 0.35186091361791544, "grad_norm": 4.518359045200024, "learning_rate": 1.5047316331047846e-06, "loss": 0.6903, "step": 764 }, { "epoch": 0.3523214645519703, "grad_norm": 3.657150834793211, "learning_rate": 1.5034426807319162e-06, "loss": 0.6644, "step": 765 }, { "epoch": 0.35278201548602517, "grad_norm": 4.299840785551978, "learning_rate": 1.5021526069979232e-06, "loss": 0.7961, "step": 766 }, { "epoch": 0.35324256642008, "grad_norm": 4.3182017386107185, "learning_rate": 1.5008614147762982e-06, "loss": 0.8513, "step": 767 }, { "epoch": 0.3537031173541349, "grad_norm": 4.437106408982837, "learning_rate": 1.4995691069430244e-06, "loss": 0.8393, "step": 768 }, { "epoch": 0.35416366828818974, "grad_norm": 4.163749724374516, "learning_rate": 1.49827568637657e-06, "loss": 0.7635, "step": 769 }, { "epoch": 0.35462421922224463, "grad_norm": 3.9813912915216965, "learning_rate": 1.4969811559578818e-06, "loss": 0.6376, "step": 770 }, { "epoch": 0.35508477015629947, "grad_norm": 4.366675916968024, "learning_rate": 1.4956855185703786e-06, "loss": 0.499, "step": 771 }, { "epoch": 0.35554532109035436, "grad_norm": 3.746707833335465, "learning_rate": 1.4943887770999447e-06, "loss": 0.6788, "step": 772 }, { "epoch": 0.3560058720244092, "grad_norm": 4.484484812817845, "learning_rate": 1.493090934434924e-06, "loss": 0.724, "step": 773 }, { "epoch": 0.35646642295846404, "grad_norm": 4.334048310750252, "learning_rate": 1.4917919934661128e-06, "loss": 0.6797, "step": 774 }, { "epoch": 0.35692697389251893, "grad_norm": 4.4466845008503855, "learning_rate": 1.4904919570867539e-06, "loss": 0.6893, "step": 775 }, { "epoch": 0.35738752482657377, "grad_norm": 4.385561211498958, "learning_rate": 1.4891908281925298e-06, "loss": 0.8447, "step": 776 }, { "epoch": 0.35784807576062866, "grad_norm": 4.176517370869988, "learning_rate": 1.4878886096815569e-06, "loss": 0.6388, "step": 777 }, { "epoch": 0.3583086266946835, "grad_norm": 4.03790699066878, "learning_rate": 1.486585304454378e-06, "loss": 0.749, "step": 778 }, { "epoch": 0.3587691776287384, "grad_norm": 3.90312981079058, "learning_rate": 1.4852809154139576e-06, "loss": 0.6911, "step": 779 }, { "epoch": 0.3592297285627932, "grad_norm": 3.48460425864836, "learning_rate": 1.4839754454656723e-06, "loss": 0.6342, "step": 780 }, { "epoch": 0.3596902794968481, "grad_norm": 4.4725994928077215, "learning_rate": 1.4826688975173084e-06, "loss": 0.8818, "step": 781 }, { "epoch": 0.36015083043090296, "grad_norm": 4.901712204303722, "learning_rate": 1.481361274479052e-06, "loss": 0.6644, "step": 782 }, { "epoch": 0.36061138136495785, "grad_norm": 4.135236249425426, "learning_rate": 1.4800525792634838e-06, "loss": 0.681, "step": 783 }, { "epoch": 0.3610719322990127, "grad_norm": 4.1136403516057, "learning_rate": 1.4787428147855737e-06, "loss": 0.6774, "step": 784 }, { "epoch": 0.3615324832330676, "grad_norm": 4.409174750821155, "learning_rate": 1.4774319839626725e-06, "loss": 0.7842, "step": 785 }, { "epoch": 0.3619930341671224, "grad_norm": 4.054251941047054, "learning_rate": 1.476120089714506e-06, "loss": 0.7306, "step": 786 }, { "epoch": 0.36245358510117726, "grad_norm": 4.259641867350963, "learning_rate": 1.4748071349631693e-06, "loss": 0.8591, "step": 787 }, { "epoch": 0.36291413603523215, "grad_norm": 4.613510618951292, "learning_rate": 1.4734931226331188e-06, "loss": 0.6247, "step": 788 }, { "epoch": 0.363374686969287, "grad_norm": 3.8854959417464694, "learning_rate": 1.4721780556511674e-06, "loss": 0.639, "step": 789 }, { "epoch": 0.3638352379033419, "grad_norm": 4.082822051849626, "learning_rate": 1.4708619369464765e-06, "loss": 0.8625, "step": 790 }, { "epoch": 0.3642957888373967, "grad_norm": 4.6608462034891565, "learning_rate": 1.469544769450551e-06, "loss": 0.6385, "step": 791 }, { "epoch": 0.3647563397714516, "grad_norm": 4.13181905980936, "learning_rate": 1.46822655609723e-06, "loss": 0.7425, "step": 792 }, { "epoch": 0.36521689070550645, "grad_norm": 4.758198980342137, "learning_rate": 1.4669072998226843e-06, "loss": 0.764, "step": 793 }, { "epoch": 0.36567744163956134, "grad_norm": 3.910399298865837, "learning_rate": 1.4655870035654065e-06, "loss": 0.7915, "step": 794 }, { "epoch": 0.3661379925736162, "grad_norm": 3.851107495397856, "learning_rate": 1.4642656702662058e-06, "loss": 0.8249, "step": 795 }, { "epoch": 0.36659854350767107, "grad_norm": 3.8616802675926762, "learning_rate": 1.4629433028682013e-06, "loss": 0.7513, "step": 796 }, { "epoch": 0.3670590944417259, "grad_norm": 3.990701503973582, "learning_rate": 1.4616199043168154e-06, "loss": 0.8926, "step": 797 }, { "epoch": 0.3675196453757808, "grad_norm": 3.8082989762986696, "learning_rate": 1.4602954775597673e-06, "loss": 0.7132, "step": 798 }, { "epoch": 0.36798019630983564, "grad_norm": 4.099376682088573, "learning_rate": 1.458970025547067e-06, "loss": 0.8587, "step": 799 }, { "epoch": 0.36844074724389053, "grad_norm": 4.003022782157259, "learning_rate": 1.457643551231007e-06, "loss": 0.806, "step": 800 }, { "epoch": 0.36890129817794537, "grad_norm": 4.696491753422249, "learning_rate": 1.456316057566158e-06, "loss": 0.79, "step": 801 }, { "epoch": 0.3693618491120002, "grad_norm": 3.592426304235567, "learning_rate": 1.45498754750936e-06, "loss": 0.7034, "step": 802 }, { "epoch": 0.3698224000460551, "grad_norm": 3.928952091852703, "learning_rate": 1.453658024019718e-06, "loss": 0.7784, "step": 803 }, { "epoch": 0.37028295098010994, "grad_norm": 4.505748862283958, "learning_rate": 1.4523274900585942e-06, "loss": 0.6962, "step": 804 }, { "epoch": 0.37074350191416483, "grad_norm": 3.9782382470783806, "learning_rate": 1.4509959485896004e-06, "loss": 0.8815, "step": 805 }, { "epoch": 0.37120405284821967, "grad_norm": 4.237205066452709, "learning_rate": 1.4496634025785937e-06, "loss": 0.6502, "step": 806 }, { "epoch": 0.37166460378227456, "grad_norm": 4.134328827061462, "learning_rate": 1.4483298549936684e-06, "loss": 0.7103, "step": 807 }, { "epoch": 0.3721251547163294, "grad_norm": 4.233678332295039, "learning_rate": 1.4469953088051497e-06, "loss": 0.7381, "step": 808 }, { "epoch": 0.3725857056503843, "grad_norm": 4.01162317908695, "learning_rate": 1.445659766985586e-06, "loss": 0.6688, "step": 809 }, { "epoch": 0.37304625658443913, "grad_norm": 3.685660546587571, "learning_rate": 1.4443232325097454e-06, "loss": 0.6392, "step": 810 }, { "epoch": 0.373506807518494, "grad_norm": 4.160446173680877, "learning_rate": 1.4429857083546053e-06, "loss": 0.6051, "step": 811 }, { "epoch": 0.37396735845254886, "grad_norm": 4.466185059951008, "learning_rate": 1.4416471974993487e-06, "loss": 0.6045, "step": 812 }, { "epoch": 0.37442790938660375, "grad_norm": 4.077915886236709, "learning_rate": 1.4403077029253553e-06, "loss": 0.8253, "step": 813 }, { "epoch": 0.3748884603206586, "grad_norm": 3.5196016329477473, "learning_rate": 1.4389672276161963e-06, "loss": 0.5586, "step": 814 }, { "epoch": 0.3753490112547134, "grad_norm": 4.55533726932242, "learning_rate": 1.4376257745576282e-06, "loss": 0.6545, "step": 815 }, { "epoch": 0.3758095621887683, "grad_norm": 4.05185972383985, "learning_rate": 1.4362833467375836e-06, "loss": 0.7198, "step": 816 }, { "epoch": 0.37627011312282316, "grad_norm": 4.271050327302304, "learning_rate": 1.4349399471461684e-06, "loss": 0.7515, "step": 817 }, { "epoch": 0.37673066405687805, "grad_norm": 4.800754100943742, "learning_rate": 1.4335955787756513e-06, "loss": 0.8616, "step": 818 }, { "epoch": 0.3771912149909329, "grad_norm": 4.421638800178091, "learning_rate": 1.4322502446204592e-06, "loss": 0.8043, "step": 819 }, { "epoch": 0.3776517659249878, "grad_norm": 4.3574058804762315, "learning_rate": 1.4309039476771706e-06, "loss": 0.6156, "step": 820 }, { "epoch": 0.3781123168590426, "grad_norm": 4.620449275826889, "learning_rate": 1.429556690944509e-06, "loss": 0.7928, "step": 821 }, { "epoch": 0.3785728677930975, "grad_norm": 4.2419098918179685, "learning_rate": 1.4282084774233338e-06, "loss": 0.5802, "step": 822 }, { "epoch": 0.37903341872715235, "grad_norm": 4.033830382661969, "learning_rate": 1.4268593101166378e-06, "loss": 0.8021, "step": 823 }, { "epoch": 0.37949396966120724, "grad_norm": 4.014543723936092, "learning_rate": 1.4255091920295367e-06, "loss": 0.7799, "step": 824 }, { "epoch": 0.3799545205952621, "grad_norm": 4.22081195833773, "learning_rate": 1.4241581261692647e-06, "loss": 0.8352, "step": 825 }, { "epoch": 0.380415071529317, "grad_norm": 4.148836814940295, "learning_rate": 1.422806115545167e-06, "loss": 0.7955, "step": 826 }, { "epoch": 0.3808756224633718, "grad_norm": 3.562707026522088, "learning_rate": 1.4214531631686929e-06, "loss": 0.7286, "step": 827 }, { "epoch": 0.38133617339742665, "grad_norm": 4.1120072110499795, "learning_rate": 1.4200992720533886e-06, "loss": 0.8185, "step": 828 }, { "epoch": 0.38179672433148154, "grad_norm": 4.447951057862562, "learning_rate": 1.4187444452148934e-06, "loss": 0.7174, "step": 829 }, { "epoch": 0.3822572752655364, "grad_norm": 3.841481661707061, "learning_rate": 1.4173886856709288e-06, "loss": 0.7041, "step": 830 }, { "epoch": 0.38271782619959127, "grad_norm": 4.302423579036526, "learning_rate": 1.416031996441294e-06, "loss": 0.7109, "step": 831 }, { "epoch": 0.3831783771336461, "grad_norm": 4.243330881999875, "learning_rate": 1.4146743805478605e-06, "loss": 0.6826, "step": 832 }, { "epoch": 0.383638928067701, "grad_norm": 3.727246610705627, "learning_rate": 1.413315841014562e-06, "loss": 0.7232, "step": 833 }, { "epoch": 0.38409947900175584, "grad_norm": 4.5907531289906975, "learning_rate": 1.4119563808673905e-06, "loss": 0.7465, "step": 834 }, { "epoch": 0.38456002993581073, "grad_norm": 4.262969023114881, "learning_rate": 1.4105960031343889e-06, "loss": 0.7325, "step": 835 }, { "epoch": 0.38502058086986557, "grad_norm": 4.135843328368776, "learning_rate": 1.4092347108456424e-06, "loss": 0.8992, "step": 836 }, { "epoch": 0.38548113180392046, "grad_norm": 3.6732649443358762, "learning_rate": 1.4078725070332746e-06, "loss": 0.5065, "step": 837 }, { "epoch": 0.3859416827379753, "grad_norm": 4.084772410145544, "learning_rate": 1.4065093947314396e-06, "loss": 0.5881, "step": 838 }, { "epoch": 0.3864022336720302, "grad_norm": 4.366356928323156, "learning_rate": 1.4051453769763143e-06, "loss": 0.823, "step": 839 }, { "epoch": 0.38686278460608503, "grad_norm": 4.767944467357939, "learning_rate": 1.4037804568060919e-06, "loss": 0.7609, "step": 840 }, { "epoch": 0.38732333554013987, "grad_norm": 4.175875957633963, "learning_rate": 1.402414637260977e-06, "loss": 0.669, "step": 841 }, { "epoch": 0.38778388647419476, "grad_norm": 4.1116805237966405, "learning_rate": 1.4010479213831762e-06, "loss": 0.6037, "step": 842 }, { "epoch": 0.3882444374082496, "grad_norm": 4.421726310388768, "learning_rate": 1.399680312216894e-06, "loss": 0.7214, "step": 843 }, { "epoch": 0.3887049883423045, "grad_norm": 4.1618590329482945, "learning_rate": 1.3983118128083234e-06, "loss": 0.7242, "step": 844 }, { "epoch": 0.38916553927635933, "grad_norm": 4.469170515445956, "learning_rate": 1.3969424262056402e-06, "loss": 0.7637, "step": 845 }, { "epoch": 0.3896260902104142, "grad_norm": 4.205895124263287, "learning_rate": 1.3955721554589975e-06, "loss": 0.7922, "step": 846 }, { "epoch": 0.39008664114446906, "grad_norm": 4.310001613267352, "learning_rate": 1.3942010036205165e-06, "loss": 0.7034, "step": 847 }, { "epoch": 0.39054719207852395, "grad_norm": 3.9500597800259705, "learning_rate": 1.392828973744282e-06, "loss": 0.7482, "step": 848 }, { "epoch": 0.3910077430125788, "grad_norm": 4.270159459675723, "learning_rate": 1.3914560688863336e-06, "loss": 0.6449, "step": 849 }, { "epoch": 0.3914682939466337, "grad_norm": 4.161064943776425, "learning_rate": 1.39008229210466e-06, "loss": 0.6382, "step": 850 }, { "epoch": 0.3919288448806885, "grad_norm": 4.4888339074621655, "learning_rate": 1.3887076464591928e-06, "loss": 0.7049, "step": 851 }, { "epoch": 0.3923893958147434, "grad_norm": 3.6997951450270716, "learning_rate": 1.3873321350117981e-06, "loss": 0.617, "step": 852 }, { "epoch": 0.39284994674879825, "grad_norm": 3.9350947614285468, "learning_rate": 1.3859557608262705e-06, "loss": 0.7341, "step": 853 }, { "epoch": 0.3933104976828531, "grad_norm": 3.9702455593583332, "learning_rate": 1.384578526968326e-06, "loss": 0.6756, "step": 854 }, { "epoch": 0.393771048616908, "grad_norm": 4.380181721232569, "learning_rate": 1.3832004365055974e-06, "loss": 0.6833, "step": 855 }, { "epoch": 0.3942315995509628, "grad_norm": 3.683047472830385, "learning_rate": 1.3818214925076223e-06, "loss": 0.5763, "step": 856 }, { "epoch": 0.3946921504850177, "grad_norm": 3.9123871714276848, "learning_rate": 1.380441698045842e-06, "loss": 0.6336, "step": 857 }, { "epoch": 0.39515270141907255, "grad_norm": 4.200809906452029, "learning_rate": 1.3790610561935911e-06, "loss": 0.8197, "step": 858 }, { "epoch": 0.39561325235312744, "grad_norm": 4.311778926886457, "learning_rate": 1.3776795700260915e-06, "loss": 0.7477, "step": 859 }, { "epoch": 0.3960738032871823, "grad_norm": 4.766048912512157, "learning_rate": 1.3762972426204461e-06, "loss": 0.7929, "step": 860 }, { "epoch": 0.3965343542212372, "grad_norm": 4.394695898441554, "learning_rate": 1.374914077055632e-06, "loss": 0.8538, "step": 861 }, { "epoch": 0.396994905155292, "grad_norm": 3.8485583713267926, "learning_rate": 1.3735300764124916e-06, "loss": 0.6322, "step": 862 }, { "epoch": 0.3974554560893469, "grad_norm": 3.8261874841516454, "learning_rate": 1.3721452437737293e-06, "loss": 0.6999, "step": 863 }, { "epoch": 0.39791600702340174, "grad_norm": 4.547720315773425, "learning_rate": 1.3707595822239015e-06, "loss": 0.6515, "step": 864 }, { "epoch": 0.39837655795745663, "grad_norm": 3.904617763068036, "learning_rate": 1.3693730948494114e-06, "loss": 0.6813, "step": 865 }, { "epoch": 0.39883710889151147, "grad_norm": 3.976054214130074, "learning_rate": 1.3679857847385009e-06, "loss": 0.8241, "step": 866 }, { "epoch": 0.3992976598255663, "grad_norm": 4.343418223005652, "learning_rate": 1.3665976549812452e-06, "loss": 0.6301, "step": 867 }, { "epoch": 0.3997582107596212, "grad_norm": 3.9049044524224286, "learning_rate": 1.365208708669545e-06, "loss": 0.5713, "step": 868 }, { "epoch": 0.40021876169367604, "grad_norm": 4.335977278700569, "learning_rate": 1.36381894889712e-06, "loss": 0.6894, "step": 869 }, { "epoch": 0.40067931262773093, "grad_norm": 4.81289851836495, "learning_rate": 1.362428378759501e-06, "loss": 0.8549, "step": 870 }, { "epoch": 0.40113986356178577, "grad_norm": 4.232493263983137, "learning_rate": 1.3610370013540247e-06, "loss": 0.688, "step": 871 }, { "epoch": 0.40160041449584066, "grad_norm": 4.107505357306198, "learning_rate": 1.3596448197798253e-06, "loss": 0.7345, "step": 872 }, { "epoch": 0.4020609654298955, "grad_norm": 4.006011118569454, "learning_rate": 1.3582518371378282e-06, "loss": 0.8191, "step": 873 }, { "epoch": 0.4025215163639504, "grad_norm": 4.518005887607633, "learning_rate": 1.3568580565307436e-06, "loss": 0.7181, "step": 874 }, { "epoch": 0.40298206729800523, "grad_norm": 3.8897529026334663, "learning_rate": 1.355463481063059e-06, "loss": 0.6621, "step": 875 }, { "epoch": 0.4034426182320601, "grad_norm": 3.9777801680777727, "learning_rate": 1.3540681138410314e-06, "loss": 0.7208, "step": 876 }, { "epoch": 0.40390316916611496, "grad_norm": 3.7733514816468423, "learning_rate": 1.3526719579726829e-06, "loss": 0.6835, "step": 877 }, { "epoch": 0.40436372010016985, "grad_norm": 4.096070605611639, "learning_rate": 1.3512750165677906e-06, "loss": 0.7184, "step": 878 }, { "epoch": 0.4048242710342247, "grad_norm": 4.802494280655094, "learning_rate": 1.3498772927378824e-06, "loss": 0.7823, "step": 879 }, { "epoch": 0.40528482196827953, "grad_norm": 4.929169125910823, "learning_rate": 1.348478789596229e-06, "loss": 0.8811, "step": 880 }, { "epoch": 0.4057453729023344, "grad_norm": 4.400594746171594, "learning_rate": 1.3470795102578355e-06, "loss": 0.8202, "step": 881 }, { "epoch": 0.40620592383638926, "grad_norm": 4.121146435659359, "learning_rate": 1.3456794578394382e-06, "loss": 0.7593, "step": 882 }, { "epoch": 0.40666647477044415, "grad_norm": 4.239726577562792, "learning_rate": 1.3442786354594937e-06, "loss": 0.6906, "step": 883 }, { "epoch": 0.407127025704499, "grad_norm": 4.36410540854326, "learning_rate": 1.3428770462381739e-06, "loss": 0.6685, "step": 884 }, { "epoch": 0.4075875766385539, "grad_norm": 3.7559915703902758, "learning_rate": 1.3414746932973583e-06, "loss": 0.703, "step": 885 }, { "epoch": 0.4080481275726087, "grad_norm": 3.9565660820847923, "learning_rate": 1.340071579760629e-06, "loss": 0.6904, "step": 886 }, { "epoch": 0.4085086785066636, "grad_norm": 3.9763609203596544, "learning_rate": 1.338667708753261e-06, "loss": 0.6308, "step": 887 }, { "epoch": 0.40896922944071845, "grad_norm": 4.0203202672078575, "learning_rate": 1.3372630834022165e-06, "loss": 0.7439, "step": 888 }, { "epoch": 0.40942978037477334, "grad_norm": 4.413599234187577, "learning_rate": 1.3358577068361383e-06, "loss": 0.9272, "step": 889 }, { "epoch": 0.4098903313088282, "grad_norm": 4.091509270430083, "learning_rate": 1.3344515821853427e-06, "loss": 0.6614, "step": 890 }, { "epoch": 0.4103508822428831, "grad_norm": 3.8697114819355565, "learning_rate": 1.3330447125818114e-06, "loss": 0.6711, "step": 891 }, { "epoch": 0.4108114331769379, "grad_norm": 3.6700676494339004, "learning_rate": 1.331637101159186e-06, "loss": 0.7962, "step": 892 }, { "epoch": 0.41127198411099275, "grad_norm": 3.8215166732873764, "learning_rate": 1.3302287510527606e-06, "loss": 0.6166, "step": 893 }, { "epoch": 0.41173253504504764, "grad_norm": 4.233724378943085, "learning_rate": 1.3288196653994742e-06, "loss": 0.7968, "step": 894 }, { "epoch": 0.4121930859791025, "grad_norm": 4.20649336497008, "learning_rate": 1.3274098473379041e-06, "loss": 0.6567, "step": 895 }, { "epoch": 0.4126536369131574, "grad_norm": 3.802451269484863, "learning_rate": 1.3259993000082597e-06, "loss": 0.8233, "step": 896 }, { "epoch": 0.4131141878472122, "grad_norm": 3.5022294396542275, "learning_rate": 1.3245880265523737e-06, "loss": 0.6136, "step": 897 }, { "epoch": 0.4135747387812671, "grad_norm": 3.694365009014095, "learning_rate": 1.3231760301136968e-06, "loss": 0.6268, "step": 898 }, { "epoch": 0.41403528971532194, "grad_norm": 3.905649004641092, "learning_rate": 1.32176331383729e-06, "loss": 0.8077, "step": 899 }, { "epoch": 0.41449584064937683, "grad_norm": 3.971536463700037, "learning_rate": 1.3203498808698177e-06, "loss": 0.6161, "step": 900 }, { "epoch": 0.41495639158343167, "grad_norm": 4.846539888986999, "learning_rate": 1.3189357343595405e-06, "loss": 0.6126, "step": 901 }, { "epoch": 0.41541694251748656, "grad_norm": 4.189080762596746, "learning_rate": 1.317520877456308e-06, "loss": 0.6694, "step": 902 }, { "epoch": 0.4158774934515414, "grad_norm": 3.9488585386748336, "learning_rate": 1.3161053133115534e-06, "loss": 0.8634, "step": 903 }, { "epoch": 0.4163380443855963, "grad_norm": 3.9443618383214507, "learning_rate": 1.3146890450782833e-06, "loss": 0.7949, "step": 904 }, { "epoch": 0.41679859531965113, "grad_norm": 4.2302631169230045, "learning_rate": 1.3132720759110742e-06, "loss": 0.7533, "step": 905 }, { "epoch": 0.41725914625370597, "grad_norm": 4.292826960341292, "learning_rate": 1.3118544089660632e-06, "loss": 0.6933, "step": 906 }, { "epoch": 0.41771969718776086, "grad_norm": 3.9525373367744003, "learning_rate": 1.3104360474009413e-06, "loss": 0.6319, "step": 907 }, { "epoch": 0.4181802481218157, "grad_norm": 3.777103878681741, "learning_rate": 1.3090169943749473e-06, "loss": 0.7275, "step": 908 }, { "epoch": 0.4186407990558706, "grad_norm": 3.9695047826028613, "learning_rate": 1.3075972530488601e-06, "loss": 0.8247, "step": 909 }, { "epoch": 0.41910134998992543, "grad_norm": 4.15291959534821, "learning_rate": 1.306176826584991e-06, "loss": 0.7142, "step": 910 }, { "epoch": 0.4195619009239803, "grad_norm": 4.502719256347666, "learning_rate": 1.3047557181471782e-06, "loss": 0.779, "step": 911 }, { "epoch": 0.42002245185803516, "grad_norm": 4.2314815489896285, "learning_rate": 1.3033339309007782e-06, "loss": 0.7942, "step": 912 }, { "epoch": 0.42048300279209005, "grad_norm": 3.718983433590726, "learning_rate": 1.3019114680126607e-06, "loss": 0.7657, "step": 913 }, { "epoch": 0.4209435537261449, "grad_norm": 4.799593121983244, "learning_rate": 1.3004883326511986e-06, "loss": 0.8854, "step": 914 }, { "epoch": 0.4214041046601998, "grad_norm": 4.301623687345529, "learning_rate": 1.2990645279862637e-06, "loss": 0.7842, "step": 915 }, { "epoch": 0.4218646555942546, "grad_norm": 4.421866411580571, "learning_rate": 1.2976400571892187e-06, "loss": 0.6864, "step": 916 }, { "epoch": 0.4223252065283095, "grad_norm": 4.389402109219135, "learning_rate": 1.2962149234329096e-06, "loss": 0.7567, "step": 917 }, { "epoch": 0.42278575746236435, "grad_norm": 4.752070965920367, "learning_rate": 1.2947891298916597e-06, "loss": 0.7161, "step": 918 }, { "epoch": 0.4232463083964192, "grad_norm": 4.816495057762737, "learning_rate": 1.2933626797412601e-06, "loss": 0.5854, "step": 919 }, { "epoch": 0.4237068593304741, "grad_norm": 4.3888340674163, "learning_rate": 1.2919355761589673e-06, "loss": 0.7754, "step": 920 }, { "epoch": 0.4241674102645289, "grad_norm": 3.9997142599568054, "learning_rate": 1.2905078223234907e-06, "loss": 0.7866, "step": 921 }, { "epoch": 0.4246279611985838, "grad_norm": 4.427247490711759, "learning_rate": 1.2890794214149895e-06, "loss": 0.769, "step": 922 }, { "epoch": 0.42508851213263865, "grad_norm": 4.142788351819903, "learning_rate": 1.2876503766150634e-06, "loss": 0.7033, "step": 923 }, { "epoch": 0.42554906306669354, "grad_norm": 3.8958858375616168, "learning_rate": 1.2862206911067467e-06, "loss": 0.7111, "step": 924 }, { "epoch": 0.4260096140007484, "grad_norm": 3.9771407868078574, "learning_rate": 1.2847903680745012e-06, "loss": 0.7684, "step": 925 }, { "epoch": 0.4264701649348033, "grad_norm": 4.4074477140270885, "learning_rate": 1.2833594107042075e-06, "loss": 0.8682, "step": 926 }, { "epoch": 0.4269307158688581, "grad_norm": 4.283610772754408, "learning_rate": 1.2819278221831604e-06, "loss": 0.8018, "step": 927 }, { "epoch": 0.427391266802913, "grad_norm": 4.661978048921169, "learning_rate": 1.2804956057000597e-06, "loss": 0.7379, "step": 928 }, { "epoch": 0.42785181773696784, "grad_norm": 4.2856911168423455, "learning_rate": 1.2790627644450042e-06, "loss": 0.7024, "step": 929 }, { "epoch": 0.42831236867102274, "grad_norm": 4.3432817023308745, "learning_rate": 1.2776293016094848e-06, "loss": 0.7277, "step": 930 }, { "epoch": 0.4287729196050776, "grad_norm": 4.009815871037261, "learning_rate": 1.2761952203863758e-06, "loss": 0.6877, "step": 931 }, { "epoch": 0.4292334705391324, "grad_norm": 4.361628612943775, "learning_rate": 1.2747605239699293e-06, "loss": 0.7585, "step": 932 }, { "epoch": 0.4296940214731873, "grad_norm": 3.649724435583715, "learning_rate": 1.2733252155557686e-06, "loss": 0.6222, "step": 933 }, { "epoch": 0.43015457240724214, "grad_norm": 4.553666467307083, "learning_rate": 1.2718892983408787e-06, "loss": 0.7594, "step": 934 }, { "epoch": 0.43061512334129703, "grad_norm": 4.102114581755996, "learning_rate": 1.270452775523602e-06, "loss": 0.8818, "step": 935 }, { "epoch": 0.43107567427535187, "grad_norm": 4.793526332301778, "learning_rate": 1.2690156503036288e-06, "loss": 0.7231, "step": 936 }, { "epoch": 0.43153622520940677, "grad_norm": 3.5428001240830262, "learning_rate": 1.2675779258819913e-06, "loss": 0.566, "step": 937 }, { "epoch": 0.4319967761434616, "grad_norm": 4.134350627014905, "learning_rate": 1.2661396054610568e-06, "loss": 0.6602, "step": 938 }, { "epoch": 0.4324573270775165, "grad_norm": 3.3579481091100662, "learning_rate": 1.2647006922445203e-06, "loss": 0.7088, "step": 939 }, { "epoch": 0.43291787801157133, "grad_norm": 4.0109051309962584, "learning_rate": 1.2632611894373963e-06, "loss": 0.6691, "step": 940 }, { "epoch": 0.4333784289456262, "grad_norm": 4.078476089178835, "learning_rate": 1.2618211002460133e-06, "loss": 0.798, "step": 941 }, { "epoch": 0.43383897987968106, "grad_norm": 4.191349683218142, "learning_rate": 1.2603804278780054e-06, "loss": 0.5625, "step": 942 }, { "epoch": 0.43429953081373596, "grad_norm": 3.822714982552427, "learning_rate": 1.2589391755423061e-06, "loss": 0.5349, "step": 943 }, { "epoch": 0.4347600817477908, "grad_norm": 4.036119565941586, "learning_rate": 1.2574973464491406e-06, "loss": 0.6581, "step": 944 }, { "epoch": 0.43522063268184563, "grad_norm": 4.291870198266609, "learning_rate": 1.2560549438100187e-06, "loss": 0.8701, "step": 945 }, { "epoch": 0.4356811836159005, "grad_norm": 3.718497045229617, "learning_rate": 1.2546119708377273e-06, "loss": 0.7702, "step": 946 }, { "epoch": 0.43614173454995536, "grad_norm": 3.9961509392186723, "learning_rate": 1.2531684307463243e-06, "loss": 0.7252, "step": 947 }, { "epoch": 0.43660228548401026, "grad_norm": 3.9235493460856006, "learning_rate": 1.2517243267511308e-06, "loss": 0.667, "step": 948 }, { "epoch": 0.4370628364180651, "grad_norm": 4.411197226827501, "learning_rate": 1.2502796620687232e-06, "loss": 0.9429, "step": 949 }, { "epoch": 0.43752338735212, "grad_norm": 4.904255158821886, "learning_rate": 1.2488344399169275e-06, "loss": 0.8618, "step": 950 }, { "epoch": 0.4379839382861748, "grad_norm": 3.9947280653198747, "learning_rate": 1.2473886635148107e-06, "loss": 0.5152, "step": 951 }, { "epoch": 0.4384444892202297, "grad_norm": 3.9983149487696514, "learning_rate": 1.2459423360826753e-06, "loss": 0.6625, "step": 952 }, { "epoch": 0.43890504015428455, "grad_norm": 4.068019196515014, "learning_rate": 1.2444954608420509e-06, "loss": 0.6578, "step": 953 }, { "epoch": 0.43936559108833945, "grad_norm": 3.8397452440253335, "learning_rate": 1.2430480410156859e-06, "loss": 0.6834, "step": 954 }, { "epoch": 0.4398261420223943, "grad_norm": 3.8949483274968673, "learning_rate": 1.2416000798275434e-06, "loss": 0.7227, "step": 955 }, { "epoch": 0.4402866929564492, "grad_norm": 4.107874414650414, "learning_rate": 1.2401515805027923e-06, "loss": 0.7635, "step": 956 }, { "epoch": 0.440747243890504, "grad_norm": 3.8696809596668023, "learning_rate": 1.2387025462677986e-06, "loss": 0.675, "step": 957 }, { "epoch": 0.44120779482455885, "grad_norm": 4.343318465606402, "learning_rate": 1.2372529803501212e-06, "loss": 0.8226, "step": 958 }, { "epoch": 0.44166834575861375, "grad_norm": 4.194181123823176, "learning_rate": 1.2358028859785027e-06, "loss": 0.8117, "step": 959 }, { "epoch": 0.4421288966926686, "grad_norm": 4.117429286773159, "learning_rate": 1.234352266382863e-06, "loss": 0.5803, "step": 960 }, { "epoch": 0.4425894476267235, "grad_norm": 3.6275617903808817, "learning_rate": 1.2329011247942913e-06, "loss": 0.6018, "step": 961 }, { "epoch": 0.4430499985607783, "grad_norm": 4.969970055010778, "learning_rate": 1.2314494644450405e-06, "loss": 0.7268, "step": 962 }, { "epoch": 0.4435105494948332, "grad_norm": 4.2004830767826435, "learning_rate": 1.2299972885685175e-06, "loss": 0.6819, "step": 963 }, { "epoch": 0.44397110042888804, "grad_norm": 4.5176222419006695, "learning_rate": 1.2285446003992794e-06, "loss": 0.8154, "step": 964 }, { "epoch": 0.44443165136294294, "grad_norm": 4.428562375309563, "learning_rate": 1.2270914031730227e-06, "loss": 0.9058, "step": 965 }, { "epoch": 0.4448922022969978, "grad_norm": 4.503655625567003, "learning_rate": 1.2256377001265782e-06, "loss": 0.7579, "step": 966 }, { "epoch": 0.44535275323105267, "grad_norm": 4.014900943663767, "learning_rate": 1.2241834944979043e-06, "loss": 0.7185, "step": 967 }, { "epoch": 0.4458133041651075, "grad_norm": 3.910557632364268, "learning_rate": 1.2227287895260774e-06, "loss": 0.6681, "step": 968 }, { "epoch": 0.4462738550991624, "grad_norm": 4.174637645068096, "learning_rate": 1.2212735884512873e-06, "loss": 0.7096, "step": 969 }, { "epoch": 0.44673440603321724, "grad_norm": 4.269807235889961, "learning_rate": 1.2198178945148284e-06, "loss": 0.8077, "step": 970 }, { "epoch": 0.4471949569672721, "grad_norm": 4.457035835983003, "learning_rate": 1.2183617109590923e-06, "loss": 0.5903, "step": 971 }, { "epoch": 0.44765550790132697, "grad_norm": 3.7891728998796634, "learning_rate": 1.2169050410275617e-06, "loss": 0.7075, "step": 972 }, { "epoch": 0.4481160588353818, "grad_norm": 4.471225051089375, "learning_rate": 1.2154478879648034e-06, "loss": 0.6667, "step": 973 }, { "epoch": 0.4485766097694367, "grad_norm": 4.187944046520419, "learning_rate": 1.213990255016459e-06, "loss": 0.8755, "step": 974 }, { "epoch": 0.44903716070349153, "grad_norm": 4.249412382273956, "learning_rate": 1.2125321454292397e-06, "loss": 0.8683, "step": 975 }, { "epoch": 0.4494977116375464, "grad_norm": 3.9949257525487027, "learning_rate": 1.2110735624509184e-06, "loss": 0.8316, "step": 976 }, { "epoch": 0.44995826257160126, "grad_norm": 4.260416133396751, "learning_rate": 1.2096145093303215e-06, "loss": 0.6975, "step": 977 }, { "epoch": 0.45041881350565616, "grad_norm": 4.084476761681592, "learning_rate": 1.2081549893173244e-06, "loss": 0.6713, "step": 978 }, { "epoch": 0.450879364439711, "grad_norm": 3.628389705912675, "learning_rate": 1.206695005662841e-06, "loss": 0.5593, "step": 979 }, { "epoch": 0.4513399153737659, "grad_norm": 3.9744451515765715, "learning_rate": 1.2052345616188177e-06, "loss": 0.6643, "step": 980 }, { "epoch": 0.4518004663078207, "grad_norm": 4.115333493299806, "learning_rate": 1.2037736604382277e-06, "loss": 0.6601, "step": 981 }, { "epoch": 0.4522610172418756, "grad_norm": 4.133119596560022, "learning_rate": 1.2023123053750613e-06, "loss": 0.6882, "step": 982 }, { "epoch": 0.45272156817593046, "grad_norm": 4.438097555523807, "learning_rate": 1.2008504996843206e-06, "loss": 0.8097, "step": 983 }, { "epoch": 0.4531821191099853, "grad_norm": 4.4524611502143445, "learning_rate": 1.1993882466220102e-06, "loss": 0.7599, "step": 984 }, { "epoch": 0.4536426700440402, "grad_norm": 4.283084070325147, "learning_rate": 1.1979255494451326e-06, "loss": 0.7568, "step": 985 }, { "epoch": 0.454103220978095, "grad_norm": 3.5379115566414794, "learning_rate": 1.1964624114116784e-06, "loss": 0.9021, "step": 986 }, { "epoch": 0.4545637719121499, "grad_norm": 3.9695660505375177, "learning_rate": 1.194998835780621e-06, "loss": 0.7953, "step": 987 }, { "epoch": 0.45502432284620475, "grad_norm": 4.085913828667795, "learning_rate": 1.1935348258119083e-06, "loss": 0.7095, "step": 988 }, { "epoch": 0.45548487378025965, "grad_norm": 4.542094809350361, "learning_rate": 1.1920703847664546e-06, "loss": 0.6862, "step": 989 }, { "epoch": 0.4559454247143145, "grad_norm": 3.855322046199465, "learning_rate": 1.190605515906136e-06, "loss": 0.5849, "step": 990 }, { "epoch": 0.4564059756483694, "grad_norm": 4.004429822546657, "learning_rate": 1.1891402224937804e-06, "loss": 0.6704, "step": 991 }, { "epoch": 0.4568665265824242, "grad_norm": 4.011526647279701, "learning_rate": 1.1876745077931617e-06, "loss": 0.6577, "step": 992 }, { "epoch": 0.4573270775164791, "grad_norm": 3.753152302378122, "learning_rate": 1.1862083750689923e-06, "loss": 0.6747, "step": 993 }, { "epoch": 0.45778762845053395, "grad_norm": 4.144914277962396, "learning_rate": 1.1847418275869151e-06, "loss": 0.7485, "step": 994 }, { "epoch": 0.45824817938458884, "grad_norm": 4.041753669078422, "learning_rate": 1.183274868613498e-06, "loss": 0.8023, "step": 995 }, { "epoch": 0.4587087303186437, "grad_norm": 3.307166851578441, "learning_rate": 1.181807501416224e-06, "loss": 0.5752, "step": 996 }, { "epoch": 0.4591692812526985, "grad_norm": 4.499197738759874, "learning_rate": 1.1803397292634867e-06, "loss": 0.82, "step": 997 }, { "epoch": 0.4596298321867534, "grad_norm": 4.808546131795734, "learning_rate": 1.1788715554245807e-06, "loss": 0.8027, "step": 998 }, { "epoch": 0.46009038312080824, "grad_norm": 3.9798850491885074, "learning_rate": 1.1774029831696955e-06, "loss": 0.8561, "step": 999 }, { "epoch": 0.46055093405486314, "grad_norm": 4.004029291302463, "learning_rate": 1.1759340157699088e-06, "loss": 0.7203, "step": 1000 }, { "epoch": 0.461011484988918, "grad_norm": 4.033824413727988, "learning_rate": 1.1744646564971777e-06, "loss": 0.7202, "step": 1001 }, { "epoch": 0.46147203592297287, "grad_norm": 4.142175628315949, "learning_rate": 1.1729949086243319e-06, "loss": 0.8747, "step": 1002 }, { "epoch": 0.4619325868570277, "grad_norm": 3.8672987946352553, "learning_rate": 1.1715247754250673e-06, "loss": 0.5977, "step": 1003 }, { "epoch": 0.4623931377910826, "grad_norm": 3.543932066924655, "learning_rate": 1.1700542601739381e-06, "loss": 0.7574, "step": 1004 }, { "epoch": 0.46285368872513744, "grad_norm": 4.084833516897344, "learning_rate": 1.1685833661463488e-06, "loss": 0.6079, "step": 1005 }, { "epoch": 0.46331423965919233, "grad_norm": 4.525241633637477, "learning_rate": 1.1671120966185484e-06, "loss": 0.6966, "step": 1006 }, { "epoch": 0.46377479059324717, "grad_norm": 4.206567423249349, "learning_rate": 1.1656404548676219e-06, "loss": 0.7205, "step": 1007 }, { "epoch": 0.46423534152730206, "grad_norm": 4.056639494890737, "learning_rate": 1.1641684441714828e-06, "loss": 0.7653, "step": 1008 }, { "epoch": 0.4646958924613569, "grad_norm": 3.80274071676937, "learning_rate": 1.1626960678088677e-06, "loss": 0.5379, "step": 1009 }, { "epoch": 0.46515644339541173, "grad_norm": 4.18375627778784, "learning_rate": 1.1612233290593264e-06, "loss": 0.605, "step": 1010 }, { "epoch": 0.4656169943294666, "grad_norm": 3.6995552804789322, "learning_rate": 1.1597502312032168e-06, "loss": 0.6604, "step": 1011 }, { "epoch": 0.46607754526352146, "grad_norm": 4.470258432539243, "learning_rate": 1.158276777521696e-06, "loss": 0.6735, "step": 1012 }, { "epoch": 0.46653809619757636, "grad_norm": 4.478827362350565, "learning_rate": 1.1568029712967137e-06, "loss": 0.5908, "step": 1013 }, { "epoch": 0.4669986471316312, "grad_norm": 3.816586906527528, "learning_rate": 1.1553288158110057e-06, "loss": 0.7807, "step": 1014 }, { "epoch": 0.4674591980656861, "grad_norm": 4.756896730468176, "learning_rate": 1.153854314348085e-06, "loss": 0.6567, "step": 1015 }, { "epoch": 0.4679197489997409, "grad_norm": 4.002732522094042, "learning_rate": 1.152379470192235e-06, "loss": 0.7353, "step": 1016 }, { "epoch": 0.4683802999337958, "grad_norm": 4.077566965893754, "learning_rate": 1.1509042866285028e-06, "loss": 0.67, "step": 1017 }, { "epoch": 0.46884085086785066, "grad_norm": 4.5746044759206805, "learning_rate": 1.149428766942692e-06, "loss": 0.7369, "step": 1018 }, { "epoch": 0.46930140180190555, "grad_norm": 3.8587964660485117, "learning_rate": 1.1479529144213537e-06, "loss": 0.6675, "step": 1019 }, { "epoch": 0.4697619527359604, "grad_norm": 3.6455089870981325, "learning_rate": 1.1464767323517813e-06, "loss": 0.5115, "step": 1020 }, { "epoch": 0.4702225036700153, "grad_norm": 4.108150155903759, "learning_rate": 1.145000224022002e-06, "loss": 0.6494, "step": 1021 }, { "epoch": 0.4706830546040701, "grad_norm": 4.4482355227889405, "learning_rate": 1.143523392720769e-06, "loss": 0.7232, "step": 1022 }, { "epoch": 0.47114360553812495, "grad_norm": 4.077436736827702, "learning_rate": 1.1420462417375562e-06, "loss": 0.5589, "step": 1023 }, { "epoch": 0.47160415647217985, "grad_norm": 4.457965959606954, "learning_rate": 1.140568774362549e-06, "loss": 0.7405, "step": 1024 }, { "epoch": 0.4720647074062347, "grad_norm": 4.386323951379144, "learning_rate": 1.1390909938866367e-06, "loss": 0.6891, "step": 1025 }, { "epoch": 0.4725252583402896, "grad_norm": 4.152438462007998, "learning_rate": 1.137612903601407e-06, "loss": 0.5841, "step": 1026 }, { "epoch": 0.4729858092743444, "grad_norm": 4.408040777470179, "learning_rate": 1.1361345067991375e-06, "loss": 0.9611, "step": 1027 }, { "epoch": 0.4734463602083993, "grad_norm": 3.760610848054255, "learning_rate": 1.134655806772788e-06, "loss": 0.6102, "step": 1028 }, { "epoch": 0.47390691114245415, "grad_norm": 4.529567952391161, "learning_rate": 1.1331768068159946e-06, "loss": 0.9179, "step": 1029 }, { "epoch": 0.47436746207650904, "grad_norm": 4.457981306724757, "learning_rate": 1.1316975102230604e-06, "loss": 0.7677, "step": 1030 }, { "epoch": 0.4748280130105639, "grad_norm": 4.08539116970673, "learning_rate": 1.1302179202889505e-06, "loss": 0.6885, "step": 1031 }, { "epoch": 0.47528856394461877, "grad_norm": 4.389140279830501, "learning_rate": 1.1287380403092816e-06, "loss": 0.6779, "step": 1032 }, { "epoch": 0.4757491148786736, "grad_norm": 3.6521254524306728, "learning_rate": 1.127257873580318e-06, "loss": 0.6179, "step": 1033 }, { "epoch": 0.4762096658127285, "grad_norm": 3.8347695607539203, "learning_rate": 1.1257774233989623e-06, "loss": 0.5908, "step": 1034 }, { "epoch": 0.47667021674678334, "grad_norm": 4.1558249587669795, "learning_rate": 1.1242966930627484e-06, "loss": 0.5833, "step": 1035 }, { "epoch": 0.47713076768083823, "grad_norm": 3.960855998469203, "learning_rate": 1.1228156858698343e-06, "loss": 0.5451, "step": 1036 }, { "epoch": 0.47759131861489307, "grad_norm": 4.5939436762345265, "learning_rate": 1.1213344051189939e-06, "loss": 0.7571, "step": 1037 }, { "epoch": 0.4780518695489479, "grad_norm": 3.9322689553384795, "learning_rate": 1.1198528541096115e-06, "loss": 0.603, "step": 1038 }, { "epoch": 0.4785124204830028, "grad_norm": 4.650285675852403, "learning_rate": 1.1183710361416727e-06, "loss": 0.939, "step": 1039 }, { "epoch": 0.47897297141705764, "grad_norm": 4.3084508631521246, "learning_rate": 1.1168889545157582e-06, "loss": 0.6426, "step": 1040 }, { "epoch": 0.47943352235111253, "grad_norm": 4.03295494892592, "learning_rate": 1.1154066125330357e-06, "loss": 0.6887, "step": 1041 }, { "epoch": 0.47989407328516737, "grad_norm": 4.0449076168357285, "learning_rate": 1.1139240134952523e-06, "loss": 0.6838, "step": 1042 }, { "epoch": 0.48035462421922226, "grad_norm": 4.304095502820378, "learning_rate": 1.1124411607047288e-06, "loss": 0.6626, "step": 1043 }, { "epoch": 0.4808151751532771, "grad_norm": 4.391318043003625, "learning_rate": 1.1109580574643503e-06, "loss": 0.7702, "step": 1044 }, { "epoch": 0.481275726087332, "grad_norm": 4.005148943961203, "learning_rate": 1.10947470707756e-06, "loss": 0.6564, "step": 1045 }, { "epoch": 0.4817362770213868, "grad_norm": 4.387840228078561, "learning_rate": 1.107991112848352e-06, "loss": 0.7544, "step": 1046 }, { "epoch": 0.4821968279554417, "grad_norm": 4.071163593383692, "learning_rate": 1.1065072780812625e-06, "loss": 0.6924, "step": 1047 }, { "epoch": 0.48265737888949656, "grad_norm": 4.190656313172978, "learning_rate": 1.1050232060813644e-06, "loss": 0.749, "step": 1048 }, { "epoch": 0.48311792982355145, "grad_norm": 4.064204317728122, "learning_rate": 1.1035389001542595e-06, "loss": 0.8327, "step": 1049 }, { "epoch": 0.4835784807576063, "grad_norm": 4.072907244190008, "learning_rate": 1.1020543636060683e-06, "loss": 0.635, "step": 1050 }, { "epoch": 0.4840390316916611, "grad_norm": 4.417043878806534, "learning_rate": 1.100569599743428e-06, "loss": 0.7259, "step": 1051 }, { "epoch": 0.484499582625716, "grad_norm": 4.253358043481205, "learning_rate": 1.09908461187348e-06, "loss": 0.7593, "step": 1052 }, { "epoch": 0.48496013355977086, "grad_norm": 3.6348007882400037, "learning_rate": 1.0975994033038656e-06, "loss": 0.6468, "step": 1053 }, { "epoch": 0.48542068449382575, "grad_norm": 4.251912615709632, "learning_rate": 1.0961139773427171e-06, "loss": 0.7794, "step": 1054 }, { "epoch": 0.4858812354278806, "grad_norm": 3.805922012288859, "learning_rate": 1.0946283372986516e-06, "loss": 0.7632, "step": 1055 }, { "epoch": 0.4863417863619355, "grad_norm": 4.570873111553829, "learning_rate": 1.0931424864807623e-06, "loss": 0.7725, "step": 1056 }, { "epoch": 0.4868023372959903, "grad_norm": 4.143827849390761, "learning_rate": 1.0916564281986133e-06, "loss": 0.6471, "step": 1057 }, { "epoch": 0.4872628882300452, "grad_norm": 4.254882572698313, "learning_rate": 1.0901701657622291e-06, "loss": 0.6866, "step": 1058 }, { "epoch": 0.48772343916410005, "grad_norm": 4.647418621286222, "learning_rate": 1.0886837024820897e-06, "loss": 0.7686, "step": 1059 }, { "epoch": 0.48818399009815494, "grad_norm": 4.035408895466573, "learning_rate": 1.0871970416691227e-06, "loss": 0.8559, "step": 1060 }, { "epoch": 0.4886445410322098, "grad_norm": 4.272163611825335, "learning_rate": 1.085710186634695e-06, "loss": 0.7681, "step": 1061 }, { "epoch": 0.48910509196626467, "grad_norm": 3.9504556561277537, "learning_rate": 1.0842231406906076e-06, "loss": 0.6932, "step": 1062 }, { "epoch": 0.4895656429003195, "grad_norm": 4.0197256831808605, "learning_rate": 1.0827359071490845e-06, "loss": 0.7409, "step": 1063 }, { "epoch": 0.49002619383437435, "grad_norm": 4.51485193854349, "learning_rate": 1.0812484893227688e-06, "loss": 0.7056, "step": 1064 }, { "epoch": 0.49048674476842924, "grad_norm": 4.1486677624668875, "learning_rate": 1.079760890524715e-06, "loss": 0.6347, "step": 1065 }, { "epoch": 0.4909472957024841, "grad_norm": 4.501353519390922, "learning_rate": 1.0782731140683784e-06, "loss": 0.7964, "step": 1066 }, { "epoch": 0.49140784663653897, "grad_norm": 4.843233769513205, "learning_rate": 1.0767851632676119e-06, "loss": 0.719, "step": 1067 }, { "epoch": 0.4918683975705938, "grad_norm": 4.408353840170217, "learning_rate": 1.0752970414366561e-06, "loss": 0.6919, "step": 1068 }, { "epoch": 0.4923289485046487, "grad_norm": 3.95567191662388, "learning_rate": 1.0738087518901326e-06, "loss": 0.7527, "step": 1069 }, { "epoch": 0.49278949943870354, "grad_norm": 4.152331858906229, "learning_rate": 1.0723202979430364e-06, "loss": 0.762, "step": 1070 }, { "epoch": 0.49325005037275843, "grad_norm": 4.380145565764831, "learning_rate": 1.0708316829107293e-06, "loss": 0.627, "step": 1071 }, { "epoch": 0.49371060130681327, "grad_norm": 4.6120911010812975, "learning_rate": 1.0693429101089306e-06, "loss": 0.6907, "step": 1072 }, { "epoch": 0.49417115224086816, "grad_norm": 4.318413446085777, "learning_rate": 1.0678539828537123e-06, "loss": 0.6239, "step": 1073 }, { "epoch": 0.494631703174923, "grad_norm": 4.004441044828475, "learning_rate": 1.06636490446149e-06, "loss": 0.7466, "step": 1074 }, { "epoch": 0.4950922541089779, "grad_norm": 3.811530156634689, "learning_rate": 1.064875678249016e-06, "loss": 0.6861, "step": 1075 }, { "epoch": 0.49555280504303273, "grad_norm": 4.505337109539018, "learning_rate": 1.0633863075333712e-06, "loss": 0.8421, "step": 1076 }, { "epoch": 0.49601335597708757, "grad_norm": 4.319150521676603, "learning_rate": 1.0618967956319595e-06, "loss": 0.8524, "step": 1077 }, { "epoch": 0.49647390691114246, "grad_norm": 4.318655235641764, "learning_rate": 1.0604071458624985e-06, "loss": 0.7279, "step": 1078 }, { "epoch": 0.4969344578451973, "grad_norm": 3.9614958932734594, "learning_rate": 1.058917361543013e-06, "loss": 0.8145, "step": 1079 }, { "epoch": 0.4973950087792522, "grad_norm": 4.832764322971954, "learning_rate": 1.0574274459918279e-06, "loss": 0.6912, "step": 1080 }, { "epoch": 0.497855559713307, "grad_norm": 4.3822975415522665, "learning_rate": 1.0559374025275595e-06, "loss": 0.7201, "step": 1081 }, { "epoch": 0.4983161106473619, "grad_norm": 4.133226714005239, "learning_rate": 1.0544472344691102e-06, "loss": 0.8841, "step": 1082 }, { "epoch": 0.49877666158141676, "grad_norm": 4.5638617724648, "learning_rate": 1.0529569451356586e-06, "loss": 0.7121, "step": 1083 }, { "epoch": 0.49923721251547165, "grad_norm": 4.257578288646333, "learning_rate": 1.051466537846655e-06, "loss": 0.8808, "step": 1084 }, { "epoch": 0.4996977634495265, "grad_norm": 4.134139993965767, "learning_rate": 1.049976015921811e-06, "loss": 0.7911, "step": 1085 }, { "epoch": 0.5001583143835814, "grad_norm": 4.435239858783283, "learning_rate": 1.048485382681094e-06, "loss": 0.9537, "step": 1086 }, { "epoch": 0.5006188653176362, "grad_norm": 3.711410013109254, "learning_rate": 1.0469946414447196e-06, "loss": 0.6043, "step": 1087 }, { "epoch": 0.5010794162516911, "grad_norm": 4.038677060872628, "learning_rate": 1.0455037955331447e-06, "loss": 0.6301, "step": 1088 }, { "epoch": 0.5015399671857459, "grad_norm": 3.960720111031372, "learning_rate": 1.0440128482670569e-06, "loss": 0.7507, "step": 1089 }, { "epoch": 0.5020005181198008, "grad_norm": 4.690165288799361, "learning_rate": 1.0425218029673718e-06, "loss": 0.6391, "step": 1090 }, { "epoch": 0.5024610690538557, "grad_norm": 4.128661150418332, "learning_rate": 1.0410306629552231e-06, "loss": 0.6847, "step": 1091 }, { "epoch": 0.5029216199879105, "grad_norm": 4.210996416854986, "learning_rate": 1.0395394315519541e-06, "loss": 0.7746, "step": 1092 }, { "epoch": 0.5033821709219654, "grad_norm": 4.1148554479176465, "learning_rate": 1.0380481120791136e-06, "loss": 0.6453, "step": 1093 }, { "epoch": 0.5038427218560203, "grad_norm": 3.6451717450624597, "learning_rate": 1.036556707858445e-06, "loss": 0.6499, "step": 1094 }, { "epoch": 0.5043032727900751, "grad_norm": 3.9267692170295905, "learning_rate": 1.0350652222118807e-06, "loss": 0.7299, "step": 1095 }, { "epoch": 0.50476382372413, "grad_norm": 3.765620832807947, "learning_rate": 1.0335736584615356e-06, "loss": 0.6691, "step": 1096 }, { "epoch": 0.5052243746581848, "grad_norm": 4.183007163591825, "learning_rate": 1.0320820199296974e-06, "loss": 0.6075, "step": 1097 }, { "epoch": 0.5056849255922398, "grad_norm": 4.180747240764905, "learning_rate": 1.0305903099388202e-06, "loss": 0.5269, "step": 1098 }, { "epoch": 0.5061454765262946, "grad_norm": 3.7578133657731048, "learning_rate": 1.0290985318115184e-06, "loss": 0.6996, "step": 1099 }, { "epoch": 0.5066060274603494, "grad_norm": 4.337103988337365, "learning_rate": 1.0276066888705574e-06, "loss": 0.8582, "step": 1100 }, { "epoch": 0.5070665783944043, "grad_norm": 3.847549389433139, "learning_rate": 1.0261147844388472e-06, "loss": 0.7523, "step": 1101 }, { "epoch": 0.5075271293284591, "grad_norm": 3.94921226608635, "learning_rate": 1.0246228218394346e-06, "loss": 0.6824, "step": 1102 }, { "epoch": 0.5079876802625141, "grad_norm": 4.709843497821357, "learning_rate": 1.023130804395496e-06, "loss": 0.7228, "step": 1103 }, { "epoch": 0.5084482311965689, "grad_norm": 4.110120530022858, "learning_rate": 1.0216387354303295e-06, "loss": 0.5728, "step": 1104 }, { "epoch": 0.5089087821306237, "grad_norm": 3.3846966006659915, "learning_rate": 1.0201466182673498e-06, "loss": 0.6399, "step": 1105 }, { "epoch": 0.5093693330646786, "grad_norm": 4.332669135222788, "learning_rate": 1.0186544562300764e-06, "loss": 0.628, "step": 1106 }, { "epoch": 0.5098298839987335, "grad_norm": 3.848040731741899, "learning_rate": 1.0171622526421304e-06, "loss": 0.7712, "step": 1107 }, { "epoch": 0.5102904349327884, "grad_norm": 3.7909720345376248, "learning_rate": 1.0156700108272252e-06, "loss": 0.8016, "step": 1108 }, { "epoch": 0.5107509858668432, "grad_norm": 4.072257376987735, "learning_rate": 1.0141777341091587e-06, "loss": 0.6257, "step": 1109 }, { "epoch": 0.511211536800898, "grad_norm": 4.457788041023676, "learning_rate": 1.0126854258118074e-06, "loss": 0.7379, "step": 1110 }, { "epoch": 0.511672087734953, "grad_norm": 4.237931228397368, "learning_rate": 1.011193089259118e-06, "loss": 0.6218, "step": 1111 }, { "epoch": 0.5121326386690078, "grad_norm": 4.273387111813834, "learning_rate": 1.009700727775099e-06, "loss": 0.6821, "step": 1112 }, { "epoch": 0.5125931896030627, "grad_norm": 3.8864849154263883, "learning_rate": 1.008208344683816e-06, "loss": 0.5783, "step": 1113 }, { "epoch": 0.5130537405371175, "grad_norm": 4.596503256965401, "learning_rate": 1.0067159433093815e-06, "loss": 0.7439, "step": 1114 }, { "epoch": 0.5135142914711723, "grad_norm": 4.464638347121223, "learning_rate": 1.00522352697595e-06, "loss": 0.902, "step": 1115 }, { "epoch": 0.5139748424052273, "grad_norm": 3.7283832580495497, "learning_rate": 1.003731099007708e-06, "loss": 0.6476, "step": 1116 }, { "epoch": 0.5144353933392821, "grad_norm": 4.072202262701987, "learning_rate": 1.002238662728869e-06, "loss": 0.6871, "step": 1117 }, { "epoch": 0.514895944273337, "grad_norm": 3.876781107464835, "learning_rate": 1.000746221463664e-06, "loss": 0.758, "step": 1118 }, { "epoch": 0.5153564952073918, "grad_norm": 3.8410203433095225, "learning_rate": 9.992537785363361e-07, "loss": 0.6267, "step": 1119 }, { "epoch": 0.5158170461414467, "grad_norm": 3.7556849256096707, "learning_rate": 9.977613372711308e-07, "loss": 0.704, "step": 1120 }, { "epoch": 0.5162775970755016, "grad_norm": 3.4621563699112543, "learning_rate": 9.962689009922918e-07, "loss": 0.6586, "step": 1121 }, { "epoch": 0.5167381480095564, "grad_norm": 4.157400036312437, "learning_rate": 9.947764730240501e-07, "loss": 0.6953, "step": 1122 }, { "epoch": 0.5171986989436113, "grad_norm": 4.091692639526056, "learning_rate": 9.932840566906184e-07, "loss": 0.6493, "step": 1123 }, { "epoch": 0.5176592498776662, "grad_norm": 3.748938161986732, "learning_rate": 9.917916553161841e-07, "loss": 0.6406, "step": 1124 }, { "epoch": 0.518119800811721, "grad_norm": 4.265193427791908, "learning_rate": 9.90299272224901e-07, "loss": 0.69, "step": 1125 }, { "epoch": 0.5185803517457759, "grad_norm": 3.892009630589068, "learning_rate": 9.888069107408824e-07, "loss": 0.7303, "step": 1126 }, { "epoch": 0.5190409026798307, "grad_norm": 3.81764930072352, "learning_rate": 9.873145741881927e-07, "loss": 0.8022, "step": 1127 }, { "epoch": 0.5195014536138856, "grad_norm": 4.161926089261744, "learning_rate": 9.858222658908412e-07, "loss": 0.8289, "step": 1128 }, { "epoch": 0.5199620045479405, "grad_norm": 4.241341270707092, "learning_rate": 9.84329989172775e-07, "loss": 0.8226, "step": 1129 }, { "epoch": 0.5204225554819953, "grad_norm": 3.7923568071731415, "learning_rate": 9.828377473578697e-07, "loss": 0.6971, "step": 1130 }, { "epoch": 0.5208831064160502, "grad_norm": 4.1679083378205375, "learning_rate": 9.813455437699237e-07, "loss": 0.77, "step": 1131 }, { "epoch": 0.521343657350105, "grad_norm": 4.006964815236345, "learning_rate": 9.798533817326504e-07, "loss": 0.7541, "step": 1132 }, { "epoch": 0.52180420828416, "grad_norm": 5.083385766323812, "learning_rate": 9.783612645696702e-07, "loss": 0.6579, "step": 1133 }, { "epoch": 0.5222647592182148, "grad_norm": 3.734512548524421, "learning_rate": 9.768691956045042e-07, "loss": 0.6652, "step": 1134 }, { "epoch": 0.5227253101522696, "grad_norm": 4.326374594547312, "learning_rate": 9.753771781605657e-07, "loss": 0.7162, "step": 1135 }, { "epoch": 0.5231858610863245, "grad_norm": 4.222645202689667, "learning_rate": 9.73885215561153e-07, "loss": 0.6991, "step": 1136 }, { "epoch": 0.5236464120203794, "grad_norm": 4.054089171562866, "learning_rate": 9.723933111294427e-07, "loss": 0.7171, "step": 1137 }, { "epoch": 0.5241069629544343, "grad_norm": 4.0058206337013065, "learning_rate": 9.709014681884815e-07, "loss": 0.6212, "step": 1138 }, { "epoch": 0.5245675138884891, "grad_norm": 4.462829883472917, "learning_rate": 9.6940969006118e-07, "loss": 0.8297, "step": 1139 }, { "epoch": 0.5250280648225439, "grad_norm": 4.154330363313688, "learning_rate": 9.67917980070303e-07, "loss": 0.8899, "step": 1140 }, { "epoch": 0.5254886157565988, "grad_norm": 3.9230034824402225, "learning_rate": 9.664263415384643e-07, "loss": 0.6491, "step": 1141 }, { "epoch": 0.5259491666906537, "grad_norm": 4.167083283008367, "learning_rate": 9.649347777881192e-07, "loss": 0.65, "step": 1142 }, { "epoch": 0.5264097176247086, "grad_norm": 4.632081758089831, "learning_rate": 9.634432921415554e-07, "loss": 0.6723, "step": 1143 }, { "epoch": 0.5268702685587634, "grad_norm": 3.956711418966235, "learning_rate": 9.619518879208865e-07, "loss": 0.6923, "step": 1144 }, { "epoch": 0.5273308194928182, "grad_norm": 3.6226103721015575, "learning_rate": 9.604605684480458e-07, "loss": 0.7288, "step": 1145 }, { "epoch": 0.5277913704268732, "grad_norm": 4.395291266126995, "learning_rate": 9.589693370447768e-07, "loss": 0.7965, "step": 1146 }, { "epoch": 0.528251921360928, "grad_norm": 4.107585282939286, "learning_rate": 9.574781970326283e-07, "loss": 0.6804, "step": 1147 }, { "epoch": 0.5287124722949829, "grad_norm": 3.6717897994934066, "learning_rate": 9.559871517329434e-07, "loss": 0.6114, "step": 1148 }, { "epoch": 0.5291730232290377, "grad_norm": 4.2066950781071455, "learning_rate": 9.544962044668555e-07, "loss": 0.7463, "step": 1149 }, { "epoch": 0.5296335741630926, "grad_norm": 4.253901395461955, "learning_rate": 9.530053585552802e-07, "loss": 0.73, "step": 1150 }, { "epoch": 0.5300941250971475, "grad_norm": 4.22985494274616, "learning_rate": 9.515146173189057e-07, "loss": 0.7149, "step": 1151 }, { "epoch": 0.5305546760312023, "grad_norm": 4.131088082414393, "learning_rate": 9.50023984078189e-07, "loss": 0.604, "step": 1152 }, { "epoch": 0.5310152269652572, "grad_norm": 4.213421725067517, "learning_rate": 9.485334621533453e-07, "loss": 0.6192, "step": 1153 }, { "epoch": 0.5314757778993121, "grad_norm": 3.8998137471713386, "learning_rate": 9.470430548643411e-07, "loss": 0.6848, "step": 1154 }, { "epoch": 0.531936328833367, "grad_norm": 3.920674220040441, "learning_rate": 9.455527655308899e-07, "loss": 0.6701, "step": 1155 }, { "epoch": 0.5323968797674218, "grad_norm": 4.282762930913784, "learning_rate": 9.440625974724407e-07, "loss": 0.8079, "step": 1156 }, { "epoch": 0.5328574307014766, "grad_norm": 3.8822520692318383, "learning_rate": 9.425725540081721e-07, "loss": 0.5608, "step": 1157 }, { "epoch": 0.5333179816355315, "grad_norm": 4.095098070889826, "learning_rate": 9.410826384569869e-07, "loss": 0.7821, "step": 1158 }, { "epoch": 0.5337785325695864, "grad_norm": 4.1746507751309325, "learning_rate": 9.395928541375013e-07, "loss": 0.6254, "step": 1159 }, { "epoch": 0.5342390835036412, "grad_norm": 4.105394541675109, "learning_rate": 9.381032043680405e-07, "loss": 0.7669, "step": 1160 }, { "epoch": 0.5346996344376961, "grad_norm": 3.6658809302711783, "learning_rate": 9.366136924666288e-07, "loss": 0.821, "step": 1161 }, { "epoch": 0.5351601853717509, "grad_norm": 3.9690597020422795, "learning_rate": 9.351243217509842e-07, "loss": 0.8011, "step": 1162 }, { "epoch": 0.5356207363058059, "grad_norm": 4.203966380528625, "learning_rate": 9.336350955385101e-07, "loss": 0.7245, "step": 1163 }, { "epoch": 0.5360812872398607, "grad_norm": 4.177358562984364, "learning_rate": 9.321460171462876e-07, "loss": 0.6989, "step": 1164 }, { "epoch": 0.5365418381739155, "grad_norm": 4.332302103535103, "learning_rate": 9.306570898910695e-07, "loss": 0.6436, "step": 1165 }, { "epoch": 0.5370023891079704, "grad_norm": 4.059823128895644, "learning_rate": 9.29168317089271e-07, "loss": 0.6816, "step": 1166 }, { "epoch": 0.5374629400420253, "grad_norm": 4.069368785154939, "learning_rate": 9.276797020569635e-07, "loss": 0.7369, "step": 1167 }, { "epoch": 0.5379234909760802, "grad_norm": 3.4255690376648724, "learning_rate": 9.261912481098675e-07, "loss": 0.6237, "step": 1168 }, { "epoch": 0.538384041910135, "grad_norm": 4.343803302823385, "learning_rate": 9.24702958563344e-07, "loss": 0.6789, "step": 1169 }, { "epoch": 0.5388445928441898, "grad_norm": 4.1226926144703615, "learning_rate": 9.232148367323882e-07, "loss": 0.8195, "step": 1170 }, { "epoch": 0.5393051437782447, "grad_norm": 4.16456403336254, "learning_rate": 9.217268859316218e-07, "loss": 0.6723, "step": 1171 }, { "epoch": 0.5397656947122996, "grad_norm": 4.853656789261731, "learning_rate": 9.202391094752853e-07, "loss": 0.685, "step": 1172 }, { "epoch": 0.5402262456463545, "grad_norm": 4.29898905420006, "learning_rate": 9.187515106772311e-07, "loss": 0.7038, "step": 1173 }, { "epoch": 0.5406867965804093, "grad_norm": 4.301021559346878, "learning_rate": 9.172640928509158e-07, "loss": 0.7236, "step": 1174 }, { "epoch": 0.5411473475144641, "grad_norm": 4.357779562241686, "learning_rate": 9.157768593093925e-07, "loss": 0.6689, "step": 1175 }, { "epoch": 0.5416078984485191, "grad_norm": 3.6413649849005516, "learning_rate": 9.142898133653047e-07, "loss": 0.7693, "step": 1176 }, { "epoch": 0.5420684493825739, "grad_norm": 4.632386945479398, "learning_rate": 9.128029583308773e-07, "loss": 0.6802, "step": 1177 }, { "epoch": 0.5425290003166288, "grad_norm": 4.615182981678207, "learning_rate": 9.113162975179104e-07, "loss": 0.6672, "step": 1178 }, { "epoch": 0.5429895512506836, "grad_norm": 4.061461376462398, "learning_rate": 9.098298342377711e-07, "loss": 0.8493, "step": 1179 }, { "epoch": 0.5434501021847385, "grad_norm": 3.929694497766991, "learning_rate": 9.083435718013868e-07, "loss": 0.7636, "step": 1180 }, { "epoch": 0.5439106531187934, "grad_norm": 3.9790861823546857, "learning_rate": 9.068575135192376e-07, "loss": 0.6698, "step": 1181 }, { "epoch": 0.5443712040528482, "grad_norm": 4.144412007709005, "learning_rate": 9.053716627013487e-07, "loss": 0.6695, "step": 1182 }, { "epoch": 0.5448317549869031, "grad_norm": 4.0496688830155385, "learning_rate": 9.038860226572831e-07, "loss": 0.7601, "step": 1183 }, { "epoch": 0.5452923059209579, "grad_norm": 4.416889504507818, "learning_rate": 9.024005966961346e-07, "loss": 0.7662, "step": 1184 }, { "epoch": 0.5457528568550128, "grad_norm": 4.831468353599739, "learning_rate": 9.009153881265198e-07, "loss": 0.7714, "step": 1185 }, { "epoch": 0.5462134077890677, "grad_norm": 4.3402194792178515, "learning_rate": 8.994304002565722e-07, "loss": 0.8429, "step": 1186 }, { "epoch": 0.5466739587231225, "grad_norm": 4.358476869427937, "learning_rate": 8.979456363939317e-07, "loss": 0.6832, "step": 1187 }, { "epoch": 0.5471345096571774, "grad_norm": 3.9945919905204126, "learning_rate": 8.964610998457407e-07, "loss": 0.5779, "step": 1188 }, { "epoch": 0.5475950605912323, "grad_norm": 4.117483281550575, "learning_rate": 8.949767939186356e-07, "loss": 0.7107, "step": 1189 }, { "epoch": 0.5480556115252871, "grad_norm": 4.011786757545117, "learning_rate": 8.934927219187373e-07, "loss": 0.754, "step": 1190 }, { "epoch": 0.548516162459342, "grad_norm": 3.9436835755413817, "learning_rate": 8.920088871516481e-07, "loss": 0.6904, "step": 1191 }, { "epoch": 0.5489767133933968, "grad_norm": 4.398933742514039, "learning_rate": 8.905252929224402e-07, "loss": 0.7132, "step": 1192 }, { "epoch": 0.5494372643274518, "grad_norm": 4.456779196759148, "learning_rate": 8.890419425356495e-07, "loss": 0.8422, "step": 1193 }, { "epoch": 0.5498978152615066, "grad_norm": 4.184603923032094, "learning_rate": 8.875588392952712e-07, "loss": 0.6804, "step": 1194 }, { "epoch": 0.5503583661955614, "grad_norm": 4.551794510283765, "learning_rate": 8.860759865047475e-07, "loss": 0.7313, "step": 1195 }, { "epoch": 0.5508189171296163, "grad_norm": 4.019820614008391, "learning_rate": 8.845933874669644e-07, "loss": 0.5872, "step": 1196 }, { "epoch": 0.5512794680636711, "grad_norm": 4.2604065999543215, "learning_rate": 8.831110454842418e-07, "loss": 0.6851, "step": 1197 }, { "epoch": 0.5517400189977261, "grad_norm": 4.312789321131146, "learning_rate": 8.816289638583272e-07, "loss": 0.8704, "step": 1198 }, { "epoch": 0.5522005699317809, "grad_norm": 3.8001776377640684, "learning_rate": 8.801471458903885e-07, "loss": 0.6018, "step": 1199 }, { "epoch": 0.5526611208658357, "grad_norm": 4.1010433383880756, "learning_rate": 8.786655948810062e-07, "loss": 0.6156, "step": 1200 }, { "epoch": 0.5531216717998906, "grad_norm": 3.8876221863099176, "learning_rate": 8.771843141301658e-07, "loss": 0.7676, "step": 1201 }, { "epoch": 0.5535822227339455, "grad_norm": 4.526187781100721, "learning_rate": 8.757033069372514e-07, "loss": 0.706, "step": 1202 }, { "epoch": 0.5540427736680004, "grad_norm": 3.884356238888823, "learning_rate": 8.742225766010375e-07, "loss": 0.8039, "step": 1203 }, { "epoch": 0.5545033246020552, "grad_norm": 3.9603978303178784, "learning_rate": 8.727421264196819e-07, "loss": 0.721, "step": 1204 }, { "epoch": 0.55496387553611, "grad_norm": 3.871000251285073, "learning_rate": 8.712619596907187e-07, "loss": 0.7703, "step": 1205 }, { "epoch": 0.555424426470165, "grad_norm": 3.6021910232704633, "learning_rate": 8.697820797110498e-07, "loss": 0.7384, "step": 1206 }, { "epoch": 0.5558849774042198, "grad_norm": 4.0743461029761185, "learning_rate": 8.683024897769395e-07, "loss": 0.6137, "step": 1207 }, { "epoch": 0.5563455283382747, "grad_norm": 4.367817390270403, "learning_rate": 8.668231931840053e-07, "loss": 0.6603, "step": 1208 }, { "epoch": 0.5568060792723295, "grad_norm": 3.89539678260872, "learning_rate": 8.653441932272118e-07, "loss": 0.7025, "step": 1209 }, { "epoch": 0.5572666302063843, "grad_norm": 3.757534587177121, "learning_rate": 8.638654932008626e-07, "loss": 0.7583, "step": 1210 }, { "epoch": 0.5577271811404393, "grad_norm": 4.082849019453942, "learning_rate": 8.623870963985929e-07, "loss": 0.656, "step": 1211 }, { "epoch": 0.5581877320744941, "grad_norm": 4.027585828738317, "learning_rate": 8.609090061133633e-07, "loss": 0.6461, "step": 1212 }, { "epoch": 0.558648283008549, "grad_norm": 4.568858558324279, "learning_rate": 8.594312256374512e-07, "loss": 0.8754, "step": 1213 }, { "epoch": 0.5591088339426038, "grad_norm": 4.026254170799164, "learning_rate": 8.579537582624437e-07, "loss": 0.7149, "step": 1214 }, { "epoch": 0.5595693848766587, "grad_norm": 4.117787853031288, "learning_rate": 8.564766072792311e-07, "loss": 0.6414, "step": 1215 }, { "epoch": 0.5600299358107136, "grad_norm": 4.096469062141035, "learning_rate": 8.54999775977998e-07, "loss": 0.7195, "step": 1216 }, { "epoch": 0.5604904867447684, "grad_norm": 4.442877351130344, "learning_rate": 8.535232676482189e-07, "loss": 0.7249, "step": 1217 }, { "epoch": 0.5609510376788233, "grad_norm": 4.472611136956585, "learning_rate": 8.520470855786466e-07, "loss": 0.7899, "step": 1218 }, { "epoch": 0.5614115886128782, "grad_norm": 4.313659506029644, "learning_rate": 8.505712330573079e-07, "loss": 0.7792, "step": 1219 }, { "epoch": 0.561872139546933, "grad_norm": 3.6443477890742906, "learning_rate": 8.490957133714973e-07, "loss": 0.608, "step": 1220 }, { "epoch": 0.5623326904809879, "grad_norm": 4.61122592164592, "learning_rate": 8.476205298077649e-07, "loss": 0.8251, "step": 1221 }, { "epoch": 0.5627932414150427, "grad_norm": 4.005127703742635, "learning_rate": 8.46145685651915e-07, "loss": 0.6599, "step": 1222 }, { "epoch": 0.5632537923490976, "grad_norm": 4.177801420705785, "learning_rate": 8.446711841889945e-07, "loss": 0.8858, "step": 1223 }, { "epoch": 0.5637143432831525, "grad_norm": 4.046863233487957, "learning_rate": 8.431970287032861e-07, "loss": 0.7748, "step": 1224 }, { "epoch": 0.5641748942172073, "grad_norm": 4.046059258192909, "learning_rate": 8.417232224783041e-07, "loss": 0.6445, "step": 1225 }, { "epoch": 0.5646354451512622, "grad_norm": 3.728974112908037, "learning_rate": 8.402497687967836e-07, "loss": 0.5657, "step": 1226 }, { "epoch": 0.565095996085317, "grad_norm": 4.355084478625041, "learning_rate": 8.387766709406735e-07, "loss": 0.7229, "step": 1227 }, { "epoch": 0.565556547019372, "grad_norm": 4.340705574516379, "learning_rate": 8.373039321911323e-07, "loss": 0.7905, "step": 1228 }, { "epoch": 0.5660170979534268, "grad_norm": 4.404472917813874, "learning_rate": 8.358315558285169e-07, "loss": 0.7822, "step": 1229 }, { "epoch": 0.5664776488874816, "grad_norm": 4.267682907763365, "learning_rate": 8.343595451323781e-07, "loss": 0.6909, "step": 1230 }, { "epoch": 0.5669381998215365, "grad_norm": 4.39291227020088, "learning_rate": 8.328879033814515e-07, "loss": 0.8954, "step": 1231 }, { "epoch": 0.5673987507555914, "grad_norm": 4.2593658935775105, "learning_rate": 8.31416633853651e-07, "loss": 0.6355, "step": 1232 }, { "epoch": 0.5678593016896463, "grad_norm": 4.000769659828659, "learning_rate": 8.29945739826062e-07, "loss": 0.8631, "step": 1233 }, { "epoch": 0.5683198526237011, "grad_norm": 4.161586330106131, "learning_rate": 8.284752245749327e-07, "loss": 0.7103, "step": 1234 }, { "epoch": 0.5687804035577559, "grad_norm": 4.065253907531023, "learning_rate": 8.270050913756683e-07, "loss": 0.7965, "step": 1235 }, { "epoch": 0.5692409544918108, "grad_norm": 4.168079060972027, "learning_rate": 8.255353435028226e-07, "loss": 0.6579, "step": 1236 }, { "epoch": 0.5697015054258657, "grad_norm": 4.99867734373356, "learning_rate": 8.240659842300912e-07, "loss": 0.7187, "step": 1237 }, { "epoch": 0.5701620563599206, "grad_norm": 4.128028590204339, "learning_rate": 8.225970168303045e-07, "loss": 0.7195, "step": 1238 }, { "epoch": 0.5706226072939754, "grad_norm": 4.664893316889261, "learning_rate": 8.211284445754197e-07, "loss": 0.8141, "step": 1239 }, { "epoch": 0.5710831582280302, "grad_norm": 3.9090159929996537, "learning_rate": 8.196602707365134e-07, "loss": 0.6515, "step": 1240 }, { "epoch": 0.5715437091620852, "grad_norm": 4.369017303607819, "learning_rate": 8.18192498583776e-07, "loss": 0.603, "step": 1241 }, { "epoch": 0.57200426009614, "grad_norm": 4.618832749246373, "learning_rate": 8.16725131386502e-07, "loss": 0.6098, "step": 1242 }, { "epoch": 0.5724648110301949, "grad_norm": 4.039261933349974, "learning_rate": 8.152581724130849e-07, "loss": 0.6052, "step": 1243 }, { "epoch": 0.5729253619642497, "grad_norm": 4.2394807738445115, "learning_rate": 8.13791624931008e-07, "loss": 0.7922, "step": 1244 }, { "epoch": 0.5733859128983047, "grad_norm": 4.115127007634882, "learning_rate": 8.123254922068383e-07, "loss": 0.7878, "step": 1245 }, { "epoch": 0.5738464638323595, "grad_norm": 3.6459975331269243, "learning_rate": 8.108597775062199e-07, "loss": 0.72, "step": 1246 }, { "epoch": 0.5743070147664143, "grad_norm": 4.06138895681959, "learning_rate": 8.093944840938638e-07, "loss": 0.6953, "step": 1247 }, { "epoch": 0.5747675657004692, "grad_norm": 4.149045136219881, "learning_rate": 8.079296152335454e-07, "loss": 0.653, "step": 1248 }, { "epoch": 0.575228116634524, "grad_norm": 3.825512577116604, "learning_rate": 8.06465174188092e-07, "loss": 0.6137, "step": 1249 }, { "epoch": 0.575688667568579, "grad_norm": 3.8424653006029055, "learning_rate": 8.050011642193787e-07, "loss": 0.713, "step": 1250 }, { "epoch": 0.5761492185026338, "grad_norm": 3.980136071499712, "learning_rate": 8.035375885883217e-07, "loss": 0.7507, "step": 1251 }, { "epoch": 0.5766097694366886, "grad_norm": 3.8166923940310653, "learning_rate": 8.020744505548678e-07, "loss": 0.5936, "step": 1252 }, { "epoch": 0.5770703203707435, "grad_norm": 3.890026831695776, "learning_rate": 8.006117533779897e-07, "loss": 0.7401, "step": 1253 }, { "epoch": 0.5775308713047984, "grad_norm": 3.689347414505034, "learning_rate": 7.991495003156799e-07, "loss": 0.7226, "step": 1254 }, { "epoch": 0.5779914222388532, "grad_norm": 4.054957377649338, "learning_rate": 7.976876946249385e-07, "loss": 0.5988, "step": 1255 }, { "epoch": 0.5784519731729081, "grad_norm": 4.000783785868516, "learning_rate": 7.962263395617723e-07, "loss": 0.7979, "step": 1256 }, { "epoch": 0.5789125241069629, "grad_norm": 3.9044712964919417, "learning_rate": 7.947654383811826e-07, "loss": 0.8621, "step": 1257 }, { "epoch": 0.5793730750410179, "grad_norm": 4.354077470424388, "learning_rate": 7.933049943371591e-07, "loss": 0.7543, "step": 1258 }, { "epoch": 0.5798336259750727, "grad_norm": 4.107367197636584, "learning_rate": 7.918450106826756e-07, "loss": 0.5977, "step": 1259 }, { "epoch": 0.5802941769091275, "grad_norm": 3.442480467364629, "learning_rate": 7.903854906696783e-07, "loss": 0.5662, "step": 1260 }, { "epoch": 0.5807547278431824, "grad_norm": 3.8834672377592625, "learning_rate": 7.889264375490819e-07, "loss": 0.6443, "step": 1261 }, { "epoch": 0.5812152787772372, "grad_norm": 4.449189936705478, "learning_rate": 7.874678545707605e-07, "loss": 0.621, "step": 1262 }, { "epoch": 0.5816758297112922, "grad_norm": 4.3771145083536425, "learning_rate": 7.86009744983541e-07, "loss": 0.6671, "step": 1263 }, { "epoch": 0.582136380645347, "grad_norm": 4.173417752251295, "learning_rate": 7.845521120351967e-07, "loss": 0.6521, "step": 1264 }, { "epoch": 0.5825969315794018, "grad_norm": 4.0120578569000065, "learning_rate": 7.830949589724381e-07, "loss": 0.7846, "step": 1265 }, { "epoch": 0.5830574825134567, "grad_norm": 4.033864403464805, "learning_rate": 7.816382890409079e-07, "loss": 0.6132, "step": 1266 }, { "epoch": 0.5835180334475116, "grad_norm": 3.748445653381217, "learning_rate": 7.80182105485172e-07, "loss": 0.7701, "step": 1267 }, { "epoch": 0.5839785843815665, "grad_norm": 4.102919027681568, "learning_rate": 7.787264115487125e-07, "loss": 0.8118, "step": 1268 }, { "epoch": 0.5844391353156213, "grad_norm": 3.898079142758882, "learning_rate": 7.772712104739225e-07, "loss": 0.7816, "step": 1269 }, { "epoch": 0.5848996862496761, "grad_norm": 4.2653336273278795, "learning_rate": 7.758165055020959e-07, "loss": 0.638, "step": 1270 }, { "epoch": 0.5853602371837311, "grad_norm": 4.108558491202909, "learning_rate": 7.743622998734216e-07, "loss": 0.6584, "step": 1271 }, { "epoch": 0.5858207881177859, "grad_norm": 3.6525238386383623, "learning_rate": 7.729085968269775e-07, "loss": 0.758, "step": 1272 }, { "epoch": 0.5862813390518408, "grad_norm": 4.017983410574245, "learning_rate": 7.714553996007207e-07, "loss": 0.8103, "step": 1273 }, { "epoch": 0.5867418899858956, "grad_norm": 4.4324642884865035, "learning_rate": 7.700027114314824e-07, "loss": 0.6719, "step": 1274 }, { "epoch": 0.5872024409199504, "grad_norm": 3.618574728850041, "learning_rate": 7.685505355549599e-07, "loss": 0.6956, "step": 1275 }, { "epoch": 0.5876629918540054, "grad_norm": 3.66219248674673, "learning_rate": 7.670988752057087e-07, "loss": 0.6023, "step": 1276 }, { "epoch": 0.5881235427880602, "grad_norm": 4.248339538752855, "learning_rate": 7.656477336171372e-07, "loss": 0.6506, "step": 1277 }, { "epoch": 0.5885840937221151, "grad_norm": 3.83411109971581, "learning_rate": 7.64197114021497e-07, "loss": 0.6914, "step": 1278 }, { "epoch": 0.5890446446561699, "grad_norm": 4.373256581201152, "learning_rate": 7.627470196498788e-07, "loss": 0.8443, "step": 1279 }, { "epoch": 0.5895051955902249, "grad_norm": 4.268912273497411, "learning_rate": 7.612974537322015e-07, "loss": 0.8116, "step": 1280 }, { "epoch": 0.5899657465242797, "grad_norm": 3.7691018017089943, "learning_rate": 7.598484194972076e-07, "loss": 0.69, "step": 1281 }, { "epoch": 0.5904262974583345, "grad_norm": 3.873681153992648, "learning_rate": 7.583999201724565e-07, "loss": 0.7544, "step": 1282 }, { "epoch": 0.5908868483923894, "grad_norm": 4.064297163625697, "learning_rate": 7.569519589843144e-07, "loss": 0.7571, "step": 1283 }, { "epoch": 0.5913473993264443, "grad_norm": 3.951090350302211, "learning_rate": 7.555045391579492e-07, "loss": 0.6639, "step": 1284 }, { "epoch": 0.5918079502604991, "grad_norm": 4.1800420653546055, "learning_rate": 7.540576639173247e-07, "loss": 0.6233, "step": 1285 }, { "epoch": 0.592268501194554, "grad_norm": 3.863275608178385, "learning_rate": 7.526113364851891e-07, "loss": 0.7665, "step": 1286 }, { "epoch": 0.5927290521286088, "grad_norm": 4.004516887631692, "learning_rate": 7.511655600830727e-07, "loss": 0.6193, "step": 1287 }, { "epoch": 0.5931896030626637, "grad_norm": 3.6558655195059386, "learning_rate": 7.497203379312771e-07, "loss": 0.6027, "step": 1288 }, { "epoch": 0.5936501539967186, "grad_norm": 4.066876930580018, "learning_rate": 7.482756732488691e-07, "loss": 0.7533, "step": 1289 }, { "epoch": 0.5941107049307734, "grad_norm": 3.746933083039724, "learning_rate": 7.468315692536755e-07, "loss": 0.6975, "step": 1290 }, { "epoch": 0.5945712558648283, "grad_norm": 4.178627955025207, "learning_rate": 7.453880291622725e-07, "loss": 0.582, "step": 1291 }, { "epoch": 0.5950318067988831, "grad_norm": 3.8202391603008987, "learning_rate": 7.439450561899813e-07, "loss": 0.6408, "step": 1292 }, { "epoch": 0.5954923577329381, "grad_norm": 4.167387205348762, "learning_rate": 7.425026535508593e-07, "loss": 0.749, "step": 1293 }, { "epoch": 0.5959529086669929, "grad_norm": 4.188921529465533, "learning_rate": 7.410608244576937e-07, "loss": 0.7177, "step": 1294 }, { "epoch": 0.5964134596010477, "grad_norm": 4.152012263322836, "learning_rate": 7.396195721219945e-07, "loss": 0.6544, "step": 1295 }, { "epoch": 0.5968740105351026, "grad_norm": 3.8659193196487736, "learning_rate": 7.381788997539868e-07, "loss": 0.6487, "step": 1296 }, { "epoch": 0.5973345614691575, "grad_norm": 4.479907293458234, "learning_rate": 7.367388105626036e-07, "loss": 0.6906, "step": 1297 }, { "epoch": 0.5977951124032124, "grad_norm": 4.156079318991526, "learning_rate": 7.352993077554798e-07, "loss": 0.646, "step": 1298 }, { "epoch": 0.5982556633372672, "grad_norm": 3.8366204209260775, "learning_rate": 7.33860394538943e-07, "loss": 0.677, "step": 1299 }, { "epoch": 0.598716214271322, "grad_norm": 3.962137774721687, "learning_rate": 7.324220741180088e-07, "loss": 0.7075, "step": 1300 }, { "epoch": 0.5991767652053769, "grad_norm": 4.138860891979015, "learning_rate": 7.309843496963715e-07, "loss": 0.7751, "step": 1301 }, { "epoch": 0.5996373161394318, "grad_norm": 3.5058879817701545, "learning_rate": 7.295472244763981e-07, "loss": 0.5501, "step": 1302 }, { "epoch": 0.6000978670734867, "grad_norm": 3.7683032088944137, "learning_rate": 7.281107016591213e-07, "loss": 0.5931, "step": 1303 }, { "epoch": 0.6005584180075415, "grad_norm": 3.9726892994238123, "learning_rate": 7.266747844442315e-07, "loss": 0.8008, "step": 1304 }, { "epoch": 0.6010189689415963, "grad_norm": 4.482131963843059, "learning_rate": 7.252394760300707e-07, "loss": 0.7934, "step": 1305 }, { "epoch": 0.6014795198756513, "grad_norm": 4.221927628807229, "learning_rate": 7.238047796136246e-07, "loss": 0.708, "step": 1306 }, { "epoch": 0.6019400708097061, "grad_norm": 4.233631196329559, "learning_rate": 7.223706983905153e-07, "loss": 0.7049, "step": 1307 }, { "epoch": 0.602400621743761, "grad_norm": 4.883536112667081, "learning_rate": 7.209372355549956e-07, "loss": 0.6093, "step": 1308 }, { "epoch": 0.6028611726778158, "grad_norm": 3.9473696538457177, "learning_rate": 7.195043942999404e-07, "loss": 0.7761, "step": 1309 }, { "epoch": 0.6033217236118708, "grad_norm": 3.883519858887965, "learning_rate": 7.180721778168397e-07, "loss": 0.6247, "step": 1310 }, { "epoch": 0.6037822745459256, "grad_norm": 4.045638067252495, "learning_rate": 7.166405892957925e-07, "loss": 0.7737, "step": 1311 }, { "epoch": 0.6042428254799804, "grad_norm": 4.101589876229725, "learning_rate": 7.152096319254988e-07, "loss": 0.7074, "step": 1312 }, { "epoch": 0.6047033764140353, "grad_norm": 3.96146949323466, "learning_rate": 7.137793088932533e-07, "loss": 0.8476, "step": 1313 }, { "epoch": 0.6051639273480901, "grad_norm": 3.829353782919895, "learning_rate": 7.123496233849367e-07, "loss": 0.6837, "step": 1314 }, { "epoch": 0.605624478282145, "grad_norm": 3.8822069155716807, "learning_rate": 7.109205785850106e-07, "loss": 0.5381, "step": 1315 }, { "epoch": 0.6060850292161999, "grad_norm": 4.4398807992091545, "learning_rate": 7.094921776765094e-07, "loss": 0.652, "step": 1316 }, { "epoch": 0.6065455801502547, "grad_norm": 3.9362156802499575, "learning_rate": 7.080644238410325e-07, "loss": 0.704, "step": 1317 }, { "epoch": 0.6070061310843096, "grad_norm": 4.541930578003096, "learning_rate": 7.066373202587397e-07, "loss": 0.7844, "step": 1318 }, { "epoch": 0.6074666820183645, "grad_norm": 4.131466781236791, "learning_rate": 7.052108701083407e-07, "loss": 0.7339, "step": 1319 }, { "epoch": 0.6079272329524193, "grad_norm": 3.9676601190553753, "learning_rate": 7.0378507656709e-07, "loss": 0.6713, "step": 1320 }, { "epoch": 0.6083877838864742, "grad_norm": 3.7570789863814347, "learning_rate": 7.023599428107814e-07, "loss": 0.6448, "step": 1321 }, { "epoch": 0.608848334820529, "grad_norm": 4.044397890801651, "learning_rate": 7.009354720137364e-07, "loss": 0.6924, "step": 1322 }, { "epoch": 0.609308885754584, "grad_norm": 3.7454231287399224, "learning_rate": 6.995116673488014e-07, "loss": 0.7998, "step": 1323 }, { "epoch": 0.6097694366886388, "grad_norm": 3.822038531190499, "learning_rate": 6.980885319873397e-07, "loss": 0.7772, "step": 1324 }, { "epoch": 0.6102299876226936, "grad_norm": 3.7148390516117855, "learning_rate": 6.966660690992214e-07, "loss": 0.6023, "step": 1325 }, { "epoch": 0.6106905385567485, "grad_norm": 3.885551561336453, "learning_rate": 6.952442818528219e-07, "loss": 0.6596, "step": 1326 }, { "epoch": 0.6111510894908033, "grad_norm": 4.208523217346637, "learning_rate": 6.938231734150093e-07, "loss": 0.7851, "step": 1327 }, { "epoch": 0.6116116404248583, "grad_norm": 3.9382542861769707, "learning_rate": 6.9240274695114e-07, "loss": 0.6453, "step": 1328 }, { "epoch": 0.6120721913589131, "grad_norm": 4.254090862225461, "learning_rate": 6.909830056250526e-07, "loss": 0.7987, "step": 1329 }, { "epoch": 0.612532742292968, "grad_norm": 4.474761651600492, "learning_rate": 6.895639525990586e-07, "loss": 0.7411, "step": 1330 }, { "epoch": 0.6129932932270228, "grad_norm": 4.109755620455925, "learning_rate": 6.881455910339369e-07, "loss": 0.7394, "step": 1331 }, { "epoch": 0.6134538441610777, "grad_norm": 4.1272252097255135, "learning_rate": 6.867279240889259e-07, "loss": 0.7315, "step": 1332 }, { "epoch": 0.6139143950951326, "grad_norm": 4.239044637604233, "learning_rate": 6.853109549217166e-07, "loss": 0.8381, "step": 1333 }, { "epoch": 0.6143749460291874, "grad_norm": 3.81010886231608, "learning_rate": 6.838946866884467e-07, "loss": 0.7903, "step": 1334 }, { "epoch": 0.6148354969632422, "grad_norm": 4.838422043564699, "learning_rate": 6.824791225436918e-07, "loss": 0.6877, "step": 1335 }, { "epoch": 0.6152960478972972, "grad_norm": 4.683106171049681, "learning_rate": 6.810642656404596e-07, "loss": 0.744, "step": 1336 }, { "epoch": 0.615756598831352, "grad_norm": 3.8748710484057476, "learning_rate": 6.796501191301824e-07, "loss": 0.6399, "step": 1337 }, { "epoch": 0.6162171497654069, "grad_norm": 3.675315156644315, "learning_rate": 6.782366861627101e-07, "loss": 0.5187, "step": 1338 }, { "epoch": 0.6166777006994617, "grad_norm": 3.882372241204272, "learning_rate": 6.768239698863033e-07, "loss": 0.6368, "step": 1339 }, { "epoch": 0.6171382516335165, "grad_norm": 3.533819195043388, "learning_rate": 6.754119734476266e-07, "loss": 0.5791, "step": 1340 }, { "epoch": 0.6175988025675715, "grad_norm": 4.07180838542704, "learning_rate": 6.740006999917405e-07, "loss": 0.7891, "step": 1341 }, { "epoch": 0.6180593535016263, "grad_norm": 4.05091398600746, "learning_rate": 6.725901526620959e-07, "loss": 0.7483, "step": 1342 }, { "epoch": 0.6185199044356812, "grad_norm": 3.9866775711924314, "learning_rate": 6.711803346005258e-07, "loss": 0.607, "step": 1343 }, { "epoch": 0.618980455369736, "grad_norm": 4.23846529914182, "learning_rate": 6.697712489472395e-07, "loss": 0.7654, "step": 1344 }, { "epoch": 0.619441006303791, "grad_norm": 3.3523567615095597, "learning_rate": 6.68362898840814e-07, "loss": 0.6973, "step": 1345 }, { "epoch": 0.6199015572378458, "grad_norm": 3.7995701693620596, "learning_rate": 6.669552874181888e-07, "loss": 0.6277, "step": 1346 }, { "epoch": 0.6203621081719006, "grad_norm": 4.2812891163190425, "learning_rate": 6.655484178146576e-07, "loss": 0.6225, "step": 1347 }, { "epoch": 0.6208226591059555, "grad_norm": 3.841876272899927, "learning_rate": 6.641422931638614e-07, "loss": 0.7144, "step": 1348 }, { "epoch": 0.6212832100400104, "grad_norm": 4.3144872371279, "learning_rate": 6.627369165977837e-07, "loss": 0.7443, "step": 1349 }, { "epoch": 0.6217437609740653, "grad_norm": 3.579677598792408, "learning_rate": 6.613322912467392e-07, "loss": 0.6131, "step": 1350 }, { "epoch": 0.6222043119081201, "grad_norm": 4.123711572731146, "learning_rate": 6.599284202393708e-07, "loss": 0.7926, "step": 1351 }, { "epoch": 0.6226648628421749, "grad_norm": 4.165879414099282, "learning_rate": 6.585253067026417e-07, "loss": 0.7231, "step": 1352 }, { "epoch": 0.6231254137762298, "grad_norm": 4.079223567363339, "learning_rate": 6.571229537618266e-07, "loss": 0.7644, "step": 1353 }, { "epoch": 0.6235859647102847, "grad_norm": 4.235495830579471, "learning_rate": 6.557213645405064e-07, "loss": 0.5654, "step": 1354 }, { "epoch": 0.6240465156443396, "grad_norm": 3.893962077204517, "learning_rate": 6.54320542160562e-07, "loss": 0.8383, "step": 1355 }, { "epoch": 0.6245070665783944, "grad_norm": 4.351704833427758, "learning_rate": 6.529204897421643e-07, "loss": 0.7185, "step": 1356 }, { "epoch": 0.6249676175124492, "grad_norm": 4.4606265130996245, "learning_rate": 6.515212104037713e-07, "loss": 0.7542, "step": 1357 }, { "epoch": 0.6254281684465042, "grad_norm": 4.045754621997647, "learning_rate": 6.50122707262118e-07, "loss": 0.6073, "step": 1358 }, { "epoch": 0.625888719380559, "grad_norm": 4.67609266978647, "learning_rate": 6.487249834322095e-07, "loss": 0.9034, "step": 1359 }, { "epoch": 0.6263492703146138, "grad_norm": 4.239340324309984, "learning_rate": 6.473280420273172e-07, "loss": 0.6629, "step": 1360 }, { "epoch": 0.6268098212486687, "grad_norm": 4.245152810172918, "learning_rate": 6.459318861589685e-07, "loss": 0.8695, "step": 1361 }, { "epoch": 0.6272703721827236, "grad_norm": 4.153485739306382, "learning_rate": 6.445365189369411e-07, "loss": 0.6892, "step": 1362 }, { "epoch": 0.6277309231167785, "grad_norm": 4.116055364217876, "learning_rate": 6.431419434692563e-07, "loss": 0.6879, "step": 1363 }, { "epoch": 0.6281914740508333, "grad_norm": 4.261436205252849, "learning_rate": 6.417481628621717e-07, "loss": 0.6929, "step": 1364 }, { "epoch": 0.6286520249848881, "grad_norm": 3.922813703266068, "learning_rate": 6.403551802201748e-07, "loss": 0.6684, "step": 1365 }, { "epoch": 0.629112575918943, "grad_norm": 5.06464189474291, "learning_rate": 6.389629986459755e-07, "loss": 0.7743, "step": 1366 }, { "epoch": 0.6295731268529979, "grad_norm": 4.119730260295042, "learning_rate": 6.375716212404989e-07, "loss": 0.6904, "step": 1367 }, { "epoch": 0.6300336777870528, "grad_norm": 4.129792087315978, "learning_rate": 6.3618105110288e-07, "loss": 0.5855, "step": 1368 }, { "epoch": 0.6304942287211076, "grad_norm": 3.9284207838354135, "learning_rate": 6.347912913304548e-07, "loss": 0.7911, "step": 1369 }, { "epoch": 0.6309547796551624, "grad_norm": 4.393503482468327, "learning_rate": 6.334023450187549e-07, "loss": 0.6743, "step": 1370 }, { "epoch": 0.6314153305892174, "grad_norm": 3.7362028577169255, "learning_rate": 6.320142152614993e-07, "loss": 0.6985, "step": 1371 }, { "epoch": 0.6318758815232722, "grad_norm": 4.224309587191838, "learning_rate": 6.306269051505888e-07, "loss": 0.7849, "step": 1372 }, { "epoch": 0.6323364324573271, "grad_norm": 4.7493227844174175, "learning_rate": 6.292404177760986e-07, "loss": 0.7161, "step": 1373 }, { "epoch": 0.6327969833913819, "grad_norm": 4.341238566339486, "learning_rate": 6.278547562262706e-07, "loss": 0.7336, "step": 1374 }, { "epoch": 0.6332575343254369, "grad_norm": 4.086966535530286, "learning_rate": 6.264699235875084e-07, "loss": 0.8233, "step": 1375 }, { "epoch": 0.6337180852594917, "grad_norm": 4.350986922857276, "learning_rate": 6.250859229443684e-07, "loss": 0.6819, "step": 1376 }, { "epoch": 0.6341786361935465, "grad_norm": 4.006577518118876, "learning_rate": 6.237027573795538e-07, "loss": 0.7803, "step": 1377 }, { "epoch": 0.6346391871276014, "grad_norm": 3.979933538761797, "learning_rate": 6.223204299739087e-07, "loss": 0.7754, "step": 1378 }, { "epoch": 0.6350997380616562, "grad_norm": 4.099923990909366, "learning_rate": 6.209389438064092e-07, "loss": 0.7603, "step": 1379 }, { "epoch": 0.6355602889957112, "grad_norm": 3.5794177523809463, "learning_rate": 6.19558301954158e-07, "loss": 0.6374, "step": 1380 }, { "epoch": 0.636020839929766, "grad_norm": 4.227855244635807, "learning_rate": 6.181785074923777e-07, "loss": 0.6992, "step": 1381 }, { "epoch": 0.6364813908638208, "grad_norm": 4.358289858977804, "learning_rate": 6.167995634944025e-07, "loss": 0.8381, "step": 1382 }, { "epoch": 0.6369419417978757, "grad_norm": 3.917617817721734, "learning_rate": 6.154214730316738e-07, "loss": 0.5754, "step": 1383 }, { "epoch": 0.6374024927319306, "grad_norm": 4.157495068412997, "learning_rate": 6.1404423917373e-07, "loss": 0.5925, "step": 1384 }, { "epoch": 0.6378630436659855, "grad_norm": 3.9303924191280792, "learning_rate": 6.126678649882019e-07, "loss": 0.6216, "step": 1385 }, { "epoch": 0.6383235946000403, "grad_norm": 4.241706538620224, "learning_rate": 6.112923535408073e-07, "loss": 0.5677, "step": 1386 }, { "epoch": 0.6387841455340951, "grad_norm": 3.9087765787814006, "learning_rate": 6.099177078953397e-07, "loss": 0.7016, "step": 1387 }, { "epoch": 0.6392446964681501, "grad_norm": 4.30564362035509, "learning_rate": 6.085439311136664e-07, "loss": 0.7988, "step": 1388 }, { "epoch": 0.6397052474022049, "grad_norm": 4.127979295659567, "learning_rate": 6.071710262557181e-07, "loss": 0.6103, "step": 1389 }, { "epoch": 0.6401657983362598, "grad_norm": 3.925321650628113, "learning_rate": 6.057989963794832e-07, "loss": 0.5843, "step": 1390 }, { "epoch": 0.6406263492703146, "grad_norm": 3.679811405246505, "learning_rate": 6.044278445410025e-07, "loss": 0.7395, "step": 1391 }, { "epoch": 0.6410869002043694, "grad_norm": 4.22552868020839, "learning_rate": 6.030575737943595e-07, "loss": 0.5878, "step": 1392 }, { "epoch": 0.6415474511384244, "grad_norm": 3.6791991186973236, "learning_rate": 6.016881871916766e-07, "loss": 0.8119, "step": 1393 }, { "epoch": 0.6420080020724792, "grad_norm": 4.41943713031382, "learning_rate": 6.003196877831059e-07, "loss": 0.8662, "step": 1394 }, { "epoch": 0.642468553006534, "grad_norm": 4.008812528744462, "learning_rate": 5.989520786168235e-07, "loss": 0.6953, "step": 1395 }, { "epoch": 0.6429291039405889, "grad_norm": 4.3534069377955245, "learning_rate": 5.975853627390232e-07, "loss": 0.6369, "step": 1396 }, { "epoch": 0.6433896548746438, "grad_norm": 3.5935660218148024, "learning_rate": 5.962195431939084e-07, "loss": 0.6193, "step": 1397 }, { "epoch": 0.6438502058086987, "grad_norm": 4.022309280247683, "learning_rate": 5.94854623023686e-07, "loss": 0.6543, "step": 1398 }, { "epoch": 0.6443107567427535, "grad_norm": 4.083285679982612, "learning_rate": 5.934906052685603e-07, "loss": 0.7656, "step": 1399 }, { "epoch": 0.6447713076768083, "grad_norm": 4.523125070824751, "learning_rate": 5.921274929667251e-07, "loss": 0.7931, "step": 1400 }, { "epoch": 0.6452318586108633, "grad_norm": 3.7431348304387115, "learning_rate": 5.907652891543576e-07, "loss": 0.7482, "step": 1401 }, { "epoch": 0.6456924095449181, "grad_norm": 4.625439630770187, "learning_rate": 5.894039968656114e-07, "loss": 0.7533, "step": 1402 }, { "epoch": 0.646152960478973, "grad_norm": 4.487643138296221, "learning_rate": 5.880436191326092e-07, "loss": 0.8825, "step": 1403 }, { "epoch": 0.6466135114130278, "grad_norm": 4.259296204633693, "learning_rate": 5.866841589854381e-07, "loss": 0.6392, "step": 1404 }, { "epoch": 0.6470740623470826, "grad_norm": 3.966571673212627, "learning_rate": 5.853256194521395e-07, "loss": 0.7189, "step": 1405 }, { "epoch": 0.6475346132811376, "grad_norm": 4.250092681478124, "learning_rate": 5.83968003558706e-07, "loss": 0.7313, "step": 1406 }, { "epoch": 0.6479951642151924, "grad_norm": 4.083102362029428, "learning_rate": 5.826113143290717e-07, "loss": 0.7537, "step": 1407 }, { "epoch": 0.6484557151492473, "grad_norm": 4.696599354216123, "learning_rate": 5.812555547851068e-07, "loss": 0.7056, "step": 1408 }, { "epoch": 0.6489162660833021, "grad_norm": 4.631007806230033, "learning_rate": 5.799007279466111e-07, "loss": 0.7021, "step": 1409 }, { "epoch": 0.649376817017357, "grad_norm": 4.506529428003081, "learning_rate": 5.785468368313076e-07, "loss": 0.6634, "step": 1410 }, { "epoch": 0.6498373679514119, "grad_norm": 3.98588849382825, "learning_rate": 5.77193884454833e-07, "loss": 0.6788, "step": 1411 }, { "epoch": 0.6502979188854667, "grad_norm": 3.586192901109569, "learning_rate": 5.758418738307351e-07, "loss": 0.6232, "step": 1412 }, { "epoch": 0.6507584698195216, "grad_norm": 3.8086277273034006, "learning_rate": 5.74490807970463e-07, "loss": 0.7508, "step": 1413 }, { "epoch": 0.6512190207535765, "grad_norm": 4.286485512229637, "learning_rate": 5.731406898833623e-07, "loss": 0.8492, "step": 1414 }, { "epoch": 0.6516795716876314, "grad_norm": 3.7112305951833533, "learning_rate": 5.717915225766661e-07, "loss": 0.5563, "step": 1415 }, { "epoch": 0.6521401226216862, "grad_norm": 4.003612851176155, "learning_rate": 5.704433090554911e-07, "loss": 0.6043, "step": 1416 }, { "epoch": 0.652600673555741, "grad_norm": 4.1782517354354765, "learning_rate": 5.690960523228294e-07, "loss": 0.7847, "step": 1417 }, { "epoch": 0.6530612244897959, "grad_norm": 4.154957585415872, "learning_rate": 5.677497553795409e-07, "loss": 0.5793, "step": 1418 }, { "epoch": 0.6535217754238508, "grad_norm": 3.787188594274459, "learning_rate": 5.664044212243489e-07, "loss": 0.5131, "step": 1419 }, { "epoch": 0.6539823263579057, "grad_norm": 3.9142990747490667, "learning_rate": 5.650600528538318e-07, "loss": 0.6294, "step": 1420 }, { "epoch": 0.6544428772919605, "grad_norm": 3.751790677266819, "learning_rate": 5.637166532624163e-07, "loss": 0.7561, "step": 1421 }, { "epoch": 0.6549034282260153, "grad_norm": 4.789902031110964, "learning_rate": 5.623742254423718e-07, "loss": 0.6751, "step": 1422 }, { "epoch": 0.6553639791600703, "grad_norm": 4.157183448961878, "learning_rate": 5.610327723838037e-07, "loss": 0.9458, "step": 1423 }, { "epoch": 0.6558245300941251, "grad_norm": 4.452918445316668, "learning_rate": 5.596922970746449e-07, "loss": 0.7142, "step": 1424 }, { "epoch": 0.65628508102818, "grad_norm": 4.011196742957825, "learning_rate": 5.583528025006513e-07, "loss": 0.5877, "step": 1425 }, { "epoch": 0.6567456319622348, "grad_norm": 4.689789550205055, "learning_rate": 5.570142916453944e-07, "loss": 0.7415, "step": 1426 }, { "epoch": 0.6572061828962897, "grad_norm": 4.08414823614292, "learning_rate": 5.556767674902548e-07, "loss": 0.7439, "step": 1427 }, { "epoch": 0.6576667338303446, "grad_norm": 3.651529255074715, "learning_rate": 5.54340233014414e-07, "loss": 0.775, "step": 1428 }, { "epoch": 0.6581272847643994, "grad_norm": 4.166568533050651, "learning_rate": 5.530046911948505e-07, "loss": 0.6311, "step": 1429 }, { "epoch": 0.6585878356984542, "grad_norm": 4.088236237199374, "learning_rate": 5.516701450063316e-07, "loss": 0.5811, "step": 1430 }, { "epoch": 0.6590483866325091, "grad_norm": 4.599407468084095, "learning_rate": 5.503365974214058e-07, "loss": 0.8115, "step": 1431 }, { "epoch": 0.659508937566564, "grad_norm": 3.7765285515487546, "learning_rate": 5.490040514103995e-07, "loss": 0.5368, "step": 1432 }, { "epoch": 0.6599694885006189, "grad_norm": 3.9041949478298585, "learning_rate": 5.476725099414062e-07, "loss": 0.6653, "step": 1433 }, { "epoch": 0.6604300394346737, "grad_norm": 4.145443259713471, "learning_rate": 5.463419759802817e-07, "loss": 0.8028, "step": 1434 }, { "epoch": 0.6608905903687285, "grad_norm": 4.1802055260739905, "learning_rate": 5.450124524906401e-07, "loss": 0.6341, "step": 1435 }, { "epoch": 0.6613511413027835, "grad_norm": 4.451105580307017, "learning_rate": 5.436839424338425e-07, "loss": 0.7247, "step": 1436 }, { "epoch": 0.6618116922368383, "grad_norm": 4.063114177355392, "learning_rate": 5.423564487689929e-07, "loss": 0.6484, "step": 1437 }, { "epoch": 0.6622722431708932, "grad_norm": 4.837970687153956, "learning_rate": 5.410299744529332e-07, "loss": 0.7274, "step": 1438 }, { "epoch": 0.662732794104948, "grad_norm": 4.467050230739087, "learning_rate": 5.397045224402326e-07, "loss": 0.7529, "step": 1439 }, { "epoch": 0.663193345039003, "grad_norm": 4.007715225660783, "learning_rate": 5.383800956831846e-07, "loss": 0.6691, "step": 1440 }, { "epoch": 0.6636538959730578, "grad_norm": 4.386705239656003, "learning_rate": 5.370566971317989e-07, "loss": 0.7447, "step": 1441 }, { "epoch": 0.6641144469071126, "grad_norm": 4.157390471837008, "learning_rate": 5.357343297337943e-07, "loss": 0.7817, "step": 1442 }, { "epoch": 0.6645749978411675, "grad_norm": 4.461674520146108, "learning_rate": 5.344129964345934e-07, "loss": 0.6365, "step": 1443 }, { "epoch": 0.6650355487752223, "grad_norm": 3.7793205639729863, "learning_rate": 5.330927001773154e-07, "loss": 0.693, "step": 1444 }, { "epoch": 0.6654960997092773, "grad_norm": 4.279089627461291, "learning_rate": 5.317734439027699e-07, "loss": 0.7419, "step": 1445 }, { "epoch": 0.6659566506433321, "grad_norm": 3.9754181951943566, "learning_rate": 5.304552305494492e-07, "loss": 0.5342, "step": 1446 }, { "epoch": 0.6664172015773869, "grad_norm": 4.13264836527073, "learning_rate": 5.291380630535231e-07, "loss": 0.7169, "step": 1447 }, { "epoch": 0.6668777525114418, "grad_norm": 3.6830490893102885, "learning_rate": 5.278219443488328e-07, "loss": 0.6664, "step": 1448 }, { "epoch": 0.6673383034454967, "grad_norm": 4.318664799560766, "learning_rate": 5.265068773668812e-07, "loss": 0.6639, "step": 1449 }, { "epoch": 0.6677988543795516, "grad_norm": 4.288173807493679, "learning_rate": 5.251928650368307e-07, "loss": 0.7318, "step": 1450 }, { "epoch": 0.6682594053136064, "grad_norm": 3.7624358921837686, "learning_rate": 5.238799102854941e-07, "loss": 0.6254, "step": 1451 }, { "epoch": 0.6687199562476612, "grad_norm": 4.137532347531494, "learning_rate": 5.225680160373275e-07, "loss": 0.6444, "step": 1452 }, { "epoch": 0.6691805071817162, "grad_norm": 4.179251304676898, "learning_rate": 5.212571852144261e-07, "loss": 0.715, "step": 1453 }, { "epoch": 0.669641058115771, "grad_norm": 4.05771287332896, "learning_rate": 5.199474207365162e-07, "loss": 0.6667, "step": 1454 }, { "epoch": 0.6701016090498259, "grad_norm": 3.642486921265352, "learning_rate": 5.186387255209481e-07, "loss": 0.6224, "step": 1455 }, { "epoch": 0.6705621599838807, "grad_norm": 4.219531241070388, "learning_rate": 5.173311024826915e-07, "loss": 0.7762, "step": 1456 }, { "epoch": 0.6710227109179355, "grad_norm": 4.1083232646487255, "learning_rate": 5.160245545343274e-07, "loss": 0.6808, "step": 1457 }, { "epoch": 0.6714832618519905, "grad_norm": 4.313938941506191, "learning_rate": 5.147190845860426e-07, "loss": 0.8464, "step": 1458 }, { "epoch": 0.6719438127860453, "grad_norm": 4.066322320025792, "learning_rate": 5.134146955456218e-07, "loss": 0.5871, "step": 1459 }, { "epoch": 0.6724043637201002, "grad_norm": 4.190185040132176, "learning_rate": 5.121113903184431e-07, "loss": 0.6287, "step": 1460 }, { "epoch": 0.672864914654155, "grad_norm": 3.9810110053494, "learning_rate": 5.108091718074705e-07, "loss": 0.8185, "step": 1461 }, { "epoch": 0.6733254655882099, "grad_norm": 3.718874121598935, "learning_rate": 5.095080429132459e-07, "loss": 0.7091, "step": 1462 }, { "epoch": 0.6737860165222648, "grad_norm": 4.378908526214594, "learning_rate": 5.082080065338872e-07, "loss": 0.6787, "step": 1463 }, { "epoch": 0.6742465674563196, "grad_norm": 4.218278847235568, "learning_rate": 5.069090655650762e-07, "loss": 0.7414, "step": 1464 }, { "epoch": 0.6747071183903744, "grad_norm": 4.079311941949285, "learning_rate": 5.05611222900055e-07, "loss": 0.6785, "step": 1465 }, { "epoch": 0.6751676693244294, "grad_norm": 4.348520695692373, "learning_rate": 5.043144814296215e-07, "loss": 0.6797, "step": 1466 }, { "epoch": 0.6756282202584842, "grad_norm": 4.1451570367856165, "learning_rate": 5.030188440421185e-07, "loss": 0.5989, "step": 1467 }, { "epoch": 0.6760887711925391, "grad_norm": 4.192831036983876, "learning_rate": 5.017243136234298e-07, "loss": 0.7555, "step": 1468 }, { "epoch": 0.6765493221265939, "grad_norm": 4.216524683757611, "learning_rate": 5.004308930569757e-07, "loss": 0.8294, "step": 1469 }, { "epoch": 0.6770098730606487, "grad_norm": 3.77497378039177, "learning_rate": 4.991385852237017e-07, "loss": 0.7398, "step": 1470 }, { "epoch": 0.6774704239947037, "grad_norm": 3.669887841795954, "learning_rate": 4.978473930020767e-07, "loss": 0.6292, "step": 1471 }, { "epoch": 0.6779309749287585, "grad_norm": 4.136808261948525, "learning_rate": 4.965573192680841e-07, "loss": 0.6894, "step": 1472 }, { "epoch": 0.6783915258628134, "grad_norm": 3.5638291918762426, "learning_rate": 4.952683668952152e-07, "loss": 0.6208, "step": 1473 }, { "epoch": 0.6788520767968682, "grad_norm": 4.389044081085301, "learning_rate": 4.939805387544649e-07, "loss": 0.6938, "step": 1474 }, { "epoch": 0.6793126277309232, "grad_norm": 4.489933848616866, "learning_rate": 4.926938377143232e-07, "loss": 0.7787, "step": 1475 }, { "epoch": 0.679773178664978, "grad_norm": 3.9224897896971496, "learning_rate": 4.914082666407704e-07, "loss": 0.6471, "step": 1476 }, { "epoch": 0.6802337295990328, "grad_norm": 4.164623263171233, "learning_rate": 4.901238283972685e-07, "loss": 0.5646, "step": 1477 }, { "epoch": 0.6806942805330877, "grad_norm": 4.060417648219034, "learning_rate": 4.888405258447576e-07, "loss": 0.6592, "step": 1478 }, { "epoch": 0.6811548314671426, "grad_norm": 3.785093251939364, "learning_rate": 4.875583618416481e-07, "loss": 0.673, "step": 1479 }, { "epoch": 0.6816153824011975, "grad_norm": 3.878311705585543, "learning_rate": 4.862773392438131e-07, "loss": 0.6865, "step": 1480 }, { "epoch": 0.6820759333352523, "grad_norm": 4.725958495844652, "learning_rate": 4.849974609045848e-07, "loss": 0.7796, "step": 1481 }, { "epoch": 0.6825364842693071, "grad_norm": 3.6011075380644604, "learning_rate": 4.837187296747463e-07, "loss": 0.6652, "step": 1482 }, { "epoch": 0.682997035203362, "grad_norm": 3.5410109779020384, "learning_rate": 4.82441148402525e-07, "loss": 0.7585, "step": 1483 }, { "epoch": 0.6834575861374169, "grad_norm": 3.884308236709822, "learning_rate": 4.811647199335877e-07, "loss": 0.6603, "step": 1484 }, { "epoch": 0.6839181370714718, "grad_norm": 3.9660769288181426, "learning_rate": 4.798894471110336e-07, "loss": 0.7689, "step": 1485 }, { "epoch": 0.6843786880055266, "grad_norm": 3.8951732818565543, "learning_rate": 4.786153327753864e-07, "loss": 0.7391, "step": 1486 }, { "epoch": 0.6848392389395814, "grad_norm": 4.130845461557095, "learning_rate": 4.773423797645911e-07, "loss": 0.6818, "step": 1487 }, { "epoch": 0.6852997898736364, "grad_norm": 3.931223239460785, "learning_rate": 4.76070590914005e-07, "loss": 0.7502, "step": 1488 }, { "epoch": 0.6857603408076912, "grad_norm": 4.101289979409272, "learning_rate": 4.747999690563932e-07, "loss": 0.6024, "step": 1489 }, { "epoch": 0.686220891741746, "grad_norm": 4.184635967092108, "learning_rate": 4.7353051702191994e-07, "loss": 0.6977, "step": 1490 }, { "epoch": 0.6866814426758009, "grad_norm": 4.265425862050201, "learning_rate": 4.7226223763814545e-07, "loss": 0.6922, "step": 1491 }, { "epoch": 0.6871419936098558, "grad_norm": 4.598883258878285, "learning_rate": 4.709951337300174e-07, "loss": 0.702, "step": 1492 }, { "epoch": 0.6876025445439107, "grad_norm": 3.8212574630727625, "learning_rate": 4.697292081198646e-07, "loss": 0.6094, "step": 1493 }, { "epoch": 0.6880630954779655, "grad_norm": 4.123662927230591, "learning_rate": 4.684644636273922e-07, "loss": 0.5743, "step": 1494 }, { "epoch": 0.6885236464120204, "grad_norm": 3.918867072583025, "learning_rate": 4.6720090306967465e-07, "loss": 0.8501, "step": 1495 }, { "epoch": 0.6889841973460752, "grad_norm": 3.9098859252647924, "learning_rate": 4.6593852926114784e-07, "loss": 0.6268, "step": 1496 }, { "epoch": 0.6894447482801301, "grad_norm": 4.199039542831021, "learning_rate": 4.646773450136067e-07, "loss": 0.7408, "step": 1497 }, { "epoch": 0.689905299214185, "grad_norm": 3.8838994023341913, "learning_rate": 4.634173531361947e-07, "loss": 0.6801, "step": 1498 }, { "epoch": 0.6903658501482398, "grad_norm": 4.059274520251461, "learning_rate": 4.6215855643539903e-07, "loss": 0.5966, "step": 1499 }, { "epoch": 0.6908264010822946, "grad_norm": 3.8369615979737546, "learning_rate": 4.609009577150472e-07, "loss": 0.7861, "step": 1500 }, { "epoch": 0.6912869520163496, "grad_norm": 4.223405706360811, "learning_rate": 4.5964455977629593e-07, "loss": 0.6235, "step": 1501 }, { "epoch": 0.6917475029504044, "grad_norm": 3.819568203332057, "learning_rate": 4.583893654176285e-07, "loss": 0.6342, "step": 1502 }, { "epoch": 0.6922080538844593, "grad_norm": 3.873789329454416, "learning_rate": 4.5713537743484754e-07, "loss": 0.6479, "step": 1503 }, { "epoch": 0.6926686048185141, "grad_norm": 4.133200478269183, "learning_rate": 4.5588259862106725e-07, "loss": 0.6478, "step": 1504 }, { "epoch": 0.6931291557525691, "grad_norm": 4.053003104427769, "learning_rate": 4.5463103176671016e-07, "loss": 0.7983, "step": 1505 }, { "epoch": 0.6935897066866239, "grad_norm": 3.6261912175252022, "learning_rate": 4.533806796594989e-07, "loss": 0.7111, "step": 1506 }, { "epoch": 0.6940502576206787, "grad_norm": 4.0899648458490185, "learning_rate": 4.521315450844492e-07, "loss": 0.7176, "step": 1507 }, { "epoch": 0.6945108085547336, "grad_norm": 4.332187239561834, "learning_rate": 4.508836308238664e-07, "loss": 0.6938, "step": 1508 }, { "epoch": 0.6949713594887884, "grad_norm": 4.121278875764634, "learning_rate": 4.4963693965733686e-07, "loss": 0.7326, "step": 1509 }, { "epoch": 0.6954319104228434, "grad_norm": 4.086814650022688, "learning_rate": 4.483914743617235e-07, "loss": 0.8352, "step": 1510 }, { "epoch": 0.6958924613568982, "grad_norm": 4.524238820254504, "learning_rate": 4.471472377111574e-07, "loss": 0.8758, "step": 1511 }, { "epoch": 0.696353012290953, "grad_norm": 4.129045270197193, "learning_rate": 4.459042324770338e-07, "loss": 0.736, "step": 1512 }, { "epoch": 0.6968135632250079, "grad_norm": 4.169660839026019, "learning_rate": 4.446624614280058e-07, "loss": 0.757, "step": 1513 }, { "epoch": 0.6972741141590628, "grad_norm": 4.4538125663150145, "learning_rate": 4.4342192732997565e-07, "loss": 0.6548, "step": 1514 }, { "epoch": 0.6977346650931177, "grad_norm": 4.373098491112191, "learning_rate": 4.4218263294609205e-07, "loss": 0.648, "step": 1515 }, { "epoch": 0.6981952160271725, "grad_norm": 4.18190493521653, "learning_rate": 4.4094458103674204e-07, "loss": 0.682, "step": 1516 }, { "epoch": 0.6986557669612273, "grad_norm": 3.7682832305523886, "learning_rate": 4.397077743595444e-07, "loss": 0.7108, "step": 1517 }, { "epoch": 0.6991163178952823, "grad_norm": 4.335754078875089, "learning_rate": 4.384722156693451e-07, "loss": 0.7905, "step": 1518 }, { "epoch": 0.6995768688293371, "grad_norm": 4.7437123383133155, "learning_rate": 4.3723790771821067e-07, "loss": 0.632, "step": 1519 }, { "epoch": 0.700037419763392, "grad_norm": 4.206844975241523, "learning_rate": 4.3600485325542047e-07, "loss": 0.6623, "step": 1520 }, { "epoch": 0.7004979706974468, "grad_norm": 3.782343709983383, "learning_rate": 4.3477305502746275e-07, "loss": 0.6946, "step": 1521 }, { "epoch": 0.7009585216315016, "grad_norm": 3.7823546045084635, "learning_rate": 4.335425157780277e-07, "loss": 0.7276, "step": 1522 }, { "epoch": 0.7014190725655566, "grad_norm": 4.160471019097744, "learning_rate": 4.323132382480015e-07, "loss": 0.6247, "step": 1523 }, { "epoch": 0.7018796234996114, "grad_norm": 4.461440869018238, "learning_rate": 4.3108522517545866e-07, "loss": 0.7726, "step": 1524 }, { "epoch": 0.7023401744336663, "grad_norm": 4.052160679236237, "learning_rate": 4.2985847929565865e-07, "loss": 0.6563, "step": 1525 }, { "epoch": 0.7028007253677211, "grad_norm": 3.8263042864222574, "learning_rate": 4.2863300334103837e-07, "loss": 0.6544, "step": 1526 }, { "epoch": 0.703261276301776, "grad_norm": 4.485266866492086, "learning_rate": 4.2740880004120474e-07, "loss": 0.7232, "step": 1527 }, { "epoch": 0.7037218272358309, "grad_norm": 3.9984499375258666, "learning_rate": 4.2618587212293147e-07, "loss": 0.78, "step": 1528 }, { "epoch": 0.7041823781698857, "grad_norm": 4.681174251713209, "learning_rate": 4.2496422231015115e-07, "loss": 0.7829, "step": 1529 }, { "epoch": 0.7046429291039406, "grad_norm": 4.372577469804908, "learning_rate": 4.237438533239488e-07, "loss": 0.6879, "step": 1530 }, { "epoch": 0.7051034800379955, "grad_norm": 4.496567442903065, "learning_rate": 4.2252476788255733e-07, "loss": 0.7376, "step": 1531 }, { "epoch": 0.7055640309720503, "grad_norm": 4.221455298816086, "learning_rate": 4.213069687013505e-07, "loss": 0.8797, "step": 1532 }, { "epoch": 0.7060245819061052, "grad_norm": 3.634228092074425, "learning_rate": 4.200904584928373e-07, "loss": 0.5811, "step": 1533 }, { "epoch": 0.70648513284016, "grad_norm": 4.413460841068345, "learning_rate": 4.1887523996665474e-07, "loss": 0.818, "step": 1534 }, { "epoch": 0.7069456837742149, "grad_norm": 4.273171493014707, "learning_rate": 4.176613158295639e-07, "loss": 0.797, "step": 1535 }, { "epoch": 0.7074062347082698, "grad_norm": 3.751474831446282, "learning_rate": 4.164486887854424e-07, "loss": 0.8724, "step": 1536 }, { "epoch": 0.7078667856423246, "grad_norm": 3.8570090464509743, "learning_rate": 4.15237361535278e-07, "loss": 0.7928, "step": 1537 }, { "epoch": 0.7083273365763795, "grad_norm": 4.333200555262222, "learning_rate": 4.140273367771643e-07, "loss": 0.7262, "step": 1538 }, { "epoch": 0.7087878875104343, "grad_norm": 3.943374005936875, "learning_rate": 4.1281861720629374e-07, "loss": 0.7013, "step": 1539 }, { "epoch": 0.7092484384444893, "grad_norm": 4.257568163326161, "learning_rate": 4.1161120551495023e-07, "loss": 0.846, "step": 1540 }, { "epoch": 0.7097089893785441, "grad_norm": 4.141396751417382, "learning_rate": 4.1040510439250676e-07, "loss": 0.5886, "step": 1541 }, { "epoch": 0.7101695403125989, "grad_norm": 4.5962657545080825, "learning_rate": 4.092003165254154e-07, "loss": 0.7204, "step": 1542 }, { "epoch": 0.7106300912466538, "grad_norm": 3.6088968525612395, "learning_rate": 4.0799684459720295e-07, "loss": 0.6201, "step": 1543 }, { "epoch": 0.7110906421807087, "grad_norm": 4.144989459720046, "learning_rate": 4.067946912884672e-07, "loss": 0.8733, "step": 1544 }, { "epoch": 0.7115511931147636, "grad_norm": 4.177496377603498, "learning_rate": 4.055938592768663e-07, "loss": 0.7664, "step": 1545 }, { "epoch": 0.7120117440488184, "grad_norm": 3.9396683355426716, "learning_rate": 4.0439435123711707e-07, "loss": 0.5601, "step": 1546 }, { "epoch": 0.7124722949828732, "grad_norm": 3.866332409026043, "learning_rate": 4.031961698409869e-07, "loss": 0.6166, "step": 1547 }, { "epoch": 0.7129328459169281, "grad_norm": 4.218431831986867, "learning_rate": 4.0199931775728767e-07, "loss": 0.7986, "step": 1548 }, { "epoch": 0.713393396850983, "grad_norm": 3.9337050216283997, "learning_rate": 4.008037976518711e-07, "loss": 0.6794, "step": 1549 }, { "epoch": 0.7138539477850379, "grad_norm": 4.420628265508012, "learning_rate": 3.996096121876221e-07, "loss": 0.5845, "step": 1550 }, { "epoch": 0.7143144987190927, "grad_norm": 4.186252526514928, "learning_rate": 3.984167640244518e-07, "loss": 0.6721, "step": 1551 }, { "epoch": 0.7147750496531475, "grad_norm": 4.171782230761937, "learning_rate": 3.972252558192938e-07, "loss": 0.71, "step": 1552 }, { "epoch": 0.7152356005872025, "grad_norm": 4.0856095831795365, "learning_rate": 3.960350902260967e-07, "loss": 0.6047, "step": 1553 }, { "epoch": 0.7156961515212573, "grad_norm": 4.216578448779837, "learning_rate": 3.948462698958188e-07, "loss": 0.6001, "step": 1554 }, { "epoch": 0.7161567024553122, "grad_norm": 4.126788995677488, "learning_rate": 3.9365879747642106e-07, "loss": 0.74, "step": 1555 }, { "epoch": 0.716617253389367, "grad_norm": 3.963854175572097, "learning_rate": 3.924726756128631e-07, "loss": 0.6874, "step": 1556 }, { "epoch": 0.7170778043234219, "grad_norm": 4.141466850230008, "learning_rate": 3.912879069470966e-07, "loss": 0.6699, "step": 1557 }, { "epoch": 0.7175383552574768, "grad_norm": 3.8775452146595124, "learning_rate": 3.9010449411805747e-07, "loss": 0.8121, "step": 1558 }, { "epoch": 0.7179989061915316, "grad_norm": 3.7766816657321627, "learning_rate": 3.889224397616635e-07, "loss": 0.5993, "step": 1559 }, { "epoch": 0.7184594571255865, "grad_norm": 4.069935646746802, "learning_rate": 3.8774174651080596e-07, "loss": 0.7372, "step": 1560 }, { "epoch": 0.7189200080596413, "grad_norm": 3.9455724152798117, "learning_rate": 3.865624169953439e-07, "loss": 0.7972, "step": 1561 }, { "epoch": 0.7193805589936962, "grad_norm": 4.125989788600182, "learning_rate": 3.853844538420993e-07, "loss": 0.7338, "step": 1562 }, { "epoch": 0.7198411099277511, "grad_norm": 3.8176217985754715, "learning_rate": 3.8420785967485115e-07, "loss": 0.6763, "step": 1563 }, { "epoch": 0.7203016608618059, "grad_norm": 4.66057902606012, "learning_rate": 3.83032637114328e-07, "loss": 0.7885, "step": 1564 }, { "epoch": 0.7207622117958608, "grad_norm": 3.896859323644279, "learning_rate": 3.8185878877820443e-07, "loss": 0.7104, "step": 1565 }, { "epoch": 0.7212227627299157, "grad_norm": 4.39146656571102, "learning_rate": 3.806863172810936e-07, "loss": 0.6122, "step": 1566 }, { "epoch": 0.7216833136639705, "grad_norm": 4.206914396392435, "learning_rate": 3.7951522523454214e-07, "loss": 0.8657, "step": 1567 }, { "epoch": 0.7221438645980254, "grad_norm": 4.502641569691358, "learning_rate": 3.7834551524702364e-07, "loss": 0.799, "step": 1568 }, { "epoch": 0.7226044155320802, "grad_norm": 4.204696153954864, "learning_rate": 3.7717718992393365e-07, "loss": 0.6986, "step": 1569 }, { "epoch": 0.7230649664661352, "grad_norm": 4.321902034801217, "learning_rate": 3.760102518675839e-07, "loss": 0.8545, "step": 1570 }, { "epoch": 0.72352551740019, "grad_norm": 3.981302445860022, "learning_rate": 3.748447036771949e-07, "loss": 0.7124, "step": 1571 }, { "epoch": 0.7239860683342448, "grad_norm": 3.942086678064472, "learning_rate": 3.736805479488936e-07, "loss": 0.7209, "step": 1572 }, { "epoch": 0.7244466192682997, "grad_norm": 4.1770372427838165, "learning_rate": 3.7251778727570305e-07, "loss": 0.6995, "step": 1573 }, { "epoch": 0.7249071702023545, "grad_norm": 3.747265066291878, "learning_rate": 3.7135642424753967e-07, "loss": 0.6572, "step": 1574 }, { "epoch": 0.7253677211364095, "grad_norm": 3.9600055552553206, "learning_rate": 3.701964614512082e-07, "loss": 0.7377, "step": 1575 }, { "epoch": 0.7258282720704643, "grad_norm": 3.7818750350687047, "learning_rate": 3.690379014703928e-07, "loss": 0.5631, "step": 1576 }, { "epoch": 0.7262888230045191, "grad_norm": 4.3781372501517, "learning_rate": 3.67880746885653e-07, "loss": 0.8093, "step": 1577 }, { "epoch": 0.726749373938574, "grad_norm": 4.047257190193407, "learning_rate": 3.667250002744199e-07, "loss": 0.6831, "step": 1578 }, { "epoch": 0.7272099248726289, "grad_norm": 4.629760089344014, "learning_rate": 3.6557066421098604e-07, "loss": 0.5979, "step": 1579 }, { "epoch": 0.7276704758066838, "grad_norm": 4.398921461275871, "learning_rate": 3.644177412665039e-07, "loss": 0.5277, "step": 1580 }, { "epoch": 0.7281310267407386, "grad_norm": 4.368462578816135, "learning_rate": 3.6326623400897796e-07, "loss": 0.7131, "step": 1581 }, { "epoch": 0.7285915776747934, "grad_norm": 4.582787436049706, "learning_rate": 3.621161450032586e-07, "loss": 0.8231, "step": 1582 }, { "epoch": 0.7290521286088484, "grad_norm": 3.580636463729336, "learning_rate": 3.609674768110381e-07, "loss": 0.7631, "step": 1583 }, { "epoch": 0.7295126795429032, "grad_norm": 4.201294984667672, "learning_rate": 3.59820231990844e-07, "loss": 0.8661, "step": 1584 }, { "epoch": 0.7299732304769581, "grad_norm": 3.8739247741656744, "learning_rate": 3.5867441309803325e-07, "loss": 0.6782, "step": 1585 }, { "epoch": 0.7304337814110129, "grad_norm": 4.280816961872699, "learning_rate": 3.5753002268478625e-07, "loss": 0.7352, "step": 1586 }, { "epoch": 0.7308943323450677, "grad_norm": 4.216257763682442, "learning_rate": 3.5638706330010236e-07, "loss": 0.7528, "step": 1587 }, { "epoch": 0.7313548832791227, "grad_norm": 3.9724488780441765, "learning_rate": 3.552455374897935e-07, "loss": 0.7126, "step": 1588 }, { "epoch": 0.7318154342131775, "grad_norm": 3.7124929518515724, "learning_rate": 3.5410544779647735e-07, "loss": 0.5471, "step": 1589 }, { "epoch": 0.7322759851472324, "grad_norm": 4.315232720577472, "learning_rate": 3.529667967595742e-07, "loss": 0.6483, "step": 1590 }, { "epoch": 0.7327365360812872, "grad_norm": 3.680077511329172, "learning_rate": 3.518295869152994e-07, "loss": 0.6188, "step": 1591 }, { "epoch": 0.7331970870153421, "grad_norm": 3.8162279240717574, "learning_rate": 3.5069382079665763e-07, "loss": 0.6308, "step": 1592 }, { "epoch": 0.733657637949397, "grad_norm": 3.987923174046764, "learning_rate": 3.4955950093343857e-07, "loss": 0.9052, "step": 1593 }, { "epoch": 0.7341181888834518, "grad_norm": 4.0444116358394435, "learning_rate": 3.484266298522106e-07, "loss": 0.7595, "step": 1594 }, { "epoch": 0.7345787398175067, "grad_norm": 3.929672410826172, "learning_rate": 3.472952100763141e-07, "loss": 0.7928, "step": 1595 }, { "epoch": 0.7350392907515616, "grad_norm": 4.047259469809458, "learning_rate": 3.461652441258579e-07, "loss": 0.7174, "step": 1596 }, { "epoch": 0.7354998416856164, "grad_norm": 4.314901266951611, "learning_rate": 3.450367345177122e-07, "loss": 0.7004, "step": 1597 }, { "epoch": 0.7359603926196713, "grad_norm": 3.8815363926705064, "learning_rate": 3.4390968376550367e-07, "loss": 0.714, "step": 1598 }, { "epoch": 0.7364209435537261, "grad_norm": 3.849498046905846, "learning_rate": 3.4278409437960865e-07, "loss": 0.7202, "step": 1599 }, { "epoch": 0.7368814944877811, "grad_norm": 3.95377583986676, "learning_rate": 3.4165996886714944e-07, "loss": 0.7119, "step": 1600 }, { "epoch": 0.7373420454218359, "grad_norm": 4.1463474068123585, "learning_rate": 3.405373097319875e-07, "loss": 0.6509, "step": 1601 }, { "epoch": 0.7378025963558907, "grad_norm": 4.490224942121161, "learning_rate": 3.3941611947471703e-07, "loss": 0.7047, "step": 1602 }, { "epoch": 0.7382631472899456, "grad_norm": 3.657457447271507, "learning_rate": 3.3829640059266283e-07, "loss": 0.6617, "step": 1603 }, { "epoch": 0.7387236982240004, "grad_norm": 4.226404752404148, "learning_rate": 3.3717815557987027e-07, "loss": 0.67, "step": 1604 }, { "epoch": 0.7391842491580554, "grad_norm": 4.565221226946919, "learning_rate": 3.360613869271016e-07, "loss": 0.6654, "step": 1605 }, { "epoch": 0.7396448000921102, "grad_norm": 3.923990843414239, "learning_rate": 3.349460971218332e-07, "loss": 0.6935, "step": 1606 }, { "epoch": 0.740105351026165, "grad_norm": 3.953755071827396, "learning_rate": 3.3383228864824496e-07, "loss": 0.5337, "step": 1607 }, { "epoch": 0.7405659019602199, "grad_norm": 4.255808715109493, "learning_rate": 3.327199639872177e-07, "loss": 0.7317, "step": 1608 }, { "epoch": 0.7410264528942748, "grad_norm": 4.0911978341373505, "learning_rate": 3.316091256163288e-07, "loss": 0.7389, "step": 1609 }, { "epoch": 0.7414870038283297, "grad_norm": 3.7636663100138934, "learning_rate": 3.3049977600984304e-07, "loss": 0.6056, "step": 1610 }, { "epoch": 0.7419475547623845, "grad_norm": 4.462318674571597, "learning_rate": 3.293919176387104e-07, "loss": 0.8265, "step": 1611 }, { "epoch": 0.7424081056964393, "grad_norm": 4.756540560080908, "learning_rate": 3.2828555297055946e-07, "loss": 0.7606, "step": 1612 }, { "epoch": 0.7428686566304943, "grad_norm": 4.099830273191573, "learning_rate": 3.271806844696905e-07, "loss": 0.7126, "step": 1613 }, { "epoch": 0.7433292075645491, "grad_norm": 4.118810830644486, "learning_rate": 3.260773145970723e-07, "loss": 0.8241, "step": 1614 }, { "epoch": 0.743789758498604, "grad_norm": 4.157467745996711, "learning_rate": 3.2497544581033555e-07, "loss": 0.8229, "step": 1615 }, { "epoch": 0.7442503094326588, "grad_norm": 4.62738643071351, "learning_rate": 3.2387508056376724e-07, "loss": 0.7624, "step": 1616 }, { "epoch": 0.7447108603667136, "grad_norm": 4.234269160154858, "learning_rate": 3.2277622130830505e-07, "loss": 0.6372, "step": 1617 }, { "epoch": 0.7451714113007686, "grad_norm": 3.938394370582372, "learning_rate": 3.216788704915327e-07, "loss": 0.6485, "step": 1618 }, { "epoch": 0.7456319622348234, "grad_norm": 4.041538086123712, "learning_rate": 3.2058303055767443e-07, "loss": 0.6557, "step": 1619 }, { "epoch": 0.7460925131688783, "grad_norm": 3.8847443427532515, "learning_rate": 3.19488703947588e-07, "loss": 0.7786, "step": 1620 }, { "epoch": 0.7465530641029331, "grad_norm": 4.125336758606481, "learning_rate": 3.1839589309876115e-07, "loss": 0.7316, "step": 1621 }, { "epoch": 0.747013615036988, "grad_norm": 3.6220189413878843, "learning_rate": 3.1730460044530573e-07, "loss": 0.716, "step": 1622 }, { "epoch": 0.7474741659710429, "grad_norm": 4.629238959815557, "learning_rate": 3.1621482841795124e-07, "loss": 0.8283, "step": 1623 }, { "epoch": 0.7479347169050977, "grad_norm": 4.255445493921282, "learning_rate": 3.151265794440404e-07, "loss": 0.6469, "step": 1624 }, { "epoch": 0.7483952678391526, "grad_norm": 4.1286951425556175, "learning_rate": 3.140398559475244e-07, "loss": 0.5988, "step": 1625 }, { "epoch": 0.7488558187732075, "grad_norm": 4.291617460620995, "learning_rate": 3.129546603489548e-07, "loss": 0.6389, "step": 1626 }, { "epoch": 0.7493163697072623, "grad_norm": 4.022681267255074, "learning_rate": 3.1187099506548153e-07, "loss": 0.6533, "step": 1627 }, { "epoch": 0.7497769206413172, "grad_norm": 4.496817611021747, "learning_rate": 3.1078886251084525e-07, "loss": 0.6744, "step": 1628 }, { "epoch": 0.750237471575372, "grad_norm": 4.236080447031633, "learning_rate": 3.0970826509537304e-07, "loss": 0.7603, "step": 1629 }, { "epoch": 0.7506980225094269, "grad_norm": 4.403231410041205, "learning_rate": 3.0862920522597167e-07, "loss": 0.6489, "step": 1630 }, { "epoch": 0.7511585734434818, "grad_norm": 3.9667675249145815, "learning_rate": 3.075516853061244e-07, "loss": 0.6522, "step": 1631 }, { "epoch": 0.7516191243775366, "grad_norm": 4.316693159823399, "learning_rate": 3.0647570773588403e-07, "loss": 0.7095, "step": 1632 }, { "epoch": 0.7520796753115915, "grad_norm": 4.076131286522707, "learning_rate": 3.0540127491186727e-07, "loss": 0.6209, "step": 1633 }, { "epoch": 0.7525402262456463, "grad_norm": 4.482685373160184, "learning_rate": 3.043283892272508e-07, "loss": 0.8073, "step": 1634 }, { "epoch": 0.7530007771797013, "grad_norm": 3.849162186532663, "learning_rate": 3.0325705307176564e-07, "loss": 0.5731, "step": 1635 }, { "epoch": 0.7534613281137561, "grad_norm": 4.742234015064628, "learning_rate": 3.0218726883168955e-07, "loss": 0.6875, "step": 1636 }, { "epoch": 0.7539218790478109, "grad_norm": 3.866704421599576, "learning_rate": 3.011190388898464e-07, "loss": 0.5818, "step": 1637 }, { "epoch": 0.7543824299818658, "grad_norm": 3.9993098592512535, "learning_rate": 3.0005236562559566e-07, "loss": 0.7648, "step": 1638 }, { "epoch": 0.7548429809159207, "grad_norm": 3.688801628277126, "learning_rate": 2.989872514148298e-07, "loss": 0.682, "step": 1639 }, { "epoch": 0.7553035318499756, "grad_norm": 4.175288437235589, "learning_rate": 2.9792369862997046e-07, "loss": 0.6455, "step": 1640 }, { "epoch": 0.7557640827840304, "grad_norm": 4.077425541537207, "learning_rate": 2.9686170963995915e-07, "loss": 0.6436, "step": 1641 }, { "epoch": 0.7562246337180852, "grad_norm": 3.8782525968743995, "learning_rate": 2.958012868102553e-07, "loss": 0.7096, "step": 1642 }, { "epoch": 0.7566851846521401, "grad_norm": 4.451533265135843, "learning_rate": 2.9474243250283035e-07, "loss": 0.7648, "step": 1643 }, { "epoch": 0.757145735586195, "grad_norm": 4.107978918236136, "learning_rate": 2.936851490761606e-07, "loss": 0.8803, "step": 1644 }, { "epoch": 0.7576062865202499, "grad_norm": 4.521620971917857, "learning_rate": 2.926294388852246e-07, "loss": 0.6687, "step": 1645 }, { "epoch": 0.7580668374543047, "grad_norm": 3.862220111746421, "learning_rate": 2.9157530428149677e-07, "loss": 0.556, "step": 1646 }, { "epoch": 0.7585273883883595, "grad_norm": 4.371373508134497, "learning_rate": 2.9052274761294094e-07, "loss": 0.7434, "step": 1647 }, { "epoch": 0.7589879393224145, "grad_norm": 3.8088714142363806, "learning_rate": 2.8947177122400737e-07, "loss": 0.578, "step": 1648 }, { "epoch": 0.7594484902564693, "grad_norm": 3.8709081540366377, "learning_rate": 2.8842237745562583e-07, "loss": 0.5642, "step": 1649 }, { "epoch": 0.7599090411905242, "grad_norm": 3.920598084258714, "learning_rate": 2.873745686452017e-07, "loss": 0.6384, "step": 1650 }, { "epoch": 0.760369592124579, "grad_norm": 3.9920964273346287, "learning_rate": 2.863283471266088e-07, "loss": 0.7067, "step": 1651 }, { "epoch": 0.760830143058634, "grad_norm": 4.590396352093968, "learning_rate": 2.852837152301867e-07, "loss": 0.7509, "step": 1652 }, { "epoch": 0.7612906939926888, "grad_norm": 3.9668382313177495, "learning_rate": 2.8424067528273374e-07, "loss": 0.6962, "step": 1653 }, { "epoch": 0.7617512449267436, "grad_norm": 4.132555207218126, "learning_rate": 2.83199229607502e-07, "loss": 0.6934, "step": 1654 }, { "epoch": 0.7622117958607985, "grad_norm": 4.319713772433016, "learning_rate": 2.821593805241932e-07, "loss": 0.6417, "step": 1655 }, { "epoch": 0.7626723467948533, "grad_norm": 4.783515687157115, "learning_rate": 2.811211303489527e-07, "loss": 0.7066, "step": 1656 }, { "epoch": 0.7631328977289082, "grad_norm": 4.125118503737503, "learning_rate": 2.8008448139436367e-07, "loss": 0.6969, "step": 1657 }, { "epoch": 0.7635934486629631, "grad_norm": 4.070424855021912, "learning_rate": 2.7904943596944373e-07, "loss": 0.7392, "step": 1658 }, { "epoch": 0.7640539995970179, "grad_norm": 3.913527201777221, "learning_rate": 2.7801599637963893e-07, "loss": 0.7032, "step": 1659 }, { "epoch": 0.7645145505310728, "grad_norm": 3.5104117260947887, "learning_rate": 2.769841649268171e-07, "loss": 0.6511, "step": 1660 }, { "epoch": 0.7649751014651277, "grad_norm": 4.075562198543556, "learning_rate": 2.759539439092655e-07, "loss": 0.6959, "step": 1661 }, { "epoch": 0.7654356523991825, "grad_norm": 4.014459089923441, "learning_rate": 2.7492533562168407e-07, "loss": 0.7122, "step": 1662 }, { "epoch": 0.7658962033332374, "grad_norm": 4.258811657151267, "learning_rate": 2.738983423551804e-07, "loss": 0.6198, "step": 1663 }, { "epoch": 0.7663567542672922, "grad_norm": 3.7434864014016935, "learning_rate": 2.7287296639726443e-07, "loss": 0.7387, "step": 1664 }, { "epoch": 0.7668173052013472, "grad_norm": 3.7793319803857726, "learning_rate": 2.7184921003184424e-07, "loss": 0.6768, "step": 1665 }, { "epoch": 0.767277856135402, "grad_norm": 4.776815430698518, "learning_rate": 2.7082707553922067e-07, "loss": 0.7045, "step": 1666 }, { "epoch": 0.7677384070694568, "grad_norm": 4.044391043655435, "learning_rate": 2.698065651960809e-07, "loss": 0.6977, "step": 1667 }, { "epoch": 0.7681989580035117, "grad_norm": 4.012818229531163, "learning_rate": 2.687876812754963e-07, "loss": 0.6609, "step": 1668 }, { "epoch": 0.7686595089375665, "grad_norm": 3.839047917298436, "learning_rate": 2.67770426046914e-07, "loss": 0.4732, "step": 1669 }, { "epoch": 0.7691200598716215, "grad_norm": 4.2513141163306924, "learning_rate": 2.6675480177615326e-07, "loss": 0.8871, "step": 1670 }, { "epoch": 0.7695806108056763, "grad_norm": 4.280719333176865, "learning_rate": 2.6574081072540264e-07, "loss": 0.7824, "step": 1671 }, { "epoch": 0.7700411617397311, "grad_norm": 3.5851021167750736, "learning_rate": 2.647284551532104e-07, "loss": 0.5351, "step": 1672 }, { "epoch": 0.770501712673786, "grad_norm": 4.146450836402305, "learning_rate": 2.6371773731448357e-07, "loss": 0.7544, "step": 1673 }, { "epoch": 0.7709622636078409, "grad_norm": 3.9552073178198253, "learning_rate": 2.6270865946048084e-07, "loss": 0.7313, "step": 1674 }, { "epoch": 0.7714228145418958, "grad_norm": 4.054441839700071, "learning_rate": 2.617012238388077e-07, "loss": 0.6175, "step": 1675 }, { "epoch": 0.7718833654759506, "grad_norm": 4.450615097517179, "learning_rate": 2.60695432693412e-07, "loss": 0.7052, "step": 1676 }, { "epoch": 0.7723439164100054, "grad_norm": 4.373431947800079, "learning_rate": 2.596912882645792e-07, "loss": 0.745, "step": 1677 }, { "epoch": 0.7728044673440604, "grad_norm": 3.4886420696367946, "learning_rate": 2.5868879278892597e-07, "loss": 0.5032, "step": 1678 }, { "epoch": 0.7732650182781152, "grad_norm": 3.4787081113812484, "learning_rate": 2.576879484993968e-07, "loss": 0.755, "step": 1679 }, { "epoch": 0.7737255692121701, "grad_norm": 3.933820209502802, "learning_rate": 2.56688757625258e-07, "loss": 0.6092, "step": 1680 }, { "epoch": 0.7741861201462249, "grad_norm": 4.088272283847079, "learning_rate": 2.5569122239209364e-07, "loss": 0.6984, "step": 1681 }, { "epoch": 0.7746466710802797, "grad_norm": 4.467399345575099, "learning_rate": 2.54695345021799e-07, "loss": 0.8592, "step": 1682 }, { "epoch": 0.7751072220143347, "grad_norm": 3.745179274549252, "learning_rate": 2.537011277325777e-07, "loss": 0.6382, "step": 1683 }, { "epoch": 0.7755677729483895, "grad_norm": 3.959007520323967, "learning_rate": 2.527085727389354e-07, "loss": 0.7132, "step": 1684 }, { "epoch": 0.7760283238824444, "grad_norm": 4.162313338921386, "learning_rate": 2.5171768225167465e-07, "loss": 0.7098, "step": 1685 }, { "epoch": 0.7764888748164992, "grad_norm": 4.3637098018291125, "learning_rate": 2.5072845847789126e-07, "loss": 0.7336, "step": 1686 }, { "epoch": 0.7769494257505541, "grad_norm": 3.577810599535255, "learning_rate": 2.4974090362096843e-07, "loss": 0.6042, "step": 1687 }, { "epoch": 0.777409976684609, "grad_norm": 4.177960441883861, "learning_rate": 2.487550198805715e-07, "loss": 0.6665, "step": 1688 }, { "epoch": 0.7778705276186638, "grad_norm": 4.048927079652939, "learning_rate": 2.4777080945264416e-07, "loss": 0.6906, "step": 1689 }, { "epoch": 0.7783310785527187, "grad_norm": 3.6746037831335725, "learning_rate": 2.467882745294031e-07, "loss": 0.5759, "step": 1690 }, { "epoch": 0.7787916294867736, "grad_norm": 3.778291769258809, "learning_rate": 2.458074172993324e-07, "loss": 0.7084, "step": 1691 }, { "epoch": 0.7792521804208284, "grad_norm": 3.9450687764074903, "learning_rate": 2.4482823994717974e-07, "loss": 0.6956, "step": 1692 }, { "epoch": 0.7797127313548833, "grad_norm": 3.9166829039321533, "learning_rate": 2.4385074465395084e-07, "loss": 0.6827, "step": 1693 }, { "epoch": 0.7801732822889381, "grad_norm": 3.7885820908183083, "learning_rate": 2.4287493359690534e-07, "loss": 0.658, "step": 1694 }, { "epoch": 0.780633833222993, "grad_norm": 3.920109265651807, "learning_rate": 2.4190080894955054e-07, "loss": 0.5944, "step": 1695 }, { "epoch": 0.7810943841570479, "grad_norm": 3.949354161688812, "learning_rate": 2.4092837288163805e-07, "loss": 0.6686, "step": 1696 }, { "epoch": 0.7815549350911027, "grad_norm": 4.473281249287729, "learning_rate": 2.399576275591586e-07, "loss": 0.6474, "step": 1697 }, { "epoch": 0.7820154860251576, "grad_norm": 3.844474172015165, "learning_rate": 2.389885751443358e-07, "loss": 0.6799, "step": 1698 }, { "epoch": 0.7824760369592124, "grad_norm": 4.295295844622006, "learning_rate": 2.3802121779562446e-07, "loss": 0.7636, "step": 1699 }, { "epoch": 0.7829365878932674, "grad_norm": 3.5832993895296803, "learning_rate": 2.3705555766770203e-07, "loss": 0.6088, "step": 1700 }, { "epoch": 0.7833971388273222, "grad_norm": 4.327992442431543, "learning_rate": 2.3609159691146575e-07, "loss": 0.766, "step": 1701 }, { "epoch": 0.783857689761377, "grad_norm": 3.9207689153268714, "learning_rate": 2.3512933767402942e-07, "loss": 0.8035, "step": 1702 }, { "epoch": 0.7843182406954319, "grad_norm": 4.247090023847478, "learning_rate": 2.3416878209871493e-07, "loss": 0.7006, "step": 1703 }, { "epoch": 0.7847787916294868, "grad_norm": 4.399957608077019, "learning_rate": 2.3320993232504993e-07, "loss": 0.6458, "step": 1704 }, { "epoch": 0.7852393425635417, "grad_norm": 3.74439913009725, "learning_rate": 2.3225279048876367e-07, "loss": 0.6558, "step": 1705 }, { "epoch": 0.7856998934975965, "grad_norm": 3.9342025773794314, "learning_rate": 2.312973587217798e-07, "loss": 0.8329, "step": 1706 }, { "epoch": 0.7861604444316513, "grad_norm": 3.623845045611112, "learning_rate": 2.3034363915221378e-07, "loss": 0.5824, "step": 1707 }, { "epoch": 0.7866209953657062, "grad_norm": 3.951889744231618, "learning_rate": 2.2939163390436732e-07, "loss": 0.5336, "step": 1708 }, { "epoch": 0.7870815462997611, "grad_norm": 4.023236211999363, "learning_rate": 2.2844134509872292e-07, "loss": 0.7491, "step": 1709 }, { "epoch": 0.787542097233816, "grad_norm": 4.158300144276421, "learning_rate": 2.2749277485194085e-07, "loss": 0.6784, "step": 1710 }, { "epoch": 0.7880026481678708, "grad_norm": 4.055906780881064, "learning_rate": 2.26545925276853e-07, "loss": 0.5397, "step": 1711 }, { "epoch": 0.7884631991019256, "grad_norm": 4.318229228619447, "learning_rate": 2.2560079848245905e-07, "loss": 0.6337, "step": 1712 }, { "epoch": 0.7889237500359806, "grad_norm": 4.197225612891661, "learning_rate": 2.2465739657392057e-07, "loss": 0.7389, "step": 1713 }, { "epoch": 0.7893843009700354, "grad_norm": 3.8227361830869637, "learning_rate": 2.2371572165255792e-07, "loss": 0.6176, "step": 1714 }, { "epoch": 0.7898448519040903, "grad_norm": 4.722749098481098, "learning_rate": 2.2277577581584473e-07, "loss": 0.7742, "step": 1715 }, { "epoch": 0.7903054028381451, "grad_norm": 4.13296616953355, "learning_rate": 2.218375611574027e-07, "loss": 0.7021, "step": 1716 }, { "epoch": 0.7907659537722, "grad_norm": 4.209226389731357, "learning_rate": 2.2090107976699802e-07, "loss": 0.6981, "step": 1717 }, { "epoch": 0.7912265047062549, "grad_norm": 4.1786584846888095, "learning_rate": 2.1996633373053653e-07, "loss": 0.7481, "step": 1718 }, { "epoch": 0.7916870556403097, "grad_norm": 4.706830754398524, "learning_rate": 2.190333251300578e-07, "loss": 0.7813, "step": 1719 }, { "epoch": 0.7921476065743646, "grad_norm": 3.573401637178929, "learning_rate": 2.1810205604373233e-07, "loss": 0.7731, "step": 1720 }, { "epoch": 0.7926081575084194, "grad_norm": 3.916902282780029, "learning_rate": 2.171725285458559e-07, "loss": 0.5782, "step": 1721 }, { "epoch": 0.7930687084424743, "grad_norm": 3.905059818104729, "learning_rate": 2.162447447068444e-07, "loss": 0.7263, "step": 1722 }, { "epoch": 0.7935292593765292, "grad_norm": 3.761910879008577, "learning_rate": 2.1531870659323082e-07, "loss": 0.7124, "step": 1723 }, { "epoch": 0.793989810310584, "grad_norm": 4.225168685042068, "learning_rate": 2.1439441626765943e-07, "loss": 0.7099, "step": 1724 }, { "epoch": 0.7944503612446389, "grad_norm": 4.02390863175608, "learning_rate": 2.1347187578888158e-07, "loss": 0.7823, "step": 1725 }, { "epoch": 0.7949109121786938, "grad_norm": 4.462592697722787, "learning_rate": 2.1255108721175065e-07, "loss": 0.7108, "step": 1726 }, { "epoch": 0.7953714631127486, "grad_norm": 4.560366948865443, "learning_rate": 2.1163205258721806e-07, "loss": 0.808, "step": 1727 }, { "epoch": 0.7958320140468035, "grad_norm": 3.725930914662531, "learning_rate": 2.1071477396232894e-07, "loss": 0.6234, "step": 1728 }, { "epoch": 0.7962925649808583, "grad_norm": 4.082616407447403, "learning_rate": 2.097992533802163e-07, "loss": 0.6591, "step": 1729 }, { "epoch": 0.7967531159149133, "grad_norm": 4.181602367611804, "learning_rate": 2.0888549288009804e-07, "loss": 0.9215, "step": 1730 }, { "epoch": 0.7972136668489681, "grad_norm": 3.6768347310107354, "learning_rate": 2.0797349449727163e-07, "loss": 0.7067, "step": 1731 }, { "epoch": 0.7976742177830229, "grad_norm": 3.690050727823596, "learning_rate": 2.070632602631086e-07, "loss": 0.6398, "step": 1732 }, { "epoch": 0.7981347687170778, "grad_norm": 3.988684781765165, "learning_rate": 2.0615479220505293e-07, "loss": 0.7586, "step": 1733 }, { "epoch": 0.7985953196511326, "grad_norm": 3.9630860959234693, "learning_rate": 2.05248092346613e-07, "loss": 0.69, "step": 1734 }, { "epoch": 0.7990558705851876, "grad_norm": 4.227115303444912, "learning_rate": 2.0434316270735875e-07, "loss": 0.4892, "step": 1735 }, { "epoch": 0.7995164215192424, "grad_norm": 4.219726473407682, "learning_rate": 2.0344000530291872e-07, "loss": 0.6646, "step": 1736 }, { "epoch": 0.7999769724532972, "grad_norm": 4.203026026035872, "learning_rate": 2.025386221449722e-07, "loss": 0.6918, "step": 1737 }, { "epoch": 0.8004375233873521, "grad_norm": 4.050857210037986, "learning_rate": 2.0163901524124771e-07, "loss": 0.7139, "step": 1738 }, { "epoch": 0.800898074321407, "grad_norm": 4.302860332129815, "learning_rate": 2.0074118659551697e-07, "loss": 0.663, "step": 1739 }, { "epoch": 0.8013586252554619, "grad_norm": 3.9835552867202746, "learning_rate": 1.9984513820759052e-07, "loss": 0.6133, "step": 1740 }, { "epoch": 0.8018191761895167, "grad_norm": 3.983519299452069, "learning_rate": 1.9895087207331417e-07, "loss": 0.6715, "step": 1741 }, { "epoch": 0.8022797271235715, "grad_norm": 4.637885769083362, "learning_rate": 1.980583901845636e-07, "loss": 0.7157, "step": 1742 }, { "epoch": 0.8027402780576265, "grad_norm": 3.916194944795315, "learning_rate": 1.9716769452924065e-07, "loss": 0.6397, "step": 1743 }, { "epoch": 0.8032008289916813, "grad_norm": 4.084763613141294, "learning_rate": 1.9627878709126778e-07, "loss": 0.7518, "step": 1744 }, { "epoch": 0.8036613799257362, "grad_norm": 3.8379131642864475, "learning_rate": 1.9539166985058508e-07, "loss": 0.6446, "step": 1745 }, { "epoch": 0.804121930859791, "grad_norm": 4.130479851144024, "learning_rate": 1.945063447831452e-07, "loss": 0.6062, "step": 1746 }, { "epoch": 0.8045824817938458, "grad_norm": 3.8993549508637537, "learning_rate": 1.936228138609084e-07, "loss": 0.7223, "step": 1747 }, { "epoch": 0.8050430327279008, "grad_norm": 4.42937785506752, "learning_rate": 1.92741079051839e-07, "loss": 0.7171, "step": 1748 }, { "epoch": 0.8055035836619556, "grad_norm": 3.959741936990014, "learning_rate": 1.9186114231990104e-07, "loss": 0.6447, "step": 1749 }, { "epoch": 0.8059641345960105, "grad_norm": 3.731912424215661, "learning_rate": 1.9098300562505264e-07, "loss": 0.5744, "step": 1750 }, { "epoch": 0.8064246855300653, "grad_norm": 3.9435154057872355, "learning_rate": 1.901066709232434e-07, "loss": 0.7583, "step": 1751 }, { "epoch": 0.8068852364641202, "grad_norm": 4.016682624130555, "learning_rate": 1.8923214016640898e-07, "loss": 0.6407, "step": 1752 }, { "epoch": 0.8073457873981751, "grad_norm": 4.068936191351287, "learning_rate": 1.8835941530246657e-07, "loss": 0.7608, "step": 1753 }, { "epoch": 0.8078063383322299, "grad_norm": 3.7270472646357344, "learning_rate": 1.8748849827531133e-07, "loss": 0.6275, "step": 1754 }, { "epoch": 0.8082668892662848, "grad_norm": 4.209923787298806, "learning_rate": 1.866193910248115e-07, "loss": 0.6318, "step": 1755 }, { "epoch": 0.8087274402003397, "grad_norm": 4.273194393564992, "learning_rate": 1.857520954868047e-07, "loss": 0.7075, "step": 1756 }, { "epoch": 0.8091879911343945, "grad_norm": 3.967161798176114, "learning_rate": 1.848866135930922e-07, "loss": 0.6821, "step": 1757 }, { "epoch": 0.8096485420684494, "grad_norm": 3.7593169831121074, "learning_rate": 1.8402294727143642e-07, "loss": 0.6238, "step": 1758 }, { "epoch": 0.8101090930025042, "grad_norm": 4.194605538967217, "learning_rate": 1.831610984455557e-07, "loss": 0.771, "step": 1759 }, { "epoch": 0.8105696439365591, "grad_norm": 3.9604525115656433, "learning_rate": 1.8230106903511965e-07, "loss": 0.5702, "step": 1760 }, { "epoch": 0.811030194870614, "grad_norm": 4.6274848680551335, "learning_rate": 1.814428609557458e-07, "loss": 0.4976, "step": 1761 }, { "epoch": 0.8114907458046688, "grad_norm": 4.360568632251986, "learning_rate": 1.805864761189949e-07, "loss": 0.8221, "step": 1762 }, { "epoch": 0.8119512967387237, "grad_norm": 4.460258870271302, "learning_rate": 1.7973191643236574e-07, "loss": 0.6428, "step": 1763 }, { "epoch": 0.8124118476727785, "grad_norm": 3.578777672014248, "learning_rate": 1.7887918379929356e-07, "loss": 0.5652, "step": 1764 }, { "epoch": 0.8128723986068335, "grad_norm": 4.267880740880121, "learning_rate": 1.780282801191425e-07, "loss": 0.6859, "step": 1765 }, { "epoch": 0.8133329495408883, "grad_norm": 4.642631200655122, "learning_rate": 1.771792072872028e-07, "loss": 0.687, "step": 1766 }, { "epoch": 0.8137935004749431, "grad_norm": 4.528301751383843, "learning_rate": 1.7633196719468846e-07, "loss": 0.6984, "step": 1767 }, { "epoch": 0.814254051408998, "grad_norm": 3.8811786305674785, "learning_rate": 1.7548656172872922e-07, "loss": 0.8267, "step": 1768 }, { "epoch": 0.8147146023430529, "grad_norm": 3.814398856019114, "learning_rate": 1.746429927723696e-07, "loss": 0.693, "step": 1769 }, { "epoch": 0.8151751532771078, "grad_norm": 4.31830153199216, "learning_rate": 1.738012622045635e-07, "loss": 0.6444, "step": 1770 }, { "epoch": 0.8156357042111626, "grad_norm": 4.125108421815761, "learning_rate": 1.7296137190016913e-07, "loss": 0.6994, "step": 1771 }, { "epoch": 0.8160962551452174, "grad_norm": 4.387897466217684, "learning_rate": 1.7212332372994654e-07, "loss": 0.8079, "step": 1772 }, { "epoch": 0.8165568060792723, "grad_norm": 3.8111566059384594, "learning_rate": 1.7128711956055274e-07, "loss": 0.7242, "step": 1773 }, { "epoch": 0.8170173570133272, "grad_norm": 4.198140118054288, "learning_rate": 1.7045276125453645e-07, "loss": 0.6655, "step": 1774 }, { "epoch": 0.8174779079473821, "grad_norm": 4.1261403619178765, "learning_rate": 1.6962025067033604e-07, "loss": 0.687, "step": 1775 }, { "epoch": 0.8179384588814369, "grad_norm": 3.6051619472115553, "learning_rate": 1.6878958966227363e-07, "loss": 0.6461, "step": 1776 }, { "epoch": 0.8183990098154917, "grad_norm": 3.698640152200037, "learning_rate": 1.6796078008055225e-07, "loss": 0.5552, "step": 1777 }, { "epoch": 0.8188595607495467, "grad_norm": 3.534193146163067, "learning_rate": 1.671338237712502e-07, "loss": 0.729, "step": 1778 }, { "epoch": 0.8193201116836015, "grad_norm": 3.8488574392875616, "learning_rate": 1.6630872257631834e-07, "loss": 0.5721, "step": 1779 }, { "epoch": 0.8197806626176564, "grad_norm": 4.084457114502669, "learning_rate": 1.6548547833357573e-07, "loss": 0.6005, "step": 1780 }, { "epoch": 0.8202412135517112, "grad_norm": 4.1371319506826545, "learning_rate": 1.6466409287670468e-07, "loss": 0.6851, "step": 1781 }, { "epoch": 0.8207017644857661, "grad_norm": 4.236196195794174, "learning_rate": 1.638445680352476e-07, "loss": 0.7015, "step": 1782 }, { "epoch": 0.821162315419821, "grad_norm": 4.202584810373604, "learning_rate": 1.6302690563460288e-07, "loss": 0.7897, "step": 1783 }, { "epoch": 0.8216228663538758, "grad_norm": 4.213514653803861, "learning_rate": 1.6221110749601973e-07, "loss": 0.5143, "step": 1784 }, { "epoch": 0.8220834172879307, "grad_norm": 3.9807839637110507, "learning_rate": 1.613971754365957e-07, "loss": 0.6232, "step": 1785 }, { "epoch": 0.8225439682219855, "grad_norm": 3.954619270495669, "learning_rate": 1.6058511126927176e-07, "loss": 0.8316, "step": 1786 }, { "epoch": 0.8230045191560404, "grad_norm": 4.094305887942518, "learning_rate": 1.5977491680282762e-07, "loss": 0.6881, "step": 1787 }, { "epoch": 0.8234650700900953, "grad_norm": 4.224641358269551, "learning_rate": 1.589665938418795e-07, "loss": 0.6417, "step": 1788 }, { "epoch": 0.8239256210241501, "grad_norm": 4.028458884329341, "learning_rate": 1.581601441868743e-07, "loss": 0.6895, "step": 1789 }, { "epoch": 0.824386171958205, "grad_norm": 3.6511583819764337, "learning_rate": 1.5735556963408693e-07, "loss": 0.8445, "step": 1790 }, { "epoch": 0.8248467228922599, "grad_norm": 4.196123477520471, "learning_rate": 1.5655287197561495e-07, "loss": 0.803, "step": 1791 }, { "epoch": 0.8253072738263147, "grad_norm": 4.292698175916626, "learning_rate": 1.5575205299937599e-07, "loss": 0.7059, "step": 1792 }, { "epoch": 0.8257678247603696, "grad_norm": 3.7998906880825736, "learning_rate": 1.549531144891032e-07, "loss": 0.6478, "step": 1793 }, { "epoch": 0.8262283756944244, "grad_norm": 4.712766181179012, "learning_rate": 1.5415605822434053e-07, "loss": 0.7083, "step": 1794 }, { "epoch": 0.8266889266284794, "grad_norm": 4.081138065052953, "learning_rate": 1.5336088598043995e-07, "loss": 0.5927, "step": 1795 }, { "epoch": 0.8271494775625342, "grad_norm": 3.8040523990786066, "learning_rate": 1.5256759952855737e-07, "loss": 0.7563, "step": 1796 }, { "epoch": 0.827610028496589, "grad_norm": 4.029825628413112, "learning_rate": 1.5177620063564712e-07, "loss": 0.8335, "step": 1797 }, { "epoch": 0.8280705794306439, "grad_norm": 4.504669947152346, "learning_rate": 1.5098669106446026e-07, "loss": 0.7323, "step": 1798 }, { "epoch": 0.8285311303646987, "grad_norm": 3.881234748437548, "learning_rate": 1.5019907257353925e-07, "loss": 0.6441, "step": 1799 }, { "epoch": 0.8289916812987537, "grad_norm": 4.072872364596469, "learning_rate": 1.4941334691721474e-07, "loss": 0.7558, "step": 1800 }, { "epoch": 0.8294522322328085, "grad_norm": 4.262428057952184, "learning_rate": 1.4862951584560034e-07, "loss": 0.693, "step": 1801 }, { "epoch": 0.8299127831668633, "grad_norm": 4.3087541758292165, "learning_rate": 1.4784758110459073e-07, "loss": 0.5537, "step": 1802 }, { "epoch": 0.8303733341009182, "grad_norm": 4.57829487890937, "learning_rate": 1.4706754443585644e-07, "loss": 0.7456, "step": 1803 }, { "epoch": 0.8308338850349731, "grad_norm": 4.004467437839686, "learning_rate": 1.4628940757683972e-07, "loss": 0.7066, "step": 1804 }, { "epoch": 0.831294435969028, "grad_norm": 4.410054624225223, "learning_rate": 1.4551317226075176e-07, "loss": 0.8473, "step": 1805 }, { "epoch": 0.8317549869030828, "grad_norm": 4.113779070482338, "learning_rate": 1.4473884021656858e-07, "loss": 0.5926, "step": 1806 }, { "epoch": 0.8322155378371376, "grad_norm": 4.064662264364339, "learning_rate": 1.4396641316902558e-07, "loss": 0.7699, "step": 1807 }, { "epoch": 0.8326760887711926, "grad_norm": 3.899146595526925, "learning_rate": 1.431958928386169e-07, "loss": 0.6947, "step": 1808 }, { "epoch": 0.8331366397052474, "grad_norm": 4.303515498059097, "learning_rate": 1.4242728094158807e-07, "loss": 0.6917, "step": 1809 }, { "epoch": 0.8335971906393023, "grad_norm": 4.189533552153498, "learning_rate": 1.41660579189934e-07, "loss": 0.629, "step": 1810 }, { "epoch": 0.8340577415733571, "grad_norm": 4.260871111855973, "learning_rate": 1.4089578929139635e-07, "loss": 0.7063, "step": 1811 }, { "epoch": 0.8345182925074119, "grad_norm": 4.042775466912899, "learning_rate": 1.4013291294945652e-07, "loss": 0.7202, "step": 1812 }, { "epoch": 0.8349788434414669, "grad_norm": 4.068487414043774, "learning_rate": 1.3937195186333483e-07, "loss": 0.5552, "step": 1813 }, { "epoch": 0.8354393943755217, "grad_norm": 4.411667948379511, "learning_rate": 1.3861290772798552e-07, "loss": 0.8669, "step": 1814 }, { "epoch": 0.8358999453095766, "grad_norm": 4.1723034533364185, "learning_rate": 1.378557822340922e-07, "loss": 0.7396, "step": 1815 }, { "epoch": 0.8363604962436314, "grad_norm": 3.8036841912056896, "learning_rate": 1.3710057706806588e-07, "loss": 0.5876, "step": 1816 }, { "epoch": 0.8368210471776864, "grad_norm": 3.7441977199704586, "learning_rate": 1.3634729391204003e-07, "loss": 0.6205, "step": 1817 }, { "epoch": 0.8372815981117412, "grad_norm": 3.83727997289714, "learning_rate": 1.355959344438665e-07, "loss": 0.5878, "step": 1818 }, { "epoch": 0.837742149045796, "grad_norm": 4.064925509584922, "learning_rate": 1.3484650033711308e-07, "loss": 0.6824, "step": 1819 }, { "epoch": 0.8382026999798509, "grad_norm": 4.56759623596578, "learning_rate": 1.3409899326105856e-07, "loss": 0.7777, "step": 1820 }, { "epoch": 0.8386632509139058, "grad_norm": 4.835833947254294, "learning_rate": 1.3335341488068997e-07, "loss": 0.7539, "step": 1821 }, { "epoch": 0.8391238018479606, "grad_norm": 3.7994927647524266, "learning_rate": 1.3260976685669767e-07, "loss": 0.5769, "step": 1822 }, { "epoch": 0.8395843527820155, "grad_norm": 3.9051152772043674, "learning_rate": 1.3186805084547292e-07, "loss": 0.6789, "step": 1823 }, { "epoch": 0.8400449037160703, "grad_norm": 3.986369050108082, "learning_rate": 1.3112826849910374e-07, "loss": 0.8121, "step": 1824 }, { "epoch": 0.8405054546501252, "grad_norm": 4.625879766616072, "learning_rate": 1.303904214653705e-07, "loss": 0.8064, "step": 1825 }, { "epoch": 0.8409660055841801, "grad_norm": 3.8888129277115446, "learning_rate": 1.2965451138774342e-07, "loss": 0.6083, "step": 1826 }, { "epoch": 0.841426556518235, "grad_norm": 3.93694053800381, "learning_rate": 1.2892053990537855e-07, "loss": 0.5729, "step": 1827 }, { "epoch": 0.8418871074522898, "grad_norm": 4.297866749301145, "learning_rate": 1.2818850865311304e-07, "loss": 0.6333, "step": 1828 }, { "epoch": 0.8423476583863446, "grad_norm": 3.7423190169416336, "learning_rate": 1.2745841926146328e-07, "loss": 0.8208, "step": 1829 }, { "epoch": 0.8428082093203996, "grad_norm": 4.0640994987974395, "learning_rate": 1.2673027335662023e-07, "loss": 0.6768, "step": 1830 }, { "epoch": 0.8432687602544544, "grad_norm": 4.199628192208477, "learning_rate": 1.2600407256044543e-07, "loss": 0.7289, "step": 1831 }, { "epoch": 0.8437293111885092, "grad_norm": 4.220921499646459, "learning_rate": 1.2527981849046855e-07, "loss": 0.7107, "step": 1832 }, { "epoch": 0.8441898621225641, "grad_norm": 3.9655381726628587, "learning_rate": 1.245575127598828e-07, "loss": 0.5937, "step": 1833 }, { "epoch": 0.844650413056619, "grad_norm": 4.009071427222852, "learning_rate": 1.2383715697754194e-07, "loss": 0.7579, "step": 1834 }, { "epoch": 0.8451109639906739, "grad_norm": 3.5878659531431367, "learning_rate": 1.23118752747956e-07, "loss": 0.6427, "step": 1835 }, { "epoch": 0.8455715149247287, "grad_norm": 4.69973829233036, "learning_rate": 1.224023016712886e-07, "loss": 0.6892, "step": 1836 }, { "epoch": 0.8460320658587835, "grad_norm": 3.7117204907607517, "learning_rate": 1.2168780534335288e-07, "loss": 0.652, "step": 1837 }, { "epoch": 0.8464926167928384, "grad_norm": 4.5372071319572544, "learning_rate": 1.2097526535560732e-07, "loss": 0.8841, "step": 1838 }, { "epoch": 0.8469531677268933, "grad_norm": 3.914144633120273, "learning_rate": 1.2026468329515415e-07, "loss": 0.5896, "step": 1839 }, { "epoch": 0.8474137186609482, "grad_norm": 4.0767233208731, "learning_rate": 1.1955606074473368e-07, "loss": 0.7369, "step": 1840 }, { "epoch": 0.847874269595003, "grad_norm": 4.121185787996921, "learning_rate": 1.1884939928272108e-07, "loss": 0.7419, "step": 1841 }, { "epoch": 0.8483348205290578, "grad_norm": 4.759061280116938, "learning_rate": 1.1814470048312508e-07, "loss": 0.6478, "step": 1842 }, { "epoch": 0.8487953714631128, "grad_norm": 4.023263859187684, "learning_rate": 1.1744196591558153e-07, "loss": 0.5422, "step": 1843 }, { "epoch": 0.8492559223971676, "grad_norm": 3.8882175056254793, "learning_rate": 1.167411971453509e-07, "loss": 0.682, "step": 1844 }, { "epoch": 0.8497164733312225, "grad_norm": 4.131292478396944, "learning_rate": 1.1604239573331653e-07, "loss": 0.6343, "step": 1845 }, { "epoch": 0.8501770242652773, "grad_norm": 4.092569623792137, "learning_rate": 1.1534556323597821e-07, "loss": 0.7724, "step": 1846 }, { "epoch": 0.8506375751993323, "grad_norm": 4.089957652837395, "learning_rate": 1.1465070120545106e-07, "loss": 0.6409, "step": 1847 }, { "epoch": 0.8510981261333871, "grad_norm": 4.1622160080598665, "learning_rate": 1.1395781118946124e-07, "loss": 0.8043, "step": 1848 }, { "epoch": 0.8515586770674419, "grad_norm": 3.3789003561204822, "learning_rate": 1.1326689473134166e-07, "loss": 0.619, "step": 1849 }, { "epoch": 0.8520192280014968, "grad_norm": 3.9596176091031863, "learning_rate": 1.1257795337003007e-07, "loss": 0.6715, "step": 1850 }, { "epoch": 0.8524797789355516, "grad_norm": 3.882811770923557, "learning_rate": 1.1189098864006486e-07, "loss": 0.6569, "step": 1851 }, { "epoch": 0.8529403298696066, "grad_norm": 4.165975741418348, "learning_rate": 1.112060020715817e-07, "loss": 0.7746, "step": 1852 }, { "epoch": 0.8534008808036614, "grad_norm": 4.295746397046491, "learning_rate": 1.1052299519030961e-07, "loss": 0.6196, "step": 1853 }, { "epoch": 0.8538614317377162, "grad_norm": 3.9715998337194973, "learning_rate": 1.0984196951756863e-07, "loss": 0.722, "step": 1854 }, { "epoch": 0.8543219826717711, "grad_norm": 4.147644404927229, "learning_rate": 1.0916292657026616e-07, "loss": 0.6723, "step": 1855 }, { "epoch": 0.854782533605826, "grad_norm": 4.414972012331756, "learning_rate": 1.084858678608922e-07, "loss": 0.6545, "step": 1856 }, { "epoch": 0.8552430845398808, "grad_norm": 4.424828839802699, "learning_rate": 1.078107948975181e-07, "loss": 0.7282, "step": 1857 }, { "epoch": 0.8557036354739357, "grad_norm": 4.116903869604174, "learning_rate": 1.0713770918379206e-07, "loss": 0.7086, "step": 1858 }, { "epoch": 0.8561641864079905, "grad_norm": 3.7122789040066735, "learning_rate": 1.0646661221893538e-07, "loss": 0.6658, "step": 1859 }, { "epoch": 0.8566247373420455, "grad_norm": 3.770176974188287, "learning_rate": 1.0579750549773992e-07, "loss": 0.7551, "step": 1860 }, { "epoch": 0.8570852882761003, "grad_norm": 3.746951418148416, "learning_rate": 1.0513039051056504e-07, "loss": 0.6556, "step": 1861 }, { "epoch": 0.8575458392101551, "grad_norm": 4.5004035823582065, "learning_rate": 1.0446526874333262e-07, "loss": 0.5824, "step": 1862 }, { "epoch": 0.85800639014421, "grad_norm": 4.197124735971041, "learning_rate": 1.0380214167752588e-07, "loss": 0.7432, "step": 1863 }, { "epoch": 0.8584669410782648, "grad_norm": 4.3623524369175835, "learning_rate": 1.0314101079018456e-07, "loss": 0.8017, "step": 1864 }, { "epoch": 0.8589274920123198, "grad_norm": 4.103969526298146, "learning_rate": 1.0248187755390247e-07, "loss": 0.6645, "step": 1865 }, { "epoch": 0.8593880429463746, "grad_norm": 3.8899628727458704, "learning_rate": 1.0182474343682346e-07, "loss": 0.7836, "step": 1866 }, { "epoch": 0.8598485938804294, "grad_norm": 4.0935457954055945, "learning_rate": 1.0116960990263879e-07, "loss": 0.5547, "step": 1867 }, { "epoch": 0.8603091448144843, "grad_norm": 4.001605468412815, "learning_rate": 1.0051647841058385e-07, "loss": 0.5858, "step": 1868 }, { "epoch": 0.8607696957485392, "grad_norm": 3.9364784903742205, "learning_rate": 9.986535041543409e-08, "loss": 0.6476, "step": 1869 }, { "epoch": 0.8612302466825941, "grad_norm": 3.867263043851385, "learning_rate": 9.921622736750345e-08, "loss": 0.6678, "step": 1870 }, { "epoch": 0.8616907976166489, "grad_norm": 4.063487726324002, "learning_rate": 9.856911071263918e-08, "loss": 0.7496, "step": 1871 }, { "epoch": 0.8621513485507037, "grad_norm": 4.05267320245936, "learning_rate": 9.792400189221927e-08, "loss": 0.7131, "step": 1872 }, { "epoch": 0.8626118994847587, "grad_norm": 3.9627640543603664, "learning_rate": 9.72809023431509e-08, "loss": 0.708, "step": 1873 }, { "epoch": 0.8630724504188135, "grad_norm": 4.881699595027948, "learning_rate": 9.663981349786443e-08, "loss": 0.6447, "step": 1874 }, { "epoch": 0.8635330013528684, "grad_norm": 3.783456627548274, "learning_rate": 9.600073678431186e-08, "loss": 0.6748, "step": 1875 }, { "epoch": 0.8639935522869232, "grad_norm": 4.430217397890016, "learning_rate": 9.53636736259642e-08, "loss": 0.6695, "step": 1876 }, { "epoch": 0.864454103220978, "grad_norm": 3.9375184737276654, "learning_rate": 9.472862544180659e-08, "loss": 0.6646, "step": 1877 }, { "epoch": 0.864914654155033, "grad_norm": 3.9205132181890763, "learning_rate": 9.409559364633646e-08, "loss": 0.5926, "step": 1878 }, { "epoch": 0.8653752050890878, "grad_norm": 4.2301066730283186, "learning_rate": 9.346457964956023e-08, "loss": 0.7897, "step": 1879 }, { "epoch": 0.8658357560231427, "grad_norm": 3.9987562676263884, "learning_rate": 9.283558485698894e-08, "loss": 0.6348, "step": 1880 }, { "epoch": 0.8662963069571975, "grad_norm": 3.660542391775347, "learning_rate": 9.220861066963715e-08, "loss": 0.6485, "step": 1881 }, { "epoch": 0.8667568578912525, "grad_norm": 4.261589444408702, "learning_rate": 9.158365848401817e-08, "loss": 0.7269, "step": 1882 }, { "epoch": 0.8672174088253073, "grad_norm": 4.082381436826719, "learning_rate": 9.096072969214197e-08, "loss": 0.7476, "step": 1883 }, { "epoch": 0.8676779597593621, "grad_norm": 3.9441932967683173, "learning_rate": 9.0339825681511e-08, "loss": 0.6632, "step": 1884 }, { "epoch": 0.868138510693417, "grad_norm": 4.215605462508429, "learning_rate": 8.972094783511807e-08, "loss": 0.7951, "step": 1885 }, { "epoch": 0.8685990616274719, "grad_norm": 3.894693299610649, "learning_rate": 8.910409753144344e-08, "loss": 0.611, "step": 1886 }, { "epoch": 0.8690596125615268, "grad_norm": 4.233400606916813, "learning_rate": 8.848927614445011e-08, "loss": 0.7648, "step": 1887 }, { "epoch": 0.8695201634955816, "grad_norm": 4.073179472009033, "learning_rate": 8.787648504358291e-08, "loss": 0.6029, "step": 1888 }, { "epoch": 0.8699807144296364, "grad_norm": 3.9527950173206414, "learning_rate": 8.726572559376433e-08, "loss": 0.8436, "step": 1889 }, { "epoch": 0.8704412653636913, "grad_norm": 4.637399274961162, "learning_rate": 8.665699915539094e-08, "loss": 0.8876, "step": 1890 }, { "epoch": 0.8709018162977462, "grad_norm": 4.024738855456848, "learning_rate": 8.605030708433147e-08, "loss": 0.6965, "step": 1891 }, { "epoch": 0.871362367231801, "grad_norm": 4.100720080828648, "learning_rate": 8.544565073192367e-08, "loss": 0.5646, "step": 1892 }, { "epoch": 0.8718229181658559, "grad_norm": 3.9570639083624726, "learning_rate": 8.484303144497007e-08, "loss": 0.6365, "step": 1893 }, { "epoch": 0.8722834690999107, "grad_norm": 4.494528235997817, "learning_rate": 8.424245056573653e-08, "loss": 0.6611, "step": 1894 }, { "epoch": 0.8727440200339657, "grad_norm": 3.8830532293862494, "learning_rate": 8.364390943194855e-08, "loss": 0.6606, "step": 1895 }, { "epoch": 0.8732045709680205, "grad_norm": 4.126044091140759, "learning_rate": 8.304740937678833e-08, "loss": 0.7817, "step": 1896 }, { "epoch": 0.8736651219020753, "grad_norm": 4.2480776279846015, "learning_rate": 8.245295172889121e-08, "loss": 0.5817, "step": 1897 }, { "epoch": 0.8741256728361302, "grad_norm": 3.9732696562782466, "learning_rate": 8.186053781234414e-08, "loss": 0.6471, "step": 1898 }, { "epoch": 0.8745862237701851, "grad_norm": 3.7139519749173884, "learning_rate": 8.12701689466816e-08, "loss": 0.735, "step": 1899 }, { "epoch": 0.87504677470424, "grad_norm": 4.170681723384209, "learning_rate": 8.068184644688248e-08, "loss": 0.6306, "step": 1900 }, { "epoch": 0.8755073256382948, "grad_norm": 4.318100256716466, "learning_rate": 8.009557162336822e-08, "loss": 0.6633, "step": 1901 }, { "epoch": 0.8759678765723496, "grad_norm": 4.1442239376606995, "learning_rate": 7.951134578199925e-08, "loss": 0.6368, "step": 1902 }, { "epoch": 0.8764284275064045, "grad_norm": 4.208923922252678, "learning_rate": 7.892917022407153e-08, "loss": 0.6948, "step": 1903 }, { "epoch": 0.8768889784404594, "grad_norm": 4.273612558633616, "learning_rate": 7.834904624631523e-08, "loss": 0.8546, "step": 1904 }, { "epoch": 0.8773495293745143, "grad_norm": 3.8889020729909807, "learning_rate": 7.777097514089014e-08, "loss": 0.7912, "step": 1905 }, { "epoch": 0.8778100803085691, "grad_norm": 4.329744962075652, "learning_rate": 7.719495819538324e-08, "loss": 0.7147, "step": 1906 }, { "epoch": 0.878270631242624, "grad_norm": 3.7699294059169306, "learning_rate": 7.66209966928072e-08, "loss": 0.6152, "step": 1907 }, { "epoch": 0.8787311821766789, "grad_norm": 4.486387519442778, "learning_rate": 7.604909191159537e-08, "loss": 0.7114, "step": 1908 }, { "epoch": 0.8791917331107337, "grad_norm": 4.000901931700129, "learning_rate": 7.547924512560044e-08, "loss": 0.7801, "step": 1909 }, { "epoch": 0.8796522840447886, "grad_norm": 3.8819032970139666, "learning_rate": 7.491145760409134e-08, "loss": 0.8371, "step": 1910 }, { "epoch": 0.8801128349788434, "grad_norm": 4.401457213258927, "learning_rate": 7.434573061174965e-08, "loss": 0.7486, "step": 1911 }, { "epoch": 0.8805733859128984, "grad_norm": 3.802020625225121, "learning_rate": 7.378206540866783e-08, "loss": 0.744, "step": 1912 }, { "epoch": 0.8810339368469532, "grad_norm": 3.986544700313653, "learning_rate": 7.322046325034603e-08, "loss": 0.5983, "step": 1913 }, { "epoch": 0.881494487781008, "grad_norm": 4.040865079974755, "learning_rate": 7.266092538768853e-08, "loss": 0.6561, "step": 1914 }, { "epoch": 0.8819550387150629, "grad_norm": 4.113984203740787, "learning_rate": 7.210345306700238e-08, "loss": 0.7377, "step": 1915 }, { "epoch": 0.8824155896491177, "grad_norm": 4.053097169636221, "learning_rate": 7.154804752999344e-08, "loss": 0.6006, "step": 1916 }, { "epoch": 0.8828761405831727, "grad_norm": 3.777548665927776, "learning_rate": 7.099471001376434e-08, "loss": 0.7759, "step": 1917 }, { "epoch": 0.8833366915172275, "grad_norm": 4.103834466283661, "learning_rate": 7.044344175081107e-08, "loss": 0.7681, "step": 1918 }, { "epoch": 0.8837972424512823, "grad_norm": 4.298466760003686, "learning_rate": 6.989424396902078e-08, "loss": 0.7805, "step": 1919 }, { "epoch": 0.8842577933853372, "grad_norm": 3.804448297891989, "learning_rate": 6.934711789166902e-08, "loss": 0.6954, "step": 1920 }, { "epoch": 0.8847183443193921, "grad_norm": 4.185775164980842, "learning_rate": 6.880206473741646e-08, "loss": 0.7017, "step": 1921 }, { "epoch": 0.885178895253447, "grad_norm": 4.029919124157303, "learning_rate": 6.825908572030703e-08, "loss": 0.8222, "step": 1922 }, { "epoch": 0.8856394461875018, "grad_norm": 3.801068812656155, "learning_rate": 6.771818204976453e-08, "loss": 0.7544, "step": 1923 }, { "epoch": 0.8860999971215566, "grad_norm": 4.0448883387973025, "learning_rate": 6.71793549305899e-08, "loss": 0.7371, "step": 1924 }, { "epoch": 0.8865605480556116, "grad_norm": 4.4643929333794015, "learning_rate": 6.66426055629593e-08, "loss": 0.7437, "step": 1925 }, { "epoch": 0.8870210989896664, "grad_norm": 3.753547001271755, "learning_rate": 6.610793514242074e-08, "loss": 0.7361, "step": 1926 }, { "epoch": 0.8874816499237212, "grad_norm": 4.034426817537708, "learning_rate": 6.557534485989135e-08, "loss": 0.6097, "step": 1927 }, { "epoch": 0.8879422008577761, "grad_norm": 4.1381903159690205, "learning_rate": 6.504483590165533e-08, "loss": 0.8859, "step": 1928 }, { "epoch": 0.8884027517918309, "grad_norm": 3.8093150461572343, "learning_rate": 6.451640944936087e-08, "loss": 0.693, "step": 1929 }, { "epoch": 0.8888633027258859, "grad_norm": 3.964020787568163, "learning_rate": 6.399006668001772e-08, "loss": 0.6079, "step": 1930 }, { "epoch": 0.8893238536599407, "grad_norm": 4.367642794449504, "learning_rate": 6.346580876599394e-08, "loss": 0.7053, "step": 1931 }, { "epoch": 0.8897844045939955, "grad_norm": 4.092247700388315, "learning_rate": 6.294363687501459e-08, "loss": 0.6806, "step": 1932 }, { "epoch": 0.8902449555280504, "grad_norm": 3.9646639754418076, "learning_rate": 6.242355217015793e-08, "loss": 0.7208, "step": 1933 }, { "epoch": 0.8907055064621053, "grad_norm": 3.7917943530047475, "learning_rate": 6.190555580985291e-08, "loss": 0.531, "step": 1934 }, { "epoch": 0.8911660573961602, "grad_norm": 4.170784668242506, "learning_rate": 6.138964894787802e-08, "loss": 0.7067, "step": 1935 }, { "epoch": 0.891626608330215, "grad_norm": 4.142736350181332, "learning_rate": 6.08758327333564e-08, "loss": 0.7535, "step": 1936 }, { "epoch": 0.8920871592642698, "grad_norm": 4.184685512952498, "learning_rate": 6.036410831075489e-08, "loss": 0.6527, "step": 1937 }, { "epoch": 0.8925477101983248, "grad_norm": 3.736162687889201, "learning_rate": 5.985447681988187e-08, "loss": 0.8102, "step": 1938 }, { "epoch": 0.8930082611323796, "grad_norm": 4.396077593525383, "learning_rate": 5.934693939588276e-08, "loss": 0.7305, "step": 1939 }, { "epoch": 0.8934688120664345, "grad_norm": 3.608054964577549, "learning_rate": 5.884149716923947e-08, "loss": 0.6454, "step": 1940 }, { "epoch": 0.8939293630004893, "grad_norm": 3.6312321936956096, "learning_rate": 5.833815126576713e-08, "loss": 0.6418, "step": 1941 }, { "epoch": 0.8943899139345441, "grad_norm": 4.639393243358984, "learning_rate": 5.78369028066108e-08, "loss": 0.7163, "step": 1942 }, { "epoch": 0.8948504648685991, "grad_norm": 4.3810389420770806, "learning_rate": 5.7337752908244604e-08, "loss": 0.7787, "step": 1943 }, { "epoch": 0.8953110158026539, "grad_norm": 3.8504339501127176, "learning_rate": 5.684070268246799e-08, "loss": 0.6856, "step": 1944 }, { "epoch": 0.8957715667367088, "grad_norm": 3.7675336778238995, "learning_rate": 5.634575323640334e-08, "loss": 0.6773, "step": 1945 }, { "epoch": 0.8962321176707636, "grad_norm": 3.505121258779919, "learning_rate": 5.5852905672494235e-08, "loss": 0.5695, "step": 1946 }, { "epoch": 0.8966926686048186, "grad_norm": 4.3544240031122845, "learning_rate": 5.5362161088502335e-08, "loss": 0.8088, "step": 1947 }, { "epoch": 0.8971532195388734, "grad_norm": 3.807619705229715, "learning_rate": 5.487352057750538e-08, "loss": 0.7262, "step": 1948 }, { "epoch": 0.8976137704729282, "grad_norm": 4.034833958290026, "learning_rate": 5.438698522789409e-08, "loss": 0.5972, "step": 1949 }, { "epoch": 0.8980743214069831, "grad_norm": 4.329077491140869, "learning_rate": 5.390255612337058e-08, "loss": 0.7114, "step": 1950 }, { "epoch": 0.898534872341038, "grad_norm": 4.217756810382616, "learning_rate": 5.3420234342945515e-08, "loss": 0.7673, "step": 1951 }, { "epoch": 0.8989954232750929, "grad_norm": 4.196239345404449, "learning_rate": 5.2940020960935416e-08, "loss": 0.8408, "step": 1952 }, { "epoch": 0.8994559742091477, "grad_norm": 4.182648747016511, "learning_rate": 5.246191704696079e-08, "loss": 0.6983, "step": 1953 }, { "epoch": 0.8999165251432025, "grad_norm": 3.405721286815077, "learning_rate": 5.1985923665943787e-08, "loss": 0.4323, "step": 1954 }, { "epoch": 0.9003770760772574, "grad_norm": 4.2674476257344995, "learning_rate": 5.1512041878105095e-08, "loss": 0.6705, "step": 1955 }, { "epoch": 0.9008376270113123, "grad_norm": 4.206093816489189, "learning_rate": 5.104027273896239e-08, "loss": 0.6771, "step": 1956 }, { "epoch": 0.9012981779453672, "grad_norm": 3.8758771367289455, "learning_rate": 5.057061729932777e-08, "loss": 0.5457, "step": 1957 }, { "epoch": 0.901758728879422, "grad_norm": 3.63276850931252, "learning_rate": 5.0103076605304885e-08, "loss": 0.6295, "step": 1958 }, { "epoch": 0.9022192798134768, "grad_norm": 4.020144504291796, "learning_rate": 4.963765169828737e-08, "loss": 0.6506, "step": 1959 }, { "epoch": 0.9026798307475318, "grad_norm": 4.334839691692827, "learning_rate": 4.917434361495609e-08, "loss": 0.6668, "step": 1960 }, { "epoch": 0.9031403816815866, "grad_norm": 4.217821411700517, "learning_rate": 4.871315338727711e-08, "loss": 0.677, "step": 1961 }, { "epoch": 0.9036009326156415, "grad_norm": 3.6435381261541853, "learning_rate": 4.825408204249881e-08, "loss": 0.604, "step": 1962 }, { "epoch": 0.9040614835496963, "grad_norm": 4.115648377603094, "learning_rate": 4.779713060315016e-08, "loss": 0.6904, "step": 1963 }, { "epoch": 0.9045220344837512, "grad_norm": 3.644061506553867, "learning_rate": 4.734230008703877e-08, "loss": 0.704, "step": 1964 }, { "epoch": 0.9049825854178061, "grad_norm": 4.361485746011998, "learning_rate": 4.688959150724703e-08, "loss": 0.7508, "step": 1965 }, { "epoch": 0.9054431363518609, "grad_norm": 4.410942204718258, "learning_rate": 4.6439005872132454e-08, "loss": 0.8016, "step": 1966 }, { "epoch": 0.9059036872859157, "grad_norm": 3.888260284183559, "learning_rate": 4.599054418532267e-08, "loss": 0.7079, "step": 1967 }, { "epoch": 0.9063642382199706, "grad_norm": 3.8494470653546697, "learning_rate": 4.554420744571463e-08, "loss": 0.6449, "step": 1968 }, { "epoch": 0.9068247891540255, "grad_norm": 4.65108095004671, "learning_rate": 4.5099996647473215e-08, "loss": 0.5958, "step": 1969 }, { "epoch": 0.9072853400880804, "grad_norm": 3.8338095634652807, "learning_rate": 4.465791278002684e-08, "loss": 0.605, "step": 1970 }, { "epoch": 0.9077458910221352, "grad_norm": 4.6061317539369755, "learning_rate": 4.4217956828066614e-08, "loss": 0.7174, "step": 1971 }, { "epoch": 0.90820644195619, "grad_norm": 4.060999911762566, "learning_rate": 4.3780129771544885e-08, "loss": 0.6904, "step": 1972 }, { "epoch": 0.908666992890245, "grad_norm": 4.362796141247837, "learning_rate": 4.3344432585670886e-08, "loss": 0.8756, "step": 1973 }, { "epoch": 0.9091275438242998, "grad_norm": 4.229418634412177, "learning_rate": 4.291086624091067e-08, "loss": 0.7341, "step": 1974 }, { "epoch": 0.9095880947583547, "grad_norm": 4.313288177522705, "learning_rate": 4.2479431702983845e-08, "loss": 0.779, "step": 1975 }, { "epoch": 0.9100486456924095, "grad_norm": 3.9245622086869507, "learning_rate": 4.205012993286139e-08, "loss": 0.6024, "step": 1976 }, { "epoch": 0.9105091966264645, "grad_norm": 4.035858033824634, "learning_rate": 4.162296188676417e-08, "loss": 0.6833, "step": 1977 }, { "epoch": 0.9109697475605193, "grad_norm": 3.707058612763738, "learning_rate": 4.119792851616022e-08, "loss": 0.6334, "step": 1978 }, { "epoch": 0.9114302984945741, "grad_norm": 4.57057211773407, "learning_rate": 4.0775030767762895e-08, "loss": 0.7264, "step": 1979 }, { "epoch": 0.911890849428629, "grad_norm": 3.848223519577542, "learning_rate": 4.035426958352861e-08, "loss": 0.7253, "step": 1980 }, { "epoch": 0.9123514003626838, "grad_norm": 3.7035016926488646, "learning_rate": 3.99356459006549e-08, "loss": 0.5571, "step": 1981 }, { "epoch": 0.9128119512967388, "grad_norm": 4.075849861837885, "learning_rate": 3.9519160651578456e-08, "loss": 0.6863, "step": 1982 }, { "epoch": 0.9132725022307936, "grad_norm": 4.610474779633875, "learning_rate": 3.910481476397231e-08, "loss": 0.6595, "step": 1983 }, { "epoch": 0.9137330531648484, "grad_norm": 4.201596366252376, "learning_rate": 3.8692609160744796e-08, "loss": 0.6233, "step": 1984 }, { "epoch": 0.9141936040989033, "grad_norm": 3.8468114659064265, "learning_rate": 3.8282544760037005e-08, "loss": 0.773, "step": 1985 }, { "epoch": 0.9146541550329582, "grad_norm": 4.375264625213212, "learning_rate": 3.787462247522033e-08, "loss": 0.7223, "step": 1986 }, { "epoch": 0.915114705967013, "grad_norm": 4.203382536199992, "learning_rate": 3.74688432148953e-08, "loss": 0.6303, "step": 1987 }, { "epoch": 0.9155752569010679, "grad_norm": 4.4171125767467965, "learning_rate": 3.7065207882888915e-08, "loss": 0.7376, "step": 1988 }, { "epoch": 0.9160358078351227, "grad_norm": 4.378078610219948, "learning_rate": 3.666371737825269e-08, "loss": 0.6929, "step": 1989 }, { "epoch": 0.9164963587691777, "grad_norm": 3.382706241577863, "learning_rate": 3.626437259526094e-08, "loss": 0.5892, "step": 1990 }, { "epoch": 0.9169569097032325, "grad_norm": 4.7561582592983696, "learning_rate": 3.58671744234087e-08, "loss": 0.714, "step": 1991 }, { "epoch": 0.9174174606372874, "grad_norm": 3.9766243006758097, "learning_rate": 3.54721237474096e-08, "loss": 0.7114, "step": 1992 }, { "epoch": 0.9178780115713422, "grad_norm": 3.9928619018478275, "learning_rate": 3.5079221447193665e-08, "loss": 0.6635, "step": 1993 }, { "epoch": 0.918338562505397, "grad_norm": 4.097246144198975, "learning_rate": 3.468846839790629e-08, "loss": 0.6425, "step": 1994 }, { "epoch": 0.918799113439452, "grad_norm": 3.915581802164911, "learning_rate": 3.4299865469905156e-08, "loss": 0.6013, "step": 1995 }, { "epoch": 0.9192596643735068, "grad_norm": 3.6419172368067296, "learning_rate": 3.391341352875887e-08, "loss": 0.6963, "step": 1996 }, { "epoch": 0.9197202153075617, "grad_norm": 4.217487103485303, "learning_rate": 3.3529113435245e-08, "loss": 0.7405, "step": 1997 }, { "epoch": 0.9201807662416165, "grad_norm": 4.308294553151241, "learning_rate": 3.314696604534839e-08, "loss": 0.7395, "step": 1998 }, { "epoch": 0.9206413171756714, "grad_norm": 4.506730060314923, "learning_rate": 3.276697221025848e-08, "loss": 0.6393, "step": 1999 }, { "epoch": 0.9211018681097263, "grad_norm": 4.496393092773118, "learning_rate": 3.238913277636846e-08, "loss": 0.6129, "step": 2000 }, { "epoch": 0.9215624190437811, "grad_norm": 4.485077894758053, "learning_rate": 3.201344858527233e-08, "loss": 0.6457, "step": 2001 }, { "epoch": 0.922022969977836, "grad_norm": 4.247732077212845, "learning_rate": 3.163992047376374e-08, "loss": 0.6963, "step": 2002 }, { "epoch": 0.9224835209118909, "grad_norm": 4.703804368514163, "learning_rate": 3.126854927383416e-08, "loss": 0.6348, "step": 2003 }, { "epoch": 0.9229440718459457, "grad_norm": 4.258131329951441, "learning_rate": 3.089933581267024e-08, "loss": 0.8083, "step": 2004 }, { "epoch": 0.9234046227800006, "grad_norm": 4.510615686185916, "learning_rate": 3.053228091265314e-08, "loss": 0.7742, "step": 2005 }, { "epoch": 0.9238651737140554, "grad_norm": 4.186314960244619, "learning_rate": 3.016738539135566e-08, "loss": 0.5982, "step": 2006 }, { "epoch": 0.9243257246481102, "grad_norm": 4.137192560359873, "learning_rate": 2.980465006154076e-08, "loss": 0.716, "step": 2007 }, { "epoch": 0.9247862755821652, "grad_norm": 4.337662994973174, "learning_rate": 2.9444075731160256e-08, "loss": 0.7428, "step": 2008 }, { "epoch": 0.92524682651622, "grad_norm": 4.130275568894714, "learning_rate": 2.908566320335215e-08, "loss": 0.6902, "step": 2009 }, { "epoch": 0.9257073774502749, "grad_norm": 4.360064282522056, "learning_rate": 2.872941327643963e-08, "loss": 0.6992, "step": 2010 }, { "epoch": 0.9261679283843297, "grad_norm": 4.163565407452106, "learning_rate": 2.837532674392862e-08, "loss": 0.6199, "step": 2011 }, { "epoch": 0.9266284793183847, "grad_norm": 4.006834687085842, "learning_rate": 2.8023404394506345e-08, "loss": 0.6661, "step": 2012 }, { "epoch": 0.9270890302524395, "grad_norm": 4.211212215238348, "learning_rate": 2.767364701204e-08, "loss": 0.6008, "step": 2013 }, { "epoch": 0.9275495811864943, "grad_norm": 4.070745925405195, "learning_rate": 2.7326055375573976e-08, "loss": 0.6959, "step": 2014 }, { "epoch": 0.9280101321205492, "grad_norm": 3.831479873568211, "learning_rate": 2.6980630259329063e-08, "loss": 0.6931, "step": 2015 }, { "epoch": 0.9284706830546041, "grad_norm": 4.225657714656069, "learning_rate": 2.6637372432700476e-08, "loss": 0.7864, "step": 2016 }, { "epoch": 0.928931233988659, "grad_norm": 3.9415785217641144, "learning_rate": 2.629628266025552e-08, "loss": 0.7056, "step": 2017 }, { "epoch": 0.9293917849227138, "grad_norm": 3.883834377094946, "learning_rate": 2.5957361701732904e-08, "loss": 0.9223, "step": 2018 }, { "epoch": 0.9298523358567686, "grad_norm": 4.1226515136433886, "learning_rate": 2.5620610312040436e-08, "loss": 0.5919, "step": 2019 }, { "epoch": 0.9303128867908235, "grad_norm": 4.000659762155779, "learning_rate": 2.528602924125334e-08, "loss": 0.6221, "step": 2020 }, { "epoch": 0.9307734377248784, "grad_norm": 3.830235498394158, "learning_rate": 2.495361923461281e-08, "loss": 0.7441, "step": 2021 }, { "epoch": 0.9312339886589333, "grad_norm": 4.260998616556306, "learning_rate": 2.462338103252415e-08, "loss": 0.8955, "step": 2022 }, { "epoch": 0.9316945395929881, "grad_norm": 4.076712161858721, "learning_rate": 2.4295315370555402e-08, "loss": 0.629, "step": 2023 }, { "epoch": 0.9321550905270429, "grad_norm": 3.66976241071991, "learning_rate": 2.3969422979435162e-08, "loss": 0.5932, "step": 2024 }, { "epoch": 0.9326156414610979, "grad_norm": 4.027163517836014, "learning_rate": 2.3645704585051775e-08, "loss": 0.7372, "step": 2025 }, { "epoch": 0.9330761923951527, "grad_norm": 5.058801836586365, "learning_rate": 2.3324160908451017e-08, "loss": 0.6179, "step": 2026 }, { "epoch": 0.9335367433292076, "grad_norm": 4.26187603752162, "learning_rate": 2.300479266583455e-08, "loss": 0.6186, "step": 2027 }, { "epoch": 0.9339972942632624, "grad_norm": 4.648350037302623, "learning_rate": 2.2687600568558785e-08, "loss": 0.9356, "step": 2028 }, { "epoch": 0.9344578451973173, "grad_norm": 4.541362923526103, "learning_rate": 2.237258532313302e-08, "loss": 0.7177, "step": 2029 }, { "epoch": 0.9349183961313722, "grad_norm": 3.793280121386138, "learning_rate": 2.205974763121754e-08, "loss": 0.7101, "step": 2030 }, { "epoch": 0.935378947065427, "grad_norm": 3.9334130634783264, "learning_rate": 2.1749088189622844e-08, "loss": 0.7328, "step": 2031 }, { "epoch": 0.9358394979994819, "grad_norm": 4.19316477842629, "learning_rate": 2.144060769030742e-08, "loss": 0.7611, "step": 2032 }, { "epoch": 0.9363000489335367, "grad_norm": 3.8315127899639467, "learning_rate": 2.113430682037598e-08, "loss": 0.6903, "step": 2033 }, { "epoch": 0.9367605998675916, "grad_norm": 3.4281808994777685, "learning_rate": 2.083018626207933e-08, "loss": 0.6382, "step": 2034 }, { "epoch": 0.9372211508016465, "grad_norm": 4.407163026913376, "learning_rate": 2.0528246692810835e-08, "loss": 0.6991, "step": 2035 }, { "epoch": 0.9376817017357013, "grad_norm": 3.5550385549515964, "learning_rate": 2.0228488785106634e-08, "loss": 0.6599, "step": 2036 }, { "epoch": 0.9381422526697561, "grad_norm": 4.348848445622488, "learning_rate": 1.9930913206643306e-08, "loss": 0.776, "step": 2037 }, { "epoch": 0.9386028036038111, "grad_norm": 4.1561024662500925, "learning_rate": 1.9635520620236323e-08, "loss": 0.6089, "step": 2038 }, { "epoch": 0.9390633545378659, "grad_norm": 4.240184946870303, "learning_rate": 1.934231168383915e-08, "loss": 0.875, "step": 2039 }, { "epoch": 0.9395239054719208, "grad_norm": 3.809032067618163, "learning_rate": 1.9051287050541263e-08, "loss": 0.7122, "step": 2040 }, { "epoch": 0.9399844564059756, "grad_norm": 3.5916934099668616, "learning_rate": 1.876244736856658e-08, "loss": 0.5354, "step": 2041 }, { "epoch": 0.9404450073400306, "grad_norm": 5.323420834607806, "learning_rate": 1.847579328127269e-08, "loss": 0.8227, "step": 2042 }, { "epoch": 0.9409055582740854, "grad_norm": 4.576365519620792, "learning_rate": 1.819132542714874e-08, "loss": 0.6696, "step": 2043 }, { "epoch": 0.9413661092081402, "grad_norm": 4.148409334275933, "learning_rate": 1.790904443981478e-08, "loss": 0.7177, "step": 2044 }, { "epoch": 0.9418266601421951, "grad_norm": 4.3674468910141595, "learning_rate": 1.7628950948018974e-08, "loss": 0.6395, "step": 2045 }, { "epoch": 0.9422872110762499, "grad_norm": 3.9214023579522395, "learning_rate": 1.7351045575638044e-08, "loss": 0.6155, "step": 2046 }, { "epoch": 0.9427477620103049, "grad_norm": 4.615835709182989, "learning_rate": 1.7075328941674295e-08, "loss": 0.7578, "step": 2047 }, { "epoch": 0.9432083129443597, "grad_norm": 3.8874127751863328, "learning_rate": 1.680180166025513e-08, "loss": 0.7247, "step": 2048 }, { "epoch": 0.9436688638784145, "grad_norm": 4.412524910293846, "learning_rate": 1.653046434063121e-08, "loss": 0.8181, "step": 2049 }, { "epoch": 0.9441294148124694, "grad_norm": 4.295564108961298, "learning_rate": 1.626131758717575e-08, "loss": 0.753, "step": 2050 }, { "epoch": 0.9445899657465243, "grad_norm": 3.721358325280816, "learning_rate": 1.59943619993822e-08, "loss": 0.6362, "step": 2051 }, { "epoch": 0.9450505166805792, "grad_norm": 4.557164915780125, "learning_rate": 1.572959817186359e-08, "loss": 0.7518, "step": 2052 }, { "epoch": 0.945511067614634, "grad_norm": 4.248037080799682, "learning_rate": 1.5467026694351404e-08, "loss": 0.6991, "step": 2053 }, { "epoch": 0.9459716185486888, "grad_norm": 4.4205861285424675, "learning_rate": 1.5206648151693478e-08, "loss": 0.7081, "step": 2054 }, { "epoch": 0.9464321694827438, "grad_norm": 3.935721854835775, "learning_rate": 1.4948463123853337e-08, "loss": 0.6534, "step": 2055 }, { "epoch": 0.9468927204167986, "grad_norm": 3.810018013454002, "learning_rate": 1.4692472185908633e-08, "loss": 0.5983, "step": 2056 }, { "epoch": 0.9473532713508535, "grad_norm": 4.246367216365032, "learning_rate": 1.4438675908050036e-08, "loss": 0.7059, "step": 2057 }, { "epoch": 0.9478138222849083, "grad_norm": 3.522146198759834, "learning_rate": 1.4187074855579795e-08, "loss": 0.5058, "step": 2058 }, { "epoch": 0.9482743732189632, "grad_norm": 4.329357428289024, "learning_rate": 1.3937669588910406e-08, "loss": 0.8103, "step": 2059 }, { "epoch": 0.9487349241530181, "grad_norm": 4.215171207898709, "learning_rate": 1.3690460663563829e-08, "loss": 0.7758, "step": 2060 }, { "epoch": 0.9491954750870729, "grad_norm": 4.446174127103142, "learning_rate": 1.344544863016961e-08, "loss": 0.7003, "step": 2061 }, { "epoch": 0.9496560260211278, "grad_norm": 3.8501021096524854, "learning_rate": 1.3202634034464199e-08, "loss": 0.7127, "step": 2062 }, { "epoch": 0.9501165769551826, "grad_norm": 4.014835620999268, "learning_rate": 1.2962017417289418e-08, "loss": 0.7231, "step": 2063 }, { "epoch": 0.9505771278892375, "grad_norm": 3.853643917597477, "learning_rate": 1.2723599314591105e-08, "loss": 0.7103, "step": 2064 }, { "epoch": 0.9510376788232924, "grad_norm": 4.101959531955964, "learning_rate": 1.2487380257418578e-08, "loss": 0.7049, "step": 2065 }, { "epoch": 0.9514982297573472, "grad_norm": 4.316533408334548, "learning_rate": 1.2253360771922739e-08, "loss": 0.7709, "step": 2066 }, { "epoch": 0.951958780691402, "grad_norm": 4.24548798061771, "learning_rate": 1.2021541379355404e-08, "loss": 0.7025, "step": 2067 }, { "epoch": 0.952419331625457, "grad_norm": 3.821778811446078, "learning_rate": 1.1791922596067649e-08, "loss": 0.7155, "step": 2068 }, { "epoch": 0.9528798825595118, "grad_norm": 3.761976153457539, "learning_rate": 1.1564504933509244e-08, "loss": 0.5871, "step": 2069 }, { "epoch": 0.9533404334935667, "grad_norm": 4.0015484373526675, "learning_rate": 1.1339288898227106e-08, "loss": 0.7279, "step": 2070 }, { "epoch": 0.9538009844276215, "grad_norm": 3.975243268524635, "learning_rate": 1.1116274991864072e-08, "loss": 0.6388, "step": 2071 }, { "epoch": 0.9542615353616765, "grad_norm": 3.9069571332652804, "learning_rate": 1.0895463711158349e-08, "loss": 0.613, "step": 2072 }, { "epoch": 0.9547220862957313, "grad_norm": 4.021943614624526, "learning_rate": 1.0676855547941844e-08, "loss": 0.586, "step": 2073 }, { "epoch": 0.9551826372297861, "grad_norm": 4.100330286925524, "learning_rate": 1.0460450989139169e-08, "loss": 0.6528, "step": 2074 }, { "epoch": 0.955643188163841, "grad_norm": 3.899668455248482, "learning_rate": 1.0246250516766863e-08, "loss": 0.6936, "step": 2075 }, { "epoch": 0.9561037390978958, "grad_norm": 3.5977654551230875, "learning_rate": 1.0034254607932168e-08, "loss": 0.6935, "step": 2076 }, { "epoch": 0.9565642900319508, "grad_norm": 3.9125573199032746, "learning_rate": 9.82446373483159e-09, "loss": 0.6554, "step": 2077 }, { "epoch": 0.9570248409660056, "grad_norm": 3.6836115399394767, "learning_rate": 9.616878364750446e-09, "loss": 0.6075, "step": 2078 }, { "epoch": 0.9574853919000604, "grad_norm": 3.813912463855398, "learning_rate": 9.411498960061436e-09, "loss": 0.5322, "step": 2079 }, { "epoch": 0.9579459428341153, "grad_norm": 4.250339746745826, "learning_rate": 9.208325978223741e-09, "loss": 0.7425, "step": 2080 }, { "epoch": 0.9584064937681702, "grad_norm": 3.942926495251086, "learning_rate": 9.00735987178214e-09, "loss": 0.7607, "step": 2081 }, { "epoch": 0.9588670447022251, "grad_norm": 3.9179427835804406, "learning_rate": 8.808601088365453e-09, "loss": 0.7865, "step": 2082 }, { "epoch": 0.9593275956362799, "grad_norm": 4.148771081277964, "learning_rate": 8.612050070686217e-09, "loss": 0.7642, "step": 2083 }, { "epoch": 0.9597881465703347, "grad_norm": 3.9977390560775796, "learning_rate": 8.417707256539675e-09, "loss": 0.617, "step": 2084 }, { "epoch": 0.9602486975043897, "grad_norm": 4.717447899113945, "learning_rate": 8.225573078802006e-09, "loss": 0.7362, "step": 2085 }, { "epoch": 0.9607092484384445, "grad_norm": 4.092846893204385, "learning_rate": 8.035647965430215e-09, "loss": 0.6919, "step": 2086 }, { "epoch": 0.9611697993724994, "grad_norm": 3.5970094331053826, "learning_rate": 7.847932339460906e-09, "loss": 0.6842, "step": 2087 }, { "epoch": 0.9616303503065542, "grad_norm": 4.3621995834345215, "learning_rate": 7.662426619009178e-09, "loss": 0.7261, "step": 2088 }, { "epoch": 0.962090901240609, "grad_norm": 3.8365775244675473, "learning_rate": 7.479131217267732e-09, "loss": 0.8581, "step": 2089 }, { "epoch": 0.962551452174664, "grad_norm": 3.993548961022831, "learning_rate": 7.2980465425063196e-09, "loss": 0.6748, "step": 2090 }, { "epoch": 0.9630120031087188, "grad_norm": 4.449281552447998, "learning_rate": 7.119172998070411e-09, "loss": 0.7102, "step": 2091 }, { "epoch": 0.9634725540427737, "grad_norm": 4.169952466410993, "learning_rate": 6.9425109823803e-09, "loss": 0.575, "step": 2092 }, { "epoch": 0.9639331049768285, "grad_norm": 4.431525046857189, "learning_rate": 6.768060888930449e-09, "loss": 0.8587, "step": 2093 }, { "epoch": 0.9643936559108834, "grad_norm": 4.03301345367469, "learning_rate": 6.595823106288589e-09, "loss": 0.7718, "step": 2094 }, { "epoch": 0.9648542068449383, "grad_norm": 4.439843191879285, "learning_rate": 6.4257980180948415e-09, "loss": 0.7084, "step": 2095 }, { "epoch": 0.9653147577789931, "grad_norm": 4.264313688070129, "learning_rate": 6.257986003060489e-09, "loss": 0.6522, "step": 2096 }, { "epoch": 0.965775308713048, "grad_norm": 3.9500128663551766, "learning_rate": 6.09238743496776e-09, "loss": 0.7148, "step": 2097 }, { "epoch": 0.9662358596471029, "grad_norm": 4.273871304441331, "learning_rate": 5.929002682668494e-09, "loss": 0.637, "step": 2098 }, { "epoch": 0.9666964105811577, "grad_norm": 4.864572795840003, "learning_rate": 5.7678321100836925e-09, "loss": 0.8515, "step": 2099 }, { "epoch": 0.9671569615152126, "grad_norm": 4.215039736733958, "learning_rate": 5.608876076202307e-09, "loss": 0.5351, "step": 2100 }, { "epoch": 0.9676175124492674, "grad_norm": 4.102251662655667, "learning_rate": 5.452134935080899e-09, "loss": 0.6637, "step": 2101 }, { "epoch": 0.9680780633833223, "grad_norm": 3.928747862706378, "learning_rate": 5.29760903584231e-09, "loss": 0.6104, "step": 2102 }, { "epoch": 0.9685386143173772, "grad_norm": 4.108574746165931, "learning_rate": 5.145298722675439e-09, "loss": 0.7589, "step": 2103 }, { "epoch": 0.968999165251432, "grad_norm": 4.057155434006943, "learning_rate": 4.9952043348342465e-09, "loss": 0.6262, "step": 2104 }, { "epoch": 0.9694597161854869, "grad_norm": 4.396465575880081, "learning_rate": 4.847326206636526e-09, "loss": 0.683, "step": 2105 }, { "epoch": 0.9699202671195417, "grad_norm": 4.443748673378629, "learning_rate": 4.701664667464245e-09, "loss": 0.6537, "step": 2106 }, { "epoch": 0.9703808180535967, "grad_norm": 3.958304145675278, "learning_rate": 4.5582200417617625e-09, "loss": 0.6596, "step": 2107 }, { "epoch": 0.9708413689876515, "grad_norm": 4.07129578140727, "learning_rate": 4.416992649035612e-09, "loss": 0.6715, "step": 2108 }, { "epoch": 0.9713019199217063, "grad_norm": 3.864644605274384, "learning_rate": 4.2779828038536085e-09, "loss": 0.7726, "step": 2109 }, { "epoch": 0.9717624708557612, "grad_norm": 3.692543709506051, "learning_rate": 4.14119081584452e-09, "loss": 0.7036, "step": 2110 }, { "epoch": 0.9722230217898161, "grad_norm": 4.4863642525153296, "learning_rate": 4.00661698969662e-09, "loss": 0.6777, "step": 2111 }, { "epoch": 0.972683572723871, "grad_norm": 4.0289867949989775, "learning_rate": 3.874261625157915e-09, "loss": 0.6974, "step": 2112 }, { "epoch": 0.9731441236579258, "grad_norm": 3.7785742072934094, "learning_rate": 3.744125017034916e-09, "loss": 0.6516, "step": 2113 }, { "epoch": 0.9736046745919806, "grad_norm": 3.71338298057666, "learning_rate": 3.6162074551919772e-09, "loss": 0.6141, "step": 2114 }, { "epoch": 0.9740652255260355, "grad_norm": 4.636575137954527, "learning_rate": 3.4905092245509637e-09, "loss": 0.6935, "step": 2115 }, { "epoch": 0.9745257764600904, "grad_norm": 4.2202660060346995, "learning_rate": 3.3670306050902485e-09, "loss": 0.6968, "step": 2116 }, { "epoch": 0.9749863273941453, "grad_norm": 3.5094637568484006, "learning_rate": 3.2457718718443827e-09, "loss": 0.7229, "step": 2117 }, { "epoch": 0.9754468783282001, "grad_norm": 3.9233778895007787, "learning_rate": 3.1267332949033166e-09, "loss": 0.7956, "step": 2118 }, { "epoch": 0.9759074292622549, "grad_norm": 4.229877663013131, "learning_rate": 3.009915139412067e-09, "loss": 0.672, "step": 2119 }, { "epoch": 0.9763679801963099, "grad_norm": 4.148228080530327, "learning_rate": 2.8953176655696075e-09, "loss": 0.8011, "step": 2120 }, { "epoch": 0.9768285311303647, "grad_norm": 4.349431351104838, "learning_rate": 2.7829411286287572e-09, "loss": 0.6967, "step": 2121 }, { "epoch": 0.9772890820644196, "grad_norm": 4.2339034058382685, "learning_rate": 2.6727857788954033e-09, "loss": 0.772, "step": 2122 }, { "epoch": 0.9777496329984744, "grad_norm": 4.07166648058708, "learning_rate": 2.5648518617280567e-09, "loss": 0.6638, "step": 2123 }, { "epoch": 0.9782101839325293, "grad_norm": 4.101361846772003, "learning_rate": 2.459139617537187e-09, "loss": 0.6417, "step": 2124 }, { "epoch": 0.9786707348665842, "grad_norm": 4.015454959879105, "learning_rate": 2.3556492817847773e-09, "loss": 0.6757, "step": 2125 }, { "epoch": 0.979131285800639, "grad_norm": 4.072877267233095, "learning_rate": 2.2543810849836586e-09, "loss": 0.6808, "step": 2126 }, { "epoch": 0.9795918367346939, "grad_norm": 3.9107442421287426, "learning_rate": 2.1553352526972878e-09, "loss": 0.6806, "step": 2127 }, { "epoch": 0.9800523876687487, "grad_norm": 3.8455167205141327, "learning_rate": 2.0585120055389705e-09, "loss": 0.6232, "step": 2128 }, { "epoch": 0.9805129386028036, "grad_norm": 3.890935728597498, "learning_rate": 1.963911559171416e-09, "loss": 0.7995, "step": 2129 }, { "epoch": 0.9809734895368585, "grad_norm": 4.076378154627958, "learning_rate": 1.8715341243061846e-09, "loss": 0.8182, "step": 2130 }, { "epoch": 0.9814340404709133, "grad_norm": 4.809284266117527, "learning_rate": 1.7813799067035729e-09, "loss": 0.6664, "step": 2131 }, { "epoch": 0.9818945914049682, "grad_norm": 4.0964018718060355, "learning_rate": 1.6934491071719515e-09, "loss": 0.7526, "step": 2132 }, { "epoch": 0.9823551423390231, "grad_norm": 4.575926377349539, "learning_rate": 1.6077419215668742e-09, "loss": 0.6953, "step": 2133 }, { "epoch": 0.9828156932730779, "grad_norm": 4.993765231496591, "learning_rate": 1.5242585407915231e-09, "loss": 0.6785, "step": 2134 }, { "epoch": 0.9832762442071328, "grad_norm": 4.2206368491784385, "learning_rate": 1.4429991507954874e-09, "loss": 0.7926, "step": 2135 }, { "epoch": 0.9837367951411876, "grad_norm": 4.298926372015521, "learning_rate": 1.3639639325748741e-09, "loss": 0.7294, "step": 2136 }, { "epoch": 0.9841973460752426, "grad_norm": 3.7307773566249742, "learning_rate": 1.287153062171642e-09, "loss": 0.6512, "step": 2137 }, { "epoch": 0.9846578970092974, "grad_norm": 4.05937134234853, "learning_rate": 1.2125667106730464e-09, "loss": 0.643, "step": 2138 }, { "epoch": 0.9851184479433522, "grad_norm": 4.1369570612536135, "learning_rate": 1.1402050442118616e-09, "loss": 0.5435, "step": 2139 }, { "epoch": 0.9855789988774071, "grad_norm": 3.968505554244249, "learning_rate": 1.0700682239653814e-09, "loss": 0.7708, "step": 2140 }, { "epoch": 0.9860395498114619, "grad_norm": 3.970520523144629, "learning_rate": 1.002156406155419e-09, "loss": 0.7258, "step": 2141 }, { "epoch": 0.9865001007455169, "grad_norm": 4.132821479139841, "learning_rate": 9.364697420476408e-10, "loss": 0.5927, "step": 2142 }, { "epoch": 0.9869606516795717, "grad_norm": 3.97696417636453, "learning_rate": 8.730083779516784e-10, "loss": 0.76, "step": 2143 }, { "epoch": 0.9874212026136265, "grad_norm": 3.8733030191010367, "learning_rate": 8.117724552205718e-10, "loss": 0.6256, "step": 2144 }, { "epoch": 0.9878817535476814, "grad_norm": 3.7209052434160936, "learning_rate": 7.527621102503268e-10, "loss": 0.7157, "step": 2145 }, { "epoch": 0.9883423044817363, "grad_norm": 3.9836834486613903, "learning_rate": 6.959774744796921e-10, "loss": 0.7767, "step": 2146 }, { "epoch": 0.9888028554157912, "grad_norm": 4.173663728228439, "learning_rate": 6.414186743899375e-10, "loss": 0.6044, "step": 2147 }, { "epoch": 0.989263406349846, "grad_norm": 4.171963876132027, "learning_rate": 5.890858315046321e-10, "loss": 0.6925, "step": 2148 }, { "epoch": 0.9897239572839008, "grad_norm": 4.119387953775365, "learning_rate": 5.389790623891999e-10, "loss": 0.6414, "step": 2149 }, { "epoch": 0.9901845082179558, "grad_norm": 4.134367289206945, "learning_rate": 4.910984786506978e-10, "loss": 0.8013, "step": 2150 }, { "epoch": 0.9906450591520106, "grad_norm": 4.337430952641052, "learning_rate": 4.454441869377046e-10, "loss": 0.7678, "step": 2151 }, { "epoch": 0.9911056100860655, "grad_norm": 3.9229223770895336, "learning_rate": 4.020162889399881e-10, "loss": 0.6433, "step": 2152 }, { "epoch": 0.9915661610201203, "grad_norm": 3.896199925854136, "learning_rate": 3.6081488138817176e-10, "loss": 0.7273, "step": 2153 }, { "epoch": 0.9920267119541751, "grad_norm": 4.386178012171442, "learning_rate": 3.2184005605373487e-10, "loss": 0.8442, "step": 2154 }, { "epoch": 0.9924872628882301, "grad_norm": 4.037301706173176, "learning_rate": 2.850918997485685e-10, "loss": 0.7729, "step": 2155 }, { "epoch": 0.9929478138222849, "grad_norm": 3.823240410436053, "learning_rate": 2.505704943251974e-10, "loss": 0.5649, "step": 2156 }, { "epoch": 0.9934083647563398, "grad_norm": 3.5953280422372256, "learning_rate": 2.1827591667578083e-10, "loss": 0.7281, "step": 2157 }, { "epoch": 0.9938689156903946, "grad_norm": 4.358784729367533, "learning_rate": 1.8820823873311187e-10, "loss": 0.7478, "step": 2158 }, { "epoch": 0.9943294666244495, "grad_norm": 3.7945213076232363, "learning_rate": 1.6036752746939608e-10, "loss": 0.766, "step": 2159 }, { "epoch": 0.9947900175585044, "grad_norm": 3.8178786538600247, "learning_rate": 1.347538448966956e-10, "loss": 0.7413, "step": 2160 }, { "epoch": 0.9952505684925592, "grad_norm": 4.472357923364029, "learning_rate": 1.113672480663741e-10, "loss": 0.6854, "step": 2161 }, { "epoch": 0.995711119426614, "grad_norm": 3.97163337859561, "learning_rate": 9.020778906965176e-11, "loss": 0.6832, "step": 2162 }, { "epoch": 0.996171670360669, "grad_norm": 4.030853241335211, "learning_rate": 7.127551503671724e-11, "loss": 0.707, "step": 2163 }, { "epoch": 0.9966322212947238, "grad_norm": 4.384180733324331, "learning_rate": 5.4570468136949655e-11, "loss": 0.7383, "step": 2164 }, { "epoch": 0.9970927722287787, "grad_norm": 4.737346841004294, "learning_rate": 4.009268557902956e-11, "loss": 0.7003, "step": 2165 }, { "epoch": 0.9975533231628335, "grad_norm": 4.22941680161207, "learning_rate": 2.7842199610605965e-11, "loss": 0.6934, "step": 2166 }, { "epoch": 0.9980138740968884, "grad_norm": 4.120817955252019, "learning_rate": 1.7819037518185252e-11, "loss": 0.7701, "step": 2167 }, { "epoch": 0.9984744250309433, "grad_norm": 4.905499059838261, "learning_rate": 1.0023221627242229e-11, "loss": 0.639, "step": 2168 }, { "epoch": 0.9989349759649981, "grad_norm": 4.098627395573196, "learning_rate": 4.454769301998063e-12, "loss": 0.8183, "step": 2169 }, { "epoch": 0.999395526899053, "grad_norm": 4.2885388139196206, "learning_rate": 1.1136929456423416e-12, "loss": 0.6782, "step": 2170 }, { "epoch": 0.9998560778331078, "grad_norm": 4.355287860386229, "learning_rate": 0.0, "loss": 0.7753, "step": 2171 }, { "epoch": 0.9998560778331078, "step": 2171, "total_flos": 1448382231805952.0, "train_loss": 0.7409125669575792, "train_runtime": 76522.3855, "train_samples_per_second": 1.816, "train_steps_per_second": 0.028 } ], "logging_steps": 1.0, "max_steps": 2171, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1448382231805952.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }