{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 1336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014970059880239522, "grad_norm": 3.9547965526934012, "learning_rate": 9.9999861762256e-06, "loss": 0.1508, "step": 1 }, { "epoch": 0.0029940119760479044, "grad_norm": 4.022065325558731, "learning_rate": 9.999944704978835e-06, "loss": 0.1707, "step": 2 }, { "epoch": 0.004491017964071856, "grad_norm": 4.424523980510991, "learning_rate": 9.999875586489023e-06, "loss": 0.1472, "step": 3 }, { "epoch": 0.005988023952095809, "grad_norm": 3.9019303335345206, "learning_rate": 9.999778821138357e-06, "loss": 0.1389, "step": 4 }, { "epoch": 0.0074850299401197605, "grad_norm": 4.116205902159375, "learning_rate": 9.999654409461897e-06, "loss": 0.1494, "step": 5 }, { "epoch": 0.008982035928143712, "grad_norm": 4.379798568820562, "learning_rate": 9.999502352147583e-06, "loss": 0.137, "step": 6 }, { "epoch": 0.010479041916167664, "grad_norm": 4.53910386967001, "learning_rate": 9.999322650036216e-06, "loss": 0.1524, "step": 7 }, { "epoch": 0.011976047904191617, "grad_norm": 4.188308515992357, "learning_rate": 9.999115304121459e-06, "loss": 0.1301, "step": 8 }, { "epoch": 0.01347305389221557, "grad_norm": 4.363735489455175, "learning_rate": 9.998880315549834e-06, "loss": 0.1691, "step": 9 }, { "epoch": 0.014970059880239521, "grad_norm": 4.403762012055069, "learning_rate": 9.998617685620715e-06, "loss": 0.1813, "step": 10 }, { "epoch": 0.016467065868263474, "grad_norm": 4.358562236354187, "learning_rate": 9.998327415786315e-06, "loss": 0.1481, "step": 11 }, { "epoch": 0.017964071856287425, "grad_norm": 4.412584621014515, "learning_rate": 9.998009507651683e-06, "loss": 0.1665, "step": 12 }, { "epoch": 0.019461077844311378, "grad_norm": 4.054100023759871, "learning_rate": 9.997663962974698e-06, "loss": 0.1507, "step": 13 }, { "epoch": 0.020958083832335328, "grad_norm": 3.1323048302636156, "learning_rate": 9.997290783666048e-06, "loss": 0.1403, "step": 14 }, { "epoch": 0.02245508982035928, "grad_norm": 4.167922551930639, "learning_rate": 9.996889971789236e-06, "loss": 0.1676, "step": 15 }, { "epoch": 0.023952095808383235, "grad_norm": 3.568371028529737, "learning_rate": 9.996461529560553e-06, "loss": 0.1444, "step": 16 }, { "epoch": 0.025449101796407185, "grad_norm": 4.253118327791206, "learning_rate": 9.996005459349073e-06, "loss": 0.1612, "step": 17 }, { "epoch": 0.02694610778443114, "grad_norm": 3.642855432985505, "learning_rate": 9.995521763676645e-06, "loss": 0.1906, "step": 18 }, { "epoch": 0.02844311377245509, "grad_norm": 3.607019191860814, "learning_rate": 9.995010445217867e-06, "loss": 0.1344, "step": 19 }, { "epoch": 0.029940119760479042, "grad_norm": 3.4612011004615875, "learning_rate": 9.994471506800078e-06, "loss": 0.1326, "step": 20 }, { "epoch": 0.03143712574850299, "grad_norm": 3.5995923317892777, "learning_rate": 9.993904951403344e-06, "loss": 0.1789, "step": 21 }, { "epoch": 0.03293413173652695, "grad_norm": 5.064479298864311, "learning_rate": 9.993310782160439e-06, "loss": 0.1672, "step": 22 }, { "epoch": 0.0344311377245509, "grad_norm": 3.249511278846839, "learning_rate": 9.992689002356828e-06, "loss": 0.1508, "step": 23 }, { "epoch": 0.03592814371257485, "grad_norm": 3.3348453288569577, "learning_rate": 9.992039615430648e-06, "loss": 0.1469, "step": 24 }, { "epoch": 0.0374251497005988, "grad_norm": 3.3914975787866726, "learning_rate": 9.991362624972689e-06, "loss": 0.1203, "step": 25 }, { "epoch": 0.038922155688622756, "grad_norm": 3.6848017821775643, "learning_rate": 9.99065803472638e-06, "loss": 0.1382, "step": 26 }, { "epoch": 0.040419161676646706, "grad_norm": 4.164890034567255, "learning_rate": 9.989925848587757e-06, "loss": 0.1839, "step": 27 }, { "epoch": 0.041916167664670656, "grad_norm": 3.405313615802546, "learning_rate": 9.989166070605447e-06, "loss": 0.1329, "step": 28 }, { "epoch": 0.04341317365269461, "grad_norm": 4.2276575143052595, "learning_rate": 9.988378704980657e-06, "loss": 0.2296, "step": 29 }, { "epoch": 0.04491017964071856, "grad_norm": 3.9864268874394537, "learning_rate": 9.98756375606713e-06, "loss": 0.1293, "step": 30 }, { "epoch": 0.04640718562874251, "grad_norm": 3.319470111040806, "learning_rate": 9.98672122837113e-06, "loss": 0.1329, "step": 31 }, { "epoch": 0.04790419161676647, "grad_norm": 4.387350887536248, "learning_rate": 9.985851126551428e-06, "loss": 0.1859, "step": 32 }, { "epoch": 0.04940119760479042, "grad_norm": 3.830061278901427, "learning_rate": 9.984953455419258e-06, "loss": 0.1654, "step": 33 }, { "epoch": 0.05089820359281437, "grad_norm": 3.637818350190068, "learning_rate": 9.9840282199383e-06, "loss": 0.1404, "step": 34 }, { "epoch": 0.05239520958083832, "grad_norm": 6.1575684640379835, "learning_rate": 9.983075425224654e-06, "loss": 0.138, "step": 35 }, { "epoch": 0.05389221556886228, "grad_norm": 3.3683535305468957, "learning_rate": 9.982095076546806e-06, "loss": 0.1636, "step": 36 }, { "epoch": 0.05538922155688623, "grad_norm": 3.6436321729638403, "learning_rate": 9.981087179325607e-06, "loss": 0.1459, "step": 37 }, { "epoch": 0.05688622754491018, "grad_norm": 4.073764116143889, "learning_rate": 9.980051739134235e-06, "loss": 0.152, "step": 38 }, { "epoch": 0.058383233532934134, "grad_norm": 4.048457396936142, "learning_rate": 9.978988761698162e-06, "loss": 0.1897, "step": 39 }, { "epoch": 0.059880239520958084, "grad_norm": 4.190131485840806, "learning_rate": 9.977898252895133e-06, "loss": 0.1699, "step": 40 }, { "epoch": 0.061377245508982034, "grad_norm": 4.127620353302428, "learning_rate": 9.976780218755132e-06, "loss": 0.1976, "step": 41 }, { "epoch": 0.06287425149700598, "grad_norm": 4.827962513971266, "learning_rate": 9.975634665460333e-06, "loss": 0.2083, "step": 42 }, { "epoch": 0.06437125748502993, "grad_norm": 3.7993654603731533, "learning_rate": 9.974461599345088e-06, "loss": 0.1236, "step": 43 }, { "epoch": 0.0658682634730539, "grad_norm": 3.4122694695520948, "learning_rate": 9.973261026895878e-06, "loss": 0.1449, "step": 44 }, { "epoch": 0.06736526946107785, "grad_norm": 3.556628189982966, "learning_rate": 9.972032954751279e-06, "loss": 0.135, "step": 45 }, { "epoch": 0.0688622754491018, "grad_norm": 4.163961738746035, "learning_rate": 9.970777389701927e-06, "loss": 0.1634, "step": 46 }, { "epoch": 0.07035928143712575, "grad_norm": 4.665141264755418, "learning_rate": 9.969494338690481e-06, "loss": 0.1817, "step": 47 }, { "epoch": 0.0718562874251497, "grad_norm": 4.726157234347155, "learning_rate": 9.968183808811586e-06, "loss": 0.2054, "step": 48 }, { "epoch": 0.07335329341317365, "grad_norm": 4.458396797888645, "learning_rate": 9.966845807311829e-06, "loss": 0.1924, "step": 49 }, { "epoch": 0.0748502994011976, "grad_norm": 4.934433729216006, "learning_rate": 9.965480341589702e-06, "loss": 0.1498, "step": 50 }, { "epoch": 0.07634730538922156, "grad_norm": 4.276210304787019, "learning_rate": 9.96408741919556e-06, "loss": 0.1666, "step": 51 }, { "epoch": 0.07784431137724551, "grad_norm": 3.950901535656718, "learning_rate": 9.962667047831585e-06, "loss": 0.1525, "step": 52 }, { "epoch": 0.07934131736526946, "grad_norm": 4.062266005780282, "learning_rate": 9.96121923535173e-06, "loss": 0.1365, "step": 53 }, { "epoch": 0.08083832335329341, "grad_norm": 3.3147880809902994, "learning_rate": 9.95974398976169e-06, "loss": 0.1325, "step": 54 }, { "epoch": 0.08233532934131736, "grad_norm": 3.832953621624798, "learning_rate": 9.958241319218848e-06, "loss": 0.1473, "step": 55 }, { "epoch": 0.08383233532934131, "grad_norm": 3.7093822074898775, "learning_rate": 9.95671123203224e-06, "loss": 0.169, "step": 56 }, { "epoch": 0.08532934131736528, "grad_norm": 3.5546770947846995, "learning_rate": 9.955153736662493e-06, "loss": 0.1576, "step": 57 }, { "epoch": 0.08682634730538923, "grad_norm": 3.4508384202380737, "learning_rate": 9.953568841721796e-06, "loss": 0.1416, "step": 58 }, { "epoch": 0.08832335329341318, "grad_norm": 3.1266472304991955, "learning_rate": 9.951956555973841e-06, "loss": 0.1351, "step": 59 }, { "epoch": 0.08982035928143713, "grad_norm": 3.325288937998713, "learning_rate": 9.950316888333775e-06, "loss": 0.1202, "step": 60 }, { "epoch": 0.09131736526946108, "grad_norm": 3.5425337176899774, "learning_rate": 9.94864984786816e-06, "loss": 0.1435, "step": 61 }, { "epoch": 0.09281437125748503, "grad_norm": 4.411689473721189, "learning_rate": 9.946955443794908e-06, "loss": 0.157, "step": 62 }, { "epoch": 0.09431137724550898, "grad_norm": 3.962833307549629, "learning_rate": 9.945233685483247e-06, "loss": 0.1579, "step": 63 }, { "epoch": 0.09580838323353294, "grad_norm": 3.659656934799494, "learning_rate": 9.943484582453653e-06, "loss": 0.1343, "step": 64 }, { "epoch": 0.09730538922155689, "grad_norm": 3.4984395889790973, "learning_rate": 9.941708144377813e-06, "loss": 0.1261, "step": 65 }, { "epoch": 0.09880239520958084, "grad_norm": 3.265461899503414, "learning_rate": 9.939904381078553e-06, "loss": 0.1241, "step": 66 }, { "epoch": 0.10029940119760479, "grad_norm": 4.179310100971323, "learning_rate": 9.938073302529804e-06, "loss": 0.1585, "step": 67 }, { "epoch": 0.10179640718562874, "grad_norm": 4.6949852702385115, "learning_rate": 9.93621491885653e-06, "loss": 0.1819, "step": 68 }, { "epoch": 0.10329341317365269, "grad_norm": 4.4210815344888825, "learning_rate": 9.934329240334686e-06, "loss": 0.1927, "step": 69 }, { "epoch": 0.10479041916167664, "grad_norm": 4.542166525483617, "learning_rate": 9.932416277391144e-06, "loss": 0.176, "step": 70 }, { "epoch": 0.1062874251497006, "grad_norm": 4.439820113199197, "learning_rate": 9.930476040603654e-06, "loss": 0.1966, "step": 71 }, { "epoch": 0.10778443113772455, "grad_norm": 3.708085124658049, "learning_rate": 9.928508540700775e-06, "loss": 0.1766, "step": 72 }, { "epoch": 0.1092814371257485, "grad_norm": 3.151751929960482, "learning_rate": 9.926513788561815e-06, "loss": 0.156, "step": 73 }, { "epoch": 0.11077844311377245, "grad_norm": 3.87275524976828, "learning_rate": 9.924491795216777e-06, "loss": 0.1529, "step": 74 }, { "epoch": 0.1122754491017964, "grad_norm": 4.394810410566181, "learning_rate": 9.922442571846293e-06, "loss": 0.1525, "step": 75 }, { "epoch": 0.11377245508982035, "grad_norm": 3.706114342784154, "learning_rate": 9.920366129781564e-06, "loss": 0.1517, "step": 76 }, { "epoch": 0.11526946107784432, "grad_norm": 4.658312422260835, "learning_rate": 9.918262480504295e-06, "loss": 0.2216, "step": 77 }, { "epoch": 0.11676646706586827, "grad_norm": 3.5171038391413925, "learning_rate": 9.916131635646635e-06, "loss": 0.1806, "step": 78 }, { "epoch": 0.11826347305389222, "grad_norm": 3.947625862915919, "learning_rate": 9.913973606991113e-06, "loss": 0.1815, "step": 79 }, { "epoch": 0.11976047904191617, "grad_norm": 3.4822511426643397, "learning_rate": 9.91178840647057e-06, "loss": 0.1435, "step": 80 }, { "epoch": 0.12125748502994012, "grad_norm": 2.7831680095946507, "learning_rate": 9.90957604616809e-06, "loss": 0.1399, "step": 81 }, { "epoch": 0.12275449101796407, "grad_norm": 3.6625571292856764, "learning_rate": 9.907336538316946e-06, "loss": 0.1583, "step": 82 }, { "epoch": 0.12425149700598802, "grad_norm": 3.8893228309636427, "learning_rate": 9.905069895300515e-06, "loss": 0.149, "step": 83 }, { "epoch": 0.12574850299401197, "grad_norm": 3.128344547259625, "learning_rate": 9.902776129652223e-06, "loss": 0.1307, "step": 84 }, { "epoch": 0.12724550898203593, "grad_norm": 3.843883930566061, "learning_rate": 9.900455254055467e-06, "loss": 0.1619, "step": 85 }, { "epoch": 0.12874251497005987, "grad_norm": 4.2862418078965865, "learning_rate": 9.898107281343557e-06, "loss": 0.156, "step": 86 }, { "epoch": 0.13023952095808383, "grad_norm": 3.6487591309138714, "learning_rate": 9.895732224499625e-06, "loss": 0.1687, "step": 87 }, { "epoch": 0.1317365269461078, "grad_norm": 3.6694028967912815, "learning_rate": 9.893330096656576e-06, "loss": 0.1357, "step": 88 }, { "epoch": 0.13323353293413173, "grad_norm": 3.643847096799062, "learning_rate": 9.890900911096994e-06, "loss": 0.1462, "step": 89 }, { "epoch": 0.1347305389221557, "grad_norm": 3.4889746555337195, "learning_rate": 9.888444681253087e-06, "loss": 0.1241, "step": 90 }, { "epoch": 0.13622754491017963, "grad_norm": 2.9092430991304705, "learning_rate": 9.885961420706603e-06, "loss": 0.1173, "step": 91 }, { "epoch": 0.1377245508982036, "grad_norm": 4.311950016391543, "learning_rate": 9.883451143188753e-06, "loss": 0.1717, "step": 92 }, { "epoch": 0.13922155688622753, "grad_norm": 4.352304225481185, "learning_rate": 9.880913862580147e-06, "loss": 0.2038, "step": 93 }, { "epoch": 0.1407185628742515, "grad_norm": 4.012300278688119, "learning_rate": 9.878349592910694e-06, "loss": 0.1659, "step": 94 }, { "epoch": 0.14221556886227546, "grad_norm": 3.8003699607131205, "learning_rate": 9.875758348359553e-06, "loss": 0.1574, "step": 95 }, { "epoch": 0.1437125748502994, "grad_norm": 4.119107305987945, "learning_rate": 9.873140143255035e-06, "loss": 0.2128, "step": 96 }, { "epoch": 0.14520958083832336, "grad_norm": 4.033580579538639, "learning_rate": 9.870494992074532e-06, "loss": 0.1835, "step": 97 }, { "epoch": 0.1467065868263473, "grad_norm": 3.7095869355957687, "learning_rate": 9.867822909444435e-06, "loss": 0.1906, "step": 98 }, { "epoch": 0.14820359281437126, "grad_norm": 3.152563453489479, "learning_rate": 9.865123910140047e-06, "loss": 0.137, "step": 99 }, { "epoch": 0.1497005988023952, "grad_norm": 3.3001340265985917, "learning_rate": 9.862398009085511e-06, "loss": 0.1681, "step": 100 }, { "epoch": 0.15119760479041916, "grad_norm": 3.3351802962231925, "learning_rate": 9.859645221353725e-06, "loss": 0.1318, "step": 101 }, { "epoch": 0.15269461077844312, "grad_norm": 3.5893217503680095, "learning_rate": 9.856865562166256e-06, "loss": 0.1353, "step": 102 }, { "epoch": 0.15419161676646706, "grad_norm": 3.983431142554415, "learning_rate": 9.854059046893257e-06, "loss": 0.1916, "step": 103 }, { "epoch": 0.15568862275449102, "grad_norm": 4.104598834445521, "learning_rate": 9.851225691053382e-06, "loss": 0.1654, "step": 104 }, { "epoch": 0.15718562874251496, "grad_norm": 4.139023897390913, "learning_rate": 9.848365510313696e-06, "loss": 0.1944, "step": 105 }, { "epoch": 0.15868263473053892, "grad_norm": 3.6310973263174313, "learning_rate": 9.8454785204896e-06, "loss": 0.131, "step": 106 }, { "epoch": 0.1601796407185629, "grad_norm": 3.8254599048870324, "learning_rate": 9.842564737544731e-06, "loss": 0.1513, "step": 107 }, { "epoch": 0.16167664670658682, "grad_norm": 4.199638176129087, "learning_rate": 9.83962417759088e-06, "loss": 0.176, "step": 108 }, { "epoch": 0.1631736526946108, "grad_norm": 4.214775285069575, "learning_rate": 9.836656856887903e-06, "loss": 0.1956, "step": 109 }, { "epoch": 0.16467065868263472, "grad_norm": 3.689391292849312, "learning_rate": 9.833662791843628e-06, "loss": 0.1688, "step": 110 }, { "epoch": 0.1661676646706587, "grad_norm": 4.220048097989626, "learning_rate": 9.830641999013768e-06, "loss": 0.1561, "step": 111 }, { "epoch": 0.16766467065868262, "grad_norm": 3.9047611784947334, "learning_rate": 9.827594495101824e-06, "loss": 0.1426, "step": 112 }, { "epoch": 0.1691616766467066, "grad_norm": 3.3686020066395534, "learning_rate": 9.824520296959001e-06, "loss": 0.1613, "step": 113 }, { "epoch": 0.17065868263473055, "grad_norm": 3.7603384709647956, "learning_rate": 9.821419421584108e-06, "loss": 0.2385, "step": 114 }, { "epoch": 0.1721556886227545, "grad_norm": 3.106254688425045, "learning_rate": 9.818291886123463e-06, "loss": 0.1485, "step": 115 }, { "epoch": 0.17365269461077845, "grad_norm": 4.497341071646391, "learning_rate": 9.815137707870806e-06, "loss": 0.2033, "step": 116 }, { "epoch": 0.1751497005988024, "grad_norm": 3.2525782558107954, "learning_rate": 9.811956904267195e-06, "loss": 0.1529, "step": 117 }, { "epoch": 0.17664670658682635, "grad_norm": 4.370434612594541, "learning_rate": 9.808749492900917e-06, "loss": 0.2017, "step": 118 }, { "epoch": 0.1781437125748503, "grad_norm": 3.5463335352437855, "learning_rate": 9.805515491507382e-06, "loss": 0.1404, "step": 119 }, { "epoch": 0.17964071856287425, "grad_norm": 3.9572810578195416, "learning_rate": 9.802254917969033e-06, "loss": 0.1869, "step": 120 }, { "epoch": 0.18113772455089822, "grad_norm": 3.7127560340724726, "learning_rate": 9.798967790315244e-06, "loss": 0.15, "step": 121 }, { "epoch": 0.18263473053892215, "grad_norm": 3.4845114240965414, "learning_rate": 9.795654126722218e-06, "loss": 0.1318, "step": 122 }, { "epoch": 0.18413173652694612, "grad_norm": 3.8503401681488802, "learning_rate": 9.79231394551289e-06, "loss": 0.1787, "step": 123 }, { "epoch": 0.18562874251497005, "grad_norm": 3.344250405623332, "learning_rate": 9.788947265156828e-06, "loss": 0.119, "step": 124 }, { "epoch": 0.18712574850299402, "grad_norm": 3.834999153295011, "learning_rate": 9.785554104270119e-06, "loss": 0.1679, "step": 125 }, { "epoch": 0.18862275449101795, "grad_norm": 3.9770288633444535, "learning_rate": 9.782134481615282e-06, "loss": 0.1941, "step": 126 }, { "epoch": 0.19011976047904192, "grad_norm": 3.923371543916325, "learning_rate": 9.778688416101155e-06, "loss": 0.1363, "step": 127 }, { "epoch": 0.19161676646706588, "grad_norm": 3.6619625462182146, "learning_rate": 9.775215926782788e-06, "loss": 0.1546, "step": 128 }, { "epoch": 0.19311377245508982, "grad_norm": 3.392272287457994, "learning_rate": 9.771717032861346e-06, "loss": 0.1649, "step": 129 }, { "epoch": 0.19461077844311378, "grad_norm": 3.5354018049553657, "learning_rate": 9.768191753683997e-06, "loss": 0.1493, "step": 130 }, { "epoch": 0.19610778443113772, "grad_norm": 4.451479454669286, "learning_rate": 9.764640108743808e-06, "loss": 0.1887, "step": 131 }, { "epoch": 0.19760479041916168, "grad_norm": 4.188361898240075, "learning_rate": 9.761062117679632e-06, "loss": 0.1725, "step": 132 }, { "epoch": 0.19910179640718562, "grad_norm": 3.0478953005936966, "learning_rate": 9.757457800276007e-06, "loss": 0.1203, "step": 133 }, { "epoch": 0.20059880239520958, "grad_norm": 3.5647947061504905, "learning_rate": 9.75382717646304e-06, "loss": 0.1434, "step": 134 }, { "epoch": 0.20209580838323354, "grad_norm": 3.3549577465372957, "learning_rate": 9.750170266316303e-06, "loss": 0.1384, "step": 135 }, { "epoch": 0.20359281437125748, "grad_norm": 3.706310991411502, "learning_rate": 9.746487090056712e-06, "loss": 0.1511, "step": 136 }, { "epoch": 0.20508982035928144, "grad_norm": 3.535434307362884, "learning_rate": 9.742777668050433e-06, "loss": 0.1357, "step": 137 }, { "epoch": 0.20658682634730538, "grad_norm": 3.870395343793705, "learning_rate": 9.739042020808746e-06, "loss": 0.1638, "step": 138 }, { "epoch": 0.20808383233532934, "grad_norm": 3.35325946584975, "learning_rate": 9.73528016898795e-06, "loss": 0.1586, "step": 139 }, { "epoch": 0.20958083832335328, "grad_norm": 3.758509549910922, "learning_rate": 9.73149213338924e-06, "loss": 0.169, "step": 140 }, { "epoch": 0.21107784431137724, "grad_norm": 3.5026632581845125, "learning_rate": 9.7276779349586e-06, "loss": 0.1416, "step": 141 }, { "epoch": 0.2125748502994012, "grad_norm": 3.5685371350123787, "learning_rate": 9.72383759478667e-06, "loss": 0.1357, "step": 142 }, { "epoch": 0.21407185628742514, "grad_norm": 3.75385318783379, "learning_rate": 9.719971134108659e-06, "loss": 0.1736, "step": 143 }, { "epoch": 0.2155688622754491, "grad_norm": 3.847133129549992, "learning_rate": 9.71607857430419e-06, "loss": 0.1417, "step": 144 }, { "epoch": 0.21706586826347304, "grad_norm": 3.9481300240536843, "learning_rate": 9.712159936897215e-06, "loss": 0.186, "step": 145 }, { "epoch": 0.218562874251497, "grad_norm": 4.524269276231721, "learning_rate": 9.708215243555875e-06, "loss": 0.1816, "step": 146 }, { "epoch": 0.22005988023952097, "grad_norm": 3.3729073628614104, "learning_rate": 9.704244516092392e-06, "loss": 0.1395, "step": 147 }, { "epoch": 0.2215568862275449, "grad_norm": 3.593663215372058, "learning_rate": 9.700247776462944e-06, "loss": 0.1408, "step": 148 }, { "epoch": 0.22305389221556887, "grad_norm": 3.9941433563198325, "learning_rate": 9.696225046767537e-06, "loss": 0.1814, "step": 149 }, { "epoch": 0.2245508982035928, "grad_norm": 4.684743058581778, "learning_rate": 9.6921763492499e-06, "loss": 0.1798, "step": 150 }, { "epoch": 0.22604790419161677, "grad_norm": 3.8815712345410165, "learning_rate": 9.688101706297341e-06, "loss": 0.1885, "step": 151 }, { "epoch": 0.2275449101796407, "grad_norm": 4.072380189730279, "learning_rate": 9.68400114044064e-06, "loss": 0.1844, "step": 152 }, { "epoch": 0.22904191616766467, "grad_norm": 4.449219011721279, "learning_rate": 9.679874674353915e-06, "loss": 0.2354, "step": 153 }, { "epoch": 0.23053892215568864, "grad_norm": 3.3918695346802785, "learning_rate": 9.6757223308545e-06, "loss": 0.1359, "step": 154 }, { "epoch": 0.23203592814371257, "grad_norm": 3.2752557930624735, "learning_rate": 9.671544132902821e-06, "loss": 0.1435, "step": 155 }, { "epoch": 0.23353293413173654, "grad_norm": 3.8337055503391957, "learning_rate": 9.667340103602263e-06, "loss": 0.1419, "step": 156 }, { "epoch": 0.23502994011976047, "grad_norm": 3.51782466842606, "learning_rate": 9.663110266199045e-06, "loss": 0.1895, "step": 157 }, { "epoch": 0.23652694610778444, "grad_norm": 3.3890651902265896, "learning_rate": 9.658854644082099e-06, "loss": 0.1467, "step": 158 }, { "epoch": 0.23802395209580837, "grad_norm": 3.848733811242815, "learning_rate": 9.654573260782925e-06, "loss": 0.1413, "step": 159 }, { "epoch": 0.23952095808383234, "grad_norm": 3.583617653970189, "learning_rate": 9.650266139975474e-06, "loss": 0.1715, "step": 160 }, { "epoch": 0.2410179640718563, "grad_norm": 4.3400703741957845, "learning_rate": 9.645933305476016e-06, "loss": 0.1648, "step": 161 }, { "epoch": 0.24251497005988024, "grad_norm": 3.6589048917022895, "learning_rate": 9.641574781242999e-06, "loss": 0.1518, "step": 162 }, { "epoch": 0.2440119760479042, "grad_norm": 3.9546925776796624, "learning_rate": 9.637190591376926e-06, "loss": 0.1466, "step": 163 }, { "epoch": 0.24550898203592814, "grad_norm": 3.557256165584965, "learning_rate": 9.632780760120217e-06, "loss": 0.1439, "step": 164 }, { "epoch": 0.2470059880239521, "grad_norm": 3.8290407985147987, "learning_rate": 9.628345311857076e-06, "loss": 0.1888, "step": 165 }, { "epoch": 0.24850299401197604, "grad_norm": 3.292759438217181, "learning_rate": 9.62388427111336e-06, "loss": 0.1393, "step": 166 }, { "epoch": 0.25, "grad_norm": 3.682173237903761, "learning_rate": 9.619397662556434e-06, "loss": 0.1464, "step": 167 }, { "epoch": 0.25149700598802394, "grad_norm": 4.088017502802779, "learning_rate": 9.614885510995047e-06, "loss": 0.1624, "step": 168 }, { "epoch": 0.25299401197604793, "grad_norm": 3.2631713025213895, "learning_rate": 9.610347841379186e-06, "loss": 0.166, "step": 169 }, { "epoch": 0.25449101796407186, "grad_norm": 3.5662406349587146, "learning_rate": 9.605784678799934e-06, "loss": 0.1964, "step": 170 }, { "epoch": 0.2559880239520958, "grad_norm": 4.236740014128051, "learning_rate": 9.60119604848935e-06, "loss": 0.1861, "step": 171 }, { "epoch": 0.25748502994011974, "grad_norm": 3.2600503608280738, "learning_rate": 9.596581975820304e-06, "loss": 0.1646, "step": 172 }, { "epoch": 0.25898203592814373, "grad_norm": 4.461823511561488, "learning_rate": 9.591942486306359e-06, "loss": 0.1738, "step": 173 }, { "epoch": 0.26047904191616766, "grad_norm": 3.4455911753878135, "learning_rate": 9.587277605601617e-06, "loss": 0.1634, "step": 174 }, { "epoch": 0.2619760479041916, "grad_norm": 3.4248425120338633, "learning_rate": 9.582587359500581e-06, "loss": 0.141, "step": 175 }, { "epoch": 0.2634730538922156, "grad_norm": 3.699805951733734, "learning_rate": 9.577871773938013e-06, "loss": 0.2003, "step": 176 }, { "epoch": 0.26497005988023953, "grad_norm": 3.904632448596208, "learning_rate": 9.573130874988789e-06, "loss": 0.1568, "step": 177 }, { "epoch": 0.26646706586826346, "grad_norm": 3.439406314404279, "learning_rate": 9.568364688867757e-06, "loss": 0.1574, "step": 178 }, { "epoch": 0.2679640718562874, "grad_norm": 4.173559824536794, "learning_rate": 9.563573241929588e-06, "loss": 0.1958, "step": 179 }, { "epoch": 0.2694610778443114, "grad_norm": 3.1643058439828797, "learning_rate": 9.558756560668637e-06, "loss": 0.124, "step": 180 }, { "epoch": 0.27095808383233533, "grad_norm": 3.9034575599917285, "learning_rate": 9.553914671718788e-06, "loss": 0.2048, "step": 181 }, { "epoch": 0.27245508982035926, "grad_norm": 3.198671189616176, "learning_rate": 9.549047601853313e-06, "loss": 0.1515, "step": 182 }, { "epoch": 0.27395209580838326, "grad_norm": 4.097284435586595, "learning_rate": 9.544155377984723e-06, "loss": 0.1585, "step": 183 }, { "epoch": 0.2754491017964072, "grad_norm": 4.2186943811879125, "learning_rate": 9.539238027164618e-06, "loss": 0.1856, "step": 184 }, { "epoch": 0.27694610778443113, "grad_norm": 4.446948852058148, "learning_rate": 9.53429557658354e-06, "loss": 0.2234, "step": 185 }, { "epoch": 0.27844311377245506, "grad_norm": 4.354797678957244, "learning_rate": 9.52932805357081e-06, "loss": 0.1643, "step": 186 }, { "epoch": 0.27994011976047906, "grad_norm": 3.0540816469718766, "learning_rate": 9.524335485594402e-06, "loss": 0.1123, "step": 187 }, { "epoch": 0.281437125748503, "grad_norm": 3.5976811563241453, "learning_rate": 9.519317900260769e-06, "loss": 0.164, "step": 188 }, { "epoch": 0.28293413173652693, "grad_norm": 3.7586211023488105, "learning_rate": 9.514275325314695e-06, "loss": 0.1742, "step": 189 }, { "epoch": 0.2844311377245509, "grad_norm": 2.93491846935756, "learning_rate": 9.509207788639148e-06, "loss": 0.1346, "step": 190 }, { "epoch": 0.28592814371257486, "grad_norm": 3.5535550253132344, "learning_rate": 9.504115318255122e-06, "loss": 0.1674, "step": 191 }, { "epoch": 0.2874251497005988, "grad_norm": 3.5383212769431136, "learning_rate": 9.498997942321484e-06, "loss": 0.1589, "step": 192 }, { "epoch": 0.28892215568862273, "grad_norm": 3.598555336515225, "learning_rate": 9.49385568913481e-06, "loss": 0.1633, "step": 193 }, { "epoch": 0.2904191616766467, "grad_norm": 3.4816144779266978, "learning_rate": 9.488688587129243e-06, "loss": 0.1533, "step": 194 }, { "epoch": 0.29191616766467066, "grad_norm": 3.3998846843054307, "learning_rate": 9.48349666487632e-06, "loss": 0.1656, "step": 195 }, { "epoch": 0.2934131736526946, "grad_norm": 3.477744819461639, "learning_rate": 9.47827995108483e-06, "loss": 0.1668, "step": 196 }, { "epoch": 0.2949101796407186, "grad_norm": 3.403995981674912, "learning_rate": 9.47303847460064e-06, "loss": 0.1558, "step": 197 }, { "epoch": 0.2964071856287425, "grad_norm": 3.511765364601212, "learning_rate": 9.467772264406545e-06, "loss": 0.1642, "step": 198 }, { "epoch": 0.29790419161676646, "grad_norm": 4.152681609352657, "learning_rate": 9.462481349622108e-06, "loss": 0.1751, "step": 199 }, { "epoch": 0.2994011976047904, "grad_norm": 3.847032317133825, "learning_rate": 9.457165759503492e-06, "loss": 0.1878, "step": 200 }, { "epoch": 0.3008982035928144, "grad_norm": 3.4784973729058133, "learning_rate": 9.451825523443307e-06, "loss": 0.1551, "step": 201 }, { "epoch": 0.3023952095808383, "grad_norm": 4.051734549920119, "learning_rate": 9.446460670970436e-06, "loss": 0.1608, "step": 202 }, { "epoch": 0.30389221556886226, "grad_norm": 3.8307806103635658, "learning_rate": 9.441071231749889e-06, "loss": 0.1313, "step": 203 }, { "epoch": 0.30538922155688625, "grad_norm": 3.9863571618593356, "learning_rate": 9.435657235582616e-06, "loss": 0.1642, "step": 204 }, { "epoch": 0.3068862275449102, "grad_norm": 3.957451288795904, "learning_rate": 9.430218712405367e-06, "loss": 0.2253, "step": 205 }, { "epoch": 0.3083832335329341, "grad_norm": 3.620967074253851, "learning_rate": 9.424755692290507e-06, "loss": 0.153, "step": 206 }, { "epoch": 0.30988023952095806, "grad_norm": 3.986121501680831, "learning_rate": 9.419268205445862e-06, "loss": 0.1849, "step": 207 }, { "epoch": 0.31137724550898205, "grad_norm": 3.484995544493043, "learning_rate": 9.413756282214538e-06, "loss": 0.1446, "step": 208 }, { "epoch": 0.312874251497006, "grad_norm": 4.0117272497041885, "learning_rate": 9.408219953074772e-06, "loss": 0.1772, "step": 209 }, { "epoch": 0.3143712574850299, "grad_norm": 3.737494611639225, "learning_rate": 9.402659248639749e-06, "loss": 0.1922, "step": 210 }, { "epoch": 0.3158682634730539, "grad_norm": 3.8012924686470946, "learning_rate": 9.397074199657442e-06, "loss": 0.1946, "step": 211 }, { "epoch": 0.31736526946107785, "grad_norm": 3.0406021634653877, "learning_rate": 9.391464837010428e-06, "loss": 0.1383, "step": 212 }, { "epoch": 0.3188622754491018, "grad_norm": 3.3472822285668693, "learning_rate": 9.385831191715735e-06, "loss": 0.1395, "step": 213 }, { "epoch": 0.3203592814371258, "grad_norm": 3.460659907716194, "learning_rate": 9.380173294924661e-06, "loss": 0.1814, "step": 214 }, { "epoch": 0.3218562874251497, "grad_norm": 3.669279878784114, "learning_rate": 9.374491177922603e-06, "loss": 0.1732, "step": 215 }, { "epoch": 0.32335329341317365, "grad_norm": 3.155321810688514, "learning_rate": 9.368784872128877e-06, "loss": 0.1263, "step": 216 }, { "epoch": 0.3248502994011976, "grad_norm": 3.6358066589971214, "learning_rate": 9.363054409096562e-06, "loss": 0.1548, "step": 217 }, { "epoch": 0.3263473053892216, "grad_norm": 4.089138979710726, "learning_rate": 9.357299820512305e-06, "loss": 0.1991, "step": 218 }, { "epoch": 0.3278443113772455, "grad_norm": 2.6332187865397967, "learning_rate": 9.35152113819616e-06, "loss": 0.1238, "step": 219 }, { "epoch": 0.32934131736526945, "grad_norm": 3.8948988254826724, "learning_rate": 9.345718394101412e-06, "loss": 0.1689, "step": 220 }, { "epoch": 0.33083832335329344, "grad_norm": 3.478139928865488, "learning_rate": 9.339891620314388e-06, "loss": 0.1762, "step": 221 }, { "epoch": 0.3323353293413174, "grad_norm": 3.8263865016186775, "learning_rate": 9.334040849054288e-06, "loss": 0.1553, "step": 222 }, { "epoch": 0.3338323353293413, "grad_norm": 4.161846778000413, "learning_rate": 9.328166112673012e-06, "loss": 0.1689, "step": 223 }, { "epoch": 0.33532934131736525, "grad_norm": 3.749610671460975, "learning_rate": 9.322267443654974e-06, "loss": 0.1661, "step": 224 }, { "epoch": 0.33682634730538924, "grad_norm": 3.3963378991729463, "learning_rate": 9.316344874616915e-06, "loss": 0.1441, "step": 225 }, { "epoch": 0.3383233532934132, "grad_norm": 3.7683625943157932, "learning_rate": 9.310398438307747e-06, "loss": 0.1364, "step": 226 }, { "epoch": 0.3398203592814371, "grad_norm": 3.7280384077284245, "learning_rate": 9.304428167608342e-06, "loss": 0.1764, "step": 227 }, { "epoch": 0.3413173652694611, "grad_norm": 3.324455681066031, "learning_rate": 9.29843409553137e-06, "loss": 0.1928, "step": 228 }, { "epoch": 0.34281437125748504, "grad_norm": 3.569245592899911, "learning_rate": 9.292416255221113e-06, "loss": 0.1543, "step": 229 }, { "epoch": 0.344311377245509, "grad_norm": 3.588598721156542, "learning_rate": 9.286374679953278e-06, "loss": 0.1353, "step": 230 }, { "epoch": 0.3458083832335329, "grad_norm": 3.412790332819261, "learning_rate": 9.280309403134812e-06, "loss": 0.1633, "step": 231 }, { "epoch": 0.3473053892215569, "grad_norm": 3.587904855393938, "learning_rate": 9.274220458303727e-06, "loss": 0.1558, "step": 232 }, { "epoch": 0.34880239520958084, "grad_norm": 3.592164333068894, "learning_rate": 9.268107879128899e-06, "loss": 0.1797, "step": 233 }, { "epoch": 0.3502994011976048, "grad_norm": 3.3496831326791385, "learning_rate": 9.261971699409893e-06, "loss": 0.1703, "step": 234 }, { "epoch": 0.35179640718562877, "grad_norm": 3.7960894129201628, "learning_rate": 9.255811953076777e-06, "loss": 0.1737, "step": 235 }, { "epoch": 0.3532934131736527, "grad_norm": 3.3199256331491864, "learning_rate": 9.249628674189928e-06, "loss": 0.1408, "step": 236 }, { "epoch": 0.35479041916167664, "grad_norm": 3.5848353334699494, "learning_rate": 9.243421896939848e-06, "loss": 0.1401, "step": 237 }, { "epoch": 0.3562874251497006, "grad_norm": 4.066485919413828, "learning_rate": 9.237191655646972e-06, "loss": 0.1709, "step": 238 }, { "epoch": 0.35778443113772457, "grad_norm": 3.310324240243756, "learning_rate": 9.230937984761478e-06, "loss": 0.1541, "step": 239 }, { "epoch": 0.3592814371257485, "grad_norm": 3.8267181771760073, "learning_rate": 9.224660918863104e-06, "loss": 0.1648, "step": 240 }, { "epoch": 0.36077844311377244, "grad_norm": 3.578346223613609, "learning_rate": 9.218360492660942e-06, "loss": 0.1828, "step": 241 }, { "epoch": 0.36227544910179643, "grad_norm": 3.6915114819929062, "learning_rate": 9.212036740993265e-06, "loss": 0.1807, "step": 242 }, { "epoch": 0.36377245508982037, "grad_norm": 4.17299834087616, "learning_rate": 9.205689698827319e-06, "loss": 0.1457, "step": 243 }, { "epoch": 0.3652694610778443, "grad_norm": 2.9138730586287984, "learning_rate": 9.199319401259132e-06, "loss": 0.1555, "step": 244 }, { "epoch": 0.36676646706586824, "grad_norm": 3.9390373499278737, "learning_rate": 9.192925883513328e-06, "loss": 0.1854, "step": 245 }, { "epoch": 0.36826347305389223, "grad_norm": 3.4137845463471606, "learning_rate": 9.186509180942928e-06, "loss": 0.1579, "step": 246 }, { "epoch": 0.36976047904191617, "grad_norm": 3.6031031376797324, "learning_rate": 9.180069329029151e-06, "loss": 0.1298, "step": 247 }, { "epoch": 0.3712574850299401, "grad_norm": 3.6717862152243783, "learning_rate": 9.173606363381218e-06, "loss": 0.1516, "step": 248 }, { "epoch": 0.3727544910179641, "grad_norm": 3.7870955008562106, "learning_rate": 9.167120319736164e-06, "loss": 0.1597, "step": 249 }, { "epoch": 0.37425149700598803, "grad_norm": 3.911545968463257, "learning_rate": 9.16061123395863e-06, "loss": 0.1696, "step": 250 }, { "epoch": 0.37574850299401197, "grad_norm": 3.9602486175597043, "learning_rate": 9.154079142040668e-06, "loss": 0.2219, "step": 251 }, { "epoch": 0.3772455089820359, "grad_norm": 4.180862985380015, "learning_rate": 9.147524080101543e-06, "loss": 0.1914, "step": 252 }, { "epoch": 0.3787425149700599, "grad_norm": 3.760343505187692, "learning_rate": 9.140946084387538e-06, "loss": 0.1627, "step": 253 }, { "epoch": 0.38023952095808383, "grad_norm": 3.5276344565464783, "learning_rate": 9.134345191271742e-06, "loss": 0.1244, "step": 254 }, { "epoch": 0.38173652694610777, "grad_norm": 3.9467389681683613, "learning_rate": 9.127721437253859e-06, "loss": 0.1704, "step": 255 }, { "epoch": 0.38323353293413176, "grad_norm": 3.844028239273459, "learning_rate": 9.121074858959997e-06, "loss": 0.1841, "step": 256 }, { "epoch": 0.3847305389221557, "grad_norm": 3.9404404342182806, "learning_rate": 9.114405493142483e-06, "loss": 0.1923, "step": 257 }, { "epoch": 0.38622754491017963, "grad_norm": 3.1694817914019477, "learning_rate": 9.107713376679634e-06, "loss": 0.1322, "step": 258 }, { "epoch": 0.38772455089820357, "grad_norm": 4.037341895260832, "learning_rate": 9.100998546575576e-06, "loss": 0.1368, "step": 259 }, { "epoch": 0.38922155688622756, "grad_norm": 3.886686744223438, "learning_rate": 9.094261039960028e-06, "loss": 0.1659, "step": 260 }, { "epoch": 0.3907185628742515, "grad_norm": 3.3599654920294992, "learning_rate": 9.0875008940881e-06, "loss": 0.1856, "step": 261 }, { "epoch": 0.39221556886227543, "grad_norm": 4.288549904587132, "learning_rate": 9.08071814634008e-06, "loss": 0.1658, "step": 262 }, { "epoch": 0.3937125748502994, "grad_norm": 3.269431576446486, "learning_rate": 9.073912834221241e-06, "loss": 0.1116, "step": 263 }, { "epoch": 0.39520958083832336, "grad_norm": 4.028325957785055, "learning_rate": 9.067084995361623e-06, "loss": 0.1481, "step": 264 }, { "epoch": 0.3967065868263473, "grad_norm": 3.2792295778358995, "learning_rate": 9.060234667515827e-06, "loss": 0.1547, "step": 265 }, { "epoch": 0.39820359281437123, "grad_norm": 4.290708091965637, "learning_rate": 9.053361888562807e-06, "loss": 0.1625, "step": 266 }, { "epoch": 0.3997005988023952, "grad_norm": 3.699080273626776, "learning_rate": 9.046466696505663e-06, "loss": 0.1463, "step": 267 }, { "epoch": 0.40119760479041916, "grad_norm": 3.610103634361475, "learning_rate": 9.039549129471423e-06, "loss": 0.1304, "step": 268 }, { "epoch": 0.4026946107784431, "grad_norm": 2.8845534969702333, "learning_rate": 9.032609225710847e-06, "loss": 0.1279, "step": 269 }, { "epoch": 0.4041916167664671, "grad_norm": 4.258112240636993, "learning_rate": 9.025647023598196e-06, "loss": 0.1836, "step": 270 }, { "epoch": 0.405688622754491, "grad_norm": 3.7583479310841144, "learning_rate": 9.018662561631037e-06, "loss": 0.1607, "step": 271 }, { "epoch": 0.40718562874251496, "grad_norm": 4.41822722693209, "learning_rate": 9.011655878430018e-06, "loss": 0.2262, "step": 272 }, { "epoch": 0.4086826347305389, "grad_norm": 3.7984982057465464, "learning_rate": 9.004627012738666e-06, "loss": 0.1727, "step": 273 }, { "epoch": 0.4101796407185629, "grad_norm": 3.331826806815702, "learning_rate": 8.99757600342316e-06, "loss": 0.131, "step": 274 }, { "epoch": 0.4116766467065868, "grad_norm": 3.4014062516855934, "learning_rate": 8.990502889472126e-06, "loss": 0.1527, "step": 275 }, { "epoch": 0.41317365269461076, "grad_norm": 3.452908623509962, "learning_rate": 8.983407709996415e-06, "loss": 0.1675, "step": 276 }, { "epoch": 0.41467065868263475, "grad_norm": 3.4125127983693466, "learning_rate": 8.976290504228891e-06, "loss": 0.1682, "step": 277 }, { "epoch": 0.4161676646706587, "grad_norm": 4.691189996819349, "learning_rate": 8.969151311524215e-06, "loss": 0.2175, "step": 278 }, { "epoch": 0.4176646706586826, "grad_norm": 3.1816442163259184, "learning_rate": 8.961990171358622e-06, "loss": 0.1286, "step": 279 }, { "epoch": 0.41916167664670656, "grad_norm": 3.283734176268078, "learning_rate": 8.954807123329703e-06, "loss": 0.1232, "step": 280 }, { "epoch": 0.42065868263473055, "grad_norm": 4.080290040208392, "learning_rate": 8.947602207156198e-06, "loss": 0.1417, "step": 281 }, { "epoch": 0.4221556886227545, "grad_norm": 3.335167264140905, "learning_rate": 8.940375462677758e-06, "loss": 0.153, "step": 282 }, { "epoch": 0.4236526946107784, "grad_norm": 3.8699615202653925, "learning_rate": 8.933126929854738e-06, "loss": 0.1538, "step": 283 }, { "epoch": 0.4251497005988024, "grad_norm": 4.172828845112422, "learning_rate": 8.92585664876797e-06, "loss": 0.168, "step": 284 }, { "epoch": 0.42664670658682635, "grad_norm": 4.2065547019405995, "learning_rate": 8.918564659618545e-06, "loss": 0.1644, "step": 285 }, { "epoch": 0.4281437125748503, "grad_norm": 3.59423953751418, "learning_rate": 8.911251002727588e-06, "loss": 0.1353, "step": 286 }, { "epoch": 0.4296407185628742, "grad_norm": 4.14713270049525, "learning_rate": 8.903915718536036e-06, "loss": 0.1867, "step": 287 }, { "epoch": 0.4311377245508982, "grad_norm": 3.4688995880654945, "learning_rate": 8.896558847604414e-06, "loss": 0.173, "step": 288 }, { "epoch": 0.43263473053892215, "grad_norm": 3.092147202481923, "learning_rate": 8.889180430612612e-06, "loss": 0.1184, "step": 289 }, { "epoch": 0.4341317365269461, "grad_norm": 3.8159016956325105, "learning_rate": 8.881780508359661e-06, "loss": 0.167, "step": 290 }, { "epoch": 0.4356287425149701, "grad_norm": 2.918424772859281, "learning_rate": 8.8743591217635e-06, "loss": 0.1447, "step": 291 }, { "epoch": 0.437125748502994, "grad_norm": 3.528623815620074, "learning_rate": 8.86691631186076e-06, "loss": 0.2004, "step": 292 }, { "epoch": 0.43862275449101795, "grad_norm": 3.3869812238225276, "learning_rate": 8.859452119806533e-06, "loss": 0.1506, "step": 293 }, { "epoch": 0.44011976047904194, "grad_norm": 4.192052370936314, "learning_rate": 8.851966586874138e-06, "loss": 0.2152, "step": 294 }, { "epoch": 0.4416167664670659, "grad_norm": 4.275415409216361, "learning_rate": 8.844459754454903e-06, "loss": 0.1893, "step": 295 }, { "epoch": 0.4431137724550898, "grad_norm": 3.1620160078120936, "learning_rate": 8.836931664057935e-06, "loss": 0.1707, "step": 296 }, { "epoch": 0.44461077844311375, "grad_norm": 3.6836973556488823, "learning_rate": 8.829382357309881e-06, "loss": 0.1317, "step": 297 }, { "epoch": 0.44610778443113774, "grad_norm": 3.6357359275513623, "learning_rate": 8.821811875954705e-06, "loss": 0.1866, "step": 298 }, { "epoch": 0.4476047904191617, "grad_norm": 3.7298062220914807, "learning_rate": 8.814220261853457e-06, "loss": 0.202, "step": 299 }, { "epoch": 0.4491017964071856, "grad_norm": 2.7200706539303496, "learning_rate": 8.806607556984045e-06, "loss": 0.1289, "step": 300 }, { "epoch": 0.4505988023952096, "grad_norm": 4.304095783721872, "learning_rate": 8.79897380344099e-06, "loss": 0.1799, "step": 301 }, { "epoch": 0.45209580838323354, "grad_norm": 3.553739429416531, "learning_rate": 8.791319043435213e-06, "loss": 0.1494, "step": 302 }, { "epoch": 0.4535928143712575, "grad_norm": 3.2195387942541562, "learning_rate": 8.78364331929378e-06, "loss": 0.1431, "step": 303 }, { "epoch": 0.4550898203592814, "grad_norm": 3.311414843086221, "learning_rate": 8.775946673459682e-06, "loss": 0.1268, "step": 304 }, { "epoch": 0.4565868263473054, "grad_norm": 3.27966726486623, "learning_rate": 8.768229148491599e-06, "loss": 0.1688, "step": 305 }, { "epoch": 0.45808383233532934, "grad_norm": 3.770004358120688, "learning_rate": 8.76049078706366e-06, "loss": 0.1524, "step": 306 }, { "epoch": 0.4595808383233533, "grad_norm": 3.9606921814107294, "learning_rate": 8.75273163196521e-06, "loss": 0.1843, "step": 307 }, { "epoch": 0.46107784431137727, "grad_norm": 3.052126672327801, "learning_rate": 8.744951726100572e-06, "loss": 0.126, "step": 308 }, { "epoch": 0.4625748502994012, "grad_norm": 3.188005075819771, "learning_rate": 8.737151112488814e-06, "loss": 0.1217, "step": 309 }, { "epoch": 0.46407185628742514, "grad_norm": 3.1659504258382594, "learning_rate": 8.729329834263503e-06, "loss": 0.1435, "step": 310 }, { "epoch": 0.4655688622754491, "grad_norm": 3.506401679071631, "learning_rate": 8.721487934672474e-06, "loss": 0.1301, "step": 311 }, { "epoch": 0.46706586826347307, "grad_norm": 3.5425405270483066, "learning_rate": 8.713625457077585e-06, "loss": 0.1589, "step": 312 }, { "epoch": 0.468562874251497, "grad_norm": 3.1837505386279585, "learning_rate": 8.705742444954488e-06, "loss": 0.1597, "step": 313 }, { "epoch": 0.47005988023952094, "grad_norm": 5.340342548175991, "learning_rate": 8.697838941892371e-06, "loss": 0.162, "step": 314 }, { "epoch": 0.47155688622754494, "grad_norm": 4.077340427993968, "learning_rate": 8.68991499159373e-06, "loss": 0.2089, "step": 315 }, { "epoch": 0.47305389221556887, "grad_norm": 3.3136523685502937, "learning_rate": 8.681970637874131e-06, "loss": 0.1837, "step": 316 }, { "epoch": 0.4745508982035928, "grad_norm": 3.0288697083947587, "learning_rate": 8.674005924661952e-06, "loss": 0.1153, "step": 317 }, { "epoch": 0.47604790419161674, "grad_norm": 3.563847646735737, "learning_rate": 8.666020895998154e-06, "loss": 0.1575, "step": 318 }, { "epoch": 0.47754491017964074, "grad_norm": 3.5639505011317443, "learning_rate": 8.658015596036028e-06, "loss": 0.1453, "step": 319 }, { "epoch": 0.47904191616766467, "grad_norm": 3.1728438028919905, "learning_rate": 8.64999006904096e-06, "loss": 0.1511, "step": 320 }, { "epoch": 0.4805389221556886, "grad_norm": 4.306568203222935, "learning_rate": 8.641944359390182e-06, "loss": 0.19, "step": 321 }, { "epoch": 0.4820359281437126, "grad_norm": 4.229427287686191, "learning_rate": 8.63387851157252e-06, "loss": 0.2062, "step": 322 }, { "epoch": 0.48353293413173654, "grad_norm": 3.1016805685180167, "learning_rate": 8.625792570188161e-06, "loss": 0.1508, "step": 323 }, { "epoch": 0.48502994011976047, "grad_norm": 3.2949955959039543, "learning_rate": 8.617686579948396e-06, "loss": 0.1445, "step": 324 }, { "epoch": 0.4865269461077844, "grad_norm": 3.395121176749734, "learning_rate": 8.609560585675379e-06, "loss": 0.1465, "step": 325 }, { "epoch": 0.4880239520958084, "grad_norm": 4.132996726782593, "learning_rate": 8.60141463230187e-06, "loss": 0.196, "step": 326 }, { "epoch": 0.48952095808383234, "grad_norm": 3.707968677532257, "learning_rate": 8.593248764871001e-06, "loss": 0.1916, "step": 327 }, { "epoch": 0.49101796407185627, "grad_norm": 4.154623344343296, "learning_rate": 8.585063028536015e-06, "loss": 0.1559, "step": 328 }, { "epoch": 0.49251497005988026, "grad_norm": 3.616156147280923, "learning_rate": 8.576857468560022e-06, "loss": 0.163, "step": 329 }, { "epoch": 0.4940119760479042, "grad_norm": 3.6186776732033734, "learning_rate": 8.568632130315747e-06, "loss": 0.1314, "step": 330 }, { "epoch": 0.49550898203592814, "grad_norm": 4.6040371758171625, "learning_rate": 8.560387059285274e-06, "loss": 0.1822, "step": 331 }, { "epoch": 0.49700598802395207, "grad_norm": 3.688452927357892, "learning_rate": 8.552122301059807e-06, "loss": 0.1455, "step": 332 }, { "epoch": 0.49850299401197606, "grad_norm": 3.5187948611970836, "learning_rate": 8.543837901339405e-06, "loss": 0.1486, "step": 333 }, { "epoch": 0.5, "grad_norm": 2.8789092500526543, "learning_rate": 8.535533905932739e-06, "loss": 0.1486, "step": 334 }, { "epoch": 0.5014970059880239, "grad_norm": 4.061356059345529, "learning_rate": 8.52721036075683e-06, "loss": 0.1406, "step": 335 }, { "epoch": 0.5029940119760479, "grad_norm": 3.532949260140435, "learning_rate": 8.518867311836808e-06, "loss": 0.1452, "step": 336 }, { "epoch": 0.5044910179640718, "grad_norm": 3.2306585133586276, "learning_rate": 8.510504805305638e-06, "loss": 0.1461, "step": 337 }, { "epoch": 0.5059880239520959, "grad_norm": 3.343618932469027, "learning_rate": 8.502122887403882e-06, "loss": 0.1702, "step": 338 }, { "epoch": 0.5074850299401198, "grad_norm": 3.4285575533194277, "learning_rate": 8.49372160447944e-06, "loss": 0.176, "step": 339 }, { "epoch": 0.5089820359281437, "grad_norm": 3.2204455286610147, "learning_rate": 8.485301002987285e-06, "loss": 0.1329, "step": 340 }, { "epoch": 0.5104790419161677, "grad_norm": 3.071061403435582, "learning_rate": 8.476861129489218e-06, "loss": 0.1316, "step": 341 }, { "epoch": 0.5119760479041916, "grad_norm": 3.7085163254294673, "learning_rate": 8.468402030653598e-06, "loss": 0.1415, "step": 342 }, { "epoch": 0.5134730538922155, "grad_norm": 4.227586277413551, "learning_rate": 8.459923753255097e-06, "loss": 0.1645, "step": 343 }, { "epoch": 0.5149700598802395, "grad_norm": 3.433180555898262, "learning_rate": 8.451426344174433e-06, "loss": 0.1545, "step": 344 }, { "epoch": 0.5164670658682635, "grad_norm": 3.449565791913072, "learning_rate": 8.44290985039811e-06, "loss": 0.1546, "step": 345 }, { "epoch": 0.5179640718562875, "grad_norm": 3.3746224991343263, "learning_rate": 8.434374319018165e-06, "loss": 0.1392, "step": 346 }, { "epoch": 0.5194610778443114, "grad_norm": 3.888569717594192, "learning_rate": 8.425819797231904e-06, "loss": 0.1915, "step": 347 }, { "epoch": 0.5209580838323353, "grad_norm": 3.624214576460007, "learning_rate": 8.417246332341638e-06, "loss": 0.1668, "step": 348 }, { "epoch": 0.5224550898203593, "grad_norm": 3.5287789926041757, "learning_rate": 8.408653971754421e-06, "loss": 0.1708, "step": 349 }, { "epoch": 0.5239520958083832, "grad_norm": 3.1781941229669317, "learning_rate": 8.4000427629818e-06, "loss": 0.1265, "step": 350 }, { "epoch": 0.5254491017964071, "grad_norm": 4.378680412487641, "learning_rate": 8.391412753639533e-06, "loss": 0.1768, "step": 351 }, { "epoch": 0.5269461077844312, "grad_norm": 3.570630799474735, "learning_rate": 8.382763991447344e-06, "loss": 0.1126, "step": 352 }, { "epoch": 0.5284431137724551, "grad_norm": 3.6409298200587465, "learning_rate": 8.374096524228648e-06, "loss": 0.1433, "step": 353 }, { "epoch": 0.5299401197604791, "grad_norm": 4.443126087555856, "learning_rate": 8.365410399910287e-06, "loss": 0.1362, "step": 354 }, { "epoch": 0.531437125748503, "grad_norm": 3.796483531080233, "learning_rate": 8.356705666522274e-06, "loss": 0.1372, "step": 355 }, { "epoch": 0.5329341317365269, "grad_norm": 3.4782283484695005, "learning_rate": 8.347982372197515e-06, "loss": 0.145, "step": 356 }, { "epoch": 0.5344311377245509, "grad_norm": 3.374377538950339, "learning_rate": 8.33924056517155e-06, "loss": 0.1302, "step": 357 }, { "epoch": 0.5359281437125748, "grad_norm": 3.58062866261276, "learning_rate": 8.33048029378229e-06, "loss": 0.1429, "step": 358 }, { "epoch": 0.5374251497005988, "grad_norm": 2.8970747603927336, "learning_rate": 8.321701606469737e-06, "loss": 0.103, "step": 359 }, { "epoch": 0.5389221556886228, "grad_norm": 4.008341408462571, "learning_rate": 8.312904551775731e-06, "loss": 0.1694, "step": 360 }, { "epoch": 0.5404191616766467, "grad_norm": 3.79371441886239, "learning_rate": 8.30408917834367e-06, "loss": 0.1779, "step": 361 }, { "epoch": 0.5419161676646707, "grad_norm": 2.931726994363889, "learning_rate": 8.295255534918249e-06, "loss": 0.1273, "step": 362 }, { "epoch": 0.5434131736526946, "grad_norm": 4.220218626136857, "learning_rate": 8.286403670345184e-06, "loss": 0.1852, "step": 363 }, { "epoch": 0.5449101796407185, "grad_norm": 3.8879543515523673, "learning_rate": 8.277533633570948e-06, "loss": 0.1643, "step": 364 }, { "epoch": 0.5464071856287425, "grad_norm": 4.084000302296261, "learning_rate": 8.268645473642493e-06, "loss": 0.1671, "step": 365 }, { "epoch": 0.5479041916167665, "grad_norm": 3.689060432875227, "learning_rate": 8.25973923970699e-06, "loss": 0.1742, "step": 366 }, { "epoch": 0.5494011976047904, "grad_norm": 3.438350175105849, "learning_rate": 8.250814981011546e-06, "loss": 0.1654, "step": 367 }, { "epoch": 0.5508982035928144, "grad_norm": 3.040194170881382, "learning_rate": 8.241872746902934e-06, "loss": 0.1319, "step": 368 }, { "epoch": 0.5523952095808383, "grad_norm": 3.3369051550597604, "learning_rate": 8.232912586827326e-06, "loss": 0.1195, "step": 369 }, { "epoch": 0.5538922155688623, "grad_norm": 3.9768767244400567, "learning_rate": 8.223934550330015e-06, "loss": 0.1842, "step": 370 }, { "epoch": 0.5553892215568862, "grad_norm": 4.105438348259141, "learning_rate": 8.214938687055141e-06, "loss": 0.1547, "step": 371 }, { "epoch": 0.5568862275449101, "grad_norm": 3.2783944948188695, "learning_rate": 8.20592504674542e-06, "loss": 0.162, "step": 372 }, { "epoch": 0.5583832335329342, "grad_norm": 3.4071888419501377, "learning_rate": 8.196893679241858e-06, "loss": 0.1521, "step": 373 }, { "epoch": 0.5598802395209581, "grad_norm": 3.733791107092681, "learning_rate": 8.187844634483495e-06, "loss": 0.1339, "step": 374 }, { "epoch": 0.561377245508982, "grad_norm": 3.4724618882766323, "learning_rate": 8.178777962507113e-06, "loss": 0.1548, "step": 375 }, { "epoch": 0.562874251497006, "grad_norm": 3.052565582698161, "learning_rate": 8.16969371344696e-06, "loss": 0.1375, "step": 376 }, { "epoch": 0.5643712574850299, "grad_norm": 4.549793913034271, "learning_rate": 8.16059193753448e-06, "loss": 0.2376, "step": 377 }, { "epoch": 0.5658682634730539, "grad_norm": 3.862935238138879, "learning_rate": 8.151472685098037e-06, "loss": 0.1755, "step": 378 }, { "epoch": 0.5673652694610778, "grad_norm": 3.0485915895910263, "learning_rate": 8.142336006562618e-06, "loss": 0.1293, "step": 379 }, { "epoch": 0.5688622754491018, "grad_norm": 2.9029235013132846, "learning_rate": 8.13318195244958e-06, "loss": 0.1476, "step": 380 }, { "epoch": 0.5703592814371258, "grad_norm": 3.2863635677903447, "learning_rate": 8.124010573376358e-06, "loss": 0.1521, "step": 381 }, { "epoch": 0.5718562874251497, "grad_norm": 3.2582557417033047, "learning_rate": 8.114821920056177e-06, "loss": 0.1362, "step": 382 }, { "epoch": 0.5733532934131736, "grad_norm": 4.559622116262038, "learning_rate": 8.105616043297788e-06, "loss": 0.1724, "step": 383 }, { "epoch": 0.5748502994011976, "grad_norm": 2.9933446096193634, "learning_rate": 8.096392994005177e-06, "loss": 0.1361, "step": 384 }, { "epoch": 0.5763473053892215, "grad_norm": 4.647138367172301, "learning_rate": 8.08715282317728e-06, "loss": 0.1654, "step": 385 }, { "epoch": 0.5778443113772455, "grad_norm": 3.456906450387393, "learning_rate": 8.077895581907719e-06, "loss": 0.1308, "step": 386 }, { "epoch": 0.5793413173652695, "grad_norm": 3.5701031708624984, "learning_rate": 8.068621321384495e-06, "loss": 0.1508, "step": 387 }, { "epoch": 0.5808383233532934, "grad_norm": 2.8929892289837005, "learning_rate": 8.059330092889724e-06, "loss": 0.1463, "step": 388 }, { "epoch": 0.5823353293413174, "grad_norm": 3.3660178332145576, "learning_rate": 8.050021947799343e-06, "loss": 0.1373, "step": 389 }, { "epoch": 0.5838323353293413, "grad_norm": 2.8973973876361074, "learning_rate": 8.040696937582833e-06, "loss": 0.1155, "step": 390 }, { "epoch": 0.5853293413173652, "grad_norm": 3.274054401215002, "learning_rate": 8.031355113802928e-06, "loss": 0.1167, "step": 391 }, { "epoch": 0.5868263473053892, "grad_norm": 3.5680359140664177, "learning_rate": 8.021996528115335e-06, "loss": 0.1738, "step": 392 }, { "epoch": 0.5883233532934131, "grad_norm": 3.619212355883971, "learning_rate": 8.012621232268444e-06, "loss": 0.1579, "step": 393 }, { "epoch": 0.5898203592814372, "grad_norm": 3.796558724517196, "learning_rate": 8.003229278103044e-06, "loss": 0.1826, "step": 394 }, { "epoch": 0.5913173652694611, "grad_norm": 3.152710039489962, "learning_rate": 7.993820717552038e-06, "loss": 0.1293, "step": 395 }, { "epoch": 0.592814371257485, "grad_norm": 3.21992598053355, "learning_rate": 7.984395602640153e-06, "loss": 0.1411, "step": 396 }, { "epoch": 0.594311377245509, "grad_norm": 3.090460047627513, "learning_rate": 7.974953985483656e-06, "loss": 0.1311, "step": 397 }, { "epoch": 0.5958083832335329, "grad_norm": 2.7391950237745677, "learning_rate": 7.96549591829006e-06, "loss": 0.1187, "step": 398 }, { "epoch": 0.5973053892215568, "grad_norm": 3.208497150557722, "learning_rate": 7.956021453357838e-06, "loss": 0.1626, "step": 399 }, { "epoch": 0.5988023952095808, "grad_norm": 3.8460449157064316, "learning_rate": 7.946530643076138e-06, "loss": 0.1739, "step": 400 }, { "epoch": 0.6002994011976048, "grad_norm": 3.8126409610062364, "learning_rate": 7.937023539924486e-06, "loss": 0.176, "step": 401 }, { "epoch": 0.6017964071856288, "grad_norm": 3.565386905101392, "learning_rate": 7.927500196472506e-06, "loss": 0.1721, "step": 402 }, { "epoch": 0.6032934131736527, "grad_norm": 2.999757610616761, "learning_rate": 7.917960665379617e-06, "loss": 0.1213, "step": 403 }, { "epoch": 0.6047904191616766, "grad_norm": 3.879691546636035, "learning_rate": 7.908404999394747e-06, "loss": 0.1614, "step": 404 }, { "epoch": 0.6062874251497006, "grad_norm": 3.3261166901336923, "learning_rate": 7.898833251356044e-06, "loss": 0.1354, "step": 405 }, { "epoch": 0.6077844311377245, "grad_norm": 3.357135735077487, "learning_rate": 7.889245474190588e-06, "loss": 0.1593, "step": 406 }, { "epoch": 0.6092814371257484, "grad_norm": 3.845083920558719, "learning_rate": 7.87964172091408e-06, "loss": 0.1571, "step": 407 }, { "epoch": 0.6107784431137725, "grad_norm": 3.421799495757958, "learning_rate": 7.870022044630569e-06, "loss": 0.1464, "step": 408 }, { "epoch": 0.6122754491017964, "grad_norm": 3.556356182620836, "learning_rate": 7.860386498532151e-06, "loss": 0.1819, "step": 409 }, { "epoch": 0.6137724550898204, "grad_norm": 3.567085557814881, "learning_rate": 7.85073513589867e-06, "loss": 0.187, "step": 410 }, { "epoch": 0.6152694610778443, "grad_norm": 3.0040072155535156, "learning_rate": 7.841068010097432e-06, "loss": 0.1247, "step": 411 }, { "epoch": 0.6167664670658682, "grad_norm": 3.791223543914753, "learning_rate": 7.831385174582901e-06, "loss": 0.1779, "step": 412 }, { "epoch": 0.6182634730538922, "grad_norm": 2.8269268330947868, "learning_rate": 7.821686682896412e-06, "loss": 0.1307, "step": 413 }, { "epoch": 0.6197604790419161, "grad_norm": 3.0458926094793197, "learning_rate": 7.81197258866587e-06, "loss": 0.1313, "step": 414 }, { "epoch": 0.6212574850299402, "grad_norm": 3.7550565313514515, "learning_rate": 7.802242945605452e-06, "loss": 0.1779, "step": 415 }, { "epoch": 0.6227544910179641, "grad_norm": 3.2240963908366744, "learning_rate": 7.792497807515317e-06, "loss": 0.1344, "step": 416 }, { "epoch": 0.624251497005988, "grad_norm": 3.3695182322529775, "learning_rate": 7.782737228281299e-06, "loss": 0.1272, "step": 417 }, { "epoch": 0.625748502994012, "grad_norm": 4.235772554802156, "learning_rate": 7.772961261874615e-06, "loss": 0.1727, "step": 418 }, { "epoch": 0.6272455089820359, "grad_norm": 3.976483657682477, "learning_rate": 7.763169962351571e-06, "loss": 0.175, "step": 419 }, { "epoch": 0.6287425149700598, "grad_norm": 2.67068158988133, "learning_rate": 7.75336338385325e-06, "loss": 0.1303, "step": 420 }, { "epoch": 0.6302395209580839, "grad_norm": 3.6085430231303635, "learning_rate": 7.74354158060522e-06, "loss": 0.1834, "step": 421 }, { "epoch": 0.6317365269461078, "grad_norm": 3.3794539424043135, "learning_rate": 7.733704606917248e-06, "loss": 0.1894, "step": 422 }, { "epoch": 0.6332335329341318, "grad_norm": 3.3055772497359834, "learning_rate": 7.723852517182965e-06, "loss": 0.1564, "step": 423 }, { "epoch": 0.6347305389221557, "grad_norm": 3.434714529821165, "learning_rate": 7.713985365879607e-06, "loss": 0.1902, "step": 424 }, { "epoch": 0.6362275449101796, "grad_norm": 3.4659395369169195, "learning_rate": 7.704103207567676e-06, "loss": 0.139, "step": 425 }, { "epoch": 0.6377245508982036, "grad_norm": 2.6100836741449585, "learning_rate": 7.694206096890667e-06, "loss": 0.1113, "step": 426 }, { "epoch": 0.6392215568862275, "grad_norm": 3.230137155437115, "learning_rate": 7.684294088574749e-06, "loss": 0.1514, "step": 427 }, { "epoch": 0.6407185628742516, "grad_norm": 4.240724809716955, "learning_rate": 7.674367237428467e-06, "loss": 0.1464, "step": 428 }, { "epoch": 0.6422155688622755, "grad_norm": 4.346439330321492, "learning_rate": 7.664425598342442e-06, "loss": 0.197, "step": 429 }, { "epoch": 0.6437125748502994, "grad_norm": 3.353940001200498, "learning_rate": 7.654469226289068e-06, "loss": 0.1544, "step": 430 }, { "epoch": 0.6452095808383234, "grad_norm": 3.2425238295339938, "learning_rate": 7.644498176322197e-06, "loss": 0.1253, "step": 431 }, { "epoch": 0.6467065868263473, "grad_norm": 2.848400725096475, "learning_rate": 7.63451250357685e-06, "loss": 0.1174, "step": 432 }, { "epoch": 0.6482035928143712, "grad_norm": 2.8070040149540367, "learning_rate": 7.6245122632689e-06, "loss": 0.1183, "step": 433 }, { "epoch": 0.6497005988023952, "grad_norm": 3.279892578618957, "learning_rate": 7.614497510694774e-06, "loss": 0.134, "step": 434 }, { "epoch": 0.6511976047904192, "grad_norm": 3.6995447910209585, "learning_rate": 7.6044683012311425e-06, "loss": 0.1439, "step": 435 }, { "epoch": 0.6526946107784432, "grad_norm": 2.897241387513523, "learning_rate": 7.5944246903346204e-06, "loss": 0.1384, "step": 436 }, { "epoch": 0.6541916167664671, "grad_norm": 2.9434313268408885, "learning_rate": 7.584366733541451e-06, "loss": 0.1263, "step": 437 }, { "epoch": 0.655688622754491, "grad_norm": 3.8309985748207236, "learning_rate": 7.574294486467204e-06, "loss": 0.1981, "step": 438 }, { "epoch": 0.657185628742515, "grad_norm": 4.084998006713069, "learning_rate": 7.564208004806467e-06, "loss": 0.1356, "step": 439 }, { "epoch": 0.6586826347305389, "grad_norm": 2.756163014905463, "learning_rate": 7.55410734433254e-06, "loss": 0.109, "step": 440 }, { "epoch": 0.6601796407185628, "grad_norm": 3.4699142689431626, "learning_rate": 7.543992560897124e-06, "loss": 0.1456, "step": 441 }, { "epoch": 0.6616766467065869, "grad_norm": 2.7682894367910373, "learning_rate": 7.533863710430011e-06, "loss": 0.1309, "step": 442 }, { "epoch": 0.6631736526946108, "grad_norm": 3.708385795676214, "learning_rate": 7.523720848938782e-06, "loss": 0.1443, "step": 443 }, { "epoch": 0.6646706586826348, "grad_norm": 2.768942027464495, "learning_rate": 7.513564032508484e-06, "loss": 0.1052, "step": 444 }, { "epoch": 0.6661676646706587, "grad_norm": 4.480883686712281, "learning_rate": 7.503393317301337e-06, "loss": 0.1536, "step": 445 }, { "epoch": 0.6676646706586826, "grad_norm": 3.686945473981944, "learning_rate": 7.493208759556406e-06, "loss": 0.1571, "step": 446 }, { "epoch": 0.6691616766467066, "grad_norm": 3.041828499610161, "learning_rate": 7.483010415589306e-06, "loss": 0.1579, "step": 447 }, { "epoch": 0.6706586826347305, "grad_norm": 3.8656926175419226, "learning_rate": 7.472798341791877e-06, "loss": 0.1421, "step": 448 }, { "epoch": 0.6721556886227545, "grad_norm": 4.713375853947614, "learning_rate": 7.462572594631881e-06, "loss": 0.2194, "step": 449 }, { "epoch": 0.6736526946107785, "grad_norm": 3.568134962985696, "learning_rate": 7.452333230652688e-06, "loss": 0.1686, "step": 450 }, { "epoch": 0.6751497005988024, "grad_norm": 2.977682174346779, "learning_rate": 7.442080306472962e-06, "loss": 0.134, "step": 451 }, { "epoch": 0.6766467065868264, "grad_norm": 3.719058522427309, "learning_rate": 7.431813878786343e-06, "loss": 0.187, "step": 452 }, { "epoch": 0.6781437125748503, "grad_norm": 3.8278236107849115, "learning_rate": 7.421534004361149e-06, "loss": 0.1762, "step": 453 }, { "epoch": 0.6796407185628742, "grad_norm": 3.3057851751542295, "learning_rate": 7.4112407400400395e-06, "loss": 0.1833, "step": 454 }, { "epoch": 0.6811377245508982, "grad_norm": 3.441061757734476, "learning_rate": 7.400934142739725e-06, "loss": 0.159, "step": 455 }, { "epoch": 0.6826347305389222, "grad_norm": 3.2008839872173844, "learning_rate": 7.390614269450633e-06, "loss": 0.1231, "step": 456 }, { "epoch": 0.6841317365269461, "grad_norm": 3.6080480830004578, "learning_rate": 7.380281177236608e-06, "loss": 0.1765, "step": 457 }, { "epoch": 0.6856287425149701, "grad_norm": 3.5363954933899393, "learning_rate": 7.369934923234577e-06, "loss": 0.1819, "step": 458 }, { "epoch": 0.687125748502994, "grad_norm": 2.9474556989329783, "learning_rate": 7.359575564654259e-06, "loss": 0.1609, "step": 459 }, { "epoch": 0.688622754491018, "grad_norm": 3.2388272282400656, "learning_rate": 7.349203158777826e-06, "loss": 0.1281, "step": 460 }, { "epoch": 0.6901197604790419, "grad_norm": 3.4499535466111495, "learning_rate": 7.338817762959596e-06, "loss": 0.1422, "step": 461 }, { "epoch": 0.6916167664670658, "grad_norm": 3.18388039773228, "learning_rate": 7.32841943462572e-06, "loss": 0.1543, "step": 462 }, { "epoch": 0.6931137724550899, "grad_norm": 3.29610764595099, "learning_rate": 7.318008231273851e-06, "loss": 0.1389, "step": 463 }, { "epoch": 0.6946107784431138, "grad_norm": 3.5995736686951147, "learning_rate": 7.3075842104728445e-06, "loss": 0.1395, "step": 464 }, { "epoch": 0.6961077844311377, "grad_norm": 3.5597809255592563, "learning_rate": 7.2971474298624235e-06, "loss": 0.1677, "step": 465 }, { "epoch": 0.6976047904191617, "grad_norm": 3.19232508962934, "learning_rate": 7.286697947152868e-06, "loss": 0.1205, "step": 466 }, { "epoch": 0.6991017964071856, "grad_norm": 3.2886795884325895, "learning_rate": 7.276235820124694e-06, "loss": 0.1247, "step": 467 }, { "epoch": 0.7005988023952096, "grad_norm": 3.5288876288662836, "learning_rate": 7.265761106628338e-06, "loss": 0.172, "step": 468 }, { "epoch": 0.7020958083832335, "grad_norm": 2.7003885028493166, "learning_rate": 7.255273864583825e-06, "loss": 0.1093, "step": 469 }, { "epoch": 0.7035928143712575, "grad_norm": 4.277192644371091, "learning_rate": 7.244774151980466e-06, "loss": 0.2287, "step": 470 }, { "epoch": 0.7050898203592815, "grad_norm": 3.6606462677435454, "learning_rate": 7.234262026876524e-06, "loss": 0.1721, "step": 471 }, { "epoch": 0.7065868263473054, "grad_norm": 4.4089300763256105, "learning_rate": 7.223737547398898e-06, "loss": 0.218, "step": 472 }, { "epoch": 0.7080838323353293, "grad_norm": 4.6226384091732236, "learning_rate": 7.213200771742799e-06, "loss": 0.1871, "step": 473 }, { "epoch": 0.7095808383233533, "grad_norm": 4.192395283120595, "learning_rate": 7.20265175817143e-06, "loss": 0.2011, "step": 474 }, { "epoch": 0.7110778443113772, "grad_norm": 3.0719348148834205, "learning_rate": 7.192090565015668e-06, "loss": 0.1476, "step": 475 }, { "epoch": 0.7125748502994012, "grad_norm": 2.9204055234906563, "learning_rate": 7.181517250673729e-06, "loss": 0.1611, "step": 476 }, { "epoch": 0.7140718562874252, "grad_norm": 3.316840128708357, "learning_rate": 7.17093187361086e-06, "loss": 0.1495, "step": 477 }, { "epoch": 0.7155688622754491, "grad_norm": 3.6310221646583636, "learning_rate": 7.1603344923590065e-06, "loss": 0.1517, "step": 478 }, { "epoch": 0.7170658682634731, "grad_norm": 3.6295927335998788, "learning_rate": 7.149725165516494e-06, "loss": 0.1608, "step": 479 }, { "epoch": 0.718562874251497, "grad_norm": 3.4278440397189835, "learning_rate": 7.139103951747694e-06, "loss": 0.1292, "step": 480 }, { "epoch": 0.7200598802395209, "grad_norm": 3.6717175078598268, "learning_rate": 7.128470909782717e-06, "loss": 0.1474, "step": 481 }, { "epoch": 0.7215568862275449, "grad_norm": 2.975981338985663, "learning_rate": 7.1178260984170675e-06, "loss": 0.1173, "step": 482 }, { "epoch": 0.7230538922155688, "grad_norm": 3.133559227854225, "learning_rate": 7.107169576511338e-06, "loss": 0.1346, "step": 483 }, { "epoch": 0.7245508982035929, "grad_norm": 3.5757310700854132, "learning_rate": 7.0965014029908654e-06, "loss": 0.1694, "step": 484 }, { "epoch": 0.7260479041916168, "grad_norm": 3.150805205436883, "learning_rate": 7.0858216368454246e-06, "loss": 0.1256, "step": 485 }, { "epoch": 0.7275449101796407, "grad_norm": 3.870576287573386, "learning_rate": 7.075130337128883e-06, "loss": 0.1664, "step": 486 }, { "epoch": 0.7290419161676647, "grad_norm": 3.542173117246519, "learning_rate": 7.06442756295889e-06, "loss": 0.1781, "step": 487 }, { "epoch": 0.7305389221556886, "grad_norm": 3.256238197488695, "learning_rate": 7.053713373516538e-06, "loss": 0.1443, "step": 488 }, { "epoch": 0.7320359281437125, "grad_norm": 3.9069496231229466, "learning_rate": 7.042987828046041e-06, "loss": 0.1849, "step": 489 }, { "epoch": 0.7335329341317365, "grad_norm": 3.530390387218226, "learning_rate": 7.03225098585441e-06, "loss": 0.1727, "step": 490 }, { "epoch": 0.7350299401197605, "grad_norm": 2.951624786075098, "learning_rate": 7.021502906311114e-06, "loss": 0.1498, "step": 491 }, { "epoch": 0.7365269461077845, "grad_norm": 3.3880527850162445, "learning_rate": 7.0107436488477694e-06, "loss": 0.1661, "step": 492 }, { "epoch": 0.7380239520958084, "grad_norm": 3.3111450641081746, "learning_rate": 6.999973272957793e-06, "loss": 0.1516, "step": 493 }, { "epoch": 0.7395209580838323, "grad_norm": 3.838549855051216, "learning_rate": 6.989191838196083e-06, "loss": 0.1725, "step": 494 }, { "epoch": 0.7410179640718563, "grad_norm": 3.9683196973203603, "learning_rate": 6.978399404178688e-06, "loss": 0.1782, "step": 495 }, { "epoch": 0.7425149700598802, "grad_norm": 3.4577426016691337, "learning_rate": 6.9675960305824785e-06, "loss": 0.1131, "step": 496 }, { "epoch": 0.7440119760479041, "grad_norm": 4.085441148596399, "learning_rate": 6.956781777144813e-06, "loss": 0.1236, "step": 497 }, { "epoch": 0.7455089820359282, "grad_norm": 3.378141952600603, "learning_rate": 6.945956703663212e-06, "loss": 0.1592, "step": 498 }, { "epoch": 0.7470059880239521, "grad_norm": 3.7804823306480677, "learning_rate": 6.935120869995023e-06, "loss": 0.1668, "step": 499 }, { "epoch": 0.7485029940119761, "grad_norm": 3.4892685966583867, "learning_rate": 6.9242743360570985e-06, "loss": 0.1728, "step": 500 }, { "epoch": 0.7485029940119761, "eval_loss": 0.15714237093925476, "eval_runtime": 1.8445, "eval_samples_per_second": 29.276, "eval_steps_per_second": 7.59, "step": 500 }, { "epoch": 0.75, "grad_norm": 3.5111428294788354, "learning_rate": 6.913417161825449e-06, "loss": 0.1517, "step": 501 }, { "epoch": 0.7514970059880239, "grad_norm": 3.6095693058438365, "learning_rate": 6.9025494073349284e-06, "loss": 0.1889, "step": 502 }, { "epoch": 0.7529940119760479, "grad_norm": 3.6580518445888996, "learning_rate": 6.891671132678892e-06, "loss": 0.2159, "step": 503 }, { "epoch": 0.7544910179640718, "grad_norm": 3.689862115332128, "learning_rate": 6.880782398008862e-06, "loss": 0.1592, "step": 504 }, { "epoch": 0.7559880239520959, "grad_norm": 3.8320171946918076, "learning_rate": 6.869883263534205e-06, "loss": 0.1735, "step": 505 }, { "epoch": 0.7574850299401198, "grad_norm": 3.3745031449640677, "learning_rate": 6.858973789521792e-06, "loss": 0.1681, "step": 506 }, { "epoch": 0.7589820359281437, "grad_norm": 3.026257677043077, "learning_rate": 6.8480540362956664e-06, "loss": 0.1299, "step": 507 }, { "epoch": 0.7604790419161677, "grad_norm": 3.1246114771326834, "learning_rate": 6.837124064236709e-06, "loss": 0.158, "step": 508 }, { "epoch": 0.7619760479041916, "grad_norm": 2.8006831202072227, "learning_rate": 6.826183933782307e-06, "loss": 0.1025, "step": 509 }, { "epoch": 0.7634730538922155, "grad_norm": 2.8993998437822883, "learning_rate": 6.815233705426019e-06, "loss": 0.144, "step": 510 }, { "epoch": 0.7649700598802395, "grad_norm": 2.9305781661910295, "learning_rate": 6.8042734397172405e-06, "loss": 0.1027, "step": 511 }, { "epoch": 0.7664670658682635, "grad_norm": 3.715349269993241, "learning_rate": 6.7933031972608644e-06, "loss": 0.1845, "step": 512 }, { "epoch": 0.7679640718562875, "grad_norm": 2.8788484408777415, "learning_rate": 6.782323038716957e-06, "loss": 0.1392, "step": 513 }, { "epoch": 0.7694610778443114, "grad_norm": 4.1168412476610845, "learning_rate": 6.771333024800411e-06, "loss": 0.1626, "step": 514 }, { "epoch": 0.7709580838323353, "grad_norm": 3.565856118517076, "learning_rate": 6.760333216280617e-06, "loss": 0.1418, "step": 515 }, { "epoch": 0.7724550898203593, "grad_norm": 3.835950805115863, "learning_rate": 6.74932367398112e-06, "loss": 0.1823, "step": 516 }, { "epoch": 0.7739520958083832, "grad_norm": 3.9331168522787516, "learning_rate": 6.738304458779293e-06, "loss": 0.1693, "step": 517 }, { "epoch": 0.7754491017964071, "grad_norm": 4.17657936631769, "learning_rate": 6.727275631605996e-06, "loss": 0.1689, "step": 518 }, { "epoch": 0.7769461077844312, "grad_norm": 2.984792357517865, "learning_rate": 6.716237253445235e-06, "loss": 0.1395, "step": 519 }, { "epoch": 0.7784431137724551, "grad_norm": 2.952100504161887, "learning_rate": 6.70518938533383e-06, "loss": 0.1255, "step": 520 }, { "epoch": 0.7799401197604791, "grad_norm": 3.6102126808582224, "learning_rate": 6.694132088361075e-06, "loss": 0.1377, "step": 521 }, { "epoch": 0.781437125748503, "grad_norm": 3.3866360168094958, "learning_rate": 6.683065423668403e-06, "loss": 0.1134, "step": 522 }, { "epoch": 0.7829341317365269, "grad_norm": 2.78625814393132, "learning_rate": 6.671989452449044e-06, "loss": 0.1347, "step": 523 }, { "epoch": 0.7844311377245509, "grad_norm": 3.8630809797799035, "learning_rate": 6.660904235947687e-06, "loss": 0.1505, "step": 524 }, { "epoch": 0.7859281437125748, "grad_norm": 4.220407699866795, "learning_rate": 6.649809835460147e-06, "loss": 0.1697, "step": 525 }, { "epoch": 0.7874251497005988, "grad_norm": 3.832023476589111, "learning_rate": 6.638706312333018e-06, "loss": 0.1528, "step": 526 }, { "epoch": 0.7889221556886228, "grad_norm": 3.3946083876349813, "learning_rate": 6.627593727963342e-06, "loss": 0.1457, "step": 527 }, { "epoch": 0.7904191616766467, "grad_norm": 3.3027876268491303, "learning_rate": 6.61647214379826e-06, "loss": 0.1291, "step": 528 }, { "epoch": 0.7919161676646707, "grad_norm": 3.5690436336800535, "learning_rate": 6.605341621334683e-06, "loss": 0.1615, "step": 529 }, { "epoch": 0.7934131736526946, "grad_norm": 3.4466412616578292, "learning_rate": 6.594202222118941e-06, "loss": 0.1521, "step": 530 }, { "epoch": 0.7949101796407185, "grad_norm": 3.68000569642346, "learning_rate": 6.583054007746452e-06, "loss": 0.1781, "step": 531 }, { "epoch": 0.7964071856287425, "grad_norm": 3.5071180555751047, "learning_rate": 6.571897039861377e-06, "loss": 0.1592, "step": 532 }, { "epoch": 0.7979041916167665, "grad_norm": 5.071263581493598, "learning_rate": 6.5607313801562755e-06, "loss": 0.2103, "step": 533 }, { "epoch": 0.7994011976047904, "grad_norm": 3.6534059518137774, "learning_rate": 6.549557090371775e-06, "loss": 0.1647, "step": 534 }, { "epoch": 0.8008982035928144, "grad_norm": 3.400406069837567, "learning_rate": 6.538374232296221e-06, "loss": 0.1452, "step": 535 }, { "epoch": 0.8023952095808383, "grad_norm": 3.5348588106853347, "learning_rate": 6.527182867765333e-06, "loss": 0.1817, "step": 536 }, { "epoch": 0.8038922155688623, "grad_norm": 3.5797377763076295, "learning_rate": 6.5159830586618725e-06, "loss": 0.1764, "step": 537 }, { "epoch": 0.8053892215568862, "grad_norm": 3.2427572782719043, "learning_rate": 6.504774866915291e-06, "loss": 0.1677, "step": 538 }, { "epoch": 0.8068862275449101, "grad_norm": 3.5901185890913423, "learning_rate": 6.493558354501397e-06, "loss": 0.1238, "step": 539 }, { "epoch": 0.8083832335329342, "grad_norm": 3.5930087940703768, "learning_rate": 6.482333583442002e-06, "loss": 0.1643, "step": 540 }, { "epoch": 0.8098802395209581, "grad_norm": 3.5063245071724305, "learning_rate": 6.471100615804592e-06, "loss": 0.1649, "step": 541 }, { "epoch": 0.811377245508982, "grad_norm": 3.3586985040438284, "learning_rate": 6.459859513701967e-06, "loss": 0.1456, "step": 542 }, { "epoch": 0.812874251497006, "grad_norm": 2.7179366283456976, "learning_rate": 6.448610339291913e-06, "loss": 0.1132, "step": 543 }, { "epoch": 0.8143712574850299, "grad_norm": 3.8162531856077933, "learning_rate": 6.437353154776848e-06, "loss": 0.2001, "step": 544 }, { "epoch": 0.8158682634730539, "grad_norm": 3.3619399746019236, "learning_rate": 6.426088022403485e-06, "loss": 0.1749, "step": 545 }, { "epoch": 0.8173652694610778, "grad_norm": 3.232534113649391, "learning_rate": 6.414815004462483e-06, "loss": 0.1532, "step": 546 }, { "epoch": 0.8188622754491018, "grad_norm": 2.890559415588995, "learning_rate": 6.403534163288106e-06, "loss": 0.1262, "step": 547 }, { "epoch": 0.8203592814371258, "grad_norm": 4.2126271176112295, "learning_rate": 6.3922455612578715e-06, "loss": 0.2274, "step": 548 }, { "epoch": 0.8218562874251497, "grad_norm": 3.072094097518995, "learning_rate": 6.380949260792218e-06, "loss": 0.1465, "step": 549 }, { "epoch": 0.8233532934131736, "grad_norm": 2.955683791717412, "learning_rate": 6.369645324354149e-06, "loss": 0.1214, "step": 550 }, { "epoch": 0.8248502994011976, "grad_norm": 3.300084597374136, "learning_rate": 6.35833381444889e-06, "loss": 0.147, "step": 551 }, { "epoch": 0.8263473053892215, "grad_norm": 3.1879973513504414, "learning_rate": 6.3470147936235485e-06, "loss": 0.133, "step": 552 }, { "epoch": 0.8278443113772455, "grad_norm": 3.029781181368221, "learning_rate": 6.3356883244667556e-06, "loss": 0.1381, "step": 553 }, { "epoch": 0.8293413173652695, "grad_norm": 3.600364421406585, "learning_rate": 6.3243544696083355e-06, "loss": 0.1206, "step": 554 }, { "epoch": 0.8308383233532934, "grad_norm": 4.065110861923993, "learning_rate": 6.313013291718951e-06, "loss": 0.1775, "step": 555 }, { "epoch": 0.8323353293413174, "grad_norm": 3.864818335238835, "learning_rate": 6.301664853509755e-06, "loss": 0.1311, "step": 556 }, { "epoch": 0.8338323353293413, "grad_norm": 3.313899554276063, "learning_rate": 6.290309217732046e-06, "loss": 0.1412, "step": 557 }, { "epoch": 0.8353293413173652, "grad_norm": 3.3710397333117905, "learning_rate": 6.278946447176924e-06, "loss": 0.1618, "step": 558 }, { "epoch": 0.8368263473053892, "grad_norm": 4.2207175681340985, "learning_rate": 6.267576604674939e-06, "loss": 0.2097, "step": 559 }, { "epoch": 0.8383233532934131, "grad_norm": 3.3374937881349767, "learning_rate": 6.256199753095745e-06, "loss": 0.1506, "step": 560 }, { "epoch": 0.8398203592814372, "grad_norm": 4.333860484181421, "learning_rate": 6.2448159553477564e-06, "loss": 0.1755, "step": 561 }, { "epoch": 0.8413173652694611, "grad_norm": 3.271029959909354, "learning_rate": 6.233425274377793e-06, "loss": 0.1419, "step": 562 }, { "epoch": 0.842814371257485, "grad_norm": 3.3679464204802945, "learning_rate": 6.222027773170737e-06, "loss": 0.1415, "step": 563 }, { "epoch": 0.844311377245509, "grad_norm": 3.6018471298248933, "learning_rate": 6.21062351474918e-06, "loss": 0.1435, "step": 564 }, { "epoch": 0.8458083832335329, "grad_norm": 3.649944667716659, "learning_rate": 6.199212562173085e-06, "loss": 0.1414, "step": 565 }, { "epoch": 0.8473053892215568, "grad_norm": 4.282885318402707, "learning_rate": 6.18779497853942e-06, "loss": 0.1463, "step": 566 }, { "epoch": 0.8488023952095808, "grad_norm": 3.758085366084196, "learning_rate": 6.176370826981829e-06, "loss": 0.1704, "step": 567 }, { "epoch": 0.8502994011976048, "grad_norm": 3.736732585335067, "learning_rate": 6.164940170670266e-06, "loss": 0.1424, "step": 568 }, { "epoch": 0.8517964071856288, "grad_norm": 2.72607677890374, "learning_rate": 6.153503072810663e-06, "loss": 0.1048, "step": 569 }, { "epoch": 0.8532934131736527, "grad_norm": 2.9606739884987014, "learning_rate": 6.142059596644557e-06, "loss": 0.1118, "step": 570 }, { "epoch": 0.8547904191616766, "grad_norm": 3.0561868674493797, "learning_rate": 6.1306098054487675e-06, "loss": 0.111, "step": 571 }, { "epoch": 0.8562874251497006, "grad_norm": 3.774495929509568, "learning_rate": 6.11915376253502e-06, "loss": 0.1903, "step": 572 }, { "epoch": 0.8577844311377245, "grad_norm": 3.2925526551417805, "learning_rate": 6.107691531249623e-06, "loss": 0.1222, "step": 573 }, { "epoch": 0.8592814371257484, "grad_norm": 4.298566969622808, "learning_rate": 6.096223174973091e-06, "loss": 0.1633, "step": 574 }, { "epoch": 0.8607784431137725, "grad_norm": 3.5629127710546666, "learning_rate": 6.084748757119811e-06, "loss": 0.1453, "step": 575 }, { "epoch": 0.8622754491017964, "grad_norm": 3.7602657229603538, "learning_rate": 6.073268341137694e-06, "loss": 0.1594, "step": 576 }, { "epoch": 0.8637724550898204, "grad_norm": 3.630189462462225, "learning_rate": 6.0617819905078075e-06, "loss": 0.1721, "step": 577 }, { "epoch": 0.8652694610778443, "grad_norm": 3.5936192194199306, "learning_rate": 6.050289768744042e-06, "loss": 0.1627, "step": 578 }, { "epoch": 0.8667664670658682, "grad_norm": 3.686989095451646, "learning_rate": 6.038791739392748e-06, "loss": 0.1244, "step": 579 }, { "epoch": 0.8682634730538922, "grad_norm": 4.1842222493842876, "learning_rate": 6.0272879660323936e-06, "loss": 0.1685, "step": 580 }, { "epoch": 0.8697604790419161, "grad_norm": 3.326748766056256, "learning_rate": 6.015778512273204e-06, "loss": 0.1442, "step": 581 }, { "epoch": 0.8712574850299402, "grad_norm": 3.6193235239183843, "learning_rate": 6.004263441756815e-06, "loss": 0.1248, "step": 582 }, { "epoch": 0.8727544910179641, "grad_norm": 2.9705187910005226, "learning_rate": 5.992742818155923e-06, "loss": 0.1507, "step": 583 }, { "epoch": 0.874251497005988, "grad_norm": 3.9229172875134757, "learning_rate": 5.98121670517393e-06, "loss": 0.171, "step": 584 }, { "epoch": 0.875748502994012, "grad_norm": 3.2468264571749, "learning_rate": 5.9696851665445875e-06, "loss": 0.1672, "step": 585 }, { "epoch": 0.8772455089820359, "grad_norm": 3.537365862694818, "learning_rate": 5.958148266031654e-06, "loss": 0.1781, "step": 586 }, { "epoch": 0.8787425149700598, "grad_norm": 3.155053646794424, "learning_rate": 5.94660606742853e-06, "loss": 0.1585, "step": 587 }, { "epoch": 0.8802395209580839, "grad_norm": 3.1480285392505425, "learning_rate": 5.935058634557917e-06, "loss": 0.1295, "step": 588 }, { "epoch": 0.8817365269461078, "grad_norm": 3.7244308099275156, "learning_rate": 5.923506031271457e-06, "loss": 0.1781, "step": 589 }, { "epoch": 0.8832335329341318, "grad_norm": 3.0101816555466185, "learning_rate": 5.911948321449384e-06, "loss": 0.1092, "step": 590 }, { "epoch": 0.8847305389221557, "grad_norm": 4.272624292787951, "learning_rate": 5.900385569000167e-06, "loss": 0.2116, "step": 591 }, { "epoch": 0.8862275449101796, "grad_norm": 3.409355186199914, "learning_rate": 5.8888178378601565e-06, "loss": 0.1886, "step": 592 }, { "epoch": 0.8877245508982036, "grad_norm": 3.4179578696337067, "learning_rate": 5.87724519199324e-06, "loss": 0.1455, "step": 593 }, { "epoch": 0.8892215568862275, "grad_norm": 3.3911413772807317, "learning_rate": 5.865667695390468e-06, "loss": 0.1768, "step": 594 }, { "epoch": 0.8907185628742516, "grad_norm": 4.340211725387971, "learning_rate": 5.8540854120697265e-06, "loss": 0.2309, "step": 595 }, { "epoch": 0.8922155688622755, "grad_norm": 2.818193774451659, "learning_rate": 5.842498406075363e-06, "loss": 0.1426, "step": 596 }, { "epoch": 0.8937125748502994, "grad_norm": 3.311338773610407, "learning_rate": 5.830906741477841e-06, "loss": 0.1434, "step": 597 }, { "epoch": 0.8952095808383234, "grad_norm": 3.534865393168402, "learning_rate": 5.819310482373381e-06, "loss": 0.138, "step": 598 }, { "epoch": 0.8967065868263473, "grad_norm": 3.310023452199738, "learning_rate": 5.8077096928836115e-06, "loss": 0.1612, "step": 599 }, { "epoch": 0.8982035928143712, "grad_norm": 2.823498551338264, "learning_rate": 5.796104437155213e-06, "loss": 0.1116, "step": 600 }, { "epoch": 0.8997005988023952, "grad_norm": 4.12095301705188, "learning_rate": 5.784494779359559e-06, "loss": 0.1997, "step": 601 }, { "epoch": 0.9011976047904192, "grad_norm": 3.593430772797771, "learning_rate": 5.772880783692363e-06, "loss": 0.1377, "step": 602 }, { "epoch": 0.9026946107784432, "grad_norm": 3.944919492879108, "learning_rate": 5.761262514373333e-06, "loss": 0.1869, "step": 603 }, { "epoch": 0.9041916167664671, "grad_norm": 3.58390263957567, "learning_rate": 5.749640035645798e-06, "loss": 0.155, "step": 604 }, { "epoch": 0.905688622754491, "grad_norm": 3.192359332581195, "learning_rate": 5.73801341177637e-06, "loss": 0.1234, "step": 605 }, { "epoch": 0.907185628742515, "grad_norm": 2.5959337089853114, "learning_rate": 5.726382707054578e-06, "loss": 0.1192, "step": 606 }, { "epoch": 0.9086826347305389, "grad_norm": 3.168409212759359, "learning_rate": 5.714747985792516e-06, "loss": 0.1581, "step": 607 }, { "epoch": 0.9101796407185628, "grad_norm": 2.4748301470123555, "learning_rate": 5.703109312324493e-06, "loss": 0.1137, "step": 608 }, { "epoch": 0.9116766467065869, "grad_norm": 4.641070394995441, "learning_rate": 5.691466751006663e-06, "loss": 0.2189, "step": 609 }, { "epoch": 0.9131736526946108, "grad_norm": 3.96913072739373, "learning_rate": 5.679820366216684e-06, "loss": 0.1731, "step": 610 }, { "epoch": 0.9146706586826348, "grad_norm": 3.289549070127143, "learning_rate": 5.668170222353355e-06, "loss": 0.1545, "step": 611 }, { "epoch": 0.9161676646706587, "grad_norm": 3.5752658780627495, "learning_rate": 5.656516383836263e-06, "loss": 0.1618, "step": 612 }, { "epoch": 0.9176646706586826, "grad_norm": 4.050308824260572, "learning_rate": 5.644858915105414e-06, "loss": 0.1673, "step": 613 }, { "epoch": 0.9191616766467066, "grad_norm": 3.3219138106798765, "learning_rate": 5.6331978806209044e-06, "loss": 0.1258, "step": 614 }, { "epoch": 0.9206586826347305, "grad_norm": 2.9083214613486046, "learning_rate": 5.621533344862531e-06, "loss": 0.1156, "step": 615 }, { "epoch": 0.9221556886227545, "grad_norm": 3.2185383132999505, "learning_rate": 5.609865372329461e-06, "loss": 0.1155, "step": 616 }, { "epoch": 0.9236526946107785, "grad_norm": 3.0282135911428623, "learning_rate": 5.598194027539862e-06, "loss": 0.1449, "step": 617 }, { "epoch": 0.9251497005988024, "grad_norm": 3.105408500464342, "learning_rate": 5.586519375030549e-06, "loss": 0.1112, "step": 618 }, { "epoch": 0.9266467065868264, "grad_norm": 2.845382263195855, "learning_rate": 5.574841479356628e-06, "loss": 0.1115, "step": 619 }, { "epoch": 0.9281437125748503, "grad_norm": 3.183276882620881, "learning_rate": 5.5631604050911354e-06, "loss": 0.1596, "step": 620 }, { "epoch": 0.9296407185628742, "grad_norm": 3.2450242944133407, "learning_rate": 5.551476216824687e-06, "loss": 0.167, "step": 621 }, { "epoch": 0.9311377245508982, "grad_norm": 3.100561613780013, "learning_rate": 5.539788979165115e-06, "loss": 0.1464, "step": 622 }, { "epoch": 0.9326347305389222, "grad_norm": 3.4195727001618166, "learning_rate": 5.528098756737113e-06, "loss": 0.1685, "step": 623 }, { "epoch": 0.9341317365269461, "grad_norm": 3.5126859641239863, "learning_rate": 5.516405614181883e-06, "loss": 0.1499, "step": 624 }, { "epoch": 0.9356287425149701, "grad_norm": 3.9509566988108724, "learning_rate": 5.504709616156768e-06, "loss": 0.1759, "step": 625 }, { "epoch": 0.937125748502994, "grad_norm": 2.717769112202308, "learning_rate": 5.4930108273349034e-06, "loss": 0.1166, "step": 626 }, { "epoch": 0.938622754491018, "grad_norm": 4.62876529743201, "learning_rate": 5.481309312404859e-06, "loss": 0.139, "step": 627 }, { "epoch": 0.9401197604790419, "grad_norm": 3.007903563584237, "learning_rate": 5.4696051360702725e-06, "loss": 0.1081, "step": 628 }, { "epoch": 0.9416167664670658, "grad_norm": 3.1549436914712006, "learning_rate": 5.457898363049504e-06, "loss": 0.1363, "step": 629 }, { "epoch": 0.9431137724550899, "grad_norm": 3.749487986507716, "learning_rate": 5.446189058075265e-06, "loss": 0.1417, "step": 630 }, { "epoch": 0.9446107784431138, "grad_norm": 3.430783870614881, "learning_rate": 5.434477285894277e-06, "loss": 0.1206, "step": 631 }, { "epoch": 0.9461077844311377, "grad_norm": 3.5938904490010124, "learning_rate": 5.4227631112668955e-06, "loss": 0.1318, "step": 632 }, { "epoch": 0.9476047904191617, "grad_norm": 3.741017509781444, "learning_rate": 5.411046598966764e-06, "loss": 0.131, "step": 633 }, { "epoch": 0.9491017964071856, "grad_norm": 3.1371257582844962, "learning_rate": 5.39932781378045e-06, "loss": 0.1258, "step": 634 }, { "epoch": 0.9505988023952096, "grad_norm": 3.5158018230481547, "learning_rate": 5.387606820507095e-06, "loss": 0.1261, "step": 635 }, { "epoch": 0.9520958083832335, "grad_norm": 3.56679368090538, "learning_rate": 5.375883683958041e-06, "loss": 0.1408, "step": 636 }, { "epoch": 0.9535928143712575, "grad_norm": 3.6862139230946647, "learning_rate": 5.364158468956487e-06, "loss": 0.1429, "step": 637 }, { "epoch": 0.9550898203592815, "grad_norm": 3.7651603162297578, "learning_rate": 5.3524312403371255e-06, "loss": 0.134, "step": 638 }, { "epoch": 0.9565868263473054, "grad_norm": 3.2521608354670666, "learning_rate": 5.3407020629457805e-06, "loss": 0.1243, "step": 639 }, { "epoch": 0.9580838323353293, "grad_norm": 3.4232032367980634, "learning_rate": 5.328971001639054e-06, "loss": 0.1246, "step": 640 }, { "epoch": 0.9595808383233533, "grad_norm": 3.0825287865749855, "learning_rate": 5.317238121283962e-06, "loss": 0.1404, "step": 641 }, { "epoch": 0.9610778443113772, "grad_norm": 2.928895989857095, "learning_rate": 5.3055034867575825e-06, "loss": 0.1134, "step": 642 }, { "epoch": 0.9625748502994012, "grad_norm": 3.3884245632777654, "learning_rate": 5.293767162946692e-06, "loss": 0.129, "step": 643 }, { "epoch": 0.9640718562874252, "grad_norm": 3.2777563752409398, "learning_rate": 5.282029214747404e-06, "loss": 0.1106, "step": 644 }, { "epoch": 0.9655688622754491, "grad_norm": 3.430065266124728, "learning_rate": 5.270289707064822e-06, "loss": 0.1157, "step": 645 }, { "epoch": 0.9670658682634731, "grad_norm": 2.93036067887931, "learning_rate": 5.258548704812667e-06, "loss": 0.1176, "step": 646 }, { "epoch": 0.968562874251497, "grad_norm": 3.249044128643264, "learning_rate": 5.2468062729129255e-06, "loss": 0.1437, "step": 647 }, { "epoch": 0.9700598802395209, "grad_norm": 4.554348207056405, "learning_rate": 5.235062476295488e-06, "loss": 0.1809, "step": 648 }, { "epoch": 0.9715568862275449, "grad_norm": 3.29681340014393, "learning_rate": 5.223317379897794e-06, "loss": 0.144, "step": 649 }, { "epoch": 0.9730538922155688, "grad_norm": 3.2005122326062554, "learning_rate": 5.211571048664469e-06, "loss": 0.1349, "step": 650 }, { "epoch": 0.9745508982035929, "grad_norm": 4.115982915477802, "learning_rate": 5.199823547546963e-06, "loss": 0.1842, "step": 651 }, { "epoch": 0.9760479041916168, "grad_norm": 3.278739990030093, "learning_rate": 5.188074941503203e-06, "loss": 0.1354, "step": 652 }, { "epoch": 0.9775449101796407, "grad_norm": 4.6916988302963585, "learning_rate": 5.176325295497217e-06, "loss": 0.1592, "step": 653 }, { "epoch": 0.9790419161676647, "grad_norm": 3.326675924066229, "learning_rate": 5.164574674498788e-06, "loss": 0.1226, "step": 654 }, { "epoch": 0.9805389221556886, "grad_norm": 3.622845080801504, "learning_rate": 5.152823143483092e-06, "loss": 0.1577, "step": 655 }, { "epoch": 0.9820359281437125, "grad_norm": 2.9083374807131483, "learning_rate": 5.141070767430331e-06, "loss": 0.1082, "step": 656 }, { "epoch": 0.9835329341317365, "grad_norm": 3.058700527205097, "learning_rate": 5.129317611325385e-06, "loss": 0.1509, "step": 657 }, { "epoch": 0.9850299401197605, "grad_norm": 3.1464322172554153, "learning_rate": 5.117563740157444e-06, "loss": 0.1287, "step": 658 }, { "epoch": 0.9865269461077845, "grad_norm": 2.701834779109062, "learning_rate": 5.105809218919656e-06, "loss": 0.1008, "step": 659 }, { "epoch": 0.9880239520958084, "grad_norm": 2.9899638354836524, "learning_rate": 5.094054112608758e-06, "loss": 0.1473, "step": 660 }, { "epoch": 0.9895209580838323, "grad_norm": 3.2023545182946056, "learning_rate": 5.082298486224728e-06, "loss": 0.1157, "step": 661 }, { "epoch": 0.9910179640718563, "grad_norm": 3.8714257370251435, "learning_rate": 5.070542404770413e-06, "loss": 0.1804, "step": 662 }, { "epoch": 0.9925149700598802, "grad_norm": 3.4704645436945003, "learning_rate": 5.0587859332511845e-06, "loss": 0.1419, "step": 663 }, { "epoch": 0.9940119760479041, "grad_norm": 3.7459085911790937, "learning_rate": 5.047029136674563e-06, "loss": 0.1578, "step": 664 }, { "epoch": 0.9955089820359282, "grad_norm": 2.62047879164251, "learning_rate": 5.035272080049871e-06, "loss": 0.104, "step": 665 }, { "epoch": 0.9970059880239521, "grad_norm": 3.3201682775751378, "learning_rate": 5.023514828387868e-06, "loss": 0.1535, "step": 666 }, { "epoch": 0.9985029940119761, "grad_norm": 3.365363193645878, "learning_rate": 5.011757446700393e-06, "loss": 0.1447, "step": 667 }, { "epoch": 1.0, "grad_norm": 3.1810582819155466, "learning_rate": 5e-06, "loss": 0.0776, "step": 668 }, { "epoch": 1.001497005988024, "grad_norm": 2.245163761655237, "learning_rate": 4.988242553299609e-06, "loss": 0.0657, "step": 669 }, { "epoch": 1.0029940119760479, "grad_norm": 2.3642228894923067, "learning_rate": 4.976485171612134e-06, "loss": 0.1062, "step": 670 }, { "epoch": 1.0044910179640718, "grad_norm": 2.6060631704747608, "learning_rate": 4.964727919950131e-06, "loss": 0.0753, "step": 671 }, { "epoch": 1.0059880239520957, "grad_norm": 2.368406513184329, "learning_rate": 4.95297086332544e-06, "loss": 0.0573, "step": 672 }, { "epoch": 1.0074850299401197, "grad_norm": 2.5148402051025447, "learning_rate": 4.941214066748818e-06, "loss": 0.0663, "step": 673 }, { "epoch": 1.0089820359281436, "grad_norm": 2.2593508146497925, "learning_rate": 4.9294575952295896e-06, "loss": 0.0604, "step": 674 }, { "epoch": 1.0104790419161676, "grad_norm": 2.196719478915662, "learning_rate": 4.9177015137752726e-06, "loss": 0.057, "step": 675 }, { "epoch": 1.0119760479041917, "grad_norm": 2.593733198437451, "learning_rate": 4.905945887391242e-06, "loss": 0.0718, "step": 676 }, { "epoch": 1.0134730538922156, "grad_norm": 2.5764489426977155, "learning_rate": 4.8941907810803445e-06, "loss": 0.0765, "step": 677 }, { "epoch": 1.0149700598802396, "grad_norm": 2.532205656693086, "learning_rate": 4.882436259842556e-06, "loss": 0.0669, "step": 678 }, { "epoch": 1.0164670658682635, "grad_norm": 2.7074593686291477, "learning_rate": 4.870682388674616e-06, "loss": 0.0584, "step": 679 }, { "epoch": 1.0179640718562875, "grad_norm": 2.4876634635558825, "learning_rate": 4.858929232569671e-06, "loss": 0.0718, "step": 680 }, { "epoch": 1.0194610778443114, "grad_norm": 3.444339553584467, "learning_rate": 4.847176856516909e-06, "loss": 0.0688, "step": 681 }, { "epoch": 1.0209580838323353, "grad_norm": 3.1091121730228317, "learning_rate": 4.835425325501214e-06, "loss": 0.0664, "step": 682 }, { "epoch": 1.0224550898203593, "grad_norm": 3.2366966723790456, "learning_rate": 4.823674704502785e-06, "loss": 0.0643, "step": 683 }, { "epoch": 1.0239520958083832, "grad_norm": 3.1621277009179543, "learning_rate": 4.811925058496799e-06, "loss": 0.0905, "step": 684 }, { "epoch": 1.0254491017964071, "grad_norm": 3.6484882284139486, "learning_rate": 4.800176452453038e-06, "loss": 0.0917, "step": 685 }, { "epoch": 1.026946107784431, "grad_norm": 3.2365732248418144, "learning_rate": 4.788428951335534e-06, "loss": 0.07, "step": 686 }, { "epoch": 1.028443113772455, "grad_norm": 2.8971465962126057, "learning_rate": 4.7766826201022085e-06, "loss": 0.0684, "step": 687 }, { "epoch": 1.029940119760479, "grad_norm": 3.067591996906434, "learning_rate": 4.7649375237045135e-06, "loss": 0.0553, "step": 688 }, { "epoch": 1.031437125748503, "grad_norm": 4.451417189311231, "learning_rate": 4.753193727087075e-06, "loss": 0.0655, "step": 689 }, { "epoch": 1.032934131736527, "grad_norm": 2.828840869907004, "learning_rate": 4.741451295187333e-06, "loss": 0.0455, "step": 690 }, { "epoch": 1.034431137724551, "grad_norm": 2.95493823243126, "learning_rate": 4.729710292935179e-06, "loss": 0.0497, "step": 691 }, { "epoch": 1.035928143712575, "grad_norm": 4.419104657423044, "learning_rate": 4.717970785252596e-06, "loss": 0.0946, "step": 692 }, { "epoch": 1.0374251497005988, "grad_norm": 3.708584157297959, "learning_rate": 4.706232837053311e-06, "loss": 0.0635, "step": 693 }, { "epoch": 1.0389221556886228, "grad_norm": 2.7352935990770297, "learning_rate": 4.694496513242418e-06, "loss": 0.0563, "step": 694 }, { "epoch": 1.0404191616766467, "grad_norm": 3.651713822517358, "learning_rate": 4.68276187871604e-06, "loss": 0.0888, "step": 695 }, { "epoch": 1.0419161676646707, "grad_norm": 3.239036105735356, "learning_rate": 4.671028998360947e-06, "loss": 0.0864, "step": 696 }, { "epoch": 1.0434131736526946, "grad_norm": 3.0975957548467914, "learning_rate": 4.659297937054221e-06, "loss": 0.0631, "step": 697 }, { "epoch": 1.0449101796407185, "grad_norm": 2.2149393501261248, "learning_rate": 4.647568759662876e-06, "loss": 0.0366, "step": 698 }, { "epoch": 1.0464071856287425, "grad_norm": 2.961057560092592, "learning_rate": 4.635841531043514e-06, "loss": 0.085, "step": 699 }, { "epoch": 1.0479041916167664, "grad_norm": 3.540327672406872, "learning_rate": 4.624116316041962e-06, "loss": 0.0947, "step": 700 }, { "epoch": 1.0494011976047903, "grad_norm": 3.1109882433007274, "learning_rate": 4.612393179492907e-06, "loss": 0.0871, "step": 701 }, { "epoch": 1.0508982035928143, "grad_norm": 3.13666558137064, "learning_rate": 4.600672186219551e-06, "loss": 0.0743, "step": 702 }, { "epoch": 1.0523952095808382, "grad_norm": 2.887464578174154, "learning_rate": 4.588953401033237e-06, "loss": 0.0829, "step": 703 }, { "epoch": 1.0538922155688624, "grad_norm": 2.929072115740665, "learning_rate": 4.5772368887331044e-06, "loss": 0.07, "step": 704 }, { "epoch": 1.0553892215568863, "grad_norm": 3.3659866184846767, "learning_rate": 4.565522714105723e-06, "loss": 0.0886, "step": 705 }, { "epoch": 1.0568862275449102, "grad_norm": 2.6325402735992753, "learning_rate": 4.553810941924735e-06, "loss": 0.0648, "step": 706 }, { "epoch": 1.0583832335329342, "grad_norm": 2.7528337655759145, "learning_rate": 4.542101636950497e-06, "loss": 0.0821, "step": 707 }, { "epoch": 1.0598802395209581, "grad_norm": 3.12987054185822, "learning_rate": 4.530394863929728e-06, "loss": 0.0618, "step": 708 }, { "epoch": 1.061377245508982, "grad_norm": 2.8080617050340297, "learning_rate": 4.5186906875951425e-06, "loss": 0.074, "step": 709 }, { "epoch": 1.062874251497006, "grad_norm": 3.042507384169922, "learning_rate": 4.506989172665097e-06, "loss": 0.0721, "step": 710 }, { "epoch": 1.06437125748503, "grad_norm": 2.894720566617191, "learning_rate": 4.4952903838432335e-06, "loss": 0.0781, "step": 711 }, { "epoch": 1.0658682634730539, "grad_norm": 3.0542805452442585, "learning_rate": 4.483594385818119e-06, "loss": 0.0852, "step": 712 }, { "epoch": 1.0673652694610778, "grad_norm": 2.4698994298135832, "learning_rate": 4.471901243262888e-06, "loss": 0.0662, "step": 713 }, { "epoch": 1.0688622754491017, "grad_norm": 3.1949449313110865, "learning_rate": 4.460211020834887e-06, "loss": 0.071, "step": 714 }, { "epoch": 1.0703592814371257, "grad_norm": 3.3683288464489074, "learning_rate": 4.448523783175315e-06, "loss": 0.0759, "step": 715 }, { "epoch": 1.0718562874251496, "grad_norm": 2.874658295016912, "learning_rate": 4.436839594908866e-06, "loss": 0.063, "step": 716 }, { "epoch": 1.0733532934131738, "grad_norm": 3.0270043997291376, "learning_rate": 4.425158520643372e-06, "loss": 0.0698, "step": 717 }, { "epoch": 1.0748502994011977, "grad_norm": 3.734569087331669, "learning_rate": 4.4134806249694514e-06, "loss": 0.0703, "step": 718 }, { "epoch": 1.0763473053892216, "grad_norm": 2.885926811445453, "learning_rate": 4.401805972460139e-06, "loss": 0.0579, "step": 719 }, { "epoch": 1.0778443113772456, "grad_norm": 2.529815413126784, "learning_rate": 4.39013462767054e-06, "loss": 0.0759, "step": 720 }, { "epoch": 1.0793413173652695, "grad_norm": 3.4650256388963987, "learning_rate": 4.378466655137471e-06, "loss": 0.0641, "step": 721 }, { "epoch": 1.0808383233532934, "grad_norm": 2.8843489398135906, "learning_rate": 4.366802119379098e-06, "loss": 0.0596, "step": 722 }, { "epoch": 1.0823353293413174, "grad_norm": 3.2907595229448052, "learning_rate": 4.355141084894587e-06, "loss": 0.0754, "step": 723 }, { "epoch": 1.0838323353293413, "grad_norm": 2.9050170897705363, "learning_rate": 4.34348361616374e-06, "loss": 0.0627, "step": 724 }, { "epoch": 1.0853293413173652, "grad_norm": 2.905549337711893, "learning_rate": 4.331829777646646e-06, "loss": 0.0797, "step": 725 }, { "epoch": 1.0868263473053892, "grad_norm": 3.00020697709472, "learning_rate": 4.3201796337833165e-06, "loss": 0.051, "step": 726 }, { "epoch": 1.0883233532934131, "grad_norm": 2.906300114165401, "learning_rate": 4.308533248993338e-06, "loss": 0.0698, "step": 727 }, { "epoch": 1.089820359281437, "grad_norm": 3.021750929238119, "learning_rate": 4.29689068767551e-06, "loss": 0.0556, "step": 728 }, { "epoch": 1.091317365269461, "grad_norm": 2.918509865910724, "learning_rate": 4.285252014207485e-06, "loss": 0.0788, "step": 729 }, { "epoch": 1.092814371257485, "grad_norm": 3.283188760480558, "learning_rate": 4.273617292945425e-06, "loss": 0.0673, "step": 730 }, { "epoch": 1.0943113772455089, "grad_norm": 2.6241899539360802, "learning_rate": 4.261986588223632e-06, "loss": 0.0563, "step": 731 }, { "epoch": 1.095808383233533, "grad_norm": 3.0866123920631554, "learning_rate": 4.250359964354203e-06, "loss": 0.1026, "step": 732 }, { "epoch": 1.097305389221557, "grad_norm": 2.6167818026044802, "learning_rate": 4.238737485626669e-06, "loss": 0.0682, "step": 733 }, { "epoch": 1.098802395209581, "grad_norm": 2.4973257538138895, "learning_rate": 4.227119216307637e-06, "loss": 0.0478, "step": 734 }, { "epoch": 1.1002994011976048, "grad_norm": 2.8075929604115073, "learning_rate": 4.215505220640442e-06, "loss": 0.0658, "step": 735 }, { "epoch": 1.1017964071856288, "grad_norm": 3.549481099161326, "learning_rate": 4.203895562844789e-06, "loss": 0.0837, "step": 736 }, { "epoch": 1.1032934131736527, "grad_norm": 2.583714298924795, "learning_rate": 4.192290307116389e-06, "loss": 0.0459, "step": 737 }, { "epoch": 1.1047904191616766, "grad_norm": 3.2113310123063648, "learning_rate": 4.18068951762662e-06, "loss": 0.085, "step": 738 }, { "epoch": 1.1062874251497006, "grad_norm": 3.5595746535078434, "learning_rate": 4.169093258522161e-06, "loss": 0.1064, "step": 739 }, { "epoch": 1.1077844311377245, "grad_norm": 2.92719551767083, "learning_rate": 4.157501593924638e-06, "loss": 0.0998, "step": 740 }, { "epoch": 1.1092814371257484, "grad_norm": 3.390572720170953, "learning_rate": 4.145914587930275e-06, "loss": 0.0763, "step": 741 }, { "epoch": 1.1107784431137724, "grad_norm": 3.0424410814426235, "learning_rate": 4.134332304609533e-06, "loss": 0.0756, "step": 742 }, { "epoch": 1.1122754491017963, "grad_norm": 2.905637122008734, "learning_rate": 4.122754808006764e-06, "loss": 0.0681, "step": 743 }, { "epoch": 1.1137724550898203, "grad_norm": 2.6220136190836616, "learning_rate": 4.111182162139844e-06, "loss": 0.0553, "step": 744 }, { "epoch": 1.1152694610778444, "grad_norm": 3.631987283788416, "learning_rate": 4.099614430999834e-06, "loss": 0.0866, "step": 745 }, { "epoch": 1.1167664670658684, "grad_norm": 3.1153542992190992, "learning_rate": 4.088051678550617e-06, "loss": 0.0717, "step": 746 }, { "epoch": 1.1182634730538923, "grad_norm": 3.280081735462654, "learning_rate": 4.076493968728544e-06, "loss": 0.0623, "step": 747 }, { "epoch": 1.1197604790419162, "grad_norm": 3.001997939282987, "learning_rate": 4.064941365442084e-06, "loss": 0.0785, "step": 748 }, { "epoch": 1.1212574850299402, "grad_norm": 2.7626875247160516, "learning_rate": 4.053393932571472e-06, "loss": 0.0536, "step": 749 }, { "epoch": 1.122754491017964, "grad_norm": 2.9253458312799614, "learning_rate": 4.041851733968348e-06, "loss": 0.0597, "step": 750 }, { "epoch": 1.124251497005988, "grad_norm": 2.92766655685794, "learning_rate": 4.030314833455413e-06, "loss": 0.0678, "step": 751 }, { "epoch": 1.125748502994012, "grad_norm": 3.5650769256722183, "learning_rate": 4.018783294826071e-06, "loss": 0.066, "step": 752 }, { "epoch": 1.127245508982036, "grad_norm": 3.428033375407255, "learning_rate": 4.007257181844078e-06, "loss": 0.0677, "step": 753 }, { "epoch": 1.1287425149700598, "grad_norm": 2.5288905379738327, "learning_rate": 3.995736558243186e-06, "loss": 0.0577, "step": 754 }, { "epoch": 1.1302395209580838, "grad_norm": 2.8496089601379273, "learning_rate": 3.984221487726799e-06, "loss": 0.0581, "step": 755 }, { "epoch": 1.1317365269461077, "grad_norm": 3.7820732151833836, "learning_rate": 3.972712033967608e-06, "loss": 0.0927, "step": 756 }, { "epoch": 1.1332335329341316, "grad_norm": 2.7642348459061536, "learning_rate": 3.9612082606072525e-06, "loss": 0.0806, "step": 757 }, { "epoch": 1.1347305389221556, "grad_norm": 3.624884560599716, "learning_rate": 3.949710231255961e-06, "loss": 0.092, "step": 758 }, { "epoch": 1.1362275449101795, "grad_norm": 3.108103311622628, "learning_rate": 3.938218009492193e-06, "loss": 0.0792, "step": 759 }, { "epoch": 1.1377245508982037, "grad_norm": 2.4812703994736824, "learning_rate": 3.926731658862307e-06, "loss": 0.0462, "step": 760 }, { "epoch": 1.1392215568862276, "grad_norm": 2.530170793846207, "learning_rate": 3.91525124288019e-06, "loss": 0.0586, "step": 761 }, { "epoch": 1.1407185628742516, "grad_norm": 2.3545115610744647, "learning_rate": 3.903776825026912e-06, "loss": 0.0607, "step": 762 }, { "epoch": 1.1422155688622755, "grad_norm": 3.6733007506514896, "learning_rate": 3.892308468750379e-06, "loss": 0.1076, "step": 763 }, { "epoch": 1.1437125748502994, "grad_norm": 2.92516394369606, "learning_rate": 3.8808462374649805e-06, "loss": 0.0712, "step": 764 }, { "epoch": 1.1452095808383234, "grad_norm": 3.334285042667334, "learning_rate": 3.869390194551235e-06, "loss": 0.0791, "step": 765 }, { "epoch": 1.1467065868263473, "grad_norm": 3.3794290130159994, "learning_rate": 3.857940403355444e-06, "loss": 0.0767, "step": 766 }, { "epoch": 1.1482035928143712, "grad_norm": 3.0642547785921317, "learning_rate": 3.846496927189339e-06, "loss": 0.0766, "step": 767 }, { "epoch": 1.1497005988023952, "grad_norm": 2.973979373103325, "learning_rate": 3.8350598293297345e-06, "loss": 0.0675, "step": 768 }, { "epoch": 1.151197604790419, "grad_norm": 3.603369570091167, "learning_rate": 3.823629173018174e-06, "loss": 0.0565, "step": 769 }, { "epoch": 1.152694610778443, "grad_norm": 3.282133394472687, "learning_rate": 3.8122050214605822e-06, "loss": 0.0757, "step": 770 }, { "epoch": 1.154191616766467, "grad_norm": 2.395268207460729, "learning_rate": 3.800787437826918e-06, "loss": 0.064, "step": 771 }, { "epoch": 1.1556886227544911, "grad_norm": 3.039235869144987, "learning_rate": 3.7893764852508207e-06, "loss": 0.0629, "step": 772 }, { "epoch": 1.157185628742515, "grad_norm": 3.2933523109485003, "learning_rate": 3.777972226829264e-06, "loss": 0.0621, "step": 773 }, { "epoch": 1.158682634730539, "grad_norm": 3.9254177074502263, "learning_rate": 3.766574725622208e-06, "loss": 0.1082, "step": 774 }, { "epoch": 1.160179640718563, "grad_norm": 2.3554145048274253, "learning_rate": 3.7551840446522444e-06, "loss": 0.0502, "step": 775 }, { "epoch": 1.1616766467065869, "grad_norm": 2.6876620375630833, "learning_rate": 3.7438002469042567e-06, "loss": 0.0681, "step": 776 }, { "epoch": 1.1631736526946108, "grad_norm": 3.0269324692237327, "learning_rate": 3.732423395325063e-06, "loss": 0.0597, "step": 777 }, { "epoch": 1.1646706586826348, "grad_norm": 3.0559291874528522, "learning_rate": 3.721053552823078e-06, "loss": 0.0973, "step": 778 }, { "epoch": 1.1661676646706587, "grad_norm": 2.5687542725093255, "learning_rate": 3.7096907822679564e-06, "loss": 0.055, "step": 779 }, { "epoch": 1.1676646706586826, "grad_norm": 2.7446075314016602, "learning_rate": 3.698335146490246e-06, "loss": 0.0783, "step": 780 }, { "epoch": 1.1691616766467066, "grad_norm": 2.3486077223955486, "learning_rate": 3.6869867082810507e-06, "loss": 0.0561, "step": 781 }, { "epoch": 1.1706586826347305, "grad_norm": 3.312141587592341, "learning_rate": 3.675645530391665e-06, "loss": 0.0864, "step": 782 }, { "epoch": 1.1721556886227544, "grad_norm": 3.1117705777684703, "learning_rate": 3.664311675533247e-06, "loss": 0.0825, "step": 783 }, { "epoch": 1.1736526946107784, "grad_norm": 2.765507472639561, "learning_rate": 3.652985206376455e-06, "loss": 0.0494, "step": 784 }, { "epoch": 1.1751497005988023, "grad_norm": 3.3400951839940736, "learning_rate": 3.641666185551111e-06, "loss": 0.0504, "step": 785 }, { "epoch": 1.1766467065868262, "grad_norm": 2.796850305012989, "learning_rate": 3.630354675645853e-06, "loss": 0.0696, "step": 786 }, { "epoch": 1.1781437125748502, "grad_norm": 3.842152784401489, "learning_rate": 3.619050739207782e-06, "loss": 0.0802, "step": 787 }, { "epoch": 1.1796407185628743, "grad_norm": 2.4724688706263396, "learning_rate": 3.6077544387421293e-06, "loss": 0.0458, "step": 788 }, { "epoch": 1.1811377245508983, "grad_norm": 2.9220387876039187, "learning_rate": 3.5964658367118964e-06, "loss": 0.0617, "step": 789 }, { "epoch": 1.1826347305389222, "grad_norm": 3.0049471800077203, "learning_rate": 3.5851849955375177e-06, "loss": 0.0855, "step": 790 }, { "epoch": 1.1841317365269461, "grad_norm": 3.241187376683164, "learning_rate": 3.573911977596517e-06, "loss": 0.0673, "step": 791 }, { "epoch": 1.18562874251497, "grad_norm": 2.9698877431069084, "learning_rate": 3.5626468452231534e-06, "loss": 0.059, "step": 792 }, { "epoch": 1.187125748502994, "grad_norm": 3.580587600295489, "learning_rate": 3.5513896607080884e-06, "loss": 0.0737, "step": 793 }, { "epoch": 1.188622754491018, "grad_norm": 3.4069831852623222, "learning_rate": 3.540140486298035e-06, "loss": 0.0797, "step": 794 }, { "epoch": 1.1901197604790419, "grad_norm": 3.128957913458415, "learning_rate": 3.5288993841954093e-06, "loss": 0.079, "step": 795 }, { "epoch": 1.1916167664670658, "grad_norm": 3.134223633391442, "learning_rate": 3.517666416557999e-06, "loss": 0.0776, "step": 796 }, { "epoch": 1.1931137724550898, "grad_norm": 3.1422376556355385, "learning_rate": 3.506441645498605e-06, "loss": 0.0735, "step": 797 }, { "epoch": 1.1946107784431137, "grad_norm": 2.3801650039470954, "learning_rate": 3.495225133084712e-06, "loss": 0.0579, "step": 798 }, { "epoch": 1.1961077844311376, "grad_norm": 2.760590494572972, "learning_rate": 3.484016941338131e-06, "loss": 0.0577, "step": 799 }, { "epoch": 1.1976047904191618, "grad_norm": 3.0385901872857826, "learning_rate": 3.472817132234669e-06, "loss": 0.0623, "step": 800 }, { "epoch": 1.1991017964071857, "grad_norm": 3.1392818060904184, "learning_rate": 3.4616257677037794e-06, "loss": 0.0719, "step": 801 }, { "epoch": 1.2005988023952097, "grad_norm": 3.360992132719966, "learning_rate": 3.4504429096282246e-06, "loss": 0.0694, "step": 802 }, { "epoch": 1.2020958083832336, "grad_norm": 2.5765068133389937, "learning_rate": 3.439268619843724e-06, "loss": 0.0466, "step": 803 }, { "epoch": 1.2035928143712575, "grad_norm": 3.5434575964035493, "learning_rate": 3.428102960138625e-06, "loss": 0.091, "step": 804 }, { "epoch": 1.2050898203592815, "grad_norm": 2.894997799993498, "learning_rate": 3.4169459922535485e-06, "loss": 0.0558, "step": 805 }, { "epoch": 1.2065868263473054, "grad_norm": 3.6265393390410208, "learning_rate": 3.405797777881059e-06, "loss": 0.0605, "step": 806 }, { "epoch": 1.2080838323353293, "grad_norm": 3.1121169920332137, "learning_rate": 3.394658378665319e-06, "loss": 0.0866, "step": 807 }, { "epoch": 1.2095808383233533, "grad_norm": 2.6407427266868027, "learning_rate": 3.3835278562017405e-06, "loss": 0.053, "step": 808 }, { "epoch": 1.2110778443113772, "grad_norm": 2.7023564131182853, "learning_rate": 3.37240627203666e-06, "loss": 0.0493, "step": 809 }, { "epoch": 1.2125748502994012, "grad_norm": 2.816444095899385, "learning_rate": 3.3612936876669834e-06, "loss": 0.0562, "step": 810 }, { "epoch": 1.214071856287425, "grad_norm": 2.796553011207744, "learning_rate": 3.3501901645398556e-06, "loss": 0.044, "step": 811 }, { "epoch": 1.215568862275449, "grad_norm": 2.7374586684159823, "learning_rate": 3.3390957640523147e-06, "loss": 0.0888, "step": 812 }, { "epoch": 1.217065868263473, "grad_norm": 3.041005436837862, "learning_rate": 3.3280105475509593e-06, "loss": 0.071, "step": 813 }, { "epoch": 1.218562874251497, "grad_norm": 2.75644027482199, "learning_rate": 3.3169345763315986e-06, "loss": 0.087, "step": 814 }, { "epoch": 1.220059880239521, "grad_norm": 3.055035332739645, "learning_rate": 3.3058679116389247e-06, "loss": 0.0579, "step": 815 }, { "epoch": 1.221556886227545, "grad_norm": 3.378386283128622, "learning_rate": 3.29481061466617e-06, "loss": 0.0728, "step": 816 }, { "epoch": 1.223053892215569, "grad_norm": 3.356394576654083, "learning_rate": 3.2837627465547663e-06, "loss": 0.0755, "step": 817 }, { "epoch": 1.2245508982035929, "grad_norm": 2.687507049979649, "learning_rate": 3.2727243683940045e-06, "loss": 0.052, "step": 818 }, { "epoch": 1.2260479041916168, "grad_norm": 2.5419037299564073, "learning_rate": 3.2616955412207087e-06, "loss": 0.0412, "step": 819 }, { "epoch": 1.2275449101796407, "grad_norm": 3.7943265365279504, "learning_rate": 3.2506763260188824e-06, "loss": 0.0665, "step": 820 }, { "epoch": 1.2290419161676647, "grad_norm": 2.6991279516382707, "learning_rate": 3.239666783719385e-06, "loss": 0.0531, "step": 821 }, { "epoch": 1.2305389221556886, "grad_norm": 3.4956054458433408, "learning_rate": 3.2286669751995905e-06, "loss": 0.0675, "step": 822 }, { "epoch": 1.2320359281437125, "grad_norm": 3.9817534686681126, "learning_rate": 3.217676961283044e-06, "loss": 0.07, "step": 823 }, { "epoch": 1.2335329341317365, "grad_norm": 2.9122180648749803, "learning_rate": 3.2066968027391377e-06, "loss": 0.0555, "step": 824 }, { "epoch": 1.2350299401197604, "grad_norm": 2.3192986245823093, "learning_rate": 3.1957265602827624e-06, "loss": 0.039, "step": 825 }, { "epoch": 1.2365269461077844, "grad_norm": 2.9881919390681833, "learning_rate": 3.1847662945739833e-06, "loss": 0.0606, "step": 826 }, { "epoch": 1.2380239520958083, "grad_norm": 3.0069636324416, "learning_rate": 3.173816066217695e-06, "loss": 0.0619, "step": 827 }, { "epoch": 1.2395209580838324, "grad_norm": 2.7797776465811213, "learning_rate": 3.1628759357632943e-06, "loss": 0.0562, "step": 828 }, { "epoch": 1.2410179640718564, "grad_norm": 2.625780840330345, "learning_rate": 3.1519459637043335e-06, "loss": 0.0487, "step": 829 }, { "epoch": 1.2425149700598803, "grad_norm": 3.048597624386284, "learning_rate": 3.1410262104782086e-06, "loss": 0.06, "step": 830 }, { "epoch": 1.2440119760479043, "grad_norm": 2.9594377477828537, "learning_rate": 3.1301167364657953e-06, "loss": 0.0775, "step": 831 }, { "epoch": 1.2455089820359282, "grad_norm": 2.5794455341505773, "learning_rate": 3.119217601991139e-06, "loss": 0.0543, "step": 832 }, { "epoch": 1.2470059880239521, "grad_norm": 3.216141193594074, "learning_rate": 3.10832886732111e-06, "loss": 0.0943, "step": 833 }, { "epoch": 1.248502994011976, "grad_norm": 3.0160591170147955, "learning_rate": 3.0974505926650724e-06, "loss": 0.0685, "step": 834 }, { "epoch": 1.25, "grad_norm": 2.542870467321082, "learning_rate": 3.0865828381745515e-06, "loss": 0.0662, "step": 835 }, { "epoch": 1.251497005988024, "grad_norm": 2.67240770300272, "learning_rate": 3.0757256639429027e-06, "loss": 0.0721, "step": 836 }, { "epoch": 1.2529940119760479, "grad_norm": 2.8916162139925583, "learning_rate": 3.064879130004978e-06, "loss": 0.0628, "step": 837 }, { "epoch": 1.2544910179640718, "grad_norm": 3.0791819125906894, "learning_rate": 3.0540432963367907e-06, "loss": 0.0702, "step": 838 }, { "epoch": 1.2559880239520957, "grad_norm": 2.1853642672258413, "learning_rate": 3.04321822285519e-06, "loss": 0.0453, "step": 839 }, { "epoch": 1.2574850299401197, "grad_norm": 2.937053976220558, "learning_rate": 3.032403969417523e-06, "loss": 0.0751, "step": 840 }, { "epoch": 1.2589820359281436, "grad_norm": 2.7318687651088194, "learning_rate": 3.0216005958213146e-06, "loss": 0.0547, "step": 841 }, { "epoch": 1.2604790419161676, "grad_norm": 3.0046140400029557, "learning_rate": 3.010808161803917e-06, "loss": 0.0673, "step": 842 }, { "epoch": 1.2619760479041915, "grad_norm": 2.446403017490101, "learning_rate": 3.000026727042208e-06, "loss": 0.0499, "step": 843 }, { "epoch": 1.2634730538922156, "grad_norm": 2.5688378310998035, "learning_rate": 2.9892563511522305e-06, "loss": 0.0477, "step": 844 }, { "epoch": 1.2649700598802396, "grad_norm": 3.264425861663404, "learning_rate": 2.978497093688886e-06, "loss": 0.0614, "step": 845 }, { "epoch": 1.2664670658682635, "grad_norm": 3.6184014074367097, "learning_rate": 2.9677490141455915e-06, "loss": 0.0925, "step": 846 }, { "epoch": 1.2679640718562875, "grad_norm": 2.9183415594852287, "learning_rate": 2.9570121719539603e-06, "loss": 0.0669, "step": 847 }, { "epoch": 1.2694610778443114, "grad_norm": 2.5170515374117475, "learning_rate": 2.946286626483463e-06, "loss": 0.0495, "step": 848 }, { "epoch": 1.2709580838323353, "grad_norm": 3.4474814839089682, "learning_rate": 2.935572437041111e-06, "loss": 0.0608, "step": 849 }, { "epoch": 1.2724550898203593, "grad_norm": 3.292623258168709, "learning_rate": 2.924869662871117e-06, "loss": 0.0639, "step": 850 }, { "epoch": 1.2739520958083832, "grad_norm": 2.630969421839152, "learning_rate": 2.914178363154577e-06, "loss": 0.0711, "step": 851 }, { "epoch": 1.2754491017964071, "grad_norm": 2.5595128898477855, "learning_rate": 2.903498597009136e-06, "loss": 0.0462, "step": 852 }, { "epoch": 1.276946107784431, "grad_norm": 3.147946336753926, "learning_rate": 2.8928304234886643e-06, "loss": 0.0727, "step": 853 }, { "epoch": 1.278443113772455, "grad_norm": 3.649587508941088, "learning_rate": 2.8821739015829338e-06, "loss": 0.0758, "step": 854 }, { "epoch": 1.2799401197604792, "grad_norm": 2.653251512140537, "learning_rate": 2.8715290902172866e-06, "loss": 0.0716, "step": 855 }, { "epoch": 1.281437125748503, "grad_norm": 3.0865553314786274, "learning_rate": 2.8608960482523058e-06, "loss": 0.072, "step": 856 }, { "epoch": 1.282934131736527, "grad_norm": 3.5253493971834513, "learning_rate": 2.8502748344835063e-06, "loss": 0.0907, "step": 857 }, { "epoch": 1.284431137724551, "grad_norm": 2.4962070891502672, "learning_rate": 2.839665507640992e-06, "loss": 0.0423, "step": 858 }, { "epoch": 1.285928143712575, "grad_norm": 3.288933136967987, "learning_rate": 2.8290681263891413e-06, "loss": 0.0588, "step": 859 }, { "epoch": 1.2874251497005988, "grad_norm": 2.8395571046241246, "learning_rate": 2.818482749326272e-06, "loss": 0.0612, "step": 860 }, { "epoch": 1.2889221556886228, "grad_norm": 2.667423396712476, "learning_rate": 2.807909434984333e-06, "loss": 0.06, "step": 861 }, { "epoch": 1.2904191616766467, "grad_norm": 2.9336458271976875, "learning_rate": 2.797348241828569e-06, "loss": 0.0622, "step": 862 }, { "epoch": 1.2919161676646707, "grad_norm": 2.572810865782894, "learning_rate": 2.786799228257203e-06, "loss": 0.0664, "step": 863 }, { "epoch": 1.2934131736526946, "grad_norm": 2.9751400092489746, "learning_rate": 2.776262452601104e-06, "loss": 0.0685, "step": 864 }, { "epoch": 1.2949101796407185, "grad_norm": 2.989987104754685, "learning_rate": 2.7657379731234767e-06, "loss": 0.0711, "step": 865 }, { "epoch": 1.2964071856287425, "grad_norm": 3.0898161870148795, "learning_rate": 2.7552258480195348e-06, "loss": 0.057, "step": 866 }, { "epoch": 1.2979041916167664, "grad_norm": 2.545073465858091, "learning_rate": 2.744726135416177e-06, "loss": 0.0616, "step": 867 }, { "epoch": 1.2994011976047903, "grad_norm": 3.303956434437067, "learning_rate": 2.734238893371667e-06, "loss": 0.0627, "step": 868 }, { "epoch": 1.3008982035928143, "grad_norm": 3.0651042419665884, "learning_rate": 2.7237641798753084e-06, "loss": 0.0588, "step": 869 }, { "epoch": 1.3023952095808382, "grad_norm": 2.6823230849279587, "learning_rate": 2.7133020528471322e-06, "loss": 0.0714, "step": 870 }, { "epoch": 1.3038922155688621, "grad_norm": 3.2901512820108936, "learning_rate": 2.7028525701375765e-06, "loss": 0.0723, "step": 871 }, { "epoch": 1.3053892215568863, "grad_norm": 3.3443919921834464, "learning_rate": 2.6924157895271563e-06, "loss": 0.0778, "step": 872 }, { "epoch": 1.3068862275449102, "grad_norm": 2.2308790235676463, "learning_rate": 2.681991768726149e-06, "loss": 0.037, "step": 873 }, { "epoch": 1.3083832335329342, "grad_norm": 3.0212728584110278, "learning_rate": 2.671580565374282e-06, "loss": 0.076, "step": 874 }, { "epoch": 1.3098802395209581, "grad_norm": 3.1399777628320824, "learning_rate": 2.6611822370404038e-06, "loss": 0.0727, "step": 875 }, { "epoch": 1.311377245508982, "grad_norm": 3.081174156048668, "learning_rate": 2.6507968412221763e-06, "loss": 0.0607, "step": 876 }, { "epoch": 1.312874251497006, "grad_norm": 2.649857564612077, "learning_rate": 2.6404244353457427e-06, "loss": 0.0417, "step": 877 }, { "epoch": 1.31437125748503, "grad_norm": 2.599918883680195, "learning_rate": 2.6300650767654234e-06, "loss": 0.0432, "step": 878 }, { "epoch": 1.3158682634730539, "grad_norm": 3.580380277832072, "learning_rate": 2.619718822763394e-06, "loss": 0.0657, "step": 879 }, { "epoch": 1.3173652694610778, "grad_norm": 3.2007105154693205, "learning_rate": 2.6093857305493666e-06, "loss": 0.0847, "step": 880 }, { "epoch": 1.3188622754491017, "grad_norm": 2.6056514573638836, "learning_rate": 2.599065857260277e-06, "loss": 0.0644, "step": 881 }, { "epoch": 1.3203592814371259, "grad_norm": 4.073130744441204, "learning_rate": 2.588759259959962e-06, "loss": 0.085, "step": 882 }, { "epoch": 1.3218562874251498, "grad_norm": 2.2266704880773465, "learning_rate": 2.5784659956388534e-06, "loss": 0.0392, "step": 883 }, { "epoch": 1.3233532934131738, "grad_norm": 3.236603707089369, "learning_rate": 2.568186121213658e-06, "loss": 0.0618, "step": 884 }, { "epoch": 1.3248502994011977, "grad_norm": 3.3027857783922374, "learning_rate": 2.55791969352704e-06, "loss": 0.0541, "step": 885 }, { "epoch": 1.3263473053892216, "grad_norm": 3.618218651429424, "learning_rate": 2.547666769347312e-06, "loss": 0.0809, "step": 886 }, { "epoch": 1.3278443113772456, "grad_norm": 3.4597845429493645, "learning_rate": 2.537427405368119e-06, "loss": 0.0984, "step": 887 }, { "epoch": 1.3293413173652695, "grad_norm": 3.781776253564825, "learning_rate": 2.5272016582081236e-06, "loss": 0.0876, "step": 888 }, { "epoch": 1.3308383233532934, "grad_norm": 2.8947139939487547, "learning_rate": 2.5169895844106963e-06, "loss": 0.0743, "step": 889 }, { "epoch": 1.3323353293413174, "grad_norm": 3.4863246865194775, "learning_rate": 2.5067912404435952e-06, "loss": 0.0652, "step": 890 }, { "epoch": 1.3338323353293413, "grad_norm": 2.4718873896081406, "learning_rate": 2.4966066826986644e-06, "loss": 0.0549, "step": 891 }, { "epoch": 1.3353293413173652, "grad_norm": 3.2969577176194003, "learning_rate": 2.486435967491516e-06, "loss": 0.0668, "step": 892 }, { "epoch": 1.3368263473053892, "grad_norm": 3.183551884902365, "learning_rate": 2.476279151061221e-06, "loss": 0.0755, "step": 893 }, { "epoch": 1.3383233532934131, "grad_norm": 3.353358054446364, "learning_rate": 2.4661362895699903e-06, "loss": 0.0645, "step": 894 }, { "epoch": 1.339820359281437, "grad_norm": 2.8463638592051255, "learning_rate": 2.4560074391028784e-06, "loss": 0.0545, "step": 895 }, { "epoch": 1.341317365269461, "grad_norm": 2.7526380408387157, "learning_rate": 2.445892655667462e-06, "loss": 0.0488, "step": 896 }, { "epoch": 1.342814371257485, "grad_norm": 2.707019788450973, "learning_rate": 2.4357919951935342e-06, "loss": 0.0694, "step": 897 }, { "epoch": 1.3443113772455089, "grad_norm": 3.0887686299875616, "learning_rate": 2.425705513532798e-06, "loss": 0.0642, "step": 898 }, { "epoch": 1.3458083832335328, "grad_norm": 2.9540537708084305, "learning_rate": 2.4156332664585497e-06, "loss": 0.062, "step": 899 }, { "epoch": 1.347305389221557, "grad_norm": 2.958485757219164, "learning_rate": 2.4055753096653795e-06, "loss": 0.055, "step": 900 }, { "epoch": 1.348802395209581, "grad_norm": 3.237670135934891, "learning_rate": 2.395531698768857e-06, "loss": 0.0643, "step": 901 }, { "epoch": 1.3502994011976048, "grad_norm": 2.9135931007108264, "learning_rate": 2.3855024893052286e-06, "loss": 0.0604, "step": 902 }, { "epoch": 1.3517964071856288, "grad_norm": 2.7993094602182644, "learning_rate": 2.375487736731102e-06, "loss": 0.0563, "step": 903 }, { "epoch": 1.3532934131736527, "grad_norm": 3.359310944382079, "learning_rate": 2.365487496423152e-06, "loss": 0.0892, "step": 904 }, { "epoch": 1.3547904191616766, "grad_norm": 2.730955023722793, "learning_rate": 2.355501823677803e-06, "loss": 0.0767, "step": 905 }, { "epoch": 1.3562874251497006, "grad_norm": 3.1735381331462404, "learning_rate": 2.3455307737109338e-06, "loss": 0.0796, "step": 906 }, { "epoch": 1.3577844311377245, "grad_norm": 2.80267134343848, "learning_rate": 2.335574401657559e-06, "loss": 0.0627, "step": 907 }, { "epoch": 1.3592814371257484, "grad_norm": 2.3758320980732144, "learning_rate": 2.3256327625715345e-06, "loss": 0.0537, "step": 908 }, { "epoch": 1.3607784431137724, "grad_norm": 2.709520589092785, "learning_rate": 2.3157059114252534e-06, "loss": 0.0492, "step": 909 }, { "epoch": 1.3622754491017965, "grad_norm": 3.052935263804327, "learning_rate": 2.3057939031093346e-06, "loss": 0.0763, "step": 910 }, { "epoch": 1.3637724550898205, "grad_norm": 2.972732152827326, "learning_rate": 2.295896792432326e-06, "loss": 0.0639, "step": 911 }, { "epoch": 1.3652694610778444, "grad_norm": 2.300718894562955, "learning_rate": 2.2860146341203936e-06, "loss": 0.0564, "step": 912 }, { "epoch": 1.3667664670658684, "grad_norm": 2.4902233281817856, "learning_rate": 2.2761474828170338e-06, "loss": 0.0475, "step": 913 }, { "epoch": 1.3682634730538923, "grad_norm": 2.858999823385802, "learning_rate": 2.2662953930827546e-06, "loss": 0.0539, "step": 914 }, { "epoch": 1.3697604790419162, "grad_norm": 3.287496805043617, "learning_rate": 2.2564584193947796e-06, "loss": 0.0784, "step": 915 }, { "epoch": 1.3712574850299402, "grad_norm": 3.326393708005825, "learning_rate": 2.2466366161467528e-06, "loss": 0.0539, "step": 916 }, { "epoch": 1.372754491017964, "grad_norm": 2.7646243889448168, "learning_rate": 2.2368300376484303e-06, "loss": 0.0579, "step": 917 }, { "epoch": 1.374251497005988, "grad_norm": 2.8487221714394244, "learning_rate": 2.227038738125385e-06, "loss": 0.0669, "step": 918 }, { "epoch": 1.375748502994012, "grad_norm": 2.7757689971772797, "learning_rate": 2.2172627717187033e-06, "loss": 0.0672, "step": 919 }, { "epoch": 1.377245508982036, "grad_norm": 2.3169592405954167, "learning_rate": 2.207502192484685e-06, "loss": 0.0409, "step": 920 }, { "epoch": 1.3787425149700598, "grad_norm": 2.3271544092520924, "learning_rate": 2.1977570543945486e-06, "loss": 0.0534, "step": 921 }, { "epoch": 1.3802395209580838, "grad_norm": 2.778496301046775, "learning_rate": 2.188027411334131e-06, "loss": 0.0529, "step": 922 }, { "epoch": 1.3817365269461077, "grad_norm": 3.289505481315959, "learning_rate": 2.1783133171035886e-06, "loss": 0.0711, "step": 923 }, { "epoch": 1.3832335329341316, "grad_norm": 2.916761132281548, "learning_rate": 2.1686148254171012e-06, "loss": 0.0609, "step": 924 }, { "epoch": 1.3847305389221556, "grad_norm": 2.791877865863004, "learning_rate": 2.158931989902571e-06, "loss": 0.0624, "step": 925 }, { "epoch": 1.3862275449101795, "grad_norm": 2.8181931733026726, "learning_rate": 2.1492648641013305e-06, "loss": 0.0634, "step": 926 }, { "epoch": 1.3877245508982035, "grad_norm": 2.775551153237697, "learning_rate": 2.139613501467851e-06, "loss": 0.0499, "step": 927 }, { "epoch": 1.3892215568862276, "grad_norm": 2.895570828069663, "learning_rate": 2.1299779553694323e-06, "loss": 0.0669, "step": 928 }, { "epoch": 1.3907185628742516, "grad_norm": 2.7686405382861397, "learning_rate": 2.120358279085922e-06, "loss": 0.0499, "step": 929 }, { "epoch": 1.3922155688622755, "grad_norm": 3.249476284555271, "learning_rate": 2.1107545258094135e-06, "loss": 0.0769, "step": 930 }, { "epoch": 1.3937125748502994, "grad_norm": 3.223916680523391, "learning_rate": 2.101166748643955e-06, "loss": 0.0584, "step": 931 }, { "epoch": 1.3952095808383234, "grad_norm": 3.125248386357628, "learning_rate": 2.0915950006052555e-06, "loss": 0.0596, "step": 932 }, { "epoch": 1.3967065868263473, "grad_norm": 2.860476788800031, "learning_rate": 2.0820393346203855e-06, "loss": 0.0502, "step": 933 }, { "epoch": 1.3982035928143712, "grad_norm": 4.837346990180923, "learning_rate": 2.0724998035274947e-06, "loss": 0.0724, "step": 934 }, { "epoch": 1.3997005988023952, "grad_norm": 2.8626469496318427, "learning_rate": 2.0629764600755136e-06, "loss": 0.0642, "step": 935 }, { "epoch": 1.401197604790419, "grad_norm": 3.6147065364893347, "learning_rate": 2.053469356923865e-06, "loss": 0.0839, "step": 936 }, { "epoch": 1.402694610778443, "grad_norm": 3.1374455543870106, "learning_rate": 2.043978546642165e-06, "loss": 0.0549, "step": 937 }, { "epoch": 1.4041916167664672, "grad_norm": 3.3108505736876754, "learning_rate": 2.0345040817099433e-06, "loss": 0.0612, "step": 938 }, { "epoch": 1.4056886227544911, "grad_norm": 3.041489191728596, "learning_rate": 2.0250460145163457e-06, "loss": 0.0529, "step": 939 }, { "epoch": 1.407185628742515, "grad_norm": 2.550369966045829, "learning_rate": 2.0156043973598475e-06, "loss": 0.0515, "step": 940 }, { "epoch": 1.408682634730539, "grad_norm": 2.7434129516999675, "learning_rate": 2.006179282447963e-06, "loss": 0.0549, "step": 941 }, { "epoch": 1.410179640718563, "grad_norm": 2.613647167450779, "learning_rate": 1.996770721896957e-06, "loss": 0.063, "step": 942 }, { "epoch": 1.4116766467065869, "grad_norm": 2.902030241683709, "learning_rate": 1.987378767731557e-06, "loss": 0.0513, "step": 943 }, { "epoch": 1.4131736526946108, "grad_norm": 3.5704248467545066, "learning_rate": 1.9780034718846653e-06, "loss": 0.0769, "step": 944 }, { "epoch": 1.4146706586826348, "grad_norm": 2.779745110895862, "learning_rate": 1.968644886197073e-06, "loss": 0.0672, "step": 945 }, { "epoch": 1.4161676646706587, "grad_norm": 3.1770316351777668, "learning_rate": 1.9593030624171683e-06, "loss": 0.0652, "step": 946 }, { "epoch": 1.4176646706586826, "grad_norm": 3.3542317664878003, "learning_rate": 1.949978052200658e-06, "loss": 0.0684, "step": 947 }, { "epoch": 1.4191616766467066, "grad_norm": 2.883351476132326, "learning_rate": 1.9406699071102774e-06, "loss": 0.0453, "step": 948 }, { "epoch": 1.4206586826347305, "grad_norm": 4.607525388337446, "learning_rate": 1.9313786786155077e-06, "loss": 0.0888, "step": 949 }, { "epoch": 1.4221556886227544, "grad_norm": 2.8832042847977193, "learning_rate": 1.9221044180922833e-06, "loss": 0.046, "step": 950 }, { "epoch": 1.4236526946107784, "grad_norm": 3.2753063197555075, "learning_rate": 1.9128471768227203e-06, "loss": 0.0623, "step": 951 }, { "epoch": 1.4251497005988023, "grad_norm": 3.1567952568782722, "learning_rate": 1.9036070059948253e-06, "loss": 0.0748, "step": 952 }, { "epoch": 1.4266467065868262, "grad_norm": 3.036789004265012, "learning_rate": 1.8943839567022126e-06, "loss": 0.0728, "step": 953 }, { "epoch": 1.4281437125748502, "grad_norm": 3.0718999649479057, "learning_rate": 1.885178079943823e-06, "loss": 0.0642, "step": 954 }, { "epoch": 1.4296407185628741, "grad_norm": 3.457935123458112, "learning_rate": 1.8759894266236423e-06, "loss": 0.0835, "step": 955 }, { "epoch": 1.4311377245508983, "grad_norm": 3.7184407452741155, "learning_rate": 1.866818047550419e-06, "loss": 0.0985, "step": 956 }, { "epoch": 1.4326347305389222, "grad_norm": 2.6254187851278585, "learning_rate": 1.8576639934373836e-06, "loss": 0.0443, "step": 957 }, { "epoch": 1.4341317365269461, "grad_norm": 3.2544054634559068, "learning_rate": 1.8485273149019655e-06, "loss": 0.0539, "step": 958 }, { "epoch": 1.43562874251497, "grad_norm": 4.734513385537045, "learning_rate": 1.8394080624655197e-06, "loss": 0.0596, "step": 959 }, { "epoch": 1.437125748502994, "grad_norm": 2.6234424357835633, "learning_rate": 1.8303062865530407e-06, "loss": 0.0614, "step": 960 }, { "epoch": 1.438622754491018, "grad_norm": 2.9705404040273042, "learning_rate": 1.8212220374928874e-06, "loss": 0.0689, "step": 961 }, { "epoch": 1.4401197604790419, "grad_norm": 2.6344892857133178, "learning_rate": 1.8121553655165058e-06, "loss": 0.0814, "step": 962 }, { "epoch": 1.4416167664670658, "grad_norm": 2.343394911238407, "learning_rate": 1.8031063207581434e-06, "loss": 0.0513, "step": 963 }, { "epoch": 1.4431137724550898, "grad_norm": 3.2546543885963346, "learning_rate": 1.7940749532545832e-06, "loss": 0.0712, "step": 964 }, { "epoch": 1.4446107784431137, "grad_norm": 3.4756248591494567, "learning_rate": 1.7850613129448597e-06, "loss": 0.0578, "step": 965 }, { "epoch": 1.4461077844311379, "grad_norm": 3.1840474284521108, "learning_rate": 1.7760654496699876e-06, "loss": 0.0731, "step": 966 }, { "epoch": 1.4476047904191618, "grad_norm": 2.8324379010795617, "learning_rate": 1.767087413172676e-06, "loss": 0.0608, "step": 967 }, { "epoch": 1.4491017964071857, "grad_norm": 2.7500528472857626, "learning_rate": 1.7581272530970666e-06, "loss": 0.0641, "step": 968 }, { "epoch": 1.4505988023952097, "grad_norm": 2.7778441342624607, "learning_rate": 1.749185018988454e-06, "loss": 0.0606, "step": 969 }, { "epoch": 1.4520958083832336, "grad_norm": 2.8879521669283497, "learning_rate": 1.7402607602930106e-06, "loss": 0.0541, "step": 970 }, { "epoch": 1.4535928143712575, "grad_norm": 2.931823987220164, "learning_rate": 1.731354526357507e-06, "loss": 0.0798, "step": 971 }, { "epoch": 1.4550898203592815, "grad_norm": 3.1000909688373617, "learning_rate": 1.7224663664290537e-06, "loss": 0.0643, "step": 972 }, { "epoch": 1.4565868263473054, "grad_norm": 3.8166857167738564, "learning_rate": 1.713596329654817e-06, "loss": 0.1024, "step": 973 }, { "epoch": 1.4580838323353293, "grad_norm": 2.7598971539943213, "learning_rate": 1.7047444650817518e-06, "loss": 0.0598, "step": 974 }, { "epoch": 1.4595808383233533, "grad_norm": 3.3047976783086277, "learning_rate": 1.6959108216563319e-06, "loss": 0.068, "step": 975 }, { "epoch": 1.4610778443113772, "grad_norm": 4.200423417297693, "learning_rate": 1.6870954482242707e-06, "loss": 0.0627, "step": 976 }, { "epoch": 1.4625748502994012, "grad_norm": 3.2231415110866477, "learning_rate": 1.6782983935302639e-06, "loss": 0.0659, "step": 977 }, { "epoch": 1.464071856287425, "grad_norm": 3.2330111115709217, "learning_rate": 1.669519706217711e-06, "loss": 0.0608, "step": 978 }, { "epoch": 1.465568862275449, "grad_norm": 2.2734010821939195, "learning_rate": 1.6607594348284512e-06, "loss": 0.0311, "step": 979 }, { "epoch": 1.467065868263473, "grad_norm": 3.0501719897773865, "learning_rate": 1.652017627802487e-06, "loss": 0.0637, "step": 980 }, { "epoch": 1.468562874251497, "grad_norm": 4.468268401218053, "learning_rate": 1.6432943334777273e-06, "loss": 0.0587, "step": 981 }, { "epoch": 1.4700598802395208, "grad_norm": 3.2746154677080432, "learning_rate": 1.6345896000897122e-06, "loss": 0.051, "step": 982 }, { "epoch": 1.471556886227545, "grad_norm": 3.210942435599958, "learning_rate": 1.6259034757713537e-06, "loss": 0.0865, "step": 983 }, { "epoch": 1.473053892215569, "grad_norm": 2.978054972606126, "learning_rate": 1.6172360085526567e-06, "loss": 0.0747, "step": 984 }, { "epoch": 1.4745508982035929, "grad_norm": 2.850940066216067, "learning_rate": 1.6085872463604678e-06, "loss": 0.0573, "step": 985 }, { "epoch": 1.4760479041916168, "grad_norm": 3.5538161945490794, "learning_rate": 1.5999572370182016e-06, "loss": 0.0627, "step": 986 }, { "epoch": 1.4775449101796407, "grad_norm": 2.5058704489844232, "learning_rate": 1.5913460282455807e-06, "loss": 0.0477, "step": 987 }, { "epoch": 1.4790419161676647, "grad_norm": 3.212456224731785, "learning_rate": 1.5827536676583643e-06, "loss": 0.0699, "step": 988 }, { "epoch": 1.4805389221556886, "grad_norm": 2.864523149333281, "learning_rate": 1.5741802027680963e-06, "loss": 0.0719, "step": 989 }, { "epoch": 1.4820359281437125, "grad_norm": 2.4673573652041467, "learning_rate": 1.5656256809818343e-06, "loss": 0.0404, "step": 990 }, { "epoch": 1.4835329341317365, "grad_norm": 3.060298788599623, "learning_rate": 1.5570901496018902e-06, "loss": 0.0715, "step": 991 }, { "epoch": 1.4850299401197604, "grad_norm": 2.5192366953545156, "learning_rate": 1.54857365582557e-06, "loss": 0.0572, "step": 992 }, { "epoch": 1.4865269461077844, "grad_norm": 3.3646641260266774, "learning_rate": 1.5400762467449048e-06, "loss": 0.0805, "step": 993 }, { "epoch": 1.4880239520958085, "grad_norm": 2.75628210745351, "learning_rate": 1.5315979693464039e-06, "loss": 0.0448, "step": 994 }, { "epoch": 1.4895209580838324, "grad_norm": 3.173603618898852, "learning_rate": 1.5231388705107842e-06, "loss": 0.0745, "step": 995 }, { "epoch": 1.4910179640718564, "grad_norm": 2.8196942107086316, "learning_rate": 1.5146989970127158e-06, "loss": 0.0503, "step": 996 }, { "epoch": 1.4925149700598803, "grad_norm": 3.480163629304613, "learning_rate": 1.5062783955205606e-06, "loss": 0.0659, "step": 997 }, { "epoch": 1.4940119760479043, "grad_norm": 2.8721298646353683, "learning_rate": 1.4978771125961177e-06, "loss": 0.0689, "step": 998 }, { "epoch": 1.4955089820359282, "grad_norm": 2.8080807069117144, "learning_rate": 1.4894951946943625e-06, "loss": 0.0539, "step": 999 }, { "epoch": 1.4970059880239521, "grad_norm": 2.631108540322156, "learning_rate": 1.4811326881631937e-06, "loss": 0.0472, "step": 1000 }, { "epoch": 1.4970059880239521, "eval_loss": 0.14734399318695068, "eval_runtime": 1.8212, "eval_samples_per_second": 29.651, "eval_steps_per_second": 7.687, "step": 1000 }, { "epoch": 1.498502994011976, "grad_norm": 3.0782258427282168, "learning_rate": 1.4727896392431701e-06, "loss": 0.0618, "step": 1001 }, { "epoch": 1.5, "grad_norm": 3.15505651598308, "learning_rate": 1.4644660940672628e-06, "loss": 0.0776, "step": 1002 }, { "epoch": 1.501497005988024, "grad_norm": 3.110527298449145, "learning_rate": 1.4561620986605968e-06, "loss": 0.0705, "step": 1003 }, { "epoch": 1.5029940119760479, "grad_norm": 2.575319261305866, "learning_rate": 1.4478776989401949e-06, "loss": 0.071, "step": 1004 }, { "epoch": 1.5044910179640718, "grad_norm": 3.948704737422377, "learning_rate": 1.4396129407147286e-06, "loss": 0.0707, "step": 1005 }, { "epoch": 1.5059880239520957, "grad_norm": 2.8458966280584126, "learning_rate": 1.4313678696842559e-06, "loss": 0.0535, "step": 1006 }, { "epoch": 1.5074850299401197, "grad_norm": 2.3134260304157555, "learning_rate": 1.4231425314399783e-06, "loss": 0.0632, "step": 1007 }, { "epoch": 1.5089820359281436, "grad_norm": 2.890655218686902, "learning_rate": 1.4149369714639856e-06, "loss": 0.0655, "step": 1008 }, { "epoch": 1.5104790419161676, "grad_norm": 3.255789605475453, "learning_rate": 1.4067512351289998e-06, "loss": 0.0629, "step": 1009 }, { "epoch": 1.5119760479041915, "grad_norm": 2.2895283799123107, "learning_rate": 1.3985853676981316e-06, "loss": 0.0631, "step": 1010 }, { "epoch": 1.5134730538922154, "grad_norm": 2.2603392588807325, "learning_rate": 1.390439414324623e-06, "loss": 0.0354, "step": 1011 }, { "epoch": 1.5149700598802394, "grad_norm": 2.715626496624367, "learning_rate": 1.3823134200516043e-06, "loss": 0.0529, "step": 1012 }, { "epoch": 1.5164670658682635, "grad_norm": 3.416256678375848, "learning_rate": 1.3742074298118403e-06, "loss": 0.0572, "step": 1013 }, { "epoch": 1.5179640718562875, "grad_norm": 2.6181338042309856, "learning_rate": 1.366121488427481e-06, "loss": 0.0536, "step": 1014 }, { "epoch": 1.5194610778443114, "grad_norm": 2.2870102169272006, "learning_rate": 1.3580556406098195e-06, "loss": 0.0632, "step": 1015 }, { "epoch": 1.5209580838323353, "grad_norm": 2.975645473986044, "learning_rate": 1.3500099309590397e-06, "loss": 0.0585, "step": 1016 }, { "epoch": 1.5224550898203593, "grad_norm": 2.9205550395989546, "learning_rate": 1.3419844039639723e-06, "loss": 0.0601, "step": 1017 }, { "epoch": 1.5239520958083832, "grad_norm": 2.749798460735675, "learning_rate": 1.3339791040018479e-06, "loss": 0.0654, "step": 1018 }, { "epoch": 1.5254491017964071, "grad_norm": 2.7267760374633774, "learning_rate": 1.3259940753380484e-06, "loss": 0.0559, "step": 1019 }, { "epoch": 1.5269461077844313, "grad_norm": 2.50636253157473, "learning_rate": 1.3180293621258694e-06, "loss": 0.0467, "step": 1020 }, { "epoch": 1.5284431137724552, "grad_norm": 2.9240253084905636, "learning_rate": 1.3100850084062694e-06, "loss": 0.0525, "step": 1021 }, { "epoch": 1.5299401197604792, "grad_norm": 2.908722643090983, "learning_rate": 1.3021610581076316e-06, "loss": 0.0506, "step": 1022 }, { "epoch": 1.531437125748503, "grad_norm": 2.391999043798078, "learning_rate": 1.2942575550455127e-06, "loss": 0.0596, "step": 1023 }, { "epoch": 1.532934131736527, "grad_norm": 2.9400981431186852, "learning_rate": 1.2863745429224145e-06, "loss": 0.0573, "step": 1024 }, { "epoch": 1.534431137724551, "grad_norm": 2.9353676304048992, "learning_rate": 1.2785120653275268e-06, "loss": 0.0536, "step": 1025 }, { "epoch": 1.535928143712575, "grad_norm": 3.3866174825512703, "learning_rate": 1.270670165736499e-06, "loss": 0.066, "step": 1026 }, { "epoch": 1.5374251497005988, "grad_norm": 3.1330311185289346, "learning_rate": 1.2628488875111878e-06, "loss": 0.0869, "step": 1027 }, { "epoch": 1.5389221556886228, "grad_norm": 2.7448614406818685, "learning_rate": 1.2550482738994284e-06, "loss": 0.0827, "step": 1028 }, { "epoch": 1.5404191616766467, "grad_norm": 2.9697208647085636, "learning_rate": 1.2472683680347914e-06, "loss": 0.086, "step": 1029 }, { "epoch": 1.5419161676646707, "grad_norm": 2.962461214957614, "learning_rate": 1.239509212936343e-06, "loss": 0.0496, "step": 1030 }, { "epoch": 1.5434131736526946, "grad_norm": 2.8172546404327354, "learning_rate": 1.2317708515084032e-06, "loss": 0.0508, "step": 1031 }, { "epoch": 1.5449101796407185, "grad_norm": 3.010933806019576, "learning_rate": 1.22405332654032e-06, "loss": 0.0538, "step": 1032 }, { "epoch": 1.5464071856287425, "grad_norm": 3.3676929777981535, "learning_rate": 1.216356680706222e-06, "loss": 0.0586, "step": 1033 }, { "epoch": 1.5479041916167664, "grad_norm": 3.346261972316183, "learning_rate": 1.2086809565647877e-06, "loss": 0.079, "step": 1034 }, { "epoch": 1.5494011976047903, "grad_norm": 3.714765974814343, "learning_rate": 1.2010261965590104e-06, "loss": 0.0732, "step": 1035 }, { "epoch": 1.5508982035928143, "grad_norm": 2.8392837013784584, "learning_rate": 1.1933924430159571e-06, "loss": 0.0391, "step": 1036 }, { "epoch": 1.5523952095808382, "grad_norm": 3.04686310566197, "learning_rate": 1.185779738146543e-06, "loss": 0.0618, "step": 1037 }, { "epoch": 1.5538922155688621, "grad_norm": 2.92117385305566, "learning_rate": 1.1781881240452958e-06, "loss": 0.0438, "step": 1038 }, { "epoch": 1.555389221556886, "grad_norm": 3.2608432816062356, "learning_rate": 1.170617642690121e-06, "loss": 0.07, "step": 1039 }, { "epoch": 1.55688622754491, "grad_norm": 2.7478502622897425, "learning_rate": 1.1630683359420653e-06, "loss": 0.0498, "step": 1040 }, { "epoch": 1.5583832335329342, "grad_norm": 2.923318127512918, "learning_rate": 1.1555402455450964e-06, "loss": 0.0591, "step": 1041 }, { "epoch": 1.5598802395209581, "grad_norm": 2.8815117120196807, "learning_rate": 1.1480334131258626e-06, "loss": 0.0523, "step": 1042 }, { "epoch": 1.561377245508982, "grad_norm": 3.538109646437672, "learning_rate": 1.1405478801934695e-06, "loss": 0.0734, "step": 1043 }, { "epoch": 1.562874251497006, "grad_norm": 3.3531926887113754, "learning_rate": 1.1330836881392405e-06, "loss": 0.0506, "step": 1044 }, { "epoch": 1.56437125748503, "grad_norm": 2.458749990348773, "learning_rate": 1.1256408782365008e-06, "loss": 0.0467, "step": 1045 }, { "epoch": 1.5658682634730539, "grad_norm": 3.1569569648592144, "learning_rate": 1.11821949164034e-06, "loss": 0.0592, "step": 1046 }, { "epoch": 1.5673652694610778, "grad_norm": 3.24163449495841, "learning_rate": 1.110819569387388e-06, "loss": 0.073, "step": 1047 }, { "epoch": 1.568862275449102, "grad_norm": 2.877134961548, "learning_rate": 1.103441152395588e-06, "loss": 0.0573, "step": 1048 }, { "epoch": 1.5703592814371259, "grad_norm": 2.7315848457440985, "learning_rate": 1.0960842814639666e-06, "loss": 0.0582, "step": 1049 }, { "epoch": 1.5718562874251498, "grad_norm": 2.7235075146268777, "learning_rate": 1.088748997272414e-06, "loss": 0.0521, "step": 1050 }, { "epoch": 1.5733532934131738, "grad_norm": 2.3498401998556364, "learning_rate": 1.081435340381457e-06, "loss": 0.0325, "step": 1051 }, { "epoch": 1.5748502994011977, "grad_norm": 3.575699475272151, "learning_rate": 1.0741433512320316e-06, "loss": 0.0691, "step": 1052 }, { "epoch": 1.5763473053892216, "grad_norm": 2.776410840180846, "learning_rate": 1.0668730701452634e-06, "loss": 0.0469, "step": 1053 }, { "epoch": 1.5778443113772456, "grad_norm": 3.354141313946311, "learning_rate": 1.0596245373222424e-06, "loss": 0.0819, "step": 1054 }, { "epoch": 1.5793413173652695, "grad_norm": 3.3882193260809226, "learning_rate": 1.0523977928438023e-06, "loss": 0.0921, "step": 1055 }, { "epoch": 1.5808383233532934, "grad_norm": 2.632451319852037, "learning_rate": 1.045192876670298e-06, "loss": 0.0531, "step": 1056 }, { "epoch": 1.5823353293413174, "grad_norm": 2.521479662646235, "learning_rate": 1.0380098286413809e-06, "loss": 0.0343, "step": 1057 }, { "epoch": 1.5838323353293413, "grad_norm": 2.9731872290885133, "learning_rate": 1.0308486884757868e-06, "loss": 0.0716, "step": 1058 }, { "epoch": 1.5853293413173652, "grad_norm": 2.75326974453219, "learning_rate": 1.0237094957711103e-06, "loss": 0.0533, "step": 1059 }, { "epoch": 1.5868263473053892, "grad_norm": 3.296537263445971, "learning_rate": 1.0165922900035886e-06, "loss": 0.0362, "step": 1060 }, { "epoch": 1.5883233532934131, "grad_norm": 2.457718881129073, "learning_rate": 1.0094971105278768e-06, "loss": 0.045, "step": 1061 }, { "epoch": 1.589820359281437, "grad_norm": 2.805366638088781, "learning_rate": 1.0024239965768417e-06, "loss": 0.0746, "step": 1062 }, { "epoch": 1.591317365269461, "grad_norm": 2.727931243853847, "learning_rate": 9.953729872613355e-07, "loss": 0.0858, "step": 1063 }, { "epoch": 1.592814371257485, "grad_norm": 2.613576837466677, "learning_rate": 9.883441215699824e-07, "loss": 0.0698, "step": 1064 }, { "epoch": 1.5943113772455089, "grad_norm": 3.187980163578163, "learning_rate": 9.813374383689645e-07, "loss": 0.0612, "step": 1065 }, { "epoch": 1.5958083832335328, "grad_norm": 3.7667457389510033, "learning_rate": 9.74352976401805e-07, "loss": 0.0758, "step": 1066 }, { "epoch": 1.5973053892215567, "grad_norm": 3.6041227185191693, "learning_rate": 9.673907742891542e-07, "loss": 0.073, "step": 1067 }, { "epoch": 1.5988023952095807, "grad_norm": 2.9913367486844527, "learning_rate": 9.604508705285765e-07, "loss": 0.0604, "step": 1068 }, { "epoch": 1.6002994011976048, "grad_norm": 2.7078055671750008, "learning_rate": 9.535333034943395e-07, "loss": 0.0587, "step": 1069 }, { "epoch": 1.6017964071856288, "grad_norm": 2.892738994583013, "learning_rate": 9.466381114371942e-07, "loss": 0.0527, "step": 1070 }, { "epoch": 1.6032934131736527, "grad_norm": 3.2033255447879396, "learning_rate": 9.397653324841738e-07, "loss": 0.0549, "step": 1071 }, { "epoch": 1.6047904191616766, "grad_norm": 3.552050968269877, "learning_rate": 9.329150046383773e-07, "loss": 0.0783, "step": 1072 }, { "epoch": 1.6062874251497006, "grad_norm": 3.2385085318988662, "learning_rate": 9.260871657787601e-07, "loss": 0.0494, "step": 1073 }, { "epoch": 1.6077844311377245, "grad_norm": 3.303464925612797, "learning_rate": 9.192818536599213e-07, "loss": 0.0633, "step": 1074 }, { "epoch": 1.6092814371257484, "grad_norm": 3.0207804383767343, "learning_rate": 9.124991059119021e-07, "loss": 0.0661, "step": 1075 }, { "epoch": 1.6107784431137726, "grad_norm": 2.4521917191366804, "learning_rate": 9.057389600399719e-07, "loss": 0.0483, "step": 1076 }, { "epoch": 1.6122754491017965, "grad_norm": 3.1392927415036245, "learning_rate": 8.99001453424424e-07, "loss": 0.0429, "step": 1077 }, { "epoch": 1.6137724550898205, "grad_norm": 2.762854496388237, "learning_rate": 8.922866233203681e-07, "loss": 0.0665, "step": 1078 }, { "epoch": 1.6152694610778444, "grad_norm": 2.8504356430269624, "learning_rate": 8.855945068575184e-07, "loss": 0.0547, "step": 1079 }, { "epoch": 1.6167664670658684, "grad_norm": 3.0782360334533747, "learning_rate": 8.789251410400024e-07, "loss": 0.0542, "step": 1080 }, { "epoch": 1.6182634730538923, "grad_norm": 3.0483925010574, "learning_rate": 8.722785627461439e-07, "loss": 0.0678, "step": 1081 }, { "epoch": 1.6197604790419162, "grad_norm": 2.8454618154844, "learning_rate": 8.65654808728259e-07, "loss": 0.0503, "step": 1082 }, { "epoch": 1.6212574850299402, "grad_norm": 2.673344129658176, "learning_rate": 8.590539156124628e-07, "loss": 0.0734, "step": 1083 }, { "epoch": 1.622754491017964, "grad_norm": 2.8075328392222896, "learning_rate": 8.524759198984567e-07, "loss": 0.0531, "step": 1084 }, { "epoch": 1.624251497005988, "grad_norm": 2.82493843037488, "learning_rate": 8.459208579593331e-07, "loss": 0.0641, "step": 1085 }, { "epoch": 1.625748502994012, "grad_norm": 2.463824462585041, "learning_rate": 8.393887660413719e-07, "loss": 0.0414, "step": 1086 }, { "epoch": 1.627245508982036, "grad_norm": 3.2916983539250126, "learning_rate": 8.328796802638373e-07, "loss": 0.0421, "step": 1087 }, { "epoch": 1.6287425149700598, "grad_norm": 2.958765635444716, "learning_rate": 8.263936366187825e-07, "loss": 0.0541, "step": 1088 }, { "epoch": 1.6302395209580838, "grad_norm": 2.967440980329771, "learning_rate": 8.199306709708505e-07, "loss": 0.0673, "step": 1089 }, { "epoch": 1.6317365269461077, "grad_norm": 2.9592323652987114, "learning_rate": 8.134908190570723e-07, "loss": 0.0627, "step": 1090 }, { "epoch": 1.6332335329341316, "grad_norm": 3.146495312506695, "learning_rate": 8.07074116486673e-07, "loss": 0.0512, "step": 1091 }, { "epoch": 1.6347305389221556, "grad_norm": 3.081402593776041, "learning_rate": 8.006805987408705e-07, "loss": 0.0658, "step": 1092 }, { "epoch": 1.6362275449101795, "grad_norm": 3.3777477576606874, "learning_rate": 7.943103011726821e-07, "loss": 0.0655, "step": 1093 }, { "epoch": 1.6377245508982035, "grad_norm": 3.1569646766109236, "learning_rate": 7.879632590067354e-07, "loss": 0.0494, "step": 1094 }, { "epoch": 1.6392215568862274, "grad_norm": 3.1940930306962945, "learning_rate": 7.816395073390581e-07, "loss": 0.0728, "step": 1095 }, { "epoch": 1.6407185628742516, "grad_norm": 3.240576619979089, "learning_rate": 7.753390811368972e-07, "loss": 0.0883, "step": 1096 }, { "epoch": 1.6422155688622755, "grad_norm": 2.5962751391117402, "learning_rate": 7.69062015238522e-07, "loss": 0.0475, "step": 1097 }, { "epoch": 1.6437125748502994, "grad_norm": 3.1361341463363575, "learning_rate": 7.628083443530287e-07, "loss": 0.0541, "step": 1098 }, { "epoch": 1.6452095808383234, "grad_norm": 3.119947049360168, "learning_rate": 7.565781030601526e-07, "loss": 0.0508, "step": 1099 }, { "epoch": 1.6467065868263473, "grad_norm": 2.7921810886304255, "learning_rate": 7.503713258100726e-07, "loss": 0.0679, "step": 1100 }, { "epoch": 1.6482035928143712, "grad_norm": 2.6500201499721316, "learning_rate": 7.441880469232244e-07, "loss": 0.074, "step": 1101 }, { "epoch": 1.6497005988023952, "grad_norm": 2.723815058698919, "learning_rate": 7.380283005901084e-07, "loss": 0.074, "step": 1102 }, { "epoch": 1.6511976047904193, "grad_norm": 3.266072358485103, "learning_rate": 7.318921208711044e-07, "loss": 0.0678, "step": 1103 }, { "epoch": 1.6526946107784433, "grad_norm": 3.3277777369322616, "learning_rate": 7.257795416962754e-07, "loss": 0.0762, "step": 1104 }, { "epoch": 1.6541916167664672, "grad_norm": 1.9972856848968734, "learning_rate": 7.19690596865188e-07, "loss": 0.0403, "step": 1105 }, { "epoch": 1.6556886227544911, "grad_norm": 2.500291908595557, "learning_rate": 7.136253200467231e-07, "loss": 0.0546, "step": 1106 }, { "epoch": 1.657185628742515, "grad_norm": 2.693251563506186, "learning_rate": 7.075837447788875e-07, "loss": 0.052, "step": 1107 }, { "epoch": 1.658682634730539, "grad_norm": 3.2231961597529306, "learning_rate": 7.015659044686307e-07, "loss": 0.0565, "step": 1108 }, { "epoch": 1.660179640718563, "grad_norm": 2.5787118334666546, "learning_rate": 6.955718323916594e-07, "loss": 0.0512, "step": 1109 }, { "epoch": 1.6616766467065869, "grad_norm": 3.315551286235718, "learning_rate": 6.896015616922535e-07, "loss": 0.0619, "step": 1110 }, { "epoch": 1.6631736526946108, "grad_norm": 2.634331097835363, "learning_rate": 6.836551253830836e-07, "loss": 0.0442, "step": 1111 }, { "epoch": 1.6646706586826348, "grad_norm": 2.9750552184908736, "learning_rate": 6.777325563450282e-07, "loss": 0.0691, "step": 1112 }, { "epoch": 1.6661676646706587, "grad_norm": 2.556168080553656, "learning_rate": 6.71833887326988e-07, "loss": 0.055, "step": 1113 }, { "epoch": 1.6676646706586826, "grad_norm": 3.052461540369427, "learning_rate": 6.659591509457125e-07, "loss": 0.0721, "step": 1114 }, { "epoch": 1.6691616766467066, "grad_norm": 2.93439361565189, "learning_rate": 6.601083796856139e-07, "loss": 0.0716, "step": 1115 }, { "epoch": 1.6706586826347305, "grad_norm": 3.1003324630727214, "learning_rate": 6.542816058985896e-07, "loss": 0.0732, "step": 1116 }, { "epoch": 1.6721556886227544, "grad_norm": 3.677060049977448, "learning_rate": 6.484788618038407e-07, "loss": 0.0552, "step": 1117 }, { "epoch": 1.6736526946107784, "grad_norm": 2.90686570137882, "learning_rate": 6.427001794876974e-07, "loss": 0.0798, "step": 1118 }, { "epoch": 1.6751497005988023, "grad_norm": 2.6798267490216907, "learning_rate": 6.369455909034406e-07, "loss": 0.0611, "step": 1119 }, { "epoch": 1.6766467065868262, "grad_norm": 2.7363552131080113, "learning_rate": 6.312151278711237e-07, "loss": 0.066, "step": 1120 }, { "epoch": 1.6781437125748502, "grad_norm": 3.149428633486324, "learning_rate": 6.255088220773986e-07, "loss": 0.0602, "step": 1121 }, { "epoch": 1.6796407185628741, "grad_norm": 2.945599741418467, "learning_rate": 6.198267050753387e-07, "loss": 0.0569, "step": 1122 }, { "epoch": 1.681137724550898, "grad_norm": 3.4044000569033557, "learning_rate": 6.141688082842645e-07, "loss": 0.0633, "step": 1123 }, { "epoch": 1.6826347305389222, "grad_norm": 3.5289531509718466, "learning_rate": 6.085351629895736e-07, "loss": 0.0722, "step": 1124 }, { "epoch": 1.6841317365269461, "grad_norm": 2.356538902722542, "learning_rate": 6.029258003425603e-07, "loss": 0.0506, "step": 1125 }, { "epoch": 1.68562874251497, "grad_norm": 3.042385473392552, "learning_rate": 5.973407513602514e-07, "loss": 0.0676, "step": 1126 }, { "epoch": 1.687125748502994, "grad_norm": 2.670933534540491, "learning_rate": 5.917800469252288e-07, "loss": 0.0595, "step": 1127 }, { "epoch": 1.688622754491018, "grad_norm": 2.6855496609939435, "learning_rate": 5.862437177854629e-07, "loss": 0.0454, "step": 1128 }, { "epoch": 1.6901197604790419, "grad_norm": 3.1340970768219183, "learning_rate": 5.807317945541407e-07, "loss": 0.0667, "step": 1129 }, { "epoch": 1.6916167664670658, "grad_norm": 2.5473935037731192, "learning_rate": 5.752443077094927e-07, "loss": 0.0522, "step": 1130 }, { "epoch": 1.69311377245509, "grad_norm": 3.2837325390213827, "learning_rate": 5.697812875946329e-07, "loss": 0.0588, "step": 1131 }, { "epoch": 1.694610778443114, "grad_norm": 2.5635115662533767, "learning_rate": 5.643427644173838e-07, "loss": 0.0384, "step": 1132 }, { "epoch": 1.6961077844311379, "grad_norm": 2.910159959492185, "learning_rate": 5.589287682501132e-07, "loss": 0.0789, "step": 1133 }, { "epoch": 1.6976047904191618, "grad_norm": 2.7661427214771415, "learning_rate": 5.535393290295643e-07, "loss": 0.0602, "step": 1134 }, { "epoch": 1.6991017964071857, "grad_norm": 3.517653202815707, "learning_rate": 5.481744765566937e-07, "loss": 0.0806, "step": 1135 }, { "epoch": 1.7005988023952097, "grad_norm": 2.857772014620174, "learning_rate": 5.428342404965076e-07, "loss": 0.052, "step": 1136 }, { "epoch": 1.7020958083832336, "grad_norm": 2.8216196099211386, "learning_rate": 5.375186503778929e-07, "loss": 0.0461, "step": 1137 }, { "epoch": 1.7035928143712575, "grad_norm": 3.9388670478839365, "learning_rate": 5.322277355934557e-07, "loss": 0.0782, "step": 1138 }, { "epoch": 1.7050898203592815, "grad_norm": 3.0953444804697052, "learning_rate": 5.269615253993615e-07, "loss": 0.0607, "step": 1139 }, { "epoch": 1.7065868263473054, "grad_norm": 4.180120610212145, "learning_rate": 5.217200489151714e-07, "loss": 0.0775, "step": 1140 }, { "epoch": 1.7080838323353293, "grad_norm": 3.7802093511335193, "learning_rate": 5.165033351236803e-07, "loss": 0.057, "step": 1141 }, { "epoch": 1.7095808383233533, "grad_norm": 3.073172127593169, "learning_rate": 5.113114128707592e-07, "loss": 0.0681, "step": 1142 }, { "epoch": 1.7110778443113772, "grad_norm": 3.67576262511222, "learning_rate": 5.061443108651909e-07, "loss": 0.0748, "step": 1143 }, { "epoch": 1.7125748502994012, "grad_norm": 3.1421726530663845, "learning_rate": 5.010020576785174e-07, "loss": 0.0626, "step": 1144 }, { "epoch": 1.714071856287425, "grad_norm": 2.551163985329783, "learning_rate": 4.958846817448776e-07, "loss": 0.0442, "step": 1145 }, { "epoch": 1.715568862275449, "grad_norm": 2.319453900826182, "learning_rate": 4.907922113608532e-07, "loss": 0.0357, "step": 1146 }, { "epoch": 1.717065868263473, "grad_norm": 2.6030666804694795, "learning_rate": 4.857246746853067e-07, "loss": 0.0704, "step": 1147 }, { "epoch": 1.718562874251497, "grad_norm": 3.382639120602234, "learning_rate": 4.806820997392325e-07, "loss": 0.0574, "step": 1148 }, { "epoch": 1.7200598802395208, "grad_norm": 2.564436066892517, "learning_rate": 4.7566451440559715e-07, "loss": 0.0465, "step": 1149 }, { "epoch": 1.7215568862275448, "grad_norm": 2.5568923263673113, "learning_rate": 4.7067194642919036e-07, "loss": 0.0448, "step": 1150 }, { "epoch": 1.7230538922155687, "grad_norm": 3.003529205912282, "learning_rate": 4.657044234164626e-07, "loss": 0.0879, "step": 1151 }, { "epoch": 1.7245508982035929, "grad_norm": 2.835188469518544, "learning_rate": 4.607619728353818e-07, "loss": 0.0544, "step": 1152 }, { "epoch": 1.7260479041916168, "grad_norm": 2.9269853375240062, "learning_rate": 4.5584462201527737e-07, "loss": 0.0642, "step": 1153 }, { "epoch": 1.7275449101796407, "grad_norm": 2.956743739607413, "learning_rate": 4.50952398146689e-07, "loss": 0.0649, "step": 1154 }, { "epoch": 1.7290419161676647, "grad_norm": 2.73991236968892, "learning_rate": 4.4608532828121445e-07, "loss": 0.056, "step": 1155 }, { "epoch": 1.7305389221556886, "grad_norm": 3.143837793544217, "learning_rate": 4.4124343933136525e-07, "loss": 0.0692, "step": 1156 }, { "epoch": 1.7320359281437125, "grad_norm": 2.3884982062262123, "learning_rate": 4.364267580704129e-07, "loss": 0.0545, "step": 1157 }, { "epoch": 1.7335329341317365, "grad_norm": 2.8282512858815525, "learning_rate": 4.3163531113224466e-07, "loss": 0.069, "step": 1158 }, { "epoch": 1.7350299401197606, "grad_norm": 3.1094816050058744, "learning_rate": 4.268691250112128e-07, "loss": 0.0763, "step": 1159 }, { "epoch": 1.7365269461077846, "grad_norm": 3.1020277022835905, "learning_rate": 4.221282260619891e-07, "loss": 0.0568, "step": 1160 }, { "epoch": 1.7380239520958085, "grad_norm": 3.7802721163415147, "learning_rate": 4.1741264049942055e-07, "loss": 0.0766, "step": 1161 }, { "epoch": 1.7395209580838324, "grad_norm": 2.4215221056123033, "learning_rate": 4.127223943983849e-07, "loss": 0.048, "step": 1162 }, { "epoch": 1.7410179640718564, "grad_norm": 2.7495749060515897, "learning_rate": 4.080575136936427e-07, "loss": 0.0634, "step": 1163 }, { "epoch": 1.7425149700598803, "grad_norm": 2.8190862146643036, "learning_rate": 4.03418024179697e-07, "loss": 0.0551, "step": 1164 }, { "epoch": 1.7440119760479043, "grad_norm": 2.4623224507674433, "learning_rate": 3.9880395151065174e-07, "loss": 0.0485, "step": 1165 }, { "epoch": 1.7455089820359282, "grad_norm": 4.478459023643514, "learning_rate": 3.9421532120006544e-07, "loss": 0.0718, "step": 1166 }, { "epoch": 1.7470059880239521, "grad_norm": 3.436219125624933, "learning_rate": 3.8965215862081627e-07, "loss": 0.0404, "step": 1167 }, { "epoch": 1.748502994011976, "grad_norm": 3.350042939008841, "learning_rate": 3.851144890049535e-07, "loss": 0.0644, "step": 1168 }, { "epoch": 1.75, "grad_norm": 2.962331362308292, "learning_rate": 3.8060233744356634e-07, "loss": 0.0677, "step": 1169 }, { "epoch": 1.751497005988024, "grad_norm": 3.066629382832869, "learning_rate": 3.761157288866418e-07, "loss": 0.0636, "step": 1170 }, { "epoch": 1.7529940119760479, "grad_norm": 2.8928122337879802, "learning_rate": 3.7165468814292504e-07, "loss": 0.0706, "step": 1171 }, { "epoch": 1.7544910179640718, "grad_norm": 3.9396721689530496, "learning_rate": 3.672192398797858e-07, "loss": 0.0788, "step": 1172 }, { "epoch": 1.7559880239520957, "grad_norm": 2.48980579545141, "learning_rate": 3.6280940862307603e-07, "loss": 0.0502, "step": 1173 }, { "epoch": 1.7574850299401197, "grad_norm": 2.6722113041619595, "learning_rate": 3.58425218757002e-07, "loss": 0.0431, "step": 1174 }, { "epoch": 1.7589820359281436, "grad_norm": 2.877963197221216, "learning_rate": 3.5406669452398455e-07, "loss": 0.0457, "step": 1175 }, { "epoch": 1.7604790419161676, "grad_norm": 3.345910377698391, "learning_rate": 3.497338600245254e-07, "loss": 0.0812, "step": 1176 }, { "epoch": 1.7619760479041915, "grad_norm": 2.7402207217319976, "learning_rate": 3.454267392170757e-07, "loss": 0.0601, "step": 1177 }, { "epoch": 1.7634730538922154, "grad_norm": 2.7557769405696937, "learning_rate": 3.4114535591790233e-07, "loss": 0.0634, "step": 1178 }, { "epoch": 1.7649700598802394, "grad_norm": 3.3084189049417403, "learning_rate": 3.3688973380095523e-07, "loss": 0.0752, "step": 1179 }, { "epoch": 1.7664670658682635, "grad_norm": 3.3663332937001127, "learning_rate": 3.326598963977395e-07, "loss": 0.074, "step": 1180 }, { "epoch": 1.7679640718562875, "grad_norm": 2.878378735484772, "learning_rate": 3.2845586709718105e-07, "loss": 0.0589, "step": 1181 }, { "epoch": 1.7694610778443114, "grad_norm": 3.075703561518641, "learning_rate": 3.242776691455013e-07, "loss": 0.0425, "step": 1182 }, { "epoch": 1.7709580838323353, "grad_norm": 2.6772768109783804, "learning_rate": 3.201253256460868e-07, "loss": 0.0382, "step": 1183 }, { "epoch": 1.7724550898203593, "grad_norm": 2.7481206206912394, "learning_rate": 3.159988595593616e-07, "loss": 0.0468, "step": 1184 }, { "epoch": 1.7739520958083832, "grad_norm": 2.7001361390700267, "learning_rate": 3.1189829370266045e-07, "loss": 0.0603, "step": 1185 }, { "epoch": 1.7754491017964071, "grad_norm": 3.400047170682373, "learning_rate": 3.078236507501015e-07, "loss": 0.0724, "step": 1186 }, { "epoch": 1.7769461077844313, "grad_norm": 3.411912479312659, "learning_rate": 3.03774953232463e-07, "loss": 0.0698, "step": 1187 }, { "epoch": 1.7784431137724552, "grad_norm": 2.2264142987153726, "learning_rate": 2.9975222353705757e-07, "loss": 0.0402, "step": 1188 }, { "epoch": 1.7799401197604792, "grad_norm": 3.0420466800984722, "learning_rate": 2.957554839076088e-07, "loss": 0.0681, "step": 1189 }, { "epoch": 1.781437125748503, "grad_norm": 2.58688549761168, "learning_rate": 2.917847564441256e-07, "loss": 0.0628, "step": 1190 }, { "epoch": 1.782934131736527, "grad_norm": 2.8954845097631026, "learning_rate": 2.878400631027861e-07, "loss": 0.0824, "step": 1191 }, { "epoch": 1.784431137724551, "grad_norm": 3.2131726719583433, "learning_rate": 2.839214256958106e-07, "loss": 0.0763, "step": 1192 }, { "epoch": 1.785928143712575, "grad_norm": 2.4798089357735567, "learning_rate": 2.8002886589134305e-07, "loss": 0.0484, "step": 1193 }, { "epoch": 1.7874251497005988, "grad_norm": 2.7926578037644862, "learning_rate": 2.7616240521332884e-07, "loss": 0.0678, "step": 1194 }, { "epoch": 1.7889221556886228, "grad_norm": 3.2859474094269507, "learning_rate": 2.7232206504140214e-07, "loss": 0.0682, "step": 1195 }, { "epoch": 1.7904191616766467, "grad_norm": 2.4269089476202415, "learning_rate": 2.6850786661076047e-07, "loss": 0.0422, "step": 1196 }, { "epoch": 1.7919161676646707, "grad_norm": 3.212207867470359, "learning_rate": 2.647198310120519e-07, "loss": 0.0684, "step": 1197 }, { "epoch": 1.7934131736526946, "grad_norm": 3.2624005833210488, "learning_rate": 2.6095797919125533e-07, "loss": 0.0568, "step": 1198 }, { "epoch": 1.7949101796407185, "grad_norm": 2.759443734850592, "learning_rate": 2.5722233194956815e-07, "loss": 0.0504, "step": 1199 }, { "epoch": 1.7964071856287425, "grad_norm": 2.7395142626595566, "learning_rate": 2.5351290994328703e-07, "loss": 0.0448, "step": 1200 }, { "epoch": 1.7979041916167664, "grad_norm": 2.9223865936598807, "learning_rate": 2.49829733683698e-07, "loss": 0.0696, "step": 1201 }, { "epoch": 1.7994011976047903, "grad_norm": 1.9625077424786044, "learning_rate": 2.4617282353696093e-07, "loss": 0.0326, "step": 1202 }, { "epoch": 1.8008982035928143, "grad_norm": 2.8100764430480023, "learning_rate": 2.425421997239946e-07, "loss": 0.0524, "step": 1203 }, { "epoch": 1.8023952095808382, "grad_norm": 2.78821566924981, "learning_rate": 2.3893788232036807e-07, "loss": 0.0689, "step": 1204 }, { "epoch": 1.8038922155688621, "grad_norm": 2.5883629350823654, "learning_rate": 2.353598912561922e-07, "loss": 0.0759, "step": 1205 }, { "epoch": 1.805389221556886, "grad_norm": 2.690970858888366, "learning_rate": 2.318082463160032e-07, "loss": 0.0672, "step": 1206 }, { "epoch": 1.80688622754491, "grad_norm": 2.498478481099843, "learning_rate": 2.282829671386544e-07, "loss": 0.0526, "step": 1207 }, { "epoch": 1.8083832335329342, "grad_norm": 3.7659501854372794, "learning_rate": 2.2478407321721295e-07, "loss": 0.0756, "step": 1208 }, { "epoch": 1.8098802395209581, "grad_norm": 2.486365958300414, "learning_rate": 2.213115838988461e-07, "loss": 0.0522, "step": 1209 }, { "epoch": 1.811377245508982, "grad_norm": 3.0074873683589005, "learning_rate": 2.1786551838471892e-07, "loss": 0.042, "step": 1210 }, { "epoch": 1.812874251497006, "grad_norm": 2.9606089982151227, "learning_rate": 2.1444589572988228e-07, "loss": 0.0743, "step": 1211 }, { "epoch": 1.81437125748503, "grad_norm": 2.5161829934382722, "learning_rate": 2.1105273484317402e-07, "loss": 0.0402, "step": 1212 }, { "epoch": 1.8158682634730539, "grad_norm": 2.376889099222581, "learning_rate": 2.0768605448711066e-07, "loss": 0.0364, "step": 1213 }, { "epoch": 1.8173652694610778, "grad_norm": 2.4980227639347823, "learning_rate": 2.043458732777831e-07, "loss": 0.0504, "step": 1214 }, { "epoch": 1.818862275449102, "grad_norm": 2.6046276716169134, "learning_rate": 2.0103220968475778e-07, "loss": 0.0515, "step": 1215 }, { "epoch": 1.8203592814371259, "grad_norm": 2.6910029194274174, "learning_rate": 1.9774508203096843e-07, "loss": 0.0522, "step": 1216 }, { "epoch": 1.8218562874251498, "grad_norm": 2.249433651691868, "learning_rate": 1.944845084926189e-07, "loss": 0.0514, "step": 1217 }, { "epoch": 1.8233532934131738, "grad_norm": 2.737832804991365, "learning_rate": 1.9125050709908388e-07, "loss": 0.0387, "step": 1218 }, { "epoch": 1.8248502994011977, "grad_norm": 2.6556392611291866, "learning_rate": 1.8804309573280498e-07, "loss": 0.0612, "step": 1219 }, { "epoch": 1.8263473053892216, "grad_norm": 3.25102885765781, "learning_rate": 1.8486229212919482e-07, "loss": 0.0615, "step": 1220 }, { "epoch": 1.8278443113772456, "grad_norm": 2.913144887931625, "learning_rate": 1.8170811387653753e-07, "loss": 0.0543, "step": 1221 }, { "epoch": 1.8293413173652695, "grad_norm": 2.413254003111924, "learning_rate": 1.7858057841589281e-07, "loss": 0.0413, "step": 1222 }, { "epoch": 1.8308383233532934, "grad_norm": 3.145258662057892, "learning_rate": 1.7547970304099937e-07, "loss": 0.0706, "step": 1223 }, { "epoch": 1.8323353293413174, "grad_norm": 3.0834762062842263, "learning_rate": 1.7240550489817652e-07, "loss": 0.0548, "step": 1224 }, { "epoch": 1.8338323353293413, "grad_norm": 2.6334069376016553, "learning_rate": 1.6935800098623334e-07, "loss": 0.0647, "step": 1225 }, { "epoch": 1.8353293413173652, "grad_norm": 3.4495008526357416, "learning_rate": 1.66337208156373e-07, "loss": 0.0603, "step": 1226 }, { "epoch": 1.8368263473053892, "grad_norm": 2.931625269233891, "learning_rate": 1.6334314311209854e-07, "loss": 0.0622, "step": 1227 }, { "epoch": 1.8383233532934131, "grad_norm": 2.272721568859326, "learning_rate": 1.6037582240912175e-07, "loss": 0.0424, "step": 1228 }, { "epoch": 1.839820359281437, "grad_norm": 2.909238760991961, "learning_rate": 1.5743526245527108e-07, "loss": 0.0608, "step": 1229 }, { "epoch": 1.841317365269461, "grad_norm": 3.13535430335616, "learning_rate": 1.5452147951040165e-07, "loss": 0.0662, "step": 1230 }, { "epoch": 1.842814371257485, "grad_norm": 3.0990251995686844, "learning_rate": 1.5163448968630534e-07, "loss": 0.0546, "step": 1231 }, { "epoch": 1.8443113772455089, "grad_norm": 3.3732323824353876, "learning_rate": 1.4877430894662037e-07, "loss": 0.0825, "step": 1232 }, { "epoch": 1.8458083832335328, "grad_norm": 2.5422736540757405, "learning_rate": 1.459409531067435e-07, "loss": 0.0445, "step": 1233 }, { "epoch": 1.8473053892215567, "grad_norm": 2.8681975896933416, "learning_rate": 1.4313443783374405e-07, "loss": 0.0522, "step": 1234 }, { "epoch": 1.8488023952095807, "grad_norm": 2.8355153361954306, "learning_rate": 1.40354778646275e-07, "loss": 0.0637, "step": 1235 }, { "epoch": 1.8502994011976048, "grad_norm": 3.56468420966407, "learning_rate": 1.3760199091449045e-07, "loss": 0.0748, "step": 1236 }, { "epoch": 1.8517964071856288, "grad_norm": 2.8048851667916286, "learning_rate": 1.3487608985995494e-07, "loss": 0.0456, "step": 1237 }, { "epoch": 1.8532934131736527, "grad_norm": 2.9520844576967464, "learning_rate": 1.3217709055556638e-07, "loss": 0.0645, "step": 1238 }, { "epoch": 1.8547904191616766, "grad_norm": 2.6773279887032633, "learning_rate": 1.2950500792546726e-07, "loss": 0.0505, "step": 1239 }, { "epoch": 1.8562874251497006, "grad_norm": 3.866345468220948, "learning_rate": 1.268598567449647e-07, "loss": 0.0677, "step": 1240 }, { "epoch": 1.8577844311377245, "grad_norm": 3.504613103412614, "learning_rate": 1.2424165164044822e-07, "loss": 0.081, "step": 1241 }, { "epoch": 1.8592814371257484, "grad_norm": 2.7987430845118415, "learning_rate": 1.2165040708930763e-07, "loss": 0.0706, "step": 1242 }, { "epoch": 1.8607784431137726, "grad_norm": 3.177758219382234, "learning_rate": 1.1908613741985542e-07, "loss": 0.0639, "step": 1243 }, { "epoch": 1.8622754491017965, "grad_norm": 2.9356857253244657, "learning_rate": 1.1654885681124661e-07, "loss": 0.0625, "step": 1244 }, { "epoch": 1.8637724550898205, "grad_norm": 2.6765756917818226, "learning_rate": 1.1403857929339845e-07, "loss": 0.0485, "step": 1245 }, { "epoch": 1.8652694610778444, "grad_norm": 3.311429967924805, "learning_rate": 1.1155531874691372e-07, "loss": 0.0593, "step": 1246 }, { "epoch": 1.8667664670658684, "grad_norm": 2.8710598988734994, "learning_rate": 1.0909908890300747e-07, "loss": 0.0425, "step": 1247 }, { "epoch": 1.8682634730538923, "grad_norm": 3.3558622220288528, "learning_rate": 1.0666990334342708e-07, "loss": 0.0526, "step": 1248 }, { "epoch": 1.8697604790419162, "grad_norm": 2.6556066824124858, "learning_rate": 1.042677755003757e-07, "loss": 0.0463, "step": 1249 }, { "epoch": 1.8712574850299402, "grad_norm": 2.887672748634312, "learning_rate": 1.0189271865644445e-07, "loss": 0.0477, "step": 1250 }, { "epoch": 1.872754491017964, "grad_norm": 2.3505406929010126, "learning_rate": 9.954474594453256e-08, "loss": 0.0449, "step": 1251 }, { "epoch": 1.874251497005988, "grad_norm": 2.4005302706487086, "learning_rate": 9.722387034777847e-08, "loss": 0.0634, "step": 1252 }, { "epoch": 1.875748502994012, "grad_norm": 2.83893564128621, "learning_rate": 9.493010469948605e-08, "loss": 0.0632, "step": 1253 }, { "epoch": 1.877245508982036, "grad_norm": 3.21827486766699, "learning_rate": 9.266346168305518e-08, "loss": 0.0604, "step": 1254 }, { "epoch": 1.8787425149700598, "grad_norm": 2.3375814620311273, "learning_rate": 9.042395383191016e-08, "loss": 0.0414, "step": 1255 }, { "epoch": 1.8802395209580838, "grad_norm": 2.904229280242883, "learning_rate": 8.821159352943142e-08, "loss": 0.0634, "step": 1256 }, { "epoch": 1.8817365269461077, "grad_norm": 3.5302290917857104, "learning_rate": 8.602639300888783e-08, "loss": 0.0469, "step": 1257 }, { "epoch": 1.8832335329341316, "grad_norm": 2.7783546173770652, "learning_rate": 8.38683643533661e-08, "loss": 0.0762, "step": 1258 }, { "epoch": 1.8847305389221556, "grad_norm": 2.8307563708530643, "learning_rate": 8.173751949570651e-08, "loss": 0.0642, "step": 1259 }, { "epoch": 1.8862275449101795, "grad_norm": 2.197485735775503, "learning_rate": 7.963387021843683e-08, "loss": 0.0463, "step": 1260 }, { "epoch": 1.8877245508982035, "grad_norm": 2.4625741623003123, "learning_rate": 7.755742815370726e-08, "loss": 0.0571, "step": 1261 }, { "epoch": 1.8892215568862274, "grad_norm": 2.378872260524273, "learning_rate": 7.550820478322285e-08, "loss": 0.0526, "step": 1262 }, { "epoch": 1.8907185628742516, "grad_norm": 3.055077390578523, "learning_rate": 7.348621143818512e-08, "loss": 0.0523, "step": 1263 }, { "epoch": 1.8922155688622755, "grad_norm": 2.6314783527452468, "learning_rate": 7.149145929922607e-08, "loss": 0.063, "step": 1264 }, { "epoch": 1.8937125748502994, "grad_norm": 2.83642750182173, "learning_rate": 6.952395939634648e-08, "loss": 0.0714, "step": 1265 }, { "epoch": 1.8952095808383234, "grad_norm": 4.117659933046145, "learning_rate": 6.758372260885714e-08, "loss": 0.0933, "step": 1266 }, { "epoch": 1.8967065868263473, "grad_norm": 2.99840904293613, "learning_rate": 6.56707596653161e-08, "loss": 0.0668, "step": 1267 }, { "epoch": 1.8982035928143712, "grad_norm": 2.9996538104612207, "learning_rate": 6.378508114346982e-08, "loss": 0.0596, "step": 1268 }, { "epoch": 1.8997005988023952, "grad_norm": 3.1060636884781534, "learning_rate": 6.192669747019653e-08, "loss": 0.0621, "step": 1269 }, { "epoch": 1.9011976047904193, "grad_norm": 3.1006681711903052, "learning_rate": 6.009561892144744e-08, "loss": 0.0617, "step": 1270 }, { "epoch": 1.9026946107784433, "grad_norm": 2.7410047588592974, "learning_rate": 5.829185562218898e-08, "loss": 0.0867, "step": 1271 }, { "epoch": 1.9041916167664672, "grad_norm": 2.7739888608582937, "learning_rate": 5.651541754634726e-08, "loss": 0.0642, "step": 1272 }, { "epoch": 1.9056886227544911, "grad_norm": 2.6826490950480513, "learning_rate": 5.476631451675429e-08, "loss": 0.045, "step": 1273 }, { "epoch": 1.907185628742515, "grad_norm": 2.6899354132405864, "learning_rate": 5.304455620509297e-08, "loss": 0.0525, "step": 1274 }, { "epoch": 1.908682634730539, "grad_norm": 2.5644691382232603, "learning_rate": 5.1350152131842136e-08, "loss": 0.0413, "step": 1275 }, { "epoch": 1.910179640718563, "grad_norm": 2.750699881074954, "learning_rate": 4.968311166622553e-08, "loss": 0.0728, "step": 1276 }, { "epoch": 1.9116766467065869, "grad_norm": 2.573906695056992, "learning_rate": 4.804344402616012e-08, "loss": 0.0581, "step": 1277 }, { "epoch": 1.9131736526946108, "grad_norm": 2.8307089301135053, "learning_rate": 4.643115827820399e-08, "loss": 0.0668, "step": 1278 }, { "epoch": 1.9146706586826348, "grad_norm": 3.14667794161904, "learning_rate": 4.484626333750686e-08, "loss": 0.0614, "step": 1279 }, { "epoch": 1.9161676646706587, "grad_norm": 2.226316798980247, "learning_rate": 4.328876796776071e-08, "loss": 0.0403, "step": 1280 }, { "epoch": 1.9176646706586826, "grad_norm": 2.921773783319529, "learning_rate": 4.1758680781151526e-08, "loss": 0.0776, "step": 1281 }, { "epoch": 1.9191616766467066, "grad_norm": 2.644015193250244, "learning_rate": 4.0256010238310936e-08, "loss": 0.0693, "step": 1282 }, { "epoch": 1.9206586826347305, "grad_norm": 2.744971616389126, "learning_rate": 3.87807646482713e-08, "loss": 0.0748, "step": 1283 }, { "epoch": 1.9221556886227544, "grad_norm": 2.6823147170872574, "learning_rate": 3.733295216841626e-08, "loss": 0.0447, "step": 1284 }, { "epoch": 1.9236526946107784, "grad_norm": 2.502092433748008, "learning_rate": 3.5912580804439714e-08, "loss": 0.0486, "step": 1285 }, { "epoch": 1.9251497005988023, "grad_norm": 2.876104157194026, "learning_rate": 3.451965841029914e-08, "loss": 0.0452, "step": 1286 }, { "epoch": 1.9266467065868262, "grad_norm": 3.0160095014714337, "learning_rate": 3.3154192688172324e-08, "loss": 0.0532, "step": 1287 }, { "epoch": 1.9281437125748502, "grad_norm": 3.0301528910102458, "learning_rate": 3.181619118841517e-08, "loss": 0.0457, "step": 1288 }, { "epoch": 1.9296407185628741, "grad_norm": 2.7880466849139416, "learning_rate": 3.050566130952004e-08, "loss": 0.0425, "step": 1289 }, { "epoch": 1.931137724550898, "grad_norm": 2.255561476135322, "learning_rate": 2.9222610298074717e-08, "loss": 0.0544, "step": 1290 }, { "epoch": 1.9326347305389222, "grad_norm": 3.382019379937165, "learning_rate": 2.7967045248722956e-08, "loss": 0.0934, "step": 1291 }, { "epoch": 1.9341317365269461, "grad_norm": 3.051139892769032, "learning_rate": 2.673897310412288e-08, "loss": 0.0731, "step": 1292 }, { "epoch": 1.93562874251497, "grad_norm": 2.9572437332172634, "learning_rate": 2.5538400654911977e-08, "loss": 0.0633, "step": 1293 }, { "epoch": 1.937125748502994, "grad_norm": 2.4319304238512465, "learning_rate": 2.4365334539667717e-08, "loss": 0.0424, "step": 1294 }, { "epoch": 1.938622754491018, "grad_norm": 2.4798116536302923, "learning_rate": 2.321978124486979e-08, "loss": 0.0456, "step": 1295 }, { "epoch": 1.9401197604790419, "grad_norm": 3.0259338849989326, "learning_rate": 2.210174710486679e-08, "loss": 0.056, "step": 1296 }, { "epoch": 1.9416167664670658, "grad_norm": 2.8508919688602363, "learning_rate": 2.1011238301839044e-08, "loss": 0.0664, "step": 1297 }, { "epoch": 1.94311377245509, "grad_norm": 2.559656985705334, "learning_rate": 1.99482608657664e-08, "loss": 0.0537, "step": 1298 }, { "epoch": 1.944610778443114, "grad_norm": 2.413483966548776, "learning_rate": 1.8912820674392152e-08, "loss": 0.0542, "step": 1299 }, { "epoch": 1.9461077844311379, "grad_norm": 2.7505972474866422, "learning_rate": 1.7904923453193056e-08, "loss": 0.0693, "step": 1300 }, { "epoch": 1.9476047904191618, "grad_norm": 2.940023522066428, "learning_rate": 1.6924574775347147e-08, "loss": 0.0677, "step": 1301 }, { "epoch": 1.9491017964071857, "grad_norm": 2.742449457026677, "learning_rate": 1.5971780061701524e-08, "loss": 0.052, "step": 1302 }, { "epoch": 1.9505988023952097, "grad_norm": 2.6607737253318597, "learning_rate": 1.504654458074406e-08, "loss": 0.0353, "step": 1303 }, { "epoch": 1.9520958083832336, "grad_norm": 2.991844051735187, "learning_rate": 1.4148873448573408e-08, "loss": 0.0612, "step": 1304 }, { "epoch": 1.9535928143712575, "grad_norm": 2.104479264933718, "learning_rate": 1.3278771628870702e-08, "loss": 0.0367, "step": 1305 }, { "epoch": 1.9550898203592815, "grad_norm": 3.4937781239383474, "learning_rate": 1.2436243932872349e-08, "loss": 0.072, "step": 1306 }, { "epoch": 1.9565868263473054, "grad_norm": 3.074910239773256, "learning_rate": 1.1621295019343948e-08, "loss": 0.0782, "step": 1307 }, { "epoch": 1.9580838323353293, "grad_norm": 2.398858615290936, "learning_rate": 1.0833929394552523e-08, "loss": 0.0412, "step": 1308 }, { "epoch": 1.9595808383233533, "grad_norm": 3.1718207942082977, "learning_rate": 1.0074151412245436e-08, "loss": 0.071, "step": 1309 }, { "epoch": 1.9610778443113772, "grad_norm": 2.5100575138788814, "learning_rate": 9.341965273621522e-09, "loss": 0.0425, "step": 1310 }, { "epoch": 1.9625748502994012, "grad_norm": 2.6381123248533656, "learning_rate": 8.637375027311102e-09, "loss": 0.0577, "step": 1311 }, { "epoch": 1.964071856287425, "grad_norm": 3.354412421125275, "learning_rate": 7.96038456935322e-09, "loss": 0.0719, "step": 1312 }, { "epoch": 1.965568862275449, "grad_norm": 3.147501751287912, "learning_rate": 7.31099764317289e-09, "loss": 0.0573, "step": 1313 }, { "epoch": 1.967065868263473, "grad_norm": 3.5585445921102354, "learning_rate": 6.6892178395611125e-09, "loss": 0.0671, "step": 1314 }, { "epoch": 1.968562874251497, "grad_norm": 3.5082707312521344, "learning_rate": 6.09504859665655e-09, "loss": 0.0574, "step": 1315 }, { "epoch": 1.9700598802395208, "grad_norm": 2.909715032553554, "learning_rate": 5.528493199922769e-09, "loss": 0.063, "step": 1316 }, { "epoch": 1.9715568862275448, "grad_norm": 3.1545288257155564, "learning_rate": 4.989554782133809e-09, "loss": 0.06, "step": 1317 }, { "epoch": 1.9730538922155687, "grad_norm": 2.575581626048881, "learning_rate": 4.478236323355312e-09, "loss": 0.0531, "step": 1318 }, { "epoch": 1.9745508982035929, "grad_norm": 3.130640305231653, "learning_rate": 3.994540650926748e-09, "loss": 0.0593, "step": 1319 }, { "epoch": 1.9760479041916168, "grad_norm": 3.0569533115251897, "learning_rate": 3.538470439448105e-09, "loss": 0.058, "step": 1320 }, { "epoch": 1.9775449101796407, "grad_norm": 2.9294976254102285, "learning_rate": 3.1100282107654477e-09, "loss": 0.0451, "step": 1321 }, { "epoch": 1.9790419161676647, "grad_norm": 2.893740365397889, "learning_rate": 2.709216333952602e-09, "loss": 0.0617, "step": 1322 }, { "epoch": 1.9805389221556886, "grad_norm": 2.9910701165670583, "learning_rate": 2.336037025303939e-09, "loss": 0.0743, "step": 1323 }, { "epoch": 1.9820359281437125, "grad_norm": 2.565018277198831, "learning_rate": 1.9904923483171632e-09, "loss": 0.0449, "step": 1324 }, { "epoch": 1.9835329341317365, "grad_norm": 2.750208104621821, "learning_rate": 1.6725842136855464e-09, "loss": 0.0671, "step": 1325 }, { "epoch": 1.9850299401197606, "grad_norm": 3.819642577430509, "learning_rate": 1.3823143792851545e-09, "loss": 0.0805, "step": 1326 }, { "epoch": 1.9865269461077846, "grad_norm": 3.2536455123653263, "learning_rate": 1.1196844501654148e-09, "loss": 0.0604, "step": 1327 }, { "epoch": 1.9880239520958085, "grad_norm": 1.967015792897722, "learning_rate": 8.846958785418969e-10, "loss": 0.0459, "step": 1328 }, { "epoch": 1.9895209580838324, "grad_norm": 2.031806526479593, "learning_rate": 6.773499637857672e-10, "loss": 0.0387, "step": 1329 }, { "epoch": 1.9910179640718564, "grad_norm": 2.681355391773002, "learning_rate": 4.97647852417682e-10, "loss": 0.0422, "step": 1330 }, { "epoch": 1.9925149700598803, "grad_norm": 2.6691809074388515, "learning_rate": 3.4559053810334643e-10, "loss": 0.0619, "step": 1331 }, { "epoch": 1.9940119760479043, "grad_norm": 3.1131192666970353, "learning_rate": 2.2117886164407797e-10, "loss": 0.0526, "step": 1332 }, { "epoch": 1.9955089820359282, "grad_norm": 2.4974887816655524, "learning_rate": 1.2441351097680632e-10, "loss": 0.0546, "step": 1333 }, { "epoch": 1.9970059880239521, "grad_norm": 2.705550673123676, "learning_rate": 5.529502116519148e-11, "loss": 0.0482, "step": 1334 }, { "epoch": 1.998502994011976, "grad_norm": 3.4605700328683953, "learning_rate": 1.3823774400734124e-11, "loss": 0.0707, "step": 1335 }, { "epoch": 2.0, "grad_norm": 2.4636681399369373, "learning_rate": 0.0, "loss": 0.0341, "step": 1336 }, { "epoch": 2.0, "step": 1336, "total_flos": 3161199415296.0, "train_loss": 0.10941897042396807, "train_runtime": 823.8564, "train_samples_per_second": 12.961, "train_steps_per_second": 1.622 } ], "logging_steps": 1, "max_steps": 1336, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3161199415296.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }