{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1876862199213282, "eval_steps": 375, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001251241466142188, "grad_norm": 0.15988914668560028, "learning_rate": 2e-05, "loss": 6.935, "step": 1 }, { "epoch": 0.0002502482932284376, "grad_norm": 0.10679785162210464, "learning_rate": 4e-05, "loss": 6.9369, "step": 2 }, { "epoch": 0.0003753724398426564, "grad_norm": 0.07426733523607254, "learning_rate": 6e-05, "loss": 6.9349, "step": 3 }, { "epoch": 0.0005004965864568752, "grad_norm": 0.1115022748708725, "learning_rate": 8e-05, "loss": 6.934, "step": 4 }, { "epoch": 0.000625620733071094, "grad_norm": 0.06209784746170044, "learning_rate": 0.0001, "loss": 6.9345, "step": 5 }, { "epoch": 0.0007507448796853128, "grad_norm": 0.054790958762168884, "learning_rate": 0.00012, "loss": 6.9332, "step": 6 }, { "epoch": 0.0008758690262995316, "grad_norm": 0.05543128401041031, "learning_rate": 0.00014, "loss": 6.9344, "step": 7 }, { "epoch": 0.0010009931729137503, "grad_norm": 0.05524062737822533, "learning_rate": 0.00016, "loss": 6.933, "step": 8 }, { "epoch": 0.0011261173195279692, "grad_norm": 0.05517721176147461, "learning_rate": 0.00018, "loss": 6.9323, "step": 9 }, { "epoch": 0.001251241466142188, "grad_norm": 0.0591803677380085, "learning_rate": 0.0002, "loss": 6.9306, "step": 10 }, { "epoch": 0.0013763656127564066, "grad_norm": 0.056528717279434204, "learning_rate": 0.00019999977772170748, "loss": 6.9318, "step": 11 }, { "epoch": 0.0015014897593706256, "grad_norm": 0.057307060807943344, "learning_rate": 0.00019999911088781805, "loss": 6.9311, "step": 12 }, { "epoch": 0.0016266139059848443, "grad_norm": 0.06184515729546547, "learning_rate": 0.0001999979995012962, "loss": 6.9303, "step": 13 }, { "epoch": 0.0017517380525990632, "grad_norm": 0.06591636687517166, "learning_rate": 0.00019999644356708261, "loss": 6.929, "step": 14 }, { "epoch": 0.001876862199213282, "grad_norm": 0.06827985495328903, "learning_rate": 0.00019999444309209432, "loss": 6.9279, "step": 15 }, { "epoch": 0.0020019863458275006, "grad_norm": 0.07131294161081314, "learning_rate": 0.0001999919980852246, "loss": 6.9295, "step": 16 }, { "epoch": 0.0021271104924417195, "grad_norm": 0.07191609591245651, "learning_rate": 0.00019998910855734288, "loss": 6.9286, "step": 17 }, { "epoch": 0.0022522346390559385, "grad_norm": 0.07154690474271774, "learning_rate": 0.0001999857745212947, "loss": 6.927, "step": 18 }, { "epoch": 0.002377358785670157, "grad_norm": 0.07113561034202576, "learning_rate": 0.00019998199599190178, "loss": 6.9296, "step": 19 }, { "epoch": 0.002502482932284376, "grad_norm": 0.07156097143888474, "learning_rate": 0.0001999777729859618, "loss": 6.9272, "step": 20 }, { "epoch": 0.002627607078898595, "grad_norm": 0.07506931573152542, "learning_rate": 0.00019997310552224846, "loss": 6.9288, "step": 21 }, { "epoch": 0.0027527312255128133, "grad_norm": 0.07703310996294022, "learning_rate": 0.00019996799362151122, "loss": 6.9264, "step": 22 }, { "epoch": 0.0028778553721270322, "grad_norm": 0.08246796578168869, "learning_rate": 0.00019996243730647538, "loss": 6.926, "step": 23 }, { "epoch": 0.003002979518741251, "grad_norm": 0.07903309166431427, "learning_rate": 0.00019995643660184191, "loss": 6.9276, "step": 24 }, { "epoch": 0.00312810366535547, "grad_norm": 0.07953737676143646, "learning_rate": 0.00019994999153428737, "loss": 6.9246, "step": 25 }, { "epoch": 0.0032532278119696886, "grad_norm": 0.08557987213134766, "learning_rate": 0.00019994310213246368, "loss": 6.9241, "step": 26 }, { "epoch": 0.0033783519585839075, "grad_norm": 0.08523714542388916, "learning_rate": 0.00019993576842699816, "loss": 6.9213, "step": 27 }, { "epoch": 0.0035034761051981264, "grad_norm": 0.08518058061599731, "learning_rate": 0.0001999279904504933, "loss": 6.9236, "step": 28 }, { "epoch": 0.003628600251812345, "grad_norm": 0.08648113906383514, "learning_rate": 0.00019991976823752653, "loss": 6.9224, "step": 29 }, { "epoch": 0.003753724398426564, "grad_norm": 0.09164538234472275, "learning_rate": 0.00019991110182465032, "loss": 6.9202, "step": 30 }, { "epoch": 0.0038788485450407828, "grad_norm": 0.09380660206079483, "learning_rate": 0.00019990199125039174, "loss": 6.9175, "step": 31 }, { "epoch": 0.004003972691655001, "grad_norm": 0.09288927912712097, "learning_rate": 0.00019989243655525247, "loss": 6.9175, "step": 32 }, { "epoch": 0.00412909683826922, "grad_norm": 0.0947941243648529, "learning_rate": 0.00019988243778170853, "loss": 6.9181, "step": 33 }, { "epoch": 0.004254220984883439, "grad_norm": 0.09333830326795578, "learning_rate": 0.0001998719949742101, "loss": 6.9172, "step": 34 }, { "epoch": 0.004379345131497658, "grad_norm": 0.09799979627132416, "learning_rate": 0.0001998611081791814, "loss": 6.9147, "step": 35 }, { "epoch": 0.004504469278111877, "grad_norm": 0.10424171388149261, "learning_rate": 0.00019984977744502038, "loss": 6.9146, "step": 36 }, { "epoch": 0.004629593424726095, "grad_norm": 0.10482361167669296, "learning_rate": 0.00019983800282209857, "loss": 6.9114, "step": 37 }, { "epoch": 0.004754717571340314, "grad_norm": 0.10715615004301071, "learning_rate": 0.00019982578436276082, "loss": 6.9117, "step": 38 }, { "epoch": 0.004879841717954533, "grad_norm": 0.11066976934671402, "learning_rate": 0.00019981312212132512, "loss": 6.9108, "step": 39 }, { "epoch": 0.005004965864568752, "grad_norm": 0.11982666701078415, "learning_rate": 0.00019980001615408228, "loss": 6.9071, "step": 40 }, { "epoch": 0.005130090011182971, "grad_norm": 0.11826398223638535, "learning_rate": 0.00019978646651929572, "loss": 6.9043, "step": 41 }, { "epoch": 0.00525521415779719, "grad_norm": 0.12079612165689468, "learning_rate": 0.00019977247327720128, "loss": 6.9045, "step": 42 }, { "epoch": 0.0053803383044114085, "grad_norm": 0.12371652573347092, "learning_rate": 0.0001997580364900068, "loss": 6.9014, "step": 43 }, { "epoch": 0.005505462451025627, "grad_norm": 0.134723961353302, "learning_rate": 0.000199743156221892, "loss": 6.9004, "step": 44 }, { "epoch": 0.0056305865976398455, "grad_norm": 0.12978613376617432, "learning_rate": 0.00019972783253900808, "loss": 6.8996, "step": 45 }, { "epoch": 0.0057557107442540644, "grad_norm": 0.13787208497524261, "learning_rate": 0.00019971206550947748, "loss": 6.8942, "step": 46 }, { "epoch": 0.005880834890868283, "grad_norm": 0.1441243588924408, "learning_rate": 0.00019969585520339354, "loss": 6.8885, "step": 47 }, { "epoch": 0.006005959037482502, "grad_norm": 0.15092897415161133, "learning_rate": 0.0001996792016928203, "loss": 6.8885, "step": 48 }, { "epoch": 0.006131083184096721, "grad_norm": 0.16750802099704742, "learning_rate": 0.00019966210505179197, "loss": 6.8812, "step": 49 }, { "epoch": 0.00625620733071094, "grad_norm": 0.20417548716068268, "learning_rate": 0.00019964456535631286, "loss": 6.8693, "step": 50 }, { "epoch": 0.006381331477325158, "grad_norm": 0.12091758847236633, "learning_rate": 0.0001996265826843568, "loss": 6.9261, "step": 51 }, { "epoch": 0.006506455623939377, "grad_norm": 0.11044614017009735, "learning_rate": 0.00019960815711586696, "loss": 6.9246, "step": 52 }, { "epoch": 0.006631579770553596, "grad_norm": 0.09137877821922302, "learning_rate": 0.00019958928873275539, "loss": 6.9233, "step": 53 }, { "epoch": 0.006756703917167815, "grad_norm": 0.08062753081321716, "learning_rate": 0.00019956997761890277, "loss": 6.9184, "step": 54 }, { "epoch": 0.006881828063782034, "grad_norm": 0.0844404548406601, "learning_rate": 0.00019955022386015792, "loss": 6.9157, "step": 55 }, { "epoch": 0.007006952210396253, "grad_norm": 0.07345044612884521, "learning_rate": 0.00019953002754433743, "loss": 6.9158, "step": 56 }, { "epoch": 0.007132076357010472, "grad_norm": 0.07632039487361908, "learning_rate": 0.00019950938876122542, "loss": 6.9145, "step": 57 }, { "epoch": 0.00725720050362469, "grad_norm": 0.07867372781038284, "learning_rate": 0.00019948830760257291, "loss": 6.9135, "step": 58 }, { "epoch": 0.007382324650238909, "grad_norm": 0.0778016448020935, "learning_rate": 0.0001994667841620976, "loss": 6.9123, "step": 59 }, { "epoch": 0.007507448796853128, "grad_norm": 0.09466104954481125, "learning_rate": 0.00019944481853548335, "loss": 6.9109, "step": 60 }, { "epoch": 0.007632572943467347, "grad_norm": 0.07847250252962112, "learning_rate": 0.00019942241082037982, "loss": 6.91, "step": 61 }, { "epoch": 0.0077576970900815655, "grad_norm": 0.07430963218212128, "learning_rate": 0.00019939956111640197, "loss": 6.9091, "step": 62 }, { "epoch": 0.007882821236695784, "grad_norm": 0.08360747992992401, "learning_rate": 0.00019937626952512964, "loss": 6.9081, "step": 63 }, { "epoch": 0.008007945383310002, "grad_norm": 0.08752304315567017, "learning_rate": 0.0001993525361501072, "loss": 6.9074, "step": 64 }, { "epoch": 0.008133069529924222, "grad_norm": 0.0866648480296135, "learning_rate": 0.00019932836109684286, "loss": 6.9033, "step": 65 }, { "epoch": 0.00825819367653844, "grad_norm": 0.09103065729141235, "learning_rate": 0.00019930374447280845, "loss": 6.9042, "step": 66 }, { "epoch": 0.00838331782315266, "grad_norm": 0.08400455117225647, "learning_rate": 0.00019927868638743875, "loss": 6.9057, "step": 67 }, { "epoch": 0.008508441969766878, "grad_norm": 0.08932878077030182, "learning_rate": 0.0001992531869521312, "loss": 6.9, "step": 68 }, { "epoch": 0.008633566116381096, "grad_norm": 0.08076933771371841, "learning_rate": 0.00019922724628024515, "loss": 6.9034, "step": 69 }, { "epoch": 0.008758690262995316, "grad_norm": 0.08748447895050049, "learning_rate": 0.0001992008644871016, "loss": 6.8989, "step": 70 }, { "epoch": 0.008883814409609534, "grad_norm": 0.08219264447689056, "learning_rate": 0.00019917404168998256, "loss": 6.8987, "step": 71 }, { "epoch": 0.009008938556223754, "grad_norm": 0.0816899910569191, "learning_rate": 0.0001991467780081305, "loss": 6.8955, "step": 72 }, { "epoch": 0.009134062702837972, "grad_norm": 0.08543135225772858, "learning_rate": 0.00019911907356274795, "loss": 6.896, "step": 73 }, { "epoch": 0.00925918684945219, "grad_norm": 0.08281300216913223, "learning_rate": 0.00019909092847699683, "loss": 6.8945, "step": 74 }, { "epoch": 0.00938431099606641, "grad_norm": 0.08432548493146896, "learning_rate": 0.00019906234287599798, "loss": 6.8911, "step": 75 }, { "epoch": 0.009509435142680628, "grad_norm": 0.08312211185693741, "learning_rate": 0.00019903331688683057, "loss": 6.894, "step": 76 }, { "epoch": 0.009634559289294848, "grad_norm": 0.08178085833787918, "learning_rate": 0.00019900385063853154, "loss": 6.8938, "step": 77 }, { "epoch": 0.009759683435909066, "grad_norm": 0.07686955481767654, "learning_rate": 0.00019897394426209505, "loss": 6.8911, "step": 78 }, { "epoch": 0.009884807582523285, "grad_norm": 0.08003832399845123, "learning_rate": 0.00019894359789047187, "loss": 6.8894, "step": 79 }, { "epoch": 0.010009931729137504, "grad_norm": 0.08270556479692459, "learning_rate": 0.00019891281165856873, "loss": 6.8902, "step": 80 }, { "epoch": 0.010135055875751722, "grad_norm": 0.08120972663164139, "learning_rate": 0.00019888158570324795, "loss": 6.8864, "step": 81 }, { "epoch": 0.010260180022365941, "grad_norm": 0.07850047200918198, "learning_rate": 0.0001988499201633265, "loss": 6.8874, "step": 82 }, { "epoch": 0.01038530416898016, "grad_norm": 0.08204144239425659, "learning_rate": 0.00019881781517957562, "loss": 6.8846, "step": 83 }, { "epoch": 0.01051042831559438, "grad_norm": 0.08431681990623474, "learning_rate": 0.0001987852708947202, "loss": 6.8847, "step": 84 }, { "epoch": 0.010635552462208597, "grad_norm": 0.08482389897108078, "learning_rate": 0.00019875228745343794, "loss": 6.884, "step": 85 }, { "epoch": 0.010760676608822817, "grad_norm": 0.08569814264774323, "learning_rate": 0.0001987188650023589, "loss": 6.8881, "step": 86 }, { "epoch": 0.010885800755437035, "grad_norm": 0.0944218635559082, "learning_rate": 0.0001986850036900648, "loss": 6.8808, "step": 87 }, { "epoch": 0.011010924902051253, "grad_norm": 0.09045609086751938, "learning_rate": 0.00019865070366708836, "loss": 6.8804, "step": 88 }, { "epoch": 0.011136049048665473, "grad_norm": 0.09469861537218094, "learning_rate": 0.00019861596508591255, "loss": 6.8774, "step": 89 }, { "epoch": 0.011261173195279691, "grad_norm": 0.10270188003778458, "learning_rate": 0.00019858078810097002, "loss": 6.8794, "step": 90 }, { "epoch": 0.01138629734189391, "grad_norm": 0.10337856411933899, "learning_rate": 0.00019854517286864245, "loss": 6.8744, "step": 91 }, { "epoch": 0.011511421488508129, "grad_norm": 0.10456780344247818, "learning_rate": 0.0001985091195472596, "loss": 6.8765, "step": 92 }, { "epoch": 0.011636545635122349, "grad_norm": 0.10681149363517761, "learning_rate": 0.0001984726282970989, "loss": 6.8752, "step": 93 }, { "epoch": 0.011761669781736567, "grad_norm": 0.11471783369779587, "learning_rate": 0.0001984356992803847, "loss": 6.8731, "step": 94 }, { "epoch": 0.011886793928350785, "grad_norm": 0.12389547377824783, "learning_rate": 0.00019839833266128724, "loss": 6.8697, "step": 95 }, { "epoch": 0.012011918074965005, "grad_norm": 0.11571670323610306, "learning_rate": 0.00019836052860592237, "loss": 6.8658, "step": 96 }, { "epoch": 0.012137042221579223, "grad_norm": 0.12612538039684296, "learning_rate": 0.0001983222872823505, "loss": 6.8663, "step": 97 }, { "epoch": 0.012262166368193442, "grad_norm": 0.14484509825706482, "learning_rate": 0.00019828360886057594, "loss": 6.8626, "step": 98 }, { "epoch": 0.01238729051480766, "grad_norm": 0.1861916184425354, "learning_rate": 0.00019824449351254616, "loss": 6.8545, "step": 99 }, { "epoch": 0.01251241466142188, "grad_norm": 0.2190321385860443, "learning_rate": 0.00019820494141215104, "loss": 6.8383, "step": 100 }, { "epoch": 0.012637538808036098, "grad_norm": 0.13650046288967133, "learning_rate": 0.000198164952735222, "loss": 6.9194, "step": 101 }, { "epoch": 0.012762662954650316, "grad_norm": 0.09935830533504486, "learning_rate": 0.00019812452765953135, "loss": 6.9151, "step": 102 }, { "epoch": 0.012887787101264536, "grad_norm": 0.11466773599386215, "learning_rate": 0.00019808366636479147, "loss": 6.9131, "step": 103 }, { "epoch": 0.013012911247878754, "grad_norm": 0.1382901668548584, "learning_rate": 0.00019804236903265388, "loss": 6.9075, "step": 104 }, { "epoch": 0.013138035394492974, "grad_norm": 0.07793024182319641, "learning_rate": 0.00019800063584670863, "loss": 6.9035, "step": 105 }, { "epoch": 0.013263159541107192, "grad_norm": 0.08946847915649414, "learning_rate": 0.00019795846699248332, "loss": 6.9018, "step": 106 }, { "epoch": 0.013388283687721412, "grad_norm": 0.08725947141647339, "learning_rate": 0.00019791586265744237, "loss": 6.8993, "step": 107 }, { "epoch": 0.01351340783433563, "grad_norm": 0.09045708924531937, "learning_rate": 0.00019787282303098617, "loss": 6.9033, "step": 108 }, { "epoch": 0.013638531980949848, "grad_norm": 0.08767056465148926, "learning_rate": 0.0001978293483044502, "loss": 6.9009, "step": 109 }, { "epoch": 0.013763656127564068, "grad_norm": 0.08048249781131744, "learning_rate": 0.00019778543867110426, "loss": 6.8953, "step": 110 }, { "epoch": 0.013888780274178286, "grad_norm": 0.0913679301738739, "learning_rate": 0.00019774109432615147, "loss": 6.8959, "step": 111 }, { "epoch": 0.014013904420792506, "grad_norm": 0.07238554954528809, "learning_rate": 0.00019769631546672756, "loss": 6.8955, "step": 112 }, { "epoch": 0.014139028567406724, "grad_norm": 0.07926269620656967, "learning_rate": 0.00019765110229189988, "loss": 6.8928, "step": 113 }, { "epoch": 0.014264152714020943, "grad_norm": 0.08271410316228867, "learning_rate": 0.00019760545500266657, "loss": 6.8924, "step": 114 }, { "epoch": 0.014389276860635162, "grad_norm": 0.07544316351413727, "learning_rate": 0.00019755937380195568, "loss": 6.8923, "step": 115 }, { "epoch": 0.01451440100724938, "grad_norm": 0.07391080260276794, "learning_rate": 0.00019751285889462423, "loss": 6.8915, "step": 116 }, { "epoch": 0.0146395251538636, "grad_norm": 0.06741408258676529, "learning_rate": 0.0001974659104874573, "loss": 6.8888, "step": 117 }, { "epoch": 0.014764649300477817, "grad_norm": 0.0781959667801857, "learning_rate": 0.0001974185287891671, "loss": 6.8879, "step": 118 }, { "epoch": 0.014889773447092037, "grad_norm": 0.07569989562034607, "learning_rate": 0.0001973707140103921, "loss": 6.8858, "step": 119 }, { "epoch": 0.015014897593706255, "grad_norm": 0.0700925812125206, "learning_rate": 0.00019732246636369605, "loss": 6.8868, "step": 120 }, { "epoch": 0.015140021740320473, "grad_norm": 0.06608221679925919, "learning_rate": 0.00019727378606356703, "loss": 6.884, "step": 121 }, { "epoch": 0.015265145886934693, "grad_norm": 0.06312540173530579, "learning_rate": 0.00019722467332641656, "loss": 6.8876, "step": 122 }, { "epoch": 0.015390270033548911, "grad_norm": 0.0734315887093544, "learning_rate": 0.00019717512837057855, "loss": 6.8837, "step": 123 }, { "epoch": 0.015515394180163131, "grad_norm": 0.07128648459911346, "learning_rate": 0.0001971251514163083, "loss": 6.883, "step": 124 }, { "epoch": 0.01564051832677735, "grad_norm": 0.06233321502804756, "learning_rate": 0.0001970747426857817, "loss": 6.8812, "step": 125 }, { "epoch": 0.01576564247339157, "grad_norm": 0.0629267767071724, "learning_rate": 0.00019702390240309404, "loss": 6.8811, "step": 126 }, { "epoch": 0.015890766620005787, "grad_norm": 0.06407860666513443, "learning_rate": 0.0001969726307942592, "loss": 6.8799, "step": 127 }, { "epoch": 0.016015890766620005, "grad_norm": 0.06389015167951584, "learning_rate": 0.00019692092808720846, "loss": 6.8746, "step": 128 }, { "epoch": 0.016141014913234223, "grad_norm": 0.07071290910243988, "learning_rate": 0.0001968687945117896, "loss": 6.881, "step": 129 }, { "epoch": 0.016266139059848445, "grad_norm": 0.06741896271705627, "learning_rate": 0.00019681623029976588, "loss": 6.8764, "step": 130 }, { "epoch": 0.016391263206462663, "grad_norm": 0.06621404737234116, "learning_rate": 0.00019676323568481498, "loss": 6.8723, "step": 131 }, { "epoch": 0.01651638735307688, "grad_norm": 0.08149314671754837, "learning_rate": 0.00019670981090252792, "loss": 6.8738, "step": 132 }, { "epoch": 0.0166415114996911, "grad_norm": 0.07840327173471451, "learning_rate": 0.00019665595619040808, "loss": 6.8759, "step": 133 }, { "epoch": 0.01676663564630532, "grad_norm": 0.07281148433685303, "learning_rate": 0.0001966016717878702, "loss": 6.8687, "step": 134 }, { "epoch": 0.01689175979291954, "grad_norm": 0.08108653128147125, "learning_rate": 0.00019654695793623907, "loss": 6.8703, "step": 135 }, { "epoch": 0.017016883939533756, "grad_norm": 0.08203914016485214, "learning_rate": 0.0001964918148787488, "loss": 6.8709, "step": 136 }, { "epoch": 0.017142008086147974, "grad_norm": 0.08835235238075256, "learning_rate": 0.00019643624286054144, "loss": 6.8721, "step": 137 }, { "epoch": 0.017267132232762192, "grad_norm": 0.0898795872926712, "learning_rate": 0.00019638024212866606, "loss": 6.8662, "step": 138 }, { "epoch": 0.017392256379376414, "grad_norm": 0.08384780585765839, "learning_rate": 0.0001963238129320776, "loss": 6.8646, "step": 139 }, { "epoch": 0.017517380525990632, "grad_norm": 0.08652830123901367, "learning_rate": 0.00019626695552163578, "loss": 6.8644, "step": 140 }, { "epoch": 0.01764250467260485, "grad_norm": 0.08495476096868515, "learning_rate": 0.00019620967015010395, "loss": 6.8643, "step": 141 }, { "epoch": 0.017767628819219068, "grad_norm": 0.09518766403198242, "learning_rate": 0.00019615195707214803, "loss": 6.8611, "step": 142 }, { "epoch": 0.017892752965833286, "grad_norm": 0.09175199270248413, "learning_rate": 0.0001960938165443353, "loss": 6.8566, "step": 143 }, { "epoch": 0.018017877112447508, "grad_norm": 0.09209998697042465, "learning_rate": 0.00019603524882513327, "loss": 6.8561, "step": 144 }, { "epoch": 0.018143001259061726, "grad_norm": 0.09908024966716766, "learning_rate": 0.0001959762541749086, "loss": 6.8576, "step": 145 }, { "epoch": 0.018268125405675944, "grad_norm": 0.10435979068279266, "learning_rate": 0.00019591683285592593, "loss": 6.8512, "step": 146 }, { "epoch": 0.018393249552290162, "grad_norm": 0.09801405668258667, "learning_rate": 0.00019585698513234663, "loss": 6.8457, "step": 147 }, { "epoch": 0.01851837369890438, "grad_norm": 0.10764221101999283, "learning_rate": 0.0001957967112702277, "loss": 6.8422, "step": 148 }, { "epoch": 0.0186434978455186, "grad_norm": 0.13900740444660187, "learning_rate": 0.00019573601153752052, "loss": 6.8298, "step": 149 }, { "epoch": 0.01876862199213282, "grad_norm": 0.16055220365524292, "learning_rate": 0.00019567488620406983, "loss": 6.8058, "step": 150 }, { "epoch": 0.018893746138747038, "grad_norm": 0.21872732043266296, "learning_rate": 0.00019561333554161224, "loss": 6.912, "step": 151 }, { "epoch": 0.019018870285361256, "grad_norm": 0.15268252789974213, "learning_rate": 0.0001955513598237753, "loss": 6.9098, "step": 152 }, { "epoch": 0.019143994431975477, "grad_norm": 0.13219447433948517, "learning_rate": 0.00019548895932607621, "loss": 6.9068, "step": 153 }, { "epoch": 0.019269118578589695, "grad_norm": 0.10205072909593582, "learning_rate": 0.00019542613432592038, "loss": 6.8986, "step": 154 }, { "epoch": 0.019394242725203913, "grad_norm": 0.10182590037584305, "learning_rate": 0.00019536288510260056, "loss": 6.8968, "step": 155 }, { "epoch": 0.01951936687181813, "grad_norm": 0.10630691051483154, "learning_rate": 0.00019529921193729534, "loss": 6.9007, "step": 156 }, { "epoch": 0.01964449101843235, "grad_norm": 0.11106487363576889, "learning_rate": 0.00019523511511306793, "loss": 6.8958, "step": 157 }, { "epoch": 0.01976961516504657, "grad_norm": 0.09893786907196045, "learning_rate": 0.000195170594914865, "loss": 6.8899, "step": 158 }, { "epoch": 0.01989473931166079, "grad_norm": 0.10803712904453278, "learning_rate": 0.00019510565162951537, "loss": 6.8913, "step": 159 }, { "epoch": 0.020019863458275007, "grad_norm": 0.0959392786026001, "learning_rate": 0.00019504028554572864, "loss": 6.8909, "step": 160 }, { "epoch": 0.020144987604889225, "grad_norm": 0.07896330952644348, "learning_rate": 0.00019497449695409408, "loss": 6.8914, "step": 161 }, { "epoch": 0.020270111751503443, "grad_norm": 0.08474501967430115, "learning_rate": 0.00019490828614707916, "loss": 6.8892, "step": 162 }, { "epoch": 0.020395235898117665, "grad_norm": 0.09169815480709076, "learning_rate": 0.00019484165341902845, "loss": 6.8909, "step": 163 }, { "epoch": 0.020520360044731883, "grad_norm": 0.07824847102165222, "learning_rate": 0.00019477459906616206, "loss": 6.8882, "step": 164 }, { "epoch": 0.0206454841913461, "grad_norm": 0.07111695408821106, "learning_rate": 0.00019470712338657458, "loss": 6.8837, "step": 165 }, { "epoch": 0.02077060833796032, "grad_norm": 0.07704120129346848, "learning_rate": 0.0001946392266802336, "loss": 6.881, "step": 166 }, { "epoch": 0.02089573248457454, "grad_norm": 0.06854541599750519, "learning_rate": 0.0001945709092489783, "loss": 6.8828, "step": 167 }, { "epoch": 0.02102085663118876, "grad_norm": 0.061837103217840195, "learning_rate": 0.00019450217139651844, "loss": 6.8802, "step": 168 }, { "epoch": 0.021145980777802977, "grad_norm": 0.0691787376999855, "learning_rate": 0.0001944330134284326, "loss": 6.8809, "step": 169 }, { "epoch": 0.021271104924417195, "grad_norm": 0.06940039247274399, "learning_rate": 0.00019436343565216711, "loss": 6.8765, "step": 170 }, { "epoch": 0.021396229071031413, "grad_norm": 0.06792094558477402, "learning_rate": 0.00019429343837703455, "loss": 6.8787, "step": 171 }, { "epoch": 0.021521353217645634, "grad_norm": 0.07158462703227997, "learning_rate": 0.0001942230219142124, "loss": 6.8755, "step": 172 }, { "epoch": 0.021646477364259852, "grad_norm": 0.07128900289535522, "learning_rate": 0.0001941521865767417, "loss": 6.8754, "step": 173 }, { "epoch": 0.02177160151087407, "grad_norm": 0.06436680257320404, "learning_rate": 0.0001940809326795256, "loss": 6.8737, "step": 174 }, { "epoch": 0.02189672565748829, "grad_norm": 0.07366479188203812, "learning_rate": 0.000194009260539328, "loss": 6.8707, "step": 175 }, { "epoch": 0.022021849804102506, "grad_norm": 0.07705742865800858, "learning_rate": 0.0001939371704747721, "loss": 6.8742, "step": 176 }, { "epoch": 0.022146973950716728, "grad_norm": 0.07979152351617813, "learning_rate": 0.00019386466280633906, "loss": 6.8706, "step": 177 }, { "epoch": 0.022272098097330946, "grad_norm": 0.06823064386844635, "learning_rate": 0.00019379173785636646, "loss": 6.8695, "step": 178 }, { "epoch": 0.022397222243945164, "grad_norm": 0.07276113331317902, "learning_rate": 0.000193718395949047, "loss": 6.8696, "step": 179 }, { "epoch": 0.022522346390559382, "grad_norm": 0.08664866536855698, "learning_rate": 0.00019364463741042694, "loss": 6.8675, "step": 180 }, { "epoch": 0.022647470537173604, "grad_norm": 0.07096172869205475, "learning_rate": 0.00019357046256840473, "loss": 6.8665, "step": 181 }, { "epoch": 0.02277259468378782, "grad_norm": 0.07934916764497757, "learning_rate": 0.00019349587175272948, "loss": 6.8624, "step": 182 }, { "epoch": 0.02289771883040204, "grad_norm": 0.06755341589450836, "learning_rate": 0.0001934208652949996, "loss": 6.8617, "step": 183 }, { "epoch": 0.023022842977016258, "grad_norm": 0.06911028176546097, "learning_rate": 0.00019334544352866127, "loss": 6.8622, "step": 184 }, { "epoch": 0.023147967123630476, "grad_norm": 0.07539703696966171, "learning_rate": 0.00019326960678900688, "loss": 6.8603, "step": 185 }, { "epoch": 0.023273091270244697, "grad_norm": 0.07016899436712265, "learning_rate": 0.00019319335541317361, "loss": 6.8608, "step": 186 }, { "epoch": 0.023398215416858915, "grad_norm": 0.06599403917789459, "learning_rate": 0.00019311668974014208, "loss": 6.8601, "step": 187 }, { "epoch": 0.023523339563473133, "grad_norm": 0.06527413427829742, "learning_rate": 0.00019303961011073447, "loss": 6.8577, "step": 188 }, { "epoch": 0.02364846371008735, "grad_norm": 0.06863480061292648, "learning_rate": 0.00019296211686761346, "loss": 6.8563, "step": 189 }, { "epoch": 0.02377358785670157, "grad_norm": 0.09146850556135178, "learning_rate": 0.00019288421035528028, "loss": 6.8552, "step": 190 }, { "epoch": 0.02389871200331579, "grad_norm": 0.07483351230621338, "learning_rate": 0.00019280589092007352, "loss": 6.8499, "step": 191 }, { "epoch": 0.02402383614993001, "grad_norm": 0.07879147678613663, "learning_rate": 0.00019272715891016735, "loss": 6.8542, "step": 192 }, { "epoch": 0.024148960296544227, "grad_norm": 0.0846157819032669, "learning_rate": 0.00019264801467557007, "loss": 6.8511, "step": 193 }, { "epoch": 0.024274084443158445, "grad_norm": 0.08077917248010635, "learning_rate": 0.00019256845856812266, "loss": 6.8424, "step": 194 }, { "epoch": 0.024399208589772663, "grad_norm": 0.08849223703145981, "learning_rate": 0.000192488490941497, "loss": 6.8412, "step": 195 }, { "epoch": 0.024524332736386885, "grad_norm": 0.08617791533470154, "learning_rate": 0.00019240811215119448, "loss": 6.8425, "step": 196 }, { "epoch": 0.024649456883001103, "grad_norm": 0.07852291315793991, "learning_rate": 0.00019232732255454422, "loss": 6.8343, "step": 197 }, { "epoch": 0.02477458102961532, "grad_norm": 0.09300388395786285, "learning_rate": 0.00019224612251070175, "loss": 6.8268, "step": 198 }, { "epoch": 0.02489970517622954, "grad_norm": 0.11427102982997894, "learning_rate": 0.0001921645123806472, "loss": 6.8145, "step": 199 }, { "epoch": 0.02502482932284376, "grad_norm": 0.16621610522270203, "learning_rate": 0.0001920824925271838, "loss": 6.7909, "step": 200 }, { "epoch": 0.02514995346945798, "grad_norm": 0.10691721737384796, "learning_rate": 0.0001920000633149362, "loss": 6.9027, "step": 201 }, { "epoch": 0.025275077616072197, "grad_norm": 0.1546848714351654, "learning_rate": 0.00019191722511034884, "loss": 6.8968, "step": 202 }, { "epoch": 0.025400201762686415, "grad_norm": 0.11958974599838257, "learning_rate": 0.00019183397828168448, "loss": 6.8958, "step": 203 }, { "epoch": 0.025525325909300633, "grad_norm": 0.19288329780101776, "learning_rate": 0.00019175032319902234, "loss": 6.8972, "step": 204 }, { "epoch": 0.025650450055914854, "grad_norm": 0.09380433708429337, "learning_rate": 0.00019166626023425662, "loss": 6.8874, "step": 205 }, { "epoch": 0.025775574202529072, "grad_norm": 0.09519658237695694, "learning_rate": 0.00019158178976109476, "loss": 6.8866, "step": 206 }, { "epoch": 0.02590069834914329, "grad_norm": 0.10003580898046494, "learning_rate": 0.0001914969121550558, "loss": 6.8893, "step": 207 }, { "epoch": 0.02602582249575751, "grad_norm": 0.07789091020822525, "learning_rate": 0.00019141162779346874, "loss": 6.8804, "step": 208 }, { "epoch": 0.026150946642371727, "grad_norm": 0.10385818779468536, "learning_rate": 0.00019132593705547082, "loss": 6.8848, "step": 209 }, { "epoch": 0.026276070788985948, "grad_norm": 0.08500756323337555, "learning_rate": 0.00019123984032200586, "loss": 6.8855, "step": 210 }, { "epoch": 0.026401194935600166, "grad_norm": 0.08211251348257065, "learning_rate": 0.00019115333797582254, "loss": 6.8846, "step": 211 }, { "epoch": 0.026526319082214384, "grad_norm": 0.07903140038251877, "learning_rate": 0.00019106643040147278, "loss": 6.884, "step": 212 }, { "epoch": 0.026651443228828602, "grad_norm": 0.058197762817144394, "learning_rate": 0.00019097911798530987, "loss": 6.8831, "step": 213 }, { "epoch": 0.026776567375442824, "grad_norm": 0.06288017332553864, "learning_rate": 0.00019089140111548696, "loss": 6.8805, "step": 214 }, { "epoch": 0.026901691522057042, "grad_norm": 0.060705605894327164, "learning_rate": 0.00019080328018195513, "loss": 6.8767, "step": 215 }, { "epoch": 0.02702681566867126, "grad_norm": 0.06506630033254623, "learning_rate": 0.0001907147555764618, "loss": 6.8794, "step": 216 }, { "epoch": 0.027151939815285478, "grad_norm": 0.061172664165496826, "learning_rate": 0.00019062582769254895, "loss": 6.8757, "step": 217 }, { "epoch": 0.027277063961899696, "grad_norm": 0.07000657171010971, "learning_rate": 0.00019053649692555135, "loss": 6.8786, "step": 218 }, { "epoch": 0.027402188108513918, "grad_norm": 0.08130121231079102, "learning_rate": 0.00019044676367259476, "loss": 6.8735, "step": 219 }, { "epoch": 0.027527312255128136, "grad_norm": 0.07489964365959167, "learning_rate": 0.00019035662833259432, "loss": 6.8722, "step": 220 }, { "epoch": 0.027652436401742354, "grad_norm": 0.08517754822969437, "learning_rate": 0.00019026609130625257, "loss": 6.8713, "step": 221 }, { "epoch": 0.02777756054835657, "grad_norm": 0.08161235600709915, "learning_rate": 0.00019017515299605788, "loss": 6.8683, "step": 222 }, { "epoch": 0.02790268469497079, "grad_norm": 0.07607857137918472, "learning_rate": 0.00019008381380628247, "loss": 6.8645, "step": 223 }, { "epoch": 0.02802780884158501, "grad_norm": 0.10040906071662903, "learning_rate": 0.00018999207414298067, "loss": 6.8668, "step": 224 }, { "epoch": 0.02815293298819923, "grad_norm": 0.07079536467790604, "learning_rate": 0.00018989993441398726, "loss": 6.8666, "step": 225 }, { "epoch": 0.028278057134813447, "grad_norm": 0.057264797389507294, "learning_rate": 0.00018980739502891546, "loss": 6.8645, "step": 226 }, { "epoch": 0.028403181281427665, "grad_norm": 0.06204447150230408, "learning_rate": 0.0001897144563991552, "loss": 6.8635, "step": 227 }, { "epoch": 0.028528305428041887, "grad_norm": 0.06299582868814468, "learning_rate": 0.00018962111893787128, "loss": 6.8604, "step": 228 }, { "epoch": 0.028653429574656105, "grad_norm": 0.06707160919904709, "learning_rate": 0.00018952738306000151, "loss": 6.8641, "step": 229 }, { "epoch": 0.028778553721270323, "grad_norm": 0.06820806860923767, "learning_rate": 0.00018943324918225494, "loss": 6.8634, "step": 230 }, { "epoch": 0.02890367786788454, "grad_norm": 0.0714719295501709, "learning_rate": 0.0001893387177231099, "loss": 6.8591, "step": 231 }, { "epoch": 0.02902880201449876, "grad_norm": 0.08739463239908218, "learning_rate": 0.0001892437891028122, "loss": 6.8576, "step": 232 }, { "epoch": 0.02915392616111298, "grad_norm": 0.09228727221488953, "learning_rate": 0.0001891484637433733, "loss": 6.8579, "step": 233 }, { "epoch": 0.0292790503077272, "grad_norm": 0.06334882974624634, "learning_rate": 0.00018905274206856837, "loss": 6.8582, "step": 234 }, { "epoch": 0.029404174454341417, "grad_norm": 0.07310575246810913, "learning_rate": 0.00018895662450393438, "loss": 6.8513, "step": 235 }, { "epoch": 0.029529298600955635, "grad_norm": 0.0723051205277443, "learning_rate": 0.00018886011147676833, "loss": 6.8537, "step": 236 }, { "epoch": 0.029654422747569853, "grad_norm": 0.07151124626398087, "learning_rate": 0.00018876320341612522, "loss": 6.851, "step": 237 }, { "epoch": 0.029779546894184074, "grad_norm": 0.06941047310829163, "learning_rate": 0.00018866590075281624, "loss": 6.8467, "step": 238 }, { "epoch": 0.029904671040798293, "grad_norm": 0.07442627847194672, "learning_rate": 0.00018856820391940674, "loss": 6.8442, "step": 239 }, { "epoch": 0.03002979518741251, "grad_norm": 0.07939758151769638, "learning_rate": 0.00018847011335021449, "loss": 6.8499, "step": 240 }, { "epoch": 0.03015491933402673, "grad_norm": 0.07285558432340622, "learning_rate": 0.00018837162948130752, "loss": 6.847, "step": 241 }, { "epoch": 0.030280043480640947, "grad_norm": 0.07187478989362717, "learning_rate": 0.00018827275275050233, "loss": 6.8443, "step": 242 }, { "epoch": 0.030405167627255168, "grad_norm": 0.07782912999391556, "learning_rate": 0.00018817348359736203, "loss": 6.8435, "step": 243 }, { "epoch": 0.030530291773869386, "grad_norm": 0.07288346439599991, "learning_rate": 0.00018807382246319412, "loss": 6.8392, "step": 244 }, { "epoch": 0.030655415920483604, "grad_norm": 0.08069871366024017, "learning_rate": 0.00018797376979104872, "loss": 6.8325, "step": 245 }, { "epoch": 0.030780540067097822, "grad_norm": 0.07995132356882095, "learning_rate": 0.00018787332602571662, "loss": 6.8363, "step": 246 }, { "epoch": 0.030905664213712044, "grad_norm": 0.08847439289093018, "learning_rate": 0.00018777249161372713, "loss": 6.8293, "step": 247 }, { "epoch": 0.031030788360326262, "grad_norm": 0.08906427770853043, "learning_rate": 0.00018767126700334634, "loss": 6.8229, "step": 248 }, { "epoch": 0.03115591250694048, "grad_norm": 0.1021634042263031, "learning_rate": 0.0001875696526445749, "loss": 6.8151, "step": 249 }, { "epoch": 0.0312810366535547, "grad_norm": 0.14636634290218353, "learning_rate": 0.0001874676489891461, "loss": 6.7888, "step": 250 }, { "epoch": 0.03140616080016892, "grad_norm": 0.14801375567913055, "learning_rate": 0.00018736525649052394, "loss": 6.8921, "step": 251 }, { "epoch": 0.03153128494678314, "grad_norm": 0.0968460887670517, "learning_rate": 0.00018726247560390099, "loss": 6.8952, "step": 252 }, { "epoch": 0.031656409093397356, "grad_norm": 0.10253704339265823, "learning_rate": 0.00018715930678619644, "loss": 6.8904, "step": 253 }, { "epoch": 0.031781533240011574, "grad_norm": 0.1360487937927246, "learning_rate": 0.00018705575049605413, "loss": 6.8845, "step": 254 }, { "epoch": 0.03190665738662579, "grad_norm": 0.09559576958417892, "learning_rate": 0.00018695180719384029, "loss": 6.8849, "step": 255 }, { "epoch": 0.03203178153324001, "grad_norm": 0.08431591093540192, "learning_rate": 0.00018684747734164177, "loss": 6.8858, "step": 256 }, { "epoch": 0.03215690567985423, "grad_norm": 0.09201119840145111, "learning_rate": 0.00018674276140326376, "loss": 6.8797, "step": 257 }, { "epoch": 0.032282029826468446, "grad_norm": 0.0803314745426178, "learning_rate": 0.00018663765984422786, "loss": 6.8835, "step": 258 }, { "epoch": 0.03240715397308267, "grad_norm": 0.07057393342256546, "learning_rate": 0.00018653217313177004, "loss": 6.8823, "step": 259 }, { "epoch": 0.03253227811969689, "grad_norm": 0.06357032060623169, "learning_rate": 0.00018642630173483832, "loss": 6.8832, "step": 260 }, { "epoch": 0.03265740226631111, "grad_norm": 0.06184772029519081, "learning_rate": 0.00018632004612409103, "loss": 6.8792, "step": 261 }, { "epoch": 0.032782526412925325, "grad_norm": 0.06408469378948212, "learning_rate": 0.00018621340677189453, "loss": 6.8803, "step": 262 }, { "epoch": 0.03290765055953954, "grad_norm": 0.0681229680776596, "learning_rate": 0.00018610638415232097, "loss": 6.8764, "step": 263 }, { "epoch": 0.03303277470615376, "grad_norm": 0.06694327294826508, "learning_rate": 0.00018599897874114652, "loss": 6.8715, "step": 264 }, { "epoch": 0.03315789885276798, "grad_norm": 0.06436242163181305, "learning_rate": 0.00018589119101584898, "loss": 6.8677, "step": 265 }, { "epoch": 0.0332830229993822, "grad_norm": 0.06092974916100502, "learning_rate": 0.00018578302145560584, "loss": 6.8694, "step": 266 }, { "epoch": 0.033408147145996415, "grad_norm": 0.060959141701459885, "learning_rate": 0.00018567447054129195, "loss": 6.8671, "step": 267 }, { "epoch": 0.03353327129261064, "grad_norm": 0.05819505825638771, "learning_rate": 0.00018556553875547754, "loss": 6.8711, "step": 268 }, { "epoch": 0.03365839543922486, "grad_norm": 0.07157029956579208, "learning_rate": 0.00018545622658242607, "loss": 6.8644, "step": 269 }, { "epoch": 0.03378351958583908, "grad_norm": 0.06844910234212875, "learning_rate": 0.00018534653450809197, "loss": 6.8655, "step": 270 }, { "epoch": 0.033908643732453295, "grad_norm": 0.08353777974843979, "learning_rate": 0.00018523646302011867, "loss": 6.8624, "step": 271 }, { "epoch": 0.03403376787906751, "grad_norm": 0.07243352383375168, "learning_rate": 0.00018512601260783606, "loss": 6.8633, "step": 272 }, { "epoch": 0.03415889202568173, "grad_norm": 0.06253904104232788, "learning_rate": 0.00018501518376225887, "loss": 6.865, "step": 273 }, { "epoch": 0.03428401617229595, "grad_norm": 0.07763414084911346, "learning_rate": 0.00018490397697608395, "loss": 6.8593, "step": 274 }, { "epoch": 0.03440914031891017, "grad_norm": 0.06429531425237656, "learning_rate": 0.0001847923927436884, "loss": 6.8576, "step": 275 }, { "epoch": 0.034534264465524385, "grad_norm": 0.06577996164560318, "learning_rate": 0.00018468043156112728, "loss": 6.857, "step": 276 }, { "epoch": 0.0346593886121386, "grad_norm": 0.0600145049393177, "learning_rate": 0.0001845680939261314, "loss": 6.8579, "step": 277 }, { "epoch": 0.03478451275875283, "grad_norm": 0.06577698141336441, "learning_rate": 0.00018445538033810515, "loss": 6.8551, "step": 278 }, { "epoch": 0.034909636905367046, "grad_norm": 0.0624128133058548, "learning_rate": 0.00018434229129812418, "loss": 6.8553, "step": 279 }, { "epoch": 0.035034761051981264, "grad_norm": 0.07974766194820404, "learning_rate": 0.0001842288273089332, "loss": 6.8559, "step": 280 }, { "epoch": 0.03515988519859548, "grad_norm": 0.07230106741189957, "learning_rate": 0.00018411498887494396, "loss": 6.8521, "step": 281 }, { "epoch": 0.0352850093452097, "grad_norm": 0.09589193761348724, "learning_rate": 0.00018400077650223263, "loss": 6.8495, "step": 282 }, { "epoch": 0.03541013349182392, "grad_norm": 0.06857012212276459, "learning_rate": 0.0001838861906985379, "loss": 6.8445, "step": 283 }, { "epoch": 0.035535257638438136, "grad_norm": 0.07783155888319016, "learning_rate": 0.00018377123197325842, "loss": 6.8489, "step": 284 }, { "epoch": 0.035660381785052354, "grad_norm": 0.06357921659946442, "learning_rate": 0.00018365590083745085, "loss": 6.8464, "step": 285 }, { "epoch": 0.03578550593166657, "grad_norm": 0.0658588781952858, "learning_rate": 0.00018354019780382735, "loss": 6.8476, "step": 286 }, { "epoch": 0.0359106300782808, "grad_norm": 0.06867069005966187, "learning_rate": 0.0001834241233867533, "loss": 6.8427, "step": 287 }, { "epoch": 0.036035754224895016, "grad_norm": 0.08719408512115479, "learning_rate": 0.00018330767810224524, "loss": 6.8483, "step": 288 }, { "epoch": 0.036160878371509234, "grad_norm": 0.06204197183251381, "learning_rate": 0.0001831908624679683, "loss": 6.8368, "step": 289 }, { "epoch": 0.03628600251812345, "grad_norm": 0.10901842266321182, "learning_rate": 0.0001830736770032341, "loss": 6.8361, "step": 290 }, { "epoch": 0.03641112666473767, "grad_norm": 0.06913967430591583, "learning_rate": 0.0001829561222289984, "loss": 6.8361, "step": 291 }, { "epoch": 0.03653625081135189, "grad_norm": 0.07199162989854813, "learning_rate": 0.00018283819866785853, "loss": 6.8362, "step": 292 }, { "epoch": 0.036661374957966106, "grad_norm": 0.09814311563968658, "learning_rate": 0.0001827199068440516, "loss": 6.8264, "step": 293 }, { "epoch": 0.036786499104580324, "grad_norm": 0.08308727294206619, "learning_rate": 0.00018260124728345162, "loss": 6.8346, "step": 294 }, { "epoch": 0.03691162325119454, "grad_norm": 0.08265476673841476, "learning_rate": 0.00018248222051356754, "loss": 6.825, "step": 295 }, { "epoch": 0.03703674739780876, "grad_norm": 0.08055634796619415, "learning_rate": 0.00018236282706354063, "loss": 6.8231, "step": 296 }, { "epoch": 0.037161871544422985, "grad_norm": 0.08792124688625336, "learning_rate": 0.00018224306746414238, "loss": 6.82, "step": 297 }, { "epoch": 0.0372869956910372, "grad_norm": 0.08508029580116272, "learning_rate": 0.00018212294224777197, "loss": 6.8074, "step": 298 }, { "epoch": 0.03741211983765142, "grad_norm": 0.10417867451906204, "learning_rate": 0.00018200245194845399, "loss": 6.7953, "step": 299 }, { "epoch": 0.03753724398426564, "grad_norm": 0.14350895583629608, "learning_rate": 0.00018188159710183594, "loss": 6.7736, "step": 300 }, { "epoch": 0.03766236813087986, "grad_norm": 0.13481715321540833, "learning_rate": 0.000181760378245186, "loss": 6.8932, "step": 301 }, { "epoch": 0.037787492277494075, "grad_norm": 0.13645990192890167, "learning_rate": 0.00018163879591739067, "loss": 6.8856, "step": 302 }, { "epoch": 0.03791261642410829, "grad_norm": 0.12011216580867767, "learning_rate": 0.0001815168506589521, "loss": 6.886, "step": 303 }, { "epoch": 0.03803774057072251, "grad_norm": 0.094432532787323, "learning_rate": 0.000181394543011986, "loss": 6.8909, "step": 304 }, { "epoch": 0.03816286471733673, "grad_norm": 0.1013614684343338, "learning_rate": 0.00018127187352021907, "loss": 6.8833, "step": 305 }, { "epoch": 0.038287988863950954, "grad_norm": 0.10038182884454727, "learning_rate": 0.0001811488427289866, "loss": 6.8806, "step": 306 }, { "epoch": 0.03841311301056517, "grad_norm": 0.09726443886756897, "learning_rate": 0.00018102545118523007, "loss": 6.8811, "step": 307 }, { "epoch": 0.03853823715717939, "grad_norm": 0.08817505836486816, "learning_rate": 0.00018090169943749476, "loss": 6.8837, "step": 308 }, { "epoch": 0.03866336130379361, "grad_norm": 0.07672196626663208, "learning_rate": 0.00018077758803592718, "loss": 6.8752, "step": 309 }, { "epoch": 0.03878848545040783, "grad_norm": 0.07172679901123047, "learning_rate": 0.00018065311753227273, "loss": 6.8784, "step": 310 }, { "epoch": 0.038913609597022045, "grad_norm": 0.06302495300769806, "learning_rate": 0.0001805282884798732, "loss": 6.8758, "step": 311 }, { "epoch": 0.03903873374363626, "grad_norm": 0.06705040484666824, "learning_rate": 0.00018040310143366446, "loss": 6.8766, "step": 312 }, { "epoch": 0.03916385789025048, "grad_norm": 0.06168697029352188, "learning_rate": 0.00018027755695017368, "loss": 6.8735, "step": 313 }, { "epoch": 0.0392889820368647, "grad_norm": 0.06605495512485504, "learning_rate": 0.00018015165558751717, "loss": 6.8726, "step": 314 }, { "epoch": 0.039414106183478924, "grad_norm": 0.07558814436197281, "learning_rate": 0.00018002539790539773, "loss": 6.8703, "step": 315 }, { "epoch": 0.03953923033009314, "grad_norm": 0.08488204330205917, "learning_rate": 0.00017989878446510215, "loss": 6.8686, "step": 316 }, { "epoch": 0.03966435447670736, "grad_norm": 0.09071745723485947, "learning_rate": 0.00017977181582949888, "loss": 6.8693, "step": 317 }, { "epoch": 0.03978947862332158, "grad_norm": 0.0709400400519371, "learning_rate": 0.0001796444925630353, "loss": 6.8617, "step": 318 }, { "epoch": 0.039914602769935796, "grad_norm": 0.09009866416454315, "learning_rate": 0.00017951681523173542, "loss": 6.8653, "step": 319 }, { "epoch": 0.040039726916550014, "grad_norm": 0.06021871417760849, "learning_rate": 0.0001793887844031972, "loss": 6.8638, "step": 320 }, { "epoch": 0.04016485106316423, "grad_norm": 0.06436780095100403, "learning_rate": 0.00017926040064659014, "loss": 6.8648, "step": 321 }, { "epoch": 0.04028997520977845, "grad_norm": 0.06928073614835739, "learning_rate": 0.0001791316645326526, "loss": 6.8582, "step": 322 }, { "epoch": 0.04041509935639267, "grad_norm": 0.06109292432665825, "learning_rate": 0.00017900257663368963, "loss": 6.8611, "step": 323 }, { "epoch": 0.040540223503006886, "grad_norm": 0.05868317559361458, "learning_rate": 0.0001788731375235698, "loss": 6.8552, "step": 324 }, { "epoch": 0.04066534764962111, "grad_norm": 0.05860351026058197, "learning_rate": 0.00017874334777772327, "loss": 6.8557, "step": 325 }, { "epoch": 0.04079047179623533, "grad_norm": 0.07347492128610611, "learning_rate": 0.00017861320797313892, "loss": 6.8547, "step": 326 }, { "epoch": 0.04091559594284955, "grad_norm": 0.06732910126447678, "learning_rate": 0.0001784827186883618, "loss": 6.8538, "step": 327 }, { "epoch": 0.041040720089463766, "grad_norm": 0.07551483064889908, "learning_rate": 0.00017835188050349064, "loss": 6.851, "step": 328 }, { "epoch": 0.041165844236077984, "grad_norm": 0.08504615724086761, "learning_rate": 0.00017822069400017516, "loss": 6.851, "step": 329 }, { "epoch": 0.0412909683826922, "grad_norm": 0.06322678178548813, "learning_rate": 0.00017808915976161362, "loss": 6.8507, "step": 330 }, { "epoch": 0.04141609252930642, "grad_norm": 0.07722601294517517, "learning_rate": 0.00017795727837255015, "loss": 6.8486, "step": 331 }, { "epoch": 0.04154121667592064, "grad_norm": 0.08305875957012177, "learning_rate": 0.00017782505041927216, "loss": 6.8502, "step": 332 }, { "epoch": 0.041666340822534856, "grad_norm": 0.07514189928770065, "learning_rate": 0.00017769247648960774, "loss": 6.8496, "step": 333 }, { "epoch": 0.04179146496914908, "grad_norm": 0.06142481043934822, "learning_rate": 0.00017755955717292296, "loss": 6.8413, "step": 334 }, { "epoch": 0.0419165891157633, "grad_norm": 0.09536569565534592, "learning_rate": 0.00017742629306011944, "loss": 6.844, "step": 335 }, { "epoch": 0.04204171326237752, "grad_norm": 0.06408409774303436, "learning_rate": 0.00017729268474363154, "loss": 6.84, "step": 336 }, { "epoch": 0.042166837408991735, "grad_norm": 0.07043524831533432, "learning_rate": 0.0001771587328174239, "loss": 6.8428, "step": 337 }, { "epoch": 0.04229196155560595, "grad_norm": 0.06857780367136002, "learning_rate": 0.0001770244378769885, "loss": 6.8379, "step": 338 }, { "epoch": 0.04241708570222017, "grad_norm": 0.0689932107925415, "learning_rate": 0.0001768898005193425, "loss": 6.8373, "step": 339 }, { "epoch": 0.04254220984883439, "grad_norm": 0.062465883791446686, "learning_rate": 0.000176754821343025, "loss": 6.8358, "step": 340 }, { "epoch": 0.04266733399544861, "grad_norm": 0.06850454211235046, "learning_rate": 0.0001766195009480949, "loss": 6.8335, "step": 341 }, { "epoch": 0.042792458142062825, "grad_norm": 0.07006986439228058, "learning_rate": 0.0001764838399361279, "loss": 6.8357, "step": 342 }, { "epoch": 0.04291758228867704, "grad_norm": 0.07481860369443893, "learning_rate": 0.00017634783891021393, "loss": 6.8291, "step": 343 }, { "epoch": 0.04304270643529127, "grad_norm": 0.08820731192827225, "learning_rate": 0.00017621149847495458, "loss": 6.8273, "step": 344 }, { "epoch": 0.043167830581905486, "grad_norm": 0.09380005300045013, "learning_rate": 0.00017607481923646016, "loss": 6.8202, "step": 345 }, { "epoch": 0.043292954728519704, "grad_norm": 0.08753819018602371, "learning_rate": 0.0001759378018023473, "loss": 6.8228, "step": 346 }, { "epoch": 0.04341807887513392, "grad_norm": 0.07920189946889877, "learning_rate": 0.00017580044678173592, "loss": 6.8172, "step": 347 }, { "epoch": 0.04354320302174814, "grad_norm": 0.07676713168621063, "learning_rate": 0.00017566275478524693, "loss": 6.8086, "step": 348 }, { "epoch": 0.04366832716836236, "grad_norm": 0.08009803295135498, "learning_rate": 0.0001755247264249991, "loss": 6.7977, "step": 349 }, { "epoch": 0.04379345131497658, "grad_norm": 0.1170094683766365, "learning_rate": 0.0001753863623146066, "loss": 6.7681, "step": 350 }, { "epoch": 0.043918575461590795, "grad_norm": 0.15286493301391602, "learning_rate": 0.00017524766306917618, "loss": 6.8843, "step": 351 }, { "epoch": 0.04404369960820501, "grad_norm": 0.11505300551652908, "learning_rate": 0.0001751086293053045, "loss": 6.8876, "step": 352 }, { "epoch": 0.04416882375481924, "grad_norm": 0.11281990259885788, "learning_rate": 0.0001749692616410753, "loss": 6.8813, "step": 353 }, { "epoch": 0.044293947901433456, "grad_norm": 0.10321272164583206, "learning_rate": 0.00017482956069605668, "loss": 6.8796, "step": 354 }, { "epoch": 0.044419072048047674, "grad_norm": 0.0983697772026062, "learning_rate": 0.00017468952709129846, "loss": 6.8795, "step": 355 }, { "epoch": 0.04454419619466189, "grad_norm": 0.09605101495981216, "learning_rate": 0.00017454916144932922, "loss": 6.8794, "step": 356 }, { "epoch": 0.04466932034127611, "grad_norm": 0.09562011808156967, "learning_rate": 0.0001744084643941536, "loss": 6.8805, "step": 357 }, { "epoch": 0.04479444448789033, "grad_norm": 0.08395621180534363, "learning_rate": 0.00017426743655124974, "loss": 6.8795, "step": 358 }, { "epoch": 0.044919568634504546, "grad_norm": 0.0897192731499672, "learning_rate": 0.0001741260785475661, "loss": 6.8792, "step": 359 }, { "epoch": 0.045044692781118764, "grad_norm": 0.06123959273099899, "learning_rate": 0.00017398439101151905, "loss": 6.8745, "step": 360 }, { "epoch": 0.04516981692773298, "grad_norm": 0.06178920716047287, "learning_rate": 0.00017384237457298987, "loss": 6.871, "step": 361 }, { "epoch": 0.04529494107434721, "grad_norm": 0.057122666388750076, "learning_rate": 0.00017370002986332193, "loss": 6.8693, "step": 362 }, { "epoch": 0.045420065220961425, "grad_norm": 0.06613767892122269, "learning_rate": 0.00017355735751531807, "loss": 6.8702, "step": 363 }, { "epoch": 0.04554518936757564, "grad_norm": 0.07268024981021881, "learning_rate": 0.00017341435816323756, "loss": 6.8709, "step": 364 }, { "epoch": 0.04567031351418986, "grad_norm": 0.07819833606481552, "learning_rate": 0.00017327103244279348, "loss": 6.8646, "step": 365 }, { "epoch": 0.04579543766080408, "grad_norm": 0.07191377133131027, "learning_rate": 0.00017312738099114973, "loss": 6.8652, "step": 366 }, { "epoch": 0.0459205618074183, "grad_norm": 0.06911371648311615, "learning_rate": 0.00017298340444691835, "loss": 6.8645, "step": 367 }, { "epoch": 0.046045685954032516, "grad_norm": 0.07727386802434921, "learning_rate": 0.00017283910345015647, "loss": 6.8613, "step": 368 }, { "epoch": 0.046170810100646734, "grad_norm": 0.07539987564086914, "learning_rate": 0.0001726944786423637, "loss": 6.8623, "step": 369 }, { "epoch": 0.04629593424726095, "grad_norm": 0.09430147707462311, "learning_rate": 0.00017254953066647913, "loss": 6.8575, "step": 370 }, { "epoch": 0.04642105839387517, "grad_norm": 0.07089678943157196, "learning_rate": 0.00017240426016687863, "loss": 6.8606, "step": 371 }, { "epoch": 0.046546182540489395, "grad_norm": 0.05445936322212219, "learning_rate": 0.00017225866778937165, "loss": 6.8553, "step": 372 }, { "epoch": 0.04667130668710361, "grad_norm": 0.06795695424079895, "learning_rate": 0.00017211275418119876, "loss": 6.8584, "step": 373 }, { "epoch": 0.04679643083371783, "grad_norm": 0.06026960164308548, "learning_rate": 0.0001719665199910285, "loss": 6.8548, "step": 374 }, { "epoch": 0.04692155498033205, "grad_norm": 0.08185417205095291, "learning_rate": 0.00017181996586895454, "loss": 6.8557, "step": 375 }, { "epoch": 0.04692155498033205, "eval_loss": 6.847612380981445, "eval_runtime": 28.8767, "eval_samples_per_second": 466.154, "eval_steps_per_second": 233.094, "step": 375 }, { "epoch": 0.04704667912694627, "grad_norm": 0.0679742693901062, "learning_rate": 0.00017167309246649297, "loss": 6.852, "step": 376 }, { "epoch": 0.047171803273560485, "grad_norm": 0.06306691467761993, "learning_rate": 0.0001715259004365791, "loss": 6.8508, "step": 377 }, { "epoch": 0.0472969274201747, "grad_norm": 0.053492121398448944, "learning_rate": 0.00017137839043356484, "loss": 6.8508, "step": 378 }, { "epoch": 0.04742205156678892, "grad_norm": 0.06604982912540436, "learning_rate": 0.00017123056311321562, "loss": 6.848, "step": 379 }, { "epoch": 0.04754717571340314, "grad_norm": 0.0668310672044754, "learning_rate": 0.0001710824191327075, "loss": 6.8458, "step": 380 }, { "epoch": 0.047672299860017364, "grad_norm": 0.0710315853357315, "learning_rate": 0.00017093395915062428, "loss": 6.8445, "step": 381 }, { "epoch": 0.04779742400663158, "grad_norm": 0.09105353057384491, "learning_rate": 0.00017078518382695465, "loss": 6.8468, "step": 382 }, { "epoch": 0.0479225481532458, "grad_norm": 0.06036103889346123, "learning_rate": 0.00017063609382308908, "loss": 6.8408, "step": 383 }, { "epoch": 0.04804767229986002, "grad_norm": 0.06515318900346756, "learning_rate": 0.00017048668980181698, "loss": 6.842, "step": 384 }, { "epoch": 0.048172796446474236, "grad_norm": 0.07050523906946182, "learning_rate": 0.00017033697242732377, "loss": 6.8415, "step": 385 }, { "epoch": 0.048297920593088454, "grad_norm": 0.07193600386381149, "learning_rate": 0.0001701869423651879, "loss": 6.8414, "step": 386 }, { "epoch": 0.04842304473970267, "grad_norm": 0.08587748557329178, "learning_rate": 0.00017003660028237793, "loss": 6.8368, "step": 387 }, { "epoch": 0.04854816888631689, "grad_norm": 0.06860475242137909, "learning_rate": 0.00016988594684724947, "loss": 6.8343, "step": 388 }, { "epoch": 0.04867329303293111, "grad_norm": 0.07483404874801636, "learning_rate": 0.00016973498272954222, "loss": 6.8332, "step": 389 }, { "epoch": 0.04879841717954533, "grad_norm": 0.0661049634218216, "learning_rate": 0.00016958370860037717, "loss": 6.8316, "step": 390 }, { "epoch": 0.04892354132615955, "grad_norm": 0.09279706329107285, "learning_rate": 0.00016943212513225345, "loss": 6.8262, "step": 391 }, { "epoch": 0.04904866547277377, "grad_norm": 0.07987859100103378, "learning_rate": 0.00016928023299904533, "loss": 6.8278, "step": 392 }, { "epoch": 0.04917378961938799, "grad_norm": 0.0658203661441803, "learning_rate": 0.0001691280328759992, "loss": 6.8262, "step": 393 }, { "epoch": 0.049298913766002206, "grad_norm": 0.0760604664683342, "learning_rate": 0.00016897552543973084, "loss": 6.8216, "step": 394 }, { "epoch": 0.049424037912616424, "grad_norm": 0.07406409084796906, "learning_rate": 0.00016882271136822206, "loss": 6.8165, "step": 395 }, { "epoch": 0.04954916205923064, "grad_norm": 0.07298344373703003, "learning_rate": 0.0001686695913408179, "loss": 6.8147, "step": 396 }, { "epoch": 0.04967428620584486, "grad_norm": 0.06973182410001755, "learning_rate": 0.0001685161660382235, "loss": 6.8072, "step": 397 }, { "epoch": 0.04979941035245908, "grad_norm": 0.12040932476520538, "learning_rate": 0.00016836243614250113, "loss": 6.8023, "step": 398 }, { "epoch": 0.049924534499073296, "grad_norm": 0.09196866303682327, "learning_rate": 0.00016820840233706719, "loss": 6.793, "step": 399 }, { "epoch": 0.05004965864568752, "grad_norm": 0.23371237516403198, "learning_rate": 0.0001680540653066891, "loss": 6.7635, "step": 400 }, { "epoch": 0.05017478279230174, "grad_norm": 0.12273257225751877, "learning_rate": 0.00016789942573748232, "loss": 6.8874, "step": 401 }, { "epoch": 0.05029990693891596, "grad_norm": 0.09563007205724716, "learning_rate": 0.0001677444843169072, "loss": 6.885, "step": 402 }, { "epoch": 0.050425031085530175, "grad_norm": 0.1150246113538742, "learning_rate": 0.00016758924173376603, "loss": 6.8799, "step": 403 }, { "epoch": 0.05055015523214439, "grad_norm": 0.10595707595348358, "learning_rate": 0.0001674336986781999, "loss": 6.8744, "step": 404 }, { "epoch": 0.05067527937875861, "grad_norm": 0.09950631111860275, "learning_rate": 0.00016727785584168581, "loss": 6.8785, "step": 405 }, { "epoch": 0.05080040352537283, "grad_norm": 0.12173973023891449, "learning_rate": 0.0001671217139170333, "loss": 6.8779, "step": 406 }, { "epoch": 0.05092552767198705, "grad_norm": 0.09207621216773987, "learning_rate": 0.00016696527359838154, "loss": 6.8781, "step": 407 }, { "epoch": 0.051050651818601266, "grad_norm": 0.10334669053554535, "learning_rate": 0.00016680853558119632, "loss": 6.8738, "step": 408 }, { "epoch": 0.05117577596521549, "grad_norm": 0.08528117090463638, "learning_rate": 0.0001666515005622668, "loss": 6.8711, "step": 409 }, { "epoch": 0.05130090011182971, "grad_norm": 0.0632965937256813, "learning_rate": 0.0001664941692397025, "loss": 6.8728, "step": 410 }, { "epoch": 0.05142602425844393, "grad_norm": 0.0665343850851059, "learning_rate": 0.00016633654231293013, "loss": 6.8688, "step": 411 }, { "epoch": 0.051551148405058145, "grad_norm": 0.062818244099617, "learning_rate": 0.00016617862048269065, "loss": 6.869, "step": 412 }, { "epoch": 0.05167627255167236, "grad_norm": 0.059508200734853745, "learning_rate": 0.00016602040445103588, "loss": 6.8626, "step": 413 }, { "epoch": 0.05180139669828658, "grad_norm": 0.056462232023477554, "learning_rate": 0.00016586189492132566, "loss": 6.864, "step": 414 }, { "epoch": 0.0519265208449008, "grad_norm": 0.07418836653232574, "learning_rate": 0.00016570309259822453, "loss": 6.8624, "step": 415 }, { "epoch": 0.05205164499151502, "grad_norm": 0.08192052692174911, "learning_rate": 0.0001655439981876987, "loss": 6.8634, "step": 416 }, { "epoch": 0.052176769138129235, "grad_norm": 0.09083849936723709, "learning_rate": 0.00016538461239701277, "loss": 6.8632, "step": 417 }, { "epoch": 0.05230189328474345, "grad_norm": 0.08329660445451736, "learning_rate": 0.00016522493593472683, "loss": 6.8591, "step": 418 }, { "epoch": 0.05242701743135768, "grad_norm": 0.06983418762683868, "learning_rate": 0.0001650649695106931, "loss": 6.8556, "step": 419 }, { "epoch": 0.052552141577971896, "grad_norm": 0.08162139356136322, "learning_rate": 0.00016490471383605288, "loss": 6.8555, "step": 420 }, { "epoch": 0.052677265724586114, "grad_norm": 0.06647763401269913, "learning_rate": 0.00016474416962323325, "loss": 6.8548, "step": 421 }, { "epoch": 0.05280238987120033, "grad_norm": 0.059303831309080124, "learning_rate": 0.00016458333758594414, "loss": 6.8522, "step": 422 }, { "epoch": 0.05292751401781455, "grad_norm": 0.06314858794212341, "learning_rate": 0.00016442221843917496, "loss": 6.8525, "step": 423 }, { "epoch": 0.05305263816442877, "grad_norm": 0.0485655777156353, "learning_rate": 0.00016426081289919143, "loss": 6.8528, "step": 424 }, { "epoch": 0.053177762311042986, "grad_norm": 0.0625639334321022, "learning_rate": 0.0001640991216835326, "loss": 6.85, "step": 425 }, { "epoch": 0.053302886457657205, "grad_norm": 0.05681520700454712, "learning_rate": 0.00016393714551100734, "loss": 6.8498, "step": 426 }, { "epoch": 0.05342801060427142, "grad_norm": 0.07148370146751404, "learning_rate": 0.0001637748851016914, "loss": 6.8525, "step": 427 }, { "epoch": 0.05355313475088565, "grad_norm": 0.06755717843770981, "learning_rate": 0.00016361234117692413, "loss": 6.8447, "step": 428 }, { "epoch": 0.053678258897499866, "grad_norm": 0.06268233060836792, "learning_rate": 0.00016344951445930526, "loss": 6.8429, "step": 429 }, { "epoch": 0.053803383044114084, "grad_norm": 0.06574989855289459, "learning_rate": 0.0001632864056726917, "loss": 6.8426, "step": 430 }, { "epoch": 0.0539285071907283, "grad_norm": 0.052641697227954865, "learning_rate": 0.00016312301554219426, "loss": 6.8457, "step": 431 }, { "epoch": 0.05405363133734252, "grad_norm": 0.09943837672472, "learning_rate": 0.00016295934479417453, "loss": 6.8418, "step": 432 }, { "epoch": 0.05417875548395674, "grad_norm": 0.07433007657527924, "learning_rate": 0.00016279539415624164, "loss": 6.8393, "step": 433 }, { "epoch": 0.054303879630570956, "grad_norm": 0.06012954190373421, "learning_rate": 0.0001626311643572489, "loss": 6.8413, "step": 434 }, { "epoch": 0.054429003777185174, "grad_norm": 0.060854729264974594, "learning_rate": 0.00016246665612729074, "loss": 6.8346, "step": 435 }, { "epoch": 0.05455412792379939, "grad_norm": 0.09752050787210464, "learning_rate": 0.00016230187019769928, "loss": 6.8393, "step": 436 }, { "epoch": 0.05467925207041361, "grad_norm": 0.0773884505033493, "learning_rate": 0.00016213680730104124, "loss": 6.836, "step": 437 }, { "epoch": 0.054804376217027835, "grad_norm": 0.08576478064060211, "learning_rate": 0.0001619714681711146, "loss": 6.8352, "step": 438 }, { "epoch": 0.05492950036364205, "grad_norm": 0.07786134630441666, "learning_rate": 0.00016180585354294536, "loss": 6.8304, "step": 439 }, { "epoch": 0.05505462451025627, "grad_norm": 0.06598285585641861, "learning_rate": 0.00016163996415278424, "loss": 6.8289, "step": 440 }, { "epoch": 0.05517974865687049, "grad_norm": 0.06323549151420593, "learning_rate": 0.00016147380073810346, "loss": 6.8294, "step": 441 }, { "epoch": 0.05530487280348471, "grad_norm": 0.07911103218793869, "learning_rate": 0.0001613073640375934, "loss": 6.8286, "step": 442 }, { "epoch": 0.055429996950098925, "grad_norm": 0.06942103803157806, "learning_rate": 0.00016114065479115946, "loss": 6.8248, "step": 443 }, { "epoch": 0.05555512109671314, "grad_norm": 0.07728816568851471, "learning_rate": 0.00016097367373991842, "loss": 6.8173, "step": 444 }, { "epoch": 0.05568024524332736, "grad_norm": 0.08438047021627426, "learning_rate": 0.00016080642162619565, "loss": 6.8164, "step": 445 }, { "epoch": 0.05580536938994158, "grad_norm": 0.08097823709249496, "learning_rate": 0.0001606388991935214, "loss": 6.8108, "step": 446 }, { "epoch": 0.055930493536555805, "grad_norm": 0.08989620953798294, "learning_rate": 0.0001604711071866277, "loss": 6.8087, "step": 447 }, { "epoch": 0.05605561768317002, "grad_norm": 0.0738961398601532, "learning_rate": 0.00016030304635144494, "loss": 6.804, "step": 448 }, { "epoch": 0.05618074182978424, "grad_norm": 0.07817257940769196, "learning_rate": 0.00016013471743509862, "loss": 6.796, "step": 449 }, { "epoch": 0.05630586597639846, "grad_norm": 0.13272996246814728, "learning_rate": 0.00015996612118590603, "loss": 6.7702, "step": 450 }, { "epoch": 0.05643099012301268, "grad_norm": 0.1718011051416397, "learning_rate": 0.00015979725835337294, "loss": 6.8745, "step": 451 }, { "epoch": 0.056556114269626895, "grad_norm": 0.10092202574014664, "learning_rate": 0.00015962812968819016, "loss": 6.8872, "step": 452 }, { "epoch": 0.05668123841624111, "grad_norm": 0.09672504663467407, "learning_rate": 0.0001594587359422303, "loss": 6.8764, "step": 453 }, { "epoch": 0.05680636256285533, "grad_norm": 0.09616785496473312, "learning_rate": 0.0001592890778685444, "loss": 6.8687, "step": 454 }, { "epoch": 0.05693148670946955, "grad_norm": 0.08847421407699585, "learning_rate": 0.00015911915622135862, "loss": 6.8722, "step": 455 }, { "epoch": 0.057056610856083774, "grad_norm": 0.09444184601306915, "learning_rate": 0.00015894897175607086, "loss": 6.8748, "step": 456 }, { "epoch": 0.05718173500269799, "grad_norm": 0.08493054658174515, "learning_rate": 0.00015877852522924732, "loss": 6.8721, "step": 457 }, { "epoch": 0.05730685914931221, "grad_norm": 0.07265307754278183, "learning_rate": 0.00015860781739861928, "loss": 6.8746, "step": 458 }, { "epoch": 0.05743198329592643, "grad_norm": 0.07113247364759445, "learning_rate": 0.00015843684902307962, "loss": 6.8701, "step": 459 }, { "epoch": 0.057557107442540646, "grad_norm": 0.05553045868873596, "learning_rate": 0.00015826562086267956, "loss": 6.8691, "step": 460 }, { "epoch": 0.057682231589154864, "grad_norm": 0.06427642703056335, "learning_rate": 0.00015809413367862512, "loss": 6.8724, "step": 461 }, { "epoch": 0.05780735573576908, "grad_norm": 0.06988020241260529, "learning_rate": 0.00015792238823327388, "loss": 6.8668, "step": 462 }, { "epoch": 0.0579324798823833, "grad_norm": 0.05613817274570465, "learning_rate": 0.00015775038529013152, "loss": 6.8644, "step": 463 }, { "epoch": 0.05805760402899752, "grad_norm": 0.060558583587408066, "learning_rate": 0.0001575781256138485, "loss": 6.8671, "step": 464 }, { "epoch": 0.058182728175611736, "grad_norm": 0.06008605659008026, "learning_rate": 0.00015740560997021648, "loss": 6.8631, "step": 465 }, { "epoch": 0.05830785232222596, "grad_norm": 0.06377378106117249, "learning_rate": 0.00015723283912616513, "loss": 6.8597, "step": 466 }, { "epoch": 0.05843297646884018, "grad_norm": 0.0957098975777626, "learning_rate": 0.00015705981384975866, "loss": 6.858, "step": 467 }, { "epoch": 0.0585581006154544, "grad_norm": 0.08405467122793198, "learning_rate": 0.0001568865349101923, "loss": 6.8567, "step": 468 }, { "epoch": 0.058683224762068616, "grad_norm": 0.06298062950372696, "learning_rate": 0.00015671300307778898, "loss": 6.8573, "step": 469 }, { "epoch": 0.058808348908682834, "grad_norm": 0.09028910845518112, "learning_rate": 0.00015653921912399589, "loss": 6.8561, "step": 470 }, { "epoch": 0.05893347305529705, "grad_norm": 0.0627702996134758, "learning_rate": 0.00015636518382138107, "loss": 6.8541, "step": 471 }, { "epoch": 0.05905859720191127, "grad_norm": 0.061995070427656174, "learning_rate": 0.0001561908979436299, "loss": 6.8557, "step": 472 }, { "epoch": 0.05918372134852549, "grad_norm": 0.05258549004793167, "learning_rate": 0.00015601636226554168, "loss": 6.8528, "step": 473 }, { "epoch": 0.059308845495139706, "grad_norm": 0.057481493800878525, "learning_rate": 0.00015584157756302634, "loss": 6.8542, "step": 474 }, { "epoch": 0.05943396964175393, "grad_norm": 0.07295267283916473, "learning_rate": 0.0001556665446131007, "loss": 6.848, "step": 475 }, { "epoch": 0.05955909378836815, "grad_norm": 0.06958470493555069, "learning_rate": 0.00015549126419388536, "loss": 6.8499, "step": 476 }, { "epoch": 0.05968421793498237, "grad_norm": 0.06368780136108398, "learning_rate": 0.0001553157370846009, "loss": 6.8505, "step": 477 }, { "epoch": 0.059809342081596585, "grad_norm": 0.06953843683004379, "learning_rate": 0.00015513996406556465, "loss": 6.8472, "step": 478 }, { "epoch": 0.0599344662282108, "grad_norm": 0.05779159814119339, "learning_rate": 0.00015496394591818716, "loss": 6.8411, "step": 479 }, { "epoch": 0.06005959037482502, "grad_norm": 0.07871963828802109, "learning_rate": 0.0001547876834249687, "loss": 6.8446, "step": 480 }, { "epoch": 0.06018471452143924, "grad_norm": 0.06701961159706116, "learning_rate": 0.00015461117736949577, "loss": 6.8428, "step": 481 }, { "epoch": 0.06030983866805346, "grad_norm": 0.0673895925283432, "learning_rate": 0.00015443442853643762, "loss": 6.8429, "step": 482 }, { "epoch": 0.060434962814667675, "grad_norm": 0.06733205914497375, "learning_rate": 0.00015425743771154294, "loss": 6.8454, "step": 483 }, { "epoch": 0.06056008696128189, "grad_norm": 0.06998990476131439, "learning_rate": 0.00015408020568163602, "loss": 6.838, "step": 484 }, { "epoch": 0.06068521110789612, "grad_norm": 0.06279562413692474, "learning_rate": 0.00015390273323461352, "loss": 6.841, "step": 485 }, { "epoch": 0.060810335254510337, "grad_norm": 0.07207836210727692, "learning_rate": 0.0001537250211594409, "loss": 6.8362, "step": 486 }, { "epoch": 0.060935459401124555, "grad_norm": 0.07468303292989731, "learning_rate": 0.0001535470702461489, "loss": 6.8347, "step": 487 }, { "epoch": 0.06106058354773877, "grad_norm": 0.08909296244382858, "learning_rate": 0.00015336888128583, "loss": 6.8312, "step": 488 }, { "epoch": 0.06118570769435299, "grad_norm": 0.07458513230085373, "learning_rate": 0.000153190455070635, "loss": 6.8334, "step": 489 }, { "epoch": 0.06131083184096721, "grad_norm": 0.05957014113664627, "learning_rate": 0.00015301179239376938, "loss": 6.8269, "step": 490 }, { "epoch": 0.06143595598758143, "grad_norm": 0.06831321865320206, "learning_rate": 0.00015283289404948976, "loss": 6.8249, "step": 491 }, { "epoch": 0.061561080134195645, "grad_norm": 0.06435420364141464, "learning_rate": 0.0001526537608331006, "loss": 6.8249, "step": 492 }, { "epoch": 0.06168620428080986, "grad_norm": 0.05904107540845871, "learning_rate": 0.00015247439354095041, "loss": 6.8245, "step": 493 }, { "epoch": 0.06181132842742409, "grad_norm": 0.09884268045425415, "learning_rate": 0.00015229479297042823, "loss": 6.8214, "step": 494 }, { "epoch": 0.061936452574038306, "grad_norm": 0.0654333084821701, "learning_rate": 0.00015211495991996027, "loss": 6.8205, "step": 495 }, { "epoch": 0.062061576720652524, "grad_norm": 0.07440678030252457, "learning_rate": 0.0001519348951890062, "loss": 6.8149, "step": 496 }, { "epoch": 0.06218670086726674, "grad_norm": 0.06847091019153595, "learning_rate": 0.0001517545995780556, "loss": 6.8117, "step": 497 }, { "epoch": 0.06231182501388096, "grad_norm": 0.070498026907444, "learning_rate": 0.00015157407388862452, "loss": 6.8005, "step": 498 }, { "epoch": 0.06243694916049518, "grad_norm": 0.13137699663639069, "learning_rate": 0.00015139331892325179, "loss": 6.7913, "step": 499 }, { "epoch": 0.0625620733071094, "grad_norm": 0.18194161355495453, "learning_rate": 0.0001512123354854955, "loss": 6.761, "step": 500 }, { "epoch": 0.06268719745372361, "grad_norm": 0.19133229553699493, "learning_rate": 0.0001510311243799295, "loss": 6.8577, "step": 501 }, { "epoch": 0.06281232160033784, "grad_norm": 0.1356317102909088, "learning_rate": 0.00015084968641213958, "loss": 6.8707, "step": 502 }, { "epoch": 0.06293744574695205, "grad_norm": 0.10677680373191833, "learning_rate": 0.00015066802238872023, "loss": 6.8777, "step": 503 }, { "epoch": 0.06306256989356628, "grad_norm": 0.1629563719034195, "learning_rate": 0.0001504861331172709, "loss": 6.8611, "step": 504 }, { "epoch": 0.06318769404018049, "grad_norm": 0.09069332480430603, "learning_rate": 0.0001503040194063922, "loss": 6.8721, "step": 505 }, { "epoch": 0.06331281818679471, "grad_norm": 0.0987582579255104, "learning_rate": 0.00015012168206568268, "loss": 6.8742, "step": 506 }, { "epoch": 0.06343794233340892, "grad_norm": 0.08196765184402466, "learning_rate": 0.00014993912190573505, "loss": 6.8709, "step": 507 }, { "epoch": 0.06356306648002315, "grad_norm": 0.0783083513379097, "learning_rate": 0.00014975633973813242, "loss": 6.8724, "step": 508 }, { "epoch": 0.06368819062663737, "grad_norm": 0.07032638788223267, "learning_rate": 0.00014957333637544503, "loss": 6.8718, "step": 509 }, { "epoch": 0.06381331477325158, "grad_norm": 0.06387288123369217, "learning_rate": 0.00014939011263122634, "loss": 6.8713, "step": 510 }, { "epoch": 0.06393843891986581, "grad_norm": 0.05888252705335617, "learning_rate": 0.0001492066693200096, "loss": 6.8701, "step": 511 }, { "epoch": 0.06406356306648002, "grad_norm": 0.0668516457080841, "learning_rate": 0.00014902300725730413, "loss": 6.8709, "step": 512 }, { "epoch": 0.06418868721309424, "grad_norm": 0.06639979034662247, "learning_rate": 0.00014883912725959167, "loss": 6.8648, "step": 513 }, { "epoch": 0.06431381135970846, "grad_norm": 0.061197392642498016, "learning_rate": 0.00014865503014432292, "loss": 6.8658, "step": 514 }, { "epoch": 0.06443893550632268, "grad_norm": 0.06445780396461487, "learning_rate": 0.00014847071672991367, "loss": 6.862, "step": 515 }, { "epoch": 0.06456405965293689, "grad_norm": 0.06259971112012863, "learning_rate": 0.0001482861878357414, "loss": 6.8615, "step": 516 }, { "epoch": 0.06468918379955112, "grad_norm": 0.07143022865056992, "learning_rate": 0.00014810144428214144, "loss": 6.8609, "step": 517 }, { "epoch": 0.06481430794616534, "grad_norm": 0.09002707153558731, "learning_rate": 0.0001479164868904034, "loss": 6.8549, "step": 518 }, { "epoch": 0.06493943209277955, "grad_norm": 0.05931887775659561, "learning_rate": 0.00014773131648276758, "loss": 6.8587, "step": 519 }, { "epoch": 0.06506455623939378, "grad_norm": 0.08823603391647339, "learning_rate": 0.00014754593388242117, "loss": 6.8567, "step": 520 }, { "epoch": 0.06518968038600799, "grad_norm": 0.059194374829530716, "learning_rate": 0.0001473603399134948, "loss": 6.8553, "step": 521 }, { "epoch": 0.06531480453262221, "grad_norm": 0.06221536919474602, "learning_rate": 0.0001471745354010586, "loss": 6.853, "step": 522 }, { "epoch": 0.06543992867923643, "grad_norm": 0.057904791086912155, "learning_rate": 0.00014698852117111884, "loss": 6.8514, "step": 523 }, { "epoch": 0.06556505282585065, "grad_norm": 0.06057745963335037, "learning_rate": 0.000146802298050614, "loss": 6.8508, "step": 524 }, { "epoch": 0.06569017697246486, "grad_norm": 0.05887136608362198, "learning_rate": 0.0001466158668674112, "loss": 6.8554, "step": 525 }, { "epoch": 0.06581530111907909, "grad_norm": 0.05819614976644516, "learning_rate": 0.00014642922845030257, "loss": 6.8433, "step": 526 }, { "epoch": 0.06594042526569331, "grad_norm": 0.07282175123691559, "learning_rate": 0.0001462423836290015, "loss": 6.8485, "step": 527 }, { "epoch": 0.06606554941230752, "grad_norm": 0.07722876965999603, "learning_rate": 0.00014605533323413887, "loss": 6.8457, "step": 528 }, { "epoch": 0.06619067355892175, "grad_norm": 0.08057966828346252, "learning_rate": 0.00014586807809725962, "loss": 6.8469, "step": 529 }, { "epoch": 0.06631579770553596, "grad_norm": 0.06307411193847656, "learning_rate": 0.00014568061905081875, "loss": 6.8432, "step": 530 }, { "epoch": 0.06644092185215018, "grad_norm": 0.06701157987117767, "learning_rate": 0.00014549295692817778, "loss": 6.8432, "step": 531 }, { "epoch": 0.0665660459987644, "grad_norm": 0.06190832704305649, "learning_rate": 0.00014530509256360102, "loss": 6.8418, "step": 532 }, { "epoch": 0.06669117014537862, "grad_norm": 0.06910586357116699, "learning_rate": 0.00014511702679225193, "loss": 6.8407, "step": 533 }, { "epoch": 0.06681629429199283, "grad_norm": 0.05844762176275253, "learning_rate": 0.0001449287604501893, "loss": 6.8395, "step": 534 }, { "epoch": 0.06694141843860706, "grad_norm": 0.055843740701675415, "learning_rate": 0.00014474029437436348, "loss": 6.8357, "step": 535 }, { "epoch": 0.06706654258522128, "grad_norm": 0.072757288813591, "learning_rate": 0.00014455162940261285, "loss": 6.8378, "step": 536 }, { "epoch": 0.06719166673183549, "grad_norm": 0.05763239413499832, "learning_rate": 0.0001443627663736599, "loss": 6.8344, "step": 537 }, { "epoch": 0.06731679087844972, "grad_norm": 0.11451676487922668, "learning_rate": 0.00014417370612710778, "loss": 6.8346, "step": 538 }, { "epoch": 0.06744191502506393, "grad_norm": 0.07055835425853729, "learning_rate": 0.00014398444950343623, "loss": 6.8356, "step": 539 }, { "epoch": 0.06756703917167815, "grad_norm": 0.0767672210931778, "learning_rate": 0.00014379499734399798, "loss": 6.8303, "step": 540 }, { "epoch": 0.06769216331829236, "grad_norm": 0.07831480354070663, "learning_rate": 0.0001436053504910151, "loss": 6.827, "step": 541 }, { "epoch": 0.06781728746490659, "grad_norm": 0.0734798014163971, "learning_rate": 0.0001434155097875752, "loss": 6.8224, "step": 542 }, { "epoch": 0.0679424116115208, "grad_norm": 0.09030284732580185, "learning_rate": 0.00014322547607762762, "loss": 6.8221, "step": 543 }, { "epoch": 0.06806753575813503, "grad_norm": 0.08662009984254837, "learning_rate": 0.0001430352502059797, "loss": 6.8203, "step": 544 }, { "epoch": 0.06819265990474924, "grad_norm": 0.1007436215877533, "learning_rate": 0.0001428448330182931, "loss": 6.8147, "step": 545 }, { "epoch": 0.06831778405136346, "grad_norm": 0.07188870757818222, "learning_rate": 0.00014265422536107993, "loss": 6.8104, "step": 546 }, { "epoch": 0.06844290819797769, "grad_norm": 0.07315655052661896, "learning_rate": 0.00014246342808169914, "loss": 6.8061, "step": 547 }, { "epoch": 0.0685680323445919, "grad_norm": 0.07562731206417084, "learning_rate": 0.00014227244202835257, "loss": 6.8024, "step": 548 }, { "epoch": 0.06869315649120612, "grad_norm": 0.14848235249519348, "learning_rate": 0.0001420812680500813, "loss": 6.7928, "step": 549 }, { "epoch": 0.06881828063782033, "grad_norm": 0.12424899637699127, "learning_rate": 0.00014188990699676184, "loss": 6.7664, "step": 550 }, { "epoch": 0.06894340478443456, "grad_norm": 0.12112855911254883, "learning_rate": 0.00014169835971910238, "loss": 6.8718, "step": 551 }, { "epoch": 0.06906852893104877, "grad_norm": 0.11926542967557907, "learning_rate": 0.0001415066270686389, "loss": 6.8696, "step": 552 }, { "epoch": 0.069193653077663, "grad_norm": 0.10589534789323807, "learning_rate": 0.00014131470989773158, "loss": 6.8696, "step": 553 }, { "epoch": 0.0693187772242772, "grad_norm": 0.09585961699485779, "learning_rate": 0.0001411226090595608, "loss": 6.8725, "step": 554 }, { "epoch": 0.06944390137089143, "grad_norm": 0.06100858375430107, "learning_rate": 0.00014093032540812348, "loss": 6.8742, "step": 555 }, { "epoch": 0.06956902551750566, "grad_norm": 0.06865556538105011, "learning_rate": 0.0001407378597982293, "loss": 6.8674, "step": 556 }, { "epoch": 0.06969414966411987, "grad_norm": 0.07369811087846756, "learning_rate": 0.00014054521308549673, "loss": 6.8723, "step": 557 }, { "epoch": 0.06981927381073409, "grad_norm": 0.06764942407608032, "learning_rate": 0.0001403523861263495, "loss": 6.8742, "step": 558 }, { "epoch": 0.0699443979573483, "grad_norm": 0.06187736988067627, "learning_rate": 0.00014015937977801256, "loss": 6.8718, "step": 559 }, { "epoch": 0.07006952210396253, "grad_norm": 0.062310222536325455, "learning_rate": 0.00013996619489850822, "loss": 6.8715, "step": 560 }, { "epoch": 0.07019464625057674, "grad_norm": 0.06987649202346802, "learning_rate": 0.00013977283234665273, "loss": 6.8709, "step": 561 }, { "epoch": 0.07031977039719096, "grad_norm": 0.05582519248127937, "learning_rate": 0.00013957929298205195, "loss": 6.8673, "step": 562 }, { "epoch": 0.07044489454380518, "grad_norm": 0.06284952908754349, "learning_rate": 0.00013938557766509792, "loss": 6.8624, "step": 563 }, { "epoch": 0.0705700186904194, "grad_norm": 0.06807936728000641, "learning_rate": 0.0001391916872569648, "loss": 6.8608, "step": 564 }, { "epoch": 0.07069514283703363, "grad_norm": 0.06335616856813431, "learning_rate": 0.00013899762261960518, "loss": 6.8576, "step": 565 }, { "epoch": 0.07082026698364784, "grad_norm": 0.05519183725118637, "learning_rate": 0.0001388033846157462, "loss": 6.8565, "step": 566 }, { "epoch": 0.07094539113026206, "grad_norm": 0.054543036967515945, "learning_rate": 0.0001386089741088857, "loss": 6.8603, "step": 567 }, { "epoch": 0.07107051527687627, "grad_norm": 0.06922731548547745, "learning_rate": 0.00013841439196328836, "loss": 6.8568, "step": 568 }, { "epoch": 0.0711956394234905, "grad_norm": 0.0763833224773407, "learning_rate": 0.00013821963904398193, "loss": 6.8561, "step": 569 }, { "epoch": 0.07132076357010471, "grad_norm": 0.06124742701649666, "learning_rate": 0.00013802471621675338, "loss": 6.8558, "step": 570 }, { "epoch": 0.07144588771671893, "grad_norm": 0.0650462955236435, "learning_rate": 0.00013782962434814492, "loss": 6.8582, "step": 571 }, { "epoch": 0.07157101186333314, "grad_norm": 0.052520815283060074, "learning_rate": 0.00013763436430545034, "loss": 6.8507, "step": 572 }, { "epoch": 0.07169613600994737, "grad_norm": 0.053291965276002884, "learning_rate": 0.00013743893695671096, "loss": 6.8533, "step": 573 }, { "epoch": 0.0718212601565616, "grad_norm": 0.054565366357564926, "learning_rate": 0.00013724334317071198, "loss": 6.8474, "step": 574 }, { "epoch": 0.0719463843031758, "grad_norm": 0.06371112912893295, "learning_rate": 0.00013704758381697844, "loss": 6.851, "step": 575 }, { "epoch": 0.07207150844979003, "grad_norm": 0.05248385667800903, "learning_rate": 0.00013685165976577146, "loss": 6.8442, "step": 576 }, { "epoch": 0.07219663259640424, "grad_norm": 0.05889122933149338, "learning_rate": 0.0001366555718880843, "loss": 6.8438, "step": 577 }, { "epoch": 0.07232175674301847, "grad_norm": 0.054406147450208664, "learning_rate": 0.00013645932105563844, "loss": 6.8446, "step": 578 }, { "epoch": 0.07244688088963268, "grad_norm": 0.053911879658699036, "learning_rate": 0.00013626290814088005, "loss": 6.8435, "step": 579 }, { "epoch": 0.0725720050362469, "grad_norm": 0.07551559805870056, "learning_rate": 0.00013606633401697557, "loss": 6.8419, "step": 580 }, { "epoch": 0.07269712918286111, "grad_norm": 0.06329525262117386, "learning_rate": 0.00013586959955780824, "loss": 6.8388, "step": 581 }, { "epoch": 0.07282225332947534, "grad_norm": 0.059528496116399765, "learning_rate": 0.00013567270563797398, "loss": 6.84, "step": 582 }, { "epoch": 0.07294737747608956, "grad_norm": 0.056839533150196075, "learning_rate": 0.00013547565313277776, "loss": 6.8398, "step": 583 }, { "epoch": 0.07307250162270378, "grad_norm": 0.054866183549165726, "learning_rate": 0.00013527844291822948, "loss": 6.8377, "step": 584 }, { "epoch": 0.073197625769318, "grad_norm": 0.07333236187696457, "learning_rate": 0.0001350810758710401, "loss": 6.8366, "step": 585 }, { "epoch": 0.07332274991593221, "grad_norm": 0.061047252267599106, "learning_rate": 0.00013488355286861783, "loss": 6.8316, "step": 586 }, { "epoch": 0.07344787406254644, "grad_norm": 0.08371743559837341, "learning_rate": 0.0001346858747890642, "loss": 6.8311, "step": 587 }, { "epoch": 0.07357299820916065, "grad_norm": 0.06274928897619247, "learning_rate": 0.00013448804251117003, "loss": 6.8294, "step": 588 }, { "epoch": 0.07369812235577487, "grad_norm": 0.07520133256912231, "learning_rate": 0.0001342900569144119, "loss": 6.8313, "step": 589 }, { "epoch": 0.07382324650238908, "grad_norm": 0.05626892298460007, "learning_rate": 0.0001340919188789477, "loss": 6.8263, "step": 590 }, { "epoch": 0.07394837064900331, "grad_norm": 0.08091791719198227, "learning_rate": 0.00013389362928561317, "loss": 6.8291, "step": 591 }, { "epoch": 0.07407349479561752, "grad_norm": 0.07199662923812866, "learning_rate": 0.00013369518901591772, "loss": 6.8274, "step": 592 }, { "epoch": 0.07419861894223174, "grad_norm": 0.07114735245704651, "learning_rate": 0.00013349659895204067, "loss": 6.8199, "step": 593 }, { "epoch": 0.07432374308884597, "grad_norm": 0.06578832119703293, "learning_rate": 0.0001332978599768272, "loss": 6.8154, "step": 594 }, { "epoch": 0.07444886723546018, "grad_norm": 0.07182328402996063, "learning_rate": 0.00013309897297378455, "loss": 6.8149, "step": 595 }, { "epoch": 0.0745739913820744, "grad_norm": 0.07365160435438156, "learning_rate": 0.00013289993882707797, "loss": 6.8146, "step": 596 }, { "epoch": 0.07469911552868862, "grad_norm": 0.09033498167991638, "learning_rate": 0.00013270075842152678, "loss": 6.8042, "step": 597 }, { "epoch": 0.07482423967530284, "grad_norm": 0.08086494356393814, "learning_rate": 0.00013250143264260074, "loss": 6.8096, "step": 598 }, { "epoch": 0.07494936382191705, "grad_norm": 0.08296974748373032, "learning_rate": 0.0001323019623764156, "loss": 6.7976, "step": 599 }, { "epoch": 0.07507448796853128, "grad_norm": 0.12728112936019897, "learning_rate": 0.00013210234850972964, "loss": 6.7703, "step": 600 }, { "epoch": 0.07519961211514549, "grad_norm": 0.11543053388595581, "learning_rate": 0.0001319025919299394, "loss": 6.8637, "step": 601 }, { "epoch": 0.07532473626175971, "grad_norm": 0.09637086093425751, "learning_rate": 0.00013170269352507597, "loss": 6.8754, "step": 602 }, { "epoch": 0.07544986040837394, "grad_norm": 0.10168655216693878, "learning_rate": 0.0001315026541838008, "loss": 6.8669, "step": 603 }, { "epoch": 0.07557498455498815, "grad_norm": 0.16151650249958038, "learning_rate": 0.00013130247479540202, "loss": 6.8533, "step": 604 }, { "epoch": 0.07570010870160238, "grad_norm": 0.07673663645982742, "learning_rate": 0.00013110215624979025, "loss": 6.8649, "step": 605 }, { "epoch": 0.07582523284821659, "grad_norm": 0.06671424210071564, "learning_rate": 0.00013090169943749476, "loss": 6.8694, "step": 606 }, { "epoch": 0.07595035699483081, "grad_norm": 0.061678413301706314, "learning_rate": 0.00013070110524965954, "loss": 6.8668, "step": 607 }, { "epoch": 0.07607548114144502, "grad_norm": 0.055082425475120544, "learning_rate": 0.00013050037457803924, "loss": 6.8692, "step": 608 }, { "epoch": 0.07620060528805925, "grad_norm": 0.058938805013895035, "learning_rate": 0.0001302995083149953, "loss": 6.8693, "step": 609 }, { "epoch": 0.07632572943467346, "grad_norm": 0.05529715120792389, "learning_rate": 0.0001300985073534919, "loss": 6.87, "step": 610 }, { "epoch": 0.07645085358128768, "grad_norm": 0.06287004053592682, "learning_rate": 0.00012989737258709203, "loss": 6.8669, "step": 611 }, { "epoch": 0.07657597772790191, "grad_norm": 0.05604143068194389, "learning_rate": 0.00012969610490995358, "loss": 6.8647, "step": 612 }, { "epoch": 0.07670110187451612, "grad_norm": 0.06314929574728012, "learning_rate": 0.00012949470521682528, "loss": 6.8643, "step": 613 }, { "epoch": 0.07682622602113034, "grad_norm": 0.07301146537065506, "learning_rate": 0.0001292931744030427, "loss": 6.8596, "step": 614 }, { "epoch": 0.07695135016774456, "grad_norm": 0.06212817505002022, "learning_rate": 0.0001290915133645243, "loss": 6.8612, "step": 615 }, { "epoch": 0.07707647431435878, "grad_norm": 0.06457951664924622, "learning_rate": 0.00012888972299776754, "loss": 6.8571, "step": 616 }, { "epoch": 0.07720159846097299, "grad_norm": 0.058301158249378204, "learning_rate": 0.00012868780419984482, "loss": 6.859, "step": 617 }, { "epoch": 0.07732672260758722, "grad_norm": 0.061633531004190445, "learning_rate": 0.00012848575786839943, "loss": 6.8545, "step": 618 }, { "epoch": 0.07745184675420143, "grad_norm": 0.0544595941901207, "learning_rate": 0.0001282835849016416, "loss": 6.859, "step": 619 }, { "epoch": 0.07757697090081565, "grad_norm": 0.061691369861364365, "learning_rate": 0.00012808128619834461, "loss": 6.8542, "step": 620 }, { "epoch": 0.07770209504742988, "grad_norm": 0.0718623623251915, "learning_rate": 0.0001278788626578407, "loss": 6.8495, "step": 621 }, { "epoch": 0.07782721919404409, "grad_norm": 0.05492227524518967, "learning_rate": 0.00012767631518001698, "loss": 6.8485, "step": 622 }, { "epoch": 0.07795234334065831, "grad_norm": 0.0662972554564476, "learning_rate": 0.00012747364466531163, "loss": 6.8468, "step": 623 }, { "epoch": 0.07807746748727253, "grad_norm": 0.05340439826250076, "learning_rate": 0.00012727085201470973, "loss": 6.8477, "step": 624 }, { "epoch": 0.07820259163388675, "grad_norm": 0.06175565347075462, "learning_rate": 0.00012706793812973941, "loss": 6.8483, "step": 625 }, { "epoch": 0.07832771578050096, "grad_norm": 0.06804826855659485, "learning_rate": 0.0001268649039124677, "loss": 6.8447, "step": 626 }, { "epoch": 0.07845283992711519, "grad_norm": 0.06182577833533287, "learning_rate": 0.00012666175026549662, "loss": 6.8467, "step": 627 }, { "epoch": 0.0785779640737294, "grad_norm": 0.07787974178791046, "learning_rate": 0.000126458478091959, "loss": 6.8428, "step": 628 }, { "epoch": 0.07870308822034362, "grad_norm": 0.06023358553647995, "learning_rate": 0.00012625508829551473, "loss": 6.8432, "step": 629 }, { "epoch": 0.07882821236695785, "grad_norm": 0.05802322179079056, "learning_rate": 0.00012605158178034654, "loss": 6.8394, "step": 630 }, { "epoch": 0.07895333651357206, "grad_norm": 0.07952333241701126, "learning_rate": 0.00012584795945115603, "loss": 6.8381, "step": 631 }, { "epoch": 0.07907846066018628, "grad_norm": 0.05956702679395676, "learning_rate": 0.0001256442222131597, "loss": 6.8377, "step": 632 }, { "epoch": 0.0792035848068005, "grad_norm": 0.07281078398227692, "learning_rate": 0.0001254403709720848, "loss": 6.8412, "step": 633 }, { "epoch": 0.07932870895341472, "grad_norm": 0.0676417127251625, "learning_rate": 0.0001252364066341655, "loss": 6.8379, "step": 634 }, { "epoch": 0.07945383310002893, "grad_norm": 0.06791514903306961, "learning_rate": 0.00012503233010613865, "loss": 6.8341, "step": 635 }, { "epoch": 0.07957895724664316, "grad_norm": 0.07389527559280396, "learning_rate": 0.00012482814229523997, "loss": 6.8321, "step": 636 }, { "epoch": 0.07970408139325737, "grad_norm": 0.07579143345355988, "learning_rate": 0.00012462384410919975, "loss": 6.8297, "step": 637 }, { "epoch": 0.07982920553987159, "grad_norm": 0.08403453230857849, "learning_rate": 0.00012441943645623903, "loss": 6.8252, "step": 638 }, { "epoch": 0.0799543296864858, "grad_norm": 0.07483692467212677, "learning_rate": 0.00012421492024506555, "loss": 6.824, "step": 639 }, { "epoch": 0.08007945383310003, "grad_norm": 0.08031223714351654, "learning_rate": 0.00012401029638486953, "loss": 6.8299, "step": 640 }, { "epoch": 0.08020457797971425, "grad_norm": 0.07551555335521698, "learning_rate": 0.0001238055657853198, "loss": 6.8242, "step": 641 }, { "epoch": 0.08032970212632846, "grad_norm": 0.06927035003900528, "learning_rate": 0.00012360072935655982, "loss": 6.8247, "step": 642 }, { "epoch": 0.08045482627294269, "grad_norm": 0.07835419476032257, "learning_rate": 0.00012339578800920332, "loss": 6.8223, "step": 643 }, { "epoch": 0.0805799504195569, "grad_norm": 0.09632575511932373, "learning_rate": 0.00012319074265433063, "loss": 6.8142, "step": 644 }, { "epoch": 0.08070507456617113, "grad_norm": 0.0693567618727684, "learning_rate": 0.00012298559420348437, "loss": 6.8132, "step": 645 }, { "epoch": 0.08083019871278534, "grad_norm": 0.09198413789272308, "learning_rate": 0.00012278034356866545, "loss": 6.8146, "step": 646 }, { "epoch": 0.08095532285939956, "grad_norm": 0.07684396952390671, "learning_rate": 0.00012257499166232907, "loss": 6.8034, "step": 647 }, { "epoch": 0.08108044700601377, "grad_norm": 0.10117211192846298, "learning_rate": 0.0001223695393973807, "loss": 6.7971, "step": 648 }, { "epoch": 0.081205571152628, "grad_norm": 0.11173521727323532, "learning_rate": 0.0001221639876871719, "loss": 6.7813, "step": 649 }, { "epoch": 0.08133069529924222, "grad_norm": 0.17603041231632233, "learning_rate": 0.0001219583374454963, "loss": 6.7539, "step": 650 }, { "epoch": 0.08145581944585643, "grad_norm": 0.10038384795188904, "learning_rate": 0.00012175258958658564, "loss": 6.8703, "step": 651 }, { "epoch": 0.08158094359247066, "grad_norm": 0.12352234125137329, "learning_rate": 0.00012154674502510555, "loss": 6.8747, "step": 652 }, { "epoch": 0.08170606773908487, "grad_norm": 0.10298015177249908, "learning_rate": 0.00012134080467615159, "loss": 6.8669, "step": 653 }, { "epoch": 0.0818311918856991, "grad_norm": 0.06857563555240631, "learning_rate": 0.00012113476945524513, "loss": 6.8738, "step": 654 }, { "epoch": 0.0819563160323133, "grad_norm": 0.07132629305124283, "learning_rate": 0.00012092864027832933, "loss": 6.8728, "step": 655 }, { "epoch": 0.08208144017892753, "grad_norm": 0.0848504900932312, "learning_rate": 0.000120722418061765, "loss": 6.8747, "step": 656 }, { "epoch": 0.08220656432554174, "grad_norm": 0.0778626874089241, "learning_rate": 0.0001205161037223266, "loss": 6.8735, "step": 657 }, { "epoch": 0.08233168847215597, "grad_norm": 0.0748337134718895, "learning_rate": 0.00012030969817719808, "loss": 6.8673, "step": 658 }, { "epoch": 0.08245681261877019, "grad_norm": 0.06857657432556152, "learning_rate": 0.00012010320234396894, "loss": 6.8646, "step": 659 }, { "epoch": 0.0825819367653844, "grad_norm": 0.06015115603804588, "learning_rate": 0.00011989661714062999, "loss": 6.8683, "step": 660 }, { "epoch": 0.08270706091199863, "grad_norm": 0.06430673599243164, "learning_rate": 0.0001196899434855693, "loss": 6.8682, "step": 661 }, { "epoch": 0.08283218505861284, "grad_norm": 0.056531719863414764, "learning_rate": 0.00011948318229756827, "loss": 6.8614, "step": 662 }, { "epoch": 0.08295730920522706, "grad_norm": 0.05133191868662834, "learning_rate": 0.00011927633449579735, "loss": 6.8617, "step": 663 }, { "epoch": 0.08308243335184128, "grad_norm": 0.05988540127873421, "learning_rate": 0.0001190694009998121, "loss": 6.8622, "step": 664 }, { "epoch": 0.0832075574984555, "grad_norm": 0.06990472227334976, "learning_rate": 0.00011886238272954897, "loss": 6.8618, "step": 665 }, { "epoch": 0.08333268164506971, "grad_norm": 0.05829968303442001, "learning_rate": 0.00011865528060532127, "loss": 6.8531, "step": 666 }, { "epoch": 0.08345780579168394, "grad_norm": 0.05838499963283539, "learning_rate": 0.0001184480955478152, "loss": 6.8563, "step": 667 }, { "epoch": 0.08358292993829816, "grad_norm": 0.058977026492357254, "learning_rate": 0.00011824082847808558, "loss": 6.8572, "step": 668 }, { "epoch": 0.08370805408491237, "grad_norm": 0.06622078269720078, "learning_rate": 0.00011803348031755179, "loss": 6.8539, "step": 669 }, { "epoch": 0.0838331782315266, "grad_norm": 0.05843067169189453, "learning_rate": 0.0001178260519879937, "loss": 6.8502, "step": 670 }, { "epoch": 0.08395830237814081, "grad_norm": 0.06287764012813568, "learning_rate": 0.00011761854441154767, "loss": 6.8508, "step": 671 }, { "epoch": 0.08408342652475503, "grad_norm": 0.060432303696870804, "learning_rate": 0.00011741095851070228, "loss": 6.8511, "step": 672 }, { "epoch": 0.08420855067136924, "grad_norm": 0.05567825585603714, "learning_rate": 0.00011720329520829429, "loss": 6.8457, "step": 673 }, { "epoch": 0.08433367481798347, "grad_norm": 0.07932166755199432, "learning_rate": 0.0001169955554275046, "loss": 6.8499, "step": 674 }, { "epoch": 0.08445879896459768, "grad_norm": 0.062287211418151855, "learning_rate": 0.0001167877400918541, "loss": 6.848, "step": 675 }, { "epoch": 0.0845839231112119, "grad_norm": 0.049426399171352386, "learning_rate": 0.00011657985012519952, "loss": 6.8451, "step": 676 }, { "epoch": 0.08470904725782613, "grad_norm": 0.0630982518196106, "learning_rate": 0.00011637188645172944, "loss": 6.8429, "step": 677 }, { "epoch": 0.08483417140444034, "grad_norm": 0.06772066652774811, "learning_rate": 0.00011616384999596006, "loss": 6.8461, "step": 678 }, { "epoch": 0.08495929555105457, "grad_norm": 0.06661872565746307, "learning_rate": 0.00011595574168273111, "loss": 6.8424, "step": 679 }, { "epoch": 0.08508441969766878, "grad_norm": 0.061995167285203934, "learning_rate": 0.0001157475624372018, "loss": 6.8415, "step": 680 }, { "epoch": 0.085209543844283, "grad_norm": 0.053295329213142395, "learning_rate": 0.0001155393131848467, "loss": 6.8366, "step": 681 }, { "epoch": 0.08533466799089721, "grad_norm": 0.059600021690130234, "learning_rate": 0.00011533099485145155, "loss": 6.8346, "step": 682 }, { "epoch": 0.08545979213751144, "grad_norm": 0.05497244372963905, "learning_rate": 0.00011512260836310924, "loss": 6.8368, "step": 683 }, { "epoch": 0.08558491628412565, "grad_norm": 0.08288810402154922, "learning_rate": 0.00011491415464621562, "loss": 6.8346, "step": 684 }, { "epoch": 0.08571004043073988, "grad_norm": 0.06281071901321411, "learning_rate": 0.00011470563462746541, "loss": 6.8353, "step": 685 }, { "epoch": 0.08583516457735409, "grad_norm": 0.06572843343019485, "learning_rate": 0.00011449704923384812, "loss": 6.8299, "step": 686 }, { "epoch": 0.08596028872396831, "grad_norm": 0.09339410811662674, "learning_rate": 0.00011428839939264382, "loss": 6.8305, "step": 687 }, { "epoch": 0.08608541287058254, "grad_norm": 0.0669776201248169, "learning_rate": 0.0001140796860314191, "loss": 6.8313, "step": 688 }, { "epoch": 0.08621053701719675, "grad_norm": 0.0743851363658905, "learning_rate": 0.00011387091007802297, "loss": 6.824, "step": 689 }, { "epoch": 0.08633566116381097, "grad_norm": 0.06968666613101959, "learning_rate": 0.0001136620724605827, "loss": 6.8239, "step": 690 }, { "epoch": 0.08646078531042518, "grad_norm": 0.06374047696590424, "learning_rate": 0.00011345317410749964, "loss": 6.8219, "step": 691 }, { "epoch": 0.08658590945703941, "grad_norm": 0.0567874051630497, "learning_rate": 0.00011324421594744516, "loss": 6.8246, "step": 692 }, { "epoch": 0.08671103360365362, "grad_norm": 0.0682770162820816, "learning_rate": 0.00011303519890935656, "loss": 6.8144, "step": 693 }, { "epoch": 0.08683615775026785, "grad_norm": 0.08805814385414124, "learning_rate": 0.00011282612392243286, "loss": 6.8153, "step": 694 }, { "epoch": 0.08696128189688206, "grad_norm": 0.07993702590465546, "learning_rate": 0.00011261699191613066, "loss": 6.807, "step": 695 }, { "epoch": 0.08708640604349628, "grad_norm": 0.06755012273788452, "learning_rate": 0.00011240780382016005, "loss": 6.809, "step": 696 }, { "epoch": 0.0872115301901105, "grad_norm": 0.10723106563091278, "learning_rate": 0.00011219856056448051, "loss": 6.8092, "step": 697 }, { "epoch": 0.08733665433672472, "grad_norm": 0.09726639091968536, "learning_rate": 0.00011198926307929664, "loss": 6.8004, "step": 698 }, { "epoch": 0.08746177848333894, "grad_norm": 0.09273732453584671, "learning_rate": 0.00011177991229505431, "loss": 6.7913, "step": 699 }, { "epoch": 0.08758690262995315, "grad_norm": 0.12786084413528442, "learning_rate": 0.00011157050914243614, "loss": 6.7691, "step": 700 }, { "epoch": 0.08771202677656738, "grad_norm": 0.1349579244852066, "learning_rate": 0.00011136105455235766, "loss": 6.8647, "step": 701 }, { "epoch": 0.08783715092318159, "grad_norm": 0.10763350874185562, "learning_rate": 0.00011115154945596305, "loss": 6.8685, "step": 702 }, { "epoch": 0.08796227506979581, "grad_norm": 0.09719506651163101, "learning_rate": 0.00011094199478462095, "loss": 6.8759, "step": 703 }, { "epoch": 0.08808739921641003, "grad_norm": 0.08966255933046341, "learning_rate": 0.00011073239146992054, "loss": 6.8714, "step": 704 }, { "epoch": 0.08821252336302425, "grad_norm": 0.09510458260774612, "learning_rate": 0.00011052274044366711, "loss": 6.8686, "step": 705 }, { "epoch": 0.08833764750963848, "grad_norm": 0.09939589351415634, "learning_rate": 0.00011031304263787812, "loss": 6.875, "step": 706 }, { "epoch": 0.08846277165625269, "grad_norm": 0.08032210916280746, "learning_rate": 0.00011010329898477891, "loss": 6.8712, "step": 707 }, { "epoch": 0.08858789580286691, "grad_norm": 0.0704149678349495, "learning_rate": 0.0001098935104167988, "loss": 6.8704, "step": 708 }, { "epoch": 0.08871301994948112, "grad_norm": 0.06082034856081009, "learning_rate": 0.00010968367786656663, "loss": 6.8712, "step": 709 }, { "epoch": 0.08883814409609535, "grad_norm": 0.06269743293523788, "learning_rate": 0.00010947380226690684, "loss": 6.8654, "step": 710 }, { "epoch": 0.08896326824270956, "grad_norm": 0.06509160995483398, "learning_rate": 0.00010926388455083522, "loss": 6.8652, "step": 711 }, { "epoch": 0.08908839238932378, "grad_norm": 0.054183229804039, "learning_rate": 0.00010905392565155477, "loss": 6.8614, "step": 712 }, { "epoch": 0.089213516535938, "grad_norm": 0.06796237826347351, "learning_rate": 0.00010884392650245165, "loss": 6.8636, "step": 713 }, { "epoch": 0.08933864068255222, "grad_norm": 0.06332603096961975, "learning_rate": 0.00010863388803709089, "loss": 6.8612, "step": 714 }, { "epoch": 0.08946376482916645, "grad_norm": 0.06295156478881836, "learning_rate": 0.00010842381118921232, "loss": 6.8578, "step": 715 }, { "epoch": 0.08958888897578066, "grad_norm": 0.07416083663702011, "learning_rate": 0.00010821369689272638, "loss": 6.8588, "step": 716 }, { "epoch": 0.08971401312239488, "grad_norm": 0.05611807852983475, "learning_rate": 0.00010800354608171003, "loss": 6.8526, "step": 717 }, { "epoch": 0.08983913726900909, "grad_norm": 0.054691098630428314, "learning_rate": 0.00010779335969040252, "loss": 6.8551, "step": 718 }, { "epoch": 0.08996426141562332, "grad_norm": 0.05359673500061035, "learning_rate": 0.00010758313865320134, "loss": 6.852, "step": 719 }, { "epoch": 0.09008938556223753, "grad_norm": 0.06512624770402908, "learning_rate": 0.00010737288390465792, "loss": 6.8509, "step": 720 }, { "epoch": 0.09021450970885175, "grad_norm": 0.05800400301814079, "learning_rate": 0.00010716259637947357, "loss": 6.8517, "step": 721 }, { "epoch": 0.09033963385546596, "grad_norm": 0.05813112109899521, "learning_rate": 0.00010695227701249537, "loss": 6.852, "step": 722 }, { "epoch": 0.09046475800208019, "grad_norm": 0.06028591841459274, "learning_rate": 0.00010674192673871191, "loss": 6.848, "step": 723 }, { "epoch": 0.09058988214869441, "grad_norm": 0.06185732036828995, "learning_rate": 0.00010653154649324917, "loss": 6.8492, "step": 724 }, { "epoch": 0.09071500629530863, "grad_norm": 0.06781361997127533, "learning_rate": 0.00010632113721136636, "loss": 6.8464, "step": 725 }, { "epoch": 0.09084013044192285, "grad_norm": 0.052035972476005554, "learning_rate": 0.00010611069982845183, "loss": 6.8426, "step": 726 }, { "epoch": 0.09096525458853706, "grad_norm": 0.060138311237096786, "learning_rate": 0.00010590023528001884, "loss": 6.8441, "step": 727 }, { "epoch": 0.09109037873515129, "grad_norm": 0.057765740901231766, "learning_rate": 0.00010568974450170139, "loss": 6.8405, "step": 728 }, { "epoch": 0.0912155028817655, "grad_norm": 0.05666891857981682, "learning_rate": 0.00010547922842925008, "loss": 6.8437, "step": 729 }, { "epoch": 0.09134062702837972, "grad_norm": 0.05446343123912811, "learning_rate": 0.00010526868799852796, "loss": 6.8418, "step": 730 }, { "epoch": 0.09146575117499393, "grad_norm": 0.05849521607160568, "learning_rate": 0.0001050581241455064, "loss": 6.8353, "step": 731 }, { "epoch": 0.09159087532160816, "grad_norm": 0.05765247344970703, "learning_rate": 0.00010484753780626089, "loss": 6.834, "step": 732 }, { "epoch": 0.09171599946822237, "grad_norm": 0.05641422048211098, "learning_rate": 0.00010463692991696685, "loss": 6.8382, "step": 733 }, { "epoch": 0.0918411236148366, "grad_norm": 0.058541812002658844, "learning_rate": 0.00010442630141389549, "loss": 6.8317, "step": 734 }, { "epoch": 0.09196624776145082, "grad_norm": 0.08843611180782318, "learning_rate": 0.00010421565323340971, "loss": 6.8351, "step": 735 }, { "epoch": 0.09209137190806503, "grad_norm": 0.07396817952394485, "learning_rate": 0.00010400498631195992, "loss": 6.8334, "step": 736 }, { "epoch": 0.09221649605467926, "grad_norm": 0.05443422123789787, "learning_rate": 0.00010379430158607975, "loss": 6.8306, "step": 737 }, { "epoch": 0.09234162020129347, "grad_norm": 0.0691477432847023, "learning_rate": 0.000103583599992382, "loss": 6.8269, "step": 738 }, { "epoch": 0.09246674434790769, "grad_norm": 0.07043517380952835, "learning_rate": 0.0001033728824675545, "loss": 6.8265, "step": 739 }, { "epoch": 0.0925918684945219, "grad_norm": 0.09884199500083923, "learning_rate": 0.0001031621499483559, "loss": 6.8216, "step": 740 }, { "epoch": 0.09271699264113613, "grad_norm": 0.07487069070339203, "learning_rate": 0.00010295140337161146, "loss": 6.8225, "step": 741 }, { "epoch": 0.09284211678775034, "grad_norm": 0.07228706032037735, "learning_rate": 0.00010274064367420897, "loss": 6.8213, "step": 742 }, { "epoch": 0.09296724093436456, "grad_norm": 0.09586122632026672, "learning_rate": 0.00010252987179309459, "loss": 6.8161, "step": 743 }, { "epoch": 0.09309236508097879, "grad_norm": 0.07028517127037048, "learning_rate": 0.00010231908866526851, "loss": 6.8156, "step": 744 }, { "epoch": 0.093217489227593, "grad_norm": 0.07866144180297852, "learning_rate": 0.00010210829522778111, "loss": 6.8157, "step": 745 }, { "epoch": 0.09334261337420723, "grad_norm": 0.1255524605512619, "learning_rate": 0.00010189749241772844, "loss": 6.8104, "step": 746 }, { "epoch": 0.09346773752082144, "grad_norm": 0.08999192714691162, "learning_rate": 0.00010168668117224825, "loss": 6.7999, "step": 747 }, { "epoch": 0.09359286166743566, "grad_norm": 0.09121166169643402, "learning_rate": 0.00010147586242851585, "loss": 6.7967, "step": 748 }, { "epoch": 0.09371798581404987, "grad_norm": 0.11249268800020218, "learning_rate": 0.00010126503712373982, "loss": 6.7863, "step": 749 }, { "epoch": 0.0938431099606641, "grad_norm": 0.1638951301574707, "learning_rate": 0.00010105420619515798, "loss": 6.7582, "step": 750 }, { "epoch": 0.0938431099606641, "eval_loss": 6.838387966156006, "eval_runtime": 30.386, "eval_samples_per_second": 443.0, "eval_steps_per_second": 221.516, "step": 750 }, { "epoch": 0.09396823410727831, "grad_norm": 0.12596431374549866, "learning_rate": 0.00010084337058003303, "loss": 6.8612, "step": 751 }, { "epoch": 0.09409335825389253, "grad_norm": 0.09819183498620987, "learning_rate": 0.00010063253121564868, "loss": 6.8665, "step": 752 }, { "epoch": 0.09421848240050676, "grad_norm": 0.08606826514005661, "learning_rate": 0.00010042168903930514, "loss": 6.8698, "step": 753 }, { "epoch": 0.09434360654712097, "grad_norm": 0.08439600467681885, "learning_rate": 0.00010021084498831522, "loss": 6.8662, "step": 754 }, { "epoch": 0.0944687306937352, "grad_norm": 0.10085678100585938, "learning_rate": 0.0001, "loss": 6.8609, "step": 755 }, { "epoch": 0.0945938548403494, "grad_norm": 0.06968796998262405, "learning_rate": 9.97891550116848e-05, "loss": 6.8704, "step": 756 }, { "epoch": 0.09471897898696363, "grad_norm": 0.08123313635587692, "learning_rate": 9.957831096069488e-05, "loss": 6.8705, "step": 757 }, { "epoch": 0.09484410313357784, "grad_norm": 0.06296410411596298, "learning_rate": 9.936746878435136e-05, "loss": 6.8656, "step": 758 }, { "epoch": 0.09496922728019207, "grad_norm": 0.07284566015005112, "learning_rate": 9.915662941996699e-05, "loss": 6.8718, "step": 759 }, { "epoch": 0.09509435142680628, "grad_norm": 0.0826282799243927, "learning_rate": 9.894579380484204e-05, "loss": 6.8661, "step": 760 }, { "epoch": 0.0952194755734205, "grad_norm": 0.051579512655735016, "learning_rate": 9.873496287626019e-05, "loss": 6.8656, "step": 761 }, { "epoch": 0.09534459972003473, "grad_norm": 0.05063299462199211, "learning_rate": 9.852413757148417e-05, "loss": 6.8611, "step": 762 }, { "epoch": 0.09546972386664894, "grad_norm": 0.0630897656083107, "learning_rate": 9.831331882775178e-05, "loss": 6.8632, "step": 763 }, { "epoch": 0.09559484801326316, "grad_norm": 0.05601769685745239, "learning_rate": 9.81025075822716e-05, "loss": 6.8573, "step": 764 }, { "epoch": 0.09571997215987738, "grad_norm": 0.0629180446267128, "learning_rate": 9.789170477221891e-05, "loss": 6.86, "step": 765 }, { "epoch": 0.0958450963064916, "grad_norm": 0.06247006729245186, "learning_rate": 9.76809113347315e-05, "loss": 6.8551, "step": 766 }, { "epoch": 0.09597022045310581, "grad_norm": 0.07091257721185684, "learning_rate": 9.747012820690543e-05, "loss": 6.858, "step": 767 }, { "epoch": 0.09609534459972004, "grad_norm": 0.05969550460577011, "learning_rate": 9.725935632579104e-05, "loss": 6.8518, "step": 768 }, { "epoch": 0.09622046874633425, "grad_norm": 0.07379163801670074, "learning_rate": 9.704859662838855e-05, "loss": 6.8564, "step": 769 }, { "epoch": 0.09634559289294847, "grad_norm": 0.057554204016923904, "learning_rate": 9.683785005164411e-05, "loss": 6.852, "step": 770 }, { "epoch": 0.0964707170395627, "grad_norm": 0.05670370161533356, "learning_rate": 9.662711753244551e-05, "loss": 6.8473, "step": 771 }, { "epoch": 0.09659584118617691, "grad_norm": 0.060861892998218536, "learning_rate": 9.641640000761802e-05, "loss": 6.8475, "step": 772 }, { "epoch": 0.09672096533279113, "grad_norm": 0.06020105630159378, "learning_rate": 9.620569841392029e-05, "loss": 6.8497, "step": 773 }, { "epoch": 0.09684608947940535, "grad_norm": 0.051159922033548355, "learning_rate": 9.59950136880401e-05, "loss": 6.8491, "step": 774 }, { "epoch": 0.09697121362601957, "grad_norm": 0.056930724531412125, "learning_rate": 9.57843467665903e-05, "loss": 6.8431, "step": 775 }, { "epoch": 0.09709633777263378, "grad_norm": 0.05780670419335365, "learning_rate": 9.557369858610453e-05, "loss": 6.8491, "step": 776 }, { "epoch": 0.097221461919248, "grad_norm": 0.06336930394172668, "learning_rate": 9.53630700830332e-05, "loss": 6.844, "step": 777 }, { "epoch": 0.09734658606586222, "grad_norm": 0.06081291288137436, "learning_rate": 9.51524621937391e-05, "loss": 6.8431, "step": 778 }, { "epoch": 0.09747171021247644, "grad_norm": 0.10846126079559326, "learning_rate": 9.494187585449358e-05, "loss": 6.8397, "step": 779 }, { "epoch": 0.09759683435909065, "grad_norm": 0.0634780079126358, "learning_rate": 9.473131200147205e-05, "loss": 6.8395, "step": 780 }, { "epoch": 0.09772195850570488, "grad_norm": 0.05900888890028, "learning_rate": 9.452077157074994e-05, "loss": 6.8366, "step": 781 }, { "epoch": 0.0978470826523191, "grad_norm": 0.08791916072368622, "learning_rate": 9.431025549829862e-05, "loss": 6.8367, "step": 782 }, { "epoch": 0.09797220679893331, "grad_norm": 0.06639428436756134, "learning_rate": 9.409976471998118e-05, "loss": 6.8359, "step": 783 }, { "epoch": 0.09809733094554754, "grad_norm": 0.0607585683465004, "learning_rate": 9.388930017154819e-05, "loss": 6.8365, "step": 784 }, { "epoch": 0.09822245509216175, "grad_norm": 0.08171333372592926, "learning_rate": 9.367886278863366e-05, "loss": 6.8346, "step": 785 }, { "epoch": 0.09834757923877598, "grad_norm": 0.07056614011526108, "learning_rate": 9.346845350675088e-05, "loss": 6.8287, "step": 786 }, { "epoch": 0.09847270338539019, "grad_norm": 0.07537488639354706, "learning_rate": 9.325807326128814e-05, "loss": 6.8314, "step": 787 }, { "epoch": 0.09859782753200441, "grad_norm": 0.058308299630880356, "learning_rate": 9.304772298750463e-05, "loss": 6.8287, "step": 788 }, { "epoch": 0.09872295167861862, "grad_norm": 0.07489283382892609, "learning_rate": 9.283740362052642e-05, "loss": 6.8237, "step": 789 }, { "epoch": 0.09884807582523285, "grad_norm": 0.06404101848602295, "learning_rate": 9.26271160953421e-05, "loss": 6.8209, "step": 790 }, { "epoch": 0.09897319997184707, "grad_norm": 0.076097272336483, "learning_rate": 9.241686134679867e-05, "loss": 6.8184, "step": 791 }, { "epoch": 0.09909832411846128, "grad_norm": 0.09229526668787003, "learning_rate": 9.220664030959749e-05, "loss": 6.8274, "step": 792 }, { "epoch": 0.09922344826507551, "grad_norm": 0.09321905672550201, "learning_rate": 9.199645391828999e-05, "loss": 6.8178, "step": 793 }, { "epoch": 0.09934857241168972, "grad_norm": 0.07489687949419022, "learning_rate": 9.178630310727365e-05, "loss": 6.8181, "step": 794 }, { "epoch": 0.09947369655830395, "grad_norm": 0.07067129760980606, "learning_rate": 9.157618881078772e-05, "loss": 6.8117, "step": 795 }, { "epoch": 0.09959882070491816, "grad_norm": 0.08925637602806091, "learning_rate": 9.136611196290915e-05, "loss": 6.8088, "step": 796 }, { "epoch": 0.09972394485153238, "grad_norm": 0.08182400465011597, "learning_rate": 9.115607349754834e-05, "loss": 6.8032, "step": 797 }, { "epoch": 0.09984906899814659, "grad_norm": 0.07875595986843109, "learning_rate": 9.094607434844523e-05, "loss": 6.7973, "step": 798 }, { "epoch": 0.09997419314476082, "grad_norm": 0.09395304322242737, "learning_rate": 9.07361154491648e-05, "loss": 6.7894, "step": 799 }, { "epoch": 0.10009931729137504, "grad_norm": 0.17146538197994232, "learning_rate": 9.052619773309317e-05, "loss": 6.7584, "step": 800 }, { "epoch": 0.10022444143798925, "grad_norm": 0.14705929160118103, "learning_rate": 9.031632213343339e-05, "loss": 6.8753, "step": 801 }, { "epoch": 0.10034956558460348, "grad_norm": 0.0928153544664383, "learning_rate": 9.01064895832012e-05, "loss": 6.8757, "step": 802 }, { "epoch": 0.10047468973121769, "grad_norm": 0.08452393114566803, "learning_rate": 8.98967010152211e-05, "loss": 6.8669, "step": 803 }, { "epoch": 0.10059981387783191, "grad_norm": 0.11025939881801605, "learning_rate": 8.968695736212193e-05, "loss": 6.8577, "step": 804 }, { "epoch": 0.10072493802444613, "grad_norm": 0.07015882432460785, "learning_rate": 8.947725955633294e-05, "loss": 6.8713, "step": 805 }, { "epoch": 0.10085006217106035, "grad_norm": 0.06923877447843552, "learning_rate": 8.926760853007946e-05, "loss": 6.8689, "step": 806 }, { "epoch": 0.10097518631767456, "grad_norm": 0.062181293964385986, "learning_rate": 8.905800521537905e-05, "loss": 6.8692, "step": 807 }, { "epoch": 0.10110031046428879, "grad_norm": 0.06040557101368904, "learning_rate": 8.884845054403699e-05, "loss": 6.8717, "step": 808 }, { "epoch": 0.10122543461090301, "grad_norm": 0.05984716862440109, "learning_rate": 8.863894544764236e-05, "loss": 6.8587, "step": 809 }, { "epoch": 0.10135055875751722, "grad_norm": 0.05124804750084877, "learning_rate": 8.84294908575639e-05, "loss": 6.8682, "step": 810 }, { "epoch": 0.10147568290413145, "grad_norm": 0.0511602908372879, "learning_rate": 8.822008770494572e-05, "loss": 6.8637, "step": 811 }, { "epoch": 0.10160080705074566, "grad_norm": 0.06778772175312042, "learning_rate": 8.801073692070337e-05, "loss": 6.8608, "step": 812 }, { "epoch": 0.10172593119735988, "grad_norm": 0.05513143539428711, "learning_rate": 8.780143943551954e-05, "loss": 6.8618, "step": 813 }, { "epoch": 0.1018510553439741, "grad_norm": 0.07517508417367935, "learning_rate": 8.759219617983999e-05, "loss": 6.8585, "step": 814 }, { "epoch": 0.10197617949058832, "grad_norm": 0.072541743516922, "learning_rate": 8.738300808386935e-05, "loss": 6.8633, "step": 815 }, { "epoch": 0.10210130363720253, "grad_norm": 0.06264056265354156, "learning_rate": 8.717387607756713e-05, "loss": 6.8559, "step": 816 }, { "epoch": 0.10222642778381676, "grad_norm": 0.05450925976037979, "learning_rate": 8.696480109064342e-05, "loss": 6.8502, "step": 817 }, { "epoch": 0.10235155193043098, "grad_norm": 0.06095590069890022, "learning_rate": 8.675578405255485e-05, "loss": 6.8539, "step": 818 }, { "epoch": 0.10247667607704519, "grad_norm": 0.05517008900642395, "learning_rate": 8.654682589250038e-05, "loss": 6.8462, "step": 819 }, { "epoch": 0.10260180022365942, "grad_norm": 0.05329270660877228, "learning_rate": 8.633792753941733e-05, "loss": 6.8496, "step": 820 }, { "epoch": 0.10272692437027363, "grad_norm": 0.05850008502602577, "learning_rate": 8.612908992197705e-05, "loss": 6.8485, "step": 821 }, { "epoch": 0.10285204851688785, "grad_norm": 0.06142275780439377, "learning_rate": 8.592031396858093e-05, "loss": 6.8488, "step": 822 }, { "epoch": 0.10297717266350206, "grad_norm": 0.06849534064531326, "learning_rate": 8.571160060735624e-05, "loss": 6.8497, "step": 823 }, { "epoch": 0.10310229681011629, "grad_norm": 0.048923857510089874, "learning_rate": 8.550295076615188e-05, "loss": 6.8435, "step": 824 }, { "epoch": 0.1032274209567305, "grad_norm": 0.06683506071567535, "learning_rate": 8.529436537253458e-05, "loss": 6.8449, "step": 825 }, { "epoch": 0.10335254510334473, "grad_norm": 0.05200260132551193, "learning_rate": 8.508584535378439e-05, "loss": 6.8427, "step": 826 }, { "epoch": 0.10347766924995894, "grad_norm": 0.05982294678688049, "learning_rate": 8.487739163689079e-05, "loss": 6.8404, "step": 827 }, { "epoch": 0.10360279339657316, "grad_norm": 0.06280147284269333, "learning_rate": 8.466900514854847e-05, "loss": 6.8409, "step": 828 }, { "epoch": 0.10372791754318739, "grad_norm": 0.059844985604286194, "learning_rate": 8.446068681515334e-05, "loss": 6.839, "step": 829 }, { "epoch": 0.1038530416898016, "grad_norm": 0.07303334772586823, "learning_rate": 8.425243756279824e-05, "loss": 6.8399, "step": 830 }, { "epoch": 0.10397816583641582, "grad_norm": 0.06451032310724258, "learning_rate": 8.404425831726894e-05, "loss": 6.8385, "step": 831 }, { "epoch": 0.10410328998303003, "grad_norm": 0.05930513143539429, "learning_rate": 8.383615000404e-05, "loss": 6.8362, "step": 832 }, { "epoch": 0.10422841412964426, "grad_norm": 0.06087719649076462, "learning_rate": 8.362811354827059e-05, "loss": 6.8368, "step": 833 }, { "epoch": 0.10435353827625847, "grad_norm": 0.05968770384788513, "learning_rate": 8.342014987480047e-05, "loss": 6.8324, "step": 834 }, { "epoch": 0.1044786624228727, "grad_norm": 0.07309002429246902, "learning_rate": 8.321225990814591e-05, "loss": 6.8302, "step": 835 }, { "epoch": 0.1046037865694869, "grad_norm": 0.06117624789476395, "learning_rate": 8.300444457249543e-05, "loss": 6.832, "step": 836 }, { "epoch": 0.10472891071610113, "grad_norm": 0.0783989205956459, "learning_rate": 8.279670479170573e-05, "loss": 6.8292, "step": 837 }, { "epoch": 0.10485403486271536, "grad_norm": 0.07800403982400894, "learning_rate": 8.258904148929775e-05, "loss": 6.8277, "step": 838 }, { "epoch": 0.10497915900932957, "grad_norm": 0.07050147652626038, "learning_rate": 8.238145558845235e-05, "loss": 6.8252, "step": 839 }, { "epoch": 0.10510428315594379, "grad_norm": 0.08124469965696335, "learning_rate": 8.217394801200631e-05, "loss": 6.821, "step": 840 }, { "epoch": 0.105229407302558, "grad_norm": 0.10209415853023529, "learning_rate": 8.196651968244826e-05, "loss": 6.8257, "step": 841 }, { "epoch": 0.10535453144917223, "grad_norm": 0.07131823152303696, "learning_rate": 8.175917152191447e-05, "loss": 6.8218, "step": 842 }, { "epoch": 0.10547965559578644, "grad_norm": 0.0662199854850769, "learning_rate": 8.15519044521848e-05, "loss": 6.8211, "step": 843 }, { "epoch": 0.10560477974240066, "grad_norm": 0.07491844892501831, "learning_rate": 8.134471939467874e-05, "loss": 6.8125, "step": 844 }, { "epoch": 0.10572990388901488, "grad_norm": 0.07353292405605316, "learning_rate": 8.113761727045105e-05, "loss": 6.812, "step": 845 }, { "epoch": 0.1058550280356291, "grad_norm": 0.07755854725837708, "learning_rate": 8.093059900018792e-05, "loss": 6.8107, "step": 846 }, { "epoch": 0.10598015218224333, "grad_norm": 0.09446351230144501, "learning_rate": 8.072366550420266e-05, "loss": 6.8037, "step": 847 }, { "epoch": 0.10610527632885754, "grad_norm": 0.09002508223056793, "learning_rate": 8.051681770243175e-05, "loss": 6.7908, "step": 848 }, { "epoch": 0.10623040047547176, "grad_norm": 0.10826987028121948, "learning_rate": 8.031005651443073e-05, "loss": 6.7755, "step": 849 }, { "epoch": 0.10635552462208597, "grad_norm": 0.1577070951461792, "learning_rate": 8.010338285937006e-05, "loss": 6.7446, "step": 850 }, { "epoch": 0.1064806487687002, "grad_norm": 0.10693579912185669, "learning_rate": 7.989679765603108e-05, "loss": 6.8654, "step": 851 }, { "epoch": 0.10660577291531441, "grad_norm": 0.10050234198570251, "learning_rate": 7.969030182280192e-05, "loss": 6.8684, "step": 852 }, { "epoch": 0.10673089706192863, "grad_norm": 0.09752437472343445, "learning_rate": 7.948389627767343e-05, "loss": 6.8641, "step": 853 }, { "epoch": 0.10685602120854285, "grad_norm": 0.0903589129447937, "learning_rate": 7.927758193823501e-05, "loss": 6.8666, "step": 854 }, { "epoch": 0.10698114535515707, "grad_norm": 0.09941129386425018, "learning_rate": 7.907135972167069e-05, "loss": 6.858, "step": 855 }, { "epoch": 0.1071062695017713, "grad_norm": 0.08500877022743225, "learning_rate": 7.88652305447549e-05, "loss": 6.868, "step": 856 }, { "epoch": 0.1072313936483855, "grad_norm": 0.06359398365020752, "learning_rate": 7.865919532384844e-05, "loss": 6.8696, "step": 857 }, { "epoch": 0.10735651779499973, "grad_norm": 0.06349831819534302, "learning_rate": 7.845325497489449e-05, "loss": 6.8735, "step": 858 }, { "epoch": 0.10748164194161394, "grad_norm": 0.05805711820721626, "learning_rate": 7.82474104134144e-05, "loss": 6.8621, "step": 859 }, { "epoch": 0.10760676608822817, "grad_norm": 0.060704994946718216, "learning_rate": 7.804166255450373e-05, "loss": 6.8656, "step": 860 }, { "epoch": 0.10773189023484238, "grad_norm": 0.0630919337272644, "learning_rate": 7.783601231282812e-05, "loss": 6.8645, "step": 861 }, { "epoch": 0.1078570143814566, "grad_norm": 0.0664263442158699, "learning_rate": 7.763046060261932e-05, "loss": 6.8656, "step": 862 }, { "epoch": 0.10798213852807081, "grad_norm": 0.05371001362800598, "learning_rate": 7.742500833767094e-05, "loss": 6.864, "step": 863 }, { "epoch": 0.10810726267468504, "grad_norm": 0.05414309725165367, "learning_rate": 7.721965643133458e-05, "loss": 6.859, "step": 864 }, { "epoch": 0.10823238682129926, "grad_norm": 0.05882842093706131, "learning_rate": 7.701440579651566e-05, "loss": 6.8571, "step": 865 }, { "epoch": 0.10835751096791348, "grad_norm": 0.06309391558170319, "learning_rate": 7.680925734566937e-05, "loss": 6.8543, "step": 866 }, { "epoch": 0.1084826351145277, "grad_norm": 0.05783820152282715, "learning_rate": 7.660421199079669e-05, "loss": 6.8538, "step": 867 }, { "epoch": 0.10860775926114191, "grad_norm": 0.0630047544836998, "learning_rate": 7.639927064344022e-05, "loss": 6.8532, "step": 868 }, { "epoch": 0.10873288340775614, "grad_norm": 0.07220378518104553, "learning_rate": 7.619443421468021e-05, "loss": 6.8497, "step": 869 }, { "epoch": 0.10885800755437035, "grad_norm": 0.06430362164974213, "learning_rate": 7.598970361513051e-05, "loss": 6.8488, "step": 870 }, { "epoch": 0.10898313170098457, "grad_norm": 0.053624700754880905, "learning_rate": 7.578507975493448e-05, "loss": 6.8483, "step": 871 }, { "epoch": 0.10910825584759878, "grad_norm": 0.07166837900876999, "learning_rate": 7.558056354376098e-05, "loss": 6.8457, "step": 872 }, { "epoch": 0.10923337999421301, "grad_norm": 0.05962124094367027, "learning_rate": 7.537615589080027e-05, "loss": 6.8397, "step": 873 }, { "epoch": 0.10935850414082722, "grad_norm": 0.054664015769958496, "learning_rate": 7.517185770476006e-05, "loss": 6.8446, "step": 874 }, { "epoch": 0.10948362828744145, "grad_norm": 0.04989203065633774, "learning_rate": 7.496766989386136e-05, "loss": 6.8444, "step": 875 }, { "epoch": 0.10960875243405567, "grad_norm": 0.05883051082491875, "learning_rate": 7.476359336583454e-05, "loss": 6.8424, "step": 876 }, { "epoch": 0.10973387658066988, "grad_norm": 0.06765749305486679, "learning_rate": 7.455962902791522e-05, "loss": 6.8373, "step": 877 }, { "epoch": 0.1098590007272841, "grad_norm": 0.05092870071530342, "learning_rate": 7.435577778684033e-05, "loss": 6.8393, "step": 878 }, { "epoch": 0.10998412487389832, "grad_norm": 0.05673045665025711, "learning_rate": 7.415204054884399e-05, "loss": 6.8382, "step": 879 }, { "epoch": 0.11010924902051254, "grad_norm": 0.0606488436460495, "learning_rate": 7.394841821965345e-05, "loss": 6.8382, "step": 880 }, { "epoch": 0.11023437316712675, "grad_norm": 0.05659674480557442, "learning_rate": 7.374491170448525e-05, "loss": 6.8359, "step": 881 }, { "epoch": 0.11035949731374098, "grad_norm": 0.052246998995542526, "learning_rate": 7.3541521908041e-05, "loss": 6.8362, "step": 882 }, { "epoch": 0.11048462146035519, "grad_norm": 0.0668715238571167, "learning_rate": 7.33382497345034e-05, "loss": 6.8337, "step": 883 }, { "epoch": 0.11060974560696941, "grad_norm": 0.06160235404968262, "learning_rate": 7.313509608753231e-05, "loss": 6.8335, "step": 884 }, { "epoch": 0.11073486975358364, "grad_norm": 0.07179737091064453, "learning_rate": 7.293206187026061e-05, "loss": 6.8288, "step": 885 }, { "epoch": 0.11085999390019785, "grad_norm": 0.0652439147233963, "learning_rate": 7.27291479852903e-05, "loss": 6.8263, "step": 886 }, { "epoch": 0.11098511804681208, "grad_norm": 0.058492496609687805, "learning_rate": 7.252635533468843e-05, "loss": 6.8291, "step": 887 }, { "epoch": 0.11111024219342629, "grad_norm": 0.05899550020694733, "learning_rate": 7.232368481998309e-05, "loss": 6.8276, "step": 888 }, { "epoch": 0.11123536634004051, "grad_norm": 0.060387495905160904, "learning_rate": 7.212113734215932e-05, "loss": 6.8274, "step": 889 }, { "epoch": 0.11136049048665472, "grad_norm": 0.06588352471590042, "learning_rate": 7.191871380165538e-05, "loss": 6.8265, "step": 890 }, { "epoch": 0.11148561463326895, "grad_norm": 0.07027008384466171, "learning_rate": 7.17164150983584e-05, "loss": 6.8171, "step": 891 }, { "epoch": 0.11161073877988316, "grad_norm": 0.07204768806695938, "learning_rate": 7.151424213160061e-05, "loss": 6.816, "step": 892 }, { "epoch": 0.11173586292649738, "grad_norm": 0.07449936866760254, "learning_rate": 7.131219580015521e-05, "loss": 6.8186, "step": 893 }, { "epoch": 0.11186098707311161, "grad_norm": 0.07239895313978195, "learning_rate": 7.11102770022325e-05, "loss": 6.8172, "step": 894 }, { "epoch": 0.11198611121972582, "grad_norm": 0.08143356442451477, "learning_rate": 7.090848663547574e-05, "loss": 6.8129, "step": 895 }, { "epoch": 0.11211123536634005, "grad_norm": 0.06850866973400116, "learning_rate": 7.070682559695736e-05, "loss": 6.8124, "step": 896 }, { "epoch": 0.11223635951295426, "grad_norm": 0.07549439370632172, "learning_rate": 7.050529478317476e-05, "loss": 6.7967, "step": 897 }, { "epoch": 0.11236148365956848, "grad_norm": 0.16005338728427887, "learning_rate": 7.03038950900464e-05, "loss": 6.7838, "step": 898 }, { "epoch": 0.11248660780618269, "grad_norm": 0.12860530614852905, "learning_rate": 7.010262741290798e-05, "loss": 6.7815, "step": 899 }, { "epoch": 0.11261173195279692, "grad_norm": 0.14462625980377197, "learning_rate": 6.990149264650814e-05, "loss": 6.7578, "step": 900 }, { "epoch": 0.11273685609941113, "grad_norm": 0.09840188175439835, "learning_rate": 6.970049168500474e-05, "loss": 6.867, "step": 901 }, { "epoch": 0.11286198024602535, "grad_norm": 0.0897248163819313, "learning_rate": 6.94996254219608e-05, "loss": 6.8693, "step": 902 }, { "epoch": 0.11298710439263958, "grad_norm": 0.08066925406455994, "learning_rate": 6.929889475034048e-05, "loss": 6.8709, "step": 903 }, { "epoch": 0.11311222853925379, "grad_norm": 0.13347402215003967, "learning_rate": 6.909830056250527e-05, "loss": 6.8459, "step": 904 }, { "epoch": 0.11323735268586801, "grad_norm": 0.07866514474153519, "learning_rate": 6.889784375020978e-05, "loss": 6.8647, "step": 905 }, { "epoch": 0.11336247683248223, "grad_norm": 0.0862560123205185, "learning_rate": 6.869752520459803e-05, "loss": 6.8689, "step": 906 }, { "epoch": 0.11348760097909645, "grad_norm": 0.09121212363243103, "learning_rate": 6.849734581619918e-05, "loss": 6.862, "step": 907 }, { "epoch": 0.11361272512571066, "grad_norm": 0.07277815043926239, "learning_rate": 6.829730647492404e-05, "loss": 6.8675, "step": 908 }, { "epoch": 0.11373784927232489, "grad_norm": 0.0751807689666748, "learning_rate": 6.80974080700606e-05, "loss": 6.8619, "step": 909 }, { "epoch": 0.1138629734189391, "grad_norm": 0.0807303935289383, "learning_rate": 6.789765149027039e-05, "loss": 6.8671, "step": 910 }, { "epoch": 0.11398809756555332, "grad_norm": 0.06756477802991867, "learning_rate": 6.769803762358443e-05, "loss": 6.8642, "step": 911 }, { "epoch": 0.11411322171216755, "grad_norm": 0.06978469341993332, "learning_rate": 6.749856735739928e-05, "loss": 6.8667, "step": 912 }, { "epoch": 0.11423834585878176, "grad_norm": 0.06968016922473907, "learning_rate": 6.729924157847323e-05, "loss": 6.8616, "step": 913 }, { "epoch": 0.11436347000539598, "grad_norm": 0.06229887902736664, "learning_rate": 6.710006117292209e-05, "loss": 6.8622, "step": 914 }, { "epoch": 0.1144885941520102, "grad_norm": 0.05655563250184059, "learning_rate": 6.690102702621548e-05, "loss": 6.8623, "step": 915 }, { "epoch": 0.11461371829862442, "grad_norm": 0.0545574352145195, "learning_rate": 6.670214002317278e-05, "loss": 6.8566, "step": 916 }, { "epoch": 0.11473884244523863, "grad_norm": 0.07182218134403229, "learning_rate": 6.650340104795932e-05, "loss": 6.8585, "step": 917 }, { "epoch": 0.11486396659185286, "grad_norm": 0.05521298944950104, "learning_rate": 6.630481098408228e-05, "loss": 6.8544, "step": 918 }, { "epoch": 0.11498909073846707, "grad_norm": 0.06467194110155106, "learning_rate": 6.610637071438686e-05, "loss": 6.8552, "step": 919 }, { "epoch": 0.11511421488508129, "grad_norm": 0.06465508043766022, "learning_rate": 6.590808112105232e-05, "loss": 6.8476, "step": 920 }, { "epoch": 0.1152393390316955, "grad_norm": 0.05993629992008209, "learning_rate": 6.570994308558812e-05, "loss": 6.8448, "step": 921 }, { "epoch": 0.11536446317830973, "grad_norm": 0.07816801220178604, "learning_rate": 6.551195748882997e-05, "loss": 6.8491, "step": 922 }, { "epoch": 0.11548958732492395, "grad_norm": 0.05899886414408684, "learning_rate": 6.531412521093586e-05, "loss": 6.8463, "step": 923 }, { "epoch": 0.11561471147153816, "grad_norm": 0.07259286195039749, "learning_rate": 6.51164471313822e-05, "loss": 6.8459, "step": 924 }, { "epoch": 0.11573983561815239, "grad_norm": 0.06900156289339066, "learning_rate": 6.491892412895995e-05, "loss": 6.8445, "step": 925 }, { "epoch": 0.1158649597647666, "grad_norm": 0.05701984837651253, "learning_rate": 6.472155708177052e-05, "loss": 6.8416, "step": 926 }, { "epoch": 0.11599008391138083, "grad_norm": 0.048884131014347076, "learning_rate": 6.452434686722224e-05, "loss": 6.8378, "step": 927 }, { "epoch": 0.11611520805799504, "grad_norm": 0.0714973509311676, "learning_rate": 6.432729436202604e-05, "loss": 6.8423, "step": 928 }, { "epoch": 0.11624033220460926, "grad_norm": 0.07560869306325912, "learning_rate": 6.41304004421918e-05, "loss": 6.8416, "step": 929 }, { "epoch": 0.11636545635122347, "grad_norm": 0.06673278659582138, "learning_rate": 6.393366598302446e-05, "loss": 6.8384, "step": 930 }, { "epoch": 0.1164905804978377, "grad_norm": 0.055350061506032944, "learning_rate": 6.373709185911998e-05, "loss": 6.8377, "step": 931 }, { "epoch": 0.11661570464445192, "grad_norm": 0.057414110749959946, "learning_rate": 6.354067894436155e-05, "loss": 6.8346, "step": 932 }, { "epoch": 0.11674082879106613, "grad_norm": 0.05158224701881409, "learning_rate": 6.334442811191576e-05, "loss": 6.8316, "step": 933 }, { "epoch": 0.11686595293768036, "grad_norm": 0.06667657196521759, "learning_rate": 6.314834023422858e-05, "loss": 6.8341, "step": 934 }, { "epoch": 0.11699107708429457, "grad_norm": 0.060989443212747574, "learning_rate": 6.295241618302156e-05, "loss": 6.8293, "step": 935 }, { "epoch": 0.1171162012309088, "grad_norm": 0.08631948381662369, "learning_rate": 6.275665682928803e-05, "loss": 6.8301, "step": 936 }, { "epoch": 0.117241325377523, "grad_norm": 0.08441098034381866, "learning_rate": 6.256106304328905e-05, "loss": 6.8262, "step": 937 }, { "epoch": 0.11736644952413723, "grad_norm": 0.08572442829608917, "learning_rate": 6.23656356945497e-05, "loss": 6.8278, "step": 938 }, { "epoch": 0.11749157367075144, "grad_norm": 0.06193273887038231, "learning_rate": 6.21703756518551e-05, "loss": 6.8282, "step": 939 }, { "epoch": 0.11761669781736567, "grad_norm": 0.07838264852762222, "learning_rate": 6.197528378324665e-05, "loss": 6.8229, "step": 940 }, { "epoch": 0.11774182196397989, "grad_norm": 0.07304549217224121, "learning_rate": 6.17803609560181e-05, "loss": 6.8224, "step": 941 }, { "epoch": 0.1178669461105941, "grad_norm": 0.0821477547287941, "learning_rate": 6.158560803671168e-05, "loss": 6.8177, "step": 942 }, { "epoch": 0.11799207025720833, "grad_norm": 0.06880045682191849, "learning_rate": 6.139102589111435e-05, "loss": 6.8189, "step": 943 }, { "epoch": 0.11811719440382254, "grad_norm": 0.07983653247356415, "learning_rate": 6.119661538425381e-05, "loss": 6.8113, "step": 944 }, { "epoch": 0.11824231855043676, "grad_norm": 0.07099959999322891, "learning_rate": 6.100237738039484e-05, "loss": 6.8141, "step": 945 }, { "epoch": 0.11836744269705098, "grad_norm": 0.1001226007938385, "learning_rate": 6.0808312743035236e-05, "loss": 6.8078, "step": 946 }, { "epoch": 0.1184925668436652, "grad_norm": 0.07789657264947891, "learning_rate": 6.061442233490211e-05, "loss": 6.8047, "step": 947 }, { "epoch": 0.11861769099027941, "grad_norm": 0.10231375694274902, "learning_rate": 6.042070701794806e-05, "loss": 6.7926, "step": 948 }, { "epoch": 0.11874281513689364, "grad_norm": 0.09192590415477753, "learning_rate": 6.0227167653347305e-05, "loss": 6.7759, "step": 949 }, { "epoch": 0.11886793928350786, "grad_norm": 0.16595888137817383, "learning_rate": 6.0033805101491794e-05, "loss": 6.7533, "step": 950 }, { "epoch": 0.11899306343012207, "grad_norm": 0.11045640707015991, "learning_rate": 5.98406202219875e-05, "loss": 6.8629, "step": 951 }, { "epoch": 0.1191181875767363, "grad_norm": 0.08538468182086945, "learning_rate": 5.964761387365052e-05, "loss": 6.8838, "step": 952 }, { "epoch": 0.11924331172335051, "grad_norm": 0.0835360661149025, "learning_rate": 5.9454786914503255e-05, "loss": 6.8766, "step": 953 }, { "epoch": 0.11936843586996473, "grad_norm": 0.1049657091498375, "learning_rate": 5.926214020177074e-05, "loss": 6.8536, "step": 954 }, { "epoch": 0.11949356001657895, "grad_norm": 0.085104800760746, "learning_rate": 5.9069674591876534e-05, "loss": 6.864, "step": 955 }, { "epoch": 0.11961868416319317, "grad_norm": 0.05738501250743866, "learning_rate": 5.887739094043923e-05, "loss": 6.8682, "step": 956 }, { "epoch": 0.11974380830980738, "grad_norm": 0.087504543364048, "learning_rate": 5.868529010226845e-05, "loss": 6.8728, "step": 957 }, { "epoch": 0.1198689324564216, "grad_norm": 0.061660073697566986, "learning_rate": 5.849337293136112e-05, "loss": 6.864, "step": 958 }, { "epoch": 0.11999405660303583, "grad_norm": 0.06358201056718826, "learning_rate": 5.830164028089766e-05, "loss": 6.8676, "step": 959 }, { "epoch": 0.12011918074965004, "grad_norm": 0.06568393856287003, "learning_rate": 5.811009300323818e-05, "loss": 6.8548, "step": 960 }, { "epoch": 0.12024430489626427, "grad_norm": 0.055418290197849274, "learning_rate": 5.791873194991872e-05, "loss": 6.8622, "step": 961 }, { "epoch": 0.12036942904287848, "grad_norm": 0.0675298199057579, "learning_rate": 5.7727557971647427e-05, "loss": 6.8638, "step": 962 }, { "epoch": 0.1204945531894927, "grad_norm": 0.06013267859816551, "learning_rate": 5.7536571918300864e-05, "loss": 6.8618, "step": 963 }, { "epoch": 0.12061967733610691, "grad_norm": 0.06305553019046783, "learning_rate": 5.734577463892008e-05, "loss": 6.8594, "step": 964 }, { "epoch": 0.12074480148272114, "grad_norm": 0.05761939287185669, "learning_rate": 5.7155166981706956e-05, "loss": 6.8571, "step": 965 }, { "epoch": 0.12086992562933535, "grad_norm": 0.060438401997089386, "learning_rate": 5.6964749794020354e-05, "loss": 6.8574, "step": 966 }, { "epoch": 0.12099504977594958, "grad_norm": 0.06701270490884781, "learning_rate": 5.6774523922372394e-05, "loss": 6.8553, "step": 967 }, { "epoch": 0.12112017392256379, "grad_norm": 0.06402300298213959, "learning_rate": 5.6584490212424804e-05, "loss": 6.8507, "step": 968 }, { "epoch": 0.12124529806917801, "grad_norm": 0.0751137062907219, "learning_rate": 5.639464950898491e-05, "loss": 6.8517, "step": 969 }, { "epoch": 0.12137042221579224, "grad_norm": 0.06476029753684998, "learning_rate": 5.620500265600206e-05, "loss": 6.8511, "step": 970 }, { "epoch": 0.12149554636240645, "grad_norm": 0.05707380548119545, "learning_rate": 5.601555049656382e-05, "loss": 6.8485, "step": 971 }, { "epoch": 0.12162067050902067, "grad_norm": 0.06374350935220718, "learning_rate": 5.58262938728922e-05, "loss": 6.8468, "step": 972 }, { "epoch": 0.12174579465563488, "grad_norm": 0.05225362256169319, "learning_rate": 5.563723362634008e-05, "loss": 6.845, "step": 973 }, { "epoch": 0.12187091880224911, "grad_norm": 0.06538752466440201, "learning_rate": 5.544837059738719e-05, "loss": 6.8445, "step": 974 }, { "epoch": 0.12199604294886332, "grad_norm": 0.057842180132865906, "learning_rate": 5.525970562563656e-05, "loss": 6.8452, "step": 975 }, { "epoch": 0.12212116709547755, "grad_norm": 0.05999337136745453, "learning_rate": 5.507123954981073e-05, "loss": 6.8448, "step": 976 }, { "epoch": 0.12224629124209176, "grad_norm": 0.06252874433994293, "learning_rate": 5.488297320774807e-05, "loss": 6.8421, "step": 977 }, { "epoch": 0.12237141538870598, "grad_norm": 0.06220080703496933, "learning_rate": 5.4694907436399e-05, "loss": 6.8408, "step": 978 }, { "epoch": 0.1224965395353202, "grad_norm": 0.05970214679837227, "learning_rate": 5.4507043071822284e-05, "loss": 6.8397, "step": 979 }, { "epoch": 0.12262166368193442, "grad_norm": 0.06806926429271698, "learning_rate": 5.431938094918132e-05, "loss": 6.8399, "step": 980 }, { "epoch": 0.12274678782854864, "grad_norm": 0.06218835338950157, "learning_rate": 5.41319219027404e-05, "loss": 6.8363, "step": 981 }, { "epoch": 0.12287191197516285, "grad_norm": 0.0615081824362278, "learning_rate": 5.394466676586114e-05, "loss": 6.8348, "step": 982 }, { "epoch": 0.12299703612177708, "grad_norm": 0.05896433815360069, "learning_rate": 5.375761637099854e-05, "loss": 6.8335, "step": 983 }, { "epoch": 0.12312216026839129, "grad_norm": 0.063568115234375, "learning_rate": 5.357077154969742e-05, "loss": 6.8362, "step": 984 }, { "epoch": 0.12324728441500551, "grad_norm": 0.06212873011827469, "learning_rate": 5.3384133132588784e-05, "loss": 6.8325, "step": 985 }, { "epoch": 0.12337240856161973, "grad_norm": 0.07271798700094223, "learning_rate": 5.3197701949386e-05, "loss": 6.8326, "step": 986 }, { "epoch": 0.12349753270823395, "grad_norm": 0.06554724276065826, "learning_rate": 5.301147882888116e-05, "loss": 6.8327, "step": 987 }, { "epoch": 0.12362265685484818, "grad_norm": 0.07189320772886276, "learning_rate": 5.28254645989414e-05, "loss": 6.8263, "step": 988 }, { "epoch": 0.12374778100146239, "grad_norm": 0.062407851219177246, "learning_rate": 5.2639660086505226e-05, "loss": 6.8253, "step": 989 }, { "epoch": 0.12387290514807661, "grad_norm": 0.06889786571264267, "learning_rate": 5.2454066117578815e-05, "loss": 6.8229, "step": 990 }, { "epoch": 0.12399802929469082, "grad_norm": 0.06396161764860153, "learning_rate": 5.226868351723244e-05, "loss": 6.8219, "step": 991 }, { "epoch": 0.12412315344130505, "grad_norm": 0.05977191403508186, "learning_rate": 5.2083513109596616e-05, "loss": 6.823, "step": 992 }, { "epoch": 0.12424827758791926, "grad_norm": 0.07001759111881256, "learning_rate": 5.189855571785859e-05, "loss": 6.8181, "step": 993 }, { "epoch": 0.12437340173453348, "grad_norm": 0.076796755194664, "learning_rate": 5.171381216425863e-05, "loss": 6.8115, "step": 994 }, { "epoch": 0.1244985258811477, "grad_norm": 0.08506302535533905, "learning_rate": 5.152928327008635e-05, "loss": 6.811, "step": 995 }, { "epoch": 0.12462365002776192, "grad_norm": 0.07255339622497559, "learning_rate": 5.134496985567714e-05, "loss": 6.8082, "step": 996 }, { "epoch": 0.12474877417437615, "grad_norm": 0.08383986353874207, "learning_rate": 5.116087274040837e-05, "loss": 6.8022, "step": 997 }, { "epoch": 0.12487389832099036, "grad_norm": 0.12946462631225586, "learning_rate": 5.0976992742695925e-05, "loss": 6.7992, "step": 998 }, { "epoch": 0.12499902246760458, "grad_norm": 0.10370881855487823, "learning_rate": 5.07933306799904e-05, "loss": 6.7887, "step": 999 }, { "epoch": 0.1251241466142188, "grad_norm": 0.11729452013969421, "learning_rate": 5.060988736877366e-05, "loss": 6.7613, "step": 1000 }, { "epoch": 0.125249270760833, "grad_norm": 0.12090456485748291, "learning_rate": 5.042666362455498e-05, "loss": 6.8519, "step": 1001 }, { "epoch": 0.12537439490744723, "grad_norm": 0.09242799878120422, "learning_rate": 5.024366026186755e-05, "loss": 6.8678, "step": 1002 }, { "epoch": 0.12549951905406145, "grad_norm": 0.07915151119232178, "learning_rate": 5.006087809426496e-05, "loss": 6.8749, "step": 1003 }, { "epoch": 0.12562464320067568, "grad_norm": 0.10022272914648056, "learning_rate": 4.987831793431731e-05, "loss": 6.8535, "step": 1004 }, { "epoch": 0.12574976734728988, "grad_norm": 0.08195165544748306, "learning_rate": 4.9695980593607817e-05, "loss": 6.866, "step": 1005 }, { "epoch": 0.1258748914939041, "grad_norm": 0.07860789448022842, "learning_rate": 4.9513866882729146e-05, "loss": 6.8531, "step": 1006 }, { "epoch": 0.12600001564051833, "grad_norm": 0.06036710739135742, "learning_rate": 4.9331977611279777e-05, "loss": 6.8647, "step": 1007 }, { "epoch": 0.12612513978713255, "grad_norm": 0.06800255179405212, "learning_rate": 4.9150313587860433e-05, "loss": 6.8689, "step": 1008 }, { "epoch": 0.12625026393374678, "grad_norm": 0.06008204072713852, "learning_rate": 4.896887562007054e-05, "loss": 6.8642, "step": 1009 }, { "epoch": 0.12637538808036097, "grad_norm": 0.059629153460264206, "learning_rate": 4.8787664514504504e-05, "loss": 6.8625, "step": 1010 }, { "epoch": 0.1265005122269752, "grad_norm": 0.06875478476285934, "learning_rate": 4.860668107674823e-05, "loss": 6.8644, "step": 1011 }, { "epoch": 0.12662563637358942, "grad_norm": 0.05550326406955719, "learning_rate": 4.8425926111375506e-05, "loss": 6.8572, "step": 1012 }, { "epoch": 0.12675076052020365, "grad_norm": 0.05815112590789795, "learning_rate": 4.824540042194443e-05, "loss": 6.8634, "step": 1013 }, { "epoch": 0.12687588466681785, "grad_norm": 0.06269997358322144, "learning_rate": 4.8065104810993856e-05, "loss": 6.8576, "step": 1014 }, { "epoch": 0.12700100881343207, "grad_norm": 0.06365355849266052, "learning_rate": 4.788504008003978e-05, "loss": 6.8597, "step": 1015 }, { "epoch": 0.1271261329600463, "grad_norm": 0.06040506809949875, "learning_rate": 4.770520702957182e-05, "loss": 6.8565, "step": 1016 }, { "epoch": 0.12725125710666052, "grad_norm": 0.05758778750896454, "learning_rate": 4.752560645904962e-05, "loss": 6.8554, "step": 1017 }, { "epoch": 0.12737638125327475, "grad_norm": 0.05948428809642792, "learning_rate": 4.734623916689941e-05, "loss": 6.8545, "step": 1018 }, { "epoch": 0.12750150539988894, "grad_norm": 0.04706620052456856, "learning_rate": 4.716710595051022e-05, "loss": 6.8528, "step": 1019 }, { "epoch": 0.12762662954650317, "grad_norm": 0.058379221707582474, "learning_rate": 4.698820760623064e-05, "loss": 6.8506, "step": 1020 }, { "epoch": 0.1277517536931174, "grad_norm": 0.04983724653720856, "learning_rate": 4.6809544929365004e-05, "loss": 6.8472, "step": 1021 }, { "epoch": 0.12787687783973162, "grad_norm": 0.06882207095623016, "learning_rate": 4.663111871417e-05, "loss": 6.8441, "step": 1022 }, { "epoch": 0.12800200198634581, "grad_norm": 0.05252334475517273, "learning_rate": 4.645292975385111e-05, "loss": 6.8487, "step": 1023 }, { "epoch": 0.12812712613296004, "grad_norm": 0.05272122099995613, "learning_rate": 4.627497884055912e-05, "loss": 6.8447, "step": 1024 }, { "epoch": 0.12825225027957426, "grad_norm": 0.05039634928107262, "learning_rate": 4.609726676538652e-05, "loss": 6.8434, "step": 1025 }, { "epoch": 0.1283773744261885, "grad_norm": 0.06072818860411644, "learning_rate": 4.591979431836402e-05, "loss": 6.8387, "step": 1026 }, { "epoch": 0.12850249857280271, "grad_norm": 0.052245255559682846, "learning_rate": 4.574256228845706e-05, "loss": 6.8429, "step": 1027 }, { "epoch": 0.1286276227194169, "grad_norm": 0.05823485553264618, "learning_rate": 4.5565571463562365e-05, "loss": 6.8427, "step": 1028 }, { "epoch": 0.12875274686603114, "grad_norm": 0.06420707702636719, "learning_rate": 4.5388822630504256e-05, "loss": 6.8364, "step": 1029 }, { "epoch": 0.12887787101264536, "grad_norm": 0.05722834914922714, "learning_rate": 4.521231657503132e-05, "loss": 6.837, "step": 1030 }, { "epoch": 0.1290029951592596, "grad_norm": 0.06068609282374382, "learning_rate": 4.503605408181286e-05, "loss": 6.8384, "step": 1031 }, { "epoch": 0.12912811930587378, "grad_norm": 0.05515401065349579, "learning_rate": 4.486003593443537e-05, "loss": 6.8369, "step": 1032 }, { "epoch": 0.129253243452488, "grad_norm": 0.05642779543995857, "learning_rate": 4.468426291539914e-05, "loss": 6.8333, "step": 1033 }, { "epoch": 0.12937836759910223, "grad_norm": 0.07462010532617569, "learning_rate": 4.4508735806114654e-05, "loss": 6.8316, "step": 1034 }, { "epoch": 0.12950349174571646, "grad_norm": 0.0689237043261528, "learning_rate": 4.433345538689929e-05, "loss": 6.8344, "step": 1035 }, { "epoch": 0.12962861589233068, "grad_norm": 0.0574100986123085, "learning_rate": 4.415842243697369e-05, "loss": 6.8293, "step": 1036 }, { "epoch": 0.12975374003894488, "grad_norm": 0.07493086904287338, "learning_rate": 4.39836377344583e-05, "loss": 6.8341, "step": 1037 }, { "epoch": 0.1298788641855591, "grad_norm": 0.060792844742536545, "learning_rate": 4.380910205637012e-05, "loss": 6.8281, "step": 1038 }, { "epoch": 0.13000398833217333, "grad_norm": 0.08996855467557907, "learning_rate": 4.363481617861893e-05, "loss": 6.8238, "step": 1039 }, { "epoch": 0.13012911247878756, "grad_norm": 0.09788423031568527, "learning_rate": 4.346078087600412e-05, "loss": 6.8225, "step": 1040 }, { "epoch": 0.13025423662540175, "grad_norm": 0.062340155243873596, "learning_rate": 4.3286996922211034e-05, "loss": 6.8259, "step": 1041 }, { "epoch": 0.13037936077201598, "grad_norm": 0.07115781307220459, "learning_rate": 4.311346508980772e-05, "loss": 6.8176, "step": 1042 }, { "epoch": 0.1305044849186302, "grad_norm": 0.0779058188199997, "learning_rate": 4.2940186150241365e-05, "loss": 6.812, "step": 1043 }, { "epoch": 0.13062960906524443, "grad_norm": 0.0738992691040039, "learning_rate": 4.27671608738349e-05, "loss": 6.8132, "step": 1044 }, { "epoch": 0.13075473321185865, "grad_norm": 0.08018433302640915, "learning_rate": 4.2594390029783534e-05, "loss": 6.8091, "step": 1045 }, { "epoch": 0.13087985735847285, "grad_norm": 0.08278074860572815, "learning_rate": 4.242187438615153e-05, "loss": 6.8058, "step": 1046 }, { "epoch": 0.13100498150508708, "grad_norm": 0.08070545643568039, "learning_rate": 4.224961470986849e-05, "loss": 6.8039, "step": 1047 }, { "epoch": 0.1311301056517013, "grad_norm": 0.07522248476743698, "learning_rate": 4.207761176672614e-05, "loss": 6.7942, "step": 1048 }, { "epoch": 0.13125522979831553, "grad_norm": 0.09361255913972855, "learning_rate": 4.190586632137491e-05, "loss": 6.7863, "step": 1049 }, { "epoch": 0.13138035394492972, "grad_norm": 0.1457311362028122, "learning_rate": 4.173437913732048e-05, "loss": 6.7532, "step": 1050 }, { "epoch": 0.13150547809154395, "grad_norm": 0.11023912578821182, "learning_rate": 4.156315097692037e-05, "loss": 6.8567, "step": 1051 }, { "epoch": 0.13163060223815817, "grad_norm": 0.11056804656982422, "learning_rate": 4.139218260138074e-05, "loss": 6.856, "step": 1052 }, { "epoch": 0.1317557263847724, "grad_norm": 0.0829029530286789, "learning_rate": 4.12214747707527e-05, "loss": 6.864, "step": 1053 }, { "epoch": 0.13188085053138662, "grad_norm": 0.0777166411280632, "learning_rate": 4.1051028243929125e-05, "loss": 6.8561, "step": 1054 }, { "epoch": 0.13200597467800082, "grad_norm": 0.06604031473398209, "learning_rate": 4.088084377864135e-05, "loss": 6.8584, "step": 1055 }, { "epoch": 0.13213109882461505, "grad_norm": 0.07673287391662598, "learning_rate": 4.07109221314556e-05, "loss": 6.8644, "step": 1056 }, { "epoch": 0.13225622297122927, "grad_norm": 0.07796303927898407, "learning_rate": 4.054126405776971e-05, "loss": 6.861, "step": 1057 }, { "epoch": 0.1323813471178435, "grad_norm": 0.061380449682474136, "learning_rate": 4.037187031180985e-05, "loss": 6.8683, "step": 1058 }, { "epoch": 0.1325064712644577, "grad_norm": 0.06435754150152206, "learning_rate": 4.020274164662707e-05, "loss": 6.8672, "step": 1059 }, { "epoch": 0.13263159541107192, "grad_norm": 0.069906085729599, "learning_rate": 4.003387881409397e-05, "loss": 6.8546, "step": 1060 }, { "epoch": 0.13275671955768614, "grad_norm": 0.05770441144704819, "learning_rate": 3.986528256490141e-05, "loss": 6.8672, "step": 1061 }, { "epoch": 0.13288184370430037, "grad_norm": 0.06263688951730728, "learning_rate": 3.969695364855511e-05, "loss": 6.86, "step": 1062 }, { "epoch": 0.1330069678509146, "grad_norm": 0.057067472487688065, "learning_rate": 3.952889281337235e-05, "loss": 6.8575, "step": 1063 }, { "epoch": 0.1331320919975288, "grad_norm": 0.058888472616672516, "learning_rate": 3.93611008064786e-05, "loss": 6.8564, "step": 1064 }, { "epoch": 0.13325721614414301, "grad_norm": 0.06526350229978561, "learning_rate": 3.9193578373804364e-05, "loss": 6.8573, "step": 1065 }, { "epoch": 0.13338234029075724, "grad_norm": 0.053352899849414825, "learning_rate": 3.90263262600816e-05, "loss": 6.8556, "step": 1066 }, { "epoch": 0.13350746443737146, "grad_norm": 0.05884247645735741, "learning_rate": 3.88593452088406e-05, "loss": 6.8505, "step": 1067 }, { "epoch": 0.13363258858398566, "grad_norm": 0.053777825087308884, "learning_rate": 3.869263596240661e-05, "loss": 6.8505, "step": 1068 }, { "epoch": 0.1337577127305999, "grad_norm": 0.04613254964351654, "learning_rate": 3.8526199261896544e-05, "loss": 6.8507, "step": 1069 }, { "epoch": 0.1338828368772141, "grad_norm": 0.05576649308204651, "learning_rate": 3.836003584721577e-05, "loss": 6.8476, "step": 1070 }, { "epoch": 0.13400796102382834, "grad_norm": 0.05065496265888214, "learning_rate": 3.8194146457054655e-05, "loss": 6.8444, "step": 1071 }, { "epoch": 0.13413308517044256, "grad_norm": 0.06763933598995209, "learning_rate": 3.802853182888543e-05, "loss": 6.8466, "step": 1072 }, { "epoch": 0.13425820931705676, "grad_norm": 0.0658731460571289, "learning_rate": 3.786319269895877e-05, "loss": 6.8461, "step": 1073 }, { "epoch": 0.13438333346367098, "grad_norm": 0.058000680059194565, "learning_rate": 3.769812980230074e-05, "loss": 6.8445, "step": 1074 }, { "epoch": 0.1345084576102852, "grad_norm": 0.05953332036733627, "learning_rate": 3.7533343872709294e-05, "loss": 6.8419, "step": 1075 }, { "epoch": 0.13463358175689943, "grad_norm": 0.06550874561071396, "learning_rate": 3.736883564275112e-05, "loss": 6.8379, "step": 1076 }, { "epoch": 0.13475870590351363, "grad_norm": 0.05789563059806824, "learning_rate": 3.7204605843758386e-05, "loss": 6.8419, "step": 1077 }, { "epoch": 0.13488383005012786, "grad_norm": 0.05452827364206314, "learning_rate": 3.704065520582549e-05, "loss": 6.8405, "step": 1078 }, { "epoch": 0.13500895419674208, "grad_norm": 0.06073261424899101, "learning_rate": 3.6876984457805786e-05, "loss": 6.8416, "step": 1079 }, { "epoch": 0.1351340783433563, "grad_norm": 0.04960978776216507, "learning_rate": 3.671359432730834e-05, "loss": 6.836, "step": 1080 }, { "epoch": 0.13525920248997053, "grad_norm": 0.06479854136705399, "learning_rate": 3.655048554069478e-05, "loss": 6.836, "step": 1081 }, { "epoch": 0.13538432663658473, "grad_norm": 0.057061318308115005, "learning_rate": 3.638765882307589e-05, "loss": 6.8331, "step": 1082 }, { "epoch": 0.13550945078319895, "grad_norm": 0.05493835359811783, "learning_rate": 3.6225114898308634e-05, "loss": 6.8354, "step": 1083 }, { "epoch": 0.13563457492981318, "grad_norm": 0.06502033025026321, "learning_rate": 3.6062854488992714e-05, "loss": 6.8291, "step": 1084 }, { "epoch": 0.1357596990764274, "grad_norm": 0.05362612009048462, "learning_rate": 3.5900878316467454e-05, "loss": 6.8302, "step": 1085 }, { "epoch": 0.1358848232230416, "grad_norm": 0.053463976830244064, "learning_rate": 3.573918710080857e-05, "loss": 6.8263, "step": 1086 }, { "epoch": 0.13600994736965583, "grad_norm": 0.06826691329479218, "learning_rate": 3.5577781560825066e-05, "loss": 6.8255, "step": 1087 }, { "epoch": 0.13613507151627005, "grad_norm": 0.07258055359125137, "learning_rate": 3.541666241405588e-05, "loss": 6.8246, "step": 1088 }, { "epoch": 0.13626019566288428, "grad_norm": 0.0681692585349083, "learning_rate": 3.5255830376766764e-05, "loss": 6.821, "step": 1089 }, { "epoch": 0.13638531980949847, "grad_norm": 0.08735627681016922, "learning_rate": 3.509528616394716e-05, "loss": 6.8246, "step": 1090 }, { "epoch": 0.1365104439561127, "grad_norm": 0.0646921768784523, "learning_rate": 3.4935030489306883e-05, "loss": 6.822, "step": 1091 }, { "epoch": 0.13663556810272692, "grad_norm": 0.07364804297685623, "learning_rate": 3.4775064065273165e-05, "loss": 6.8199, "step": 1092 }, { "epoch": 0.13676069224934115, "grad_norm": 0.07897388935089111, "learning_rate": 3.4615387602987236e-05, "loss": 6.8132, "step": 1093 }, { "epoch": 0.13688581639595537, "grad_norm": 0.09701348096132278, "learning_rate": 3.445600181230134e-05, "loss": 6.8149, "step": 1094 }, { "epoch": 0.13701094054256957, "grad_norm": 0.08542627096176147, "learning_rate": 3.429690740177549e-05, "loss": 6.8097, "step": 1095 }, { "epoch": 0.1371360646891838, "grad_norm": 0.07645785808563232, "learning_rate": 3.413810507867436e-05, "loss": 6.8076, "step": 1096 }, { "epoch": 0.13726118883579802, "grad_norm": 0.12249205261468887, "learning_rate": 3.397959554896415e-05, "loss": 6.8004, "step": 1097 }, { "epoch": 0.13738631298241225, "grad_norm": 0.09605488181114197, "learning_rate": 3.3821379517309405e-05, "loss": 6.7911, "step": 1098 }, { "epoch": 0.13751143712902644, "grad_norm": 0.14662940800189972, "learning_rate": 3.3663457687069924e-05, "loss": 6.7794, "step": 1099 }, { "epoch": 0.13763656127564067, "grad_norm": 0.1348690390586853, "learning_rate": 3.350583076029754e-05, "loss": 6.7481, "step": 1100 }, { "epoch": 0.1377616854222549, "grad_norm": 0.08334768563508987, "learning_rate": 3.334849943773323e-05, "loss": 6.8782, "step": 1101 }, { "epoch": 0.13788680956886912, "grad_norm": 0.07065068930387497, "learning_rate": 3.319146441880371e-05, "loss": 6.8769, "step": 1102 }, { "epoch": 0.13801193371548334, "grad_norm": 0.07668652385473251, "learning_rate": 3.3034726401618444e-05, "loss": 6.8576, "step": 1103 }, { "epoch": 0.13813705786209754, "grad_norm": 0.09782318025827408, "learning_rate": 3.28782860829667e-05, "loss": 6.8516, "step": 1104 }, { "epoch": 0.13826218200871176, "grad_norm": 0.07026124000549316, "learning_rate": 3.272214415831418e-05, "loss": 6.8561, "step": 1105 }, { "epoch": 0.138387306155326, "grad_norm": 0.05975576490163803, "learning_rate": 3.2566301321800085e-05, "loss": 6.8604, "step": 1106 }, { "epoch": 0.13851243030194021, "grad_norm": 0.07976236194372177, "learning_rate": 3.241075826623401e-05, "loss": 6.8588, "step": 1107 }, { "epoch": 0.1386375544485544, "grad_norm": 0.06251887232065201, "learning_rate": 3.225551568309284e-05, "loss": 6.8608, "step": 1108 }, { "epoch": 0.13876267859516864, "grad_norm": 0.06219517067074776, "learning_rate": 3.210057426251773e-05, "loss": 6.8616, "step": 1109 }, { "epoch": 0.13888780274178286, "grad_norm": 0.06574711948633194, "learning_rate": 3.1945934693310896e-05, "loss": 6.8637, "step": 1110 }, { "epoch": 0.1390129268883971, "grad_norm": 0.07629784196615219, "learning_rate": 3.179159766293282e-05, "loss": 6.8643, "step": 1111 }, { "epoch": 0.1391380510350113, "grad_norm": 0.062084611505270004, "learning_rate": 3.163756385749889e-05, "loss": 6.862, "step": 1112 }, { "epoch": 0.1392631751816255, "grad_norm": 0.0663914680480957, "learning_rate": 3.148383396177653e-05, "loss": 6.8568, "step": 1113 }, { "epoch": 0.13938829932823973, "grad_norm": 0.06513141095638275, "learning_rate": 3.133040865918213e-05, "loss": 6.8629, "step": 1114 }, { "epoch": 0.13951342347485396, "grad_norm": 0.06291124224662781, "learning_rate": 3.117728863177796e-05, "loss": 6.8559, "step": 1115 }, { "epoch": 0.13963854762146818, "grad_norm": 0.05424998328089714, "learning_rate": 3.102447456026919e-05, "loss": 6.8577, "step": 1116 }, { "epoch": 0.13976367176808238, "grad_norm": 0.06632808595895767, "learning_rate": 3.0871967124000834e-05, "loss": 6.8552, "step": 1117 }, { "epoch": 0.1398887959146966, "grad_norm": 0.06717648357152939, "learning_rate": 3.0719767000954714e-05, "loss": 6.8552, "step": 1118 }, { "epoch": 0.14001392006131083, "grad_norm": 0.05267615616321564, "learning_rate": 3.056787486774656e-05, "loss": 6.8514, "step": 1119 }, { "epoch": 0.14013904420792506, "grad_norm": 0.05303286761045456, "learning_rate": 3.041629139962283e-05, "loss": 6.8497, "step": 1120 }, { "epoch": 0.14026416835453928, "grad_norm": 0.06788763403892517, "learning_rate": 3.0265017270457775e-05, "loss": 6.8512, "step": 1121 }, { "epoch": 0.14038929250115348, "grad_norm": 0.0560276061296463, "learning_rate": 3.0114053152750556e-05, "loss": 6.8478, "step": 1122 }, { "epoch": 0.1405144166477677, "grad_norm": 0.055129554122686386, "learning_rate": 2.9963399717622077e-05, "loss": 6.849, "step": 1123 }, { "epoch": 0.14063954079438193, "grad_norm": 0.06273508071899414, "learning_rate": 2.98130576348121e-05, "loss": 6.8461, "step": 1124 }, { "epoch": 0.14076466494099615, "grad_norm": 0.05553919076919556, "learning_rate": 2.966302757267625e-05, "loss": 6.8436, "step": 1125 }, { "epoch": 0.14076466494099615, "eval_loss": 6.835322380065918, "eval_runtime": 30.0947, "eval_samples_per_second": 447.288, "eval_steps_per_second": 223.661, "step": 1125 }, { "epoch": 0.14088978908761035, "grad_norm": 0.07243514060974121, "learning_rate": 2.9513310198183065e-05, "loss": 6.8429, "step": 1126 }, { "epoch": 0.14101491323422458, "grad_norm": 0.05593620613217354, "learning_rate": 2.936390617691097e-05, "loss": 6.8431, "step": 1127 }, { "epoch": 0.1411400373808388, "grad_norm": 0.05393727123737335, "learning_rate": 2.9214816173045356e-05, "loss": 6.8441, "step": 1128 }, { "epoch": 0.14126516152745303, "grad_norm": 0.05595025047659874, "learning_rate": 2.906604084937572e-05, "loss": 6.8387, "step": 1129 }, { "epoch": 0.14139028567406725, "grad_norm": 0.057013221085071564, "learning_rate": 2.8917580867292526e-05, "loss": 6.8346, "step": 1130 }, { "epoch": 0.14151540982068145, "grad_norm": 0.05450030788779259, "learning_rate": 2.8769436886784408e-05, "loss": 6.8357, "step": 1131 }, { "epoch": 0.14164053396729567, "grad_norm": 0.05271396040916443, "learning_rate": 2.862160956643517e-05, "loss": 6.8386, "step": 1132 }, { "epoch": 0.1417656581139099, "grad_norm": 0.06149327754974365, "learning_rate": 2.847409956342092e-05, "loss": 6.835, "step": 1133 }, { "epoch": 0.14189078226052412, "grad_norm": 0.0672340914607048, "learning_rate": 2.8326907533507074e-05, "loss": 6.8354, "step": 1134 }, { "epoch": 0.14201590640713832, "grad_norm": 0.05268840491771698, "learning_rate": 2.8180034131045464e-05, "loss": 6.8292, "step": 1135 }, { "epoch": 0.14214103055375255, "grad_norm": 0.06812043488025665, "learning_rate": 2.8033480008971546e-05, "loss": 6.8255, "step": 1136 }, { "epoch": 0.14226615470036677, "grad_norm": 0.05760839208960533, "learning_rate": 2.7887245818801277e-05, "loss": 6.826, "step": 1137 }, { "epoch": 0.142391278846981, "grad_norm": 0.059192292392253876, "learning_rate": 2.7741332210628345e-05, "loss": 6.825, "step": 1138 }, { "epoch": 0.14251640299359522, "grad_norm": 0.052887432277202606, "learning_rate": 2.759573983312138e-05, "loss": 6.8255, "step": 1139 }, { "epoch": 0.14264152714020942, "grad_norm": 0.07347966730594635, "learning_rate": 2.7450469333520855e-05, "loss": 6.8233, "step": 1140 }, { "epoch": 0.14276665128682364, "grad_norm": 0.12838901579380035, "learning_rate": 2.730552135763632e-05, "loss": 6.8223, "step": 1141 }, { "epoch": 0.14289177543343787, "grad_norm": 0.06712338328361511, "learning_rate": 2.7160896549843562e-05, "loss": 6.8161, "step": 1142 }, { "epoch": 0.1430168995800521, "grad_norm": 0.09065883606672287, "learning_rate": 2.701659555308169e-05, "loss": 6.8111, "step": 1143 }, { "epoch": 0.1431420237266663, "grad_norm": 0.07420609891414642, "learning_rate": 2.6872619008850274e-05, "loss": 6.8113, "step": 1144 }, { "epoch": 0.14326714787328051, "grad_norm": 0.07504729926586151, "learning_rate": 2.672896755720654e-05, "loss": 6.8062, "step": 1145 }, { "epoch": 0.14339227201989474, "grad_norm": 0.09265592694282532, "learning_rate": 2.6585641836762433e-05, "loss": 6.8024, "step": 1146 }, { "epoch": 0.14351739616650896, "grad_norm": 0.08413922786712646, "learning_rate": 2.6442642484681944e-05, "loss": 6.8001, "step": 1147 }, { "epoch": 0.1436425203131232, "grad_norm": 0.12287787348031998, "learning_rate": 2.6299970136678077e-05, "loss": 6.7912, "step": 1148 }, { "epoch": 0.1437676444597374, "grad_norm": 0.0844724029302597, "learning_rate": 2.6157625427010156e-05, "loss": 6.7793, "step": 1149 }, { "epoch": 0.1438927686063516, "grad_norm": 0.15094740688800812, "learning_rate": 2.6015608988480955e-05, "loss": 6.7479, "step": 1150 }, { "epoch": 0.14401789275296584, "grad_norm": 0.08073284476995468, "learning_rate": 2.5873921452433915e-05, "loss": 6.878, "step": 1151 }, { "epoch": 0.14414301689958006, "grad_norm": 0.07900699973106384, "learning_rate": 2.57325634487503e-05, "loss": 6.8632, "step": 1152 }, { "epoch": 0.14426814104619426, "grad_norm": 0.08771805465221405, "learning_rate": 2.5591535605846383e-05, "loss": 6.8526, "step": 1153 }, { "epoch": 0.14439326519280848, "grad_norm": 0.06478151679039001, "learning_rate": 2.5450838550670808e-05, "loss": 6.8671, "step": 1154 }, { "epoch": 0.1445183893394227, "grad_norm": 0.07485893368721008, "learning_rate": 2.5310472908701555e-05, "loss": 6.8614, "step": 1155 }, { "epoch": 0.14464351348603693, "grad_norm": 0.05978316441178322, "learning_rate": 2.5170439303943294e-05, "loss": 6.8607, "step": 1156 }, { "epoch": 0.14476863763265116, "grad_norm": 0.04734022170305252, "learning_rate": 2.503073835892471e-05, "loss": 6.8649, "step": 1157 }, { "epoch": 0.14489376177926536, "grad_norm": 0.05720485374331474, "learning_rate": 2.4891370694695517e-05, "loss": 6.8684, "step": 1158 }, { "epoch": 0.14501888592587958, "grad_norm": 0.05629875510931015, "learning_rate": 2.4752336930823837e-05, "loss": 6.859, "step": 1159 }, { "epoch": 0.1451440100724938, "grad_norm": 0.057331185787916183, "learning_rate": 2.4613637685393432e-05, "loss": 6.8613, "step": 1160 }, { "epoch": 0.14526913421910803, "grad_norm": 0.059672728180885315, "learning_rate": 2.4475273575000936e-05, "loss": 6.8661, "step": 1161 }, { "epoch": 0.14539425836572223, "grad_norm": 0.060559701174497604, "learning_rate": 2.4337245214753103e-05, "loss": 6.8659, "step": 1162 }, { "epoch": 0.14551938251233645, "grad_norm": 0.050393495708703995, "learning_rate": 2.4199553218264093e-05, "loss": 6.8618, "step": 1163 }, { "epoch": 0.14564450665895068, "grad_norm": 0.06175202131271362, "learning_rate": 2.4062198197652752e-05, "loss": 6.8602, "step": 1164 }, { "epoch": 0.1457696308055649, "grad_norm": 0.06584306061267853, "learning_rate": 2.3925180763539844e-05, "loss": 6.8603, "step": 1165 }, { "epoch": 0.14589475495217913, "grad_norm": 0.06702952086925507, "learning_rate": 2.3788501525045438e-05, "loss": 6.8579, "step": 1166 }, { "epoch": 0.14601987909879333, "grad_norm": 0.04580916836857796, "learning_rate": 2.3652161089786086e-05, "loss": 6.8539, "step": 1167 }, { "epoch": 0.14614500324540755, "grad_norm": 0.06374320387840271, "learning_rate": 2.351616006387214e-05, "loss": 6.8504, "step": 1168 }, { "epoch": 0.14627012739202178, "grad_norm": 0.05468731373548508, "learning_rate": 2.3380499051905137e-05, "loss": 6.8542, "step": 1169 }, { "epoch": 0.146395251538636, "grad_norm": 0.06402197480201721, "learning_rate": 2.324517865697501e-05, "loss": 6.8478, "step": 1170 }, { "epoch": 0.1465203756852502, "grad_norm": 0.05739054083824158, "learning_rate": 2.3110199480657525e-05, "loss": 6.8505, "step": 1171 }, { "epoch": 0.14664549983186442, "grad_norm": 0.053598348051309586, "learning_rate": 2.2975562123011495e-05, "loss": 6.8494, "step": 1172 }, { "epoch": 0.14677062397847865, "grad_norm": 0.05298564210534096, "learning_rate": 2.2841267182576143e-05, "loss": 6.8463, "step": 1173 }, { "epoch": 0.14689574812509287, "grad_norm": 0.06621244549751282, "learning_rate": 2.2707315256368433e-05, "loss": 6.8401, "step": 1174 }, { "epoch": 0.1470208722717071, "grad_norm": 0.052186187356710434, "learning_rate": 2.2573706939880555e-05, "loss": 6.8456, "step": 1175 }, { "epoch": 0.1471459964183213, "grad_norm": 0.06235424429178238, "learning_rate": 2.2440442827077045e-05, "loss": 6.8418, "step": 1176 }, { "epoch": 0.14727112056493552, "grad_norm": 0.06564217805862427, "learning_rate": 2.230752351039228e-05, "loss": 6.8407, "step": 1177 }, { "epoch": 0.14739624471154975, "grad_norm": 0.05897653102874756, "learning_rate": 2.2174949580727832e-05, "loss": 6.8432, "step": 1178 }, { "epoch": 0.14752136885816397, "grad_norm": 0.05609191954135895, "learning_rate": 2.2042721627449846e-05, "loss": 6.8424, "step": 1179 }, { "epoch": 0.14764649300477817, "grad_norm": 0.06082276254892349, "learning_rate": 2.1910840238386398e-05, "loss": 6.8354, "step": 1180 }, { "epoch": 0.1477716171513924, "grad_norm": 0.06168831139802933, "learning_rate": 2.1779305999824884e-05, "loss": 6.8359, "step": 1181 }, { "epoch": 0.14789674129800662, "grad_norm": 0.05733821168541908, "learning_rate": 2.164811949650942e-05, "loss": 6.8334, "step": 1182 }, { "epoch": 0.14802186544462084, "grad_norm": 0.08882861584424973, "learning_rate": 2.1517281311638217e-05, "loss": 6.8294, "step": 1183 }, { "epoch": 0.14814698959123504, "grad_norm": 0.06660692393779755, "learning_rate": 2.1386792026861103e-05, "loss": 6.8335, "step": 1184 }, { "epoch": 0.14827211373784926, "grad_norm": 0.059202343225479126, "learning_rate": 2.125665222227675e-05, "loss": 6.8341, "step": 1185 }, { "epoch": 0.1483972378844635, "grad_norm": 0.0701708048582077, "learning_rate": 2.112686247643024e-05, "loss": 6.8311, "step": 1186 }, { "epoch": 0.14852236203107771, "grad_norm": 0.07104705274105072, "learning_rate": 2.09974233663104e-05, "loss": 6.829, "step": 1187 }, { "epoch": 0.14864748617769194, "grad_norm": 0.05942048877477646, "learning_rate": 2.0868335467347366e-05, "loss": 6.8304, "step": 1188 }, { "epoch": 0.14877261032430614, "grad_norm": 0.06808766722679138, "learning_rate": 2.073959935340988e-05, "loss": 6.8279, "step": 1189 }, { "epoch": 0.14889773447092036, "grad_norm": 0.08521654456853867, "learning_rate": 2.06112155968028e-05, "loss": 6.8211, "step": 1190 }, { "epoch": 0.1490228586175346, "grad_norm": 0.07640393078327179, "learning_rate": 2.0483184768264596e-05, "loss": 6.825, "step": 1191 }, { "epoch": 0.1491479827641488, "grad_norm": 0.06751570105552673, "learning_rate": 2.035550743696468e-05, "loss": 6.8199, "step": 1192 }, { "epoch": 0.149273106910763, "grad_norm": 0.07916046679019928, "learning_rate": 2.022818417050113e-05, "loss": 6.8119, "step": 1193 }, { "epoch": 0.14939823105737723, "grad_norm": 0.07297234982252121, "learning_rate": 2.0101215534897855e-05, "loss": 6.8168, "step": 1194 }, { "epoch": 0.14952335520399146, "grad_norm": 0.08085519075393677, "learning_rate": 1.99746020946023e-05, "loss": 6.8093, "step": 1195 }, { "epoch": 0.14964847935060568, "grad_norm": 0.0728239044547081, "learning_rate": 1.9848344412482854e-05, "loss": 6.8036, "step": 1196 }, { "epoch": 0.1497736034972199, "grad_norm": 0.07863820344209671, "learning_rate": 1.9722443049826344e-05, "loss": 6.8025, "step": 1197 }, { "epoch": 0.1498987276438341, "grad_norm": 0.09008979052305222, "learning_rate": 1.9596898566335576e-05, "loss": 6.7999, "step": 1198 }, { "epoch": 0.15002385179044833, "grad_norm": 0.10627716779708862, "learning_rate": 1.9471711520126824e-05, "loss": 6.7894, "step": 1199 }, { "epoch": 0.15014897593706256, "grad_norm": 0.13538858294487, "learning_rate": 1.9346882467727325e-05, "loss": 6.7624, "step": 1200 }, { "epoch": 0.15027410008367678, "grad_norm": 0.09252560138702393, "learning_rate": 1.9222411964072884e-05, "loss": 6.8607, "step": 1201 }, { "epoch": 0.15039922423029098, "grad_norm": 0.08366183191537857, "learning_rate": 1.9098300562505266e-05, "loss": 6.869, "step": 1202 }, { "epoch": 0.1505243483769052, "grad_norm": 0.09882515668869019, "learning_rate": 1.8974548814769944e-05, "loss": 6.8594, "step": 1203 }, { "epoch": 0.15064947252351943, "grad_norm": 0.11313113570213318, "learning_rate": 1.8851157271013442e-05, "loss": 6.8446, "step": 1204 }, { "epoch": 0.15077459667013365, "grad_norm": 0.07016993314027786, "learning_rate": 1.872812647978095e-05, "loss": 6.869, "step": 1205 }, { "epoch": 0.15089972081674788, "grad_norm": 0.04867501184344292, "learning_rate": 1.8605456988014015e-05, "loss": 6.8652, "step": 1206 }, { "epoch": 0.15102484496336208, "grad_norm": 0.05337794870138168, "learning_rate": 1.8483149341047923e-05, "loss": 6.867, "step": 1207 }, { "epoch": 0.1511499691099763, "grad_norm": 0.0720386803150177, "learning_rate": 1.8361204082609352e-05, "loss": 6.8628, "step": 1208 }, { "epoch": 0.15127509325659053, "grad_norm": 0.062227506190538406, "learning_rate": 1.8239621754813995e-05, "loss": 6.8703, "step": 1209 }, { "epoch": 0.15140021740320475, "grad_norm": 0.06548401713371277, "learning_rate": 1.811840289816409e-05, "loss": 6.8592, "step": 1210 }, { "epoch": 0.15152534154981895, "grad_norm": 0.06194954365491867, "learning_rate": 1.799754805154603e-05, "loss": 6.8645, "step": 1211 }, { "epoch": 0.15165046569643317, "grad_norm": 0.06796473264694214, "learning_rate": 1.787705775222802e-05, "loss": 6.8637, "step": 1212 }, { "epoch": 0.1517755898430474, "grad_norm": 0.05032079666852951, "learning_rate": 1.775693253585763e-05, "loss": 6.8543, "step": 1213 }, { "epoch": 0.15190071398966162, "grad_norm": 0.06427576392889023, "learning_rate": 1.763717293645939e-05, "loss": 6.8571, "step": 1214 }, { "epoch": 0.15202583813627585, "grad_norm": 0.06306448578834534, "learning_rate": 1.7517779486432495e-05, "loss": 6.8525, "step": 1215 }, { "epoch": 0.15215096228289005, "grad_norm": 0.057377833873033524, "learning_rate": 1.7398752716548395e-05, "loss": 6.8529, "step": 1216 }, { "epoch": 0.15227608642950427, "grad_norm": 0.05794732645153999, "learning_rate": 1.728009315594843e-05, "loss": 6.8514, "step": 1217 }, { "epoch": 0.1524012105761185, "grad_norm": 0.05499265342950821, "learning_rate": 1.716180133214149e-05, "loss": 6.854, "step": 1218 }, { "epoch": 0.15252633472273272, "grad_norm": 0.05832820013165474, "learning_rate": 1.704387777100165e-05, "loss": 6.8503, "step": 1219 }, { "epoch": 0.15265145886934692, "grad_norm": 0.055457763373851776, "learning_rate": 1.6926322996765897e-05, "loss": 6.8482, "step": 1220 }, { "epoch": 0.15277658301596114, "grad_norm": 0.05789404734969139, "learning_rate": 1.6809137532031704e-05, "loss": 6.8551, "step": 1221 }, { "epoch": 0.15290170716257537, "grad_norm": 0.05464902147650719, "learning_rate": 1.6692321897754758e-05, "loss": 6.8463, "step": 1222 }, { "epoch": 0.1530268313091896, "grad_norm": 0.06019265204668045, "learning_rate": 1.65758766132467e-05, "loss": 6.8469, "step": 1223 }, { "epoch": 0.15315195545580382, "grad_norm": 0.055001046508550644, "learning_rate": 1.6459802196172668e-05, "loss": 6.8469, "step": 1224 }, { "epoch": 0.15327707960241801, "grad_norm": 0.06733505427837372, "learning_rate": 1.634409916254914e-05, "loss": 6.8426, "step": 1225 }, { "epoch": 0.15340220374903224, "grad_norm": 0.0662558376789093, "learning_rate": 1.622876802674158e-05, "loss": 6.8425, "step": 1226 }, { "epoch": 0.15352732789564646, "grad_norm": 0.06077055633068085, "learning_rate": 1.6113809301462125e-05, "loss": 6.8385, "step": 1227 }, { "epoch": 0.1536524520422607, "grad_norm": 0.05627214536070824, "learning_rate": 1.599922349776738e-05, "loss": 6.8382, "step": 1228 }, { "epoch": 0.1537775761888749, "grad_norm": 0.0525030717253685, "learning_rate": 1.5885011125056047e-05, "loss": 6.8342, "step": 1229 }, { "epoch": 0.1539027003354891, "grad_norm": 0.058503177016973495, "learning_rate": 1.5771172691066794e-05, "loss": 6.8402, "step": 1230 }, { "epoch": 0.15402782448210334, "grad_norm": 0.06108471751213074, "learning_rate": 1.565770870187585e-05, "loss": 6.8373, "step": 1231 }, { "epoch": 0.15415294862871756, "grad_norm": 0.06098058447241783, "learning_rate": 1.5544619661894864e-05, "loss": 6.8381, "step": 1232 }, { "epoch": 0.1542780727753318, "grad_norm": 0.046376653015613556, "learning_rate": 1.543190607386861e-05, "loss": 6.8318, "step": 1233 }, { "epoch": 0.15440319692194598, "grad_norm": 0.057242501527071, "learning_rate": 1.5319568438872745e-05, "loss": 6.8317, "step": 1234 }, { "epoch": 0.1545283210685602, "grad_norm": 0.05511382594704628, "learning_rate": 1.520760725631164e-05, "loss": 6.8303, "step": 1235 }, { "epoch": 0.15465344521517443, "grad_norm": 0.058222271502017975, "learning_rate": 1.5096023023916094e-05, "loss": 6.827, "step": 1236 }, { "epoch": 0.15477856936178866, "grad_norm": 0.07645133137702942, "learning_rate": 1.498481623774115e-05, "loss": 6.8294, "step": 1237 }, { "epoch": 0.15490369350840286, "grad_norm": 0.07166320085525513, "learning_rate": 1.4873987392163947e-05, "loss": 6.8227, "step": 1238 }, { "epoch": 0.15502881765501708, "grad_norm": 0.057397421449422836, "learning_rate": 1.4763536979881354e-05, "loss": 6.8234, "step": 1239 }, { "epoch": 0.1551539418016313, "grad_norm": 0.07023253291845322, "learning_rate": 1.4653465491908003e-05, "loss": 6.8245, "step": 1240 }, { "epoch": 0.15527906594824553, "grad_norm": 0.0744609609246254, "learning_rate": 1.4543773417573925e-05, "loss": 6.8202, "step": 1241 }, { "epoch": 0.15540419009485976, "grad_norm": 0.0631578117609024, "learning_rate": 1.4434461244522458e-05, "loss": 6.8206, "step": 1242 }, { "epoch": 0.15552931424147395, "grad_norm": 0.09936635196208954, "learning_rate": 1.4325529458708065e-05, "loss": 6.8125, "step": 1243 }, { "epoch": 0.15565443838808818, "grad_norm": 0.07729128003120422, "learning_rate": 1.4216978544394177e-05, "loss": 6.8108, "step": 1244 }, { "epoch": 0.1557795625347024, "grad_norm": 0.10020412504673004, "learning_rate": 1.4108808984151023e-05, "loss": 6.8118, "step": 1245 }, { "epoch": 0.15590468668131663, "grad_norm": 0.11231788247823715, "learning_rate": 1.4001021258853509e-05, "loss": 6.8043, "step": 1246 }, { "epoch": 0.15602981082793083, "grad_norm": 0.12214125692844391, "learning_rate": 1.3893615847679065e-05, "loss": 6.8015, "step": 1247 }, { "epoch": 0.15615493497454505, "grad_norm": 0.08515699207782745, "learning_rate": 1.3786593228105494e-05, "loss": 6.7896, "step": 1248 }, { "epoch": 0.15628005912115928, "grad_norm": 0.0987449586391449, "learning_rate": 1.3679953875908957e-05, "loss": 6.7856, "step": 1249 }, { "epoch": 0.1564051832677735, "grad_norm": 0.14125549793243408, "learning_rate": 1.3573698265161683e-05, "loss": 6.7561, "step": 1250 }, { "epoch": 0.15653030741438773, "grad_norm": 0.10132603347301483, "learning_rate": 1.3467826868229994e-05, "loss": 6.8523, "step": 1251 }, { "epoch": 0.15665543156100192, "grad_norm": 0.07547570765018463, "learning_rate": 1.3362340155772146e-05, "loss": 6.8674, "step": 1252 }, { "epoch": 0.15678055570761615, "grad_norm": 0.0960906445980072, "learning_rate": 1.3257238596736266e-05, "loss": 6.8591, "step": 1253 }, { "epoch": 0.15690567985423037, "grad_norm": 0.09767723828554153, "learning_rate": 1.3152522658358245e-05, "loss": 6.8437, "step": 1254 }, { "epoch": 0.1570308040008446, "grad_norm": 0.0690990686416626, "learning_rate": 1.3048192806159721e-05, "loss": 6.8631, "step": 1255 }, { "epoch": 0.1571559281474588, "grad_norm": 0.06839364767074585, "learning_rate": 1.2944249503945894e-05, "loss": 6.8635, "step": 1256 }, { "epoch": 0.15728105229407302, "grad_norm": 0.06043870002031326, "learning_rate": 1.2840693213803545e-05, "loss": 6.8648, "step": 1257 }, { "epoch": 0.15740617644068725, "grad_norm": 0.0687263160943985, "learning_rate": 1.2737524396099032e-05, "loss": 6.8595, "step": 1258 }, { "epoch": 0.15753130058730147, "grad_norm": 0.05132860690355301, "learning_rate": 1.2634743509476088e-05, "loss": 6.864, "step": 1259 }, { "epoch": 0.1576564247339157, "grad_norm": 0.04881085827946663, "learning_rate": 1.2532351010853916e-05, "loss": 6.8635, "step": 1260 }, { "epoch": 0.1577815488805299, "grad_norm": 0.06334888935089111, "learning_rate": 1.243034735542512e-05, "loss": 6.8601, "step": 1261 }, { "epoch": 0.15790667302714412, "grad_norm": 0.05100672319531441, "learning_rate": 1.2328732996653669e-05, "loss": 6.8588, "step": 1262 }, { "epoch": 0.15803179717375834, "grad_norm": 0.057795774191617966, "learning_rate": 1.2227508386272878e-05, "loss": 6.8628, "step": 1263 }, { "epoch": 0.15815692132037257, "grad_norm": 0.05266465246677399, "learning_rate": 1.212667397428342e-05, "loss": 6.8604, "step": 1264 }, { "epoch": 0.15828204546698676, "grad_norm": 0.06735730171203613, "learning_rate": 1.2026230208951306e-05, "loss": 6.8572, "step": 1265 }, { "epoch": 0.158407169613601, "grad_norm": 0.053272221237421036, "learning_rate": 1.1926177536805905e-05, "loss": 6.8562, "step": 1266 }, { "epoch": 0.15853229376021521, "grad_norm": 0.07732369005680084, "learning_rate": 1.1826516402637989e-05, "loss": 6.8537, "step": 1267 }, { "epoch": 0.15865741790682944, "grad_norm": 0.05819777026772499, "learning_rate": 1.1727247249497685e-05, "loss": 6.854, "step": 1268 }, { "epoch": 0.15878254205344366, "grad_norm": 0.055479660630226135, "learning_rate": 1.1628370518692533e-05, "loss": 6.8496, "step": 1269 }, { "epoch": 0.15890766620005786, "grad_norm": 0.058534570038318634, "learning_rate": 1.152988664978556e-05, "loss": 6.854, "step": 1270 }, { "epoch": 0.1590327903466721, "grad_norm": 0.062482766807079315, "learning_rate": 1.1431796080593283e-05, "loss": 6.8521, "step": 1271 }, { "epoch": 0.1591579144932863, "grad_norm": 0.06007704511284828, "learning_rate": 1.1334099247183783e-05, "loss": 6.8486, "step": 1272 }, { "epoch": 0.15928303863990054, "grad_norm": 0.0645846351981163, "learning_rate": 1.1236796583874787e-05, "loss": 6.8472, "step": 1273 }, { "epoch": 0.15940816278651473, "grad_norm": 0.05408726632595062, "learning_rate": 1.1139888523231678e-05, "loss": 6.8477, "step": 1274 }, { "epoch": 0.15953328693312896, "grad_norm": 0.05780980736017227, "learning_rate": 1.1043375496065611e-05, "loss": 6.843, "step": 1275 }, { "epoch": 0.15965841107974318, "grad_norm": 0.058385562151670456, "learning_rate": 1.0947257931431642e-05, "loss": 6.8458, "step": 1276 }, { "epoch": 0.1597835352263574, "grad_norm": 0.05572573095560074, "learning_rate": 1.0851536256626705e-05, "loss": 6.847, "step": 1277 }, { "epoch": 0.1599086593729716, "grad_norm": 0.054147008806467056, "learning_rate": 1.0756210897187812e-05, "loss": 6.8415, "step": 1278 }, { "epoch": 0.16003378351958583, "grad_norm": 0.05122090131044388, "learning_rate": 1.0661282276890127e-05, "loss": 6.8356, "step": 1279 }, { "epoch": 0.16015890766620006, "grad_norm": 0.05577128008008003, "learning_rate": 1.0566750817745074e-05, "loss": 6.8385, "step": 1280 }, { "epoch": 0.16028403181281428, "grad_norm": 0.057551562786102295, "learning_rate": 1.0472616939998492e-05, "loss": 6.838, "step": 1281 }, { "epoch": 0.1604091559594285, "grad_norm": 0.06074923649430275, "learning_rate": 1.0378881062128731e-05, "loss": 6.8309, "step": 1282 }, { "epoch": 0.1605342801060427, "grad_norm": 0.05625593662261963, "learning_rate": 1.0285543600844804e-05, "loss": 6.8316, "step": 1283 }, { "epoch": 0.16065940425265693, "grad_norm": 0.05791671574115753, "learning_rate": 1.019260497108453e-05, "loss": 6.8344, "step": 1284 }, { "epoch": 0.16078452839927115, "grad_norm": 0.06227971613407135, "learning_rate": 1.010006558601274e-05, "loss": 6.8312, "step": 1285 }, { "epoch": 0.16090965254588538, "grad_norm": 0.061791278421878815, "learning_rate": 1.000792585701934e-05, "loss": 6.8268, "step": 1286 }, { "epoch": 0.16103477669249958, "grad_norm": 0.057605765759944916, "learning_rate": 9.91618619371757e-06, "loss": 6.8281, "step": 1287 }, { "epoch": 0.1611599008391138, "grad_norm": 0.07101979106664658, "learning_rate": 9.82484700394215e-06, "loss": 6.8253, "step": 1288 }, { "epoch": 0.16128502498572803, "grad_norm": 0.06478337943553925, "learning_rate": 9.73390869374743e-06, "loss": 6.823, "step": 1289 }, { "epoch": 0.16141014913234225, "grad_norm": 0.06531362980604172, "learning_rate": 9.643371667405698e-06, "loss": 6.8202, "step": 1290 }, { "epoch": 0.16153527327895648, "grad_norm": 0.07579568773508072, "learning_rate": 9.553236327405246e-06, "loss": 6.8168, "step": 1291 }, { "epoch": 0.16166039742557067, "grad_norm": 0.06963292509317398, "learning_rate": 9.463503074448677e-06, "loss": 6.8192, "step": 1292 }, { "epoch": 0.1617855215721849, "grad_norm": 0.06998664885759354, "learning_rate": 9.374172307451068e-06, "loss": 6.8166, "step": 1293 }, { "epoch": 0.16191064571879912, "grad_norm": 0.09028169512748718, "learning_rate": 9.285244423538197e-06, "loss": 6.8117, "step": 1294 }, { "epoch": 0.16203576986541335, "grad_norm": 0.06465288251638412, "learning_rate": 9.196719818044886e-06, "loss": 6.8104, "step": 1295 }, { "epoch": 0.16216089401202755, "grad_norm": 0.07682817429304123, "learning_rate": 9.108598884513053e-06, "loss": 6.8031, "step": 1296 }, { "epoch": 0.16228601815864177, "grad_norm": 0.08894361555576324, "learning_rate": 9.020882014690136e-06, "loss": 6.7982, "step": 1297 }, { "epoch": 0.162411142305256, "grad_norm": 0.10202386230230331, "learning_rate": 8.933569598527247e-06, "loss": 6.7969, "step": 1298 }, { "epoch": 0.16253626645187022, "grad_norm": 0.17059394717216492, "learning_rate": 8.846662024177477e-06, "loss": 6.7858, "step": 1299 }, { "epoch": 0.16266139059848445, "grad_norm": 0.14211882650852203, "learning_rate": 8.760159677994172e-06, "loss": 6.7531, "step": 1300 }, { "epoch": 0.16278651474509864, "grad_norm": 0.08895996958017349, "learning_rate": 8.674062944529216e-06, "loss": 6.8541, "step": 1301 }, { "epoch": 0.16291163889171287, "grad_norm": 0.08907456696033478, "learning_rate": 8.588372206531292e-06, "loss": 6.8638, "step": 1302 }, { "epoch": 0.1630367630383271, "grad_norm": 0.07456039637327194, "learning_rate": 8.503087844944213e-06, "loss": 6.865, "step": 1303 }, { "epoch": 0.16316188718494132, "grad_norm": 0.158840149641037, "learning_rate": 8.418210238905256e-06, "loss": 6.8306, "step": 1304 }, { "epoch": 0.16328701133155551, "grad_norm": 0.11015015095472336, "learning_rate": 8.333739765743398e-06, "loss": 6.8499, "step": 1305 }, { "epoch": 0.16341213547816974, "grad_norm": 0.04712749272584915, "learning_rate": 8.249676800977658e-06, "loss": 6.8658, "step": 1306 }, { "epoch": 0.16353725962478396, "grad_norm": 0.06225185841321945, "learning_rate": 8.16602171831553e-06, "loss": 6.8644, "step": 1307 }, { "epoch": 0.1636623837713982, "grad_norm": 0.05664203315973282, "learning_rate": 8.082774889651168e-06, "loss": 6.8701, "step": 1308 }, { "epoch": 0.16378750791801241, "grad_norm": 0.05763581395149231, "learning_rate": 7.999936685063835e-06, "loss": 6.8665, "step": 1309 }, { "epoch": 0.1639126320646266, "grad_norm": 0.0597081296145916, "learning_rate": 7.91750747281621e-06, "loss": 6.861, "step": 1310 }, { "epoch": 0.16403775621124084, "grad_norm": 0.05000796914100647, "learning_rate": 7.835487619352811e-06, "loss": 6.8578, "step": 1311 }, { "epoch": 0.16416288035785506, "grad_norm": 0.054538823664188385, "learning_rate": 7.753877489298244e-06, "loss": 6.8551, "step": 1312 }, { "epoch": 0.1642880045044693, "grad_norm": 0.06144242361187935, "learning_rate": 7.67267744545579e-06, "loss": 6.8575, "step": 1313 }, { "epoch": 0.16441312865108348, "grad_norm": 0.05060692876577377, "learning_rate": 7.591887848805545e-06, "loss": 6.8527, "step": 1314 }, { "epoch": 0.1645382527976977, "grad_norm": 0.05450880527496338, "learning_rate": 7.5115090585029966e-06, "loss": 6.8538, "step": 1315 }, { "epoch": 0.16466337694431193, "grad_norm": 0.05674571171402931, "learning_rate": 7.431541431877342e-06, "loss": 6.8546, "step": 1316 }, { "epoch": 0.16478850109092616, "grad_norm": 0.06123065575957298, "learning_rate": 7.351985324429933e-06, "loss": 6.8539, "step": 1317 }, { "epoch": 0.16491362523754038, "grad_norm": 0.052988309413194656, "learning_rate": 7.272841089832694e-06, "loss": 6.849, "step": 1318 }, { "epoch": 0.16503874938415458, "grad_norm": 0.06131447106599808, "learning_rate": 7.194109079926514e-06, "loss": 6.849, "step": 1319 }, { "epoch": 0.1651638735307688, "grad_norm": 0.052703384310007095, "learning_rate": 7.115789644719728e-06, "loss": 6.8467, "step": 1320 }, { "epoch": 0.16528899767738303, "grad_norm": 0.06801564246416092, "learning_rate": 7.037883132386547e-06, "loss": 6.8499, "step": 1321 }, { "epoch": 0.16541412182399726, "grad_norm": 0.07062938064336777, "learning_rate": 6.960389889265517e-06, "loss": 6.8447, "step": 1322 }, { "epoch": 0.16553924597061145, "grad_norm": 0.10002299398183823, "learning_rate": 6.883310259857944e-06, "loss": 6.8425, "step": 1323 }, { "epoch": 0.16566437011722568, "grad_norm": 0.06357421725988388, "learning_rate": 6.806644586826383e-06, "loss": 6.8456, "step": 1324 }, { "epoch": 0.1657894942638399, "grad_norm": 0.0659535825252533, "learning_rate": 6.730393210993147e-06, "loss": 6.8422, "step": 1325 }, { "epoch": 0.16591461841045413, "grad_norm": 0.06332556903362274, "learning_rate": 6.654556471338746e-06, "loss": 6.8409, "step": 1326 }, { "epoch": 0.16603974255706835, "grad_norm": 0.05108652263879776, "learning_rate": 6.579134705000412e-06, "loss": 6.837, "step": 1327 }, { "epoch": 0.16616486670368255, "grad_norm": 0.06405655294656754, "learning_rate": 6.504128247270546e-06, "loss": 6.8385, "step": 1328 }, { "epoch": 0.16628999085029678, "grad_norm": 0.05968260020017624, "learning_rate": 6.429537431595312e-06, "loss": 6.8325, "step": 1329 }, { "epoch": 0.166415114996911, "grad_norm": 0.05928070470690727, "learning_rate": 6.355362589573077e-06, "loss": 6.8355, "step": 1330 }, { "epoch": 0.16654023914352523, "grad_norm": 0.06275006383657455, "learning_rate": 6.2816040509530165e-06, "loss": 6.8355, "step": 1331 }, { "epoch": 0.16666536329013942, "grad_norm": 0.07541554421186447, "learning_rate": 6.2082621436335475e-06, "loss": 6.8351, "step": 1332 }, { "epoch": 0.16679048743675365, "grad_norm": 0.05294984206557274, "learning_rate": 6.135337193660962e-06, "loss": 6.8323, "step": 1333 }, { "epoch": 0.16691561158336787, "grad_norm": 0.06002030149102211, "learning_rate": 6.062829525227909e-06, "loss": 6.833, "step": 1334 }, { "epoch": 0.1670407357299821, "grad_norm": 0.05374766141176224, "learning_rate": 5.990739460672024e-06, "loss": 6.829, "step": 1335 }, { "epoch": 0.16716585987659632, "grad_norm": 0.06032724305987358, "learning_rate": 5.9190673204744255e-06, "loss": 6.8235, "step": 1336 }, { "epoch": 0.16729098402321052, "grad_norm": 0.07873076945543289, "learning_rate": 5.84781342325833e-06, "loss": 6.8274, "step": 1337 }, { "epoch": 0.16741610816982475, "grad_norm": 0.06482894718647003, "learning_rate": 5.77697808578761e-06, "loss": 6.8248, "step": 1338 }, { "epoch": 0.16754123231643897, "grad_norm": 0.06034001335501671, "learning_rate": 5.706561622965467e-06, "loss": 6.8233, "step": 1339 }, { "epoch": 0.1676663564630532, "grad_norm": 0.0549284853041172, "learning_rate": 5.636564347832907e-06, "loss": 6.8207, "step": 1340 }, { "epoch": 0.1677914806096674, "grad_norm": 0.06023858115077019, "learning_rate": 5.566986571567401e-06, "loss": 6.8187, "step": 1341 }, { "epoch": 0.16791660475628162, "grad_norm": 0.06581918895244598, "learning_rate": 5.497828603481569e-06, "loss": 6.8167, "step": 1342 }, { "epoch": 0.16804172890289584, "grad_norm": 0.0744078978896141, "learning_rate": 5.429090751021704e-06, "loss": 6.8141, "step": 1343 }, { "epoch": 0.16816685304951007, "grad_norm": 0.06617950648069382, "learning_rate": 5.3607733197664436e-06, "loss": 6.8127, "step": 1344 }, { "epoch": 0.1682919771961243, "grad_norm": 0.07519249618053436, "learning_rate": 5.2928766134254345e-06, "loss": 6.8122, "step": 1345 }, { "epoch": 0.1684171013427385, "grad_norm": 0.07171600311994553, "learning_rate": 5.225400933837954e-06, "loss": 6.8046, "step": 1346 }, { "epoch": 0.16854222548935271, "grad_norm": 0.08736371994018555, "learning_rate": 5.158346580971573e-06, "loss": 6.7995, "step": 1347 }, { "epoch": 0.16866734963596694, "grad_norm": 0.0909268856048584, "learning_rate": 5.091713852920854e-06, "loss": 6.7924, "step": 1348 }, { "epoch": 0.16879247378258116, "grad_norm": 0.1106017604470253, "learning_rate": 5.025503045905933e-06, "loss": 6.7833, "step": 1349 }, { "epoch": 0.16891759792919536, "grad_norm": 0.14610743522644043, "learning_rate": 4.959714454271369e-06, "loss": 6.762, "step": 1350 }, { "epoch": 0.1690427220758096, "grad_norm": 0.09530216455459595, "learning_rate": 4.8943483704846475e-06, "loss": 6.8633, "step": 1351 }, { "epoch": 0.1691678462224238, "grad_norm": 0.08176793903112411, "learning_rate": 4.829405085134997e-06, "loss": 6.869, "step": 1352 }, { "epoch": 0.16929297036903804, "grad_norm": 0.06940693408250809, "learning_rate": 4.764884886932086e-06, "loss": 6.8652, "step": 1353 }, { "epoch": 0.16941809451565226, "grad_norm": 0.12383774667978287, "learning_rate": 4.700788062704687e-06, "loss": 6.8393, "step": 1354 }, { "epoch": 0.16954321866226646, "grad_norm": 0.059174198657274246, "learning_rate": 4.6371148973994525e-06, "loss": 6.8673, "step": 1355 }, { "epoch": 0.16966834280888068, "grad_norm": 0.05575103312730789, "learning_rate": 4.573865674079625e-06, "loss": 6.8639, "step": 1356 }, { "epoch": 0.1697934669554949, "grad_norm": 0.06360621005296707, "learning_rate": 4.511040673923828e-06, "loss": 6.8683, "step": 1357 }, { "epoch": 0.16991859110210913, "grad_norm": 0.06387114524841309, "learning_rate": 4.448640176224694e-06, "loss": 6.8696, "step": 1358 }, { "epoch": 0.17004371524872333, "grad_norm": 0.050917718559503555, "learning_rate": 4.386664458387779e-06, "loss": 6.864, "step": 1359 }, { "epoch": 0.17016883939533756, "grad_norm": 0.05284392461180687, "learning_rate": 4.325113795930203e-06, "loss": 6.8616, "step": 1360 }, { "epoch": 0.17029396354195178, "grad_norm": 0.053370919078588486, "learning_rate": 4.263988462479484e-06, "loss": 6.8664, "step": 1361 }, { "epoch": 0.170419087688566, "grad_norm": 0.04814120754599571, "learning_rate": 4.203288729772326e-06, "loss": 6.8628, "step": 1362 }, { "epoch": 0.17054421183518023, "grad_norm": 0.054461799561977386, "learning_rate": 4.143014867653383e-06, "loss": 6.8596, "step": 1363 }, { "epoch": 0.17066933598179443, "grad_norm": 0.049728453159332275, "learning_rate": 4.083167144074073e-06, "loss": 6.8607, "step": 1364 }, { "epoch": 0.17079446012840865, "grad_norm": 0.049136579036712646, "learning_rate": 4.023745825091407e-06, "loss": 6.854, "step": 1365 }, { "epoch": 0.17091958427502288, "grad_norm": 0.0593116469681263, "learning_rate": 3.964751174866765e-06, "loss": 6.8524, "step": 1366 }, { "epoch": 0.1710447084216371, "grad_norm": 0.0577319897711277, "learning_rate": 3.906183455664725e-06, "loss": 6.8479, "step": 1367 }, { "epoch": 0.1711698325682513, "grad_norm": 0.05450035631656647, "learning_rate": 3.84804292785198e-06, "loss": 6.8538, "step": 1368 }, { "epoch": 0.17129495671486553, "grad_norm": 0.05601746588945389, "learning_rate": 3.7903298498960572e-06, "loss": 6.8491, "step": 1369 }, { "epoch": 0.17142008086147975, "grad_norm": 0.06383606791496277, "learning_rate": 3.7330444783642338e-06, "loss": 6.8453, "step": 1370 }, { "epoch": 0.17154520500809398, "grad_norm": 0.05076981708407402, "learning_rate": 3.676187067922421e-06, "loss": 6.8444, "step": 1371 }, { "epoch": 0.17167032915470817, "grad_norm": 0.06610114127397537, "learning_rate": 3.619757871333973e-06, "loss": 6.8468, "step": 1372 }, { "epoch": 0.1717954533013224, "grad_norm": 0.05181503668427467, "learning_rate": 3.563757139458579e-06, "loss": 6.843, "step": 1373 }, { "epoch": 0.17192057744793662, "grad_norm": 0.07020257413387299, "learning_rate": 3.5081851212512175e-06, "loss": 6.8429, "step": 1374 }, { "epoch": 0.17204570159455085, "grad_norm": 0.058668918907642365, "learning_rate": 3.4530420637609363e-06, "loss": 6.8459, "step": 1375 }, { "epoch": 0.17217082574116507, "grad_norm": 0.05918782576918602, "learning_rate": 3.3983282121298086e-06, "loss": 6.8378, "step": 1376 }, { "epoch": 0.17229594988777927, "grad_norm": 0.054112061858177185, "learning_rate": 3.3440438095919126e-06, "loss": 6.8431, "step": 1377 }, { "epoch": 0.1724210740343935, "grad_norm": 0.06641299277544022, "learning_rate": 3.290189097472096e-06, "loss": 6.8386, "step": 1378 }, { "epoch": 0.17254619818100772, "grad_norm": 0.06148559972643852, "learning_rate": 3.236764315185037e-06, "loss": 6.8395, "step": 1379 }, { "epoch": 0.17267132232762195, "grad_norm": 0.05435129627585411, "learning_rate": 3.1837697002341293e-06, "loss": 6.8381, "step": 1380 }, { "epoch": 0.17279644647423614, "grad_norm": 0.056218743324279785, "learning_rate": 3.131205488210409e-06, "loss": 6.8329, "step": 1381 }, { "epoch": 0.17292157062085037, "grad_norm": 0.07692524790763855, "learning_rate": 3.0790719127915646e-06, "loss": 6.834, "step": 1382 }, { "epoch": 0.1730466947674646, "grad_norm": 0.07179510593414307, "learning_rate": 3.0273692057408265e-06, "loss": 6.8352, "step": 1383 }, { "epoch": 0.17317181891407882, "grad_norm": 0.058145493268966675, "learning_rate": 2.976097596905969e-06, "loss": 6.8313, "step": 1384 }, { "epoch": 0.17329694306069304, "grad_norm": 0.08015631139278412, "learning_rate": 2.9252573142183326e-06, "loss": 6.8311, "step": 1385 }, { "epoch": 0.17342206720730724, "grad_norm": 0.059036049991846085, "learning_rate": 2.874848583691714e-06, "loss": 6.8289, "step": 1386 }, { "epoch": 0.17354719135392147, "grad_norm": 0.06354140490293503, "learning_rate": 2.8248716294214774e-06, "loss": 6.8224, "step": 1387 }, { "epoch": 0.1736723155005357, "grad_norm": 0.06228512153029442, "learning_rate": 2.7753266735834338e-06, "loss": 6.8214, "step": 1388 }, { "epoch": 0.17379743964714991, "grad_norm": 0.0634087398648262, "learning_rate": 2.7262139364329643e-06, "loss": 6.8238, "step": 1389 }, { "epoch": 0.1739225637937641, "grad_norm": 0.0680609792470932, "learning_rate": 2.677533636303964e-06, "loss": 6.8191, "step": 1390 }, { "epoch": 0.17404768794037834, "grad_norm": 0.07776638120412827, "learning_rate": 2.6292859896079213e-06, "loss": 6.822, "step": 1391 }, { "epoch": 0.17417281208699256, "grad_norm": 0.07317586243152618, "learning_rate": 2.581471210832931e-06, "loss": 6.8189, "step": 1392 }, { "epoch": 0.1742979362336068, "grad_norm": 0.09229645133018494, "learning_rate": 2.5340895125427364e-06, "loss": 6.8136, "step": 1393 }, { "epoch": 0.174423060380221, "grad_norm": 0.0773492231965065, "learning_rate": 2.4871411053757898e-06, "loss": 6.8085, "step": 1394 }, { "epoch": 0.1745481845268352, "grad_norm": 0.11506694555282593, "learning_rate": 2.440626198044327e-06, "loss": 6.8066, "step": 1395 }, { "epoch": 0.17467330867344943, "grad_norm": 0.06844329088926315, "learning_rate": 2.394544997333437e-06, "loss": 6.8036, "step": 1396 }, { "epoch": 0.17479843282006366, "grad_norm": 0.0800548866391182, "learning_rate": 2.3488977081001394e-06, "loss": 6.8005, "step": 1397 }, { "epoch": 0.17492355696667788, "grad_norm": 0.140996053814888, "learning_rate": 2.3036845332724543e-06, "loss": 6.7943, "step": 1398 }, { "epoch": 0.17504868111329208, "grad_norm": 0.13243380188941956, "learning_rate": 2.2589056738485324e-06, "loss": 6.7782, "step": 1399 }, { "epoch": 0.1751738052599063, "grad_norm": 0.1351012885570526, "learning_rate": 2.2145613288957478e-06, "loss": 6.7557, "step": 1400 }, { "epoch": 0.17529892940652053, "grad_norm": 0.09377124905586243, "learning_rate": 2.170651695549786e-06, "loss": 6.859, "step": 1401 }, { "epoch": 0.17542405355313476, "grad_norm": 0.07952917367219925, "learning_rate": 2.1271769690138332e-06, "loss": 6.8723, "step": 1402 }, { "epoch": 0.17554917769974898, "grad_norm": 0.07432472705841064, "learning_rate": 2.084137342557646e-06, "loss": 6.8693, "step": 1403 }, { "epoch": 0.17567430184636318, "grad_norm": 0.1256646364927292, "learning_rate": 2.0415330075166937e-06, "loss": 6.856, "step": 1404 }, { "epoch": 0.1757994259929774, "grad_norm": 0.058985039591789246, "learning_rate": 1.9993641532913833e-06, "loss": 6.8758, "step": 1405 }, { "epoch": 0.17592455013959163, "grad_norm": 0.05767710506916046, "learning_rate": 1.9576309673461357e-06, "loss": 6.8717, "step": 1406 }, { "epoch": 0.17604967428620585, "grad_norm": 0.05004095286130905, "learning_rate": 1.916333635208556e-06, "loss": 6.8659, "step": 1407 }, { "epoch": 0.17617479843282005, "grad_norm": 0.050979193300008774, "learning_rate": 1.8754723404686425e-06, "loss": 6.8668, "step": 1408 }, { "epoch": 0.17629992257943428, "grad_norm": 0.05201297625899315, "learning_rate": 1.8350472647780116e-06, "loss": 6.8612, "step": 1409 }, { "epoch": 0.1764250467260485, "grad_norm": 0.05118010193109512, "learning_rate": 1.7950585878489856e-06, "loss": 6.8596, "step": 1410 }, { "epoch": 0.17655017087266273, "grad_norm": 0.05477053299546242, "learning_rate": 1.7555064874538397e-06, "loss": 6.8633, "step": 1411 }, { "epoch": 0.17667529501927695, "grad_norm": 0.05414487421512604, "learning_rate": 1.7163911394240672e-06, "loss": 6.8606, "step": 1412 }, { "epoch": 0.17680041916589115, "grad_norm": 0.06751760840415955, "learning_rate": 1.6777127176495043e-06, "loss": 6.8593, "step": 1413 }, { "epoch": 0.17692554331250537, "grad_norm": 0.04939683899283409, "learning_rate": 1.6394713940776296e-06, "loss": 6.8597, "step": 1414 }, { "epoch": 0.1770506674591196, "grad_norm": 0.05685962736606598, "learning_rate": 1.6016673387127646e-06, "loss": 6.8576, "step": 1415 }, { "epoch": 0.17717579160573382, "grad_norm": 0.06365666538476944, "learning_rate": 1.5643007196153302e-06, "loss": 6.8557, "step": 1416 }, { "epoch": 0.17730091575234802, "grad_norm": 0.06001985818147659, "learning_rate": 1.5273717029010925e-06, "loss": 6.8531, "step": 1417 }, { "epoch": 0.17742603989896225, "grad_norm": 0.06495179235935211, "learning_rate": 1.4908804527404286e-06, "loss": 6.8526, "step": 1418 }, { "epoch": 0.17755116404557647, "grad_norm": 0.05439097434282303, "learning_rate": 1.4548271313575835e-06, "loss": 6.8549, "step": 1419 }, { "epoch": 0.1776762881921907, "grad_norm": 0.059629399329423904, "learning_rate": 1.4192118990299707e-06, "loss": 6.8445, "step": 1420 }, { "epoch": 0.17780141233880492, "grad_norm": 0.04757322743535042, "learning_rate": 1.3840349140874619e-06, "loss": 6.8475, "step": 1421 }, { "epoch": 0.17792653648541912, "grad_norm": 0.06703180074691772, "learning_rate": 1.3492963329116537e-06, "loss": 6.8473, "step": 1422 }, { "epoch": 0.17805166063203334, "grad_norm": 0.07519087940454483, "learning_rate": 1.3149963099352014e-06, "loss": 6.8478, "step": 1423 }, { "epoch": 0.17817678477864757, "grad_norm": 0.0541621595621109, "learning_rate": 1.2811349976411202e-06, "loss": 6.848, "step": 1424 }, { "epoch": 0.1783019089252618, "grad_norm": 0.058931704610586166, "learning_rate": 1.2477125465620853e-06, "loss": 6.8421, "step": 1425 }, { "epoch": 0.178427033071876, "grad_norm": 0.059702515602111816, "learning_rate": 1.2147291052798216e-06, "loss": 6.841, "step": 1426 }, { "epoch": 0.17855215721849022, "grad_norm": 0.056329287588596344, "learning_rate": 1.1821848204243814e-06, "loss": 6.8391, "step": 1427 }, { "epoch": 0.17867728136510444, "grad_norm": 0.05391077324748039, "learning_rate": 1.1500798366735233e-06, "loss": 6.8406, "step": 1428 }, { "epoch": 0.17880240551171867, "grad_norm": 0.056805387139320374, "learning_rate": 1.1184142967520794e-06, "loss": 6.8356, "step": 1429 }, { "epoch": 0.1789275296583329, "grad_norm": 0.05493747815489769, "learning_rate": 1.0871883414312777e-06, "loss": 6.8346, "step": 1430 }, { "epoch": 0.1790526538049471, "grad_norm": 0.06074679642915726, "learning_rate": 1.0564021095281652e-06, "loss": 6.8354, "step": 1431 }, { "epoch": 0.1791777779515613, "grad_norm": 0.056792713701725006, "learning_rate": 1.0260557379049519e-06, "loss": 6.8354, "step": 1432 }, { "epoch": 0.17930290209817554, "grad_norm": 0.05123317986726761, "learning_rate": 9.96149361468457e-07, "loss": 6.8308, "step": 1433 }, { "epoch": 0.17942802624478976, "grad_norm": 0.06268208473920822, "learning_rate": 9.66683113169431e-07, "loss": 6.8295, "step": 1434 }, { "epoch": 0.17955315039140396, "grad_norm": 0.06159789115190506, "learning_rate": 9.376571240020227e-07, "loss": 6.8298, "step": 1435 }, { "epoch": 0.17967827453801818, "grad_norm": 0.07016710937023163, "learning_rate": 9.090715230031688e-07, "loss": 6.8278, "step": 1436 }, { "epoch": 0.1798033986846324, "grad_norm": 0.07208134979009628, "learning_rate": 8.809264372520609e-07, "loss": 6.8288, "step": 1437 }, { "epoch": 0.17992852283124663, "grad_norm": 0.06572156399488449, "learning_rate": 8.532219918695128e-07, "loss": 6.8248, "step": 1438 }, { "epoch": 0.18005364697786086, "grad_norm": 0.06400032341480255, "learning_rate": 8.259583100174606e-07, "loss": 6.8204, "step": 1439 }, { "epoch": 0.18017877112447506, "grad_norm": 0.0679529458284378, "learning_rate": 7.991355128984079e-07, "loss": 6.8173, "step": 1440 }, { "epoch": 0.18030389527108928, "grad_norm": 0.07652369886636734, "learning_rate": 7.727537197548707e-07, "loss": 6.8166, "step": 1441 }, { "epoch": 0.1804290194177035, "grad_norm": 0.08422042429447174, "learning_rate": 7.468130478688218e-07, "loss": 6.8148, "step": 1442 }, { "epoch": 0.18055414356431773, "grad_norm": 0.08339574187994003, "learning_rate": 7.213136125612586e-07, "loss": 6.8147, "step": 1443 }, { "epoch": 0.18067926771093193, "grad_norm": 0.08772672712802887, "learning_rate": 6.962555271915805e-07, "loss": 6.8093, "step": 1444 }, { "epoch": 0.18080439185754615, "grad_norm": 0.08588062971830368, "learning_rate": 6.716389031571568e-07, "loss": 6.8107, "step": 1445 }, { "epoch": 0.18092951600416038, "grad_norm": 0.10700280219316483, "learning_rate": 6.474638498928265e-07, "loss": 6.8064, "step": 1446 }, { "epoch": 0.1810546401507746, "grad_norm": 0.07625709474086761, "learning_rate": 6.237304748703543e-07, "loss": 6.7989, "step": 1447 }, { "epoch": 0.18117976429738883, "grad_norm": 0.12817145884037018, "learning_rate": 6.004388835980423e-07, "loss": 6.7896, "step": 1448 }, { "epoch": 0.18130488844400303, "grad_norm": 0.09448394179344177, "learning_rate": 5.77589179620186e-07, "loss": 6.7849, "step": 1449 }, { "epoch": 0.18143001259061725, "grad_norm": 0.14554812014102936, "learning_rate": 5.55181464516652e-07, "loss": 6.7574, "step": 1450 }, { "epoch": 0.18155513673723148, "grad_norm": 0.10746641457080841, "learning_rate": 5.332158379024122e-07, "loss": 6.8465, "step": 1451 }, { "epoch": 0.1816802608838457, "grad_norm": 0.10362215340137482, "learning_rate": 5.116923974270993e-07, "loss": 6.8582, "step": 1452 }, { "epoch": 0.1818053850304599, "grad_norm": 0.08789569139480591, "learning_rate": 4.906112387745965e-07, "loss": 6.8572, "step": 1453 }, { "epoch": 0.18193050917707412, "grad_norm": 0.09827589988708496, "learning_rate": 4.6997245566257064e-07, "loss": 6.8474, "step": 1454 }, { "epoch": 0.18205563332368835, "grad_norm": 0.10099496692419052, "learning_rate": 4.497761398421063e-07, "loss": 6.8488, "step": 1455 }, { "epoch": 0.18218075747030257, "grad_norm": 0.06926746666431427, "learning_rate": 4.3002238109723927e-07, "loss": 6.8615, "step": 1456 }, { "epoch": 0.1823058816169168, "grad_norm": 0.053934670984745026, "learning_rate": 4.107112672446123e-07, "loss": 6.8683, "step": 1457 }, { "epoch": 0.182431005763531, "grad_norm": 0.05834341421723366, "learning_rate": 3.9184288413306456e-07, "loss": 6.8636, "step": 1458 }, { "epoch": 0.18255612991014522, "grad_norm": 0.054404616355895996, "learning_rate": 3.734173156432208e-07, "loss": 6.8559, "step": 1459 }, { "epoch": 0.18268125405675945, "grad_norm": 0.046236101537942886, "learning_rate": 3.554346436871581e-07, "loss": 6.8672, "step": 1460 }, { "epoch": 0.18280637820337367, "grad_norm": 0.053519781678915024, "learning_rate": 3.3789494820803957e-07, "loss": 6.8624, "step": 1461 }, { "epoch": 0.18293150234998787, "grad_norm": 0.05783402919769287, "learning_rate": 3.2079830717972606e-07, "loss": 6.8568, "step": 1462 }, { "epoch": 0.1830566264966021, "grad_norm": 0.05109785124659538, "learning_rate": 3.041447966064648e-07, "loss": 6.8628, "step": 1463 }, { "epoch": 0.18318175064321632, "grad_norm": 0.05525349825620651, "learning_rate": 2.8793449052254563e-07, "loss": 6.8621, "step": 1464 }, { "epoch": 0.18330687478983054, "grad_norm": 0.058113943785429, "learning_rate": 2.721674609919345e-07, "loss": 6.8567, "step": 1465 }, { "epoch": 0.18343199893644474, "grad_norm": 0.054108813405036926, "learning_rate": 2.568437781080069e-07, "loss": 6.8559, "step": 1466 }, { "epoch": 0.18355712308305897, "grad_norm": 0.059466779232025146, "learning_rate": 2.4196350999320384e-07, "loss": 6.8578, "step": 1467 }, { "epoch": 0.1836822472296732, "grad_norm": 0.05075949430465698, "learning_rate": 2.275267227987321e-07, "loss": 6.8509, "step": 1468 }, { "epoch": 0.18380737137628742, "grad_norm": 0.05008383467793465, "learning_rate": 2.135334807042866e-07, "loss": 6.8546, "step": 1469 }, { "epoch": 0.18393249552290164, "grad_norm": 0.056943994015455246, "learning_rate": 1.9998384591773944e-07, "loss": 6.8515, "step": 1470 }, { "epoch": 0.18405761966951584, "grad_norm": 0.05193135887384415, "learning_rate": 1.8687787867489592e-07, "loss": 6.8491, "step": 1471 }, { "epoch": 0.18418274381613006, "grad_norm": 0.059180911630392075, "learning_rate": 1.7421563723919454e-07, "loss": 6.8451, "step": 1472 }, { "epoch": 0.1843078679627443, "grad_norm": 0.05493330582976341, "learning_rate": 1.6199717790145174e-07, "loss": 6.8475, "step": 1473 }, { "epoch": 0.1844329921093585, "grad_norm": 0.05605136230587959, "learning_rate": 1.5022255497962879e-07, "loss": 6.8458, "step": 1474 }, { "epoch": 0.1845581162559727, "grad_norm": 0.053757090121507645, "learning_rate": 1.3889182081860962e-07, "loss": 6.843, "step": 1475 }, { "epoch": 0.18468324040258693, "grad_norm": 0.05816550552845001, "learning_rate": 1.2800502578991235e-07, "loss": 6.8402, "step": 1476 }, { "epoch": 0.18480836454920116, "grad_norm": 0.04979588836431503, "learning_rate": 1.1756221829148928e-07, "loss": 6.8432, "step": 1477 }, { "epoch": 0.18493348869581538, "grad_norm": 0.05705437809228897, "learning_rate": 1.0756344474753821e-07, "loss": 6.836, "step": 1478 }, { "epoch": 0.1850586128424296, "grad_norm": 0.06499331444501877, "learning_rate": 9.800874960826933e-08, "loss": 6.8347, "step": 1479 }, { "epoch": 0.1851837369890438, "grad_norm": 0.052779845893383026, "learning_rate": 8.889817534969425e-08, "loss": 6.8363, "step": 1480 }, { "epoch": 0.18530886113565803, "grad_norm": 0.05613570287823677, "learning_rate": 8.023176247348163e-08, "loss": 6.8341, "step": 1481 }, { "epoch": 0.18543398528227226, "grad_norm": 0.06078937277197838, "learning_rate": 7.200954950673522e-08, "loss": 6.839, "step": 1482 }, { "epoch": 0.18555910942888648, "grad_norm": 0.05623992532491684, "learning_rate": 6.423157300184946e-08, "loss": 6.8335, "step": 1483 }, { "epoch": 0.18568423357550068, "grad_norm": 0.04787232354283333, "learning_rate": 5.6897867536331864e-08, "loss": 6.8278, "step": 1484 }, { "epoch": 0.1858093577221149, "grad_norm": 0.061891939491033554, "learning_rate": 5.000846571264761e-08, "loss": 6.8347, "step": 1485 }, { "epoch": 0.18593448186872913, "grad_norm": 0.06739771366119385, "learning_rate": 4.35633981580974e-08, "loss": 6.8218, "step": 1486 }, { "epoch": 0.18605960601534335, "grad_norm": 0.06384280323982239, "learning_rate": 3.756269352462871e-08, "loss": 6.8237, "step": 1487 }, { "epoch": 0.18618473016195758, "grad_norm": 0.06037496030330658, "learning_rate": 3.20063784888025e-08, "loss": 6.8251, "step": 1488 }, { "epoch": 0.18630985430857178, "grad_norm": 0.06595481932163239, "learning_rate": 2.6894477751548964e-08, "loss": 6.822, "step": 1489 }, { "epoch": 0.186434978455186, "grad_norm": 0.06832300871610641, "learning_rate": 2.222701403818972e-08, "loss": 6.822, "step": 1490 }, { "epoch": 0.18656010260180023, "grad_norm": 0.0702512115240097, "learning_rate": 1.8004008098226887e-08, "loss": 6.82, "step": 1491 }, { "epoch": 0.18668522674841445, "grad_norm": 0.06402243673801422, "learning_rate": 1.4225478705309769e-08, "loss": 6.8174, "step": 1492 }, { "epoch": 0.18681035089502865, "grad_norm": 0.07588471472263336, "learning_rate": 1.0891442657134932e-08, "loss": 6.8137, "step": 1493 }, { "epoch": 0.18693547504164287, "grad_norm": 0.07471845299005508, "learning_rate": 8.001914775401798e-09, "loss": 6.8135, "step": 1494 }, { "epoch": 0.1870605991882571, "grad_norm": 0.10632038116455078, "learning_rate": 5.5569079056794206e-09, "loss": 6.8137, "step": 1495 }, { "epoch": 0.18718572333487132, "grad_norm": 0.0800660252571106, "learning_rate": 3.5564329174064824e-09, "loss": 6.8013, "step": 1496 }, { "epoch": 0.18731084748148555, "grad_norm": 0.08418132364749908, "learning_rate": 2.0004987038246824e-09, "loss": 6.802, "step": 1497 }, { "epoch": 0.18743597162809975, "grad_norm": 0.08943944424390793, "learning_rate": 8.891121819565306e-10, "loss": 6.7891, "step": 1498 }, { "epoch": 0.18756109577471397, "grad_norm": 0.15149299800395966, "learning_rate": 2.2227829252763344e-10, "loss": 6.7772, "step": 1499 }, { "epoch": 0.1876862199213282, "grad_norm": 0.1595006287097931, "learning_rate": 0.0, "loss": 6.7472, "step": 1500 }, { "epoch": 0.1876862199213282, "eval_loss": 6.83489465713501, "eval_runtime": 31.2539, "eval_samples_per_second": 430.698, "eval_steps_per_second": 215.365, "step": 1500 } ], "logging_steps": 1, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 375, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 16322907451392.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }