{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999152183128445, "eval_steps": 500, "global_step": 5897, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001695633743111488, "grad_norm": 8.009743100188633, "learning_rate": 1.1299435028248588e-07, "loss": 1.6644, "step": 1 }, { "epoch": 0.0003391267486222976, "grad_norm": 8.120057966565689, "learning_rate": 2.2598870056497177e-07, "loss": 1.654, "step": 2 }, { "epoch": 0.0005086901229334464, "grad_norm": 9.015060821761788, "learning_rate": 3.3898305084745766e-07, "loss": 1.7052, "step": 3 }, { "epoch": 0.0006782534972445952, "grad_norm": 8.497781543486818, "learning_rate": 4.5197740112994353e-07, "loss": 1.6626, "step": 4 }, { "epoch": 0.000847816871555744, "grad_norm": 7.334467732528096, "learning_rate": 5.649717514124295e-07, "loss": 1.6283, "step": 5 }, { "epoch": 0.0010173802458668928, "grad_norm": 7.352918044177938, "learning_rate": 6.779661016949153e-07, "loss": 1.6305, "step": 6 }, { "epoch": 0.0011869436201780415, "grad_norm": 5.8656279822628425, "learning_rate": 7.909604519774013e-07, "loss": 1.5736, "step": 7 }, { "epoch": 0.0013565069944891904, "grad_norm": 7.660884079016951, "learning_rate": 9.039548022598871e-07, "loss": 1.6539, "step": 8 }, { "epoch": 0.001526070368800339, "grad_norm": 5.833344012009003, "learning_rate": 1.016949152542373e-06, "loss": 1.6351, "step": 9 }, { "epoch": 0.001695633743111488, "grad_norm": 4.43504014266901, "learning_rate": 1.129943502824859e-06, "loss": 1.5749, "step": 10 }, { "epoch": 0.0018651971174226368, "grad_norm": 6.305339473012175, "learning_rate": 1.2429378531073449e-06, "loss": 1.6133, "step": 11 }, { "epoch": 0.0020347604917337857, "grad_norm": 5.444325833139642, "learning_rate": 1.3559322033898307e-06, "loss": 1.5851, "step": 12 }, { "epoch": 0.0022043238660449343, "grad_norm": 5.920785386190525, "learning_rate": 1.4689265536723166e-06, "loss": 1.5609, "step": 13 }, { "epoch": 0.002373887240356083, "grad_norm": 3.784131296672084, "learning_rate": 1.5819209039548026e-06, "loss": 1.4846, "step": 14 }, { "epoch": 0.002543450614667232, "grad_norm": 5.108565414877564, "learning_rate": 1.6949152542372882e-06, "loss": 1.4518, "step": 15 }, { "epoch": 0.0027130139889783808, "grad_norm": 4.659212542594555, "learning_rate": 1.8079096045197741e-06, "loss": 1.4776, "step": 16 }, { "epoch": 0.0028825773632895294, "grad_norm": 4.500699647023348, "learning_rate": 1.92090395480226e-06, "loss": 1.4471, "step": 17 }, { "epoch": 0.003052140737600678, "grad_norm": 4.359881151805078, "learning_rate": 2.033898305084746e-06, "loss": 1.4122, "step": 18 }, { "epoch": 0.003221704111911827, "grad_norm": 2.9457532035155882, "learning_rate": 2.146892655367232e-06, "loss": 1.3402, "step": 19 }, { "epoch": 0.003391267486222976, "grad_norm": 2.3289467248402285, "learning_rate": 2.259887005649718e-06, "loss": 1.3148, "step": 20 }, { "epoch": 0.0035608308605341245, "grad_norm": 2.129911559733815, "learning_rate": 2.372881355932204e-06, "loss": 1.3119, "step": 21 }, { "epoch": 0.0037303942348452736, "grad_norm": 1.9608178460181482, "learning_rate": 2.4858757062146898e-06, "loss": 1.3232, "step": 22 }, { "epoch": 0.0038999576091564223, "grad_norm": 2.0375618273846356, "learning_rate": 2.5988700564971753e-06, "loss": 1.2713, "step": 23 }, { "epoch": 0.004069520983467571, "grad_norm": 1.793874599018799, "learning_rate": 2.7118644067796613e-06, "loss": 1.288, "step": 24 }, { "epoch": 0.00423908435777872, "grad_norm": 2.004063156040502, "learning_rate": 2.8248587570621473e-06, "loss": 1.2909, "step": 25 }, { "epoch": 0.004408647732089869, "grad_norm": 1.9905323823337757, "learning_rate": 2.9378531073446333e-06, "loss": 1.3221, "step": 26 }, { "epoch": 0.004578211106401018, "grad_norm": 2.005543582552709, "learning_rate": 3.0508474576271192e-06, "loss": 1.317, "step": 27 }, { "epoch": 0.004747774480712166, "grad_norm": 1.8209655307101105, "learning_rate": 3.163841807909605e-06, "loss": 1.2674, "step": 28 }, { "epoch": 0.004917337855023315, "grad_norm": 1.9415017758256143, "learning_rate": 3.2768361581920903e-06, "loss": 1.3413, "step": 29 }, { "epoch": 0.005086901229334464, "grad_norm": 1.8110879769344066, "learning_rate": 3.3898305084745763e-06, "loss": 1.3151, "step": 30 }, { "epoch": 0.005256464603645612, "grad_norm": 1.724928822547298, "learning_rate": 3.5028248587570623e-06, "loss": 1.2628, "step": 31 }, { "epoch": 0.0054260279779567615, "grad_norm": 1.6661841508413606, "learning_rate": 3.6158192090395483e-06, "loss": 1.3117, "step": 32 }, { "epoch": 0.00559559135226791, "grad_norm": 1.5534064831870749, "learning_rate": 3.7288135593220342e-06, "loss": 1.2836, "step": 33 }, { "epoch": 0.005765154726579059, "grad_norm": 1.7046938776680567, "learning_rate": 3.84180790960452e-06, "loss": 1.285, "step": 34 }, { "epoch": 0.005934718100890208, "grad_norm": 1.612846525855917, "learning_rate": 3.954802259887006e-06, "loss": 1.2809, "step": 35 }, { "epoch": 0.006104281475201356, "grad_norm": 1.687964664224191, "learning_rate": 4.067796610169492e-06, "loss": 1.2731, "step": 36 }, { "epoch": 0.006273844849512505, "grad_norm": 1.6687480915272355, "learning_rate": 4.180790960451978e-06, "loss": 1.2763, "step": 37 }, { "epoch": 0.006443408223823654, "grad_norm": 1.4888760594468662, "learning_rate": 4.293785310734464e-06, "loss": 1.2137, "step": 38 }, { "epoch": 0.006612971598134803, "grad_norm": 1.8803849291882477, "learning_rate": 4.40677966101695e-06, "loss": 0.9621, "step": 39 }, { "epoch": 0.006782534972445952, "grad_norm": 1.4823709104222176, "learning_rate": 4.519774011299436e-06, "loss": 1.2617, "step": 40 }, { "epoch": 0.006952098346757101, "grad_norm": 1.4914333671052835, "learning_rate": 4.632768361581922e-06, "loss": 1.2139, "step": 41 }, { "epoch": 0.007121661721068249, "grad_norm": 1.3974249005302837, "learning_rate": 4.745762711864408e-06, "loss": 1.2326, "step": 42 }, { "epoch": 0.007291225095379398, "grad_norm": 1.5303511649936188, "learning_rate": 4.8587570621468936e-06, "loss": 1.1972, "step": 43 }, { "epoch": 0.007460788469690547, "grad_norm": 1.7150929659071426, "learning_rate": 4.9717514124293796e-06, "loss": 1.2443, "step": 44 }, { "epoch": 0.007630351844001695, "grad_norm": 1.4264128386170565, "learning_rate": 5.084745762711865e-06, "loss": 1.2487, "step": 45 }, { "epoch": 0.0077999152183128445, "grad_norm": 1.6364577845075876, "learning_rate": 5.197740112994351e-06, "loss": 1.2417, "step": 46 }, { "epoch": 0.007969478592623994, "grad_norm": 1.4332553821939178, "learning_rate": 5.310734463276837e-06, "loss": 1.1973, "step": 47 }, { "epoch": 0.008139041966935143, "grad_norm": 1.3592115062353742, "learning_rate": 5.423728813559323e-06, "loss": 1.1863, "step": 48 }, { "epoch": 0.00830860534124629, "grad_norm": 1.3738333468550614, "learning_rate": 5.536723163841809e-06, "loss": 1.1696, "step": 49 }, { "epoch": 0.00847816871555744, "grad_norm": 1.4768014324641212, "learning_rate": 5.6497175141242946e-06, "loss": 1.245, "step": 50 }, { "epoch": 0.008647732089868588, "grad_norm": 1.4675179858171783, "learning_rate": 5.7627118644067805e-06, "loss": 1.1864, "step": 51 }, { "epoch": 0.008817295464179737, "grad_norm": 1.4910780908907362, "learning_rate": 5.8757062146892665e-06, "loss": 1.2223, "step": 52 }, { "epoch": 0.008986858838490886, "grad_norm": 1.472831051111739, "learning_rate": 5.9887005649717525e-06, "loss": 1.2381, "step": 53 }, { "epoch": 0.009156422212802036, "grad_norm": 1.5872347424548234, "learning_rate": 6.1016949152542385e-06, "loss": 1.2539, "step": 54 }, { "epoch": 0.009325985587113183, "grad_norm": 1.5061858194714863, "learning_rate": 6.2146892655367244e-06, "loss": 1.2216, "step": 55 }, { "epoch": 0.009495548961424332, "grad_norm": 1.3283158228134124, "learning_rate": 6.32768361581921e-06, "loss": 1.1677, "step": 56 }, { "epoch": 0.009665112335735481, "grad_norm": 1.4486033239394485, "learning_rate": 6.440677966101695e-06, "loss": 1.2341, "step": 57 }, { "epoch": 0.00983467571004663, "grad_norm": 1.4316644916597505, "learning_rate": 6.553672316384181e-06, "loss": 1.2112, "step": 58 }, { "epoch": 0.01000423908435778, "grad_norm": 1.3755463463393165, "learning_rate": 6.666666666666667e-06, "loss": 1.1726, "step": 59 }, { "epoch": 0.010173802458668928, "grad_norm": 1.426073730303385, "learning_rate": 6.779661016949153e-06, "loss": 1.1728, "step": 60 }, { "epoch": 0.010343365832980076, "grad_norm": 1.4224049111949, "learning_rate": 6.892655367231639e-06, "loss": 1.2123, "step": 61 }, { "epoch": 0.010512929207291225, "grad_norm": 1.41522704459736, "learning_rate": 7.0056497175141246e-06, "loss": 1.1711, "step": 62 }, { "epoch": 0.010682492581602374, "grad_norm": 1.3791845008388997, "learning_rate": 7.1186440677966106e-06, "loss": 1.1764, "step": 63 }, { "epoch": 0.010852055955913523, "grad_norm": 1.4115609164022564, "learning_rate": 7.2316384180790965e-06, "loss": 1.1833, "step": 64 }, { "epoch": 0.011021619330224672, "grad_norm": 1.49155460296025, "learning_rate": 7.3446327683615825e-06, "loss": 1.2249, "step": 65 }, { "epoch": 0.01119118270453582, "grad_norm": 1.4550519804646345, "learning_rate": 7.4576271186440685e-06, "loss": 1.1723, "step": 66 }, { "epoch": 0.011360746078846969, "grad_norm": 1.3630110124605177, "learning_rate": 7.5706214689265545e-06, "loss": 1.1441, "step": 67 }, { "epoch": 0.011530309453158118, "grad_norm": 1.3421295802070998, "learning_rate": 7.68361581920904e-06, "loss": 1.1751, "step": 68 }, { "epoch": 0.011699872827469267, "grad_norm": 1.4571892679954532, "learning_rate": 7.796610169491526e-06, "loss": 1.179, "step": 69 }, { "epoch": 0.011869436201780416, "grad_norm": 1.5214067362659185, "learning_rate": 7.909604519774012e-06, "loss": 1.204, "step": 70 }, { "epoch": 0.012038999576091565, "grad_norm": 1.4484248517716496, "learning_rate": 8.022598870056498e-06, "loss": 1.1954, "step": 71 }, { "epoch": 0.012208562950402712, "grad_norm": 1.5414616013254128, "learning_rate": 8.135593220338983e-06, "loss": 1.1514, "step": 72 }, { "epoch": 0.012378126324713861, "grad_norm": 1.5454130785927713, "learning_rate": 8.248587570621469e-06, "loss": 1.1606, "step": 73 }, { "epoch": 0.01254768969902501, "grad_norm": 1.34697348323352, "learning_rate": 8.361581920903955e-06, "loss": 1.1995, "step": 74 }, { "epoch": 0.01271725307333616, "grad_norm": 1.3774590095095847, "learning_rate": 8.47457627118644e-06, "loss": 1.2001, "step": 75 }, { "epoch": 0.012886816447647309, "grad_norm": 1.3143471880877364, "learning_rate": 8.587570621468927e-06, "loss": 1.1454, "step": 76 }, { "epoch": 0.013056379821958458, "grad_norm": 1.375366668851842, "learning_rate": 8.700564971751413e-06, "loss": 1.1438, "step": 77 }, { "epoch": 0.013225943196269605, "grad_norm": 1.441944270436558, "learning_rate": 8.8135593220339e-06, "loss": 1.1993, "step": 78 }, { "epoch": 0.013395506570580754, "grad_norm": 1.2946879133856595, "learning_rate": 8.926553672316384e-06, "loss": 1.1533, "step": 79 }, { "epoch": 0.013565069944891903, "grad_norm": 1.4715500438889428, "learning_rate": 9.039548022598871e-06, "loss": 1.2228, "step": 80 }, { "epoch": 0.013734633319203052, "grad_norm": 1.3443740933148, "learning_rate": 9.152542372881356e-06, "loss": 1.1242, "step": 81 }, { "epoch": 0.013904196693514202, "grad_norm": 1.4263901354504167, "learning_rate": 9.265536723163843e-06, "loss": 1.1791, "step": 82 }, { "epoch": 0.014073760067825349, "grad_norm": 1.382684270849743, "learning_rate": 9.378531073446328e-06, "loss": 1.186, "step": 83 }, { "epoch": 0.014243323442136498, "grad_norm": 1.24687716463306, "learning_rate": 9.491525423728815e-06, "loss": 1.165, "step": 84 }, { "epoch": 0.014412886816447647, "grad_norm": 1.4046061864477641, "learning_rate": 9.6045197740113e-06, "loss": 1.1556, "step": 85 }, { "epoch": 0.014582450190758796, "grad_norm": 1.4201710759611585, "learning_rate": 9.717514124293787e-06, "loss": 1.2076, "step": 86 }, { "epoch": 0.014752013565069945, "grad_norm": 1.4052705160068086, "learning_rate": 9.830508474576272e-06, "loss": 1.1808, "step": 87 }, { "epoch": 0.014921576939381094, "grad_norm": 1.299029989336803, "learning_rate": 9.943502824858759e-06, "loss": 1.1539, "step": 88 }, { "epoch": 0.015091140313692242, "grad_norm": 1.4569947609058904, "learning_rate": 1.0056497175141244e-05, "loss": 1.1797, "step": 89 }, { "epoch": 0.01526070368800339, "grad_norm": 1.4143057551195373, "learning_rate": 1.016949152542373e-05, "loss": 1.1338, "step": 90 }, { "epoch": 0.01543026706231454, "grad_norm": 1.3602375190007165, "learning_rate": 1.0282485875706216e-05, "loss": 1.1706, "step": 91 }, { "epoch": 0.015599830436625689, "grad_norm": 1.5644532153484685, "learning_rate": 1.0395480225988701e-05, "loss": 1.2033, "step": 92 }, { "epoch": 0.015769393810936838, "grad_norm": 1.3546546367249224, "learning_rate": 1.0508474576271188e-05, "loss": 1.1561, "step": 93 }, { "epoch": 0.015938957185247987, "grad_norm": 1.3725561300593538, "learning_rate": 1.0621468926553673e-05, "loss": 1.1325, "step": 94 }, { "epoch": 0.016108520559559136, "grad_norm": 1.388499463375127, "learning_rate": 1.073446327683616e-05, "loss": 1.1288, "step": 95 }, { "epoch": 0.016278083933870285, "grad_norm": 1.4526522323723448, "learning_rate": 1.0847457627118645e-05, "loss": 1.1949, "step": 96 }, { "epoch": 0.016447647308181435, "grad_norm": 1.3638352643302796, "learning_rate": 1.096045197740113e-05, "loss": 1.1829, "step": 97 }, { "epoch": 0.01661721068249258, "grad_norm": 1.33892437598582, "learning_rate": 1.1073446327683617e-05, "loss": 1.1647, "step": 98 }, { "epoch": 0.01678677405680373, "grad_norm": 1.3625780186000709, "learning_rate": 1.1186440677966102e-05, "loss": 1.1446, "step": 99 }, { "epoch": 0.01695633743111488, "grad_norm": 1.4666896724472283, "learning_rate": 1.1299435028248589e-05, "loss": 1.1551, "step": 100 }, { "epoch": 0.017125900805426027, "grad_norm": 1.418947637976201, "learning_rate": 1.1412429378531074e-05, "loss": 1.1432, "step": 101 }, { "epoch": 0.017295464179737177, "grad_norm": 1.4582583979962045, "learning_rate": 1.1525423728813561e-05, "loss": 1.1372, "step": 102 }, { "epoch": 0.017465027554048326, "grad_norm": 1.5049126363000913, "learning_rate": 1.1638418079096046e-05, "loss": 1.1349, "step": 103 }, { "epoch": 0.017634590928359475, "grad_norm": 1.3732226340301854, "learning_rate": 1.1751412429378533e-05, "loss": 1.0922, "step": 104 }, { "epoch": 0.017804154302670624, "grad_norm": 1.3539434227531597, "learning_rate": 1.1864406779661018e-05, "loss": 1.1458, "step": 105 }, { "epoch": 0.017973717676981773, "grad_norm": 1.4197267694015603, "learning_rate": 1.1977401129943505e-05, "loss": 1.1086, "step": 106 }, { "epoch": 0.018143281051292922, "grad_norm": 1.438729038046718, "learning_rate": 1.209039548022599e-05, "loss": 1.0988, "step": 107 }, { "epoch": 0.01831284442560407, "grad_norm": 1.633758894428501, "learning_rate": 1.2203389830508477e-05, "loss": 1.1674, "step": 108 }, { "epoch": 0.018482407799915217, "grad_norm": 1.357554823293788, "learning_rate": 1.2316384180790962e-05, "loss": 1.1606, "step": 109 }, { "epoch": 0.018651971174226366, "grad_norm": 1.618537712329157, "learning_rate": 1.2429378531073449e-05, "loss": 1.1679, "step": 110 }, { "epoch": 0.018821534548537515, "grad_norm": 1.474783316466239, "learning_rate": 1.2542372881355932e-05, "loss": 1.1391, "step": 111 }, { "epoch": 0.018991097922848664, "grad_norm": 1.4025286069410725, "learning_rate": 1.265536723163842e-05, "loss": 1.1741, "step": 112 }, { "epoch": 0.019160661297159813, "grad_norm": 1.4202582137753894, "learning_rate": 1.2768361581920904e-05, "loss": 1.1488, "step": 113 }, { "epoch": 0.019330224671470962, "grad_norm": 1.3729347919531196, "learning_rate": 1.288135593220339e-05, "loss": 1.1339, "step": 114 }, { "epoch": 0.01949978804578211, "grad_norm": 1.4684487410646836, "learning_rate": 1.2994350282485876e-05, "loss": 1.1968, "step": 115 }, { "epoch": 0.01966935142009326, "grad_norm": 1.389502113861768, "learning_rate": 1.3107344632768361e-05, "loss": 1.1839, "step": 116 }, { "epoch": 0.01983891479440441, "grad_norm": 1.4360527100049258, "learning_rate": 1.3220338983050848e-05, "loss": 1.1367, "step": 117 }, { "epoch": 0.02000847816871556, "grad_norm": 1.418750605257581, "learning_rate": 1.3333333333333333e-05, "loss": 1.1746, "step": 118 }, { "epoch": 0.020178041543026708, "grad_norm": 1.549593645625235, "learning_rate": 1.344632768361582e-05, "loss": 1.2052, "step": 119 }, { "epoch": 0.020347604917337857, "grad_norm": 1.439169982956848, "learning_rate": 1.3559322033898305e-05, "loss": 1.1231, "step": 120 }, { "epoch": 0.020517168291649002, "grad_norm": 1.5599134557216332, "learning_rate": 1.3672316384180792e-05, "loss": 1.1337, "step": 121 }, { "epoch": 0.02068673166596015, "grad_norm": 1.3808742250679915, "learning_rate": 1.3785310734463277e-05, "loss": 1.1385, "step": 122 }, { "epoch": 0.0208562950402713, "grad_norm": 1.4567621392474348, "learning_rate": 1.3898305084745764e-05, "loss": 1.1281, "step": 123 }, { "epoch": 0.02102585841458245, "grad_norm": 1.4210796371692003, "learning_rate": 1.4011299435028249e-05, "loss": 1.1479, "step": 124 }, { "epoch": 0.0211954217888936, "grad_norm": 1.2766383464713438, "learning_rate": 1.4124293785310736e-05, "loss": 1.1319, "step": 125 }, { "epoch": 0.021364985163204748, "grad_norm": 1.4041737686093716, "learning_rate": 1.4237288135593221e-05, "loss": 1.1349, "step": 126 }, { "epoch": 0.021534548537515897, "grad_norm": 1.4698887894871435, "learning_rate": 1.4350282485875708e-05, "loss": 1.1594, "step": 127 }, { "epoch": 0.021704111911827046, "grad_norm": 1.2737750466404523, "learning_rate": 1.4463276836158193e-05, "loss": 1.0648, "step": 128 }, { "epoch": 0.021873675286138195, "grad_norm": 1.4613251955693345, "learning_rate": 1.4576271186440678e-05, "loss": 1.1833, "step": 129 }, { "epoch": 0.022043238660449344, "grad_norm": 1.358530679449396, "learning_rate": 1.4689265536723165e-05, "loss": 1.0942, "step": 130 }, { "epoch": 0.022212802034760493, "grad_norm": 1.4911629429567874, "learning_rate": 1.480225988700565e-05, "loss": 1.1287, "step": 131 }, { "epoch": 0.02238236540907164, "grad_norm": 1.3954468734206023, "learning_rate": 1.4915254237288137e-05, "loss": 1.1282, "step": 132 }, { "epoch": 0.022551928783382788, "grad_norm": 1.362654148285321, "learning_rate": 1.5028248587570622e-05, "loss": 1.1385, "step": 133 }, { "epoch": 0.022721492157693937, "grad_norm": 1.3510183744777187, "learning_rate": 1.5141242937853109e-05, "loss": 1.1401, "step": 134 }, { "epoch": 0.022891055532005086, "grad_norm": 1.3443832259921455, "learning_rate": 1.5254237288135594e-05, "loss": 1.1301, "step": 135 }, { "epoch": 0.023060618906316235, "grad_norm": 1.4729900026203113, "learning_rate": 1.536723163841808e-05, "loss": 1.1572, "step": 136 }, { "epoch": 0.023230182280627384, "grad_norm": 1.5192378566670621, "learning_rate": 1.5480225988700566e-05, "loss": 1.1121, "step": 137 }, { "epoch": 0.023399745654938534, "grad_norm": 1.3371938427826509, "learning_rate": 1.5593220338983053e-05, "loss": 1.1023, "step": 138 }, { "epoch": 0.023569309029249683, "grad_norm": 1.3506912312259471, "learning_rate": 1.5706214689265536e-05, "loss": 1.1164, "step": 139 }, { "epoch": 0.02373887240356083, "grad_norm": 1.4136938287610317, "learning_rate": 1.5819209039548023e-05, "loss": 1.097, "step": 140 }, { "epoch": 0.02390843577787198, "grad_norm": 1.488625382024975, "learning_rate": 1.593220338983051e-05, "loss": 1.145, "step": 141 }, { "epoch": 0.02407799915218313, "grad_norm": 1.3004109068927645, "learning_rate": 1.6045197740112997e-05, "loss": 1.1451, "step": 142 }, { "epoch": 0.024247562526494276, "grad_norm": 1.576383048933645, "learning_rate": 1.615819209039548e-05, "loss": 1.1522, "step": 143 }, { "epoch": 0.024417125900805425, "grad_norm": 1.5103087194461904, "learning_rate": 1.6271186440677967e-05, "loss": 1.0823, "step": 144 }, { "epoch": 0.024586689275116574, "grad_norm": 1.3086214235359925, "learning_rate": 1.6384180790960454e-05, "loss": 1.0841, "step": 145 }, { "epoch": 0.024756252649427723, "grad_norm": 1.5049556210751096, "learning_rate": 1.6497175141242937e-05, "loss": 1.1514, "step": 146 }, { "epoch": 0.024925816023738872, "grad_norm": 1.5829729386310978, "learning_rate": 1.6610169491525424e-05, "loss": 1.1277, "step": 147 }, { "epoch": 0.02509537939805002, "grad_norm": 1.308951750399878, "learning_rate": 1.672316384180791e-05, "loss": 1.0862, "step": 148 }, { "epoch": 0.02526494277236117, "grad_norm": 1.431271306611669, "learning_rate": 1.6836158192090398e-05, "loss": 1.1225, "step": 149 }, { "epoch": 0.02543450614667232, "grad_norm": 1.43200498985713, "learning_rate": 1.694915254237288e-05, "loss": 1.1142, "step": 150 }, { "epoch": 0.02560406952098347, "grad_norm": 1.3977923715266523, "learning_rate": 1.7062146892655368e-05, "loss": 1.0992, "step": 151 }, { "epoch": 0.025773632895294617, "grad_norm": 1.316473925950156, "learning_rate": 1.7175141242937855e-05, "loss": 1.1106, "step": 152 }, { "epoch": 0.025943196269605766, "grad_norm": 1.2750052619424626, "learning_rate": 1.728813559322034e-05, "loss": 1.0955, "step": 153 }, { "epoch": 0.026112759643916916, "grad_norm": 1.355255064360889, "learning_rate": 1.7401129943502825e-05, "loss": 1.0781, "step": 154 }, { "epoch": 0.02628232301822806, "grad_norm": 1.5199288561924749, "learning_rate": 1.7514124293785312e-05, "loss": 1.1173, "step": 155 }, { "epoch": 0.02645188639253921, "grad_norm": 1.5301032064436257, "learning_rate": 1.76271186440678e-05, "loss": 1.1632, "step": 156 }, { "epoch": 0.02662144976685036, "grad_norm": 1.351533994824582, "learning_rate": 1.7740112994350286e-05, "loss": 1.1359, "step": 157 }, { "epoch": 0.02679101314116151, "grad_norm": 1.334637742315722, "learning_rate": 1.785310734463277e-05, "loss": 1.1421, "step": 158 }, { "epoch": 0.026960576515472658, "grad_norm": 1.5032188427159412, "learning_rate": 1.7966101694915256e-05, "loss": 1.0946, "step": 159 }, { "epoch": 0.027130139889783807, "grad_norm": 1.4557110103401063, "learning_rate": 1.8079096045197743e-05, "loss": 1.1207, "step": 160 }, { "epoch": 0.027299703264094956, "grad_norm": 1.3679392208202088, "learning_rate": 1.8192090395480226e-05, "loss": 1.0915, "step": 161 }, { "epoch": 0.027469266638406105, "grad_norm": 1.3152214063562153, "learning_rate": 1.8305084745762713e-05, "loss": 1.1025, "step": 162 }, { "epoch": 0.027638830012717254, "grad_norm": 1.4785040033145542, "learning_rate": 1.84180790960452e-05, "loss": 1.1513, "step": 163 }, { "epoch": 0.027808393387028403, "grad_norm": 1.6818977355061948, "learning_rate": 1.8531073446327686e-05, "loss": 1.1426, "step": 164 }, { "epoch": 0.027977956761339552, "grad_norm": 1.2546223970138515, "learning_rate": 1.864406779661017e-05, "loss": 1.0774, "step": 165 }, { "epoch": 0.028147520135650698, "grad_norm": 1.3944074900148387, "learning_rate": 1.8757062146892657e-05, "loss": 1.1694, "step": 166 }, { "epoch": 0.028317083509961847, "grad_norm": 1.3946877861404816, "learning_rate": 1.8870056497175144e-05, "loss": 1.1154, "step": 167 }, { "epoch": 0.028486646884272996, "grad_norm": 1.4421720578692507, "learning_rate": 1.898305084745763e-05, "loss": 1.1094, "step": 168 }, { "epoch": 0.028656210258584145, "grad_norm": 1.4552809730187664, "learning_rate": 1.9096045197740114e-05, "loss": 1.1056, "step": 169 }, { "epoch": 0.028825773632895294, "grad_norm": 1.379880202176955, "learning_rate": 1.92090395480226e-05, "loss": 1.1584, "step": 170 }, { "epoch": 0.028995337007206443, "grad_norm": 1.363145032946682, "learning_rate": 1.9322033898305087e-05, "loss": 1.1336, "step": 171 }, { "epoch": 0.029164900381517592, "grad_norm": 1.6235858187409438, "learning_rate": 1.9435028248587574e-05, "loss": 1.1535, "step": 172 }, { "epoch": 0.02933446375582874, "grad_norm": 1.3155078338219928, "learning_rate": 1.9548022598870058e-05, "loss": 1.1423, "step": 173 }, { "epoch": 0.02950402713013989, "grad_norm": 1.296301744703282, "learning_rate": 1.9661016949152545e-05, "loss": 1.1682, "step": 174 }, { "epoch": 0.02967359050445104, "grad_norm": 1.263899877948669, "learning_rate": 1.977401129943503e-05, "loss": 1.1183, "step": 175 }, { "epoch": 0.02984315387876219, "grad_norm": 1.3585075935665827, "learning_rate": 1.9887005649717518e-05, "loss": 1.1356, "step": 176 }, { "epoch": 0.030012717253073338, "grad_norm": 1.311067313289672, "learning_rate": 2e-05, "loss": 1.1398, "step": 177 }, { "epoch": 0.030182280627384483, "grad_norm": 1.290727195525984, "learning_rate": 1.9999998491734904e-05, "loss": 1.0964, "step": 178 }, { "epoch": 0.030351844001695633, "grad_norm": 1.3036669420154081, "learning_rate": 1.9999993966940065e-05, "loss": 1.121, "step": 179 }, { "epoch": 0.03052140737600678, "grad_norm": 1.3479945804203648, "learning_rate": 1.9999986425616854e-05, "loss": 1.1593, "step": 180 }, { "epoch": 0.03069097075031793, "grad_norm": 1.3390700969940623, "learning_rate": 1.999997586776754e-05, "loss": 1.1012, "step": 181 }, { "epoch": 0.03086053412462908, "grad_norm": 1.3195809160200416, "learning_rate": 1.9999962293395314e-05, "loss": 1.1259, "step": 182 }, { "epoch": 0.03103009749894023, "grad_norm": 1.4471383785189624, "learning_rate": 1.9999945702504266e-05, "loss": 1.1562, "step": 183 }, { "epoch": 0.031199660873251378, "grad_norm": 1.2950706341674452, "learning_rate": 1.99999260950994e-05, "loss": 1.1021, "step": 184 }, { "epoch": 0.03136922424756253, "grad_norm": 1.4357046922256813, "learning_rate": 1.9999903471186634e-05, "loss": 1.1355, "step": 185 }, { "epoch": 0.031538787621873676, "grad_norm": 1.4081553660606614, "learning_rate": 1.9999877830772793e-05, "loss": 1.125, "step": 186 }, { "epoch": 0.031708350996184825, "grad_norm": 1.4040886421642182, "learning_rate": 1.9999849173865607e-05, "loss": 1.1268, "step": 187 }, { "epoch": 0.031877914370495974, "grad_norm": 1.5596826277587976, "learning_rate": 1.9999817500473724e-05, "loss": 1.1582, "step": 188 }, { "epoch": 0.032047477744807124, "grad_norm": 1.4462905902458718, "learning_rate": 1.9999782810606697e-05, "loss": 1.1256, "step": 189 }, { "epoch": 0.03221704111911827, "grad_norm": 1.3473764607427723, "learning_rate": 1.9999745104274995e-05, "loss": 1.1082, "step": 190 }, { "epoch": 0.03238660449342942, "grad_norm": 1.3307164474361097, "learning_rate": 1.9999704381489984e-05, "loss": 1.0869, "step": 191 }, { "epoch": 0.03255616786774057, "grad_norm": 1.45061392035038, "learning_rate": 1.999966064226395e-05, "loss": 1.1251, "step": 192 }, { "epoch": 0.03272573124205172, "grad_norm": 1.3321117845551607, "learning_rate": 1.9999613886610097e-05, "loss": 1.117, "step": 193 }, { "epoch": 0.03289529461636287, "grad_norm": 1.2742633797714435, "learning_rate": 1.9999564114542516e-05, "loss": 1.1095, "step": 194 }, { "epoch": 0.03306485799067401, "grad_norm": 1.3759506593588158, "learning_rate": 1.9999511326076227e-05, "loss": 1.1472, "step": 195 }, { "epoch": 0.03323442136498516, "grad_norm": 1.291439057363082, "learning_rate": 1.9999455521227153e-05, "loss": 1.0904, "step": 196 }, { "epoch": 0.03340398473929631, "grad_norm": 1.3809527670799788, "learning_rate": 1.9999396700012127e-05, "loss": 1.1363, "step": 197 }, { "epoch": 0.03357354811360746, "grad_norm": 1.2995526218260776, "learning_rate": 1.9999334862448896e-05, "loss": 1.1545, "step": 198 }, { "epoch": 0.03374311148791861, "grad_norm": 1.2478798571680139, "learning_rate": 1.9999270008556108e-05, "loss": 1.1001, "step": 199 }, { "epoch": 0.03391267486222976, "grad_norm": 1.2470480556423995, "learning_rate": 1.999920213835333e-05, "loss": 1.106, "step": 200 }, { "epoch": 0.034082238236540906, "grad_norm": 1.2102661616659502, "learning_rate": 1.9999131251861037e-05, "loss": 1.1276, "step": 201 }, { "epoch": 0.034251801610852055, "grad_norm": 1.2849980135270684, "learning_rate": 1.9999057349100606e-05, "loss": 1.087, "step": 202 }, { "epoch": 0.034421364985163204, "grad_norm": 1.2262571763597132, "learning_rate": 1.9998980430094333e-05, "loss": 1.123, "step": 203 }, { "epoch": 0.03459092835947435, "grad_norm": 1.3759934172355304, "learning_rate": 1.9998900494865426e-05, "loss": 1.1903, "step": 204 }, { "epoch": 0.0347604917337855, "grad_norm": 1.2906012627920067, "learning_rate": 1.999881754343799e-05, "loss": 1.0835, "step": 205 }, { "epoch": 0.03493005510809665, "grad_norm": 1.2864361841867833, "learning_rate": 1.999873157583705e-05, "loss": 1.1233, "step": 206 }, { "epoch": 0.0350996184824078, "grad_norm": 1.422919504276044, "learning_rate": 1.9998642592088543e-05, "loss": 1.1504, "step": 207 }, { "epoch": 0.03526918185671895, "grad_norm": 1.3010651375525106, "learning_rate": 1.9998550592219303e-05, "loss": 1.103, "step": 208 }, { "epoch": 0.0354387452310301, "grad_norm": 1.2773819298360978, "learning_rate": 1.999845557625709e-05, "loss": 1.1236, "step": 209 }, { "epoch": 0.03560830860534125, "grad_norm": 1.3252117783869803, "learning_rate": 1.9998357544230558e-05, "loss": 1.0974, "step": 210 }, { "epoch": 0.0357778719796524, "grad_norm": 1.3167233220674777, "learning_rate": 1.9998256496169282e-05, "loss": 1.1693, "step": 211 }, { "epoch": 0.035947435353963546, "grad_norm": 1.303191899641572, "learning_rate": 1.999815243210375e-05, "loss": 1.1376, "step": 212 }, { "epoch": 0.036116998728274695, "grad_norm": 1.1944527257891109, "learning_rate": 1.9998045352065342e-05, "loss": 1.0969, "step": 213 }, { "epoch": 0.036286562102585844, "grad_norm": 1.3268931468824336, "learning_rate": 1.9997935256086367e-05, "loss": 1.0888, "step": 214 }, { "epoch": 0.03645612547689699, "grad_norm": 1.3839432423572338, "learning_rate": 1.9997822144200035e-05, "loss": 1.1582, "step": 215 }, { "epoch": 0.03662568885120814, "grad_norm": 1.258912414155463, "learning_rate": 1.9997706016440462e-05, "loss": 1.118, "step": 216 }, { "epoch": 0.03679525222551929, "grad_norm": 1.228358322020127, "learning_rate": 1.9997586872842683e-05, "loss": 1.1222, "step": 217 }, { "epoch": 0.03696481559983043, "grad_norm": 1.25841793688745, "learning_rate": 1.9997464713442632e-05, "loss": 1.0823, "step": 218 }, { "epoch": 0.03713437897414158, "grad_norm": 1.3041590051483232, "learning_rate": 1.999733953827717e-05, "loss": 1.0758, "step": 219 }, { "epoch": 0.03730394234845273, "grad_norm": 1.2505716257886845, "learning_rate": 1.9997211347384043e-05, "loss": 1.0731, "step": 220 }, { "epoch": 0.03747350572276388, "grad_norm": 1.3359828294697025, "learning_rate": 1.9997080140801932e-05, "loss": 1.1173, "step": 221 }, { "epoch": 0.03764306909707503, "grad_norm": 1.3192749701037614, "learning_rate": 1.9996945918570407e-05, "loss": 1.0975, "step": 222 }, { "epoch": 0.03781263247138618, "grad_norm": 1.2953997261408543, "learning_rate": 1.999680868072996e-05, "loss": 1.0848, "step": 223 }, { "epoch": 0.03798219584569733, "grad_norm": 1.348437522113642, "learning_rate": 1.999666842732199e-05, "loss": 1.1355, "step": 224 }, { "epoch": 0.03815175922000848, "grad_norm": 1.2739818156470117, "learning_rate": 1.9996525158388804e-05, "loss": 1.1777, "step": 225 }, { "epoch": 0.038321322594319626, "grad_norm": 1.2926217899190098, "learning_rate": 1.999637887397362e-05, "loss": 1.0542, "step": 226 }, { "epoch": 0.038490885968630775, "grad_norm": 1.2651121705688904, "learning_rate": 1.9996229574120564e-05, "loss": 1.1221, "step": 227 }, { "epoch": 0.038660449342941924, "grad_norm": 1.3181402213368112, "learning_rate": 1.9996077258874672e-05, "loss": 1.139, "step": 228 }, { "epoch": 0.03883001271725307, "grad_norm": 1.2378616508446028, "learning_rate": 1.9995921928281893e-05, "loss": 1.1037, "step": 229 }, { "epoch": 0.03899957609156422, "grad_norm": 1.2611317412647842, "learning_rate": 1.999576358238908e-05, "loss": 1.0932, "step": 230 }, { "epoch": 0.03916913946587537, "grad_norm": 1.193033326336458, "learning_rate": 1.9995602221244007e-05, "loss": 1.1087, "step": 231 }, { "epoch": 0.03933870284018652, "grad_norm": 1.2303651899327985, "learning_rate": 1.9995437844895337e-05, "loss": 1.0697, "step": 232 }, { "epoch": 0.03950826621449767, "grad_norm": 1.3653670662742932, "learning_rate": 1.999527045339266e-05, "loss": 1.0869, "step": 233 }, { "epoch": 0.03967782958880882, "grad_norm": 1.32989695880418, "learning_rate": 1.999510004678647e-05, "loss": 1.1396, "step": 234 }, { "epoch": 0.03984739296311997, "grad_norm": 1.3648126580032514, "learning_rate": 1.999492662512817e-05, "loss": 1.1402, "step": 235 }, { "epoch": 0.04001695633743112, "grad_norm": 1.3643161893927116, "learning_rate": 1.9994750188470076e-05, "loss": 1.1381, "step": 236 }, { "epoch": 0.040186519711742266, "grad_norm": 1.3115065822462593, "learning_rate": 1.9994570736865407e-05, "loss": 1.1496, "step": 237 }, { "epoch": 0.040356083086053415, "grad_norm": 1.293568914755473, "learning_rate": 1.99943882703683e-05, "loss": 1.1124, "step": 238 }, { "epoch": 0.040525646460364564, "grad_norm": 1.3105142284743228, "learning_rate": 1.9994202789033787e-05, "loss": 1.144, "step": 239 }, { "epoch": 0.040695209834675714, "grad_norm": 1.3437109322210967, "learning_rate": 1.999401429291783e-05, "loss": 1.1265, "step": 240 }, { "epoch": 0.040864773208986856, "grad_norm": 1.2110366779560244, "learning_rate": 1.9993822782077282e-05, "loss": 1.1233, "step": 241 }, { "epoch": 0.041034336583298005, "grad_norm": 1.2498208989585575, "learning_rate": 1.999362825656992e-05, "loss": 1.1317, "step": 242 }, { "epoch": 0.041203899957609154, "grad_norm": 1.2945037951769887, "learning_rate": 1.9993430716454415e-05, "loss": 1.074, "step": 243 }, { "epoch": 0.0413734633319203, "grad_norm": 1.2917078153507724, "learning_rate": 1.999323016179036e-05, "loss": 1.1051, "step": 244 }, { "epoch": 0.04154302670623145, "grad_norm": 1.3256353982242184, "learning_rate": 1.999302659263825e-05, "loss": 1.1196, "step": 245 }, { "epoch": 0.0417125900805426, "grad_norm": 1.3042273541322487, "learning_rate": 1.9992820009059496e-05, "loss": 1.0862, "step": 246 }, { "epoch": 0.04188215345485375, "grad_norm": 1.232714465260533, "learning_rate": 1.9992610411116416e-05, "loss": 1.1242, "step": 247 }, { "epoch": 0.0420517168291649, "grad_norm": 1.274140585702574, "learning_rate": 1.9992397798872233e-05, "loss": 1.0831, "step": 248 }, { "epoch": 0.04222128020347605, "grad_norm": 1.3210616986271977, "learning_rate": 1.999218217239108e-05, "loss": 1.1094, "step": 249 }, { "epoch": 0.0423908435777872, "grad_norm": 1.318150501739465, "learning_rate": 1.9991963531738e-05, "loss": 1.1192, "step": 250 }, { "epoch": 0.04256040695209835, "grad_norm": 1.3431518541447707, "learning_rate": 1.9991741876978953e-05, "loss": 1.1231, "step": 251 }, { "epoch": 0.042729970326409496, "grad_norm": 1.2834723290563341, "learning_rate": 1.99915172081808e-05, "loss": 1.1467, "step": 252 }, { "epoch": 0.042899533700720645, "grad_norm": 1.233197047980099, "learning_rate": 1.9991289525411308e-05, "loss": 1.0816, "step": 253 }, { "epoch": 0.043069097075031794, "grad_norm": 1.4200243646148094, "learning_rate": 1.9991058828739164e-05, "loss": 1.1204, "step": 254 }, { "epoch": 0.04323866044934294, "grad_norm": 1.3172495315828985, "learning_rate": 1.9990825118233958e-05, "loss": 1.0725, "step": 255 }, { "epoch": 0.04340822382365409, "grad_norm": 1.3090225638033592, "learning_rate": 1.999058839396619e-05, "loss": 1.1622, "step": 256 }, { "epoch": 0.04357778719796524, "grad_norm": 1.2377323447279194, "learning_rate": 1.999034865600726e-05, "loss": 1.075, "step": 257 }, { "epoch": 0.04374735057227639, "grad_norm": 1.4027732073731827, "learning_rate": 1.9990105904429496e-05, "loss": 1.0977, "step": 258 }, { "epoch": 0.04391691394658754, "grad_norm": 1.2538009108391588, "learning_rate": 1.9989860139306122e-05, "loss": 1.1074, "step": 259 }, { "epoch": 0.04408647732089869, "grad_norm": 1.2678297263590095, "learning_rate": 1.998961136071127e-05, "loss": 1.1103, "step": 260 }, { "epoch": 0.04425604069520984, "grad_norm": 1.33640403897779, "learning_rate": 1.9989359568719988e-05, "loss": 1.0847, "step": 261 }, { "epoch": 0.04442560406952099, "grad_norm": 1.2378980641503972, "learning_rate": 1.998910476340823e-05, "loss": 1.1212, "step": 262 }, { "epoch": 0.04459516744383213, "grad_norm": 1.3400055287563686, "learning_rate": 1.998884694485286e-05, "loss": 1.1497, "step": 263 }, { "epoch": 0.04476473081814328, "grad_norm": 1.2067041306244417, "learning_rate": 1.9988586113131644e-05, "loss": 1.0468, "step": 264 }, { "epoch": 0.04493429419245443, "grad_norm": 1.2324082792925706, "learning_rate": 1.998832226832327e-05, "loss": 1.1453, "step": 265 }, { "epoch": 0.045103857566765576, "grad_norm": 1.4597384904677964, "learning_rate": 1.9988055410507318e-05, "loss": 0.9068, "step": 266 }, { "epoch": 0.045273420941076725, "grad_norm": 1.2517477406537323, "learning_rate": 1.99877855397643e-05, "loss": 1.0359, "step": 267 }, { "epoch": 0.045442984315387874, "grad_norm": 1.4215378641837717, "learning_rate": 1.9987512656175612e-05, "loss": 1.1356, "step": 268 }, { "epoch": 0.04561254768969902, "grad_norm": 1.2567936902932118, "learning_rate": 1.9987236759823573e-05, "loss": 1.1055, "step": 269 }, { "epoch": 0.04578211106401017, "grad_norm": 1.351280629298554, "learning_rate": 1.998695785079141e-05, "loss": 1.1201, "step": 270 }, { "epoch": 0.04595167443832132, "grad_norm": 0.919807199887456, "learning_rate": 1.998667592916326e-05, "loss": 0.9956, "step": 271 }, { "epoch": 0.04612123781263247, "grad_norm": 1.4444332019381603, "learning_rate": 1.9986390995024157e-05, "loss": 1.0809, "step": 272 }, { "epoch": 0.04629080118694362, "grad_norm": 0.7227797429961299, "learning_rate": 1.9986103048460056e-05, "loss": 0.8795, "step": 273 }, { "epoch": 0.04646036456125477, "grad_norm": 1.3839871147489478, "learning_rate": 1.998581208955782e-05, "loss": 1.0353, "step": 274 }, { "epoch": 0.04662992793556592, "grad_norm": 1.3734487260979455, "learning_rate": 1.998551811840521e-05, "loss": 1.1141, "step": 275 }, { "epoch": 0.04679949130987707, "grad_norm": 1.4209810351806924, "learning_rate": 1.9985221135090917e-05, "loss": 1.1568, "step": 276 }, { "epoch": 0.046969054684188216, "grad_norm": 1.298446091150691, "learning_rate": 1.998492113970451e-05, "loss": 1.0929, "step": 277 }, { "epoch": 0.047138618058499365, "grad_norm": 1.3256210754654363, "learning_rate": 1.99846181323365e-05, "loss": 1.0858, "step": 278 }, { "epoch": 0.047308181432810514, "grad_norm": 0.8731203119877494, "learning_rate": 1.9984312113078276e-05, "loss": 0.8564, "step": 279 }, { "epoch": 0.04747774480712166, "grad_norm": 1.5216913951133884, "learning_rate": 1.9984003082022153e-05, "loss": 1.1074, "step": 280 }, { "epoch": 0.04764730818143281, "grad_norm": 1.4487908926953819, "learning_rate": 1.9983691039261358e-05, "loss": 1.1528, "step": 281 }, { "epoch": 0.04781687155574396, "grad_norm": 1.3396040211574347, "learning_rate": 1.9983375984890013e-05, "loss": 1.0791, "step": 282 }, { "epoch": 0.04798643493005511, "grad_norm": 1.3752837460679006, "learning_rate": 1.9983057919003162e-05, "loss": 1.0966, "step": 283 }, { "epoch": 0.04815599830436626, "grad_norm": 1.4920780971869654, "learning_rate": 1.998273684169674e-05, "loss": 1.1034, "step": 284 }, { "epoch": 0.04832556167867741, "grad_norm": 1.453554721673209, "learning_rate": 1.998241275306761e-05, "loss": 1.1418, "step": 285 }, { "epoch": 0.04849512505298855, "grad_norm": 1.399795858996511, "learning_rate": 1.9982085653213535e-05, "loss": 1.0706, "step": 286 }, { "epoch": 0.0486646884272997, "grad_norm": 1.2899224919414538, "learning_rate": 1.9981755542233175e-05, "loss": 1.0845, "step": 287 }, { "epoch": 0.04883425180161085, "grad_norm": 1.4158557727233918, "learning_rate": 1.9981422420226118e-05, "loss": 1.1067, "step": 288 }, { "epoch": 0.049003815175922, "grad_norm": 1.2799294452344585, "learning_rate": 1.9981086287292853e-05, "loss": 1.0695, "step": 289 }, { "epoch": 0.04917337855023315, "grad_norm": 1.301157748670505, "learning_rate": 1.998074714353477e-05, "loss": 1.1089, "step": 290 }, { "epoch": 0.0493429419245443, "grad_norm": 1.448619349235768, "learning_rate": 1.9980404989054172e-05, "loss": 1.1176, "step": 291 }, { "epoch": 0.049512505298855446, "grad_norm": 1.1721849740370327, "learning_rate": 1.9980059823954274e-05, "loss": 1.1404, "step": 292 }, { "epoch": 0.049682068673166595, "grad_norm": 1.298641860447234, "learning_rate": 1.9979711648339192e-05, "loss": 1.1077, "step": 293 }, { "epoch": 0.049851632047477744, "grad_norm": 1.4437782843668467, "learning_rate": 1.9979360462313965e-05, "loss": 1.1086, "step": 294 }, { "epoch": 0.05002119542178889, "grad_norm": 1.389790720831879, "learning_rate": 1.9979006265984516e-05, "loss": 1.107, "step": 295 }, { "epoch": 0.05019075879610004, "grad_norm": 1.1748746125180785, "learning_rate": 1.99786490594577e-05, "loss": 1.0906, "step": 296 }, { "epoch": 0.05036032217041119, "grad_norm": 1.2299969821770056, "learning_rate": 1.997828884284126e-05, "loss": 1.1117, "step": 297 }, { "epoch": 0.05052988554472234, "grad_norm": 1.5034250884078735, "learning_rate": 1.9977925616243865e-05, "loss": 1.131, "step": 298 }, { "epoch": 0.05069944891903349, "grad_norm": 1.2757184845012663, "learning_rate": 1.9977559379775077e-05, "loss": 1.1091, "step": 299 }, { "epoch": 0.05086901229334464, "grad_norm": 1.1822834963848412, "learning_rate": 1.9977190133545378e-05, "loss": 1.0557, "step": 300 }, { "epoch": 0.05103857566765579, "grad_norm": 1.208059478563468, "learning_rate": 1.9976817877666143e-05, "loss": 1.0831, "step": 301 }, { "epoch": 0.05120813904196694, "grad_norm": 1.324640310424657, "learning_rate": 1.9976442612249677e-05, "loss": 1.0916, "step": 302 }, { "epoch": 0.051377702416278086, "grad_norm": 1.308192313784638, "learning_rate": 1.997606433740917e-05, "loss": 1.1033, "step": 303 }, { "epoch": 0.051547265790589235, "grad_norm": 1.2608508888907461, "learning_rate": 1.9975683053258734e-05, "loss": 1.1248, "step": 304 }, { "epoch": 0.051716829164900384, "grad_norm": 1.2454374853169659, "learning_rate": 1.9975298759913382e-05, "loss": 1.158, "step": 305 }, { "epoch": 0.05188639253921153, "grad_norm": 1.2495908625588392, "learning_rate": 1.9974911457489038e-05, "loss": 1.1099, "step": 306 }, { "epoch": 0.05205595591352268, "grad_norm": 1.2045956229853965, "learning_rate": 1.9974521146102535e-05, "loss": 1.0886, "step": 307 }, { "epoch": 0.05222551928783383, "grad_norm": 1.2316926551167495, "learning_rate": 1.997412782587161e-05, "loss": 1.1211, "step": 308 }, { "epoch": 0.05239508266214497, "grad_norm": 1.239607517766241, "learning_rate": 1.9973731496914914e-05, "loss": 1.0752, "step": 309 }, { "epoch": 0.05256464603645612, "grad_norm": 1.1464479852440976, "learning_rate": 1.997333215935199e-05, "loss": 1.0865, "step": 310 }, { "epoch": 0.05273420941076727, "grad_norm": 1.3018269907301956, "learning_rate": 1.997292981330331e-05, "loss": 1.1136, "step": 311 }, { "epoch": 0.05290377278507842, "grad_norm": 1.1685461379112567, "learning_rate": 1.9972524458890238e-05, "loss": 1.0699, "step": 312 }, { "epoch": 0.05307333615938957, "grad_norm": 1.3125531381497202, "learning_rate": 1.9972116096235047e-05, "loss": 1.1312, "step": 313 }, { "epoch": 0.05324289953370072, "grad_norm": 1.226818289966763, "learning_rate": 1.997170472546093e-05, "loss": 1.0853, "step": 314 }, { "epoch": 0.05341246290801187, "grad_norm": 1.2230916127999136, "learning_rate": 1.997129034669197e-05, "loss": 1.1326, "step": 315 }, { "epoch": 0.05358202628232302, "grad_norm": 1.3512152142512985, "learning_rate": 1.997087296005317e-05, "loss": 1.1461, "step": 316 }, { "epoch": 0.053751589656634166, "grad_norm": 1.1440299956887134, "learning_rate": 1.9970452565670432e-05, "loss": 1.0678, "step": 317 }, { "epoch": 0.053921153030945315, "grad_norm": 1.2169658630028495, "learning_rate": 1.9970029163670573e-05, "loss": 1.1072, "step": 318 }, { "epoch": 0.054090716405256464, "grad_norm": 1.3080151660027735, "learning_rate": 1.9969602754181316e-05, "loss": 1.1072, "step": 319 }, { "epoch": 0.05426027977956761, "grad_norm": 1.1625388269821455, "learning_rate": 1.9969173337331283e-05, "loss": 1.0435, "step": 320 }, { "epoch": 0.05442984315387876, "grad_norm": 1.3244220672285003, "learning_rate": 1.996874091325001e-05, "loss": 1.1208, "step": 321 }, { "epoch": 0.05459940652818991, "grad_norm": 1.3333021747321667, "learning_rate": 1.996830548206794e-05, "loss": 1.1382, "step": 322 }, { "epoch": 0.05476896990250106, "grad_norm": 0.8435225189212632, "learning_rate": 1.996786704391642e-05, "loss": 0.7844, "step": 323 }, { "epoch": 0.05493853327681221, "grad_norm": 1.393060292107635, "learning_rate": 1.9967425598927713e-05, "loss": 1.1377, "step": 324 }, { "epoch": 0.05510809665112336, "grad_norm": 1.2256625255974176, "learning_rate": 1.9966981147234975e-05, "loss": 1.0511, "step": 325 }, { "epoch": 0.05527766002543451, "grad_norm": 1.2184046483945477, "learning_rate": 1.996653368897228e-05, "loss": 1.1291, "step": 326 }, { "epoch": 0.05544722339974566, "grad_norm": 1.2851047625914724, "learning_rate": 1.99660832242746e-05, "loss": 1.0623, "step": 327 }, { "epoch": 0.055616786774056806, "grad_norm": 1.185571974883055, "learning_rate": 1.9965629753277825e-05, "loss": 1.1082, "step": 328 }, { "epoch": 0.055786350148367955, "grad_norm": 1.1915535921078764, "learning_rate": 1.9965173276118747e-05, "loss": 1.1088, "step": 329 }, { "epoch": 0.055955913522679104, "grad_norm": 1.269068710320991, "learning_rate": 1.9964713792935055e-05, "loss": 1.0913, "step": 330 }, { "epoch": 0.05612547689699025, "grad_norm": 1.2993242144559132, "learning_rate": 1.9964251303865362e-05, "loss": 1.1576, "step": 331 }, { "epoch": 0.056295040271301396, "grad_norm": 1.2078455062910056, "learning_rate": 1.9963785809049177e-05, "loss": 1.0755, "step": 332 }, { "epoch": 0.056464603645612545, "grad_norm": 1.1571021624315911, "learning_rate": 1.9963317308626916e-05, "loss": 1.0831, "step": 333 }, { "epoch": 0.056634167019923694, "grad_norm": 1.1504441341898868, "learning_rate": 1.9962845802739905e-05, "loss": 1.0871, "step": 334 }, { "epoch": 0.05680373039423484, "grad_norm": 1.2051311917349259, "learning_rate": 1.9962371291530375e-05, "loss": 1.1117, "step": 335 }, { "epoch": 0.05697329376854599, "grad_norm": 1.2509410873161557, "learning_rate": 1.996189377514146e-05, "loss": 1.0727, "step": 336 }, { "epoch": 0.05714285714285714, "grad_norm": 1.2245819111691587, "learning_rate": 1.9961413253717214e-05, "loss": 1.1031, "step": 337 }, { "epoch": 0.05731242051716829, "grad_norm": 1.184736822543079, "learning_rate": 1.9960929727402578e-05, "loss": 1.0962, "step": 338 }, { "epoch": 0.05748198389147944, "grad_norm": 1.2594965296566927, "learning_rate": 1.9960443196343413e-05, "loss": 1.0875, "step": 339 }, { "epoch": 0.05765154726579059, "grad_norm": 1.203417472683344, "learning_rate": 1.995995366068648e-05, "loss": 1.0666, "step": 340 }, { "epoch": 0.05782111064010174, "grad_norm": 1.1526596254622905, "learning_rate": 1.9959461120579457e-05, "loss": 1.0517, "step": 341 }, { "epoch": 0.05799067401441289, "grad_norm": 1.2189027654989488, "learning_rate": 1.995896557617091e-05, "loss": 1.1128, "step": 342 }, { "epoch": 0.058160237388724036, "grad_norm": 1.2449802055381771, "learning_rate": 1.9958467027610328e-05, "loss": 1.09, "step": 343 }, { "epoch": 0.058329800763035185, "grad_norm": 1.2372792210636403, "learning_rate": 1.9957965475048097e-05, "loss": 1.1435, "step": 344 }, { "epoch": 0.058499364137346334, "grad_norm": 1.159603118192446, "learning_rate": 1.9957460918635513e-05, "loss": 1.0912, "step": 345 }, { "epoch": 0.05866892751165748, "grad_norm": 1.1796547515743827, "learning_rate": 1.9956953358524774e-05, "loss": 1.1273, "step": 346 }, { "epoch": 0.05883849088596863, "grad_norm": 1.1463535463566787, "learning_rate": 1.995644279486899e-05, "loss": 1.0726, "step": 347 }, { "epoch": 0.05900805426027978, "grad_norm": 1.2773193191908023, "learning_rate": 1.995592922782218e-05, "loss": 1.0754, "step": 348 }, { "epoch": 0.05917761763459093, "grad_norm": 1.1605677125546763, "learning_rate": 1.995541265753925e-05, "loss": 1.12, "step": 349 }, { "epoch": 0.05934718100890208, "grad_norm": 1.2407734754453872, "learning_rate": 1.9954893084176032e-05, "loss": 1.1042, "step": 350 }, { "epoch": 0.05951674438321323, "grad_norm": 1.1425448429760805, "learning_rate": 1.9954370507889257e-05, "loss": 1.0759, "step": 351 }, { "epoch": 0.05968630775752438, "grad_norm": 1.2255570962994675, "learning_rate": 1.9953844928836563e-05, "loss": 1.12, "step": 352 }, { "epoch": 0.05985587113183553, "grad_norm": 1.1143633416997376, "learning_rate": 1.995331634717649e-05, "loss": 1.0537, "step": 353 }, { "epoch": 0.060025434506146676, "grad_norm": 1.1628787829315388, "learning_rate": 1.995278476306848e-05, "loss": 1.0933, "step": 354 }, { "epoch": 0.06019499788045782, "grad_norm": 1.209405637329876, "learning_rate": 1.9952250176672904e-05, "loss": 1.0961, "step": 355 }, { "epoch": 0.06036456125476897, "grad_norm": 1.2253620272384542, "learning_rate": 1.9951712588151005e-05, "loss": 1.0816, "step": 356 }, { "epoch": 0.060534124629080116, "grad_norm": 1.2668748619197234, "learning_rate": 1.995117199766496e-05, "loss": 1.1292, "step": 357 }, { "epoch": 0.060703688003391265, "grad_norm": 1.2279281497116579, "learning_rate": 1.995062840537783e-05, "loss": 1.1093, "step": 358 }, { "epoch": 0.060873251377702414, "grad_norm": 1.1857563656955354, "learning_rate": 1.9950081811453598e-05, "loss": 1.113, "step": 359 }, { "epoch": 0.06104281475201356, "grad_norm": 1.2608582753493773, "learning_rate": 1.9949532216057143e-05, "loss": 1.1107, "step": 360 }, { "epoch": 0.06121237812632471, "grad_norm": 1.1941519844686572, "learning_rate": 1.9948979619354253e-05, "loss": 1.0875, "step": 361 }, { "epoch": 0.06138194150063586, "grad_norm": 1.070663337506809, "learning_rate": 1.9948424021511617e-05, "loss": 0.8719, "step": 362 }, { "epoch": 0.06155150487494701, "grad_norm": 1.329920784246183, "learning_rate": 1.9947865422696837e-05, "loss": 1.1196, "step": 363 }, { "epoch": 0.06172106824925816, "grad_norm": 1.2011226625918563, "learning_rate": 1.9947303823078418e-05, "loss": 1.074, "step": 364 }, { "epoch": 0.06189063162356931, "grad_norm": 1.1737289768791428, "learning_rate": 1.994673922282576e-05, "loss": 1.0572, "step": 365 }, { "epoch": 0.06206019499788046, "grad_norm": 1.1692347436747448, "learning_rate": 1.9946171622109182e-05, "loss": 1.0882, "step": 366 }, { "epoch": 0.06222975837219161, "grad_norm": 1.173485876145163, "learning_rate": 1.9945601021099903e-05, "loss": 1.0622, "step": 367 }, { "epoch": 0.062399321746502756, "grad_norm": 1.0939343714424123, "learning_rate": 1.9945027419970045e-05, "loss": 1.0644, "step": 368 }, { "epoch": 0.0625688851208139, "grad_norm": 1.1938438449064799, "learning_rate": 1.994445081889264e-05, "loss": 1.1187, "step": 369 }, { "epoch": 0.06273844849512505, "grad_norm": 1.1134512299885773, "learning_rate": 1.9943871218041617e-05, "loss": 1.0974, "step": 370 }, { "epoch": 0.0629080118694362, "grad_norm": 1.1283419443504543, "learning_rate": 1.9943288617591813e-05, "loss": 1.0761, "step": 371 }, { "epoch": 0.06307757524374735, "grad_norm": 1.1469298461894093, "learning_rate": 1.9942703017718977e-05, "loss": 1.1146, "step": 372 }, { "epoch": 0.0632471386180585, "grad_norm": 1.1976903772081748, "learning_rate": 1.994211441859975e-05, "loss": 1.1162, "step": 373 }, { "epoch": 0.06341670199236965, "grad_norm": 1.259642806436094, "learning_rate": 1.994152282041169e-05, "loss": 1.1244, "step": 374 }, { "epoch": 0.0635862653666808, "grad_norm": 1.217164619750138, "learning_rate": 1.9940928223333254e-05, "loss": 1.1002, "step": 375 }, { "epoch": 0.06375582874099195, "grad_norm": 1.3102709114404878, "learning_rate": 1.99403306275438e-05, "loss": 1.1375, "step": 376 }, { "epoch": 0.0639253921153031, "grad_norm": 1.252171499091104, "learning_rate": 1.99397300332236e-05, "loss": 1.1143, "step": 377 }, { "epoch": 0.06409495548961425, "grad_norm": 1.1700395423595562, "learning_rate": 1.9939126440553824e-05, "loss": 1.119, "step": 378 }, { "epoch": 0.0642645188639254, "grad_norm": 1.190503582506576, "learning_rate": 1.9938519849716545e-05, "loss": 1.0989, "step": 379 }, { "epoch": 0.06443408223823655, "grad_norm": 1.159521427169997, "learning_rate": 1.9937910260894742e-05, "loss": 1.1476, "step": 380 }, { "epoch": 0.0646036456125477, "grad_norm": 1.2243375821608091, "learning_rate": 1.9937297674272302e-05, "loss": 1.1052, "step": 381 }, { "epoch": 0.06477320898685884, "grad_norm": 1.1052392237568562, "learning_rate": 1.9936682090034014e-05, "loss": 1.0732, "step": 382 }, { "epoch": 0.06494277236116999, "grad_norm": 1.133507141141504, "learning_rate": 1.993606350836557e-05, "loss": 1.0952, "step": 383 }, { "epoch": 0.06511233573548114, "grad_norm": 1.2441764920292675, "learning_rate": 1.9935441929453564e-05, "loss": 1.0923, "step": 384 }, { "epoch": 0.06528189910979229, "grad_norm": 0.9421309435936914, "learning_rate": 1.99348173534855e-05, "loss": 0.8583, "step": 385 }, { "epoch": 0.06545146248410344, "grad_norm": 1.139031396937891, "learning_rate": 1.993418978064979e-05, "loss": 1.0597, "step": 386 }, { "epoch": 0.06562102585841459, "grad_norm": 1.1901071034133301, "learning_rate": 1.993355921113573e-05, "loss": 1.0511, "step": 387 }, { "epoch": 0.06579058923272574, "grad_norm": 1.162965069394226, "learning_rate": 1.9932925645133542e-05, "loss": 1.0877, "step": 388 }, { "epoch": 0.06596015260703687, "grad_norm": 1.2311993497742715, "learning_rate": 1.9932289082834342e-05, "loss": 1.1016, "step": 389 }, { "epoch": 0.06612971598134802, "grad_norm": 1.2590527889026113, "learning_rate": 1.9931649524430144e-05, "loss": 1.1275, "step": 390 }, { "epoch": 0.06629927935565917, "grad_norm": 1.1992032106678683, "learning_rate": 1.9931006970113884e-05, "loss": 1.0881, "step": 391 }, { "epoch": 0.06646884272997032, "grad_norm": 1.1944888911389537, "learning_rate": 1.9930361420079385e-05, "loss": 1.0695, "step": 392 }, { "epoch": 0.06663840610428147, "grad_norm": 1.2642092133103153, "learning_rate": 1.9929712874521375e-05, "loss": 1.104, "step": 393 }, { "epoch": 0.06680796947859262, "grad_norm": 1.1480843498209758, "learning_rate": 1.99290613336355e-05, "loss": 1.0967, "step": 394 }, { "epoch": 0.06697753285290377, "grad_norm": 1.3089210241642424, "learning_rate": 1.9928406797618285e-05, "loss": 1.1199, "step": 395 }, { "epoch": 0.06714709622721492, "grad_norm": 1.1609342410390617, "learning_rate": 1.9927749266667185e-05, "loss": 1.0698, "step": 396 }, { "epoch": 0.06731665960152607, "grad_norm": 1.1602040335488506, "learning_rate": 1.992708874098054e-05, "loss": 1.0808, "step": 397 }, { "epoch": 0.06748622297583722, "grad_norm": 1.1668530858299222, "learning_rate": 1.9926425220757607e-05, "loss": 1.0477, "step": 398 }, { "epoch": 0.06765578635014836, "grad_norm": 1.269660516529872, "learning_rate": 1.9925758706198527e-05, "loss": 1.1181, "step": 399 }, { "epoch": 0.06782534972445951, "grad_norm": 1.2221527265069834, "learning_rate": 1.9925089197504363e-05, "loss": 1.0742, "step": 400 }, { "epoch": 0.06799491309877066, "grad_norm": 1.2424829682065042, "learning_rate": 1.9924416694877077e-05, "loss": 1.1273, "step": 401 }, { "epoch": 0.06816447647308181, "grad_norm": 1.1546686928366987, "learning_rate": 1.9923741198519528e-05, "loss": 1.0664, "step": 402 }, { "epoch": 0.06833403984739296, "grad_norm": 1.2232646211054141, "learning_rate": 1.992306270863548e-05, "loss": 1.0714, "step": 403 }, { "epoch": 0.06850360322170411, "grad_norm": 1.2449826857512076, "learning_rate": 1.9922381225429605e-05, "loss": 1.066, "step": 404 }, { "epoch": 0.06867316659601526, "grad_norm": 0.9073812725135206, "learning_rate": 1.992169674910747e-05, "loss": 0.8867, "step": 405 }, { "epoch": 0.06884272997032641, "grad_norm": 1.3337548424910155, "learning_rate": 1.9921009279875555e-05, "loss": 1.109, "step": 406 }, { "epoch": 0.06901229334463756, "grad_norm": 0.7421763544150676, "learning_rate": 1.9920318817941234e-05, "loss": 0.8916, "step": 407 }, { "epoch": 0.0691818567189487, "grad_norm": 1.3251686230747073, "learning_rate": 1.9919625363512788e-05, "loss": 1.1143, "step": 408 }, { "epoch": 0.06935142009325986, "grad_norm": 1.3170148414271075, "learning_rate": 1.9918928916799395e-05, "loss": 1.0978, "step": 409 }, { "epoch": 0.069520983467571, "grad_norm": 1.2335438282923488, "learning_rate": 1.9918229478011146e-05, "loss": 1.063, "step": 410 }, { "epoch": 0.06969054684188215, "grad_norm": 1.333054504408273, "learning_rate": 1.991752704735903e-05, "loss": 1.1366, "step": 411 }, { "epoch": 0.0698601102161933, "grad_norm": 1.1143623075087468, "learning_rate": 1.9916821625054932e-05, "loss": 1.0715, "step": 412 }, { "epoch": 0.07002967359050445, "grad_norm": 1.2582105757851425, "learning_rate": 1.9916113211311647e-05, "loss": 1.1078, "step": 413 }, { "epoch": 0.0701992369648156, "grad_norm": 1.2131117395052011, "learning_rate": 1.991540180634287e-05, "loss": 1.0947, "step": 414 }, { "epoch": 0.07036880033912675, "grad_norm": 1.1018529883468215, "learning_rate": 1.9914687410363196e-05, "loss": 1.064, "step": 415 }, { "epoch": 0.0705383637134379, "grad_norm": 1.2711756508896594, "learning_rate": 1.991397002358813e-05, "loss": 1.1139, "step": 416 }, { "epoch": 0.07070792708774905, "grad_norm": 1.1915856433178176, "learning_rate": 1.9913249646234072e-05, "loss": 1.0784, "step": 417 }, { "epoch": 0.0708774904620602, "grad_norm": 1.2336558096852979, "learning_rate": 1.991252627851832e-05, "loss": 1.0623, "step": 418 }, { "epoch": 0.07104705383637135, "grad_norm": 1.1645400412963238, "learning_rate": 1.9911799920659093e-05, "loss": 1.0914, "step": 419 }, { "epoch": 0.0712166172106825, "grad_norm": 1.1920752880656034, "learning_rate": 1.991107057287549e-05, "loss": 1.0821, "step": 420 }, { "epoch": 0.07138618058499364, "grad_norm": 1.1733810711305679, "learning_rate": 1.9910338235387517e-05, "loss": 1.1181, "step": 421 }, { "epoch": 0.0715557439593048, "grad_norm": 1.2048882987640317, "learning_rate": 1.9909602908416096e-05, "loss": 1.086, "step": 422 }, { "epoch": 0.07172530733361594, "grad_norm": 1.226238560221633, "learning_rate": 1.9908864592183034e-05, "loss": 1.077, "step": 423 }, { "epoch": 0.07189487070792709, "grad_norm": 1.087579504878477, "learning_rate": 1.990812328691105e-05, "loss": 1.0702, "step": 424 }, { "epoch": 0.07206443408223824, "grad_norm": 1.1444768150288578, "learning_rate": 1.9907378992823755e-05, "loss": 1.1015, "step": 425 }, { "epoch": 0.07223399745654939, "grad_norm": 1.1231516590945694, "learning_rate": 1.9906631710145672e-05, "loss": 1.0619, "step": 426 }, { "epoch": 0.07240356083086054, "grad_norm": 1.1735386016953435, "learning_rate": 1.9905881439102222e-05, "loss": 1.0811, "step": 427 }, { "epoch": 0.07257312420517169, "grad_norm": 1.2410768321150984, "learning_rate": 1.9905128179919725e-05, "loss": 1.1218, "step": 428 }, { "epoch": 0.07274268757948284, "grad_norm": 1.1685294001209103, "learning_rate": 1.9904371932825407e-05, "loss": 1.0218, "step": 429 }, { "epoch": 0.07291225095379399, "grad_norm": 1.1154083036700704, "learning_rate": 1.9903612698047387e-05, "loss": 1.0799, "step": 430 }, { "epoch": 0.07308181432810514, "grad_norm": 1.0161682428773147, "learning_rate": 1.9902850475814692e-05, "loss": 0.927, "step": 431 }, { "epoch": 0.07325137770241628, "grad_norm": 1.3231426104259807, "learning_rate": 1.9902085266357248e-05, "loss": 1.0861, "step": 432 }, { "epoch": 0.07342094107672743, "grad_norm": 1.2021291574427906, "learning_rate": 1.990131706990589e-05, "loss": 1.0588, "step": 433 }, { "epoch": 0.07359050445103858, "grad_norm": 1.2317625279376887, "learning_rate": 1.9900545886692334e-05, "loss": 1.0861, "step": 434 }, { "epoch": 0.07376006782534972, "grad_norm": 1.2876226099619001, "learning_rate": 1.9899771716949218e-05, "loss": 1.0898, "step": 435 }, { "epoch": 0.07392963119966087, "grad_norm": 1.2424658053018574, "learning_rate": 1.9898994560910074e-05, "loss": 1.0702, "step": 436 }, { "epoch": 0.07409919457397202, "grad_norm": 1.1700524463892321, "learning_rate": 1.989821441880933e-05, "loss": 1.0348, "step": 437 }, { "epoch": 0.07426875794828317, "grad_norm": 1.0849115703136105, "learning_rate": 1.9897431290882317e-05, "loss": 1.0095, "step": 438 }, { "epoch": 0.07443832132259431, "grad_norm": 1.239475681510119, "learning_rate": 1.989664517736527e-05, "loss": 1.1206, "step": 439 }, { "epoch": 0.07460788469690546, "grad_norm": 1.208914526371448, "learning_rate": 1.9895856078495326e-05, "loss": 1.0925, "step": 440 }, { "epoch": 0.07477744807121661, "grad_norm": 1.2284241418375532, "learning_rate": 1.9895063994510512e-05, "loss": 1.0952, "step": 441 }, { "epoch": 0.07494701144552776, "grad_norm": 1.083499924595316, "learning_rate": 1.9894268925649768e-05, "loss": 1.0705, "step": 442 }, { "epoch": 0.07511657481983891, "grad_norm": 1.1242864575942213, "learning_rate": 1.9893470872152925e-05, "loss": 1.0628, "step": 443 }, { "epoch": 0.07528613819415006, "grad_norm": 1.1679463150918064, "learning_rate": 1.989266983426072e-05, "loss": 1.0775, "step": 444 }, { "epoch": 0.07545570156846121, "grad_norm": 1.1855986453181477, "learning_rate": 1.9891865812214793e-05, "loss": 1.1036, "step": 445 }, { "epoch": 0.07562526494277236, "grad_norm": 1.1548333884726703, "learning_rate": 1.989105880625767e-05, "loss": 1.0782, "step": 446 }, { "epoch": 0.0757948283170835, "grad_norm": 1.198269179444107, "learning_rate": 1.9890248816632795e-05, "loss": 1.0748, "step": 447 }, { "epoch": 0.07596439169139466, "grad_norm": 1.1535236477204367, "learning_rate": 1.9889435843584502e-05, "loss": 1.0728, "step": 448 }, { "epoch": 0.0761339550657058, "grad_norm": 1.1761477415752795, "learning_rate": 1.9888619887358024e-05, "loss": 1.0732, "step": 449 }, { "epoch": 0.07630351844001695, "grad_norm": 1.183110526512685, "learning_rate": 1.9887800948199496e-05, "loss": 1.0943, "step": 450 }, { "epoch": 0.0764730818143281, "grad_norm": 1.195434195691069, "learning_rate": 1.988697902635596e-05, "loss": 1.0591, "step": 451 }, { "epoch": 0.07664264518863925, "grad_norm": 1.1334112331426272, "learning_rate": 1.9886154122075344e-05, "loss": 1.0464, "step": 452 }, { "epoch": 0.0768122085629504, "grad_norm": 1.0755156010758495, "learning_rate": 1.988532623560649e-05, "loss": 1.0516, "step": 453 }, { "epoch": 0.07698177193726155, "grad_norm": 1.2680417342605343, "learning_rate": 1.9884495367199122e-05, "loss": 1.0968, "step": 454 }, { "epoch": 0.0771513353115727, "grad_norm": 1.1523661937514382, "learning_rate": 1.9883661517103884e-05, "loss": 1.0457, "step": 455 }, { "epoch": 0.07732089868588385, "grad_norm": 1.1502546313529902, "learning_rate": 1.9882824685572304e-05, "loss": 1.0529, "step": 456 }, { "epoch": 0.077490462060195, "grad_norm": 1.033935925511959, "learning_rate": 1.988198487285682e-05, "loss": 1.0348, "step": 457 }, { "epoch": 0.07766002543450615, "grad_norm": 1.1462897653872424, "learning_rate": 1.9881142079210757e-05, "loss": 1.0954, "step": 458 }, { "epoch": 0.0778295888088173, "grad_norm": 1.1248230302181292, "learning_rate": 1.988029630488835e-05, "loss": 1.0496, "step": 459 }, { "epoch": 0.07799915218312845, "grad_norm": 1.1537385796926145, "learning_rate": 1.987944755014473e-05, "loss": 1.0485, "step": 460 }, { "epoch": 0.0781687155574396, "grad_norm": 1.1541762723860756, "learning_rate": 1.987859581523593e-05, "loss": 1.1034, "step": 461 }, { "epoch": 0.07833827893175074, "grad_norm": 1.1996045349054125, "learning_rate": 1.987774110041887e-05, "loss": 1.0856, "step": 462 }, { "epoch": 0.07850784230606189, "grad_norm": 1.0105469779199656, "learning_rate": 1.9876883405951378e-05, "loss": 0.8399, "step": 463 }, { "epoch": 0.07867740568037304, "grad_norm": 1.1632808203427976, "learning_rate": 1.987602273209219e-05, "loss": 1.0525, "step": 464 }, { "epoch": 0.07884696905468419, "grad_norm": 1.2955480396720245, "learning_rate": 1.9875159079100917e-05, "loss": 1.0818, "step": 465 }, { "epoch": 0.07901653242899534, "grad_norm": 1.233715775378925, "learning_rate": 1.9874292447238094e-05, "loss": 1.1598, "step": 466 }, { "epoch": 0.07918609580330649, "grad_norm": 1.1292694532160477, "learning_rate": 1.9873422836765138e-05, "loss": 1.1032, "step": 467 }, { "epoch": 0.07935565917761764, "grad_norm": 0.7733807873715602, "learning_rate": 1.9872550247944372e-05, "loss": 0.8492, "step": 468 }, { "epoch": 0.07952522255192879, "grad_norm": 1.1583804795274268, "learning_rate": 1.9871674681039013e-05, "loss": 1.0997, "step": 469 }, { "epoch": 0.07969478592623994, "grad_norm": 1.1945572185922984, "learning_rate": 1.987079613631318e-05, "loss": 1.0695, "step": 470 }, { "epoch": 0.07986434930055109, "grad_norm": 1.1787525634654576, "learning_rate": 1.9869914614031886e-05, "loss": 1.0628, "step": 471 }, { "epoch": 0.08003391267486223, "grad_norm": 1.1352104766373525, "learning_rate": 1.9869030114461044e-05, "loss": 1.0582, "step": 472 }, { "epoch": 0.08020347604917338, "grad_norm": 1.1993409165710693, "learning_rate": 1.9868142637867474e-05, "loss": 1.119, "step": 473 }, { "epoch": 0.08037303942348453, "grad_norm": 1.0885290693388283, "learning_rate": 1.9867252184518878e-05, "loss": 1.0691, "step": 474 }, { "epoch": 0.08054260279779568, "grad_norm": 1.1511887537900336, "learning_rate": 1.9866358754683864e-05, "loss": 1.097, "step": 475 }, { "epoch": 0.08071216617210683, "grad_norm": 1.1335862921683832, "learning_rate": 1.9865462348631945e-05, "loss": 1.0698, "step": 476 }, { "epoch": 0.08088172954641798, "grad_norm": 1.1208500382557103, "learning_rate": 1.9864562966633517e-05, "loss": 1.0879, "step": 477 }, { "epoch": 0.08105129292072913, "grad_norm": 1.1870491416374378, "learning_rate": 1.9863660608959885e-05, "loss": 1.1018, "step": 478 }, { "epoch": 0.08122085629504028, "grad_norm": 1.1444174404671785, "learning_rate": 1.9862755275883248e-05, "loss": 1.0877, "step": 479 }, { "epoch": 0.08139041966935143, "grad_norm": 1.1691301307951676, "learning_rate": 1.98618469676767e-05, "loss": 1.1, "step": 480 }, { "epoch": 0.08155998304366256, "grad_norm": 1.114283352536375, "learning_rate": 1.9860935684614235e-05, "loss": 1.0765, "step": 481 }, { "epoch": 0.08172954641797371, "grad_norm": 1.169320341834926, "learning_rate": 1.986002142697075e-05, "loss": 1.063, "step": 482 }, { "epoch": 0.08189910979228486, "grad_norm": 1.1200625962873132, "learning_rate": 1.9859104195022026e-05, "loss": 1.0414, "step": 483 }, { "epoch": 0.08206867316659601, "grad_norm": 1.103798371676376, "learning_rate": 1.985818398904475e-05, "loss": 1.0809, "step": 484 }, { "epoch": 0.08223823654090716, "grad_norm": 1.1553070463371595, "learning_rate": 1.985726080931651e-05, "loss": 1.11, "step": 485 }, { "epoch": 0.08240779991521831, "grad_norm": 1.1803341454465903, "learning_rate": 1.9856334656115785e-05, "loss": 1.0621, "step": 486 }, { "epoch": 0.08257736328952946, "grad_norm": 1.058097820682474, "learning_rate": 1.9855405529721944e-05, "loss": 1.0407, "step": 487 }, { "epoch": 0.0827469266638406, "grad_norm": 1.1599368042958134, "learning_rate": 1.985447343041527e-05, "loss": 1.0688, "step": 488 }, { "epoch": 0.08291649003815176, "grad_norm": 1.1129369451006759, "learning_rate": 1.9853538358476933e-05, "loss": 1.0742, "step": 489 }, { "epoch": 0.0830860534124629, "grad_norm": 1.2048572648077327, "learning_rate": 1.9852600314188993e-05, "loss": 1.1236, "step": 490 }, { "epoch": 0.08325561678677405, "grad_norm": 1.142848586187954, "learning_rate": 1.985165929783442e-05, "loss": 1.0631, "step": 491 }, { "epoch": 0.0834251801610852, "grad_norm": 1.2162601651798566, "learning_rate": 1.9850715309697076e-05, "loss": 1.1205, "step": 492 }, { "epoch": 0.08359474353539635, "grad_norm": 1.096956853782461, "learning_rate": 1.984976835006171e-05, "loss": 1.0877, "step": 493 }, { "epoch": 0.0837643069097075, "grad_norm": 1.1253873071080505, "learning_rate": 1.984881841921398e-05, "loss": 1.0549, "step": 494 }, { "epoch": 0.08393387028401865, "grad_norm": 1.1658881473741232, "learning_rate": 1.9847865517440438e-05, "loss": 1.1008, "step": 495 }, { "epoch": 0.0841034336583298, "grad_norm": 1.146384404240851, "learning_rate": 1.9846909645028524e-05, "loss": 1.0555, "step": 496 }, { "epoch": 0.08427299703264095, "grad_norm": 1.095194900303575, "learning_rate": 1.9845950802266584e-05, "loss": 1.1031, "step": 497 }, { "epoch": 0.0844425604069521, "grad_norm": 1.1769694437884834, "learning_rate": 1.9844988989443856e-05, "loss": 1.082, "step": 498 }, { "epoch": 0.08461212378126325, "grad_norm": 1.086331279711683, "learning_rate": 1.984402420685047e-05, "loss": 1.0752, "step": 499 }, { "epoch": 0.0847816871555744, "grad_norm": 1.1366979001490798, "learning_rate": 1.9843056454777464e-05, "loss": 1.1047, "step": 500 }, { "epoch": 0.08495125052988554, "grad_norm": 1.131429576014002, "learning_rate": 1.9842085733516753e-05, "loss": 1.0558, "step": 501 }, { "epoch": 0.0851208139041967, "grad_norm": 0.8769720882940868, "learning_rate": 1.984111204336116e-05, "loss": 0.9077, "step": 502 }, { "epoch": 0.08529037727850784, "grad_norm": 1.1598437085571796, "learning_rate": 1.984013538460441e-05, "loss": 1.0865, "step": 503 }, { "epoch": 0.08545994065281899, "grad_norm": 1.1129551006550698, "learning_rate": 1.9839155757541106e-05, "loss": 1.0239, "step": 504 }, { "epoch": 0.08562950402713014, "grad_norm": 1.0784083630482575, "learning_rate": 1.983817316246676e-05, "loss": 1.0725, "step": 505 }, { "epoch": 0.08579906740144129, "grad_norm": 1.0928281943237104, "learning_rate": 1.983718759967777e-05, "loss": 1.0575, "step": 506 }, { "epoch": 0.08596863077575244, "grad_norm": 1.1205149449144562, "learning_rate": 1.983619906947144e-05, "loss": 1.0827, "step": 507 }, { "epoch": 0.08613819415006359, "grad_norm": 1.100239093870298, "learning_rate": 1.9835207572145957e-05, "loss": 1.081, "step": 508 }, { "epoch": 0.08630775752437474, "grad_norm": 1.1349372799313462, "learning_rate": 1.9834213108000414e-05, "loss": 1.0464, "step": 509 }, { "epoch": 0.08647732089868589, "grad_norm": 1.091347043818639, "learning_rate": 1.983321567733479e-05, "loss": 1.0594, "step": 510 }, { "epoch": 0.08664688427299704, "grad_norm": 1.1057906831239117, "learning_rate": 1.983221528044997e-05, "loss": 1.0766, "step": 511 }, { "epoch": 0.08681644764730818, "grad_norm": 1.091013204329108, "learning_rate": 1.9831211917647723e-05, "loss": 1.0584, "step": 512 }, { "epoch": 0.08698601102161933, "grad_norm": 1.1230243076329225, "learning_rate": 1.9830205589230713e-05, "loss": 1.0839, "step": 513 }, { "epoch": 0.08715557439593048, "grad_norm": 1.1487894168527573, "learning_rate": 1.9829196295502506e-05, "loss": 1.1103, "step": 514 }, { "epoch": 0.08732513777024163, "grad_norm": 1.1139963729673388, "learning_rate": 1.9828184036767556e-05, "loss": 1.1076, "step": 515 }, { "epoch": 0.08749470114455278, "grad_norm": 1.1377141444715826, "learning_rate": 1.9827168813331215e-05, "loss": 1.0757, "step": 516 }, { "epoch": 0.08766426451886393, "grad_norm": 1.0505504938040815, "learning_rate": 1.9826150625499733e-05, "loss": 1.0078, "step": 517 }, { "epoch": 0.08783382789317508, "grad_norm": 1.194722335388699, "learning_rate": 1.982512947358024e-05, "loss": 1.0953, "step": 518 }, { "epoch": 0.08800339126748623, "grad_norm": 1.0972133835062998, "learning_rate": 1.9824105357880777e-05, "loss": 1.1141, "step": 519 }, { "epoch": 0.08817295464179738, "grad_norm": 1.0912509502482381, "learning_rate": 1.9823078278710265e-05, "loss": 1.0922, "step": 520 }, { "epoch": 0.08834251801610853, "grad_norm": 1.184452496622127, "learning_rate": 1.9822048236378536e-05, "loss": 1.0567, "step": 521 }, { "epoch": 0.08851208139041968, "grad_norm": 1.2015748250956637, "learning_rate": 1.982101523119629e-05, "loss": 1.1245, "step": 522 }, { "epoch": 0.08868164476473082, "grad_norm": 1.0301949286097938, "learning_rate": 1.9819979263475154e-05, "loss": 1.0559, "step": 523 }, { "epoch": 0.08885120813904197, "grad_norm": 1.1280944704408538, "learning_rate": 1.9818940333527616e-05, "loss": 1.0607, "step": 524 }, { "epoch": 0.08902077151335312, "grad_norm": 1.1243101999950735, "learning_rate": 1.9817898441667082e-05, "loss": 1.1164, "step": 525 }, { "epoch": 0.08919033488766426, "grad_norm": 1.104946151048045, "learning_rate": 1.9816853588207835e-05, "loss": 1.0333, "step": 526 }, { "epoch": 0.0893598982619754, "grad_norm": 1.0842306029656459, "learning_rate": 1.9815805773465064e-05, "loss": 1.0593, "step": 527 }, { "epoch": 0.08952946163628656, "grad_norm": 1.126482095627671, "learning_rate": 1.981475499775484e-05, "loss": 1.0655, "step": 528 }, { "epoch": 0.0896990250105977, "grad_norm": 1.0729177558590441, "learning_rate": 1.9813701261394136e-05, "loss": 1.094, "step": 529 }, { "epoch": 0.08986858838490885, "grad_norm": 1.1555215297008465, "learning_rate": 1.9812644564700814e-05, "loss": 1.0782, "step": 530 }, { "epoch": 0.09003815175922, "grad_norm": 1.1445213131107626, "learning_rate": 1.9811584907993632e-05, "loss": 1.0973, "step": 531 }, { "epoch": 0.09020771513353115, "grad_norm": 1.1395691634802012, "learning_rate": 1.9810522291592234e-05, "loss": 1.0558, "step": 532 }, { "epoch": 0.0903772785078423, "grad_norm": 1.1494803768817405, "learning_rate": 1.9809456715817163e-05, "loss": 1.0352, "step": 533 }, { "epoch": 0.09054684188215345, "grad_norm": 1.1211189975039118, "learning_rate": 1.980838818098986e-05, "loss": 1.0759, "step": 534 }, { "epoch": 0.0907164052564646, "grad_norm": 1.1593061242144942, "learning_rate": 1.9807316687432637e-05, "loss": 1.0562, "step": 535 }, { "epoch": 0.09088596863077575, "grad_norm": 1.0748556332850687, "learning_rate": 1.980624223546873e-05, "loss": 1.0293, "step": 536 }, { "epoch": 0.0910555320050869, "grad_norm": 1.1919672014479494, "learning_rate": 1.980516482542224e-05, "loss": 1.0528, "step": 537 }, { "epoch": 0.09122509537939805, "grad_norm": 1.1179549392680466, "learning_rate": 1.980408445761817e-05, "loss": 1.0065, "step": 538 }, { "epoch": 0.0913946587537092, "grad_norm": 1.072876811601526, "learning_rate": 1.9803001132382423e-05, "loss": 1.076, "step": 539 }, { "epoch": 0.09156422212802034, "grad_norm": 1.1689336105605155, "learning_rate": 1.9801914850041787e-05, "loss": 1.1354, "step": 540 }, { "epoch": 0.0917337855023315, "grad_norm": 1.1462606137452056, "learning_rate": 1.9800825610923937e-05, "loss": 1.0495, "step": 541 }, { "epoch": 0.09190334887664264, "grad_norm": 1.0869286432944305, "learning_rate": 1.979973341535745e-05, "loss": 1.0846, "step": 542 }, { "epoch": 0.09207291225095379, "grad_norm": 1.160874012440214, "learning_rate": 1.9798638263671783e-05, "loss": 1.0993, "step": 543 }, { "epoch": 0.09224247562526494, "grad_norm": 1.1069748287382373, "learning_rate": 1.9797540156197298e-05, "loss": 1.0531, "step": 544 }, { "epoch": 0.09241203899957609, "grad_norm": 1.0619426126639204, "learning_rate": 1.9796439093265245e-05, "loss": 1.0443, "step": 545 }, { "epoch": 0.09258160237388724, "grad_norm": 1.0900576991203947, "learning_rate": 1.9795335075207756e-05, "loss": 1.087, "step": 546 }, { "epoch": 0.09275116574819839, "grad_norm": 1.109818697228551, "learning_rate": 1.9794228102357868e-05, "loss": 1.0757, "step": 547 }, { "epoch": 0.09292072912250954, "grad_norm": 1.121649359705272, "learning_rate": 1.9793118175049497e-05, "loss": 1.0894, "step": 548 }, { "epoch": 0.09309029249682069, "grad_norm": 1.1407704380093286, "learning_rate": 1.979200529361746e-05, "loss": 1.0918, "step": 549 }, { "epoch": 0.09325985587113184, "grad_norm": 1.0968320814736103, "learning_rate": 1.9790889458397457e-05, "loss": 1.0652, "step": 550 }, { "epoch": 0.09342941924544299, "grad_norm": 1.1569327839984382, "learning_rate": 1.9789770669726088e-05, "loss": 1.0806, "step": 551 }, { "epoch": 0.09359898261975413, "grad_norm": 1.130038417534701, "learning_rate": 1.9788648927940833e-05, "loss": 1.064, "step": 552 }, { "epoch": 0.09376854599406528, "grad_norm": 1.0870086002941353, "learning_rate": 1.9787524233380076e-05, "loss": 1.0832, "step": 553 }, { "epoch": 0.09393810936837643, "grad_norm": 1.1104865662961882, "learning_rate": 1.9786396586383078e-05, "loss": 1.0809, "step": 554 }, { "epoch": 0.09410767274268758, "grad_norm": 1.115877825942729, "learning_rate": 1.978526598729e-05, "loss": 1.0717, "step": 555 }, { "epoch": 0.09427723611699873, "grad_norm": 1.2313468318597176, "learning_rate": 1.9784132436441888e-05, "loss": 1.0769, "step": 556 }, { "epoch": 0.09444679949130988, "grad_norm": 1.1522440263094618, "learning_rate": 1.9782995934180687e-05, "loss": 1.1218, "step": 557 }, { "epoch": 0.09461636286562103, "grad_norm": 1.1627190083653198, "learning_rate": 1.978185648084922e-05, "loss": 1.0797, "step": 558 }, { "epoch": 0.09478592623993218, "grad_norm": 1.1180499876689296, "learning_rate": 1.978071407679121e-05, "loss": 1.0429, "step": 559 }, { "epoch": 0.09495548961424333, "grad_norm": 1.0667542106344945, "learning_rate": 1.977956872235127e-05, "loss": 1.0663, "step": 560 }, { "epoch": 0.09512505298855448, "grad_norm": 1.1288587719208103, "learning_rate": 1.9778420417874894e-05, "loss": 1.0587, "step": 561 }, { "epoch": 0.09529461636286563, "grad_norm": 0.7695075463461137, "learning_rate": 1.977726916370847e-05, "loss": 0.9117, "step": 562 }, { "epoch": 0.09546417973717677, "grad_norm": 1.1902697873927301, "learning_rate": 1.9776114960199283e-05, "loss": 1.0674, "step": 563 }, { "epoch": 0.09563374311148792, "grad_norm": 1.1277515967187348, "learning_rate": 1.97749578076955e-05, "loss": 1.0948, "step": 564 }, { "epoch": 0.09580330648579907, "grad_norm": 1.126005646590579, "learning_rate": 1.9773797706546176e-05, "loss": 1.0254, "step": 565 }, { "epoch": 0.09597286986011022, "grad_norm": 1.1384127107672, "learning_rate": 1.9772634657101263e-05, "loss": 1.0632, "step": 566 }, { "epoch": 0.09614243323442137, "grad_norm": 1.0931297728405247, "learning_rate": 1.9771468659711595e-05, "loss": 1.052, "step": 567 }, { "epoch": 0.09631199660873252, "grad_norm": 1.1482837794662704, "learning_rate": 1.9770299714728908e-05, "loss": 1.0569, "step": 568 }, { "epoch": 0.09648155998304367, "grad_norm": 1.2170875755793362, "learning_rate": 1.9769127822505805e-05, "loss": 1.0873, "step": 569 }, { "epoch": 0.09665112335735482, "grad_norm": 1.098153826711913, "learning_rate": 1.9767952983395795e-05, "loss": 1.1112, "step": 570 }, { "epoch": 0.09682068673166597, "grad_norm": 1.06714468236688, "learning_rate": 1.9766775197753276e-05, "loss": 1.0605, "step": 571 }, { "epoch": 0.0969902501059771, "grad_norm": 1.1021341617313847, "learning_rate": 1.976559446593353e-05, "loss": 1.0405, "step": 572 }, { "epoch": 0.09715981348028825, "grad_norm": 1.2657459951125931, "learning_rate": 1.9764410788292724e-05, "loss": 1.0752, "step": 573 }, { "epoch": 0.0973293768545994, "grad_norm": 1.1382173754696283, "learning_rate": 1.9763224165187918e-05, "loss": 1.0391, "step": 574 }, { "epoch": 0.09749894022891055, "grad_norm": 1.1115137363027052, "learning_rate": 1.9762034596977066e-05, "loss": 1.0602, "step": 575 }, { "epoch": 0.0976685036032217, "grad_norm": 1.2638434665667782, "learning_rate": 1.9760842084019e-05, "loss": 1.1028, "step": 576 }, { "epoch": 0.09783806697753285, "grad_norm": 1.0956643884164092, "learning_rate": 1.9759646626673445e-05, "loss": 1.0383, "step": 577 }, { "epoch": 0.098007630351844, "grad_norm": 1.1951666900164801, "learning_rate": 1.9758448225301018e-05, "loss": 1.0522, "step": 578 }, { "epoch": 0.09817719372615515, "grad_norm": 1.1582403573338345, "learning_rate": 1.975724688026322e-05, "loss": 1.0759, "step": 579 }, { "epoch": 0.0983467571004663, "grad_norm": 1.105896806627842, "learning_rate": 1.9756042591922436e-05, "loss": 1.092, "step": 580 }, { "epoch": 0.09851632047477744, "grad_norm": 1.0743625525603464, "learning_rate": 1.9754835360641945e-05, "loss": 1.0617, "step": 581 }, { "epoch": 0.0986858838490886, "grad_norm": 1.1252124902043465, "learning_rate": 1.9753625186785915e-05, "loss": 1.0746, "step": 582 }, { "epoch": 0.09885544722339974, "grad_norm": 1.1017954423823568, "learning_rate": 1.9752412070719394e-05, "loss": 1.072, "step": 583 }, { "epoch": 0.09902501059771089, "grad_norm": 1.136591595976166, "learning_rate": 1.9751196012808328e-05, "loss": 1.0859, "step": 584 }, { "epoch": 0.09919457397202204, "grad_norm": 1.1120914379350122, "learning_rate": 1.9749977013419536e-05, "loss": 1.0326, "step": 585 }, { "epoch": 0.09936413734633319, "grad_norm": 1.0843116916903461, "learning_rate": 1.974875507292074e-05, "loss": 1.0523, "step": 586 }, { "epoch": 0.09953370072064434, "grad_norm": 1.1190010103242294, "learning_rate": 1.9747530191680543e-05, "loss": 1.0742, "step": 587 }, { "epoch": 0.09970326409495549, "grad_norm": 1.065987641581167, "learning_rate": 1.974630237006843e-05, "loss": 1.0643, "step": 588 }, { "epoch": 0.09987282746926664, "grad_norm": 1.047751061453114, "learning_rate": 1.9745071608454777e-05, "loss": 1.0378, "step": 589 }, { "epoch": 0.10004239084357779, "grad_norm": 1.1259994031260914, "learning_rate": 1.9743837907210847e-05, "loss": 1.0477, "step": 590 }, { "epoch": 0.10021195421788893, "grad_norm": 1.1236119296177303, "learning_rate": 1.9742601266708794e-05, "loss": 1.1015, "step": 591 }, { "epoch": 0.10038151759220008, "grad_norm": 1.129665111459205, "learning_rate": 1.974136168732165e-05, "loss": 1.1012, "step": 592 }, { "epoch": 0.10055108096651123, "grad_norm": 1.1243761191755925, "learning_rate": 1.9740119169423337e-05, "loss": 1.0143, "step": 593 }, { "epoch": 0.10072064434082238, "grad_norm": 1.1537337440303301, "learning_rate": 1.973887371338867e-05, "loss": 1.0616, "step": 594 }, { "epoch": 0.10089020771513353, "grad_norm": 1.0906142964884253, "learning_rate": 1.9737625319593338e-05, "loss": 1.0391, "step": 595 }, { "epoch": 0.10105977108944468, "grad_norm": 1.158231936547035, "learning_rate": 1.9736373988413923e-05, "loss": 1.0965, "step": 596 }, { "epoch": 0.10122933446375583, "grad_norm": 1.15562873277257, "learning_rate": 1.97351197202279e-05, "loss": 1.0914, "step": 597 }, { "epoch": 0.10139889783806698, "grad_norm": 1.1373645334454423, "learning_rate": 1.9733862515413616e-05, "loss": 1.0819, "step": 598 }, { "epoch": 0.10156846121237813, "grad_norm": 1.1791280926929748, "learning_rate": 1.9732602374350314e-05, "loss": 1.1079, "step": 599 }, { "epoch": 0.10173802458668928, "grad_norm": 1.1273615661016252, "learning_rate": 1.973133929741812e-05, "loss": 1.1228, "step": 600 }, { "epoch": 0.10190758796100043, "grad_norm": 1.1612422740440762, "learning_rate": 1.973007328499804e-05, "loss": 1.0937, "step": 601 }, { "epoch": 0.10207715133531158, "grad_norm": 1.143726584306414, "learning_rate": 1.9728804337471974e-05, "loss": 1.0559, "step": 602 }, { "epoch": 0.10224671470962272, "grad_norm": 1.1862024806139606, "learning_rate": 1.9727532455222707e-05, "loss": 1.0453, "step": 603 }, { "epoch": 0.10241627808393387, "grad_norm": 1.062593200351274, "learning_rate": 1.9726257638633898e-05, "loss": 1.0438, "step": 604 }, { "epoch": 0.10258584145824502, "grad_norm": 1.1571042199635704, "learning_rate": 1.972497988809011e-05, "loss": 1.0528, "step": 605 }, { "epoch": 0.10275540483255617, "grad_norm": 1.2193278886994043, "learning_rate": 1.9723699203976768e-05, "loss": 1.08, "step": 606 }, { "epoch": 0.10292496820686732, "grad_norm": 1.081899882791445, "learning_rate": 1.9722415586680204e-05, "loss": 1.0869, "step": 607 }, { "epoch": 0.10309453158117847, "grad_norm": 1.1578031935579174, "learning_rate": 1.9721129036587618e-05, "loss": 1.094, "step": 608 }, { "epoch": 0.10326409495548962, "grad_norm": 1.146092718487579, "learning_rate": 1.9719839554087108e-05, "loss": 1.0539, "step": 609 }, { "epoch": 0.10343365832980077, "grad_norm": 1.086646725053304, "learning_rate": 1.9718547139567648e-05, "loss": 1.0328, "step": 610 }, { "epoch": 0.10360322170411192, "grad_norm": 1.1486718723623022, "learning_rate": 1.9717251793419097e-05, "loss": 1.0359, "step": 611 }, { "epoch": 0.10377278507842307, "grad_norm": 1.125662041139612, "learning_rate": 1.97159535160322e-05, "loss": 1.0515, "step": 612 }, { "epoch": 0.10394234845273422, "grad_norm": 1.1430135256236766, "learning_rate": 1.971465230779859e-05, "loss": 1.1003, "step": 613 }, { "epoch": 0.10411191182704536, "grad_norm": 1.1642877319851186, "learning_rate": 1.9713348169110776e-05, "loss": 1.0929, "step": 614 }, { "epoch": 0.10428147520135651, "grad_norm": 1.1068291985391228, "learning_rate": 1.971204110036216e-05, "loss": 1.0841, "step": 615 }, { "epoch": 0.10445103857566766, "grad_norm": 1.1578356259788904, "learning_rate": 1.971073110194702e-05, "loss": 1.0424, "step": 616 }, { "epoch": 0.10462060194997881, "grad_norm": 1.11993680561627, "learning_rate": 1.9709418174260523e-05, "loss": 1.0873, "step": 617 }, { "epoch": 0.10479016532428995, "grad_norm": 1.1008254657288135, "learning_rate": 1.970810231769871e-05, "loss": 1.0719, "step": 618 }, { "epoch": 0.1049597286986011, "grad_norm": 1.0926669021841884, "learning_rate": 1.9706783532658528e-05, "loss": 1.07, "step": 619 }, { "epoch": 0.10512929207291224, "grad_norm": 1.1041001530651953, "learning_rate": 1.9705461819537776e-05, "loss": 1.0338, "step": 620 }, { "epoch": 0.1052988554472234, "grad_norm": 1.101936914258299, "learning_rate": 1.9704137178735164e-05, "loss": 1.0456, "step": 621 }, { "epoch": 0.10546841882153454, "grad_norm": 1.120099123347527, "learning_rate": 1.9702809610650272e-05, "loss": 1.0437, "step": 622 }, { "epoch": 0.10563798219584569, "grad_norm": 1.1192875521384187, "learning_rate": 1.9701479115683562e-05, "loss": 1.1211, "step": 623 }, { "epoch": 0.10580754557015684, "grad_norm": 1.0904903343796983, "learning_rate": 1.9700145694236384e-05, "loss": 1.0815, "step": 624 }, { "epoch": 0.10597710894446799, "grad_norm": 1.086279720601418, "learning_rate": 1.9698809346710965e-05, "loss": 1.0601, "step": 625 }, { "epoch": 0.10614667231877914, "grad_norm": 0.6752048722044736, "learning_rate": 1.9697470073510425e-05, "loss": 0.8137, "step": 626 }, { "epoch": 0.10631623569309029, "grad_norm": 1.1434325266029355, "learning_rate": 1.9696127875038753e-05, "loss": 1.0607, "step": 627 }, { "epoch": 0.10648579906740144, "grad_norm": 1.2106579149348138, "learning_rate": 1.969478275170083e-05, "loss": 1.0624, "step": 628 }, { "epoch": 0.10665536244171259, "grad_norm": 1.0834147893709303, "learning_rate": 1.9693434703902417e-05, "loss": 1.0591, "step": 629 }, { "epoch": 0.10682492581602374, "grad_norm": 1.1007392254472879, "learning_rate": 1.9692083732050157e-05, "loss": 1.0403, "step": 630 }, { "epoch": 0.10699448919033488, "grad_norm": 1.079640446936186, "learning_rate": 1.9690729836551576e-05, "loss": 1.0558, "step": 631 }, { "epoch": 0.10716405256464603, "grad_norm": 1.1995413113343978, "learning_rate": 1.9689373017815076e-05, "loss": 1.1018, "step": 632 }, { "epoch": 0.10733361593895718, "grad_norm": 1.1201878440803128, "learning_rate": 1.9688013276249947e-05, "loss": 1.0226, "step": 633 }, { "epoch": 0.10750317931326833, "grad_norm": 1.1020687785893686, "learning_rate": 1.9686650612266364e-05, "loss": 1.0367, "step": 634 }, { "epoch": 0.10767274268757948, "grad_norm": 0.6677923041852281, "learning_rate": 1.968528502627537e-05, "loss": 0.8784, "step": 635 }, { "epoch": 0.10784230606189063, "grad_norm": 1.146816649061468, "learning_rate": 1.968391651868891e-05, "loss": 1.1004, "step": 636 }, { "epoch": 0.10801186943620178, "grad_norm": 1.159486212227261, "learning_rate": 1.9682545089919784e-05, "loss": 1.0884, "step": 637 }, { "epoch": 0.10818143281051293, "grad_norm": 1.0978985243065382, "learning_rate": 1.9681170740381703e-05, "loss": 1.0587, "step": 638 }, { "epoch": 0.10835099618482408, "grad_norm": 1.054387258641634, "learning_rate": 1.967979347048923e-05, "loss": 1.0739, "step": 639 }, { "epoch": 0.10852055955913523, "grad_norm": 1.1656732373448082, "learning_rate": 1.967841328065783e-05, "loss": 1.0198, "step": 640 }, { "epoch": 0.10869012293344638, "grad_norm": 1.0760367036003744, "learning_rate": 1.9677030171303842e-05, "loss": 1.0478, "step": 641 }, { "epoch": 0.10885968630775752, "grad_norm": 1.1483066730433895, "learning_rate": 1.9675644142844482e-05, "loss": 1.0737, "step": 642 }, { "epoch": 0.10902924968206867, "grad_norm": 1.1296193475236855, "learning_rate": 1.9674255195697848e-05, "loss": 1.0454, "step": 643 }, { "epoch": 0.10919881305637982, "grad_norm": 1.1745512720919287, "learning_rate": 1.967286333028293e-05, "loss": 1.0621, "step": 644 }, { "epoch": 0.10936837643069097, "grad_norm": 1.106699964385633, "learning_rate": 1.9671468547019575e-05, "loss": 1.1019, "step": 645 }, { "epoch": 0.10953793980500212, "grad_norm": 1.1154322739929565, "learning_rate": 1.9670070846328532e-05, "loss": 1.0466, "step": 646 }, { "epoch": 0.10970750317931327, "grad_norm": 1.1161325733683853, "learning_rate": 1.9668670228631416e-05, "loss": 1.02, "step": 647 }, { "epoch": 0.10987706655362442, "grad_norm": 1.2100624469288324, "learning_rate": 1.9667266694350733e-05, "loss": 1.0725, "step": 648 }, { "epoch": 0.11004662992793557, "grad_norm": 1.0588563151186825, "learning_rate": 1.966586024390986e-05, "loss": 1.0941, "step": 649 }, { "epoch": 0.11021619330224672, "grad_norm": 1.0464532346569646, "learning_rate": 1.9664450877733065e-05, "loss": 1.0661, "step": 650 }, { "epoch": 0.11038575667655787, "grad_norm": 1.110846254232341, "learning_rate": 1.9663038596245477e-05, "loss": 1.0509, "step": 651 }, { "epoch": 0.11055532005086902, "grad_norm": 1.0478586310534652, "learning_rate": 1.966162339987312e-05, "loss": 1.0533, "step": 652 }, { "epoch": 0.11072488342518017, "grad_norm": 1.1198595469899033, "learning_rate": 1.9660205289042887e-05, "loss": 1.1076, "step": 653 }, { "epoch": 0.11089444679949131, "grad_norm": 1.03479090038442, "learning_rate": 1.9658784264182565e-05, "loss": 1.0756, "step": 654 }, { "epoch": 0.11106401017380246, "grad_norm": 1.0648956277315327, "learning_rate": 1.96573603257208e-05, "loss": 1.0706, "step": 655 }, { "epoch": 0.11123357354811361, "grad_norm": 1.0468737800536208, "learning_rate": 1.9655933474087135e-05, "loss": 1.0725, "step": 656 }, { "epoch": 0.11140313692242476, "grad_norm": 1.0472912629096596, "learning_rate": 1.9654503709711984e-05, "loss": 1.0887, "step": 657 }, { "epoch": 0.11157270029673591, "grad_norm": 1.154907312346853, "learning_rate": 1.9653071033026635e-05, "loss": 1.1054, "step": 658 }, { "epoch": 0.11174226367104706, "grad_norm": 1.0523325931961225, "learning_rate": 1.965163544446326e-05, "loss": 1.0467, "step": 659 }, { "epoch": 0.11191182704535821, "grad_norm": 1.134820358682994, "learning_rate": 1.9650196944454912e-05, "loss": 1.0879, "step": 660 }, { "epoch": 0.11208139041966936, "grad_norm": 1.0694754851200907, "learning_rate": 1.9648755533435517e-05, "loss": 1.068, "step": 661 }, { "epoch": 0.1122509537939805, "grad_norm": 1.0682206468984532, "learning_rate": 1.9647311211839878e-05, "loss": 1.0343, "step": 662 }, { "epoch": 0.11242051716829164, "grad_norm": 1.1420187495252647, "learning_rate": 1.9645863980103687e-05, "loss": 1.0538, "step": 663 }, { "epoch": 0.11259008054260279, "grad_norm": 0.671556464032117, "learning_rate": 1.96444138386635e-05, "loss": 0.8609, "step": 664 }, { "epoch": 0.11275964391691394, "grad_norm": 1.129692988580815, "learning_rate": 1.964296078795675e-05, "loss": 1.0803, "step": 665 }, { "epoch": 0.11292920729122509, "grad_norm": 1.0833946028329215, "learning_rate": 1.9641504828421772e-05, "loss": 1.0811, "step": 666 }, { "epoch": 0.11309877066553624, "grad_norm": 1.1053197486821598, "learning_rate": 1.9640045960497742e-05, "loss": 1.0701, "step": 667 }, { "epoch": 0.11326833403984739, "grad_norm": 1.0625837036341024, "learning_rate": 1.9638584184624744e-05, "loss": 1.0623, "step": 668 }, { "epoch": 0.11343789741415854, "grad_norm": 1.123356491188546, "learning_rate": 1.963711950124372e-05, "loss": 1.0898, "step": 669 }, { "epoch": 0.11360746078846969, "grad_norm": 1.019395746112488, "learning_rate": 1.9635651910796505e-05, "loss": 1.0126, "step": 670 }, { "epoch": 0.11377702416278083, "grad_norm": 1.104573382764544, "learning_rate": 1.963418141372579e-05, "loss": 1.0903, "step": 671 }, { "epoch": 0.11394658753709198, "grad_norm": 1.0517837405698718, "learning_rate": 1.9632708010475166e-05, "loss": 1.0681, "step": 672 }, { "epoch": 0.11411615091140313, "grad_norm": 1.1181651011054694, "learning_rate": 1.9631231701489083e-05, "loss": 1.0921, "step": 673 }, { "epoch": 0.11428571428571428, "grad_norm": 1.1365544432418553, "learning_rate": 1.9629752487212875e-05, "loss": 1.0869, "step": 674 }, { "epoch": 0.11445527766002543, "grad_norm": 1.1386681004720671, "learning_rate": 1.962827036809275e-05, "loss": 1.0506, "step": 675 }, { "epoch": 0.11462484103433658, "grad_norm": 0.6547819271104519, "learning_rate": 1.96267853445758e-05, "loss": 0.8654, "step": 676 }, { "epoch": 0.11479440440864773, "grad_norm": 1.1168296079216966, "learning_rate": 1.9625297417109982e-05, "loss": 1.0856, "step": 677 }, { "epoch": 0.11496396778295888, "grad_norm": 1.1578194105132944, "learning_rate": 1.9623806586144133e-05, "loss": 1.051, "step": 678 }, { "epoch": 0.11513353115727003, "grad_norm": 1.1104768251752244, "learning_rate": 1.962231285212797e-05, "loss": 1.08, "step": 679 }, { "epoch": 0.11530309453158118, "grad_norm": 1.122158328097251, "learning_rate": 1.962081621551208e-05, "loss": 1.0916, "step": 680 }, { "epoch": 0.11547265790589233, "grad_norm": 1.107177574310001, "learning_rate": 1.9619316676747928e-05, "loss": 1.0516, "step": 681 }, { "epoch": 0.11564222128020347, "grad_norm": 1.1810586326751367, "learning_rate": 1.9617814236287856e-05, "loss": 1.0952, "step": 682 }, { "epoch": 0.11581178465451462, "grad_norm": 1.1585617155722383, "learning_rate": 1.9616308894585078e-05, "loss": 1.0629, "step": 683 }, { "epoch": 0.11598134802882577, "grad_norm": 1.0441428322772266, "learning_rate": 1.9614800652093685e-05, "loss": 1.0452, "step": 684 }, { "epoch": 0.11615091140313692, "grad_norm": 1.1703165109066946, "learning_rate": 1.9613289509268647e-05, "loss": 1.0712, "step": 685 }, { "epoch": 0.11632047477744807, "grad_norm": 1.0760045428043057, "learning_rate": 1.9611775466565797e-05, "loss": 1.0481, "step": 686 }, { "epoch": 0.11649003815175922, "grad_norm": 1.0544541664556326, "learning_rate": 1.9610258524441855e-05, "loss": 1.0559, "step": 687 }, { "epoch": 0.11665960152607037, "grad_norm": 1.1774069881309783, "learning_rate": 1.9608738683354413e-05, "loss": 1.0504, "step": 688 }, { "epoch": 0.11682916490038152, "grad_norm": 1.1322872719387318, "learning_rate": 1.9607215943761933e-05, "loss": 1.0403, "step": 689 }, { "epoch": 0.11699872827469267, "grad_norm": 1.1226384251199562, "learning_rate": 1.9605690306123755e-05, "loss": 1.0594, "step": 690 }, { "epoch": 0.11716829164900382, "grad_norm": 1.2064638830304784, "learning_rate": 1.960416177090009e-05, "loss": 1.0815, "step": 691 }, { "epoch": 0.11733785502331497, "grad_norm": 1.1390336467265723, "learning_rate": 1.960263033855203e-05, "loss": 1.0897, "step": 692 }, { "epoch": 0.11750741839762611, "grad_norm": 1.0758109815913908, "learning_rate": 1.960109600954153e-05, "loss": 1.0271, "step": 693 }, { "epoch": 0.11767698177193726, "grad_norm": 1.0258838504650964, "learning_rate": 1.959955878433143e-05, "loss": 1.0671, "step": 694 }, { "epoch": 0.11784654514624841, "grad_norm": 1.0518237057593784, "learning_rate": 1.9598018663385437e-05, "loss": 1.0289, "step": 695 }, { "epoch": 0.11801610852055956, "grad_norm": 1.0357382821991248, "learning_rate": 1.9596475647168133e-05, "loss": 1.0139, "step": 696 }, { "epoch": 0.11818567189487071, "grad_norm": 1.0819302054485556, "learning_rate": 1.9594929736144978e-05, "loss": 1.0851, "step": 697 }, { "epoch": 0.11835523526918186, "grad_norm": 1.0772252705960152, "learning_rate": 1.959338093078229e-05, "loss": 1.0775, "step": 698 }, { "epoch": 0.11852479864349301, "grad_norm": 1.108405657406484, "learning_rate": 1.9591829231547278e-05, "loss": 1.018, "step": 699 }, { "epoch": 0.11869436201780416, "grad_norm": 1.0818438008956826, "learning_rate": 1.9590274638908018e-05, "loss": 1.0514, "step": 700 }, { "epoch": 0.11886392539211531, "grad_norm": 1.1066598853050535, "learning_rate": 1.9588717153333456e-05, "loss": 1.0678, "step": 701 }, { "epoch": 0.11903348876642646, "grad_norm": 0.67953328338625, "learning_rate": 1.9587156775293408e-05, "loss": 0.9174, "step": 702 }, { "epoch": 0.1192030521407376, "grad_norm": 1.1181498525515325, "learning_rate": 1.958559350525857e-05, "loss": 1.0284, "step": 703 }, { "epoch": 0.11937261551504876, "grad_norm": 1.040872193249211, "learning_rate": 1.958402734370051e-05, "loss": 0.9937, "step": 704 }, { "epoch": 0.1195421788893599, "grad_norm": 1.1189194083937848, "learning_rate": 1.9582458291091664e-05, "loss": 1.0689, "step": 705 }, { "epoch": 0.11971174226367105, "grad_norm": 1.11133344212292, "learning_rate": 1.9580886347905338e-05, "loss": 1.0595, "step": 706 }, { "epoch": 0.1198813056379822, "grad_norm": 1.1233961546509057, "learning_rate": 1.957931151461572e-05, "loss": 1.0562, "step": 707 }, { "epoch": 0.12005086901229335, "grad_norm": 1.1131976361459097, "learning_rate": 1.957773379169785e-05, "loss": 1.0864, "step": 708 }, { "epoch": 0.12022043238660449, "grad_norm": 1.0394011428209982, "learning_rate": 1.957615317962767e-05, "loss": 1.065, "step": 709 }, { "epoch": 0.12038999576091564, "grad_norm": 1.0447190779516728, "learning_rate": 1.9574569678881965e-05, "loss": 1.0048, "step": 710 }, { "epoch": 0.12055955913522678, "grad_norm": 1.0483807763582313, "learning_rate": 1.9572983289938406e-05, "loss": 1.0364, "step": 711 }, { "epoch": 0.12072912250953793, "grad_norm": 1.0515683963858184, "learning_rate": 1.9571394013275534e-05, "loss": 1.085, "step": 712 }, { "epoch": 0.12089868588384908, "grad_norm": 1.0619953761084722, "learning_rate": 1.9569801849372757e-05, "loss": 1.0674, "step": 713 }, { "epoch": 0.12106824925816023, "grad_norm": 1.0105711943887983, "learning_rate": 1.9568206798710354e-05, "loss": 1.0215, "step": 714 }, { "epoch": 0.12123781263247138, "grad_norm": 1.0981892715403259, "learning_rate": 1.956660886176948e-05, "loss": 1.0672, "step": 715 }, { "epoch": 0.12140737600678253, "grad_norm": 1.1217723541913862, "learning_rate": 1.9565008039032158e-05, "loss": 1.0582, "step": 716 }, { "epoch": 0.12157693938109368, "grad_norm": 1.1402128480032945, "learning_rate": 1.9563404330981276e-05, "loss": 1.0469, "step": 717 }, { "epoch": 0.12174650275540483, "grad_norm": 1.0498357097710411, "learning_rate": 1.9561797738100602e-05, "loss": 1.0508, "step": 718 }, { "epoch": 0.12191606612971598, "grad_norm": 1.0806630544206426, "learning_rate": 1.956018826087477e-05, "loss": 1.0161, "step": 719 }, { "epoch": 0.12208562950402713, "grad_norm": 1.0926475072234518, "learning_rate": 1.9558575899789284e-05, "loss": 1.03, "step": 720 }, { "epoch": 0.12225519287833828, "grad_norm": 1.0522616284189565, "learning_rate": 1.9556960655330512e-05, "loss": 1.0778, "step": 721 }, { "epoch": 0.12242475625264942, "grad_norm": 1.1103142898686582, "learning_rate": 1.9555342527985703e-05, "loss": 1.0836, "step": 722 }, { "epoch": 0.12259431962696057, "grad_norm": 1.0670638635131737, "learning_rate": 1.955372151824297e-05, "loss": 1.0628, "step": 723 }, { "epoch": 0.12276388300127172, "grad_norm": 1.0692612826387502, "learning_rate": 1.955209762659129e-05, "loss": 1.0743, "step": 724 }, { "epoch": 0.12293344637558287, "grad_norm": 1.1134326946024073, "learning_rate": 1.955047085352052e-05, "loss": 1.0914, "step": 725 }, { "epoch": 0.12310300974989402, "grad_norm": 1.066456579328852, "learning_rate": 1.954884119952138e-05, "loss": 1.0896, "step": 726 }, { "epoch": 0.12327257312420517, "grad_norm": 1.0516259676360957, "learning_rate": 1.954720866508546e-05, "loss": 1.0571, "step": 727 }, { "epoch": 0.12344213649851632, "grad_norm": 1.1091564482378367, "learning_rate": 1.9545573250705216e-05, "loss": 1.0591, "step": 728 }, { "epoch": 0.12361169987282747, "grad_norm": 1.0736827541771194, "learning_rate": 1.954393495687398e-05, "loss": 1.0592, "step": 729 }, { "epoch": 0.12378126324713862, "grad_norm": 1.0676995150976312, "learning_rate": 1.9542293784085943e-05, "loss": 1.0344, "step": 730 }, { "epoch": 0.12395082662144977, "grad_norm": 1.109094421697646, "learning_rate": 1.9540649732836177e-05, "loss": 1.036, "step": 731 }, { "epoch": 0.12412038999576092, "grad_norm": 1.0992163288953916, "learning_rate": 1.953900280362061e-05, "loss": 1.052, "step": 732 }, { "epoch": 0.12428995337007206, "grad_norm": 1.1143313658995975, "learning_rate": 1.9537352996936046e-05, "loss": 1.0596, "step": 733 }, { "epoch": 0.12445951674438321, "grad_norm": 1.1568450359004148, "learning_rate": 1.953570031328015e-05, "loss": 1.0661, "step": 734 }, { "epoch": 0.12462908011869436, "grad_norm": 1.0609061004604936, "learning_rate": 1.953404475315146e-05, "loss": 1.058, "step": 735 }, { "epoch": 0.12479864349300551, "grad_norm": 1.0871121305779712, "learning_rate": 1.9532386317049387e-05, "loss": 1.0575, "step": 736 }, { "epoch": 0.12496820686731666, "grad_norm": 1.1036971008330496, "learning_rate": 1.9530725005474195e-05, "loss": 1.0722, "step": 737 }, { "epoch": 0.1251377702416278, "grad_norm": 1.0546375145746436, "learning_rate": 1.9529060818927032e-05, "loss": 1.0666, "step": 738 }, { "epoch": 0.12530733361593896, "grad_norm": 1.155859502035293, "learning_rate": 1.9527393757909895e-05, "loss": 1.042, "step": 739 }, { "epoch": 0.1254768969902501, "grad_norm": 1.1256224150237286, "learning_rate": 1.9525723822925662e-05, "loss": 1.0458, "step": 740 }, { "epoch": 0.12564646036456126, "grad_norm": 1.1030769129505762, "learning_rate": 1.9524051014478078e-05, "loss": 1.0713, "step": 741 }, { "epoch": 0.1258160237388724, "grad_norm": 1.1048862735764426, "learning_rate": 1.952237533307175e-05, "loss": 1.029, "step": 742 }, { "epoch": 0.12598558711318356, "grad_norm": 1.1749086704818206, "learning_rate": 1.9520696779212144e-05, "loss": 1.0751, "step": 743 }, { "epoch": 0.1261551504874947, "grad_norm": 1.181040183652613, "learning_rate": 1.951901535340561e-05, "loss": 1.0259, "step": 744 }, { "epoch": 0.12632471386180585, "grad_norm": 1.0953267398854833, "learning_rate": 1.9517331056159353e-05, "loss": 1.0613, "step": 745 }, { "epoch": 0.126494277236117, "grad_norm": 1.1008872632118907, "learning_rate": 1.9515643887981445e-05, "loss": 1.0352, "step": 746 }, { "epoch": 0.12666384061042815, "grad_norm": 1.0745130679535873, "learning_rate": 1.9513953849380826e-05, "loss": 1.0484, "step": 747 }, { "epoch": 0.1268334039847393, "grad_norm": 1.057377233277173, "learning_rate": 1.9512260940867298e-05, "loss": 1.0782, "step": 748 }, { "epoch": 0.12700296735905045, "grad_norm": 1.0371938329011086, "learning_rate": 1.9510565162951538e-05, "loss": 1.0262, "step": 749 }, { "epoch": 0.1271725307333616, "grad_norm": 1.1109505303344525, "learning_rate": 1.950886651614508e-05, "loss": 1.0357, "step": 750 }, { "epoch": 0.12734209410767275, "grad_norm": 1.0608914607950886, "learning_rate": 1.950716500096032e-05, "loss": 1.0517, "step": 751 }, { "epoch": 0.1275116574819839, "grad_norm": 1.185057809555846, "learning_rate": 1.9505460617910537e-05, "loss": 1.0526, "step": 752 }, { "epoch": 0.12768122085629505, "grad_norm": 1.1714793370562924, "learning_rate": 1.9503753367509855e-05, "loss": 1.1017, "step": 753 }, { "epoch": 0.1278507842306062, "grad_norm": 1.0712385935965227, "learning_rate": 1.9502043250273274e-05, "loss": 1.0395, "step": 754 }, { "epoch": 0.12802034760491734, "grad_norm": 1.1018897785658721, "learning_rate": 1.950033026671665e-05, "loss": 1.0623, "step": 755 }, { "epoch": 0.1281899109792285, "grad_norm": 1.0541590259684965, "learning_rate": 1.949861441735672e-05, "loss": 1.0639, "step": 756 }, { "epoch": 0.12835947435353964, "grad_norm": 1.103876190414786, "learning_rate": 1.949689570271107e-05, "loss": 1.053, "step": 757 }, { "epoch": 0.1285290377278508, "grad_norm": 1.149601209818205, "learning_rate": 1.9495174123298156e-05, "loss": 1.074, "step": 758 }, { "epoch": 0.12869860110216194, "grad_norm": 1.1288929168116069, "learning_rate": 1.949344967963729e-05, "loss": 1.0311, "step": 759 }, { "epoch": 0.1288681644764731, "grad_norm": 1.1542075140935122, "learning_rate": 1.949172237224867e-05, "loss": 1.0535, "step": 760 }, { "epoch": 0.12903772785078424, "grad_norm": 1.1788709496912684, "learning_rate": 1.9489992201653337e-05, "loss": 1.0459, "step": 761 }, { "epoch": 0.1292072912250954, "grad_norm": 1.183206255652252, "learning_rate": 1.9488259168373198e-05, "loss": 1.0918, "step": 762 }, { "epoch": 0.12937685459940654, "grad_norm": 1.0696582001300654, "learning_rate": 1.948652327293103e-05, "loss": 1.0519, "step": 763 }, { "epoch": 0.1295464179737177, "grad_norm": 1.156945304413662, "learning_rate": 1.9484784515850474e-05, "loss": 1.0493, "step": 764 }, { "epoch": 0.12971598134802884, "grad_norm": 1.2245080523618486, "learning_rate": 1.9483042897656032e-05, "loss": 1.0585, "step": 765 }, { "epoch": 0.12988554472233999, "grad_norm": 1.057012084398467, "learning_rate": 1.9481298418873063e-05, "loss": 1.0238, "step": 766 }, { "epoch": 0.13005510809665113, "grad_norm": 1.050657871575477, "learning_rate": 1.94795510800278e-05, "loss": 1.0341, "step": 767 }, { "epoch": 0.13022467147096228, "grad_norm": 1.047017410590055, "learning_rate": 1.9477800881647327e-05, "loss": 1.0539, "step": 768 }, { "epoch": 0.13039423484527343, "grad_norm": 1.065130739659555, "learning_rate": 1.9476047824259602e-05, "loss": 1.009, "step": 769 }, { "epoch": 0.13056379821958458, "grad_norm": 1.0906992629658234, "learning_rate": 1.9474291908393437e-05, "loss": 1.0465, "step": 770 }, { "epoch": 0.13073336159389573, "grad_norm": 1.0428024448618274, "learning_rate": 1.947253313457851e-05, "loss": 1.0535, "step": 771 }, { "epoch": 0.13090292496820688, "grad_norm": 1.0792668868718736, "learning_rate": 1.947077150334536e-05, "loss": 1.0771, "step": 772 }, { "epoch": 0.13107248834251803, "grad_norm": 1.0482438377755623, "learning_rate": 1.946900701522539e-05, "loss": 1.0274, "step": 773 }, { "epoch": 0.13124205171682918, "grad_norm": 1.0694689387805867, "learning_rate": 1.946723967075086e-05, "loss": 1.0347, "step": 774 }, { "epoch": 0.13141161509114033, "grad_norm": 0.708629489669686, "learning_rate": 1.94654694704549e-05, "loss": 0.9248, "step": 775 }, { "epoch": 0.13158117846545148, "grad_norm": 1.1345356573802166, "learning_rate": 1.9463696414871493e-05, "loss": 1.0578, "step": 776 }, { "epoch": 0.13175074183976263, "grad_norm": 1.1449463455835753, "learning_rate": 1.946192050453549e-05, "loss": 1.0535, "step": 777 }, { "epoch": 0.13192030521407375, "grad_norm": 1.164004341203831, "learning_rate": 1.946014173998259e-05, "loss": 1.1073, "step": 778 }, { "epoch": 0.1320898685883849, "grad_norm": 1.0227968562490022, "learning_rate": 1.9458360121749372e-05, "loss": 1.0577, "step": 779 }, { "epoch": 0.13225943196269604, "grad_norm": 1.066166458355352, "learning_rate": 1.9456575650373267e-05, "loss": 1.0674, "step": 780 }, { "epoch": 0.1324289953370072, "grad_norm": 1.099183425357359, "learning_rate": 1.9454788326392558e-05, "loss": 1.0927, "step": 781 }, { "epoch": 0.13259855871131834, "grad_norm": 1.1327547948403307, "learning_rate": 1.9452998150346403e-05, "loss": 0.9931, "step": 782 }, { "epoch": 0.1327681220856295, "grad_norm": 1.0818730994846633, "learning_rate": 1.9451205122774815e-05, "loss": 1.0927, "step": 783 }, { "epoch": 0.13293768545994064, "grad_norm": 1.1058974106504893, "learning_rate": 1.9449409244218662e-05, "loss": 1.1077, "step": 784 }, { "epoch": 0.1331072488342518, "grad_norm": 1.0556971309698218, "learning_rate": 1.944761051521968e-05, "loss": 1.0346, "step": 785 }, { "epoch": 0.13327681220856294, "grad_norm": 1.1563290924721255, "learning_rate": 1.9445808936320457e-05, "loss": 1.1217, "step": 786 }, { "epoch": 0.1334463755828741, "grad_norm": 1.0528888161777412, "learning_rate": 1.9444004508064446e-05, "loss": 1.0575, "step": 787 }, { "epoch": 0.13361593895718524, "grad_norm": 0.9965187860897641, "learning_rate": 1.944219723099596e-05, "loss": 1.012, "step": 788 }, { "epoch": 0.1337855023314964, "grad_norm": 1.0152070539684257, "learning_rate": 1.944038710566017e-05, "loss": 1.0396, "step": 789 }, { "epoch": 0.13395506570580754, "grad_norm": 1.0311329132119162, "learning_rate": 1.9438574132603106e-05, "loss": 1.0219, "step": 790 }, { "epoch": 0.13412462908011868, "grad_norm": 1.0310376029257697, "learning_rate": 1.943675831237165e-05, "loss": 1.0241, "step": 791 }, { "epoch": 0.13429419245442983, "grad_norm": 1.02721440556268, "learning_rate": 1.9434939645513556e-05, "loss": 1.0143, "step": 792 }, { "epoch": 0.13446375582874098, "grad_norm": 1.0330958900978466, "learning_rate": 1.9433118132577432e-05, "loss": 1.0595, "step": 793 }, { "epoch": 0.13463331920305213, "grad_norm": 1.0765605631263437, "learning_rate": 1.9431293774112737e-05, "loss": 1.0576, "step": 794 }, { "epoch": 0.13480288257736328, "grad_norm": 1.0281136330649452, "learning_rate": 1.94294665706698e-05, "loss": 1.0162, "step": 795 }, { "epoch": 0.13497244595167443, "grad_norm": 1.0726772948352115, "learning_rate": 1.94276365227998e-05, "loss": 1.0614, "step": 796 }, { "epoch": 0.13514200932598558, "grad_norm": 1.0814789889013592, "learning_rate": 1.9425803631054773e-05, "loss": 1.0749, "step": 797 }, { "epoch": 0.13531157270029673, "grad_norm": 1.1647803406374613, "learning_rate": 1.9423967895987625e-05, "loss": 1.0062, "step": 798 }, { "epoch": 0.13548113607460788, "grad_norm": 1.078971477580237, "learning_rate": 1.94221293181521e-05, "loss": 1.038, "step": 799 }, { "epoch": 0.13565069944891903, "grad_norm": 1.0528174425184071, "learning_rate": 1.942028789810282e-05, "loss": 1.068, "step": 800 }, { "epoch": 0.13582026282323018, "grad_norm": 1.1118827397760795, "learning_rate": 1.941844363639525e-05, "loss": 1.0472, "step": 801 }, { "epoch": 0.13598982619754132, "grad_norm": 1.0636213590805914, "learning_rate": 1.9416596533585717e-05, "loss": 1.0542, "step": 802 }, { "epoch": 0.13615938957185247, "grad_norm": 1.1154868381971752, "learning_rate": 1.9414746590231407e-05, "loss": 1.074, "step": 803 }, { "epoch": 0.13632895294616362, "grad_norm": 1.0648594969353748, "learning_rate": 1.9412893806890358e-05, "loss": 1.0796, "step": 804 }, { "epoch": 0.13649851632047477, "grad_norm": 1.004976249971096, "learning_rate": 1.941103818412147e-05, "loss": 1.0544, "step": 805 }, { "epoch": 0.13666807969478592, "grad_norm": 1.0367836077040782, "learning_rate": 1.94091797224845e-05, "loss": 1.0323, "step": 806 }, { "epoch": 0.13683764306909707, "grad_norm": 1.0262546052925723, "learning_rate": 1.9407318422540057e-05, "loss": 0.9813, "step": 807 }, { "epoch": 0.13700720644340822, "grad_norm": 1.0475035535802888, "learning_rate": 1.9405454284849604e-05, "loss": 1.0546, "step": 808 }, { "epoch": 0.13717676981771937, "grad_norm": 1.0903173604692298, "learning_rate": 1.9403587309975467e-05, "loss": 1.0455, "step": 809 }, { "epoch": 0.13734633319203052, "grad_norm": 1.0426227028523483, "learning_rate": 1.9401717498480825e-05, "loss": 1.0694, "step": 810 }, { "epoch": 0.13751589656634167, "grad_norm": 1.0346758417438304, "learning_rate": 1.939984485092971e-05, "loss": 1.0504, "step": 811 }, { "epoch": 0.13768545994065282, "grad_norm": 1.1103498381770676, "learning_rate": 1.9397969367887014e-05, "loss": 1.0392, "step": 812 }, { "epoch": 0.13785502331496396, "grad_norm": 1.0873796773509765, "learning_rate": 1.9396091049918478e-05, "loss": 1.0285, "step": 813 }, { "epoch": 0.1380245866892751, "grad_norm": 1.0733102934679524, "learning_rate": 1.9394209897590707e-05, "loss": 1.0791, "step": 814 }, { "epoch": 0.13819415006358626, "grad_norm": 1.124458655592655, "learning_rate": 1.9392325911471154e-05, "loss": 1.0732, "step": 815 }, { "epoch": 0.1383637134378974, "grad_norm": 1.1732003630116532, "learning_rate": 1.939043909212813e-05, "loss": 1.0881, "step": 816 }, { "epoch": 0.13853327681220856, "grad_norm": 1.1864935574802329, "learning_rate": 1.93885494401308e-05, "loss": 1.0796, "step": 817 }, { "epoch": 0.1387028401865197, "grad_norm": 1.08967866372316, "learning_rate": 1.9386656956049182e-05, "loss": 1.0818, "step": 818 }, { "epoch": 0.13887240356083086, "grad_norm": 1.1246588499786248, "learning_rate": 1.9384761640454152e-05, "loss": 1.0862, "step": 819 }, { "epoch": 0.139041966935142, "grad_norm": 1.084218258164674, "learning_rate": 1.9382863493917433e-05, "loss": 1.0448, "step": 820 }, { "epoch": 0.13921153030945316, "grad_norm": 1.104604455415271, "learning_rate": 1.9380962517011614e-05, "loss": 1.0687, "step": 821 }, { "epoch": 0.1393810936837643, "grad_norm": 1.028278427220313, "learning_rate": 1.9379058710310124e-05, "loss": 1.016, "step": 822 }, { "epoch": 0.13955065705807546, "grad_norm": 1.077884981811719, "learning_rate": 1.9377152074387253e-05, "loss": 1.0515, "step": 823 }, { "epoch": 0.1397202204323866, "grad_norm": 1.185172547417553, "learning_rate": 1.9375242609818144e-05, "loss": 1.0673, "step": 824 }, { "epoch": 0.13988978380669775, "grad_norm": 1.082380869634408, "learning_rate": 1.9373330317178797e-05, "loss": 1.0594, "step": 825 }, { "epoch": 0.1400593471810089, "grad_norm": 1.1200443818381345, "learning_rate": 1.9371415197046054e-05, "loss": 1.0488, "step": 826 }, { "epoch": 0.14022891055532005, "grad_norm": 1.1395052604783387, "learning_rate": 1.936949724999762e-05, "loss": 1.0788, "step": 827 }, { "epoch": 0.1403984739296312, "grad_norm": 1.1674257978885036, "learning_rate": 1.9367576476612048e-05, "loss": 1.0628, "step": 828 }, { "epoch": 0.14056803730394235, "grad_norm": 1.0619372371369435, "learning_rate": 1.9365652877468747e-05, "loss": 1.0531, "step": 829 }, { "epoch": 0.1407376006782535, "grad_norm": 1.0434936336092933, "learning_rate": 1.936372645314798e-05, "loss": 1.0164, "step": 830 }, { "epoch": 0.14090716405256465, "grad_norm": 1.1004725737648793, "learning_rate": 1.936179720423085e-05, "loss": 1.0851, "step": 831 }, { "epoch": 0.1410767274268758, "grad_norm": 1.0589604061310893, "learning_rate": 1.9359865131299328e-05, "loss": 1.0321, "step": 832 }, { "epoch": 0.14124629080118695, "grad_norm": 1.0475759575945085, "learning_rate": 1.9357930234936228e-05, "loss": 1.0517, "step": 833 }, { "epoch": 0.1414158541754981, "grad_norm": 1.045474612726252, "learning_rate": 1.9355992515725216e-05, "loss": 1.0448, "step": 834 }, { "epoch": 0.14158541754980924, "grad_norm": 0.7745132914901004, "learning_rate": 1.935405197425081e-05, "loss": 0.8728, "step": 835 }, { "epoch": 0.1417549809241204, "grad_norm": 1.093659455283772, "learning_rate": 1.935210861109838e-05, "loss": 1.0387, "step": 836 }, { "epoch": 0.14192454429843154, "grad_norm": 1.1527962599465735, "learning_rate": 1.9350162426854152e-05, "loss": 1.0857, "step": 837 }, { "epoch": 0.1420941076727427, "grad_norm": 1.0862456354506587, "learning_rate": 1.9348213422105192e-05, "loss": 1.0166, "step": 838 }, { "epoch": 0.14226367104705384, "grad_norm": 1.128346863922026, "learning_rate": 1.9346261597439427e-05, "loss": 1.0729, "step": 839 }, { "epoch": 0.142433234421365, "grad_norm": 1.1093450255237425, "learning_rate": 1.9344306953445632e-05, "loss": 1.0602, "step": 840 }, { "epoch": 0.14260279779567614, "grad_norm": 0.7005186504674276, "learning_rate": 1.9342349490713427e-05, "loss": 0.8628, "step": 841 }, { "epoch": 0.1427723611699873, "grad_norm": 1.048667347671691, "learning_rate": 1.934038920983329e-05, "loss": 1.0228, "step": 842 }, { "epoch": 0.14294192454429844, "grad_norm": 1.1304776145369564, "learning_rate": 1.9338426111396548e-05, "loss": 1.0898, "step": 843 }, { "epoch": 0.1431114879186096, "grad_norm": 1.1004750119851474, "learning_rate": 1.9336460195995368e-05, "loss": 1.0672, "step": 844 }, { "epoch": 0.14328105129292074, "grad_norm": 1.1168790606557641, "learning_rate": 1.933449146422278e-05, "loss": 1.0568, "step": 845 }, { "epoch": 0.14345061466723188, "grad_norm": 1.0614059811728171, "learning_rate": 1.9332519916672656e-05, "loss": 0.9996, "step": 846 }, { "epoch": 0.14362017804154303, "grad_norm": 1.0596385562077324, "learning_rate": 1.933054555393972e-05, "loss": 1.0376, "step": 847 }, { "epoch": 0.14378974141585418, "grad_norm": 1.0533999496487665, "learning_rate": 1.932856837661954e-05, "loss": 1.0727, "step": 848 }, { "epoch": 0.14395930479016533, "grad_norm": 1.0231359277525693, "learning_rate": 1.932658838530855e-05, "loss": 1.0531, "step": 849 }, { "epoch": 0.14412886816447648, "grad_norm": 1.0567186211411237, "learning_rate": 1.9324605580604007e-05, "loss": 1.0606, "step": 850 }, { "epoch": 0.14429843153878763, "grad_norm": 1.0960097103405915, "learning_rate": 1.9322619963104036e-05, "loss": 1.0726, "step": 851 }, { "epoch": 0.14446799491309878, "grad_norm": 0.9766166614205893, "learning_rate": 1.93206315334076e-05, "loss": 1.0158, "step": 852 }, { "epoch": 0.14463755828740993, "grad_norm": 1.0086404438131777, "learning_rate": 1.9318640292114526e-05, "loss": 1.0015, "step": 853 }, { "epoch": 0.14480712166172108, "grad_norm": 1.074452694617385, "learning_rate": 1.9316646239825466e-05, "loss": 1.0602, "step": 854 }, { "epoch": 0.14497668503603223, "grad_norm": 1.0618087044378768, "learning_rate": 1.9314649377141935e-05, "loss": 1.0495, "step": 855 }, { "epoch": 0.14514624841034338, "grad_norm": 0.7219827404997976, "learning_rate": 1.9312649704666295e-05, "loss": 0.8791, "step": 856 }, { "epoch": 0.14531581178465452, "grad_norm": 1.1230848382726437, "learning_rate": 1.9310647223001752e-05, "loss": 1.053, "step": 857 }, { "epoch": 0.14548537515896567, "grad_norm": 1.1494633063944097, "learning_rate": 1.9308641932752362e-05, "loss": 1.0129, "step": 858 }, { "epoch": 0.14565493853327682, "grad_norm": 1.0218116847276328, "learning_rate": 1.9306633834523022e-05, "loss": 1.0269, "step": 859 }, { "epoch": 0.14582450190758797, "grad_norm": 1.0961351371659203, "learning_rate": 1.9304622928919486e-05, "loss": 1.0127, "step": 860 }, { "epoch": 0.14599406528189912, "grad_norm": 1.141579446852389, "learning_rate": 1.9302609216548352e-05, "loss": 1.0285, "step": 861 }, { "epoch": 0.14616362865621027, "grad_norm": 1.0793211648155234, "learning_rate": 1.9300592698017054e-05, "loss": 1.0495, "step": 862 }, { "epoch": 0.14633319203052142, "grad_norm": 1.1421417009184929, "learning_rate": 1.929857337393389e-05, "loss": 1.0903, "step": 863 }, { "epoch": 0.14650275540483257, "grad_norm": 1.110017135756902, "learning_rate": 1.9296551244907986e-05, "loss": 1.0898, "step": 864 }, { "epoch": 0.14667231877914372, "grad_norm": 1.0600519730432707, "learning_rate": 1.929452631154933e-05, "loss": 1.0365, "step": 865 }, { "epoch": 0.14684188215345487, "grad_norm": 1.1028595800699648, "learning_rate": 1.929249857446875e-05, "loss": 1.0861, "step": 866 }, { "epoch": 0.14701144552776602, "grad_norm": 1.0574596805822303, "learning_rate": 1.9290468034277912e-05, "loss": 1.0133, "step": 867 }, { "epoch": 0.14718100890207717, "grad_norm": 1.054189464636296, "learning_rate": 1.9288434691589343e-05, "loss": 1.0333, "step": 868 }, { "epoch": 0.1473505722763883, "grad_norm": 1.0141671044330676, "learning_rate": 1.9286398547016398e-05, "loss": 1.0779, "step": 869 }, { "epoch": 0.14752013565069944, "grad_norm": 1.064214895313415, "learning_rate": 1.9284359601173295e-05, "loss": 1.0211, "step": 870 }, { "epoch": 0.14768969902501058, "grad_norm": 0.7309553171238877, "learning_rate": 1.928231785467508e-05, "loss": 0.7908, "step": 871 }, { "epoch": 0.14785926239932173, "grad_norm": 1.09157235299895, "learning_rate": 1.9280273308137662e-05, "loss": 1.0596, "step": 872 }, { "epoch": 0.14802882577363288, "grad_norm": 1.0905388089422527, "learning_rate": 1.9278225962177776e-05, "loss": 1.079, "step": 873 }, { "epoch": 0.14819838914794403, "grad_norm": 1.0773539631614544, "learning_rate": 1.9276175817413013e-05, "loss": 1.0536, "step": 874 }, { "epoch": 0.14836795252225518, "grad_norm": 1.0712206356208391, "learning_rate": 1.9274122874461808e-05, "loss": 1.0556, "step": 875 }, { "epoch": 0.14853751589656633, "grad_norm": 1.0417419947535282, "learning_rate": 1.9272067133943432e-05, "loss": 1.0004, "step": 876 }, { "epoch": 0.14870707927087748, "grad_norm": 1.0486476963464526, "learning_rate": 1.9270008596478008e-05, "loss": 1.0165, "step": 877 }, { "epoch": 0.14887664264518863, "grad_norm": 0.6863337643252526, "learning_rate": 1.92679472626865e-05, "loss": 0.8939, "step": 878 }, { "epoch": 0.14904620601949978, "grad_norm": 1.0583938505102617, "learning_rate": 1.9265883133190715e-05, "loss": 1.0526, "step": 879 }, { "epoch": 0.14921576939381093, "grad_norm": 1.1128101186298782, "learning_rate": 1.9263816208613306e-05, "loss": 1.0686, "step": 880 }, { "epoch": 0.14938533276812208, "grad_norm": 1.0404999748912096, "learning_rate": 1.9261746489577767e-05, "loss": 1.0311, "step": 881 }, { "epoch": 0.14955489614243322, "grad_norm": 1.125837888018767, "learning_rate": 1.925967397670843e-05, "loss": 1.095, "step": 882 }, { "epoch": 0.14972445951674437, "grad_norm": 1.0011525930771923, "learning_rate": 1.9257598670630484e-05, "loss": 1.038, "step": 883 }, { "epoch": 0.14989402289105552, "grad_norm": 1.0791155860988666, "learning_rate": 1.925552057196994e-05, "loss": 1.0327, "step": 884 }, { "epoch": 0.15006358626536667, "grad_norm": 1.0920643778551258, "learning_rate": 1.9253439681353673e-05, "loss": 1.0643, "step": 885 }, { "epoch": 0.15023314963967782, "grad_norm": 1.0842220737705153, "learning_rate": 1.925135599940938e-05, "loss": 1.0686, "step": 886 }, { "epoch": 0.15040271301398897, "grad_norm": 1.0443544799260835, "learning_rate": 1.924926952676562e-05, "loss": 1.1122, "step": 887 }, { "epoch": 0.15057227638830012, "grad_norm": 1.067319759526223, "learning_rate": 1.9247180264051777e-05, "loss": 1.0375, "step": 888 }, { "epoch": 0.15074183976261127, "grad_norm": 0.6462489530664455, "learning_rate": 1.9245088211898086e-05, "loss": 0.8483, "step": 889 }, { "epoch": 0.15091140313692242, "grad_norm": 1.1231491563255163, "learning_rate": 1.9242993370935622e-05, "loss": 1.0804, "step": 890 }, { "epoch": 0.15108096651123357, "grad_norm": 1.0525430337391257, "learning_rate": 1.9240895741796297e-05, "loss": 1.0425, "step": 891 }, { "epoch": 0.15125052988554472, "grad_norm": 1.0477757204520055, "learning_rate": 1.9238795325112867e-05, "loss": 1.0448, "step": 892 }, { "epoch": 0.15142009325985586, "grad_norm": 1.0916191614000863, "learning_rate": 1.9236692121518934e-05, "loss": 1.0289, "step": 893 }, { "epoch": 0.151589656634167, "grad_norm": 1.1102801951240548, "learning_rate": 1.9234586131648933e-05, "loss": 1.0631, "step": 894 }, { "epoch": 0.15175922000847816, "grad_norm": 1.1220606572188465, "learning_rate": 1.923247735613814e-05, "loss": 1.0573, "step": 895 }, { "epoch": 0.1519287833827893, "grad_norm": 0.9998438621890287, "learning_rate": 1.9230365795622675e-05, "loss": 1.0113, "step": 896 }, { "epoch": 0.15209834675710046, "grad_norm": 1.0752354115172118, "learning_rate": 1.9228251450739495e-05, "loss": 1.0174, "step": 897 }, { "epoch": 0.1522679101314116, "grad_norm": 1.1136183818569536, "learning_rate": 1.92261343221264e-05, "loss": 1.0307, "step": 898 }, { "epoch": 0.15243747350572276, "grad_norm": 1.041094664900339, "learning_rate": 1.922401441042203e-05, "loss": 1.0793, "step": 899 }, { "epoch": 0.1526070368800339, "grad_norm": 1.0273988853337395, "learning_rate": 1.9221891716265865e-05, "loss": 1.0478, "step": 900 }, { "epoch": 0.15277660025434506, "grad_norm": 1.0150885591380276, "learning_rate": 1.921976624029821e-05, "loss": 1.0542, "step": 901 }, { "epoch": 0.1529461636286562, "grad_norm": 1.1072883536606641, "learning_rate": 1.9217637983160234e-05, "loss": 1.0541, "step": 902 }, { "epoch": 0.15311572700296736, "grad_norm": 1.1024232287688263, "learning_rate": 1.9215506945493933e-05, "loss": 1.0279, "step": 903 }, { "epoch": 0.1532852903772785, "grad_norm": 1.1046565029368205, "learning_rate": 1.921337312794213e-05, "loss": 1.0603, "step": 904 }, { "epoch": 0.15345485375158965, "grad_norm": 1.1020144538617223, "learning_rate": 1.92112365311485e-05, "loss": 1.0668, "step": 905 }, { "epoch": 0.1536244171259008, "grad_norm": 0.6306196845086575, "learning_rate": 1.9209097155757562e-05, "loss": 0.8127, "step": 906 }, { "epoch": 0.15379398050021195, "grad_norm": 1.1222438078603456, "learning_rate": 1.9206955002414662e-05, "loss": 1.0866, "step": 907 }, { "epoch": 0.1539635438745231, "grad_norm": 1.080412892285181, "learning_rate": 1.920481007176598e-05, "loss": 1.0516, "step": 908 }, { "epoch": 0.15413310724883425, "grad_norm": 1.1080426198720974, "learning_rate": 1.920266236445855e-05, "loss": 1.0851, "step": 909 }, { "epoch": 0.1543026706231454, "grad_norm": 1.0715007809411334, "learning_rate": 1.920051188114023e-05, "loss": 1.0426, "step": 910 }, { "epoch": 0.15447223399745655, "grad_norm": 1.0474262655109656, "learning_rate": 1.919835862245972e-05, "loss": 1.0341, "step": 911 }, { "epoch": 0.1546417973717677, "grad_norm": 1.0756568049177015, "learning_rate": 1.9196202589066556e-05, "loss": 1.0475, "step": 912 }, { "epoch": 0.15481136074607885, "grad_norm": 1.043877036715147, "learning_rate": 1.919404378161111e-05, "loss": 1.034, "step": 913 }, { "epoch": 0.15498092412039, "grad_norm": 1.0545499129547016, "learning_rate": 1.9191882200744602e-05, "loss": 1.0603, "step": 914 }, { "epoch": 0.15515048749470114, "grad_norm": 1.1419323750225032, "learning_rate": 1.918971784711907e-05, "loss": 1.0524, "step": 915 }, { "epoch": 0.1553200508690123, "grad_norm": 1.0848607341272305, "learning_rate": 1.91875507213874e-05, "loss": 1.0129, "step": 916 }, { "epoch": 0.15548961424332344, "grad_norm": 1.0304906798587208, "learning_rate": 1.918538082420332e-05, "loss": 1.0605, "step": 917 }, { "epoch": 0.1556591776176346, "grad_norm": 1.0863766933224028, "learning_rate": 1.918320815622137e-05, "loss": 1.0481, "step": 918 }, { "epoch": 0.15582874099194574, "grad_norm": 1.0710321794041036, "learning_rate": 1.9181032718096957e-05, "loss": 1.0496, "step": 919 }, { "epoch": 0.1559983043662569, "grad_norm": 1.0297742342897147, "learning_rate": 1.9178854510486298e-05, "loss": 0.9914, "step": 920 }, { "epoch": 0.15616786774056804, "grad_norm": 1.0732974568090625, "learning_rate": 1.9176673534046465e-05, "loss": 1.013, "step": 921 }, { "epoch": 0.1563374311148792, "grad_norm": 1.088103948993506, "learning_rate": 1.9174489789435348e-05, "loss": 1.0226, "step": 922 }, { "epoch": 0.15650699448919034, "grad_norm": 1.1119656602506007, "learning_rate": 1.9172303277311686e-05, "loss": 1.0285, "step": 923 }, { "epoch": 0.1566765578635015, "grad_norm": 1.1030374236288156, "learning_rate": 1.917011399833504e-05, "loss": 1.0452, "step": 924 }, { "epoch": 0.15684612123781264, "grad_norm": 0.6482444069676975, "learning_rate": 1.9167921953165827e-05, "loss": 0.8249, "step": 925 }, { "epoch": 0.15701568461212378, "grad_norm": 1.1033478237135148, "learning_rate": 1.9165727142465266e-05, "loss": 1.0043, "step": 926 }, { "epoch": 0.15718524798643493, "grad_norm": 1.1299739623715552, "learning_rate": 1.916352956689544e-05, "loss": 1.027, "step": 927 }, { "epoch": 0.15735481136074608, "grad_norm": 1.0584926185750587, "learning_rate": 1.916132922711925e-05, "loss": 1.022, "step": 928 }, { "epoch": 0.15752437473505723, "grad_norm": 1.056992187046396, "learning_rate": 1.9159126123800437e-05, "loss": 1.0276, "step": 929 }, { "epoch": 0.15769393810936838, "grad_norm": 1.0411380281761882, "learning_rate": 1.915692025760357e-05, "loss": 1.0546, "step": 930 }, { "epoch": 0.15786350148367953, "grad_norm": 1.0682731745814429, "learning_rate": 1.9154711629194062e-05, "loss": 1.054, "step": 931 }, { "epoch": 0.15803306485799068, "grad_norm": 1.1561727435104403, "learning_rate": 1.9152500239238144e-05, "loss": 1.0455, "step": 932 }, { "epoch": 0.15820262823230183, "grad_norm": 1.1312827819686595, "learning_rate": 1.9150286088402898e-05, "loss": 1.104, "step": 933 }, { "epoch": 0.15837219160661298, "grad_norm": 1.0695744152908222, "learning_rate": 1.9148069177356223e-05, "loss": 1.0678, "step": 934 }, { "epoch": 0.15854175498092413, "grad_norm": 1.0471665509140067, "learning_rate": 1.9145849506766856e-05, "loss": 1.018, "step": 935 }, { "epoch": 0.15871131835523528, "grad_norm": 1.09813822242061, "learning_rate": 1.914362707730437e-05, "loss": 1.0761, "step": 936 }, { "epoch": 0.15888088172954642, "grad_norm": 1.0772744799025893, "learning_rate": 1.9141401889639167e-05, "loss": 1.0782, "step": 937 }, { "epoch": 0.15905044510385757, "grad_norm": 1.0742866315924127, "learning_rate": 1.9139173944442482e-05, "loss": 1.0427, "step": 938 }, { "epoch": 0.15922000847816872, "grad_norm": 1.1626263938467596, "learning_rate": 1.913694324238638e-05, "loss": 1.0464, "step": 939 }, { "epoch": 0.15938957185247987, "grad_norm": 0.6635534612233993, "learning_rate": 1.9134709784143763e-05, "loss": 0.8521, "step": 940 }, { "epoch": 0.15955913522679102, "grad_norm": 1.0890073638226203, "learning_rate": 1.9132473570388354e-05, "loss": 1.0568, "step": 941 }, { "epoch": 0.15972869860110217, "grad_norm": 1.0754807621640843, "learning_rate": 1.913023460179472e-05, "loss": 1.041, "step": 942 }, { "epoch": 0.15989826197541332, "grad_norm": 1.1413046230610473, "learning_rate": 1.9127992879038245e-05, "loss": 1.0915, "step": 943 }, { "epoch": 0.16006782534972447, "grad_norm": 1.0200204711647267, "learning_rate": 1.912574840279516e-05, "loss": 1.0221, "step": 944 }, { "epoch": 0.16023738872403562, "grad_norm": 1.0199121463513054, "learning_rate": 1.9123501173742514e-05, "loss": 1.0164, "step": 945 }, { "epoch": 0.16040695209834677, "grad_norm": 1.0552427764536851, "learning_rate": 1.912125119255819e-05, "loss": 1.0639, "step": 946 }, { "epoch": 0.16057651547265792, "grad_norm": 1.0924354596458121, "learning_rate": 1.91189984599209e-05, "loss": 1.0964, "step": 947 }, { "epoch": 0.16074607884696906, "grad_norm": 1.1082698011187466, "learning_rate": 1.9116742976510195e-05, "loss": 1.061, "step": 948 }, { "epoch": 0.16091564222128021, "grad_norm": 1.087075860841995, "learning_rate": 1.911448474300644e-05, "loss": 1.0875, "step": 949 }, { "epoch": 0.16108520559559136, "grad_norm": 1.1090689673687966, "learning_rate": 1.911222376009084e-05, "loss": 1.0446, "step": 950 }, { "epoch": 0.1612547689699025, "grad_norm": 1.116789238696624, "learning_rate": 1.910996002844543e-05, "loss": 1.0076, "step": 951 }, { "epoch": 0.16142433234421366, "grad_norm": 1.0527060430720205, "learning_rate": 1.910769354875307e-05, "loss": 1.0154, "step": 952 }, { "epoch": 0.1615938957185248, "grad_norm": 1.0856744510099625, "learning_rate": 1.910542432169745e-05, "loss": 1.0301, "step": 953 }, { "epoch": 0.16176345909283596, "grad_norm": 1.0418508889820652, "learning_rate": 1.910315234796309e-05, "loss": 1.0508, "step": 954 }, { "epoch": 0.1619330224671471, "grad_norm": 1.2147413236375053, "learning_rate": 1.9100877628235337e-05, "loss": 1.0464, "step": 955 }, { "epoch": 0.16210258584145826, "grad_norm": 1.033459857420962, "learning_rate": 1.9098600163200366e-05, "loss": 1.0407, "step": 956 }, { "epoch": 0.1622721492157694, "grad_norm": 1.1262256829727249, "learning_rate": 1.9096319953545186e-05, "loss": 1.0507, "step": 957 }, { "epoch": 0.16244171259008056, "grad_norm": 1.0898486768419438, "learning_rate": 1.9094036999957623e-05, "loss": 1.0211, "step": 958 }, { "epoch": 0.1626112759643917, "grad_norm": 1.1217666553293602, "learning_rate": 1.909175130312634e-05, "loss": 1.0563, "step": 959 }, { "epoch": 0.16278083933870285, "grad_norm": 1.021613343893737, "learning_rate": 1.9089462863740825e-05, "loss": 1.0407, "step": 960 }, { "epoch": 0.16295040271301398, "grad_norm": 1.0492735332740109, "learning_rate": 1.908717168249139e-05, "loss": 1.0045, "step": 961 }, { "epoch": 0.16311996608732512, "grad_norm": 1.0919994505596375, "learning_rate": 1.908487776006918e-05, "loss": 1.0238, "step": 962 }, { "epoch": 0.16328952946163627, "grad_norm": 1.0604412964637524, "learning_rate": 1.908258109716616e-05, "loss": 1.069, "step": 963 }, { "epoch": 0.16345909283594742, "grad_norm": 0.7881503841452441, "learning_rate": 1.908028169447513e-05, "loss": 0.8649, "step": 964 }, { "epoch": 0.16362865621025857, "grad_norm": 1.0749029440794484, "learning_rate": 1.9077979552689708e-05, "loss": 1.069, "step": 965 }, { "epoch": 0.16379821958456972, "grad_norm": 1.1538941115995935, "learning_rate": 1.907567467250434e-05, "loss": 1.0753, "step": 966 }, { "epoch": 0.16396778295888087, "grad_norm": 1.1153404163764684, "learning_rate": 1.907336705461431e-05, "loss": 1.0278, "step": 967 }, { "epoch": 0.16413734633319202, "grad_norm": 1.0097503219960056, "learning_rate": 1.9071056699715704e-05, "loss": 1.0315, "step": 968 }, { "epoch": 0.16430690970750317, "grad_norm": 1.0534508324509975, "learning_rate": 1.9068743608505454e-05, "loss": 1.0693, "step": 969 }, { "epoch": 0.16447647308181432, "grad_norm": 1.061456522681583, "learning_rate": 1.9066427781681314e-05, "loss": 0.9942, "step": 970 }, { "epoch": 0.16464603645612547, "grad_norm": 1.0270933639792335, "learning_rate": 1.9064109219941863e-05, "loss": 1.0459, "step": 971 }, { "epoch": 0.16481559983043662, "grad_norm": 1.0180798327303335, "learning_rate": 1.906178792398649e-05, "loss": 0.9937, "step": 972 }, { "epoch": 0.16498516320474776, "grad_norm": 1.0375579307790832, "learning_rate": 1.9059463894515427e-05, "loss": 1.0506, "step": 973 }, { "epoch": 0.1651547265790589, "grad_norm": 1.0909691437736226, "learning_rate": 1.905713713222973e-05, "loss": 1.0945, "step": 974 }, { "epoch": 0.16532428995337006, "grad_norm": 1.0991256665699363, "learning_rate": 1.9054807637831268e-05, "loss": 1.035, "step": 975 }, { "epoch": 0.1654938533276812, "grad_norm": 1.0670928903741004, "learning_rate": 1.905247541202274e-05, "loss": 1.0237, "step": 976 }, { "epoch": 0.16566341670199236, "grad_norm": 1.112108157502983, "learning_rate": 1.905014045550767e-05, "loss": 1.0695, "step": 977 }, { "epoch": 0.1658329800763035, "grad_norm": 1.0901247921547923, "learning_rate": 1.9047802768990404e-05, "loss": 1.0802, "step": 978 }, { "epoch": 0.16600254345061466, "grad_norm": 1.0314235423645806, "learning_rate": 1.9045462353176115e-05, "loss": 1.0258, "step": 979 }, { "epoch": 0.1661721068249258, "grad_norm": 0.9948835587073573, "learning_rate": 1.904311920877079e-05, "loss": 1.0121, "step": 980 }, { "epoch": 0.16634167019923696, "grad_norm": 1.080837986453593, "learning_rate": 1.904077333648126e-05, "loss": 1.0016, "step": 981 }, { "epoch": 0.1665112335735481, "grad_norm": 1.005237192138, "learning_rate": 1.9038424737015144e-05, "loss": 0.9961, "step": 982 }, { "epoch": 0.16668079694785926, "grad_norm": 1.0453587105029483, "learning_rate": 1.9036073411080917e-05, "loss": 1.0385, "step": 983 }, { "epoch": 0.1668503603221704, "grad_norm": 1.018615226677672, "learning_rate": 1.9033719359387866e-05, "loss": 1.0045, "step": 984 }, { "epoch": 0.16701992369648155, "grad_norm": 1.0897893243816406, "learning_rate": 1.903136258264609e-05, "loss": 1.1031, "step": 985 }, { "epoch": 0.1671894870707927, "grad_norm": 1.0516457017084275, "learning_rate": 1.9029003081566517e-05, "loss": 1.0653, "step": 986 }, { "epoch": 0.16735905044510385, "grad_norm": 1.0443845006885772, "learning_rate": 1.9026640856860906e-05, "loss": 1.0211, "step": 987 }, { "epoch": 0.167528613819415, "grad_norm": 1.023114656589325, "learning_rate": 1.9024275909241824e-05, "loss": 1.0318, "step": 988 }, { "epoch": 0.16769817719372615, "grad_norm": 1.069577021077146, "learning_rate": 1.9021908239422665e-05, "loss": 1.0172, "step": 989 }, { "epoch": 0.1678677405680373, "grad_norm": 1.0441193429697493, "learning_rate": 1.9019537848117645e-05, "loss": 1.0408, "step": 990 }, { "epoch": 0.16803730394234845, "grad_norm": 1.0704201916746707, "learning_rate": 1.9017164736041795e-05, "loss": 1.0382, "step": 991 }, { "epoch": 0.1682068673166596, "grad_norm": 1.0859432836965746, "learning_rate": 1.901478890391098e-05, "loss": 1.0616, "step": 992 }, { "epoch": 0.16837643069097075, "grad_norm": 1.0865061444583897, "learning_rate": 1.9012410352441866e-05, "loss": 1.056, "step": 993 }, { "epoch": 0.1685459940652819, "grad_norm": 1.0777844748349725, "learning_rate": 1.901002908235196e-05, "loss": 1.0626, "step": 994 }, { "epoch": 0.16871555743959304, "grad_norm": 1.0316132529230884, "learning_rate": 1.9007645094359576e-05, "loss": 1.0703, "step": 995 }, { "epoch": 0.1688851208139042, "grad_norm": 0.976761393545048, "learning_rate": 1.900525838918385e-05, "loss": 0.987, "step": 996 }, { "epoch": 0.16905468418821534, "grad_norm": 1.1449160277983745, "learning_rate": 1.9002868967544743e-05, "loss": 1.0615, "step": 997 }, { "epoch": 0.1692242475625265, "grad_norm": 1.0210079276096267, "learning_rate": 1.9000476830163022e-05, "loss": 1.0399, "step": 998 }, { "epoch": 0.16939381093683764, "grad_norm": 1.0355190009838098, "learning_rate": 1.899808197776029e-05, "loss": 1.0554, "step": 999 }, { "epoch": 0.1695633743111488, "grad_norm": 1.0298578209665756, "learning_rate": 1.8995684411058965e-05, "loss": 1.0219, "step": 1000 }, { "epoch": 0.16973293768545994, "grad_norm": 1.0822818753800265, "learning_rate": 1.899328413078227e-05, "loss": 1.0505, "step": 1001 }, { "epoch": 0.1699025010597711, "grad_norm": 1.0399272477537211, "learning_rate": 1.899088113765426e-05, "loss": 1.0762, "step": 1002 }, { "epoch": 0.17007206443408224, "grad_norm": 1.054593804719732, "learning_rate": 1.898847543239981e-05, "loss": 1.0463, "step": 1003 }, { "epoch": 0.1702416278083934, "grad_norm": 1.0591632040708472, "learning_rate": 1.8986067015744605e-05, "loss": 1.0752, "step": 1004 }, { "epoch": 0.17041119118270454, "grad_norm": 1.1428832463362848, "learning_rate": 1.898365588841515e-05, "loss": 1.0756, "step": 1005 }, { "epoch": 0.17058075455701568, "grad_norm": 0.9936584279273152, "learning_rate": 1.8981242051138773e-05, "loss": 0.9864, "step": 1006 }, { "epoch": 0.17075031793132683, "grad_norm": 0.9941664303322485, "learning_rate": 1.897882550464361e-05, "loss": 0.9816, "step": 1007 }, { "epoch": 0.17091988130563798, "grad_norm": 1.0566276172028572, "learning_rate": 1.8976406249658624e-05, "loss": 1.0585, "step": 1008 }, { "epoch": 0.17108944467994913, "grad_norm": 1.0305138529894537, "learning_rate": 1.8973984286913584e-05, "loss": 0.9912, "step": 1009 }, { "epoch": 0.17125900805426028, "grad_norm": 1.0661358721764511, "learning_rate": 1.8971559617139092e-05, "loss": 1.0728, "step": 1010 }, { "epoch": 0.17142857142857143, "grad_norm": 1.0713830701065703, "learning_rate": 1.896913224106655e-05, "loss": 1.0394, "step": 1011 }, { "epoch": 0.17159813480288258, "grad_norm": 1.0417729740812105, "learning_rate": 1.8966702159428187e-05, "loss": 1.0433, "step": 1012 }, { "epoch": 0.17176769817719373, "grad_norm": 1.0135984016810344, "learning_rate": 1.896426937295704e-05, "loss": 1.0389, "step": 1013 }, { "epoch": 0.17193726155150488, "grad_norm": 1.0386784383548087, "learning_rate": 1.896183388238697e-05, "loss": 1.0536, "step": 1014 }, { "epoch": 0.17210682492581603, "grad_norm": 1.0308367201193958, "learning_rate": 1.8959395688452648e-05, "loss": 1.0798, "step": 1015 }, { "epoch": 0.17227638830012718, "grad_norm": 1.093693969708606, "learning_rate": 1.8956954791889567e-05, "loss": 1.0702, "step": 1016 }, { "epoch": 0.17244595167443832, "grad_norm": 1.206461847523564, "learning_rate": 1.8954511193434024e-05, "loss": 1.0782, "step": 1017 }, { "epoch": 0.17261551504874947, "grad_norm": 1.0313391042663693, "learning_rate": 1.8952064893823145e-05, "loss": 1.0279, "step": 1018 }, { "epoch": 0.17278507842306062, "grad_norm": 1.0372817079439989, "learning_rate": 1.8949615893794858e-05, "loss": 1.0128, "step": 1019 }, { "epoch": 0.17295464179737177, "grad_norm": 1.0665050651669081, "learning_rate": 1.8947164194087912e-05, "loss": 1.0347, "step": 1020 }, { "epoch": 0.17312420517168292, "grad_norm": 1.0907174010029659, "learning_rate": 1.8944709795441874e-05, "loss": 1.0463, "step": 1021 }, { "epoch": 0.17329376854599407, "grad_norm": 1.0184693548855424, "learning_rate": 1.8942252698597113e-05, "loss": 0.9921, "step": 1022 }, { "epoch": 0.17346333192030522, "grad_norm": 1.062863685731636, "learning_rate": 1.893979290429483e-05, "loss": 1.0486, "step": 1023 }, { "epoch": 0.17363289529461637, "grad_norm": 1.074012583014513, "learning_rate": 1.893733041327702e-05, "loss": 1.0892, "step": 1024 }, { "epoch": 0.17380245866892752, "grad_norm": 1.0518174563438558, "learning_rate": 1.8934865226286507e-05, "loss": 1.0453, "step": 1025 }, { "epoch": 0.17397202204323867, "grad_norm": 0.9881949718168048, "learning_rate": 1.8932397344066918e-05, "loss": 1.0071, "step": 1026 }, { "epoch": 0.17414158541754982, "grad_norm": 1.0736471442587825, "learning_rate": 1.89299267673627e-05, "loss": 1.011, "step": 1027 }, { "epoch": 0.17431114879186096, "grad_norm": 1.1076791922893015, "learning_rate": 1.8927453496919108e-05, "loss": 1.0443, "step": 1028 }, { "epoch": 0.1744807121661721, "grad_norm": 1.1113268860131447, "learning_rate": 1.892497753348221e-05, "loss": 1.0891, "step": 1029 }, { "epoch": 0.17465027554048326, "grad_norm": 0.9725135451900143, "learning_rate": 1.8922498877798893e-05, "loss": 1.0452, "step": 1030 }, { "epoch": 0.1748198389147944, "grad_norm": 1.0903289333819945, "learning_rate": 1.892001753061685e-05, "loss": 1.0636, "step": 1031 }, { "epoch": 0.17498940228910556, "grad_norm": 1.0477583452992338, "learning_rate": 1.8917533492684584e-05, "loss": 1.029, "step": 1032 }, { "epoch": 0.1751589656634167, "grad_norm": 1.0542077779157495, "learning_rate": 1.891504676475141e-05, "loss": 1.0339, "step": 1033 }, { "epoch": 0.17532852903772786, "grad_norm": 0.975375541174198, "learning_rate": 1.8912557347567462e-05, "loss": 0.9654, "step": 1034 }, { "epoch": 0.175498092412039, "grad_norm": 1.0729001926880484, "learning_rate": 1.891006524188368e-05, "loss": 1.0467, "step": 1035 }, { "epoch": 0.17566765578635016, "grad_norm": 0.7245216690535998, "learning_rate": 1.8907570448451812e-05, "loss": 0.833, "step": 1036 }, { "epoch": 0.1758372191606613, "grad_norm": 1.046610098949525, "learning_rate": 1.8905072968024424e-05, "loss": 1.0397, "step": 1037 }, { "epoch": 0.17600678253497246, "grad_norm": 1.0013872705892883, "learning_rate": 1.8902572801354887e-05, "loss": 1.0511, "step": 1038 }, { "epoch": 0.1761763459092836, "grad_norm": 1.1029554467376799, "learning_rate": 1.890006994919738e-05, "loss": 1.0522, "step": 1039 }, { "epoch": 0.17634590928359475, "grad_norm": 1.0508430758660994, "learning_rate": 1.8897564412306902e-05, "loss": 1.066, "step": 1040 }, { "epoch": 0.1765154726579059, "grad_norm": 1.0647486984009291, "learning_rate": 1.8895056191439252e-05, "loss": 1.0652, "step": 1041 }, { "epoch": 0.17668503603221705, "grad_norm": 0.9954204664408755, "learning_rate": 1.8892545287351044e-05, "loss": 1.0116, "step": 1042 }, { "epoch": 0.1768545994065282, "grad_norm": 1.09366622888092, "learning_rate": 1.8890031700799697e-05, "loss": 1.042, "step": 1043 }, { "epoch": 0.17702416278083935, "grad_norm": 1.1551720096157085, "learning_rate": 1.8887515432543445e-05, "loss": 1.0525, "step": 1044 }, { "epoch": 0.1771937261551505, "grad_norm": 1.016310177082837, "learning_rate": 1.888499648334133e-05, "loss": 0.9738, "step": 1045 }, { "epoch": 0.17736328952946165, "grad_norm": 1.0174982197476534, "learning_rate": 1.8882474853953193e-05, "loss": 1.0278, "step": 1046 }, { "epoch": 0.1775328529037728, "grad_norm": 1.1073615617411334, "learning_rate": 1.8879950545139697e-05, "loss": 1.0492, "step": 1047 }, { "epoch": 0.17770241627808395, "grad_norm": 1.0461526038661344, "learning_rate": 1.8877423557662307e-05, "loss": 1.0333, "step": 1048 }, { "epoch": 0.1778719796523951, "grad_norm": 1.068946921015294, "learning_rate": 1.8874893892283296e-05, "loss": 1.0265, "step": 1049 }, { "epoch": 0.17804154302670624, "grad_norm": 1.0792162961638039, "learning_rate": 1.887236154976574e-05, "loss": 1.0697, "step": 1050 }, { "epoch": 0.1782111064010174, "grad_norm": 1.0507105850359646, "learning_rate": 1.8869826530873537e-05, "loss": 1.0469, "step": 1051 }, { "epoch": 0.17838066977532852, "grad_norm": 0.9680010430639013, "learning_rate": 1.886728883637138e-05, "loss": 1.007, "step": 1052 }, { "epoch": 0.17855023314963966, "grad_norm": 1.0725567627421173, "learning_rate": 1.886474846702477e-05, "loss": 1.07, "step": 1053 }, { "epoch": 0.1787197965239508, "grad_norm": 1.0459122476351028, "learning_rate": 1.8862205423600016e-05, "loss": 1.0941, "step": 1054 }, { "epoch": 0.17888935989826196, "grad_norm": 1.0835932332012264, "learning_rate": 1.8859659706864234e-05, "loss": 1.0786, "step": 1055 }, { "epoch": 0.1790589232725731, "grad_norm": 0.7963938772057977, "learning_rate": 1.8857111317585354e-05, "loss": 0.8576, "step": 1056 }, { "epoch": 0.17922848664688426, "grad_norm": 1.099042760662731, "learning_rate": 1.8854560256532098e-05, "loss": 1.0444, "step": 1057 }, { "epoch": 0.1793980500211954, "grad_norm": 1.088615438395254, "learning_rate": 1.885200652447401e-05, "loss": 1.0957, "step": 1058 }, { "epoch": 0.17956761339550656, "grad_norm": 1.0640060539192748, "learning_rate": 1.8849450122181422e-05, "loss": 1.0287, "step": 1059 }, { "epoch": 0.1797371767698177, "grad_norm": 1.0842958887706413, "learning_rate": 1.8846891050425484e-05, "loss": 1.0449, "step": 1060 }, { "epoch": 0.17990674014412886, "grad_norm": 1.102621760318561, "learning_rate": 1.8844329309978146e-05, "loss": 1.056, "step": 1061 }, { "epoch": 0.18007630351844, "grad_norm": 0.6793547404122261, "learning_rate": 1.8841764901612167e-05, "loss": 0.875, "step": 1062 }, { "epoch": 0.18024586689275116, "grad_norm": 1.0777371505263507, "learning_rate": 1.883919782610111e-05, "loss": 1.0174, "step": 1063 }, { "epoch": 0.1804154302670623, "grad_norm": 1.0628359911400063, "learning_rate": 1.8836628084219332e-05, "loss": 0.9978, "step": 1064 }, { "epoch": 0.18058499364137345, "grad_norm": 1.069653533052602, "learning_rate": 1.8834055676742018e-05, "loss": 1.0749, "step": 1065 }, { "epoch": 0.1807545570156846, "grad_norm": 1.1587147736029462, "learning_rate": 1.883148060444513e-05, "loss": 1.0402, "step": 1066 }, { "epoch": 0.18092412038999575, "grad_norm": 1.025117978747166, "learning_rate": 1.8828902868105454e-05, "loss": 1.0425, "step": 1067 }, { "epoch": 0.1810936837643069, "grad_norm": 1.091496977715169, "learning_rate": 1.8826322468500567e-05, "loss": 1.0577, "step": 1068 }, { "epoch": 0.18126324713861805, "grad_norm": 0.674673181083146, "learning_rate": 1.8823739406408855e-05, "loss": 0.8771, "step": 1069 }, { "epoch": 0.1814328105129292, "grad_norm": 1.0763887923881363, "learning_rate": 1.8821153682609514e-05, "loss": 1.0412, "step": 1070 }, { "epoch": 0.18160237388724035, "grad_norm": 1.0254809783165464, "learning_rate": 1.8818565297882525e-05, "loss": 1.0369, "step": 1071 }, { "epoch": 0.1817719372615515, "grad_norm": 1.0590738106819182, "learning_rate": 1.8815974253008687e-05, "loss": 1.0188, "step": 1072 }, { "epoch": 0.18194150063586265, "grad_norm": 0.6373899011466477, "learning_rate": 1.8813380548769594e-05, "loss": 0.8633, "step": 1073 }, { "epoch": 0.1821110640101738, "grad_norm": 1.0621148660447535, "learning_rate": 1.8810784185947648e-05, "loss": 1.0301, "step": 1074 }, { "epoch": 0.18228062738448494, "grad_norm": 0.9560958091072056, "learning_rate": 1.880818516532605e-05, "loss": 1.0172, "step": 1075 }, { "epoch": 0.1824501907587961, "grad_norm": 1.2316105785351477, "learning_rate": 1.8805583487688796e-05, "loss": 0.9909, "step": 1076 }, { "epoch": 0.18261975413310724, "grad_norm": 1.00080139391916, "learning_rate": 1.88029791538207e-05, "loss": 1.0079, "step": 1077 }, { "epoch": 0.1827893175074184, "grad_norm": 1.0148533263873454, "learning_rate": 1.8800372164507358e-05, "loss": 1.0385, "step": 1078 }, { "epoch": 0.18295888088172954, "grad_norm": 1.0659653437505787, "learning_rate": 1.8797762520535178e-05, "loss": 1.0733, "step": 1079 }, { "epoch": 0.1831284442560407, "grad_norm": 1.0006065098846189, "learning_rate": 1.8795150222691375e-05, "loss": 1.0305, "step": 1080 }, { "epoch": 0.18329800763035184, "grad_norm": 1.1032054735983126, "learning_rate": 1.8792535271763944e-05, "loss": 1.0438, "step": 1081 }, { "epoch": 0.183467571004663, "grad_norm": 1.0595412598719354, "learning_rate": 1.8789917668541707e-05, "loss": 1.0337, "step": 1082 }, { "epoch": 0.18363713437897414, "grad_norm": 1.023972415180868, "learning_rate": 1.8787297413814257e-05, "loss": 1.0161, "step": 1083 }, { "epoch": 0.1838066977532853, "grad_norm": 1.0596608377381984, "learning_rate": 1.8784674508372013e-05, "loss": 1.077, "step": 1084 }, { "epoch": 0.18397626112759644, "grad_norm": 1.0901509199604031, "learning_rate": 1.8782048953006176e-05, "loss": 1.0071, "step": 1085 }, { "epoch": 0.18414582450190758, "grad_norm": 1.1185269003403535, "learning_rate": 1.8779420748508758e-05, "loss": 1.0717, "step": 1086 }, { "epoch": 0.18431538787621873, "grad_norm": 0.9773051664619716, "learning_rate": 1.8776789895672557e-05, "loss": 1.0229, "step": 1087 }, { "epoch": 0.18448495125052988, "grad_norm": 1.0180955003574748, "learning_rate": 1.8774156395291188e-05, "loss": 1.0412, "step": 1088 }, { "epoch": 0.18465451462484103, "grad_norm": 1.0316399436215293, "learning_rate": 1.8771520248159044e-05, "loss": 1.0466, "step": 1089 }, { "epoch": 0.18482407799915218, "grad_norm": 1.0467293399796018, "learning_rate": 1.876888145507133e-05, "loss": 1.0314, "step": 1090 }, { "epoch": 0.18499364137346333, "grad_norm": 1.0652202251188496, "learning_rate": 1.8766240016824056e-05, "loss": 1.0354, "step": 1091 }, { "epoch": 0.18516320474777448, "grad_norm": 1.03473017798002, "learning_rate": 1.8763595934214004e-05, "loss": 0.981, "step": 1092 }, { "epoch": 0.18533276812208563, "grad_norm": 1.0548904395935859, "learning_rate": 1.8760949208038782e-05, "loss": 1.009, "step": 1093 }, { "epoch": 0.18550233149639678, "grad_norm": 0.9952437756882081, "learning_rate": 1.8758299839096774e-05, "loss": 1.0013, "step": 1094 }, { "epoch": 0.18567189487070793, "grad_norm": 1.0098652834135369, "learning_rate": 1.8755647828187175e-05, "loss": 0.9756, "step": 1095 }, { "epoch": 0.18584145824501908, "grad_norm": 1.1036055794465418, "learning_rate": 1.875299317610997e-05, "loss": 1.0424, "step": 1096 }, { "epoch": 0.18601102161933022, "grad_norm": 1.032174354868484, "learning_rate": 1.8750335883665948e-05, "loss": 1.019, "step": 1097 }, { "epoch": 0.18618058499364137, "grad_norm": 1.025468560937419, "learning_rate": 1.874767595165668e-05, "loss": 1.0441, "step": 1098 }, { "epoch": 0.18635014836795252, "grad_norm": 0.654211416884946, "learning_rate": 1.874501338088455e-05, "loss": 0.8489, "step": 1099 }, { "epoch": 0.18651971174226367, "grad_norm": 1.079235417889397, "learning_rate": 1.8742348172152728e-05, "loss": 1.0267, "step": 1100 }, { "epoch": 0.18668927511657482, "grad_norm": 1.107932391473777, "learning_rate": 1.873968032626518e-05, "loss": 1.0253, "step": 1101 }, { "epoch": 0.18685883849088597, "grad_norm": 1.1030316316228774, "learning_rate": 1.8737009844026673e-05, "loss": 1.05, "step": 1102 }, { "epoch": 0.18702840186519712, "grad_norm": 1.0119577413469758, "learning_rate": 1.873433672624277e-05, "loss": 1.0125, "step": 1103 }, { "epoch": 0.18719796523950827, "grad_norm": 1.0783814751395921, "learning_rate": 1.8731660973719816e-05, "loss": 1.0085, "step": 1104 }, { "epoch": 0.18736752861381942, "grad_norm": 1.1321130047944756, "learning_rate": 1.8728982587264965e-05, "loss": 1.0279, "step": 1105 }, { "epoch": 0.18753709198813057, "grad_norm": 1.1810796160515018, "learning_rate": 1.872630156768616e-05, "loss": 1.0778, "step": 1106 }, { "epoch": 0.18770665536244172, "grad_norm": 1.0441024098014227, "learning_rate": 1.8723617915792136e-05, "loss": 1.0331, "step": 1107 }, { "epoch": 0.18787621873675286, "grad_norm": 1.0817205595077535, "learning_rate": 1.8720931632392427e-05, "loss": 1.0468, "step": 1108 }, { "epoch": 0.188045782111064, "grad_norm": 1.060619446098569, "learning_rate": 1.8718242718297358e-05, "loss": 1.04, "step": 1109 }, { "epoch": 0.18821534548537516, "grad_norm": 1.073263648619468, "learning_rate": 1.8715551174318053e-05, "loss": 0.9915, "step": 1110 }, { "epoch": 0.1883849088596863, "grad_norm": 1.1004794351266534, "learning_rate": 1.8712857001266417e-05, "loss": 1.0475, "step": 1111 }, { "epoch": 0.18855447223399746, "grad_norm": 1.1189192261883878, "learning_rate": 1.8710160199955158e-05, "loss": 1.0336, "step": 1112 }, { "epoch": 0.1887240356083086, "grad_norm": 1.018498383470065, "learning_rate": 1.8707460771197773e-05, "loss": 0.9956, "step": 1113 }, { "epoch": 0.18889359898261976, "grad_norm": 1.0432703900417342, "learning_rate": 1.8704758715808556e-05, "loss": 1.0097, "step": 1114 }, { "epoch": 0.1890631623569309, "grad_norm": 1.1080203566710376, "learning_rate": 1.870205403460259e-05, "loss": 1.0365, "step": 1115 }, { "epoch": 0.18923272573124206, "grad_norm": 1.019195808140228, "learning_rate": 1.8699346728395746e-05, "loss": 1.0455, "step": 1116 }, { "epoch": 0.1894022891055532, "grad_norm": 1.021689392882448, "learning_rate": 1.8696636798004693e-05, "loss": 1.0131, "step": 1117 }, { "epoch": 0.18957185247986436, "grad_norm": 1.0747533494398365, "learning_rate": 1.869392424424689e-05, "loss": 1.0618, "step": 1118 }, { "epoch": 0.1897414158541755, "grad_norm": 1.0306890105088173, "learning_rate": 1.869120906794059e-05, "loss": 1.0052, "step": 1119 }, { "epoch": 0.18991097922848665, "grad_norm": 1.060286016012655, "learning_rate": 1.868849126990483e-05, "loss": 1.0246, "step": 1120 }, { "epoch": 0.1900805426027978, "grad_norm": 1.0652619450500056, "learning_rate": 1.8685770850959444e-05, "loss": 1.0336, "step": 1121 }, { "epoch": 0.19025010597710895, "grad_norm": 1.0581830961944134, "learning_rate": 1.8683047811925057e-05, "loss": 1.0413, "step": 1122 }, { "epoch": 0.1904196693514201, "grad_norm": 1.0537043039203717, "learning_rate": 1.8680322153623077e-05, "loss": 1.0176, "step": 1123 }, { "epoch": 0.19058923272573125, "grad_norm": 1.0646885508930264, "learning_rate": 1.8677593876875707e-05, "loss": 1.0005, "step": 1124 }, { "epoch": 0.1907587961000424, "grad_norm": 1.0701149877370713, "learning_rate": 1.8674862982505946e-05, "loss": 1.0479, "step": 1125 }, { "epoch": 0.19092835947435355, "grad_norm": 1.075439626383217, "learning_rate": 1.8672129471337568e-05, "loss": 1.0603, "step": 1126 }, { "epoch": 0.1910979228486647, "grad_norm": 1.0337487197458968, "learning_rate": 1.8669393344195154e-05, "loss": 1.068, "step": 1127 }, { "epoch": 0.19126748622297585, "grad_norm": 1.024203162155067, "learning_rate": 1.8666654601904058e-05, "loss": 0.9965, "step": 1128 }, { "epoch": 0.191437049597287, "grad_norm": 0.981286089341937, "learning_rate": 1.8663913245290433e-05, "loss": 1.0307, "step": 1129 }, { "epoch": 0.19160661297159814, "grad_norm": 1.0577088808437018, "learning_rate": 1.8661169275181217e-05, "loss": 1.0364, "step": 1130 }, { "epoch": 0.1917761763459093, "grad_norm": 1.0227399863124151, "learning_rate": 1.8658422692404136e-05, "loss": 1.0129, "step": 1131 }, { "epoch": 0.19194573972022044, "grad_norm": 1.012120396741532, "learning_rate": 1.8655673497787708e-05, "loss": 1.0184, "step": 1132 }, { "epoch": 0.1921153030945316, "grad_norm": 0.9947006175914488, "learning_rate": 1.8652921692161233e-05, "loss": 0.9979, "step": 1133 }, { "epoch": 0.19228486646884274, "grad_norm": 1.070070395782131, "learning_rate": 1.8650167276354802e-05, "loss": 1.025, "step": 1134 }, { "epoch": 0.1924544298431539, "grad_norm": 1.0541155125979846, "learning_rate": 1.864741025119929e-05, "loss": 1.0103, "step": 1135 }, { "epoch": 0.19262399321746504, "grad_norm": 1.0532929465591445, "learning_rate": 1.8644650617526366e-05, "loss": 1.014, "step": 1136 }, { "epoch": 0.1927935565917762, "grad_norm": 1.0071293447802863, "learning_rate": 1.8641888376168483e-05, "loss": 0.99, "step": 1137 }, { "epoch": 0.19296311996608734, "grad_norm": 1.1374644443189281, "learning_rate": 1.8639123527958877e-05, "loss": 1.0784, "step": 1138 }, { "epoch": 0.1931326833403985, "grad_norm": 1.0120430243294007, "learning_rate": 1.863635607373157e-05, "loss": 1.0229, "step": 1139 }, { "epoch": 0.19330224671470964, "grad_norm": 1.007238215765678, "learning_rate": 1.8633586014321378e-05, "loss": 1.0051, "step": 1140 }, { "epoch": 0.19347181008902078, "grad_norm": 1.0760687459740548, "learning_rate": 1.8630813350563898e-05, "loss": 1.0396, "step": 1141 }, { "epoch": 0.19364137346333193, "grad_norm": 0.9648749524421074, "learning_rate": 1.8628038083295508e-05, "loss": 1.0246, "step": 1142 }, { "epoch": 0.19381093683764306, "grad_norm": 1.0100154097262342, "learning_rate": 1.862526021335338e-05, "loss": 1.0294, "step": 1143 }, { "epoch": 0.1939805002119542, "grad_norm": 1.0030581935913696, "learning_rate": 1.862247974157546e-05, "loss": 1.0629, "step": 1144 }, { "epoch": 0.19415006358626535, "grad_norm": 0.6574111899337789, "learning_rate": 1.8619696668800494e-05, "loss": 0.8062, "step": 1145 }, { "epoch": 0.1943196269605765, "grad_norm": 0.9868517241345818, "learning_rate": 1.8616910995868e-05, "loss": 1.0551, "step": 1146 }, { "epoch": 0.19448919033488765, "grad_norm": 1.0024926362794524, "learning_rate": 1.8614122723618284e-05, "loss": 0.9869, "step": 1147 }, { "epoch": 0.1946587537091988, "grad_norm": 0.9585164855704453, "learning_rate": 1.861133185289244e-05, "loss": 1.0078, "step": 1148 }, { "epoch": 0.19482831708350995, "grad_norm": 0.6478518330323907, "learning_rate": 1.860853838453234e-05, "loss": 0.8569, "step": 1149 }, { "epoch": 0.1949978804578211, "grad_norm": 1.0507163935200776, "learning_rate": 1.8605742319380643e-05, "loss": 1.0295, "step": 1150 }, { "epoch": 0.19516744383213225, "grad_norm": 1.0342546738444225, "learning_rate": 1.860294365828079e-05, "loss": 1.0313, "step": 1151 }, { "epoch": 0.1953370072064434, "grad_norm": 1.0106613728921512, "learning_rate": 1.8600142402077006e-05, "loss": 1.0463, "step": 1152 }, { "epoch": 0.19550657058075455, "grad_norm": 1.0431592337964368, "learning_rate": 1.85973385516143e-05, "loss": 1.0307, "step": 1153 }, { "epoch": 0.1956761339550657, "grad_norm": 0.9890348975975835, "learning_rate": 1.8594532107738458e-05, "loss": 1.0299, "step": 1154 }, { "epoch": 0.19584569732937684, "grad_norm": 1.0526313650412848, "learning_rate": 1.8591723071296054e-05, "loss": 1.0456, "step": 1155 }, { "epoch": 0.196015260703688, "grad_norm": 1.0224443765890652, "learning_rate": 1.858891144313445e-05, "loss": 1.0192, "step": 1156 }, { "epoch": 0.19618482407799914, "grad_norm": 1.059582021809335, "learning_rate": 1.8586097224101767e-05, "loss": 1.0665, "step": 1157 }, { "epoch": 0.1963543874523103, "grad_norm": 1.0487068051230162, "learning_rate": 1.858328041504693e-05, "loss": 0.9736, "step": 1158 }, { "epoch": 0.19652395082662144, "grad_norm": 1.0800321343509853, "learning_rate": 1.858046101681964e-05, "loss": 1.0453, "step": 1159 }, { "epoch": 0.1966935142009326, "grad_norm": 1.0425064305226417, "learning_rate": 1.857763903027038e-05, "loss": 1.0246, "step": 1160 }, { "epoch": 0.19686307757524374, "grad_norm": 1.1265201763594452, "learning_rate": 1.8574814456250406e-05, "loss": 1.0463, "step": 1161 }, { "epoch": 0.1970326409495549, "grad_norm": 1.034409127391492, "learning_rate": 1.8571987295611756e-05, "loss": 1.0062, "step": 1162 }, { "epoch": 0.19720220432386604, "grad_norm": 1.0628138767441144, "learning_rate": 1.8569157549207256e-05, "loss": 1.0689, "step": 1163 }, { "epoch": 0.1973717676981772, "grad_norm": 1.0982976324276519, "learning_rate": 1.8566325217890505e-05, "loss": 1.0464, "step": 1164 }, { "epoch": 0.19754133107248834, "grad_norm": 1.07130372592087, "learning_rate": 1.856349030251589e-05, "loss": 1.0516, "step": 1165 }, { "epoch": 0.19771089444679948, "grad_norm": 1.0642842358486948, "learning_rate": 1.856065280393857e-05, "loss": 1.0305, "step": 1166 }, { "epoch": 0.19788045782111063, "grad_norm": 1.0634295951779795, "learning_rate": 1.8557812723014476e-05, "loss": 1.0268, "step": 1167 }, { "epoch": 0.19805002119542178, "grad_norm": 1.0184802102487613, "learning_rate": 1.8554970060600338e-05, "loss": 1.0295, "step": 1168 }, { "epoch": 0.19821958456973293, "grad_norm": 1.0338580087698006, "learning_rate": 1.855212481755365e-05, "loss": 1.0309, "step": 1169 }, { "epoch": 0.19838914794404408, "grad_norm": 1.059172867948832, "learning_rate": 1.8549276994732684e-05, "loss": 1.0384, "step": 1170 }, { "epoch": 0.19855871131835523, "grad_norm": 1.096866449967696, "learning_rate": 1.85464265929965e-05, "loss": 1.0312, "step": 1171 }, { "epoch": 0.19872827469266638, "grad_norm": 1.0209748916916948, "learning_rate": 1.854357361320493e-05, "loss": 1.0169, "step": 1172 }, { "epoch": 0.19889783806697753, "grad_norm": 1.0691953369004308, "learning_rate": 1.854071805621858e-05, "loss": 1.0577, "step": 1173 }, { "epoch": 0.19906740144128868, "grad_norm": 1.0031412086315579, "learning_rate": 1.853785992289884e-05, "loss": 0.979, "step": 1174 }, { "epoch": 0.19923696481559983, "grad_norm": 1.0179558637017458, "learning_rate": 1.8534999214107878e-05, "loss": 1.0762, "step": 1175 }, { "epoch": 0.19940652818991098, "grad_norm": 1.0656437355325812, "learning_rate": 1.8532135930708626e-05, "loss": 1.053, "step": 1176 }, { "epoch": 0.19957609156422212, "grad_norm": 1.0261713645735637, "learning_rate": 1.852927007356481e-05, "loss": 1.0473, "step": 1177 }, { "epoch": 0.19974565493853327, "grad_norm": 1.0831025933827432, "learning_rate": 1.8526401643540924e-05, "loss": 1.0403, "step": 1178 }, { "epoch": 0.19991521831284442, "grad_norm": 1.0519500648356486, "learning_rate": 1.8523530641502234e-05, "loss": 1.0368, "step": 1179 }, { "epoch": 0.20008478168715557, "grad_norm": 1.0773293076388273, "learning_rate": 1.8520657068314792e-05, "loss": 1.0363, "step": 1180 }, { "epoch": 0.20025434506146672, "grad_norm": 1.0021809780492006, "learning_rate": 1.8517780924845415e-05, "loss": 1.0338, "step": 1181 }, { "epoch": 0.20042390843577787, "grad_norm": 1.0935346068038074, "learning_rate": 1.8514902211961704e-05, "loss": 1.0922, "step": 1182 }, { "epoch": 0.20059347181008902, "grad_norm": 1.050889657682733, "learning_rate": 1.851202093053203e-05, "loss": 1.0257, "step": 1183 }, { "epoch": 0.20076303518440017, "grad_norm": 1.0556391086169303, "learning_rate": 1.8509137081425538e-05, "loss": 1.0168, "step": 1184 }, { "epoch": 0.20093259855871132, "grad_norm": 1.035397993089505, "learning_rate": 1.8506250665512156e-05, "loss": 1.0164, "step": 1185 }, { "epoch": 0.20110216193302247, "grad_norm": 1.0290741634325278, "learning_rate": 1.8503361683662575e-05, "loss": 1.0237, "step": 1186 }, { "epoch": 0.20127172530733362, "grad_norm": 1.0748063179541278, "learning_rate": 1.8500470136748267e-05, "loss": 1.0373, "step": 1187 }, { "epoch": 0.20144128868164476, "grad_norm": 1.0662121318492281, "learning_rate": 1.849757602564147e-05, "loss": 1.0714, "step": 1188 }, { "epoch": 0.2016108520559559, "grad_norm": 1.0292944512112419, "learning_rate": 1.8494679351215212e-05, "loss": 1.0551, "step": 1189 }, { "epoch": 0.20178041543026706, "grad_norm": 0.9895423228539304, "learning_rate": 1.8491780114343275e-05, "loss": 0.9593, "step": 1190 }, { "epoch": 0.2019499788045782, "grad_norm": 1.1010847125002454, "learning_rate": 1.8488878315900228e-05, "loss": 1.0348, "step": 1191 }, { "epoch": 0.20211954217888936, "grad_norm": 1.0697923081803158, "learning_rate": 1.8485973956761402e-05, "loss": 1.0334, "step": 1192 }, { "epoch": 0.2022891055532005, "grad_norm": 1.0641982135941415, "learning_rate": 1.848306703780291e-05, "loss": 1.037, "step": 1193 }, { "epoch": 0.20245866892751166, "grad_norm": 1.02711613493122, "learning_rate": 1.8480157559901635e-05, "loss": 0.999, "step": 1194 }, { "epoch": 0.2026282323018228, "grad_norm": 1.0890737466188205, "learning_rate": 1.847724552393522e-05, "loss": 1.0396, "step": 1195 }, { "epoch": 0.20279779567613396, "grad_norm": 1.049366214873702, "learning_rate": 1.8474330930782102e-05, "loss": 1.0303, "step": 1196 }, { "epoch": 0.2029673590504451, "grad_norm": 1.0088642954511193, "learning_rate": 1.847141378132147e-05, "loss": 1.0, "step": 1197 }, { "epoch": 0.20313692242475626, "grad_norm": 1.0144124285540683, "learning_rate": 1.8468494076433287e-05, "loss": 1.0519, "step": 1198 }, { "epoch": 0.2033064857990674, "grad_norm": 0.9873543663400495, "learning_rate": 1.8465571816998296e-05, "loss": 1.017, "step": 1199 }, { "epoch": 0.20347604917337855, "grad_norm": 1.0409487868350844, "learning_rate": 1.8462647003898005e-05, "loss": 1.0184, "step": 1200 }, { "epoch": 0.2036456125476897, "grad_norm": 1.042680287184133, "learning_rate": 1.8459719638014693e-05, "loss": 1.0722, "step": 1201 }, { "epoch": 0.20381517592200085, "grad_norm": 1.0413534357481726, "learning_rate": 1.845678972023141e-05, "loss": 1.0861, "step": 1202 }, { "epoch": 0.203984739296312, "grad_norm": 1.014182308410673, "learning_rate": 1.845385725143197e-05, "loss": 1.0182, "step": 1203 }, { "epoch": 0.20415430267062315, "grad_norm": 1.0535576454817985, "learning_rate": 1.8450922232500966e-05, "loss": 1.0379, "step": 1204 }, { "epoch": 0.2043238660449343, "grad_norm": 1.001785214578283, "learning_rate": 1.844798466432375e-05, "loss": 1.0408, "step": 1205 }, { "epoch": 0.20449342941924545, "grad_norm": 1.018852245090246, "learning_rate": 1.8445044547786453e-05, "loss": 1.048, "step": 1206 }, { "epoch": 0.2046629927935566, "grad_norm": 1.0586772410639393, "learning_rate": 1.844210188377597e-05, "loss": 1.0208, "step": 1207 }, { "epoch": 0.20483255616786775, "grad_norm": 1.017886865946593, "learning_rate": 1.843915667317996e-05, "loss": 1.0257, "step": 1208 }, { "epoch": 0.2050021195421789, "grad_norm": 0.9896355726950156, "learning_rate": 1.843620891688686e-05, "loss": 0.9956, "step": 1209 }, { "epoch": 0.20517168291649004, "grad_norm": 0.6889570278504524, "learning_rate": 1.8433258615785865e-05, "loss": 0.8352, "step": 1210 }, { "epoch": 0.2053412462908012, "grad_norm": 0.5971048848971885, "learning_rate": 1.8430305770766947e-05, "loss": 0.8228, "step": 1211 }, { "epoch": 0.20551080966511234, "grad_norm": 1.0528300584534787, "learning_rate": 1.842735038272084e-05, "loss": 1.0676, "step": 1212 }, { "epoch": 0.2056803730394235, "grad_norm": 1.0392176737674814, "learning_rate": 1.842439245253904e-05, "loss": 1.0844, "step": 1213 }, { "epoch": 0.20584993641373464, "grad_norm": 1.0205284637979066, "learning_rate": 1.842143198111382e-05, "loss": 1.0115, "step": 1214 }, { "epoch": 0.2060194997880458, "grad_norm": 1.0679557115598792, "learning_rate": 1.841846896933821e-05, "loss": 1.0161, "step": 1215 }, { "epoch": 0.20618906316235694, "grad_norm": 1.032645681620988, "learning_rate": 1.841550341810602e-05, "loss": 1.0346, "step": 1216 }, { "epoch": 0.2063586265366681, "grad_norm": 1.0465575650502466, "learning_rate": 1.8412535328311813e-05, "loss": 1.041, "step": 1217 }, { "epoch": 0.20652818991097924, "grad_norm": 1.0808510629114338, "learning_rate": 1.8409564700850923e-05, "loss": 1.007, "step": 1218 }, { "epoch": 0.2066977532852904, "grad_norm": 1.0515010233070772, "learning_rate": 1.8406591536619448e-05, "loss": 0.9982, "step": 1219 }, { "epoch": 0.20686731665960154, "grad_norm": 1.0389335588330406, "learning_rate": 1.8403615836514254e-05, "loss": 1.053, "step": 1220 }, { "epoch": 0.20703688003391268, "grad_norm": 1.0255261956531991, "learning_rate": 1.840063760143297e-05, "loss": 1.0294, "step": 1221 }, { "epoch": 0.20720644340822383, "grad_norm": 1.0171415382541182, "learning_rate": 1.8397656832273982e-05, "loss": 1.0229, "step": 1222 }, { "epoch": 0.20737600678253498, "grad_norm": 1.079866293032394, "learning_rate": 1.8394673529936454e-05, "loss": 1.0569, "step": 1223 }, { "epoch": 0.20754557015684613, "grad_norm": 1.0158664268331172, "learning_rate": 1.839168769532031e-05, "loss": 1.0367, "step": 1224 }, { "epoch": 0.20771513353115728, "grad_norm": 1.0335426450900382, "learning_rate": 1.8388699329326237e-05, "loss": 1.0273, "step": 1225 }, { "epoch": 0.20788469690546843, "grad_norm": 1.0208568707443753, "learning_rate": 1.838570843285568e-05, "loss": 1.0213, "step": 1226 }, { "epoch": 0.20805426027977958, "grad_norm": 1.0448858748525611, "learning_rate": 1.8382715006810853e-05, "loss": 1.05, "step": 1227 }, { "epoch": 0.20822382365409073, "grad_norm": 1.0329047358240666, "learning_rate": 1.837971905209473e-05, "loss": 1.0184, "step": 1228 }, { "epoch": 0.20839338702840188, "grad_norm": 1.0066746878062711, "learning_rate": 1.8376720569611057e-05, "loss": 0.996, "step": 1229 }, { "epoch": 0.20856295040271303, "grad_norm": 1.035072265823961, "learning_rate": 1.837371956026433e-05, "loss": 1.0638, "step": 1230 }, { "epoch": 0.20873251377702418, "grad_norm": 1.0480752588150497, "learning_rate": 1.8370716024959812e-05, "loss": 1.0547, "step": 1231 }, { "epoch": 0.20890207715133532, "grad_norm": 1.006328704350034, "learning_rate": 1.8367709964603528e-05, "loss": 0.9756, "step": 1232 }, { "epoch": 0.20907164052564647, "grad_norm": 1.0200811040945212, "learning_rate": 1.8364701380102267e-05, "loss": 1.0433, "step": 1233 }, { "epoch": 0.20924120389995762, "grad_norm": 0.9891289209402269, "learning_rate": 1.8361690272363583e-05, "loss": 1.0438, "step": 1234 }, { "epoch": 0.20941076727426874, "grad_norm": 1.02658165754576, "learning_rate": 1.8358676642295775e-05, "loss": 1.0768, "step": 1235 }, { "epoch": 0.2095803306485799, "grad_norm": 1.030556044820343, "learning_rate": 1.835566049080792e-05, "loss": 1.0383, "step": 1236 }, { "epoch": 0.20974989402289104, "grad_norm": 1.0630707848002783, "learning_rate": 1.835264181880985e-05, "loss": 1.0503, "step": 1237 }, { "epoch": 0.2099194573972022, "grad_norm": 1.063608633053865, "learning_rate": 1.834962062721215e-05, "loss": 1.0648, "step": 1238 }, { "epoch": 0.21008902077151334, "grad_norm": 1.0575464474083969, "learning_rate": 1.8346596916926183e-05, "loss": 1.0503, "step": 1239 }, { "epoch": 0.2102585841458245, "grad_norm": 0.9850359702459629, "learning_rate": 1.834357068886405e-05, "loss": 1.0193, "step": 1240 }, { "epoch": 0.21042814752013564, "grad_norm": 1.1044158198046359, "learning_rate": 1.8340541943938623e-05, "loss": 1.0213, "step": 1241 }, { "epoch": 0.2105977108944468, "grad_norm": 1.0777778064264738, "learning_rate": 1.8337510683063536e-05, "loss": 1.0324, "step": 1242 }, { "epoch": 0.21076727426875794, "grad_norm": 1.008427484039492, "learning_rate": 1.8334476907153177e-05, "loss": 1.0214, "step": 1243 }, { "epoch": 0.2109368376430691, "grad_norm": 1.0786831769768992, "learning_rate": 1.8331440617122694e-05, "loss": 1.0185, "step": 1244 }, { "epoch": 0.21110640101738024, "grad_norm": 1.0500271329430848, "learning_rate": 1.8328401813887994e-05, "loss": 1.0481, "step": 1245 }, { "epoch": 0.21127596439169138, "grad_norm": 1.040386126783691, "learning_rate": 1.8325360498365736e-05, "loss": 0.9958, "step": 1246 }, { "epoch": 0.21144552776600253, "grad_norm": 1.0613500534183766, "learning_rate": 1.8322316671473344e-05, "loss": 1.0321, "step": 1247 }, { "epoch": 0.21161509114031368, "grad_norm": 1.032326639314487, "learning_rate": 1.8319270334129e-05, "loss": 1.0736, "step": 1248 }, { "epoch": 0.21178465451462483, "grad_norm": 1.0136031135159627, "learning_rate": 1.831622148725164e-05, "loss": 1.02, "step": 1249 }, { "epoch": 0.21195421788893598, "grad_norm": 1.002767274048018, "learning_rate": 1.8313170131760956e-05, "loss": 0.9849, "step": 1250 }, { "epoch": 0.21212378126324713, "grad_norm": 1.0736693800179298, "learning_rate": 1.8310116268577403e-05, "loss": 1.0423, "step": 1251 }, { "epoch": 0.21229334463755828, "grad_norm": 1.074661518154584, "learning_rate": 1.8307059898622184e-05, "loss": 1.0314, "step": 1252 }, { "epoch": 0.21246290801186943, "grad_norm": 0.9913767210688825, "learning_rate": 1.8304001022817267e-05, "loss": 0.9776, "step": 1253 }, { "epoch": 0.21263247138618058, "grad_norm": 1.2394338711259665, "learning_rate": 1.8300939642085363e-05, "loss": 1.0383, "step": 1254 }, { "epoch": 0.21280203476049173, "grad_norm": 1.0684339650916157, "learning_rate": 1.829787575734995e-05, "loss": 1.0058, "step": 1255 }, { "epoch": 0.21297159813480288, "grad_norm": 1.013255762764285, "learning_rate": 1.8294809369535265e-05, "loss": 1.0278, "step": 1256 }, { "epoch": 0.21314116150911402, "grad_norm": 1.0789297314983195, "learning_rate": 1.8291740479566286e-05, "loss": 1.0317, "step": 1257 }, { "epoch": 0.21331072488342517, "grad_norm": 1.0657885097766648, "learning_rate": 1.8288669088368754e-05, "loss": 1.0063, "step": 1258 }, { "epoch": 0.21348028825773632, "grad_norm": 1.0475686820281338, "learning_rate": 1.8285595196869162e-05, "loss": 1.0165, "step": 1259 }, { "epoch": 0.21364985163204747, "grad_norm": 1.0400619700944447, "learning_rate": 1.828251880599476e-05, "loss": 1.064, "step": 1260 }, { "epoch": 0.21381941500635862, "grad_norm": 1.0364421381237077, "learning_rate": 1.8279439916673553e-05, "loss": 1.0253, "step": 1261 }, { "epoch": 0.21398897838066977, "grad_norm": 0.9511034456224944, "learning_rate": 1.8276358529834296e-05, "loss": 0.8758, "step": 1262 }, { "epoch": 0.21415854175498092, "grad_norm": 1.0576833409232267, "learning_rate": 1.8273274646406494e-05, "loss": 1.037, "step": 1263 }, { "epoch": 0.21432810512929207, "grad_norm": 1.0640317479434482, "learning_rate": 1.827018826732042e-05, "loss": 1.0196, "step": 1264 }, { "epoch": 0.21449766850360322, "grad_norm": 1.0387038290415187, "learning_rate": 1.8267099393507083e-05, "loss": 1.0491, "step": 1265 }, { "epoch": 0.21466723187791437, "grad_norm": 0.9958605931383211, "learning_rate": 1.8264008025898248e-05, "loss": 1.0077, "step": 1266 }, { "epoch": 0.21483679525222552, "grad_norm": 1.0251389982241148, "learning_rate": 1.826091416542644e-05, "loss": 0.9803, "step": 1267 }, { "epoch": 0.21500635862653666, "grad_norm": 1.0730115868136572, "learning_rate": 1.8257817813024927e-05, "loss": 1.0296, "step": 1268 }, { "epoch": 0.2151759220008478, "grad_norm": 0.9972935578412699, "learning_rate": 1.825471896962774e-05, "loss": 1.0299, "step": 1269 }, { "epoch": 0.21534548537515896, "grad_norm": 1.1478958673086375, "learning_rate": 1.825161763616965e-05, "loss": 1.0628, "step": 1270 }, { "epoch": 0.2155150487494701, "grad_norm": 1.0272513646849883, "learning_rate": 1.8248513813586186e-05, "loss": 1.0026, "step": 1271 }, { "epoch": 0.21568461212378126, "grad_norm": 1.0427956863888466, "learning_rate": 1.8245407502813624e-05, "loss": 1.0443, "step": 1272 }, { "epoch": 0.2158541754980924, "grad_norm": 0.9744718706270733, "learning_rate": 1.8242298704788988e-05, "loss": 1.0171, "step": 1273 }, { "epoch": 0.21602373887240356, "grad_norm": 1.0930015929741534, "learning_rate": 1.8239187420450063e-05, "loss": 1.0521, "step": 1274 }, { "epoch": 0.2161933022467147, "grad_norm": 1.0571760423315228, "learning_rate": 1.823607365073537e-05, "loss": 1.035, "step": 1275 }, { "epoch": 0.21636286562102586, "grad_norm": 1.0650238078881098, "learning_rate": 1.82329573965842e-05, "loss": 1.0495, "step": 1276 }, { "epoch": 0.216532428995337, "grad_norm": 1.0799671215327404, "learning_rate": 1.8229838658936566e-05, "loss": 1.0541, "step": 1277 }, { "epoch": 0.21670199236964816, "grad_norm": 1.0583594622587944, "learning_rate": 1.822671743873325e-05, "loss": 1.0446, "step": 1278 }, { "epoch": 0.2168715557439593, "grad_norm": 1.036215478206204, "learning_rate": 1.822359373691578e-05, "loss": 1.0239, "step": 1279 }, { "epoch": 0.21704111911827045, "grad_norm": 0.9852319012496498, "learning_rate": 1.822046755442643e-05, "loss": 0.8963, "step": 1280 }, { "epoch": 0.2172106824925816, "grad_norm": 1.0482599829993722, "learning_rate": 1.821733889220822e-05, "loss": 1.0763, "step": 1281 }, { "epoch": 0.21738024586689275, "grad_norm": 1.0851643841701206, "learning_rate": 1.8214207751204917e-05, "loss": 0.9867, "step": 1282 }, { "epoch": 0.2175498092412039, "grad_norm": 1.1063757152775864, "learning_rate": 1.8211074132361046e-05, "loss": 1.0446, "step": 1283 }, { "epoch": 0.21771937261551505, "grad_norm": 0.9946630095404888, "learning_rate": 1.820793803662187e-05, "loss": 1.0402, "step": 1284 }, { "epoch": 0.2178889359898262, "grad_norm": 1.0358191690781497, "learning_rate": 1.82047994649334e-05, "loss": 1.0398, "step": 1285 }, { "epoch": 0.21805849936413735, "grad_norm": 1.0467543169481737, "learning_rate": 1.8201658418242397e-05, "loss": 1.0851, "step": 1286 }, { "epoch": 0.2182280627384485, "grad_norm": 1.0815008351611577, "learning_rate": 1.819851489749637e-05, "loss": 1.0186, "step": 1287 }, { "epoch": 0.21839762611275965, "grad_norm": 1.063526233148851, "learning_rate": 1.8195368903643565e-05, "loss": 0.9884, "step": 1288 }, { "epoch": 0.2185671894870708, "grad_norm": 1.0722804241887995, "learning_rate": 1.819222043763299e-05, "loss": 1.0471, "step": 1289 }, { "epoch": 0.21873675286138194, "grad_norm": 0.9996597507244086, "learning_rate": 1.8189069500414377e-05, "loss": 1.0027, "step": 1290 }, { "epoch": 0.2189063162356931, "grad_norm": 1.026944656527998, "learning_rate": 1.8185916092938226e-05, "loss": 1.0062, "step": 1291 }, { "epoch": 0.21907587961000424, "grad_norm": 1.0474907531783555, "learning_rate": 1.8182760216155766e-05, "loss": 1.0553, "step": 1292 }, { "epoch": 0.2192454429843154, "grad_norm": 1.0885964226662685, "learning_rate": 1.8179601871018983e-05, "loss": 1.0336, "step": 1293 }, { "epoch": 0.21941500635862654, "grad_norm": 1.020211263857882, "learning_rate": 1.8176441058480594e-05, "loss": 0.9855, "step": 1294 }, { "epoch": 0.2195845697329377, "grad_norm": 1.0762160904577527, "learning_rate": 1.817327777949407e-05, "loss": 1.043, "step": 1295 }, { "epoch": 0.21975413310724884, "grad_norm": 0.771564105076421, "learning_rate": 1.817011203501363e-05, "loss": 0.8648, "step": 1296 }, { "epoch": 0.21992369648156, "grad_norm": 1.0829557616152723, "learning_rate": 1.816694382599422e-05, "loss": 1.0714, "step": 1297 }, { "epoch": 0.22009325985587114, "grad_norm": 1.0379107415970839, "learning_rate": 1.8163773153391548e-05, "loss": 1.0217, "step": 1298 }, { "epoch": 0.2202628232301823, "grad_norm": 1.03751594999782, "learning_rate": 1.816060001816205e-05, "loss": 1.0545, "step": 1299 }, { "epoch": 0.22043238660449344, "grad_norm": 1.076376175821764, "learning_rate": 1.8157424421262918e-05, "loss": 1.0029, "step": 1300 }, { "epoch": 0.22060194997880458, "grad_norm": 1.0418823182472614, "learning_rate": 1.815424636365208e-05, "loss": 1.0051, "step": 1301 }, { "epoch": 0.22077151335311573, "grad_norm": 0.971363472594629, "learning_rate": 1.81510658462882e-05, "loss": 1.0356, "step": 1302 }, { "epoch": 0.22094107672742688, "grad_norm": 0.6288154148296818, "learning_rate": 1.81478828701307e-05, "loss": 0.8789, "step": 1303 }, { "epoch": 0.22111064010173803, "grad_norm": 1.0755855898186446, "learning_rate": 1.8144697436139725e-05, "loss": 1.0587, "step": 1304 }, { "epoch": 0.22128020347604918, "grad_norm": 1.0813624759267786, "learning_rate": 1.814150954527618e-05, "loss": 1.0093, "step": 1305 }, { "epoch": 0.22144976685036033, "grad_norm": 1.0004629949744186, "learning_rate": 1.8138319198501694e-05, "loss": 0.9996, "step": 1306 }, { "epoch": 0.22161933022467148, "grad_norm": 1.0241480674027421, "learning_rate": 1.8135126396778652e-05, "loss": 0.9763, "step": 1307 }, { "epoch": 0.22178889359898263, "grad_norm": 0.9968823716067734, "learning_rate": 1.8131931141070166e-05, "loss": 1.0115, "step": 1308 }, { "epoch": 0.22195845697329378, "grad_norm": 1.148109905828744, "learning_rate": 1.8128733432340095e-05, "loss": 1.0829, "step": 1309 }, { "epoch": 0.22212802034760493, "grad_norm": 1.0447363093685365, "learning_rate": 1.8125533271553045e-05, "loss": 1.0574, "step": 1310 }, { "epoch": 0.22229758372191608, "grad_norm": 1.0181360585746964, "learning_rate": 1.8122330659674345e-05, "loss": 1.0344, "step": 1311 }, { "epoch": 0.22246714709622722, "grad_norm": 0.9959337186795689, "learning_rate": 1.8119125597670075e-05, "loss": 1.0118, "step": 1312 }, { "epoch": 0.22263671047053837, "grad_norm": 1.0676401345366784, "learning_rate": 1.8115918086507054e-05, "loss": 1.0299, "step": 1313 }, { "epoch": 0.22280627384484952, "grad_norm": 1.050405734843094, "learning_rate": 1.8112708127152838e-05, "loss": 1.0114, "step": 1314 }, { "epoch": 0.22297583721916067, "grad_norm": 0.9874269393789414, "learning_rate": 1.8109495720575715e-05, "loss": 1.0445, "step": 1315 }, { "epoch": 0.22314540059347182, "grad_norm": 1.0589727994257228, "learning_rate": 1.8106280867744727e-05, "loss": 1.0504, "step": 1316 }, { "epoch": 0.22331496396778297, "grad_norm": 1.080231520547102, "learning_rate": 1.8103063569629635e-05, "loss": 1.0528, "step": 1317 }, { "epoch": 0.22348452734209412, "grad_norm": 0.9928448905011118, "learning_rate": 1.809984382720095e-05, "loss": 0.9939, "step": 1318 }, { "epoch": 0.22365409071640527, "grad_norm": 1.0760900349238813, "learning_rate": 1.809662164142992e-05, "loss": 1.0273, "step": 1319 }, { "epoch": 0.22382365409071642, "grad_norm": 1.0028974351579958, "learning_rate": 1.809339701328852e-05, "loss": 1.0063, "step": 1320 }, { "epoch": 0.22399321746502757, "grad_norm": 0.9814947331591503, "learning_rate": 1.8090169943749477e-05, "loss": 1.0708, "step": 1321 }, { "epoch": 0.22416278083933872, "grad_norm": 1.0928768555830481, "learning_rate": 1.808694043378624e-05, "loss": 1.0246, "step": 1322 }, { "epoch": 0.22433234421364986, "grad_norm": 1.0501688774526776, "learning_rate": 1.8083708484373002e-05, "loss": 0.9693, "step": 1323 }, { "epoch": 0.224501907587961, "grad_norm": 1.0571847202803455, "learning_rate": 1.8080474096484693e-05, "loss": 1.0228, "step": 1324 }, { "epoch": 0.22467147096227216, "grad_norm": 1.023996290343834, "learning_rate": 1.8077237271096972e-05, "loss": 1.0371, "step": 1325 }, { "epoch": 0.22484103433658328, "grad_norm": 1.036482684607099, "learning_rate": 1.807399800918624e-05, "loss": 1.02, "step": 1326 }, { "epoch": 0.22501059771089443, "grad_norm": 1.0179098496742172, "learning_rate": 1.807075631172963e-05, "loss": 1.0333, "step": 1327 }, { "epoch": 0.22518016108520558, "grad_norm": 0.6523825174397321, "learning_rate": 1.8067512179705008e-05, "loss": 0.873, "step": 1328 }, { "epoch": 0.22534972445951673, "grad_norm": 1.0628711185301398, "learning_rate": 1.8064265614090976e-05, "loss": 1.0229, "step": 1329 }, { "epoch": 0.22551928783382788, "grad_norm": 1.0352947449986687, "learning_rate": 1.806101661586687e-05, "loss": 1.0221, "step": 1330 }, { "epoch": 0.22568885120813903, "grad_norm": 1.041949899039187, "learning_rate": 1.8057765186012765e-05, "loss": 1.0135, "step": 1331 }, { "epoch": 0.22585841458245018, "grad_norm": 1.0608133556327568, "learning_rate": 1.805451132550946e-05, "loss": 1.0358, "step": 1332 }, { "epoch": 0.22602797795676133, "grad_norm": 1.0243662277600578, "learning_rate": 1.8051255035338494e-05, "loss": 1.0302, "step": 1333 }, { "epoch": 0.22619754133107248, "grad_norm": 1.0159980682814356, "learning_rate": 1.8047996316482134e-05, "loss": 1.0256, "step": 1334 }, { "epoch": 0.22636710470538363, "grad_norm": 1.0119958948997587, "learning_rate": 1.8044735169923387e-05, "loss": 1.0372, "step": 1335 }, { "epoch": 0.22653666807969478, "grad_norm": 1.0327662774912698, "learning_rate": 1.8041471596645984e-05, "loss": 1.0046, "step": 1336 }, { "epoch": 0.22670623145400592, "grad_norm": 1.0169170511905408, "learning_rate": 1.8038205597634392e-05, "loss": 1.027, "step": 1337 }, { "epoch": 0.22687579482831707, "grad_norm": 1.075221359976595, "learning_rate": 1.803493717387381e-05, "loss": 1.0472, "step": 1338 }, { "epoch": 0.22704535820262822, "grad_norm": 1.0320008243119638, "learning_rate": 1.803166632635017e-05, "loss": 1.0375, "step": 1339 }, { "epoch": 0.22721492157693937, "grad_norm": 0.9968248621821412, "learning_rate": 1.802839305605013e-05, "loss": 1.0138, "step": 1340 }, { "epoch": 0.22738448495125052, "grad_norm": 0.9996993934206426, "learning_rate": 1.8025117363961083e-05, "loss": 1.0422, "step": 1341 }, { "epoch": 0.22755404832556167, "grad_norm": 0.9902024394693787, "learning_rate": 1.802183925107115e-05, "loss": 1.0144, "step": 1342 }, { "epoch": 0.22772361169987282, "grad_norm": 0.98652821397121, "learning_rate": 1.8018558718369187e-05, "loss": 1.0327, "step": 1343 }, { "epoch": 0.22789317507418397, "grad_norm": 1.0278006009194856, "learning_rate": 1.8015275766844774e-05, "loss": 1.0188, "step": 1344 }, { "epoch": 0.22806273844849512, "grad_norm": 1.0169783606855654, "learning_rate": 1.801199039748822e-05, "loss": 1.0574, "step": 1345 }, { "epoch": 0.22823230182280627, "grad_norm": 1.064677203366401, "learning_rate": 1.8008702611290578e-05, "loss": 1.0107, "step": 1346 }, { "epoch": 0.22840186519711742, "grad_norm": 1.0395616526835498, "learning_rate": 1.8005412409243604e-05, "loss": 1.0228, "step": 1347 }, { "epoch": 0.22857142857142856, "grad_norm": 1.017112070567201, "learning_rate": 1.800211979233981e-05, "loss": 0.9958, "step": 1348 }, { "epoch": 0.2287409919457397, "grad_norm": 1.009005200988391, "learning_rate": 1.7998824761572415e-05, "loss": 1.0149, "step": 1349 }, { "epoch": 0.22891055532005086, "grad_norm": 1.038513230379797, "learning_rate": 1.799552731793538e-05, "loss": 0.9939, "step": 1350 }, { "epoch": 0.229080118694362, "grad_norm": 1.0290876338440214, "learning_rate": 1.7992227462423385e-05, "loss": 1.0653, "step": 1351 }, { "epoch": 0.22924968206867316, "grad_norm": 1.0796137744913767, "learning_rate": 1.7988925196031845e-05, "loss": 1.0164, "step": 1352 }, { "epoch": 0.2294192454429843, "grad_norm": 0.9998114488318148, "learning_rate": 1.7985620519756897e-05, "loss": 1.0528, "step": 1353 }, { "epoch": 0.22958880881729546, "grad_norm": 0.6723301352068823, "learning_rate": 1.7982313434595405e-05, "loss": 0.8593, "step": 1354 }, { "epoch": 0.2297583721916066, "grad_norm": 1.008352585274843, "learning_rate": 1.7979003941544965e-05, "loss": 1.0075, "step": 1355 }, { "epoch": 0.22992793556591776, "grad_norm": 1.0688361465719443, "learning_rate": 1.7975692041603893e-05, "loss": 1.0256, "step": 1356 }, { "epoch": 0.2300974989402289, "grad_norm": 1.035851941096092, "learning_rate": 1.7972377735771234e-05, "loss": 1.0351, "step": 1357 }, { "epoch": 0.23026706231454006, "grad_norm": 0.9876668321277083, "learning_rate": 1.7969061025046758e-05, "loss": 1.0302, "step": 1358 }, { "epoch": 0.2304366256888512, "grad_norm": 1.0301921163719239, "learning_rate": 1.7965741910430963e-05, "loss": 1.0382, "step": 1359 }, { "epoch": 0.23060618906316235, "grad_norm": 1.0392955779400515, "learning_rate": 1.7962420392925066e-05, "loss": 1.0136, "step": 1360 }, { "epoch": 0.2307757524374735, "grad_norm": 0.9853785816809418, "learning_rate": 1.7959096473531016e-05, "loss": 1.0114, "step": 1361 }, { "epoch": 0.23094531581178465, "grad_norm": 1.0352520425664744, "learning_rate": 1.7955770153251482e-05, "loss": 1.0451, "step": 1362 }, { "epoch": 0.2311148791860958, "grad_norm": 1.0478570736929989, "learning_rate": 1.795244143308986e-05, "loss": 1.029, "step": 1363 }, { "epoch": 0.23128444256040695, "grad_norm": 1.0891369350528002, "learning_rate": 1.7949110314050267e-05, "loss": 1.0685, "step": 1364 }, { "epoch": 0.2314540059347181, "grad_norm": 1.032754626337483, "learning_rate": 1.7945776797137544e-05, "loss": 1.0283, "step": 1365 }, { "epoch": 0.23162356930902925, "grad_norm": 1.0519404009097395, "learning_rate": 1.794244088335726e-05, "loss": 1.0383, "step": 1366 }, { "epoch": 0.2317931326833404, "grad_norm": 1.1037782436967822, "learning_rate": 1.7939102573715698e-05, "loss": 0.9956, "step": 1367 }, { "epoch": 0.23196269605765155, "grad_norm": 1.005907467681158, "learning_rate": 1.7935761869219876e-05, "loss": 0.9945, "step": 1368 }, { "epoch": 0.2321322594319627, "grad_norm": 1.0201308049072142, "learning_rate": 1.7932418770877523e-05, "loss": 1.0069, "step": 1369 }, { "epoch": 0.23230182280627384, "grad_norm": 0.9565666578932192, "learning_rate": 1.7929073279697096e-05, "loss": 1.0133, "step": 1370 }, { "epoch": 0.232471386180585, "grad_norm": 1.09555129926372, "learning_rate": 1.792572539668777e-05, "loss": 1.073, "step": 1371 }, { "epoch": 0.23264094955489614, "grad_norm": 1.0701035107289367, "learning_rate": 1.792237512285945e-05, "loss": 1.0468, "step": 1372 }, { "epoch": 0.2328105129292073, "grad_norm": 0.9909125772872428, "learning_rate": 1.7919022459222754e-05, "loss": 1.0301, "step": 1373 }, { "epoch": 0.23298007630351844, "grad_norm": 1.0521920899966322, "learning_rate": 1.7915667406789018e-05, "loss": 1.0229, "step": 1374 }, { "epoch": 0.2331496396778296, "grad_norm": 0.9992184122827568, "learning_rate": 1.791230996657031e-05, "loss": 1.0685, "step": 1375 }, { "epoch": 0.23331920305214074, "grad_norm": 1.0648044025187282, "learning_rate": 1.7908950139579406e-05, "loss": 1.0525, "step": 1376 }, { "epoch": 0.2334887664264519, "grad_norm": 1.0270826284903836, "learning_rate": 1.7905587926829815e-05, "loss": 1.006, "step": 1377 }, { "epoch": 0.23365832980076304, "grad_norm": 0.994109347184484, "learning_rate": 1.790222332933575e-05, "loss": 1.015, "step": 1378 }, { "epoch": 0.2338278931750742, "grad_norm": 1.0662296187363034, "learning_rate": 1.7898856348112163e-05, "loss": 1.0674, "step": 1379 }, { "epoch": 0.23399745654938534, "grad_norm": 0.9902622887080254, "learning_rate": 1.7895486984174707e-05, "loss": 0.9842, "step": 1380 }, { "epoch": 0.23416701992369648, "grad_norm": 1.0268383526807514, "learning_rate": 1.7892115238539757e-05, "loss": 1.0194, "step": 1381 }, { "epoch": 0.23433658329800763, "grad_norm": 0.9469023642741146, "learning_rate": 1.7888741112224418e-05, "loss": 0.9809, "step": 1382 }, { "epoch": 0.23450614667231878, "grad_norm": 1.017893828259254, "learning_rate": 1.7885364606246503e-05, "loss": 1.0395, "step": 1383 }, { "epoch": 0.23467571004662993, "grad_norm": 0.9887055191344767, "learning_rate": 1.7881985721624544e-05, "loss": 1.0122, "step": 1384 }, { "epoch": 0.23484527342094108, "grad_norm": 1.029189244899999, "learning_rate": 1.7878604459377795e-05, "loss": 1.0166, "step": 1385 }, { "epoch": 0.23501483679525223, "grad_norm": 1.0404898021636972, "learning_rate": 1.787522082052622e-05, "loss": 1.0386, "step": 1386 }, { "epoch": 0.23518440016956338, "grad_norm": 1.0061707856594195, "learning_rate": 1.7871834806090502e-05, "loss": 1.0598, "step": 1387 }, { "epoch": 0.23535396354387453, "grad_norm": 1.0029441355436106, "learning_rate": 1.786844641709205e-05, "loss": 0.9995, "step": 1388 }, { "epoch": 0.23552352691818568, "grad_norm": 1.0786000692928612, "learning_rate": 1.7865055654552977e-05, "loss": 1.0245, "step": 1389 }, { "epoch": 0.23569309029249683, "grad_norm": 1.0870057639557795, "learning_rate": 1.7861662519496116e-05, "loss": 1.0327, "step": 1390 }, { "epoch": 0.23586265366680798, "grad_norm": 1.019576780159488, "learning_rate": 1.785826701294502e-05, "loss": 1.0213, "step": 1391 }, { "epoch": 0.23603221704111912, "grad_norm": 0.9620448442900429, "learning_rate": 1.7854869135923946e-05, "loss": 0.9859, "step": 1392 }, { "epoch": 0.23620178041543027, "grad_norm": 1.0430154291142868, "learning_rate": 1.7851468889457883e-05, "loss": 1.0644, "step": 1393 }, { "epoch": 0.23637134378974142, "grad_norm": 0.9963760284863703, "learning_rate": 1.7848066274572523e-05, "loss": 1.0362, "step": 1394 }, { "epoch": 0.23654090716405257, "grad_norm": 0.9959007204573155, "learning_rate": 1.7844661292294274e-05, "loss": 0.9947, "step": 1395 }, { "epoch": 0.23671047053836372, "grad_norm": 1.0801377559183405, "learning_rate": 1.7841253943650258e-05, "loss": 1.0365, "step": 1396 }, { "epoch": 0.23688003391267487, "grad_norm": 1.0287693526888502, "learning_rate": 1.7837844229668312e-05, "loss": 1.0319, "step": 1397 }, { "epoch": 0.23704959728698602, "grad_norm": 1.0454540554090133, "learning_rate": 1.7834432151376992e-05, "loss": 1.0404, "step": 1398 }, { "epoch": 0.23721916066129717, "grad_norm": 1.0259811923709794, "learning_rate": 1.7831017709805555e-05, "loss": 1.0154, "step": 1399 }, { "epoch": 0.23738872403560832, "grad_norm": 1.0695287937242937, "learning_rate": 1.782760090598398e-05, "loss": 1.0582, "step": 1400 }, { "epoch": 0.23755828740991947, "grad_norm": 1.0273298841891763, "learning_rate": 1.7824181740942958e-05, "loss": 1.0099, "step": 1401 }, { "epoch": 0.23772785078423062, "grad_norm": 1.0320346777060623, "learning_rate": 1.7820760215713885e-05, "loss": 1.0245, "step": 1402 }, { "epoch": 0.23789741415854176, "grad_norm": 1.008865581468882, "learning_rate": 1.7817336331328882e-05, "loss": 1.0403, "step": 1403 }, { "epoch": 0.2380669775328529, "grad_norm": 1.124825571650674, "learning_rate": 1.781391008882077e-05, "loss": 1.0763, "step": 1404 }, { "epoch": 0.23823654090716406, "grad_norm": 0.9931723201014185, "learning_rate": 1.7810481489223082e-05, "loss": 1.0275, "step": 1405 }, { "epoch": 0.2384061042814752, "grad_norm": 1.048460249199628, "learning_rate": 1.7807050533570073e-05, "loss": 1.0369, "step": 1406 }, { "epoch": 0.23857566765578636, "grad_norm": 1.0509585873036642, "learning_rate": 1.7803617222896696e-05, "loss": 1.0562, "step": 1407 }, { "epoch": 0.2387452310300975, "grad_norm": 0.9601662419015582, "learning_rate": 1.780018155823862e-05, "loss": 1.023, "step": 1408 }, { "epoch": 0.23891479440440866, "grad_norm": 1.026072506803247, "learning_rate": 1.7796743540632226e-05, "loss": 1.0418, "step": 1409 }, { "epoch": 0.2390843577787198, "grad_norm": 0.9951475157202543, "learning_rate": 1.7793303171114597e-05, "loss": 1.0361, "step": 1410 }, { "epoch": 0.23925392115303096, "grad_norm": 0.9598733138895736, "learning_rate": 1.778986045072354e-05, "loss": 1.0064, "step": 1411 }, { "epoch": 0.2394234845273421, "grad_norm": 0.9629564424867945, "learning_rate": 1.778641538049755e-05, "loss": 0.9569, "step": 1412 }, { "epoch": 0.23959304790165326, "grad_norm": 1.0512995422784355, "learning_rate": 1.7782967961475855e-05, "loss": 1.0509, "step": 1413 }, { "epoch": 0.2397626112759644, "grad_norm": 1.0356932174970064, "learning_rate": 1.7779518194698374e-05, "loss": 1.0327, "step": 1414 }, { "epoch": 0.23993217465027555, "grad_norm": 1.0226000830433215, "learning_rate": 1.7776066081205738e-05, "loss": 1.0375, "step": 1415 }, { "epoch": 0.2401017380245867, "grad_norm": 0.9807925229585885, "learning_rate": 1.777261162203929e-05, "loss": 0.9846, "step": 1416 }, { "epoch": 0.24027130139889785, "grad_norm": 1.0554528099124916, "learning_rate": 1.776915481824107e-05, "loss": 1.0207, "step": 1417 }, { "epoch": 0.24044086477320897, "grad_norm": 1.0437309139656565, "learning_rate": 1.776569567085385e-05, "loss": 1.0325, "step": 1418 }, { "epoch": 0.24061042814752012, "grad_norm": 0.9785323164536257, "learning_rate": 1.7762234180921078e-05, "loss": 1.0016, "step": 1419 }, { "epoch": 0.24077999152183127, "grad_norm": 0.9638615457405866, "learning_rate": 1.7758770349486924e-05, "loss": 0.9759, "step": 1420 }, { "epoch": 0.24094955489614242, "grad_norm": 1.0567298938845457, "learning_rate": 1.775530417759627e-05, "loss": 0.9877, "step": 1421 }, { "epoch": 0.24111911827045357, "grad_norm": 1.0627658698166225, "learning_rate": 1.7751835666294694e-05, "loss": 0.9881, "step": 1422 }, { "epoch": 0.24128868164476472, "grad_norm": 1.0592558454321745, "learning_rate": 1.7748364816628482e-05, "loss": 1.0426, "step": 1423 }, { "epoch": 0.24145824501907587, "grad_norm": 0.9883069952704082, "learning_rate": 1.7744891629644627e-05, "loss": 1.0033, "step": 1424 }, { "epoch": 0.24162780839338702, "grad_norm": 1.0812288136016455, "learning_rate": 1.7741416106390828e-05, "loss": 1.0382, "step": 1425 }, { "epoch": 0.24179737176769817, "grad_norm": 1.1473853342550038, "learning_rate": 1.773793824791548e-05, "loss": 1.0545, "step": 1426 }, { "epoch": 0.24196693514200932, "grad_norm": 1.0549135788350918, "learning_rate": 1.77344580552677e-05, "loss": 1.0609, "step": 1427 }, { "epoch": 0.24213649851632046, "grad_norm": 1.0080150661065357, "learning_rate": 1.7730975529497292e-05, "loss": 1.033, "step": 1428 }, { "epoch": 0.2423060618906316, "grad_norm": 1.0346463488518975, "learning_rate": 1.772749067165477e-05, "loss": 1.0938, "step": 1429 }, { "epoch": 0.24247562526494276, "grad_norm": 1.0742667851191425, "learning_rate": 1.7724003482791358e-05, "loss": 1.0572, "step": 1430 }, { "epoch": 0.2426451886392539, "grad_norm": 1.0007468586433665, "learning_rate": 1.772051396395897e-05, "loss": 1.0146, "step": 1431 }, { "epoch": 0.24281475201356506, "grad_norm": 0.9788043975865908, "learning_rate": 1.7717022116210234e-05, "loss": 1.0153, "step": 1432 }, { "epoch": 0.2429843153878762, "grad_norm": 1.0664870046562671, "learning_rate": 1.7713527940598473e-05, "loss": 1.0259, "step": 1433 }, { "epoch": 0.24315387876218736, "grad_norm": 1.053225021916283, "learning_rate": 1.771003143817772e-05, "loss": 1.0231, "step": 1434 }, { "epoch": 0.2433234421364985, "grad_norm": 1.0747732929688687, "learning_rate": 1.77065326100027e-05, "loss": 1.046, "step": 1435 }, { "epoch": 0.24349300551080966, "grad_norm": 1.0289159349723926, "learning_rate": 1.7703031457128853e-05, "loss": 1.0268, "step": 1436 }, { "epoch": 0.2436625688851208, "grad_norm": 1.0307347972055092, "learning_rate": 1.7699527980612306e-05, "loss": 1.0333, "step": 1437 }, { "epoch": 0.24383213225943196, "grad_norm": 1.05095360333709, "learning_rate": 1.7696022181509892e-05, "loss": 0.9914, "step": 1438 }, { "epoch": 0.2440016956337431, "grad_norm": 1.0181522908961647, "learning_rate": 1.769251406087915e-05, "loss": 1.0537, "step": 1439 }, { "epoch": 0.24417125900805425, "grad_norm": 1.0404669561076565, "learning_rate": 1.768900361977832e-05, "loss": 1.0421, "step": 1440 }, { "epoch": 0.2443408223823654, "grad_norm": 0.9876662088756392, "learning_rate": 1.7685490859266324e-05, "loss": 1.008, "step": 1441 }, { "epoch": 0.24451038575667655, "grad_norm": 0.653233955255012, "learning_rate": 1.7681975780402807e-05, "loss": 0.8767, "step": 1442 }, { "epoch": 0.2446799491309877, "grad_norm": 0.9906944457386946, "learning_rate": 1.76784583842481e-05, "loss": 1.033, "step": 1443 }, { "epoch": 0.24484951250529885, "grad_norm": 0.9839244572378357, "learning_rate": 1.7674938671863237e-05, "loss": 1.0132, "step": 1444 }, { "epoch": 0.24501907587961, "grad_norm": 1.0107933846377484, "learning_rate": 1.7671416644309945e-05, "loss": 1.0077, "step": 1445 }, { "epoch": 0.24518863925392115, "grad_norm": 0.9952062322872111, "learning_rate": 1.7667892302650665e-05, "loss": 1.0243, "step": 1446 }, { "epoch": 0.2453582026282323, "grad_norm": 0.9775603072340857, "learning_rate": 1.7664365647948513e-05, "loss": 1.0094, "step": 1447 }, { "epoch": 0.24552776600254345, "grad_norm": 0.9816259213997773, "learning_rate": 1.7660836681267323e-05, "loss": 1.0049, "step": 1448 }, { "epoch": 0.2456973293768546, "grad_norm": 1.0064737928710894, "learning_rate": 1.7657305403671618e-05, "loss": 1.0466, "step": 1449 }, { "epoch": 0.24586689275116574, "grad_norm": 1.016947092204689, "learning_rate": 1.7653771816226614e-05, "loss": 1.0575, "step": 1450 }, { "epoch": 0.2460364561254769, "grad_norm": 1.0926432461997742, "learning_rate": 1.7650235919998234e-05, "loss": 1.0041, "step": 1451 }, { "epoch": 0.24620601949978804, "grad_norm": 1.0103729197371947, "learning_rate": 1.764669771605309e-05, "loss": 1.0177, "step": 1452 }, { "epoch": 0.2463755828740992, "grad_norm": 1.0125724351885979, "learning_rate": 1.7643157205458483e-05, "loss": 1.0049, "step": 1453 }, { "epoch": 0.24654514624841034, "grad_norm": 1.0062474149891296, "learning_rate": 1.7639614389282432e-05, "loss": 1.0237, "step": 1454 }, { "epoch": 0.2467147096227215, "grad_norm": 1.0130670679939415, "learning_rate": 1.7636069268593633e-05, "loss": 0.9878, "step": 1455 }, { "epoch": 0.24688427299703264, "grad_norm": 1.0288671551407782, "learning_rate": 1.7632521844461482e-05, "loss": 1.0209, "step": 1456 }, { "epoch": 0.2470538363713438, "grad_norm": 1.0577760257963837, "learning_rate": 1.762897211795607e-05, "loss": 1.0557, "step": 1457 }, { "epoch": 0.24722339974565494, "grad_norm": 0.9963844921034853, "learning_rate": 1.7625420090148182e-05, "loss": 1.0217, "step": 1458 }, { "epoch": 0.2473929631199661, "grad_norm": 1.0954657623631543, "learning_rate": 1.76218657621093e-05, "loss": 1.0305, "step": 1459 }, { "epoch": 0.24756252649427724, "grad_norm": 1.0921685324811647, "learning_rate": 1.7618309134911594e-05, "loss": 0.9985, "step": 1460 }, { "epoch": 0.24773208986858838, "grad_norm": 0.9821690248562863, "learning_rate": 1.7614750209627938e-05, "loss": 1.023, "step": 1461 }, { "epoch": 0.24790165324289953, "grad_norm": 0.9533657952772474, "learning_rate": 1.7611188987331885e-05, "loss": 1.0148, "step": 1462 }, { "epoch": 0.24807121661721068, "grad_norm": 0.6797986381439817, "learning_rate": 1.7607625469097697e-05, "loss": 0.8792, "step": 1463 }, { "epoch": 0.24824077999152183, "grad_norm": 1.097785575309812, "learning_rate": 1.7604059656000313e-05, "loss": 1.0051, "step": 1464 }, { "epoch": 0.24841034336583298, "grad_norm": 1.0323510046183746, "learning_rate": 1.760049154911537e-05, "loss": 1.0202, "step": 1465 }, { "epoch": 0.24857990674014413, "grad_norm": 0.6110246269673387, "learning_rate": 1.7596921149519203e-05, "loss": 0.8005, "step": 1466 }, { "epoch": 0.24874947011445528, "grad_norm": 0.5979707859141831, "learning_rate": 1.7593348458288834e-05, "loss": 0.8097, "step": 1467 }, { "epoch": 0.24891903348876643, "grad_norm": 1.0474172632435463, "learning_rate": 1.7589773476501974e-05, "loss": 1.0267, "step": 1468 }, { "epoch": 0.24908859686307758, "grad_norm": 1.0086100453838633, "learning_rate": 1.758619620523703e-05, "loss": 1.0472, "step": 1469 }, { "epoch": 0.24925816023738873, "grad_norm": 0.9952254966437639, "learning_rate": 1.758261664557309e-05, "loss": 1.0301, "step": 1470 }, { "epoch": 0.24942772361169988, "grad_norm": 0.963171841339179, "learning_rate": 1.7579034798589942e-05, "loss": 0.9986, "step": 1471 }, { "epoch": 0.24959728698601102, "grad_norm": 1.044690296682734, "learning_rate": 1.7575450665368068e-05, "loss": 1.0456, "step": 1472 }, { "epoch": 0.24976685036032217, "grad_norm": 1.0244138080273024, "learning_rate": 1.7571864246988623e-05, "loss": 1.0053, "step": 1473 }, { "epoch": 0.24993641373463332, "grad_norm": 0.9706876942255614, "learning_rate": 1.7568275544533464e-05, "loss": 0.9923, "step": 1474 }, { "epoch": 0.25010597710894444, "grad_norm": 0.6339118111809177, "learning_rate": 1.7564684559085138e-05, "loss": 0.8381, "step": 1475 }, { "epoch": 0.2502755404832556, "grad_norm": 1.0925280850352308, "learning_rate": 1.7561091291726867e-05, "loss": 1.012, "step": 1476 }, { "epoch": 0.25044510385756674, "grad_norm": 1.0441971739600981, "learning_rate": 1.7557495743542586e-05, "loss": 1.0229, "step": 1477 }, { "epoch": 0.2506146672318779, "grad_norm": 1.0074490589085838, "learning_rate": 1.755389791561689e-05, "loss": 1.0254, "step": 1478 }, { "epoch": 0.25078423060618904, "grad_norm": 1.1152970465612853, "learning_rate": 1.755029780903508e-05, "loss": 1.0299, "step": 1479 }, { "epoch": 0.2509537939805002, "grad_norm": 1.091375366635331, "learning_rate": 1.7546695424883133e-05, "loss": 1.0269, "step": 1480 }, { "epoch": 0.25112335735481134, "grad_norm": 0.7059197033064152, "learning_rate": 1.7543090764247726e-05, "loss": 0.8515, "step": 1481 }, { "epoch": 0.2512929207291225, "grad_norm": 1.0165101370600038, "learning_rate": 1.7539483828216216e-05, "loss": 1.0314, "step": 1482 }, { "epoch": 0.25146248410343364, "grad_norm": 1.032202986974254, "learning_rate": 1.753587461787664e-05, "loss": 1.0346, "step": 1483 }, { "epoch": 0.2516320474777448, "grad_norm": 1.0576941696371032, "learning_rate": 1.7532263134317735e-05, "loss": 0.9761, "step": 1484 }, { "epoch": 0.25180161085205593, "grad_norm": 0.9930654952122822, "learning_rate": 1.7528649378628912e-05, "loss": 1.0199, "step": 1485 }, { "epoch": 0.2519711742263671, "grad_norm": 1.0162042188951854, "learning_rate": 1.7525033351900268e-05, "loss": 1.0651, "step": 1486 }, { "epoch": 0.25214073760067823, "grad_norm": 1.0107672579984182, "learning_rate": 1.7521415055222593e-05, "loss": 1.0109, "step": 1487 }, { "epoch": 0.2523103009749894, "grad_norm": 1.0156062703495796, "learning_rate": 1.7517794489687355e-05, "loss": 1.014, "step": 1488 }, { "epoch": 0.25247986434930053, "grad_norm": 0.675215900675686, "learning_rate": 1.751417165638671e-05, "loss": 0.8449, "step": 1489 }, { "epoch": 0.2526494277236117, "grad_norm": 1.0478988523805661, "learning_rate": 1.75105465564135e-05, "loss": 1.0321, "step": 1490 }, { "epoch": 0.25281899109792283, "grad_norm": 0.7120965949161628, "learning_rate": 1.7506919190861238e-05, "loss": 0.9697, "step": 1491 }, { "epoch": 0.252988554472234, "grad_norm": 1.1155422342885284, "learning_rate": 1.7503289560824135e-05, "loss": 1.0334, "step": 1492 }, { "epoch": 0.2531581178465451, "grad_norm": 1.0764515352550739, "learning_rate": 1.7499657667397083e-05, "loss": 1.04, "step": 1493 }, { "epoch": 0.2533276812208563, "grad_norm": 1.040372474806951, "learning_rate": 1.749602351167565e-05, "loss": 0.9859, "step": 1494 }, { "epoch": 0.2534972445951674, "grad_norm": 1.1019232397348715, "learning_rate": 1.7492387094756088e-05, "loss": 1.0431, "step": 1495 }, { "epoch": 0.2536668079694786, "grad_norm": 0.9934189371839818, "learning_rate": 1.7488748417735334e-05, "loss": 1.0016, "step": 1496 }, { "epoch": 0.2538363713437897, "grad_norm": 1.0281861166727788, "learning_rate": 1.7485107481711014e-05, "loss": 1.0147, "step": 1497 }, { "epoch": 0.2540059347181009, "grad_norm": 1.0050445262239223, "learning_rate": 1.7481464287781416e-05, "loss": 0.9724, "step": 1498 }, { "epoch": 0.254175498092412, "grad_norm": 1.0193714164544145, "learning_rate": 1.7477818837045527e-05, "loss": 1.0081, "step": 1499 }, { "epoch": 0.2543450614667232, "grad_norm": 0.9632133499931924, "learning_rate": 1.7474171130603007e-05, "loss": 1.0197, "step": 1500 }, { "epoch": 0.2545146248410343, "grad_norm": 0.9946302411895425, "learning_rate": 1.7470521169554196e-05, "loss": 1.0214, "step": 1501 }, { "epoch": 0.2546841882153455, "grad_norm": 1.010551942615164, "learning_rate": 1.7466868955000117e-05, "loss": 1.0456, "step": 1502 }, { "epoch": 0.2548537515896566, "grad_norm": 1.0323430033612593, "learning_rate": 1.7463214488042472e-05, "loss": 1.0107, "step": 1503 }, { "epoch": 0.2550233149639678, "grad_norm": 1.0022888503923253, "learning_rate": 1.745955776978364e-05, "loss": 1.0061, "step": 1504 }, { "epoch": 0.2551928783382789, "grad_norm": 0.9922662665684321, "learning_rate": 1.7455898801326685e-05, "loss": 1.032, "step": 1505 }, { "epoch": 0.2553624417125901, "grad_norm": 1.034503521327479, "learning_rate": 1.7452237583775344e-05, "loss": 0.9921, "step": 1506 }, { "epoch": 0.2555320050869012, "grad_norm": 1.0147133041475178, "learning_rate": 1.7448574118234032e-05, "loss": 0.9865, "step": 1507 }, { "epoch": 0.2557015684612124, "grad_norm": 1.0360809115409286, "learning_rate": 1.7444908405807845e-05, "loss": 1.068, "step": 1508 }, { "epoch": 0.2558711318355235, "grad_norm": 1.0173743081161042, "learning_rate": 1.7441240447602565e-05, "loss": 1.0674, "step": 1509 }, { "epoch": 0.2560406952098347, "grad_norm": 0.9952798221689829, "learning_rate": 1.7437570244724625e-05, "loss": 1.0165, "step": 1510 }, { "epoch": 0.2562102585841458, "grad_norm": 1.0105035368109276, "learning_rate": 1.743389779828117e-05, "loss": 1.0274, "step": 1511 }, { "epoch": 0.256379821958457, "grad_norm": 1.0025947852506194, "learning_rate": 1.7430223109379995e-05, "loss": 0.9871, "step": 1512 }, { "epoch": 0.2565493853327681, "grad_norm": 0.9602305372254146, "learning_rate": 1.742654617912958e-05, "loss": 0.9913, "step": 1513 }, { "epoch": 0.2567189487070793, "grad_norm": 0.9220114231493487, "learning_rate": 1.7422867008639094e-05, "loss": 0.9976, "step": 1514 }, { "epoch": 0.2568885120813904, "grad_norm": 1.0249573837742179, "learning_rate": 1.7419185599018356e-05, "loss": 1.0361, "step": 1515 }, { "epoch": 0.2570580754557016, "grad_norm": 0.9551299605125825, "learning_rate": 1.741550195137788e-05, "loss": 0.9928, "step": 1516 }, { "epoch": 0.2572276388300127, "grad_norm": 1.0260119822193878, "learning_rate": 1.7411816066828852e-05, "loss": 1.0083, "step": 1517 }, { "epoch": 0.2573972022043239, "grad_norm": 0.9683669860935937, "learning_rate": 1.7408127946483127e-05, "loss": 0.9782, "step": 1518 }, { "epoch": 0.257566765578635, "grad_norm": 1.006290752282903, "learning_rate": 1.7404437591453237e-05, "loss": 0.9743, "step": 1519 }, { "epoch": 0.2577363289529462, "grad_norm": 1.0454112580276205, "learning_rate": 1.7400745002852388e-05, "loss": 1.0318, "step": 1520 }, { "epoch": 0.2579058923272573, "grad_norm": 1.0113736514684712, "learning_rate": 1.7397050181794463e-05, "loss": 0.9929, "step": 1521 }, { "epoch": 0.2580754557015685, "grad_norm": 0.9911816544299251, "learning_rate": 1.7393353129394017e-05, "loss": 1.0263, "step": 1522 }, { "epoch": 0.2582450190758796, "grad_norm": 0.9713589517498729, "learning_rate": 1.7389653846766276e-05, "loss": 1.0229, "step": 1523 }, { "epoch": 0.2584145824501908, "grad_norm": 0.9713910416200988, "learning_rate": 1.7385952335027136e-05, "loss": 1.0184, "step": 1524 }, { "epoch": 0.2585841458245019, "grad_norm": 1.0216158997121034, "learning_rate": 1.7382248595293175e-05, "loss": 1.0003, "step": 1525 }, { "epoch": 0.2587537091988131, "grad_norm": 1.0389125520173255, "learning_rate": 1.7378542628681634e-05, "loss": 1.0148, "step": 1526 }, { "epoch": 0.2589232725731242, "grad_norm": 0.9773163912815696, "learning_rate": 1.7374834436310427e-05, "loss": 1.039, "step": 1527 }, { "epoch": 0.2590928359474354, "grad_norm": 1.0232424540151142, "learning_rate": 1.7371124019298148e-05, "loss": 0.994, "step": 1528 }, { "epoch": 0.2592623993217465, "grad_norm": 1.0103535358616882, "learning_rate": 1.736741137876405e-05, "loss": 1.0279, "step": 1529 }, { "epoch": 0.25943196269605767, "grad_norm": 1.0634746851039283, "learning_rate": 1.7363696515828062e-05, "loss": 1.0381, "step": 1530 }, { "epoch": 0.2596015260703688, "grad_norm": 1.1023515103501251, "learning_rate": 1.735997943161079e-05, "loss": 1.0333, "step": 1531 }, { "epoch": 0.25977108944467997, "grad_norm": 1.0263854678361861, "learning_rate": 1.7356260127233496e-05, "loss": 1.0029, "step": 1532 }, { "epoch": 0.2599406528189911, "grad_norm": 1.032283138861617, "learning_rate": 1.7352538603818124e-05, "loss": 1.0434, "step": 1533 }, { "epoch": 0.26011021619330227, "grad_norm": 1.0337668282224568, "learning_rate": 1.7348814862487277e-05, "loss": 1.0128, "step": 1534 }, { "epoch": 0.2602797795676134, "grad_norm": 1.051583157278923, "learning_rate": 1.734508890436424e-05, "loss": 1.0444, "step": 1535 }, { "epoch": 0.26044934294192457, "grad_norm": 1.0070075004451315, "learning_rate": 1.7341360730572958e-05, "loss": 1.0175, "step": 1536 }, { "epoch": 0.2606189063162357, "grad_norm": 1.0494492020041937, "learning_rate": 1.733763034223804e-05, "loss": 1.0363, "step": 1537 }, { "epoch": 0.26078846969054686, "grad_norm": 1.041464603137685, "learning_rate": 1.7333897740484776e-05, "loss": 1.0528, "step": 1538 }, { "epoch": 0.260958033064858, "grad_norm": 0.9999454640958684, "learning_rate": 1.7330162926439116e-05, "loss": 1.0224, "step": 1539 }, { "epoch": 0.26112759643916916, "grad_norm": 0.9660982024683389, "learning_rate": 1.7326425901227676e-05, "loss": 1.0122, "step": 1540 }, { "epoch": 0.2612971598134803, "grad_norm": 1.1010065043813138, "learning_rate": 1.7322686665977738e-05, "loss": 0.9959, "step": 1541 }, { "epoch": 0.26146672318779146, "grad_norm": 1.0780044340378458, "learning_rate": 1.7318945221817255e-05, "loss": 1.0017, "step": 1542 }, { "epoch": 0.2616362865621026, "grad_norm": 0.9885453639924419, "learning_rate": 1.731520156987485e-05, "loss": 0.9975, "step": 1543 }, { "epoch": 0.26180584993641376, "grad_norm": 1.0052077950583855, "learning_rate": 1.7311455711279802e-05, "loss": 0.9835, "step": 1544 }, { "epoch": 0.2619754133107249, "grad_norm": 1.0300357419040997, "learning_rate": 1.730770764716206e-05, "loss": 1.015, "step": 1545 }, { "epoch": 0.26214497668503606, "grad_norm": 1.1119620555831413, "learning_rate": 1.7303957378652243e-05, "loss": 1.0642, "step": 1546 }, { "epoch": 0.2623145400593472, "grad_norm": 1.062119910039204, "learning_rate": 1.7300204906881627e-05, "loss": 1.0119, "step": 1547 }, { "epoch": 0.26248410343365836, "grad_norm": 1.0116705173235143, "learning_rate": 1.729645023298216e-05, "loss": 1.0106, "step": 1548 }, { "epoch": 0.2626536668079695, "grad_norm": 1.062110353745387, "learning_rate": 1.7292693358086447e-05, "loss": 1.0398, "step": 1549 }, { "epoch": 0.26282323018228065, "grad_norm": 1.0356213192297952, "learning_rate": 1.7288934283327763e-05, "loss": 1.0612, "step": 1550 }, { "epoch": 0.2629927935565918, "grad_norm": 0.9361254533742164, "learning_rate": 1.728517300984004e-05, "loss": 0.9991, "step": 1551 }, { "epoch": 0.26316235693090295, "grad_norm": 0.9719160209427214, "learning_rate": 1.7281409538757886e-05, "loss": 0.9924, "step": 1552 }, { "epoch": 0.2633319203052141, "grad_norm": 1.010106484642722, "learning_rate": 1.7277643871216558e-05, "loss": 1.0301, "step": 1553 }, { "epoch": 0.26350148367952525, "grad_norm": 0.9845021914368712, "learning_rate": 1.7273876008351977e-05, "loss": 1.0531, "step": 1554 }, { "epoch": 0.26367104705383637, "grad_norm": 0.9573328337978327, "learning_rate": 1.727010595130074e-05, "loss": 0.9875, "step": 1555 }, { "epoch": 0.2638406104281475, "grad_norm": 1.1090604910368012, "learning_rate": 1.7266333701200086e-05, "loss": 1.037, "step": 1556 }, { "epoch": 0.26401017380245867, "grad_norm": 0.9881677734299702, "learning_rate": 1.7262559259187936e-05, "loss": 1.0133, "step": 1557 }, { "epoch": 0.2641797371767698, "grad_norm": 1.0336625780449076, "learning_rate": 1.725878262640285e-05, "loss": 1.0082, "step": 1558 }, { "epoch": 0.26434930055108097, "grad_norm": 1.0039999420029906, "learning_rate": 1.725500380398407e-05, "loss": 1.0004, "step": 1559 }, { "epoch": 0.2645188639253921, "grad_norm": 0.9908248485760892, "learning_rate": 1.7251222793071485e-05, "loss": 1.0167, "step": 1560 }, { "epoch": 0.26468842729970327, "grad_norm": 1.1099042284300633, "learning_rate": 1.724743959480565e-05, "loss": 1.0378, "step": 1561 }, { "epoch": 0.2648579906740144, "grad_norm": 0.6886615064716238, "learning_rate": 1.724365421032778e-05, "loss": 0.8823, "step": 1562 }, { "epoch": 0.26502755404832556, "grad_norm": 0.9599530376455806, "learning_rate": 1.7239866640779745e-05, "loss": 1.043, "step": 1563 }, { "epoch": 0.2651971174226367, "grad_norm": 0.9675491895367597, "learning_rate": 1.7236076887304075e-05, "loss": 1.0347, "step": 1564 }, { "epoch": 0.26536668079694786, "grad_norm": 1.0018629844462017, "learning_rate": 1.7232284951043962e-05, "loss": 1.0015, "step": 1565 }, { "epoch": 0.265536244171259, "grad_norm": 1.0258476398592737, "learning_rate": 1.722849083314326e-05, "loss": 1.0015, "step": 1566 }, { "epoch": 0.26570580754557016, "grad_norm": 0.9790221675134905, "learning_rate": 1.7224694534746467e-05, "loss": 1.0003, "step": 1567 }, { "epoch": 0.2658753709198813, "grad_norm": 0.9529917892672408, "learning_rate": 1.7220896056998753e-05, "loss": 1.0217, "step": 1568 }, { "epoch": 0.26604493429419246, "grad_norm": 0.6736902867314191, "learning_rate": 1.721709540104594e-05, "loss": 0.8791, "step": 1569 }, { "epoch": 0.2662144976685036, "grad_norm": 1.0025373415041368, "learning_rate": 1.721329256803451e-05, "loss": 1.0324, "step": 1570 }, { "epoch": 0.26638406104281476, "grad_norm": 1.015766210954091, "learning_rate": 1.7209487559111594e-05, "loss": 1.0135, "step": 1571 }, { "epoch": 0.2665536244171259, "grad_norm": 1.0229792170867233, "learning_rate": 1.7205680375424988e-05, "loss": 1.0212, "step": 1572 }, { "epoch": 0.26672318779143706, "grad_norm": 0.999673508818176, "learning_rate": 1.720187101812314e-05, "loss": 0.9858, "step": 1573 }, { "epoch": 0.2668927511657482, "grad_norm": 0.9760398731966328, "learning_rate": 1.7198059488355153e-05, "loss": 0.9979, "step": 1574 }, { "epoch": 0.26706231454005935, "grad_norm": 0.9673388895471001, "learning_rate": 1.7194245787270784e-05, "loss": 1.0014, "step": 1575 }, { "epoch": 0.2672318779143705, "grad_norm": 1.0351461126249129, "learning_rate": 1.7190429916020454e-05, "loss": 1.0154, "step": 1576 }, { "epoch": 0.26740144128868165, "grad_norm": 0.993090040058639, "learning_rate": 1.7186611875755227e-05, "loss": 1.0405, "step": 1577 }, { "epoch": 0.2675710046629928, "grad_norm": 1.0531473085392273, "learning_rate": 1.718279166762683e-05, "loss": 1.058, "step": 1578 }, { "epoch": 0.26774056803730395, "grad_norm": 1.0208249971659966, "learning_rate": 1.7178969292787632e-05, "loss": 1.0288, "step": 1579 }, { "epoch": 0.26791013141161507, "grad_norm": 0.9552184269072861, "learning_rate": 1.7175144752390674e-05, "loss": 0.9774, "step": 1580 }, { "epoch": 0.26807969478592625, "grad_norm": 0.9627056098126374, "learning_rate": 1.7171318047589637e-05, "loss": 0.947, "step": 1581 }, { "epoch": 0.26824925816023737, "grad_norm": 1.0270390026434155, "learning_rate": 1.7167489179538856e-05, "loss": 1.0256, "step": 1582 }, { "epoch": 0.26841882153454855, "grad_norm": 1.0397651037964861, "learning_rate": 1.7163658149393323e-05, "loss": 1.0232, "step": 1583 }, { "epoch": 0.26858838490885967, "grad_norm": 0.9789880341364536, "learning_rate": 1.7159824958308675e-05, "loss": 0.9756, "step": 1584 }, { "epoch": 0.26875794828317084, "grad_norm": 0.990464094831154, "learning_rate": 1.715598960744121e-05, "loss": 1.0302, "step": 1585 }, { "epoch": 0.26892751165748197, "grad_norm": 1.0768583125341327, "learning_rate": 1.7152152097947875e-05, "loss": 1.0339, "step": 1586 }, { "epoch": 0.26909707503179314, "grad_norm": 1.0075401698259927, "learning_rate": 1.7148312430986263e-05, "loss": 1.0065, "step": 1587 }, { "epoch": 0.26926663840610426, "grad_norm": 1.042959535433281, "learning_rate": 1.7144470607714626e-05, "loss": 1.034, "step": 1588 }, { "epoch": 0.26943620178041544, "grad_norm": 1.096226383427998, "learning_rate": 1.7140626629291853e-05, "loss": 1.0441, "step": 1589 }, { "epoch": 0.26960576515472656, "grad_norm": 0.9650588782785957, "learning_rate": 1.7136780496877493e-05, "loss": 1.0095, "step": 1590 }, { "epoch": 0.26977532852903774, "grad_norm": 1.0183899041551177, "learning_rate": 1.7132932211631752e-05, "loss": 0.9646, "step": 1591 }, { "epoch": 0.26994489190334886, "grad_norm": 1.0734058709413015, "learning_rate": 1.712908177471547e-05, "loss": 1.039, "step": 1592 }, { "epoch": 0.27011445527766004, "grad_norm": 1.057644396769284, "learning_rate": 1.712522918729014e-05, "loss": 1.0577, "step": 1593 }, { "epoch": 0.27028401865197116, "grad_norm": 1.099128413877244, "learning_rate": 1.712137445051792e-05, "loss": 1.0646, "step": 1594 }, { "epoch": 0.27045358202628234, "grad_norm": 1.0784746532945182, "learning_rate": 1.7117517565561588e-05, "loss": 1.0099, "step": 1595 }, { "epoch": 0.27062314540059346, "grad_norm": 1.0815064123577858, "learning_rate": 1.7113658533584594e-05, "loss": 1.0474, "step": 1596 }, { "epoch": 0.27079270877490463, "grad_norm": 0.6253365999082476, "learning_rate": 1.7109797355751017e-05, "loss": 0.8361, "step": 1597 }, { "epoch": 0.27096227214921575, "grad_norm": 1.0061432879175651, "learning_rate": 1.7105934033225607e-05, "loss": 0.9849, "step": 1598 }, { "epoch": 0.27113183552352693, "grad_norm": 1.122395492825743, "learning_rate": 1.710206856717374e-05, "loss": 1.0141, "step": 1599 }, { "epoch": 0.27130139889783805, "grad_norm": 1.0267725651305846, "learning_rate": 1.7098200958761443e-05, "loss": 1.0249, "step": 1600 }, { "epoch": 0.27147096227214923, "grad_norm": 0.9379371238372052, "learning_rate": 1.7094331209155394e-05, "loss": 0.9797, "step": 1601 }, { "epoch": 0.27164052564646035, "grad_norm": 0.9798306107867417, "learning_rate": 1.709045931952291e-05, "loss": 1.0222, "step": 1602 }, { "epoch": 0.27181008902077153, "grad_norm": 1.0358573882610302, "learning_rate": 1.7086585291031968e-05, "loss": 1.0292, "step": 1603 }, { "epoch": 0.27197965239508265, "grad_norm": 1.0150268120023536, "learning_rate": 1.7082709124851172e-05, "loss": 1.0061, "step": 1604 }, { "epoch": 0.2721492157693938, "grad_norm": 0.9705604079713331, "learning_rate": 1.7078830822149784e-05, "loss": 1.0007, "step": 1605 }, { "epoch": 0.27231877914370495, "grad_norm": 0.997528917361982, "learning_rate": 1.7074950384097703e-05, "loss": 1.0399, "step": 1606 }, { "epoch": 0.2724883425180161, "grad_norm": 0.9940037799319106, "learning_rate": 1.7071067811865477e-05, "loss": 1.0462, "step": 1607 }, { "epoch": 0.27265790589232725, "grad_norm": 1.106455166868433, "learning_rate": 1.7067183106624292e-05, "loss": 1.0554, "step": 1608 }, { "epoch": 0.2728274692666384, "grad_norm": 0.9750123911830852, "learning_rate": 1.7063296269545988e-05, "loss": 1.0002, "step": 1609 }, { "epoch": 0.27299703264094954, "grad_norm": 1.0070456281082218, "learning_rate": 1.7059407301803034e-05, "loss": 1.0031, "step": 1610 }, { "epoch": 0.2731665960152607, "grad_norm": 1.033720862686279, "learning_rate": 1.7055516204568553e-05, "loss": 1.0209, "step": 1611 }, { "epoch": 0.27333615938957184, "grad_norm": 1.062785311091101, "learning_rate": 1.70516229790163e-05, "loss": 1.0289, "step": 1612 }, { "epoch": 0.273505722763883, "grad_norm": 0.9655456115861552, "learning_rate": 1.7047727626320688e-05, "loss": 1.0059, "step": 1613 }, { "epoch": 0.27367528613819414, "grad_norm": 1.0117964009532867, "learning_rate": 1.704383014765676e-05, "loss": 0.996, "step": 1614 }, { "epoch": 0.2738448495125053, "grad_norm": 1.0450251648537119, "learning_rate": 1.7039930544200194e-05, "loss": 0.9912, "step": 1615 }, { "epoch": 0.27401441288681644, "grad_norm": 1.0308121873368181, "learning_rate": 1.703602881712732e-05, "loss": 0.9985, "step": 1616 }, { "epoch": 0.2741839762611276, "grad_norm": 1.0206060253113676, "learning_rate": 1.7032124967615112e-05, "loss": 1.0175, "step": 1617 }, { "epoch": 0.27435353963543874, "grad_norm": 1.0337479581298368, "learning_rate": 1.7028218996841173e-05, "loss": 1.0275, "step": 1618 }, { "epoch": 0.2745231030097499, "grad_norm": 1.0220572273887358, "learning_rate": 1.7024310905983753e-05, "loss": 1.0237, "step": 1619 }, { "epoch": 0.27469266638406103, "grad_norm": 0.9903156721353659, "learning_rate": 1.7020400696221737e-05, "loss": 0.9867, "step": 1620 }, { "epoch": 0.2748622297583722, "grad_norm": 1.0309457605974588, "learning_rate": 1.7016488368734654e-05, "loss": 1.0694, "step": 1621 }, { "epoch": 0.27503179313268333, "grad_norm": 0.975180348499719, "learning_rate": 1.701257392470267e-05, "loss": 1.0281, "step": 1622 }, { "epoch": 0.2752013565069945, "grad_norm": 1.0285842441451898, "learning_rate": 1.700865736530658e-05, "loss": 1.0802, "step": 1623 }, { "epoch": 0.27537091988130563, "grad_norm": 0.9582058969760604, "learning_rate": 1.700473869172784e-05, "loss": 1.012, "step": 1624 }, { "epoch": 0.2755404832556168, "grad_norm": 0.9926281593540695, "learning_rate": 1.7000817905148523e-05, "loss": 0.9814, "step": 1625 }, { "epoch": 0.27571004662992793, "grad_norm": 1.050820290516294, "learning_rate": 1.699689500675134e-05, "loss": 0.9965, "step": 1626 }, { "epoch": 0.2758796100042391, "grad_norm": 1.0412493165414156, "learning_rate": 1.6992969997719658e-05, "loss": 0.9839, "step": 1627 }, { "epoch": 0.2760491733785502, "grad_norm": 1.0028225922310317, "learning_rate": 1.698904287923746e-05, "loss": 1.0033, "step": 1628 }, { "epoch": 0.2762187367528614, "grad_norm": 0.9626776510354971, "learning_rate": 1.6985113652489374e-05, "loss": 0.9842, "step": 1629 }, { "epoch": 0.2763883001271725, "grad_norm": 1.0083281465246772, "learning_rate": 1.698118231866066e-05, "loss": 1.0301, "step": 1630 }, { "epoch": 0.2765578635014837, "grad_norm": 0.9948967001337197, "learning_rate": 1.697724887893722e-05, "loss": 0.9978, "step": 1631 }, { "epoch": 0.2767274268757948, "grad_norm": 1.038288911389693, "learning_rate": 1.697331333450559e-05, "loss": 1.0137, "step": 1632 }, { "epoch": 0.276896990250106, "grad_norm": 0.9387921431254617, "learning_rate": 1.696937568655294e-05, "loss": 1.0008, "step": 1633 }, { "epoch": 0.2770665536244171, "grad_norm": 0.9927125216040253, "learning_rate": 1.6965435936267063e-05, "loss": 1.0014, "step": 1634 }, { "epoch": 0.2772361169987283, "grad_norm": 0.9807282902798052, "learning_rate": 1.6961494084836405e-05, "loss": 1.0125, "step": 1635 }, { "epoch": 0.2774056803730394, "grad_norm": 0.9869988153860896, "learning_rate": 1.695755013345004e-05, "loss": 1.0479, "step": 1636 }, { "epoch": 0.2775752437473506, "grad_norm": 0.9586014093732598, "learning_rate": 1.6953604083297665e-05, "loss": 1.0216, "step": 1637 }, { "epoch": 0.2777448071216617, "grad_norm": 0.9986941688715908, "learning_rate": 1.6949655935569627e-05, "loss": 1.0192, "step": 1638 }, { "epoch": 0.2779143704959729, "grad_norm": 0.9921378683440467, "learning_rate": 1.6945705691456888e-05, "loss": 1.0166, "step": 1639 }, { "epoch": 0.278083933870284, "grad_norm": 0.9985160017656568, "learning_rate": 1.6941753352151057e-05, "loss": 1.0269, "step": 1640 }, { "epoch": 0.2782534972445952, "grad_norm": 0.9993921652217467, "learning_rate": 1.6937798918844363e-05, "loss": 1.0241, "step": 1641 }, { "epoch": 0.2784230606189063, "grad_norm": 1.0423824485925233, "learning_rate": 1.6933842392729677e-05, "loss": 1.0285, "step": 1642 }, { "epoch": 0.2785926239932175, "grad_norm": 1.017623427485035, "learning_rate": 1.69298837750005e-05, "loss": 0.9646, "step": 1643 }, { "epoch": 0.2787621873675286, "grad_norm": 0.9797211102548051, "learning_rate": 1.6925923066850957e-05, "loss": 0.9992, "step": 1644 }, { "epoch": 0.2789317507418398, "grad_norm": 1.0497242337533619, "learning_rate": 1.6921960269475806e-05, "loss": 0.9898, "step": 1645 }, { "epoch": 0.2791013141161509, "grad_norm": 0.9845819080920777, "learning_rate": 1.691799538407044e-05, "loss": 1.0077, "step": 1646 }, { "epoch": 0.27927087749046203, "grad_norm": 1.0087890382371902, "learning_rate": 1.691402841183088e-05, "loss": 0.9614, "step": 1647 }, { "epoch": 0.2794404408647732, "grad_norm": 1.0018423463699977, "learning_rate": 1.6910059353953765e-05, "loss": 0.9937, "step": 1648 }, { "epoch": 0.27961000423908433, "grad_norm": 0.9927419196455894, "learning_rate": 1.6906088211636387e-05, "loss": 1.0281, "step": 1649 }, { "epoch": 0.2797795676133955, "grad_norm": 0.9961599419086233, "learning_rate": 1.6902114986076645e-05, "loss": 1.0207, "step": 1650 }, { "epoch": 0.27994913098770663, "grad_norm": 0.9711712043805547, "learning_rate": 1.689813967847308e-05, "loss": 1.0257, "step": 1651 }, { "epoch": 0.2801186943620178, "grad_norm": 1.01609985737106, "learning_rate": 1.6894162290024848e-05, "loss": 1.0497, "step": 1652 }, { "epoch": 0.2802882577363289, "grad_norm": 0.9975132655584638, "learning_rate": 1.6890182821931746e-05, "loss": 0.979, "step": 1653 }, { "epoch": 0.2804578211106401, "grad_norm": 1.0228467490700406, "learning_rate": 1.6886201275394193e-05, "loss": 1.0032, "step": 1654 }, { "epoch": 0.2806273844849512, "grad_norm": 1.0191542366571857, "learning_rate": 1.688221765161323e-05, "loss": 1.036, "step": 1655 }, { "epoch": 0.2807969478592624, "grad_norm": 0.9612783895285227, "learning_rate": 1.687823195179053e-05, "loss": 0.9963, "step": 1656 }, { "epoch": 0.2809665112335735, "grad_norm": 1.1322316892118682, "learning_rate": 1.6874244177128395e-05, "loss": 0.971, "step": 1657 }, { "epoch": 0.2811360746078847, "grad_norm": 1.0227971882768998, "learning_rate": 1.6870254328829748e-05, "loss": 1.0435, "step": 1658 }, { "epoch": 0.2813056379821958, "grad_norm": 1.0452956120131587, "learning_rate": 1.6866262408098134e-05, "loss": 1.0181, "step": 1659 }, { "epoch": 0.281475201356507, "grad_norm": 1.0107572660503896, "learning_rate": 1.6862268416137738e-05, "loss": 0.9973, "step": 1660 }, { "epoch": 0.2816447647308181, "grad_norm": 1.010141714195555, "learning_rate": 1.685827235415335e-05, "loss": 1.0111, "step": 1661 }, { "epoch": 0.2818143281051293, "grad_norm": 1.0538601111497838, "learning_rate": 1.68542742233504e-05, "loss": 0.9921, "step": 1662 }, { "epoch": 0.2819838914794404, "grad_norm": 1.046633623577513, "learning_rate": 1.685027402493493e-05, "loss": 1.0528, "step": 1663 }, { "epoch": 0.2821534548537516, "grad_norm": 1.0606159042913585, "learning_rate": 1.684627176011362e-05, "loss": 1.0116, "step": 1664 }, { "epoch": 0.2823230182280627, "grad_norm": 1.0100434793375952, "learning_rate": 1.6842267430093762e-05, "loss": 1.0165, "step": 1665 }, { "epoch": 0.2824925816023739, "grad_norm": 1.0438581417039956, "learning_rate": 1.683826103608327e-05, "loss": 1.0372, "step": 1666 }, { "epoch": 0.282662144976685, "grad_norm": 0.9879939539761703, "learning_rate": 1.6834252579290692e-05, "loss": 0.9725, "step": 1667 }, { "epoch": 0.2828317083509962, "grad_norm": 1.0270057895685785, "learning_rate": 1.6830242060925184e-05, "loss": 1.0373, "step": 1668 }, { "epoch": 0.2830012717253073, "grad_norm": 1.040186779060928, "learning_rate": 1.6826229482196535e-05, "loss": 1.0437, "step": 1669 }, { "epoch": 0.2831708350996185, "grad_norm": 0.963710584533114, "learning_rate": 1.6822214844315152e-05, "loss": 0.985, "step": 1670 }, { "epoch": 0.2833403984739296, "grad_norm": 0.6746281060544586, "learning_rate": 1.681819814849206e-05, "loss": 0.8753, "step": 1671 }, { "epoch": 0.2835099618482408, "grad_norm": 0.9804017133537226, "learning_rate": 1.6814179395938915e-05, "loss": 1.0046, "step": 1672 }, { "epoch": 0.2836795252225519, "grad_norm": 1.1161363116122796, "learning_rate": 1.6810158587867973e-05, "loss": 1.0102, "step": 1673 }, { "epoch": 0.2838490885968631, "grad_norm": 1.014553654632016, "learning_rate": 1.6806135725492133e-05, "loss": 1.0097, "step": 1674 }, { "epoch": 0.2840186519711742, "grad_norm": 1.015140415745618, "learning_rate": 1.68021108100249e-05, "loss": 0.997, "step": 1675 }, { "epoch": 0.2841882153454854, "grad_norm": 0.9803691346395613, "learning_rate": 1.6798083842680402e-05, "loss": 1.0058, "step": 1676 }, { "epoch": 0.2843577787197965, "grad_norm": 0.9915608851870706, "learning_rate": 1.679405482467338e-05, "loss": 1.0488, "step": 1677 }, { "epoch": 0.2845273420941077, "grad_norm": 0.9474478180574198, "learning_rate": 1.6790023757219215e-05, "loss": 1.0092, "step": 1678 }, { "epoch": 0.2846969054684188, "grad_norm": 1.0363667115985078, "learning_rate": 1.6785990641533878e-05, "loss": 1.0033, "step": 1679 }, { "epoch": 0.28486646884273, "grad_norm": 1.033268026108578, "learning_rate": 1.6781955478833973e-05, "loss": 1.0009, "step": 1680 }, { "epoch": 0.2850360322170411, "grad_norm": 1.032629367630231, "learning_rate": 1.6777918270336718e-05, "loss": 1.0175, "step": 1681 }, { "epoch": 0.2852055955913523, "grad_norm": 0.9806034247156681, "learning_rate": 1.6773879017259954e-05, "loss": 1.0341, "step": 1682 }, { "epoch": 0.2853751589656634, "grad_norm": 1.0507339071659962, "learning_rate": 1.676983772082213e-05, "loss": 1.0164, "step": 1683 }, { "epoch": 0.2855447223399746, "grad_norm": 1.0024353354442337, "learning_rate": 1.6765794382242315e-05, "loss": 1.0179, "step": 1684 }, { "epoch": 0.2857142857142857, "grad_norm": 1.068319577710856, "learning_rate": 1.6761749002740195e-05, "loss": 1.0089, "step": 1685 }, { "epoch": 0.2858838490885969, "grad_norm": 0.9878217601950019, "learning_rate": 1.675770158353607e-05, "loss": 1.0213, "step": 1686 }, { "epoch": 0.286053412462908, "grad_norm": 1.0041279074298652, "learning_rate": 1.6753652125850865e-05, "loss": 1.0227, "step": 1687 }, { "epoch": 0.2862229758372192, "grad_norm": 0.9762471508102554, "learning_rate": 1.6749600630906097e-05, "loss": 0.9977, "step": 1688 }, { "epoch": 0.2863925392115303, "grad_norm": 1.0002991927519336, "learning_rate": 1.6745547099923917e-05, "loss": 0.9828, "step": 1689 }, { "epoch": 0.28656210258584147, "grad_norm": 0.9748061224336259, "learning_rate": 1.674149153412709e-05, "loss": 1.0047, "step": 1690 }, { "epoch": 0.2867316659601526, "grad_norm": 0.9939597921609324, "learning_rate": 1.6737433934738984e-05, "loss": 1.0346, "step": 1691 }, { "epoch": 0.28690122933446377, "grad_norm": 0.9342447749963945, "learning_rate": 1.673337430298359e-05, "loss": 1.0097, "step": 1692 }, { "epoch": 0.2870707927087749, "grad_norm": 0.9817004159920719, "learning_rate": 1.6729312640085504e-05, "loss": 0.9667, "step": 1693 }, { "epoch": 0.28724035608308607, "grad_norm": 1.0278543887202118, "learning_rate": 1.6725248947269944e-05, "loss": 1.0388, "step": 1694 }, { "epoch": 0.2874099194573972, "grad_norm": 0.7823900191193831, "learning_rate": 1.6721183225762726e-05, "loss": 0.8986, "step": 1695 }, { "epoch": 0.28757948283170837, "grad_norm": 0.9761424079172065, "learning_rate": 1.67171154767903e-05, "loss": 0.9814, "step": 1696 }, { "epoch": 0.2877490462060195, "grad_norm": 0.9531139091992797, "learning_rate": 1.6713045701579705e-05, "loss": 0.9702, "step": 1697 }, { "epoch": 0.28791860958033066, "grad_norm": 1.0339778199641958, "learning_rate": 1.6708973901358603e-05, "loss": 1.0196, "step": 1698 }, { "epoch": 0.2880881729546418, "grad_norm": 1.0510497928905038, "learning_rate": 1.6704900077355267e-05, "loss": 1.0233, "step": 1699 }, { "epoch": 0.28825773632895296, "grad_norm": 1.0071444924742048, "learning_rate": 1.670082423079858e-05, "loss": 0.9955, "step": 1700 }, { "epoch": 0.2884272997032641, "grad_norm": 1.0091169399255349, "learning_rate": 1.6696746362918027e-05, "loss": 1.0338, "step": 1701 }, { "epoch": 0.28859686307757526, "grad_norm": 1.0595147055921148, "learning_rate": 1.6692666474943714e-05, "loss": 1.036, "step": 1702 }, { "epoch": 0.2887664264518864, "grad_norm": 0.9912060867006545, "learning_rate": 1.668858456810635e-05, "loss": 0.9894, "step": 1703 }, { "epoch": 0.28893598982619756, "grad_norm": 0.9580534910764097, "learning_rate": 1.6684500643637256e-05, "loss": 1.0321, "step": 1704 }, { "epoch": 0.2891055532005087, "grad_norm": 0.9357886309433708, "learning_rate": 1.6680414702768358e-05, "loss": 0.9652, "step": 1705 }, { "epoch": 0.28927511657481986, "grad_norm": 0.9636332083145411, "learning_rate": 1.6676326746732197e-05, "loss": 0.9909, "step": 1706 }, { "epoch": 0.289444679949131, "grad_norm": 1.0023999344361916, "learning_rate": 1.6672236776761906e-05, "loss": 0.9862, "step": 1707 }, { "epoch": 0.28961424332344216, "grad_norm": 0.976587780893446, "learning_rate": 1.6668144794091254e-05, "loss": 0.9914, "step": 1708 }, { "epoch": 0.2897838066977533, "grad_norm": 1.034296314547433, "learning_rate": 1.6664050799954587e-05, "loss": 0.998, "step": 1709 }, { "epoch": 0.28995337007206445, "grad_norm": 0.9854131890683052, "learning_rate": 1.665995479558687e-05, "loss": 1.0331, "step": 1710 }, { "epoch": 0.2901229334463756, "grad_norm": 0.9608910285703723, "learning_rate": 1.6655856782223682e-05, "loss": 0.9443, "step": 1711 }, { "epoch": 0.29029249682068675, "grad_norm": 0.9797247146404183, "learning_rate": 1.6651756761101202e-05, "loss": 1.0347, "step": 1712 }, { "epoch": 0.2904620601949979, "grad_norm": 0.946135130562679, "learning_rate": 1.66476547334562e-05, "loss": 0.9851, "step": 1713 }, { "epoch": 0.29063162356930905, "grad_norm": 0.9998182905903034, "learning_rate": 1.6643550700526084e-05, "loss": 0.9933, "step": 1714 }, { "epoch": 0.29080118694362017, "grad_norm": 0.9992474680091024, "learning_rate": 1.663944466354884e-05, "loss": 1.0217, "step": 1715 }, { "epoch": 0.29097075031793135, "grad_norm": 0.9892955976424613, "learning_rate": 1.663533662376306e-05, "loss": 0.9865, "step": 1716 }, { "epoch": 0.29114031369224247, "grad_norm": 1.0416426195644528, "learning_rate": 1.6631226582407954e-05, "loss": 1.0465, "step": 1717 }, { "epoch": 0.29130987706655365, "grad_norm": 0.9639488178238013, "learning_rate": 1.6627114540723327e-05, "loss": 1.0104, "step": 1718 }, { "epoch": 0.29147944044086477, "grad_norm": 0.9303799913380203, "learning_rate": 1.6623000499949586e-05, "loss": 0.9999, "step": 1719 }, { "epoch": 0.29164900381517594, "grad_norm": 0.993267452886458, "learning_rate": 1.6618884461327747e-05, "loss": 1.005, "step": 1720 }, { "epoch": 0.29181856718948707, "grad_norm": 0.9793605748736506, "learning_rate": 1.661476642609943e-05, "loss": 0.9992, "step": 1721 }, { "epoch": 0.29198813056379824, "grad_norm": 1.0006406629500666, "learning_rate": 1.661064639550684e-05, "loss": 1.0462, "step": 1722 }, { "epoch": 0.29215769393810936, "grad_norm": 0.9858334384178165, "learning_rate": 1.6606524370792806e-05, "loss": 1.0186, "step": 1723 }, { "epoch": 0.29232725731242054, "grad_norm": 0.7210513604013623, "learning_rate": 1.660240035320075e-05, "loss": 0.8266, "step": 1724 }, { "epoch": 0.29249682068673166, "grad_norm": 1.0010519453769744, "learning_rate": 1.6598274343974688e-05, "loss": 1.0034, "step": 1725 }, { "epoch": 0.29266638406104284, "grad_norm": 0.973236506156456, "learning_rate": 1.659414634435925e-05, "loss": 1.0245, "step": 1726 }, { "epoch": 0.29283594743535396, "grad_norm": 1.0548680249557763, "learning_rate": 1.6590016355599653e-05, "loss": 1.0698, "step": 1727 }, { "epoch": 0.29300551080966514, "grad_norm": 0.6713346677268494, "learning_rate": 1.6585884378941727e-05, "loss": 0.8334, "step": 1728 }, { "epoch": 0.29317507418397626, "grad_norm": 1.022166571418792, "learning_rate": 1.658175041563189e-05, "loss": 1.0397, "step": 1729 }, { "epoch": 0.29334463755828744, "grad_norm": 1.016097893708613, "learning_rate": 1.6577614466917168e-05, "loss": 0.9971, "step": 1730 }, { "epoch": 0.29351420093259856, "grad_norm": 1.0536720803219268, "learning_rate": 1.657347653404518e-05, "loss": 0.9915, "step": 1731 }, { "epoch": 0.29368376430690973, "grad_norm": 1.0061859178475074, "learning_rate": 1.6569336618264143e-05, "loss": 1.0175, "step": 1732 }, { "epoch": 0.29385332768122086, "grad_norm": 0.9673002936320002, "learning_rate": 1.6565194720822885e-05, "loss": 0.9918, "step": 1733 }, { "epoch": 0.29402289105553203, "grad_norm": 1.0616234863800256, "learning_rate": 1.6561050842970817e-05, "loss": 1.0134, "step": 1734 }, { "epoch": 0.29419245442984315, "grad_norm": 1.0319285346864768, "learning_rate": 1.6556904985957946e-05, "loss": 1.0286, "step": 1735 }, { "epoch": 0.29436201780415433, "grad_norm": 1.0091347917174902, "learning_rate": 1.655275715103489e-05, "loss": 1.0277, "step": 1736 }, { "epoch": 0.29453158117846545, "grad_norm": 1.0322359324464947, "learning_rate": 1.6548607339452853e-05, "loss": 1.0317, "step": 1737 }, { "epoch": 0.2947011445527766, "grad_norm": 0.9832355505979721, "learning_rate": 1.6544455552463637e-05, "loss": 0.9666, "step": 1738 }, { "epoch": 0.29487070792708775, "grad_norm": 1.0348463908522592, "learning_rate": 1.6540301791319647e-05, "loss": 1.0522, "step": 1739 }, { "epoch": 0.29504027130139887, "grad_norm": 1.0509929650105039, "learning_rate": 1.653614605727387e-05, "loss": 1.0634, "step": 1740 }, { "epoch": 0.29520983467571005, "grad_norm": 0.9715754676744554, "learning_rate": 1.6531988351579897e-05, "loss": 1.0131, "step": 1741 }, { "epoch": 0.29537939805002117, "grad_norm": 1.0338172489487074, "learning_rate": 1.6527828675491917e-05, "loss": 0.9918, "step": 1742 }, { "epoch": 0.29554896142433235, "grad_norm": 0.9935939626541185, "learning_rate": 1.6523667030264706e-05, "loss": 1.021, "step": 1743 }, { "epoch": 0.29571852479864347, "grad_norm": 1.0204378829862049, "learning_rate": 1.6519503417153638e-05, "loss": 0.9787, "step": 1744 }, { "epoch": 0.29588808817295464, "grad_norm": 1.0164353888366213, "learning_rate": 1.6515337837414677e-05, "loss": 1.0167, "step": 1745 }, { "epoch": 0.29605765154726577, "grad_norm": 0.9724056723108322, "learning_rate": 1.6511170292304385e-05, "loss": 0.977, "step": 1746 }, { "epoch": 0.29622721492157694, "grad_norm": 1.0048508098530742, "learning_rate": 1.6507000783079913e-05, "loss": 1.0219, "step": 1747 }, { "epoch": 0.29639677829588806, "grad_norm": 1.060458643969409, "learning_rate": 1.6502829310999012e-05, "loss": 1.0183, "step": 1748 }, { "epoch": 0.29656634167019924, "grad_norm": 1.0635806815416127, "learning_rate": 1.6498655877320008e-05, "loss": 1.0148, "step": 1749 }, { "epoch": 0.29673590504451036, "grad_norm": 0.9951429541588573, "learning_rate": 1.6494480483301836e-05, "loss": 1.0172, "step": 1750 }, { "epoch": 0.29690546841882154, "grad_norm": 1.0047707907680026, "learning_rate": 1.649030313020402e-05, "loss": 1.0415, "step": 1751 }, { "epoch": 0.29707503179313266, "grad_norm": 1.0080379685322103, "learning_rate": 1.6486123819286666e-05, "loss": 1.002, "step": 1752 }, { "epoch": 0.29724459516744384, "grad_norm": 1.0224586200512065, "learning_rate": 1.6481942551810476e-05, "loss": 1.0342, "step": 1753 }, { "epoch": 0.29741415854175496, "grad_norm": 1.0444815240757956, "learning_rate": 1.6477759329036743e-05, "loss": 1.0226, "step": 1754 }, { "epoch": 0.29758372191606614, "grad_norm": 0.94617496593671, "learning_rate": 1.647357415222735e-05, "loss": 0.9842, "step": 1755 }, { "epoch": 0.29775328529037726, "grad_norm": 0.9646250591216861, "learning_rate": 1.6469387022644768e-05, "loss": 0.985, "step": 1756 }, { "epoch": 0.29792284866468843, "grad_norm": 1.0025500245054189, "learning_rate": 1.6465197941552054e-05, "loss": 0.9895, "step": 1757 }, { "epoch": 0.29809241203899955, "grad_norm": 0.9637417682856418, "learning_rate": 1.646100691021286e-05, "loss": 0.9869, "step": 1758 }, { "epoch": 0.29826197541331073, "grad_norm": 0.7686090701175982, "learning_rate": 1.6456813929891425e-05, "loss": 0.9297, "step": 1759 }, { "epoch": 0.29843153878762185, "grad_norm": 1.025200665912597, "learning_rate": 1.6452619001852567e-05, "loss": 1.0113, "step": 1760 }, { "epoch": 0.29860110216193303, "grad_norm": 1.0006621563219802, "learning_rate": 1.6448422127361707e-05, "loss": 0.9735, "step": 1761 }, { "epoch": 0.29877066553624415, "grad_norm": 0.9468374829701318, "learning_rate": 1.6444223307684844e-05, "loss": 0.9841, "step": 1762 }, { "epoch": 0.29894022891055533, "grad_norm": 0.9577090668403472, "learning_rate": 1.6440022544088553e-05, "loss": 0.9567, "step": 1763 }, { "epoch": 0.29910979228486645, "grad_norm": 0.9735054188231101, "learning_rate": 1.6435819837840026e-05, "loss": 0.9724, "step": 1764 }, { "epoch": 0.2992793556591776, "grad_norm": 0.9656507668924623, "learning_rate": 1.6431615190207003e-05, "loss": 1.0553, "step": 1765 }, { "epoch": 0.29944891903348875, "grad_norm": 0.9597257964315393, "learning_rate": 1.6427408602457845e-05, "loss": 0.9624, "step": 1766 }, { "epoch": 0.2996184824077999, "grad_norm": 0.9921723334689856, "learning_rate": 1.6423200075861472e-05, "loss": 1.016, "step": 1767 }, { "epoch": 0.29978804578211105, "grad_norm": 1.0044958475699377, "learning_rate": 1.64189896116874e-05, "loss": 1.004, "step": 1768 }, { "epoch": 0.2999576091564222, "grad_norm": 0.9823954300465818, "learning_rate": 1.641477721120573e-05, "loss": 0.9965, "step": 1769 }, { "epoch": 0.30012717253073334, "grad_norm": 1.0092359267461424, "learning_rate": 1.6410562875687145e-05, "loss": 1.0, "step": 1770 }, { "epoch": 0.3002967359050445, "grad_norm": 0.9885825830035312, "learning_rate": 1.6406346606402913e-05, "loss": 1.0266, "step": 1771 }, { "epoch": 0.30046629927935564, "grad_norm": 0.9804222980130869, "learning_rate": 1.640212840462488e-05, "loss": 1.0018, "step": 1772 }, { "epoch": 0.3006358626536668, "grad_norm": 1.007118270528555, "learning_rate": 1.6397908271625488e-05, "loss": 1.0034, "step": 1773 }, { "epoch": 0.30080542602797794, "grad_norm": 0.9492726400708721, "learning_rate": 1.6393686208677744e-05, "loss": 1.0101, "step": 1774 }, { "epoch": 0.3009749894022891, "grad_norm": 0.9963323404652323, "learning_rate": 1.638946221705525e-05, "loss": 1.0242, "step": 1775 }, { "epoch": 0.30114455277660024, "grad_norm": 0.985676913192754, "learning_rate": 1.6385236298032183e-05, "loss": 0.9739, "step": 1776 }, { "epoch": 0.3013141161509114, "grad_norm": 1.0043484053972493, "learning_rate": 1.638100845288331e-05, "loss": 1.0246, "step": 1777 }, { "epoch": 0.30148367952522254, "grad_norm": 0.9916119223229497, "learning_rate": 1.6376778682883968e-05, "loss": 0.9945, "step": 1778 }, { "epoch": 0.3016532428995337, "grad_norm": 0.984423027045273, "learning_rate": 1.6372546989310083e-05, "loss": 1.0455, "step": 1779 }, { "epoch": 0.30182280627384483, "grad_norm": 1.044141024273506, "learning_rate": 1.6368313373438157e-05, "loss": 1.0018, "step": 1780 }, { "epoch": 0.301992369648156, "grad_norm": 0.9919904361515511, "learning_rate": 1.636407783654527e-05, "loss": 1.0132, "step": 1781 }, { "epoch": 0.30216193302246713, "grad_norm": 0.9889974008922935, "learning_rate": 1.6359840379909088e-05, "loss": 1.0367, "step": 1782 }, { "epoch": 0.3023314963967783, "grad_norm": 1.025382901857874, "learning_rate": 1.6355601004807856e-05, "loss": 1.0191, "step": 1783 }, { "epoch": 0.30250105977108943, "grad_norm": 1.0163750107875882, "learning_rate": 1.6351359712520383e-05, "loss": 1.0166, "step": 1784 }, { "epoch": 0.3026706231454006, "grad_norm": 1.0280941856356662, "learning_rate": 1.6347116504326082e-05, "loss": 1.0392, "step": 1785 }, { "epoch": 0.30284018651971173, "grad_norm": 0.9960837363552101, "learning_rate": 1.6342871381504916e-05, "loss": 0.9881, "step": 1786 }, { "epoch": 0.3030097498940229, "grad_norm": 0.9741358282430439, "learning_rate": 1.6338624345337452e-05, "loss": 0.9967, "step": 1787 }, { "epoch": 0.303179313268334, "grad_norm": 0.9927672707720894, "learning_rate": 1.6334375397104813e-05, "loss": 1.018, "step": 1788 }, { "epoch": 0.3033488766426452, "grad_norm": 0.98130278736917, "learning_rate": 1.6330124538088705e-05, "loss": 1.0085, "step": 1789 }, { "epoch": 0.3035184400169563, "grad_norm": 1.0359025823832217, "learning_rate": 1.632587176957142e-05, "loss": 1.0152, "step": 1790 }, { "epoch": 0.3036880033912675, "grad_norm": 0.7373946183970215, "learning_rate": 1.6321617092835813e-05, "loss": 0.8841, "step": 1791 }, { "epoch": 0.3038575667655786, "grad_norm": 0.963318794273647, "learning_rate": 1.631736050916532e-05, "loss": 0.9714, "step": 1792 }, { "epoch": 0.3040271301398898, "grad_norm": 1.0349902500299886, "learning_rate": 1.631310201984396e-05, "loss": 1.0408, "step": 1793 }, { "epoch": 0.3041966935142009, "grad_norm": 1.0126851909912866, "learning_rate": 1.630884162615631e-05, "loss": 1.0425, "step": 1794 }, { "epoch": 0.3043662568885121, "grad_norm": 0.9493235675489007, "learning_rate": 1.6304579329387534e-05, "loss": 0.9908, "step": 1795 }, { "epoch": 0.3045358202628232, "grad_norm": 0.9899299579017352, "learning_rate": 1.6300315130823366e-05, "loss": 0.9958, "step": 1796 }, { "epoch": 0.3047053836371344, "grad_norm": 1.0570092320556699, "learning_rate": 1.6296049031750114e-05, "loss": 1.0337, "step": 1797 }, { "epoch": 0.3048749470114455, "grad_norm": 0.9275633015379959, "learning_rate": 1.6291781033454664e-05, "loss": 0.9443, "step": 1798 }, { "epoch": 0.3050445103857567, "grad_norm": 0.9731472518937835, "learning_rate": 1.6287511137224467e-05, "loss": 0.9875, "step": 1799 }, { "epoch": 0.3052140737600678, "grad_norm": 0.9797427813970988, "learning_rate": 1.6283239344347547e-05, "loss": 1.0384, "step": 1800 }, { "epoch": 0.305383637134379, "grad_norm": 0.9555816393377661, "learning_rate": 1.627896565611251e-05, "loss": 1.0155, "step": 1801 }, { "epoch": 0.3055532005086901, "grad_norm": 0.9860341821838224, "learning_rate": 1.627469007380852e-05, "loss": 0.9974, "step": 1802 }, { "epoch": 0.3057227638830013, "grad_norm": 0.9590834289172773, "learning_rate": 1.6270412598725326e-05, "loss": 1.0198, "step": 1803 }, { "epoch": 0.3058923272573124, "grad_norm": 1.0050559504718641, "learning_rate": 1.626613323215324e-05, "loss": 1.0407, "step": 1804 }, { "epoch": 0.3060618906316236, "grad_norm": 0.9423169298203447, "learning_rate": 1.626185197538314e-05, "loss": 0.9977, "step": 1805 }, { "epoch": 0.3062314540059347, "grad_norm": 0.9864377869764335, "learning_rate": 1.6257568829706483e-05, "loss": 1.0391, "step": 1806 }, { "epoch": 0.3064010173802459, "grad_norm": 0.9424617691749402, "learning_rate": 1.6253283796415294e-05, "loss": 0.9966, "step": 1807 }, { "epoch": 0.306570580754557, "grad_norm": 0.9527314215754243, "learning_rate": 1.624899687680217e-05, "loss": 0.9941, "step": 1808 }, { "epoch": 0.3067401441288682, "grad_norm": 1.0725516587971475, "learning_rate": 1.6244708072160267e-05, "loss": 1.0614, "step": 1809 }, { "epoch": 0.3069097075031793, "grad_norm": 0.9858268411474339, "learning_rate": 1.624041738378332e-05, "loss": 0.9965, "step": 1810 }, { "epoch": 0.3070792708774905, "grad_norm": 0.9505983220531846, "learning_rate": 1.6236124812965622e-05, "loss": 0.9687, "step": 1811 }, { "epoch": 0.3072488342518016, "grad_norm": 0.9925222488352277, "learning_rate": 1.623183036100205e-05, "loss": 0.9858, "step": 1812 }, { "epoch": 0.3074183976261128, "grad_norm": 0.9789615847416095, "learning_rate": 1.6227534029188027e-05, "loss": 1.0156, "step": 1813 }, { "epoch": 0.3075879610004239, "grad_norm": 0.9644609138414463, "learning_rate": 1.6223235818819564e-05, "loss": 0.9756, "step": 1814 }, { "epoch": 0.3077575243747351, "grad_norm": 0.9880783419658973, "learning_rate": 1.6218935731193223e-05, "loss": 1.0069, "step": 1815 }, { "epoch": 0.3079270877490462, "grad_norm": 1.0281460358283794, "learning_rate": 1.6214633767606142e-05, "loss": 1.0077, "step": 1816 }, { "epoch": 0.3080966511233574, "grad_norm": 0.9685630942387603, "learning_rate": 1.6210329929356017e-05, "loss": 1.0027, "step": 1817 }, { "epoch": 0.3082662144976685, "grad_norm": 0.6556947755831345, "learning_rate": 1.6206024217741125e-05, "loss": 0.8692, "step": 1818 }, { "epoch": 0.3084357778719797, "grad_norm": 1.0191152051284371, "learning_rate": 1.620171663406028e-05, "loss": 1.0353, "step": 1819 }, { "epoch": 0.3086053412462908, "grad_norm": 0.9248084723691711, "learning_rate": 1.619740717961289e-05, "loss": 0.9497, "step": 1820 }, { "epoch": 0.308774904620602, "grad_norm": 1.017505589072633, "learning_rate": 1.619309585569891e-05, "loss": 0.9909, "step": 1821 }, { "epoch": 0.3089444679949131, "grad_norm": 0.9711824335427821, "learning_rate": 1.6188782663618866e-05, "loss": 1.0057, "step": 1822 }, { "epoch": 0.3091140313692243, "grad_norm": 0.9288939176781582, "learning_rate": 1.6184467604673843e-05, "loss": 0.9775, "step": 1823 }, { "epoch": 0.3092835947435354, "grad_norm": 0.9373969596353178, "learning_rate": 1.6180150680165496e-05, "loss": 0.9728, "step": 1824 }, { "epoch": 0.30945315811784657, "grad_norm": 0.954207371612142, "learning_rate": 1.6175831891396034e-05, "loss": 0.9858, "step": 1825 }, { "epoch": 0.3096227214921577, "grad_norm": 1.0573773356138934, "learning_rate": 1.6171511239668233e-05, "loss": 1.0212, "step": 1826 }, { "epoch": 0.30979228486646887, "grad_norm": 0.9704153505792887, "learning_rate": 1.6167188726285433e-05, "loss": 0.9918, "step": 1827 }, { "epoch": 0.30996184824078, "grad_norm": 1.060168869791843, "learning_rate": 1.616286435255153e-05, "loss": 1.0458, "step": 1828 }, { "epoch": 0.3101314116150911, "grad_norm": 0.9382939266881261, "learning_rate": 1.615853811977099e-05, "loss": 0.9721, "step": 1829 }, { "epoch": 0.3103009749894023, "grad_norm": 0.9258970755464889, "learning_rate": 1.6154210029248826e-05, "loss": 0.9888, "step": 1830 }, { "epoch": 0.3104705383637134, "grad_norm": 1.0297541461285236, "learning_rate": 1.6149880082290628e-05, "loss": 1.021, "step": 1831 }, { "epoch": 0.3106401017380246, "grad_norm": 0.6148328694730372, "learning_rate": 1.614554828020253e-05, "loss": 0.809, "step": 1832 }, { "epoch": 0.3108096651123357, "grad_norm": 0.9751239243279715, "learning_rate": 1.614121462429124e-05, "loss": 0.9968, "step": 1833 }, { "epoch": 0.3109792284866469, "grad_norm": 1.0179378075698877, "learning_rate": 1.613687911586401e-05, "loss": 1.0126, "step": 1834 }, { "epoch": 0.311148791860958, "grad_norm": 0.9402075024967866, "learning_rate": 1.613254175622867e-05, "loss": 0.976, "step": 1835 }, { "epoch": 0.3113183552352692, "grad_norm": 0.9823306599535597, "learning_rate": 1.6128202546693592e-05, "loss": 1.0025, "step": 1836 }, { "epoch": 0.3114879186095803, "grad_norm": 0.9718703614639245, "learning_rate": 1.612386148856771e-05, "loss": 0.988, "step": 1837 }, { "epoch": 0.3116574819838915, "grad_norm": 1.0411631714862228, "learning_rate": 1.611951858316052e-05, "loss": 1.0209, "step": 1838 }, { "epoch": 0.3118270453582026, "grad_norm": 0.9481017910965698, "learning_rate": 1.6115173831782072e-05, "loss": 1.0047, "step": 1839 }, { "epoch": 0.3119966087325138, "grad_norm": 0.9578261700583934, "learning_rate": 1.611082723574297e-05, "loss": 1.0319, "step": 1840 }, { "epoch": 0.3121661721068249, "grad_norm": 0.9740167314483015, "learning_rate": 1.6106478796354382e-05, "loss": 1.016, "step": 1841 }, { "epoch": 0.3123357354811361, "grad_norm": 0.9309827895982609, "learning_rate": 1.6102128514928028e-05, "loss": 1.0012, "step": 1842 }, { "epoch": 0.3125052988554472, "grad_norm": 0.9711408564367686, "learning_rate": 1.6097776392776182e-05, "loss": 1.0011, "step": 1843 }, { "epoch": 0.3126748622297584, "grad_norm": 0.6293486170572605, "learning_rate": 1.6093422431211674e-05, "loss": 0.8261, "step": 1844 }, { "epoch": 0.3128444256040695, "grad_norm": 0.9811287307797799, "learning_rate": 1.6089066631547893e-05, "loss": 0.9897, "step": 1845 }, { "epoch": 0.3130139889783807, "grad_norm": 0.9840862006734133, "learning_rate": 1.608470899509877e-05, "loss": 1.0017, "step": 1846 }, { "epoch": 0.3131835523526918, "grad_norm": 0.9832207842358262, "learning_rate": 1.608034952317881e-05, "loss": 0.9903, "step": 1847 }, { "epoch": 0.313353115727003, "grad_norm": 0.9857571817599534, "learning_rate": 1.607598821710306e-05, "loss": 0.979, "step": 1848 }, { "epoch": 0.3135226791013141, "grad_norm": 1.0268107585271726, "learning_rate": 1.6071625078187113e-05, "loss": 1.0196, "step": 1849 }, { "epoch": 0.31369224247562527, "grad_norm": 0.9818237677757576, "learning_rate": 1.6067260107747133e-05, "loss": 0.9966, "step": 1850 }, { "epoch": 0.3138618058499364, "grad_norm": 0.9846225689782029, "learning_rate": 1.6062893307099817e-05, "loss": 0.9906, "step": 1851 }, { "epoch": 0.31403136922424757, "grad_norm": 0.9631073592769925, "learning_rate": 1.6058524677562428e-05, "loss": 0.974, "step": 1852 }, { "epoch": 0.3142009325985587, "grad_norm": 0.9426263049950635, "learning_rate": 1.6054154220452776e-05, "loss": 0.9805, "step": 1853 }, { "epoch": 0.31437049597286987, "grad_norm": 0.9771935047093394, "learning_rate": 1.6049781937089227e-05, "loss": 0.9867, "step": 1854 }, { "epoch": 0.314540059347181, "grad_norm": 0.9439378442735374, "learning_rate": 1.6045407828790686e-05, "loss": 0.9521, "step": 1855 }, { "epoch": 0.31470962272149217, "grad_norm": 0.9940738226724326, "learning_rate": 1.604103189687662e-05, "loss": 0.9803, "step": 1856 }, { "epoch": 0.3148791860958033, "grad_norm": 0.981022097967025, "learning_rate": 1.6036654142667043e-05, "loss": 0.9884, "step": 1857 }, { "epoch": 0.31504874947011446, "grad_norm": 0.9763891018640374, "learning_rate": 1.6032274567482514e-05, "loss": 1.0455, "step": 1858 }, { "epoch": 0.3152183128444256, "grad_norm": 0.9580214076727748, "learning_rate": 1.602789317264415e-05, "loss": 1.0031, "step": 1859 }, { "epoch": 0.31538787621873676, "grad_norm": 0.949485377285947, "learning_rate": 1.6023509959473608e-05, "loss": 1.0197, "step": 1860 }, { "epoch": 0.3155574395930479, "grad_norm": 0.9677420266974983, "learning_rate": 1.6019124929293097e-05, "loss": 1.0507, "step": 1861 }, { "epoch": 0.31572700296735906, "grad_norm": 0.998869087670947, "learning_rate": 1.6014738083425378e-05, "loss": 0.9775, "step": 1862 }, { "epoch": 0.3158965663416702, "grad_norm": 0.995821233756967, "learning_rate": 1.6010349423193753e-05, "loss": 0.9776, "step": 1863 }, { "epoch": 0.31606612971598136, "grad_norm": 0.9431541526561014, "learning_rate": 1.6005958949922077e-05, "loss": 0.975, "step": 1864 }, { "epoch": 0.3162356930902925, "grad_norm": 0.9605142782302432, "learning_rate": 1.600156666493475e-05, "loss": 0.997, "step": 1865 }, { "epoch": 0.31640525646460366, "grad_norm": 1.0356940923737057, "learning_rate": 1.5997172569556717e-05, "loss": 1.0193, "step": 1866 }, { "epoch": 0.3165748198389148, "grad_norm": 0.9794872450451078, "learning_rate": 1.599277666511347e-05, "loss": 1.028, "step": 1867 }, { "epoch": 0.31674438321322596, "grad_norm": 0.9978961700610897, "learning_rate": 1.5988378952931047e-05, "loss": 1.0426, "step": 1868 }, { "epoch": 0.3169139465875371, "grad_norm": 0.9465061263038369, "learning_rate": 1.598397943433603e-05, "loss": 1.0075, "step": 1869 }, { "epoch": 0.31708350996184825, "grad_norm": 0.6579223000191161, "learning_rate": 1.597957811065555e-05, "loss": 0.8428, "step": 1870 }, { "epoch": 0.3172530733361594, "grad_norm": 0.9982834175268934, "learning_rate": 1.5975174983217273e-05, "loss": 1.0353, "step": 1871 }, { "epoch": 0.31742263671047055, "grad_norm": 0.6079058639237402, "learning_rate": 1.5970770053349426e-05, "loss": 0.8767, "step": 1872 }, { "epoch": 0.3175922000847817, "grad_norm": 1.0574585881261511, "learning_rate": 1.596636332238076e-05, "loss": 0.975, "step": 1873 }, { "epoch": 0.31776176345909285, "grad_norm": 0.9655541394825543, "learning_rate": 1.5961954791640582e-05, "loss": 1.0106, "step": 1874 }, { "epoch": 0.31793132683340397, "grad_norm": 1.0048781481777413, "learning_rate": 1.595754446245874e-05, "loss": 0.9786, "step": 1875 }, { "epoch": 0.31810089020771515, "grad_norm": 1.0063905949494099, "learning_rate": 1.595313233616562e-05, "loss": 1.0143, "step": 1876 }, { "epoch": 0.31827045358202627, "grad_norm": 1.0406411948150756, "learning_rate": 1.5948718414092163e-05, "loss": 1.0399, "step": 1877 }, { "epoch": 0.31844001695633745, "grad_norm": 0.9967238215167251, "learning_rate": 1.5944302697569828e-05, "loss": 0.9909, "step": 1878 }, { "epoch": 0.31860958033064857, "grad_norm": 1.0230635091406037, "learning_rate": 1.5939885187930636e-05, "loss": 1.0189, "step": 1879 }, { "epoch": 0.31877914370495974, "grad_norm": 0.946193375131637, "learning_rate": 1.5935465886507143e-05, "loss": 0.9927, "step": 1880 }, { "epoch": 0.31894870707927087, "grad_norm": 1.0308349922358215, "learning_rate": 1.593104479463244e-05, "loss": 1.0148, "step": 1881 }, { "epoch": 0.31911827045358204, "grad_norm": 1.0002855721720632, "learning_rate": 1.592662191364017e-05, "loss": 0.983, "step": 1882 }, { "epoch": 0.31928783382789316, "grad_norm": 0.9676552935956418, "learning_rate": 1.5922197244864503e-05, "loss": 1.0065, "step": 1883 }, { "epoch": 0.31945739720220434, "grad_norm": 1.0656128508421252, "learning_rate": 1.5917770789640153e-05, "loss": 1.0124, "step": 1884 }, { "epoch": 0.31962696057651546, "grad_norm": 0.9732578143627179, "learning_rate": 1.5913342549302378e-05, "loss": 0.9807, "step": 1885 }, { "epoch": 0.31979652395082664, "grad_norm": 0.9766799054330302, "learning_rate": 1.590891252518697e-05, "loss": 1.015, "step": 1886 }, { "epoch": 0.31996608732513776, "grad_norm": 0.9792386309551184, "learning_rate": 1.5904480718630252e-05, "loss": 0.9719, "step": 1887 }, { "epoch": 0.32013565069944894, "grad_norm": 1.0783478509657463, "learning_rate": 1.59000471309691e-05, "loss": 1.0309, "step": 1888 }, { "epoch": 0.32030521407376006, "grad_norm": 0.9787900448683673, "learning_rate": 1.5895611763540914e-05, "loss": 1.0468, "step": 1889 }, { "epoch": 0.32047477744807124, "grad_norm": 1.0192124868613712, "learning_rate": 1.5891174617683635e-05, "loss": 0.9936, "step": 1890 }, { "epoch": 0.32064434082238236, "grad_norm": 0.9636750402412061, "learning_rate": 1.588673569473575e-05, "loss": 1.0222, "step": 1891 }, { "epoch": 0.32081390419669353, "grad_norm": 0.9789679652829065, "learning_rate": 1.5882294996036264e-05, "loss": 1.0284, "step": 1892 }, { "epoch": 0.32098346757100465, "grad_norm": 0.9885958581632905, "learning_rate": 1.5877852522924733e-05, "loss": 1.0412, "step": 1893 }, { "epoch": 0.32115303094531583, "grad_norm": 0.9269950081507043, "learning_rate": 1.5873408276741237e-05, "loss": 1.0066, "step": 1894 }, { "epoch": 0.32132259431962695, "grad_norm": 1.011142141345352, "learning_rate": 1.5868962258826407e-05, "loss": 1.0217, "step": 1895 }, { "epoch": 0.32149215769393813, "grad_norm": 1.0098994425770482, "learning_rate": 1.5864514470521383e-05, "loss": 1.0075, "step": 1896 }, { "epoch": 0.32166172106824925, "grad_norm": 0.9645736430012207, "learning_rate": 1.5860064913167863e-05, "loss": 1.0338, "step": 1897 }, { "epoch": 0.32183128444256043, "grad_norm": 0.9558375423679968, "learning_rate": 1.5855613588108067e-05, "loss": 0.9943, "step": 1898 }, { "epoch": 0.32200084781687155, "grad_norm": 1.0303935285419652, "learning_rate": 1.585116049668475e-05, "loss": 1.0291, "step": 1899 }, { "epoch": 0.3221704111911827, "grad_norm": 1.0246587726385061, "learning_rate": 1.5846705640241206e-05, "loss": 1.0297, "step": 1900 }, { "epoch": 0.32233997456549385, "grad_norm": 0.9905973664457313, "learning_rate": 1.584224902012125e-05, "loss": 0.9942, "step": 1901 }, { "epoch": 0.322509537939805, "grad_norm": 0.9821434420253609, "learning_rate": 1.5837790637669237e-05, "loss": 1.0012, "step": 1902 }, { "epoch": 0.32267910131411615, "grad_norm": 0.9714831890867112, "learning_rate": 1.583333049423005e-05, "loss": 0.9968, "step": 1903 }, { "epoch": 0.3228486646884273, "grad_norm": 1.0450880483602858, "learning_rate": 1.5828868591149104e-05, "loss": 1.0139, "step": 1904 }, { "epoch": 0.32301822806273844, "grad_norm": 1.0362206270140941, "learning_rate": 1.5824404929772347e-05, "loss": 1.0109, "step": 1905 }, { "epoch": 0.3231877914370496, "grad_norm": 1.0683406522143062, "learning_rate": 1.581993951144626e-05, "loss": 1.0451, "step": 1906 }, { "epoch": 0.32335735481136074, "grad_norm": 0.9874279668704485, "learning_rate": 1.5815472337517843e-05, "loss": 0.9901, "step": 1907 }, { "epoch": 0.3235269181856719, "grad_norm": 0.9420503168241291, "learning_rate": 1.5811003409334635e-05, "loss": 1.0025, "step": 1908 }, { "epoch": 0.32369648155998304, "grad_norm": 0.9433224210660105, "learning_rate": 1.5806532728244707e-05, "loss": 0.9723, "step": 1909 }, { "epoch": 0.3238660449342942, "grad_norm": 1.0637555733920365, "learning_rate": 1.5802060295596643e-05, "loss": 1.0224, "step": 1910 }, { "epoch": 0.32403560830860534, "grad_norm": 0.9544715632346825, "learning_rate": 1.5797586112739575e-05, "loss": 0.9786, "step": 1911 }, { "epoch": 0.3242051716829165, "grad_norm": 1.003272481054091, "learning_rate": 1.579311018102315e-05, "loss": 1.0051, "step": 1912 }, { "epoch": 0.32437473505722764, "grad_norm": 1.012267656390877, "learning_rate": 1.5788632501797545e-05, "loss": 1.0374, "step": 1913 }, { "epoch": 0.3245442984315388, "grad_norm": 0.9626360713826805, "learning_rate": 1.578415307641347e-05, "loss": 1.047, "step": 1914 }, { "epoch": 0.32471386180584993, "grad_norm": 0.987019021841255, "learning_rate": 1.577967190622215e-05, "loss": 0.9992, "step": 1915 }, { "epoch": 0.3248834251801611, "grad_norm": 1.0597031415155116, "learning_rate": 1.577518899257535e-05, "loss": 1.0092, "step": 1916 }, { "epoch": 0.32505298855447223, "grad_norm": 1.0089623696987418, "learning_rate": 1.5770704336825354e-05, "loss": 1.0429, "step": 1917 }, { "epoch": 0.3252225519287834, "grad_norm": 0.9276868410811404, "learning_rate": 1.576621794032497e-05, "loss": 0.9488, "step": 1918 }, { "epoch": 0.32539211530309453, "grad_norm": 1.0549749916794338, "learning_rate": 1.576172980442753e-05, "loss": 1.0088, "step": 1919 }, { "epoch": 0.3255616786774057, "grad_norm": 0.9748917422847581, "learning_rate": 1.57572399304869e-05, "loss": 1.0183, "step": 1920 }, { "epoch": 0.32573124205171683, "grad_norm": 1.0166603459142411, "learning_rate": 1.575274831985746e-05, "loss": 1.0074, "step": 1921 }, { "epoch": 0.32590080542602795, "grad_norm": 1.0386849761493508, "learning_rate": 1.574825497389412e-05, "loss": 1.0136, "step": 1922 }, { "epoch": 0.3260703688003391, "grad_norm": 1.0355082073267796, "learning_rate": 1.5743759893952306e-05, "loss": 1.0294, "step": 1923 }, { "epoch": 0.32623993217465025, "grad_norm": 0.975107797332124, "learning_rate": 1.5739263081387983e-05, "loss": 1.0054, "step": 1924 }, { "epoch": 0.3264094955489614, "grad_norm": 1.0073725273436516, "learning_rate": 1.5734764537557617e-05, "loss": 0.9822, "step": 1925 }, { "epoch": 0.32657905892327255, "grad_norm": 1.0102409530630205, "learning_rate": 1.5730264263818212e-05, "loss": 1.022, "step": 1926 }, { "epoch": 0.3267486222975837, "grad_norm": 1.0297794794168857, "learning_rate": 1.5725762261527295e-05, "loss": 0.9982, "step": 1927 }, { "epoch": 0.32691818567189485, "grad_norm": 1.012319423695426, "learning_rate": 1.57212585320429e-05, "loss": 1.0259, "step": 1928 }, { "epoch": 0.327087749046206, "grad_norm": 0.9621073037164247, "learning_rate": 1.5716753076723594e-05, "loss": 0.9813, "step": 1929 }, { "epoch": 0.32725731242051714, "grad_norm": 1.0437756761144823, "learning_rate": 1.5712245896928458e-05, "loss": 1.0284, "step": 1930 }, { "epoch": 0.3274268757948283, "grad_norm": 1.0027007083831454, "learning_rate": 1.57077369940171e-05, "loss": 1.0078, "step": 1931 }, { "epoch": 0.32759643916913944, "grad_norm": 0.9622145131219082, "learning_rate": 1.5703226369349642e-05, "loss": 0.9976, "step": 1932 }, { "epoch": 0.3277660025434506, "grad_norm": 0.9694541250884402, "learning_rate": 1.5698714024286733e-05, "loss": 1.0035, "step": 1933 }, { "epoch": 0.32793556591776174, "grad_norm": 0.9567664175030857, "learning_rate": 1.5694199960189526e-05, "loss": 1.0125, "step": 1934 }, { "epoch": 0.3281051292920729, "grad_norm": 0.9968959199031922, "learning_rate": 1.568968417841971e-05, "loss": 1.0129, "step": 1935 }, { "epoch": 0.32827469266638404, "grad_norm": 0.9311772765969382, "learning_rate": 1.5685166680339483e-05, "loss": 0.9574, "step": 1936 }, { "epoch": 0.3284442560406952, "grad_norm": 1.0216924103822678, "learning_rate": 1.568064746731156e-05, "loss": 0.998, "step": 1937 }, { "epoch": 0.32861381941500634, "grad_norm": 1.024410162571907, "learning_rate": 1.5676126540699174e-05, "loss": 1.0365, "step": 1938 }, { "epoch": 0.3287833827893175, "grad_norm": 1.0090352555077087, "learning_rate": 1.5671603901866078e-05, "loss": 0.9918, "step": 1939 }, { "epoch": 0.32895294616362863, "grad_norm": 0.9674802350477768, "learning_rate": 1.5667079552176544e-05, "loss": 1.0133, "step": 1940 }, { "epoch": 0.3291225095379398, "grad_norm": 0.9472013801043612, "learning_rate": 1.5662553492995347e-05, "loss": 0.9959, "step": 1941 }, { "epoch": 0.32929207291225093, "grad_norm": 0.9957464801894853, "learning_rate": 1.5658025725687793e-05, "loss": 0.9866, "step": 1942 }, { "epoch": 0.3294616362865621, "grad_norm": 1.0001120207477021, "learning_rate": 1.5653496251619693e-05, "loss": 0.9937, "step": 1943 }, { "epoch": 0.32963119966087323, "grad_norm": 0.7111223088466704, "learning_rate": 1.5648965072157378e-05, "loss": 0.9294, "step": 1944 }, { "epoch": 0.3298007630351844, "grad_norm": 0.993306192326719, "learning_rate": 1.5644432188667695e-05, "loss": 1.0249, "step": 1945 }, { "epoch": 0.32997032640949553, "grad_norm": 1.0249390328546526, "learning_rate": 1.5639897602517996e-05, "loss": 1.0126, "step": 1946 }, { "epoch": 0.3301398897838067, "grad_norm": 1.058227942444184, "learning_rate": 1.5635361315076157e-05, "loss": 1.0492, "step": 1947 }, { "epoch": 0.3303094531581178, "grad_norm": 0.9888870900140873, "learning_rate": 1.5630823327710558e-05, "loss": 1.0094, "step": 1948 }, { "epoch": 0.330479016532429, "grad_norm": 0.9357541722164157, "learning_rate": 1.5626283641790105e-05, "loss": 0.9903, "step": 1949 }, { "epoch": 0.3306485799067401, "grad_norm": 0.6653962932999141, "learning_rate": 1.56217422586842e-05, "loss": 0.8853, "step": 1950 }, { "epoch": 0.3308181432810513, "grad_norm": 0.9647512499555115, "learning_rate": 1.5617199179762767e-05, "loss": 1.0045, "step": 1951 }, { "epoch": 0.3309877066553624, "grad_norm": 1.0420341363389451, "learning_rate": 1.561265440639624e-05, "loss": 1.0526, "step": 1952 }, { "epoch": 0.3311572700296736, "grad_norm": 1.022327382221898, "learning_rate": 1.5608107939955565e-05, "loss": 0.9993, "step": 1953 }, { "epoch": 0.3313268334039847, "grad_norm": 1.0567423133947502, "learning_rate": 1.56035597818122e-05, "loss": 1.0084, "step": 1954 }, { "epoch": 0.3314963967782959, "grad_norm": 0.9987288255497635, "learning_rate": 1.5599009933338102e-05, "loss": 1.0072, "step": 1955 }, { "epoch": 0.331665960152607, "grad_norm": 0.9767128024010041, "learning_rate": 1.5594458395905754e-05, "loss": 1.0091, "step": 1956 }, { "epoch": 0.3318355235269182, "grad_norm": 1.0721425878700144, "learning_rate": 1.5589905170888136e-05, "loss": 1.0127, "step": 1957 }, { "epoch": 0.3320050869012293, "grad_norm": 1.043154130609934, "learning_rate": 1.5585350259658748e-05, "loss": 1.0033, "step": 1958 }, { "epoch": 0.3321746502755405, "grad_norm": 0.9883145360312716, "learning_rate": 1.5580793663591583e-05, "loss": 0.9702, "step": 1959 }, { "epoch": 0.3323442136498516, "grad_norm": 1.0099266871840573, "learning_rate": 1.5576235384061168e-05, "loss": 0.9716, "step": 1960 }, { "epoch": 0.3325137770241628, "grad_norm": 0.9699130377267756, "learning_rate": 1.5571675422442504e-05, "loss": 0.987, "step": 1961 }, { "epoch": 0.3326833403984739, "grad_norm": 0.9592318073429286, "learning_rate": 1.5567113780111128e-05, "loss": 0.9793, "step": 1962 }, { "epoch": 0.3328529037727851, "grad_norm": 0.9688505927390096, "learning_rate": 1.556255045844307e-05, "loss": 1.0072, "step": 1963 }, { "epoch": 0.3330224671470962, "grad_norm": 1.0134694831162991, "learning_rate": 1.555798545881487e-05, "loss": 1.0319, "step": 1964 }, { "epoch": 0.3331920305214074, "grad_norm": 0.9870003333639381, "learning_rate": 1.5553418782603574e-05, "loss": 1.0046, "step": 1965 }, { "epoch": 0.3333615938957185, "grad_norm": 0.9966508367407503, "learning_rate": 1.5548850431186732e-05, "loss": 1.0116, "step": 1966 }, { "epoch": 0.3335311572700297, "grad_norm": 1.1014023281728036, "learning_rate": 1.554428040594241e-05, "loss": 0.9934, "step": 1967 }, { "epoch": 0.3337007206443408, "grad_norm": 0.9642444724824197, "learning_rate": 1.5539708708249155e-05, "loss": 1.0289, "step": 1968 }, { "epoch": 0.333870284018652, "grad_norm": 0.9664566647450222, "learning_rate": 1.5535135339486044e-05, "loss": 0.9852, "step": 1969 }, { "epoch": 0.3340398473929631, "grad_norm": 1.0395932237438719, "learning_rate": 1.5530560301032644e-05, "loss": 1.0025, "step": 1970 }, { "epoch": 0.3342094107672743, "grad_norm": 1.0041883169749564, "learning_rate": 1.5525983594269026e-05, "loss": 0.9601, "step": 1971 }, { "epoch": 0.3343789741415854, "grad_norm": 0.9948895018405378, "learning_rate": 1.552140522057578e-05, "loss": 1.0054, "step": 1972 }, { "epoch": 0.3345485375158966, "grad_norm": 1.0085855924292748, "learning_rate": 1.5516825181333967e-05, "loss": 1.0138, "step": 1973 }, { "epoch": 0.3347181008902077, "grad_norm": 0.9969111700914716, "learning_rate": 1.551224347792519e-05, "loss": 0.9863, "step": 1974 }, { "epoch": 0.3348876642645189, "grad_norm": 1.0078960859634392, "learning_rate": 1.5507660111731514e-05, "loss": 0.9961, "step": 1975 }, { "epoch": 0.33505722763883, "grad_norm": 1.012636963165273, "learning_rate": 1.550307508413554e-05, "loss": 1.0047, "step": 1976 }, { "epoch": 0.3352267910131412, "grad_norm": 1.064933940454274, "learning_rate": 1.549848839652035e-05, "loss": 1.0522, "step": 1977 }, { "epoch": 0.3353963543874523, "grad_norm": 0.9858142553685583, "learning_rate": 1.5493900050269536e-05, "loss": 0.9676, "step": 1978 }, { "epoch": 0.3355659177617635, "grad_norm": 0.9695329457844212, "learning_rate": 1.5489310046767178e-05, "loss": 0.9619, "step": 1979 }, { "epoch": 0.3357354811360746, "grad_norm": 0.9898193815478287, "learning_rate": 1.548471838739787e-05, "loss": 1.0095, "step": 1980 }, { "epoch": 0.3359050445103858, "grad_norm": 0.9616954499602726, "learning_rate": 1.5480125073546705e-05, "loss": 1.0091, "step": 1981 }, { "epoch": 0.3360746078846969, "grad_norm": 0.9923947777580014, "learning_rate": 1.5475530106599256e-05, "loss": 0.985, "step": 1982 }, { "epoch": 0.3362441712590081, "grad_norm": 1.041335852187222, "learning_rate": 1.547093348794162e-05, "loss": 1.0108, "step": 1983 }, { "epoch": 0.3364137346333192, "grad_norm": 0.9669472264361062, "learning_rate": 1.546633521896038e-05, "loss": 0.9785, "step": 1984 }, { "epoch": 0.33658329800763037, "grad_norm": 0.9881989601125073, "learning_rate": 1.5461735301042615e-05, "loss": 1.0171, "step": 1985 }, { "epoch": 0.3367528613819415, "grad_norm": 0.9854037263011841, "learning_rate": 1.5457133735575905e-05, "loss": 1.0127, "step": 1986 }, { "epoch": 0.33692242475625267, "grad_norm": 0.9742649102909576, "learning_rate": 1.5452530523948323e-05, "loss": 1.019, "step": 1987 }, { "epoch": 0.3370919881305638, "grad_norm": 1.0264095263409634, "learning_rate": 1.5447925667548448e-05, "loss": 1.0525, "step": 1988 }, { "epoch": 0.33726155150487497, "grad_norm": 0.9536302093637906, "learning_rate": 1.5443319167765345e-05, "loss": 0.9977, "step": 1989 }, { "epoch": 0.3374311148791861, "grad_norm": 0.9882925682687181, "learning_rate": 1.543871102598858e-05, "loss": 1.006, "step": 1990 }, { "epoch": 0.33760067825349727, "grad_norm": 0.9982911558620327, "learning_rate": 1.5434101243608205e-05, "loss": 1.0101, "step": 1991 }, { "epoch": 0.3377702416278084, "grad_norm": 0.9984375035046159, "learning_rate": 1.542948982201479e-05, "loss": 1.0205, "step": 1992 }, { "epoch": 0.33793980500211956, "grad_norm": 0.9901329379999525, "learning_rate": 1.542487676259937e-05, "loss": 0.9798, "step": 1993 }, { "epoch": 0.3381093683764307, "grad_norm": 0.9204799605729618, "learning_rate": 1.54202620667535e-05, "loss": 0.9613, "step": 1994 }, { "epoch": 0.33827893175074186, "grad_norm": 0.9564591902100879, "learning_rate": 1.5415645735869206e-05, "loss": 0.9798, "step": 1995 }, { "epoch": 0.338448495125053, "grad_norm": 0.9855650280510012, "learning_rate": 1.5411027771339023e-05, "loss": 0.9648, "step": 1996 }, { "epoch": 0.33861805849936416, "grad_norm": 0.9783289004141156, "learning_rate": 1.5406408174555978e-05, "loss": 0.9819, "step": 1997 }, { "epoch": 0.3387876218736753, "grad_norm": 1.0178840449502158, "learning_rate": 1.540178694691358e-05, "loss": 0.9668, "step": 1998 }, { "epoch": 0.33895718524798646, "grad_norm": 0.91657579096201, "learning_rate": 1.5397164089805842e-05, "loss": 0.9902, "step": 1999 }, { "epoch": 0.3391267486222976, "grad_norm": 0.9461462072822882, "learning_rate": 1.5392539604627255e-05, "loss": 0.9868, "step": 2000 }, { "epoch": 0.33929631199660876, "grad_norm": 0.9688217491830641, "learning_rate": 1.5387913492772816e-05, "loss": 0.9971, "step": 2001 }, { "epoch": 0.3394658753709199, "grad_norm": 0.9558164219939712, "learning_rate": 1.5383285755638002e-05, "loss": 1.016, "step": 2002 }, { "epoch": 0.33963543874523106, "grad_norm": 0.9623546503858995, "learning_rate": 1.5378656394618788e-05, "loss": 0.9913, "step": 2003 }, { "epoch": 0.3398050021195422, "grad_norm": 1.050059224730583, "learning_rate": 1.537402541111163e-05, "loss": 1.0203, "step": 2004 }, { "epoch": 0.33997456549385335, "grad_norm": 0.9992401293860915, "learning_rate": 1.536939280651348e-05, "loss": 0.9996, "step": 2005 }, { "epoch": 0.3401441288681645, "grad_norm": 0.9882078907203777, "learning_rate": 1.5364758582221775e-05, "loss": 1.0001, "step": 2006 }, { "epoch": 0.34031369224247565, "grad_norm": 0.9250831840923877, "learning_rate": 1.5360122739634444e-05, "loss": 0.9646, "step": 2007 }, { "epoch": 0.3404832556167868, "grad_norm": 1.005415331249681, "learning_rate": 1.5355485280149908e-05, "loss": 1.0214, "step": 2008 }, { "epoch": 0.34065281899109795, "grad_norm": 0.9172486724632182, "learning_rate": 1.5350846205167065e-05, "loss": 0.9444, "step": 2009 }, { "epoch": 0.34082238236540907, "grad_norm": 0.9613513738656416, "learning_rate": 1.5346205516085305e-05, "loss": 1.0225, "step": 2010 }, { "epoch": 0.34099194573972025, "grad_norm": 1.044286141829047, "learning_rate": 1.534156321430451e-05, "loss": 1.0439, "step": 2011 }, { "epoch": 0.34116150911403137, "grad_norm": 0.9825547205433247, "learning_rate": 1.5336919301225042e-05, "loss": 0.9979, "step": 2012 }, { "epoch": 0.3413310724883425, "grad_norm": 0.9699608596113349, "learning_rate": 1.533227377824775e-05, "loss": 1.0053, "step": 2013 }, { "epoch": 0.34150063586265367, "grad_norm": 1.0081056256837027, "learning_rate": 1.5327626646773975e-05, "loss": 1.0248, "step": 2014 }, { "epoch": 0.3416701992369648, "grad_norm": 0.9841112820981301, "learning_rate": 1.5322977908205537e-05, "loss": 0.9888, "step": 2015 }, { "epoch": 0.34183976261127597, "grad_norm": 0.9427539140869523, "learning_rate": 1.531832756394474e-05, "loss": 1.0081, "step": 2016 }, { "epoch": 0.3420093259855871, "grad_norm": 0.9850126420134957, "learning_rate": 1.5313675615394373e-05, "loss": 1.006, "step": 2017 }, { "epoch": 0.34217888935989826, "grad_norm": 0.9432197837408135, "learning_rate": 1.5309022063957712e-05, "loss": 1.0102, "step": 2018 }, { "epoch": 0.3423484527342094, "grad_norm": 0.9933170323915388, "learning_rate": 1.5304366911038517e-05, "loss": 1.0343, "step": 2019 }, { "epoch": 0.34251801610852056, "grad_norm": 1.0039903386596305, "learning_rate": 1.5299710158041023e-05, "loss": 1.0212, "step": 2020 }, { "epoch": 0.3426875794828317, "grad_norm": 0.9870840459336154, "learning_rate": 1.529505180636996e-05, "loss": 1.0015, "step": 2021 }, { "epoch": 0.34285714285714286, "grad_norm": 1.0071131021616893, "learning_rate": 1.5290391857430528e-05, "loss": 1.0019, "step": 2022 }, { "epoch": 0.343026706231454, "grad_norm": 0.9786450318999963, "learning_rate": 1.528573031262842e-05, "loss": 1.0149, "step": 2023 }, { "epoch": 0.34319626960576516, "grad_norm": 0.974870653942427, "learning_rate": 1.5281067173369804e-05, "loss": 0.9823, "step": 2024 }, { "epoch": 0.3433658329800763, "grad_norm": 0.9942748834805935, "learning_rate": 1.527640244106133e-05, "loss": 0.9934, "step": 2025 }, { "epoch": 0.34353539635438746, "grad_norm": 0.9764450285933127, "learning_rate": 1.5271736117110125e-05, "loss": 1.0121, "step": 2026 }, { "epoch": 0.3437049597286986, "grad_norm": 0.9956403755146589, "learning_rate": 1.5267068202923802e-05, "loss": 1.0196, "step": 2027 }, { "epoch": 0.34387452310300975, "grad_norm": 0.9646787279914002, "learning_rate": 1.5262398699910456e-05, "loss": 1.0145, "step": 2028 }, { "epoch": 0.3440440864773209, "grad_norm": 0.9476877958294668, "learning_rate": 1.5257727609478647e-05, "loss": 1.015, "step": 2029 }, { "epoch": 0.34421364985163205, "grad_norm": 0.9371017637390845, "learning_rate": 1.5253054933037433e-05, "loss": 0.9834, "step": 2030 }, { "epoch": 0.3443832132259432, "grad_norm": 1.0200467530193926, "learning_rate": 1.5248380671996333e-05, "loss": 1.0087, "step": 2031 }, { "epoch": 0.34455277660025435, "grad_norm": 0.9478642311363885, "learning_rate": 1.5243704827765355e-05, "loss": 0.9645, "step": 2032 }, { "epoch": 0.3447223399745655, "grad_norm": 0.9849038729822642, "learning_rate": 1.5239027401754987e-05, "loss": 1.0299, "step": 2033 }, { "epoch": 0.34489190334887665, "grad_norm": 1.008066785172618, "learning_rate": 1.5234348395376181e-05, "loss": 0.9889, "step": 2034 }, { "epoch": 0.34506146672318777, "grad_norm": 0.9890629169110476, "learning_rate": 1.522966781004038e-05, "loss": 1.006, "step": 2035 }, { "epoch": 0.34523103009749895, "grad_norm": 0.9892380352512955, "learning_rate": 1.5224985647159489e-05, "loss": 0.9774, "step": 2036 }, { "epoch": 0.34540059347181007, "grad_norm": 0.934807228595409, "learning_rate": 1.5220301908145905e-05, "loss": 0.9482, "step": 2037 }, { "epoch": 0.34557015684612125, "grad_norm": 0.9958996659542763, "learning_rate": 1.5215616594412483e-05, "loss": 1.0081, "step": 2038 }, { "epoch": 0.34573972022043237, "grad_norm": 0.9489549153343333, "learning_rate": 1.521092970737257e-05, "loss": 0.9583, "step": 2039 }, { "epoch": 0.34590928359474354, "grad_norm": 0.9745619440493434, "learning_rate": 1.5206241248439977e-05, "loss": 0.9813, "step": 2040 }, { "epoch": 0.34607884696905467, "grad_norm": 0.9588406776363732, "learning_rate": 1.5201551219028988e-05, "loss": 0.9711, "step": 2041 }, { "epoch": 0.34624841034336584, "grad_norm": 1.0720060950984562, "learning_rate": 1.519685962055437e-05, "loss": 1.0221, "step": 2042 }, { "epoch": 0.34641797371767696, "grad_norm": 0.9601982126000166, "learning_rate": 1.5192166454431357e-05, "loss": 0.9851, "step": 2043 }, { "epoch": 0.34658753709198814, "grad_norm": 0.9778824941821028, "learning_rate": 1.518747172207565e-05, "loss": 1.0103, "step": 2044 }, { "epoch": 0.34675710046629926, "grad_norm": 0.9599923893417894, "learning_rate": 1.5182775424903437e-05, "loss": 0.9766, "step": 2045 }, { "epoch": 0.34692666384061044, "grad_norm": 0.9582586657866646, "learning_rate": 1.5178077564331371e-05, "loss": 1.0008, "step": 2046 }, { "epoch": 0.34709622721492156, "grad_norm": 0.9797476428419623, "learning_rate": 1.5173378141776569e-05, "loss": 1.0216, "step": 2047 }, { "epoch": 0.34726579058923274, "grad_norm": 0.9814825111743928, "learning_rate": 1.5168677158656633e-05, "loss": 0.9631, "step": 2048 }, { "epoch": 0.34743535396354386, "grad_norm": 0.975072724699753, "learning_rate": 1.5163974616389621e-05, "loss": 1.0008, "step": 2049 }, { "epoch": 0.34760491733785503, "grad_norm": 0.9931577930713837, "learning_rate": 1.5159270516394074e-05, "loss": 0.9714, "step": 2050 }, { "epoch": 0.34777448071216616, "grad_norm": 0.9687591244382638, "learning_rate": 1.5154564860088998e-05, "loss": 1.0194, "step": 2051 }, { "epoch": 0.34794404408647733, "grad_norm": 0.9486807395019651, "learning_rate": 1.5149857648893871e-05, "loss": 1.0082, "step": 2052 }, { "epoch": 0.34811360746078845, "grad_norm": 0.9599413755492713, "learning_rate": 1.514514888422863e-05, "loss": 0.9723, "step": 2053 }, { "epoch": 0.34828317083509963, "grad_norm": 1.0240880542834017, "learning_rate": 1.5140438567513695e-05, "loss": 1.0041, "step": 2054 }, { "epoch": 0.34845273420941075, "grad_norm": 0.9789204918194641, "learning_rate": 1.5135726700169944e-05, "loss": 1.0003, "step": 2055 }, { "epoch": 0.34862229758372193, "grad_norm": 0.9653717275377449, "learning_rate": 1.5131013283618725e-05, "loss": 0.9981, "step": 2056 }, { "epoch": 0.34879186095803305, "grad_norm": 0.9728103891927375, "learning_rate": 1.5126298319281859e-05, "loss": 0.9858, "step": 2057 }, { "epoch": 0.3489614243323442, "grad_norm": 1.01168269180722, "learning_rate": 1.5121581808581623e-05, "loss": 0.9705, "step": 2058 }, { "epoch": 0.34913098770665535, "grad_norm": 0.9295484813073963, "learning_rate": 1.511686375294077e-05, "loss": 1.0018, "step": 2059 }, { "epoch": 0.3493005510809665, "grad_norm": 1.010828751609662, "learning_rate": 1.5112144153782517e-05, "loss": 1.0231, "step": 2060 }, { "epoch": 0.34947011445527765, "grad_norm": 0.9385574049809434, "learning_rate": 1.5107423012530546e-05, "loss": 0.9963, "step": 2061 }, { "epoch": 0.3496396778295888, "grad_norm": 0.9958651528186389, "learning_rate": 1.5102700330609e-05, "loss": 0.9962, "step": 2062 }, { "epoch": 0.34980924120389995, "grad_norm": 0.9615275481888363, "learning_rate": 1.5097976109442489e-05, "loss": 0.9946, "step": 2063 }, { "epoch": 0.3499788045782111, "grad_norm": 0.9947243347240171, "learning_rate": 1.5093250350456092e-05, "loss": 1.0012, "step": 2064 }, { "epoch": 0.35014836795252224, "grad_norm": 0.9827765460141319, "learning_rate": 1.508852305507535e-05, "loss": 0.9935, "step": 2065 }, { "epoch": 0.3503179313268334, "grad_norm": 0.9645647840904965, "learning_rate": 1.5083794224726262e-05, "loss": 0.9634, "step": 2066 }, { "epoch": 0.35048749470114454, "grad_norm": 0.9492096519486234, "learning_rate": 1.5079063860835295e-05, "loss": 0.9779, "step": 2067 }, { "epoch": 0.3506570580754557, "grad_norm": 0.9734174689268947, "learning_rate": 1.507433196482938e-05, "loss": 0.9702, "step": 2068 }, { "epoch": 0.35082662144976684, "grad_norm": 0.9264518521913037, "learning_rate": 1.5069598538135905e-05, "loss": 1.0106, "step": 2069 }, { "epoch": 0.350996184824078, "grad_norm": 0.9623859365502476, "learning_rate": 1.5064863582182725e-05, "loss": 0.9733, "step": 2070 }, { "epoch": 0.35116574819838914, "grad_norm": 0.9905612849049856, "learning_rate": 1.5060127098398151e-05, "loss": 1.0115, "step": 2071 }, { "epoch": 0.3513353115727003, "grad_norm": 0.9934109799673223, "learning_rate": 1.505538908821096e-05, "loss": 0.9464, "step": 2072 }, { "epoch": 0.35150487494701144, "grad_norm": 0.9538267705770282, "learning_rate": 1.5050649553050383e-05, "loss": 0.9873, "step": 2073 }, { "epoch": 0.3516744383213226, "grad_norm": 0.9340991033226421, "learning_rate": 1.5045908494346117e-05, "loss": 0.9821, "step": 2074 }, { "epoch": 0.35184400169563373, "grad_norm": 0.9300807794171914, "learning_rate": 1.504116591352832e-05, "loss": 0.9601, "step": 2075 }, { "epoch": 0.3520135650699449, "grad_norm": 0.9796421579581944, "learning_rate": 1.5036421812027604e-05, "loss": 0.955, "step": 2076 }, { "epoch": 0.35218312844425603, "grad_norm": 0.9963431326293527, "learning_rate": 1.503167619127504e-05, "loss": 1.0051, "step": 2077 }, { "epoch": 0.3523526918185672, "grad_norm": 1.022777336166867, "learning_rate": 1.5026929052702159e-05, "loss": 1.0482, "step": 2078 }, { "epoch": 0.35252225519287833, "grad_norm": 1.003562436958779, "learning_rate": 1.5022180397740953e-05, "loss": 0.9912, "step": 2079 }, { "epoch": 0.3526918185671895, "grad_norm": 0.9941493414383453, "learning_rate": 1.5017430227823867e-05, "loss": 1.0117, "step": 2080 }, { "epoch": 0.35286138194150063, "grad_norm": 0.9738283524302663, "learning_rate": 1.5012678544383797e-05, "loss": 0.9876, "step": 2081 }, { "epoch": 0.3530309453158118, "grad_norm": 0.6291859971830636, "learning_rate": 1.5007925348854112e-05, "loss": 0.7942, "step": 2082 }, { "epoch": 0.3532005086901229, "grad_norm": 0.9709256770650636, "learning_rate": 1.500317064266862e-05, "loss": 0.9697, "step": 2083 }, { "epoch": 0.3533700720644341, "grad_norm": 0.9826143611178059, "learning_rate": 1.4998414427261601e-05, "loss": 0.9948, "step": 2084 }, { "epoch": 0.3535396354387452, "grad_norm": 1.0725279959117793, "learning_rate": 1.4993656704067777e-05, "loss": 0.9961, "step": 2085 }, { "epoch": 0.3537091988130564, "grad_norm": 0.9810808774283746, "learning_rate": 1.4988897474522328e-05, "loss": 0.9949, "step": 2086 }, { "epoch": 0.3538787621873675, "grad_norm": 0.9780484660843558, "learning_rate": 1.4984136740060893e-05, "loss": 0.9921, "step": 2087 }, { "epoch": 0.3540483255616787, "grad_norm": 1.0205286753424472, "learning_rate": 1.497937450211956e-05, "loss": 1.027, "step": 2088 }, { "epoch": 0.3542178889359898, "grad_norm": 0.9531606724233822, "learning_rate": 1.4974610762134875e-05, "loss": 0.9724, "step": 2089 }, { "epoch": 0.354387452310301, "grad_norm": 1.0281141393128763, "learning_rate": 1.496984552154383e-05, "loss": 1.0319, "step": 2090 }, { "epoch": 0.3545570156846121, "grad_norm": 0.9656993127942579, "learning_rate": 1.4965078781783882e-05, "loss": 0.9542, "step": 2091 }, { "epoch": 0.3547265790589233, "grad_norm": 0.9888888120020639, "learning_rate": 1.4960310544292922e-05, "loss": 0.9819, "step": 2092 }, { "epoch": 0.3548961424332344, "grad_norm": 0.9439362596378621, "learning_rate": 1.495554081050931e-05, "loss": 0.9828, "step": 2093 }, { "epoch": 0.3550657058075456, "grad_norm": 1.027081446998009, "learning_rate": 1.495076958187185e-05, "loss": 0.9997, "step": 2094 }, { "epoch": 0.3552352691818567, "grad_norm": 0.9904497045976364, "learning_rate": 1.4945996859819799e-05, "loss": 1.0001, "step": 2095 }, { "epoch": 0.3554048325561679, "grad_norm": 0.966081720739253, "learning_rate": 1.4941222645792856e-05, "loss": 0.9337, "step": 2096 }, { "epoch": 0.355574395930479, "grad_norm": 0.9744187863278753, "learning_rate": 1.4936446941231186e-05, "loss": 1.0045, "step": 2097 }, { "epoch": 0.3557439593047902, "grad_norm": 1.0051121518130586, "learning_rate": 1.4931669747575389e-05, "loss": 0.9733, "step": 2098 }, { "epoch": 0.3559135226791013, "grad_norm": 1.0501871535414808, "learning_rate": 1.4926891066266523e-05, "loss": 1.0377, "step": 2099 }, { "epoch": 0.3560830860534125, "grad_norm": 0.9446024027913572, "learning_rate": 1.4922110898746089e-05, "loss": 0.98, "step": 2100 }, { "epoch": 0.3562526494277236, "grad_norm": 0.9866377395826637, "learning_rate": 1.491732924645604e-05, "loss": 0.9894, "step": 2101 }, { "epoch": 0.3564222128020348, "grad_norm": 0.9352987069621439, "learning_rate": 1.4912546110838775e-05, "loss": 0.9674, "step": 2102 }, { "epoch": 0.3565917761763459, "grad_norm": 0.7018484045669574, "learning_rate": 1.4907761493337143e-05, "loss": 0.8398, "step": 2103 }, { "epoch": 0.35676133955065703, "grad_norm": 0.9479973345877905, "learning_rate": 1.4902975395394437e-05, "loss": 0.9942, "step": 2104 }, { "epoch": 0.3569309029249682, "grad_norm": 0.9931903657381124, "learning_rate": 1.4898187818454401e-05, "loss": 1.0144, "step": 2105 }, { "epoch": 0.35710046629927933, "grad_norm": 1.0693892511901195, "learning_rate": 1.4893398763961217e-05, "loss": 1.0329, "step": 2106 }, { "epoch": 0.3572700296735905, "grad_norm": 1.014188407844201, "learning_rate": 1.488860823335952e-05, "loss": 1.0117, "step": 2107 }, { "epoch": 0.3574395930479016, "grad_norm": 0.9465493571813999, "learning_rate": 1.4883816228094392e-05, "loss": 1.0071, "step": 2108 }, { "epoch": 0.3576091564222128, "grad_norm": 1.0095925274863478, "learning_rate": 1.4879022749611349e-05, "loss": 1.0345, "step": 2109 }, { "epoch": 0.3577787197965239, "grad_norm": 0.9526886562301604, "learning_rate": 1.4874227799356362e-05, "loss": 0.995, "step": 2110 }, { "epoch": 0.3579482831708351, "grad_norm": 0.8995092600181381, "learning_rate": 1.4869431378775841e-05, "loss": 0.9326, "step": 2111 }, { "epoch": 0.3581178465451462, "grad_norm": 1.003368076243649, "learning_rate": 1.4864633489316643e-05, "loss": 1.0139, "step": 2112 }, { "epoch": 0.3582874099194574, "grad_norm": 0.9324686182940445, "learning_rate": 1.485983413242606e-05, "loss": 0.9916, "step": 2113 }, { "epoch": 0.3584569732937685, "grad_norm": 0.9617532631228993, "learning_rate": 1.4855033309551842e-05, "loss": 0.9359, "step": 2114 }, { "epoch": 0.3586265366680797, "grad_norm": 1.0416784044123946, "learning_rate": 1.4850231022142163e-05, "loss": 0.993, "step": 2115 }, { "epoch": 0.3587961000423908, "grad_norm": 0.9785218695171009, "learning_rate": 1.4845427271645654e-05, "loss": 0.9974, "step": 2116 }, { "epoch": 0.358965663416702, "grad_norm": 0.9863400278879627, "learning_rate": 1.4840622059511376e-05, "loss": 1.0045, "step": 2117 }, { "epoch": 0.3591352267910131, "grad_norm": 1.0519859642889278, "learning_rate": 1.4835815387188839e-05, "loss": 1.0044, "step": 2118 }, { "epoch": 0.3593047901653243, "grad_norm": 1.0234829669208403, "learning_rate": 1.4831007256127988e-05, "loss": 1.0243, "step": 2119 }, { "epoch": 0.3594743535396354, "grad_norm": 0.9257391581874472, "learning_rate": 1.4826197667779207e-05, "loss": 0.9806, "step": 2120 }, { "epoch": 0.3596439169139466, "grad_norm": 0.9551537262342021, "learning_rate": 1.4821386623593332e-05, "loss": 0.9819, "step": 2121 }, { "epoch": 0.3598134802882577, "grad_norm": 0.6665992334118088, "learning_rate": 1.4816574125021621e-05, "loss": 0.8599, "step": 2122 }, { "epoch": 0.3599830436625689, "grad_norm": 0.6332320711776434, "learning_rate": 1.4811760173515783e-05, "loss": 0.831, "step": 2123 }, { "epoch": 0.36015260703688, "grad_norm": 0.6674833253166653, "learning_rate": 1.4806944770527958e-05, "loss": 0.8643, "step": 2124 }, { "epoch": 0.3603221704111912, "grad_norm": 1.1268055197988103, "learning_rate": 1.4802127917510731e-05, "loss": 0.9642, "step": 2125 }, { "epoch": 0.3604917337855023, "grad_norm": 0.9515325932740403, "learning_rate": 1.4797309615917117e-05, "loss": 0.9983, "step": 2126 }, { "epoch": 0.3606612971598135, "grad_norm": 0.960957507632623, "learning_rate": 1.479248986720057e-05, "loss": 1.0074, "step": 2127 }, { "epoch": 0.3608308605341246, "grad_norm": 1.0134484052530388, "learning_rate": 1.4787668672814985e-05, "loss": 0.981, "step": 2128 }, { "epoch": 0.3610004239084358, "grad_norm": 1.027105251388156, "learning_rate": 1.4782846034214689e-05, "loss": 1.0068, "step": 2129 }, { "epoch": 0.3611699872827469, "grad_norm": 0.9975149036700401, "learning_rate": 1.4778021952854444e-05, "loss": 1.039, "step": 2130 }, { "epoch": 0.3613395506570581, "grad_norm": 1.019092810143808, "learning_rate": 1.4773196430189448e-05, "loss": 1.0305, "step": 2131 }, { "epoch": 0.3615091140313692, "grad_norm": 1.0211526406156899, "learning_rate": 1.4768369467675338e-05, "loss": 0.9966, "step": 2132 }, { "epoch": 0.3616786774056804, "grad_norm": 0.9664773308663944, "learning_rate": 1.476354106676818e-05, "loss": 1.04, "step": 2133 }, { "epoch": 0.3618482407799915, "grad_norm": 0.9636581321467599, "learning_rate": 1.4758711228924477e-05, "loss": 1.0166, "step": 2134 }, { "epoch": 0.3620178041543027, "grad_norm": 0.9524091775344606, "learning_rate": 1.4753879955601162e-05, "loss": 0.9968, "step": 2135 }, { "epoch": 0.3621873675286138, "grad_norm": 0.9743728498015906, "learning_rate": 1.4749047248255605e-05, "loss": 1.0145, "step": 2136 }, { "epoch": 0.362356930902925, "grad_norm": 0.995747233178651, "learning_rate": 1.4744213108345605e-05, "loss": 0.9934, "step": 2137 }, { "epoch": 0.3625264942772361, "grad_norm": 0.9537191035741277, "learning_rate": 1.4739377537329396e-05, "loss": 0.9844, "step": 2138 }, { "epoch": 0.3626960576515473, "grad_norm": 0.9956132533069475, "learning_rate": 1.4734540536665642e-05, "loss": 0.9991, "step": 2139 }, { "epoch": 0.3628656210258584, "grad_norm": 1.0296608724303693, "learning_rate": 1.4729702107813438e-05, "loss": 1.0151, "step": 2140 }, { "epoch": 0.3630351844001696, "grad_norm": 0.7715803235785113, "learning_rate": 1.4724862252232313e-05, "loss": 0.8861, "step": 2141 }, { "epoch": 0.3632047477744807, "grad_norm": 0.9720662706062622, "learning_rate": 1.4720020971382223e-05, "loss": 0.975, "step": 2142 }, { "epoch": 0.3633743111487919, "grad_norm": 1.0156789445922831, "learning_rate": 1.4715178266723556e-05, "loss": 0.9931, "step": 2143 }, { "epoch": 0.363543874523103, "grad_norm": 0.9772187033008063, "learning_rate": 1.4710334139717126e-05, "loss": 0.9775, "step": 2144 }, { "epoch": 0.36371343789741417, "grad_norm": 0.985320433902782, "learning_rate": 1.4705488591824182e-05, "loss": 0.986, "step": 2145 }, { "epoch": 0.3638830012717253, "grad_norm": 0.970568010359772, "learning_rate": 1.4700641624506392e-05, "loss": 0.9918, "step": 2146 }, { "epoch": 0.36405256464603647, "grad_norm": 1.0871718240769177, "learning_rate": 1.4695793239225864e-05, "loss": 1.0057, "step": 2147 }, { "epoch": 0.3642221280203476, "grad_norm": 1.0504833631976895, "learning_rate": 1.4690943437445129e-05, "loss": 0.9908, "step": 2148 }, { "epoch": 0.36439169139465877, "grad_norm": 0.9584611661693286, "learning_rate": 1.4686092220627139e-05, "loss": 0.9627, "step": 2149 }, { "epoch": 0.3645612547689699, "grad_norm": 0.6324928696992854, "learning_rate": 1.4681239590235281e-05, "loss": 0.8321, "step": 2150 }, { "epoch": 0.36473081814328107, "grad_norm": 0.9331345808869536, "learning_rate": 1.4676385547733367e-05, "loss": 0.972, "step": 2151 }, { "epoch": 0.3649003815175922, "grad_norm": 1.0394706859932694, "learning_rate": 1.467153009458563e-05, "loss": 0.9955, "step": 2152 }, { "epoch": 0.36506994489190336, "grad_norm": 1.0252552588863841, "learning_rate": 1.4666673232256738e-05, "loss": 1.0176, "step": 2153 }, { "epoch": 0.3652395082662145, "grad_norm": 0.9856233335350563, "learning_rate": 1.4661814962211775e-05, "loss": 1.0223, "step": 2154 }, { "epoch": 0.36540907164052566, "grad_norm": 1.0176826755835535, "learning_rate": 1.465695528591625e-05, "loss": 1.0119, "step": 2155 }, { "epoch": 0.3655786350148368, "grad_norm": 1.0322572206424578, "learning_rate": 1.46520942048361e-05, "loss": 1.0347, "step": 2156 }, { "epoch": 0.36574819838914796, "grad_norm": 0.9532447028554529, "learning_rate": 1.4647231720437687e-05, "loss": 1.0126, "step": 2157 }, { "epoch": 0.3659177617634591, "grad_norm": 0.9840195638383302, "learning_rate": 1.4642367834187795e-05, "loss": 1.034, "step": 2158 }, { "epoch": 0.36608732513777026, "grad_norm": 1.0221261421189956, "learning_rate": 1.4637502547553626e-05, "loss": 1.0173, "step": 2159 }, { "epoch": 0.3662568885120814, "grad_norm": 0.9274807913395638, "learning_rate": 1.4632635862002811e-05, "loss": 0.9736, "step": 2160 }, { "epoch": 0.36642645188639256, "grad_norm": 0.9784510702615073, "learning_rate": 1.4627767779003402e-05, "loss": 0.9457, "step": 2161 }, { "epoch": 0.3665960152607037, "grad_norm": 0.9469595398248766, "learning_rate": 1.4622898300023867e-05, "loss": 0.9603, "step": 2162 }, { "epoch": 0.36676557863501486, "grad_norm": 0.9882823599670864, "learning_rate": 1.4618027426533102e-05, "loss": 0.9984, "step": 2163 }, { "epoch": 0.366935142009326, "grad_norm": 1.0177975127886503, "learning_rate": 1.4613155160000419e-05, "loss": 0.9815, "step": 2164 }, { "epoch": 0.36710470538363715, "grad_norm": 0.9443049685903396, "learning_rate": 1.4608281501895551e-05, "loss": 0.9402, "step": 2165 }, { "epoch": 0.3672742687579483, "grad_norm": 0.9798713176768605, "learning_rate": 1.4603406453688656e-05, "loss": 1.0198, "step": 2166 }, { "epoch": 0.36744383213225945, "grad_norm": 0.8925247576119087, "learning_rate": 1.4598530016850302e-05, "loss": 0.9402, "step": 2167 }, { "epoch": 0.3676133955065706, "grad_norm": 0.9693608811871331, "learning_rate": 1.4593652192851487e-05, "loss": 0.9633, "step": 2168 }, { "epoch": 0.36778295888088175, "grad_norm": 1.0082727665263245, "learning_rate": 1.4588772983163612e-05, "loss": 0.9751, "step": 2169 }, { "epoch": 0.36795252225519287, "grad_norm": 0.9981943649764854, "learning_rate": 1.4583892389258517e-05, "loss": 1.0129, "step": 2170 }, { "epoch": 0.36812208562950405, "grad_norm": 0.6188167302342693, "learning_rate": 1.4579010412608439e-05, "loss": 0.8494, "step": 2171 }, { "epoch": 0.36829164900381517, "grad_norm": 1.043450576241928, "learning_rate": 1.4574127054686042e-05, "loss": 0.9476, "step": 2172 }, { "epoch": 0.36846121237812635, "grad_norm": 0.9690414053579273, "learning_rate": 1.4569242316964409e-05, "loss": 1.002, "step": 2173 }, { "epoch": 0.36863077575243747, "grad_norm": 0.9719526425967001, "learning_rate": 1.4564356200917034e-05, "loss": 0.9956, "step": 2174 }, { "epoch": 0.36880033912674864, "grad_norm": 1.0251037372383711, "learning_rate": 1.455946870801783e-05, "loss": 0.953, "step": 2175 }, { "epoch": 0.36896990250105977, "grad_norm": 0.9591895258561606, "learning_rate": 1.455457983974112e-05, "loss": 0.9768, "step": 2176 }, { "epoch": 0.36913946587537094, "grad_norm": 0.9540812839945211, "learning_rate": 1.4549689597561652e-05, "loss": 0.9849, "step": 2177 }, { "epoch": 0.36930902924968206, "grad_norm": 1.0099051775751366, "learning_rate": 1.4544797982954578e-05, "loss": 0.9813, "step": 2178 }, { "epoch": 0.36947859262399324, "grad_norm": 1.0169533762607017, "learning_rate": 1.4539904997395468e-05, "loss": 1.0313, "step": 2179 }, { "epoch": 0.36964815599830436, "grad_norm": 1.0086390668397849, "learning_rate": 1.453501064236031e-05, "loss": 1.0469, "step": 2180 }, { "epoch": 0.36981771937261554, "grad_norm": 0.9480250100958773, "learning_rate": 1.4530114919325498e-05, "loss": 0.9424, "step": 2181 }, { "epoch": 0.36998728274692666, "grad_norm": 1.0788100566379721, "learning_rate": 1.4525217829767842e-05, "loss": 1.03, "step": 2182 }, { "epoch": 0.37015684612123784, "grad_norm": 0.9975613179969579, "learning_rate": 1.452031937516456e-05, "loss": 1.0289, "step": 2183 }, { "epoch": 0.37032640949554896, "grad_norm": 0.976343456804483, "learning_rate": 1.4515419556993293e-05, "loss": 1.0006, "step": 2184 }, { "epoch": 0.37049597286986014, "grad_norm": 1.017704153055686, "learning_rate": 1.4510518376732081e-05, "loss": 1.0047, "step": 2185 }, { "epoch": 0.37066553624417126, "grad_norm": 0.9727329659497488, "learning_rate": 1.4505615835859383e-05, "loss": 1.0374, "step": 2186 }, { "epoch": 0.37083509961848243, "grad_norm": 0.944014283399242, "learning_rate": 1.4500711935854062e-05, "loss": 0.9936, "step": 2187 }, { "epoch": 0.37100466299279355, "grad_norm": 0.9063748235013017, "learning_rate": 1.4495806678195394e-05, "loss": 0.9684, "step": 2188 }, { "epoch": 0.37117422636710473, "grad_norm": 1.0239544505780156, "learning_rate": 1.449090006436307e-05, "loss": 0.9892, "step": 2189 }, { "epoch": 0.37134378974141585, "grad_norm": 0.6997375076476163, "learning_rate": 1.4485992095837178e-05, "loss": 0.8763, "step": 2190 }, { "epoch": 0.37151335311572703, "grad_norm": 0.6403112627956185, "learning_rate": 1.4481082774098227e-05, "loss": 0.8054, "step": 2191 }, { "epoch": 0.37168291649003815, "grad_norm": 0.9876180927042654, "learning_rate": 1.4476172100627127e-05, "loss": 1.0073, "step": 2192 }, { "epoch": 0.37185247986434933, "grad_norm": 0.6198122269866572, "learning_rate": 1.4471260076905198e-05, "loss": 0.8235, "step": 2193 }, { "epoch": 0.37202204323866045, "grad_norm": 1.0510590508083053, "learning_rate": 1.4466346704414163e-05, "loss": 1.0033, "step": 2194 }, { "epoch": 0.37219160661297157, "grad_norm": 1.0165482454816739, "learning_rate": 1.4461431984636158e-05, "loss": 1.0139, "step": 2195 }, { "epoch": 0.37236116998728275, "grad_norm": 1.0162435144249273, "learning_rate": 1.4456515919053727e-05, "loss": 0.9811, "step": 2196 }, { "epoch": 0.37253073336159387, "grad_norm": 0.9846996795965636, "learning_rate": 1.4451598509149809e-05, "loss": 0.9821, "step": 2197 }, { "epoch": 0.37270029673590505, "grad_norm": 0.9996938616630523, "learning_rate": 1.444667975640776e-05, "loss": 1.0183, "step": 2198 }, { "epoch": 0.37286986011021617, "grad_norm": 0.9467980762814778, "learning_rate": 1.4441759662311339e-05, "loss": 0.9919, "step": 2199 }, { "epoch": 0.37303942348452734, "grad_norm": 0.9737035329328998, "learning_rate": 1.44368382283447e-05, "loss": 0.98, "step": 2200 }, { "epoch": 0.37320898685883847, "grad_norm": 1.0077057457004774, "learning_rate": 1.4431915455992416e-05, "loss": 1.0321, "step": 2201 }, { "epoch": 0.37337855023314964, "grad_norm": 0.9505348512435962, "learning_rate": 1.442699134673945e-05, "loss": 0.9831, "step": 2202 }, { "epoch": 0.37354811360746076, "grad_norm": 1.006625401724298, "learning_rate": 1.4422065902071176e-05, "loss": 0.9995, "step": 2203 }, { "epoch": 0.37371767698177194, "grad_norm": 0.9094176063421371, "learning_rate": 1.4417139123473373e-05, "loss": 0.9731, "step": 2204 }, { "epoch": 0.37388724035608306, "grad_norm": 0.9801924957415139, "learning_rate": 1.4412211012432213e-05, "loss": 1.0118, "step": 2205 }, { "epoch": 0.37405680373039424, "grad_norm": 1.0180156669913525, "learning_rate": 1.4407281570434282e-05, "loss": 0.996, "step": 2206 }, { "epoch": 0.37422636710470536, "grad_norm": 0.9510224613961906, "learning_rate": 1.4402350798966556e-05, "loss": 1.0211, "step": 2207 }, { "epoch": 0.37439593047901654, "grad_norm": 0.9800709714823563, "learning_rate": 1.4397418699516416e-05, "loss": 0.9938, "step": 2208 }, { "epoch": 0.37456549385332766, "grad_norm": 0.9262473822170325, "learning_rate": 1.4392485273571652e-05, "loss": 0.9429, "step": 2209 }, { "epoch": 0.37473505722763883, "grad_norm": 0.9631492791663153, "learning_rate": 1.4387550522620439e-05, "loss": 0.9878, "step": 2210 }, { "epoch": 0.37490462060194996, "grad_norm": 0.9454810573784534, "learning_rate": 1.4382614448151365e-05, "loss": 1.0237, "step": 2211 }, { "epoch": 0.37507418397626113, "grad_norm": 0.9094834558068051, "learning_rate": 1.4377677051653404e-05, "loss": 0.992, "step": 2212 }, { "epoch": 0.37524374735057225, "grad_norm": 0.9687674689041978, "learning_rate": 1.4372738334615947e-05, "loss": 1.0184, "step": 2213 }, { "epoch": 0.37541331072488343, "grad_norm": 1.0144332635558633, "learning_rate": 1.4367798298528767e-05, "loss": 0.9686, "step": 2214 }, { "epoch": 0.37558287409919455, "grad_norm": 0.9797829910202636, "learning_rate": 1.4362856944882041e-05, "loss": 0.9767, "step": 2215 }, { "epoch": 0.37575243747350573, "grad_norm": 0.9784401979887182, "learning_rate": 1.4357914275166344e-05, "loss": 1.0161, "step": 2216 }, { "epoch": 0.37592200084781685, "grad_norm": 0.9715303267227695, "learning_rate": 1.435297029087265e-05, "loss": 0.9703, "step": 2217 }, { "epoch": 0.376091564222128, "grad_norm": 0.8117783404440864, "learning_rate": 1.4348024993492323e-05, "loss": 0.8807, "step": 2218 }, { "epoch": 0.37626112759643915, "grad_norm": 0.9897415152435739, "learning_rate": 1.4343078384517123e-05, "loss": 1.0423, "step": 2219 }, { "epoch": 0.3764306909707503, "grad_norm": 1.0569223100472929, "learning_rate": 1.433813046543922e-05, "loss": 1.0141, "step": 2220 }, { "epoch": 0.37660025434506145, "grad_norm": 1.012977729852736, "learning_rate": 1.4333181237751159e-05, "loss": 0.9955, "step": 2221 }, { "epoch": 0.3767698177193726, "grad_norm": 0.9575457575174791, "learning_rate": 1.4328230702945897e-05, "loss": 0.9711, "step": 2222 }, { "epoch": 0.37693938109368375, "grad_norm": 0.9489275664556899, "learning_rate": 1.4323278862516774e-05, "loss": 0.9776, "step": 2223 }, { "epoch": 0.3771089444679949, "grad_norm": 1.029675244642999, "learning_rate": 1.4318325717957526e-05, "loss": 0.9897, "step": 2224 }, { "epoch": 0.37727850784230604, "grad_norm": 1.0343116868914228, "learning_rate": 1.431337127076229e-05, "loss": 1.0606, "step": 2225 }, { "epoch": 0.3774480712166172, "grad_norm": 0.9397410886584606, "learning_rate": 1.4308415522425581e-05, "loss": 0.9878, "step": 2226 }, { "epoch": 0.37761763459092834, "grad_norm": 0.9581605756963152, "learning_rate": 1.4303458474442325e-05, "loss": 0.9813, "step": 2227 }, { "epoch": 0.3777871979652395, "grad_norm": 0.9692264001530689, "learning_rate": 1.4298500128307821e-05, "loss": 0.9848, "step": 2228 }, { "epoch": 0.37795676133955064, "grad_norm": 1.0172621260051655, "learning_rate": 1.4293540485517778e-05, "loss": 1.0044, "step": 2229 }, { "epoch": 0.3781263247138618, "grad_norm": 1.0416929080213642, "learning_rate": 1.4288579547568279e-05, "loss": 0.9983, "step": 2230 }, { "epoch": 0.37829588808817294, "grad_norm": 0.9241512464027279, "learning_rate": 1.4283617315955815e-05, "loss": 0.9642, "step": 2231 }, { "epoch": 0.3784654514624841, "grad_norm": 1.03148467610165, "learning_rate": 1.4278653792177251e-05, "loss": 1.0145, "step": 2232 }, { "epoch": 0.37863501483679524, "grad_norm": 0.9313765166250695, "learning_rate": 1.4273688977729852e-05, "loss": 0.9589, "step": 2233 }, { "epoch": 0.3788045782111064, "grad_norm": 0.9819203662463901, "learning_rate": 1.4268722874111265e-05, "loss": 0.9873, "step": 2234 }, { "epoch": 0.37897414158541753, "grad_norm": 0.9574039197325604, "learning_rate": 1.426375548281954e-05, "loss": 1.0223, "step": 2235 }, { "epoch": 0.3791437049597287, "grad_norm": 1.0218278931507878, "learning_rate": 1.4258786805353095e-05, "loss": 1.0137, "step": 2236 }, { "epoch": 0.37931326833403983, "grad_norm": 1.016045779585104, "learning_rate": 1.4253816843210751e-05, "loss": 0.9828, "step": 2237 }, { "epoch": 0.379482831708351, "grad_norm": 0.9735847159586615, "learning_rate": 1.4248845597891712e-05, "loss": 1.0052, "step": 2238 }, { "epoch": 0.37965239508266213, "grad_norm": 0.9431836042880376, "learning_rate": 1.4243873070895569e-05, "loss": 0.9726, "step": 2239 }, { "epoch": 0.3798219584569733, "grad_norm": 0.9845608542832943, "learning_rate": 1.4238899263722301e-05, "loss": 1.0038, "step": 2240 }, { "epoch": 0.37999152183128443, "grad_norm": 1.0362904448017518, "learning_rate": 1.4233924177872269e-05, "loss": 1.0141, "step": 2241 }, { "epoch": 0.3801610852055956, "grad_norm": 0.984863735245617, "learning_rate": 1.4228947814846226e-05, "loss": 0.99, "step": 2242 }, { "epoch": 0.3803306485799067, "grad_norm": 0.9934261084736491, "learning_rate": 1.4223970176145303e-05, "loss": 1.0171, "step": 2243 }, { "epoch": 0.3805002119542179, "grad_norm": 0.923604353066388, "learning_rate": 1.4218991263271024e-05, "loss": 0.9671, "step": 2244 }, { "epoch": 0.380669775328529, "grad_norm": 0.9422331404867692, "learning_rate": 1.4214011077725293e-05, "loss": 0.9995, "step": 2245 }, { "epoch": 0.3808393387028402, "grad_norm": 0.9844237079257445, "learning_rate": 1.4209029621010393e-05, "loss": 1.018, "step": 2246 }, { "epoch": 0.3810089020771513, "grad_norm": 0.9877117202318388, "learning_rate": 1.4204046894629002e-05, "loss": 0.9497, "step": 2247 }, { "epoch": 0.3811784654514625, "grad_norm": 0.9540860994891954, "learning_rate": 1.4199062900084168e-05, "loss": 1.0166, "step": 2248 }, { "epoch": 0.3813480288257736, "grad_norm": 0.9882145090140241, "learning_rate": 1.4194077638879333e-05, "loss": 0.9702, "step": 2249 }, { "epoch": 0.3815175922000848, "grad_norm": 0.9348298828640021, "learning_rate": 1.4189091112518311e-05, "loss": 0.9683, "step": 2250 }, { "epoch": 0.3816871555743959, "grad_norm": 0.9961979049149148, "learning_rate": 1.4184103322505311e-05, "loss": 1.0026, "step": 2251 }, { "epoch": 0.3818567189487071, "grad_norm": 1.0033325432211073, "learning_rate": 1.4179114270344907e-05, "loss": 1.0179, "step": 2252 }, { "epoch": 0.3820262823230182, "grad_norm": 0.9892249197024652, "learning_rate": 1.4174123957542065e-05, "loss": 0.9916, "step": 2253 }, { "epoch": 0.3821958456973294, "grad_norm": 0.9983865401910622, "learning_rate": 1.4169132385602129e-05, "loss": 1.0073, "step": 2254 }, { "epoch": 0.3823654090716405, "grad_norm": 0.9590033385672899, "learning_rate": 1.4164139556030818e-05, "loss": 0.9712, "step": 2255 }, { "epoch": 0.3825349724459517, "grad_norm": 0.9695565529420129, "learning_rate": 1.4159145470334237e-05, "loss": 0.9751, "step": 2256 }, { "epoch": 0.3827045358202628, "grad_norm": 0.9910308237044819, "learning_rate": 1.4154150130018867e-05, "loss": 0.9774, "step": 2257 }, { "epoch": 0.382874099194574, "grad_norm": 0.9557961258187446, "learning_rate": 1.4149153536591565e-05, "loss": 0.9551, "step": 2258 }, { "epoch": 0.3830436625688851, "grad_norm": 0.9970827490185687, "learning_rate": 1.4144155691559571e-05, "loss": 1.0184, "step": 2259 }, { "epoch": 0.3832132259431963, "grad_norm": 0.9891074259573283, "learning_rate": 1.4139156596430501e-05, "loss": 0.9608, "step": 2260 }, { "epoch": 0.3833827893175074, "grad_norm": 1.0594408406629534, "learning_rate": 1.4134156252712343e-05, "loss": 0.9889, "step": 2261 }, { "epoch": 0.3835523526918186, "grad_norm": 1.1090230365809288, "learning_rate": 1.412915466191347e-05, "loss": 0.9771, "step": 2262 }, { "epoch": 0.3837219160661297, "grad_norm": 1.0212738242529205, "learning_rate": 1.4124151825542627e-05, "loss": 1.0285, "step": 2263 }, { "epoch": 0.3838914794404409, "grad_norm": 1.004476177615105, "learning_rate": 1.411914774510893e-05, "loss": 1.0025, "step": 2264 }, { "epoch": 0.384061042814752, "grad_norm": 0.9617465797965565, "learning_rate": 1.4114142422121879e-05, "loss": 1.0129, "step": 2265 }, { "epoch": 0.3842306061890632, "grad_norm": 0.9467418934625155, "learning_rate": 1.4109135858091344e-05, "loss": 1.0114, "step": 2266 }, { "epoch": 0.3844001695633743, "grad_norm": 0.9635988950957718, "learning_rate": 1.410412805452757e-05, "loss": 0.9708, "step": 2267 }, { "epoch": 0.3845697329376855, "grad_norm": 0.9503973525471091, "learning_rate": 1.4099119012941173e-05, "loss": 0.9677, "step": 2268 }, { "epoch": 0.3847392963119966, "grad_norm": 1.0411407504075734, "learning_rate": 1.4094108734843155e-05, "loss": 1.0432, "step": 2269 }, { "epoch": 0.3849088596863078, "grad_norm": 1.0373513596907145, "learning_rate": 1.408909722174487e-05, "loss": 0.9908, "step": 2270 }, { "epoch": 0.3850784230606189, "grad_norm": 0.9424784639599362, "learning_rate": 1.4084084475158062e-05, "loss": 1.0313, "step": 2271 }, { "epoch": 0.3852479864349301, "grad_norm": 1.0223250393083914, "learning_rate": 1.407907049659484e-05, "loss": 0.9826, "step": 2272 }, { "epoch": 0.3854175498092412, "grad_norm": 1.0539912645044094, "learning_rate": 1.4074055287567685e-05, "loss": 0.975, "step": 2273 }, { "epoch": 0.3855871131835524, "grad_norm": 0.9603510715083279, "learning_rate": 1.4069038849589456e-05, "loss": 0.9522, "step": 2274 }, { "epoch": 0.3857566765578635, "grad_norm": 1.0206219997121104, "learning_rate": 1.4064021184173364e-05, "loss": 0.9982, "step": 2275 }, { "epoch": 0.3859262399321747, "grad_norm": 0.9640249416543981, "learning_rate": 1.4059002292833018e-05, "loss": 0.9958, "step": 2276 }, { "epoch": 0.3860958033064858, "grad_norm": 1.0024036919620214, "learning_rate": 1.4053982177082369e-05, "loss": 0.9935, "step": 2277 }, { "epoch": 0.386265366680797, "grad_norm": 1.0034321220658353, "learning_rate": 1.4048960838435755e-05, "loss": 1.0057, "step": 2278 }, { "epoch": 0.3864349300551081, "grad_norm": 0.9827904519728128, "learning_rate": 1.404393827840788e-05, "loss": 1.0133, "step": 2279 }, { "epoch": 0.38660449342941927, "grad_norm": 0.9676678752987752, "learning_rate": 1.4038914498513813e-05, "loss": 0.9639, "step": 2280 }, { "epoch": 0.3867740568037304, "grad_norm": 0.9131314769165926, "learning_rate": 1.4033889500268991e-05, "loss": 0.9869, "step": 2281 }, { "epoch": 0.38694362017804157, "grad_norm": 0.9614336932564229, "learning_rate": 1.4028863285189225e-05, "loss": 1.0046, "step": 2282 }, { "epoch": 0.3871131835523527, "grad_norm": 0.9841388849049107, "learning_rate": 1.4023835854790682e-05, "loss": 1.0021, "step": 2283 }, { "epoch": 0.38728274692666387, "grad_norm": 1.0003572069132323, "learning_rate": 1.40188072105899e-05, "loss": 1.0084, "step": 2284 }, { "epoch": 0.387452310300975, "grad_norm": 1.0110159730401558, "learning_rate": 1.401377735410379e-05, "loss": 1.0084, "step": 2285 }, { "epoch": 0.3876218736752861, "grad_norm": 0.9927037926213683, "learning_rate": 1.4008746286849621e-05, "loss": 0.9857, "step": 2286 }, { "epoch": 0.3877914370495973, "grad_norm": 1.0249747117143282, "learning_rate": 1.4003714010345031e-05, "loss": 0.9924, "step": 2287 }, { "epoch": 0.3879610004239084, "grad_norm": 0.9410591169179235, "learning_rate": 1.3998680526108022e-05, "loss": 0.9969, "step": 2288 }, { "epoch": 0.3881305637982196, "grad_norm": 0.9738227364855141, "learning_rate": 1.3993645835656955e-05, "loss": 0.9908, "step": 2289 }, { "epoch": 0.3883001271725307, "grad_norm": 1.0674994018153947, "learning_rate": 1.3988609940510566e-05, "loss": 0.9943, "step": 2290 }, { "epoch": 0.3884696905468419, "grad_norm": 0.963425959620499, "learning_rate": 1.3983572842187945e-05, "loss": 1.0015, "step": 2291 }, { "epoch": 0.388639253921153, "grad_norm": 0.9330659557033109, "learning_rate": 1.3978534542208549e-05, "loss": 0.9458, "step": 2292 }, { "epoch": 0.3888088172954642, "grad_norm": 1.000908880595011, "learning_rate": 1.3973495042092192e-05, "loss": 1.0249, "step": 2293 }, { "epoch": 0.3889783806697753, "grad_norm": 1.0122951520039227, "learning_rate": 1.3968454343359057e-05, "loss": 0.9754, "step": 2294 }, { "epoch": 0.3891479440440865, "grad_norm": 0.9546805852584749, "learning_rate": 1.3963412447529687e-05, "loss": 0.9701, "step": 2295 }, { "epoch": 0.3893175074183976, "grad_norm": 0.9300815730903562, "learning_rate": 1.3958369356124986e-05, "loss": 0.9654, "step": 2296 }, { "epoch": 0.3894870707927088, "grad_norm": 1.0062499873061528, "learning_rate": 1.3953325070666215e-05, "loss": 1.0035, "step": 2297 }, { "epoch": 0.3896566341670199, "grad_norm": 0.9767582937259379, "learning_rate": 1.3948279592675e-05, "loss": 0.9764, "step": 2298 }, { "epoch": 0.3898261975413311, "grad_norm": 0.9901494524437389, "learning_rate": 1.3943232923673327e-05, "loss": 0.9757, "step": 2299 }, { "epoch": 0.3899957609156422, "grad_norm": 0.950968865304944, "learning_rate": 1.3938185065183534e-05, "loss": 0.9618, "step": 2300 }, { "epoch": 0.3901653242899534, "grad_norm": 0.9572536218868889, "learning_rate": 1.3933136018728324e-05, "loss": 0.9993, "step": 2301 }, { "epoch": 0.3903348876642645, "grad_norm": 1.0497177292160054, "learning_rate": 1.3928085785830758e-05, "loss": 0.9822, "step": 2302 }, { "epoch": 0.3905044510385757, "grad_norm": 0.9369346921670006, "learning_rate": 1.3923034368014254e-05, "loss": 0.9512, "step": 2303 }, { "epoch": 0.3906740144128868, "grad_norm": 0.9690635693058144, "learning_rate": 1.3917981766802585e-05, "loss": 0.9786, "step": 2304 }, { "epoch": 0.39084357778719797, "grad_norm": 0.9969530028584265, "learning_rate": 1.3912927983719888e-05, "loss": 0.994, "step": 2305 }, { "epoch": 0.3910131411615091, "grad_norm": 0.9270820900469808, "learning_rate": 1.3907873020290653e-05, "loss": 0.9942, "step": 2306 }, { "epoch": 0.39118270453582027, "grad_norm": 0.9129685147582283, "learning_rate": 1.3902816878039715e-05, "loss": 0.9585, "step": 2307 }, { "epoch": 0.3913522679101314, "grad_norm": 1.0004351454247427, "learning_rate": 1.3897759558492286e-05, "loss": 0.9886, "step": 2308 }, { "epoch": 0.39152183128444257, "grad_norm": 0.9440282904317703, "learning_rate": 1.3892701063173917e-05, "loss": 0.9917, "step": 2309 }, { "epoch": 0.3916913946587537, "grad_norm": 0.9880527346461876, "learning_rate": 1.3887641393610518e-05, "loss": 1.0319, "step": 2310 }, { "epoch": 0.39186095803306487, "grad_norm": 0.9337751572129022, "learning_rate": 1.388258055132835e-05, "loss": 1.0158, "step": 2311 }, { "epoch": 0.392030521407376, "grad_norm": 1.020068420244944, "learning_rate": 1.387751853785404e-05, "loss": 1.0426, "step": 2312 }, { "epoch": 0.39220008478168716, "grad_norm": 0.9463995407653231, "learning_rate": 1.3872455354714552e-05, "loss": 0.9785, "step": 2313 }, { "epoch": 0.3923696481559983, "grad_norm": 0.7160668602145898, "learning_rate": 1.3867391003437213e-05, "loss": 0.8938, "step": 2314 }, { "epoch": 0.39253921153030946, "grad_norm": 0.9751857309982119, "learning_rate": 1.3862325485549702e-05, "loss": 1.0085, "step": 2315 }, { "epoch": 0.3927087749046206, "grad_norm": 0.9297358961999248, "learning_rate": 1.3857258802580045e-05, "loss": 0.9816, "step": 2316 }, { "epoch": 0.39287833827893176, "grad_norm": 0.9910030691848873, "learning_rate": 1.3852190956056623e-05, "loss": 1.0043, "step": 2317 }, { "epoch": 0.3930479016532429, "grad_norm": 0.6138639273144628, "learning_rate": 1.384712194750817e-05, "loss": 0.8091, "step": 2318 }, { "epoch": 0.39321746502755406, "grad_norm": 0.9627117654545204, "learning_rate": 1.3842051778463765e-05, "loss": 1.004, "step": 2319 }, { "epoch": 0.3933870284018652, "grad_norm": 0.9633553273816692, "learning_rate": 1.3836980450452836e-05, "loss": 1.0157, "step": 2320 }, { "epoch": 0.39355659177617636, "grad_norm": 0.9717303316148921, "learning_rate": 1.3831907965005173e-05, "loss": 1.0218, "step": 2321 }, { "epoch": 0.3937261551504875, "grad_norm": 0.9880904793965707, "learning_rate": 1.3826834323650899e-05, "loss": 1.0018, "step": 2322 }, { "epoch": 0.39389571852479865, "grad_norm": 0.9787174398100366, "learning_rate": 1.3821759527920496e-05, "loss": 0.9629, "step": 2323 }, { "epoch": 0.3940652818991098, "grad_norm": 0.965613579832241, "learning_rate": 1.3816683579344794e-05, "loss": 0.9996, "step": 2324 }, { "epoch": 0.39423484527342095, "grad_norm": 0.9429723518525117, "learning_rate": 1.3811606479454961e-05, "loss": 0.9358, "step": 2325 }, { "epoch": 0.3944044086477321, "grad_norm": 0.9385873522805684, "learning_rate": 1.380652822978253e-05, "loss": 0.9991, "step": 2326 }, { "epoch": 0.39457397202204325, "grad_norm": 0.9761246085993821, "learning_rate": 1.3801448831859363e-05, "loss": 1.0179, "step": 2327 }, { "epoch": 0.3947435353963544, "grad_norm": 0.9555085872264782, "learning_rate": 1.3796368287217678e-05, "loss": 0.9456, "step": 2328 }, { "epoch": 0.39491309877066555, "grad_norm": 0.9521487829442625, "learning_rate": 1.3791286597390035e-05, "loss": 0.9577, "step": 2329 }, { "epoch": 0.39508266214497667, "grad_norm": 0.9959551070660362, "learning_rate": 1.3786203763909342e-05, "loss": 1.0182, "step": 2330 }, { "epoch": 0.39525222551928785, "grad_norm": 1.0391903131074671, "learning_rate": 1.378111978830885e-05, "loss": 1.0088, "step": 2331 }, { "epoch": 0.39542178889359897, "grad_norm": 1.0557011150279045, "learning_rate": 1.3776034672122158e-05, "loss": 1.0137, "step": 2332 }, { "epoch": 0.39559135226791015, "grad_norm": 0.7014278160263907, "learning_rate": 1.3770948416883205e-05, "loss": 0.8679, "step": 2333 }, { "epoch": 0.39576091564222127, "grad_norm": 0.9675665903239292, "learning_rate": 1.3765861024126275e-05, "loss": 0.9765, "step": 2334 }, { "epoch": 0.39593047901653244, "grad_norm": 0.9943169198576736, "learning_rate": 1.3760772495385998e-05, "loss": 1.0241, "step": 2335 }, { "epoch": 0.39610004239084357, "grad_norm": 0.9699082319319825, "learning_rate": 1.3755682832197343e-05, "loss": 0.9749, "step": 2336 }, { "epoch": 0.39626960576515474, "grad_norm": 0.6440441366656101, "learning_rate": 1.375059203609562e-05, "loss": 0.916, "step": 2337 }, { "epoch": 0.39643916913946586, "grad_norm": 0.9688385963827819, "learning_rate": 1.3745500108616482e-05, "loss": 0.946, "step": 2338 }, { "epoch": 0.39660873251377704, "grad_norm": 0.9552298362122215, "learning_rate": 1.3740407051295931e-05, "loss": 0.9592, "step": 2339 }, { "epoch": 0.39677829588808816, "grad_norm": 0.9513457224844829, "learning_rate": 1.3735312865670296e-05, "loss": 0.9852, "step": 2340 }, { "epoch": 0.39694785926239934, "grad_norm": 0.9298622550155837, "learning_rate": 1.3730217553276257e-05, "loss": 0.9872, "step": 2341 }, { "epoch": 0.39711742263671046, "grad_norm": 0.93478486799767, "learning_rate": 1.372512111565083e-05, "loss": 1.0183, "step": 2342 }, { "epoch": 0.39728698601102164, "grad_norm": 0.9326465792693526, "learning_rate": 1.372002355433137e-05, "loss": 0.9716, "step": 2343 }, { "epoch": 0.39745654938533276, "grad_norm": 0.941124361783271, "learning_rate": 1.3714924870855573e-05, "loss": 0.9615, "step": 2344 }, { "epoch": 0.39762611275964393, "grad_norm": 0.9596761856573279, "learning_rate": 1.370982506676147e-05, "loss": 1.0, "step": 2345 }, { "epoch": 0.39779567613395506, "grad_norm": 0.9480477059089689, "learning_rate": 1.3704724143587438e-05, "loss": 0.9776, "step": 2346 }, { "epoch": 0.39796523950826623, "grad_norm": 0.9673078521470359, "learning_rate": 1.3699622102872177e-05, "loss": 0.9883, "step": 2347 }, { "epoch": 0.39813480288257735, "grad_norm": 0.9476460109969874, "learning_rate": 1.369451894615474e-05, "loss": 0.9951, "step": 2348 }, { "epoch": 0.39830436625688853, "grad_norm": 0.9250379836292365, "learning_rate": 1.3689414674974506e-05, "loss": 1.027, "step": 2349 }, { "epoch": 0.39847392963119965, "grad_norm": 0.9764037045224021, "learning_rate": 1.3684309290871194e-05, "loss": 0.9903, "step": 2350 }, { "epoch": 0.39864349300551083, "grad_norm": 0.9227253413425622, "learning_rate": 1.3679202795384862e-05, "loss": 0.9733, "step": 2351 }, { "epoch": 0.39881305637982195, "grad_norm": 1.0046961541746833, "learning_rate": 1.3674095190055895e-05, "loss": 0.9933, "step": 2352 }, { "epoch": 0.3989826197541331, "grad_norm": 0.9835904674618511, "learning_rate": 1.3668986476425024e-05, "loss": 0.9771, "step": 2353 }, { "epoch": 0.39915218312844425, "grad_norm": 0.9753961263363442, "learning_rate": 1.3663876656033303e-05, "loss": 0.9762, "step": 2354 }, { "epoch": 0.3993217465027554, "grad_norm": 0.9703335688555745, "learning_rate": 1.3658765730422126e-05, "loss": 0.9805, "step": 2355 }, { "epoch": 0.39949130987706655, "grad_norm": 0.9852702932838664, "learning_rate": 1.3653653701133215e-05, "loss": 0.9964, "step": 2356 }, { "epoch": 0.3996608732513777, "grad_norm": 0.9437667158938094, "learning_rate": 1.3648540569708637e-05, "loss": 0.9824, "step": 2357 }, { "epoch": 0.39983043662568885, "grad_norm": 0.9688775348783184, "learning_rate": 1.3643426337690776e-05, "loss": 0.9806, "step": 2358 }, { "epoch": 0.4, "grad_norm": 0.6782503039549057, "learning_rate": 1.3638311006622357e-05, "loss": 0.8288, "step": 2359 }, { "epoch": 0.40016956337431114, "grad_norm": 0.9725599257213288, "learning_rate": 1.3633194578046443e-05, "loss": 1.0057, "step": 2360 }, { "epoch": 0.4003391267486223, "grad_norm": 0.9370621842535285, "learning_rate": 1.362807705350641e-05, "loss": 1.0252, "step": 2361 }, { "epoch": 0.40050869012293344, "grad_norm": 0.9611046417496658, "learning_rate": 1.3622958434545983e-05, "loss": 1.0411, "step": 2362 }, { "epoch": 0.4006782534972446, "grad_norm": 0.9589236347776622, "learning_rate": 1.3617838722709203e-05, "loss": 0.9837, "step": 2363 }, { "epoch": 0.40084781687155574, "grad_norm": 1.0071145149316638, "learning_rate": 1.3612717919540446e-05, "loss": 0.9515, "step": 2364 }, { "epoch": 0.4010173802458669, "grad_norm": 0.9278981149898994, "learning_rate": 1.3607596026584423e-05, "loss": 0.9549, "step": 2365 }, { "epoch": 0.40118694362017804, "grad_norm": 0.9919482431933654, "learning_rate": 1.3602473045386165e-05, "loss": 1.0008, "step": 2366 }, { "epoch": 0.4013565069944892, "grad_norm": 0.9473502431654154, "learning_rate": 1.3597348977491031e-05, "loss": 0.9615, "step": 2367 }, { "epoch": 0.40152607036880034, "grad_norm": 0.9745121739828702, "learning_rate": 1.3592223824444716e-05, "loss": 0.9665, "step": 2368 }, { "epoch": 0.4016956337431115, "grad_norm": 0.9669094116206499, "learning_rate": 1.3587097587793243e-05, "loss": 1.0231, "step": 2369 }, { "epoch": 0.40186519711742263, "grad_norm": 0.9682205650910038, "learning_rate": 1.3581970269082948e-05, "loss": 0.9922, "step": 2370 }, { "epoch": 0.4020347604917338, "grad_norm": 0.9530206159840949, "learning_rate": 1.3576841869860506e-05, "loss": 0.9706, "step": 2371 }, { "epoch": 0.40220432386604493, "grad_norm": 0.9870412917112557, "learning_rate": 1.3571712391672916e-05, "loss": 1.0111, "step": 2372 }, { "epoch": 0.4023738872403561, "grad_norm": 0.9267932252121701, "learning_rate": 1.3566581836067495e-05, "loss": 0.9459, "step": 2373 }, { "epoch": 0.40254345061466723, "grad_norm": 0.9860442732113808, "learning_rate": 1.3561450204591898e-05, "loss": 0.9925, "step": 2374 }, { "epoch": 0.4027130139889784, "grad_norm": 0.9869017207559949, "learning_rate": 1.3556317498794086e-05, "loss": 1.0021, "step": 2375 }, { "epoch": 0.40288257736328953, "grad_norm": 0.9491023007688814, "learning_rate": 1.355118372022237e-05, "loss": 0.9882, "step": 2376 }, { "epoch": 0.4030521407376007, "grad_norm": 1.0022711307777008, "learning_rate": 1.3546048870425356e-05, "loss": 0.9684, "step": 2377 }, { "epoch": 0.4032217041119118, "grad_norm": 0.9599343764083719, "learning_rate": 1.3540912950951998e-05, "loss": 1.0161, "step": 2378 }, { "epoch": 0.40339126748622295, "grad_norm": 0.9608963061237126, "learning_rate": 1.3535775963351552e-05, "loss": 0.9945, "step": 2379 }, { "epoch": 0.4035608308605341, "grad_norm": 1.0126554227041162, "learning_rate": 1.3530637909173614e-05, "loss": 1.0129, "step": 2380 }, { "epoch": 0.40373039423484525, "grad_norm": 0.9510465833450211, "learning_rate": 1.3525498789968088e-05, "loss": 1.0069, "step": 2381 }, { "epoch": 0.4038999576091564, "grad_norm": 0.9662505541085203, "learning_rate": 1.3520358607285208e-05, "loss": 0.9786, "step": 2382 }, { "epoch": 0.40406952098346754, "grad_norm": 0.9358741434520518, "learning_rate": 1.3515217362675524e-05, "loss": 0.9834, "step": 2383 }, { "epoch": 0.4042390843577787, "grad_norm": 0.9447549474330913, "learning_rate": 1.3510075057689906e-05, "loss": 0.9826, "step": 2384 }, { "epoch": 0.40440864773208984, "grad_norm": 0.9754158889648276, "learning_rate": 1.3504931693879553e-05, "loss": 0.9733, "step": 2385 }, { "epoch": 0.404578211106401, "grad_norm": 0.9438735781819981, "learning_rate": 1.3499787272795968e-05, "loss": 0.9905, "step": 2386 }, { "epoch": 0.40474777448071214, "grad_norm": 0.9827473939629118, "learning_rate": 1.3494641795990986e-05, "loss": 0.9655, "step": 2387 }, { "epoch": 0.4049173378550233, "grad_norm": 0.9383232463399549, "learning_rate": 1.3489495265016753e-05, "loss": 1.006, "step": 2388 }, { "epoch": 0.40508690122933444, "grad_norm": 0.9758535360802354, "learning_rate": 1.3484347681425739e-05, "loss": 0.9831, "step": 2389 }, { "epoch": 0.4052564646036456, "grad_norm": 0.9825369535616311, "learning_rate": 1.3479199046770722e-05, "loss": 0.9735, "step": 2390 }, { "epoch": 0.40542602797795674, "grad_norm": 0.9649485719338737, "learning_rate": 1.3474049362604809e-05, "loss": 1.001, "step": 2391 }, { "epoch": 0.4055955913522679, "grad_norm": 1.0015840462951928, "learning_rate": 1.3468898630481417e-05, "loss": 1.0181, "step": 2392 }, { "epoch": 0.40576515472657904, "grad_norm": 0.9838667856306663, "learning_rate": 1.3463746851954275e-05, "loss": 1.0095, "step": 2393 }, { "epoch": 0.4059347181008902, "grad_norm": 0.9270255123873952, "learning_rate": 1.3458594028577444e-05, "loss": 0.955, "step": 2394 }, { "epoch": 0.40610428147520133, "grad_norm": 0.9342560383409726, "learning_rate": 1.3453440161905274e-05, "loss": 0.9467, "step": 2395 }, { "epoch": 0.4062738448495125, "grad_norm": 0.9815254745927546, "learning_rate": 1.3448285253492455e-05, "loss": 1.0145, "step": 2396 }, { "epoch": 0.40644340822382363, "grad_norm": 0.9442279755898566, "learning_rate": 1.3443129304893974e-05, "loss": 0.9324, "step": 2397 }, { "epoch": 0.4066129715981348, "grad_norm": 1.0353538609136759, "learning_rate": 1.3437972317665144e-05, "loss": 1.0014, "step": 2398 }, { "epoch": 0.40678253497244593, "grad_norm": 0.9865215179030016, "learning_rate": 1.3432814293361585e-05, "loss": 0.9297, "step": 2399 }, { "epoch": 0.4069520983467571, "grad_norm": 0.9825846108050071, "learning_rate": 1.3427655233539227e-05, "loss": 0.9928, "step": 2400 }, { "epoch": 0.40712166172106823, "grad_norm": 0.9625004527519626, "learning_rate": 1.342249513975432e-05, "loss": 0.9887, "step": 2401 }, { "epoch": 0.4072912250953794, "grad_norm": 0.9481900000656314, "learning_rate": 1.3417334013563417e-05, "loss": 0.9537, "step": 2402 }, { "epoch": 0.4074607884696905, "grad_norm": 0.9762642004668662, "learning_rate": 1.3412171856523393e-05, "loss": 0.9828, "step": 2403 }, { "epoch": 0.4076303518440017, "grad_norm": 0.9813760903086608, "learning_rate": 1.3407008670191422e-05, "loss": 1.0018, "step": 2404 }, { "epoch": 0.4077999152183128, "grad_norm": 0.9411898952849121, "learning_rate": 1.3401844456125002e-05, "loss": 0.9346, "step": 2405 }, { "epoch": 0.407969478592624, "grad_norm": 1.0237953771190902, "learning_rate": 1.3396679215881924e-05, "loss": 1.0019, "step": 2406 }, { "epoch": 0.4081390419669351, "grad_norm": 0.6681021351925773, "learning_rate": 1.339151295102031e-05, "loss": 0.8287, "step": 2407 }, { "epoch": 0.4083086053412463, "grad_norm": 0.9659366012451951, "learning_rate": 1.3386345663098573e-05, "loss": 0.9598, "step": 2408 }, { "epoch": 0.4084781687155574, "grad_norm": 0.9986445473341954, "learning_rate": 1.3381177353675441e-05, "loss": 1.0035, "step": 2409 }, { "epoch": 0.4086477320898686, "grad_norm": 0.9812635937885128, "learning_rate": 1.337600802430995e-05, "loss": 0.9823, "step": 2410 }, { "epoch": 0.4088172954641797, "grad_norm": 0.9990223069801192, "learning_rate": 1.3370837676561443e-05, "loss": 0.9658, "step": 2411 }, { "epoch": 0.4089868588384909, "grad_norm": 0.9546358726849846, "learning_rate": 1.3365666311989579e-05, "loss": 0.9634, "step": 2412 }, { "epoch": 0.409156422212802, "grad_norm": 0.9867318766504926, "learning_rate": 1.3360493932154301e-05, "loss": 0.9645, "step": 2413 }, { "epoch": 0.4093259855871132, "grad_norm": 0.9907507522666065, "learning_rate": 1.3355320538615888e-05, "loss": 1.0248, "step": 2414 }, { "epoch": 0.4094955489614243, "grad_norm": 0.9541243441659322, "learning_rate": 1.33501461329349e-05, "loss": 0.9904, "step": 2415 }, { "epoch": 0.4096651123357355, "grad_norm": 0.9868115088638221, "learning_rate": 1.3344970716672217e-05, "loss": 0.9936, "step": 2416 }, { "epoch": 0.4098346757100466, "grad_norm": 0.9912650716851038, "learning_rate": 1.3339794291389015e-05, "loss": 0.9875, "step": 2417 }, { "epoch": 0.4100042390843578, "grad_norm": 1.0052604483444878, "learning_rate": 1.3334616858646783e-05, "loss": 1.015, "step": 2418 }, { "epoch": 0.4101738024586689, "grad_norm": 0.9746475443318042, "learning_rate": 1.3329438420007306e-05, "loss": 1.0029, "step": 2419 }, { "epoch": 0.4103433658329801, "grad_norm": 1.024861447866936, "learning_rate": 1.3324258977032673e-05, "loss": 0.9888, "step": 2420 }, { "epoch": 0.4105129292072912, "grad_norm": 0.9606946019788624, "learning_rate": 1.3319078531285286e-05, "loss": 1.0007, "step": 2421 }, { "epoch": 0.4106824925816024, "grad_norm": 0.9567716672709103, "learning_rate": 1.3313897084327835e-05, "loss": 0.9795, "step": 2422 }, { "epoch": 0.4108520559559135, "grad_norm": 0.9558345382828642, "learning_rate": 1.3308714637723325e-05, "loss": 0.996, "step": 2423 }, { "epoch": 0.4110216193302247, "grad_norm": 0.9660114983788803, "learning_rate": 1.3303531193035053e-05, "loss": 0.965, "step": 2424 }, { "epoch": 0.4111911827045358, "grad_norm": 0.9651912507080008, "learning_rate": 1.3298346751826624e-05, "loss": 0.9811, "step": 2425 }, { "epoch": 0.411360746078847, "grad_norm": 0.9551552558269004, "learning_rate": 1.3293161315661934e-05, "loss": 0.968, "step": 2426 }, { "epoch": 0.4115303094531581, "grad_norm": 0.9955117454894147, "learning_rate": 1.328797488610519e-05, "loss": 1.0007, "step": 2427 }, { "epoch": 0.4116998728274693, "grad_norm": 0.9635515621972542, "learning_rate": 1.3282787464720897e-05, "loss": 0.96, "step": 2428 }, { "epoch": 0.4118694362017804, "grad_norm": 0.99811931315809, "learning_rate": 1.3277599053073848e-05, "loss": 0.9719, "step": 2429 }, { "epoch": 0.4120389995760916, "grad_norm": 0.9452590616724025, "learning_rate": 1.3272409652729152e-05, "loss": 0.9541, "step": 2430 }, { "epoch": 0.4122085629504027, "grad_norm": 0.9680828373453652, "learning_rate": 1.3267219265252202e-05, "loss": 0.9933, "step": 2431 }, { "epoch": 0.4123781263247139, "grad_norm": 0.7458563099691555, "learning_rate": 1.3262027892208696e-05, "loss": 0.9038, "step": 2432 }, { "epoch": 0.412547689699025, "grad_norm": 0.9523866219627274, "learning_rate": 1.3256835535164622e-05, "loss": 0.9705, "step": 2433 }, { "epoch": 0.4127172530733362, "grad_norm": 0.9678065981380277, "learning_rate": 1.325164219568628e-05, "loss": 0.9998, "step": 2434 }, { "epoch": 0.4128868164476473, "grad_norm": 1.016278860103581, "learning_rate": 1.3246447875340249e-05, "loss": 0.9837, "step": 2435 }, { "epoch": 0.4130563798219585, "grad_norm": 0.9459179710413834, "learning_rate": 1.3241252575693417e-05, "loss": 0.9539, "step": 2436 }, { "epoch": 0.4132259431962696, "grad_norm": 0.9735299517478437, "learning_rate": 1.3236056298312957e-05, "loss": 0.9912, "step": 2437 }, { "epoch": 0.4133955065705808, "grad_norm": 0.9963518744501685, "learning_rate": 1.3230859044766342e-05, "loss": 1.0197, "step": 2438 }, { "epoch": 0.4135650699448919, "grad_norm": 0.9350838329085976, "learning_rate": 1.3225660816621342e-05, "loss": 0.9614, "step": 2439 }, { "epoch": 0.41373463331920307, "grad_norm": 0.9336990141134371, "learning_rate": 1.3220461615446015e-05, "loss": 0.9454, "step": 2440 }, { "epoch": 0.4139041966935142, "grad_norm": 0.6081181731888237, "learning_rate": 1.3215261442808718e-05, "loss": 0.839, "step": 2441 }, { "epoch": 0.41407376006782537, "grad_norm": 0.950632744913241, "learning_rate": 1.3210060300278097e-05, "loss": 0.9796, "step": 2442 }, { "epoch": 0.4142433234421365, "grad_norm": 0.949278231400373, "learning_rate": 1.3204858189423097e-05, "loss": 0.9995, "step": 2443 }, { "epoch": 0.41441288681644767, "grad_norm": 0.9553885275517086, "learning_rate": 1.3199655111812945e-05, "loss": 0.9502, "step": 2444 }, { "epoch": 0.4145824501907588, "grad_norm": 0.963076422095927, "learning_rate": 1.319445106901717e-05, "loss": 1.0298, "step": 2445 }, { "epoch": 0.41475201356506997, "grad_norm": 1.0010508289634494, "learning_rate": 1.3189246062605582e-05, "loss": 1.0079, "step": 2446 }, { "epoch": 0.4149215769393811, "grad_norm": 0.9907056753153181, "learning_rate": 1.3184040094148289e-05, "loss": 0.9929, "step": 2447 }, { "epoch": 0.41509114031369226, "grad_norm": 1.0213781609836017, "learning_rate": 1.3178833165215687e-05, "loss": 1.005, "step": 2448 }, { "epoch": 0.4152607036880034, "grad_norm": 1.0003971571997028, "learning_rate": 1.3173625277378464e-05, "loss": 0.9928, "step": 2449 }, { "epoch": 0.41543026706231456, "grad_norm": 0.9770890721532138, "learning_rate": 1.3168416432207594e-05, "loss": 0.9589, "step": 2450 }, { "epoch": 0.4155998304366257, "grad_norm": 1.024539082736634, "learning_rate": 1.3163206631274337e-05, "loss": 1.0335, "step": 2451 }, { "epoch": 0.41576939381093686, "grad_norm": 0.9732741774989216, "learning_rate": 1.3157995876150252e-05, "loss": 0.9469, "step": 2452 }, { "epoch": 0.415938957185248, "grad_norm": 0.9888428976384802, "learning_rate": 1.315278416840717e-05, "loss": 0.9646, "step": 2453 }, { "epoch": 0.41610852055955916, "grad_norm": 0.9411013235231769, "learning_rate": 1.314757150961723e-05, "loss": 0.9785, "step": 2454 }, { "epoch": 0.4162780839338703, "grad_norm": 0.9605222670781098, "learning_rate": 1.3142357901352839e-05, "loss": 1.0101, "step": 2455 }, { "epoch": 0.41644764730818146, "grad_norm": 0.9640322397951829, "learning_rate": 1.3137143345186696e-05, "loss": 0.9961, "step": 2456 }, { "epoch": 0.4166172106824926, "grad_norm": 0.9871735762765294, "learning_rate": 1.3131927842691793e-05, "loss": 0.9745, "step": 2457 }, { "epoch": 0.41678677405680375, "grad_norm": 0.9702861095375536, "learning_rate": 1.3126711395441396e-05, "loss": 1.0003, "step": 2458 }, { "epoch": 0.4169563374311149, "grad_norm": 0.9280097044443271, "learning_rate": 1.3121494005009068e-05, "loss": 1.012, "step": 2459 }, { "epoch": 0.41712590080542605, "grad_norm": 0.9793618045601942, "learning_rate": 1.3116275672968646e-05, "loss": 1.054, "step": 2460 }, { "epoch": 0.4172954641797372, "grad_norm": 1.0175611513200622, "learning_rate": 1.311105640089426e-05, "loss": 1.0218, "step": 2461 }, { "epoch": 0.41746502755404835, "grad_norm": 0.9340913131437807, "learning_rate": 1.3105836190360315e-05, "loss": 0.9637, "step": 2462 }, { "epoch": 0.4176345909283595, "grad_norm": 0.966778154770852, "learning_rate": 1.3100615042941506e-05, "loss": 1.0387, "step": 2463 }, { "epoch": 0.41780415430267065, "grad_norm": 1.0002857021794538, "learning_rate": 1.3095392960212807e-05, "loss": 0.9729, "step": 2464 }, { "epoch": 0.41797371767698177, "grad_norm": 0.8835986390395162, "learning_rate": 1.3090169943749475e-05, "loss": 0.9784, "step": 2465 }, { "epoch": 0.41814328105129295, "grad_norm": 0.9830337720650365, "learning_rate": 1.308494599512705e-05, "loss": 1.0189, "step": 2466 }, { "epoch": 0.41831284442560407, "grad_norm": 0.9987921704690718, "learning_rate": 1.3079721115921348e-05, "loss": 0.9661, "step": 2467 }, { "epoch": 0.41848240779991525, "grad_norm": 0.9430457469859373, "learning_rate": 1.3074495307708475e-05, "loss": 0.9474, "step": 2468 }, { "epoch": 0.41865197117422637, "grad_norm": 0.9742792090624257, "learning_rate": 1.3069268572064808e-05, "loss": 0.9948, "step": 2469 }, { "epoch": 0.4188215345485375, "grad_norm": 0.9257353706271327, "learning_rate": 1.3064040910567008e-05, "loss": 0.955, "step": 2470 }, { "epoch": 0.41899109792284867, "grad_norm": 0.9883164111711251, "learning_rate": 1.3058812324792014e-05, "loss": 1.0006, "step": 2471 }, { "epoch": 0.4191606612971598, "grad_norm": 0.9394364750290908, "learning_rate": 1.305358281631705e-05, "loss": 0.9776, "step": 2472 }, { "epoch": 0.41933022467147096, "grad_norm": 0.9822555780458035, "learning_rate": 1.3048352386719605e-05, "loss": 1.0135, "step": 2473 }, { "epoch": 0.4194997880457821, "grad_norm": 0.9941729915918995, "learning_rate": 1.3043121037577463e-05, "loss": 1.007, "step": 2474 }, { "epoch": 0.41966935142009326, "grad_norm": 1.0148428321185825, "learning_rate": 1.3037888770468667e-05, "loss": 0.9833, "step": 2475 }, { "epoch": 0.4198389147944044, "grad_norm": 0.95849866287929, "learning_rate": 1.3032655586971552e-05, "loss": 0.9584, "step": 2476 }, { "epoch": 0.42000847816871556, "grad_norm": 1.0275166336153458, "learning_rate": 1.3027421488664723e-05, "loss": 0.9512, "step": 2477 }, { "epoch": 0.4201780415430267, "grad_norm": 0.9791400657686129, "learning_rate": 1.302218647712706e-05, "loss": 0.9827, "step": 2478 }, { "epoch": 0.42034760491733786, "grad_norm": 0.9339238893605596, "learning_rate": 1.301695055393772e-05, "loss": 0.9782, "step": 2479 }, { "epoch": 0.420517168291649, "grad_norm": 1.0596822737710774, "learning_rate": 1.3011713720676133e-05, "loss": 0.9606, "step": 2480 }, { "epoch": 0.42068673166596016, "grad_norm": 1.0550604314680185, "learning_rate": 1.3006475978922013e-05, "loss": 1.0004, "step": 2481 }, { "epoch": 0.4208562950402713, "grad_norm": 0.9867194430901839, "learning_rate": 1.3001237330255334e-05, "loss": 1.0204, "step": 2482 }, { "epoch": 0.42102585841458245, "grad_norm": 0.9209582580814403, "learning_rate": 1.2995997776256352e-05, "loss": 0.9282, "step": 2483 }, { "epoch": 0.4211954217888936, "grad_norm": 0.9472748276621806, "learning_rate": 1.2990757318505598e-05, "loss": 0.9451, "step": 2484 }, { "epoch": 0.42136498516320475, "grad_norm": 0.9682508048531342, "learning_rate": 1.2985515958583865e-05, "loss": 0.9606, "step": 2485 }, { "epoch": 0.4215345485375159, "grad_norm": 1.0236539166142857, "learning_rate": 1.2980273698072228e-05, "loss": 0.9932, "step": 2486 }, { "epoch": 0.42170411191182705, "grad_norm": 0.9776957575892024, "learning_rate": 1.297503053855203e-05, "loss": 0.977, "step": 2487 }, { "epoch": 0.4218736752861382, "grad_norm": 0.9281053374057722, "learning_rate": 1.2969786481604891e-05, "loss": 0.9496, "step": 2488 }, { "epoch": 0.42204323866044935, "grad_norm": 1.0197700263964224, "learning_rate": 1.2964541528812689e-05, "loss": 0.9932, "step": 2489 }, { "epoch": 0.42221280203476047, "grad_norm": 0.9743752641167894, "learning_rate": 1.2959295681757583e-05, "loss": 0.9901, "step": 2490 }, { "epoch": 0.42238236540907165, "grad_norm": 0.9436189553795822, "learning_rate": 1.2954048942022002e-05, "loss": 0.9315, "step": 2491 }, { "epoch": 0.42255192878338277, "grad_norm": 0.9865779154314379, "learning_rate": 1.2948801311188637e-05, "loss": 0.9777, "step": 2492 }, { "epoch": 0.42272149215769395, "grad_norm": 1.0094906192281845, "learning_rate": 1.2943552790840452e-05, "loss": 1.0358, "step": 2493 }, { "epoch": 0.42289105553200507, "grad_norm": 0.8994432905089988, "learning_rate": 1.293830338256068e-05, "loss": 0.9449, "step": 2494 }, { "epoch": 0.42306061890631624, "grad_norm": 0.9973310313175829, "learning_rate": 1.2933053087932821e-05, "loss": 1.0106, "step": 2495 }, { "epoch": 0.42323018228062737, "grad_norm": 1.0172593455287209, "learning_rate": 1.292780190854064e-05, "loss": 0.9827, "step": 2496 }, { "epoch": 0.42339974565493854, "grad_norm": 0.9995213856574289, "learning_rate": 1.2922549845968174e-05, "loss": 1.0188, "step": 2497 }, { "epoch": 0.42356930902924966, "grad_norm": 0.9379400401170712, "learning_rate": 1.291729690179972e-05, "loss": 0.9731, "step": 2498 }, { "epoch": 0.42373887240356084, "grad_norm": 0.9871638855286147, "learning_rate": 1.291204307761985e-05, "loss": 0.9695, "step": 2499 }, { "epoch": 0.42390843577787196, "grad_norm": 0.9900282879531233, "learning_rate": 1.2906788375013392e-05, "loss": 1.0391, "step": 2500 }, { "epoch": 0.42407799915218314, "grad_norm": 0.9419971756374754, "learning_rate": 1.2901532795565444e-05, "loss": 0.9292, "step": 2501 }, { "epoch": 0.42424756252649426, "grad_norm": 0.9778386004005973, "learning_rate": 1.2896276340861367e-05, "loss": 0.9824, "step": 2502 }, { "epoch": 0.42441712590080544, "grad_norm": 0.9818189876576768, "learning_rate": 1.2891019012486785e-05, "loss": 0.9702, "step": 2503 }, { "epoch": 0.42458668927511656, "grad_norm": 0.9540525657742854, "learning_rate": 1.288576081202759e-05, "loss": 0.9513, "step": 2504 }, { "epoch": 0.42475625264942773, "grad_norm": 0.9644918591182482, "learning_rate": 1.2880501741069931e-05, "loss": 1.0144, "step": 2505 }, { "epoch": 0.42492581602373886, "grad_norm": 0.9802432592505497, "learning_rate": 1.2875241801200224e-05, "loss": 0.9853, "step": 2506 }, { "epoch": 0.42509537939805003, "grad_norm": 0.9852407281876533, "learning_rate": 1.2869980994005146e-05, "loss": 0.9908, "step": 2507 }, { "epoch": 0.42526494277236115, "grad_norm": 0.9316066047742487, "learning_rate": 1.2864719321071638e-05, "loss": 0.9316, "step": 2508 }, { "epoch": 0.42543450614667233, "grad_norm": 0.9731369099438988, "learning_rate": 1.2859456783986892e-05, "loss": 1.0024, "step": 2509 }, { "epoch": 0.42560406952098345, "grad_norm": 0.9481133291884946, "learning_rate": 1.2854193384338378e-05, "loss": 0.9773, "step": 2510 }, { "epoch": 0.42577363289529463, "grad_norm": 0.6781756198370876, "learning_rate": 1.2848929123713811e-05, "loss": 0.8231, "step": 2511 }, { "epoch": 0.42594319626960575, "grad_norm": 1.0004076598531444, "learning_rate": 1.2843664003701168e-05, "loss": 1.0189, "step": 2512 }, { "epoch": 0.4261127596439169, "grad_norm": 1.0070122966388144, "learning_rate": 1.2838398025888695e-05, "loss": 1.0389, "step": 2513 }, { "epoch": 0.42628232301822805, "grad_norm": 0.972491172104366, "learning_rate": 1.2833131191864884e-05, "loss": 0.9646, "step": 2514 }, { "epoch": 0.4264518863925392, "grad_norm": 1.0230189239965226, "learning_rate": 1.2827863503218496e-05, "loss": 1.0017, "step": 2515 }, { "epoch": 0.42662144976685035, "grad_norm": 0.9849900602309155, "learning_rate": 1.2822594961538544e-05, "loss": 0.9854, "step": 2516 }, { "epoch": 0.4267910131411615, "grad_norm": 1.000557638063349, "learning_rate": 1.2817325568414299e-05, "loss": 0.9842, "step": 2517 }, { "epoch": 0.42696057651547265, "grad_norm": 0.9751420613359931, "learning_rate": 1.2812055325435289e-05, "loss": 0.9917, "step": 2518 }, { "epoch": 0.4271301398897838, "grad_norm": 0.6468111546759283, "learning_rate": 1.2806784234191298e-05, "loss": 0.8158, "step": 2519 }, { "epoch": 0.42729970326409494, "grad_norm": 0.9672189353178875, "learning_rate": 1.280151229627237e-05, "loss": 0.9963, "step": 2520 }, { "epoch": 0.4274692666384061, "grad_norm": 1.0032015491615498, "learning_rate": 1.2796239513268796e-05, "loss": 0.9927, "step": 2521 }, { "epoch": 0.42763883001271724, "grad_norm": 1.0432935191736354, "learning_rate": 1.2790965886771135e-05, "loss": 1.0004, "step": 2522 }, { "epoch": 0.4278083933870284, "grad_norm": 0.995297815162946, "learning_rate": 1.2785691418370178e-05, "loss": 0.9983, "step": 2523 }, { "epoch": 0.42797795676133954, "grad_norm": 0.9997405957886893, "learning_rate": 1.2780416109657001e-05, "loss": 1.0097, "step": 2524 }, { "epoch": 0.4281475201356507, "grad_norm": 0.9593389358873918, "learning_rate": 1.2775139962222905e-05, "loss": 1.0078, "step": 2525 }, { "epoch": 0.42831708350996184, "grad_norm": 0.6399708630769928, "learning_rate": 1.276986297765946e-05, "loss": 0.8159, "step": 2526 }, { "epoch": 0.428486646884273, "grad_norm": 1.009811719388693, "learning_rate": 1.2764585157558486e-05, "loss": 0.9422, "step": 2527 }, { "epoch": 0.42865621025858414, "grad_norm": 1.004243959315471, "learning_rate": 1.2759306503512052e-05, "loss": 0.9962, "step": 2528 }, { "epoch": 0.4288257736328953, "grad_norm": 0.9986222219718599, "learning_rate": 1.275402701711248e-05, "loss": 0.9843, "step": 2529 }, { "epoch": 0.42899533700720643, "grad_norm": 1.0261847797792232, "learning_rate": 1.2748746699952338e-05, "loss": 0.9682, "step": 2530 }, { "epoch": 0.4291649003815176, "grad_norm": 0.9554607464796744, "learning_rate": 1.274346555362446e-05, "loss": 0.9894, "step": 2531 }, { "epoch": 0.42933446375582873, "grad_norm": 0.981234836072138, "learning_rate": 1.273818357972191e-05, "loss": 0.9707, "step": 2532 }, { "epoch": 0.4295040271301399, "grad_norm": 0.9965098430352564, "learning_rate": 1.2732900779838016e-05, "loss": 1.0072, "step": 2533 }, { "epoch": 0.42967359050445103, "grad_norm": 1.0277003286550126, "learning_rate": 1.272761715556635e-05, "loss": 0.9675, "step": 2534 }, { "epoch": 0.4298431538787622, "grad_norm": 0.9461483265161289, "learning_rate": 1.272233270850073e-05, "loss": 0.9379, "step": 2535 }, { "epoch": 0.43001271725307333, "grad_norm": 1.0126090451137943, "learning_rate": 1.2717047440235234e-05, "loss": 0.9867, "step": 2536 }, { "epoch": 0.4301822806273845, "grad_norm": 0.9493238161506435, "learning_rate": 1.2711761352364172e-05, "loss": 0.9826, "step": 2537 }, { "epoch": 0.4303518440016956, "grad_norm": 0.913075140400223, "learning_rate": 1.2706474446482112e-05, "loss": 0.9337, "step": 2538 }, { "epoch": 0.4305214073760068, "grad_norm": 0.9647582411270827, "learning_rate": 1.2701186724183855e-05, "loss": 0.98, "step": 2539 }, { "epoch": 0.4306909707503179, "grad_norm": 0.9763555649713409, "learning_rate": 1.2695898187064475e-05, "loss": 0.9875, "step": 2540 }, { "epoch": 0.4308605341246291, "grad_norm": 0.9163409701928596, "learning_rate": 1.2690608836719261e-05, "loss": 0.9838, "step": 2541 }, { "epoch": 0.4310300974989402, "grad_norm": 0.9645401106782511, "learning_rate": 1.2685318674743769e-05, "loss": 0.9552, "step": 2542 }, { "epoch": 0.4311996608732514, "grad_norm": 0.9547156108507818, "learning_rate": 1.2680027702733791e-05, "loss": 0.969, "step": 2543 }, { "epoch": 0.4313692242475625, "grad_norm": 0.9453415634420721, "learning_rate": 1.2674735922285362e-05, "loss": 1.017, "step": 2544 }, { "epoch": 0.4315387876218737, "grad_norm": 0.9542884266710483, "learning_rate": 1.2669443334994768e-05, "loss": 1.0129, "step": 2545 }, { "epoch": 0.4317083509961848, "grad_norm": 0.8902177338216921, "learning_rate": 1.2664149942458533e-05, "loss": 0.9082, "step": 2546 }, { "epoch": 0.431877914370496, "grad_norm": 0.9592890897744861, "learning_rate": 1.265885574627342e-05, "loss": 1.0147, "step": 2547 }, { "epoch": 0.4320474777448071, "grad_norm": 0.9369973307062465, "learning_rate": 1.2653560748036443e-05, "loss": 0.9926, "step": 2548 }, { "epoch": 0.4322170411191183, "grad_norm": 0.9995794567502263, "learning_rate": 1.2648264949344858e-05, "loss": 1.0128, "step": 2549 }, { "epoch": 0.4323866044934294, "grad_norm": 0.9577114777321886, "learning_rate": 1.2642968351796153e-05, "loss": 0.9889, "step": 2550 }, { "epoch": 0.4325561678677406, "grad_norm": 0.972460664381493, "learning_rate": 1.2637670956988062e-05, "loss": 0.9637, "step": 2551 }, { "epoch": 0.4327257312420517, "grad_norm": 0.9480056743666075, "learning_rate": 1.2632372766518564e-05, "loss": 0.9917, "step": 2552 }, { "epoch": 0.4328952946163629, "grad_norm": 0.987207085084289, "learning_rate": 1.262707378198587e-05, "loss": 0.986, "step": 2553 }, { "epoch": 0.433064857990674, "grad_norm": 0.9688003378177562, "learning_rate": 1.2621774004988438e-05, "loss": 0.9624, "step": 2554 }, { "epoch": 0.4332344213649852, "grad_norm": 0.9736360650615529, "learning_rate": 1.2616473437124962e-05, "loss": 0.982, "step": 2555 }, { "epoch": 0.4334039847392963, "grad_norm": 0.932054823363419, "learning_rate": 1.2611172079994377e-05, "loss": 0.9915, "step": 2556 }, { "epoch": 0.4335735481136075, "grad_norm": 0.9628791706496478, "learning_rate": 1.2605869935195844e-05, "loss": 0.9658, "step": 2557 }, { "epoch": 0.4337431114879186, "grad_norm": 0.9466869512734183, "learning_rate": 1.2600567004328781e-05, "loss": 0.982, "step": 2558 }, { "epoch": 0.4339126748622298, "grad_norm": 0.9830992644340674, "learning_rate": 1.2595263288992825e-05, "loss": 1.0121, "step": 2559 }, { "epoch": 0.4340822382365409, "grad_norm": 0.6399999733775902, "learning_rate": 1.2589958790787864e-05, "loss": 0.7935, "step": 2560 }, { "epoch": 0.43425180161085203, "grad_norm": 0.9634089542685117, "learning_rate": 1.2584653511314012e-05, "loss": 1.0236, "step": 2561 }, { "epoch": 0.4344213649851632, "grad_norm": 0.9101099869920799, "learning_rate": 1.2579347452171624e-05, "loss": 0.9844, "step": 2562 }, { "epoch": 0.4345909283594743, "grad_norm": 0.9837374995386899, "learning_rate": 1.257404061496129e-05, "loss": 0.9849, "step": 2563 }, { "epoch": 0.4347604917337855, "grad_norm": 0.9748203258991123, "learning_rate": 1.2568733001283828e-05, "loss": 0.8952, "step": 2564 }, { "epoch": 0.4349300551080966, "grad_norm": 0.9797331751858684, "learning_rate": 1.2563424612740307e-05, "loss": 1.0237, "step": 2565 }, { "epoch": 0.4350996184824078, "grad_norm": 0.9601441620849211, "learning_rate": 1.2558115450932006e-05, "loss": 0.9633, "step": 2566 }, { "epoch": 0.4352691818567189, "grad_norm": 0.9580021719682639, "learning_rate": 1.2552805517460457e-05, "loss": 1.0009, "step": 2567 }, { "epoch": 0.4354387452310301, "grad_norm": 0.9323725512934854, "learning_rate": 1.2547494813927417e-05, "loss": 0.9685, "step": 2568 }, { "epoch": 0.4356083086053412, "grad_norm": 0.9844191223256091, "learning_rate": 1.2542183341934873e-05, "loss": 0.9776, "step": 2569 }, { "epoch": 0.4357778719796524, "grad_norm": 0.9310079039827244, "learning_rate": 1.2536871103085044e-05, "loss": 0.965, "step": 2570 }, { "epoch": 0.4359474353539635, "grad_norm": 1.0013468705229542, "learning_rate": 1.253155809898039e-05, "loss": 0.9973, "step": 2571 }, { "epoch": 0.4361169987282747, "grad_norm": 0.9992608024643401, "learning_rate": 1.2526244331223592e-05, "loss": 1.0512, "step": 2572 }, { "epoch": 0.4362865621025858, "grad_norm": 0.9585919912207673, "learning_rate": 1.252092980141756e-05, "loss": 0.9713, "step": 2573 }, { "epoch": 0.436456125476897, "grad_norm": 0.9658001415385551, "learning_rate": 1.2515614511165447e-05, "loss": 0.9539, "step": 2574 }, { "epoch": 0.4366256888512081, "grad_norm": 0.9542127820120717, "learning_rate": 1.2510298462070619e-05, "loss": 0.9708, "step": 2575 }, { "epoch": 0.4367952522255193, "grad_norm": 0.9662864977905982, "learning_rate": 1.250498165573668e-05, "loss": 0.9317, "step": 2576 }, { "epoch": 0.4369648155998304, "grad_norm": 0.9156334945671237, "learning_rate": 1.2499664093767458e-05, "loss": 0.9293, "step": 2577 }, { "epoch": 0.4371343789741416, "grad_norm": 0.9710372354065148, "learning_rate": 1.2494345777767016e-05, "loss": 0.9919, "step": 2578 }, { "epoch": 0.4373039423484527, "grad_norm": 0.6040330291387417, "learning_rate": 1.2489026709339639e-05, "loss": 0.8991, "step": 2579 }, { "epoch": 0.4374735057227639, "grad_norm": 0.9034640212551738, "learning_rate": 1.2483706890089838e-05, "loss": 0.9296, "step": 2580 }, { "epoch": 0.437643069097075, "grad_norm": 0.9465241878484302, "learning_rate": 1.2478386321622356e-05, "loss": 0.9864, "step": 2581 }, { "epoch": 0.4378126324713862, "grad_norm": 0.9649682466568654, "learning_rate": 1.2473065005542155e-05, "loss": 0.977, "step": 2582 }, { "epoch": 0.4379821958456973, "grad_norm": 0.9391603902914201, "learning_rate": 1.246774294345443e-05, "loss": 0.951, "step": 2583 }, { "epoch": 0.4381517592200085, "grad_norm": 0.9718981572288251, "learning_rate": 1.2462420136964595e-05, "loss": 1.0163, "step": 2584 }, { "epoch": 0.4383213225943196, "grad_norm": 0.9549219726982074, "learning_rate": 1.245709658767829e-05, "loss": 0.9852, "step": 2585 }, { "epoch": 0.4384908859686308, "grad_norm": 0.9676843916961262, "learning_rate": 1.2451772297201376e-05, "loss": 0.9737, "step": 2586 }, { "epoch": 0.4386604493429419, "grad_norm": 0.9482472010036822, "learning_rate": 1.2446447267139948e-05, "loss": 0.9535, "step": 2587 }, { "epoch": 0.4388300127172531, "grad_norm": 0.5843635488579886, "learning_rate": 1.2441121499100318e-05, "loss": 0.8307, "step": 2588 }, { "epoch": 0.4389995760915642, "grad_norm": 0.9511283693232185, "learning_rate": 1.243579499468901e-05, "loss": 0.9857, "step": 2589 }, { "epoch": 0.4391691394658754, "grad_norm": 0.9815884173773686, "learning_rate": 1.2430467755512794e-05, "loss": 0.9869, "step": 2590 }, { "epoch": 0.4393387028401865, "grad_norm": 1.005198212889262, "learning_rate": 1.2425139783178634e-05, "loss": 1.0149, "step": 2591 }, { "epoch": 0.4395082662144977, "grad_norm": 0.9563482175108111, "learning_rate": 1.2419811079293742e-05, "loss": 0.9613, "step": 2592 }, { "epoch": 0.4396778295888088, "grad_norm": 0.9167312685775793, "learning_rate": 1.241448164546553e-05, "loss": 0.9635, "step": 2593 }, { "epoch": 0.43984739296312, "grad_norm": 0.9525244869856538, "learning_rate": 1.240915148330164e-05, "loss": 0.9872, "step": 2594 }, { "epoch": 0.4400169563374311, "grad_norm": 0.9721471457636425, "learning_rate": 1.2403820594409926e-05, "loss": 0.9947, "step": 2595 }, { "epoch": 0.4401865197117423, "grad_norm": 0.9785457440196526, "learning_rate": 1.2398488980398473e-05, "loss": 0.9681, "step": 2596 }, { "epoch": 0.4403560830860534, "grad_norm": 0.9128257224099869, "learning_rate": 1.2393156642875579e-05, "loss": 0.9401, "step": 2597 }, { "epoch": 0.4405256464603646, "grad_norm": 0.9427585576111754, "learning_rate": 1.2387823583449757e-05, "loss": 0.9853, "step": 2598 }, { "epoch": 0.4406952098346757, "grad_norm": 0.9848384603759787, "learning_rate": 1.238248980372974e-05, "loss": 1.0284, "step": 2599 }, { "epoch": 0.44086477320898687, "grad_norm": 0.9255812009225528, "learning_rate": 1.237715530532448e-05, "loss": 0.9253, "step": 2600 }, { "epoch": 0.441034336583298, "grad_norm": 1.0086860783906586, "learning_rate": 1.2371820089843145e-05, "loss": 0.9788, "step": 2601 }, { "epoch": 0.44120389995760917, "grad_norm": 1.0099654358376329, "learning_rate": 1.2366484158895118e-05, "loss": 0.9757, "step": 2602 }, { "epoch": 0.4413734633319203, "grad_norm": 0.960186951386786, "learning_rate": 1.236114751409e-05, "loss": 0.9829, "step": 2603 }, { "epoch": 0.44154302670623147, "grad_norm": 0.9204818062401579, "learning_rate": 1.2355810157037601e-05, "loss": 0.9858, "step": 2604 }, { "epoch": 0.4417125900805426, "grad_norm": 0.9393047723784934, "learning_rate": 1.2350472089347957e-05, "loss": 0.9701, "step": 2605 }, { "epoch": 0.44188215345485377, "grad_norm": 0.9031940585296269, "learning_rate": 1.2345133312631313e-05, "loss": 0.9427, "step": 2606 }, { "epoch": 0.4420517168291649, "grad_norm": 0.9290010286306092, "learning_rate": 1.2339793828498119e-05, "loss": 0.9439, "step": 2607 }, { "epoch": 0.44222128020347606, "grad_norm": 0.987211699776085, "learning_rate": 1.2334453638559057e-05, "loss": 0.9914, "step": 2608 }, { "epoch": 0.4423908435777872, "grad_norm": 0.9936535892667405, "learning_rate": 1.2329112744425e-05, "loss": 0.9719, "step": 2609 }, { "epoch": 0.44256040695209836, "grad_norm": 1.0104702389839972, "learning_rate": 1.2323771147707055e-05, "loss": 0.9706, "step": 2610 }, { "epoch": 0.4427299703264095, "grad_norm": 0.9063456863797922, "learning_rate": 1.2318428850016528e-05, "loss": 0.9349, "step": 2611 }, { "epoch": 0.44289953370072066, "grad_norm": 0.9808512627871208, "learning_rate": 1.2313085852964937e-05, "loss": 1.0006, "step": 2612 }, { "epoch": 0.4430690970750318, "grad_norm": 0.9931097127581182, "learning_rate": 1.2307742158164012e-05, "loss": 0.9712, "step": 2613 }, { "epoch": 0.44323866044934296, "grad_norm": 0.9303357095269098, "learning_rate": 1.2302397767225696e-05, "loss": 0.9388, "step": 2614 }, { "epoch": 0.4434082238236541, "grad_norm": 0.9613931330119423, "learning_rate": 1.2297052681762143e-05, "loss": 0.9804, "step": 2615 }, { "epoch": 0.44357778719796526, "grad_norm": 1.0379705267557564, "learning_rate": 1.2291706903385711e-05, "loss": 0.9998, "step": 2616 }, { "epoch": 0.4437473505722764, "grad_norm": 0.9275558113510899, "learning_rate": 1.2286360433708976e-05, "loss": 0.9412, "step": 2617 }, { "epoch": 0.44391691394658755, "grad_norm": 0.9436017202419685, "learning_rate": 1.2281013274344709e-05, "loss": 0.9518, "step": 2618 }, { "epoch": 0.4440864773208987, "grad_norm": 1.0008706708887538, "learning_rate": 1.22756654269059e-05, "loss": 0.988, "step": 2619 }, { "epoch": 0.44425604069520985, "grad_norm": 0.9556374460075673, "learning_rate": 1.2270316893005747e-05, "loss": 0.95, "step": 2620 }, { "epoch": 0.444425604069521, "grad_norm": 0.9538381043541012, "learning_rate": 1.2264967674257647e-05, "loss": 0.9705, "step": 2621 }, { "epoch": 0.44459516744383215, "grad_norm": 0.9252129251895725, "learning_rate": 1.2259617772275207e-05, "loss": 0.9576, "step": 2622 }, { "epoch": 0.4447647308181433, "grad_norm": 0.9525795111034339, "learning_rate": 1.2254267188672242e-05, "loss": 0.9808, "step": 2623 }, { "epoch": 0.44493429419245445, "grad_norm": 0.9936359890930845, "learning_rate": 1.2248915925062776e-05, "loss": 0.9604, "step": 2624 }, { "epoch": 0.44510385756676557, "grad_norm": 0.9568178953615135, "learning_rate": 1.2243563983061029e-05, "loss": 0.9674, "step": 2625 }, { "epoch": 0.44527342094107675, "grad_norm": 0.9520514073465959, "learning_rate": 1.2238211364281434e-05, "loss": 0.9372, "step": 2626 }, { "epoch": 0.44544298431538787, "grad_norm": 1.0517533339308636, "learning_rate": 1.2232858070338618e-05, "loss": 0.9738, "step": 2627 }, { "epoch": 0.44561254768969905, "grad_norm": 1.0584344019632212, "learning_rate": 1.2227504102847427e-05, "loss": 0.9895, "step": 2628 }, { "epoch": 0.44578211106401017, "grad_norm": 0.9750600176878557, "learning_rate": 1.2222149463422898e-05, "loss": 0.9916, "step": 2629 }, { "epoch": 0.44595167443832134, "grad_norm": 0.9147701636518633, "learning_rate": 1.2216794153680274e-05, "loss": 0.9329, "step": 2630 }, { "epoch": 0.44612123781263247, "grad_norm": 0.6178372177595367, "learning_rate": 1.2211438175234998e-05, "loss": 0.8118, "step": 2631 }, { "epoch": 0.44629080118694364, "grad_norm": 0.9735646432975459, "learning_rate": 1.2206081529702717e-05, "loss": 0.945, "step": 2632 }, { "epoch": 0.44646036456125476, "grad_norm": 0.9943674416421485, "learning_rate": 1.2200724218699284e-05, "loss": 0.9639, "step": 2633 }, { "epoch": 0.44662992793556594, "grad_norm": 0.995016939721035, "learning_rate": 1.2195366243840745e-05, "loss": 0.9676, "step": 2634 }, { "epoch": 0.44679949130987706, "grad_norm": 0.9674588884725147, "learning_rate": 1.219000760674335e-05, "loss": 0.9772, "step": 2635 }, { "epoch": 0.44696905468418824, "grad_norm": 0.9398902840377271, "learning_rate": 1.2184648309023545e-05, "loss": 0.9754, "step": 2636 }, { "epoch": 0.44713861805849936, "grad_norm": 1.0079960388860927, "learning_rate": 1.2179288352297985e-05, "loss": 0.9858, "step": 2637 }, { "epoch": 0.44730818143281054, "grad_norm": 1.0083448550513037, "learning_rate": 1.217392773818351e-05, "loss": 0.9771, "step": 2638 }, { "epoch": 0.44747774480712166, "grad_norm": 1.0190554426314378, "learning_rate": 1.2168566468297172e-05, "loss": 0.9878, "step": 2639 }, { "epoch": 0.44764730818143283, "grad_norm": 0.9794039179845009, "learning_rate": 1.2163204544256209e-05, "loss": 0.9631, "step": 2640 }, { "epoch": 0.44781687155574396, "grad_norm": 0.9950696202258047, "learning_rate": 1.2157841967678064e-05, "loss": 0.9866, "step": 2641 }, { "epoch": 0.44798643493005513, "grad_norm": 0.9689424486708467, "learning_rate": 1.2152478740180374e-05, "loss": 0.989, "step": 2642 }, { "epoch": 0.44815599830436625, "grad_norm": 0.9883031878224542, "learning_rate": 1.2147114863380969e-05, "loss": 1.002, "step": 2643 }, { "epoch": 0.44832556167867743, "grad_norm": 1.0042404715586786, "learning_rate": 1.2141750338897887e-05, "loss": 1.0032, "step": 2644 }, { "epoch": 0.44849512505298855, "grad_norm": 0.9280754160907558, "learning_rate": 1.2136385168349345e-05, "loss": 0.9662, "step": 2645 }, { "epoch": 0.44866468842729973, "grad_norm": 1.037477001251102, "learning_rate": 1.2131019353353768e-05, "loss": 1.0295, "step": 2646 }, { "epoch": 0.44883425180161085, "grad_norm": 0.9737458468691693, "learning_rate": 1.2125652895529766e-05, "loss": 1.0159, "step": 2647 }, { "epoch": 0.449003815175922, "grad_norm": 1.0301307125855186, "learning_rate": 1.2120285796496153e-05, "loss": 0.994, "step": 2648 }, { "epoch": 0.44917337855023315, "grad_norm": 0.959385929367056, "learning_rate": 1.2114918057871928e-05, "loss": 0.9653, "step": 2649 }, { "epoch": 0.4493429419245443, "grad_norm": 1.0705578878212247, "learning_rate": 1.2109549681276281e-05, "loss": 1.0292, "step": 2650 }, { "epoch": 0.44951250529885545, "grad_norm": 0.6578480262499101, "learning_rate": 1.2104180668328606e-05, "loss": 0.8301, "step": 2651 }, { "epoch": 0.44968206867316657, "grad_norm": 0.9090928796566937, "learning_rate": 1.2098811020648475e-05, "loss": 0.9375, "step": 2652 }, { "epoch": 0.44985163204747775, "grad_norm": 0.974569345103213, "learning_rate": 1.2093440739855669e-05, "loss": 0.9744, "step": 2653 }, { "epoch": 0.45002119542178887, "grad_norm": 0.9644635016241943, "learning_rate": 1.2088069827570136e-05, "loss": 0.9819, "step": 2654 }, { "epoch": 0.45019075879610004, "grad_norm": 0.9645719831857741, "learning_rate": 1.2082698285412037e-05, "loss": 0.9778, "step": 2655 }, { "epoch": 0.45036032217041116, "grad_norm": 0.9228009480152504, "learning_rate": 1.207732611500171e-05, "loss": 0.9023, "step": 2656 }, { "epoch": 0.45052988554472234, "grad_norm": 0.6723316575432514, "learning_rate": 1.2071953317959692e-05, "loss": 0.8799, "step": 2657 }, { "epoch": 0.45069944891903346, "grad_norm": 0.9350788993948473, "learning_rate": 1.2066579895906699e-05, "loss": 0.978, "step": 2658 }, { "epoch": 0.45086901229334464, "grad_norm": 0.9794696241276973, "learning_rate": 1.2061205850463635e-05, "loss": 0.9451, "step": 2659 }, { "epoch": 0.45103857566765576, "grad_norm": 0.9920009712159011, "learning_rate": 1.2055831183251608e-05, "loss": 0.9457, "step": 2660 }, { "epoch": 0.45120813904196694, "grad_norm": 1.0303134553366446, "learning_rate": 1.2050455895891893e-05, "loss": 1.0489, "step": 2661 }, { "epoch": 0.45137770241627806, "grad_norm": 0.9680785846350365, "learning_rate": 1.2045079990005968e-05, "loss": 1.0202, "step": 2662 }, { "epoch": 0.45154726579058924, "grad_norm": 1.0188724474031694, "learning_rate": 1.2039703467215489e-05, "loss": 1.0019, "step": 2663 }, { "epoch": 0.45171682916490036, "grad_norm": 0.9612586165429831, "learning_rate": 1.20343263291423e-05, "loss": 0.9838, "step": 2664 }, { "epoch": 0.45188639253921153, "grad_norm": 0.9774052477580866, "learning_rate": 1.202894857740843e-05, "loss": 0.9451, "step": 2665 }, { "epoch": 0.45205595591352266, "grad_norm": 0.9501449121195908, "learning_rate": 1.2023570213636096e-05, "loss": 0.9792, "step": 2666 }, { "epoch": 0.45222551928783383, "grad_norm": 1.0146713244118473, "learning_rate": 1.2018191239447698e-05, "loss": 1.0078, "step": 2667 }, { "epoch": 0.45239508266214495, "grad_norm": 0.9307121174618093, "learning_rate": 1.2012811656465818e-05, "loss": 1.0127, "step": 2668 }, { "epoch": 0.45256464603645613, "grad_norm": 0.9269792577229103, "learning_rate": 1.2007431466313224e-05, "loss": 0.9774, "step": 2669 }, { "epoch": 0.45273420941076725, "grad_norm": 0.9615505244915513, "learning_rate": 1.2002050670612864e-05, "loss": 0.9879, "step": 2670 }, { "epoch": 0.45290377278507843, "grad_norm": 0.9523136343262971, "learning_rate": 1.1996669270987878e-05, "loss": 0.9652, "step": 2671 }, { "epoch": 0.45307333615938955, "grad_norm": 0.9830594287381288, "learning_rate": 1.1991287269061575e-05, "loss": 0.9669, "step": 2672 }, { "epoch": 0.4532428995337007, "grad_norm": 1.0010636198585519, "learning_rate": 1.1985904666457455e-05, "loss": 0.9936, "step": 2673 }, { "epoch": 0.45341246290801185, "grad_norm": 0.981074246875863, "learning_rate": 1.1980521464799197e-05, "loss": 0.9674, "step": 2674 }, { "epoch": 0.453582026282323, "grad_norm": 0.9763715055330665, "learning_rate": 1.1975137665710659e-05, "loss": 1.0242, "step": 2675 }, { "epoch": 0.45375158965663415, "grad_norm": 1.0281671424663383, "learning_rate": 1.1969753270815881e-05, "loss": 0.9943, "step": 2676 }, { "epoch": 0.4539211530309453, "grad_norm": 0.9883323699594073, "learning_rate": 1.1964368281739078e-05, "loss": 0.9713, "step": 2677 }, { "epoch": 0.45409071640525644, "grad_norm": 0.9456052638683551, "learning_rate": 1.1958982700104655e-05, "loss": 0.97, "step": 2678 }, { "epoch": 0.4542602797795676, "grad_norm": 0.9353048098356204, "learning_rate": 1.1953596527537184e-05, "loss": 0.9646, "step": 2679 }, { "epoch": 0.45442984315387874, "grad_norm": 1.036147116714233, "learning_rate": 1.1948209765661421e-05, "loss": 0.9442, "step": 2680 }, { "epoch": 0.4545994065281899, "grad_norm": 0.9992504658384659, "learning_rate": 1.19428224161023e-05, "loss": 0.975, "step": 2681 }, { "epoch": 0.45476896990250104, "grad_norm": 0.9604222757124211, "learning_rate": 1.1937434480484931e-05, "loss": 0.9945, "step": 2682 }, { "epoch": 0.4549385332768122, "grad_norm": 0.9317371911890258, "learning_rate": 1.19320459604346e-05, "loss": 0.9718, "step": 2683 }, { "epoch": 0.45510809665112334, "grad_norm": 0.9785087040040867, "learning_rate": 1.1926656857576773e-05, "loss": 0.9764, "step": 2684 }, { "epoch": 0.4552776600254345, "grad_norm": 0.9905911991290945, "learning_rate": 1.1921267173537085e-05, "loss": 0.9776, "step": 2685 }, { "epoch": 0.45544722339974564, "grad_norm": 0.6049238113888165, "learning_rate": 1.1915876909941356e-05, "loss": 0.8157, "step": 2686 }, { "epoch": 0.4556167867740568, "grad_norm": 1.0078670272491357, "learning_rate": 1.191048606841557e-05, "loss": 0.9482, "step": 2687 }, { "epoch": 0.45578635014836794, "grad_norm": 1.0173490694790077, "learning_rate": 1.190509465058589e-05, "loss": 0.9599, "step": 2688 }, { "epoch": 0.4559559135226791, "grad_norm": 0.9301070818226611, "learning_rate": 1.1899702658078663e-05, "loss": 0.9433, "step": 2689 }, { "epoch": 0.45612547689699023, "grad_norm": 0.9884613714526366, "learning_rate": 1.1894310092520387e-05, "loss": 1.007, "step": 2690 }, { "epoch": 0.4562950402713014, "grad_norm": 0.9771370890747765, "learning_rate": 1.1888916955537755e-05, "loss": 0.9754, "step": 2691 }, { "epoch": 0.45646460364561253, "grad_norm": 0.992714320883066, "learning_rate": 1.1883523248757619e-05, "loss": 0.967, "step": 2692 }, { "epoch": 0.4566341670199237, "grad_norm": 0.9590693795340549, "learning_rate": 1.1878128973807005e-05, "loss": 0.9833, "step": 2693 }, { "epoch": 0.45680373039423483, "grad_norm": 0.951380598737075, "learning_rate": 1.1872734132313121e-05, "loss": 0.9804, "step": 2694 }, { "epoch": 0.456973293768546, "grad_norm": 0.9591187610709947, "learning_rate": 1.1867338725903326e-05, "loss": 0.9467, "step": 2695 }, { "epoch": 0.45714285714285713, "grad_norm": 0.9932358249053632, "learning_rate": 1.186194275620517e-05, "loss": 0.9951, "step": 2696 }, { "epoch": 0.4573124205171683, "grad_norm": 0.9679722088923083, "learning_rate": 1.1856546224846354e-05, "loss": 0.9833, "step": 2697 }, { "epoch": 0.4574819838914794, "grad_norm": 0.9765738436595983, "learning_rate": 1.1851149133454769e-05, "loss": 0.9741, "step": 2698 }, { "epoch": 0.4576515472657906, "grad_norm": 0.9601067174873307, "learning_rate": 1.1845751483658454e-05, "loss": 0.9657, "step": 2699 }, { "epoch": 0.4578211106401017, "grad_norm": 0.9584460504878817, "learning_rate": 1.1840353277085635e-05, "loss": 0.968, "step": 2700 }, { "epoch": 0.4579906740144129, "grad_norm": 0.9268425539106802, "learning_rate": 1.183495451536469e-05, "loss": 0.9702, "step": 2701 }, { "epoch": 0.458160237388724, "grad_norm": 0.940852000205561, "learning_rate": 1.1829555200124176e-05, "loss": 0.9507, "step": 2702 }, { "epoch": 0.4583298007630352, "grad_norm": 0.604571277694446, "learning_rate": 1.1824155332992812e-05, "loss": 0.8064, "step": 2703 }, { "epoch": 0.4584993641373463, "grad_norm": 0.9261476336642703, "learning_rate": 1.1818754915599482e-05, "loss": 0.9715, "step": 2704 }, { "epoch": 0.4586689275116575, "grad_norm": 1.0145798687845644, "learning_rate": 1.181335394957324e-05, "loss": 0.963, "step": 2705 }, { "epoch": 0.4588384908859686, "grad_norm": 1.0139823579522385, "learning_rate": 1.1807952436543307e-05, "loss": 1.0111, "step": 2706 }, { "epoch": 0.4590080542602798, "grad_norm": 0.9814183347997688, "learning_rate": 1.180255037813906e-05, "loss": 0.9791, "step": 2707 }, { "epoch": 0.4591776176345909, "grad_norm": 0.94513410122389, "learning_rate": 1.1797147775990047e-05, "loss": 0.9674, "step": 2708 }, { "epoch": 0.4593471810089021, "grad_norm": 0.9709856232582966, "learning_rate": 1.1791744631725983e-05, "loss": 1.0338, "step": 2709 }, { "epoch": 0.4595167443832132, "grad_norm": 0.6411880963375893, "learning_rate": 1.178634094697674e-05, "loss": 0.8622, "step": 2710 }, { "epoch": 0.4596863077575244, "grad_norm": 0.9852955128573431, "learning_rate": 1.1780936723372359e-05, "loss": 0.9638, "step": 2711 }, { "epoch": 0.4598558711318355, "grad_norm": 0.956806964981468, "learning_rate": 1.1775531962543036e-05, "loss": 0.9446, "step": 2712 }, { "epoch": 0.4600254345061467, "grad_norm": 0.9294529749547876, "learning_rate": 1.1770126666119133e-05, "loss": 0.979, "step": 2713 }, { "epoch": 0.4601949978804578, "grad_norm": 0.9216704746293797, "learning_rate": 1.1764720835731179e-05, "loss": 0.9257, "step": 2714 }, { "epoch": 0.460364561254769, "grad_norm": 0.6127449748712405, "learning_rate": 1.1759314473009855e-05, "loss": 0.7867, "step": 2715 }, { "epoch": 0.4605341246290801, "grad_norm": 0.9737663217724459, "learning_rate": 1.175390757958601e-05, "loss": 0.9901, "step": 2716 }, { "epoch": 0.4607036880033913, "grad_norm": 0.9382196360529027, "learning_rate": 1.1748500157090645e-05, "loss": 0.9862, "step": 2717 }, { "epoch": 0.4608732513777024, "grad_norm": 0.9331284747136757, "learning_rate": 1.1743092207154929e-05, "loss": 0.9538, "step": 2718 }, { "epoch": 0.4610428147520136, "grad_norm": 1.0005362823871018, "learning_rate": 1.1737683731410185e-05, "loss": 0.9912, "step": 2719 }, { "epoch": 0.4612123781263247, "grad_norm": 0.9413155920929438, "learning_rate": 1.1732274731487899e-05, "loss": 0.9798, "step": 2720 }, { "epoch": 0.4613819415006359, "grad_norm": 0.9476531509194811, "learning_rate": 1.1726865209019709e-05, "loss": 0.9648, "step": 2721 }, { "epoch": 0.461551504874947, "grad_norm": 0.9343942599978295, "learning_rate": 1.1721455165637413e-05, "loss": 0.9348, "step": 2722 }, { "epoch": 0.4617210682492582, "grad_norm": 1.0366596084695492, "learning_rate": 1.171604460297297e-05, "loss": 1.013, "step": 2723 }, { "epoch": 0.4618906316235693, "grad_norm": 0.939574373175376, "learning_rate": 1.1710633522658488e-05, "loss": 0.9629, "step": 2724 }, { "epoch": 0.4620601949978805, "grad_norm": 0.9515881429800522, "learning_rate": 1.170522192632624e-05, "loss": 0.9613, "step": 2725 }, { "epoch": 0.4622297583721916, "grad_norm": 0.9685835311923924, "learning_rate": 1.1699809815608649e-05, "loss": 0.9667, "step": 2726 }, { "epoch": 0.4623993217465028, "grad_norm": 0.9842069987333234, "learning_rate": 1.1694397192138295e-05, "loss": 0.9944, "step": 2727 }, { "epoch": 0.4625688851208139, "grad_norm": 0.9634828549725546, "learning_rate": 1.168898405754791e-05, "loss": 0.9901, "step": 2728 }, { "epoch": 0.4627384484951251, "grad_norm": 0.9657462439909241, "learning_rate": 1.1683570413470384e-05, "loss": 1.0029, "step": 2729 }, { "epoch": 0.4629080118694362, "grad_norm": 0.9618231854479252, "learning_rate": 1.1678156261538762e-05, "loss": 1.0057, "step": 2730 }, { "epoch": 0.4630775752437474, "grad_norm": 0.9375298437319175, "learning_rate": 1.1672741603386237e-05, "loss": 0.9723, "step": 2731 }, { "epoch": 0.4632471386180585, "grad_norm": 0.9323680348028757, "learning_rate": 1.1667326440646157e-05, "loss": 0.9541, "step": 2732 }, { "epoch": 0.4634167019923697, "grad_norm": 0.996393404157614, "learning_rate": 1.1661910774952019e-05, "loss": 0.9385, "step": 2733 }, { "epoch": 0.4635862653666808, "grad_norm": 0.7256235953469011, "learning_rate": 1.1656494607937479e-05, "loss": 0.8218, "step": 2734 }, { "epoch": 0.46375582874099197, "grad_norm": 0.9632054333100473, "learning_rate": 1.1651077941236338e-05, "loss": 0.9491, "step": 2735 }, { "epoch": 0.4639253921153031, "grad_norm": 0.9894093204787605, "learning_rate": 1.164566077648255e-05, "loss": 0.9363, "step": 2736 }, { "epoch": 0.46409495548961427, "grad_norm": 0.6790453313878879, "learning_rate": 1.1640243115310219e-05, "loss": 0.8583, "step": 2737 }, { "epoch": 0.4642645188639254, "grad_norm": 0.928207831536817, "learning_rate": 1.1634824959353602e-05, "loss": 0.9777, "step": 2738 }, { "epoch": 0.46443408223823657, "grad_norm": 0.9714762841562918, "learning_rate": 1.1629406310247098e-05, "loss": 0.9345, "step": 2739 }, { "epoch": 0.4646036456125477, "grad_norm": 0.9543841690265451, "learning_rate": 1.1623987169625261e-05, "loss": 0.9929, "step": 2740 }, { "epoch": 0.46477320898685887, "grad_norm": 0.9496956797162672, "learning_rate": 1.1618567539122794e-05, "loss": 0.9785, "step": 2741 }, { "epoch": 0.46494277236117, "grad_norm": 0.933082178641956, "learning_rate": 1.1613147420374538e-05, "loss": 0.9831, "step": 2742 }, { "epoch": 0.46511233573548116, "grad_norm": 0.9904969803520491, "learning_rate": 1.1607726815015492e-05, "loss": 0.9887, "step": 2743 }, { "epoch": 0.4652818991097923, "grad_norm": 0.9651584944558408, "learning_rate": 1.1602305724680796e-05, "loss": 1.0011, "step": 2744 }, { "epoch": 0.4654514624841034, "grad_norm": 0.9471306037603745, "learning_rate": 1.1596884151005743e-05, "loss": 0.9918, "step": 2745 }, { "epoch": 0.4656210258584146, "grad_norm": 0.990833630121136, "learning_rate": 1.1591462095625763e-05, "loss": 1.0523, "step": 2746 }, { "epoch": 0.4657905892327257, "grad_norm": 0.9974612240540354, "learning_rate": 1.1586039560176434e-05, "loss": 0.9958, "step": 2747 }, { "epoch": 0.4659601526070369, "grad_norm": 0.9985791810806031, "learning_rate": 1.1580616546293485e-05, "loss": 0.9823, "step": 2748 }, { "epoch": 0.466129715981348, "grad_norm": 0.9574216040226441, "learning_rate": 1.1575193055612785e-05, "loss": 0.9768, "step": 2749 }, { "epoch": 0.4662992793556592, "grad_norm": 0.9341144309487299, "learning_rate": 1.1569769089770341e-05, "loss": 0.9557, "step": 2750 }, { "epoch": 0.4664688427299703, "grad_norm": 1.0096048343372679, "learning_rate": 1.156434465040231e-05, "loss": 0.9995, "step": 2751 }, { "epoch": 0.4666384061042815, "grad_norm": 0.95710004168116, "learning_rate": 1.1558919739144994e-05, "loss": 0.9723, "step": 2752 }, { "epoch": 0.4668079694785926, "grad_norm": 0.6853955382300533, "learning_rate": 1.155349435763483e-05, "loss": 0.7884, "step": 2753 }, { "epoch": 0.4669775328529038, "grad_norm": 0.9961303304130427, "learning_rate": 1.1548068507508403e-05, "loss": 1.0039, "step": 2754 }, { "epoch": 0.4671470962272149, "grad_norm": 0.9792270421832289, "learning_rate": 1.1542642190402434e-05, "loss": 0.9387, "step": 2755 }, { "epoch": 0.4673166596015261, "grad_norm": 1.0078748568271807, "learning_rate": 1.153721540795379e-05, "loss": 0.9701, "step": 2756 }, { "epoch": 0.4674862229758372, "grad_norm": 0.9350229791831816, "learning_rate": 1.153178816179948e-05, "loss": 0.9919, "step": 2757 }, { "epoch": 0.4676557863501484, "grad_norm": 0.973724854481201, "learning_rate": 1.1526360453576646e-05, "loss": 0.9655, "step": 2758 }, { "epoch": 0.4678253497244595, "grad_norm": 0.9272057766288919, "learning_rate": 1.152093228492257e-05, "loss": 0.9874, "step": 2759 }, { "epoch": 0.46799491309877067, "grad_norm": 1.0398840623387369, "learning_rate": 1.1515503657474678e-05, "loss": 0.9932, "step": 2760 }, { "epoch": 0.4681644764730818, "grad_norm": 0.946711302522535, "learning_rate": 1.1510074572870533e-05, "loss": 0.9671, "step": 2761 }, { "epoch": 0.46833403984739297, "grad_norm": 0.9681468539350723, "learning_rate": 1.1504645032747832e-05, "loss": 1.0121, "step": 2762 }, { "epoch": 0.4685036032217041, "grad_norm": 0.6902247782676347, "learning_rate": 1.1499215038744413e-05, "loss": 0.8304, "step": 2763 }, { "epoch": 0.46867316659601527, "grad_norm": 0.9604054947185507, "learning_rate": 1.1493784592498252e-05, "loss": 0.9659, "step": 2764 }, { "epoch": 0.4688427299703264, "grad_norm": 1.003480561636045, "learning_rate": 1.1488353695647456e-05, "loss": 0.9811, "step": 2765 }, { "epoch": 0.46901229334463757, "grad_norm": 1.0106849453147624, "learning_rate": 1.1482922349830279e-05, "loss": 0.9916, "step": 2766 }, { "epoch": 0.4691818567189487, "grad_norm": 0.9729822381769379, "learning_rate": 1.1477490556685094e-05, "loss": 0.9774, "step": 2767 }, { "epoch": 0.46935142009325986, "grad_norm": 1.0088846241710518, "learning_rate": 1.1472058317850423e-05, "loss": 0.9927, "step": 2768 }, { "epoch": 0.469520983467571, "grad_norm": 0.9593836706325065, "learning_rate": 1.1466625634964911e-05, "loss": 0.9577, "step": 2769 }, { "epoch": 0.46969054684188216, "grad_norm": 0.988350591461139, "learning_rate": 1.1461192509667354e-05, "loss": 0.9923, "step": 2770 }, { "epoch": 0.4698601102161933, "grad_norm": 0.9395954297498722, "learning_rate": 1.145575894359666e-05, "loss": 0.9444, "step": 2771 }, { "epoch": 0.47002967359050446, "grad_norm": 1.0066152601115128, "learning_rate": 1.1450324938391886e-05, "loss": 0.9748, "step": 2772 }, { "epoch": 0.4701992369648156, "grad_norm": 0.9831928545299302, "learning_rate": 1.1444890495692214e-05, "loss": 0.9345, "step": 2773 }, { "epoch": 0.47036880033912676, "grad_norm": 0.6802422466498235, "learning_rate": 1.1439455617136962e-05, "loss": 0.7771, "step": 2774 }, { "epoch": 0.4705383637134379, "grad_norm": 0.9669911260090596, "learning_rate": 1.1434020304365578e-05, "loss": 0.9566, "step": 2775 }, { "epoch": 0.47070792708774906, "grad_norm": 0.9316237621690796, "learning_rate": 1.142858455901764e-05, "loss": 0.9789, "step": 2776 }, { "epoch": 0.4708774904620602, "grad_norm": 0.9460866473889332, "learning_rate": 1.1423148382732854e-05, "loss": 0.9879, "step": 2777 }, { "epoch": 0.47104705383637135, "grad_norm": 0.9912339021145382, "learning_rate": 1.1417711777151061e-05, "loss": 1.0006, "step": 2778 }, { "epoch": 0.4712166172106825, "grad_norm": 0.9305160951666769, "learning_rate": 1.141227474391223e-05, "loss": 0.9832, "step": 2779 }, { "epoch": 0.47138618058499365, "grad_norm": 0.9740815736765068, "learning_rate": 1.1406837284656457e-05, "loss": 1.0069, "step": 2780 }, { "epoch": 0.4715557439593048, "grad_norm": 0.9296049467468304, "learning_rate": 1.1401399401023974e-05, "loss": 0.9617, "step": 2781 }, { "epoch": 0.47172530733361595, "grad_norm": 0.9320866836270234, "learning_rate": 1.1395961094655123e-05, "loss": 0.9843, "step": 2782 }, { "epoch": 0.47189487070792707, "grad_norm": 0.9353170617524206, "learning_rate": 1.1390522367190396e-05, "loss": 0.9668, "step": 2783 }, { "epoch": 0.47206443408223825, "grad_norm": 0.9077143992404432, "learning_rate": 1.13850832202704e-05, "loss": 0.9348, "step": 2784 }, { "epoch": 0.47223399745654937, "grad_norm": 0.9461444932481833, "learning_rate": 1.1379643655535869e-05, "loss": 0.9253, "step": 2785 }, { "epoch": 0.47240356083086055, "grad_norm": 0.9762728025009527, "learning_rate": 1.1374203674627661e-05, "loss": 0.9704, "step": 2786 }, { "epoch": 0.47257312420517167, "grad_norm": 0.9496984448671917, "learning_rate": 1.1368763279186765e-05, "loss": 0.9477, "step": 2787 }, { "epoch": 0.47274268757948285, "grad_norm": 0.9605721320318821, "learning_rate": 1.1363322470854294e-05, "loss": 0.999, "step": 2788 }, { "epoch": 0.47291225095379397, "grad_norm": 1.0018562419653159, "learning_rate": 1.1357881251271482e-05, "loss": 0.9615, "step": 2789 }, { "epoch": 0.47308181432810514, "grad_norm": 0.9337119597973723, "learning_rate": 1.1352439622079689e-05, "loss": 0.9737, "step": 2790 }, { "epoch": 0.47325137770241626, "grad_norm": 0.946047007858419, "learning_rate": 1.1346997584920404e-05, "loss": 0.9629, "step": 2791 }, { "epoch": 0.47342094107672744, "grad_norm": 0.9411868766962924, "learning_rate": 1.1341555141435227e-05, "loss": 0.9581, "step": 2792 }, { "epoch": 0.47359050445103856, "grad_norm": 0.9953648116763884, "learning_rate": 1.1336112293265896e-05, "loss": 0.966, "step": 2793 }, { "epoch": 0.47376006782534974, "grad_norm": 0.9760578251261003, "learning_rate": 1.1330669042054252e-05, "loss": 0.9902, "step": 2794 }, { "epoch": 0.47392963119966086, "grad_norm": 0.9861733463258744, "learning_rate": 1.1325225389442278e-05, "loss": 0.9835, "step": 2795 }, { "epoch": 0.47409919457397204, "grad_norm": 0.9691020975178761, "learning_rate": 1.131978133707206e-05, "loss": 0.9916, "step": 2796 }, { "epoch": 0.47426875794828316, "grad_norm": 1.0083477359357926, "learning_rate": 1.1314336886585818e-05, "loss": 0.9901, "step": 2797 }, { "epoch": 0.47443832132259434, "grad_norm": 1.0021452309895236, "learning_rate": 1.1308892039625883e-05, "loss": 1.0061, "step": 2798 }, { "epoch": 0.47460788469690546, "grad_norm": 0.6771713362301471, "learning_rate": 1.1303446797834714e-05, "loss": 0.8476, "step": 2799 }, { "epoch": 0.47477744807121663, "grad_norm": 0.9195331223936284, "learning_rate": 1.1298001162854883e-05, "loss": 0.9253, "step": 2800 }, { "epoch": 0.47494701144552776, "grad_norm": 0.9799791421900205, "learning_rate": 1.1292555136329082e-05, "loss": 0.9605, "step": 2801 }, { "epoch": 0.47511657481983893, "grad_norm": 0.972421064507152, "learning_rate": 1.1287108719900121e-05, "loss": 0.978, "step": 2802 }, { "epoch": 0.47528613819415005, "grad_norm": 0.9443956964967446, "learning_rate": 1.1281661915210931e-05, "loss": 0.9389, "step": 2803 }, { "epoch": 0.47545570156846123, "grad_norm": 0.974196531739681, "learning_rate": 1.127621472390455e-05, "loss": 0.9816, "step": 2804 }, { "epoch": 0.47562526494277235, "grad_norm": 0.967100944436867, "learning_rate": 1.1270767147624146e-05, "loss": 0.9669, "step": 2805 }, { "epoch": 0.47579482831708353, "grad_norm": 0.9352863318882042, "learning_rate": 1.1265319188012995e-05, "loss": 0.9986, "step": 2806 }, { "epoch": 0.47596439169139465, "grad_norm": 0.9687682470496168, "learning_rate": 1.1259870846714488e-05, "loss": 0.992, "step": 2807 }, { "epoch": 0.4761339550657058, "grad_norm": 1.0189588925982813, "learning_rate": 1.1254422125372137e-05, "loss": 1.0128, "step": 2808 }, { "epoch": 0.47630351844001695, "grad_norm": 0.9559207886250073, "learning_rate": 1.1248973025629567e-05, "loss": 0.9723, "step": 2809 }, { "epoch": 0.4764730818143281, "grad_norm": 0.9075739820136581, "learning_rate": 1.1243523549130509e-05, "loss": 0.9778, "step": 2810 }, { "epoch": 0.47664264518863925, "grad_norm": 0.9650468441790101, "learning_rate": 1.123807369751882e-05, "loss": 0.9732, "step": 2811 }, { "epoch": 0.4768122085629504, "grad_norm": 0.9496596416809825, "learning_rate": 1.1232623472438462e-05, "loss": 0.9642, "step": 2812 }, { "epoch": 0.47698177193726154, "grad_norm": 0.9793943299927493, "learning_rate": 1.122717287553351e-05, "loss": 0.9791, "step": 2813 }, { "epoch": 0.4771513353115727, "grad_norm": 0.9434484531633723, "learning_rate": 1.1221721908448156e-05, "loss": 0.9885, "step": 2814 }, { "epoch": 0.47732089868588384, "grad_norm": 0.9517378107627297, "learning_rate": 1.1216270572826697e-05, "loss": 0.9644, "step": 2815 }, { "epoch": 0.477490462060195, "grad_norm": 0.9447726285358997, "learning_rate": 1.1210818870313548e-05, "loss": 0.9838, "step": 2816 }, { "epoch": 0.47766002543450614, "grad_norm": 0.9791740522922071, "learning_rate": 1.1205366802553231e-05, "loss": 0.9941, "step": 2817 }, { "epoch": 0.4778295888088173, "grad_norm": 0.9010910112786696, "learning_rate": 1.1199914371190379e-05, "loss": 0.9751, "step": 2818 }, { "epoch": 0.47799915218312844, "grad_norm": 0.9765388123043496, "learning_rate": 1.1194461577869733e-05, "loss": 0.9983, "step": 2819 }, { "epoch": 0.4781687155574396, "grad_norm": 0.9825045537887909, "learning_rate": 1.1189008424236148e-05, "loss": 0.9557, "step": 2820 }, { "epoch": 0.47833827893175074, "grad_norm": 0.9763148696662606, "learning_rate": 1.1183554911934578e-05, "loss": 0.95, "step": 2821 }, { "epoch": 0.4785078423060619, "grad_norm": 0.9840187905158391, "learning_rate": 1.1178101042610097e-05, "loss": 0.9456, "step": 2822 }, { "epoch": 0.47867740568037304, "grad_norm": 0.6720637902574569, "learning_rate": 1.117264681790788e-05, "loss": 0.8293, "step": 2823 }, { "epoch": 0.4788469690546842, "grad_norm": 0.9912807047773171, "learning_rate": 1.1167192239473211e-05, "loss": 0.9666, "step": 2824 }, { "epoch": 0.47901653242899533, "grad_norm": 1.083525729628238, "learning_rate": 1.1161737308951473e-05, "loss": 1.009, "step": 2825 }, { "epoch": 0.4791860958033065, "grad_norm": 0.9108461908021732, "learning_rate": 1.115628202798817e-05, "loss": 0.971, "step": 2826 }, { "epoch": 0.47935565917761763, "grad_norm": 0.9810903159716253, "learning_rate": 1.1150826398228904e-05, "loss": 0.9537, "step": 2827 }, { "epoch": 0.4795252225519288, "grad_norm": 0.6493094241007337, "learning_rate": 1.1145370421319377e-05, "loss": 0.835, "step": 2828 }, { "epoch": 0.47969478592623993, "grad_norm": 0.9631910051508563, "learning_rate": 1.1139914098905406e-05, "loss": 1.0077, "step": 2829 }, { "epoch": 0.4798643493005511, "grad_norm": 0.9663254668587328, "learning_rate": 1.1134457432632905e-05, "loss": 0.9961, "step": 2830 }, { "epoch": 0.48003391267486223, "grad_norm": 1.0147527220682437, "learning_rate": 1.1129000424147896e-05, "loss": 0.993, "step": 2831 }, { "epoch": 0.4802034760491734, "grad_norm": 0.9740887378030193, "learning_rate": 1.1123543075096498e-05, "loss": 0.9742, "step": 2832 }, { "epoch": 0.4803730394234845, "grad_norm": 0.9685721659669371, "learning_rate": 1.111808538712494e-05, "loss": 0.9679, "step": 2833 }, { "epoch": 0.4805426027977957, "grad_norm": 0.9444620189900742, "learning_rate": 1.1112627361879546e-05, "loss": 0.9523, "step": 2834 }, { "epoch": 0.4807121661721068, "grad_norm": 0.9669478340579718, "learning_rate": 1.110716900100675e-05, "loss": 1.0119, "step": 2835 }, { "epoch": 0.48088172954641795, "grad_norm": 0.9666713557390871, "learning_rate": 1.1101710306153084e-05, "loss": 0.9597, "step": 2836 }, { "epoch": 0.4810512929207291, "grad_norm": 0.973445031849053, "learning_rate": 1.1096251278965173e-05, "loss": 0.9729, "step": 2837 }, { "epoch": 0.48122085629504024, "grad_norm": 0.99955165221053, "learning_rate": 1.1090791921089759e-05, "loss": 0.981, "step": 2838 }, { "epoch": 0.4813904196693514, "grad_norm": 0.9893899567080178, "learning_rate": 1.1085332234173664e-05, "loss": 0.9802, "step": 2839 }, { "epoch": 0.48155998304366254, "grad_norm": 0.9708006838502847, "learning_rate": 1.1079872219863826e-05, "loss": 0.9598, "step": 2840 }, { "epoch": 0.4817295464179737, "grad_norm": 0.9813826122148571, "learning_rate": 1.1074411879807271e-05, "loss": 0.9692, "step": 2841 }, { "epoch": 0.48189910979228484, "grad_norm": 0.9716444981902977, "learning_rate": 1.1068951215651132e-05, "loss": 0.9924, "step": 2842 }, { "epoch": 0.482068673166596, "grad_norm": 0.9363142527072663, "learning_rate": 1.1063490229042626e-05, "loss": 0.9493, "step": 2843 }, { "epoch": 0.48223823654090714, "grad_norm": 0.9660608671676179, "learning_rate": 1.105802892162908e-05, "loss": 0.9795, "step": 2844 }, { "epoch": 0.4824077999152183, "grad_norm": 0.9737525049145669, "learning_rate": 1.1052567295057921e-05, "loss": 0.9487, "step": 2845 }, { "epoch": 0.48257736328952944, "grad_norm": 1.0007023052805744, "learning_rate": 1.1047105350976655e-05, "loss": 0.9436, "step": 2846 }, { "epoch": 0.4827469266638406, "grad_norm": 0.9313339199602495, "learning_rate": 1.1041643091032901e-05, "loss": 0.9658, "step": 2847 }, { "epoch": 0.48291649003815174, "grad_norm": 0.9436495247658401, "learning_rate": 1.103618051687436e-05, "loss": 0.9771, "step": 2848 }, { "epoch": 0.4830860534124629, "grad_norm": 0.9752547055391775, "learning_rate": 1.1030717630148839e-05, "loss": 0.9547, "step": 2849 }, { "epoch": 0.48325561678677403, "grad_norm": 0.9942573743501482, "learning_rate": 1.1025254432504234e-05, "loss": 0.9999, "step": 2850 }, { "epoch": 0.4834251801610852, "grad_norm": 0.9939091504771833, "learning_rate": 1.1019790925588535e-05, "loss": 0.9777, "step": 2851 }, { "epoch": 0.48359474353539633, "grad_norm": 0.9905633325054976, "learning_rate": 1.1014327111049819e-05, "loss": 1.0093, "step": 2852 }, { "epoch": 0.4837643069097075, "grad_norm": 0.9552233625500757, "learning_rate": 1.1008862990536268e-05, "loss": 0.9624, "step": 2853 }, { "epoch": 0.48393387028401863, "grad_norm": 0.9781441331275021, "learning_rate": 1.1003398565696153e-05, "loss": 0.9922, "step": 2854 }, { "epoch": 0.4841034336583298, "grad_norm": 0.9551795529916132, "learning_rate": 1.0997933838177828e-05, "loss": 0.947, "step": 2855 }, { "epoch": 0.48427299703264093, "grad_norm": 0.9353122329094952, "learning_rate": 1.0992468809629749e-05, "loss": 0.9409, "step": 2856 }, { "epoch": 0.4844425604069521, "grad_norm": 0.9800619040088573, "learning_rate": 1.0987003481700456e-05, "loss": 0.9717, "step": 2857 }, { "epoch": 0.4846121237812632, "grad_norm": 0.9826245280042013, "learning_rate": 1.0981537856038584e-05, "loss": 0.9642, "step": 2858 }, { "epoch": 0.4847816871555744, "grad_norm": 0.9468911680078637, "learning_rate": 1.0976071934292854e-05, "loss": 0.9127, "step": 2859 }, { "epoch": 0.4849512505298855, "grad_norm": 0.979572897503583, "learning_rate": 1.0970605718112078e-05, "loss": 0.9969, "step": 2860 }, { "epoch": 0.4851208139041967, "grad_norm": 0.9612814191018793, "learning_rate": 1.0965139209145153e-05, "loss": 0.9779, "step": 2861 }, { "epoch": 0.4852903772785078, "grad_norm": 0.9745193314954537, "learning_rate": 1.0959672409041073e-05, "loss": 0.9389, "step": 2862 }, { "epoch": 0.485459940652819, "grad_norm": 0.9464011194496259, "learning_rate": 1.0954205319448914e-05, "loss": 0.9738, "step": 2863 }, { "epoch": 0.4856295040271301, "grad_norm": 0.999314092772429, "learning_rate": 1.0948737942017838e-05, "loss": 0.9955, "step": 2864 }, { "epoch": 0.4857990674014413, "grad_norm": 0.929910362490679, "learning_rate": 1.0943270278397097e-05, "loss": 0.9672, "step": 2865 }, { "epoch": 0.4859686307757524, "grad_norm": 0.9304272700292321, "learning_rate": 1.093780233023603e-05, "loss": 0.9364, "step": 2866 }, { "epoch": 0.4861381941500636, "grad_norm": 0.9769774525299784, "learning_rate": 1.0932334099184058e-05, "loss": 0.9693, "step": 2867 }, { "epoch": 0.4863077575243747, "grad_norm": 0.937351768846216, "learning_rate": 1.0926865586890689e-05, "loss": 0.9613, "step": 2868 }, { "epoch": 0.4864773208986859, "grad_norm": 1.020892550525075, "learning_rate": 1.0921396795005518e-05, "loss": 0.9438, "step": 2869 }, { "epoch": 0.486646884272997, "grad_norm": 0.9203304923703807, "learning_rate": 1.0915927725178222e-05, "loss": 0.933, "step": 2870 }, { "epoch": 0.4868164476473082, "grad_norm": 1.0009506993998527, "learning_rate": 1.0910458379058559e-05, "loss": 0.9692, "step": 2871 }, { "epoch": 0.4869860110216193, "grad_norm": 1.1315260145565647, "learning_rate": 1.090498875829638e-05, "loss": 0.9736, "step": 2872 }, { "epoch": 0.4871555743959305, "grad_norm": 0.9414408573825512, "learning_rate": 1.0899518864541607e-05, "loss": 0.9798, "step": 2873 }, { "epoch": 0.4873251377702416, "grad_norm": 1.0199135702472304, "learning_rate": 1.0894048699444255e-05, "loss": 0.9968, "step": 2874 }, { "epoch": 0.4874947011445528, "grad_norm": 0.9900960349575106, "learning_rate": 1.088857826465441e-05, "loss": 0.9828, "step": 2875 }, { "epoch": 0.4876642645188639, "grad_norm": 0.9424127123870729, "learning_rate": 1.0883107561822253e-05, "loss": 0.9436, "step": 2876 }, { "epoch": 0.4878338278931751, "grad_norm": 0.9884269481190834, "learning_rate": 1.087763659259803e-05, "loss": 1.0084, "step": 2877 }, { "epoch": 0.4880033912674862, "grad_norm": 0.9757803065886801, "learning_rate": 1.0872165358632083e-05, "loss": 0.9518, "step": 2878 }, { "epoch": 0.4881729546417974, "grad_norm": 0.9247122174281511, "learning_rate": 1.0866693861574817e-05, "loss": 0.9634, "step": 2879 }, { "epoch": 0.4883425180161085, "grad_norm": 0.9631759782867425, "learning_rate": 1.0861222103076732e-05, "loss": 0.97, "step": 2880 }, { "epoch": 0.4885120813904197, "grad_norm": 0.9707089403060241, "learning_rate": 1.08557500847884e-05, "loss": 0.9758, "step": 2881 }, { "epoch": 0.4886816447647308, "grad_norm": 1.0047634371169654, "learning_rate": 1.0850277808360468e-05, "loss": 1.0051, "step": 2882 }, { "epoch": 0.488851208139042, "grad_norm": 0.9361267158586902, "learning_rate": 1.0844805275443673e-05, "loss": 0.9587, "step": 2883 }, { "epoch": 0.4890207715133531, "grad_norm": 0.9633997905161528, "learning_rate": 1.0839332487688812e-05, "loss": 0.965, "step": 2884 }, { "epoch": 0.4891903348876643, "grad_norm": 0.9195097840141965, "learning_rate": 1.0833859446746773e-05, "loss": 0.9397, "step": 2885 }, { "epoch": 0.4893598982619754, "grad_norm": 0.9816701098115935, "learning_rate": 1.0828386154268516e-05, "loss": 0.9688, "step": 2886 }, { "epoch": 0.4895294616362866, "grad_norm": 0.9636355363670693, "learning_rate": 1.082291261190507e-05, "loss": 0.9519, "step": 2887 }, { "epoch": 0.4896990250105977, "grad_norm": 0.9796397963448259, "learning_rate": 1.0817438821307554e-05, "loss": 0.9784, "step": 2888 }, { "epoch": 0.4898685883849089, "grad_norm": 0.9520858824977589, "learning_rate": 1.0811964784127145e-05, "loss": 0.955, "step": 2889 }, { "epoch": 0.49003815175922, "grad_norm": 0.9953003622969355, "learning_rate": 1.080649050201511e-05, "loss": 0.9993, "step": 2890 }, { "epoch": 0.4902077151335312, "grad_norm": 1.011621971328168, "learning_rate": 1.0801015976622778e-05, "loss": 1.0004, "step": 2891 }, { "epoch": 0.4903772785078423, "grad_norm": 0.5966081013257017, "learning_rate": 1.0795541209601561e-05, "loss": 0.8133, "step": 2892 }, { "epoch": 0.4905468418821535, "grad_norm": 1.035474794914511, "learning_rate": 1.0790066202602931e-05, "loss": 1.0254, "step": 2893 }, { "epoch": 0.4907164052564646, "grad_norm": 0.9440224122938426, "learning_rate": 1.0784590957278452e-05, "loss": 0.9625, "step": 2894 }, { "epoch": 0.49088596863077577, "grad_norm": 0.9204862833062184, "learning_rate": 1.0779115475279737e-05, "loss": 0.9646, "step": 2895 }, { "epoch": 0.4910555320050869, "grad_norm": 0.9307037885604663, "learning_rate": 1.0773639758258487e-05, "loss": 0.9777, "step": 2896 }, { "epoch": 0.49122509537939807, "grad_norm": 0.9771607096899959, "learning_rate": 1.076816380786647e-05, "loss": 0.9534, "step": 2897 }, { "epoch": 0.4913946587537092, "grad_norm": 0.9773072368583425, "learning_rate": 1.076268762575552e-05, "loss": 0.9517, "step": 2898 }, { "epoch": 0.49156422212802037, "grad_norm": 1.0006326363280977, "learning_rate": 1.0757211213577543e-05, "loss": 0.9766, "step": 2899 }, { "epoch": 0.4917337855023315, "grad_norm": 1.0170669684583467, "learning_rate": 1.0751734572984518e-05, "loss": 1.0346, "step": 2900 }, { "epoch": 0.49190334887664267, "grad_norm": 0.956575148792613, "learning_rate": 1.0746257705628491e-05, "loss": 1.0059, "step": 2901 }, { "epoch": 0.4920729122509538, "grad_norm": 0.9652347200437185, "learning_rate": 1.074078061316157e-05, "loss": 0.9608, "step": 2902 }, { "epoch": 0.49224247562526496, "grad_norm": 0.9676806835934749, "learning_rate": 1.0735303297235944e-05, "loss": 0.9694, "step": 2903 }, { "epoch": 0.4924120389995761, "grad_norm": 1.0346639528568855, "learning_rate": 1.0729825759503856e-05, "loss": 0.9692, "step": 2904 }, { "epoch": 0.49258160237388726, "grad_norm": 0.9180378119773038, "learning_rate": 1.0724348001617626e-05, "loss": 0.9512, "step": 2905 }, { "epoch": 0.4927511657481984, "grad_norm": 0.6080972743140745, "learning_rate": 1.0718870025229633e-05, "loss": 0.7682, "step": 2906 }, { "epoch": 0.49292072912250956, "grad_norm": 0.9809803690911169, "learning_rate": 1.0713391831992324e-05, "loss": 0.9424, "step": 2907 }, { "epoch": 0.4930902924968207, "grad_norm": 0.9930391794344574, "learning_rate": 1.0707913423558219e-05, "loss": 0.9723, "step": 2908 }, { "epoch": 0.49325985587113186, "grad_norm": 0.9337079932901288, "learning_rate": 1.0702434801579888e-05, "loss": 0.9423, "step": 2909 }, { "epoch": 0.493429419245443, "grad_norm": 0.9933996119126327, "learning_rate": 1.0696955967709982e-05, "loss": 0.9716, "step": 2910 }, { "epoch": 0.49359898261975416, "grad_norm": 0.9714083348549709, "learning_rate": 1.06914769236012e-05, "loss": 1.0411, "step": 2911 }, { "epoch": 0.4937685459940653, "grad_norm": 0.9274656418017742, "learning_rate": 1.0685997670906318e-05, "loss": 1.01, "step": 2912 }, { "epoch": 0.49393810936837645, "grad_norm": 0.9546216973869839, "learning_rate": 1.0680518211278169e-05, "loss": 0.986, "step": 2913 }, { "epoch": 0.4941076727426876, "grad_norm": 1.0016634291536237, "learning_rate": 1.0675038546369645e-05, "loss": 0.9731, "step": 2914 }, { "epoch": 0.49427723611699875, "grad_norm": 1.0161099341494713, "learning_rate": 1.0669558677833707e-05, "loss": 0.9344, "step": 2915 }, { "epoch": 0.4944467994913099, "grad_norm": 0.9748366163279191, "learning_rate": 1.0664078607323367e-05, "loss": 0.9771, "step": 2916 }, { "epoch": 0.49461636286562105, "grad_norm": 0.9657419435484357, "learning_rate": 1.0658598336491715e-05, "loss": 0.9602, "step": 2917 }, { "epoch": 0.4947859262399322, "grad_norm": 0.9817849633090451, "learning_rate": 1.0653117866991884e-05, "loss": 0.9747, "step": 2918 }, { "epoch": 0.49495548961424335, "grad_norm": 1.0085780343771815, "learning_rate": 1.0647637200477077e-05, "loss": 0.9944, "step": 2919 }, { "epoch": 0.49512505298855447, "grad_norm": 0.9481176895888058, "learning_rate": 1.064215633860055e-05, "loss": 0.94, "step": 2920 }, { "epoch": 0.49529461636286565, "grad_norm": 0.9312999622172784, "learning_rate": 1.063667528301563e-05, "loss": 0.9441, "step": 2921 }, { "epoch": 0.49546417973717677, "grad_norm": 0.9799887727456387, "learning_rate": 1.0631194035375685e-05, "loss": 0.9724, "step": 2922 }, { "epoch": 0.49563374311148795, "grad_norm": 0.6105575030169635, "learning_rate": 1.0625712597334155e-05, "loss": 0.7755, "step": 2923 }, { "epoch": 0.49580330648579907, "grad_norm": 0.9719192882833773, "learning_rate": 1.062023097054453e-05, "loss": 0.9647, "step": 2924 }, { "epoch": 0.49597286986011024, "grad_norm": 0.9750845610038188, "learning_rate": 1.0614749156660357e-05, "loss": 0.9411, "step": 2925 }, { "epoch": 0.49614243323442137, "grad_norm": 0.9644441767801027, "learning_rate": 1.060926715733525e-05, "loss": 0.9411, "step": 2926 }, { "epoch": 0.4963119966087325, "grad_norm": 0.9472630444940732, "learning_rate": 1.0603784974222862e-05, "loss": 0.9626, "step": 2927 }, { "epoch": 0.49648155998304366, "grad_norm": 0.9291509705610462, "learning_rate": 1.0598302608976914e-05, "loss": 0.9438, "step": 2928 }, { "epoch": 0.4966511233573548, "grad_norm": 0.9714718917681049, "learning_rate": 1.0592820063251177e-05, "loss": 0.9468, "step": 2929 }, { "epoch": 0.49682068673166596, "grad_norm": 0.9837006935637556, "learning_rate": 1.0587337338699479e-05, "loss": 0.948, "step": 2930 }, { "epoch": 0.4969902501059771, "grad_norm": 0.9689110435583219, "learning_rate": 1.0581854436975699e-05, "loss": 0.9977, "step": 2931 }, { "epoch": 0.49715981348028826, "grad_norm": 0.9195019094075366, "learning_rate": 1.057637135973377e-05, "loss": 0.9479, "step": 2932 }, { "epoch": 0.4973293768545994, "grad_norm": 0.6093520159809416, "learning_rate": 1.0570888108627682e-05, "loss": 0.7789, "step": 2933 }, { "epoch": 0.49749894022891056, "grad_norm": 1.0026689203143766, "learning_rate": 1.056540468531147e-05, "loss": 0.9967, "step": 2934 }, { "epoch": 0.4976685036032217, "grad_norm": 0.9163752125603127, "learning_rate": 1.0559921091439229e-05, "loss": 0.956, "step": 2935 }, { "epoch": 0.49783806697753286, "grad_norm": 0.6210266475224575, "learning_rate": 1.0554437328665099e-05, "loss": 0.787, "step": 2936 }, { "epoch": 0.498007630351844, "grad_norm": 0.9654389042764755, "learning_rate": 1.0548953398643276e-05, "loss": 0.9437, "step": 2937 }, { "epoch": 0.49817719372615515, "grad_norm": 0.9106810632404411, "learning_rate": 1.0543469303028002e-05, "loss": 0.9808, "step": 2938 }, { "epoch": 0.4983467571004663, "grad_norm": 0.9461338095565945, "learning_rate": 1.0537985043473573e-05, "loss": 0.9899, "step": 2939 }, { "epoch": 0.49851632047477745, "grad_norm": 0.9675449794912596, "learning_rate": 1.0532500621634327e-05, "loss": 0.9658, "step": 2940 }, { "epoch": 0.4986858838490886, "grad_norm": 0.9784008593520793, "learning_rate": 1.0527016039164665e-05, "loss": 0.9966, "step": 2941 }, { "epoch": 0.49885544722339975, "grad_norm": 0.9205398819597493, "learning_rate": 1.0521531297719024e-05, "loss": 0.9579, "step": 2942 }, { "epoch": 0.49902501059771087, "grad_norm": 1.0040242007668332, "learning_rate": 1.051604639895189e-05, "loss": 0.9877, "step": 2943 }, { "epoch": 0.49919457397202205, "grad_norm": 0.9670113771094724, "learning_rate": 1.0510561344517802e-05, "loss": 1.0154, "step": 2944 }, { "epoch": 0.49936413734633317, "grad_norm": 0.927835688301253, "learning_rate": 1.0505076136071342e-05, "loss": 0.9557, "step": 2945 }, { "epoch": 0.49953370072064435, "grad_norm": 0.9485398891948655, "learning_rate": 1.0499590775267142e-05, "loss": 0.9839, "step": 2946 }, { "epoch": 0.49970326409495547, "grad_norm": 0.962753380642974, "learning_rate": 1.0494105263759873e-05, "loss": 0.9735, "step": 2947 }, { "epoch": 0.49987282746926665, "grad_norm": 0.9304507409126845, "learning_rate": 1.0488619603204263e-05, "loss": 0.9189, "step": 2948 }, { "epoch": 0.5000423908435778, "grad_norm": 0.9364607690003749, "learning_rate": 1.0483133795255072e-05, "loss": 0.9933, "step": 2949 }, { "epoch": 0.5002119542178889, "grad_norm": 0.8737630997134399, "learning_rate": 1.0477647841567113e-05, "loss": 0.9274, "step": 2950 }, { "epoch": 0.5003815175922001, "grad_norm": 0.9478657756581341, "learning_rate": 1.0472161743795245e-05, "loss": 0.974, "step": 2951 }, { "epoch": 0.5005510809665112, "grad_norm": 0.9985040575291176, "learning_rate": 1.0466675503594354e-05, "loss": 0.9949, "step": 2952 }, { "epoch": 0.5007206443408224, "grad_norm": 0.9818331672238509, "learning_rate": 1.0461189122619394e-05, "loss": 1.0089, "step": 2953 }, { "epoch": 0.5008902077151335, "grad_norm": 0.9568448695807307, "learning_rate": 1.0455702602525338e-05, "loss": 0.9678, "step": 2954 }, { "epoch": 0.5010597710894447, "grad_norm": 1.0665268812904696, "learning_rate": 1.045021594496722e-05, "loss": 0.943, "step": 2955 }, { "epoch": 0.5012293344637558, "grad_norm": 0.9405145666754654, "learning_rate": 1.04447291516001e-05, "loss": 0.9429, "step": 2956 }, { "epoch": 0.501398897838067, "grad_norm": 0.9219737585338114, "learning_rate": 1.043924222407909e-05, "loss": 0.937, "step": 2957 }, { "epoch": 0.5015684612123781, "grad_norm": 0.9655345972241506, "learning_rate": 1.0433755164059333e-05, "loss": 0.96, "step": 2958 }, { "epoch": 0.5017380245866893, "grad_norm": 0.9956097332699669, "learning_rate": 1.0428267973196027e-05, "loss": 0.9983, "step": 2959 }, { "epoch": 0.5019075879610004, "grad_norm": 1.0435488594291606, "learning_rate": 1.0422780653144392e-05, "loss": 0.9839, "step": 2960 }, { "epoch": 0.5020771513353116, "grad_norm": 1.0663192009309412, "learning_rate": 1.0417293205559694e-05, "loss": 0.9953, "step": 2961 }, { "epoch": 0.5022467147096227, "grad_norm": 0.9761557822826532, "learning_rate": 1.0411805632097242e-05, "loss": 0.9361, "step": 2962 }, { "epoch": 0.5024162780839339, "grad_norm": 0.9580962100611554, "learning_rate": 1.0406317934412375e-05, "loss": 0.9801, "step": 2963 }, { "epoch": 0.502585841458245, "grad_norm": 0.9249506342575414, "learning_rate": 1.040083011416048e-05, "loss": 0.9378, "step": 2964 }, { "epoch": 0.5027554048325562, "grad_norm": 0.9969984048374418, "learning_rate": 1.0395342172996969e-05, "loss": 1.0112, "step": 2965 }, { "epoch": 0.5029249682068673, "grad_norm": 0.9965147107649625, "learning_rate": 1.03898541125773e-05, "loss": 0.9719, "step": 2966 }, { "epoch": 0.5030945315811785, "grad_norm": 0.9677148166378071, "learning_rate": 1.0384365934556958e-05, "loss": 0.9577, "step": 2967 }, { "epoch": 0.5032640949554896, "grad_norm": 0.9338204116639005, "learning_rate": 1.0378877640591474e-05, "loss": 0.9552, "step": 2968 }, { "epoch": 0.5034336583298008, "grad_norm": 0.9300671492275149, "learning_rate": 1.0373389232336404e-05, "loss": 0.9503, "step": 2969 }, { "epoch": 0.5036032217041119, "grad_norm": 0.9672889622426338, "learning_rate": 1.0367900711447343e-05, "loss": 0.9743, "step": 2970 }, { "epoch": 0.503772785078423, "grad_norm": 0.9626616731845161, "learning_rate": 1.0362412079579925e-05, "loss": 0.9695, "step": 2971 }, { "epoch": 0.5039423484527342, "grad_norm": 0.9519564028321347, "learning_rate": 1.0356923338389807e-05, "loss": 0.969, "step": 2972 }, { "epoch": 0.5041119118270454, "grad_norm": 1.006600855018696, "learning_rate": 1.0351434489532685e-05, "loss": 0.9701, "step": 2973 }, { "epoch": 0.5042814752013565, "grad_norm": 1.0055354085184096, "learning_rate": 1.034594553466429e-05, "loss": 0.997, "step": 2974 }, { "epoch": 0.5044510385756676, "grad_norm": 0.922806722120795, "learning_rate": 1.034045647544038e-05, "loss": 0.9623, "step": 2975 }, { "epoch": 0.5046206019499788, "grad_norm": 0.99415815482347, "learning_rate": 1.0334967313516743e-05, "loss": 0.97, "step": 2976 }, { "epoch": 0.50479016532429, "grad_norm": 0.9314870741543382, "learning_rate": 1.0329478050549208e-05, "loss": 0.9412, "step": 2977 }, { "epoch": 0.5049597286986011, "grad_norm": 0.9944239460890043, "learning_rate": 1.0323988688193624e-05, "loss": 0.9797, "step": 2978 }, { "epoch": 0.5051292920729122, "grad_norm": 0.9552757776029114, "learning_rate": 1.0318499228105875e-05, "loss": 0.9669, "step": 2979 }, { "epoch": 0.5052988554472234, "grad_norm": 0.9371255377083604, "learning_rate": 1.031300967194187e-05, "loss": 0.9825, "step": 2980 }, { "epoch": 0.5054684188215346, "grad_norm": 0.9754954962516367, "learning_rate": 1.0307520021357552e-05, "loss": 0.9697, "step": 2981 }, { "epoch": 0.5056379821958457, "grad_norm": 0.9679627443272669, "learning_rate": 1.030203027800889e-05, "loss": 0.9528, "step": 2982 }, { "epoch": 0.5058075455701568, "grad_norm": 0.9701192527386252, "learning_rate": 1.0296540443551884e-05, "loss": 0.9409, "step": 2983 }, { "epoch": 0.505977108944468, "grad_norm": 0.9761307665938758, "learning_rate": 1.0291050519642559e-05, "loss": 0.9688, "step": 2984 }, { "epoch": 0.5061466723187792, "grad_norm": 1.0018002448076804, "learning_rate": 1.0285560507936962e-05, "loss": 0.9765, "step": 2985 }, { "epoch": 0.5063162356930903, "grad_norm": 0.9741453527839871, "learning_rate": 1.028007041009118e-05, "loss": 0.9512, "step": 2986 }, { "epoch": 0.5064857990674014, "grad_norm": 0.9822155127050368, "learning_rate": 1.0274580227761313e-05, "loss": 0.9944, "step": 2987 }, { "epoch": 0.5066553624417126, "grad_norm": 0.9607327292511342, "learning_rate": 1.026908996260349e-05, "loss": 0.9784, "step": 2988 }, { "epoch": 0.5068249258160238, "grad_norm": 0.9413785529013019, "learning_rate": 1.0263599616273868e-05, "loss": 0.9576, "step": 2989 }, { "epoch": 0.5069944891903349, "grad_norm": 0.9882109514827956, "learning_rate": 1.0258109190428623e-05, "loss": 0.9583, "step": 2990 }, { "epoch": 0.507164052564646, "grad_norm": 0.9505835701708236, "learning_rate": 1.0252618686723963e-05, "loss": 0.9955, "step": 2991 }, { "epoch": 0.5073336159389572, "grad_norm": 0.9556454802454527, "learning_rate": 1.0247128106816113e-05, "loss": 0.9907, "step": 2992 }, { "epoch": 0.5075031793132684, "grad_norm": 1.0174589943243582, "learning_rate": 1.0241637452361323e-05, "loss": 0.9463, "step": 2993 }, { "epoch": 0.5076727426875794, "grad_norm": 0.9729391181219331, "learning_rate": 1.0236146725015867e-05, "loss": 0.9571, "step": 2994 }, { "epoch": 0.5078423060618906, "grad_norm": 1.0148644473564856, "learning_rate": 1.0230655926436037e-05, "loss": 0.9568, "step": 2995 }, { "epoch": 0.5080118694362018, "grad_norm": 0.6777862321081547, "learning_rate": 1.0225165058278153e-05, "loss": 0.829, "step": 2996 }, { "epoch": 0.508181432810513, "grad_norm": 1.012201917788918, "learning_rate": 1.0219674122198548e-05, "loss": 0.9768, "step": 2997 }, { "epoch": 0.508350996184824, "grad_norm": 1.0324866649739812, "learning_rate": 1.0214183119853583e-05, "loss": 0.9974, "step": 2998 }, { "epoch": 0.5085205595591352, "grad_norm": 0.9824154571991718, "learning_rate": 1.020869205289963e-05, "loss": 0.9232, "step": 2999 }, { "epoch": 0.5086901229334464, "grad_norm": 0.9511751284352469, "learning_rate": 1.020320092299309e-05, "loss": 0.9276, "step": 3000 }, { "epoch": 0.5088596863077576, "grad_norm": 0.9705014870712267, "learning_rate": 1.0197709731790375e-05, "loss": 0.9753, "step": 3001 }, { "epoch": 0.5090292496820686, "grad_norm": 1.0223408137316228, "learning_rate": 1.0192218480947924e-05, "loss": 0.9941, "step": 3002 }, { "epoch": 0.5091988130563798, "grad_norm": 0.9958209748678288, "learning_rate": 1.018672717212219e-05, "loss": 0.9614, "step": 3003 }, { "epoch": 0.509368376430691, "grad_norm": 1.0067427709667378, "learning_rate": 1.018123580696964e-05, "loss": 0.9625, "step": 3004 }, { "epoch": 0.5095379398050022, "grad_norm": 1.0185727236792934, "learning_rate": 1.0175744387146763e-05, "loss": 0.9692, "step": 3005 }, { "epoch": 0.5097075031793132, "grad_norm": 0.897905468371671, "learning_rate": 1.0170252914310059e-05, "loss": 0.8972, "step": 3006 }, { "epoch": 0.5098770665536244, "grad_norm": 1.0066454521929236, "learning_rate": 1.0164761390116052e-05, "loss": 0.999, "step": 3007 }, { "epoch": 0.5100466299279356, "grad_norm": 0.9780826297195727, "learning_rate": 1.015926981622127e-05, "loss": 0.9873, "step": 3008 }, { "epoch": 0.5102161933022468, "grad_norm": 0.9340689904195036, "learning_rate": 1.0153778194282269e-05, "loss": 0.9558, "step": 3009 }, { "epoch": 0.5103857566765578, "grad_norm": 0.9519678555378752, "learning_rate": 1.014828652595561e-05, "loss": 0.9472, "step": 3010 }, { "epoch": 0.510555320050869, "grad_norm": 0.940992409153868, "learning_rate": 1.0142794812897874e-05, "loss": 0.9987, "step": 3011 }, { "epoch": 0.5107248834251802, "grad_norm": 1.0130972467903798, "learning_rate": 1.0137303056765651e-05, "loss": 0.9819, "step": 3012 }, { "epoch": 0.5108944467994914, "grad_norm": 0.9324760322202679, "learning_rate": 1.0131811259215545e-05, "loss": 0.9271, "step": 3013 }, { "epoch": 0.5110640101738024, "grad_norm": 0.915884183897883, "learning_rate": 1.0126319421904179e-05, "loss": 0.9622, "step": 3014 }, { "epoch": 0.5112335735481136, "grad_norm": 0.9391234432530088, "learning_rate": 1.0120827546488175e-05, "loss": 0.9447, "step": 3015 }, { "epoch": 0.5114031369224248, "grad_norm": 0.985767603698776, "learning_rate": 1.011533563462418e-05, "loss": 0.9421, "step": 3016 }, { "epoch": 0.511572700296736, "grad_norm": 0.9876067689540599, "learning_rate": 1.0109843687968837e-05, "loss": 0.9819, "step": 3017 }, { "epoch": 0.511742263671047, "grad_norm": 0.9131684987717541, "learning_rate": 1.0104351708178816e-05, "loss": 0.9064, "step": 3018 }, { "epoch": 0.5119118270453582, "grad_norm": 1.0683342866122005, "learning_rate": 1.0098859696910788e-05, "loss": 1.0151, "step": 3019 }, { "epoch": 0.5120813904196694, "grad_norm": 0.9622501703699717, "learning_rate": 1.0093367655821433e-05, "loss": 0.9712, "step": 3020 }, { "epoch": 0.5122509537939804, "grad_norm": 1.0675165805273066, "learning_rate": 1.0087875586567443e-05, "loss": 0.9321, "step": 3021 }, { "epoch": 0.5124205171682916, "grad_norm": 0.9544672129882473, "learning_rate": 1.0082383490805517e-05, "loss": 0.9858, "step": 3022 }, { "epoch": 0.5125900805426028, "grad_norm": 0.9210008923203861, "learning_rate": 1.0076891370192364e-05, "loss": 0.9839, "step": 3023 }, { "epoch": 0.512759643916914, "grad_norm": 0.9236018487123309, "learning_rate": 1.0071399226384695e-05, "loss": 0.9536, "step": 3024 }, { "epoch": 0.512929207291225, "grad_norm": 0.6141193255011235, "learning_rate": 1.0065907061039234e-05, "loss": 0.8117, "step": 3025 }, { "epoch": 0.5130987706655362, "grad_norm": 0.9973432664803016, "learning_rate": 1.0060414875812709e-05, "loss": 0.9689, "step": 3026 }, { "epoch": 0.5132683340398474, "grad_norm": 0.899647197533492, "learning_rate": 1.0054922672361858e-05, "loss": 0.9385, "step": 3027 }, { "epoch": 0.5134378974141586, "grad_norm": 0.9680158077330153, "learning_rate": 1.0049430452343412e-05, "loss": 0.9833, "step": 3028 }, { "epoch": 0.5136074607884696, "grad_norm": 0.9419715364760196, "learning_rate": 1.004393821741412e-05, "loss": 0.9832, "step": 3029 }, { "epoch": 0.5137770241627808, "grad_norm": 1.0063487453379598, "learning_rate": 1.0038445969230737e-05, "loss": 0.985, "step": 3030 }, { "epoch": 0.513946587537092, "grad_norm": 0.9910409081383873, "learning_rate": 1.0032953709450006e-05, "loss": 0.9566, "step": 3031 }, { "epoch": 0.5141161509114032, "grad_norm": 0.9861444723050511, "learning_rate": 1.0027461439728695e-05, "loss": 0.987, "step": 3032 }, { "epoch": 0.5142857142857142, "grad_norm": 0.9878644367724077, "learning_rate": 1.0021969161723555e-05, "loss": 0.9772, "step": 3033 }, { "epoch": 0.5144552776600254, "grad_norm": 0.9995895925753583, "learning_rate": 1.001647687709135e-05, "loss": 0.9817, "step": 3034 }, { "epoch": 0.5146248410343366, "grad_norm": 0.6285121486823686, "learning_rate": 1.0010984587488846e-05, "loss": 0.8288, "step": 3035 }, { "epoch": 0.5147944044086478, "grad_norm": 0.6566730797848159, "learning_rate": 1.0005492294572806e-05, "loss": 0.8014, "step": 3036 }, { "epoch": 0.5149639677829588, "grad_norm": 0.968587622505413, "learning_rate": 1e-05, "loss": 0.9096, "step": 3037 }, { "epoch": 0.51513353115727, "grad_norm": 0.9887804687730919, "learning_rate": 9.994507705427197e-06, "loss": 1.0124, "step": 3038 }, { "epoch": 0.5153030945315812, "grad_norm": 0.9916987462829414, "learning_rate": 9.989015412511156e-06, "loss": 0.9819, "step": 3039 }, { "epoch": 0.5154726579058924, "grad_norm": 0.9093402823364884, "learning_rate": 9.983523122908653e-06, "loss": 0.974, "step": 3040 }, { "epoch": 0.5156422212802034, "grad_norm": 0.9176245520247509, "learning_rate": 9.97803083827645e-06, "loss": 0.8978, "step": 3041 }, { "epoch": 0.5158117846545146, "grad_norm": 0.9319954736686507, "learning_rate": 9.97253856027131e-06, "loss": 0.9518, "step": 3042 }, { "epoch": 0.5159813480288258, "grad_norm": 0.9020564377447081, "learning_rate": 9.967046290549994e-06, "loss": 0.9325, "step": 3043 }, { "epoch": 0.516150911403137, "grad_norm": 0.9482998731154629, "learning_rate": 9.961554030769267e-06, "loss": 0.9464, "step": 3044 }, { "epoch": 0.516320474777448, "grad_norm": 1.0154247648085624, "learning_rate": 9.956061782585882e-06, "loss": 0.985, "step": 3045 }, { "epoch": 0.5164900381517592, "grad_norm": 0.9212286029238748, "learning_rate": 9.95056954765659e-06, "loss": 0.96, "step": 3046 }, { "epoch": 0.5166596015260704, "grad_norm": 1.0056753686145739, "learning_rate": 9.945077327638146e-06, "loss": 0.9511, "step": 3047 }, { "epoch": 0.5168291649003816, "grad_norm": 0.9487740091666449, "learning_rate": 9.939585124187293e-06, "loss": 0.9773, "step": 3048 }, { "epoch": 0.5169987282746926, "grad_norm": 0.9810533182193518, "learning_rate": 9.934092938960769e-06, "loss": 0.9597, "step": 3049 }, { "epoch": 0.5171682916490038, "grad_norm": 0.917200931335745, "learning_rate": 9.928600773615306e-06, "loss": 0.9067, "step": 3050 }, { "epoch": 0.517337855023315, "grad_norm": 1.0390656651226249, "learning_rate": 9.92310862980764e-06, "loss": 0.977, "step": 3051 }, { "epoch": 0.5175074183976262, "grad_norm": 0.9366534736998925, "learning_rate": 9.917616509194487e-06, "loss": 0.9975, "step": 3052 }, { "epoch": 0.5176769817719372, "grad_norm": 0.9627983455446224, "learning_rate": 9.912124413432562e-06, "loss": 0.9761, "step": 3053 }, { "epoch": 0.5178465451462484, "grad_norm": 0.97130746211671, "learning_rate": 9.906632344178569e-06, "loss": 0.9461, "step": 3054 }, { "epoch": 0.5180161085205596, "grad_norm": 1.0307864298374014, "learning_rate": 9.901140303089216e-06, "loss": 0.9703, "step": 3055 }, { "epoch": 0.5181856718948707, "grad_norm": 0.9653993044907914, "learning_rate": 9.895648291821187e-06, "loss": 0.9681, "step": 3056 }, { "epoch": 0.5183552352691818, "grad_norm": 0.9758498269771244, "learning_rate": 9.890156312031165e-06, "loss": 0.971, "step": 3057 }, { "epoch": 0.518524798643493, "grad_norm": 0.9473653063921438, "learning_rate": 9.884664365375824e-06, "loss": 0.9607, "step": 3058 }, { "epoch": 0.5186943620178042, "grad_norm": 1.0243254509080115, "learning_rate": 9.879172453511827e-06, "loss": 0.9841, "step": 3059 }, { "epoch": 0.5188639253921153, "grad_norm": 0.984989518683435, "learning_rate": 9.873680578095824e-06, "loss": 0.9637, "step": 3060 }, { "epoch": 0.5190334887664264, "grad_norm": 0.9573117848390525, "learning_rate": 9.868188740784455e-06, "loss": 0.9807, "step": 3061 }, { "epoch": 0.5192030521407376, "grad_norm": 0.9059562497684082, "learning_rate": 9.86269694323435e-06, "loss": 0.929, "step": 3062 }, { "epoch": 0.5193726155150488, "grad_norm": 0.9502470282554143, "learning_rate": 9.85720518710213e-06, "loss": 0.9968, "step": 3063 }, { "epoch": 0.5195421788893599, "grad_norm": 0.9657851638421482, "learning_rate": 9.851713474044391e-06, "loss": 0.9568, "step": 3064 }, { "epoch": 0.519711742263671, "grad_norm": 0.9672356013317819, "learning_rate": 9.846221805717734e-06, "loss": 0.9749, "step": 3065 }, { "epoch": 0.5198813056379822, "grad_norm": 0.9340912466841508, "learning_rate": 9.840730183778734e-06, "loss": 0.9653, "step": 3066 }, { "epoch": 0.5200508690122934, "grad_norm": 0.9572911428001519, "learning_rate": 9.835238609883955e-06, "loss": 0.9801, "step": 3067 }, { "epoch": 0.5202204323866045, "grad_norm": 0.9462924610134427, "learning_rate": 9.829747085689943e-06, "loss": 0.9759, "step": 3068 }, { "epoch": 0.5203899957609156, "grad_norm": 0.9172413013391196, "learning_rate": 9.82425561285324e-06, "loss": 0.938, "step": 3069 }, { "epoch": 0.5205595591352268, "grad_norm": 0.9447327106113326, "learning_rate": 9.818764193030364e-06, "loss": 0.9517, "step": 3070 }, { "epoch": 0.520729122509538, "grad_norm": 0.9852280378377472, "learning_rate": 9.813272827877814e-06, "loss": 0.9483, "step": 3071 }, { "epoch": 0.5208986858838491, "grad_norm": 0.9251579287224695, "learning_rate": 9.807781519052075e-06, "loss": 0.9465, "step": 3072 }, { "epoch": 0.5210682492581602, "grad_norm": 0.944118435301543, "learning_rate": 9.802290268209627e-06, "loss": 0.9269, "step": 3073 }, { "epoch": 0.5212378126324714, "grad_norm": 0.9019616032939692, "learning_rate": 9.796799077006916e-06, "loss": 0.9369, "step": 3074 }, { "epoch": 0.5214073760067826, "grad_norm": 0.9650637926858207, "learning_rate": 9.79130794710037e-06, "loss": 0.9641, "step": 3075 }, { "epoch": 0.5215769393810937, "grad_norm": 0.653121613874621, "learning_rate": 9.78581688014642e-06, "loss": 0.7765, "step": 3076 }, { "epoch": 0.5217465027554048, "grad_norm": 0.9175571756893963, "learning_rate": 9.780325877801455e-06, "loss": 0.9065, "step": 3077 }, { "epoch": 0.521916066129716, "grad_norm": 0.9436051084979671, "learning_rate": 9.77483494172185e-06, "loss": 0.9962, "step": 3078 }, { "epoch": 0.5220856295040271, "grad_norm": 0.9729981868750707, "learning_rate": 9.769344073563963e-06, "loss": 0.9223, "step": 3079 }, { "epoch": 0.5222551928783383, "grad_norm": 1.0092633368268316, "learning_rate": 9.763853274984135e-06, "loss": 0.9581, "step": 3080 }, { "epoch": 0.5224247562526494, "grad_norm": 0.9778963346359691, "learning_rate": 9.75836254763868e-06, "loss": 0.949, "step": 3081 }, { "epoch": 0.5225943196269606, "grad_norm": 0.912624606806099, "learning_rate": 9.752871893183886e-06, "loss": 0.9711, "step": 3082 }, { "epoch": 0.5227638830012717, "grad_norm": 0.92029296626889, "learning_rate": 9.747381313276039e-06, "loss": 0.9111, "step": 3083 }, { "epoch": 0.5229334463755829, "grad_norm": 0.9320979373138509, "learning_rate": 9.74189080957138e-06, "loss": 0.9464, "step": 3084 }, { "epoch": 0.523103009749894, "grad_norm": 0.9703105138491713, "learning_rate": 9.736400383726138e-06, "loss": 0.9342, "step": 3085 }, { "epoch": 0.5232725731242052, "grad_norm": 0.9245762093095746, "learning_rate": 9.730910037396514e-06, "loss": 0.9452, "step": 3086 }, { "epoch": 0.5234421364985163, "grad_norm": 1.1005781231672964, "learning_rate": 9.725419772238688e-06, "loss": 0.9697, "step": 3087 }, { "epoch": 0.5236116998728275, "grad_norm": 0.9488140372655249, "learning_rate": 9.719929589908823e-06, "loss": 0.9531, "step": 3088 }, { "epoch": 0.5237812632471386, "grad_norm": 0.929861906985759, "learning_rate": 9.71443949206304e-06, "loss": 0.961, "step": 3089 }, { "epoch": 0.5239508266214498, "grad_norm": 0.9505780217453594, "learning_rate": 9.708949480357445e-06, "loss": 0.9577, "step": 3090 }, { "epoch": 0.5241203899957609, "grad_norm": 0.977896863661659, "learning_rate": 9.703459556448119e-06, "loss": 0.9972, "step": 3091 }, { "epoch": 0.5242899533700721, "grad_norm": 0.598807210167383, "learning_rate": 9.697969721991114e-06, "loss": 0.7694, "step": 3092 }, { "epoch": 0.5244595167443832, "grad_norm": 0.9599633674631082, "learning_rate": 9.69247997864245e-06, "loss": 0.9581, "step": 3093 }, { "epoch": 0.5246290801186944, "grad_norm": 0.9599831766324823, "learning_rate": 9.686990328058134e-06, "loss": 0.9313, "step": 3094 }, { "epoch": 0.5247986434930055, "grad_norm": 0.9777655923261855, "learning_rate": 9.68150077189413e-06, "loss": 0.956, "step": 3095 }, { "epoch": 0.5249682068673167, "grad_norm": 1.0112682590547484, "learning_rate": 9.67601131180638e-06, "loss": 0.9849, "step": 3096 }, { "epoch": 0.5251377702416278, "grad_norm": 0.9246260336428133, "learning_rate": 9.670521949450793e-06, "loss": 0.9258, "step": 3097 }, { "epoch": 0.525307333615939, "grad_norm": 0.9254524304974351, "learning_rate": 9.665032686483259e-06, "loss": 0.9731, "step": 3098 }, { "epoch": 0.5254768969902501, "grad_norm": 0.9528109123354909, "learning_rate": 9.659543524559626e-06, "loss": 0.9428, "step": 3099 }, { "epoch": 0.5256464603645613, "grad_norm": 0.9479190221075431, "learning_rate": 9.654054465335712e-06, "loss": 0.9291, "step": 3100 }, { "epoch": 0.5258160237388724, "grad_norm": 0.9632111521628236, "learning_rate": 9.648565510467316e-06, "loss": 0.9795, "step": 3101 }, { "epoch": 0.5259855871131836, "grad_norm": 0.9673157350605677, "learning_rate": 9.643076661610197e-06, "loss": 0.9901, "step": 3102 }, { "epoch": 0.5261551504874947, "grad_norm": 0.9657160832553565, "learning_rate": 9.63758792042008e-06, "loss": 0.9428, "step": 3103 }, { "epoch": 0.5263247138618059, "grad_norm": 0.9936139466607169, "learning_rate": 9.632099288552657e-06, "loss": 0.9935, "step": 3104 }, { "epoch": 0.526494277236117, "grad_norm": 1.0295104051542312, "learning_rate": 9.6266107676636e-06, "loss": 0.9641, "step": 3105 }, { "epoch": 0.5266638406104281, "grad_norm": 0.9720985075142662, "learning_rate": 9.62112235940853e-06, "loss": 0.9647, "step": 3106 }, { "epoch": 0.5268334039847393, "grad_norm": 0.9397754196534936, "learning_rate": 9.615634065443044e-06, "loss": 0.9409, "step": 3107 }, { "epoch": 0.5270029673590505, "grad_norm": 1.00418036818939, "learning_rate": 9.610145887422703e-06, "loss": 0.9507, "step": 3108 }, { "epoch": 0.5271725307333616, "grad_norm": 0.9654893111080153, "learning_rate": 9.604657827003033e-06, "loss": 0.9517, "step": 3109 }, { "epoch": 0.5273420941076727, "grad_norm": 0.9680503255523386, "learning_rate": 9.599169885839523e-06, "loss": 0.9578, "step": 3110 }, { "epoch": 0.5275116574819839, "grad_norm": 0.9512270571636512, "learning_rate": 9.593682065587625e-06, "loss": 0.9701, "step": 3111 }, { "epoch": 0.527681220856295, "grad_norm": 1.000108982315616, "learning_rate": 9.588194367902761e-06, "loss": 0.9728, "step": 3112 }, { "epoch": 0.5278507842306062, "grad_norm": 0.9172975872203326, "learning_rate": 9.58270679444031e-06, "loss": 0.9514, "step": 3113 }, { "epoch": 0.5280203476049173, "grad_norm": 0.9279876688315852, "learning_rate": 9.577219346855613e-06, "loss": 0.9692, "step": 3114 }, { "epoch": 0.5281899109792285, "grad_norm": 0.9608482931857734, "learning_rate": 9.571732026803978e-06, "loss": 0.9724, "step": 3115 }, { "epoch": 0.5283594743535396, "grad_norm": 1.030356602281444, "learning_rate": 9.566244835940668e-06, "loss": 0.9946, "step": 3116 }, { "epoch": 0.5285290377278508, "grad_norm": 0.9386481852759996, "learning_rate": 9.560757775920917e-06, "loss": 0.9533, "step": 3117 }, { "epoch": 0.5286986011021619, "grad_norm": 0.9592496648790197, "learning_rate": 9.555270848399902e-06, "loss": 0.9567, "step": 3118 }, { "epoch": 0.5288681644764731, "grad_norm": 1.0126635642805253, "learning_rate": 9.549784055032785e-06, "loss": 0.9814, "step": 3119 }, { "epoch": 0.5290377278507842, "grad_norm": 0.9587447459707307, "learning_rate": 9.544297397474665e-06, "loss": 0.9446, "step": 3120 }, { "epoch": 0.5292072912250954, "grad_norm": 0.5734116702195434, "learning_rate": 9.538810877380611e-06, "loss": 0.766, "step": 3121 }, { "epoch": 0.5293768545994065, "grad_norm": 0.9037505519348362, "learning_rate": 9.533324496405647e-06, "loss": 0.8945, "step": 3122 }, { "epoch": 0.5295464179737177, "grad_norm": 0.9697862037813453, "learning_rate": 9.52783825620476e-06, "loss": 0.9534, "step": 3123 }, { "epoch": 0.5297159813480288, "grad_norm": 0.9972142728693013, "learning_rate": 9.522352158432889e-06, "loss": 0.9655, "step": 3124 }, { "epoch": 0.52988554472234, "grad_norm": 1.0022756206960943, "learning_rate": 9.516866204744932e-06, "loss": 0.9301, "step": 3125 }, { "epoch": 0.5300551080966511, "grad_norm": 1.0229288603992286, "learning_rate": 9.511380396795739e-06, "loss": 1.0038, "step": 3126 }, { "epoch": 0.5302246714709623, "grad_norm": 0.9491599002275521, "learning_rate": 9.50589473624013e-06, "loss": 0.939, "step": 3127 }, { "epoch": 0.5303942348452734, "grad_norm": 0.9551154693589398, "learning_rate": 9.500409224732863e-06, "loss": 0.9466, "step": 3128 }, { "epoch": 0.5305637982195845, "grad_norm": 0.9585231766092215, "learning_rate": 9.49492386392866e-06, "loss": 0.9607, "step": 3129 }, { "epoch": 0.5307333615938957, "grad_norm": 1.0014943233391826, "learning_rate": 9.489438655482201e-06, "loss": 0.9353, "step": 3130 }, { "epoch": 0.5309029249682069, "grad_norm": 0.9649535412939261, "learning_rate": 9.483953601048116e-06, "loss": 0.9572, "step": 3131 }, { "epoch": 0.531072488342518, "grad_norm": 0.5954540657336224, "learning_rate": 9.478468702280981e-06, "loss": 0.8185, "step": 3132 }, { "epoch": 0.5312420517168291, "grad_norm": 0.9948438984499713, "learning_rate": 9.472983960835338e-06, "loss": 0.9707, "step": 3133 }, { "epoch": 0.5314116150911403, "grad_norm": 0.608261270781024, "learning_rate": 9.467499378365675e-06, "loss": 0.7614, "step": 3134 }, { "epoch": 0.5315811784654515, "grad_norm": 0.9663545095139282, "learning_rate": 9.462014956526433e-06, "loss": 0.9818, "step": 3135 }, { "epoch": 0.5317507418397626, "grad_norm": 0.979400893361934, "learning_rate": 9.456530696972e-06, "loss": 1.0009, "step": 3136 }, { "epoch": 0.5319203052140737, "grad_norm": 0.9680298298339641, "learning_rate": 9.451046601356725e-06, "loss": 0.953, "step": 3137 }, { "epoch": 0.5320898685883849, "grad_norm": 1.0595822137042479, "learning_rate": 9.445562671334903e-06, "loss": 1.0015, "step": 3138 }, { "epoch": 0.5322594319626961, "grad_norm": 0.9868794204913204, "learning_rate": 9.440078908560776e-06, "loss": 0.9934, "step": 3139 }, { "epoch": 0.5324289953370072, "grad_norm": 0.9686333648816564, "learning_rate": 9.434595314688531e-06, "loss": 0.9371, "step": 3140 }, { "epoch": 0.5325985587113183, "grad_norm": 0.98517165692237, "learning_rate": 9.42911189137232e-06, "loss": 0.9592, "step": 3141 }, { "epoch": 0.5327681220856295, "grad_norm": 0.9557894036770974, "learning_rate": 9.423628640266232e-06, "loss": 0.9609, "step": 3142 }, { "epoch": 0.5329376854599407, "grad_norm": 0.9902892129234298, "learning_rate": 9.418145563024303e-06, "loss": 0.9895, "step": 3143 }, { "epoch": 0.5331072488342518, "grad_norm": 1.014496521744886, "learning_rate": 9.412662661300523e-06, "loss": 0.9996, "step": 3144 }, { "epoch": 0.5332768122085629, "grad_norm": 1.0140747696905492, "learning_rate": 9.407179936748827e-06, "loss": 1.0109, "step": 3145 }, { "epoch": 0.5334463755828741, "grad_norm": 1.0113259951731863, "learning_rate": 9.40169739102309e-06, "loss": 0.9836, "step": 3146 }, { "epoch": 0.5336159389571853, "grad_norm": 0.9764244353727426, "learning_rate": 9.39621502577714e-06, "loss": 0.8975, "step": 3147 }, { "epoch": 0.5337855023314964, "grad_norm": 0.9332875412728099, "learning_rate": 9.390732842664753e-06, "loss": 0.9862, "step": 3148 }, { "epoch": 0.5339550657058075, "grad_norm": 0.9488158876317377, "learning_rate": 9.385250843339644e-06, "loss": 0.9633, "step": 3149 }, { "epoch": 0.5341246290801187, "grad_norm": 0.9394539013784423, "learning_rate": 9.379769029455474e-06, "loss": 0.9513, "step": 3150 }, { "epoch": 0.5342941924544299, "grad_norm": 0.9978122925358326, "learning_rate": 9.374287402665848e-06, "loss": 0.9405, "step": 3151 }, { "epoch": 0.534463755828741, "grad_norm": 0.9838054263417751, "learning_rate": 9.368805964624318e-06, "loss": 0.9563, "step": 3152 }, { "epoch": 0.5346333192030521, "grad_norm": 0.9591081807956522, "learning_rate": 9.363324716984375e-06, "loss": 0.972, "step": 3153 }, { "epoch": 0.5348028825773633, "grad_norm": 0.9606133111021694, "learning_rate": 9.357843661399447e-06, "loss": 0.9483, "step": 3154 }, { "epoch": 0.5349724459516745, "grad_norm": 1.0056177630456957, "learning_rate": 9.352362799522925e-06, "loss": 0.956, "step": 3155 }, { "epoch": 0.5351420093259855, "grad_norm": 0.9501690288889847, "learning_rate": 9.34688213300812e-06, "loss": 0.9346, "step": 3156 }, { "epoch": 0.5353115727002967, "grad_norm": 1.001928322467254, "learning_rate": 9.34140166350829e-06, "loss": 0.9697, "step": 3157 }, { "epoch": 0.5354811360746079, "grad_norm": 0.980340152009788, "learning_rate": 9.335921392676633e-06, "loss": 0.9822, "step": 3158 }, { "epoch": 0.5356506994489191, "grad_norm": 0.9094156985985744, "learning_rate": 9.330441322166297e-06, "loss": 0.938, "step": 3159 }, { "epoch": 0.5358202628232301, "grad_norm": 0.9587342032860251, "learning_rate": 9.32496145363036e-06, "loss": 0.9681, "step": 3160 }, { "epoch": 0.5359898261975413, "grad_norm": 0.9424515384650101, "learning_rate": 9.319481788721833e-06, "loss": 0.9498, "step": 3161 }, { "epoch": 0.5361593895718525, "grad_norm": 0.9408213362217274, "learning_rate": 9.314002329093684e-06, "loss": 0.961, "step": 3162 }, { "epoch": 0.5363289529461637, "grad_norm": 0.9415935843340619, "learning_rate": 9.308523076398803e-06, "loss": 0.9821, "step": 3163 }, { "epoch": 0.5364985163204747, "grad_norm": 0.9540494433492968, "learning_rate": 9.303044032290023e-06, "loss": 0.9603, "step": 3164 }, { "epoch": 0.5366680796947859, "grad_norm": 0.953158803484815, "learning_rate": 9.297565198420112e-06, "loss": 0.9487, "step": 3165 }, { "epoch": 0.5368376430690971, "grad_norm": 0.9656874069082125, "learning_rate": 9.292086576441784e-06, "loss": 0.9607, "step": 3166 }, { "epoch": 0.5370072064434083, "grad_norm": 0.8936309077696528, "learning_rate": 9.286608168007678e-06, "loss": 0.9236, "step": 3167 }, { "epoch": 0.5371767698177193, "grad_norm": 0.935170897507696, "learning_rate": 9.281129974770372e-06, "loss": 0.9407, "step": 3168 }, { "epoch": 0.5373463331920305, "grad_norm": 0.9213500287711175, "learning_rate": 9.275651998382377e-06, "loss": 0.9207, "step": 3169 }, { "epoch": 0.5375158965663417, "grad_norm": 0.95949053164204, "learning_rate": 9.270174240496147e-06, "loss": 0.9731, "step": 3170 }, { "epoch": 0.5376854599406529, "grad_norm": 0.8932953520038451, "learning_rate": 9.264696702764058e-06, "loss": 0.9057, "step": 3171 }, { "epoch": 0.5378550233149639, "grad_norm": 0.9555131490695726, "learning_rate": 9.25921938683843e-06, "loss": 0.972, "step": 3172 }, { "epoch": 0.5380245866892751, "grad_norm": 0.9427691826625484, "learning_rate": 9.253742294371512e-06, "loss": 0.8964, "step": 3173 }, { "epoch": 0.5381941500635863, "grad_norm": 0.9624938890211117, "learning_rate": 9.248265427015486e-06, "loss": 0.9827, "step": 3174 }, { "epoch": 0.5383637134378975, "grad_norm": 0.9744558126028329, "learning_rate": 9.24278878642246e-06, "loss": 0.9622, "step": 3175 }, { "epoch": 0.5385332768122085, "grad_norm": 1.049256052471717, "learning_rate": 9.237312374244482e-06, "loss": 0.9656, "step": 3176 }, { "epoch": 0.5387028401865197, "grad_norm": 0.9388973685122264, "learning_rate": 9.231836192133532e-06, "loss": 0.9773, "step": 3177 }, { "epoch": 0.5388724035608309, "grad_norm": 1.0299849643327963, "learning_rate": 9.226360241741515e-06, "loss": 0.9477, "step": 3178 }, { "epoch": 0.5390419669351421, "grad_norm": 1.038873807945864, "learning_rate": 9.220884524720265e-06, "loss": 0.9722, "step": 3179 }, { "epoch": 0.5392115303094531, "grad_norm": 0.9982347659838471, "learning_rate": 9.215409042721553e-06, "loss": 0.9635, "step": 3180 }, { "epoch": 0.5393810936837643, "grad_norm": 0.9981752028964325, "learning_rate": 9.20993379739707e-06, "loss": 1.0005, "step": 3181 }, { "epoch": 0.5395506570580755, "grad_norm": 0.9504701891411228, "learning_rate": 9.204458790398446e-06, "loss": 0.9732, "step": 3182 }, { "epoch": 0.5397202204323867, "grad_norm": 0.9164495220540729, "learning_rate": 9.198984023377222e-06, "loss": 0.9485, "step": 3183 }, { "epoch": 0.5398897838066977, "grad_norm": 0.9592104408855677, "learning_rate": 9.193509497984892e-06, "loss": 0.9495, "step": 3184 }, { "epoch": 0.5400593471810089, "grad_norm": 0.9432299738872578, "learning_rate": 9.188035215872858e-06, "loss": 0.9553, "step": 3185 }, { "epoch": 0.5402289105553201, "grad_norm": 0.9978838868228843, "learning_rate": 9.182561178692453e-06, "loss": 0.9198, "step": 3186 }, { "epoch": 0.5403984739296313, "grad_norm": 0.6143182536374958, "learning_rate": 9.177087388094931e-06, "loss": 0.7753, "step": 3187 }, { "epoch": 0.5405680373039423, "grad_norm": 0.9604696315916927, "learning_rate": 9.171613845731489e-06, "loss": 0.9546, "step": 3188 }, { "epoch": 0.5407376006782535, "grad_norm": 0.9815014395974875, "learning_rate": 9.16614055325323e-06, "loss": 0.9706, "step": 3189 }, { "epoch": 0.5409071640525647, "grad_norm": 0.970522313416515, "learning_rate": 9.16066751231119e-06, "loss": 0.9785, "step": 3190 }, { "epoch": 0.5410767274268758, "grad_norm": 1.0064858052398356, "learning_rate": 9.15519472455633e-06, "loss": 0.9998, "step": 3191 }, { "epoch": 0.5412462908011869, "grad_norm": 0.9396538607117793, "learning_rate": 9.149722191639534e-06, "loss": 0.9318, "step": 3192 }, { "epoch": 0.5414158541754981, "grad_norm": 0.988232600138124, "learning_rate": 9.144249915211605e-06, "loss": 0.9932, "step": 3193 }, { "epoch": 0.5415854175498093, "grad_norm": 0.9236500547872332, "learning_rate": 9.13877789692327e-06, "loss": 0.935, "step": 3194 }, { "epoch": 0.5417549809241204, "grad_norm": 0.9618613133695751, "learning_rate": 9.133306138425186e-06, "loss": 0.967, "step": 3195 }, { "epoch": 0.5419245442984315, "grad_norm": 1.0072618256658485, "learning_rate": 9.127834641367924e-06, "loss": 0.9615, "step": 3196 }, { "epoch": 0.5420941076727427, "grad_norm": 0.9072413725698752, "learning_rate": 9.122363407401971e-06, "loss": 0.9374, "step": 3197 }, { "epoch": 0.5422636710470539, "grad_norm": 1.0080042482678069, "learning_rate": 9.11689243817775e-06, "loss": 1.0207, "step": 3198 }, { "epoch": 0.542433234421365, "grad_norm": 0.9614582449717499, "learning_rate": 9.111421735345593e-06, "loss": 0.993, "step": 3199 }, { "epoch": 0.5426027977956761, "grad_norm": 0.9359002999365073, "learning_rate": 9.10595130055575e-06, "loss": 0.9017, "step": 3200 }, { "epoch": 0.5427723611699873, "grad_norm": 0.9557910737230959, "learning_rate": 9.100481135458393e-06, "loss": 0.9541, "step": 3201 }, { "epoch": 0.5429419245442985, "grad_norm": 0.9548124066230677, "learning_rate": 9.095011241703623e-06, "loss": 0.9664, "step": 3202 }, { "epoch": 0.5431114879186095, "grad_norm": 0.9861999954084409, "learning_rate": 9.089541620941443e-06, "loss": 0.9865, "step": 3203 }, { "epoch": 0.5432810512929207, "grad_norm": 0.5916469552588054, "learning_rate": 9.084072274821783e-06, "loss": 0.776, "step": 3204 }, { "epoch": 0.5434506146672319, "grad_norm": 0.9700546761275866, "learning_rate": 9.078603204994484e-06, "loss": 0.9661, "step": 3205 }, { "epoch": 0.5436201780415431, "grad_norm": 0.9785307286392297, "learning_rate": 9.073134413109313e-06, "loss": 0.9593, "step": 3206 }, { "epoch": 0.5437897414158541, "grad_norm": 0.9971613670262534, "learning_rate": 9.067665900815945e-06, "loss": 0.9566, "step": 3207 }, { "epoch": 0.5439593047901653, "grad_norm": 0.9610277355221418, "learning_rate": 9.06219766976397e-06, "loss": 0.9812, "step": 3208 }, { "epoch": 0.5441288681644765, "grad_norm": 0.9785820995579787, "learning_rate": 9.056729721602904e-06, "loss": 0.9607, "step": 3209 }, { "epoch": 0.5442984315387877, "grad_norm": 0.9610424481457893, "learning_rate": 9.051262057982165e-06, "loss": 0.967, "step": 3210 }, { "epoch": 0.5444679949130987, "grad_norm": 0.9127160968897032, "learning_rate": 9.04579468055109e-06, "loss": 0.9483, "step": 3211 }, { "epoch": 0.5446375582874099, "grad_norm": 1.018936165202702, "learning_rate": 9.040327590958929e-06, "loss": 0.9732, "step": 3212 }, { "epoch": 0.5448071216617211, "grad_norm": 0.9288802712649358, "learning_rate": 9.034860790854848e-06, "loss": 0.9436, "step": 3213 }, { "epoch": 0.5449766850360322, "grad_norm": 0.9614540669930163, "learning_rate": 9.029394281887927e-06, "loss": 0.9518, "step": 3214 }, { "epoch": 0.5451462484103433, "grad_norm": 0.971333529820219, "learning_rate": 9.02392806570715e-06, "loss": 0.9531, "step": 3215 }, { "epoch": 0.5453158117846545, "grad_norm": 0.9343879263667006, "learning_rate": 9.01846214396142e-06, "loss": 0.9616, "step": 3216 }, { "epoch": 0.5454853751589657, "grad_norm": 0.9848840367297588, "learning_rate": 9.012996518299547e-06, "loss": 0.98, "step": 3217 }, { "epoch": 0.5456549385332768, "grad_norm": 0.9997035590913115, "learning_rate": 9.007531190370256e-06, "loss": 0.9429, "step": 3218 }, { "epoch": 0.5458245019075879, "grad_norm": 0.6256102935222682, "learning_rate": 9.002066161822174e-06, "loss": 0.7813, "step": 3219 }, { "epoch": 0.5459940652818991, "grad_norm": 0.9517302996060777, "learning_rate": 8.99660143430385e-06, "loss": 0.9626, "step": 3220 }, { "epoch": 0.5461636286562103, "grad_norm": 0.931969385105438, "learning_rate": 8.991137009463735e-06, "loss": 1.0092, "step": 3221 }, { "epoch": 0.5463331920305214, "grad_norm": 0.9332527093376284, "learning_rate": 8.985672888950186e-06, "loss": 0.9688, "step": 3222 }, { "epoch": 0.5465027554048325, "grad_norm": 0.9833687496890873, "learning_rate": 8.980209074411469e-06, "loss": 0.9739, "step": 3223 }, { "epoch": 0.5466723187791437, "grad_norm": 0.9463255894954833, "learning_rate": 8.974745567495768e-06, "loss": 0.988, "step": 3224 }, { "epoch": 0.5468418821534549, "grad_norm": 0.939352245355736, "learning_rate": 8.969282369851163e-06, "loss": 0.9423, "step": 3225 }, { "epoch": 0.547011445527766, "grad_norm": 0.9309724381358758, "learning_rate": 8.963819483125642e-06, "loss": 0.9529, "step": 3226 }, { "epoch": 0.5471810089020771, "grad_norm": 0.9335742657859284, "learning_rate": 8.958356908967104e-06, "loss": 0.9546, "step": 3227 }, { "epoch": 0.5473505722763883, "grad_norm": 0.9736911346638419, "learning_rate": 8.952894649023348e-06, "loss": 0.9486, "step": 3228 }, { "epoch": 0.5475201356506995, "grad_norm": 0.9527883544967858, "learning_rate": 8.947432704942085e-06, "loss": 0.959, "step": 3229 }, { "epoch": 0.5476896990250106, "grad_norm": 0.9312295195517352, "learning_rate": 8.94197107837092e-06, "loss": 0.9481, "step": 3230 }, { "epoch": 0.5478592623993217, "grad_norm": 0.9594724220331561, "learning_rate": 8.936509770957377e-06, "loss": 0.9518, "step": 3231 }, { "epoch": 0.5480288257736329, "grad_norm": 0.9431230233611217, "learning_rate": 8.931048784348875e-06, "loss": 0.9311, "step": 3232 }, { "epoch": 0.548198389147944, "grad_norm": 0.9464773100937168, "learning_rate": 8.92558812019273e-06, "loss": 0.9446, "step": 3233 }, { "epoch": 0.5483679525222552, "grad_norm": 0.9628701084528413, "learning_rate": 8.920127780136177e-06, "loss": 0.9982, "step": 3234 }, { "epoch": 0.5485375158965663, "grad_norm": 0.961296693273085, "learning_rate": 8.91466776582634e-06, "loss": 0.9466, "step": 3235 }, { "epoch": 0.5487070792708775, "grad_norm": 1.0040081228225466, "learning_rate": 8.909208078910246e-06, "loss": 0.9746, "step": 3236 }, { "epoch": 0.5488766426451887, "grad_norm": 0.9416099883424593, "learning_rate": 8.903748721034827e-06, "loss": 0.993, "step": 3237 }, { "epoch": 0.5490462060194998, "grad_norm": 0.9669934175172287, "learning_rate": 8.89828969384692e-06, "loss": 0.9672, "step": 3238 }, { "epoch": 0.5492157693938109, "grad_norm": 0.9980527104969046, "learning_rate": 8.892830998993253e-06, "loss": 0.9456, "step": 3239 }, { "epoch": 0.5493853327681221, "grad_norm": 0.9810865142720385, "learning_rate": 8.887372638120459e-06, "loss": 0.9521, "step": 3240 }, { "epoch": 0.5495548961424332, "grad_norm": 0.9830904216919492, "learning_rate": 8.881914612875062e-06, "loss": 0.9766, "step": 3241 }, { "epoch": 0.5497244595167444, "grad_norm": 0.9387194021504626, "learning_rate": 8.876456924903505e-06, "loss": 0.942, "step": 3242 }, { "epoch": 0.5498940228910555, "grad_norm": 0.936579158480159, "learning_rate": 8.870999575852108e-06, "loss": 0.9482, "step": 3243 }, { "epoch": 0.5500635862653667, "grad_norm": 0.9514138273405043, "learning_rate": 8.865542567367096e-06, "loss": 0.9628, "step": 3244 }, { "epoch": 0.5502331496396778, "grad_norm": 0.9781603168874184, "learning_rate": 8.860085901094595e-06, "loss": 0.9689, "step": 3245 }, { "epoch": 0.550402713013989, "grad_norm": 0.9645598937881817, "learning_rate": 8.854629578680625e-06, "loss": 0.9816, "step": 3246 }, { "epoch": 0.5505722763883001, "grad_norm": 0.9785156679703835, "learning_rate": 8.849173601771101e-06, "loss": 0.9678, "step": 3247 }, { "epoch": 0.5507418397626113, "grad_norm": 0.9576872279764431, "learning_rate": 8.84371797201183e-06, "loss": 0.9473, "step": 3248 }, { "epoch": 0.5509114031369224, "grad_norm": 0.9435233766027598, "learning_rate": 8.838262691048529e-06, "loss": 0.9481, "step": 3249 }, { "epoch": 0.5510809665112336, "grad_norm": 0.9271689747735359, "learning_rate": 8.832807760526796e-06, "loss": 0.9424, "step": 3250 }, { "epoch": 0.5512505298855447, "grad_norm": 0.9343325739945719, "learning_rate": 8.827353182092123e-06, "loss": 0.9391, "step": 3251 }, { "epoch": 0.5514200932598559, "grad_norm": 0.9749226501850451, "learning_rate": 8.821898957389906e-06, "loss": 0.9569, "step": 3252 }, { "epoch": 0.551589656634167, "grad_norm": 0.8984199695950069, "learning_rate": 8.816445088065425e-06, "loss": 0.9358, "step": 3253 }, { "epoch": 0.5517592200084782, "grad_norm": 0.9975603942053386, "learning_rate": 8.810991575763857e-06, "loss": 0.947, "step": 3254 }, { "epoch": 0.5519287833827893, "grad_norm": 0.9386562650616195, "learning_rate": 8.805538422130268e-06, "loss": 0.9723, "step": 3255 }, { "epoch": 0.5520983467571005, "grad_norm": 0.9317845688007161, "learning_rate": 8.800085628809623e-06, "loss": 0.9755, "step": 3256 }, { "epoch": 0.5522679101314116, "grad_norm": 0.9692964648436307, "learning_rate": 8.79463319744677e-06, "loss": 0.9463, "step": 3257 }, { "epoch": 0.5524374735057228, "grad_norm": 0.9410608044864087, "learning_rate": 8.789181129686452e-06, "loss": 0.922, "step": 3258 }, { "epoch": 0.5526070368800339, "grad_norm": 0.99818905255236, "learning_rate": 8.783729427173304e-06, "loss": 0.9579, "step": 3259 }, { "epoch": 0.552776600254345, "grad_norm": 1.0155599095629428, "learning_rate": 8.778278091551848e-06, "loss": 0.973, "step": 3260 }, { "epoch": 0.5529461636286562, "grad_norm": 0.9806864665825908, "learning_rate": 8.772827124466495e-06, "loss": 0.9701, "step": 3261 }, { "epoch": 0.5531157270029674, "grad_norm": 0.9856547893160948, "learning_rate": 8.767376527561542e-06, "loss": 0.9755, "step": 3262 }, { "epoch": 0.5532852903772785, "grad_norm": 0.955575853195002, "learning_rate": 8.761926302481182e-06, "loss": 0.9187, "step": 3263 }, { "epoch": 0.5534548537515896, "grad_norm": 0.9847088879285761, "learning_rate": 8.756476450869494e-06, "loss": 0.968, "step": 3264 }, { "epoch": 0.5536244171259008, "grad_norm": 0.9452515260850247, "learning_rate": 8.751026974370438e-06, "loss": 0.9258, "step": 3265 }, { "epoch": 0.553793980500212, "grad_norm": 1.0570672541721595, "learning_rate": 8.745577874627863e-06, "loss": 0.9873, "step": 3266 }, { "epoch": 0.5539635438745231, "grad_norm": 0.9717780614054712, "learning_rate": 8.740129153285513e-06, "loss": 0.9734, "step": 3267 }, { "epoch": 0.5541331072488342, "grad_norm": 0.907675161376436, "learning_rate": 8.73468081198701e-06, "loss": 0.9261, "step": 3268 }, { "epoch": 0.5543026706231454, "grad_norm": 0.9721569224983991, "learning_rate": 8.729232852375855e-06, "loss": 1.0044, "step": 3269 }, { "epoch": 0.5544722339974566, "grad_norm": 0.8998798179279173, "learning_rate": 8.723785276095451e-06, "loss": 0.9248, "step": 3270 }, { "epoch": 0.5546417973717677, "grad_norm": 0.9852597520905083, "learning_rate": 8.718338084789074e-06, "loss": 0.9983, "step": 3271 }, { "epoch": 0.5548113607460788, "grad_norm": 0.9666100426462805, "learning_rate": 8.712891280099882e-06, "loss": 0.9468, "step": 3272 }, { "epoch": 0.55498092412039, "grad_norm": 1.000679104836889, "learning_rate": 8.70744486367092e-06, "loss": 0.9802, "step": 3273 }, { "epoch": 0.5551504874947012, "grad_norm": 0.9655056714040969, "learning_rate": 8.701998837145119e-06, "loss": 0.9856, "step": 3274 }, { "epoch": 0.5553200508690123, "grad_norm": 0.9045761781178759, "learning_rate": 8.69655320216529e-06, "loss": 0.9269, "step": 3275 }, { "epoch": 0.5554896142433234, "grad_norm": 0.9804541193730871, "learning_rate": 8.691107960374117e-06, "loss": 0.9319, "step": 3276 }, { "epoch": 0.5556591776176346, "grad_norm": 0.9739095872897207, "learning_rate": 8.685663113414186e-06, "loss": 0.9554, "step": 3277 }, { "epoch": 0.5558287409919458, "grad_norm": 0.9064579283101131, "learning_rate": 8.680218662927944e-06, "loss": 0.9255, "step": 3278 }, { "epoch": 0.5559983043662569, "grad_norm": 0.600655944776659, "learning_rate": 8.674774610557728e-06, "loss": 0.789, "step": 3279 }, { "epoch": 0.556167867740568, "grad_norm": 0.6520002026944826, "learning_rate": 8.66933095794575e-06, "loss": 0.836, "step": 3280 }, { "epoch": 0.5563374311148792, "grad_norm": 0.9779765117275315, "learning_rate": 8.66388770673411e-06, "loss": 0.947, "step": 3281 }, { "epoch": 0.5565069944891904, "grad_norm": 0.9536049964424435, "learning_rate": 8.658444858564774e-06, "loss": 0.9295, "step": 3282 }, { "epoch": 0.5566765578635015, "grad_norm": 1.0439558600531138, "learning_rate": 8.6530024150796e-06, "loss": 1.0026, "step": 3283 }, { "epoch": 0.5568461212378126, "grad_norm": 0.9756079126914644, "learning_rate": 8.647560377920311e-06, "loss": 0.9126, "step": 3284 }, { "epoch": 0.5570156846121238, "grad_norm": 0.985298557273098, "learning_rate": 8.64211874872852e-06, "loss": 0.9384, "step": 3285 }, { "epoch": 0.557185247986435, "grad_norm": 0.9825517832707542, "learning_rate": 8.63667752914571e-06, "loss": 0.9625, "step": 3286 }, { "epoch": 0.557354811360746, "grad_norm": 0.9223060319293799, "learning_rate": 8.631236720813237e-06, "loss": 0.9491, "step": 3287 }, { "epoch": 0.5575243747350572, "grad_norm": 0.9861505208807566, "learning_rate": 8.625796325372342e-06, "loss": 0.9718, "step": 3288 }, { "epoch": 0.5576939381093684, "grad_norm": 0.9727998566983641, "learning_rate": 8.620356344464135e-06, "loss": 0.9151, "step": 3289 }, { "epoch": 0.5578635014836796, "grad_norm": 0.9943456981460446, "learning_rate": 8.614916779729603e-06, "loss": 0.986, "step": 3290 }, { "epoch": 0.5580330648579906, "grad_norm": 1.0006436142538297, "learning_rate": 8.609477632809604e-06, "loss": 0.9797, "step": 3291 }, { "epoch": 0.5582026282323018, "grad_norm": 0.9849960049022215, "learning_rate": 8.604038905344879e-06, "loss": 0.9683, "step": 3292 }, { "epoch": 0.558372191606613, "grad_norm": 0.9790641467557808, "learning_rate": 8.598600598976033e-06, "loss": 0.9631, "step": 3293 }, { "epoch": 0.5585417549809241, "grad_norm": 0.9289067313542648, "learning_rate": 8.593162715343543e-06, "loss": 0.956, "step": 3294 }, { "epoch": 0.5587113183552352, "grad_norm": 0.9955027413227778, "learning_rate": 8.587725256087771e-06, "loss": 0.9424, "step": 3295 }, { "epoch": 0.5588808817295464, "grad_norm": 0.9361243826157685, "learning_rate": 8.582288222848942e-06, "loss": 0.9516, "step": 3296 }, { "epoch": 0.5590504451038576, "grad_norm": 0.977667619187527, "learning_rate": 8.576851617267151e-06, "loss": 0.976, "step": 3297 }, { "epoch": 0.5592200084781687, "grad_norm": 0.9664295663231345, "learning_rate": 8.571415440982364e-06, "loss": 0.9825, "step": 3298 }, { "epoch": 0.5593895718524798, "grad_norm": 0.9551142769696808, "learning_rate": 8.565979695634426e-06, "loss": 0.9864, "step": 3299 }, { "epoch": 0.559559135226791, "grad_norm": 0.9446150664079876, "learning_rate": 8.56054438286304e-06, "loss": 0.9605, "step": 3300 }, { "epoch": 0.5597286986011022, "grad_norm": 0.961298702731066, "learning_rate": 8.55510950430779e-06, "loss": 0.9627, "step": 3301 }, { "epoch": 0.5598982619754133, "grad_norm": 0.9558230934447186, "learning_rate": 8.549675061608117e-06, "loss": 0.9734, "step": 3302 }, { "epoch": 0.5600678253497244, "grad_norm": 0.9742786830999808, "learning_rate": 8.544241056403344e-06, "loss": 0.9359, "step": 3303 }, { "epoch": 0.5602373887240356, "grad_norm": 0.9073722060064696, "learning_rate": 8.538807490332653e-06, "loss": 0.9078, "step": 3304 }, { "epoch": 0.5604069520983468, "grad_norm": 0.9525767751923666, "learning_rate": 8.533374365035089e-06, "loss": 0.9622, "step": 3305 }, { "epoch": 0.5605765154726579, "grad_norm": 0.9714734901097564, "learning_rate": 8.52794168214958e-06, "loss": 0.9716, "step": 3306 }, { "epoch": 0.560746078846969, "grad_norm": 0.9566782567470775, "learning_rate": 8.52250944331491e-06, "loss": 0.9319, "step": 3307 }, { "epoch": 0.5609156422212802, "grad_norm": 0.9793017871204114, "learning_rate": 8.517077650169724e-06, "loss": 0.9402, "step": 3308 }, { "epoch": 0.5610852055955914, "grad_norm": 0.9599908859188653, "learning_rate": 8.511646304352544e-06, "loss": 0.958, "step": 3309 }, { "epoch": 0.5612547689699025, "grad_norm": 0.9744963651654486, "learning_rate": 8.50621540750175e-06, "loss": 0.9818, "step": 3310 }, { "epoch": 0.5614243323442136, "grad_norm": 0.9638906191822316, "learning_rate": 8.50078496125559e-06, "loss": 0.9627, "step": 3311 }, { "epoch": 0.5615938957185248, "grad_norm": 1.0335600205515596, "learning_rate": 8.49535496725217e-06, "loss": 0.9818, "step": 3312 }, { "epoch": 0.561763459092836, "grad_norm": 0.9470592056742406, "learning_rate": 8.489925427129469e-06, "loss": 0.9433, "step": 3313 }, { "epoch": 0.561933022467147, "grad_norm": 0.9630904631982459, "learning_rate": 8.484496342525325e-06, "loss": 0.9667, "step": 3314 }, { "epoch": 0.5621025858414582, "grad_norm": 0.9797786858758676, "learning_rate": 8.479067715077435e-06, "loss": 0.9482, "step": 3315 }, { "epoch": 0.5622721492157694, "grad_norm": 0.9875586320081415, "learning_rate": 8.473639546423358e-06, "loss": 0.9532, "step": 3316 }, { "epoch": 0.5624417125900806, "grad_norm": 0.978358266211283, "learning_rate": 8.468211838200523e-06, "loss": 0.9453, "step": 3317 }, { "epoch": 0.5626112759643916, "grad_norm": 0.9859125172498366, "learning_rate": 8.462784592046212e-06, "loss": 0.9734, "step": 3318 }, { "epoch": 0.5627808393387028, "grad_norm": 1.0042691411047522, "learning_rate": 8.457357809597571e-06, "loss": 0.9502, "step": 3319 }, { "epoch": 0.562950402713014, "grad_norm": 0.6499226110680854, "learning_rate": 8.4519314924916e-06, "loss": 0.8187, "step": 3320 }, { "epoch": 0.5631199660873252, "grad_norm": 0.9633707636307078, "learning_rate": 8.446505642365174e-06, "loss": 0.9299, "step": 3321 }, { "epoch": 0.5632895294616362, "grad_norm": 1.019429594447877, "learning_rate": 8.44108026085501e-06, "loss": 0.9615, "step": 3322 }, { "epoch": 0.5634590928359474, "grad_norm": 0.9605392324906264, "learning_rate": 8.43565534959769e-06, "loss": 0.9322, "step": 3323 }, { "epoch": 0.5636286562102586, "grad_norm": 0.9429404524863974, "learning_rate": 8.430230910229662e-06, "loss": 0.9289, "step": 3324 }, { "epoch": 0.5637982195845698, "grad_norm": 0.9257498287400514, "learning_rate": 8.424806944387219e-06, "loss": 0.8815, "step": 3325 }, { "epoch": 0.5639677829588808, "grad_norm": 0.9483105817363174, "learning_rate": 8.419383453706516e-06, "loss": 0.9748, "step": 3326 }, { "epoch": 0.564137346333192, "grad_norm": 0.9883552544764169, "learning_rate": 8.413960439823567e-06, "loss": 0.9755, "step": 3327 }, { "epoch": 0.5643069097075032, "grad_norm": 1.0235268886835087, "learning_rate": 8.40853790437424e-06, "loss": 0.9519, "step": 3328 }, { "epoch": 0.5644764730818144, "grad_norm": 0.992044632582503, "learning_rate": 8.40311584899426e-06, "loss": 0.9377, "step": 3329 }, { "epoch": 0.5646460364561254, "grad_norm": 0.6396221583500626, "learning_rate": 8.397694275319204e-06, "loss": 0.8205, "step": 3330 }, { "epoch": 0.5648155998304366, "grad_norm": 0.903535773676659, "learning_rate": 8.39227318498451e-06, "loss": 0.9539, "step": 3331 }, { "epoch": 0.5649851632047478, "grad_norm": 0.930047968226833, "learning_rate": 8.386852579625467e-06, "loss": 0.9242, "step": 3332 }, { "epoch": 0.565154726579059, "grad_norm": 1.0568057248415546, "learning_rate": 8.381432460877213e-06, "loss": 1.0074, "step": 3333 }, { "epoch": 0.56532428995337, "grad_norm": 0.9377389123973981, "learning_rate": 8.37601283037474e-06, "loss": 0.9007, "step": 3334 }, { "epoch": 0.5654938533276812, "grad_norm": 0.9488911278192853, "learning_rate": 8.370593689752905e-06, "loss": 0.9565, "step": 3335 }, { "epoch": 0.5656634167019924, "grad_norm": 0.9650785402173176, "learning_rate": 8.365175040646403e-06, "loss": 0.972, "step": 3336 }, { "epoch": 0.5658329800763036, "grad_norm": 0.9832293633440671, "learning_rate": 8.359756884689785e-06, "loss": 0.9857, "step": 3337 }, { "epoch": 0.5660025434506146, "grad_norm": 0.951337711201333, "learning_rate": 8.354339223517452e-06, "loss": 1.0001, "step": 3338 }, { "epoch": 0.5661721068249258, "grad_norm": 0.926233023741234, "learning_rate": 8.348922058763667e-06, "loss": 0.9733, "step": 3339 }, { "epoch": 0.566341670199237, "grad_norm": 0.9499865274162356, "learning_rate": 8.343505392062526e-06, "loss": 0.9883, "step": 3340 }, { "epoch": 0.5665112335735482, "grad_norm": 1.003365566400872, "learning_rate": 8.338089225047983e-06, "loss": 1.0004, "step": 3341 }, { "epoch": 0.5666807969478592, "grad_norm": 0.9812993023009497, "learning_rate": 8.332673559353845e-06, "loss": 0.9554, "step": 3342 }, { "epoch": 0.5668503603221704, "grad_norm": 0.9463155243193977, "learning_rate": 8.327258396613766e-06, "loss": 0.927, "step": 3343 }, { "epoch": 0.5670199236964816, "grad_norm": 0.9915837655586575, "learning_rate": 8.32184373846124e-06, "loss": 0.9681, "step": 3344 }, { "epoch": 0.5671894870707928, "grad_norm": 0.9888268207601653, "learning_rate": 8.316429586529616e-06, "loss": 0.9894, "step": 3345 }, { "epoch": 0.5673590504451038, "grad_norm": 0.9572204963855915, "learning_rate": 8.311015942452091e-06, "loss": 0.975, "step": 3346 }, { "epoch": 0.567528613819415, "grad_norm": 0.6399731668243899, "learning_rate": 8.30560280786171e-06, "loss": 0.7878, "step": 3347 }, { "epoch": 0.5676981771937262, "grad_norm": 0.6290090000169403, "learning_rate": 8.300190184391353e-06, "loss": 0.7805, "step": 3348 }, { "epoch": 0.5678677405680373, "grad_norm": 0.9290403433601735, "learning_rate": 8.294778073673762e-06, "loss": 0.9716, "step": 3349 }, { "epoch": 0.5680373039423484, "grad_norm": 1.0321282648864374, "learning_rate": 8.289366477341517e-06, "loss": 0.9697, "step": 3350 }, { "epoch": 0.5682068673166596, "grad_norm": 0.9563499507339595, "learning_rate": 8.283955397027037e-06, "loss": 0.9488, "step": 3351 }, { "epoch": 0.5683764306909708, "grad_norm": 0.9527895390347536, "learning_rate": 8.278544834362592e-06, "loss": 0.9348, "step": 3352 }, { "epoch": 0.568545994065282, "grad_norm": 0.9447734550187241, "learning_rate": 8.273134790980295e-06, "loss": 0.9956, "step": 3353 }, { "epoch": 0.568715557439593, "grad_norm": 0.9513491951758145, "learning_rate": 8.267725268512104e-06, "loss": 0.9166, "step": 3354 }, { "epoch": 0.5688851208139042, "grad_norm": 1.0310956717415067, "learning_rate": 8.262316268589815e-06, "loss": 0.9476, "step": 3355 }, { "epoch": 0.5690546841882154, "grad_norm": 0.9791170386060745, "learning_rate": 8.256907792845073e-06, "loss": 0.9677, "step": 3356 }, { "epoch": 0.5692242475625265, "grad_norm": 0.9749652269356058, "learning_rate": 8.251499842909358e-06, "loss": 0.9435, "step": 3357 }, { "epoch": 0.5693938109368376, "grad_norm": 0.9984432953782569, "learning_rate": 8.246092420413996e-06, "loss": 0.9879, "step": 3358 }, { "epoch": 0.5695633743111488, "grad_norm": 0.973993978598013, "learning_rate": 8.240685526990147e-06, "loss": 0.9389, "step": 3359 }, { "epoch": 0.56973293768546, "grad_norm": 1.0096777408756197, "learning_rate": 8.235279164268823e-06, "loss": 0.9682, "step": 3360 }, { "epoch": 0.5699025010597711, "grad_norm": 0.9568364914035112, "learning_rate": 8.22987333388087e-06, "loss": 0.9341, "step": 3361 }, { "epoch": 0.5700720644340822, "grad_norm": 0.9520925152229687, "learning_rate": 8.224468037456969e-06, "loss": 0.9811, "step": 3362 }, { "epoch": 0.5702416278083934, "grad_norm": 0.9440168638043864, "learning_rate": 8.219063276627646e-06, "loss": 0.9511, "step": 3363 }, { "epoch": 0.5704111911827046, "grad_norm": 1.0079233348432537, "learning_rate": 8.213659053023263e-06, "loss": 1.0019, "step": 3364 }, { "epoch": 0.5705807545570157, "grad_norm": 0.9502092413812859, "learning_rate": 8.20825536827402e-06, "loss": 0.9757, "step": 3365 }, { "epoch": 0.5707503179313268, "grad_norm": 0.9557558413318101, "learning_rate": 8.202852224009955e-06, "loss": 0.956, "step": 3366 }, { "epoch": 0.570919881305638, "grad_norm": 0.9547814830155817, "learning_rate": 8.197449621860944e-06, "loss": 0.9495, "step": 3367 }, { "epoch": 0.5710894446799492, "grad_norm": 0.9580283694113226, "learning_rate": 8.192047563456697e-06, "loss": 0.9601, "step": 3368 }, { "epoch": 0.5712590080542603, "grad_norm": 0.9606886317343701, "learning_rate": 8.186646050426763e-06, "loss": 0.9359, "step": 3369 }, { "epoch": 0.5714285714285714, "grad_norm": 0.9749029796383327, "learning_rate": 8.181245084400518e-06, "loss": 0.9339, "step": 3370 }, { "epoch": 0.5715981348028826, "grad_norm": 0.9288222010534315, "learning_rate": 8.175844667007191e-06, "loss": 0.9703, "step": 3371 }, { "epoch": 0.5717676981771938, "grad_norm": 0.9664087517987092, "learning_rate": 8.170444799875827e-06, "loss": 0.922, "step": 3372 }, { "epoch": 0.5719372615515049, "grad_norm": 0.9926995136485463, "learning_rate": 8.16504548463531e-06, "loss": 0.98, "step": 3373 }, { "epoch": 0.572106824925816, "grad_norm": 0.9347949466978872, "learning_rate": 8.159646722914368e-06, "loss": 0.9647, "step": 3374 }, { "epoch": 0.5722763883001272, "grad_norm": 0.9186331637886513, "learning_rate": 8.154248516341547e-06, "loss": 0.9234, "step": 3375 }, { "epoch": 0.5724459516744383, "grad_norm": 0.9714013758547416, "learning_rate": 8.148850866545236e-06, "loss": 0.9778, "step": 3376 }, { "epoch": 0.5726155150487495, "grad_norm": 0.9299230138369605, "learning_rate": 8.143453775153646e-06, "loss": 0.9405, "step": 3377 }, { "epoch": 0.5727850784230606, "grad_norm": 1.012276157389143, "learning_rate": 8.138057243794834e-06, "loss": 0.9651, "step": 3378 }, { "epoch": 0.5729546417973718, "grad_norm": 0.962102975451542, "learning_rate": 8.132661274096676e-06, "loss": 0.98, "step": 3379 }, { "epoch": 0.5731242051716829, "grad_norm": 0.9348421325950335, "learning_rate": 8.127265867686884e-06, "loss": 0.9499, "step": 3380 }, { "epoch": 0.5732937685459941, "grad_norm": 0.9279097201506896, "learning_rate": 8.121871026192996e-06, "loss": 0.9339, "step": 3381 }, { "epoch": 0.5734633319203052, "grad_norm": 0.9699226917161088, "learning_rate": 8.116476751242386e-06, "loss": 0.9532, "step": 3382 }, { "epoch": 0.5736328952946164, "grad_norm": 0.8884174528377796, "learning_rate": 8.11108304446225e-06, "loss": 0.9116, "step": 3383 }, { "epoch": 0.5738024586689275, "grad_norm": 0.9705545532168752, "learning_rate": 8.105689907479613e-06, "loss": 0.9845, "step": 3384 }, { "epoch": 0.5739720220432386, "grad_norm": 0.9644779930523806, "learning_rate": 8.100297341921342e-06, "loss": 0.9539, "step": 3385 }, { "epoch": 0.5741415854175498, "grad_norm": 0.9238751569239929, "learning_rate": 8.094905349414111e-06, "loss": 0.9683, "step": 3386 }, { "epoch": 0.574311148791861, "grad_norm": 0.9905563243323852, "learning_rate": 8.089513931584437e-06, "loss": 0.9724, "step": 3387 }, { "epoch": 0.5744807121661721, "grad_norm": 0.9833194998413887, "learning_rate": 8.084123090058646e-06, "loss": 0.9872, "step": 3388 }, { "epoch": 0.5746502755404832, "grad_norm": 1.0046111484536078, "learning_rate": 8.078732826462917e-06, "loss": 0.9712, "step": 3389 }, { "epoch": 0.5748198389147944, "grad_norm": 1.0034997795590312, "learning_rate": 8.07334314242323e-06, "loss": 0.9692, "step": 3390 }, { "epoch": 0.5749894022891056, "grad_norm": 0.9167965029448817, "learning_rate": 8.067954039565402e-06, "loss": 0.9416, "step": 3391 }, { "epoch": 0.5751589656634167, "grad_norm": 0.9312496209230505, "learning_rate": 8.062565519515072e-06, "loss": 0.9652, "step": 3392 }, { "epoch": 0.5753285290377278, "grad_norm": 0.9593204939327644, "learning_rate": 8.057177583897704e-06, "loss": 0.9571, "step": 3393 }, { "epoch": 0.575498092412039, "grad_norm": 0.9164609626221537, "learning_rate": 8.051790234338584e-06, "loss": 0.9214, "step": 3394 }, { "epoch": 0.5756676557863502, "grad_norm": 0.9452808985904894, "learning_rate": 8.046403472462818e-06, "loss": 0.9498, "step": 3395 }, { "epoch": 0.5758372191606613, "grad_norm": 0.956539693736721, "learning_rate": 8.041017299895347e-06, "loss": 0.9547, "step": 3396 }, { "epoch": 0.5760067825349724, "grad_norm": 0.9222433050607953, "learning_rate": 8.035631718260923e-06, "loss": 0.9739, "step": 3397 }, { "epoch": 0.5761763459092836, "grad_norm": 0.9397494698910815, "learning_rate": 8.030246729184124e-06, "loss": 0.8946, "step": 3398 }, { "epoch": 0.5763459092835947, "grad_norm": 0.9744502547093705, "learning_rate": 8.024862334289345e-06, "loss": 0.9309, "step": 3399 }, { "epoch": 0.5765154726579059, "grad_norm": 0.9933423687334322, "learning_rate": 8.019478535200807e-06, "loss": 0.9898, "step": 3400 }, { "epoch": 0.576685036032217, "grad_norm": 0.9519146021318989, "learning_rate": 8.014095333542548e-06, "loss": 0.9677, "step": 3401 }, { "epoch": 0.5768545994065282, "grad_norm": 0.9887510770362908, "learning_rate": 8.008712730938426e-06, "loss": 0.9311, "step": 3402 }, { "epoch": 0.5770241627808393, "grad_norm": 1.0400176262061043, "learning_rate": 8.003330729012124e-06, "loss": 0.9636, "step": 3403 }, { "epoch": 0.5771937261551505, "grad_norm": 1.0136121022209934, "learning_rate": 7.997949329387138e-06, "loss": 0.9027, "step": 3404 }, { "epoch": 0.5773632895294616, "grad_norm": 0.9759125784771253, "learning_rate": 7.992568533686782e-06, "loss": 0.9357, "step": 3405 }, { "epoch": 0.5775328529037728, "grad_norm": 0.9521657987607298, "learning_rate": 7.987188343534184e-06, "loss": 0.9419, "step": 3406 }, { "epoch": 0.5777024162780839, "grad_norm": 0.9482680969680309, "learning_rate": 7.981808760552305e-06, "loss": 0.9525, "step": 3407 }, { "epoch": 0.5778719796523951, "grad_norm": 0.9492045026487005, "learning_rate": 7.976429786363906e-06, "loss": 0.9766, "step": 3408 }, { "epoch": 0.5780415430267062, "grad_norm": 1.057205188112986, "learning_rate": 7.971051422591571e-06, "loss": 0.962, "step": 3409 }, { "epoch": 0.5782111064010174, "grad_norm": 0.7093557363883277, "learning_rate": 7.965673670857702e-06, "loss": 0.825, "step": 3410 }, { "epoch": 0.5783806697753285, "grad_norm": 1.0249844179693937, "learning_rate": 7.960296532784515e-06, "loss": 0.973, "step": 3411 }, { "epoch": 0.5785502331496397, "grad_norm": 0.9710465336548872, "learning_rate": 7.954920009994035e-06, "loss": 0.9474, "step": 3412 }, { "epoch": 0.5787197965239508, "grad_norm": 0.9064305958036404, "learning_rate": 7.949544104108107e-06, "loss": 0.9525, "step": 3413 }, { "epoch": 0.578889359898262, "grad_norm": 0.9745465837372795, "learning_rate": 7.944168816748396e-06, "loss": 0.947, "step": 3414 }, { "epoch": 0.5790589232725731, "grad_norm": 0.933589251932011, "learning_rate": 7.938794149536367e-06, "loss": 0.9398, "step": 3415 }, { "epoch": 0.5792284866468843, "grad_norm": 0.9811964817017039, "learning_rate": 7.933420104093308e-06, "loss": 0.9952, "step": 3416 }, { "epoch": 0.5793980500211954, "grad_norm": 0.9260992005918665, "learning_rate": 7.928046682040311e-06, "loss": 0.9504, "step": 3417 }, { "epoch": 0.5795676133955066, "grad_norm": 1.0129467373354948, "learning_rate": 7.922673884998291e-06, "loss": 0.9887, "step": 3418 }, { "epoch": 0.5797371767698177, "grad_norm": 0.9490030311047655, "learning_rate": 7.917301714587968e-06, "loss": 0.9905, "step": 3419 }, { "epoch": 0.5799067401441289, "grad_norm": 0.9402054243692838, "learning_rate": 7.911930172429865e-06, "loss": 0.9455, "step": 3420 }, { "epoch": 0.58007630351844, "grad_norm": 0.9219371791575302, "learning_rate": 7.906559260144336e-06, "loss": 0.9317, "step": 3421 }, { "epoch": 0.5802458668927511, "grad_norm": 0.9109996257957591, "learning_rate": 7.901188979351527e-06, "loss": 0.9678, "step": 3422 }, { "epoch": 0.5804154302670623, "grad_norm": 1.0039971883743508, "learning_rate": 7.895819331671399e-06, "loss": 0.9584, "step": 3423 }, { "epoch": 0.5805849936413735, "grad_norm": 0.9984601258797432, "learning_rate": 7.890450318723719e-06, "loss": 0.9436, "step": 3424 }, { "epoch": 0.5807545570156846, "grad_norm": 0.9748430329698677, "learning_rate": 7.885081942128074e-06, "loss": 0.9026, "step": 3425 }, { "epoch": 0.5809241203899957, "grad_norm": 0.9784752790240956, "learning_rate": 7.879714203503848e-06, "loss": 0.9627, "step": 3426 }, { "epoch": 0.5810936837643069, "grad_norm": 0.9166479974176474, "learning_rate": 7.874347104470234e-06, "loss": 0.9336, "step": 3427 }, { "epoch": 0.5812632471386181, "grad_norm": 0.971997209431258, "learning_rate": 7.868980646646235e-06, "loss": 0.9851, "step": 3428 }, { "epoch": 0.5814328105129292, "grad_norm": 0.9609867211804319, "learning_rate": 7.863614831650658e-06, "loss": 0.9524, "step": 3429 }, { "epoch": 0.5816023738872403, "grad_norm": 0.9772767084510783, "learning_rate": 7.858249661102118e-06, "loss": 0.9761, "step": 3430 }, { "epoch": 0.5817719372615515, "grad_norm": 0.9758343214781775, "learning_rate": 7.852885136619031e-06, "loss": 0.8919, "step": 3431 }, { "epoch": 0.5819415006358627, "grad_norm": 0.6287788493208274, "learning_rate": 7.84752125981963e-06, "loss": 0.7989, "step": 3432 }, { "epoch": 0.5821110640101738, "grad_norm": 0.9456333877649048, "learning_rate": 7.84215803232194e-06, "loss": 0.9407, "step": 3433 }, { "epoch": 0.5822806273844849, "grad_norm": 0.997972350404109, "learning_rate": 7.836795455743796e-06, "loss": 0.9647, "step": 3434 }, { "epoch": 0.5824501907587961, "grad_norm": 1.0016519106105044, "learning_rate": 7.831433531702831e-06, "loss": 0.9346, "step": 3435 }, { "epoch": 0.5826197541331073, "grad_norm": 0.936976443789542, "learning_rate": 7.826072261816493e-06, "loss": 0.9508, "step": 3436 }, { "epoch": 0.5827893175074184, "grad_norm": 0.9697365757415702, "learning_rate": 7.820711647702017e-06, "loss": 0.9353, "step": 3437 }, { "epoch": 0.5829588808817295, "grad_norm": 0.9582611589976441, "learning_rate": 7.815351690976455e-06, "loss": 0.9516, "step": 3438 }, { "epoch": 0.5831284442560407, "grad_norm": 0.9977573591518735, "learning_rate": 7.809992393256653e-06, "loss": 0.9703, "step": 3439 }, { "epoch": 0.5832980076303519, "grad_norm": 1.023846616194263, "learning_rate": 7.804633756159258e-06, "loss": 0.9761, "step": 3440 }, { "epoch": 0.583467571004663, "grad_norm": 0.6184670480474209, "learning_rate": 7.79927578130072e-06, "loss": 0.7731, "step": 3441 }, { "epoch": 0.5836371343789741, "grad_norm": 0.9555614786776051, "learning_rate": 7.793918470297284e-06, "loss": 0.9561, "step": 3442 }, { "epoch": 0.5838066977532853, "grad_norm": 0.9161918809795976, "learning_rate": 7.788561824765007e-06, "loss": 0.929, "step": 3443 }, { "epoch": 0.5839762611275965, "grad_norm": 0.6148405042378875, "learning_rate": 7.783205846319731e-06, "loss": 0.7935, "step": 3444 }, { "epoch": 0.5841458245019076, "grad_norm": 1.0399815244172013, "learning_rate": 7.777850536577104e-06, "loss": 0.9689, "step": 3445 }, { "epoch": 0.5843153878762187, "grad_norm": 0.9470515943664412, "learning_rate": 7.772495897152575e-06, "loss": 0.9532, "step": 3446 }, { "epoch": 0.5844849512505299, "grad_norm": 0.9651051186509068, "learning_rate": 7.767141929661383e-06, "loss": 0.9617, "step": 3447 }, { "epoch": 0.5846545146248411, "grad_norm": 0.9841204194037927, "learning_rate": 7.761788635718572e-06, "loss": 0.9371, "step": 3448 }, { "epoch": 0.5848240779991521, "grad_norm": 0.9729482614068703, "learning_rate": 7.756436016938973e-06, "loss": 0.9775, "step": 3449 }, { "epoch": 0.5849936413734633, "grad_norm": 0.9993275872288293, "learning_rate": 7.751084074937226e-06, "loss": 0.963, "step": 3450 }, { "epoch": 0.5851632047477745, "grad_norm": 0.9888430517624435, "learning_rate": 7.74573281132776e-06, "loss": 0.9536, "step": 3451 }, { "epoch": 0.5853327681220857, "grad_norm": 0.9675483848533789, "learning_rate": 7.740382227724795e-06, "loss": 0.9654, "step": 3452 }, { "epoch": 0.5855023314963967, "grad_norm": 0.99293755890993, "learning_rate": 7.735032325742355e-06, "loss": 0.9735, "step": 3453 }, { "epoch": 0.5856718948707079, "grad_norm": 0.9298116805603497, "learning_rate": 7.729683106994256e-06, "loss": 0.9336, "step": 3454 }, { "epoch": 0.5858414582450191, "grad_norm": 0.9525145153091052, "learning_rate": 7.724334573094101e-06, "loss": 0.9424, "step": 3455 }, { "epoch": 0.5860110216193303, "grad_norm": 0.9654867587034026, "learning_rate": 7.718986725655293e-06, "loss": 0.9431, "step": 3456 }, { "epoch": 0.5861805849936413, "grad_norm": 0.6362319554138335, "learning_rate": 7.713639566291028e-06, "loss": 0.7702, "step": 3457 }, { "epoch": 0.5863501483679525, "grad_norm": 0.9777832807707518, "learning_rate": 7.70829309661429e-06, "loss": 0.9575, "step": 3458 }, { "epoch": 0.5865197117422637, "grad_norm": 0.9572424355479278, "learning_rate": 7.702947318237862e-06, "loss": 0.9428, "step": 3459 }, { "epoch": 0.5866892751165749, "grad_norm": 0.9440638464761222, "learning_rate": 7.697602232774304e-06, "loss": 0.9681, "step": 3460 }, { "epoch": 0.5868588384908859, "grad_norm": 0.9874135855687968, "learning_rate": 7.692257841835992e-06, "loss": 0.98, "step": 3461 }, { "epoch": 0.5870284018651971, "grad_norm": 0.9489967459339086, "learning_rate": 7.686914147035068e-06, "loss": 0.9423, "step": 3462 }, { "epoch": 0.5871979652395083, "grad_norm": 0.9300036218715391, "learning_rate": 7.681571149983475e-06, "loss": 0.9684, "step": 3463 }, { "epoch": 0.5873675286138195, "grad_norm": 0.9151042231783431, "learning_rate": 7.676228852292947e-06, "loss": 0.9241, "step": 3464 }, { "epoch": 0.5875370919881305, "grad_norm": 0.9633808506965221, "learning_rate": 7.670887255575003e-06, "loss": 0.9209, "step": 3465 }, { "epoch": 0.5877066553624417, "grad_norm": 0.9696190858478899, "learning_rate": 7.66554636144095e-06, "loss": 0.9137, "step": 3466 }, { "epoch": 0.5878762187367529, "grad_norm": 0.9478376336384198, "learning_rate": 7.660206171501881e-06, "loss": 0.9848, "step": 3467 }, { "epoch": 0.5880457821110641, "grad_norm": 0.9273514930199466, "learning_rate": 7.65486668736869e-06, "loss": 0.9107, "step": 3468 }, { "epoch": 0.5882153454853751, "grad_norm": 0.9650211704347943, "learning_rate": 7.649527910652044e-06, "loss": 0.9428, "step": 3469 }, { "epoch": 0.5883849088596863, "grad_norm": 0.9620438342023812, "learning_rate": 7.644189842962399e-06, "loss": 0.9055, "step": 3470 }, { "epoch": 0.5885544722339975, "grad_norm": 0.9653572004487774, "learning_rate": 7.638852485910002e-06, "loss": 0.9491, "step": 3471 }, { "epoch": 0.5887240356083087, "grad_norm": 0.9812444176795981, "learning_rate": 7.633515841104884e-06, "loss": 0.9763, "step": 3472 }, { "epoch": 0.5888935989826197, "grad_norm": 1.0119365013908592, "learning_rate": 7.628179910156859e-06, "loss": 0.9836, "step": 3473 }, { "epoch": 0.5890631623569309, "grad_norm": 0.9968362551201371, "learning_rate": 7.622844694675522e-06, "loss": 0.9872, "step": 3474 }, { "epoch": 0.5892327257312421, "grad_norm": 0.9829620008445998, "learning_rate": 7.6175101962702624e-06, "loss": 0.965, "step": 3475 }, { "epoch": 0.5894022891055531, "grad_norm": 0.9886391980251944, "learning_rate": 7.6121764165502476e-06, "loss": 0.9147, "step": 3476 }, { "epoch": 0.5895718524798643, "grad_norm": 0.9657999227095531, "learning_rate": 7.606843357124426e-06, "loss": 0.9842, "step": 3477 }, { "epoch": 0.5897414158541755, "grad_norm": 0.9317736167987475, "learning_rate": 7.6015110196015275e-06, "loss": 0.9195, "step": 3478 }, { "epoch": 0.5899109792284867, "grad_norm": 0.9517615658817935, "learning_rate": 7.596179405590076e-06, "loss": 0.9352, "step": 3479 }, { "epoch": 0.5900805426027977, "grad_norm": 0.9672015545296544, "learning_rate": 7.590848516698366e-06, "loss": 0.9606, "step": 3480 }, { "epoch": 0.5902501059771089, "grad_norm": 0.9943623372344311, "learning_rate": 7.585518354534473e-06, "loss": 0.9884, "step": 3481 }, { "epoch": 0.5904196693514201, "grad_norm": 1.0058224721607767, "learning_rate": 7.580188920706261e-06, "loss": 0.9779, "step": 3482 }, { "epoch": 0.5905892327257313, "grad_norm": 0.9646446057400887, "learning_rate": 7.574860216821367e-06, "loss": 0.9615, "step": 3483 }, { "epoch": 0.5907587961000423, "grad_norm": 0.9243343781309876, "learning_rate": 7.569532244487212e-06, "loss": 0.9081, "step": 3484 }, { "epoch": 0.5909283594743535, "grad_norm": 0.9910137168879887, "learning_rate": 7.56420500531099e-06, "loss": 0.9756, "step": 3485 }, { "epoch": 0.5910979228486647, "grad_norm": 0.939263716384389, "learning_rate": 7.558878500899687e-06, "loss": 0.9272, "step": 3486 }, { "epoch": 0.5912674862229759, "grad_norm": 0.9931354201892537, "learning_rate": 7.5535527328600544e-06, "loss": 1.0047, "step": 3487 }, { "epoch": 0.5914370495972869, "grad_norm": 0.9180442723042967, "learning_rate": 7.548227702798624e-06, "loss": 0.9387, "step": 3488 }, { "epoch": 0.5916066129715981, "grad_norm": 0.9446904704706182, "learning_rate": 7.542903412321714e-06, "loss": 0.9478, "step": 3489 }, { "epoch": 0.5917761763459093, "grad_norm": 0.9792374085693535, "learning_rate": 7.537579863035409e-06, "loss": 0.9573, "step": 3490 }, { "epoch": 0.5919457397202205, "grad_norm": 0.9537974691365961, "learning_rate": 7.532257056545573e-06, "loss": 0.9547, "step": 3491 }, { "epoch": 0.5921153030945315, "grad_norm": 0.9471769078208534, "learning_rate": 7.5269349944578454e-06, "loss": 0.9895, "step": 3492 }, { "epoch": 0.5922848664688427, "grad_norm": 0.9785249941487371, "learning_rate": 7.521613678377646e-06, "loss": 0.9488, "step": 3493 }, { "epoch": 0.5924544298431539, "grad_norm": 0.9366792912542713, "learning_rate": 7.516293109910165e-06, "loss": 0.9615, "step": 3494 }, { "epoch": 0.5926239932174651, "grad_norm": 0.9408330456569416, "learning_rate": 7.510973290660366e-06, "loss": 0.9313, "step": 3495 }, { "epoch": 0.5927935565917761, "grad_norm": 0.9473936256698582, "learning_rate": 7.505654222232985e-06, "loss": 0.9237, "step": 3496 }, { "epoch": 0.5929631199660873, "grad_norm": 0.9360998211765088, "learning_rate": 7.500335906232544e-06, "loss": 0.9441, "step": 3497 }, { "epoch": 0.5931326833403985, "grad_norm": 0.9642416803983946, "learning_rate": 7.4950183442633255e-06, "loss": 0.9202, "step": 3498 }, { "epoch": 0.5933022467147097, "grad_norm": 1.0380122274660852, "learning_rate": 7.489701537929384e-06, "loss": 0.9701, "step": 3499 }, { "epoch": 0.5934718100890207, "grad_norm": 0.9693441981521809, "learning_rate": 7.484385488834556e-06, "loss": 0.9402, "step": 3500 }, { "epoch": 0.5936413734633319, "grad_norm": 1.0057653995038704, "learning_rate": 7.479070198582441e-06, "loss": 1.0082, "step": 3501 }, { "epoch": 0.5938109368376431, "grad_norm": 0.9763316773725886, "learning_rate": 7.473755668776413e-06, "loss": 0.9721, "step": 3502 }, { "epoch": 0.5939805002119543, "grad_norm": 0.9376039149867716, "learning_rate": 7.468441901019612e-06, "loss": 0.9705, "step": 3503 }, { "epoch": 0.5941500635862653, "grad_norm": 0.9308121175258789, "learning_rate": 7.463128896914958e-06, "loss": 0.9544, "step": 3504 }, { "epoch": 0.5943196269605765, "grad_norm": 0.9196473565331038, "learning_rate": 7.4578166580651335e-06, "loss": 0.979, "step": 3505 }, { "epoch": 0.5944891903348877, "grad_norm": 0.9379661272557751, "learning_rate": 7.452505186072585e-06, "loss": 0.9056, "step": 3506 }, { "epoch": 0.5946587537091989, "grad_norm": 0.9553036761263827, "learning_rate": 7.447194482539544e-06, "loss": 0.9636, "step": 3507 }, { "epoch": 0.5948283170835099, "grad_norm": 0.947992886391597, "learning_rate": 7.441884549067994e-06, "loss": 0.9732, "step": 3508 }, { "epoch": 0.5949978804578211, "grad_norm": 0.9128820994683076, "learning_rate": 7.436575387259697e-06, "loss": 0.9295, "step": 3509 }, { "epoch": 0.5951674438321323, "grad_norm": 0.9321470900697977, "learning_rate": 7.431266998716171e-06, "loss": 0.9677, "step": 3510 }, { "epoch": 0.5953370072064434, "grad_norm": 0.94120561993968, "learning_rate": 7.425959385038714e-06, "loss": 0.9341, "step": 3511 }, { "epoch": 0.5955065705807545, "grad_norm": 0.9798798426240755, "learning_rate": 7.4206525478283795e-06, "loss": 0.9882, "step": 3512 }, { "epoch": 0.5956761339550657, "grad_norm": 0.9487395203915984, "learning_rate": 7.4153464886859925e-06, "loss": 0.953, "step": 3513 }, { "epoch": 0.5958456973293769, "grad_norm": 0.9050942153911852, "learning_rate": 7.410041209212138e-06, "loss": 0.8542, "step": 3514 }, { "epoch": 0.596015260703688, "grad_norm": 0.9571600132591642, "learning_rate": 7.404736711007176e-06, "loss": 0.9751, "step": 3515 }, { "epoch": 0.5961848240779991, "grad_norm": 0.9626870225768668, "learning_rate": 7.399432995671223e-06, "loss": 0.9102, "step": 3516 }, { "epoch": 0.5963543874523103, "grad_norm": 0.9577868520563464, "learning_rate": 7.394130064804157e-06, "loss": 0.9327, "step": 3517 }, { "epoch": 0.5965239508266215, "grad_norm": 0.6220022210084684, "learning_rate": 7.388827920005628e-06, "loss": 0.7842, "step": 3518 }, { "epoch": 0.5966935142009326, "grad_norm": 1.002680295034151, "learning_rate": 7.383526562875041e-06, "loss": 0.9499, "step": 3519 }, { "epoch": 0.5968630775752437, "grad_norm": 0.700044317387652, "learning_rate": 7.378225995011566e-06, "loss": 0.8501, "step": 3520 }, { "epoch": 0.5970326409495549, "grad_norm": 1.0155864390506473, "learning_rate": 7.372926218014131e-06, "loss": 0.9637, "step": 3521 }, { "epoch": 0.5972022043238661, "grad_norm": 0.9995842492263824, "learning_rate": 7.36762723348144e-06, "loss": 0.9321, "step": 3522 }, { "epoch": 0.5973717676981772, "grad_norm": 0.9964402228277096, "learning_rate": 7.362329043011942e-06, "loss": 0.9628, "step": 3523 }, { "epoch": 0.5975413310724883, "grad_norm": 0.973107619583477, "learning_rate": 7.357031648203849e-06, "loss": 0.9173, "step": 3524 }, { "epoch": 0.5977108944467995, "grad_norm": 0.9504310158845471, "learning_rate": 7.3517350506551446e-06, "loss": 0.9759, "step": 3525 }, { "epoch": 0.5978804578211107, "grad_norm": 0.9288831038605503, "learning_rate": 7.3464392519635574e-06, "loss": 0.9298, "step": 3526 }, { "epoch": 0.5980500211954218, "grad_norm": 0.9407148834422668, "learning_rate": 7.341144253726583e-06, "loss": 0.9067, "step": 3527 }, { "epoch": 0.5982195845697329, "grad_norm": 0.6368165589102066, "learning_rate": 7.335850057541471e-06, "loss": 0.8465, "step": 3528 }, { "epoch": 0.5983891479440441, "grad_norm": 1.03598637777532, "learning_rate": 7.330556665005235e-06, "loss": 0.9748, "step": 3529 }, { "epoch": 0.5985587113183553, "grad_norm": 0.9470462007610989, "learning_rate": 7.32526407771464e-06, "loss": 0.9817, "step": 3530 }, { "epoch": 0.5987282746926664, "grad_norm": 0.9543173026138349, "learning_rate": 7.319972297266215e-06, "loss": 0.9706, "step": 3531 }, { "epoch": 0.5988978380669775, "grad_norm": 0.9836449557475946, "learning_rate": 7.314681325256232e-06, "loss": 0.9891, "step": 3532 }, { "epoch": 0.5990674014412887, "grad_norm": 0.9640791457136666, "learning_rate": 7.3093911632807415e-06, "loss": 0.9256, "step": 3533 }, { "epoch": 0.5992369648155998, "grad_norm": 0.886457298695005, "learning_rate": 7.304101812935531e-06, "loss": 0.9149, "step": 3534 }, { "epoch": 0.599406528189911, "grad_norm": 0.6402390491174929, "learning_rate": 7.298813275816144e-06, "loss": 0.8033, "step": 3535 }, { "epoch": 0.5995760915642221, "grad_norm": 1.0605793596147146, "learning_rate": 7.2935255535178924e-06, "loss": 0.9962, "step": 3536 }, { "epoch": 0.5997456549385333, "grad_norm": 0.9758429926682303, "learning_rate": 7.2882386476358304e-06, "loss": 0.9599, "step": 3537 }, { "epoch": 0.5999152183128444, "grad_norm": 0.92878277910036, "learning_rate": 7.282952559764769e-06, "loss": 0.9392, "step": 3538 }, { "epoch": 0.6000847816871556, "grad_norm": 0.9773764245492319, "learning_rate": 7.277667291499268e-06, "loss": 0.9404, "step": 3539 }, { "epoch": 0.6002543450614667, "grad_norm": 0.9822468601188052, "learning_rate": 7.272382844433653e-06, "loss": 0.9564, "step": 3540 }, { "epoch": 0.6004239084357779, "grad_norm": 0.9603553481852201, "learning_rate": 7.267099220161989e-06, "loss": 0.9267, "step": 3541 }, { "epoch": 0.600593471810089, "grad_norm": 0.946922397392502, "learning_rate": 7.2618164202780914e-06, "loss": 0.9798, "step": 3542 }, { "epoch": 0.6007630351844002, "grad_norm": 1.0062277905823154, "learning_rate": 7.256534446375543e-06, "loss": 0.968, "step": 3543 }, { "epoch": 0.6009325985587113, "grad_norm": 0.9764568237814507, "learning_rate": 7.2512533000476625e-06, "loss": 0.9352, "step": 3544 }, { "epoch": 0.6011021619330225, "grad_norm": 0.9952745038344479, "learning_rate": 7.2459729828875256e-06, "loss": 0.9283, "step": 3545 }, { "epoch": 0.6012717253073336, "grad_norm": 1.0003705071359748, "learning_rate": 7.24069349648795e-06, "loss": 1.0048, "step": 3546 }, { "epoch": 0.6014412886816448, "grad_norm": 0.9653270068319545, "learning_rate": 7.235414842441517e-06, "loss": 0.9687, "step": 3547 }, { "epoch": 0.6016108520559559, "grad_norm": 1.008970256053457, "learning_rate": 7.230137022340542e-06, "loss": 0.9409, "step": 3548 }, { "epoch": 0.6017804154302671, "grad_norm": 0.955598642345728, "learning_rate": 7.224860037777095e-06, "loss": 0.9455, "step": 3549 }, { "epoch": 0.6019499788045782, "grad_norm": 0.9643830385379727, "learning_rate": 7.219583890343003e-06, "loss": 0.9712, "step": 3550 }, { "epoch": 0.6021195421788894, "grad_norm": 0.9718318222999374, "learning_rate": 7.2143085816298234e-06, "loss": 0.9707, "step": 3551 }, { "epoch": 0.6022891055532005, "grad_norm": 0.9547241384819419, "learning_rate": 7.209034113228872e-06, "loss": 0.9348, "step": 3552 }, { "epoch": 0.6024586689275117, "grad_norm": 0.9400110782529516, "learning_rate": 7.203760486731204e-06, "loss": 0.9085, "step": 3553 }, { "epoch": 0.6026282323018228, "grad_norm": 0.989651690332263, "learning_rate": 7.198487703727632e-06, "loss": 0.982, "step": 3554 }, { "epoch": 0.602797795676134, "grad_norm": 0.9426314691646828, "learning_rate": 7.193215765808703e-06, "loss": 0.9051, "step": 3555 }, { "epoch": 0.6029673590504451, "grad_norm": 1.0109609886761874, "learning_rate": 7.1879446745647155e-06, "loss": 0.9754, "step": 3556 }, { "epoch": 0.6031369224247562, "grad_norm": 0.9481125813550529, "learning_rate": 7.182674431585703e-06, "loss": 0.9607, "step": 3557 }, { "epoch": 0.6033064857990674, "grad_norm": 0.9624245788500905, "learning_rate": 7.177405038461459e-06, "loss": 0.9622, "step": 3558 }, { "epoch": 0.6034760491733786, "grad_norm": 0.9577243135413294, "learning_rate": 7.172136496781508e-06, "loss": 0.9662, "step": 3559 }, { "epoch": 0.6036456125476897, "grad_norm": 0.9569751644935419, "learning_rate": 7.1668688081351164e-06, "loss": 0.9521, "step": 3560 }, { "epoch": 0.6038151759220008, "grad_norm": 0.9360734253227487, "learning_rate": 7.161601974111308e-06, "loss": 0.9282, "step": 3561 }, { "epoch": 0.603984739296312, "grad_norm": 0.674245913055902, "learning_rate": 7.156335996298834e-06, "loss": 0.8913, "step": 3562 }, { "epoch": 0.6041543026706232, "grad_norm": 0.9182468337016708, "learning_rate": 7.1510708762861945e-06, "loss": 0.885, "step": 3563 }, { "epoch": 0.6043238660449343, "grad_norm": 0.9464453060337928, "learning_rate": 7.1458066156616244e-06, "loss": 0.9446, "step": 3564 }, { "epoch": 0.6044934294192454, "grad_norm": 0.9718076567862831, "learning_rate": 7.140543216013109e-06, "loss": 0.963, "step": 3565 }, { "epoch": 0.6046629927935566, "grad_norm": 0.9878707271699002, "learning_rate": 7.1352806789283664e-06, "loss": 0.9619, "step": 3566 }, { "epoch": 0.6048325561678677, "grad_norm": 0.9524419406485947, "learning_rate": 7.1300190059948535e-06, "loss": 0.9449, "step": 3567 }, { "epoch": 0.6050021195421789, "grad_norm": 0.9651545667980798, "learning_rate": 7.124758198799777e-06, "loss": 0.9486, "step": 3568 }, { "epoch": 0.60517168291649, "grad_norm": 0.9105105348367812, "learning_rate": 7.119498258930073e-06, "loss": 0.926, "step": 3569 }, { "epoch": 0.6053412462908012, "grad_norm": 0.9476184587646148, "learning_rate": 7.114239187972416e-06, "loss": 0.9442, "step": 3570 }, { "epoch": 0.6055108096651123, "grad_norm": 0.9779200212275795, "learning_rate": 7.108980987513216e-06, "loss": 0.9178, "step": 3571 }, { "epoch": 0.6056803730394235, "grad_norm": 0.9133960221742026, "learning_rate": 7.103723659138636e-06, "loss": 0.9303, "step": 3572 }, { "epoch": 0.6058499364137346, "grad_norm": 0.9901965390411374, "learning_rate": 7.098467204434559e-06, "loss": 0.9531, "step": 3573 }, { "epoch": 0.6060194997880458, "grad_norm": 0.6228061685981762, "learning_rate": 7.093211624986611e-06, "loss": 0.8017, "step": 3574 }, { "epoch": 0.6061890631623569, "grad_norm": 0.9500725921351076, "learning_rate": 7.0879569223801526e-06, "loss": 0.9554, "step": 3575 }, { "epoch": 0.606358626536668, "grad_norm": 1.0305642970489943, "learning_rate": 7.082703098200282e-06, "loss": 0.9632, "step": 3576 }, { "epoch": 0.6065281899109792, "grad_norm": 0.9367878114866866, "learning_rate": 7.0774501540318305e-06, "loss": 0.9479, "step": 3577 }, { "epoch": 0.6066977532852904, "grad_norm": 0.9729790581374097, "learning_rate": 7.072198091459361e-06, "loss": 0.9272, "step": 3578 }, { "epoch": 0.6068673166596015, "grad_norm": 0.9644565090309974, "learning_rate": 7.0669469120671815e-06, "loss": 0.927, "step": 3579 }, { "epoch": 0.6070368800339127, "grad_norm": 0.9970790373118575, "learning_rate": 7.061696617439323e-06, "loss": 0.958, "step": 3580 }, { "epoch": 0.6072064434082238, "grad_norm": 0.9746233992125029, "learning_rate": 7.056447209159552e-06, "loss": 0.9373, "step": 3581 }, { "epoch": 0.607376006782535, "grad_norm": 0.9977839208095215, "learning_rate": 7.051198688811366e-06, "loss": 0.9699, "step": 3582 }, { "epoch": 0.6075455701568461, "grad_norm": 0.9846190006746545, "learning_rate": 7.045951057978001e-06, "loss": 0.9312, "step": 3583 }, { "epoch": 0.6077151335311572, "grad_norm": 0.9778400612359939, "learning_rate": 7.040704318242419e-06, "loss": 0.968, "step": 3584 }, { "epoch": 0.6078846969054684, "grad_norm": 0.9940171253107976, "learning_rate": 7.035458471187312e-06, "loss": 0.9526, "step": 3585 }, { "epoch": 0.6080542602797796, "grad_norm": 0.9741839062136622, "learning_rate": 7.030213518395112e-06, "loss": 0.9508, "step": 3586 }, { "epoch": 0.6082238236540907, "grad_norm": 0.9635571456769719, "learning_rate": 7.024969461447973e-06, "loss": 0.8941, "step": 3587 }, { "epoch": 0.6083933870284018, "grad_norm": 0.890560656184682, "learning_rate": 7.019726301927776e-06, "loss": 0.9563, "step": 3588 }, { "epoch": 0.608562950402713, "grad_norm": 0.9266934141860115, "learning_rate": 7.014484041416137e-06, "loss": 0.9279, "step": 3589 }, { "epoch": 0.6087325137770242, "grad_norm": 0.9975322238544472, "learning_rate": 7.0092426814944045e-06, "loss": 0.9813, "step": 3590 }, { "epoch": 0.6089020771513353, "grad_norm": 0.9983275311633019, "learning_rate": 7.004002223743649e-06, "loss": 0.9249, "step": 3591 }, { "epoch": 0.6090716405256464, "grad_norm": 0.9586797655327807, "learning_rate": 6.998762669744668e-06, "loss": 0.9063, "step": 3592 }, { "epoch": 0.6092412038999576, "grad_norm": 0.9771303178769589, "learning_rate": 6.993524021077989e-06, "loss": 0.9641, "step": 3593 }, { "epoch": 0.6094107672742688, "grad_norm": 0.9108438938382009, "learning_rate": 6.9882862793238685e-06, "loss": 0.8488, "step": 3594 }, { "epoch": 0.6095803306485799, "grad_norm": 1.006793036955104, "learning_rate": 6.983049446062285e-06, "loss": 0.8998, "step": 3595 }, { "epoch": 0.609749894022891, "grad_norm": 0.9408193042652288, "learning_rate": 6.977813522872943e-06, "loss": 0.9686, "step": 3596 }, { "epoch": 0.6099194573972022, "grad_norm": 0.9272812594944455, "learning_rate": 6.97257851133528e-06, "loss": 0.9274, "step": 3597 }, { "epoch": 0.6100890207715134, "grad_norm": 0.7234740418392058, "learning_rate": 6.967344413028452e-06, "loss": 0.8403, "step": 3598 }, { "epoch": 0.6102585841458245, "grad_norm": 0.9991221948741854, "learning_rate": 6.962111229531337e-06, "loss": 0.9329, "step": 3599 }, { "epoch": 0.6104281475201356, "grad_norm": 1.04734634364071, "learning_rate": 6.9568789624225415e-06, "loss": 0.9653, "step": 3600 }, { "epoch": 0.6105977108944468, "grad_norm": 1.0261358786311874, "learning_rate": 6.951647613280397e-06, "loss": 0.9523, "step": 3601 }, { "epoch": 0.610767274268758, "grad_norm": 0.9952531994056946, "learning_rate": 6.946417183682955e-06, "loss": 0.9452, "step": 3602 }, { "epoch": 0.610936837643069, "grad_norm": 0.6898001366740375, "learning_rate": 6.9411876752079856e-06, "loss": 0.8044, "step": 3603 }, { "epoch": 0.6111064010173802, "grad_norm": 1.008820707094296, "learning_rate": 6.935959089432995e-06, "loss": 0.9821, "step": 3604 }, { "epoch": 0.6112759643916914, "grad_norm": 1.0293091512956585, "learning_rate": 6.930731427935196e-06, "loss": 0.9675, "step": 3605 }, { "epoch": 0.6114455277660026, "grad_norm": 0.9386580613482086, "learning_rate": 6.925504692291529e-06, "loss": 0.9369, "step": 3606 }, { "epoch": 0.6116150911403136, "grad_norm": 0.9274281634066788, "learning_rate": 6.920278884078652e-06, "loss": 0.9908, "step": 3607 }, { "epoch": 0.6117846545146248, "grad_norm": 0.9620406217875858, "learning_rate": 6.915054004872952e-06, "loss": 0.9485, "step": 3608 }, { "epoch": 0.611954217888936, "grad_norm": 0.9241775384908139, "learning_rate": 6.909830056250527e-06, "loss": 0.9171, "step": 3609 }, { "epoch": 0.6121237812632472, "grad_norm": 0.9545577391552628, "learning_rate": 6.904607039787197e-06, "loss": 0.9406, "step": 3610 }, { "epoch": 0.6122933446375582, "grad_norm": 0.9870401069288942, "learning_rate": 6.899384957058496e-06, "loss": 0.9617, "step": 3611 }, { "epoch": 0.6124629080118694, "grad_norm": 0.9223329815743532, "learning_rate": 6.894163809639688e-06, "loss": 0.9266, "step": 3612 }, { "epoch": 0.6126324713861806, "grad_norm": 0.9754436645170015, "learning_rate": 6.888943599105745e-06, "loss": 0.9728, "step": 3613 }, { "epoch": 0.6128020347604918, "grad_norm": 1.0075120743778265, "learning_rate": 6.883724327031355e-06, "loss": 0.9781, "step": 3614 }, { "epoch": 0.6129715981348028, "grad_norm": 0.9641327253741406, "learning_rate": 6.878505994990935e-06, "loss": 0.9183, "step": 3615 }, { "epoch": 0.613141161509114, "grad_norm": 0.9610536889133187, "learning_rate": 6.873288604558608e-06, "loss": 0.9811, "step": 3616 }, { "epoch": 0.6133107248834252, "grad_norm": 0.9535351515072967, "learning_rate": 6.868072157308213e-06, "loss": 0.9441, "step": 3617 }, { "epoch": 0.6134802882577364, "grad_norm": 0.9000106944239568, "learning_rate": 6.862856654813308e-06, "loss": 0.9307, "step": 3618 }, { "epoch": 0.6136498516320474, "grad_norm": 1.0117360581842432, "learning_rate": 6.857642098647165e-06, "loss": 0.9564, "step": 3619 }, { "epoch": 0.6138194150063586, "grad_norm": 0.9827183689143366, "learning_rate": 6.852428490382773e-06, "loss": 0.9955, "step": 3620 }, { "epoch": 0.6139889783806698, "grad_norm": 0.9272703313445352, "learning_rate": 6.84721583159283e-06, "loss": 0.9871, "step": 3621 }, { "epoch": 0.614158541754981, "grad_norm": 0.9183923895907284, "learning_rate": 6.8420041238497525e-06, "loss": 0.9085, "step": 3622 }, { "epoch": 0.614328105129292, "grad_norm": 0.6123462227947196, "learning_rate": 6.836793368725666e-06, "loss": 0.771, "step": 3623 }, { "epoch": 0.6144976685036032, "grad_norm": 0.9668384833623501, "learning_rate": 6.831583567792411e-06, "loss": 0.917, "step": 3624 }, { "epoch": 0.6146672318779144, "grad_norm": 0.9568118178707554, "learning_rate": 6.826374722621536e-06, "loss": 0.9616, "step": 3625 }, { "epoch": 0.6148367952522256, "grad_norm": 0.9586749034607601, "learning_rate": 6.821166834784314e-06, "loss": 0.9443, "step": 3626 }, { "epoch": 0.6150063586265366, "grad_norm": 0.9338314815642756, "learning_rate": 6.815959905851715e-06, "loss": 0.978, "step": 3627 }, { "epoch": 0.6151759220008478, "grad_norm": 0.9624952336640717, "learning_rate": 6.810753937394423e-06, "loss": 0.974, "step": 3628 }, { "epoch": 0.615345485375159, "grad_norm": 0.9289002363802685, "learning_rate": 6.805548930982832e-06, "loss": 0.9454, "step": 3629 }, { "epoch": 0.6155150487494702, "grad_norm": 0.9173123578164978, "learning_rate": 6.800344888187057e-06, "loss": 0.9547, "step": 3630 }, { "epoch": 0.6156846121237812, "grad_norm": 0.9304488996276987, "learning_rate": 6.795141810576906e-06, "loss": 0.9527, "step": 3631 }, { "epoch": 0.6158541754980924, "grad_norm": 0.9496860226046983, "learning_rate": 6.789939699721902e-06, "loss": 0.968, "step": 3632 }, { "epoch": 0.6160237388724036, "grad_norm": 0.9856173160167246, "learning_rate": 6.784738557191284e-06, "loss": 0.9762, "step": 3633 }, { "epoch": 0.6161933022467148, "grad_norm": 0.9688039043331194, "learning_rate": 6.779538384553989e-06, "loss": 0.9304, "step": 3634 }, { "epoch": 0.6163628656210258, "grad_norm": 0.9576583183590159, "learning_rate": 6.774339183378663e-06, "loss": 0.9714, "step": 3635 }, { "epoch": 0.616532428995337, "grad_norm": 1.1254625387537205, "learning_rate": 6.76914095523366e-06, "loss": 0.9434, "step": 3636 }, { "epoch": 0.6167019923696482, "grad_norm": 1.0224959548437016, "learning_rate": 6.763943701687046e-06, "loss": 0.9773, "step": 3637 }, { "epoch": 0.6168715557439594, "grad_norm": 0.9329769612350408, "learning_rate": 6.758747424306586e-06, "loss": 0.9547, "step": 3638 }, { "epoch": 0.6170411191182704, "grad_norm": 0.9703981089873205, "learning_rate": 6.75355212465975e-06, "loss": 0.8931, "step": 3639 }, { "epoch": 0.6172106824925816, "grad_norm": 0.9166830173951005, "learning_rate": 6.748357804313721e-06, "loss": 0.9425, "step": 3640 }, { "epoch": 0.6173802458668928, "grad_norm": 1.0127858287781513, "learning_rate": 6.7431644648353785e-06, "loss": 0.9252, "step": 3641 }, { "epoch": 0.617549809241204, "grad_norm": 0.9372534631539996, "learning_rate": 6.7379721077913095e-06, "loss": 0.9554, "step": 3642 }, { "epoch": 0.617719372615515, "grad_norm": 0.9751211374832992, "learning_rate": 6.732780734747799e-06, "loss": 0.93, "step": 3643 }, { "epoch": 0.6178889359898262, "grad_norm": 1.0002749612993924, "learning_rate": 6.727590347270849e-06, "loss": 0.96, "step": 3644 }, { "epoch": 0.6180584993641374, "grad_norm": 0.9561169569595386, "learning_rate": 6.7224009469261535e-06, "loss": 0.9377, "step": 3645 }, { "epoch": 0.6182280627384485, "grad_norm": 0.9645597845785824, "learning_rate": 6.717212535279108e-06, "loss": 0.9162, "step": 3646 }, { "epoch": 0.6183976261127596, "grad_norm": 0.9751821897681725, "learning_rate": 6.712025113894811e-06, "loss": 0.9768, "step": 3647 }, { "epoch": 0.6185671894870708, "grad_norm": 0.9154662572808107, "learning_rate": 6.7068386843380695e-06, "loss": 0.9121, "step": 3648 }, { "epoch": 0.618736752861382, "grad_norm": 0.9682449940877177, "learning_rate": 6.701653248173382e-06, "loss": 0.9334, "step": 3649 }, { "epoch": 0.6189063162356931, "grad_norm": 0.9455077850444994, "learning_rate": 6.6964688069649474e-06, "loss": 0.9569, "step": 3650 }, { "epoch": 0.6190758796100042, "grad_norm": 0.9238000350169707, "learning_rate": 6.691285362276676e-06, "loss": 0.9491, "step": 3651 }, { "epoch": 0.6192454429843154, "grad_norm": 0.9613940991210574, "learning_rate": 6.6861029156721654e-06, "loss": 0.9568, "step": 3652 }, { "epoch": 0.6194150063586266, "grad_norm": 0.9092712262020262, "learning_rate": 6.680921468714718e-06, "loss": 0.928, "step": 3653 }, { "epoch": 0.6195845697329377, "grad_norm": 0.9493851312472502, "learning_rate": 6.675741022967327e-06, "loss": 0.9455, "step": 3654 }, { "epoch": 0.6197541331072488, "grad_norm": 0.9502381988785242, "learning_rate": 6.670561579992698e-06, "loss": 0.937, "step": 3655 }, { "epoch": 0.61992369648156, "grad_norm": 0.939589242814108, "learning_rate": 6.665383141353221e-06, "loss": 0.9453, "step": 3656 }, { "epoch": 0.6200932598558712, "grad_norm": 0.955136883357873, "learning_rate": 6.660205708610987e-06, "loss": 0.9619, "step": 3657 }, { "epoch": 0.6202628232301822, "grad_norm": 0.9651081783025447, "learning_rate": 6.655029283327788e-06, "loss": 0.9422, "step": 3658 }, { "epoch": 0.6204323866044934, "grad_norm": 0.9149744042302325, "learning_rate": 6.649853867065104e-06, "loss": 0.9522, "step": 3659 }, { "epoch": 0.6206019499788046, "grad_norm": 0.9490884512939449, "learning_rate": 6.644679461384117e-06, "loss": 0.9134, "step": 3660 }, { "epoch": 0.6207715133531158, "grad_norm": 0.9875114999940674, "learning_rate": 6.639506067845698e-06, "loss": 0.9844, "step": 3661 }, { "epoch": 0.6209410767274268, "grad_norm": 0.9660246158846859, "learning_rate": 6.634333688010426e-06, "loss": 0.9427, "step": 3662 }, { "epoch": 0.621110640101738, "grad_norm": 1.0088895898344516, "learning_rate": 6.629162323438558e-06, "loss": 0.9807, "step": 3663 }, { "epoch": 0.6212802034760492, "grad_norm": 1.002968149854612, "learning_rate": 6.623991975690051e-06, "loss": 0.9528, "step": 3664 }, { "epoch": 0.6214497668503604, "grad_norm": 1.0848164228137362, "learning_rate": 6.618822646324563e-06, "loss": 0.983, "step": 3665 }, { "epoch": 0.6216193302246714, "grad_norm": 0.9754436523248472, "learning_rate": 6.613654336901431e-06, "loss": 0.9451, "step": 3666 }, { "epoch": 0.6217888935989826, "grad_norm": 0.9777425024532083, "learning_rate": 6.608487048979695e-06, "loss": 0.9353, "step": 3667 }, { "epoch": 0.6219584569732938, "grad_norm": 0.9860140172466118, "learning_rate": 6.603320784118075e-06, "loss": 0.9362, "step": 3668 }, { "epoch": 0.622128020347605, "grad_norm": 0.9834874902426562, "learning_rate": 6.598155543875002e-06, "loss": 0.9197, "step": 3669 }, { "epoch": 0.622297583721916, "grad_norm": 0.9394882901158351, "learning_rate": 6.5929913298085815e-06, "loss": 0.9187, "step": 3670 }, { "epoch": 0.6224671470962272, "grad_norm": 0.9607314021628965, "learning_rate": 6.5878281434766136e-06, "loss": 0.962, "step": 3671 }, { "epoch": 0.6226367104705384, "grad_norm": 0.9559558637460268, "learning_rate": 6.582665986436585e-06, "loss": 0.9378, "step": 3672 }, { "epoch": 0.6228062738448495, "grad_norm": 1.0090440876797908, "learning_rate": 6.577504860245684e-06, "loss": 0.9745, "step": 3673 }, { "epoch": 0.6229758372191606, "grad_norm": 0.9831311968800052, "learning_rate": 6.572344766460776e-06, "loss": 0.9582, "step": 3674 }, { "epoch": 0.6231454005934718, "grad_norm": 0.6714470899732261, "learning_rate": 6.567185706638417e-06, "loss": 0.7965, "step": 3675 }, { "epoch": 0.623314963967783, "grad_norm": 0.9500549126118222, "learning_rate": 6.562027682334857e-06, "loss": 0.9638, "step": 3676 }, { "epoch": 0.6234845273420941, "grad_norm": 1.0170718413224733, "learning_rate": 6.556870695106028e-06, "loss": 0.9334, "step": 3677 }, { "epoch": 0.6236540907164052, "grad_norm": 0.9443914976884543, "learning_rate": 6.55171474650755e-06, "loss": 0.9419, "step": 3678 }, { "epoch": 0.6238236540907164, "grad_norm": 0.9765768040988373, "learning_rate": 6.5465598380947274e-06, "loss": 0.9886, "step": 3679 }, { "epoch": 0.6239932174650276, "grad_norm": 0.9884567637502714, "learning_rate": 6.5414059714225605e-06, "loss": 0.9501, "step": 3680 }, { "epoch": 0.6241627808393387, "grad_norm": 0.9509851284529884, "learning_rate": 6.536253148045726e-06, "loss": 0.9466, "step": 3681 }, { "epoch": 0.6243323442136498, "grad_norm": 1.0311171820536678, "learning_rate": 6.531101369518585e-06, "loss": 0.9554, "step": 3682 }, { "epoch": 0.624501907587961, "grad_norm": 0.9602720873650847, "learning_rate": 6.525950637395193e-06, "loss": 0.9239, "step": 3683 }, { "epoch": 0.6246714709622722, "grad_norm": 0.9025769748096363, "learning_rate": 6.520800953229282e-06, "loss": 0.9153, "step": 3684 }, { "epoch": 0.6248410343365833, "grad_norm": 0.9088540058686904, "learning_rate": 6.515652318574268e-06, "loss": 0.9289, "step": 3685 }, { "epoch": 0.6250105977108944, "grad_norm": 0.9667571334743007, "learning_rate": 6.51050473498325e-06, "loss": 0.9482, "step": 3686 }, { "epoch": 0.6251801610852056, "grad_norm": 0.9764592923428709, "learning_rate": 6.505358204009018e-06, "loss": 0.9552, "step": 3687 }, { "epoch": 0.6253497244595168, "grad_norm": 1.003009845536553, "learning_rate": 6.500212727204036e-06, "loss": 0.9623, "step": 3688 }, { "epoch": 0.6255192878338279, "grad_norm": 0.9716172181982657, "learning_rate": 6.495068306120452e-06, "loss": 0.9309, "step": 3689 }, { "epoch": 0.625688851208139, "grad_norm": 0.9571644612825456, "learning_rate": 6.489924942310093e-06, "loss": 0.9407, "step": 3690 }, { "epoch": 0.6258584145824502, "grad_norm": 0.9223358918499669, "learning_rate": 6.484782637324479e-06, "loss": 0.918, "step": 3691 }, { "epoch": 0.6260279779567614, "grad_norm": 1.0040721141980677, "learning_rate": 6.479641392714795e-06, "loss": 0.9695, "step": 3692 }, { "epoch": 0.6261975413310725, "grad_norm": 0.9771888732023392, "learning_rate": 6.474501210031914e-06, "loss": 0.9235, "step": 3693 }, { "epoch": 0.6263671047053836, "grad_norm": 0.967482033057149, "learning_rate": 6.469362090826389e-06, "loss": 0.9421, "step": 3694 }, { "epoch": 0.6265366680796948, "grad_norm": 0.6666600352634424, "learning_rate": 6.46422403664845e-06, "loss": 0.8363, "step": 3695 }, { "epoch": 0.626706231454006, "grad_norm": 1.0099654352019811, "learning_rate": 6.459087049048007e-06, "loss": 0.9238, "step": 3696 }, { "epoch": 0.6268757948283171, "grad_norm": 0.9818404414423542, "learning_rate": 6.453951129574644e-06, "loss": 0.9493, "step": 3697 }, { "epoch": 0.6270453582026282, "grad_norm": 0.9841192499302777, "learning_rate": 6.448816279777633e-06, "loss": 0.9713, "step": 3698 }, { "epoch": 0.6272149215769394, "grad_norm": 0.9840368417943546, "learning_rate": 6.443682501205914e-06, "loss": 0.9363, "step": 3699 }, { "epoch": 0.6273844849512505, "grad_norm": 0.9536547576356458, "learning_rate": 6.438549795408107e-06, "loss": 0.9434, "step": 3700 }, { "epoch": 0.6275540483255617, "grad_norm": 0.9459360773508195, "learning_rate": 6.433418163932508e-06, "loss": 0.9685, "step": 3701 }, { "epoch": 0.6277236116998728, "grad_norm": 0.9547681146357571, "learning_rate": 6.428287608327088e-06, "loss": 0.9433, "step": 3702 }, { "epoch": 0.627893175074184, "grad_norm": 0.8781367843593298, "learning_rate": 6.4231581301394954e-06, "loss": 0.8929, "step": 3703 }, { "epoch": 0.6280627384484951, "grad_norm": 0.9778096975232439, "learning_rate": 6.418029730917052e-06, "loss": 0.9581, "step": 3704 }, { "epoch": 0.6282323018228063, "grad_norm": 0.9345090889656249, "learning_rate": 6.41290241220676e-06, "loss": 0.946, "step": 3705 }, { "epoch": 0.6284018651971174, "grad_norm": 0.9558259234034503, "learning_rate": 6.407776175555285e-06, "loss": 0.9359, "step": 3706 }, { "epoch": 0.6285714285714286, "grad_norm": 0.9989021828966471, "learning_rate": 6.402651022508975e-06, "loss": 0.9281, "step": 3707 }, { "epoch": 0.6287409919457397, "grad_norm": 0.9518018291265377, "learning_rate": 6.39752695461384e-06, "loss": 0.9286, "step": 3708 }, { "epoch": 0.6289105553200509, "grad_norm": 0.968957747409856, "learning_rate": 6.392403973415582e-06, "loss": 0.9014, "step": 3709 }, { "epoch": 0.629080118694362, "grad_norm": 0.9663233660100905, "learning_rate": 6.387282080459558e-06, "loss": 0.9474, "step": 3710 }, { "epoch": 0.6292496820686732, "grad_norm": 0.9385458433833092, "learning_rate": 6.382161277290801e-06, "loss": 0.911, "step": 3711 }, { "epoch": 0.6294192454429843, "grad_norm": 0.9443882867952118, "learning_rate": 6.377041565454021e-06, "loss": 0.9579, "step": 3712 }, { "epoch": 0.6295888088172955, "grad_norm": 0.9606984568748994, "learning_rate": 6.3719229464935915e-06, "loss": 0.9256, "step": 3713 }, { "epoch": 0.6297583721916066, "grad_norm": 0.9267929555006322, "learning_rate": 6.3668054219535616e-06, "loss": 0.9418, "step": 3714 }, { "epoch": 0.6299279355659178, "grad_norm": 0.9599468887830896, "learning_rate": 6.361688993377642e-06, "loss": 0.9569, "step": 3715 }, { "epoch": 0.6300974989402289, "grad_norm": 0.9234688762554217, "learning_rate": 6.356573662309227e-06, "loss": 0.9111, "step": 3716 }, { "epoch": 0.6302670623145401, "grad_norm": 0.9914524824202674, "learning_rate": 6.351459430291369e-06, "loss": 0.958, "step": 3717 }, { "epoch": 0.6304366256888512, "grad_norm": 1.0145103772439585, "learning_rate": 6.3463462988667855e-06, "loss": 0.971, "step": 3718 }, { "epoch": 0.6306061890631623, "grad_norm": 1.0210710754978762, "learning_rate": 6.341234269577878e-06, "loss": 0.9677, "step": 3719 }, { "epoch": 0.6307757524374735, "grad_norm": 0.9369572208111215, "learning_rate": 6.3361233439667e-06, "loss": 0.9618, "step": 3720 }, { "epoch": 0.6309453158117847, "grad_norm": 0.9963596004046189, "learning_rate": 6.331013523574978e-06, "loss": 0.9354, "step": 3721 }, { "epoch": 0.6311148791860958, "grad_norm": 0.9594972352973306, "learning_rate": 6.3259048099441045e-06, "loss": 0.9471, "step": 3722 }, { "epoch": 0.6312844425604069, "grad_norm": 0.9579331372509057, "learning_rate": 6.32079720461514e-06, "loss": 0.9437, "step": 3723 }, { "epoch": 0.6314540059347181, "grad_norm": 0.9566115669505098, "learning_rate": 6.315690709128808e-06, "loss": 0.9227, "step": 3724 }, { "epoch": 0.6316235693090293, "grad_norm": 0.9781590776047974, "learning_rate": 6.310585325025499e-06, "loss": 0.9944, "step": 3725 }, { "epoch": 0.6317931326833404, "grad_norm": 0.9851828662730695, "learning_rate": 6.305481053845262e-06, "loss": 0.985, "step": 3726 }, { "epoch": 0.6319626960576515, "grad_norm": 0.9656966272833261, "learning_rate": 6.300377897127825e-06, "loss": 0.9383, "step": 3727 }, { "epoch": 0.6321322594319627, "grad_norm": 0.9701303138664904, "learning_rate": 6.295275856412567e-06, "loss": 0.9607, "step": 3728 }, { "epoch": 0.6323018228062739, "grad_norm": 0.992522396568827, "learning_rate": 6.290174933238531e-06, "loss": 0.9474, "step": 3729 }, { "epoch": 0.632471386180585, "grad_norm": 0.9427977179391128, "learning_rate": 6.285075129144429e-06, "loss": 0.8795, "step": 3730 }, { "epoch": 0.6326409495548961, "grad_norm": 0.9823425163164353, "learning_rate": 6.2799764456686326e-06, "loss": 0.9573, "step": 3731 }, { "epoch": 0.6328105129292073, "grad_norm": 0.9443080726175307, "learning_rate": 6.274878884349174e-06, "loss": 0.9437, "step": 3732 }, { "epoch": 0.6329800763035185, "grad_norm": 0.9234219215681932, "learning_rate": 6.2697824467237445e-06, "loss": 0.9029, "step": 3733 }, { "epoch": 0.6331496396778296, "grad_norm": 0.9590176411797248, "learning_rate": 6.2646871343297055e-06, "loss": 0.9433, "step": 3734 }, { "epoch": 0.6333192030521407, "grad_norm": 0.9241641134740813, "learning_rate": 6.259592948704073e-06, "loss": 0.8862, "step": 3735 }, { "epoch": 0.6334887664264519, "grad_norm": 0.9629136949939168, "learning_rate": 6.254499891383517e-06, "loss": 0.979, "step": 3736 }, { "epoch": 0.6336583298007631, "grad_norm": 1.0129659639175814, "learning_rate": 6.249407963904381e-06, "loss": 0.9525, "step": 3737 }, { "epoch": 0.6338278931750742, "grad_norm": 0.9634058449209946, "learning_rate": 6.244317167802659e-06, "loss": 0.9786, "step": 3738 }, { "epoch": 0.6339974565493853, "grad_norm": 0.947641971181679, "learning_rate": 6.239227504614004e-06, "loss": 0.9791, "step": 3739 }, { "epoch": 0.6341670199236965, "grad_norm": 0.9732105239017287, "learning_rate": 6.234138975873724e-06, "loss": 0.9164, "step": 3740 }, { "epoch": 0.6343365832980077, "grad_norm": 0.9918580177844567, "learning_rate": 6.229051583116796e-06, "loss": 0.9375, "step": 3741 }, { "epoch": 0.6345061466723187, "grad_norm": 0.9768923877564517, "learning_rate": 6.223965327877846e-06, "loss": 0.9313, "step": 3742 }, { "epoch": 0.6346757100466299, "grad_norm": 0.9860459364488126, "learning_rate": 6.218880211691154e-06, "loss": 0.9539, "step": 3743 }, { "epoch": 0.6348452734209411, "grad_norm": 0.9437759422673568, "learning_rate": 6.213796236090661e-06, "loss": 0.9562, "step": 3744 }, { "epoch": 0.6350148367952523, "grad_norm": 0.9368994356142542, "learning_rate": 6.208713402609968e-06, "loss": 0.945, "step": 3745 }, { "epoch": 0.6351844001695633, "grad_norm": 0.9429873960550915, "learning_rate": 6.2036317127823264e-06, "loss": 0.9271, "step": 3746 }, { "epoch": 0.6353539635438745, "grad_norm": 1.0032998718661652, "learning_rate": 6.198551168140638e-06, "loss": 0.9877, "step": 3747 }, { "epoch": 0.6355235269181857, "grad_norm": 0.986679958402424, "learning_rate": 6.1934717702174714e-06, "loss": 0.9737, "step": 3748 }, { "epoch": 0.6356930902924968, "grad_norm": 0.9378020942782609, "learning_rate": 6.1883935205450396e-06, "loss": 0.9318, "step": 3749 }, { "epoch": 0.6358626536668079, "grad_norm": 0.9341357952399267, "learning_rate": 6.183316420655212e-06, "loss": 0.9337, "step": 3750 }, { "epoch": 0.6360322170411191, "grad_norm": 0.9382169870334379, "learning_rate": 6.178240472079504e-06, "loss": 0.9413, "step": 3751 }, { "epoch": 0.6362017804154303, "grad_norm": 0.9653000561077801, "learning_rate": 6.173165676349103e-06, "loss": 0.9426, "step": 3752 }, { "epoch": 0.6363713437897414, "grad_norm": 0.9758697136872103, "learning_rate": 6.168092034994832e-06, "loss": 0.9922, "step": 3753 }, { "epoch": 0.6365409071640525, "grad_norm": 0.9569341048053986, "learning_rate": 6.163019549547163e-06, "loss": 0.9025, "step": 3754 }, { "epoch": 0.6367104705383637, "grad_norm": 0.9378219496212093, "learning_rate": 6.157948221536237e-06, "loss": 0.9276, "step": 3755 }, { "epoch": 0.6368800339126749, "grad_norm": 0.9232938646637593, "learning_rate": 6.152878052491831e-06, "loss": 0.9198, "step": 3756 }, { "epoch": 0.637049597286986, "grad_norm": 0.9989026050269741, "learning_rate": 6.1478090439433776e-06, "loss": 0.9892, "step": 3757 }, { "epoch": 0.6372191606612971, "grad_norm": 0.9691622920384532, "learning_rate": 6.142741197419955e-06, "loss": 0.9326, "step": 3758 }, { "epoch": 0.6373887240356083, "grad_norm": 0.9522085365363817, "learning_rate": 6.1376745144503e-06, "loss": 0.9366, "step": 3759 }, { "epoch": 0.6375582874099195, "grad_norm": 0.9582571265377816, "learning_rate": 6.13260899656279e-06, "loss": 0.9359, "step": 3760 }, { "epoch": 0.6377278507842306, "grad_norm": 0.9364641420730115, "learning_rate": 6.127544645285448e-06, "loss": 0.9375, "step": 3761 }, { "epoch": 0.6378974141585417, "grad_norm": 1.0005435890273477, "learning_rate": 6.1224814621459625e-06, "loss": 0.9519, "step": 3762 }, { "epoch": 0.6380669775328529, "grad_norm": 0.9286981109389738, "learning_rate": 6.117419448671651e-06, "loss": 0.9446, "step": 3763 }, { "epoch": 0.6382365409071641, "grad_norm": 0.9339352276437187, "learning_rate": 6.112358606389488e-06, "loss": 0.9589, "step": 3764 }, { "epoch": 0.6384061042814752, "grad_norm": 1.0168912488267274, "learning_rate": 6.107298936826086e-06, "loss": 0.9731, "step": 3765 }, { "epoch": 0.6385756676557863, "grad_norm": 0.9931397852253987, "learning_rate": 6.102240441507716e-06, "loss": 0.9505, "step": 3766 }, { "epoch": 0.6387452310300975, "grad_norm": 0.9370962567417507, "learning_rate": 6.097183121960286e-06, "loss": 0.9326, "step": 3767 }, { "epoch": 0.6389147944044087, "grad_norm": 0.9632737865310094, "learning_rate": 6.092126979709354e-06, "loss": 0.963, "step": 3768 }, { "epoch": 0.6390843577787197, "grad_norm": 0.966099343780355, "learning_rate": 6.087072016280111e-06, "loss": 0.9369, "step": 3769 }, { "epoch": 0.6392539211530309, "grad_norm": 0.9639035738860314, "learning_rate": 6.082018233197415e-06, "loss": 0.9436, "step": 3770 }, { "epoch": 0.6394234845273421, "grad_norm": 0.9777180311286494, "learning_rate": 6.07696563198575e-06, "loss": 0.94, "step": 3771 }, { "epoch": 0.6395930479016533, "grad_norm": 0.9860377213178608, "learning_rate": 6.0719142141692435e-06, "loss": 0.9461, "step": 3772 }, { "epoch": 0.6397626112759643, "grad_norm": 0.9887980459181656, "learning_rate": 6.066863981271678e-06, "loss": 0.8976, "step": 3773 }, { "epoch": 0.6399321746502755, "grad_norm": 0.9540848540347339, "learning_rate": 6.06181493481647e-06, "loss": 0.9479, "step": 3774 }, { "epoch": 0.6401017380245867, "grad_norm": 0.9755088151515949, "learning_rate": 6.0567670763266775e-06, "loss": 0.9295, "step": 3775 }, { "epoch": 0.6402713013988979, "grad_norm": 0.9276352113353674, "learning_rate": 6.0517204073250015e-06, "loss": 0.9438, "step": 3776 }, { "epoch": 0.6404408647732089, "grad_norm": 0.9487214696110207, "learning_rate": 6.046674929333787e-06, "loss": 0.9228, "step": 3777 }, { "epoch": 0.6406104281475201, "grad_norm": 0.975742034416424, "learning_rate": 6.041630643875018e-06, "loss": 0.9324, "step": 3778 }, { "epoch": 0.6407799915218313, "grad_norm": 0.977433558812134, "learning_rate": 6.036587552470313e-06, "loss": 0.9122, "step": 3779 }, { "epoch": 0.6409495548961425, "grad_norm": 0.926737421211832, "learning_rate": 6.031545656640945e-06, "loss": 0.9258, "step": 3780 }, { "epoch": 0.6411191182704535, "grad_norm": 0.9530536859805545, "learning_rate": 6.0265049579078125e-06, "loss": 0.9563, "step": 3781 }, { "epoch": 0.6412886816447647, "grad_norm": 0.9415541557942966, "learning_rate": 6.021465457791458e-06, "loss": 0.914, "step": 3782 }, { "epoch": 0.6414582450190759, "grad_norm": 0.9704935297194401, "learning_rate": 6.016427157812057e-06, "loss": 0.9522, "step": 3783 }, { "epoch": 0.6416278083933871, "grad_norm": 0.9813209749295353, "learning_rate": 6.011390059489437e-06, "loss": 0.9371, "step": 3784 }, { "epoch": 0.6417973717676981, "grad_norm": 1.0186898672041642, "learning_rate": 6.006354164343047e-06, "loss": 0.9827, "step": 3785 }, { "epoch": 0.6419669351420093, "grad_norm": 0.919147310639371, "learning_rate": 6.0013194738919836e-06, "loss": 0.9132, "step": 3786 }, { "epoch": 0.6421364985163205, "grad_norm": 0.9869698742429506, "learning_rate": 5.9962859896549695e-06, "loss": 0.97, "step": 3787 }, { "epoch": 0.6423060618906317, "grad_norm": 0.9440495931648389, "learning_rate": 5.99125371315038e-06, "loss": 0.9385, "step": 3788 }, { "epoch": 0.6424756252649427, "grad_norm": 0.6225933578146026, "learning_rate": 5.986222645896214e-06, "loss": 0.7874, "step": 3789 }, { "epoch": 0.6426451886392539, "grad_norm": 0.9438024709284348, "learning_rate": 5.981192789410101e-06, "loss": 0.9397, "step": 3790 }, { "epoch": 0.6428147520135651, "grad_norm": 0.932640101026457, "learning_rate": 5.9761641452093225e-06, "loss": 0.9469, "step": 3791 }, { "epoch": 0.6429843153878763, "grad_norm": 0.9703739634846951, "learning_rate": 5.971136714810779e-06, "loss": 0.9159, "step": 3792 }, { "epoch": 0.6431538787621873, "grad_norm": 0.9938946680382339, "learning_rate": 5.96611049973101e-06, "loss": 0.9509, "step": 3793 }, { "epoch": 0.6433234421364985, "grad_norm": 0.9557716240954923, "learning_rate": 5.961085501486188e-06, "loss": 0.9451, "step": 3794 }, { "epoch": 0.6434930055108097, "grad_norm": 0.8732046374030294, "learning_rate": 5.956061721592121e-06, "loss": 0.939, "step": 3795 }, { "epoch": 0.6436625688851209, "grad_norm": 0.5954221307547285, "learning_rate": 5.951039161564247e-06, "loss": 0.7569, "step": 3796 }, { "epoch": 0.6438321322594319, "grad_norm": 0.9719689605506557, "learning_rate": 5.946017822917632e-06, "loss": 0.9531, "step": 3797 }, { "epoch": 0.6440016956337431, "grad_norm": 0.9339359663445854, "learning_rate": 5.940997707166986e-06, "loss": 0.9529, "step": 3798 }, { "epoch": 0.6441712590080543, "grad_norm": 0.9944675742016742, "learning_rate": 5.935978815826638e-06, "loss": 0.965, "step": 3799 }, { "epoch": 0.6443408223823655, "grad_norm": 0.9548131645939382, "learning_rate": 5.9309611504105505e-06, "loss": 0.933, "step": 3800 }, { "epoch": 0.6445103857566765, "grad_norm": 0.9704580915180497, "learning_rate": 5.925944712432317e-06, "loss": 0.9392, "step": 3801 }, { "epoch": 0.6446799491309877, "grad_norm": 0.9499939702296947, "learning_rate": 5.920929503405162e-06, "loss": 0.9333, "step": 3802 }, { "epoch": 0.6448495125052989, "grad_norm": 0.9651937529700388, "learning_rate": 5.915915524841941e-06, "loss": 0.9614, "step": 3803 }, { "epoch": 0.64501907587961, "grad_norm": 0.9698160965435713, "learning_rate": 5.910902778255134e-06, "loss": 0.9344, "step": 3804 }, { "epoch": 0.6451886392539211, "grad_norm": 0.9611238286463303, "learning_rate": 5.905891265156849e-06, "loss": 0.9399, "step": 3805 }, { "epoch": 0.6453582026282323, "grad_norm": 0.9064569039707927, "learning_rate": 5.9008809870588276e-06, "loss": 0.9243, "step": 3806 }, { "epoch": 0.6455277660025435, "grad_norm": 0.9173716267102014, "learning_rate": 5.895871945472434e-06, "loss": 0.9118, "step": 3807 }, { "epoch": 0.6456973293768546, "grad_norm": 0.9376133180151249, "learning_rate": 5.890864141908656e-06, "loss": 0.9075, "step": 3808 }, { "epoch": 0.6458668927511657, "grad_norm": 0.9605866500487444, "learning_rate": 5.885857577878122e-06, "loss": 0.9589, "step": 3809 }, { "epoch": 0.6460364561254769, "grad_norm": 0.984050906627267, "learning_rate": 5.880852254891072e-06, "loss": 0.9306, "step": 3810 }, { "epoch": 0.6462060194997881, "grad_norm": 0.9734765748501751, "learning_rate": 5.875848174457377e-06, "loss": 0.9293, "step": 3811 }, { "epoch": 0.6463755828740992, "grad_norm": 0.9684380212094916, "learning_rate": 5.870845338086532e-06, "loss": 0.9011, "step": 3812 }, { "epoch": 0.6465451462484103, "grad_norm": 0.9709957897262544, "learning_rate": 5.865843747287659e-06, "loss": 0.9724, "step": 3813 }, { "epoch": 0.6467147096227215, "grad_norm": 1.0134347473367822, "learning_rate": 5.860843403569504e-06, "loss": 0.9356, "step": 3814 }, { "epoch": 0.6468842729970327, "grad_norm": 0.9605237921787253, "learning_rate": 5.855844308440429e-06, "loss": 0.9342, "step": 3815 }, { "epoch": 0.6470538363713438, "grad_norm": 0.9836826905598642, "learning_rate": 5.850846463408437e-06, "loss": 0.931, "step": 3816 }, { "epoch": 0.6472233997456549, "grad_norm": 1.0025283263163867, "learning_rate": 5.845849869981137e-06, "loss": 0.961, "step": 3817 }, { "epoch": 0.6473929631199661, "grad_norm": 1.0059098553739443, "learning_rate": 5.840854529665767e-06, "loss": 0.9505, "step": 3818 }, { "epoch": 0.6475625264942773, "grad_norm": 0.9315054540105335, "learning_rate": 5.835860443969185e-06, "loss": 0.9187, "step": 3819 }, { "epoch": 0.6477320898685884, "grad_norm": 0.9623265219691896, "learning_rate": 5.830867614397876e-06, "loss": 0.9246, "step": 3820 }, { "epoch": 0.6479016532428995, "grad_norm": 0.9317562830213191, "learning_rate": 5.825876042457939e-06, "loss": 0.9115, "step": 3821 }, { "epoch": 0.6480712166172107, "grad_norm": 0.9688940693021881, "learning_rate": 5.820885729655098e-06, "loss": 0.9706, "step": 3822 }, { "epoch": 0.6482407799915219, "grad_norm": 0.9099049956166796, "learning_rate": 5.815896677494692e-06, "loss": 0.9486, "step": 3823 }, { "epoch": 0.648410343365833, "grad_norm": 0.952563353729191, "learning_rate": 5.81090888748169e-06, "loss": 0.9336, "step": 3824 }, { "epoch": 0.6485799067401441, "grad_norm": 0.9591061101884213, "learning_rate": 5.8059223611206716e-06, "loss": 0.9056, "step": 3825 }, { "epoch": 0.6487494701144553, "grad_norm": 0.9856020764612605, "learning_rate": 5.800937099915833e-06, "loss": 0.9542, "step": 3826 }, { "epoch": 0.6489190334887665, "grad_norm": 1.0106116342078262, "learning_rate": 5.795953105371e-06, "loss": 0.9407, "step": 3827 }, { "epoch": 0.6490885968630776, "grad_norm": 0.9591800443987448, "learning_rate": 5.790970378989609e-06, "loss": 0.9132, "step": 3828 }, { "epoch": 0.6492581602373887, "grad_norm": 0.9370959826643205, "learning_rate": 5.785988922274711e-06, "loss": 0.9464, "step": 3829 }, { "epoch": 0.6494277236116999, "grad_norm": 1.0294550988290696, "learning_rate": 5.781008736728975e-06, "loss": 0.9335, "step": 3830 }, { "epoch": 0.649597286986011, "grad_norm": 0.9735705262145118, "learning_rate": 5.776029823854697e-06, "loss": 0.9872, "step": 3831 }, { "epoch": 0.6497668503603222, "grad_norm": 0.9485886272470302, "learning_rate": 5.771052185153776e-06, "loss": 0.9537, "step": 3832 }, { "epoch": 0.6499364137346333, "grad_norm": 0.9329816930282926, "learning_rate": 5.766075822127735e-06, "loss": 0.9214, "step": 3833 }, { "epoch": 0.6501059771089445, "grad_norm": 0.9305533041366779, "learning_rate": 5.761100736277704e-06, "loss": 0.8937, "step": 3834 }, { "epoch": 0.6502755404832556, "grad_norm": 0.9386634109485363, "learning_rate": 5.756126929104435e-06, "loss": 0.953, "step": 3835 }, { "epoch": 0.6504451038575668, "grad_norm": 0.9571181726780283, "learning_rate": 5.7511544021082945e-06, "loss": 0.9222, "step": 3836 }, { "epoch": 0.6506146672318779, "grad_norm": 1.0052270376815449, "learning_rate": 5.746183156789252e-06, "loss": 0.9686, "step": 3837 }, { "epoch": 0.6507842306061891, "grad_norm": 0.9892800739945252, "learning_rate": 5.741213194646911e-06, "loss": 0.9593, "step": 3838 }, { "epoch": 0.6509537939805002, "grad_norm": 0.92406570230854, "learning_rate": 5.736244517180467e-06, "loss": 0.8803, "step": 3839 }, { "epoch": 0.6511233573548114, "grad_norm": 0.9683825539874887, "learning_rate": 5.731277125888739e-06, "loss": 0.9169, "step": 3840 }, { "epoch": 0.6512929207291225, "grad_norm": 0.9381000607399452, "learning_rate": 5.726311022270152e-06, "loss": 0.9638, "step": 3841 }, { "epoch": 0.6514624841034337, "grad_norm": 0.9684415990772047, "learning_rate": 5.721346207822753e-06, "loss": 0.9062, "step": 3842 }, { "epoch": 0.6516320474777448, "grad_norm": 0.9829332906301349, "learning_rate": 5.716382684044191e-06, "loss": 0.9929, "step": 3843 }, { "epoch": 0.6518016108520559, "grad_norm": 0.9709442787331626, "learning_rate": 5.711420452431721e-06, "loss": 0.9419, "step": 3844 }, { "epoch": 0.6519711742263671, "grad_norm": 0.9892489756371844, "learning_rate": 5.706459514482226e-06, "loss": 0.9546, "step": 3845 }, { "epoch": 0.6521407376006783, "grad_norm": 0.9324614807963302, "learning_rate": 5.701499871692182e-06, "loss": 0.8961, "step": 3846 }, { "epoch": 0.6523103009749894, "grad_norm": 0.9628225703296495, "learning_rate": 5.696541525557682e-06, "loss": 0.917, "step": 3847 }, { "epoch": 0.6524798643493005, "grad_norm": 1.002203238049265, "learning_rate": 5.691584477574419e-06, "loss": 0.9333, "step": 3848 }, { "epoch": 0.6526494277236117, "grad_norm": 0.9661109640999909, "learning_rate": 5.686628729237713e-06, "loss": 0.9547, "step": 3849 }, { "epoch": 0.6528189910979229, "grad_norm": 0.6236182732040507, "learning_rate": 5.681674282042475e-06, "loss": 0.8047, "step": 3850 }, { "epoch": 0.652988554472234, "grad_norm": 0.8645617052005022, "learning_rate": 5.676721137483226e-06, "loss": 0.8993, "step": 3851 }, { "epoch": 0.6531581178465451, "grad_norm": 1.004583038148914, "learning_rate": 5.671769297054103e-06, "loss": 0.9492, "step": 3852 }, { "epoch": 0.6533276812208563, "grad_norm": 0.9631994308494995, "learning_rate": 5.66681876224884e-06, "loss": 0.9508, "step": 3853 }, { "epoch": 0.6534972445951674, "grad_norm": 1.010882760062839, "learning_rate": 5.661869534560782e-06, "loss": 0.9654, "step": 3854 }, { "epoch": 0.6536668079694786, "grad_norm": 0.968272483345355, "learning_rate": 5.6569216154828776e-06, "loss": 0.9242, "step": 3855 }, { "epoch": 0.6538363713437897, "grad_norm": 0.9977732737322774, "learning_rate": 5.6519750065076815e-06, "loss": 0.9713, "step": 3856 }, { "epoch": 0.6540059347181009, "grad_norm": 1.065488571152939, "learning_rate": 5.647029709127355e-06, "loss": 0.9944, "step": 3857 }, { "epoch": 0.654175498092412, "grad_norm": 0.9604559546730017, "learning_rate": 5.642085724833656e-06, "loss": 0.9414, "step": 3858 }, { "epoch": 0.6543450614667232, "grad_norm": 0.9678646760492071, "learning_rate": 5.637143055117959e-06, "loss": 0.9472, "step": 3859 }, { "epoch": 0.6545146248410343, "grad_norm": 0.9492250514742354, "learning_rate": 5.632201701471236e-06, "loss": 0.9029, "step": 3860 }, { "epoch": 0.6546841882153455, "grad_norm": 0.9879380930784467, "learning_rate": 5.627261665384056e-06, "loss": 0.9553, "step": 3861 }, { "epoch": 0.6548537515896566, "grad_norm": 1.0105991334684823, "learning_rate": 5.622322948346595e-06, "loss": 0.9474, "step": 3862 }, { "epoch": 0.6550233149639678, "grad_norm": 0.960704636250499, "learning_rate": 5.6173855518486385e-06, "loss": 0.9325, "step": 3863 }, { "epoch": 0.6551928783382789, "grad_norm": 0.5786385742975265, "learning_rate": 5.612449477379564e-06, "loss": 0.7499, "step": 3864 }, { "epoch": 0.6553624417125901, "grad_norm": 1.025347047899016, "learning_rate": 5.6075147264283526e-06, "loss": 0.9808, "step": 3865 }, { "epoch": 0.6555320050869012, "grad_norm": 0.6307921451292178, "learning_rate": 5.602581300483583e-06, "loss": 0.793, "step": 3866 }, { "epoch": 0.6557015684612124, "grad_norm": 1.0464031003151537, "learning_rate": 5.597649201033446e-06, "loss": 1.006, "step": 3867 }, { "epoch": 0.6558711318355235, "grad_norm": 0.9623465383139722, "learning_rate": 5.592718429565721e-06, "loss": 0.9097, "step": 3868 }, { "epoch": 0.6560406952098347, "grad_norm": 0.9668153720541863, "learning_rate": 5.587788987567785e-06, "loss": 0.9648, "step": 3869 }, { "epoch": 0.6562102585841458, "grad_norm": 0.9592803462912153, "learning_rate": 5.582860876526628e-06, "loss": 0.9555, "step": 3870 }, { "epoch": 0.656379821958457, "grad_norm": 0.9203939591580858, "learning_rate": 5.577934097928824e-06, "loss": 0.8892, "step": 3871 }, { "epoch": 0.6565493853327681, "grad_norm": 0.9895137644750535, "learning_rate": 5.573008653260552e-06, "loss": 0.9467, "step": 3872 }, { "epoch": 0.6567189487070793, "grad_norm": 0.9887888152140254, "learning_rate": 5.5680845440075885e-06, "loss": 0.937, "step": 3873 }, { "epoch": 0.6568885120813904, "grad_norm": 0.940673035184092, "learning_rate": 5.5631617716553035e-06, "loss": 0.9485, "step": 3874 }, { "epoch": 0.6570580754557016, "grad_norm": 0.6544679125043726, "learning_rate": 5.558240337688667e-06, "loss": 0.7899, "step": 3875 }, { "epoch": 0.6572276388300127, "grad_norm": 0.9255038306115664, "learning_rate": 5.553320243592239e-06, "loss": 0.9038, "step": 3876 }, { "epoch": 0.6573972022043238, "grad_norm": 0.9437432305339511, "learning_rate": 5.548401490850193e-06, "loss": 0.9277, "step": 3877 }, { "epoch": 0.657566765578635, "grad_norm": 0.9704995551272598, "learning_rate": 5.5434840809462775e-06, "loss": 0.9464, "step": 3878 }, { "epoch": 0.6577363289529462, "grad_norm": 1.0094997653032582, "learning_rate": 5.538568015363846e-06, "loss": 0.9902, "step": 3879 }, { "epoch": 0.6579058923272573, "grad_norm": 0.9978035361638913, "learning_rate": 5.533653295585839e-06, "loss": 0.9303, "step": 3880 }, { "epoch": 0.6580754557015684, "grad_norm": 0.9707163439981172, "learning_rate": 5.528739923094806e-06, "loss": 0.9003, "step": 3881 }, { "epoch": 0.6582450190758796, "grad_norm": 0.9568219375540896, "learning_rate": 5.523827899372876e-06, "loss": 0.9279, "step": 3882 }, { "epoch": 0.6584145824501908, "grad_norm": 0.9631106481386111, "learning_rate": 5.518917225901777e-06, "loss": 0.9222, "step": 3883 }, { "epoch": 0.6585841458245019, "grad_norm": 0.971706362781142, "learning_rate": 5.514007904162822e-06, "loss": 0.9283, "step": 3884 }, { "epoch": 0.658753709198813, "grad_norm": 0.9341604651522025, "learning_rate": 5.509099935636932e-06, "loss": 0.95, "step": 3885 }, { "epoch": 0.6589232725731242, "grad_norm": 0.9506504484405172, "learning_rate": 5.504193321804607e-06, "loss": 0.899, "step": 3886 }, { "epoch": 0.6590928359474354, "grad_norm": 1.0314193236694018, "learning_rate": 5.499288064145938e-06, "loss": 0.9285, "step": 3887 }, { "epoch": 0.6592623993217465, "grad_norm": 0.9683228044749517, "learning_rate": 5.4943841641406185e-06, "loss": 0.9423, "step": 3888 }, { "epoch": 0.6594319626960576, "grad_norm": 0.9681478850057504, "learning_rate": 5.4894816232679195e-06, "loss": 0.9453, "step": 3889 }, { "epoch": 0.6596015260703688, "grad_norm": 0.971662525395111, "learning_rate": 5.484580443006709e-06, "loss": 0.9455, "step": 3890 }, { "epoch": 0.65977108944468, "grad_norm": 0.9107106240362632, "learning_rate": 5.4796806248354416e-06, "loss": 0.9114, "step": 3891 }, { "epoch": 0.6599406528189911, "grad_norm": 0.9915398558162213, "learning_rate": 5.474782170232163e-06, "loss": 1.0068, "step": 3892 }, { "epoch": 0.6601102161933022, "grad_norm": 0.9827792870127341, "learning_rate": 5.469885080674508e-06, "loss": 0.9699, "step": 3893 }, { "epoch": 0.6602797795676134, "grad_norm": 0.9440423204304675, "learning_rate": 5.464989357639692e-06, "loss": 0.9423, "step": 3894 }, { "epoch": 0.6604493429419246, "grad_norm": 0.957413072906512, "learning_rate": 5.460095002604533e-06, "loss": 0.9066, "step": 3895 }, { "epoch": 0.6606189063162357, "grad_norm": 0.9779414388238925, "learning_rate": 5.455202017045425e-06, "loss": 0.9446, "step": 3896 }, { "epoch": 0.6607884696905468, "grad_norm": 0.9884500073975315, "learning_rate": 5.450310402438353e-06, "loss": 0.9554, "step": 3897 }, { "epoch": 0.660958033064858, "grad_norm": 0.9643114567036216, "learning_rate": 5.445420160258881e-06, "loss": 0.9272, "step": 3898 }, { "epoch": 0.6611275964391692, "grad_norm": 0.9371902506912387, "learning_rate": 5.440531291982173e-06, "loss": 0.9244, "step": 3899 }, { "epoch": 0.6612971598134803, "grad_norm": 1.0093923687858606, "learning_rate": 5.435643799082969e-06, "loss": 0.9682, "step": 3900 }, { "epoch": 0.6614667231877914, "grad_norm": 0.970835594452102, "learning_rate": 5.4307576830355945e-06, "loss": 0.9337, "step": 3901 }, { "epoch": 0.6616362865621026, "grad_norm": 1.0176182433371783, "learning_rate": 5.425872945313959e-06, "loss": 0.9603, "step": 3902 }, { "epoch": 0.6618058499364138, "grad_norm": 0.9727334570886468, "learning_rate": 5.420989587391564e-06, "loss": 0.9417, "step": 3903 }, { "epoch": 0.6619754133107248, "grad_norm": 0.9300390063756614, "learning_rate": 5.416107610741487e-06, "loss": 0.9452, "step": 3904 }, { "epoch": 0.662144976685036, "grad_norm": 1.0049390271482403, "learning_rate": 5.4112270168363854e-06, "loss": 0.9614, "step": 3905 }, { "epoch": 0.6623145400593472, "grad_norm": 0.988710600579722, "learning_rate": 5.406347807148515e-06, "loss": 0.9333, "step": 3906 }, { "epoch": 0.6624841034336584, "grad_norm": 0.9856461056387464, "learning_rate": 5.401469983149699e-06, "loss": 0.9422, "step": 3907 }, { "epoch": 0.6626536668079694, "grad_norm": 0.9737655760456169, "learning_rate": 5.396593546311346e-06, "loss": 0.919, "step": 3908 }, { "epoch": 0.6628232301822806, "grad_norm": 0.9382680904507146, "learning_rate": 5.391718498104451e-06, "loss": 0.9216, "step": 3909 }, { "epoch": 0.6629927935565918, "grad_norm": 0.9878460275517319, "learning_rate": 5.386844839999586e-06, "loss": 0.9749, "step": 3910 }, { "epoch": 0.663162356930903, "grad_norm": 0.9358420654936995, "learning_rate": 5.381972573466905e-06, "loss": 0.9314, "step": 3911 }, { "epoch": 0.663331920305214, "grad_norm": 1.057242037704201, "learning_rate": 5.377101699976135e-06, "loss": 0.8988, "step": 3912 }, { "epoch": 0.6635014836795252, "grad_norm": 0.9807759525794161, "learning_rate": 5.3722322209966024e-06, "loss": 0.9191, "step": 3913 }, { "epoch": 0.6636710470538364, "grad_norm": 0.9838550976211243, "learning_rate": 5.367364137997193e-06, "loss": 0.8975, "step": 3914 }, { "epoch": 0.6638406104281476, "grad_norm": 0.9858722626023122, "learning_rate": 5.362497452446379e-06, "loss": 0.9523, "step": 3915 }, { "epoch": 0.6640101738024586, "grad_norm": 0.9917628083410989, "learning_rate": 5.357632165812208e-06, "loss": 0.9357, "step": 3916 }, { "epoch": 0.6641797371767698, "grad_norm": 0.9690695087891623, "learning_rate": 5.352768279562315e-06, "loss": 0.9444, "step": 3917 }, { "epoch": 0.664349300551081, "grad_norm": 0.9701944233249534, "learning_rate": 5.3479057951639034e-06, "loss": 0.9505, "step": 3918 }, { "epoch": 0.6645188639253922, "grad_norm": 0.9871633133861889, "learning_rate": 5.343044714083756e-06, "loss": 0.9445, "step": 3919 }, { "epoch": 0.6646884272997032, "grad_norm": 0.973757320636307, "learning_rate": 5.338185037788228e-06, "loss": 0.945, "step": 3920 }, { "epoch": 0.6648579906740144, "grad_norm": 0.9400542642201342, "learning_rate": 5.333326767743263e-06, "loss": 0.9071, "step": 3921 }, { "epoch": 0.6650275540483256, "grad_norm": 0.9742709175463057, "learning_rate": 5.3284699054143705e-06, "loss": 0.9504, "step": 3922 }, { "epoch": 0.6651971174226368, "grad_norm": 0.9640296549247036, "learning_rate": 5.323614452266632e-06, "loss": 0.9329, "step": 3923 }, { "epoch": 0.6653666807969478, "grad_norm": 1.0002729476983498, "learning_rate": 5.318760409764718e-06, "loss": 0.9387, "step": 3924 }, { "epoch": 0.665536244171259, "grad_norm": 0.9423150822636535, "learning_rate": 5.313907779372862e-06, "loss": 0.9324, "step": 3925 }, { "epoch": 0.6657058075455702, "grad_norm": 1.0138549107572683, "learning_rate": 5.3090565625548755e-06, "loss": 0.9453, "step": 3926 }, { "epoch": 0.6658753709198814, "grad_norm": 0.9378443199931459, "learning_rate": 5.304206760774139e-06, "loss": 0.9117, "step": 3927 }, { "epoch": 0.6660449342941924, "grad_norm": 0.9330418978120747, "learning_rate": 5.299358375493613e-06, "loss": 0.9395, "step": 3928 }, { "epoch": 0.6662144976685036, "grad_norm": 0.9606426036261803, "learning_rate": 5.294511408175825e-06, "loss": 0.947, "step": 3929 }, { "epoch": 0.6663840610428148, "grad_norm": 0.9418675926648232, "learning_rate": 5.289665860282877e-06, "loss": 0.9268, "step": 3930 }, { "epoch": 0.666553624417126, "grad_norm": 0.964392078095706, "learning_rate": 5.2848217332764476e-06, "loss": 0.9234, "step": 3931 }, { "epoch": 0.666723187791437, "grad_norm": 0.9787977772815747, "learning_rate": 5.279979028617781e-06, "loss": 0.9418, "step": 3932 }, { "epoch": 0.6668927511657482, "grad_norm": 0.9123543852829185, "learning_rate": 5.275137747767691e-06, "loss": 0.9292, "step": 3933 }, { "epoch": 0.6670623145400594, "grad_norm": 0.9175440557504575, "learning_rate": 5.270297892186563e-06, "loss": 0.9214, "step": 3934 }, { "epoch": 0.6672318779143704, "grad_norm": 0.9707448908463467, "learning_rate": 5.265459463334361e-06, "loss": 0.9442, "step": 3935 }, { "epoch": 0.6674014412886816, "grad_norm": 0.9549867467941378, "learning_rate": 5.260622462670608e-06, "loss": 0.9336, "step": 3936 }, { "epoch": 0.6675710046629928, "grad_norm": 1.0282995465836202, "learning_rate": 5.2557868916543996e-06, "loss": 0.9546, "step": 3937 }, { "epoch": 0.667740568037304, "grad_norm": 0.9140365773329413, "learning_rate": 5.250952751744396e-06, "loss": 0.954, "step": 3938 }, { "epoch": 0.667910131411615, "grad_norm": 0.9342080110904891, "learning_rate": 5.246120044398839e-06, "loss": 0.8971, "step": 3939 }, { "epoch": 0.6680796947859262, "grad_norm": 0.9388258644564519, "learning_rate": 5.241288771075526e-06, "loss": 0.9261, "step": 3940 }, { "epoch": 0.6682492581602374, "grad_norm": 0.9848489745521355, "learning_rate": 5.236458933231818e-06, "loss": 0.9394, "step": 3941 }, { "epoch": 0.6684188215345486, "grad_norm": 0.9546401120623125, "learning_rate": 5.231630532324661e-06, "loss": 0.923, "step": 3942 }, { "epoch": 0.6685883849088596, "grad_norm": 0.9790372986013418, "learning_rate": 5.226803569810552e-06, "loss": 0.9494, "step": 3943 }, { "epoch": 0.6687579482831708, "grad_norm": 0.9511914741826248, "learning_rate": 5.221978047145559e-06, "loss": 0.9142, "step": 3944 }, { "epoch": 0.668927511657482, "grad_norm": 0.979204322958994, "learning_rate": 5.217153965785315e-06, "loss": 0.9561, "step": 3945 }, { "epoch": 0.6690970750317932, "grad_norm": 0.9882434520547869, "learning_rate": 5.21233132718502e-06, "loss": 0.9754, "step": 3946 }, { "epoch": 0.6692666384061042, "grad_norm": 1.0240698051581076, "learning_rate": 5.207510132799436e-06, "loss": 0.9738, "step": 3947 }, { "epoch": 0.6694362017804154, "grad_norm": 0.9583803572017214, "learning_rate": 5.2026903840828864e-06, "loss": 0.9401, "step": 3948 }, { "epoch": 0.6696057651547266, "grad_norm": 0.9195709660329189, "learning_rate": 5.1978720824892725e-06, "loss": 0.9599, "step": 3949 }, { "epoch": 0.6697753285290378, "grad_norm": 0.9666904386466108, "learning_rate": 5.193055229472045e-06, "loss": 0.9435, "step": 3950 }, { "epoch": 0.6699448919033488, "grad_norm": 0.9531567204711313, "learning_rate": 5.1882398264842225e-06, "loss": 0.9093, "step": 3951 }, { "epoch": 0.67011445527766, "grad_norm": 0.9936746290244673, "learning_rate": 5.1834258749783805e-06, "loss": 0.9662, "step": 3952 }, { "epoch": 0.6702840186519712, "grad_norm": 0.9551629932249377, "learning_rate": 5.178613376406672e-06, "loss": 0.9403, "step": 3953 }, { "epoch": 0.6704535820262824, "grad_norm": 0.921796117153545, "learning_rate": 5.173802332220795e-06, "loss": 0.9393, "step": 3954 }, { "epoch": 0.6706231454005934, "grad_norm": 0.9507810627843465, "learning_rate": 5.168992743872019e-06, "loss": 0.922, "step": 3955 }, { "epoch": 0.6707927087749046, "grad_norm": 0.9554163157405162, "learning_rate": 5.164184612811164e-06, "loss": 0.9528, "step": 3956 }, { "epoch": 0.6709622721492158, "grad_norm": 0.9974234717396072, "learning_rate": 5.1593779404886255e-06, "loss": 0.8856, "step": 3957 }, { "epoch": 0.671131835523527, "grad_norm": 0.9699720154322813, "learning_rate": 5.154572728354349e-06, "loss": 0.9459, "step": 3958 }, { "epoch": 0.671301398897838, "grad_norm": 0.9356105747656451, "learning_rate": 5.149768977857835e-06, "loss": 0.921, "step": 3959 }, { "epoch": 0.6714709622721492, "grad_norm": 0.9851471475682955, "learning_rate": 5.144966690448159e-06, "loss": 0.998, "step": 3960 }, { "epoch": 0.6716405256464604, "grad_norm": 0.9585842581837655, "learning_rate": 5.14016586757394e-06, "loss": 0.9581, "step": 3961 }, { "epoch": 0.6718100890207716, "grad_norm": 0.9518273242639163, "learning_rate": 5.135366510683361e-06, "loss": 0.9256, "step": 3962 }, { "epoch": 0.6719796523950826, "grad_norm": 1.0145463954503386, "learning_rate": 5.130568621224162e-06, "loss": 0.9494, "step": 3963 }, { "epoch": 0.6721492157693938, "grad_norm": 0.9948365564700787, "learning_rate": 5.125772200643643e-06, "loss": 0.9414, "step": 3964 }, { "epoch": 0.672318779143705, "grad_norm": 0.9609231919465913, "learning_rate": 5.120977250388657e-06, "loss": 0.9439, "step": 3965 }, { "epoch": 0.6724883425180161, "grad_norm": 0.9000482538254113, "learning_rate": 5.116183771905612e-06, "loss": 0.9313, "step": 3966 }, { "epoch": 0.6726579058923272, "grad_norm": 0.9249819738921866, "learning_rate": 5.111391766640481e-06, "loss": 0.9061, "step": 3967 }, { "epoch": 0.6728274692666384, "grad_norm": 0.9762897079163759, "learning_rate": 5.106601236038786e-06, "loss": 0.9753, "step": 3968 }, { "epoch": 0.6729970326409496, "grad_norm": 0.9650549983853628, "learning_rate": 5.1018121815456045e-06, "loss": 0.943, "step": 3969 }, { "epoch": 0.6731665960152607, "grad_norm": 0.5990811662072381, "learning_rate": 5.097024604605563e-06, "loss": 0.7577, "step": 3970 }, { "epoch": 0.6733361593895718, "grad_norm": 1.0202747671421386, "learning_rate": 5.092238506662859e-06, "loss": 0.9622, "step": 3971 }, { "epoch": 0.673505722763883, "grad_norm": 0.9490346582744696, "learning_rate": 5.087453889161229e-06, "loss": 0.9248, "step": 3972 }, { "epoch": 0.6736752861381942, "grad_norm": 0.94744196292728, "learning_rate": 5.082670753543961e-06, "loss": 0.8843, "step": 3973 }, { "epoch": 0.6738448495125053, "grad_norm": 0.6213833628055527, "learning_rate": 5.077889101253914e-06, "loss": 0.8127, "step": 3974 }, { "epoch": 0.6740144128868164, "grad_norm": 0.9796786216879492, "learning_rate": 5.07310893373348e-06, "loss": 0.9482, "step": 3975 }, { "epoch": 0.6741839762611276, "grad_norm": 0.95104293414298, "learning_rate": 5.068330252424614e-06, "loss": 0.972, "step": 3976 }, { "epoch": 0.6743535396354388, "grad_norm": 0.9939579073756265, "learning_rate": 5.063553058768814e-06, "loss": 0.9247, "step": 3977 }, { "epoch": 0.6745231030097499, "grad_norm": 1.0022731705638037, "learning_rate": 5.058777354207143e-06, "loss": 0.9692, "step": 3978 }, { "epoch": 0.674692666384061, "grad_norm": 0.9878107457283639, "learning_rate": 5.054003140180204e-06, "loss": 0.9712, "step": 3979 }, { "epoch": 0.6748622297583722, "grad_norm": 1.0084259738394574, "learning_rate": 5.049230418128153e-06, "loss": 0.9405, "step": 3980 }, { "epoch": 0.6750317931326834, "grad_norm": 1.0003879894729153, "learning_rate": 5.044459189490694e-06, "loss": 0.9778, "step": 3981 }, { "epoch": 0.6752013565069945, "grad_norm": 0.936963536636467, "learning_rate": 5.039689455707082e-06, "loss": 0.9048, "step": 3982 }, { "epoch": 0.6753709198813056, "grad_norm": 0.9662145325304197, "learning_rate": 5.034921218216126e-06, "loss": 0.9458, "step": 3983 }, { "epoch": 0.6755404832556168, "grad_norm": 0.9676933985706532, "learning_rate": 5.03015447845617e-06, "loss": 0.9306, "step": 3984 }, { "epoch": 0.675710046629928, "grad_norm": 0.9282665005479991, "learning_rate": 5.025389237865128e-06, "loss": 0.9175, "step": 3985 }, { "epoch": 0.6758796100042391, "grad_norm": 0.9471878284834134, "learning_rate": 5.020625497880444e-06, "loss": 0.9411, "step": 3986 }, { "epoch": 0.6760491733785502, "grad_norm": 1.0035344099246344, "learning_rate": 5.0158632599391126e-06, "loss": 0.9369, "step": 3987 }, { "epoch": 0.6762187367528614, "grad_norm": 0.9433056947355806, "learning_rate": 5.011102525477673e-06, "loss": 0.9403, "step": 3988 }, { "epoch": 0.6763883001271725, "grad_norm": 0.9211897102222225, "learning_rate": 5.0063432959322265e-06, "loss": 0.9135, "step": 3989 }, { "epoch": 0.6765578635014837, "grad_norm": 0.6354653756596874, "learning_rate": 5.001585572738403e-06, "loss": 0.7484, "step": 3990 }, { "epoch": 0.6767274268757948, "grad_norm": 0.9494161209946261, "learning_rate": 4.9968293573313794e-06, "loss": 0.9472, "step": 3991 }, { "epoch": 0.676896990250106, "grad_norm": 0.9497001956446296, "learning_rate": 4.992074651145892e-06, "loss": 0.9113, "step": 3992 }, { "epoch": 0.6770665536244171, "grad_norm": 0.9489970475232807, "learning_rate": 4.987321455616206e-06, "loss": 0.9114, "step": 3993 }, { "epoch": 0.6772361169987283, "grad_norm": 0.9450701776997606, "learning_rate": 4.98256977217614e-06, "loss": 0.9347, "step": 3994 }, { "epoch": 0.6774056803730394, "grad_norm": 0.9462792989384963, "learning_rate": 4.977819602259048e-06, "loss": 0.9152, "step": 3995 }, { "epoch": 0.6775752437473506, "grad_norm": 0.9323501875106183, "learning_rate": 4.973070947297841e-06, "loss": 0.9519, "step": 3996 }, { "epoch": 0.6777448071216617, "grad_norm": 0.9319045031764531, "learning_rate": 4.968323808724962e-06, "loss": 0.9195, "step": 3997 }, { "epoch": 0.6779143704959729, "grad_norm": 0.9739140404015078, "learning_rate": 4.963578187972399e-06, "loss": 0.9584, "step": 3998 }, { "epoch": 0.678083933870284, "grad_norm": 0.9760266456278085, "learning_rate": 4.958834086471683e-06, "loss": 0.9434, "step": 3999 }, { "epoch": 0.6782534972445952, "grad_norm": 0.9733085620633999, "learning_rate": 4.954091505653886e-06, "loss": 0.941, "step": 4000 }, { "epoch": 0.6784230606189063, "grad_norm": 0.9567129090022204, "learning_rate": 4.9493504469496235e-06, "loss": 0.9273, "step": 4001 }, { "epoch": 0.6785926239932175, "grad_norm": 0.9706521020117985, "learning_rate": 4.9446109117890454e-06, "loss": 0.9636, "step": 4002 }, { "epoch": 0.6787621873675286, "grad_norm": 0.9499771902042635, "learning_rate": 4.939872901601853e-06, "loss": 0.9416, "step": 4003 }, { "epoch": 0.6789317507418398, "grad_norm": 0.9816643890586689, "learning_rate": 4.93513641781728e-06, "loss": 0.9289, "step": 4004 }, { "epoch": 0.6791013141161509, "grad_norm": 0.9132367691792639, "learning_rate": 4.930401461864099e-06, "loss": 0.9439, "step": 4005 }, { "epoch": 0.6792708774904621, "grad_norm": 0.9285855135124447, "learning_rate": 4.925668035170622e-06, "loss": 0.9179, "step": 4006 }, { "epoch": 0.6794404408647732, "grad_norm": 0.949273096146644, "learning_rate": 4.920936139164707e-06, "loss": 0.9274, "step": 4007 }, { "epoch": 0.6796100042390844, "grad_norm": 1.002864137269556, "learning_rate": 4.9162057752737415e-06, "loss": 0.9536, "step": 4008 }, { "epoch": 0.6797795676133955, "grad_norm": 0.9445691271730213, "learning_rate": 4.911476944924651e-06, "loss": 0.9107, "step": 4009 }, { "epoch": 0.6799491309877067, "grad_norm": 0.9451835108284893, "learning_rate": 4.9067496495439095e-06, "loss": 0.9633, "step": 4010 }, { "epoch": 0.6801186943620178, "grad_norm": 0.9507759125434737, "learning_rate": 4.9020238905575136e-06, "loss": 0.9548, "step": 4011 }, { "epoch": 0.680288257736329, "grad_norm": 0.9403580655731887, "learning_rate": 4.897299669391006e-06, "loss": 0.9596, "step": 4012 }, { "epoch": 0.6804578211106401, "grad_norm": 0.9322524026187559, "learning_rate": 4.892576987469456e-06, "loss": 0.9152, "step": 4013 }, { "epoch": 0.6806273844849513, "grad_norm": 0.9988271523318749, "learning_rate": 4.887855846217483e-06, "loss": 0.9324, "step": 4014 }, { "epoch": 0.6807969478592624, "grad_norm": 1.0105629334783448, "learning_rate": 4.883136247059231e-06, "loss": 0.9068, "step": 4015 }, { "epoch": 0.6809665112335735, "grad_norm": 0.9664817820568461, "learning_rate": 4.87841819141838e-06, "loss": 0.9226, "step": 4016 }, { "epoch": 0.6811360746078847, "grad_norm": 0.9537555856169774, "learning_rate": 4.873701680718146e-06, "loss": 0.9326, "step": 4017 }, { "epoch": 0.6813056379821959, "grad_norm": 0.9350691794900546, "learning_rate": 4.868986716381279e-06, "loss": 0.9178, "step": 4018 }, { "epoch": 0.681475201356507, "grad_norm": 0.9737372893166677, "learning_rate": 4.8642732998300575e-06, "loss": 0.8968, "step": 4019 }, { "epoch": 0.6816447647308181, "grad_norm": 0.9431492296282682, "learning_rate": 4.859561432486307e-06, "loss": 0.9153, "step": 4020 }, { "epoch": 0.6818143281051293, "grad_norm": 0.9570381445780026, "learning_rate": 4.854851115771373e-06, "loss": 0.9437, "step": 4021 }, { "epoch": 0.6819838914794405, "grad_norm": 0.9815747583401702, "learning_rate": 4.8501423511061344e-06, "loss": 0.9667, "step": 4022 }, { "epoch": 0.6821534548537516, "grad_norm": 0.948418824259176, "learning_rate": 4.845435139911006e-06, "loss": 0.9733, "step": 4023 }, { "epoch": 0.6823230182280627, "grad_norm": 0.9547014526082412, "learning_rate": 4.840729483605927e-06, "loss": 0.9084, "step": 4024 }, { "epoch": 0.6824925816023739, "grad_norm": 0.935289539696172, "learning_rate": 4.836025383610382e-06, "loss": 0.9314, "step": 4025 }, { "epoch": 0.682662144976685, "grad_norm": 0.940694533779955, "learning_rate": 4.8313228413433736e-06, "loss": 0.9207, "step": 4026 }, { "epoch": 0.6828317083509962, "grad_norm": 0.934832886052752, "learning_rate": 4.826621858223431e-06, "loss": 0.946, "step": 4027 }, { "epoch": 0.6830012717253073, "grad_norm": 0.9528890211506568, "learning_rate": 4.821922435668631e-06, "loss": 0.9342, "step": 4028 }, { "epoch": 0.6831708350996185, "grad_norm": 1.00398965594739, "learning_rate": 4.817224575096564e-06, "loss": 0.9447, "step": 4029 }, { "epoch": 0.6833403984739296, "grad_norm": 0.9837162458501686, "learning_rate": 4.812528277924352e-06, "loss": 0.9289, "step": 4030 }, { "epoch": 0.6835099618482408, "grad_norm": 0.9027343515968713, "learning_rate": 4.807833545568645e-06, "loss": 0.8989, "step": 4031 }, { "epoch": 0.6836795252225519, "grad_norm": 0.9548087488370355, "learning_rate": 4.803140379445632e-06, "loss": 0.9549, "step": 4032 }, { "epoch": 0.6838490885968631, "grad_norm": 1.0098880536957924, "learning_rate": 4.798448780971013e-06, "loss": 0.9635, "step": 4033 }, { "epoch": 0.6840186519711742, "grad_norm": 0.9303449195787494, "learning_rate": 4.793758751560027e-06, "loss": 0.9243, "step": 4034 }, { "epoch": 0.6841882153454854, "grad_norm": 1.005782356068551, "learning_rate": 4.78907029262743e-06, "loss": 0.9655, "step": 4035 }, { "epoch": 0.6843577787197965, "grad_norm": 0.9410591862548884, "learning_rate": 4.7843834055875174e-06, "loss": 0.9415, "step": 4036 }, { "epoch": 0.6845273420941077, "grad_norm": 0.9331602591336416, "learning_rate": 4.779698091854098e-06, "loss": 0.9266, "step": 4037 }, { "epoch": 0.6846969054684188, "grad_norm": 0.9312350157191608, "learning_rate": 4.775014352840512e-06, "loss": 0.9085, "step": 4038 }, { "epoch": 0.68486646884273, "grad_norm": 0.9552761689816897, "learning_rate": 4.7703321899596245e-06, "loss": 0.9194, "step": 4039 }, { "epoch": 0.6850360322170411, "grad_norm": 1.0293540654836395, "learning_rate": 4.765651604623822e-06, "loss": 0.9643, "step": 4040 }, { "epoch": 0.6852055955913523, "grad_norm": 0.9597569051150278, "learning_rate": 4.7609725982450176e-06, "loss": 0.9128, "step": 4041 }, { "epoch": 0.6853751589656634, "grad_norm": 0.9779729649048823, "learning_rate": 4.7562951722346454e-06, "loss": 0.9863, "step": 4042 }, { "epoch": 0.6855447223399745, "grad_norm": 1.0350634625397963, "learning_rate": 4.75161932800367e-06, "loss": 0.9697, "step": 4043 }, { "epoch": 0.6857142857142857, "grad_norm": 1.0261218050435719, "learning_rate": 4.746945066962574e-06, "loss": 0.9943, "step": 4044 }, { "epoch": 0.6858838490885969, "grad_norm": 0.9667322065147755, "learning_rate": 4.742272390521354e-06, "loss": 0.9494, "step": 4045 }, { "epoch": 0.686053412462908, "grad_norm": 0.9977838482756628, "learning_rate": 4.7376013000895486e-06, "loss": 0.9483, "step": 4046 }, { "epoch": 0.6862229758372191, "grad_norm": 0.9700314488244718, "learning_rate": 4.7329317970762e-06, "loss": 0.9424, "step": 4047 }, { "epoch": 0.6863925392115303, "grad_norm": 0.9567437917274273, "learning_rate": 4.728263882889879e-06, "loss": 0.8974, "step": 4048 }, { "epoch": 0.6865621025858415, "grad_norm": 0.9334437037773666, "learning_rate": 4.7235975589386715e-06, "loss": 0.9099, "step": 4049 }, { "epoch": 0.6867316659601526, "grad_norm": 0.9219998381798292, "learning_rate": 4.718932826630197e-06, "loss": 0.9159, "step": 4050 }, { "epoch": 0.6869012293344637, "grad_norm": 0.9995395086385241, "learning_rate": 4.714269687371581e-06, "loss": 0.9386, "step": 4051 }, { "epoch": 0.6870707927087749, "grad_norm": 0.9716738882772417, "learning_rate": 4.709608142569474e-06, "loss": 0.96, "step": 4052 }, { "epoch": 0.6872403560830861, "grad_norm": 0.9522885810932386, "learning_rate": 4.704948193630041e-06, "loss": 0.9501, "step": 4053 }, { "epoch": 0.6874099194573972, "grad_norm": 0.9649104683705396, "learning_rate": 4.700289841958978e-06, "loss": 0.9394, "step": 4054 }, { "epoch": 0.6875794828317083, "grad_norm": 0.9791398478386767, "learning_rate": 4.695633088961487e-06, "loss": 0.9564, "step": 4055 }, { "epoch": 0.6877490462060195, "grad_norm": 0.9818424553511681, "learning_rate": 4.69097793604229e-06, "loss": 0.9345, "step": 4056 }, { "epoch": 0.6879186095803307, "grad_norm": 0.9552428815190235, "learning_rate": 4.686324384605629e-06, "loss": 0.9633, "step": 4057 }, { "epoch": 0.6880881729546418, "grad_norm": 0.952707154493505, "learning_rate": 4.681672436055264e-06, "loss": 0.9428, "step": 4058 }, { "epoch": 0.6882577363289529, "grad_norm": 0.9446346804687629, "learning_rate": 4.677022091794466e-06, "loss": 0.9384, "step": 4059 }, { "epoch": 0.6884272997032641, "grad_norm": 0.9500786158942087, "learning_rate": 4.672373353226023e-06, "loss": 0.9169, "step": 4060 }, { "epoch": 0.6885968630775753, "grad_norm": 0.9415731167708214, "learning_rate": 4.667726221752249e-06, "loss": 0.9384, "step": 4061 }, { "epoch": 0.6887664264518863, "grad_norm": 0.9247989773843499, "learning_rate": 4.66308069877496e-06, "loss": 0.9167, "step": 4062 }, { "epoch": 0.6889359898261975, "grad_norm": 0.9848331758397039, "learning_rate": 4.65843678569549e-06, "loss": 0.9381, "step": 4063 }, { "epoch": 0.6891055532005087, "grad_norm": 1.0036892412422784, "learning_rate": 4.653794483914696e-06, "loss": 0.9735, "step": 4064 }, { "epoch": 0.6892751165748199, "grad_norm": 0.9314602082204769, "learning_rate": 4.649153794832939e-06, "loss": 0.8917, "step": 4065 }, { "epoch": 0.689444679949131, "grad_norm": 0.9659070506247437, "learning_rate": 4.6445147198500965e-06, "loss": 0.9307, "step": 4066 }, { "epoch": 0.6896142433234421, "grad_norm": 0.9427510745367279, "learning_rate": 4.639877260365555e-06, "loss": 0.9347, "step": 4067 }, { "epoch": 0.6897838066977533, "grad_norm": 0.9690135435204741, "learning_rate": 4.6352414177782275e-06, "loss": 0.9507, "step": 4068 }, { "epoch": 0.6899533700720645, "grad_norm": 0.9586304638312999, "learning_rate": 4.630607193486525e-06, "loss": 0.9544, "step": 4069 }, { "epoch": 0.6901229334463755, "grad_norm": 0.9537173228723301, "learning_rate": 4.6259745888883715e-06, "loss": 0.9481, "step": 4070 }, { "epoch": 0.6902924968206867, "grad_norm": 1.0011604447237028, "learning_rate": 4.621343605381215e-06, "loss": 0.9253, "step": 4071 }, { "epoch": 0.6904620601949979, "grad_norm": 0.606128128513411, "learning_rate": 4.616714244361998e-06, "loss": 0.7541, "step": 4072 }, { "epoch": 0.6906316235693091, "grad_norm": 0.9916724970754378, "learning_rate": 4.612086507227186e-06, "loss": 0.9287, "step": 4073 }, { "epoch": 0.6908011869436201, "grad_norm": 0.9658308613972185, "learning_rate": 4.607460395372748e-06, "loss": 0.9046, "step": 4074 }, { "epoch": 0.6909707503179313, "grad_norm": 0.9496397497985856, "learning_rate": 4.602835910194165e-06, "loss": 0.9406, "step": 4075 }, { "epoch": 0.6911403136922425, "grad_norm": 0.9403396277053561, "learning_rate": 4.5982130530864246e-06, "loss": 0.9306, "step": 4076 }, { "epoch": 0.6913098770665537, "grad_norm": 1.0089561930508963, "learning_rate": 4.593591825444028e-06, "loss": 0.9829, "step": 4077 }, { "epoch": 0.6914794404408647, "grad_norm": 0.9573769049109759, "learning_rate": 4.588972228660978e-06, "loss": 0.9344, "step": 4078 }, { "epoch": 0.6916490038151759, "grad_norm": 0.9961316318289198, "learning_rate": 4.584354264130798e-06, "loss": 0.9705, "step": 4079 }, { "epoch": 0.6918185671894871, "grad_norm": 0.9538864166686905, "learning_rate": 4.579737933246507e-06, "loss": 0.9264, "step": 4080 }, { "epoch": 0.6919881305637983, "grad_norm": 1.0594052832439238, "learning_rate": 4.5751232374006304e-06, "loss": 0.9723, "step": 4081 }, { "epoch": 0.6921576939381093, "grad_norm": 1.0225462666165246, "learning_rate": 4.570510177985213e-06, "loss": 0.9365, "step": 4082 }, { "epoch": 0.6923272573124205, "grad_norm": 0.979872304974286, "learning_rate": 4.565898756391797e-06, "loss": 0.9765, "step": 4083 }, { "epoch": 0.6924968206867317, "grad_norm": 0.977345335866861, "learning_rate": 4.561288974011427e-06, "loss": 0.9771, "step": 4084 }, { "epoch": 0.6926663840610429, "grad_norm": 0.9432590713027864, "learning_rate": 4.556680832234657e-06, "loss": 0.919, "step": 4085 }, { "epoch": 0.6928359474353539, "grad_norm": 1.0003878223159666, "learning_rate": 4.552074332451554e-06, "loss": 0.9459, "step": 4086 }, { "epoch": 0.6930055108096651, "grad_norm": 0.9477586608085584, "learning_rate": 4.547469476051679e-06, "loss": 0.9342, "step": 4087 }, { "epoch": 0.6931750741839763, "grad_norm": 0.9344744856227825, "learning_rate": 4.5428662644240964e-06, "loss": 0.9044, "step": 4088 }, { "epoch": 0.6933446375582875, "grad_norm": 0.9543062096168511, "learning_rate": 4.538264698957387e-06, "loss": 0.9544, "step": 4089 }, { "epoch": 0.6935142009325985, "grad_norm": 0.9676621165038252, "learning_rate": 4.533664781039622e-06, "loss": 0.9116, "step": 4090 }, { "epoch": 0.6936837643069097, "grad_norm": 0.9934313644010448, "learning_rate": 4.529066512058381e-06, "loss": 0.9266, "step": 4091 }, { "epoch": 0.6938533276812209, "grad_norm": 0.9572559231416885, "learning_rate": 4.524469893400747e-06, "loss": 0.9238, "step": 4092 }, { "epoch": 0.694022891055532, "grad_norm": 1.0154507875892895, "learning_rate": 4.519874926453303e-06, "loss": 0.9527, "step": 4093 }, { "epoch": 0.6941924544298431, "grad_norm": 0.962893146822662, "learning_rate": 4.515281612602134e-06, "loss": 0.9439, "step": 4094 }, { "epoch": 0.6943620178041543, "grad_norm": 0.9918397434038865, "learning_rate": 4.5106899532328275e-06, "loss": 0.9712, "step": 4095 }, { "epoch": 0.6945315811784655, "grad_norm": 0.9742779924742221, "learning_rate": 4.506099949730468e-06, "loss": 0.9478, "step": 4096 }, { "epoch": 0.6947011445527767, "grad_norm": 0.9533009545609286, "learning_rate": 4.501511603479653e-06, "loss": 0.9148, "step": 4097 }, { "epoch": 0.6948707079270877, "grad_norm": 1.0110241294092543, "learning_rate": 4.496924915864463e-06, "loss": 0.9688, "step": 4098 }, { "epoch": 0.6950402713013989, "grad_norm": 0.9731660788504265, "learning_rate": 4.492339888268486e-06, "loss": 0.9285, "step": 4099 }, { "epoch": 0.6952098346757101, "grad_norm": 0.9988175650309414, "learning_rate": 4.487756522074815e-06, "loss": 0.9361, "step": 4100 }, { "epoch": 0.6953793980500212, "grad_norm": 1.0374573746225664, "learning_rate": 4.483174818666034e-06, "loss": 0.9341, "step": 4101 }, { "epoch": 0.6955489614243323, "grad_norm": 0.9951237006938043, "learning_rate": 4.478594779424227e-06, "loss": 0.9262, "step": 4102 }, { "epoch": 0.6957185247986435, "grad_norm": 0.9948782227176328, "learning_rate": 4.474016405730973e-06, "loss": 0.9455, "step": 4103 }, { "epoch": 0.6958880881729547, "grad_norm": 0.9689389842245486, "learning_rate": 4.46943969896736e-06, "loss": 0.8939, "step": 4104 }, { "epoch": 0.6960576515472658, "grad_norm": 0.9022908704420627, "learning_rate": 4.4648646605139605e-06, "loss": 0.9229, "step": 4105 }, { "epoch": 0.6962272149215769, "grad_norm": 0.9716613458005243, "learning_rate": 4.460291291750846e-06, "loss": 0.9393, "step": 4106 }, { "epoch": 0.6963967782958881, "grad_norm": 0.9643208714797908, "learning_rate": 4.455719594057594e-06, "loss": 0.9213, "step": 4107 }, { "epoch": 0.6965663416701993, "grad_norm": 0.960324433673699, "learning_rate": 4.4511495688132675e-06, "loss": 0.9324, "step": 4108 }, { "epoch": 0.6967359050445104, "grad_norm": 0.9878668451522663, "learning_rate": 4.446581217396428e-06, "loss": 0.9902, "step": 4109 }, { "epoch": 0.6969054684188215, "grad_norm": 0.640666123070321, "learning_rate": 4.4420145411851336e-06, "loss": 0.7824, "step": 4110 }, { "epoch": 0.6970750317931327, "grad_norm": 1.0478185175607173, "learning_rate": 4.4374495415569344e-06, "loss": 0.9648, "step": 4111 }, { "epoch": 0.6972445951674439, "grad_norm": 0.945031422944879, "learning_rate": 4.432886219888877e-06, "loss": 0.9043, "step": 4112 }, { "epoch": 0.697414158541755, "grad_norm": 0.9934272627783942, "learning_rate": 4.428324577557501e-06, "loss": 0.9643, "step": 4113 }, { "epoch": 0.6975837219160661, "grad_norm": 0.9352222250679258, "learning_rate": 4.423764615938837e-06, "loss": 0.9139, "step": 4114 }, { "epoch": 0.6977532852903773, "grad_norm": 1.0129476589521536, "learning_rate": 4.419206336408418e-06, "loss": 0.9602, "step": 4115 }, { "epoch": 0.6979228486646885, "grad_norm": 1.0157424247962692, "learning_rate": 4.414649740341258e-06, "loss": 0.9897, "step": 4116 }, { "epoch": 0.6980924120389995, "grad_norm": 0.9589930630789485, "learning_rate": 4.410094829111865e-06, "loss": 0.9466, "step": 4117 }, { "epoch": 0.6982619754133107, "grad_norm": 0.9687373417271908, "learning_rate": 4.405541604094249e-06, "loss": 0.9523, "step": 4118 }, { "epoch": 0.6984315387876219, "grad_norm": 0.923766337216128, "learning_rate": 4.400990066661901e-06, "loss": 0.923, "step": 4119 }, { "epoch": 0.698601102161933, "grad_norm": 0.9967661891888636, "learning_rate": 4.396440218187805e-06, "loss": 0.9403, "step": 4120 }, { "epoch": 0.6987706655362441, "grad_norm": 0.9545476718469557, "learning_rate": 4.391892060044435e-06, "loss": 0.9319, "step": 4121 }, { "epoch": 0.6989402289105553, "grad_norm": 0.9832085814697785, "learning_rate": 4.387345593603761e-06, "loss": 0.9283, "step": 4122 }, { "epoch": 0.6991097922848665, "grad_norm": 0.9739904929116864, "learning_rate": 4.382800820237236e-06, "loss": 0.9331, "step": 4123 }, { "epoch": 0.6992793556591776, "grad_norm": 1.0174371909615483, "learning_rate": 4.378257741315801e-06, "loss": 0.9467, "step": 4124 }, { "epoch": 0.6994489190334887, "grad_norm": 0.9791070899328154, "learning_rate": 4.373716358209898e-06, "loss": 0.9342, "step": 4125 }, { "epoch": 0.6996184824077999, "grad_norm": 0.947486344414689, "learning_rate": 4.3691766722894435e-06, "loss": 0.9343, "step": 4126 }, { "epoch": 0.6997880457821111, "grad_norm": 1.057983954370814, "learning_rate": 4.364638684923848e-06, "loss": 0.9316, "step": 4127 }, { "epoch": 0.6999576091564222, "grad_norm": 0.9518314758532409, "learning_rate": 4.360102397482008e-06, "loss": 0.9521, "step": 4128 }, { "epoch": 0.7001271725307333, "grad_norm": 1.0085011926152079, "learning_rate": 4.355567811332311e-06, "loss": 0.9575, "step": 4129 }, { "epoch": 0.7002967359050445, "grad_norm": 0.9389350583902506, "learning_rate": 4.3510349278426255e-06, "loss": 0.9143, "step": 4130 }, { "epoch": 0.7004662992793557, "grad_norm": 0.9655126671833357, "learning_rate": 4.346503748380312e-06, "loss": 0.9268, "step": 4131 }, { "epoch": 0.7006358626536668, "grad_norm": 0.9671727341603766, "learning_rate": 4.341974274312211e-06, "loss": 0.9534, "step": 4132 }, { "epoch": 0.7008054260279779, "grad_norm": 1.0177605408277712, "learning_rate": 4.337446507004656e-06, "loss": 0.9637, "step": 4133 }, { "epoch": 0.7009749894022891, "grad_norm": 0.9694668859123609, "learning_rate": 4.332920447823461e-06, "loss": 0.9556, "step": 4134 }, { "epoch": 0.7011445527766003, "grad_norm": 0.939842372498322, "learning_rate": 4.328396098133921e-06, "loss": 0.9304, "step": 4135 }, { "epoch": 0.7013141161509114, "grad_norm": 0.9698516766666061, "learning_rate": 4.323873459300827e-06, "loss": 0.9428, "step": 4136 }, { "epoch": 0.7014836795252225, "grad_norm": 1.0131866026257155, "learning_rate": 4.319352532688444e-06, "loss": 0.9612, "step": 4137 }, { "epoch": 0.7016532428995337, "grad_norm": 0.9213948798612626, "learning_rate": 4.3148333196605205e-06, "loss": 0.8951, "step": 4138 }, { "epoch": 0.7018228062738449, "grad_norm": 0.9898315603593233, "learning_rate": 4.31031582158029e-06, "loss": 0.9471, "step": 4139 }, { "epoch": 0.701992369648156, "grad_norm": 1.0281437925468315, "learning_rate": 4.305800039810475e-06, "loss": 0.9685, "step": 4140 }, { "epoch": 0.7021619330224671, "grad_norm": 0.9653305196770438, "learning_rate": 4.3012859757132715e-06, "loss": 0.9578, "step": 4141 }, { "epoch": 0.7023314963967783, "grad_norm": 0.9486425375680066, "learning_rate": 4.296773630650358e-06, "loss": 0.9582, "step": 4142 }, { "epoch": 0.7025010597710895, "grad_norm": 0.9369857148552346, "learning_rate": 4.292263005982903e-06, "loss": 0.9233, "step": 4143 }, { "epoch": 0.7026706231454006, "grad_norm": 0.9555449712846975, "learning_rate": 4.287754103071545e-06, "loss": 0.9455, "step": 4144 }, { "epoch": 0.7028401865197117, "grad_norm": 0.8779842152695021, "learning_rate": 4.283246923276411e-06, "loss": 0.9273, "step": 4145 }, { "epoch": 0.7030097498940229, "grad_norm": 0.9494631264981629, "learning_rate": 4.278741467957105e-06, "loss": 0.9258, "step": 4146 }, { "epoch": 0.703179313268334, "grad_norm": 0.9620081558373502, "learning_rate": 4.2742377384727104e-06, "loss": 0.9199, "step": 4147 }, { "epoch": 0.7033488766426452, "grad_norm": 1.0172654363224458, "learning_rate": 4.26973573618179e-06, "loss": 0.9502, "step": 4148 }, { "epoch": 0.7035184400169563, "grad_norm": 0.9643114529200683, "learning_rate": 4.265235462442389e-06, "loss": 0.921, "step": 4149 }, { "epoch": 0.7036880033912675, "grad_norm": 0.9698599128661651, "learning_rate": 4.26073691861202e-06, "loss": 0.9361, "step": 4150 }, { "epoch": 0.7038575667655786, "grad_norm": 0.9616511043361519, "learning_rate": 4.256240106047695e-06, "loss": 0.9246, "step": 4151 }, { "epoch": 0.7040271301398898, "grad_norm": 1.0370304485116948, "learning_rate": 4.251745026105886e-06, "loss": 0.9334, "step": 4152 }, { "epoch": 0.7041966935142009, "grad_norm": 0.6341604999148488, "learning_rate": 4.247251680142542e-06, "loss": 0.7943, "step": 4153 }, { "epoch": 0.7043662568885121, "grad_norm": 0.9366874837174215, "learning_rate": 4.242760069513103e-06, "loss": 0.9024, "step": 4154 }, { "epoch": 0.7045358202628232, "grad_norm": 0.931439143836383, "learning_rate": 4.2382701955724724e-06, "loss": 0.9525, "step": 4155 }, { "epoch": 0.7047053836371344, "grad_norm": 0.9322017515907924, "learning_rate": 4.2337820596750356e-06, "loss": 0.9011, "step": 4156 }, { "epoch": 0.7048749470114455, "grad_norm": 0.9437749659761931, "learning_rate": 4.2292956631746475e-06, "loss": 0.9476, "step": 4157 }, { "epoch": 0.7050445103857567, "grad_norm": 0.9792416037230323, "learning_rate": 4.224811007424651e-06, "loss": 0.9594, "step": 4158 }, { "epoch": 0.7052140737600678, "grad_norm": 0.9445075750706137, "learning_rate": 4.220328093777851e-06, "loss": 0.9207, "step": 4159 }, { "epoch": 0.705383637134379, "grad_norm": 0.9763483957570636, "learning_rate": 4.215846923586531e-06, "loss": 0.9357, "step": 4160 }, { "epoch": 0.7055532005086901, "grad_norm": 0.9728893824852156, "learning_rate": 4.211367498202456e-06, "loss": 0.9892, "step": 4161 }, { "epoch": 0.7057227638830013, "grad_norm": 0.9674165888196323, "learning_rate": 4.206889818976852e-06, "loss": 0.947, "step": 4162 }, { "epoch": 0.7058923272573124, "grad_norm": 1.1758764590361632, "learning_rate": 4.202413887260427e-06, "loss": 0.876, "step": 4163 }, { "epoch": 0.7060618906316236, "grad_norm": 0.9167617302696355, "learning_rate": 4.197939704403359e-06, "loss": 0.9004, "step": 4164 }, { "epoch": 0.7062314540059347, "grad_norm": 0.9766285525720196, "learning_rate": 4.1934672717552986e-06, "loss": 0.9459, "step": 4165 }, { "epoch": 0.7064010173802459, "grad_norm": 0.9314903296113407, "learning_rate": 4.188996590665369e-06, "loss": 0.9081, "step": 4166 }, { "epoch": 0.706570580754557, "grad_norm": 0.9419043377450395, "learning_rate": 4.184527662482158e-06, "loss": 0.9376, "step": 4167 }, { "epoch": 0.7067401441288682, "grad_norm": 0.622738579444429, "learning_rate": 4.180060488553743e-06, "loss": 0.779, "step": 4168 }, { "epoch": 0.7069097075031793, "grad_norm": 1.0304125084468243, "learning_rate": 4.175595070227655e-06, "loss": 0.9695, "step": 4169 }, { "epoch": 0.7070792708774905, "grad_norm": 0.9758563592519464, "learning_rate": 4.171131408850901e-06, "loss": 0.9463, "step": 4170 }, { "epoch": 0.7072488342518016, "grad_norm": 1.0184915596143123, "learning_rate": 4.166669505769954e-06, "loss": 0.9802, "step": 4171 }, { "epoch": 0.7074183976261128, "grad_norm": 0.980196598961895, "learning_rate": 4.162209362330767e-06, "loss": 0.925, "step": 4172 }, { "epoch": 0.7075879610004239, "grad_norm": 0.9800256385418338, "learning_rate": 4.157750979878753e-06, "loss": 0.9414, "step": 4173 }, { "epoch": 0.707757524374735, "grad_norm": 0.9418468210967157, "learning_rate": 4.153294359758797e-06, "loss": 0.9636, "step": 4174 }, { "epoch": 0.7079270877490462, "grad_norm": 0.9314265038957935, "learning_rate": 4.1488395033152485e-06, "loss": 0.9554, "step": 4175 }, { "epoch": 0.7080966511233574, "grad_norm": 0.9444662071662503, "learning_rate": 4.144386411891934e-06, "loss": 0.9226, "step": 4176 }, { "epoch": 0.7082662144976685, "grad_norm": 1.0135840109264243, "learning_rate": 4.13993508683214e-06, "loss": 0.9648, "step": 4177 }, { "epoch": 0.7084357778719796, "grad_norm": 0.9530315918562487, "learning_rate": 4.135485529478618e-06, "loss": 0.9318, "step": 4178 }, { "epoch": 0.7086053412462908, "grad_norm": 0.9359676306992604, "learning_rate": 4.131037741173597e-06, "loss": 0.9474, "step": 4179 }, { "epoch": 0.708774904620602, "grad_norm": 0.9746231410435219, "learning_rate": 4.126591723258763e-06, "loss": 0.9373, "step": 4180 }, { "epoch": 0.7089444679949131, "grad_norm": 0.97016522873043, "learning_rate": 4.12214747707527e-06, "loss": 0.9626, "step": 4181 }, { "epoch": 0.7091140313692242, "grad_norm": 0.9463430623400254, "learning_rate": 4.117705003963739e-06, "loss": 0.9213, "step": 4182 }, { "epoch": 0.7092835947435354, "grad_norm": 0.9994084124631185, "learning_rate": 4.113264305264254e-06, "loss": 0.951, "step": 4183 }, { "epoch": 0.7094531581178466, "grad_norm": 0.9683313041858357, "learning_rate": 4.108825382316368e-06, "loss": 0.9449, "step": 4184 }, { "epoch": 0.7096227214921577, "grad_norm": 0.9685371183679886, "learning_rate": 4.1043882364590895e-06, "loss": 0.8995, "step": 4185 }, { "epoch": 0.7097922848664688, "grad_norm": 0.9210548612970206, "learning_rate": 4.099952869030905e-06, "loss": 0.9282, "step": 4186 }, { "epoch": 0.70996184824078, "grad_norm": 0.9859523775194875, "learning_rate": 4.095519281369752e-06, "loss": 0.9412, "step": 4187 }, { "epoch": 0.7101314116150912, "grad_norm": 0.954357388180877, "learning_rate": 4.091087474813037e-06, "loss": 0.9674, "step": 4188 }, { "epoch": 0.7103009749894023, "grad_norm": 0.9756277134438451, "learning_rate": 4.086657450697623e-06, "loss": 0.937, "step": 4189 }, { "epoch": 0.7104705383637134, "grad_norm": 0.946612553294586, "learning_rate": 4.082229210359848e-06, "loss": 0.9051, "step": 4190 }, { "epoch": 0.7106401017380246, "grad_norm": 1.001286330392442, "learning_rate": 4.077802755135501e-06, "loss": 0.9782, "step": 4191 }, { "epoch": 0.7108096651123358, "grad_norm": 0.9896694019979584, "learning_rate": 4.073378086359834e-06, "loss": 0.9254, "step": 4192 }, { "epoch": 0.7109792284866469, "grad_norm": 1.028395901818606, "learning_rate": 4.068955205367559e-06, "loss": 0.9152, "step": 4193 }, { "epoch": 0.711148791860958, "grad_norm": 0.9484471453245332, "learning_rate": 4.064534113492861e-06, "loss": 0.9222, "step": 4194 }, { "epoch": 0.7113183552352692, "grad_norm": 0.9737297671516091, "learning_rate": 4.060114812069367e-06, "loss": 0.9466, "step": 4195 }, { "epoch": 0.7114879186095804, "grad_norm": 0.9269985021001064, "learning_rate": 4.055697302430173e-06, "loss": 0.9091, "step": 4196 }, { "epoch": 0.7116574819838914, "grad_norm": 0.9585716454557712, "learning_rate": 4.051281585907841e-06, "loss": 0.9549, "step": 4197 }, { "epoch": 0.7118270453582026, "grad_norm": 0.9870018512553761, "learning_rate": 4.0468676638343786e-06, "loss": 0.914, "step": 4198 }, { "epoch": 0.7119966087325138, "grad_norm": 0.9602709908372734, "learning_rate": 4.0424555375412615e-06, "loss": 0.9166, "step": 4199 }, { "epoch": 0.712166172106825, "grad_norm": 0.9879011009436877, "learning_rate": 4.038045208359421e-06, "loss": 0.9411, "step": 4200 }, { "epoch": 0.712335735481136, "grad_norm": 1.0062119932861266, "learning_rate": 4.033636677619242e-06, "loss": 0.9554, "step": 4201 }, { "epoch": 0.7125052988554472, "grad_norm": 0.981331343143804, "learning_rate": 4.029229946650577e-06, "loss": 0.9276, "step": 4202 }, { "epoch": 0.7126748622297584, "grad_norm": 0.9623811537878895, "learning_rate": 4.024825016782727e-06, "loss": 0.9332, "step": 4203 }, { "epoch": 0.7128444256040696, "grad_norm": 0.9574647682292474, "learning_rate": 4.020421889344455e-06, "loss": 0.9175, "step": 4204 }, { "epoch": 0.7130139889783806, "grad_norm": 0.9694909974894934, "learning_rate": 4.016020565663974e-06, "loss": 0.9166, "step": 4205 }, { "epoch": 0.7131835523526918, "grad_norm": 1.0140401340404703, "learning_rate": 4.0116210470689574e-06, "loss": 0.9369, "step": 4206 }, { "epoch": 0.713353115727003, "grad_norm": 0.9447651807114108, "learning_rate": 4.007223334886531e-06, "loss": 0.9214, "step": 4207 }, { "epoch": 0.7135226791013141, "grad_norm": 0.9646063531442947, "learning_rate": 4.002827430443284e-06, "loss": 0.9394, "step": 4208 }, { "epoch": 0.7136922424756252, "grad_norm": 0.9181795079791082, "learning_rate": 3.998433335065251e-06, "loss": 0.9082, "step": 4209 }, { "epoch": 0.7138618058499364, "grad_norm": 1.015424388106665, "learning_rate": 3.994041050077925e-06, "loss": 0.9399, "step": 4210 }, { "epoch": 0.7140313692242476, "grad_norm": 0.9686567633449988, "learning_rate": 3.989650576806246e-06, "loss": 0.9188, "step": 4211 }, { "epoch": 0.7142009325985587, "grad_norm": 0.9561042222925232, "learning_rate": 3.985261916574624e-06, "loss": 0.9397, "step": 4212 }, { "epoch": 0.7143704959728698, "grad_norm": 0.9817965420791707, "learning_rate": 3.980875070706906e-06, "loss": 0.9324, "step": 4213 }, { "epoch": 0.714540059347181, "grad_norm": 0.9603639806927207, "learning_rate": 3.976490040526394e-06, "loss": 0.8832, "step": 4214 }, { "epoch": 0.7147096227214922, "grad_norm": 1.006585295375615, "learning_rate": 3.972106827355852e-06, "loss": 0.9144, "step": 4215 }, { "epoch": 0.7148791860958033, "grad_norm": 0.9635161966734009, "learning_rate": 3.967725432517487e-06, "loss": 0.9352, "step": 4216 }, { "epoch": 0.7150487494701144, "grad_norm": 0.9530313424107987, "learning_rate": 3.96334585733296e-06, "loss": 0.9361, "step": 4217 }, { "epoch": 0.7152183128444256, "grad_norm": 0.9541843397745873, "learning_rate": 3.958968103123379e-06, "loss": 0.9343, "step": 4218 }, { "epoch": 0.7153878762187368, "grad_norm": 1.0136209810855443, "learning_rate": 3.954592171209314e-06, "loss": 0.9332, "step": 4219 }, { "epoch": 0.7155574395930479, "grad_norm": 0.9610256292743388, "learning_rate": 3.950218062910776e-06, "loss": 0.9273, "step": 4220 }, { "epoch": 0.715727002967359, "grad_norm": 0.9281779762621568, "learning_rate": 3.9458457795472245e-06, "loss": 0.9004, "step": 4221 }, { "epoch": 0.7158965663416702, "grad_norm": 0.968142929073176, "learning_rate": 3.941475322437574e-06, "loss": 0.9198, "step": 4222 }, { "epoch": 0.7160661297159814, "grad_norm": 1.0029503939820588, "learning_rate": 3.937106692900188e-06, "loss": 0.9197, "step": 4223 }, { "epoch": 0.7162356930902924, "grad_norm": 0.9302128805116311, "learning_rate": 3.932739892252875e-06, "loss": 0.9122, "step": 4224 }, { "epoch": 0.7164052564646036, "grad_norm": 0.978723942302488, "learning_rate": 3.9283749218128885e-06, "loss": 0.9306, "step": 4225 }, { "epoch": 0.7165748198389148, "grad_norm": 1.01041538109347, "learning_rate": 3.924011782896944e-06, "loss": 0.993, "step": 4226 }, { "epoch": 0.716744383213226, "grad_norm": 0.955139527247398, "learning_rate": 3.919650476821192e-06, "loss": 0.9292, "step": 4227 }, { "epoch": 0.716913946587537, "grad_norm": 0.95086886340547, "learning_rate": 3.9152910049012325e-06, "loss": 0.9398, "step": 4228 }, { "epoch": 0.7170835099618482, "grad_norm": 0.9678073358056275, "learning_rate": 3.910933368452112e-06, "loss": 0.9143, "step": 4229 }, { "epoch": 0.7172530733361594, "grad_norm": 1.0046590240206252, "learning_rate": 3.906577568788329e-06, "loss": 0.943, "step": 4230 }, { "epoch": 0.7174226367104706, "grad_norm": 0.9517738882754091, "learning_rate": 3.902223607223822e-06, "loss": 0.9481, "step": 4231 }, { "epoch": 0.7175922000847816, "grad_norm": 0.9879663326320745, "learning_rate": 3.897871485071973e-06, "loss": 0.948, "step": 4232 }, { "epoch": 0.7177617634590928, "grad_norm": 0.9564776765823783, "learning_rate": 3.893521203645618e-06, "loss": 0.9234, "step": 4233 }, { "epoch": 0.717931326833404, "grad_norm": 0.90095485209741, "learning_rate": 3.889172764257032e-06, "loss": 0.9339, "step": 4234 }, { "epoch": 0.7181008902077152, "grad_norm": 0.9469264043542964, "learning_rate": 3.884826168217932e-06, "loss": 0.9494, "step": 4235 }, { "epoch": 0.7182704535820262, "grad_norm": 0.9930893579923585, "learning_rate": 3.88048141683948e-06, "loss": 0.9401, "step": 4236 }, { "epoch": 0.7184400169563374, "grad_norm": 0.9515428483684868, "learning_rate": 3.8761385114322905e-06, "loss": 0.9515, "step": 4237 }, { "epoch": 0.7186095803306486, "grad_norm": 1.013039131286697, "learning_rate": 3.87179745330641e-06, "loss": 0.9402, "step": 4238 }, { "epoch": 0.7187791437049598, "grad_norm": 0.9325257726817211, "learning_rate": 3.86745824377133e-06, "loss": 0.9082, "step": 4239 }, { "epoch": 0.7189487070792708, "grad_norm": 1.0064843016241667, "learning_rate": 3.8631208841359906e-06, "loss": 0.9828, "step": 4240 }, { "epoch": 0.719118270453582, "grad_norm": 0.9456348616024043, "learning_rate": 3.858785375708764e-06, "loss": 0.9031, "step": 4241 }, { "epoch": 0.7192878338278932, "grad_norm": 0.9586906255157827, "learning_rate": 3.854451719797474e-06, "loss": 0.9074, "step": 4242 }, { "epoch": 0.7194573972022044, "grad_norm": 0.9476220560140591, "learning_rate": 3.850119917709375e-06, "loss": 0.961, "step": 4243 }, { "epoch": 0.7196269605765154, "grad_norm": 0.9793403632134153, "learning_rate": 3.845789970751177e-06, "loss": 0.9512, "step": 4244 }, { "epoch": 0.7197965239508266, "grad_norm": 1.0151291146686052, "learning_rate": 3.841461880229016e-06, "loss": 0.9393, "step": 4245 }, { "epoch": 0.7199660873251378, "grad_norm": 1.0175045411665666, "learning_rate": 3.837135647448475e-06, "loss": 0.9264, "step": 4246 }, { "epoch": 0.720135650699449, "grad_norm": 0.9579048523983019, "learning_rate": 3.832811273714569e-06, "loss": 0.9403, "step": 4247 }, { "epoch": 0.72030521407376, "grad_norm": 0.9307016658230239, "learning_rate": 3.82848876033177e-06, "loss": 0.886, "step": 4248 }, { "epoch": 0.7204747774480712, "grad_norm": 0.9564840745789503, "learning_rate": 3.824168108603971e-06, "loss": 0.9466, "step": 4249 }, { "epoch": 0.7206443408223824, "grad_norm": 1.0285704958044026, "learning_rate": 3.8198493198345054e-06, "loss": 0.9437, "step": 4250 }, { "epoch": 0.7208139041966936, "grad_norm": 0.9742554495708574, "learning_rate": 3.815532395326157e-06, "loss": 0.9629, "step": 4251 }, { "epoch": 0.7209834675710046, "grad_norm": 0.9520880060031638, "learning_rate": 3.8112173363811367e-06, "loss": 0.9216, "step": 4252 }, { "epoch": 0.7211530309453158, "grad_norm": 1.0469652022247202, "learning_rate": 3.8069041443010924e-06, "loss": 0.9702, "step": 4253 }, { "epoch": 0.721322594319627, "grad_norm": 0.9560958643027575, "learning_rate": 3.802592820387111e-06, "loss": 0.9403, "step": 4254 }, { "epoch": 0.7214921576939382, "grad_norm": 0.9735100429242013, "learning_rate": 3.79828336593972e-06, "loss": 0.931, "step": 4255 }, { "epoch": 0.7216617210682492, "grad_norm": 0.9420872246822782, "learning_rate": 3.7939757822588796e-06, "loss": 0.9566, "step": 4256 }, { "epoch": 0.7218312844425604, "grad_norm": 0.9563350353440808, "learning_rate": 3.7896700706439826e-06, "loss": 0.9049, "step": 4257 }, { "epoch": 0.7220008478168716, "grad_norm": 0.9483657701006762, "learning_rate": 3.785366232393861e-06, "loss": 0.9272, "step": 4258 }, { "epoch": 0.7221704111911827, "grad_norm": 0.9590795564816833, "learning_rate": 3.78106426880678e-06, "loss": 0.9078, "step": 4259 }, { "epoch": 0.7223399745654938, "grad_norm": 0.9680953571066286, "learning_rate": 3.7767641811804413e-06, "loss": 0.9159, "step": 4260 }, { "epoch": 0.722509537939805, "grad_norm": 0.937566607576362, "learning_rate": 3.7724659708119737e-06, "loss": 0.9183, "step": 4261 }, { "epoch": 0.7226791013141162, "grad_norm": 0.9755092747352829, "learning_rate": 3.768169638997954e-06, "loss": 0.9327, "step": 4262 }, { "epoch": 0.7228486646884273, "grad_norm": 1.0074563321805, "learning_rate": 3.76387518703438e-06, "loss": 0.9743, "step": 4263 }, { "epoch": 0.7230182280627384, "grad_norm": 0.9590026076275152, "learning_rate": 3.7595826162166816e-06, "loss": 0.9225, "step": 4264 }, { "epoch": 0.7231877914370496, "grad_norm": 0.9419879959385101, "learning_rate": 3.7552919278397335e-06, "loss": 0.9102, "step": 4265 }, { "epoch": 0.7233573548113608, "grad_norm": 0.9767160352881189, "learning_rate": 3.7510031231978328e-06, "loss": 0.9281, "step": 4266 }, { "epoch": 0.7235269181856719, "grad_norm": 0.9686765520598654, "learning_rate": 3.746716203584707e-06, "loss": 0.9289, "step": 4267 }, { "epoch": 0.723696481559983, "grad_norm": 0.9943695123512349, "learning_rate": 3.742431170293517e-06, "loss": 0.9585, "step": 4268 }, { "epoch": 0.7238660449342942, "grad_norm": 1.0322719004446468, "learning_rate": 3.738148024616863e-06, "loss": 0.9282, "step": 4269 }, { "epoch": 0.7240356083086054, "grad_norm": 0.9785186357851025, "learning_rate": 3.7338667678467642e-06, "loss": 0.904, "step": 4270 }, { "epoch": 0.7242051716829165, "grad_norm": 0.918256418346633, "learning_rate": 3.729587401274677e-06, "loss": 0.9125, "step": 4271 }, { "epoch": 0.7243747350572276, "grad_norm": 0.9468923985214425, "learning_rate": 3.7253099261914794e-06, "loss": 0.9429, "step": 4272 }, { "epoch": 0.7245442984315388, "grad_norm": 0.948710373599677, "learning_rate": 3.7210343438874917e-06, "loss": 0.9176, "step": 4273 }, { "epoch": 0.72471386180585, "grad_norm": 0.9867316650313858, "learning_rate": 3.7167606556524536e-06, "loss": 0.9773, "step": 4274 }, { "epoch": 0.7248834251801611, "grad_norm": 1.005742161312537, "learning_rate": 3.7124888627755375e-06, "loss": 0.9656, "step": 4275 }, { "epoch": 0.7250529885544722, "grad_norm": 1.0428600549960652, "learning_rate": 3.7082189665453396e-06, "loss": 0.9733, "step": 4276 }, { "epoch": 0.7252225519287834, "grad_norm": 0.9329611709680067, "learning_rate": 3.7039509682498887e-06, "loss": 0.8863, "step": 4277 }, { "epoch": 0.7253921153030946, "grad_norm": 0.9162859928138347, "learning_rate": 3.69968486917664e-06, "loss": 0.9022, "step": 4278 }, { "epoch": 0.7255616786774057, "grad_norm": 0.9246200974283728, "learning_rate": 3.6954206706124697e-06, "loss": 0.9187, "step": 4279 }, { "epoch": 0.7257312420517168, "grad_norm": 0.9409091019944441, "learning_rate": 3.691158373843694e-06, "loss": 0.9192, "step": 4280 }, { "epoch": 0.725900805426028, "grad_norm": 0.9543625791461442, "learning_rate": 3.6868979801560443e-06, "loss": 0.9281, "step": 4281 }, { "epoch": 0.7260703688003391, "grad_norm": 0.9635825528069123, "learning_rate": 3.6826394908346786e-06, "loss": 0.9177, "step": 4282 }, { "epoch": 0.7262399321746503, "grad_norm": 0.9549329906780929, "learning_rate": 3.6783829071641886e-06, "loss": 0.945, "step": 4283 }, { "epoch": 0.7264094955489614, "grad_norm": 0.9698765301267126, "learning_rate": 3.674128230428583e-06, "loss": 0.9583, "step": 4284 }, { "epoch": 0.7265790589232726, "grad_norm": 0.9890742037360992, "learning_rate": 3.6698754619112974e-06, "loss": 0.9066, "step": 4285 }, { "epoch": 0.7267486222975837, "grad_norm": 0.6041378698757675, "learning_rate": 3.6656246028951904e-06, "loss": 0.728, "step": 4286 }, { "epoch": 0.7269181856718949, "grad_norm": 0.9745715840706757, "learning_rate": 3.6613756546625502e-06, "loss": 0.9257, "step": 4287 }, { "epoch": 0.727087749046206, "grad_norm": 0.963416366150616, "learning_rate": 3.657128618495084e-06, "loss": 0.9704, "step": 4288 }, { "epoch": 0.7272573124205172, "grad_norm": 0.9454908742405014, "learning_rate": 3.6528834956739224e-06, "loss": 0.9664, "step": 4289 }, { "epoch": 0.7274268757948283, "grad_norm": 0.9834697268432052, "learning_rate": 3.6486402874796157e-06, "loss": 0.9351, "step": 4290 }, { "epoch": 0.7275964391691395, "grad_norm": 0.9418158876763204, "learning_rate": 3.6443989951921478e-06, "loss": 0.9346, "step": 4291 }, { "epoch": 0.7277660025434506, "grad_norm": 0.9822541764143594, "learning_rate": 3.640159620090913e-06, "loss": 0.9398, "step": 4292 }, { "epoch": 0.7279355659177618, "grad_norm": 0.9846264360581913, "learning_rate": 3.6359221634547324e-06, "loss": 0.9181, "step": 4293 }, { "epoch": 0.7281051292920729, "grad_norm": 0.9562120488176881, "learning_rate": 3.631686626561849e-06, "loss": 0.9292, "step": 4294 }, { "epoch": 0.7282746926663841, "grad_norm": 1.0002723048846975, "learning_rate": 3.627453010689922e-06, "loss": 0.943, "step": 4295 }, { "epoch": 0.7284442560406952, "grad_norm": 0.9766585566785402, "learning_rate": 3.6232213171160368e-06, "loss": 0.9081, "step": 4296 }, { "epoch": 0.7286138194150064, "grad_norm": 0.9863184771995902, "learning_rate": 3.6189915471166927e-06, "loss": 0.9567, "step": 4297 }, { "epoch": 0.7287833827893175, "grad_norm": 0.9549767986374746, "learning_rate": 3.6147637019678195e-06, "loss": 0.9122, "step": 4298 }, { "epoch": 0.7289529461636286, "grad_norm": 0.9023533776943351, "learning_rate": 3.610537782944755e-06, "loss": 0.9252, "step": 4299 }, { "epoch": 0.7291225095379398, "grad_norm": 0.9435239300913615, "learning_rate": 3.6063137913222578e-06, "loss": 0.9172, "step": 4300 }, { "epoch": 0.729292072912251, "grad_norm": 1.0021638431132271, "learning_rate": 3.602091728374515e-06, "loss": 0.9109, "step": 4301 }, { "epoch": 0.7294616362865621, "grad_norm": 0.9681790969081856, "learning_rate": 3.5978715953751207e-06, "loss": 0.9167, "step": 4302 }, { "epoch": 0.7296311996608732, "grad_norm": 0.9625817052513054, "learning_rate": 3.5936533935970907e-06, "loss": 0.9039, "step": 4303 }, { "epoch": 0.7298007630351844, "grad_norm": 1.007769359857645, "learning_rate": 3.5894371243128557e-06, "loss": 0.9442, "step": 4304 }, { "epoch": 0.7299703264094956, "grad_norm": 1.002781608466404, "learning_rate": 3.5852227887942713e-06, "loss": 0.8953, "step": 4305 }, { "epoch": 0.7301398897838067, "grad_norm": 0.9180157018003722, "learning_rate": 3.5810103883126023e-06, "loss": 0.8952, "step": 4306 }, { "epoch": 0.7303094531581178, "grad_norm": 1.017691205164924, "learning_rate": 3.576799924138532e-06, "loss": 0.8931, "step": 4307 }, { "epoch": 0.730479016532429, "grad_norm": 0.9558936845782625, "learning_rate": 3.5725913975421565e-06, "loss": 0.8853, "step": 4308 }, { "epoch": 0.7306485799067401, "grad_norm": 0.9217020223487714, "learning_rate": 3.5683848097929963e-06, "loss": 0.906, "step": 4309 }, { "epoch": 0.7308181432810513, "grad_norm": 0.9721446511017972, "learning_rate": 3.564180162159978e-06, "loss": 0.9314, "step": 4310 }, { "epoch": 0.7309877066553624, "grad_norm": 0.9749751793561413, "learning_rate": 3.5599774559114475e-06, "loss": 0.9332, "step": 4311 }, { "epoch": 0.7311572700296736, "grad_norm": 0.9594564658418394, "learning_rate": 3.555776692315163e-06, "loss": 0.9207, "step": 4312 }, { "epoch": 0.7313268334039847, "grad_norm": 0.9883754917667471, "learning_rate": 3.5515778726382967e-06, "loss": 0.9352, "step": 4313 }, { "epoch": 0.7314963967782959, "grad_norm": 0.9671078267484942, "learning_rate": 3.5473809981474363e-06, "loss": 0.9198, "step": 4314 }, { "epoch": 0.731665960152607, "grad_norm": 0.9401087319210902, "learning_rate": 3.5431860701085785e-06, "loss": 0.9106, "step": 4315 }, { "epoch": 0.7318355235269182, "grad_norm": 0.6677505396187453, "learning_rate": 3.5389930897871415e-06, "loss": 0.7912, "step": 4316 }, { "epoch": 0.7320050869012293, "grad_norm": 0.9978618635551892, "learning_rate": 3.5348020584479492e-06, "loss": 0.9603, "step": 4317 }, { "epoch": 0.7321746502755405, "grad_norm": 0.9597117769790502, "learning_rate": 3.5306129773552334e-06, "loss": 0.9308, "step": 4318 }, { "epoch": 0.7323442136498516, "grad_norm": 0.9794325778962216, "learning_rate": 3.526425847772651e-06, "loss": 0.9511, "step": 4319 }, { "epoch": 0.7325137770241628, "grad_norm": 1.0091142931885073, "learning_rate": 3.5222406709632584e-06, "loss": 0.9708, "step": 4320 }, { "epoch": 0.7326833403984739, "grad_norm": 0.9970407515158157, "learning_rate": 3.518057448189527e-06, "loss": 0.9608, "step": 4321 }, { "epoch": 0.7328529037727851, "grad_norm": 0.9819706828917782, "learning_rate": 3.5138761807133346e-06, "loss": 0.9251, "step": 4322 }, { "epoch": 0.7330224671470962, "grad_norm": 0.9785024398292148, "learning_rate": 3.509696869795981e-06, "loss": 0.9398, "step": 4323 }, { "epoch": 0.7331920305214074, "grad_norm": 0.9769641943557719, "learning_rate": 3.505519516698165e-06, "loss": 0.9329, "step": 4324 }, { "epoch": 0.7333615938957185, "grad_norm": 0.6443691569076802, "learning_rate": 3.501344122679995e-06, "loss": 0.7745, "step": 4325 }, { "epoch": 0.7335311572700297, "grad_norm": 0.928221652945086, "learning_rate": 3.4971706890009906e-06, "loss": 0.9174, "step": 4326 }, { "epoch": 0.7337007206443408, "grad_norm": 1.0141043117888362, "learning_rate": 3.4929992169200865e-06, "loss": 0.9342, "step": 4327 }, { "epoch": 0.733870284018652, "grad_norm": 1.041849345447714, "learning_rate": 3.4888297076956167e-06, "loss": 0.9412, "step": 4328 }, { "epoch": 0.7340398473929631, "grad_norm": 0.9647566157432904, "learning_rate": 3.4846621625853248e-06, "loss": 0.9232, "step": 4329 }, { "epoch": 0.7342094107672743, "grad_norm": 0.9397124380689265, "learning_rate": 3.4804965828463655e-06, "loss": 0.9143, "step": 4330 }, { "epoch": 0.7343789741415854, "grad_norm": 0.9468661092411789, "learning_rate": 3.4763329697352976e-06, "loss": 0.9143, "step": 4331 }, { "epoch": 0.7345485375158965, "grad_norm": 0.9595394256736666, "learning_rate": 3.4721713245080878e-06, "loss": 0.9203, "step": 4332 }, { "epoch": 0.7347181008902077, "grad_norm": 0.9419547132964262, "learning_rate": 3.4680116484201055e-06, "loss": 0.9067, "step": 4333 }, { "epoch": 0.7348876642645189, "grad_norm": 0.9881132869036805, "learning_rate": 3.463853942726135e-06, "loss": 0.9498, "step": 4334 }, { "epoch": 0.73505722763883, "grad_norm": 0.9687558685183632, "learning_rate": 3.459698208680359e-06, "loss": 0.9306, "step": 4335 }, { "epoch": 0.7352267910131411, "grad_norm": 1.009164323084463, "learning_rate": 3.4555444475363643e-06, "loss": 0.9037, "step": 4336 }, { "epoch": 0.7353963543874523, "grad_norm": 0.9548587415234554, "learning_rate": 3.4513926605471504e-06, "loss": 0.9455, "step": 4337 }, { "epoch": 0.7355659177617635, "grad_norm": 1.0005783358612659, "learning_rate": 3.4472428489651134e-06, "loss": 0.8921, "step": 4338 }, { "epoch": 0.7357354811360746, "grad_norm": 0.954040912121246, "learning_rate": 3.443095014042058e-06, "loss": 0.9332, "step": 4339 }, { "epoch": 0.7359050445103857, "grad_norm": 0.9717284546618798, "learning_rate": 3.4389491570291868e-06, "loss": 0.9616, "step": 4340 }, { "epoch": 0.7360746078846969, "grad_norm": 0.9043929797325935, "learning_rate": 3.4348052791771158e-06, "loss": 0.8906, "step": 4341 }, { "epoch": 0.7362441712590081, "grad_norm": 0.952304884315069, "learning_rate": 3.430663381735857e-06, "loss": 0.8885, "step": 4342 }, { "epoch": 0.7364137346333192, "grad_norm": 0.9803815163570732, "learning_rate": 3.4265234659548243e-06, "loss": 0.9525, "step": 4343 }, { "epoch": 0.7365832980076303, "grad_norm": 1.0006934965146514, "learning_rate": 3.422385533082834e-06, "loss": 0.9686, "step": 4344 }, { "epoch": 0.7367528613819415, "grad_norm": 0.9640683932132267, "learning_rate": 3.4182495843681117e-06, "loss": 0.9003, "step": 4345 }, { "epoch": 0.7369224247562527, "grad_norm": 0.969118230461764, "learning_rate": 3.414115621058276e-06, "loss": 0.9339, "step": 4346 }, { "epoch": 0.7370919881305638, "grad_norm": 0.9792486919277618, "learning_rate": 3.4099836444003488e-06, "loss": 0.962, "step": 4347 }, { "epoch": 0.7372615515048749, "grad_norm": 1.0165065595790632, "learning_rate": 3.405853655640754e-06, "loss": 0.934, "step": 4348 }, { "epoch": 0.7374311148791861, "grad_norm": 0.9976014783003594, "learning_rate": 3.401725656025315e-06, "loss": 0.9625, "step": 4349 }, { "epoch": 0.7376006782534973, "grad_norm": 0.9967626834634267, "learning_rate": 3.3975996467992557e-06, "loss": 0.9482, "step": 4350 }, { "epoch": 0.7377702416278084, "grad_norm": 0.9764694595798615, "learning_rate": 3.3934756292071946e-06, "loss": 0.9516, "step": 4351 }, { "epoch": 0.7379398050021195, "grad_norm": 0.92378917130692, "learning_rate": 3.389353604493163e-06, "loss": 0.9097, "step": 4352 }, { "epoch": 0.7381093683764307, "grad_norm": 0.9907911076246924, "learning_rate": 3.385233573900576e-06, "loss": 0.9098, "step": 4353 }, { "epoch": 0.7382789317507419, "grad_norm": 1.0121982766756803, "learning_rate": 3.3811155386722527e-06, "loss": 0.9287, "step": 4354 }, { "epoch": 0.738448495125053, "grad_norm": 0.9582858963980068, "learning_rate": 3.3769995000504153e-06, "loss": 0.9245, "step": 4355 }, { "epoch": 0.7386180584993641, "grad_norm": 0.9269626281939402, "learning_rate": 3.3728854592766768e-06, "loss": 0.9178, "step": 4356 }, { "epoch": 0.7387876218736753, "grad_norm": 0.9667980736317202, "learning_rate": 3.3687734175920505e-06, "loss": 0.9325, "step": 4357 }, { "epoch": 0.7389571852479865, "grad_norm": 1.0229485136851653, "learning_rate": 3.3646633762369417e-06, "loss": 0.9542, "step": 4358 }, { "epoch": 0.7391267486222975, "grad_norm": 0.9638447929120565, "learning_rate": 3.3605553364511643e-06, "loss": 0.9127, "step": 4359 }, { "epoch": 0.7392963119966087, "grad_norm": 1.0390701949317438, "learning_rate": 3.3564492994739183e-06, "loss": 0.96, "step": 4360 }, { "epoch": 0.7394658753709199, "grad_norm": 0.9798055057484563, "learning_rate": 3.3523452665438004e-06, "loss": 0.9894, "step": 4361 }, { "epoch": 0.7396354387452311, "grad_norm": 0.9632728259207255, "learning_rate": 3.348243238898802e-06, "loss": 0.9473, "step": 4362 }, { "epoch": 0.7398050021195421, "grad_norm": 0.9442079991953367, "learning_rate": 3.344143217776319e-06, "loss": 0.9505, "step": 4363 }, { "epoch": 0.7399745654938533, "grad_norm": 0.9589125261233337, "learning_rate": 3.3400452044131326e-06, "loss": 0.926, "step": 4364 }, { "epoch": 0.7401441288681645, "grad_norm": 0.9355900636455357, "learning_rate": 3.3359492000454186e-06, "loss": 0.9535, "step": 4365 }, { "epoch": 0.7403136922424757, "grad_norm": 0.9867424295982432, "learning_rate": 3.331855205908752e-06, "loss": 0.9673, "step": 4366 }, { "epoch": 0.7404832556167867, "grad_norm": 0.9679867092196275, "learning_rate": 3.3277632232380953e-06, "loss": 0.9046, "step": 4367 }, { "epoch": 0.7406528189910979, "grad_norm": 0.9672549105450803, "learning_rate": 3.3236732532678097e-06, "loss": 0.9501, "step": 4368 }, { "epoch": 0.7408223823654091, "grad_norm": 1.004712217809272, "learning_rate": 3.3195852972316435e-06, "loss": 0.9737, "step": 4369 }, { "epoch": 0.7409919457397203, "grad_norm": 1.0124554558994783, "learning_rate": 3.315499356362747e-06, "loss": 0.9587, "step": 4370 }, { "epoch": 0.7411615091140313, "grad_norm": 0.9850451662913168, "learning_rate": 3.311415431893653e-06, "loss": 0.8806, "step": 4371 }, { "epoch": 0.7413310724883425, "grad_norm": 1.0021604429015305, "learning_rate": 3.3073335250562866e-06, "loss": 0.9516, "step": 4372 }, { "epoch": 0.7415006358626537, "grad_norm": 0.9576189598378064, "learning_rate": 3.3032536370819746e-06, "loss": 0.9344, "step": 4373 }, { "epoch": 0.7416701992369649, "grad_norm": 0.9641699289310747, "learning_rate": 3.2991757692014238e-06, "loss": 0.9244, "step": 4374 }, { "epoch": 0.7418397626112759, "grad_norm": 0.9692703716764848, "learning_rate": 3.2950999226447356e-06, "loss": 0.9303, "step": 4375 }, { "epoch": 0.7420093259855871, "grad_norm": 0.9768981911422421, "learning_rate": 3.291026098641398e-06, "loss": 0.927, "step": 4376 }, { "epoch": 0.7421788893598983, "grad_norm": 0.9833521428431294, "learning_rate": 3.2869542984202974e-06, "loss": 0.903, "step": 4377 }, { "epoch": 0.7423484527342095, "grad_norm": 1.039675443732567, "learning_rate": 3.282884523209704e-06, "loss": 0.9444, "step": 4378 }, { "epoch": 0.7425180161085205, "grad_norm": 0.9851995301371924, "learning_rate": 3.2788167742372725e-06, "loss": 0.9246, "step": 4379 }, { "epoch": 0.7426875794828317, "grad_norm": 1.0170174640694796, "learning_rate": 3.2747510527300597e-06, "loss": 0.9549, "step": 4380 }, { "epoch": 0.7428571428571429, "grad_norm": 1.0032140548812238, "learning_rate": 3.2706873599144973e-06, "loss": 0.9599, "step": 4381 }, { "epoch": 0.7430267062314541, "grad_norm": 0.937123527410746, "learning_rate": 3.2666256970164135e-06, "loss": 0.9447, "step": 4382 }, { "epoch": 0.7431962696057651, "grad_norm": 0.9618582315698139, "learning_rate": 3.262566065261015e-06, "loss": 0.9006, "step": 4383 }, { "epoch": 0.7433658329800763, "grad_norm": 0.9928462118917903, "learning_rate": 3.2585084658729106e-06, "loss": 0.9398, "step": 4384 }, { "epoch": 0.7435353963543875, "grad_norm": 0.9365770827527956, "learning_rate": 3.254452900076083e-06, "loss": 0.9341, "step": 4385 }, { "epoch": 0.7437049597286987, "grad_norm": 1.006633233963137, "learning_rate": 3.2503993690939063e-06, "loss": 0.9507, "step": 4386 }, { "epoch": 0.7438745231030097, "grad_norm": 1.0046099838946616, "learning_rate": 3.2463478741491404e-06, "loss": 0.9409, "step": 4387 }, { "epoch": 0.7440440864773209, "grad_norm": 1.0149787876367944, "learning_rate": 3.2422984164639306e-06, "loss": 0.9654, "step": 4388 }, { "epoch": 0.7442136498516321, "grad_norm": 0.958802583541351, "learning_rate": 3.2382509972598087e-06, "loss": 0.9548, "step": 4389 }, { "epoch": 0.7443832132259431, "grad_norm": 0.983243057761316, "learning_rate": 3.2342056177576865e-06, "loss": 0.922, "step": 4390 }, { "epoch": 0.7445527766002543, "grad_norm": 0.9265538820349005, "learning_rate": 3.230162279177873e-06, "loss": 0.9133, "step": 4391 }, { "epoch": 0.7447223399745655, "grad_norm": 0.9571164085635515, "learning_rate": 3.2261209827400497e-06, "loss": 0.911, "step": 4392 }, { "epoch": 0.7448919033488767, "grad_norm": 0.9154398132947973, "learning_rate": 3.2220817296632845e-06, "loss": 0.8631, "step": 4393 }, { "epoch": 0.7450614667231877, "grad_norm": 0.932430633889728, "learning_rate": 3.2180445211660294e-06, "loss": 0.8779, "step": 4394 }, { "epoch": 0.7452310300974989, "grad_norm": 0.9597318615952581, "learning_rate": 3.2140093584661247e-06, "loss": 0.8876, "step": 4395 }, { "epoch": 0.7454005934718101, "grad_norm": 0.958011630252295, "learning_rate": 3.209976242780788e-06, "loss": 0.9348, "step": 4396 }, { "epoch": 0.7455701568461213, "grad_norm": 0.990831197118341, "learning_rate": 3.205945175326617e-06, "loss": 0.9455, "step": 4397 }, { "epoch": 0.7457397202204323, "grad_norm": 0.9555986078041983, "learning_rate": 3.201916157319601e-06, "loss": 0.9378, "step": 4398 }, { "epoch": 0.7459092835947435, "grad_norm": 0.9795793845504193, "learning_rate": 3.197889189975103e-06, "loss": 0.8949, "step": 4399 }, { "epoch": 0.7460788469690547, "grad_norm": 0.988551161645154, "learning_rate": 3.1938642745078706e-06, "loss": 0.9074, "step": 4400 }, { "epoch": 0.7462484103433659, "grad_norm": 1.0076510925634532, "learning_rate": 3.1898414121320277e-06, "loss": 0.9442, "step": 4401 }, { "epoch": 0.7464179737176769, "grad_norm": 0.9890738866660633, "learning_rate": 3.1858206040610883e-06, "loss": 0.941, "step": 4402 }, { "epoch": 0.7465875370919881, "grad_norm": 1.0312736010415136, "learning_rate": 3.1818018515079396e-06, "loss": 0.9422, "step": 4403 }, { "epoch": 0.7467571004662993, "grad_norm": 1.0082089707531834, "learning_rate": 3.1777851556848494e-06, "loss": 0.9879, "step": 4404 }, { "epoch": 0.7469266638406105, "grad_norm": 0.959407068923717, "learning_rate": 3.173770517803467e-06, "loss": 0.9382, "step": 4405 }, { "epoch": 0.7470962272149215, "grad_norm": 0.9820399788517585, "learning_rate": 3.1697579390748202e-06, "loss": 0.9691, "step": 4406 }, { "epoch": 0.7472657905892327, "grad_norm": 0.9653437690097237, "learning_rate": 3.1657474207093144e-06, "loss": 0.951, "step": 4407 }, { "epoch": 0.7474353539635439, "grad_norm": 0.9852611448743962, "learning_rate": 3.1617389639167316e-06, "loss": 0.9477, "step": 4408 }, { "epoch": 0.7476049173378551, "grad_norm": 1.0209220148025913, "learning_rate": 3.1577325699062424e-06, "loss": 0.9281, "step": 4409 }, { "epoch": 0.7477744807121661, "grad_norm": 1.027427834225856, "learning_rate": 3.1537282398863823e-06, "loss": 0.9731, "step": 4410 }, { "epoch": 0.7479440440864773, "grad_norm": 0.9876472299520029, "learning_rate": 3.149725975065072e-06, "loss": 0.9392, "step": 4411 }, { "epoch": 0.7481136074607885, "grad_norm": 0.975123127162456, "learning_rate": 3.145725776649602e-06, "loss": 0.9467, "step": 4412 }, { "epoch": 0.7482831708350997, "grad_norm": 1.0081454770458542, "learning_rate": 3.1417276458466515e-06, "loss": 0.9249, "step": 4413 }, { "epoch": 0.7484527342094107, "grad_norm": 0.9467673110976009, "learning_rate": 3.137731583862266e-06, "loss": 0.9097, "step": 4414 }, { "epoch": 0.7486222975837219, "grad_norm": 0.979814013393843, "learning_rate": 3.133737591901864e-06, "loss": 0.943, "step": 4415 }, { "epoch": 0.7487918609580331, "grad_norm": 0.9629232998427483, "learning_rate": 3.1297456711702532e-06, "loss": 0.9821, "step": 4416 }, { "epoch": 0.7489614243323442, "grad_norm": 0.9658686505250396, "learning_rate": 3.125755822871607e-06, "loss": 0.9692, "step": 4417 }, { "epoch": 0.7491309877066553, "grad_norm": 1.0121199834544206, "learning_rate": 3.1217680482094726e-06, "loss": 0.9376, "step": 4418 }, { "epoch": 0.7493005510809665, "grad_norm": 0.981257171121615, "learning_rate": 3.117782348386772e-06, "loss": 0.9243, "step": 4419 }, { "epoch": 0.7494701144552777, "grad_norm": 0.9032061969602382, "learning_rate": 3.11379872460581e-06, "loss": 0.8745, "step": 4420 }, { "epoch": 0.7496396778295888, "grad_norm": 0.9944265657051083, "learning_rate": 3.1098171780682553e-06, "loss": 0.9549, "step": 4421 }, { "epoch": 0.7498092412038999, "grad_norm": 1.0039228848355548, "learning_rate": 3.1058377099751537e-06, "loss": 0.9578, "step": 4422 }, { "epoch": 0.7499788045782111, "grad_norm": 1.0090429182119505, "learning_rate": 3.101860321526924e-06, "loss": 0.9465, "step": 4423 }, { "epoch": 0.7501483679525223, "grad_norm": 1.0280748581628925, "learning_rate": 3.0978850139233576e-06, "loss": 0.934, "step": 4424 }, { "epoch": 0.7503179313268334, "grad_norm": 1.013815393650934, "learning_rate": 3.093911788363617e-06, "loss": 0.9513, "step": 4425 }, { "epoch": 0.7504874947011445, "grad_norm": 0.9234252456657739, "learning_rate": 3.0899406460462354e-06, "loss": 0.8779, "step": 4426 }, { "epoch": 0.7506570580754557, "grad_norm": 0.9971617101171185, "learning_rate": 3.0859715881691267e-06, "loss": 0.9288, "step": 4427 }, { "epoch": 0.7508266214497669, "grad_norm": 0.9951462303327652, "learning_rate": 3.0820046159295647e-06, "loss": 0.9497, "step": 4428 }, { "epoch": 0.750996184824078, "grad_norm": 0.9692073969744446, "learning_rate": 3.078039730524198e-06, "loss": 0.9156, "step": 4429 }, { "epoch": 0.7511657481983891, "grad_norm": 0.9639410091774172, "learning_rate": 3.074076933149046e-06, "loss": 0.9253, "step": 4430 }, { "epoch": 0.7513353115727003, "grad_norm": 0.9357625059899958, "learning_rate": 3.070116224999502e-06, "loss": 0.8836, "step": 4431 }, { "epoch": 0.7515048749470115, "grad_norm": 0.9746292810118539, "learning_rate": 3.0661576072703247e-06, "loss": 0.9517, "step": 4432 }, { "epoch": 0.7516744383213226, "grad_norm": 0.9669739131779993, "learning_rate": 3.062201081155637e-06, "loss": 0.9173, "step": 4433 }, { "epoch": 0.7518440016956337, "grad_norm": 1.0015131390982068, "learning_rate": 3.0582466478489457e-06, "loss": 0.9287, "step": 4434 }, { "epoch": 0.7520135650699449, "grad_norm": 0.9830748487213814, "learning_rate": 3.0542943085431144e-06, "loss": 0.9329, "step": 4435 }, { "epoch": 0.752183128444256, "grad_norm": 0.9613027760069434, "learning_rate": 3.050344064430377e-06, "loss": 0.9228, "step": 4436 }, { "epoch": 0.7523526918185672, "grad_norm": 0.9626850124706385, "learning_rate": 3.0463959167023336e-06, "loss": 0.926, "step": 4437 }, { "epoch": 0.7525222551928783, "grad_norm": 0.9394719976511217, "learning_rate": 3.0424498665499613e-06, "loss": 0.9251, "step": 4438 }, { "epoch": 0.7526918185671895, "grad_norm": 1.0143082597357949, "learning_rate": 3.0385059151635953e-06, "loss": 0.9216, "step": 4439 }, { "epoch": 0.7528613819415007, "grad_norm": 0.9556521437289349, "learning_rate": 3.03456406373294e-06, "loss": 0.9072, "step": 4440 }, { "epoch": 0.7530309453158118, "grad_norm": 0.9923594703104305, "learning_rate": 3.0306243134470668e-06, "loss": 0.9492, "step": 4441 }, { "epoch": 0.7532005086901229, "grad_norm": 1.0023781787885582, "learning_rate": 3.026686665494414e-06, "loss": 0.9592, "step": 4442 }, { "epoch": 0.7533700720644341, "grad_norm": 1.0161754933638931, "learning_rate": 3.0227511210627835e-06, "loss": 0.9585, "step": 4443 }, { "epoch": 0.7535396354387452, "grad_norm": 0.6646062478073941, "learning_rate": 3.0188176813393433e-06, "loss": 0.7528, "step": 4444 }, { "epoch": 0.7537091988130564, "grad_norm": 0.9896821998226273, "learning_rate": 3.0148863475106315e-06, "loss": 0.946, "step": 4445 }, { "epoch": 0.7538787621873675, "grad_norm": 0.9628167268998511, "learning_rate": 3.0109571207625443e-06, "loss": 0.9431, "step": 4446 }, { "epoch": 0.7540483255616787, "grad_norm": 0.9820762405663909, "learning_rate": 3.0070300022803454e-06, "loss": 0.927, "step": 4447 }, { "epoch": 0.7542178889359898, "grad_norm": 0.9994330736429512, "learning_rate": 3.003104993248658e-06, "loss": 0.9911, "step": 4448 }, { "epoch": 0.754387452310301, "grad_norm": 0.981153701673931, "learning_rate": 2.9991820948514795e-06, "loss": 0.9124, "step": 4449 }, { "epoch": 0.7545570156846121, "grad_norm": 0.9291673390802457, "learning_rate": 2.9952613082721616e-06, "loss": 0.9062, "step": 4450 }, { "epoch": 0.7547265790589233, "grad_norm": 0.9346902629200073, "learning_rate": 2.991342634693417e-06, "loss": 0.9149, "step": 4451 }, { "epoch": 0.7548961424332344, "grad_norm": 0.9422028183335565, "learning_rate": 2.987426075297333e-06, "loss": 0.9322, "step": 4452 }, { "epoch": 0.7550657058075456, "grad_norm": 0.9339476139004957, "learning_rate": 2.9835116312653477e-06, "loss": 0.9297, "step": 4453 }, { "epoch": 0.7552352691818567, "grad_norm": 0.9522436010519485, "learning_rate": 2.9795993037782657e-06, "loss": 0.9324, "step": 4454 }, { "epoch": 0.7554048325561679, "grad_norm": 0.9685702584475706, "learning_rate": 2.9756890940162476e-06, "loss": 0.9415, "step": 4455 }, { "epoch": 0.755574395930479, "grad_norm": 0.9460152176136803, "learning_rate": 2.971781003158828e-06, "loss": 0.9229, "step": 4456 }, { "epoch": 0.7557439593047902, "grad_norm": 0.9239955569796248, "learning_rate": 2.9678750323848893e-06, "loss": 0.9053, "step": 4457 }, { "epoch": 0.7559135226791013, "grad_norm": 0.9553112039641406, "learning_rate": 2.9639711828726813e-06, "loss": 0.9176, "step": 4458 }, { "epoch": 0.7560830860534125, "grad_norm": 1.032655611523563, "learning_rate": 2.960069455799811e-06, "loss": 0.9821, "step": 4459 }, { "epoch": 0.7562526494277236, "grad_norm": 0.929450690151674, "learning_rate": 2.956169852343247e-06, "loss": 0.8992, "step": 4460 }, { "epoch": 0.7564222128020348, "grad_norm": 0.9461703354141494, "learning_rate": 2.952272373679316e-06, "loss": 0.9086, "step": 4461 }, { "epoch": 0.7565917761763459, "grad_norm": 0.9472856658745393, "learning_rate": 2.9483770209836993e-06, "loss": 0.908, "step": 4462 }, { "epoch": 0.756761339550657, "grad_norm": 0.9719929041670496, "learning_rate": 2.9444837954314508e-06, "loss": 0.9422, "step": 4463 }, { "epoch": 0.7569309029249682, "grad_norm": 1.003345120438624, "learning_rate": 2.94059269819697e-06, "loss": 0.9498, "step": 4464 }, { "epoch": 0.7571004662992794, "grad_norm": 0.9955550428003392, "learning_rate": 2.936703730454017e-06, "loss": 0.9364, "step": 4465 }, { "epoch": 0.7572700296735905, "grad_norm": 0.9937816179963287, "learning_rate": 2.9328168933757085e-06, "loss": 0.9035, "step": 4466 }, { "epoch": 0.7574395930479016, "grad_norm": 0.9583681910524312, "learning_rate": 2.9289321881345257e-06, "loss": 0.9307, "step": 4467 }, { "epoch": 0.7576091564222128, "grad_norm": 0.9303540279982168, "learning_rate": 2.9250496159023e-06, "loss": 0.9211, "step": 4468 }, { "epoch": 0.757778719796524, "grad_norm": 0.9731788428952017, "learning_rate": 2.9211691778502173e-06, "loss": 0.9674, "step": 4469 }, { "epoch": 0.7579482831708351, "grad_norm": 0.9984079188959125, "learning_rate": 2.9172908751488292e-06, "loss": 0.9464, "step": 4470 }, { "epoch": 0.7581178465451462, "grad_norm": 0.9555255131406994, "learning_rate": 2.9134147089680353e-06, "loss": 0.9372, "step": 4471 }, { "epoch": 0.7582874099194574, "grad_norm": 1.0057650558792657, "learning_rate": 2.909540680477092e-06, "loss": 0.955, "step": 4472 }, { "epoch": 0.7584569732937686, "grad_norm": 0.9732212980932544, "learning_rate": 2.90566879084461e-06, "loss": 0.9337, "step": 4473 }, { "epoch": 0.7586265366680797, "grad_norm": 0.9647999807636803, "learning_rate": 2.901799041238561e-06, "loss": 0.918, "step": 4474 }, { "epoch": 0.7587961000423908, "grad_norm": 0.9664013894980109, "learning_rate": 2.897931432826263e-06, "loss": 0.9644, "step": 4475 }, { "epoch": 0.758965663416702, "grad_norm": 0.9401404986411364, "learning_rate": 2.8940659667743943e-06, "loss": 0.9109, "step": 4476 }, { "epoch": 0.7591352267910132, "grad_norm": 0.9673802829810737, "learning_rate": 2.890202644248983e-06, "loss": 0.9372, "step": 4477 }, { "epoch": 0.7593047901653243, "grad_norm": 0.9411434275538323, "learning_rate": 2.886341466415412e-06, "loss": 0.9395, "step": 4478 }, { "epoch": 0.7594743535396354, "grad_norm": 0.9611038043973206, "learning_rate": 2.8824824344384174e-06, "loss": 0.9175, "step": 4479 }, { "epoch": 0.7596439169139466, "grad_norm": 0.943049025701545, "learning_rate": 2.878625549482084e-06, "loss": 0.9417, "step": 4480 }, { "epoch": 0.7598134802882577, "grad_norm": 0.9665486758200093, "learning_rate": 2.8747708127098593e-06, "loss": 0.9615, "step": 4481 }, { "epoch": 0.7599830436625689, "grad_norm": 1.01981147336692, "learning_rate": 2.8709182252845347e-06, "loss": 0.9763, "step": 4482 }, { "epoch": 0.76015260703688, "grad_norm": 0.9835568993674455, "learning_rate": 2.8670677883682527e-06, "loss": 0.9281, "step": 4483 }, { "epoch": 0.7603221704111912, "grad_norm": 0.9280438886337239, "learning_rate": 2.8632195031225073e-06, "loss": 0.8887, "step": 4484 }, { "epoch": 0.7604917337855023, "grad_norm": 0.9704729739095292, "learning_rate": 2.8593733707081516e-06, "loss": 0.9188, "step": 4485 }, { "epoch": 0.7606612971598135, "grad_norm": 0.9792503927390757, "learning_rate": 2.85552939228538e-06, "loss": 0.9045, "step": 4486 }, { "epoch": 0.7608308605341246, "grad_norm": 0.9791749164475829, "learning_rate": 2.851687569013737e-06, "loss": 0.9218, "step": 4487 }, { "epoch": 0.7610004239084358, "grad_norm": 0.9591203111043733, "learning_rate": 2.8478479020521255e-06, "loss": 0.9319, "step": 4488 }, { "epoch": 0.7611699872827469, "grad_norm": 0.9752287848324088, "learning_rate": 2.8440103925587904e-06, "loss": 0.9497, "step": 4489 }, { "epoch": 0.761339550657058, "grad_norm": 0.9973850629474763, "learning_rate": 2.8401750416913275e-06, "loss": 0.9488, "step": 4490 }, { "epoch": 0.7615091140313692, "grad_norm": 0.9677755079847618, "learning_rate": 2.83634185060668e-06, "loss": 0.9063, "step": 4491 }, { "epoch": 0.7616786774056804, "grad_norm": 0.5965392855131872, "learning_rate": 2.832510820461146e-06, "loss": 0.7169, "step": 4492 }, { "epoch": 0.7618482407799915, "grad_norm": 0.9377319170330488, "learning_rate": 2.8286819524103657e-06, "loss": 0.8801, "step": 4493 }, { "epoch": 0.7620178041543026, "grad_norm": 0.9706974634840667, "learning_rate": 2.824855247609328e-06, "loss": 0.9354, "step": 4494 }, { "epoch": 0.7621873675286138, "grad_norm": 0.9938959904393707, "learning_rate": 2.82103070721237e-06, "loss": 0.9341, "step": 4495 }, { "epoch": 0.762356930902925, "grad_norm": 0.9635217182667041, "learning_rate": 2.817208332373177e-06, "loss": 0.9471, "step": 4496 }, { "epoch": 0.7625264942772361, "grad_norm": 0.9607075257366691, "learning_rate": 2.813388124244778e-06, "loss": 0.981, "step": 4497 }, { "epoch": 0.7626960576515472, "grad_norm": 1.0329341316859268, "learning_rate": 2.809570083979548e-06, "loss": 0.9629, "step": 4498 }, { "epoch": 0.7628656210258584, "grad_norm": 0.9719064531587033, "learning_rate": 2.805754212729218e-06, "loss": 0.9281, "step": 4499 }, { "epoch": 0.7630351844001696, "grad_norm": 1.0690122655341157, "learning_rate": 2.8019405116448516e-06, "loss": 0.9309, "step": 4500 }, { "epoch": 0.7632047477744807, "grad_norm": 1.0124808581623521, "learning_rate": 2.798128981876864e-06, "loss": 0.9331, "step": 4501 }, { "epoch": 0.7633743111487918, "grad_norm": 0.9711883662935386, "learning_rate": 2.7943196245750127e-06, "loss": 0.8796, "step": 4502 }, { "epoch": 0.763543874523103, "grad_norm": 0.961905607833146, "learning_rate": 2.7905124408884076e-06, "loss": 0.9203, "step": 4503 }, { "epoch": 0.7637134378974142, "grad_norm": 1.0261005248638975, "learning_rate": 2.786707431965493e-06, "loss": 0.9263, "step": 4504 }, { "epoch": 0.7638830012717253, "grad_norm": 0.944283608863925, "learning_rate": 2.7829045989540594e-06, "loss": 0.9547, "step": 4505 }, { "epoch": 0.7640525646460364, "grad_norm": 0.9921116543325004, "learning_rate": 2.779103943001248e-06, "loss": 0.9296, "step": 4506 }, { "epoch": 0.7642221280203476, "grad_norm": 0.9915669348823127, "learning_rate": 2.775305465253536e-06, "loss": 0.9147, "step": 4507 }, { "epoch": 0.7643916913946588, "grad_norm": 1.0228704800265054, "learning_rate": 2.771509166856745e-06, "loss": 0.9081, "step": 4508 }, { "epoch": 0.7645612547689699, "grad_norm": 0.9755221705155481, "learning_rate": 2.7677150489560378e-06, "loss": 0.9193, "step": 4509 }, { "epoch": 0.764730818143281, "grad_norm": 0.9961639819622458, "learning_rate": 2.7639231126959264e-06, "loss": 0.937, "step": 4510 }, { "epoch": 0.7649003815175922, "grad_norm": 1.0185472703407354, "learning_rate": 2.7601333592202583e-06, "loss": 0.9195, "step": 4511 }, { "epoch": 0.7650699448919034, "grad_norm": 1.0135888117345444, "learning_rate": 2.7563457896722225e-06, "loss": 0.9562, "step": 4512 }, { "epoch": 0.7652395082662145, "grad_norm": 0.9179687678155602, "learning_rate": 2.7525604051943512e-06, "loss": 0.9355, "step": 4513 }, { "epoch": 0.7654090716405256, "grad_norm": 1.0091615158550655, "learning_rate": 2.7487772069285166e-06, "loss": 0.9503, "step": 4514 }, { "epoch": 0.7655786350148368, "grad_norm": 0.9669593360718143, "learning_rate": 2.7449961960159333e-06, "loss": 0.9814, "step": 4515 }, { "epoch": 0.765748198389148, "grad_norm": 0.9424567334266846, "learning_rate": 2.7412173735971514e-06, "loss": 0.9465, "step": 4516 }, { "epoch": 0.765917761763459, "grad_norm": 0.9589192516992819, "learning_rate": 2.7374407408120685e-06, "loss": 0.9188, "step": 4517 }, { "epoch": 0.7660873251377702, "grad_norm": 0.9973797361005567, "learning_rate": 2.7336662987999164e-06, "loss": 0.9261, "step": 4518 }, { "epoch": 0.7662568885120814, "grad_norm": 0.9772477975643391, "learning_rate": 2.7298940486992654e-06, "loss": 0.9753, "step": 4519 }, { "epoch": 0.7664264518863926, "grad_norm": 0.5890005498228047, "learning_rate": 2.726123991648024e-06, "loss": 0.7457, "step": 4520 }, { "epoch": 0.7665960152607036, "grad_norm": 0.9896026548986693, "learning_rate": 2.7223561287834467e-06, "loss": 0.9302, "step": 4521 }, { "epoch": 0.7667655786350148, "grad_norm": 1.0089883936550648, "learning_rate": 2.7185904612421177e-06, "loss": 0.973, "step": 4522 }, { "epoch": 0.766935142009326, "grad_norm": 0.9334218459707342, "learning_rate": 2.714826990159959e-06, "loss": 0.9246, "step": 4523 }, { "epoch": 0.7671047053836372, "grad_norm": 1.0114123598581302, "learning_rate": 2.71106571667224e-06, "loss": 0.9409, "step": 4524 }, { "epoch": 0.7672742687579482, "grad_norm": 0.9654753150562002, "learning_rate": 2.707306641913556e-06, "loss": 0.9256, "step": 4525 }, { "epoch": 0.7674438321322594, "grad_norm": 0.9730472901939335, "learning_rate": 2.7035497670178447e-06, "loss": 0.9659, "step": 4526 }, { "epoch": 0.7676133955065706, "grad_norm": 0.9439084239167176, "learning_rate": 2.6997950931183736e-06, "loss": 0.9327, "step": 4527 }, { "epoch": 0.7677829588808818, "grad_norm": 0.9694370175820768, "learning_rate": 2.6960426213477587e-06, "loss": 0.89, "step": 4528 }, { "epoch": 0.7679525222551928, "grad_norm": 1.0420301073793403, "learning_rate": 2.692292352837942e-06, "loss": 0.9326, "step": 4529 }, { "epoch": 0.768122085629504, "grad_norm": 0.9770183369404557, "learning_rate": 2.688544288720202e-06, "loss": 0.9303, "step": 4530 }, { "epoch": 0.7682916490038152, "grad_norm": 0.9626644375355055, "learning_rate": 2.684798430125154e-06, "loss": 0.8655, "step": 4531 }, { "epoch": 0.7684612123781264, "grad_norm": 0.949320915318544, "learning_rate": 2.681054778182748e-06, "loss": 0.9181, "step": 4532 }, { "epoch": 0.7686307757524374, "grad_norm": 0.9806287327379162, "learning_rate": 2.6773133340222677e-06, "loss": 0.9476, "step": 4533 }, { "epoch": 0.7688003391267486, "grad_norm": 1.0003800675508434, "learning_rate": 2.673574098772328e-06, "loss": 0.9088, "step": 4534 }, { "epoch": 0.7689699025010598, "grad_norm": 1.0669664629692015, "learning_rate": 2.669837073560887e-06, "loss": 0.9375, "step": 4535 }, { "epoch": 0.769139465875371, "grad_norm": 0.9483483359865884, "learning_rate": 2.666102259515225e-06, "loss": 0.9027, "step": 4536 }, { "epoch": 0.769309029249682, "grad_norm": 0.9969605376018489, "learning_rate": 2.662369657761963e-06, "loss": 0.9525, "step": 4537 }, { "epoch": 0.7694785926239932, "grad_norm": 0.9484319240986899, "learning_rate": 2.6586392694270447e-06, "loss": 0.8892, "step": 4538 }, { "epoch": 0.7696481559983044, "grad_norm": 1.029377012576498, "learning_rate": 2.6549110956357616e-06, "loss": 0.954, "step": 4539 }, { "epoch": 0.7698177193726156, "grad_norm": 0.9626292983425511, "learning_rate": 2.651185137512725e-06, "loss": 0.9356, "step": 4540 }, { "epoch": 0.7699872827469266, "grad_norm": 0.9764107692421136, "learning_rate": 2.6474613961818785e-06, "loss": 0.9079, "step": 4541 }, { "epoch": 0.7701568461212378, "grad_norm": 0.992007228976807, "learning_rate": 2.6437398727665064e-06, "loss": 0.9408, "step": 4542 }, { "epoch": 0.770326409495549, "grad_norm": 1.0237555288161735, "learning_rate": 2.640020568389213e-06, "loss": 0.9277, "step": 4543 }, { "epoch": 0.7704959728698602, "grad_norm": 1.0109892166818706, "learning_rate": 2.6363034841719392e-06, "loss": 0.9203, "step": 4544 }, { "epoch": 0.7706655362441712, "grad_norm": 1.0086757517778953, "learning_rate": 2.6325886212359496e-06, "loss": 0.9718, "step": 4545 }, { "epoch": 0.7708350996184824, "grad_norm": 0.955116575538711, "learning_rate": 2.628875980701853e-06, "loss": 0.9119, "step": 4546 }, { "epoch": 0.7710046629927936, "grad_norm": 1.0343277032677225, "learning_rate": 2.6251655636895725e-06, "loss": 0.9409, "step": 4547 }, { "epoch": 0.7711742263671048, "grad_norm": 0.963963943291546, "learning_rate": 2.621457371318369e-06, "loss": 0.9218, "step": 4548 }, { "epoch": 0.7713437897414158, "grad_norm": 0.9467965919780134, "learning_rate": 2.6177514047068287e-06, "loss": 0.9324, "step": 4549 }, { "epoch": 0.771513353115727, "grad_norm": 0.9867982116701469, "learning_rate": 2.6140476649728673e-06, "loss": 0.9205, "step": 4550 }, { "epoch": 0.7716829164900382, "grad_norm": 1.0021998373541978, "learning_rate": 2.6103461532337305e-06, "loss": 0.9331, "step": 4551 }, { "epoch": 0.7718524798643494, "grad_norm": 0.9548073436803785, "learning_rate": 2.6066468706059857e-06, "loss": 0.9201, "step": 4552 }, { "epoch": 0.7720220432386604, "grad_norm": 0.9898353928826127, "learning_rate": 2.602949818205539e-06, "loss": 0.9256, "step": 4553 }, { "epoch": 0.7721916066129716, "grad_norm": 0.9937073711476034, "learning_rate": 2.5992549971476166e-06, "loss": 0.9263, "step": 4554 }, { "epoch": 0.7723611699872828, "grad_norm": 1.010621585589431, "learning_rate": 2.59556240854677e-06, "loss": 0.9341, "step": 4555 }, { "epoch": 0.772530733361594, "grad_norm": 0.9866826861082622, "learning_rate": 2.591872053516877e-06, "loss": 0.9242, "step": 4556 }, { "epoch": 0.772700296735905, "grad_norm": 0.9555609347168634, "learning_rate": 2.5881839331711524e-06, "loss": 0.8964, "step": 4557 }, { "epoch": 0.7728698601102162, "grad_norm": 0.95903536597104, "learning_rate": 2.5844980486221225e-06, "loss": 0.9262, "step": 4558 }, { "epoch": 0.7730394234845274, "grad_norm": 0.9461852995439718, "learning_rate": 2.5808144009816448e-06, "loss": 0.9071, "step": 4559 }, { "epoch": 0.7732089868588385, "grad_norm": 1.0127071503564309, "learning_rate": 2.577132991360909e-06, "loss": 0.9434, "step": 4560 }, { "epoch": 0.7733785502331496, "grad_norm": 0.9565675524156988, "learning_rate": 2.5734538208704197e-06, "loss": 0.9141, "step": 4561 }, { "epoch": 0.7735481136074608, "grad_norm": 0.9886000095923463, "learning_rate": 2.5697768906200084e-06, "loss": 0.9459, "step": 4562 }, { "epoch": 0.773717676981772, "grad_norm": 0.9601201041518646, "learning_rate": 2.566102201718832e-06, "loss": 0.9088, "step": 4563 }, { "epoch": 0.7738872403560831, "grad_norm": 0.9595414947304067, "learning_rate": 2.5624297552753753e-06, "loss": 0.9159, "step": 4564 }, { "epoch": 0.7740568037303942, "grad_norm": 1.0067506355137104, "learning_rate": 2.5587595523974408e-06, "loss": 0.9404, "step": 4565 }, { "epoch": 0.7742263671047054, "grad_norm": 0.9872154514545771, "learning_rate": 2.555091594192153e-06, "loss": 0.9012, "step": 4566 }, { "epoch": 0.7743959304790166, "grad_norm": 0.8960540726249263, "learning_rate": 2.5514258817659685e-06, "loss": 0.8986, "step": 4567 }, { "epoch": 0.7745654938533277, "grad_norm": 1.0134530080426263, "learning_rate": 2.5477624162246573e-06, "loss": 0.9416, "step": 4568 }, { "epoch": 0.7747350572276388, "grad_norm": 0.9570592798958361, "learning_rate": 2.5441011986733165e-06, "loss": 0.9485, "step": 4569 }, { "epoch": 0.77490462060195, "grad_norm": 1.0309560713032377, "learning_rate": 2.540442230216361e-06, "loss": 0.955, "step": 4570 }, { "epoch": 0.7750741839762612, "grad_norm": 0.9899029076169737, "learning_rate": 2.5367855119575314e-06, "loss": 0.9393, "step": 4571 }, { "epoch": 0.7752437473505722, "grad_norm": 0.9401622652468724, "learning_rate": 2.533131044999887e-06, "loss": 0.9084, "step": 4572 }, { "epoch": 0.7754133107248834, "grad_norm": 0.9131179357665576, "learning_rate": 2.5294788304458063e-06, "loss": 0.9234, "step": 4573 }, { "epoch": 0.7755828740991946, "grad_norm": 0.9316857143154513, "learning_rate": 2.5258288693969968e-06, "loss": 0.9284, "step": 4574 }, { "epoch": 0.7757524374735058, "grad_norm": 0.9999576473093155, "learning_rate": 2.5221811629544768e-06, "loss": 0.9458, "step": 4575 }, { "epoch": 0.7759220008478168, "grad_norm": 0.9955456412044222, "learning_rate": 2.518535712218587e-06, "loss": 0.9525, "step": 4576 }, { "epoch": 0.776091564222128, "grad_norm": 0.9364793313670482, "learning_rate": 2.514892518288988e-06, "loss": 0.9411, "step": 4577 }, { "epoch": 0.7762611275964392, "grad_norm": 0.9498691294792266, "learning_rate": 2.5112515822646655e-06, "loss": 0.9439, "step": 4578 }, { "epoch": 0.7764306909707503, "grad_norm": 0.9742256271138886, "learning_rate": 2.507612905243916e-06, "loss": 0.9391, "step": 4579 }, { "epoch": 0.7766002543450614, "grad_norm": 0.9867988929491583, "learning_rate": 2.5039764883243555e-06, "loss": 0.9258, "step": 4580 }, { "epoch": 0.7767698177193726, "grad_norm": 0.9779290388337014, "learning_rate": 2.5003423326029187e-06, "loss": 0.936, "step": 4581 }, { "epoch": 0.7769393810936838, "grad_norm": 0.9969522311928066, "learning_rate": 2.4967104391758657e-06, "loss": 0.9302, "step": 4582 }, { "epoch": 0.7771089444679949, "grad_norm": 0.9804579517930131, "learning_rate": 2.493080809138765e-06, "loss": 0.9545, "step": 4583 }, { "epoch": 0.777278507842306, "grad_norm": 0.9576352820090382, "learning_rate": 2.4894534435865015e-06, "loss": 0.9273, "step": 4584 }, { "epoch": 0.7774480712166172, "grad_norm": 0.9416018054001984, "learning_rate": 2.485828343613288e-06, "loss": 0.9347, "step": 4585 }, { "epoch": 0.7776176345909284, "grad_norm": 1.0175922324651776, "learning_rate": 2.482205510312644e-06, "loss": 0.9214, "step": 4586 }, { "epoch": 0.7777871979652395, "grad_norm": 0.9605266022301537, "learning_rate": 2.478584944777408e-06, "loss": 0.9166, "step": 4587 }, { "epoch": 0.7779567613395506, "grad_norm": 0.9771414522952164, "learning_rate": 2.4749666480997336e-06, "loss": 0.9306, "step": 4588 }, { "epoch": 0.7781263247138618, "grad_norm": 0.9972343904399632, "learning_rate": 2.4713506213710924e-06, "loss": 0.8713, "step": 4589 }, { "epoch": 0.778295888088173, "grad_norm": 0.9888592912550048, "learning_rate": 2.467736865682269e-06, "loss": 0.9205, "step": 4590 }, { "epoch": 0.7784654514624841, "grad_norm": 0.9194702433723356, "learning_rate": 2.46412538212336e-06, "loss": 0.9308, "step": 4591 }, { "epoch": 0.7786350148367952, "grad_norm": 0.9609175606956006, "learning_rate": 2.4605161717837866e-06, "loss": 0.8881, "step": 4592 }, { "epoch": 0.7788045782111064, "grad_norm": 0.9563015470368535, "learning_rate": 2.456909235752276e-06, "loss": 0.8982, "step": 4593 }, { "epoch": 0.7789741415854176, "grad_norm": 0.9356709164135482, "learning_rate": 2.4533045751168703e-06, "loss": 0.8836, "step": 4594 }, { "epoch": 0.7791437049597287, "grad_norm": 0.5796414686874024, "learning_rate": 2.4497021909649252e-06, "loss": 0.7278, "step": 4595 }, { "epoch": 0.7793132683340398, "grad_norm": 0.9677676567011475, "learning_rate": 2.446102084383114e-06, "loss": 0.941, "step": 4596 }, { "epoch": 0.779482831708351, "grad_norm": 0.6698494995233439, "learning_rate": 2.4425042564574186e-06, "loss": 0.7981, "step": 4597 }, { "epoch": 0.7796523950826622, "grad_norm": 0.9949907503026933, "learning_rate": 2.4389087082731333e-06, "loss": 0.8962, "step": 4598 }, { "epoch": 0.7798219584569733, "grad_norm": 1.0029170594196486, "learning_rate": 2.4353154409148637e-06, "loss": 0.9486, "step": 4599 }, { "epoch": 0.7799915218312844, "grad_norm": 0.9477840888037887, "learning_rate": 2.4317244554665363e-06, "loss": 0.8876, "step": 4600 }, { "epoch": 0.7801610852055956, "grad_norm": 1.01575366322494, "learning_rate": 2.4281357530113804e-06, "loss": 0.953, "step": 4601 }, { "epoch": 0.7803306485799067, "grad_norm": 0.9585513184495699, "learning_rate": 2.424549334631934e-06, "loss": 0.8769, "step": 4602 }, { "epoch": 0.7805002119542179, "grad_norm": 1.0138760329834557, "learning_rate": 2.420965201410057e-06, "loss": 0.9835, "step": 4603 }, { "epoch": 0.780669775328529, "grad_norm": 0.9400887669248903, "learning_rate": 2.417383354426912e-06, "loss": 0.8921, "step": 4604 }, { "epoch": 0.7808393387028402, "grad_norm": 0.9405632267543659, "learning_rate": 2.4138037947629743e-06, "loss": 0.9733, "step": 4605 }, { "epoch": 0.7810089020771513, "grad_norm": 1.011495831662797, "learning_rate": 2.4102265234980283e-06, "loss": 0.918, "step": 4606 }, { "epoch": 0.7811784654514625, "grad_norm": 0.9186774967759711, "learning_rate": 2.406651541711169e-06, "loss": 0.9043, "step": 4607 }, { "epoch": 0.7813480288257736, "grad_norm": 1.0173055488937643, "learning_rate": 2.4030788504808e-06, "loss": 0.9523, "step": 4608 }, { "epoch": 0.7815175922000848, "grad_norm": 0.9678865161540681, "learning_rate": 2.399508450884631e-06, "loss": 0.9137, "step": 4609 }, { "epoch": 0.7816871555743959, "grad_norm": 0.9614206121502811, "learning_rate": 2.395940343999691e-06, "loss": 0.9252, "step": 4610 }, { "epoch": 0.7818567189487071, "grad_norm": 0.95624753773201, "learning_rate": 2.3923745309023072e-06, "loss": 0.9219, "step": 4611 }, { "epoch": 0.7820262823230182, "grad_norm": 0.9569151513549119, "learning_rate": 2.3888110126681163e-06, "loss": 0.9238, "step": 4612 }, { "epoch": 0.7821958456973294, "grad_norm": 1.009470111931034, "learning_rate": 2.3852497903720626e-06, "loss": 0.9833, "step": 4613 }, { "epoch": 0.7823654090716405, "grad_norm": 1.0000534269331767, "learning_rate": 2.3816908650884063e-06, "loss": 0.9607, "step": 4614 }, { "epoch": 0.7825349724459517, "grad_norm": 1.0048695176894948, "learning_rate": 2.3781342378907023e-06, "loss": 0.9327, "step": 4615 }, { "epoch": 0.7827045358202628, "grad_norm": 0.9429319987981376, "learning_rate": 2.3745799098518208e-06, "loss": 0.9316, "step": 4616 }, { "epoch": 0.782874099194574, "grad_norm": 0.9660451250704443, "learning_rate": 2.3710278820439313e-06, "loss": 0.9452, "step": 4617 }, { "epoch": 0.7830436625688851, "grad_norm": 0.9371194277097062, "learning_rate": 2.3674781555385197e-06, "loss": 0.9498, "step": 4618 }, { "epoch": 0.7832132259431963, "grad_norm": 1.0346455180338319, "learning_rate": 2.363930731406369e-06, "loss": 0.9112, "step": 4619 }, { "epoch": 0.7833827893175074, "grad_norm": 0.9541504666714702, "learning_rate": 2.360385610717567e-06, "loss": 0.914, "step": 4620 }, { "epoch": 0.7835523526918186, "grad_norm": 0.6617943696480774, "learning_rate": 2.3568427945415163e-06, "loss": 0.7922, "step": 4621 }, { "epoch": 0.7837219160661297, "grad_norm": 1.0097916238720612, "learning_rate": 2.3533022839469154e-06, "loss": 0.9438, "step": 4622 }, { "epoch": 0.7838914794404409, "grad_norm": 1.0156756044780897, "learning_rate": 2.3497640800017687e-06, "loss": 0.9333, "step": 4623 }, { "epoch": 0.784061042814752, "grad_norm": 1.0161261544899474, "learning_rate": 2.346228183773388e-06, "loss": 0.9657, "step": 4624 }, { "epoch": 0.7842306061890632, "grad_norm": 0.9804343650680064, "learning_rate": 2.3426945963283853e-06, "loss": 0.934, "step": 4625 }, { "epoch": 0.7844001695633743, "grad_norm": 0.9721135344404941, "learning_rate": 2.3391633187326802e-06, "loss": 0.9113, "step": 4626 }, { "epoch": 0.7845697329376855, "grad_norm": 1.0070824174582935, "learning_rate": 2.335634352051488e-06, "loss": 0.9481, "step": 4627 }, { "epoch": 0.7847392963119966, "grad_norm": 0.9145311520420111, "learning_rate": 2.3321076973493396e-06, "loss": 0.9041, "step": 4628 }, { "epoch": 0.7849088596863077, "grad_norm": 0.9527946683105675, "learning_rate": 2.328583355690056e-06, "loss": 0.949, "step": 4629 }, { "epoch": 0.7850784230606189, "grad_norm": 0.973671720757159, "learning_rate": 2.3250613281367686e-06, "loss": 0.9477, "step": 4630 }, { "epoch": 0.7852479864349301, "grad_norm": 0.6159806016999128, "learning_rate": 2.3215416157519023e-06, "loss": 0.7851, "step": 4631 }, { "epoch": 0.7854175498092412, "grad_norm": 0.9906897486481537, "learning_rate": 2.318024219597196e-06, "loss": 0.9333, "step": 4632 }, { "epoch": 0.7855871131835523, "grad_norm": 0.9689762304004497, "learning_rate": 2.3145091407336785e-06, "loss": 0.9326, "step": 4633 }, { "epoch": 0.7857566765578635, "grad_norm": 0.978384954222861, "learning_rate": 2.3109963802216863e-06, "loss": 0.9614, "step": 4634 }, { "epoch": 0.7859262399321747, "grad_norm": 1.0009916482629015, "learning_rate": 2.3074859391208494e-06, "loss": 0.9314, "step": 4635 }, { "epoch": 0.7860958033064858, "grad_norm": 1.0297972356344058, "learning_rate": 2.3039778184901086e-06, "loss": 0.9782, "step": 4636 }, { "epoch": 0.7862653666807969, "grad_norm": 0.9280285053923906, "learning_rate": 2.3004720193876972e-06, "loss": 0.9071, "step": 4637 }, { "epoch": 0.7864349300551081, "grad_norm": 0.9941700642047782, "learning_rate": 2.2969685428711474e-06, "loss": 0.9241, "step": 4638 }, { "epoch": 0.7866044934294193, "grad_norm": 0.6647785117908844, "learning_rate": 2.293467389997299e-06, "loss": 0.7841, "step": 4639 }, { "epoch": 0.7867740568037304, "grad_norm": 0.9862723693615132, "learning_rate": 2.289968561822282e-06, "loss": 0.9258, "step": 4640 }, { "epoch": 0.7869436201780415, "grad_norm": 0.9484606916472316, "learning_rate": 2.2864720594015288e-06, "loss": 0.8873, "step": 4641 }, { "epoch": 0.7871131835523527, "grad_norm": 0.9335256089712528, "learning_rate": 2.2829778837897696e-06, "loss": 0.8744, "step": 4642 }, { "epoch": 0.7872827469266639, "grad_norm": 0.5691102090424409, "learning_rate": 2.279486036041034e-06, "loss": 0.7239, "step": 4643 }, { "epoch": 0.787452310300975, "grad_norm": 0.9825968589176427, "learning_rate": 2.2759965172086474e-06, "loss": 0.9306, "step": 4644 }, { "epoch": 0.7876218736752861, "grad_norm": 0.9564621383240213, "learning_rate": 2.2725093283452305e-06, "loss": 0.9129, "step": 4645 }, { "epoch": 0.7877914370495973, "grad_norm": 0.9601210859865337, "learning_rate": 2.269024470502711e-06, "loss": 0.9283, "step": 4646 }, { "epoch": 0.7879610004239085, "grad_norm": 0.9687480256909964, "learning_rate": 2.2655419447323035e-06, "loss": 0.9035, "step": 4647 }, { "epoch": 0.7881305637982196, "grad_norm": 0.9753461758185377, "learning_rate": 2.262061752084522e-06, "loss": 0.9414, "step": 4648 }, { "epoch": 0.7883001271725307, "grad_norm": 0.92653080541803, "learning_rate": 2.2585838936091753e-06, "loss": 0.9551, "step": 4649 }, { "epoch": 0.7884696905468419, "grad_norm": 0.9624997063709831, "learning_rate": 2.2551083703553755e-06, "loss": 0.918, "step": 4650 }, { "epoch": 0.7886392539211531, "grad_norm": 0.9665142475004171, "learning_rate": 2.251635183371521e-06, "loss": 0.9023, "step": 4651 }, { "epoch": 0.7888088172954641, "grad_norm": 0.610728222213766, "learning_rate": 2.2481643337053095e-06, "loss": 0.7961, "step": 4652 }, { "epoch": 0.7889783806697753, "grad_norm": 1.0218472449110727, "learning_rate": 2.244695822403731e-06, "loss": 0.9384, "step": 4653 }, { "epoch": 0.7891479440440865, "grad_norm": 0.9498022811356979, "learning_rate": 2.241229650513077e-06, "loss": 0.9219, "step": 4654 }, { "epoch": 0.7893175074183977, "grad_norm": 0.9450072519810307, "learning_rate": 2.2377658190789263e-06, "loss": 0.9128, "step": 4655 }, { "epoch": 0.7894870707927087, "grad_norm": 1.049688709567374, "learning_rate": 2.234304329146152e-06, "loss": 0.9233, "step": 4656 }, { "epoch": 0.7896566341670199, "grad_norm": 0.9796988891329942, "learning_rate": 2.230845181758928e-06, "loss": 0.9518, "step": 4657 }, { "epoch": 0.7898261975413311, "grad_norm": 0.9809919913716446, "learning_rate": 2.2273883779607142e-06, "loss": 0.9276, "step": 4658 }, { "epoch": 0.7899957609156423, "grad_norm": 0.968919751988224, "learning_rate": 2.2239339187942653e-06, "loss": 0.8738, "step": 4659 }, { "epoch": 0.7901653242899533, "grad_norm": 0.9516042657289775, "learning_rate": 2.2204818053016286e-06, "loss": 0.9047, "step": 4660 }, { "epoch": 0.7903348876642645, "grad_norm": 0.9369762346753585, "learning_rate": 2.2170320385241475e-06, "loss": 0.8771, "step": 4661 }, { "epoch": 0.7905044510385757, "grad_norm": 0.9743103585735804, "learning_rate": 2.213584619502451e-06, "loss": 0.9268, "step": 4662 }, { "epoch": 0.7906740144128869, "grad_norm": 0.9820394981217843, "learning_rate": 2.2101395492764623e-06, "loss": 0.9273, "step": 4663 }, { "epoch": 0.7908435777871979, "grad_norm": 0.9461391037416478, "learning_rate": 2.206696828885403e-06, "loss": 0.929, "step": 4664 }, { "epoch": 0.7910131411615091, "grad_norm": 0.9271677872468416, "learning_rate": 2.2032564593677773e-06, "loss": 0.9155, "step": 4665 }, { "epoch": 0.7911827045358203, "grad_norm": 0.9655526067282698, "learning_rate": 2.199818441761383e-06, "loss": 0.9449, "step": 4666 }, { "epoch": 0.7913522679101314, "grad_norm": 0.670667135717453, "learning_rate": 2.1963827771033053e-06, "loss": 0.7807, "step": 4667 }, { "epoch": 0.7915218312844425, "grad_norm": 0.9724236998091764, "learning_rate": 2.192949466429929e-06, "loss": 0.9066, "step": 4668 }, { "epoch": 0.7916913946587537, "grad_norm": 0.9945015401439281, "learning_rate": 2.189518510776919e-06, "loss": 0.9376, "step": 4669 }, { "epoch": 0.7918609580330649, "grad_norm": 0.9856264100769789, "learning_rate": 2.1860899111792343e-06, "loss": 0.9589, "step": 4670 }, { "epoch": 0.792030521407376, "grad_norm": 0.9663913871456851, "learning_rate": 2.182663668671119e-06, "loss": 0.9129, "step": 4671 }, { "epoch": 0.7922000847816871, "grad_norm": 0.9462425753041646, "learning_rate": 2.1792397842861156e-06, "loss": 0.933, "step": 4672 }, { "epoch": 0.7923696481559983, "grad_norm": 0.9540240836225714, "learning_rate": 2.1758182590570454e-06, "loss": 0.9208, "step": 4673 }, { "epoch": 0.7925392115303095, "grad_norm": 0.9665364142248658, "learning_rate": 2.17239909401602e-06, "loss": 0.8776, "step": 4674 }, { "epoch": 0.7927087749046206, "grad_norm": 0.9333832088508213, "learning_rate": 2.1689822901944456e-06, "loss": 0.9418, "step": 4675 }, { "epoch": 0.7928783382789317, "grad_norm": 0.9586033569905082, "learning_rate": 2.165567848623009e-06, "loss": 0.9451, "step": 4676 }, { "epoch": 0.7930479016532429, "grad_norm": 0.9597056144881574, "learning_rate": 2.1621557703316876e-06, "loss": 0.9595, "step": 4677 }, { "epoch": 0.7932174650275541, "grad_norm": 0.9555872140892432, "learning_rate": 2.158746056349744e-06, "loss": 0.9402, "step": 4678 }, { "epoch": 0.7933870284018651, "grad_norm": 0.9679237813005439, "learning_rate": 2.15533870770573e-06, "loss": 0.9331, "step": 4679 }, { "epoch": 0.7935565917761763, "grad_norm": 0.9779881149786491, "learning_rate": 2.151933725427481e-06, "loss": 0.9351, "step": 4680 }, { "epoch": 0.7937261551504875, "grad_norm": 0.9734316327419267, "learning_rate": 2.148531110542118e-06, "loss": 0.8757, "step": 4681 }, { "epoch": 0.7938957185247987, "grad_norm": 0.9719291624253216, "learning_rate": 2.145130864076055e-06, "loss": 0.9153, "step": 4682 }, { "epoch": 0.7940652818991097, "grad_norm": 0.9830559712044682, "learning_rate": 2.1417329870549852e-06, "loss": 0.8693, "step": 4683 }, { "epoch": 0.7942348452734209, "grad_norm": 0.9401441818180429, "learning_rate": 2.138337480503888e-06, "loss": 0.9559, "step": 4684 }, { "epoch": 0.7944044086477321, "grad_norm": 0.9726940934553796, "learning_rate": 2.1349443454470254e-06, "loss": 0.9369, "step": 4685 }, { "epoch": 0.7945739720220433, "grad_norm": 0.9510115605898624, "learning_rate": 2.1315535829079524e-06, "loss": 0.8917, "step": 4686 }, { "epoch": 0.7947435353963543, "grad_norm": 0.9400314193832459, "learning_rate": 2.1281651939094996e-06, "loss": 0.9215, "step": 4687 }, { "epoch": 0.7949130987706655, "grad_norm": 0.9653972117441191, "learning_rate": 2.1247791794737827e-06, "loss": 0.9233, "step": 4688 }, { "epoch": 0.7950826621449767, "grad_norm": 0.9789152637556073, "learning_rate": 2.1213955406222076e-06, "loss": 0.9419, "step": 4689 }, { "epoch": 0.7952522255192879, "grad_norm": 0.9351063773477509, "learning_rate": 2.1180142783754565e-06, "loss": 0.8875, "step": 4690 }, { "epoch": 0.7954217888935989, "grad_norm": 0.9876075494532389, "learning_rate": 2.1146353937534993e-06, "loss": 0.9494, "step": 4691 }, { "epoch": 0.7955913522679101, "grad_norm": 1.0366653502184795, "learning_rate": 2.111258887775581e-06, "loss": 0.9417, "step": 4692 }, { "epoch": 0.7957609156422213, "grad_norm": 0.9608788663441238, "learning_rate": 2.1078847614602437e-06, "loss": 0.926, "step": 4693 }, { "epoch": 0.7959304790165325, "grad_norm": 1.0094349104826035, "learning_rate": 2.104513015825297e-06, "loss": 0.8996, "step": 4694 }, { "epoch": 0.7961000423908435, "grad_norm": 0.9748103477058154, "learning_rate": 2.10114365188784e-06, "loss": 0.932, "step": 4695 }, { "epoch": 0.7962696057651547, "grad_norm": 0.9584480408794203, "learning_rate": 2.097776670664251e-06, "loss": 0.9142, "step": 4696 }, { "epoch": 0.7964391691394659, "grad_norm": 0.9587778951120278, "learning_rate": 2.09441207317019e-06, "loss": 0.9519, "step": 4697 }, { "epoch": 0.7966087325137771, "grad_norm": 0.9352151520045621, "learning_rate": 2.091049860420599e-06, "loss": 0.8854, "step": 4698 }, { "epoch": 0.7967782958880881, "grad_norm": 1.0030555159339016, "learning_rate": 2.0876900334296936e-06, "loss": 0.9789, "step": 4699 }, { "epoch": 0.7969478592623993, "grad_norm": 0.9419132146557154, "learning_rate": 2.084332593210985e-06, "loss": 0.9296, "step": 4700 }, { "epoch": 0.7971174226367105, "grad_norm": 0.9692116882960738, "learning_rate": 2.0809775407772505e-06, "loss": 0.9232, "step": 4701 }, { "epoch": 0.7972869860110217, "grad_norm": 0.9249619308759223, "learning_rate": 2.0776248771405526e-06, "loss": 0.8713, "step": 4702 }, { "epoch": 0.7974565493853327, "grad_norm": 0.973262325677416, "learning_rate": 2.0742746033122296e-06, "loss": 0.9292, "step": 4703 }, { "epoch": 0.7976261127596439, "grad_norm": 1.0177147650953429, "learning_rate": 2.070926720302906e-06, "loss": 0.9113, "step": 4704 }, { "epoch": 0.7977956761339551, "grad_norm": 0.9649081215528689, "learning_rate": 2.0675812291224796e-06, "loss": 0.9009, "step": 4705 }, { "epoch": 0.7979652395082663, "grad_norm": 0.9810061966011068, "learning_rate": 2.064238130780125e-06, "loss": 0.9119, "step": 4706 }, { "epoch": 0.7981348028825773, "grad_norm": 0.9282221780095217, "learning_rate": 2.0608974262843018e-06, "loss": 0.8843, "step": 4707 }, { "epoch": 0.7983043662568885, "grad_norm": 1.0638705566041549, "learning_rate": 2.0575591166427433e-06, "loss": 0.9466, "step": 4708 }, { "epoch": 0.7984739296311997, "grad_norm": 0.9701446145620254, "learning_rate": 2.0542232028624585e-06, "loss": 0.9167, "step": 4709 }, { "epoch": 0.7986434930055109, "grad_norm": 0.9869482864301755, "learning_rate": 2.050889685949734e-06, "loss": 0.9224, "step": 4710 }, { "epoch": 0.7988130563798219, "grad_norm": 1.0060000680420302, "learning_rate": 2.0475585669101415e-06, "loss": 0.9188, "step": 4711 }, { "epoch": 0.7989826197541331, "grad_norm": 0.9718750049975254, "learning_rate": 2.0442298467485187e-06, "loss": 0.9269, "step": 4712 }, { "epoch": 0.7991521831284443, "grad_norm": 1.0189104140211012, "learning_rate": 2.0409035264689857e-06, "loss": 0.947, "step": 4713 }, { "epoch": 0.7993217465027554, "grad_norm": 0.9612282930283035, "learning_rate": 2.0375796070749366e-06, "loss": 0.9229, "step": 4714 }, { "epoch": 0.7994913098770665, "grad_norm": 0.9855196614464193, "learning_rate": 2.034258089569041e-06, "loss": 0.9431, "step": 4715 }, { "epoch": 0.7996608732513777, "grad_norm": 1.0454491186056527, "learning_rate": 2.030938974953245e-06, "loss": 0.9624, "step": 4716 }, { "epoch": 0.7998304366256889, "grad_norm": 1.0116574038257442, "learning_rate": 2.027622264228768e-06, "loss": 0.9288, "step": 4717 }, { "epoch": 0.8, "grad_norm": 0.999263751826078, "learning_rate": 2.024307958396109e-06, "loss": 0.9356, "step": 4718 }, { "epoch": 0.8001695633743111, "grad_norm": 0.9843516105114978, "learning_rate": 2.020996058455038e-06, "loss": 0.9114, "step": 4719 }, { "epoch": 0.8003391267486223, "grad_norm": 0.9625613349652331, "learning_rate": 2.017686565404597e-06, "loss": 0.9177, "step": 4720 }, { "epoch": 0.8005086901229335, "grad_norm": 0.9667283576725946, "learning_rate": 2.014379480243105e-06, "loss": 0.9356, "step": 4721 }, { "epoch": 0.8006782534972446, "grad_norm": 1.0103221123897892, "learning_rate": 2.0110748039681573e-06, "loss": 0.909, "step": 4722 }, { "epoch": 0.8008478168715557, "grad_norm": 0.970450649918271, "learning_rate": 2.0077725375766175e-06, "loss": 0.9373, "step": 4723 }, { "epoch": 0.8010173802458669, "grad_norm": 0.9651817633746508, "learning_rate": 2.004472682064622e-06, "loss": 0.8974, "step": 4724 }, { "epoch": 0.8011869436201781, "grad_norm": 0.981208472608705, "learning_rate": 2.0011752384275862e-06, "loss": 0.9323, "step": 4725 }, { "epoch": 0.8013565069944892, "grad_norm": 0.9601056681389586, "learning_rate": 1.9978802076601934e-06, "loss": 0.9059, "step": 4726 }, { "epoch": 0.8015260703688003, "grad_norm": 0.9487048034399439, "learning_rate": 1.994587590756397e-06, "loss": 0.9184, "step": 4727 }, { "epoch": 0.8016956337431115, "grad_norm": 0.9769453971356413, "learning_rate": 1.9912973887094246e-06, "loss": 0.954, "step": 4728 }, { "epoch": 0.8018651971174227, "grad_norm": 0.9165536290195544, "learning_rate": 1.988009602511779e-06, "loss": 0.9094, "step": 4729 }, { "epoch": 0.8020347604917338, "grad_norm": 0.9633130725762168, "learning_rate": 1.9847242331552285e-06, "loss": 0.9183, "step": 4730 }, { "epoch": 0.8022043238660449, "grad_norm": 0.9963984385591905, "learning_rate": 1.981441281630816e-06, "loss": 0.9423, "step": 4731 }, { "epoch": 0.8023738872403561, "grad_norm": 1.0049660232386082, "learning_rate": 1.9781607489288524e-06, "loss": 0.9349, "step": 4732 }, { "epoch": 0.8025434506146673, "grad_norm": 0.9843477907829624, "learning_rate": 1.9748826360389216e-06, "loss": 0.9509, "step": 4733 }, { "epoch": 0.8027130139889784, "grad_norm": 1.0141658707565955, "learning_rate": 1.971606943949872e-06, "loss": 0.9263, "step": 4734 }, { "epoch": 0.8028825773632895, "grad_norm": 0.9649612059454581, "learning_rate": 1.9683336736498326e-06, "loss": 0.883, "step": 4735 }, { "epoch": 0.8030521407376007, "grad_norm": 0.9498566631837178, "learning_rate": 1.965062826126192e-06, "loss": 0.9296, "step": 4736 }, { "epoch": 0.8032217041119118, "grad_norm": 0.9287207625370278, "learning_rate": 1.961794402365611e-06, "loss": 0.9139, "step": 4737 }, { "epoch": 0.803391267486223, "grad_norm": 0.9950462719813227, "learning_rate": 1.9585284033540197e-06, "loss": 0.9031, "step": 4738 }, { "epoch": 0.8035608308605341, "grad_norm": 0.9944065331843382, "learning_rate": 1.955264830076614e-06, "loss": 0.9566, "step": 4739 }, { "epoch": 0.8037303942348453, "grad_norm": 1.0612649194383519, "learning_rate": 1.9520036835178667e-06, "loss": 0.9035, "step": 4740 }, { "epoch": 0.8038999576091564, "grad_norm": 1.0672855654527342, "learning_rate": 1.9487449646615087e-06, "loss": 0.9329, "step": 4741 }, { "epoch": 0.8040695209834676, "grad_norm": 0.9931615533220247, "learning_rate": 1.94548867449054e-06, "loss": 0.9488, "step": 4742 }, { "epoch": 0.8042390843577787, "grad_norm": 0.9803644811657122, "learning_rate": 1.942234813987236e-06, "loss": 0.9415, "step": 4743 }, { "epoch": 0.8044086477320899, "grad_norm": 1.010334356645439, "learning_rate": 1.9389833841331306e-06, "loss": 0.9499, "step": 4744 }, { "epoch": 0.804578211106401, "grad_norm": 0.9647480940687069, "learning_rate": 1.935734385909028e-06, "loss": 0.911, "step": 4745 }, { "epoch": 0.8047477744807122, "grad_norm": 0.9807356795805566, "learning_rate": 1.932487820294995e-06, "loss": 0.9495, "step": 4746 }, { "epoch": 0.8049173378550233, "grad_norm": 0.9542263453922717, "learning_rate": 1.9292436882703735e-06, "loss": 0.8983, "step": 4747 }, { "epoch": 0.8050869012293345, "grad_norm": 0.98025254757285, "learning_rate": 1.926001990813763e-06, "loss": 0.9196, "step": 4748 }, { "epoch": 0.8052564646036456, "grad_norm": 0.9753260204686327, "learning_rate": 1.9227627289030315e-06, "loss": 0.899, "step": 4749 }, { "epoch": 0.8054260279779568, "grad_norm": 0.9779000833260253, "learning_rate": 1.919525903515309e-06, "loss": 0.9204, "step": 4750 }, { "epoch": 0.8055955913522679, "grad_norm": 0.9326980957721716, "learning_rate": 1.916291515626999e-06, "loss": 0.9753, "step": 4751 }, { "epoch": 0.8057651547265791, "grad_norm": 0.9469597139423694, "learning_rate": 1.913059566213763e-06, "loss": 0.9442, "step": 4752 }, { "epoch": 0.8059347181008902, "grad_norm": 0.9747722745405499, "learning_rate": 1.9098300562505266e-06, "loss": 0.9507, "step": 4753 }, { "epoch": 0.8061042814752014, "grad_norm": 0.9461085853542899, "learning_rate": 1.9066029867114822e-06, "loss": 0.9341, "step": 4754 }, { "epoch": 0.8062738448495125, "grad_norm": 1.0286523524797848, "learning_rate": 1.9033783585700848e-06, "loss": 0.9422, "step": 4755 }, { "epoch": 0.8064434082238237, "grad_norm": 0.988493867489724, "learning_rate": 1.9001561727990524e-06, "loss": 0.9463, "step": 4756 }, { "epoch": 0.8066129715981348, "grad_norm": 0.972605324692901, "learning_rate": 1.8969364303703664e-06, "loss": 0.9704, "step": 4757 }, { "epoch": 0.8067825349724459, "grad_norm": 1.026823343133025, "learning_rate": 1.8937191322552762e-06, "loss": 0.9029, "step": 4758 }, { "epoch": 0.8069520983467571, "grad_norm": 0.9411587249968623, "learning_rate": 1.8905042794242857e-06, "loss": 0.8702, "step": 4759 }, { "epoch": 0.8071216617210683, "grad_norm": 0.9589204462788418, "learning_rate": 1.8872918728471635e-06, "loss": 0.9565, "step": 4760 }, { "epoch": 0.8072912250953794, "grad_norm": 0.9650962709510975, "learning_rate": 1.8840819134929467e-06, "loss": 0.9305, "step": 4761 }, { "epoch": 0.8074607884696905, "grad_norm": 0.986957929967007, "learning_rate": 1.8808744023299263e-06, "loss": 0.9608, "step": 4762 }, { "epoch": 0.8076303518440017, "grad_norm": 0.951733607074917, "learning_rate": 1.8776693403256585e-06, "loss": 0.9168, "step": 4763 }, { "epoch": 0.8077999152183128, "grad_norm": 0.9427834536601994, "learning_rate": 1.8744667284469575e-06, "loss": 0.9309, "step": 4764 }, { "epoch": 0.807969478592624, "grad_norm": 0.95219258966223, "learning_rate": 1.871266567659905e-06, "loss": 0.895, "step": 4765 }, { "epoch": 0.8081390419669351, "grad_norm": 1.0150858001959628, "learning_rate": 1.8680688589298368e-06, "loss": 0.9384, "step": 4766 }, { "epoch": 0.8083086053412463, "grad_norm": 0.9661334003359936, "learning_rate": 1.8648736032213521e-06, "loss": 0.9022, "step": 4767 }, { "epoch": 0.8084781687155574, "grad_norm": 0.9918694887957046, "learning_rate": 1.8616808014983057e-06, "loss": 0.9254, "step": 4768 }, { "epoch": 0.8086477320898686, "grad_norm": 0.9400378081980244, "learning_rate": 1.8584904547238214e-06, "loss": 0.9168, "step": 4769 }, { "epoch": 0.8088172954641797, "grad_norm": 0.9592460430470862, "learning_rate": 1.8553025638602762e-06, "loss": 0.9496, "step": 4770 }, { "epoch": 0.8089868588384909, "grad_norm": 0.6852958235590761, "learning_rate": 1.8521171298693042e-06, "loss": 0.7791, "step": 4771 }, { "epoch": 0.809156422212802, "grad_norm": 0.9953318296084129, "learning_rate": 1.8489341537118021e-06, "loss": 0.9639, "step": 4772 }, { "epoch": 0.8093259855871132, "grad_norm": 0.5902330545525112, "learning_rate": 1.8457536363479257e-06, "loss": 0.7947, "step": 4773 }, { "epoch": 0.8094955489614243, "grad_norm": 0.9500980248028972, "learning_rate": 1.8425755787370869e-06, "loss": 0.9524, "step": 4774 }, { "epoch": 0.8096651123357355, "grad_norm": 0.9712167050616632, "learning_rate": 1.8393999818379527e-06, "loss": 0.9226, "step": 4775 }, { "epoch": 0.8098346757100466, "grad_norm": 0.9957401480069289, "learning_rate": 1.8362268466084577e-06, "loss": 0.93, "step": 4776 }, { "epoch": 0.8100042390843578, "grad_norm": 0.9904257801419981, "learning_rate": 1.8330561740057839e-06, "loss": 0.9347, "step": 4777 }, { "epoch": 0.8101738024586689, "grad_norm": 0.9789113847761655, "learning_rate": 1.8298879649863733e-06, "loss": 0.9264, "step": 4778 }, { "epoch": 0.8103433658329801, "grad_norm": 1.0034344426941375, "learning_rate": 1.826722220505931e-06, "loss": 0.9204, "step": 4779 }, { "epoch": 0.8105129292072912, "grad_norm": 0.9749030656390125, "learning_rate": 1.8235589415194089e-06, "loss": 0.9125, "step": 4780 }, { "epoch": 0.8106824925816024, "grad_norm": 0.985667145582083, "learning_rate": 1.8203981289810212e-06, "loss": 0.9293, "step": 4781 }, { "epoch": 0.8108520559559135, "grad_norm": 0.9814106765867971, "learning_rate": 1.8172397838442345e-06, "loss": 0.9305, "step": 4782 }, { "epoch": 0.8110216193302247, "grad_norm": 0.9468807243533414, "learning_rate": 1.8140839070617765e-06, "loss": 0.9227, "step": 4783 }, { "epoch": 0.8111911827045358, "grad_norm": 0.994909327743116, "learning_rate": 1.8109304995856247e-06, "loss": 0.906, "step": 4784 }, { "epoch": 0.811360746078847, "grad_norm": 0.9923423759762594, "learning_rate": 1.8077795623670135e-06, "loss": 0.9412, "step": 4785 }, { "epoch": 0.8115303094531581, "grad_norm": 0.9658799119450953, "learning_rate": 1.804631096356435e-06, "loss": 0.9174, "step": 4786 }, { "epoch": 0.8116998728274692, "grad_norm": 0.6558538099505516, "learning_rate": 1.8014851025036329e-06, "loss": 0.7908, "step": 4787 }, { "epoch": 0.8118694362017804, "grad_norm": 0.9436291218934865, "learning_rate": 1.7983415817576044e-06, "loss": 0.915, "step": 4788 }, { "epoch": 0.8120389995760916, "grad_norm": 0.9418942397454481, "learning_rate": 1.7952005350666023e-06, "loss": 0.9018, "step": 4789 }, { "epoch": 0.8122085629504027, "grad_norm": 0.9110827827944578, "learning_rate": 1.7920619633781332e-06, "loss": 0.9273, "step": 4790 }, { "epoch": 0.8123781263247138, "grad_norm": 0.9870548105748547, "learning_rate": 1.7889258676389577e-06, "loss": 0.9644, "step": 4791 }, { "epoch": 0.812547689699025, "grad_norm": 0.9755421965284303, "learning_rate": 1.7857922487950873e-06, "loss": 0.8938, "step": 4792 }, { "epoch": 0.8127172530733362, "grad_norm": 1.0061508546765001, "learning_rate": 1.7826611077917843e-06, "loss": 0.9369, "step": 4793 }, { "epoch": 0.8128868164476473, "grad_norm": 0.9684683002304985, "learning_rate": 1.779532445573574e-06, "loss": 0.9117, "step": 4794 }, { "epoch": 0.8130563798219584, "grad_norm": 0.9962328589287732, "learning_rate": 1.7764062630842226e-06, "loss": 0.9251, "step": 4795 }, { "epoch": 0.8132259431962696, "grad_norm": 0.9540049900629931, "learning_rate": 1.7732825612667503e-06, "loss": 0.9302, "step": 4796 }, { "epoch": 0.8133955065705808, "grad_norm": 0.9863235761950255, "learning_rate": 1.7701613410634367e-06, "loss": 0.9395, "step": 4797 }, { "epoch": 0.8135650699448919, "grad_norm": 0.9886242855691526, "learning_rate": 1.7670426034158039e-06, "loss": 0.9406, "step": 4798 }, { "epoch": 0.813734633319203, "grad_norm": 1.0281674291172385, "learning_rate": 1.7639263492646298e-06, "loss": 0.939, "step": 4799 }, { "epoch": 0.8139041966935142, "grad_norm": 0.9823049664506998, "learning_rate": 1.7608125795499386e-06, "loss": 0.924, "step": 4800 }, { "epoch": 0.8140737600678254, "grad_norm": 0.9733385794078995, "learning_rate": 1.757701295211014e-06, "loss": 0.9427, "step": 4801 }, { "epoch": 0.8142433234421365, "grad_norm": 0.9894128000419272, "learning_rate": 1.7545924971863804e-06, "loss": 0.9011, "step": 4802 }, { "epoch": 0.8144128868164476, "grad_norm": 1.0059160916454422, "learning_rate": 1.7514861864138145e-06, "loss": 0.9457, "step": 4803 }, { "epoch": 0.8145824501907588, "grad_norm": 1.0159541314761331, "learning_rate": 1.74838236383035e-06, "loss": 0.9381, "step": 4804 }, { "epoch": 0.81475201356507, "grad_norm": 0.9960947205135507, "learning_rate": 1.74528103037226e-06, "loss": 0.9156, "step": 4805 }, { "epoch": 0.814921576939381, "grad_norm": 0.9622416591995683, "learning_rate": 1.7421821869750732e-06, "loss": 0.8923, "step": 4806 }, { "epoch": 0.8150911403136922, "grad_norm": 0.9354823927744681, "learning_rate": 1.739085834573564e-06, "loss": 0.9126, "step": 4807 }, { "epoch": 0.8152607036880034, "grad_norm": 1.0496368876410247, "learning_rate": 1.735991974101756e-06, "loss": 0.9701, "step": 4808 }, { "epoch": 0.8154302670623146, "grad_norm": 0.9449191610223884, "learning_rate": 1.7329006064929232e-06, "loss": 0.9316, "step": 4809 }, { "epoch": 0.8155998304366257, "grad_norm": 1.0275039639697148, "learning_rate": 1.7298117326795838e-06, "loss": 0.9138, "step": 4810 }, { "epoch": 0.8157693938109368, "grad_norm": 0.981654738732103, "learning_rate": 1.7267253535935057e-06, "loss": 0.9087, "step": 4811 }, { "epoch": 0.815938957185248, "grad_norm": 1.0400078485669713, "learning_rate": 1.7236414701657067e-06, "loss": 0.9751, "step": 4812 }, { "epoch": 0.8161085205595592, "grad_norm": 0.9003217428781333, "learning_rate": 1.7205600833264501e-06, "loss": 0.8723, "step": 4813 }, { "epoch": 0.8162780839338702, "grad_norm": 0.989271252893948, "learning_rate": 1.7174811940052404e-06, "loss": 0.9158, "step": 4814 }, { "epoch": 0.8164476473081814, "grad_norm": 0.9098563778099721, "learning_rate": 1.7144048031308414e-06, "loss": 0.9235, "step": 4815 }, { "epoch": 0.8166172106824926, "grad_norm": 0.9782634007564286, "learning_rate": 1.7113309116312505e-06, "loss": 0.9032, "step": 4816 }, { "epoch": 0.8167867740568038, "grad_norm": 0.6349325824962954, "learning_rate": 1.7082595204337183e-06, "loss": 0.742, "step": 4817 }, { "epoch": 0.8169563374311148, "grad_norm": 0.9747100475429629, "learning_rate": 1.705190630464737e-06, "loss": 0.9107, "step": 4818 }, { "epoch": 0.817125900805426, "grad_norm": 0.9754332236128412, "learning_rate": 1.7021242426500495e-06, "loss": 0.9203, "step": 4819 }, { "epoch": 0.8172954641797372, "grad_norm": 0.9812566911680318, "learning_rate": 1.6990603579146391e-06, "loss": 0.9023, "step": 4820 }, { "epoch": 0.8174650275540484, "grad_norm": 0.958057701484152, "learning_rate": 1.6959989771827346e-06, "loss": 0.9066, "step": 4821 }, { "epoch": 0.8176345909283594, "grad_norm": 0.9346876372465223, "learning_rate": 1.6929401013778157e-06, "loss": 0.925, "step": 4822 }, { "epoch": 0.8178041543026706, "grad_norm": 1.0067900945594015, "learning_rate": 1.6898837314225969e-06, "loss": 0.9004, "step": 4823 }, { "epoch": 0.8179737176769818, "grad_norm": 1.005496920176738, "learning_rate": 1.6868298682390437e-06, "loss": 0.9438, "step": 4824 }, { "epoch": 0.818143281051293, "grad_norm": 1.0196362997833366, "learning_rate": 1.683778512748362e-06, "loss": 0.9382, "step": 4825 }, { "epoch": 0.818312844425604, "grad_norm": 0.6867945510298027, "learning_rate": 1.6807296658710038e-06, "loss": 0.837, "step": 4826 }, { "epoch": 0.8184824077999152, "grad_norm": 0.968737511817879, "learning_rate": 1.6776833285266602e-06, "loss": 0.895, "step": 4827 }, { "epoch": 0.8186519711742264, "grad_norm": 0.964416935501097, "learning_rate": 1.6746395016342708e-06, "loss": 0.92, "step": 4828 }, { "epoch": 0.8188215345485376, "grad_norm": 0.9796298690479828, "learning_rate": 1.6715981861120112e-06, "loss": 0.9131, "step": 4829 }, { "epoch": 0.8189910979228486, "grad_norm": 0.9522268016283855, "learning_rate": 1.6685593828773095e-06, "loss": 0.9081, "step": 4830 }, { "epoch": 0.8191606612971598, "grad_norm": 0.9845229378868322, "learning_rate": 1.6655230928468257e-06, "loss": 0.9231, "step": 4831 }, { "epoch": 0.819330224671471, "grad_norm": 0.9597390804072871, "learning_rate": 1.6624893169364641e-06, "loss": 0.9333, "step": 4832 }, { "epoch": 0.8194997880457822, "grad_norm": 0.9487069875424946, "learning_rate": 1.6594580560613782e-06, "loss": 0.9391, "step": 4833 }, { "epoch": 0.8196693514200932, "grad_norm": 0.6473279012223865, "learning_rate": 1.6564293111359541e-06, "loss": 0.77, "step": 4834 }, { "epoch": 0.8198389147944044, "grad_norm": 0.9996786144045007, "learning_rate": 1.6534030830738223e-06, "loss": 0.933, "step": 4835 }, { "epoch": 0.8200084781687156, "grad_norm": 0.9351334734665407, "learning_rate": 1.6503793727878493e-06, "loss": 0.9011, "step": 4836 }, { "epoch": 0.8201780415430268, "grad_norm": 0.9819576205406819, "learning_rate": 1.6473581811901529e-06, "loss": 0.934, "step": 4837 }, { "epoch": 0.8203476049173378, "grad_norm": 0.9776310942287822, "learning_rate": 1.6443395091920822e-06, "loss": 0.9199, "step": 4838 }, { "epoch": 0.820517168291649, "grad_norm": 0.9536046115177798, "learning_rate": 1.6413233577042253e-06, "loss": 0.9294, "step": 4839 }, { "epoch": 0.8206867316659602, "grad_norm": 0.9680768397474379, "learning_rate": 1.6383097276364202e-06, "loss": 0.9061, "step": 4840 }, { "epoch": 0.8208562950402714, "grad_norm": 0.609376517722487, "learning_rate": 1.6352986198977327e-06, "loss": 0.7629, "step": 4841 }, { "epoch": 0.8210258584145824, "grad_norm": 1.009160569244628, "learning_rate": 1.6322900353964732e-06, "loss": 0.9107, "step": 4842 }, { "epoch": 0.8211954217888936, "grad_norm": 0.6463859033489396, "learning_rate": 1.6292839750401924e-06, "loss": 0.7836, "step": 4843 }, { "epoch": 0.8213649851632048, "grad_norm": 0.9703655318167864, "learning_rate": 1.6262804397356747e-06, "loss": 0.9364, "step": 4844 }, { "epoch": 0.821534548537516, "grad_norm": 1.010602545527202, "learning_rate": 1.6232794303889466e-06, "loss": 0.9356, "step": 4845 }, { "epoch": 0.821704111911827, "grad_norm": 0.5898944731230332, "learning_rate": 1.6202809479052728e-06, "loss": 0.7388, "step": 4846 }, { "epoch": 0.8218736752861382, "grad_norm": 1.01095244106849, "learning_rate": 1.617284993189151e-06, "loss": 0.9488, "step": 4847 }, { "epoch": 0.8220432386604494, "grad_norm": 0.9628062432037369, "learning_rate": 1.6142915671443238e-06, "loss": 0.9035, "step": 4848 }, { "epoch": 0.8222128020347604, "grad_norm": 0.9694490357189731, "learning_rate": 1.6113006706737667e-06, "loss": 0.8903, "step": 4849 }, { "epoch": 0.8223823654090716, "grad_norm": 1.056685661018095, "learning_rate": 1.60831230467969e-06, "loss": 0.9387, "step": 4850 }, { "epoch": 0.8225519287833828, "grad_norm": 0.9836485925737894, "learning_rate": 1.6053264700635474e-06, "loss": 0.9324, "step": 4851 }, { "epoch": 0.822721492157694, "grad_norm": 0.9616794407709552, "learning_rate": 1.6023431677260215e-06, "loss": 0.8904, "step": 4852 }, { "epoch": 0.822891055532005, "grad_norm": 0.9889852040409204, "learning_rate": 1.599362398567037e-06, "loss": 0.9208, "step": 4853 }, { "epoch": 0.8230606189063162, "grad_norm": 0.9709834492704014, "learning_rate": 1.596384163485748e-06, "loss": 0.9187, "step": 4854 }, { "epoch": 0.8232301822806274, "grad_norm": 0.9874173204574763, "learning_rate": 1.5934084633805536e-06, "loss": 0.9121, "step": 4855 }, { "epoch": 0.8233997456549386, "grad_norm": 1.023751431287385, "learning_rate": 1.590435299149079e-06, "loss": 0.9184, "step": 4856 }, { "epoch": 0.8235693090292496, "grad_norm": 0.9556901891046856, "learning_rate": 1.587464671688187e-06, "loss": 0.8759, "step": 4857 }, { "epoch": 0.8237388724035608, "grad_norm": 0.9480987982580044, "learning_rate": 1.5844965818939806e-06, "loss": 0.8633, "step": 4858 }, { "epoch": 0.823908435777872, "grad_norm": 0.9637821483082929, "learning_rate": 1.5815310306617914e-06, "loss": 0.8703, "step": 4859 }, { "epoch": 0.8240779991521832, "grad_norm": 0.9318774036591555, "learning_rate": 1.5785680188861862e-06, "loss": 0.8983, "step": 4860 }, { "epoch": 0.8242475625264942, "grad_norm": 0.9716611389266541, "learning_rate": 1.5756075474609667e-06, "loss": 0.9194, "step": 4861 }, { "epoch": 0.8244171259008054, "grad_norm": 0.9924323162563781, "learning_rate": 1.5726496172791671e-06, "loss": 0.919, "step": 4862 }, { "epoch": 0.8245866892751166, "grad_norm": 0.9361311174503837, "learning_rate": 1.5696942292330574e-06, "loss": 0.9405, "step": 4863 }, { "epoch": 0.8247562526494278, "grad_norm": 1.0355391555405462, "learning_rate": 1.5667413842141377e-06, "loss": 0.9002, "step": 4864 }, { "epoch": 0.8249258160237388, "grad_norm": 0.9786538489118394, "learning_rate": 1.563791083113142e-06, "loss": 0.9266, "step": 4865 }, { "epoch": 0.82509537939805, "grad_norm": 1.0118083016574306, "learning_rate": 1.5608433268200418e-06, "loss": 0.9399, "step": 4866 }, { "epoch": 0.8252649427723612, "grad_norm": 0.9535172859201975, "learning_rate": 1.5578981162240337e-06, "loss": 0.9558, "step": 4867 }, { "epoch": 0.8254345061466724, "grad_norm": 0.9893033245905223, "learning_rate": 1.554955452213548e-06, "loss": 0.9359, "step": 4868 }, { "epoch": 0.8256040695209834, "grad_norm": 0.9570735510047722, "learning_rate": 1.5520153356762514e-06, "loss": 0.9077, "step": 4869 }, { "epoch": 0.8257736328952946, "grad_norm": 1.0246930199545017, "learning_rate": 1.5490777674990376e-06, "loss": 0.9371, "step": 4870 }, { "epoch": 0.8259431962696058, "grad_norm": 0.9431737353875405, "learning_rate": 1.5461427485680336e-06, "loss": 0.9469, "step": 4871 }, { "epoch": 0.826112759643917, "grad_norm": 1.0339554508298139, "learning_rate": 1.5432102797685922e-06, "loss": 0.9553, "step": 4872 }, { "epoch": 0.826282323018228, "grad_norm": 0.967358767980075, "learning_rate": 1.540280361985308e-06, "loss": 0.8975, "step": 4873 }, { "epoch": 0.8264518863925392, "grad_norm": 1.0003183024050315, "learning_rate": 1.5373529961019972e-06, "loss": 0.9638, "step": 4874 }, { "epoch": 0.8266214497668504, "grad_norm": 0.9159703449042248, "learning_rate": 1.534428183001705e-06, "loss": 0.9161, "step": 4875 }, { "epoch": 0.8267910131411615, "grad_norm": 0.9694259077321321, "learning_rate": 1.5315059235667161e-06, "loss": 0.9299, "step": 4876 }, { "epoch": 0.8269605765154726, "grad_norm": 0.9385417863702417, "learning_rate": 1.528586218678535e-06, "loss": 0.9158, "step": 4877 }, { "epoch": 0.8271301398897838, "grad_norm": 0.9847733491247092, "learning_rate": 1.5256690692179011e-06, "loss": 0.9137, "step": 4878 }, { "epoch": 0.827299703264095, "grad_norm": 1.0301076951117067, "learning_rate": 1.5227544760647805e-06, "loss": 0.9422, "step": 4879 }, { "epoch": 0.8274692666384061, "grad_norm": 0.9518676501888206, "learning_rate": 1.5198424400983692e-06, "loss": 0.91, "step": 4880 }, { "epoch": 0.8276388300127172, "grad_norm": 1.019618675263682, "learning_rate": 1.5169329621970918e-06, "loss": 0.9485, "step": 4881 }, { "epoch": 0.8278083933870284, "grad_norm": 0.9521386133644539, "learning_rate": 1.514026043238598e-06, "loss": 0.957, "step": 4882 }, { "epoch": 0.8279779567613396, "grad_norm": 0.9921188141718126, "learning_rate": 1.5111216840997745e-06, "loss": 0.9399, "step": 4883 }, { "epoch": 0.8281475201356507, "grad_norm": 0.9563047570235461, "learning_rate": 1.5082198856567265e-06, "loss": 0.8918, "step": 4884 }, { "epoch": 0.8283170835099618, "grad_norm": 0.702179174404355, "learning_rate": 1.5053206487847916e-06, "loss": 0.8378, "step": 4885 }, { "epoch": 0.828486646884273, "grad_norm": 0.9808465524386978, "learning_rate": 1.5024239743585301e-06, "loss": 0.9276, "step": 4886 }, { "epoch": 0.8286562102585842, "grad_norm": 0.974430369560909, "learning_rate": 1.4995298632517374e-06, "loss": 0.9074, "step": 4887 }, { "epoch": 0.8288257736328953, "grad_norm": 0.9524635652761271, "learning_rate": 1.4966383163374288e-06, "loss": 0.9249, "step": 4888 }, { "epoch": 0.8289953370072064, "grad_norm": 0.9603500943050038, "learning_rate": 1.4937493344878474e-06, "loss": 0.8669, "step": 4889 }, { "epoch": 0.8291649003815176, "grad_norm": 0.952715669585621, "learning_rate": 1.4908629185744617e-06, "loss": 0.8949, "step": 4890 }, { "epoch": 0.8293344637558288, "grad_norm": 0.9670199946840378, "learning_rate": 1.487979069467972e-06, "loss": 0.8651, "step": 4891 }, { "epoch": 0.8295040271301399, "grad_norm": 0.5795050208616122, "learning_rate": 1.4850977880382977e-06, "loss": 0.7673, "step": 4892 }, { "epoch": 0.829673590504451, "grad_norm": 0.9768997812905504, "learning_rate": 1.482219075154585e-06, "loss": 0.9557, "step": 4893 }, { "epoch": 0.8298431538787622, "grad_norm": 0.9920070573106412, "learning_rate": 1.4793429316852092e-06, "loss": 0.9376, "step": 4894 }, { "epoch": 0.8300127172530734, "grad_norm": 0.9563862719736099, "learning_rate": 1.4764693584977663e-06, "loss": 0.9583, "step": 4895 }, { "epoch": 0.8301822806273845, "grad_norm": 0.9962689462156771, "learning_rate": 1.4735983564590784e-06, "loss": 0.9428, "step": 4896 }, { "epoch": 0.8303518440016956, "grad_norm": 1.0030443357978136, "learning_rate": 1.4707299264351914e-06, "loss": 0.9446, "step": 4897 }, { "epoch": 0.8305214073760068, "grad_norm": 1.0019083955092671, "learning_rate": 1.467864069291376e-06, "loss": 0.9058, "step": 4898 }, { "epoch": 0.830690970750318, "grad_norm": 0.9380533336191916, "learning_rate": 1.4650007858921279e-06, "loss": 0.9111, "step": 4899 }, { "epoch": 0.8308605341246291, "grad_norm": 0.9841045475935494, "learning_rate": 1.4621400771011607e-06, "loss": 0.9398, "step": 4900 }, { "epoch": 0.8310300974989402, "grad_norm": 0.9983828334245135, "learning_rate": 1.459281943781422e-06, "loss": 0.9174, "step": 4901 }, { "epoch": 0.8311996608732514, "grad_norm": 1.0282451440043119, "learning_rate": 1.4564263867950733e-06, "loss": 0.9406, "step": 4902 }, { "epoch": 0.8313692242475625, "grad_norm": 0.9264131165546978, "learning_rate": 1.4535734070035024e-06, "loss": 0.8977, "step": 4903 }, { "epoch": 0.8315387876218737, "grad_norm": 0.9696854041122743, "learning_rate": 1.450723005267317e-06, "loss": 0.9192, "step": 4904 }, { "epoch": 0.8317083509961848, "grad_norm": 1.016881718587947, "learning_rate": 1.4478751824463543e-06, "loss": 0.9723, "step": 4905 }, { "epoch": 0.831877914370496, "grad_norm": 0.9699394048370444, "learning_rate": 1.4450299393996647e-06, "loss": 0.9263, "step": 4906 }, { "epoch": 0.8320474777448071, "grad_norm": 0.9559044181349066, "learning_rate": 1.4421872769855262e-06, "loss": 0.8792, "step": 4907 }, { "epoch": 0.8322170411191183, "grad_norm": 0.9834967046558155, "learning_rate": 1.4393471960614336e-06, "loss": 0.9072, "step": 4908 }, { "epoch": 0.8323866044934294, "grad_norm": 0.9586249063145813, "learning_rate": 1.436509697484111e-06, "loss": 0.9072, "step": 4909 }, { "epoch": 0.8325561678677406, "grad_norm": 0.9479908156064399, "learning_rate": 1.4336747821094942e-06, "loss": 0.9268, "step": 4910 }, { "epoch": 0.8327257312420517, "grad_norm": 1.013493554150115, "learning_rate": 1.4308424507927442e-06, "loss": 0.9574, "step": 4911 }, { "epoch": 0.8328952946163629, "grad_norm": 0.9874573443113831, "learning_rate": 1.4280127043882452e-06, "loss": 0.9287, "step": 4912 }, { "epoch": 0.833064857990674, "grad_norm": 0.9521073735529032, "learning_rate": 1.4251855437495976e-06, "loss": 0.9297, "step": 4913 }, { "epoch": 0.8332344213649852, "grad_norm": 1.0392229429675954, "learning_rate": 1.4223609697296214e-06, "loss": 0.9652, "step": 4914 }, { "epoch": 0.8334039847392963, "grad_norm": 0.9995407037311419, "learning_rate": 1.4195389831803596e-06, "loss": 0.9432, "step": 4915 }, { "epoch": 0.8335735481136075, "grad_norm": 0.9640359212398271, "learning_rate": 1.416719584953069e-06, "loss": 0.8951, "step": 4916 }, { "epoch": 0.8337431114879186, "grad_norm": 0.999230910967728, "learning_rate": 1.413902775898236e-06, "loss": 0.9116, "step": 4917 }, { "epoch": 0.8339126748622298, "grad_norm": 0.6102136717163895, "learning_rate": 1.4110885568655564e-06, "loss": 0.7349, "step": 4918 }, { "epoch": 0.8340822382365409, "grad_norm": 0.9604053185813455, "learning_rate": 1.4082769287039465e-06, "loss": 0.9449, "step": 4919 }, { "epoch": 0.8342518016108521, "grad_norm": 0.9571973842234587, "learning_rate": 1.405467892261545e-06, "loss": 0.8934, "step": 4920 }, { "epoch": 0.8344213649851632, "grad_norm": 1.0191331904116319, "learning_rate": 1.4026614483857037e-06, "loss": 0.9309, "step": 4921 }, { "epoch": 0.8345909283594743, "grad_norm": 1.006271078738936, "learning_rate": 1.3998575979229944e-06, "loss": 0.8823, "step": 4922 }, { "epoch": 0.8347604917337855, "grad_norm": 0.9863906821835449, "learning_rate": 1.3970563417192117e-06, "loss": 0.8679, "step": 4923 }, { "epoch": 0.8349300551080967, "grad_norm": 0.9768584301538424, "learning_rate": 1.3942576806193597e-06, "loss": 0.9066, "step": 4924 }, { "epoch": 0.8350996184824078, "grad_norm": 0.9796722255798455, "learning_rate": 1.391461615467663e-06, "loss": 0.9128, "step": 4925 }, { "epoch": 0.835269181856719, "grad_norm": 0.96682017045703, "learning_rate": 1.3886681471075614e-06, "loss": 0.9174, "step": 4926 }, { "epoch": 0.8354387452310301, "grad_norm": 0.9884606016088618, "learning_rate": 1.3858772763817174e-06, "loss": 0.9356, "step": 4927 }, { "epoch": 0.8356083086053413, "grad_norm": 1.0281187927452053, "learning_rate": 1.3830890041320034e-06, "loss": 0.9438, "step": 4928 }, { "epoch": 0.8357778719796524, "grad_norm": 1.0082471503138797, "learning_rate": 1.3803033311995072e-06, "loss": 0.9299, "step": 4929 }, { "epoch": 0.8359474353539635, "grad_norm": 1.0139703466929773, "learning_rate": 1.3775202584245407e-06, "loss": 0.9123, "step": 4930 }, { "epoch": 0.8361169987282747, "grad_norm": 0.9668120722955628, "learning_rate": 1.374739786646624e-06, "loss": 0.9238, "step": 4931 }, { "epoch": 0.8362865621025859, "grad_norm": 0.9804851628398163, "learning_rate": 1.371961916704494e-06, "loss": 0.9197, "step": 4932 }, { "epoch": 0.836456125476897, "grad_norm": 1.0597314863763998, "learning_rate": 1.3691866494361029e-06, "loss": 0.9174, "step": 4933 }, { "epoch": 0.8366256888512081, "grad_norm": 0.9233662667103956, "learning_rate": 1.3664139856786207e-06, "loss": 0.9025, "step": 4934 }, { "epoch": 0.8367952522255193, "grad_norm": 0.9428850174027956, "learning_rate": 1.3636439262684299e-06, "loss": 0.9014, "step": 4935 }, { "epoch": 0.8369648155998305, "grad_norm": 0.901665153353305, "learning_rate": 1.3608764720411249e-06, "loss": 0.8708, "step": 4936 }, { "epoch": 0.8371343789741416, "grad_norm": 1.0132756975610824, "learning_rate": 1.3581116238315194e-06, "loss": 0.9266, "step": 4937 }, { "epoch": 0.8373039423484527, "grad_norm": 1.0121643889877587, "learning_rate": 1.3553493824736352e-06, "loss": 0.9319, "step": 4938 }, { "epoch": 0.8374735057227639, "grad_norm": 0.9851845851018192, "learning_rate": 1.3525897488007134e-06, "loss": 0.9114, "step": 4939 }, { "epoch": 0.837643069097075, "grad_norm": 0.9729350990858207, "learning_rate": 1.3498327236452013e-06, "loss": 0.9263, "step": 4940 }, { "epoch": 0.8378126324713862, "grad_norm": 0.9393086894425914, "learning_rate": 1.3470783078387705e-06, "loss": 0.9441, "step": 4941 }, { "epoch": 0.8379821958456973, "grad_norm": 0.9414341135615347, "learning_rate": 1.3443265022122952e-06, "loss": 0.9392, "step": 4942 }, { "epoch": 0.8381517592200085, "grad_norm": 0.9431857808933181, "learning_rate": 1.341577307595867e-06, "loss": 0.9128, "step": 4943 }, { "epoch": 0.8383213225943196, "grad_norm": 1.0252996872904543, "learning_rate": 1.3388307248187849e-06, "loss": 0.9408, "step": 4944 }, { "epoch": 0.8384908859686308, "grad_norm": 1.0260337618074395, "learning_rate": 1.336086754709569e-06, "loss": 0.9224, "step": 4945 }, { "epoch": 0.8386604493429419, "grad_norm": 0.9901928693582054, "learning_rate": 1.3333453980959455e-06, "loss": 0.9395, "step": 4946 }, { "epoch": 0.8388300127172531, "grad_norm": 0.978192593910684, "learning_rate": 1.330606655804848e-06, "loss": 0.9192, "step": 4947 }, { "epoch": 0.8389995760915642, "grad_norm": 1.003724616434047, "learning_rate": 1.3278705286624328e-06, "loss": 0.9197, "step": 4948 }, { "epoch": 0.8391691394658753, "grad_norm": 0.9725596652530581, "learning_rate": 1.3251370174940582e-06, "loss": 0.8885, "step": 4949 }, { "epoch": 0.8393387028401865, "grad_norm": 0.9704560395108309, "learning_rate": 1.3224061231242946e-06, "loss": 0.9078, "step": 4950 }, { "epoch": 0.8395082662144977, "grad_norm": 1.0143817881479997, "learning_rate": 1.3196778463769256e-06, "loss": 0.9369, "step": 4951 }, { "epoch": 0.8396778295888088, "grad_norm": 1.0195787508483363, "learning_rate": 1.316952188074946e-06, "loss": 0.9061, "step": 4952 }, { "epoch": 0.8398473929631199, "grad_norm": 0.9991372709615308, "learning_rate": 1.3142291490405568e-06, "loss": 0.9062, "step": 4953 }, { "epoch": 0.8400169563374311, "grad_norm": 0.9486706313893987, "learning_rate": 1.3115087300951711e-06, "loss": 0.8833, "step": 4954 }, { "epoch": 0.8401865197117423, "grad_norm": 0.9933496215537114, "learning_rate": 1.3087909320594128e-06, "loss": 0.9239, "step": 4955 }, { "epoch": 0.8403560830860534, "grad_norm": 0.9948770760026168, "learning_rate": 1.3060757557531124e-06, "loss": 0.9095, "step": 4956 }, { "epoch": 0.8405256464603645, "grad_norm": 1.0165086880868526, "learning_rate": 1.3033632019953113e-06, "loss": 0.9435, "step": 4957 }, { "epoch": 0.8406952098346757, "grad_norm": 0.9825165424599075, "learning_rate": 1.3006532716042575e-06, "loss": 0.8781, "step": 4958 }, { "epoch": 0.8408647732089869, "grad_norm": 0.97443457630941, "learning_rate": 1.2979459653974146e-06, "loss": 0.9411, "step": 4959 }, { "epoch": 0.841034336583298, "grad_norm": 0.9889189897518945, "learning_rate": 1.2952412841914474e-06, "loss": 0.9378, "step": 4960 }, { "epoch": 0.8412038999576091, "grad_norm": 1.0208831118651156, "learning_rate": 1.2925392288022299e-06, "loss": 0.941, "step": 4961 }, { "epoch": 0.8413734633319203, "grad_norm": 0.9943332742831625, "learning_rate": 1.2898398000448441e-06, "loss": 0.9514, "step": 4962 }, { "epoch": 0.8415430267062315, "grad_norm": 0.94668515723054, "learning_rate": 1.2871429987335858e-06, "loss": 0.9004, "step": 4963 }, { "epoch": 0.8417125900805426, "grad_norm": 0.9558742788248252, "learning_rate": 1.2844488256819497e-06, "loss": 0.9058, "step": 4964 }, { "epoch": 0.8418821534548537, "grad_norm": 0.973229122560861, "learning_rate": 1.2817572817026402e-06, "loss": 0.9315, "step": 4965 }, { "epoch": 0.8420517168291649, "grad_norm": 1.0854281586788086, "learning_rate": 1.2790683676075732e-06, "loss": 0.9091, "step": 4966 }, { "epoch": 0.8422212802034761, "grad_norm": 1.0249099048024033, "learning_rate": 1.2763820842078657e-06, "loss": 0.9531, "step": 4967 }, { "epoch": 0.8423908435777872, "grad_norm": 0.9648933247195882, "learning_rate": 1.2736984323138435e-06, "loss": 0.9544, "step": 4968 }, { "epoch": 0.8425604069520983, "grad_norm": 0.9168651190565301, "learning_rate": 1.2710174127350362e-06, "loss": 0.9141, "step": 4969 }, { "epoch": 0.8427299703264095, "grad_norm": 0.9651421591225124, "learning_rate": 1.2683390262801853e-06, "loss": 0.8777, "step": 4970 }, { "epoch": 0.8428995337007207, "grad_norm": 0.9875249691452004, "learning_rate": 1.2656632737572327e-06, "loss": 0.9144, "step": 4971 }, { "epoch": 0.8430690970750317, "grad_norm": 0.6142240460768515, "learning_rate": 1.262990155973327e-06, "loss": 0.7395, "step": 4972 }, { "epoch": 0.8432386604493429, "grad_norm": 0.9512273713004991, "learning_rate": 1.2603196737348211e-06, "loss": 0.9254, "step": 4973 }, { "epoch": 0.8434082238236541, "grad_norm": 1.0182518950407085, "learning_rate": 1.257651827847276e-06, "loss": 0.9026, "step": 4974 }, { "epoch": 0.8435777871979653, "grad_norm": 1.0303991261657348, "learning_rate": 1.2549866191154547e-06, "loss": 0.8844, "step": 4975 }, { "epoch": 0.8437473505722763, "grad_norm": 1.0234561465497893, "learning_rate": 1.252324048343322e-06, "loss": 0.8939, "step": 4976 }, { "epoch": 0.8439169139465875, "grad_norm": 0.9713062809278666, "learning_rate": 1.2496641163340562e-06, "loss": 0.9026, "step": 4977 }, { "epoch": 0.8440864773208987, "grad_norm": 0.986330491605456, "learning_rate": 1.2470068238900323e-06, "loss": 0.9265, "step": 4978 }, { "epoch": 0.8442560406952099, "grad_norm": 0.6566579100299678, "learning_rate": 1.2443521718128259e-06, "loss": 0.8019, "step": 4979 }, { "epoch": 0.8444256040695209, "grad_norm": 0.9870275002071415, "learning_rate": 1.2417001609032275e-06, "loss": 0.9497, "step": 4980 }, { "epoch": 0.8445951674438321, "grad_norm": 1.0061971871093598, "learning_rate": 1.2390507919612215e-06, "loss": 0.9339, "step": 4981 }, { "epoch": 0.8447647308181433, "grad_norm": 0.9775779735701265, "learning_rate": 1.2364040657859976e-06, "loss": 0.9211, "step": 4982 }, { "epoch": 0.8449342941924545, "grad_norm": 1.0060622902477008, "learning_rate": 1.233759983175946e-06, "loss": 0.9286, "step": 4983 }, { "epoch": 0.8451038575667655, "grad_norm": 0.979018507866352, "learning_rate": 1.2311185449286677e-06, "loss": 0.9753, "step": 4984 }, { "epoch": 0.8452734209410767, "grad_norm": 0.9198706477023796, "learning_rate": 1.2284797518409575e-06, "loss": 0.9205, "step": 4985 }, { "epoch": 0.8454429843153879, "grad_norm": 1.011939611350744, "learning_rate": 1.2258436047088162e-06, "loss": 0.9262, "step": 4986 }, { "epoch": 0.8456125476896991, "grad_norm": 0.9394857394643923, "learning_rate": 1.2232101043274437e-06, "loss": 0.9058, "step": 4987 }, { "epoch": 0.8457821110640101, "grad_norm": 0.9807670870589137, "learning_rate": 1.220579251491245e-06, "loss": 0.899, "step": 4988 }, { "epoch": 0.8459516744383213, "grad_norm": 0.9710975257113879, "learning_rate": 1.217951046993826e-06, "loss": 0.9633, "step": 4989 }, { "epoch": 0.8461212378126325, "grad_norm": 1.0472037859390761, "learning_rate": 1.2153254916279899e-06, "loss": 0.9604, "step": 4990 }, { "epoch": 0.8462908011869437, "grad_norm": 0.9628090000004781, "learning_rate": 1.2127025861857455e-06, "loss": 0.9254, "step": 4991 }, { "epoch": 0.8464603645612547, "grad_norm": 1.0037713136667632, "learning_rate": 1.2100823314582989e-06, "loss": 0.9151, "step": 4992 }, { "epoch": 0.8466299279355659, "grad_norm": 1.0077026030991845, "learning_rate": 1.2074647282360573e-06, "loss": 0.935, "step": 4993 }, { "epoch": 0.8467994913098771, "grad_norm": 0.9834971925879705, "learning_rate": 1.2048497773086276e-06, "loss": 0.9636, "step": 4994 }, { "epoch": 0.8469690546841883, "grad_norm": 0.9323495899469612, "learning_rate": 1.2022374794648229e-06, "loss": 0.8843, "step": 4995 }, { "epoch": 0.8471386180584993, "grad_norm": 0.9567302487230924, "learning_rate": 1.199627835492646e-06, "loss": 0.923, "step": 4996 }, { "epoch": 0.8473081814328105, "grad_norm": 0.9758300805544459, "learning_rate": 1.197020846179303e-06, "loss": 0.9108, "step": 4997 }, { "epoch": 0.8474777448071217, "grad_norm": 0.9379315815069381, "learning_rate": 1.1944165123112051e-06, "loss": 0.9141, "step": 4998 }, { "epoch": 0.8476473081814329, "grad_norm": 0.9549278742172951, "learning_rate": 1.1918148346739545e-06, "loss": 0.9284, "step": 4999 }, { "epoch": 0.8478168715557439, "grad_norm": 0.9691541797084137, "learning_rate": 1.1892158140523546e-06, "loss": 0.9101, "step": 5000 }, { "epoch": 0.8479864349300551, "grad_norm": 0.9770643814016914, "learning_rate": 1.1866194512304075e-06, "loss": 0.9442, "step": 5001 }, { "epoch": 0.8481559983043663, "grad_norm": 0.9278702655626347, "learning_rate": 1.1840257469913163e-06, "loss": 0.8945, "step": 5002 }, { "epoch": 0.8483255616786775, "grad_norm": 0.9737224079464902, "learning_rate": 1.1814347021174777e-06, "loss": 0.935, "step": 5003 }, { "epoch": 0.8484951250529885, "grad_norm": 1.0111504822565995, "learning_rate": 1.1788463173904896e-06, "loss": 0.9521, "step": 5004 }, { "epoch": 0.8486646884272997, "grad_norm": 0.9920139851895824, "learning_rate": 1.1762605935911432e-06, "loss": 0.9271, "step": 5005 }, { "epoch": 0.8488342518016109, "grad_norm": 0.9698127376309369, "learning_rate": 1.1736775314994341e-06, "loss": 0.9058, "step": 5006 }, { "epoch": 0.849003815175922, "grad_norm": 1.0091131338837878, "learning_rate": 1.1710971318945485e-06, "loss": 0.92, "step": 5007 }, { "epoch": 0.8491733785502331, "grad_norm": 0.8772068668172168, "learning_rate": 1.1685193955548712e-06, "loss": 0.8562, "step": 5008 }, { "epoch": 0.8493429419245443, "grad_norm": 0.9707871856364261, "learning_rate": 1.165944323257986e-06, "loss": 1.0002, "step": 5009 }, { "epoch": 0.8495125052988555, "grad_norm": 0.9973843428172391, "learning_rate": 1.16337191578067e-06, "loss": 0.9656, "step": 5010 }, { "epoch": 0.8496820686731666, "grad_norm": 1.0352908351051708, "learning_rate": 1.1608021738988973e-06, "loss": 0.952, "step": 5011 }, { "epoch": 0.8498516320474777, "grad_norm": 0.9539738447034236, "learning_rate": 1.1582350983878365e-06, "loss": 0.881, "step": 5012 }, { "epoch": 0.8500211954217889, "grad_norm": 0.9609608253415943, "learning_rate": 1.1556706900218572e-06, "loss": 0.9386, "step": 5013 }, { "epoch": 0.8501907587961001, "grad_norm": 0.9441463880293872, "learning_rate": 1.1531089495745206e-06, "loss": 0.8891, "step": 5014 }, { "epoch": 0.8503603221704112, "grad_norm": 0.9918451684588193, "learning_rate": 1.150549877818581e-06, "loss": 0.9339, "step": 5015 }, { "epoch": 0.8505298855447223, "grad_norm": 0.9197835009671675, "learning_rate": 1.1479934755259924e-06, "loss": 0.938, "step": 5016 }, { "epoch": 0.8506994489190335, "grad_norm": 0.9664962209301132, "learning_rate": 1.1454397434679022e-06, "loss": 0.9371, "step": 5017 }, { "epoch": 0.8508690122933447, "grad_norm": 0.9460492858808486, "learning_rate": 1.142888682414648e-06, "loss": 0.9275, "step": 5018 }, { "epoch": 0.8510385756676558, "grad_norm": 0.9530415317084844, "learning_rate": 1.1403402931357655e-06, "loss": 0.9105, "step": 5019 }, { "epoch": 0.8512081390419669, "grad_norm": 1.0093988497385549, "learning_rate": 1.1377945763999875e-06, "loss": 0.9366, "step": 5020 }, { "epoch": 0.8513777024162781, "grad_norm": 1.0869680924795317, "learning_rate": 1.1352515329752345e-06, "loss": 0.9472, "step": 5021 }, { "epoch": 0.8515472657905893, "grad_norm": 0.9968130560263498, "learning_rate": 1.1327111636286237e-06, "loss": 0.963, "step": 5022 }, { "epoch": 0.8517168291649004, "grad_norm": 0.9836133517952427, "learning_rate": 1.1301734691264633e-06, "loss": 0.938, "step": 5023 }, { "epoch": 0.8518863925392115, "grad_norm": 0.9775731948677512, "learning_rate": 1.1276384502342596e-06, "loss": 0.8817, "step": 5024 }, { "epoch": 0.8520559559135227, "grad_norm": 0.94942800199654, "learning_rate": 1.125106107716708e-06, "loss": 0.9302, "step": 5025 }, { "epoch": 0.8522255192878339, "grad_norm": 0.9059019330196004, "learning_rate": 1.122576442337696e-06, "loss": 0.8911, "step": 5026 }, { "epoch": 0.852395082662145, "grad_norm": 1.0257272814505092, "learning_rate": 1.120049454860307e-06, "loss": 0.907, "step": 5027 }, { "epoch": 0.8525646460364561, "grad_norm": 0.9515118956153226, "learning_rate": 1.1175251460468117e-06, "loss": 0.9011, "step": 5028 }, { "epoch": 0.8527342094107673, "grad_norm": 0.9751532162967015, "learning_rate": 1.115003516658677e-06, "loss": 0.9002, "step": 5029 }, { "epoch": 0.8529037727850785, "grad_norm": 0.9633003561564547, "learning_rate": 1.1124845674565577e-06, "loss": 0.9401, "step": 5030 }, { "epoch": 0.8530733361593895, "grad_norm": 0.9865426048591225, "learning_rate": 1.1099682992003058e-06, "loss": 0.9379, "step": 5031 }, { "epoch": 0.8532428995337007, "grad_norm": 0.948565856806794, "learning_rate": 1.1074547126489609e-06, "loss": 0.9288, "step": 5032 }, { "epoch": 0.8534124629080119, "grad_norm": 0.9817690244995123, "learning_rate": 1.10494380856075e-06, "loss": 0.9174, "step": 5033 }, { "epoch": 0.853582026282323, "grad_norm": 0.9593227641583841, "learning_rate": 1.1024355876931004e-06, "loss": 0.9362, "step": 5034 }, { "epoch": 0.8537515896566341, "grad_norm": 0.914609356229114, "learning_rate": 1.099930050802621e-06, "loss": 0.9356, "step": 5035 }, { "epoch": 0.8539211530309453, "grad_norm": 0.9536917885637378, "learning_rate": 1.0974271986451169e-06, "loss": 0.8942, "step": 5036 }, { "epoch": 0.8540907164052565, "grad_norm": 0.9699646014679921, "learning_rate": 1.0949270319755768e-06, "loss": 0.9163, "step": 5037 }, { "epoch": 0.8542602797795676, "grad_norm": 0.9363367333687395, "learning_rate": 1.0924295515481886e-06, "loss": 0.9098, "step": 5038 }, { "epoch": 0.8544298431538787, "grad_norm": 0.9553517547818056, "learning_rate": 1.0899347581163222e-06, "loss": 0.933, "step": 5039 }, { "epoch": 0.8545994065281899, "grad_norm": 0.9329261976014018, "learning_rate": 1.0874426524325398e-06, "loss": 0.8527, "step": 5040 }, { "epoch": 0.8547689699025011, "grad_norm": 0.9672761525526294, "learning_rate": 1.0849532352485903e-06, "loss": 0.9329, "step": 5041 }, { "epoch": 0.8549385332768122, "grad_norm": 0.957320375598468, "learning_rate": 1.0824665073154196e-06, "loss": 0.9199, "step": 5042 }, { "epoch": 0.8551080966511233, "grad_norm": 0.9529171321970592, "learning_rate": 1.0799824693831529e-06, "loss": 0.9286, "step": 5043 }, { "epoch": 0.8552776600254345, "grad_norm": 0.9558869517362364, "learning_rate": 1.0775011222011078e-06, "loss": 0.9204, "step": 5044 }, { "epoch": 0.8554472233997457, "grad_norm": 0.6120500743838968, "learning_rate": 1.075022466517791e-06, "loss": 0.7522, "step": 5045 }, { "epoch": 0.8556167867740568, "grad_norm": 0.9127718671865394, "learning_rate": 1.0725465030808958e-06, "loss": 0.8934, "step": 5046 }, { "epoch": 0.8557863501483679, "grad_norm": 1.0310084597028693, "learning_rate": 1.0700732326373042e-06, "loss": 0.9544, "step": 5047 }, { "epoch": 0.8559559135226791, "grad_norm": 0.9894276963012947, "learning_rate": 1.0676026559330842e-06, "loss": 0.9015, "step": 5048 }, { "epoch": 0.8561254768969903, "grad_norm": 0.9570954490836404, "learning_rate": 1.0651347737134965e-06, "loss": 0.9528, "step": 5049 }, { "epoch": 0.8562950402713014, "grad_norm": 0.9552637051064249, "learning_rate": 1.062669586722983e-06, "loss": 0.9047, "step": 5050 }, { "epoch": 0.8564646036456125, "grad_norm": 0.9669956212822644, "learning_rate": 1.0602070957051725e-06, "loss": 0.8861, "step": 5051 }, { "epoch": 0.8566341670199237, "grad_norm": 0.9956227487211226, "learning_rate": 1.0577473014028872e-06, "loss": 0.8881, "step": 5052 }, { "epoch": 0.8568037303942349, "grad_norm": 0.9809285225589254, "learning_rate": 1.0552902045581305e-06, "loss": 0.9232, "step": 5053 }, { "epoch": 0.856973293768546, "grad_norm": 0.993206320743409, "learning_rate": 1.0528358059120913e-06, "loss": 0.8922, "step": 5054 }, { "epoch": 0.8571428571428571, "grad_norm": 0.9260263257570618, "learning_rate": 1.0503841062051445e-06, "loss": 0.9162, "step": 5055 }, { "epoch": 0.8573124205171683, "grad_norm": 0.6330074201581573, "learning_rate": 1.0479351061768584e-06, "loss": 0.7974, "step": 5056 }, { "epoch": 0.8574819838914794, "grad_norm": 0.9988409387609664, "learning_rate": 1.0454888065659775e-06, "loss": 0.9143, "step": 5057 }, { "epoch": 0.8576515472657906, "grad_norm": 0.9799454972930053, "learning_rate": 1.0430452081104369e-06, "loss": 0.9238, "step": 5058 }, { "epoch": 0.8578211106401017, "grad_norm": 0.9413935993757301, "learning_rate": 1.040604311547353e-06, "loss": 0.9183, "step": 5059 }, { "epoch": 0.8579906740144129, "grad_norm": 1.0296562243864147, "learning_rate": 1.038166117613032e-06, "loss": 0.931, "step": 5060 }, { "epoch": 0.858160237388724, "grad_norm": 1.0144033052136412, "learning_rate": 1.0357306270429623e-06, "loss": 0.9642, "step": 5061 }, { "epoch": 0.8583298007630352, "grad_norm": 0.953876184537609, "learning_rate": 1.0332978405718175e-06, "loss": 0.9419, "step": 5062 }, { "epoch": 0.8584993641373463, "grad_norm": 0.9400286309399865, "learning_rate": 1.0308677589334526e-06, "loss": 0.9517, "step": 5063 }, { "epoch": 0.8586689275116575, "grad_norm": 0.948599186172546, "learning_rate": 1.0284403828609113e-06, "loss": 0.9143, "step": 5064 }, { "epoch": 0.8588384908859686, "grad_norm": 0.9126977614520076, "learning_rate": 1.0260157130864178e-06, "loss": 0.9245, "step": 5065 }, { "epoch": 0.8590080542602798, "grad_norm": 1.0104631532358312, "learning_rate": 1.0235937503413795e-06, "loss": 0.8879, "step": 5066 }, { "epoch": 0.8591776176345909, "grad_norm": 0.9428781928449234, "learning_rate": 1.021174495356393e-06, "loss": 0.9164, "step": 5067 }, { "epoch": 0.8593471810089021, "grad_norm": 0.989405194899477, "learning_rate": 1.018757948861231e-06, "loss": 0.9331, "step": 5068 }, { "epoch": 0.8595167443832132, "grad_norm": 0.6407133517194157, "learning_rate": 1.0163441115848506e-06, "loss": 0.7512, "step": 5069 }, { "epoch": 0.8596863077575244, "grad_norm": 1.0326855127397307, "learning_rate": 1.013932984255397e-06, "loss": 0.9031, "step": 5070 }, { "epoch": 0.8598558711318355, "grad_norm": 0.932399265546685, "learning_rate": 1.0115245676001917e-06, "loss": 0.8816, "step": 5071 }, { "epoch": 0.8600254345061467, "grad_norm": 0.9800511945841957, "learning_rate": 1.0091188623457415e-06, "loss": 0.9236, "step": 5072 }, { "epoch": 0.8601949978804578, "grad_norm": 0.9746683454341022, "learning_rate": 1.0067158692177325e-06, "loss": 0.9453, "step": 5073 }, { "epoch": 0.860364561254769, "grad_norm": 0.9955081226460942, "learning_rate": 1.0043155889410382e-06, "loss": 0.9577, "step": 5074 }, { "epoch": 0.8605341246290801, "grad_norm": 0.9583981958171508, "learning_rate": 1.0019180222397095e-06, "loss": 0.9084, "step": 5075 }, { "epoch": 0.8607036880033913, "grad_norm": 1.0317375977263978, "learning_rate": 9.995231698369789e-07, "loss": 0.971, "step": 5076 }, { "epoch": 0.8608732513777024, "grad_norm": 0.9806402206391475, "learning_rate": 9.971310324552597e-07, "loss": 0.9334, "step": 5077 }, { "epoch": 0.8610428147520136, "grad_norm": 0.9443382406010195, "learning_rate": 9.9474161081615e-07, "loss": 0.9284, "step": 5078 }, { "epoch": 0.8612123781263247, "grad_norm": 0.9666104446729988, "learning_rate": 9.923549056404247e-07, "loss": 0.9268, "step": 5079 }, { "epoch": 0.8613819415006359, "grad_norm": 0.9467338251715124, "learning_rate": 9.899709176480398e-07, "loss": 0.9227, "step": 5080 }, { "epoch": 0.861551504874947, "grad_norm": 1.0284768922078236, "learning_rate": 9.87589647558135e-07, "loss": 0.9551, "step": 5081 }, { "epoch": 0.8617210682492582, "grad_norm": 0.9733707674390222, "learning_rate": 9.852110960890248e-07, "loss": 0.9287, "step": 5082 }, { "epoch": 0.8618906316235693, "grad_norm": 0.947050357571709, "learning_rate": 9.828352639582073e-07, "loss": 0.8879, "step": 5083 }, { "epoch": 0.8620601949978804, "grad_norm": 0.9739477936521534, "learning_rate": 9.804621518823587e-07, "loss": 0.9124, "step": 5084 }, { "epoch": 0.8622297583721916, "grad_norm": 1.0018942714344834, "learning_rate": 9.780917605773376e-07, "loss": 0.9376, "step": 5085 }, { "epoch": 0.8623993217465028, "grad_norm": 0.9527752592911204, "learning_rate": 9.75724090758179e-07, "loss": 0.9055, "step": 5086 }, { "epoch": 0.8625688851208139, "grad_norm": 0.9535126539181107, "learning_rate": 9.733591431390955e-07, "loss": 0.947, "step": 5087 }, { "epoch": 0.862738448495125, "grad_norm": 0.9359181190575948, "learning_rate": 9.709969184334832e-07, "loss": 0.9032, "step": 5088 }, { "epoch": 0.8629080118694362, "grad_norm": 0.9482045494639502, "learning_rate": 9.686374173539147e-07, "loss": 0.897, "step": 5089 }, { "epoch": 0.8630775752437474, "grad_norm": 0.9693326013304164, "learning_rate": 9.662806406121383e-07, "loss": 0.9336, "step": 5090 }, { "epoch": 0.8632471386180585, "grad_norm": 0.9689238817165925, "learning_rate": 9.63926588919083e-07, "loss": 0.9541, "step": 5091 }, { "epoch": 0.8634167019923696, "grad_norm": 0.9765267973915636, "learning_rate": 9.615752629848574e-07, "loss": 0.9308, "step": 5092 }, { "epoch": 0.8635862653666808, "grad_norm": 0.9888109493786731, "learning_rate": 9.592266635187464e-07, "loss": 0.9416, "step": 5093 }, { "epoch": 0.863755828740992, "grad_norm": 0.9655935379937343, "learning_rate": 9.568807912292077e-07, "loss": 0.8973, "step": 5094 }, { "epoch": 0.8639253921153031, "grad_norm": 0.9803580597876733, "learning_rate": 9.545376468238864e-07, "loss": 0.9219, "step": 5095 }, { "epoch": 0.8640949554896142, "grad_norm": 0.9851122596695164, "learning_rate": 9.521972310095973e-07, "loss": 0.9155, "step": 5096 }, { "epoch": 0.8642645188639254, "grad_norm": 0.9432599969636443, "learning_rate": 9.49859544492332e-07, "loss": 0.9497, "step": 5097 }, { "epoch": 0.8644340822382366, "grad_norm": 0.9592869670580622, "learning_rate": 9.47524587977261e-07, "loss": 0.9604, "step": 5098 }, { "epoch": 0.8646036456125477, "grad_norm": 0.9842954513786379, "learning_rate": 9.451923621687343e-07, "loss": 0.9145, "step": 5099 }, { "epoch": 0.8647732089868588, "grad_norm": 0.9605784169678584, "learning_rate": 9.428628677702711e-07, "loss": 0.9002, "step": 5100 }, { "epoch": 0.86494277236117, "grad_norm": 0.9988090173366728, "learning_rate": 9.40536105484573e-07, "loss": 0.8995, "step": 5101 }, { "epoch": 0.8651123357354812, "grad_norm": 0.9773795024933145, "learning_rate": 9.382120760135128e-07, "loss": 0.8731, "step": 5102 }, { "epoch": 0.8652818991097923, "grad_norm": 0.9303532450179893, "learning_rate": 9.358907800581418e-07, "loss": 0.9161, "step": 5103 }, { "epoch": 0.8654514624841034, "grad_norm": 0.9920428859111302, "learning_rate": 9.335722183186868e-07, "loss": 0.8877, "step": 5104 }, { "epoch": 0.8656210258584146, "grad_norm": 0.9720520982774197, "learning_rate": 9.312563914945461e-07, "loss": 0.9076, "step": 5105 }, { "epoch": 0.8657905892327258, "grad_norm": 0.9798397963625498, "learning_rate": 9.289433002842996e-07, "loss": 0.9176, "step": 5106 }, { "epoch": 0.8659601526070368, "grad_norm": 0.9399891292967149, "learning_rate": 9.266329453856959e-07, "loss": 0.8936, "step": 5107 }, { "epoch": 0.866129715981348, "grad_norm": 1.013982693981116, "learning_rate": 9.24325327495662e-07, "loss": 0.9356, "step": 5108 }, { "epoch": 0.8662992793556592, "grad_norm": 0.9810885175628435, "learning_rate": 9.220204473102945e-07, "loss": 0.9247, "step": 5109 }, { "epoch": 0.8664688427299704, "grad_norm": 1.0110672639347154, "learning_rate": 9.197183055248726e-07, "loss": 0.9541, "step": 5110 }, { "epoch": 0.8666384061042814, "grad_norm": 0.9692478686835012, "learning_rate": 9.174189028338421e-07, "loss": 0.9109, "step": 5111 }, { "epoch": 0.8668079694785926, "grad_norm": 0.9652561634163828, "learning_rate": 9.151222399308213e-07, "loss": 0.9135, "step": 5112 }, { "epoch": 0.8669775328529038, "grad_norm": 0.9676494873901076, "learning_rate": 9.128283175086106e-07, "loss": 0.9315, "step": 5113 }, { "epoch": 0.867147096227215, "grad_norm": 0.9777480948214932, "learning_rate": 9.105371362591775e-07, "loss": 0.9347, "step": 5114 }, { "epoch": 0.867316659601526, "grad_norm": 0.9603568113276452, "learning_rate": 9.082486968736614e-07, "loss": 0.9142, "step": 5115 }, { "epoch": 0.8674862229758372, "grad_norm": 0.9829871514212433, "learning_rate": 9.05963000042378e-07, "loss": 0.8821, "step": 5116 }, { "epoch": 0.8676557863501484, "grad_norm": 1.029973578956744, "learning_rate": 9.036800464548157e-07, "loss": 0.9857, "step": 5117 }, { "epoch": 0.8678253497244596, "grad_norm": 1.0101965327438291, "learning_rate": 9.013998367996346e-07, "loss": 0.9619, "step": 5118 }, { "epoch": 0.8679949130987706, "grad_norm": 0.9583588451416015, "learning_rate": 8.991223717646646e-07, "loss": 0.9111, "step": 5119 }, { "epoch": 0.8681644764730818, "grad_norm": 0.9717138900718927, "learning_rate": 8.96847652036913e-07, "loss": 0.8926, "step": 5120 }, { "epoch": 0.868334039847393, "grad_norm": 0.9486978102703995, "learning_rate": 8.945756783025528e-07, "loss": 0.9216, "step": 5121 }, { "epoch": 0.8685036032217041, "grad_norm": 0.9850946131914637, "learning_rate": 8.923064512469326e-07, "loss": 0.9203, "step": 5122 }, { "epoch": 0.8686731665960152, "grad_norm": 0.9796159596066807, "learning_rate": 8.900399715545715e-07, "loss": 0.9228, "step": 5123 }, { "epoch": 0.8688427299703264, "grad_norm": 0.9589229564601963, "learning_rate": 8.877762399091616e-07, "loss": 0.8848, "step": 5124 }, { "epoch": 0.8690122933446376, "grad_norm": 0.9421268030153078, "learning_rate": 8.855152569935632e-07, "loss": 0.9155, "step": 5125 }, { "epoch": 0.8691818567189487, "grad_norm": 0.9789323689720132, "learning_rate": 8.832570234898086e-07, "loss": 0.9476, "step": 5126 }, { "epoch": 0.8693514200932598, "grad_norm": 0.9874458383880508, "learning_rate": 8.810015400790994e-07, "loss": 0.933, "step": 5127 }, { "epoch": 0.869520983467571, "grad_norm": 0.9667220151280363, "learning_rate": 8.787488074418116e-07, "loss": 0.9425, "step": 5128 }, { "epoch": 0.8696905468418822, "grad_norm": 0.9928975300421444, "learning_rate": 8.76498826257488e-07, "loss": 0.9369, "step": 5129 }, { "epoch": 0.8698601102161932, "grad_norm": 0.9730032154260898, "learning_rate": 8.742515972048404e-07, "loss": 0.9198, "step": 5130 }, { "epoch": 0.8700296735905044, "grad_norm": 0.9919648412287673, "learning_rate": 8.720071209617542e-07, "loss": 0.9111, "step": 5131 }, { "epoch": 0.8701992369648156, "grad_norm": 0.9959727042773086, "learning_rate": 8.697653982052834e-07, "loss": 0.9337, "step": 5132 }, { "epoch": 0.8703688003391268, "grad_norm": 1.1064447611436845, "learning_rate": 8.675264296116481e-07, "loss": 0.9342, "step": 5133 }, { "epoch": 0.8705383637134378, "grad_norm": 0.9767851439552029, "learning_rate": 8.652902158562382e-07, "loss": 0.9473, "step": 5134 }, { "epoch": 0.870707927087749, "grad_norm": 0.9338987396113887, "learning_rate": 8.630567576136196e-07, "loss": 0.8882, "step": 5135 }, { "epoch": 0.8708774904620602, "grad_norm": 0.9600186868531667, "learning_rate": 8.608260555575187e-07, "loss": 0.9699, "step": 5136 }, { "epoch": 0.8710470538363714, "grad_norm": 0.979677963133246, "learning_rate": 8.585981103608343e-07, "loss": 0.9064, "step": 5137 }, { "epoch": 0.8712166172106824, "grad_norm": 0.9671268770018754, "learning_rate": 8.563729226956318e-07, "loss": 0.9273, "step": 5138 }, { "epoch": 0.8713861805849936, "grad_norm": 0.9907340733200737, "learning_rate": 8.541504932331468e-07, "loss": 0.9414, "step": 5139 }, { "epoch": 0.8715557439593048, "grad_norm": 1.0260306143590605, "learning_rate": 8.519308226437806e-07, "loss": 0.9356, "step": 5140 }, { "epoch": 0.871725307333616, "grad_norm": 0.9511893600174607, "learning_rate": 8.497139115971031e-07, "loss": 0.8998, "step": 5141 }, { "epoch": 0.871894870707927, "grad_norm": 0.9804482693550187, "learning_rate": 8.474997607618551e-07, "loss": 0.9382, "step": 5142 }, { "epoch": 0.8720644340822382, "grad_norm": 0.9472559062781107, "learning_rate": 8.4528837080594e-07, "loss": 0.9288, "step": 5143 }, { "epoch": 0.8722339974565494, "grad_norm": 0.9571216055207906, "learning_rate": 8.43079742396431e-07, "loss": 0.9362, "step": 5144 }, { "epoch": 0.8724035608308606, "grad_norm": 1.0118548847309516, "learning_rate": 8.40873876199565e-07, "loss": 0.9261, "step": 5145 }, { "epoch": 0.8725731242051716, "grad_norm": 0.9780578968409174, "learning_rate": 8.386707728807509e-07, "loss": 0.9351, "step": 5146 }, { "epoch": 0.8727426875794828, "grad_norm": 0.9709627366549358, "learning_rate": 8.364704331045614e-07, "loss": 0.8931, "step": 5147 }, { "epoch": 0.872912250953794, "grad_norm": 0.9812281645275513, "learning_rate": 8.342728575347336e-07, "loss": 0.9241, "step": 5148 }, { "epoch": 0.8730818143281052, "grad_norm": 0.64539157697727, "learning_rate": 8.320780468341761e-07, "loss": 0.766, "step": 5149 }, { "epoch": 0.8732513777024162, "grad_norm": 0.9848845391928729, "learning_rate": 8.29886001664958e-07, "loss": 0.9632, "step": 5150 }, { "epoch": 0.8734209410767274, "grad_norm": 0.9780947487480185, "learning_rate": 8.276967226883159e-07, "loss": 0.8723, "step": 5151 }, { "epoch": 0.8735905044510386, "grad_norm": 0.9598900846428129, "learning_rate": 8.255102105646517e-07, "loss": 0.9218, "step": 5152 }, { "epoch": 0.8737600678253498, "grad_norm": 1.021072062223685, "learning_rate": 8.233264659535367e-07, "loss": 0.9441, "step": 5153 }, { "epoch": 0.8739296311996608, "grad_norm": 0.9608923903830944, "learning_rate": 8.211454895137027e-07, "loss": 0.9016, "step": 5154 }, { "epoch": 0.874099194573972, "grad_norm": 0.977958864749516, "learning_rate": 8.189672819030459e-07, "loss": 0.9292, "step": 5155 }, { "epoch": 0.8742687579482832, "grad_norm": 0.9993933430233476, "learning_rate": 8.167918437786316e-07, "loss": 0.9779, "step": 5156 }, { "epoch": 0.8744383213225944, "grad_norm": 0.985386215617427, "learning_rate": 8.146191757966859e-07, "loss": 0.9111, "step": 5157 }, { "epoch": 0.8746078846969054, "grad_norm": 0.9521532282748394, "learning_rate": 8.124492786126015e-07, "loss": 0.8809, "step": 5158 }, { "epoch": 0.8747774480712166, "grad_norm": 0.9340203567379062, "learning_rate": 8.102821528809324e-07, "loss": 0.9037, "step": 5159 }, { "epoch": 0.8749470114455278, "grad_norm": 0.9798508301080222, "learning_rate": 8.081177992554012e-07, "loss": 0.9144, "step": 5160 }, { "epoch": 0.875116574819839, "grad_norm": 1.00522929453304, "learning_rate": 8.059562183888903e-07, "loss": 0.9514, "step": 5161 }, { "epoch": 0.87528613819415, "grad_norm": 0.9701294235974999, "learning_rate": 8.037974109334478e-07, "loss": 0.8929, "step": 5162 }, { "epoch": 0.8754557015684612, "grad_norm": 0.9272003059920836, "learning_rate": 8.016413775402832e-07, "loss": 0.8847, "step": 5163 }, { "epoch": 0.8756252649427724, "grad_norm": 0.9518751650123822, "learning_rate": 7.994881188597726e-07, "loss": 0.9088, "step": 5164 }, { "epoch": 0.8757948283170836, "grad_norm": 1.0291401781770853, "learning_rate": 7.97337635541452e-07, "loss": 0.9597, "step": 5165 }, { "epoch": 0.8759643916913946, "grad_norm": 0.958568782022767, "learning_rate": 7.951899282340192e-07, "loss": 0.919, "step": 5166 }, { "epoch": 0.8761339550657058, "grad_norm": 1.065434479117011, "learning_rate": 7.930449975853405e-07, "loss": 0.9478, "step": 5167 }, { "epoch": 0.876303518440017, "grad_norm": 1.0037286433260262, "learning_rate": 7.909028442424383e-07, "loss": 0.9597, "step": 5168 }, { "epoch": 0.8764730818143281, "grad_norm": 1.0186036681404256, "learning_rate": 7.887634688515e-07, "loss": 0.9694, "step": 5169 }, { "epoch": 0.8766426451886392, "grad_norm": 1.0100752447623533, "learning_rate": 7.866268720578718e-07, "loss": 0.9365, "step": 5170 }, { "epoch": 0.8768122085629504, "grad_norm": 0.9728314290647561, "learning_rate": 7.844930545060703e-07, "loss": 0.9076, "step": 5171 }, { "epoch": 0.8769817719372616, "grad_norm": 1.0345615727910933, "learning_rate": 7.82362016839765e-07, "loss": 0.936, "step": 5172 }, { "epoch": 0.8771513353115727, "grad_norm": 0.999284035088074, "learning_rate": 7.802337597017895e-07, "loss": 0.9656, "step": 5173 }, { "epoch": 0.8773208986858838, "grad_norm": 0.9323629990420651, "learning_rate": 7.781082837341403e-07, "loss": 0.9112, "step": 5174 }, { "epoch": 0.877490462060195, "grad_norm": 1.0052745558685974, "learning_rate": 7.759855895779711e-07, "loss": 0.915, "step": 5175 }, { "epoch": 0.8776600254345062, "grad_norm": 0.9368470802114017, "learning_rate": 7.73865677873602e-07, "loss": 0.9212, "step": 5176 }, { "epoch": 0.8778295888088173, "grad_norm": 1.013384639361475, "learning_rate": 7.71748549260507e-07, "loss": 0.9257, "step": 5177 }, { "epoch": 0.8779991521831284, "grad_norm": 0.9830292976705274, "learning_rate": 7.696342043773297e-07, "loss": 0.9032, "step": 5178 }, { "epoch": 0.8781687155574396, "grad_norm": 1.0208422638851524, "learning_rate": 7.675226438618643e-07, "loss": 0.9308, "step": 5179 }, { "epoch": 0.8783382789317508, "grad_norm": 1.0537077462192097, "learning_rate": 7.654138683510715e-07, "loss": 0.8913, "step": 5180 }, { "epoch": 0.8785078423060619, "grad_norm": 0.9648511967871872, "learning_rate": 7.633078784810666e-07, "loss": 0.9242, "step": 5181 }, { "epoch": 0.878677405680373, "grad_norm": 0.9817768262742032, "learning_rate": 7.612046748871327e-07, "loss": 0.9234, "step": 5182 }, { "epoch": 0.8788469690546842, "grad_norm": 0.9904457207777576, "learning_rate": 7.591042582037055e-07, "loss": 0.9616, "step": 5183 }, { "epoch": 0.8790165324289954, "grad_norm": 0.9628743537930682, "learning_rate": 7.570066290643784e-07, "loss": 0.868, "step": 5184 }, { "epoch": 0.8791860958033065, "grad_norm": 0.9744800234184157, "learning_rate": 7.549117881019141e-07, "loss": 0.9362, "step": 5185 }, { "epoch": 0.8793556591776176, "grad_norm": 0.9477881467648516, "learning_rate": 7.528197359482237e-07, "loss": 0.8884, "step": 5186 }, { "epoch": 0.8795252225519288, "grad_norm": 0.977958023644168, "learning_rate": 7.507304732343823e-07, "loss": 0.9039, "step": 5187 }, { "epoch": 0.87969478592624, "grad_norm": 0.9934082532115938, "learning_rate": 7.486440005906193e-07, "loss": 0.8932, "step": 5188 }, { "epoch": 0.8798643493005511, "grad_norm": 0.9913470365558636, "learning_rate": 7.465603186463299e-07, "loss": 0.9268, "step": 5189 }, { "epoch": 0.8800339126748622, "grad_norm": 1.1338060063614095, "learning_rate": 7.444794280300605e-07, "loss": 0.9099, "step": 5190 }, { "epoch": 0.8802034760491734, "grad_norm": 1.016483344725631, "learning_rate": 7.424013293695199e-07, "loss": 0.9671, "step": 5191 }, { "epoch": 0.8803730394234845, "grad_norm": 0.9714348803599815, "learning_rate": 7.40326023291571e-07, "loss": 0.9025, "step": 5192 }, { "epoch": 0.8805426027977957, "grad_norm": 1.0242147478811012, "learning_rate": 7.382535104222366e-07, "loss": 0.925, "step": 5193 }, { "epoch": 0.8807121661721068, "grad_norm": 1.004007701099195, "learning_rate": 7.361837913866965e-07, "loss": 0.96, "step": 5194 }, { "epoch": 0.880881729546418, "grad_norm": 0.9253379131107716, "learning_rate": 7.341168668092857e-07, "loss": 0.9227, "step": 5195 }, { "epoch": 0.8810512929207291, "grad_norm": 0.956403367681735, "learning_rate": 7.320527373135023e-07, "loss": 0.8884, "step": 5196 }, { "epoch": 0.8812208562950403, "grad_norm": 0.9716111779630627, "learning_rate": 7.299914035219957e-07, "loss": 0.9487, "step": 5197 }, { "epoch": 0.8813904196693514, "grad_norm": 0.951417939782792, "learning_rate": 7.279328660565721e-07, "loss": 0.9048, "step": 5198 }, { "epoch": 0.8815599830436626, "grad_norm": 0.9894361274107053, "learning_rate": 7.258771255381947e-07, "loss": 0.9306, "step": 5199 }, { "epoch": 0.8817295464179737, "grad_norm": 0.9617427817825863, "learning_rate": 7.238241825869885e-07, "loss": 0.9272, "step": 5200 }, { "epoch": 0.8818991097922849, "grad_norm": 1.022314754342777, "learning_rate": 7.21774037822226e-07, "loss": 0.9343, "step": 5201 }, { "epoch": 0.882068673166596, "grad_norm": 0.962593647347168, "learning_rate": 7.197266918623392e-07, "loss": 0.921, "step": 5202 }, { "epoch": 0.8822382365409072, "grad_norm": 0.9598241917809326, "learning_rate": 7.176821453249183e-07, "loss": 0.9348, "step": 5203 }, { "epoch": 0.8824077999152183, "grad_norm": 0.9780403951496743, "learning_rate": 7.156403988267069e-07, "loss": 0.9336, "step": 5204 }, { "epoch": 0.8825773632895295, "grad_norm": 0.9685610535137948, "learning_rate": 7.136014529836033e-07, "loss": 0.9473, "step": 5205 }, { "epoch": 0.8827469266638406, "grad_norm": 0.9628777695022261, "learning_rate": 7.115653084106599e-07, "loss": 0.881, "step": 5206 }, { "epoch": 0.8829164900381518, "grad_norm": 0.996109948351507, "learning_rate": 7.09531965722089e-07, "loss": 0.9526, "step": 5207 }, { "epoch": 0.8830860534124629, "grad_norm": 0.9823749419932903, "learning_rate": 7.07501425531254e-07, "loss": 0.9122, "step": 5208 }, { "epoch": 0.8832556167867741, "grad_norm": 1.001868883864078, "learning_rate": 7.054736884506718e-07, "loss": 0.9054, "step": 5209 }, { "epoch": 0.8834251801610852, "grad_norm": 0.9685851510283865, "learning_rate": 7.034487550920166e-07, "loss": 0.9434, "step": 5210 }, { "epoch": 0.8835947435353964, "grad_norm": 1.0012563259655243, "learning_rate": 7.014266260661151e-07, "loss": 0.939, "step": 5211 }, { "epoch": 0.8837643069097075, "grad_norm": 0.9514074295961853, "learning_rate": 6.994073019829483e-07, "loss": 0.8785, "step": 5212 }, { "epoch": 0.8839338702840186, "grad_norm": 1.0073062747137473, "learning_rate": 6.973907834516513e-07, "loss": 0.9067, "step": 5213 }, { "epoch": 0.8841034336583298, "grad_norm": 0.9832678015341291, "learning_rate": 6.953770710805141e-07, "loss": 0.9214, "step": 5214 }, { "epoch": 0.884272997032641, "grad_norm": 0.9755433882119658, "learning_rate": 6.933661654769797e-07, "loss": 0.9342, "step": 5215 }, { "epoch": 0.8844425604069521, "grad_norm": 0.9276341405930084, "learning_rate": 6.913580672476428e-07, "loss": 0.8886, "step": 5216 }, { "epoch": 0.8846121237812632, "grad_norm": 1.0060530108779318, "learning_rate": 6.8935277699825e-07, "loss": 0.9173, "step": 5217 }, { "epoch": 0.8847816871555744, "grad_norm": 0.9856993523135025, "learning_rate": 6.873502953337075e-07, "loss": 0.9448, "step": 5218 }, { "epoch": 0.8849512505298855, "grad_norm": 0.9539331742318515, "learning_rate": 6.853506228580675e-07, "loss": 0.9397, "step": 5219 }, { "epoch": 0.8851208139041967, "grad_norm": 1.0063102143893765, "learning_rate": 6.833537601745366e-07, "loss": 0.9473, "step": 5220 }, { "epoch": 0.8852903772785078, "grad_norm": 0.9765543547723605, "learning_rate": 6.813597078854772e-07, "loss": 0.9238, "step": 5221 }, { "epoch": 0.885459940652819, "grad_norm": 0.9780907928921038, "learning_rate": 6.793684665923983e-07, "loss": 0.9256, "step": 5222 }, { "epoch": 0.8856295040271301, "grad_norm": 0.9878822235226608, "learning_rate": 6.77380036895966e-07, "loss": 0.936, "step": 5223 }, { "epoch": 0.8857990674014413, "grad_norm": 0.9504494205405626, "learning_rate": 6.753944193959938e-07, "loss": 0.8783, "step": 5224 }, { "epoch": 0.8859686307757524, "grad_norm": 0.9790420113619818, "learning_rate": 6.734116146914516e-07, "loss": 0.909, "step": 5225 }, { "epoch": 0.8861381941500636, "grad_norm": 0.973751204979467, "learning_rate": 6.714316233804574e-07, "loss": 0.9311, "step": 5226 }, { "epoch": 0.8863077575243747, "grad_norm": 0.9795812048593907, "learning_rate": 6.694544460602825e-07, "loss": 0.9186, "step": 5227 }, { "epoch": 0.8864773208986859, "grad_norm": 1.0096764097554634, "learning_rate": 6.674800833273465e-07, "loss": 0.9566, "step": 5228 }, { "epoch": 0.886646884272997, "grad_norm": 0.9637532340329327, "learning_rate": 6.655085357772229e-07, "loss": 0.9148, "step": 5229 }, { "epoch": 0.8868164476473082, "grad_norm": 1.016729573317816, "learning_rate": 6.635398040046348e-07, "loss": 0.9572, "step": 5230 }, { "epoch": 0.8869860110216193, "grad_norm": 0.9922862214584582, "learning_rate": 6.615738886034551e-07, "loss": 0.9217, "step": 5231 }, { "epoch": 0.8871555743959305, "grad_norm": 0.641080241759164, "learning_rate": 6.596107901667103e-07, "loss": 0.7823, "step": 5232 }, { "epoch": 0.8873251377702416, "grad_norm": 0.6028825955841568, "learning_rate": 6.576505092865748e-07, "loss": 0.7256, "step": 5233 }, { "epoch": 0.8874947011445528, "grad_norm": 1.024517253575578, "learning_rate": 6.556930465543709e-07, "loss": 0.9343, "step": 5234 }, { "epoch": 0.8876642645188639, "grad_norm": 1.0159189030447726, "learning_rate": 6.537384025605742e-07, "loss": 0.9337, "step": 5235 }, { "epoch": 0.8878338278931751, "grad_norm": 0.954881906372445, "learning_rate": 6.517865778948108e-07, "loss": 0.9197, "step": 5236 }, { "epoch": 0.8880033912674862, "grad_norm": 0.9641775202736786, "learning_rate": 6.498375731458529e-07, "loss": 0.9222, "step": 5237 }, { "epoch": 0.8881729546417974, "grad_norm": 0.9943370339371903, "learning_rate": 6.478913889016214e-07, "loss": 0.9284, "step": 5238 }, { "epoch": 0.8883425180161085, "grad_norm": 1.0166260018741513, "learning_rate": 6.459480257491935e-07, "loss": 0.9368, "step": 5239 }, { "epoch": 0.8885120813904197, "grad_norm": 0.9602770168078034, "learning_rate": 6.440074842747879e-07, "loss": 0.9622, "step": 5240 }, { "epoch": 0.8886816447647308, "grad_norm": 1.011185471489416, "learning_rate": 6.420697650637753e-07, "loss": 0.9432, "step": 5241 }, { "epoch": 0.888851208139042, "grad_norm": 0.9692960221110054, "learning_rate": 6.401348687006725e-07, "loss": 0.9108, "step": 5242 }, { "epoch": 0.8890207715133531, "grad_norm": 0.9768566773025212, "learning_rate": 6.382027957691506e-07, "loss": 0.9209, "step": 5243 }, { "epoch": 0.8891903348876643, "grad_norm": 0.9360427922861517, "learning_rate": 6.362735468520232e-07, "loss": 0.8772, "step": 5244 }, { "epoch": 0.8893598982619754, "grad_norm": 1.012193441894449, "learning_rate": 6.343471225312536e-07, "loss": 0.9382, "step": 5245 }, { "epoch": 0.8895294616362865, "grad_norm": 0.9683443376292931, "learning_rate": 6.324235233879539e-07, "loss": 0.9225, "step": 5246 }, { "epoch": 0.8896990250105977, "grad_norm": 0.9850483600082118, "learning_rate": 6.305027500023841e-07, "loss": 0.9136, "step": 5247 }, { "epoch": 0.8898685883849089, "grad_norm": 0.6518534506406859, "learning_rate": 6.28584802953951e-07, "loss": 0.7962, "step": 5248 }, { "epoch": 0.89003815175922, "grad_norm": 0.9831450716270983, "learning_rate": 6.266696828212071e-07, "loss": 0.922, "step": 5249 }, { "epoch": 0.8902077151335311, "grad_norm": 0.9821689684049345, "learning_rate": 6.247573901818571e-07, "loss": 0.9236, "step": 5250 }, { "epoch": 0.8903772785078423, "grad_norm": 0.9812600102435868, "learning_rate": 6.228479256127495e-07, "loss": 0.95, "step": 5251 }, { "epoch": 0.8905468418821535, "grad_norm": 0.9570220059341775, "learning_rate": 6.209412896898792e-07, "loss": 0.9075, "step": 5252 }, { "epoch": 0.8907164052564646, "grad_norm": 0.6327268333559012, "learning_rate": 6.190374829883883e-07, "loss": 0.7797, "step": 5253 }, { "epoch": 0.8908859686307757, "grad_norm": 0.8919280776056981, "learning_rate": 6.171365060825674e-07, "loss": 0.8814, "step": 5254 }, { "epoch": 0.8910555320050869, "grad_norm": 0.9818274474964976, "learning_rate": 6.1523835954585e-07, "loss": 0.9506, "step": 5255 }, { "epoch": 0.8912250953793981, "grad_norm": 1.003481521128498, "learning_rate": 6.133430439508181e-07, "loss": 0.9569, "step": 5256 }, { "epoch": 0.8913946587537092, "grad_norm": 0.9938198852653174, "learning_rate": 6.114505598692011e-07, "loss": 0.9067, "step": 5257 }, { "epoch": 0.8915642221280203, "grad_norm": 0.9710700454374221, "learning_rate": 6.095609078718712e-07, "loss": 0.928, "step": 5258 }, { "epoch": 0.8917337855023315, "grad_norm": 1.015557959628636, "learning_rate": 6.076740885288479e-07, "loss": 0.9796, "step": 5259 }, { "epoch": 0.8919033488766427, "grad_norm": 0.992793079506282, "learning_rate": 6.057901024092949e-07, "loss": 0.9007, "step": 5260 }, { "epoch": 0.8920729122509538, "grad_norm": 0.9668869791624148, "learning_rate": 6.039089500815243e-07, "loss": 0.917, "step": 5261 }, { "epoch": 0.8922424756252649, "grad_norm": 1.017636019801052, "learning_rate": 6.02030632112991e-07, "loss": 0.9146, "step": 5262 }, { "epoch": 0.8924120389995761, "grad_norm": 0.9179743512419041, "learning_rate": 6.001551490702939e-07, "loss": 0.9005, "step": 5263 }, { "epoch": 0.8925816023738873, "grad_norm": 1.0086567042742047, "learning_rate": 5.982825015191785e-07, "loss": 0.9155, "step": 5264 }, { "epoch": 0.8927511657481983, "grad_norm": 0.9323295563038754, "learning_rate": 5.964126900245359e-07, "loss": 0.8926, "step": 5265 }, { "epoch": 0.8929207291225095, "grad_norm": 0.9946118197879867, "learning_rate": 5.945457151503986e-07, "loss": 0.8892, "step": 5266 }, { "epoch": 0.8930902924968207, "grad_norm": 0.9708677222069694, "learning_rate": 5.926815774599449e-07, "loss": 0.9008, "step": 5267 }, { "epoch": 0.8932598558711319, "grad_norm": 0.958213429980109, "learning_rate": 5.908202775155003e-07, "loss": 0.9204, "step": 5268 }, { "epoch": 0.893429419245443, "grad_norm": 1.0216725181792323, "learning_rate": 5.889618158785304e-07, "loss": 0.9326, "step": 5269 }, { "epoch": 0.8935989826197541, "grad_norm": 0.952656794617966, "learning_rate": 5.871061931096445e-07, "loss": 0.9319, "step": 5270 }, { "epoch": 0.8937685459940653, "grad_norm": 0.961869827435826, "learning_rate": 5.852534097685958e-07, "loss": 0.9364, "step": 5271 }, { "epoch": 0.8939381093683765, "grad_norm": 0.9528223691943507, "learning_rate": 5.834034664142862e-07, "loss": 0.9051, "step": 5272 }, { "epoch": 0.8941076727426875, "grad_norm": 1.0023317416333115, "learning_rate": 5.815563636047539e-07, "loss": 0.9245, "step": 5273 }, { "epoch": 0.8942772361169987, "grad_norm": 0.9532275624838807, "learning_rate": 5.797121018971818e-07, "loss": 0.9126, "step": 5274 }, { "epoch": 0.8944467994913099, "grad_norm": 1.0021281624248375, "learning_rate": 5.778706818479007e-07, "loss": 0.954, "step": 5275 }, { "epoch": 0.8946163628656211, "grad_norm": 0.6564871789560096, "learning_rate": 5.760321040123784e-07, "loss": 0.7657, "step": 5276 }, { "epoch": 0.8947859262399321, "grad_norm": 1.023635057461732, "learning_rate": 5.741963689452268e-07, "loss": 0.9213, "step": 5277 }, { "epoch": 0.8949554896142433, "grad_norm": 1.0119325135444848, "learning_rate": 5.723634772002007e-07, "loss": 0.9381, "step": 5278 }, { "epoch": 0.8951250529885545, "grad_norm": 0.9635630471053847, "learning_rate": 5.705334293302e-07, "loss": 0.9503, "step": 5279 }, { "epoch": 0.8952946163628657, "grad_norm": 1.0764490539676979, "learning_rate": 5.687062258872622e-07, "loss": 0.917, "step": 5280 }, { "epoch": 0.8954641797371767, "grad_norm": 1.002630091769928, "learning_rate": 5.668818674225684e-07, "loss": 0.9354, "step": 5281 }, { "epoch": 0.8956337431114879, "grad_norm": 0.9516109984492871, "learning_rate": 5.65060354486443e-07, "loss": 0.933, "step": 5282 }, { "epoch": 0.8958033064857991, "grad_norm": 1.036804485805387, "learning_rate": 5.632416876283508e-07, "loss": 0.933, "step": 5283 }, { "epoch": 0.8959728698601103, "grad_norm": 0.9632250556881334, "learning_rate": 5.614258673968976e-07, "loss": 0.9326, "step": 5284 }, { "epoch": 0.8961424332344213, "grad_norm": 0.9536898853698114, "learning_rate": 5.596128943398316e-07, "loss": 0.9307, "step": 5285 }, { "epoch": 0.8963119966087325, "grad_norm": 1.0056816957816208, "learning_rate": 5.578027690040411e-07, "loss": 0.9385, "step": 5286 }, { "epoch": 0.8964815599830437, "grad_norm": 0.9543181101782496, "learning_rate": 5.559954919355559e-07, "loss": 0.9253, "step": 5287 }, { "epoch": 0.8966511233573549, "grad_norm": 0.9569725968159479, "learning_rate": 5.541910636795455e-07, "loss": 0.8771, "step": 5288 }, { "epoch": 0.8968206867316659, "grad_norm": 0.9639028215842664, "learning_rate": 5.523894847803235e-07, "loss": 0.9487, "step": 5289 }, { "epoch": 0.8969902501059771, "grad_norm": 0.9527339518093475, "learning_rate": 5.505907557813395e-07, "loss": 0.9363, "step": 5290 }, { "epoch": 0.8971598134802883, "grad_norm": 0.9820282129080463, "learning_rate": 5.487948772251872e-07, "loss": 0.9138, "step": 5291 }, { "epoch": 0.8973293768545995, "grad_norm": 0.9451089806125501, "learning_rate": 5.470018496535967e-07, "loss": 0.9094, "step": 5292 }, { "epoch": 0.8974989402289105, "grad_norm": 0.9696214107428724, "learning_rate": 5.452116736074431e-07, "loss": 0.921, "step": 5293 }, { "epoch": 0.8976685036032217, "grad_norm": 0.9836080700131032, "learning_rate": 5.434243496267366e-07, "loss": 0.9224, "step": 5294 }, { "epoch": 0.8978380669775329, "grad_norm": 0.9570644176853008, "learning_rate": 5.416398782506294e-07, "loss": 0.8912, "step": 5295 }, { "epoch": 0.898007630351844, "grad_norm": 0.9692392647198594, "learning_rate": 5.398582600174107e-07, "loss": 0.8828, "step": 5296 }, { "epoch": 0.8981771937261551, "grad_norm": 0.932679314652526, "learning_rate": 5.380794954645141e-07, "loss": 0.9036, "step": 5297 }, { "epoch": 0.8983467571004663, "grad_norm": 0.958945221366242, "learning_rate": 5.363035851285081e-07, "loss": 0.9262, "step": 5298 }, { "epoch": 0.8985163204747775, "grad_norm": 1.054896570874422, "learning_rate": 5.345305295450997e-07, "loss": 0.9603, "step": 5299 }, { "epoch": 0.8986858838490887, "grad_norm": 0.9853113013458369, "learning_rate": 5.32760329249139e-07, "loss": 0.9173, "step": 5300 }, { "epoch": 0.8988554472233997, "grad_norm": 0.9721358648701095, "learning_rate": 5.30992984774612e-07, "loss": 0.95, "step": 5301 }, { "epoch": 0.8990250105977109, "grad_norm": 0.9728220144867263, "learning_rate": 5.292284966546424e-07, "loss": 0.8987, "step": 5302 }, { "epoch": 0.8991945739720221, "grad_norm": 0.9387027892380185, "learning_rate": 5.274668654214931e-07, "loss": 0.9396, "step": 5303 }, { "epoch": 0.8993641373463331, "grad_norm": 0.9696021367192991, "learning_rate": 5.257080916065671e-07, "loss": 0.9732, "step": 5304 }, { "epoch": 0.8995337007206443, "grad_norm": 0.9995815736940336, "learning_rate": 5.23952175740402e-07, "loss": 0.9208, "step": 5305 }, { "epoch": 0.8997032640949555, "grad_norm": 0.9381786629999485, "learning_rate": 5.221991183526753e-07, "loss": 0.9574, "step": 5306 }, { "epoch": 0.8998728274692667, "grad_norm": 0.6804674425166404, "learning_rate": 5.20448919972204e-07, "loss": 0.7873, "step": 5307 }, { "epoch": 0.9000423908435777, "grad_norm": 0.9255734150491965, "learning_rate": 5.187015811269391e-07, "loss": 0.8995, "step": 5308 }, { "epoch": 0.9002119542178889, "grad_norm": 0.9262934754730586, "learning_rate": 5.169571023439712e-07, "loss": 0.8953, "step": 5309 }, { "epoch": 0.9003815175922001, "grad_norm": 0.9726524956698935, "learning_rate": 5.152154841495249e-07, "loss": 0.9565, "step": 5310 }, { "epoch": 0.9005510809665113, "grad_norm": 0.9686538984864316, "learning_rate": 5.1347672706897e-07, "loss": 0.9071, "step": 5311 }, { "epoch": 0.9007206443408223, "grad_norm": 0.9461515617358507, "learning_rate": 5.117408316268047e-07, "loss": 0.8926, "step": 5312 }, { "epoch": 0.9008902077151335, "grad_norm": 0.9679127852350202, "learning_rate": 5.100077983466667e-07, "loss": 0.9066, "step": 5313 }, { "epoch": 0.9010597710894447, "grad_norm": 0.9769173863195825, "learning_rate": 5.08277627751329e-07, "loss": 0.9172, "step": 5314 }, { "epoch": 0.9012293344637559, "grad_norm": 0.9313000820685344, "learning_rate": 5.065503203627076e-07, "loss": 0.8606, "step": 5315 }, { "epoch": 0.9013988978380669, "grad_norm": 0.9900429787673576, "learning_rate": 5.048258767018477e-07, "loss": 0.9348, "step": 5316 }, { "epoch": 0.9015684612123781, "grad_norm": 0.6569017524250269, "learning_rate": 5.031042972889311e-07, "loss": 0.7979, "step": 5317 }, { "epoch": 0.9017380245866893, "grad_norm": 0.9831181767384738, "learning_rate": 5.013855826432801e-07, "loss": 0.9077, "step": 5318 }, { "epoch": 0.9019075879610005, "grad_norm": 0.9513029618092855, "learning_rate": 4.996697332833489e-07, "loss": 0.9179, "step": 5319 }, { "epoch": 0.9020771513353115, "grad_norm": 1.0150485130948095, "learning_rate": 4.979567497267302e-07, "loss": 0.9319, "step": 5320 }, { "epoch": 0.9022467147096227, "grad_norm": 1.0123890516368361, "learning_rate": 4.962466324901483e-07, "loss": 0.9312, "step": 5321 }, { "epoch": 0.9024162780839339, "grad_norm": 0.9879395724300434, "learning_rate": 4.945393820894662e-07, "loss": 0.885, "step": 5322 }, { "epoch": 0.902585841458245, "grad_norm": 0.9606900271195277, "learning_rate": 4.928349990396808e-07, "loss": 0.9675, "step": 5323 }, { "epoch": 0.9027554048325561, "grad_norm": 0.9767697787127416, "learning_rate": 4.911334838549242e-07, "loss": 0.9142, "step": 5324 }, { "epoch": 0.9029249682068673, "grad_norm": 0.9696775612775178, "learning_rate": 4.894348370484648e-07, "loss": 0.9083, "step": 5325 }, { "epoch": 0.9030945315811785, "grad_norm": 0.9440806168822182, "learning_rate": 4.877390591327036e-07, "loss": 0.9074, "step": 5326 }, { "epoch": 0.9032640949554896, "grad_norm": 1.051395713264764, "learning_rate": 4.860461506191782e-07, "loss": 0.9537, "step": 5327 }, { "epoch": 0.9034336583298007, "grad_norm": 0.9842090616624319, "learning_rate": 4.843561120185581e-07, "loss": 0.8999, "step": 5328 }, { "epoch": 0.9036032217041119, "grad_norm": 1.019286767442933, "learning_rate": 4.826689438406495e-07, "loss": 0.9914, "step": 5329 }, { "epoch": 0.9037727850784231, "grad_norm": 0.9975617622676345, "learning_rate": 4.809846465943912e-07, "loss": 0.9124, "step": 5330 }, { "epoch": 0.9039423484527342, "grad_norm": 0.9266459039272539, "learning_rate": 4.793032207878579e-07, "loss": 0.9105, "step": 5331 }, { "epoch": 0.9041119118270453, "grad_norm": 0.9778496315796683, "learning_rate": 4.776246669282536e-07, "loss": 0.9309, "step": 5332 }, { "epoch": 0.9042814752013565, "grad_norm": 0.9504848514653697, "learning_rate": 4.759489855219235e-07, "loss": 0.9086, "step": 5333 }, { "epoch": 0.9044510385756677, "grad_norm": 0.9475976851297315, "learning_rate": 4.742761770743387e-07, "loss": 0.902, "step": 5334 }, { "epoch": 0.9046206019499788, "grad_norm": 0.9674882792684568, "learning_rate": 4.7260624209010675e-07, "loss": 0.9006, "step": 5335 }, { "epoch": 0.9047901653242899, "grad_norm": 1.0190261397888378, "learning_rate": 4.709391810729713e-07, "loss": 0.9133, "step": 5336 }, { "epoch": 0.9049597286986011, "grad_norm": 0.9767734090303771, "learning_rate": 4.6927499452580574e-07, "loss": 0.917, "step": 5337 }, { "epoch": 0.9051292920729123, "grad_norm": 0.9555990305750007, "learning_rate": 4.67613682950615e-07, "loss": 0.9173, "step": 5338 }, { "epoch": 0.9052988554472234, "grad_norm": 1.0132778736031969, "learning_rate": 4.6595524684853954e-07, "loss": 0.9041, "step": 5339 }, { "epoch": 0.9054684188215345, "grad_norm": 1.0073560455047579, "learning_rate": 4.6429968671985235e-07, "loss": 0.9199, "step": 5340 }, { "epoch": 0.9056379821958457, "grad_norm": 0.6131839902783626, "learning_rate": 4.626470030639574e-07, "loss": 0.773, "step": 5341 }, { "epoch": 0.9058075455701569, "grad_norm": 1.0051760144127484, "learning_rate": 4.6099719637939136e-07, "loss": 0.9321, "step": 5342 }, { "epoch": 0.905977108944468, "grad_norm": 0.9850913808183326, "learning_rate": 4.59350267163825e-07, "loss": 0.929, "step": 5343 }, { "epoch": 0.9061466723187791, "grad_norm": 1.0481881351182123, "learning_rate": 4.5770621591405773e-07, "loss": 0.933, "step": 5344 }, { "epoch": 0.9063162356930903, "grad_norm": 0.965853837730131, "learning_rate": 4.5606504312602384e-07, "loss": 0.968, "step": 5345 }, { "epoch": 0.9064857990674015, "grad_norm": 0.9454795167133183, "learning_rate": 4.5442674929478625e-07, "loss": 0.9332, "step": 5346 }, { "epoch": 0.9066553624417126, "grad_norm": 0.9609462911927776, "learning_rate": 4.5279133491454406e-07, "loss": 0.9122, "step": 5347 }, { "epoch": 0.9068249258160237, "grad_norm": 0.9627821897110885, "learning_rate": 4.511588004786227e-07, "loss": 0.9508, "step": 5348 }, { "epoch": 0.9069944891903349, "grad_norm": 0.9464531931770697, "learning_rate": 4.4952914647948264e-07, "loss": 0.8912, "step": 5349 }, { "epoch": 0.907164052564646, "grad_norm": 0.9563311701679522, "learning_rate": 4.479023734087118e-07, "loss": 0.9263, "step": 5350 }, { "epoch": 0.9073336159389572, "grad_norm": 0.9838140660453529, "learning_rate": 4.4627848175703315e-07, "loss": 0.9418, "step": 5351 }, { "epoch": 0.9075031793132683, "grad_norm": 0.9551869502004656, "learning_rate": 4.4465747201429823e-07, "loss": 0.9063, "step": 5352 }, { "epoch": 0.9076727426875795, "grad_norm": 1.0141068195639051, "learning_rate": 4.4303934466948804e-07, "loss": 0.9384, "step": 5353 }, { "epoch": 0.9078423060618906, "grad_norm": 0.9530027881094659, "learning_rate": 4.414241002107178e-07, "loss": 0.9125, "step": 5354 }, { "epoch": 0.9080118694362018, "grad_norm": 0.98425247386984, "learning_rate": 4.398117391252299e-07, "loss": 0.9186, "step": 5355 }, { "epoch": 0.9081814328105129, "grad_norm": 0.9619651161774531, "learning_rate": 4.382022618993975e-07, "loss": 0.9173, "step": 5356 }, { "epoch": 0.9083509961848241, "grad_norm": 0.9264821942169333, "learning_rate": 4.365956690187256e-07, "loss": 0.9443, "step": 5357 }, { "epoch": 0.9085205595591352, "grad_norm": 0.9917467694505719, "learning_rate": 4.3499196096784544e-07, "loss": 0.9366, "step": 5358 }, { "epoch": 0.9086901229334464, "grad_norm": 0.9629837102454752, "learning_rate": 4.3339113823052223e-07, "loss": 0.8951, "step": 5359 }, { "epoch": 0.9088596863077575, "grad_norm": 0.9456688365160955, "learning_rate": 4.317932012896475e-07, "loss": 0.9103, "step": 5360 }, { "epoch": 0.9090292496820687, "grad_norm": 0.9866153383422445, "learning_rate": 4.3019815062724567e-07, "loss": 0.9458, "step": 5361 }, { "epoch": 0.9091988130563798, "grad_norm": 0.9528917367648084, "learning_rate": 4.286059867244685e-07, "loss": 0.8758, "step": 5362 }, { "epoch": 0.909368376430691, "grad_norm": 1.0479345752413354, "learning_rate": 4.270167100615952e-07, "loss": 0.9488, "step": 5363 }, { "epoch": 0.9095379398050021, "grad_norm": 0.9412771103362266, "learning_rate": 4.254303211180355e-07, "loss": 0.8842, "step": 5364 }, { "epoch": 0.9097075031793133, "grad_norm": 1.0014740399517312, "learning_rate": 4.2384682037233115e-07, "loss": 0.9197, "step": 5365 }, { "epoch": 0.9098770665536244, "grad_norm": 0.9763866678925637, "learning_rate": 4.222662083021489e-07, "loss": 0.9296, "step": 5366 }, { "epoch": 0.9100466299279356, "grad_norm": 1.0040310918329611, "learning_rate": 4.206884853842852e-07, "loss": 0.8908, "step": 5367 }, { "epoch": 0.9102161933022467, "grad_norm": 0.9576255730098154, "learning_rate": 4.191136520946626e-07, "loss": 0.9385, "step": 5368 }, { "epoch": 0.9103857566765579, "grad_norm": 1.0055933207058636, "learning_rate": 4.1754170890833777e-07, "loss": 0.9843, "step": 5369 }, { "epoch": 0.910555320050869, "grad_norm": 0.9973837267287784, "learning_rate": 4.1597265629949146e-07, "loss": 0.9359, "step": 5370 }, { "epoch": 0.9107248834251802, "grad_norm": 0.9466030246624016, "learning_rate": 4.144064947414295e-07, "loss": 0.9063, "step": 5371 }, { "epoch": 0.9108944467994913, "grad_norm": 0.9215907355186325, "learning_rate": 4.1284322470659386e-07, "loss": 0.9104, "step": 5372 }, { "epoch": 0.9110640101738025, "grad_norm": 0.963263187659054, "learning_rate": 4.112828466665486e-07, "loss": 0.9077, "step": 5373 }, { "epoch": 0.9112335735481136, "grad_norm": 0.988887337624277, "learning_rate": 4.0972536109198493e-07, "loss": 0.9613, "step": 5374 }, { "epoch": 0.9114031369224248, "grad_norm": 0.9716510077462397, "learning_rate": 4.081707684527236e-07, "loss": 0.9356, "step": 5375 }, { "epoch": 0.9115727002967359, "grad_norm": 0.9434431025953428, "learning_rate": 4.0661906921771297e-07, "loss": 0.9504, "step": 5376 }, { "epoch": 0.911742263671047, "grad_norm": 0.9087093935147786, "learning_rate": 4.0507026385502747e-07, "loss": 0.8905, "step": 5377 }, { "epoch": 0.9119118270453582, "grad_norm": 0.9289058127702048, "learning_rate": 4.035243528318666e-07, "loss": 0.9064, "step": 5378 }, { "epoch": 0.9120813904196694, "grad_norm": 0.9306911482992167, "learning_rate": 4.019813366145631e-07, "loss": 0.9247, "step": 5379 }, { "epoch": 0.9122509537939805, "grad_norm": 0.9627105141700903, "learning_rate": 4.0044121566857106e-07, "loss": 0.8919, "step": 5380 }, { "epoch": 0.9124205171682916, "grad_norm": 1.033443833616269, "learning_rate": 3.9890399045847127e-07, "loss": 0.9329, "step": 5381 }, { "epoch": 0.9125900805426028, "grad_norm": 0.931559687121707, "learning_rate": 3.9736966144797164e-07, "loss": 0.9159, "step": 5382 }, { "epoch": 0.912759643916914, "grad_norm": 0.9763002306429589, "learning_rate": 3.958382290999108e-07, "loss": 0.9459, "step": 5383 }, { "epoch": 0.9129292072912251, "grad_norm": 1.0190309312128152, "learning_rate": 3.9430969387624694e-07, "loss": 0.925, "step": 5384 }, { "epoch": 0.9130987706655362, "grad_norm": 0.9589514814186912, "learning_rate": 3.9278405623806914e-07, "loss": 0.9203, "step": 5385 }, { "epoch": 0.9132683340398474, "grad_norm": 0.6757902629308058, "learning_rate": 3.912613166455881e-07, "loss": 0.7678, "step": 5386 }, { "epoch": 0.9134378974141586, "grad_norm": 0.9723433186510121, "learning_rate": 3.897414755581463e-07, "loss": 0.8983, "step": 5387 }, { "epoch": 0.9136074607884697, "grad_norm": 0.9661531137532681, "learning_rate": 3.882245334342061e-07, "loss": 0.921, "step": 5388 }, { "epoch": 0.9137770241627808, "grad_norm": 0.9930248420028948, "learning_rate": 3.867104907313557e-07, "loss": 0.9196, "step": 5389 }, { "epoch": 0.913946587537092, "grad_norm": 0.9761736950576347, "learning_rate": 3.851993479063154e-07, "loss": 0.918, "step": 5390 }, { "epoch": 0.9141161509114032, "grad_norm": 0.9714243326655925, "learning_rate": 3.8369110541492396e-07, "loss": 0.9304, "step": 5391 }, { "epoch": 0.9142857142857143, "grad_norm": 1.0111816639053728, "learning_rate": 3.821857637121462e-07, "loss": 0.9121, "step": 5392 }, { "epoch": 0.9144552776600254, "grad_norm": 0.9562584743620902, "learning_rate": 3.806833232520746e-07, "loss": 0.9187, "step": 5393 }, { "epoch": 0.9146248410343366, "grad_norm": 0.9179208036264318, "learning_rate": 3.7918378448792316e-07, "loss": 0.9016, "step": 5394 }, { "epoch": 0.9147944044086477, "grad_norm": 1.0318724655216793, "learning_rate": 3.776871478720334e-07, "loss": 0.9738, "step": 5395 }, { "epoch": 0.9149639677829589, "grad_norm": 0.9877189104966257, "learning_rate": 3.761934138558687e-07, "loss": 0.9345, "step": 5396 }, { "epoch": 0.91513353115727, "grad_norm": 0.961421111491527, "learning_rate": 3.747025828900208e-07, "loss": 0.9049, "step": 5397 }, { "epoch": 0.9153030945315812, "grad_norm": 0.9740789070011456, "learning_rate": 3.732146554242022e-07, "loss": 0.903, "step": 5398 }, { "epoch": 0.9154726579058923, "grad_norm": 0.9550882789299242, "learning_rate": 3.7172963190725164e-07, "loss": 0.9378, "step": 5399 }, { "epoch": 0.9156422212802034, "grad_norm": 0.9575834152999672, "learning_rate": 3.7024751278712744e-07, "loss": 0.9141, "step": 5400 }, { "epoch": 0.9158117846545146, "grad_norm": 0.9766625715813362, "learning_rate": 3.687682985109209e-07, "loss": 0.9368, "step": 5401 }, { "epoch": 0.9159813480288258, "grad_norm": 0.959606068005571, "learning_rate": 3.6729198952483725e-07, "loss": 0.9661, "step": 5402 }, { "epoch": 0.9161509114031369, "grad_norm": 0.9831822183201825, "learning_rate": 3.658185862742103e-07, "loss": 0.9585, "step": 5403 }, { "epoch": 0.916320474777448, "grad_norm": 0.9194956259616271, "learning_rate": 3.6434808920349787e-07, "loss": 0.8483, "step": 5404 }, { "epoch": 0.9164900381517592, "grad_norm": 0.9460217958792853, "learning_rate": 3.628804987562795e-07, "loss": 0.9456, "step": 5405 }, { "epoch": 0.9166596015260704, "grad_norm": 0.932499906714206, "learning_rate": 3.614158153752578e-07, "loss": 0.8892, "step": 5406 }, { "epoch": 0.9168291649003815, "grad_norm": 0.9502948231957661, "learning_rate": 3.599540395022583e-07, "loss": 0.901, "step": 5407 }, { "epoch": 0.9169987282746926, "grad_norm": 0.9920578897311115, "learning_rate": 3.5849517157823143e-07, "loss": 0.9221, "step": 5408 }, { "epoch": 0.9171682916490038, "grad_norm": 0.9313523808453857, "learning_rate": 3.5703921204324863e-07, "loss": 0.9135, "step": 5409 }, { "epoch": 0.917337855023315, "grad_norm": 0.9746902661040927, "learning_rate": 3.5558616133650413e-07, "loss": 0.9033, "step": 5410 }, { "epoch": 0.9175074183976261, "grad_norm": 1.0035283102741095, "learning_rate": 3.5413601989631616e-07, "loss": 0.9315, "step": 5411 }, { "epoch": 0.9176769817719372, "grad_norm": 1.0048795473133891, "learning_rate": 3.5268878816012265e-07, "loss": 0.9262, "step": 5412 }, { "epoch": 0.9178465451462484, "grad_norm": 0.9826734454281971, "learning_rate": 3.5124446656448654e-07, "loss": 0.9665, "step": 5413 }, { "epoch": 0.9180161085205596, "grad_norm": 1.0102837678151142, "learning_rate": 3.498030555450904e-07, "loss": 0.9326, "step": 5414 }, { "epoch": 0.9181856718948707, "grad_norm": 0.9855348954572305, "learning_rate": 3.483645555367421e-07, "loss": 0.9476, "step": 5415 }, { "epoch": 0.9183552352691818, "grad_norm": 0.9339472132343781, "learning_rate": 3.4692896697336887e-07, "loss": 0.9005, "step": 5416 }, { "epoch": 0.918524798643493, "grad_norm": 0.9639269646676377, "learning_rate": 3.454962902880199e-07, "loss": 0.9276, "step": 5417 }, { "epoch": 0.9186943620178042, "grad_norm": 1.0275373407904178, "learning_rate": 3.4406652591286507e-07, "loss": 0.9558, "step": 5418 }, { "epoch": 0.9188639253921153, "grad_norm": 0.9610171386420859, "learning_rate": 3.426396742792004e-07, "loss": 0.9313, "step": 5419 }, { "epoch": 0.9190334887664264, "grad_norm": 0.9826964548150668, "learning_rate": 3.412157358174384e-07, "loss": 0.931, "step": 5420 }, { "epoch": 0.9192030521407376, "grad_norm": 0.5864439574398642, "learning_rate": 3.397947109571131e-07, "loss": 0.7674, "step": 5421 }, { "epoch": 0.9193726155150488, "grad_norm": 0.9420992595847931, "learning_rate": 3.38376600126884e-07, "loss": 0.9384, "step": 5422 }, { "epoch": 0.9195421788893599, "grad_norm": 1.004375833066848, "learning_rate": 3.3696140375452544e-07, "loss": 0.8952, "step": 5423 }, { "epoch": 0.919711742263671, "grad_norm": 0.958464811356412, "learning_rate": 3.3554912226693714e-07, "loss": 0.9192, "step": 5424 }, { "epoch": 0.9198813056379822, "grad_norm": 0.9992589274460407, "learning_rate": 3.3413975609013713e-07, "loss": 0.9297, "step": 5425 }, { "epoch": 0.9200508690122934, "grad_norm": 0.9602548891218543, "learning_rate": 3.3273330564926766e-07, "loss": 0.8977, "step": 5426 }, { "epoch": 0.9202204323866044, "grad_norm": 0.9564208966657699, "learning_rate": 3.313297713685859e-07, "loss": 0.9112, "step": 5427 }, { "epoch": 0.9203899957609156, "grad_norm": 0.6551809507934392, "learning_rate": 3.299291536714722e-07, "loss": 0.7744, "step": 5428 }, { "epoch": 0.9205595591352268, "grad_norm": 0.9909382809102839, "learning_rate": 3.2853145298042954e-07, "loss": 0.8767, "step": 5429 }, { "epoch": 0.920729122509538, "grad_norm": 0.9836627492482466, "learning_rate": 3.271366697170764e-07, "loss": 0.9117, "step": 5430 }, { "epoch": 0.920898685883849, "grad_norm": 0.9395079072695641, "learning_rate": 3.257448043021538e-07, "loss": 0.9016, "step": 5431 }, { "epoch": 0.9210682492581602, "grad_norm": 0.9120250231051668, "learning_rate": 3.2435585715552164e-07, "loss": 0.9061, "step": 5432 }, { "epoch": 0.9212378126324714, "grad_norm": 0.9802404822411753, "learning_rate": 3.2296982869616134e-07, "loss": 0.9278, "step": 5433 }, { "epoch": 0.9214073760067826, "grad_norm": 1.0110692105201524, "learning_rate": 3.215867193421718e-07, "loss": 0.9559, "step": 5434 }, { "epoch": 0.9215769393810936, "grad_norm": 0.9766251321150854, "learning_rate": 3.2020652951077256e-07, "loss": 0.9198, "step": 5435 }, { "epoch": 0.9217465027554048, "grad_norm": 0.9525585021322519, "learning_rate": 3.188292596183007e-07, "loss": 0.8835, "step": 5436 }, { "epoch": 0.921916066129716, "grad_norm": 0.9602038533099616, "learning_rate": 3.1745491008021603e-07, "loss": 0.8949, "step": 5437 }, { "epoch": 0.9220856295040272, "grad_norm": 1.003781034042751, "learning_rate": 3.160834813110947e-07, "loss": 0.9369, "step": 5438 }, { "epoch": 0.9222551928783382, "grad_norm": 0.7104834836846216, "learning_rate": 3.147149737246302e-07, "loss": 0.7674, "step": 5439 }, { "epoch": 0.9224247562526494, "grad_norm": 0.9800090606314543, "learning_rate": 3.133493877336391e-07, "loss": 0.9215, "step": 5440 }, { "epoch": 0.9225943196269606, "grad_norm": 0.9333006892043587, "learning_rate": 3.1198672375005403e-07, "loss": 0.8994, "step": 5441 }, { "epoch": 0.9227638830012718, "grad_norm": 0.9514621600841879, "learning_rate": 3.106269821849273e-07, "loss": 0.9277, "step": 5442 }, { "epoch": 0.9229334463755828, "grad_norm": 0.9539791701091482, "learning_rate": 3.092701634484274e-07, "loss": 0.9022, "step": 5443 }, { "epoch": 0.923103009749894, "grad_norm": 1.190105551687011, "learning_rate": 3.0791626794984377e-07, "loss": 0.8983, "step": 5444 }, { "epoch": 0.9232725731242052, "grad_norm": 0.9932389376448827, "learning_rate": 3.06565296097584e-07, "loss": 0.9203, "step": 5445 }, { "epoch": 0.9234421364985164, "grad_norm": 0.9788596238609185, "learning_rate": 3.052172482991711e-07, "loss": 0.9624, "step": 5446 }, { "epoch": 0.9236116998728274, "grad_norm": 1.0380243004597096, "learning_rate": 3.0387212496124974e-07, "loss": 0.9219, "step": 5447 }, { "epoch": 0.9237812632471386, "grad_norm": 1.0113902300688296, "learning_rate": 3.025299264895787e-07, "loss": 0.8886, "step": 5448 }, { "epoch": 0.9239508266214498, "grad_norm": 0.63149154328108, "learning_rate": 3.0119065328903517e-07, "loss": 0.7782, "step": 5449 }, { "epoch": 0.924120389995761, "grad_norm": 0.5589468374793891, "learning_rate": 2.998543057636183e-07, "loss": 0.7202, "step": 5450 }, { "epoch": 0.924289953370072, "grad_norm": 0.9780248892759397, "learning_rate": 2.9852088431644e-07, "loss": 0.8921, "step": 5451 }, { "epoch": 0.9244595167443832, "grad_norm": 0.9939655788799581, "learning_rate": 2.9719038934972964e-07, "loss": 0.8924, "step": 5452 }, { "epoch": 0.9246290801186944, "grad_norm": 0.654090335724019, "learning_rate": 2.9586282126483625e-07, "loss": 0.798, "step": 5453 }, { "epoch": 0.9247986434930056, "grad_norm": 0.9874470089105412, "learning_rate": 2.945381804622238e-07, "loss": 0.9116, "step": 5454 }, { "epoch": 0.9249682068673166, "grad_norm": 0.9990081153474144, "learning_rate": 2.93216467341475e-07, "loss": 0.9166, "step": 5455 }, { "epoch": 0.9251377702416278, "grad_norm": 0.9977786065598294, "learning_rate": 2.918976823012887e-07, "loss": 0.9261, "step": 5456 }, { "epoch": 0.925307333615939, "grad_norm": 0.9786420251410374, "learning_rate": 2.905818257394799e-07, "loss": 0.8987, "step": 5457 }, { "epoch": 0.9254768969902502, "grad_norm": 0.9351485593254598, "learning_rate": 2.892688980529812e-07, "loss": 0.9137, "step": 5458 }, { "epoch": 0.9256464603645612, "grad_norm": 0.9881128074961543, "learning_rate": 2.879588996378402e-07, "loss": 0.914, "step": 5459 }, { "epoch": 0.9258160237388724, "grad_norm": 0.9709518774562353, "learning_rate": 2.86651830889223e-07, "loss": 0.9463, "step": 5460 }, { "epoch": 0.9259855871131836, "grad_norm": 0.9811259952601982, "learning_rate": 2.853476922014098e-07, "loss": 0.9102, "step": 5461 }, { "epoch": 0.9261551504874947, "grad_norm": 0.9686376678290065, "learning_rate": 2.840464839677992e-07, "loss": 0.9348, "step": 5462 }, { "epoch": 0.9263247138618058, "grad_norm": 1.005719347351487, "learning_rate": 2.8274820658090506e-07, "loss": 0.9297, "step": 5463 }, { "epoch": 0.926494277236117, "grad_norm": 1.0367614580892663, "learning_rate": 2.8145286043235407e-07, "loss": 0.97, "step": 5464 }, { "epoch": 0.9266638406104282, "grad_norm": 0.9836851328703371, "learning_rate": 2.801604459128926e-07, "loss": 0.9272, "step": 5465 }, { "epoch": 0.9268334039847393, "grad_norm": 0.9650917589108308, "learning_rate": 2.788709634123821e-07, "loss": 0.9371, "step": 5466 }, { "epoch": 0.9270029673590504, "grad_norm": 0.9800319300033921, "learning_rate": 2.7758441331979914e-07, "loss": 0.8935, "step": 5467 }, { "epoch": 0.9271725307333616, "grad_norm": 0.9911754307768186, "learning_rate": 2.7630079602323447e-07, "loss": 0.9414, "step": 5468 }, { "epoch": 0.9273420941076728, "grad_norm": 0.9895292365279797, "learning_rate": 2.75020111909895e-07, "loss": 0.9144, "step": 5469 }, { "epoch": 0.9275116574819839, "grad_norm": 0.9765261342110486, "learning_rate": 2.737423613661028e-07, "loss": 0.9608, "step": 5470 }, { "epoch": 0.927681220856295, "grad_norm": 0.9879221227215401, "learning_rate": 2.724675447772973e-07, "loss": 0.9401, "step": 5471 }, { "epoch": 0.9278507842306062, "grad_norm": 1.0071323980106321, "learning_rate": 2.7119566252802656e-07, "loss": 0.9525, "step": 5472 }, { "epoch": 0.9280203476049174, "grad_norm": 0.9824768063606365, "learning_rate": 2.6992671500196134e-07, "loss": 0.9341, "step": 5473 }, { "epoch": 0.9281899109792285, "grad_norm": 0.9459273440730945, "learning_rate": 2.6866070258188324e-07, "loss": 0.9023, "step": 5474 }, { "epoch": 0.9283594743535396, "grad_norm": 0.9764163138743068, "learning_rate": 2.6739762564968686e-07, "loss": 0.8964, "step": 5475 }, { "epoch": 0.9285290377278508, "grad_norm": 0.9819088893037986, "learning_rate": 2.661374845863851e-07, "loss": 0.9378, "step": 5476 }, { "epoch": 0.928698601102162, "grad_norm": 0.9508932159708138, "learning_rate": 2.6488027977210175e-07, "loss": 0.853, "step": 5477 }, { "epoch": 0.9288681644764731, "grad_norm": 0.6419977853689585, "learning_rate": 2.636260115860778e-07, "loss": 0.8053, "step": 5478 }, { "epoch": 0.9290377278507842, "grad_norm": 0.9693479238780413, "learning_rate": 2.6237468040666515e-07, "loss": 0.9005, "step": 5479 }, { "epoch": 0.9292072912250954, "grad_norm": 0.9556824438132773, "learning_rate": 2.61126286611334e-07, "loss": 0.9, "step": 5480 }, { "epoch": 0.9293768545994066, "grad_norm": 1.039279132784763, "learning_rate": 2.5988083057666534e-07, "loss": 0.9347, "step": 5481 }, { "epoch": 0.9295464179737177, "grad_norm": 0.995902014678631, "learning_rate": 2.586383126783532e-07, "loss": 0.9244, "step": 5482 }, { "epoch": 0.9297159813480288, "grad_norm": 0.9407804494372483, "learning_rate": 2.573987332912087e-07, "loss": 0.9263, "step": 5483 }, { "epoch": 0.92988554472234, "grad_norm": 0.911538157433653, "learning_rate": 2.561620927891539e-07, "loss": 0.9135, "step": 5484 }, { "epoch": 0.9300551080966512, "grad_norm": 0.9520464880655851, "learning_rate": 2.5492839154522495e-07, "loss": 0.8978, "step": 5485 }, { "epoch": 0.9302246714709623, "grad_norm": 0.9867480022884948, "learning_rate": 2.53697629931573e-07, "loss": 0.9329, "step": 5486 }, { "epoch": 0.9303942348452734, "grad_norm": 0.6857170062643512, "learning_rate": 2.5246980831945877e-07, "loss": 0.7953, "step": 5487 }, { "epoch": 0.9305637982195846, "grad_norm": 0.9641602117190036, "learning_rate": 2.512449270792594e-07, "loss": 0.9165, "step": 5488 }, { "epoch": 0.9307333615938957, "grad_norm": 0.9319781517032217, "learning_rate": 2.5002298658046484e-07, "loss": 0.9119, "step": 5489 }, { "epoch": 0.9309029249682068, "grad_norm": 0.9683516646842466, "learning_rate": 2.4880398719167584e-07, "loss": 0.9381, "step": 5490 }, { "epoch": 0.931072488342518, "grad_norm": 1.021949784427159, "learning_rate": 2.4758792928060715e-07, "loss": 0.9907, "step": 5491 }, { "epoch": 0.9312420517168292, "grad_norm": 0.987326968389827, "learning_rate": 2.4637481321408863e-07, "loss": 0.8731, "step": 5492 }, { "epoch": 0.9314116150911403, "grad_norm": 0.9508026560198607, "learning_rate": 2.4516463935805644e-07, "loss": 0.9599, "step": 5493 }, { "epoch": 0.9315811784654514, "grad_norm": 0.9332219601549946, "learning_rate": 2.439574080775675e-07, "loss": 0.9106, "step": 5494 }, { "epoch": 0.9317507418397626, "grad_norm": 0.9766262841226447, "learning_rate": 2.4275311973678384e-07, "loss": 0.8992, "step": 5495 }, { "epoch": 0.9319203052140738, "grad_norm": 0.9333527754702658, "learning_rate": 2.4155177469898373e-07, "loss": 0.9176, "step": 5496 }, { "epoch": 0.9320898685883849, "grad_norm": 0.9668309153421941, "learning_rate": 2.4035337332655504e-07, "loss": 0.9181, "step": 5497 }, { "epoch": 0.932259431962696, "grad_norm": 0.9820661538244929, "learning_rate": 2.3915791598100205e-07, "loss": 0.9398, "step": 5498 }, { "epoch": 0.9324289953370072, "grad_norm": 0.9963045821264599, "learning_rate": 2.3796540302293724e-07, "loss": 0.9039, "step": 5499 }, { "epoch": 0.9325985587113184, "grad_norm": 0.9417061394871139, "learning_rate": 2.36775834812083e-07, "loss": 0.9056, "step": 5500 }, { "epoch": 0.9327681220856295, "grad_norm": 0.9791666419445099, "learning_rate": 2.355892117072789e-07, "loss": 0.9131, "step": 5501 }, { "epoch": 0.9329376854599406, "grad_norm": 0.9692132121935689, "learning_rate": 2.3440553406647305e-07, "loss": 0.9017, "step": 5502 }, { "epoch": 0.9331072488342518, "grad_norm": 0.985485094805859, "learning_rate": 2.332248022467254e-07, "loss": 0.8937, "step": 5503 }, { "epoch": 0.933276812208563, "grad_norm": 0.9505013258552553, "learning_rate": 2.320470166042066e-07, "loss": 0.887, "step": 5504 }, { "epoch": 0.9334463755828741, "grad_norm": 0.982934702945291, "learning_rate": 2.308721774941991e-07, "loss": 0.8981, "step": 5505 }, { "epoch": 0.9336159389571852, "grad_norm": 0.9607421256328981, "learning_rate": 2.2970028527109724e-07, "loss": 0.8796, "step": 5506 }, { "epoch": 0.9337855023314964, "grad_norm": 0.969513999784764, "learning_rate": 2.2853134028840594e-07, "loss": 0.9252, "step": 5507 }, { "epoch": 0.9339550657058076, "grad_norm": 0.9600935230399994, "learning_rate": 2.273653428987399e-07, "loss": 0.8967, "step": 5508 }, { "epoch": 0.9341246290801187, "grad_norm": 0.9694123817405524, "learning_rate": 2.262022934538266e-07, "loss": 0.8888, "step": 5509 }, { "epoch": 0.9342941924544298, "grad_norm": 0.5898540136692917, "learning_rate": 2.2504219230450431e-07, "loss": 0.7494, "step": 5510 }, { "epoch": 0.934463755828741, "grad_norm": 0.9696883623750306, "learning_rate": 2.2388503980071862e-07, "loss": 0.9072, "step": 5511 }, { "epoch": 0.9346333192030521, "grad_norm": 0.9573773378137203, "learning_rate": 2.2273083629153148e-07, "loss": 0.8997, "step": 5512 }, { "epoch": 0.9348028825773633, "grad_norm": 0.9797542895525152, "learning_rate": 2.2157958212510877e-07, "loss": 0.9218, "step": 5513 }, { "epoch": 0.9349724459516744, "grad_norm": 0.9985364081071842, "learning_rate": 2.2043127764873162e-07, "loss": 0.9249, "step": 5514 }, { "epoch": 0.9351420093259856, "grad_norm": 0.6558730867826927, "learning_rate": 2.192859232087885e-07, "loss": 0.7905, "step": 5515 }, { "epoch": 0.9353115727002967, "grad_norm": 0.9715127965895606, "learning_rate": 2.181435191507797e-07, "loss": 0.9163, "step": 5516 }, { "epoch": 0.9354811360746079, "grad_norm": 0.6849751762026847, "learning_rate": 2.1700406581931398e-07, "loss": 0.7699, "step": 5517 }, { "epoch": 0.935650699448919, "grad_norm": 0.9929621706345273, "learning_rate": 2.15867563558112e-07, "loss": 0.9099, "step": 5518 }, { "epoch": 0.9358202628232302, "grad_norm": 0.9756665543966292, "learning_rate": 2.1473401271000283e-07, "loss": 0.8924, "step": 5519 }, { "epoch": 0.9359898261975413, "grad_norm": 0.9919281699363885, "learning_rate": 2.1360341361692517e-07, "loss": 0.8788, "step": 5520 }, { "epoch": 0.9361593895718525, "grad_norm": 0.9907203119270258, "learning_rate": 2.124757666199273e-07, "loss": 0.9473, "step": 5521 }, { "epoch": 0.9363289529461636, "grad_norm": 0.9740373524020767, "learning_rate": 2.1135107205916826e-07, "loss": 0.9202, "step": 5522 }, { "epoch": 0.9364985163204748, "grad_norm": 0.9799372514367808, "learning_rate": 2.1022933027391555e-07, "loss": 0.9651, "step": 5523 }, { "epoch": 0.9366680796947859, "grad_norm": 0.6274185839242921, "learning_rate": 2.0911054160254517e-07, "loss": 0.7484, "step": 5524 }, { "epoch": 0.9368376430690971, "grad_norm": 0.9837883547575718, "learning_rate": 2.079947063825427e-07, "loss": 0.9584, "step": 5525 }, { "epoch": 0.9370072064434082, "grad_norm": 0.9454725601288959, "learning_rate": 2.0688182495050446e-07, "loss": 0.8732, "step": 5526 }, { "epoch": 0.9371767698177194, "grad_norm": 0.9483920164930612, "learning_rate": 2.057718976421341e-07, "loss": 0.8729, "step": 5527 }, { "epoch": 0.9373463331920305, "grad_norm": 0.6039447323116278, "learning_rate": 2.0466492479224387e-07, "loss": 0.7654, "step": 5528 }, { "epoch": 0.9375158965663417, "grad_norm": 0.9731955916171043, "learning_rate": 2.035609067347566e-07, "loss": 0.9243, "step": 5529 }, { "epoch": 0.9376854599406528, "grad_norm": 0.9312673364514508, "learning_rate": 2.0245984380270145e-07, "loss": 0.8914, "step": 5530 }, { "epoch": 0.937855023314964, "grad_norm": 0.9656315441515511, "learning_rate": 2.0136173632821944e-07, "loss": 0.8916, "step": 5531 }, { "epoch": 0.9380245866892751, "grad_norm": 0.968091760917464, "learning_rate": 2.0026658464255554e-07, "loss": 0.9207, "step": 5532 }, { "epoch": 0.9381941500635863, "grad_norm": 0.9355634426300802, "learning_rate": 1.9917438907606556e-07, "loss": 0.9358, "step": 5533 }, { "epoch": 0.9383637134378974, "grad_norm": 0.9819685760318001, "learning_rate": 1.9808514995821592e-07, "loss": 0.8975, "step": 5534 }, { "epoch": 0.9385332768122086, "grad_norm": 0.9444549546433799, "learning_rate": 1.9699886761757826e-07, "loss": 0.8914, "step": 5535 }, { "epoch": 0.9387028401865197, "grad_norm": 0.9749889066526239, "learning_rate": 1.959155423818304e-07, "loss": 0.9295, "step": 5536 }, { "epoch": 0.9388724035608309, "grad_norm": 0.9715401945162648, "learning_rate": 1.9483517457776436e-07, "loss": 0.9077, "step": 5537 }, { "epoch": 0.939041966935142, "grad_norm": 0.9726166793665533, "learning_rate": 1.937577645312738e-07, "loss": 0.9198, "step": 5538 }, { "epoch": 0.9392115303094531, "grad_norm": 0.9963681974344389, "learning_rate": 1.926833125673633e-07, "loss": 0.9248, "step": 5539 }, { "epoch": 0.9393810936837643, "grad_norm": 0.9859102992739459, "learning_rate": 1.916118190101457e-07, "loss": 0.9399, "step": 5540 }, { "epoch": 0.9395506570580755, "grad_norm": 0.9568208210463655, "learning_rate": 1.9054328418283808e-07, "loss": 0.9061, "step": 5541 }, { "epoch": 0.9397202204323866, "grad_norm": 0.9686416629208863, "learning_rate": 1.8947770840776925e-07, "loss": 0.9496, "step": 5542 }, { "epoch": 0.9398897838066977, "grad_norm": 0.9391330366724194, "learning_rate": 1.884150920063721e-07, "loss": 0.9409, "step": 5543 }, { "epoch": 0.9400593471810089, "grad_norm": 0.9426771707889775, "learning_rate": 1.87355435299188e-07, "loss": 0.9143, "step": 5544 }, { "epoch": 0.9402289105553201, "grad_norm": 1.0253838406119677, "learning_rate": 1.8629873860586567e-07, "loss": 0.9308, "step": 5545 }, { "epoch": 0.9403984739296312, "grad_norm": 0.9804098725527529, "learning_rate": 1.852450022451624e-07, "loss": 0.9084, "step": 5546 }, { "epoch": 0.9405680373039423, "grad_norm": 0.9933360669735185, "learning_rate": 1.8419422653493835e-07, "loss": 0.909, "step": 5547 }, { "epoch": 0.9407376006782535, "grad_norm": 0.984649373396994, "learning_rate": 1.8314641179216663e-07, "loss": 0.919, "step": 5548 }, { "epoch": 0.9409071640525647, "grad_norm": 0.9329842295678624, "learning_rate": 1.8210155833291998e-07, "loss": 0.8807, "step": 5549 }, { "epoch": 0.9410767274268758, "grad_norm": 0.9969967261709832, "learning_rate": 1.8105966647238515e-07, "loss": 0.9447, "step": 5550 }, { "epoch": 0.9412462908011869, "grad_norm": 0.9820930359991815, "learning_rate": 1.8002073652484852e-07, "loss": 0.9366, "step": 5551 }, { "epoch": 0.9414158541754981, "grad_norm": 0.5773918879392964, "learning_rate": 1.789847688037083e-07, "loss": 0.7294, "step": 5552 }, { "epoch": 0.9415854175498093, "grad_norm": 0.9188618466256349, "learning_rate": 1.7795176362146783e-07, "loss": 0.9154, "step": 5553 }, { "epoch": 0.9417549809241204, "grad_norm": 0.9438107286937366, "learning_rate": 1.769217212897345e-07, "loss": 0.9052, "step": 5554 }, { "epoch": 0.9419245442984315, "grad_norm": 0.9839383401916435, "learning_rate": 1.7589464211922537e-07, "loss": 0.9183, "step": 5555 }, { "epoch": 0.9420941076727427, "grad_norm": 0.9691832452196864, "learning_rate": 1.748705264197603e-07, "loss": 0.9165, "step": 5556 }, { "epoch": 0.9422636710470539, "grad_norm": 0.9533571444589288, "learning_rate": 1.7384937450026895e-07, "loss": 0.9013, "step": 5557 }, { "epoch": 0.942433234421365, "grad_norm": 0.999708131973898, "learning_rate": 1.7283118666878374e-07, "loss": 0.9423, "step": 5558 }, { "epoch": 0.9426027977956761, "grad_norm": 0.653302681894168, "learning_rate": 1.7181596323244453e-07, "loss": 0.7422, "step": 5559 }, { "epoch": 0.9427723611699873, "grad_norm": 0.9892735021750785, "learning_rate": 1.7080370449749528e-07, "loss": 0.9112, "step": 5560 }, { "epoch": 0.9429419245442985, "grad_norm": 0.5708981756493947, "learning_rate": 1.6979441076928837e-07, "loss": 0.7875, "step": 5561 }, { "epoch": 0.9431114879186095, "grad_norm": 1.0079362041064297, "learning_rate": 1.6878808235227806e-07, "loss": 0.9342, "step": 5562 }, { "epoch": 0.9432810512929207, "grad_norm": 0.994009404217517, "learning_rate": 1.677847195500304e-07, "loss": 0.9109, "step": 5563 }, { "epoch": 0.9434506146672319, "grad_norm": 0.9775203300782569, "learning_rate": 1.6678432266520882e-07, "loss": 0.9324, "step": 5564 }, { "epoch": 0.9436201780415431, "grad_norm": 1.00607211286155, "learning_rate": 1.6578689199958753e-07, "loss": 0.9196, "step": 5565 }, { "epoch": 0.9437897414158541, "grad_norm": 0.9548045689969014, "learning_rate": 1.647924278540447e-07, "loss": 0.9438, "step": 5566 }, { "epoch": 0.9439593047901653, "grad_norm": 0.9707667507146128, "learning_rate": 1.6380093052856482e-07, "loss": 0.9233, "step": 5567 }, { "epoch": 0.9441288681644765, "grad_norm": 0.9081739895660971, "learning_rate": 1.628124003222331e-07, "loss": 0.9158, "step": 5568 }, { "epoch": 0.9442984315387877, "grad_norm": 0.9580113632717862, "learning_rate": 1.6182683753324435e-07, "loss": 0.9248, "step": 5569 }, { "epoch": 0.9444679949130987, "grad_norm": 1.0202207410989919, "learning_rate": 1.6084424245889628e-07, "loss": 0.9215, "step": 5570 }, { "epoch": 0.9446375582874099, "grad_norm": 0.9239649460492493, "learning_rate": 1.5986461539559294e-07, "loss": 0.8863, "step": 5571 }, { "epoch": 0.9448071216617211, "grad_norm": 0.9615469796975518, "learning_rate": 1.5888795663883904e-07, "loss": 0.9344, "step": 5572 }, { "epoch": 0.9449766850360323, "grad_norm": 0.9917975247635588, "learning_rate": 1.5791426648324893e-07, "loss": 0.9329, "step": 5573 }, { "epoch": 0.9451462484103433, "grad_norm": 0.9623696250200654, "learning_rate": 1.5694354522253763e-07, "loss": 0.9385, "step": 5574 }, { "epoch": 0.9453158117846545, "grad_norm": 0.9719410098060581, "learning_rate": 1.5597579314952872e-07, "loss": 0.919, "step": 5575 }, { "epoch": 0.9454853751589657, "grad_norm": 0.9292574672349995, "learning_rate": 1.550110105561442e-07, "loss": 0.9224, "step": 5576 }, { "epoch": 0.9456549385332769, "grad_norm": 0.9622159312119182, "learning_rate": 1.5404919773341576e-07, "loss": 0.8969, "step": 5577 }, { "epoch": 0.9458245019075879, "grad_norm": 0.8900848039540217, "learning_rate": 1.5309035497147685e-07, "loss": 0.9167, "step": 5578 }, { "epoch": 0.9459940652818991, "grad_norm": 0.9567406557746101, "learning_rate": 1.5213448255956498e-07, "loss": 0.9223, "step": 5579 }, { "epoch": 0.9461636286562103, "grad_norm": 0.9649112549224468, "learning_rate": 1.5118158078602174e-07, "loss": 0.9278, "step": 5580 }, { "epoch": 0.9463331920305214, "grad_norm": 0.9899204061293503, "learning_rate": 1.5023164993829277e-07, "loss": 0.9634, "step": 5581 }, { "epoch": 0.9465027554048325, "grad_norm": 0.9910174614858378, "learning_rate": 1.492846903029288e-07, "loss": 0.9543, "step": 5582 }, { "epoch": 0.9466723187791437, "grad_norm": 1.1165891299652846, "learning_rate": 1.4834070216558138e-07, "loss": 0.934, "step": 5583 }, { "epoch": 0.9468418821534549, "grad_norm": 0.9531604091243622, "learning_rate": 1.4739968581100827e-07, "loss": 0.9056, "step": 5584 }, { "epoch": 0.947011445527766, "grad_norm": 0.9466397260320445, "learning_rate": 1.464616415230702e-07, "loss": 0.9336, "step": 5585 }, { "epoch": 0.9471810089020771, "grad_norm": 0.6330410316796224, "learning_rate": 1.4552656958473077e-07, "loss": 0.7907, "step": 5586 }, { "epoch": 0.9473505722763883, "grad_norm": 0.9728390949111491, "learning_rate": 1.4459447027805663e-07, "loss": 0.9321, "step": 5587 }, { "epoch": 0.9475201356506995, "grad_norm": 0.9417407924810578, "learning_rate": 1.4366534388421837e-07, "loss": 0.9103, "step": 5588 }, { "epoch": 0.9476896990250105, "grad_norm": 1.0079916016676647, "learning_rate": 1.4273919068349184e-07, "loss": 0.9681, "step": 5589 }, { "epoch": 0.9478592623993217, "grad_norm": 0.9966992765724542, "learning_rate": 1.418160109552502e-07, "loss": 0.9136, "step": 5590 }, { "epoch": 0.9480288257736329, "grad_norm": 1.0172678390116543, "learning_rate": 1.4089580497797738e-07, "loss": 0.888, "step": 5591 }, { "epoch": 0.9481983891479441, "grad_norm": 0.9566771617983861, "learning_rate": 1.3997857302925355e-07, "loss": 0.9057, "step": 5592 }, { "epoch": 0.9483679525222551, "grad_norm": 1.0312257714558524, "learning_rate": 1.3906431538576626e-07, "loss": 0.923, "step": 5593 }, { "epoch": 0.9485375158965663, "grad_norm": 0.9564312245215095, "learning_rate": 1.3815303232330267e-07, "loss": 0.9449, "step": 5594 }, { "epoch": 0.9487070792708775, "grad_norm": 0.9659078727613436, "learning_rate": 1.3724472411675517e-07, "loss": 0.8931, "step": 5595 }, { "epoch": 0.9488766426451887, "grad_norm": 0.9634178519869354, "learning_rate": 1.3633939104011784e-07, "loss": 0.9148, "step": 5596 }, { "epoch": 0.9490462060194997, "grad_norm": 0.9962956420596855, "learning_rate": 1.354370333664845e-07, "loss": 0.9597, "step": 5597 }, { "epoch": 0.9492157693938109, "grad_norm": 0.6187121049586382, "learning_rate": 1.345376513680574e-07, "loss": 0.7834, "step": 5598 }, { "epoch": 0.9493853327681221, "grad_norm": 1.0139951230608446, "learning_rate": 1.3364124531613622e-07, "loss": 0.9376, "step": 5599 }, { "epoch": 0.9495548961424333, "grad_norm": 0.9719922818769989, "learning_rate": 1.327478154811246e-07, "loss": 0.9379, "step": 5600 }, { "epoch": 0.9497244595167443, "grad_norm": 0.9591858631345868, "learning_rate": 1.3185736213252808e-07, "loss": 0.895, "step": 5601 }, { "epoch": 0.9498940228910555, "grad_norm": 1.022002593311473, "learning_rate": 1.3096988553895517e-07, "loss": 0.9291, "step": 5602 }, { "epoch": 0.9500635862653667, "grad_norm": 1.0316665256589774, "learning_rate": 1.3008538596811616e-07, "loss": 0.9481, "step": 5603 }, { "epoch": 0.9502331496396779, "grad_norm": 1.0308684421858336, "learning_rate": 1.2920386368682313e-07, "loss": 0.9579, "step": 5604 }, { "epoch": 0.9504027130139889, "grad_norm": 1.0165402809228128, "learning_rate": 1.2832531896098788e-07, "loss": 0.9524, "step": 5605 }, { "epoch": 0.9505722763883001, "grad_norm": 0.942635156534935, "learning_rate": 1.274497520556295e-07, "loss": 0.9095, "step": 5606 }, { "epoch": 0.9507418397626113, "grad_norm": 1.0356681253524946, "learning_rate": 1.2657716323486224e-07, "loss": 0.9257, "step": 5607 }, { "epoch": 0.9509114031369225, "grad_norm": 0.9819511028991497, "learning_rate": 1.257075527619067e-07, "loss": 0.9337, "step": 5608 }, { "epoch": 0.9510809665112335, "grad_norm": 0.9748422301015786, "learning_rate": 1.2484092089908307e-07, "loss": 0.9394, "step": 5609 }, { "epoch": 0.9512505298855447, "grad_norm": 0.9421552096809374, "learning_rate": 1.2397726790781438e-07, "loss": 0.9373, "step": 5610 }, { "epoch": 0.9514200932598559, "grad_norm": 1.0301769988605656, "learning_rate": 1.231165940486234e-07, "loss": 0.9349, "step": 5611 }, { "epoch": 0.9515896566341671, "grad_norm": 0.9672446073622168, "learning_rate": 1.2225889958113468e-07, "loss": 0.9162, "step": 5612 }, { "epoch": 0.9517592200084781, "grad_norm": 1.0234706563231974, "learning_rate": 1.2140418476407457e-07, "loss": 0.9458, "step": 5613 }, { "epoch": 0.9519287833827893, "grad_norm": 1.0242176883224865, "learning_rate": 1.2055244985527015e-07, "loss": 0.9184, "step": 5614 }, { "epoch": 0.9520983467571005, "grad_norm": 0.973263470313115, "learning_rate": 1.1970369511165035e-07, "loss": 0.8953, "step": 5615 }, { "epoch": 0.9522679101314117, "grad_norm": 0.9578182638458529, "learning_rate": 1.1885792078924375e-07, "loss": 0.9347, "step": 5616 }, { "epoch": 0.9524374735057227, "grad_norm": 0.5755626615677899, "learning_rate": 1.1801512714318286e-07, "loss": 0.7367, "step": 5617 }, { "epoch": 0.9526070368800339, "grad_norm": 0.9502889625309332, "learning_rate": 1.1717531442769658e-07, "loss": 0.882, "step": 5618 }, { "epoch": 0.9527766002543451, "grad_norm": 0.9394678663587318, "learning_rate": 1.1633848289611783e-07, "loss": 0.9261, "step": 5619 }, { "epoch": 0.9529461636286563, "grad_norm": 1.0459279682755407, "learning_rate": 1.1550463280087909e-07, "loss": 0.9175, "step": 5620 }, { "epoch": 0.9531157270029673, "grad_norm": 1.0038510674010808, "learning_rate": 1.1467376439351474e-07, "loss": 0.9375, "step": 5621 }, { "epoch": 0.9532852903772785, "grad_norm": 0.9477082037529725, "learning_rate": 1.1384587792465873e-07, "loss": 0.8881, "step": 5622 }, { "epoch": 0.9534548537515897, "grad_norm": 1.007230381478742, "learning_rate": 1.1302097364404241e-07, "loss": 0.929, "step": 5623 }, { "epoch": 0.9536244171259008, "grad_norm": 0.9902014910636292, "learning_rate": 1.121990518005045e-07, "loss": 0.9396, "step": 5624 }, { "epoch": 0.9537939805002119, "grad_norm": 0.9469107127998239, "learning_rate": 1.113801126419789e-07, "loss": 0.9476, "step": 5625 }, { "epoch": 0.9539635438745231, "grad_norm": 1.0363518686033886, "learning_rate": 1.1056415641550134e-07, "loss": 0.9559, "step": 5626 }, { "epoch": 0.9541331072488343, "grad_norm": 0.9767580486798779, "learning_rate": 1.0975118336720603e-07, "loss": 0.8872, "step": 5627 }, { "epoch": 0.9543026706231454, "grad_norm": 0.9808314506206055, "learning_rate": 1.0894119374233014e-07, "loss": 0.9195, "step": 5628 }, { "epoch": 0.9544722339974565, "grad_norm": 0.9146829940523473, "learning_rate": 1.0813418778521046e-07, "loss": 0.8736, "step": 5629 }, { "epoch": 0.9546417973717677, "grad_norm": 0.9904559510690045, "learning_rate": 1.0733016573928002e-07, "loss": 0.9268, "step": 5630 }, { "epoch": 0.9548113607460789, "grad_norm": 0.9888897543704092, "learning_rate": 1.0652912784707592e-07, "loss": 0.9612, "step": 5631 }, { "epoch": 0.95498092412039, "grad_norm": 0.9802965341644693, "learning_rate": 1.0573107435023378e-07, "loss": 0.9215, "step": 5632 }, { "epoch": 0.9551504874947011, "grad_norm": 0.955485779386987, "learning_rate": 1.0493600548948879e-07, "loss": 0.8895, "step": 5633 }, { "epoch": 0.9553200508690123, "grad_norm": 1.0085835751406802, "learning_rate": 1.041439215046769e-07, "loss": 0.9104, "step": 5634 }, { "epoch": 0.9554896142433235, "grad_norm": 0.9717792844394751, "learning_rate": 1.0335482263473028e-07, "loss": 0.8878, "step": 5635 }, { "epoch": 0.9556591776176346, "grad_norm": 0.945749492533743, "learning_rate": 1.0256870911768524e-07, "loss": 0.9162, "step": 5636 }, { "epoch": 0.9558287409919457, "grad_norm": 0.9761849266881163, "learning_rate": 1.0178558119067316e-07, "loss": 0.9254, "step": 5637 }, { "epoch": 0.9559983043662569, "grad_norm": 0.9777727793975898, "learning_rate": 1.0100543908992843e-07, "loss": 0.8997, "step": 5638 }, { "epoch": 0.9561678677405681, "grad_norm": 0.9770299570773744, "learning_rate": 1.002282830507828e-07, "loss": 0.9164, "step": 5639 }, { "epoch": 0.9563374311148792, "grad_norm": 1.0206630293165762, "learning_rate": 9.945411330766874e-08, "loss": 0.9347, "step": 5640 }, { "epoch": 0.9565069944891903, "grad_norm": 0.9108921607267623, "learning_rate": 9.8682930094115e-08, "loss": 0.898, "step": 5641 }, { "epoch": 0.9566765578635015, "grad_norm": 1.0443273276846412, "learning_rate": 9.791473364275328e-08, "loss": 0.914, "step": 5642 }, { "epoch": 0.9568461212378127, "grad_norm": 0.9706920878560784, "learning_rate": 9.714952418531154e-08, "loss": 0.9157, "step": 5643 }, { "epoch": 0.9570156846121238, "grad_norm": 0.9934523266760733, "learning_rate": 9.638730195261625e-08, "loss": 0.9099, "step": 5644 }, { "epoch": 0.9571852479864349, "grad_norm": 0.9815389649253861, "learning_rate": 9.562806717459572e-08, "loss": 0.901, "step": 5645 }, { "epoch": 0.9573548113607461, "grad_norm": 0.9631592108966471, "learning_rate": 9.487182008027563e-08, "loss": 0.9446, "step": 5646 }, { "epoch": 0.9575243747350572, "grad_norm": 0.884174264077493, "learning_rate": 9.411856089777904e-08, "loss": 0.9035, "step": 5647 }, { "epoch": 0.9576939381093684, "grad_norm": 0.9519611298011554, "learning_rate": 9.336828985432866e-08, "loss": 0.9343, "step": 5648 }, { "epoch": 0.9578635014836795, "grad_norm": 1.010741076170822, "learning_rate": 9.262100717624678e-08, "loss": 0.8845, "step": 5649 }, { "epoch": 0.9580330648579907, "grad_norm": 1.0096867389950603, "learning_rate": 9.187671308895418e-08, "loss": 0.9361, "step": 5650 }, { "epoch": 0.9582026282323018, "grad_norm": 0.992811217632035, "learning_rate": 9.113540781696795e-08, "loss": 0.9638, "step": 5651 }, { "epoch": 0.958372191606613, "grad_norm": 0.9853723854520304, "learning_rate": 9.039709158390587e-08, "loss": 0.9322, "step": 5652 }, { "epoch": 0.9585417549809241, "grad_norm": 0.991761359071895, "learning_rate": 8.966176461248422e-08, "loss": 0.9378, "step": 5653 }, { "epoch": 0.9587113183552353, "grad_norm": 0.962925073424598, "learning_rate": 8.892942712451447e-08, "loss": 0.9005, "step": 5654 }, { "epoch": 0.9588808817295464, "grad_norm": 1.0137321182282732, "learning_rate": 8.82000793409088e-08, "loss": 0.9227, "step": 5655 }, { "epoch": 0.9590504451038576, "grad_norm": 0.9567330325695068, "learning_rate": 8.747372148167787e-08, "loss": 0.8776, "step": 5656 }, { "epoch": 0.9592200084781687, "grad_norm": 0.9544805413220517, "learning_rate": 8.675035376593088e-08, "loss": 0.9257, "step": 5657 }, { "epoch": 0.9593895718524799, "grad_norm": 1.0089489011254036, "learning_rate": 8.602997641187217e-08, "loss": 0.9236, "step": 5658 }, { "epoch": 0.959559135226791, "grad_norm": 0.9390273492041998, "learning_rate": 8.531258963680567e-08, "loss": 0.8938, "step": 5659 }, { "epoch": 0.9597286986011022, "grad_norm": 0.9869598819749525, "learning_rate": 8.459819365713384e-08, "loss": 0.9204, "step": 5660 }, { "epoch": 0.9598982619754133, "grad_norm": 0.9590113172342194, "learning_rate": 8.388678868835653e-08, "loss": 0.8856, "step": 5661 }, { "epoch": 0.9600678253497245, "grad_norm": 0.9627742367098747, "learning_rate": 8.317837494507097e-08, "loss": 0.9451, "step": 5662 }, { "epoch": 0.9602373887240356, "grad_norm": 0.9925573175071802, "learning_rate": 8.247295264097288e-08, "loss": 0.9234, "step": 5663 }, { "epoch": 0.9604069520983468, "grad_norm": 0.6525565568088377, "learning_rate": 8.177052198885426e-08, "loss": 0.7322, "step": 5664 }, { "epoch": 0.9605765154726579, "grad_norm": 0.9880303470347077, "learning_rate": 8.107108320060675e-08, "loss": 0.8895, "step": 5665 }, { "epoch": 0.960746078846969, "grad_norm": 0.9613606650954206, "learning_rate": 8.037463648721488e-08, "loss": 0.9345, "step": 5666 }, { "epoch": 0.9609156422212802, "grad_norm": 1.0317409598980691, "learning_rate": 7.96811820587684e-08, "loss": 0.9076, "step": 5667 }, { "epoch": 0.9610852055955914, "grad_norm": 0.9413026092247981, "learning_rate": 7.899072012444664e-08, "loss": 0.911, "step": 5668 }, { "epoch": 0.9612547689699025, "grad_norm": 0.9661800906856106, "learning_rate": 7.830325089253077e-08, "loss": 0.9192, "step": 5669 }, { "epoch": 0.9614243323442137, "grad_norm": 0.9960374005420917, "learning_rate": 7.761877457039712e-08, "loss": 0.9051, "step": 5670 }, { "epoch": 0.9615938957185248, "grad_norm": 1.0164163523231122, "learning_rate": 7.693729136452165e-08, "loss": 0.9635, "step": 5671 }, { "epoch": 0.9617634590928359, "grad_norm": 0.9686163005741266, "learning_rate": 7.625880148047437e-08, "loss": 0.8772, "step": 5672 }, { "epoch": 0.9619330224671471, "grad_norm": 0.9602098714588723, "learning_rate": 7.558330512292378e-08, "loss": 0.9524, "step": 5673 }, { "epoch": 0.9621025858414582, "grad_norm": 1.0509085868271995, "learning_rate": 7.491080249563687e-08, "loss": 0.9339, "step": 5674 }, { "epoch": 0.9622721492157694, "grad_norm": 0.610630238315975, "learning_rate": 7.424129380147471e-08, "loss": 0.7404, "step": 5675 }, { "epoch": 0.9624417125900805, "grad_norm": 0.9789889536923958, "learning_rate": 7.357477924239797e-08, "loss": 0.9418, "step": 5676 }, { "epoch": 0.9626112759643917, "grad_norm": 0.9925782389575215, "learning_rate": 7.291125901946027e-08, "loss": 0.9241, "step": 5677 }, { "epoch": 0.9627808393387028, "grad_norm": 0.9469651976198064, "learning_rate": 7.225073333281707e-08, "loss": 0.9366, "step": 5678 }, { "epoch": 0.962950402713014, "grad_norm": 0.9815750036675475, "learning_rate": 7.159320238171674e-08, "loss": 0.9352, "step": 5679 }, { "epoch": 0.9631199660873251, "grad_norm": 0.9431867658980311, "learning_rate": 7.093866636450508e-08, "loss": 0.9148, "step": 5680 }, { "epoch": 0.9632895294616363, "grad_norm": 0.9345922278115185, "learning_rate": 7.028712547862526e-08, "loss": 0.9068, "step": 5681 }, { "epoch": 0.9634590928359474, "grad_norm": 0.9996099228733981, "learning_rate": 6.963857992061785e-08, "loss": 0.9278, "step": 5682 }, { "epoch": 0.9636286562102586, "grad_norm": 0.9603346693015956, "learning_rate": 6.899302988611744e-08, "loss": 0.8796, "step": 5683 }, { "epoch": 0.9637982195845697, "grad_norm": 1.0636078220456575, "learning_rate": 6.835047556985497e-08, "loss": 0.9006, "step": 5684 }, { "epoch": 0.9639677829588809, "grad_norm": 1.0039187387359212, "learning_rate": 6.771091716566091e-08, "loss": 0.8885, "step": 5685 }, { "epoch": 0.964137346333192, "grad_norm": 0.9230866095184542, "learning_rate": 6.707435486645986e-08, "loss": 0.9108, "step": 5686 }, { "epoch": 0.9643069097075032, "grad_norm": 0.934253790219709, "learning_rate": 6.644078886427042e-08, "loss": 0.9287, "step": 5687 }, { "epoch": 0.9644764730818143, "grad_norm": 0.9707086107212991, "learning_rate": 6.581021935021303e-08, "loss": 0.91, "step": 5688 }, { "epoch": 0.9646460364561255, "grad_norm": 0.953535368647796, "learning_rate": 6.51826465144978e-08, "loss": 0.8943, "step": 5689 }, { "epoch": 0.9648155998304366, "grad_norm": 0.9939304626034535, "learning_rate": 6.455807054643659e-08, "loss": 0.9545, "step": 5690 }, { "epoch": 0.9649851632047478, "grad_norm": 1.002876986588269, "learning_rate": 6.393649163443205e-08, "loss": 0.8996, "step": 5691 }, { "epoch": 0.9651547265790589, "grad_norm": 0.9857816398379425, "learning_rate": 6.331790996598753e-08, "loss": 0.9097, "step": 5692 }, { "epoch": 0.96532428995337, "grad_norm": 0.9742143311603425, "learning_rate": 6.27023257276993e-08, "loss": 0.9084, "step": 5693 }, { "epoch": 0.9654938533276812, "grad_norm": 0.958610934198604, "learning_rate": 6.208973910525995e-08, "loss": 0.9235, "step": 5694 }, { "epoch": 0.9656634167019924, "grad_norm": 0.9531214441805956, "learning_rate": 6.148015028345833e-08, "loss": 0.9068, "step": 5695 }, { "epoch": 0.9658329800763035, "grad_norm": 0.939572689423655, "learning_rate": 6.087355944617845e-08, "loss": 0.924, "step": 5696 }, { "epoch": 0.9660025434506146, "grad_norm": 0.9874965333327451, "learning_rate": 6.026996677640062e-08, "loss": 0.9602, "step": 5697 }, { "epoch": 0.9661721068249258, "grad_norm": 0.9380812117772717, "learning_rate": 5.96693724562003e-08, "loss": 0.9185, "step": 5698 }, { "epoch": 0.966341670199237, "grad_norm": 0.9480641879644737, "learning_rate": 5.907177666674813e-08, "loss": 0.8965, "step": 5699 }, { "epoch": 0.9665112335735481, "grad_norm": 0.9875226351824268, "learning_rate": 5.8477179588311004e-08, "loss": 0.9807, "step": 5700 }, { "epoch": 0.9666807969478592, "grad_norm": 0.9909539808267465, "learning_rate": 5.788558140025213e-08, "loss": 0.9084, "step": 5701 }, { "epoch": 0.9668503603221704, "grad_norm": 0.9510467986338492, "learning_rate": 5.7296982281026534e-08, "loss": 0.9007, "step": 5702 }, { "epoch": 0.9670199236964816, "grad_norm": 0.9817580813449093, "learning_rate": 5.671138240818885e-08, "loss": 0.941, "step": 5703 }, { "epoch": 0.9671894870707927, "grad_norm": 0.9920873009432711, "learning_rate": 5.612878195838667e-08, "loss": 0.9236, "step": 5704 }, { "epoch": 0.9673590504451038, "grad_norm": 0.9477881284159222, "learning_rate": 5.5549181107362734e-08, "loss": 0.9479, "step": 5705 }, { "epoch": 0.967528613819415, "grad_norm": 0.9566807984243456, "learning_rate": 5.497258002995498e-08, "loss": 0.9042, "step": 5706 }, { "epoch": 0.9676981771937262, "grad_norm": 0.9604977221777516, "learning_rate": 5.43989789000976e-08, "loss": 0.8951, "step": 5707 }, { "epoch": 0.9678677405680373, "grad_norm": 0.9830219808201357, "learning_rate": 5.382837789081885e-08, "loss": 0.9261, "step": 5708 }, { "epoch": 0.9680373039423484, "grad_norm": 1.0012836036733856, "learning_rate": 5.326077717424216e-08, "loss": 0.942, "step": 5709 }, { "epoch": 0.9682068673166596, "grad_norm": 0.9718438947246102, "learning_rate": 5.269617692158613e-08, "loss": 0.8627, "step": 5710 }, { "epoch": 0.9683764306909708, "grad_norm": 1.0044877670802885, "learning_rate": 5.213457730316451e-08, "loss": 0.9284, "step": 5711 }, { "epoch": 0.9685459940652819, "grad_norm": 0.9654535072698528, "learning_rate": 5.157597848838514e-08, "loss": 0.9299, "step": 5712 }, { "epoch": 0.968715557439593, "grad_norm": 0.9606219114708701, "learning_rate": 5.102038064575099e-08, "loss": 0.9236, "step": 5713 }, { "epoch": 0.9688851208139042, "grad_norm": 0.967816388816639, "learning_rate": 5.0467783942860226e-08, "loss": 0.9339, "step": 5714 }, { "epoch": 0.9690546841882154, "grad_norm": 0.9552734444853576, "learning_rate": 4.991818854640396e-08, "loss": 0.9104, "step": 5715 }, { "epoch": 0.9692242475625265, "grad_norm": 0.9613849111860473, "learning_rate": 4.937159462217067e-08, "loss": 0.9262, "step": 5716 }, { "epoch": 0.9693938109368376, "grad_norm": 1.0084712509573805, "learning_rate": 4.882800233504292e-08, "loss": 0.9156, "step": 5717 }, { "epoch": 0.9695633743111488, "grad_norm": 0.9944151443537893, "learning_rate": 4.82874118489951e-08, "loss": 0.9485, "step": 5718 }, { "epoch": 0.96973293768546, "grad_norm": 0.9992288448640044, "learning_rate": 4.774982332709788e-08, "loss": 0.9458, "step": 5719 }, { "epoch": 0.969902501059771, "grad_norm": 0.9826194930467924, "learning_rate": 4.7215236931517084e-08, "loss": 0.9209, "step": 5720 }, { "epoch": 0.9700720644340822, "grad_norm": 0.9659032676055406, "learning_rate": 4.6683652823513725e-08, "loss": 0.9488, "step": 5721 }, { "epoch": 0.9702416278083934, "grad_norm": 0.9933828402574991, "learning_rate": 4.615507116343954e-08, "loss": 0.9016, "step": 5722 }, { "epoch": 0.9704111911827046, "grad_norm": 0.9819132288854548, "learning_rate": 4.562949211074474e-08, "loss": 0.8873, "step": 5723 }, { "epoch": 0.9705807545570156, "grad_norm": 0.7290138869877412, "learning_rate": 4.510691582397031e-08, "loss": 0.7607, "step": 5724 }, { "epoch": 0.9707503179313268, "grad_norm": 0.9147663896758559, "learning_rate": 4.458734246075236e-08, "loss": 0.8632, "step": 5725 }, { "epoch": 0.970919881305638, "grad_norm": 0.9536132939718771, "learning_rate": 4.407077217782441e-08, "loss": 0.8999, "step": 5726 }, { "epoch": 0.9710894446799492, "grad_norm": 1.0237760834816376, "learning_rate": 4.3557205131008475e-08, "loss": 0.9838, "step": 5727 }, { "epoch": 0.9712590080542602, "grad_norm": 0.9587203742855486, "learning_rate": 4.304664147522619e-08, "loss": 0.9156, "step": 5728 }, { "epoch": 0.9714285714285714, "grad_norm": 0.9773449551023887, "learning_rate": 4.253908136448881e-08, "loss": 0.8916, "step": 5729 }, { "epoch": 0.9715981348028826, "grad_norm": 0.9852177110324621, "learning_rate": 4.203452495190386e-08, "loss": 0.9112, "step": 5730 }, { "epoch": 0.9717676981771938, "grad_norm": 0.9686742891979236, "learning_rate": 4.153297238967291e-08, "loss": 0.8842, "step": 5731 }, { "epoch": 0.9719372615515048, "grad_norm": 0.9684951003641104, "learning_rate": 4.103442382909051e-08, "loss": 0.9128, "step": 5732 }, { "epoch": 0.972106824925816, "grad_norm": 0.992194231264156, "learning_rate": 4.053887942054524e-08, "loss": 0.9302, "step": 5733 }, { "epoch": 0.9722763883001272, "grad_norm": 0.9759890506299673, "learning_rate": 4.004633931351864e-08, "loss": 0.9111, "step": 5734 }, { "epoch": 0.9724459516744384, "grad_norm": 0.935065101666358, "learning_rate": 3.9556803656588536e-08, "loss": 0.8615, "step": 5735 }, { "epoch": 0.9726155150487494, "grad_norm": 0.997075330245176, "learning_rate": 3.907027259742347e-08, "loss": 0.9203, "step": 5736 }, { "epoch": 0.9727850784230606, "grad_norm": 0.9278613670177845, "learning_rate": 3.858674628278825e-08, "loss": 0.9001, "step": 5737 }, { "epoch": 0.9729546417973718, "grad_norm": 1.0076548126693798, "learning_rate": 3.810622485853954e-08, "loss": 0.9463, "step": 5738 }, { "epoch": 0.973124205171683, "grad_norm": 0.9755484425427924, "learning_rate": 3.762870846962807e-08, "loss": 0.9437, "step": 5739 }, { "epoch": 0.973293768545994, "grad_norm": 0.9537629882345634, "learning_rate": 3.7154197260097503e-08, "loss": 0.9294, "step": 5740 }, { "epoch": 0.9734633319203052, "grad_norm": 1.0160183945440993, "learning_rate": 3.668269137308666e-08, "loss": 0.9099, "step": 5741 }, { "epoch": 0.9736328952946164, "grad_norm": 0.9914671709405419, "learning_rate": 3.6214190950825126e-08, "loss": 0.9656, "step": 5742 }, { "epoch": 0.9738024586689276, "grad_norm": 1.0009969481059364, "learning_rate": 3.5748696134639825e-08, "loss": 0.9561, "step": 5743 }, { "epoch": 0.9739720220432386, "grad_norm": 0.9505026087309247, "learning_rate": 3.528620706494623e-08, "loss": 0.9055, "step": 5744 }, { "epoch": 0.9741415854175498, "grad_norm": 0.9630168602402888, "learning_rate": 3.482672388125719e-08, "loss": 0.9261, "step": 5745 }, { "epoch": 0.974311148791861, "grad_norm": 1.050634437925557, "learning_rate": 3.437024672217626e-08, "loss": 0.9419, "step": 5746 }, { "epoch": 0.9744807121661722, "grad_norm": 0.9731626729401607, "learning_rate": 3.3916775725402195e-08, "loss": 0.8853, "step": 5747 }, { "epoch": 0.9746502755404832, "grad_norm": 0.9970614054701489, "learning_rate": 3.346631102772446e-08, "loss": 0.918, "step": 5748 }, { "epoch": 0.9748198389147944, "grad_norm": 1.0058552266255143, "learning_rate": 3.3018852765027696e-08, "loss": 0.9109, "step": 5749 }, { "epoch": 0.9749894022891056, "grad_norm": 0.953404379849714, "learning_rate": 3.257440107229059e-08, "loss": 0.8999, "step": 5750 }, { "epoch": 0.9751589656634168, "grad_norm": 1.0066125749200567, "learning_rate": 3.213295608358036e-08, "loss": 0.9143, "step": 5751 }, { "epoch": 0.9753285290377278, "grad_norm": 0.9720353939399032, "learning_rate": 3.169451793206268e-08, "loss": 0.9328, "step": 5752 }, { "epoch": 0.975498092412039, "grad_norm": 0.9840971558662146, "learning_rate": 3.125908674999289e-08, "loss": 0.9438, "step": 5753 }, { "epoch": 0.9756676557863502, "grad_norm": 0.9975905987164495, "learning_rate": 3.082666266872036e-08, "loss": 0.9312, "step": 5754 }, { "epoch": 0.9758372191606614, "grad_norm": 1.0086611952218774, "learning_rate": 3.039724581868631e-08, "loss": 0.9417, "step": 5755 }, { "epoch": 0.9760067825349724, "grad_norm": 1.0000978637974742, "learning_rate": 2.99708363294271e-08, "loss": 0.9214, "step": 5756 }, { "epoch": 0.9761763459092836, "grad_norm": 0.9686143878410364, "learning_rate": 2.9547434329568747e-08, "loss": 0.9506, "step": 5757 }, { "epoch": 0.9763459092835948, "grad_norm": 0.9618460329437244, "learning_rate": 2.9127039946832413e-08, "loss": 0.9213, "step": 5758 }, { "epoch": 0.976515472657906, "grad_norm": 0.985636762931922, "learning_rate": 2.8709653308032216e-08, "loss": 0.9016, "step": 5759 }, { "epoch": 0.976685036032217, "grad_norm": 0.9940909677380797, "learning_rate": 2.829527453907299e-08, "loss": 0.9519, "step": 5760 }, { "epoch": 0.9768545994065282, "grad_norm": 0.978051707722402, "learning_rate": 2.7883903764953647e-08, "loss": 0.93, "step": 5761 }, { "epoch": 0.9770241627808394, "grad_norm": 0.9729715025861804, "learning_rate": 2.7475541109766023e-08, "loss": 0.9585, "step": 5762 }, { "epoch": 0.9771937261551504, "grad_norm": 0.9605491081213781, "learning_rate": 2.7070186696692702e-08, "loss": 0.9137, "step": 5763 }, { "epoch": 0.9773632895294616, "grad_norm": 0.9880329085831095, "learning_rate": 2.6667840648010314e-08, "loss": 0.9345, "step": 5764 }, { "epoch": 0.9775328529037728, "grad_norm": 0.9868937318654798, "learning_rate": 2.6268503085089547e-08, "loss": 0.9468, "step": 5765 }, { "epoch": 0.977702416278084, "grad_norm": 0.9393080976762372, "learning_rate": 2.5872174128388494e-08, "loss": 0.8913, "step": 5766 }, { "epoch": 0.977871979652395, "grad_norm": 1.0155322351056533, "learning_rate": 2.547885389746485e-08, "loss": 0.9441, "step": 5767 }, { "epoch": 0.9780415430267062, "grad_norm": 0.9684832938941004, "learning_rate": 2.50885425109626e-08, "loss": 0.9032, "step": 5768 }, { "epoch": 0.9782111064010174, "grad_norm": 0.9458642196609883, "learning_rate": 2.470124008661978e-08, "loss": 0.9172, "step": 5769 }, { "epoch": 0.9783806697753286, "grad_norm": 1.032689397630441, "learning_rate": 2.43169467412685e-08, "loss": 0.9607, "step": 5770 }, { "epoch": 0.9785502331496396, "grad_norm": 0.9677141493209032, "learning_rate": 2.3935662590831578e-08, "loss": 0.9173, "step": 5771 }, { "epoch": 0.9787197965239508, "grad_norm": 0.95934637203301, "learning_rate": 2.35573877503259e-08, "loss": 0.9013, "step": 5772 }, { "epoch": 0.978889359898262, "grad_norm": 0.6423029197825266, "learning_rate": 2.318212233385686e-08, "loss": 0.7507, "step": 5773 }, { "epoch": 0.9790589232725732, "grad_norm": 0.9832334420626041, "learning_rate": 2.280986645462613e-08, "loss": 0.9256, "step": 5774 }, { "epoch": 0.9792284866468842, "grad_norm": 0.9727075222037297, "learning_rate": 2.244062022492499e-08, "loss": 0.9089, "step": 5775 }, { "epoch": 0.9793980500211954, "grad_norm": 0.9743146804347741, "learning_rate": 2.2074383756137686e-08, "loss": 0.8786, "step": 5776 }, { "epoch": 0.9795676133955066, "grad_norm": 0.6649035059316148, "learning_rate": 2.171115715874139e-08, "loss": 0.763, "step": 5777 }, { "epoch": 0.9797371767698178, "grad_norm": 0.9425552551595091, "learning_rate": 2.135094054230402e-08, "loss": 0.9071, "step": 5778 }, { "epoch": 0.9799067401441288, "grad_norm": 1.0072208577935549, "learning_rate": 2.0993734015485324e-08, "loss": 0.922, "step": 5779 }, { "epoch": 0.98007630351844, "grad_norm": 0.9397666776229328, "learning_rate": 2.063953768603799e-08, "loss": 0.9314, "step": 5780 }, { "epoch": 0.9802458668927512, "grad_norm": 0.9852923629008613, "learning_rate": 2.028835166080767e-08, "loss": 0.9516, "step": 5781 }, { "epoch": 0.9804154302670623, "grad_norm": 1.0732409665693492, "learning_rate": 1.994017604572851e-08, "loss": 0.9145, "step": 5782 }, { "epoch": 0.9805849936413734, "grad_norm": 0.9538396900567173, "learning_rate": 1.9595010945830937e-08, "loss": 0.9192, "step": 5783 }, { "epoch": 0.9807545570156846, "grad_norm": 0.973310101202835, "learning_rate": 1.9252856465233893e-08, "loss": 0.9268, "step": 5784 }, { "epoch": 0.9809241203899958, "grad_norm": 0.9940966776700779, "learning_rate": 1.8913712707149255e-08, "loss": 0.9482, "step": 5785 }, { "epoch": 0.981093683764307, "grad_norm": 0.9738376906588351, "learning_rate": 1.857757977388186e-08, "loss": 0.9332, "step": 5786 }, { "epoch": 0.981263247138618, "grad_norm": 0.9518809549935605, "learning_rate": 1.824445776682504e-08, "loss": 0.9225, "step": 5787 }, { "epoch": 0.9814328105129292, "grad_norm": 0.9883159436394309, "learning_rate": 1.7914346786468416e-08, "loss": 0.9213, "step": 5788 }, { "epoch": 0.9816023738872404, "grad_norm": 0.9777938140908404, "learning_rate": 1.7587246932389003e-08, "loss": 0.9261, "step": 5789 }, { "epoch": 0.9817719372615515, "grad_norm": 0.9678373609473974, "learning_rate": 1.7263158303258975e-08, "loss": 0.8956, "step": 5790 }, { "epoch": 0.9819415006358626, "grad_norm": 0.953466937473238, "learning_rate": 1.6942080996840137e-08, "loss": 0.8772, "step": 5791 }, { "epoch": 0.9821110640101738, "grad_norm": 0.9810846228931703, "learning_rate": 1.6624015109986125e-08, "loss": 0.9228, "step": 5792 }, { "epoch": 0.982280627384485, "grad_norm": 0.979224213646194, "learning_rate": 1.630896073864352e-08, "loss": 0.913, "step": 5793 }, { "epoch": 0.9824501907587961, "grad_norm": 0.958177217421729, "learning_rate": 1.5996917977847416e-08, "loss": 0.9292, "step": 5794 }, { "epoch": 0.9826197541331072, "grad_norm": 0.9961155070730692, "learning_rate": 1.568788692172807e-08, "loss": 0.9029, "step": 5795 }, { "epoch": 0.9827893175074184, "grad_norm": 0.9785147274800566, "learning_rate": 1.5381867663505358e-08, "loss": 0.9256, "step": 5796 }, { "epoch": 0.9829588808817296, "grad_norm": 1.0353491713847516, "learning_rate": 1.5078860295490995e-08, "loss": 0.9592, "step": 5797 }, { "epoch": 0.9831284442560407, "grad_norm": 0.9837419037972505, "learning_rate": 1.477886490908742e-08, "loss": 0.9068, "step": 5798 }, { "epoch": 0.9832980076303518, "grad_norm": 0.9932699866230158, "learning_rate": 1.4481881594788917e-08, "loss": 0.9425, "step": 5799 }, { "epoch": 0.983467571004663, "grad_norm": 1.017134505521778, "learning_rate": 1.4187910442182706e-08, "loss": 0.9214, "step": 5800 }, { "epoch": 0.9836371343789742, "grad_norm": 0.7060905039414138, "learning_rate": 1.3896951539945635e-08, "loss": 0.7667, "step": 5801 }, { "epoch": 0.9838066977532853, "grad_norm": 0.9633906408581391, "learning_rate": 1.3609004975846385e-08, "loss": 0.933, "step": 5802 }, { "epoch": 0.9839762611275964, "grad_norm": 0.9897757984614768, "learning_rate": 1.3324070836743252e-08, "loss": 0.9013, "step": 5803 }, { "epoch": 0.9841458245019076, "grad_norm": 0.9590556830235841, "learning_rate": 1.3042149208589705e-08, "loss": 0.907, "step": 5804 }, { "epoch": 0.9843153878762188, "grad_norm": 0.979845303368506, "learning_rate": 1.2763240176427715e-08, "loss": 0.9374, "step": 5805 }, { "epoch": 0.9844849512505299, "grad_norm": 0.9412766436834193, "learning_rate": 1.2487343824389986e-08, "loss": 0.92, "step": 5806 }, { "epoch": 0.984654514624841, "grad_norm": 0.9558483119495058, "learning_rate": 1.2214460235703273e-08, "loss": 0.9192, "step": 5807 }, { "epoch": 0.9848240779991522, "grad_norm": 1.0047360796444338, "learning_rate": 1.1944589492681735e-08, "loss": 0.8924, "step": 5808 }, { "epoch": 0.9849936413734633, "grad_norm": 0.9699925532658182, "learning_rate": 1.1677731676733584e-08, "loss": 0.8803, "step": 5809 }, { "epoch": 0.9851632047477745, "grad_norm": 0.9388798112198161, "learning_rate": 1.141388686835776e-08, "loss": 0.8974, "step": 5810 }, { "epoch": 0.9853327681220856, "grad_norm": 1.0076872419097724, "learning_rate": 1.1153055147143932e-08, "loss": 0.9443, "step": 5811 }, { "epoch": 0.9855023314963968, "grad_norm": 0.9447579113690551, "learning_rate": 1.0895236591771385e-08, "loss": 0.887, "step": 5812 }, { "epoch": 0.9856718948707079, "grad_norm": 0.975548955816519, "learning_rate": 1.0640431280013463e-08, "loss": 0.9039, "step": 5813 }, { "epoch": 0.9858414582450191, "grad_norm": 0.9653471513006129, "learning_rate": 1.0388639288732017e-08, "loss": 0.9228, "step": 5814 }, { "epoch": 0.9860110216193302, "grad_norm": 0.9775914660207732, "learning_rate": 1.0139860693880732e-08, "loss": 0.9026, "step": 5815 }, { "epoch": 0.9861805849936414, "grad_norm": 0.6436646740636643, "learning_rate": 9.894095570505136e-09, "loss": 0.7655, "step": 5816 }, { "epoch": 0.9863501483679525, "grad_norm": 1.0235827902158852, "learning_rate": 9.651343992740369e-09, "loss": 0.9291, "step": 5817 }, { "epoch": 0.9865197117422637, "grad_norm": 1.007073135159899, "learning_rate": 9.411606033813413e-09, "loss": 0.9288, "step": 5818 }, { "epoch": 0.9866892751165748, "grad_norm": 0.9513369507166303, "learning_rate": 9.174881766043086e-09, "loss": 0.898, "step": 5819 }, { "epoch": 0.986858838490886, "grad_norm": 0.9479366986872202, "learning_rate": 8.941171260835602e-09, "loss": 0.8823, "step": 5820 }, { "epoch": 0.9870284018651971, "grad_norm": 0.9731323710849982, "learning_rate": 8.71047458869234e-09, "loss": 0.9318, "step": 5821 }, { "epoch": 0.9871979652395083, "grad_norm": 0.9924270055852914, "learning_rate": 8.482791819203195e-09, "loss": 0.8959, "step": 5822 }, { "epoch": 0.9873675286138194, "grad_norm": 0.6674942691795429, "learning_rate": 8.25812302104878e-09, "loss": 0.7325, "step": 5823 }, { "epoch": 0.9875370919881306, "grad_norm": 0.9939415808753221, "learning_rate": 8.036468262001551e-09, "loss": 0.9254, "step": 5824 }, { "epoch": 0.9877066553624417, "grad_norm": 0.9853251903001841, "learning_rate": 7.817827608924689e-09, "loss": 0.9461, "step": 5825 }, { "epoch": 0.9878762187367529, "grad_norm": 0.9432771888845365, "learning_rate": 7.602201127770991e-09, "loss": 0.9342, "step": 5826 }, { "epoch": 0.988045782111064, "grad_norm": 0.9446451906872987, "learning_rate": 7.389588883585097e-09, "loss": 0.9166, "step": 5827 }, { "epoch": 0.9882153454853752, "grad_norm": 0.9914247035390438, "learning_rate": 7.1799909405034786e-09, "loss": 0.9434, "step": 5828 }, { "epoch": 0.9883849088596863, "grad_norm": 0.9582802470164575, "learning_rate": 6.973407361750006e-09, "loss": 0.8944, "step": 5829 }, { "epoch": 0.9885544722339975, "grad_norm": 0.9535694007677109, "learning_rate": 6.76983820964261e-09, "loss": 0.8791, "step": 5830 }, { "epoch": 0.9887240356083086, "grad_norm": 0.9541306842504396, "learning_rate": 6.569283545587724e-09, "loss": 0.9517, "step": 5831 }, { "epoch": 0.9888935989826197, "grad_norm": 0.9284378080196115, "learning_rate": 6.371743430082511e-09, "loss": 0.9227, "step": 5832 }, { "epoch": 0.9890631623569309, "grad_norm": 0.9796712607493825, "learning_rate": 6.1772179227181926e-09, "loss": 0.9083, "step": 5833 }, { "epoch": 0.9892327257312421, "grad_norm": 0.9823053574433819, "learning_rate": 5.985707082172277e-09, "loss": 0.9343, "step": 5834 }, { "epoch": 0.9894022891055532, "grad_norm": 0.9476368598497723, "learning_rate": 5.7972109662141065e-09, "loss": 0.9183, "step": 5835 }, { "epoch": 0.9895718524798643, "grad_norm": 1.0149649535842422, "learning_rate": 5.611729631703755e-09, "loss": 0.9246, "step": 5836 }, { "epoch": 0.9897414158541755, "grad_norm": 0.9632984619230546, "learning_rate": 5.429263134594242e-09, "loss": 0.9291, "step": 5837 }, { "epoch": 0.9899109792284867, "grad_norm": 0.952097399941554, "learning_rate": 5.249811529925985e-09, "loss": 0.9398, "step": 5838 }, { "epoch": 0.9900805426027978, "grad_norm": 0.9684569394622005, "learning_rate": 5.073374871831238e-09, "loss": 0.9012, "step": 5839 }, { "epoch": 0.9902501059771089, "grad_norm": 0.9394407124091958, "learning_rate": 4.899953213532982e-09, "loss": 0.9203, "step": 5840 }, { "epoch": 0.9904196693514201, "grad_norm": 0.9413710522573973, "learning_rate": 4.7295466073427055e-09, "loss": 0.936, "step": 5841 }, { "epoch": 0.9905892327257313, "grad_norm": 0.9844324366950045, "learning_rate": 4.562155104665955e-09, "loss": 0.9292, "step": 5842 }, { "epoch": 0.9907587961000424, "grad_norm": 1.0165033962821426, "learning_rate": 4.3977787559967845e-09, "loss": 0.9602, "step": 5843 }, { "epoch": 0.9909283594743535, "grad_norm": 0.9746049196258337, "learning_rate": 4.236417610918864e-09, "loss": 0.9267, "step": 5844 }, { "epoch": 0.9910979228486647, "grad_norm": 0.9532953938969321, "learning_rate": 4.0780717181077015e-09, "loss": 0.9637, "step": 5845 }, { "epoch": 0.9912674862229759, "grad_norm": 0.6747532539338036, "learning_rate": 3.922741125328422e-09, "loss": 0.7913, "step": 5846 }, { "epoch": 0.991437049597287, "grad_norm": 0.9935089975069966, "learning_rate": 3.770425879437989e-09, "loss": 0.9522, "step": 5847 }, { "epoch": 0.9916066129715981, "grad_norm": 0.974368758116912, "learning_rate": 3.6211260263818717e-09, "loss": 0.8928, "step": 5848 }, { "epoch": 0.9917761763459093, "grad_norm": 0.9590536449179392, "learning_rate": 3.474841611197377e-09, "loss": 0.9101, "step": 5849 }, { "epoch": 0.9919457397202205, "grad_norm": 0.9723126119139327, "learning_rate": 3.33157267801143e-09, "loss": 0.9131, "step": 5850 }, { "epoch": 0.9921153030945316, "grad_norm": 0.9955463437004766, "learning_rate": 3.1913192700405715e-09, "loss": 0.9345, "step": 5851 }, { "epoch": 0.9922848664688427, "grad_norm": 1.0040663640133396, "learning_rate": 3.0540814295942913e-09, "loss": 0.9505, "step": 5852 }, { "epoch": 0.9924544298431539, "grad_norm": 0.6599730884173741, "learning_rate": 2.9198591980705847e-09, "loss": 0.7619, "step": 5853 }, { "epoch": 0.992623993217465, "grad_norm": 1.0084344166407948, "learning_rate": 2.788652615957066e-09, "loss": 0.9555, "step": 5854 }, { "epoch": 0.9927935565917761, "grad_norm": 0.9824581687215178, "learning_rate": 2.660461722832075e-09, "loss": 0.9091, "step": 5855 }, { "epoch": 0.9929631199660873, "grad_norm": 0.9946139732230425, "learning_rate": 2.5352865573669007e-09, "loss": 0.9239, "step": 5856 }, { "epoch": 0.9931326833403985, "grad_norm": 0.9584922360558523, "learning_rate": 2.4131271573191172e-09, "loss": 0.9369, "step": 5857 }, { "epoch": 0.9933022467147096, "grad_norm": 1.0013648998270042, "learning_rate": 2.2939835595392477e-09, "loss": 0.9138, "step": 5858 }, { "epoch": 0.9934718100890207, "grad_norm": 0.9928014921492164, "learning_rate": 2.1778557999674323e-09, "loss": 0.948, "step": 5859 }, { "epoch": 0.9936413734633319, "grad_norm": 0.9409007338041752, "learning_rate": 2.0647439136334267e-09, "loss": 0.9133, "step": 5860 }, { "epoch": 0.9938109368376431, "grad_norm": 1.0032080483004022, "learning_rate": 1.9546479346588265e-09, "loss": 0.9277, "step": 5861 }, { "epoch": 0.9939805002119542, "grad_norm": 0.988165760607772, "learning_rate": 1.8475678962526222e-09, "loss": 0.9353, "step": 5862 }, { "epoch": 0.9941500635862653, "grad_norm": 0.9749968177346672, "learning_rate": 1.743503830717863e-09, "loss": 0.8852, "step": 5863 }, { "epoch": 0.9943196269605765, "grad_norm": 0.9998456219885161, "learning_rate": 1.642455769444995e-09, "loss": 0.9766, "step": 5864 }, { "epoch": 0.9944891903348877, "grad_norm": 0.9765047105760973, "learning_rate": 1.5444237429140806e-09, "loss": 0.8787, "step": 5865 }, { "epoch": 0.9946587537091988, "grad_norm": 0.9735036897514259, "learning_rate": 1.4494077806992413e-09, "loss": 0.9031, "step": 5866 }, { "epoch": 0.9948283170835099, "grad_norm": 0.9793359924666379, "learning_rate": 1.357407911460884e-09, "loss": 0.9273, "step": 5867 }, { "epoch": 0.9949978804578211, "grad_norm": 0.9533612035915295, "learning_rate": 1.2684241629501438e-09, "loss": 0.8917, "step": 5868 }, { "epoch": 0.9951674438321323, "grad_norm": 0.9419399191606024, "learning_rate": 1.182456562012213e-09, "loss": 0.9071, "step": 5869 }, { "epoch": 0.9953370072064434, "grad_norm": 0.9669345793416564, "learning_rate": 1.0995051345763508e-09, "loss": 0.9288, "step": 5870 }, { "epoch": 0.9955065705807545, "grad_norm": 0.9850270403705859, "learning_rate": 1.019569905666984e-09, "loss": 0.8851, "step": 5871 }, { "epoch": 0.9956761339550657, "grad_norm": 0.9356390494037607, "learning_rate": 9.42650899395936e-10, "loss": 0.841, "step": 5872 }, { "epoch": 0.9958456973293769, "grad_norm": 1.000494634844948, "learning_rate": 8.687481389657582e-10, "loss": 0.9327, "step": 5873 }, { "epoch": 0.996015260703688, "grad_norm": 0.9411500450246553, "learning_rate": 7.978616466708388e-10, "loss": 0.9005, "step": 5874 }, { "epoch": 0.9961848240779991, "grad_norm": 0.6372601212475879, "learning_rate": 7.299914438929634e-10, "loss": 0.783, "step": 5875 }, { "epoch": 0.9963543874523103, "grad_norm": 1.0164318701062847, "learning_rate": 6.651375511057546e-10, "loss": 0.966, "step": 5876 }, { "epoch": 0.9965239508266215, "grad_norm": 1.0190447807288807, "learning_rate": 6.032999878735624e-10, "loss": 0.9351, "step": 5877 }, { "epoch": 0.9966935142009326, "grad_norm": 0.9119025372120443, "learning_rate": 5.444787728481338e-10, "loss": 0.8955, "step": 5878 }, { "epoch": 0.9968630775752437, "grad_norm": 1.0178435477402283, "learning_rate": 4.886739237752735e-10, "loss": 0.9343, "step": 5879 }, { "epoch": 0.9970326409495549, "grad_norm": 0.9671811323332676, "learning_rate": 4.3588545748596276e-10, "loss": 0.9247, "step": 5880 }, { "epoch": 0.9972022043238661, "grad_norm": 1.0032340180032104, "learning_rate": 3.861133899063507e-10, "loss": 0.9504, "step": 5881 }, { "epoch": 0.9973717676981771, "grad_norm": 0.9864779551961973, "learning_rate": 3.3935773604887313e-10, "loss": 0.9327, "step": 5882 }, { "epoch": 0.9975413310724883, "grad_norm": 0.9957873522498607, "learning_rate": 2.956185100178033e-10, "loss": 0.913, "step": 5883 }, { "epoch": 0.9977108944467995, "grad_norm": 1.005407829298245, "learning_rate": 2.5489572500814184e-10, "loss": 0.9355, "step": 5884 }, { "epoch": 0.9978804578211107, "grad_norm": 1.0046951183568882, "learning_rate": 2.171893933033964e-10, "loss": 0.9156, "step": 5885 }, { "epoch": 0.9980500211954217, "grad_norm": 0.9785766446867428, "learning_rate": 1.8249952627669154e-10, "loss": 0.9248, "step": 5886 }, { "epoch": 0.9982195845697329, "grad_norm": 0.9977959749729899, "learning_rate": 1.5082613439409977e-10, "loss": 0.959, "step": 5887 }, { "epoch": 0.9983891479440441, "grad_norm": 0.9549255917821977, "learning_rate": 1.2216922721020043e-10, "loss": 0.9169, "step": 5888 }, { "epoch": 0.9985587113183553, "grad_norm": 0.9547502515170834, "learning_rate": 9.652881336696951e-11, "loss": 0.9468, "step": 5889 }, { "epoch": 0.9987282746926663, "grad_norm": 0.9570247883966239, "learning_rate": 7.390490060155132e-11, "loss": 0.9017, "step": 5890 }, { "epoch": 0.9988978380669775, "grad_norm": 0.9951097764396317, "learning_rate": 5.42974957362663e-11, "loss": 0.9143, "step": 5891 }, { "epoch": 0.9990674014412887, "grad_norm": 1.0278976206464985, "learning_rate": 3.770660468749299e-11, "loss": 0.9479, "step": 5892 }, { "epoch": 0.9992369648155999, "grad_norm": 1.0120379464525961, "learning_rate": 2.4132232460116754e-11, "loss": 0.9105, "step": 5893 }, { "epoch": 0.9994065281899109, "grad_norm": 0.9956627845994828, "learning_rate": 1.3574383147529901e-11, "loss": 0.9383, "step": 5894 }, { "epoch": 0.9995760915642221, "grad_norm": 0.9570221998315199, "learning_rate": 6.033059934962282e-12, "loss": 0.9469, "step": 5895 }, { "epoch": 0.9997456549385333, "grad_norm": 0.9531973300356545, "learning_rate": 1.5082650972608748e-12, "loss": 0.9365, "step": 5896 }, { "epoch": 0.9999152183128445, "grad_norm": 1.0056373913215644, "learning_rate": 0.0, "loss": 0.8894, "step": 5897 }, { "epoch": 0.9999152183128445, "step": 5897, "total_flos": 1.7424153564413952e+16, "train_loss": 0.978339256261069, "train_runtime": 59144.3222, "train_samples_per_second": 25.526, "train_steps_per_second": 0.1 } ], "logging_steps": 1.0, "max_steps": 5897, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7424153564413952e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }