[ { "loss": 1.2057, "grad_norm": 1.1315475702285767, "learning_rate": 0.00019992790844372354, "epoch": 0.0, "step": 25 }, { "loss": 1.1553, "grad_norm": 1.134278655052185, "learning_rate": 0.00019985281307260223, "epoch": 0.0, "step": 50 }, { "loss": 1.0896, "grad_norm": 1.4255499839782715, "learning_rate": 0.00019977771770148087, "epoch": 0.0, "step": 75 }, { "loss": 1.1009, "grad_norm": 1.0321508646011353, "learning_rate": 0.00019970262233035956, "epoch": 0.0, "step": 100 }, { "loss": 1.0906, "grad_norm": 1.065851092338562, "learning_rate": 0.00019962752695923823, "epoch": 0.0, "step": 125 }, { "loss": 1.2285, "grad_norm": 1.3161128759384155, "learning_rate": 0.00019955243158811692, "epoch": 0.0, "step": 150 }, { "loss": 1.2242, "grad_norm": 1.0888772010803223, "learning_rate": 0.0001994773362169956, "epoch": 0.0, "step": 175 }, { "loss": 1.1169, "grad_norm": 1.3239021301269531, "learning_rate": 0.00019940224084587428, "epoch": 0.0, "step": 200 }, { "loss": 1.2012, "grad_norm": 1.0381895303726196, "learning_rate": 0.00019932714547475294, "epoch": 0.0, "step": 225 }, { "loss": 1.1967, "grad_norm": 1.1826602220535278, "learning_rate": 0.0001992520501036316, "epoch": 0.0, "step": 250 }, { "loss": 1.1632, "grad_norm": 1.3360055685043335, "learning_rate": 0.0001991769547325103, "epoch": 0.0, "step": 275 }, { "loss": 1.1724, "grad_norm": 1.4393656253814697, "learning_rate": 0.00019910185936138896, "epoch": 0.0, "step": 300 }, { "loss": 1.1483, "grad_norm": 0.7849452495574951, "learning_rate": 0.00019902676399026766, "epoch": 0.0, "step": 325 }, { "loss": 1.1145, "grad_norm": 1.1918078660964966, "learning_rate": 0.00019895166861914635, "epoch": 0.01, "step": 350 }, { "loss": 1.1695, "grad_norm": 0.9958588480949402, "learning_rate": 0.000198876573248025, "epoch": 0.01, "step": 375 }, { "loss": 1.1884, "grad_norm": 1.6034517288208008, "learning_rate": 0.00019880147787690368, "epoch": 0.01, "step": 400 }, { "loss": 1.1034, "grad_norm": 1.0951026678085327, "learning_rate": 0.00019872638250578235, "epoch": 0.01, "step": 425 }, { "loss": 1.1748, "grad_norm": 1.3773316144943237, "learning_rate": 0.00019865128713466104, "epoch": 0.01, "step": 450 }, { "loss": 1.1022, "grad_norm": 0.9198249578475952, "learning_rate": 0.0001985761917635397, "epoch": 0.01, "step": 475 }, { "loss": 1.1509, "grad_norm": 1.4825350046157837, "learning_rate": 0.0001985010963924184, "epoch": 0.01, "step": 500 }, { "loss": 1.1985, "grad_norm": 1.1909708976745605, "learning_rate": 0.00019842600102129706, "epoch": 0.01, "step": 525 }, { "loss": 1.2119, "grad_norm": 1.6249839067459106, "learning_rate": 0.00019835090565017573, "epoch": 0.01, "step": 550 }, { "loss": 1.1233, "grad_norm": 1.279699444770813, "learning_rate": 0.00019827581027905442, "epoch": 0.01, "step": 575 }, { "loss": 1.2015, "grad_norm": 1.2097326517105103, "learning_rate": 0.00019820071490793308, "epoch": 0.01, "step": 600 }, { "loss": 1.0881, "grad_norm": 1.3173500299453735, "learning_rate": 0.00019812561953681178, "epoch": 0.01, "step": 625 }, { "loss": 1.1941, "grad_norm": 1.4401942491531372, "learning_rate": 0.00019805052416569044, "epoch": 0.01, "step": 650 }, { "loss": 1.1371, "grad_norm": 1.4831886291503906, "learning_rate": 0.0001979754287945691, "epoch": 0.01, "step": 675 }, { "loss": 1.1284, "grad_norm": 1.3925830125808716, "learning_rate": 0.00019790033342344777, "epoch": 0.01, "step": 700 }, { "loss": 1.151, "grad_norm": 1.1581556797027588, "learning_rate": 0.00019782523805232647, "epoch": 0.01, "step": 725 }, { "loss": 1.1213, "grad_norm": 0.9816817045211792, "learning_rate": 0.00019775014268120513, "epoch": 0.01, "step": 750 }, { "loss": 1.189, "grad_norm": 1.771814227104187, "learning_rate": 0.00019767504731008382, "epoch": 0.01, "step": 775 }, { "loss": 1.1613, "grad_norm": 1.2354599237442017, "learning_rate": 0.00019759995193896252, "epoch": 0.01, "step": 800 }, { "loss": 1.1801, "grad_norm": 1.1712677478790283, "learning_rate": 0.00019752485656784115, "epoch": 0.01, "step": 825 }, { "loss": 1.1177, "grad_norm": 1.372815489768982, "learning_rate": 0.00019744976119671985, "epoch": 0.01, "step": 850 }, { "loss": 1.2127, "grad_norm": 1.7617968320846558, "learning_rate": 0.0001973746658255985, "epoch": 0.01, "step": 875 }, { "loss": 1.1913, "grad_norm": 1.5143325328826904, "learning_rate": 0.0001972995704544772, "epoch": 0.01, "step": 900 }, { "loss": 1.1401, "grad_norm": 1.9220173358917236, "learning_rate": 0.00019722447508335587, "epoch": 0.01, "step": 925 }, { "loss": 1.209, "grad_norm": 1.1575367450714111, "learning_rate": 0.0001971523835270794, "epoch": 0.01, "step": 950 }, { "loss": 1.1926, "grad_norm": 0.950126051902771, "learning_rate": 0.00019707728815595806, "epoch": 0.01, "step": 975 }, { "loss": 1.1288, "grad_norm": 1.4479137659072876, "learning_rate": 0.00019700219278483676, "epoch": 0.02, "step": 1000 }, { "loss": 1.1002, "grad_norm": 1.2142207622528076, "learning_rate": 0.00019692709741371542, "epoch": 0.02, "step": 1025 }, { "loss": 1.1094, "grad_norm": 1.5564340353012085, "learning_rate": 0.0001968520020425941, "epoch": 0.02, "step": 1050 }, { "loss": 1.1565, "grad_norm": 0.9252703189849854, "learning_rate": 0.00019677690667147278, "epoch": 0.02, "step": 1075 }, { "loss": 1.1714, "grad_norm": 1.2928420305252075, "learning_rate": 0.00019670181130035147, "epoch": 0.02, "step": 1100 }, { "loss": 1.1822, "grad_norm": 6.4705281257629395, "learning_rate": 0.0001966267159292301, "epoch": 0.02, "step": 1125 }, { "loss": 1.1806, "grad_norm": 1.6441676616668701, "learning_rate": 0.0001965516205581088, "epoch": 0.02, "step": 1150 }, { "loss": 1.1523, "grad_norm": 1.981781244277954, "learning_rate": 0.0001964765251869875, "epoch": 0.02, "step": 1175 }, { "loss": 1.1556, "grad_norm": 1.6746410131454468, "learning_rate": 0.00019640142981586616, "epoch": 0.02, "step": 1200 }, { "loss": 1.1145, "grad_norm": 1.4516457319259644, "learning_rate": 0.00019632633444474485, "epoch": 0.02, "step": 1225 }, { "loss": 1.1196, "grad_norm": 1.1103088855743408, "learning_rate": 0.00019625123907362352, "epoch": 0.02, "step": 1250 }, { "loss": 1.1693, "grad_norm": 2.238402843475342, "learning_rate": 0.00019617614370250218, "epoch": 0.02, "step": 1275 }, { "loss": 1.1292, "grad_norm": 1.3010998964309692, "learning_rate": 0.00019610104833138085, "epoch": 0.02, "step": 1300 }, { "loss": 1.1058, "grad_norm": 1.2125681638717651, "learning_rate": 0.00019602595296025954, "epoch": 0.02, "step": 1325 }, { "loss": 1.2092, "grad_norm": 1.263877272605896, "learning_rate": 0.00019595085758913823, "epoch": 0.02, "step": 1350 }, { "loss": 1.1451, "grad_norm": 1.3143881559371948, "learning_rate": 0.0001958757622180169, "epoch": 0.02, "step": 1375 }, { "loss": 1.1226, "grad_norm": 1.081262230873108, "learning_rate": 0.00019580066684689556, "epoch": 0.02, "step": 1400 }, { "loss": 1.1559, "grad_norm": 1.4016741514205933, "learning_rate": 0.00019572557147577423, "epoch": 0.02, "step": 1425 }, { "loss": 1.1568, "grad_norm": 1.435803771018982, "learning_rate": 0.00019565047610465292, "epoch": 0.02, "step": 1450 }, { "loss": 1.1661, "grad_norm": 0.9757218956947327, "learning_rate": 0.00019557538073353159, "epoch": 0.02, "step": 1475 }, { "loss": 1.1674, "grad_norm": 1.1646333932876587, "learning_rate": 0.00019550028536241028, "epoch": 0.02, "step": 1500 }, { "loss": 1.229, "grad_norm": 1.707133412361145, "learning_rate": 0.00019542518999128894, "epoch": 0.02, "step": 1525 }, { "loss": 1.3046, "grad_norm": 1.5240947008132935, "learning_rate": 0.00019535009462016764, "epoch": 0.02, "step": 1550 }, { "loss": 1.3126, "grad_norm": 1.0018959045410156, "learning_rate": 0.0001952749992490463, "epoch": 0.02, "step": 1575 }, { "loss": 1.2822, "grad_norm": 1.017600655555725, "learning_rate": 0.00019519990387792497, "epoch": 0.02, "step": 1600 }, { "loss": 1.339, "grad_norm": 1.2864093780517578, "learning_rate": 0.00019512480850680366, "epoch": 0.02, "step": 1625 }, { "loss": 1.277, "grad_norm": 1.2390028238296509, "learning_rate": 0.00019504971313568232, "epoch": 0.02, "step": 1650 }, { "loss": 1.2122, "grad_norm": 1.8026188611984253, "learning_rate": 0.00019497461776456102, "epoch": 0.03, "step": 1675 }, { "loss": 1.3177, "grad_norm": 1.143813967704773, "learning_rate": 0.00019489952239343968, "epoch": 0.03, "step": 1700 }, { "loss": 1.3457, "grad_norm": 1.5295052528381348, "learning_rate": 0.00019482442702231835, "epoch": 0.03, "step": 1725 }, { "loss": 1.2132, "grad_norm": 1.7378249168395996, "learning_rate": 0.000194749331651197, "epoch": 0.03, "step": 1750 }, { "loss": 1.2284, "grad_norm": 2.3345251083374023, "learning_rate": 0.0001946742362800757, "epoch": 0.03, "step": 1775 }, { "loss": 1.3233, "grad_norm": 2.047725200653076, "learning_rate": 0.0001945991409089544, "epoch": 0.03, "step": 1800 }, { "loss": 1.2398, "grad_norm": 1.7765051126480103, "learning_rate": 0.00019452404553783306, "epoch": 0.03, "step": 1825 }, { "loss": 1.226, "grad_norm": 1.4397103786468506, "learning_rate": 0.00019444895016671173, "epoch": 0.03, "step": 1850 }, { "loss": 1.3669, "grad_norm": 1.741700530052185, "learning_rate": 0.0001943738547955904, "epoch": 0.03, "step": 1875 }, { "loss": 1.2807, "grad_norm": 1.4095584154129028, "learning_rate": 0.00019429875942446909, "epoch": 0.03, "step": 1900 }, { "loss": 1.1971, "grad_norm": 1.1434788703918457, "learning_rate": 0.00019422366405334775, "epoch": 0.03, "step": 1925 }, { "loss": 1.3073, "grad_norm": 1.1965715885162354, "learning_rate": 0.00019414856868222644, "epoch": 0.03, "step": 1950 }, { "loss": 1.1621, "grad_norm": 1.6625946760177612, "learning_rate": 0.0001940734733111051, "epoch": 0.03, "step": 1975 }, { "loss": 1.2672, "grad_norm": 1.454901933670044, "learning_rate": 0.0001939983779399838, "epoch": 0.03, "step": 2000 }, { "loss": 1.2422, "grad_norm": 1.198033094406128, "learning_rate": 0.00019392328256886247, "epoch": 0.03, "step": 2025 }, { "loss": 1.1992, "grad_norm": 1.9674854278564453, "learning_rate": 0.00019384818719774113, "epoch": 0.03, "step": 2050 }, { "loss": 1.2854, "grad_norm": 1.4203040599822998, "learning_rate": 0.00019377309182661982, "epoch": 0.03, "step": 2075 }, { "loss": 1.2896, "grad_norm": 2.059704065322876, "learning_rate": 0.0001936979964554985, "epoch": 0.03, "step": 2100 }, { "loss": 1.2304, "grad_norm": 3.035451889038086, "learning_rate": 0.00019362290108437718, "epoch": 0.03, "step": 2125 }, { "loss": 1.2555, "grad_norm": 0.9351598620414734, "learning_rate": 0.00019354780571325585, "epoch": 0.03, "step": 2150 }, { "loss": 1.2276, "grad_norm": 1.0075334310531616, "learning_rate": 0.0001934727103421345, "epoch": 0.03, "step": 2175 }, { "loss": 1.2042, "grad_norm": 1.5209815502166748, "learning_rate": 0.00019339761497101318, "epoch": 0.03, "step": 2200 }, { "loss": 1.2262, "grad_norm": 0.9942296147346497, "learning_rate": 0.00019332251959989187, "epoch": 0.03, "step": 2225 }, { "loss": 1.2108, "grad_norm": 1.755386233329773, "learning_rate": 0.00019324742422877056, "epoch": 0.03, "step": 2250 }, { "loss": 1.2354, "grad_norm": 1.129966378211975, "learning_rate": 0.00019317232885764923, "epoch": 0.03, "step": 2275 }, { "loss": 1.1907, "grad_norm": 0.871713399887085, "learning_rate": 0.0001930972334865279, "epoch": 0.03, "step": 2300 }, { "loss": 1.2465, "grad_norm": 1.4679888486862183, "learning_rate": 0.00019302213811540656, "epoch": 0.03, "step": 2325 }, { "loss": 1.2956, "grad_norm": 1.5835942029953003, "learning_rate": 0.00019294704274428525, "epoch": 0.04, "step": 2350 }, { "loss": 1.238, "grad_norm": 1.7425931692123413, "learning_rate": 0.00019287194737316392, "epoch": 0.04, "step": 2375 }, { "loss": 1.3505, "grad_norm": 0.9919810891151428, "learning_rate": 0.0001927968520020426, "epoch": 0.04, "step": 2400 }, { "loss": 1.3327, "grad_norm": 1.3356170654296875, "learning_rate": 0.00019272175663092127, "epoch": 0.04, "step": 2425 }, { "loss": 1.2885, "grad_norm": 0.9927514791488647, "learning_rate": 0.00019264666125979997, "epoch": 0.04, "step": 2450 }, { "loss": 1.245, "grad_norm": 1.3974716663360596, "learning_rate": 0.00019257156588867863, "epoch": 0.04, "step": 2475 }, { "loss": 1.2714, "grad_norm": 1.3735284805297852, "learning_rate": 0.0001924964705175573, "epoch": 0.04, "step": 2500 }, { "loss": 1.2481, "grad_norm": 1.096691370010376, "learning_rate": 0.000192421375146436, "epoch": 0.04, "step": 2525 }, { "loss": 1.2765, "grad_norm": 1.1713175773620605, "learning_rate": 0.00019234928359015952, "epoch": 0.04, "step": 2550 }, { "loss": 1.1508, "grad_norm": 1.2733495235443115, "learning_rate": 0.0001922741882190382, "epoch": 0.04, "step": 2575 }, { "loss": 1.2932, "grad_norm": 0.9217672944068909, "learning_rate": 0.00019219909284791685, "epoch": 0.04, "step": 2600 }, { "loss": 1.2598, "grad_norm": 2.117608070373535, "learning_rate": 0.00019212399747679554, "epoch": 0.04, "step": 2625 }, { "loss": 1.3049, "grad_norm": 1.0736849308013916, "learning_rate": 0.0001920489021056742, "epoch": 0.04, "step": 2650 }, { "loss": 1.2122, "grad_norm": 0.9429724812507629, "learning_rate": 0.0001919738067345529, "epoch": 0.04, "step": 2675 }, { "loss": 1.2863, "grad_norm": 1.8061331510543823, "learning_rate": 0.00019189871136343156, "epoch": 0.04, "step": 2700 }, { "loss": 1.2269, "grad_norm": 1.2970006465911865, "learning_rate": 0.00019182361599231026, "epoch": 0.04, "step": 2725 }, { "loss": 1.2015, "grad_norm": 1.2956987619400024, "learning_rate": 0.00019174852062118892, "epoch": 0.04, "step": 2750 }, { "loss": 1.2501, "grad_norm": 2.2317731380462646, "learning_rate": 0.0001916734252500676, "epoch": 0.04, "step": 2775 }, { "loss": 1.256, "grad_norm": 1.3463131189346313, "learning_rate": 0.00019159832987894628, "epoch": 0.04, "step": 2800 }, { "loss": 1.2649, "grad_norm": 1.146892786026001, "learning_rate": 0.00019152323450782495, "epoch": 0.04, "step": 2825 }, { "loss": 1.3137, "grad_norm": 1.524172306060791, "learning_rate": 0.00019144813913670364, "epoch": 0.04, "step": 2850 }, { "loss": 1.2658, "grad_norm": 1.0975860357284546, "learning_rate": 0.0001913730437655823, "epoch": 0.04, "step": 2875 }, { "loss": 1.2279, "grad_norm": 1.350852608680725, "learning_rate": 0.00019129794839446097, "epoch": 0.04, "step": 2900 }, { "loss": 1.2253, "grad_norm": 1.8295092582702637, "learning_rate": 0.00019122285302333963, "epoch": 0.04, "step": 2925 }, { "loss": 1.3226, "grad_norm": 2.276642322540283, "learning_rate": 0.00019114775765221833, "epoch": 0.04, "step": 2950 }, { "loss": 1.2016, "grad_norm": 1.5693854093551636, "learning_rate": 0.000191072662281097, "epoch": 0.04, "step": 2975 }, { "loss": 1.1537, "grad_norm": 1.3827277421951294, "learning_rate": 0.00019099756690997568, "epoch": 0.05, "step": 3000 }, { "loss": 1.3013, "grad_norm": 1.0224173069000244, "learning_rate": 0.00019092247153885438, "epoch": 0.05, "step": 3025 }, { "loss": 1.248, "grad_norm": 1.8332293033599854, "learning_rate": 0.00019084737616773301, "epoch": 0.05, "step": 3050 }, { "loss": 1.2415, "grad_norm": 0.9008692502975464, "learning_rate": 0.0001907722807966117, "epoch": 0.05, "step": 3075 }, { "loss": 1.2225, "grad_norm": 1.5711129903793335, "learning_rate": 0.00019069718542549037, "epoch": 0.05, "step": 3100 }, { "loss": 1.197, "grad_norm": 2.6612651348114014, "learning_rate": 0.00019062209005436906, "epoch": 0.05, "step": 3125 }, { "loss": 1.2788, "grad_norm": 1.3320034742355347, "learning_rate": 0.00019054699468324773, "epoch": 0.05, "step": 3150 }, { "loss": 1.3182, "grad_norm": 1.8735719919204712, "learning_rate": 0.00019047189931212642, "epoch": 0.05, "step": 3175 }, { "loss": 1.2997, "grad_norm": 1.550970435142517, "learning_rate": 0.0001903968039410051, "epoch": 0.05, "step": 3200 }, { "loss": 1.2142, "grad_norm": 1.3348615169525146, "learning_rate": 0.00019032170856988375, "epoch": 0.05, "step": 3225 }, { "loss": 1.1912, "grad_norm": 1.036444902420044, "learning_rate": 0.00019024661319876245, "epoch": 0.05, "step": 3250 }, { "loss": 1.2365, "grad_norm": 1.920847773551941, "learning_rate": 0.0001901715178276411, "epoch": 0.05, "step": 3275 }, { "loss": 1.2733, "grad_norm": 3.4518144130706787, "learning_rate": 0.0001900964224565198, "epoch": 0.05, "step": 3300 }, { "loss": 1.1872, "grad_norm": 2.0837206840515137, "learning_rate": 0.00019002132708539847, "epoch": 0.05, "step": 3325 }, { "loss": 1.2276, "grad_norm": 1.4272059202194214, "learning_rate": 0.00018994623171427713, "epoch": 0.05, "step": 3350 }, { "loss": 1.2622, "grad_norm": 1.0555847883224487, "learning_rate": 0.0001898711363431558, "epoch": 0.05, "step": 3375 }, { "loss": 1.2896, "grad_norm": 0.9901136755943298, "learning_rate": 0.0001897960409720345, "epoch": 0.05, "step": 3400 }, { "loss": 1.2141, "grad_norm": 2.292473793029785, "learning_rate": 0.00018972094560091316, "epoch": 0.05, "step": 3425 }, { "loss": 1.353, "grad_norm": 1.7795960903167725, "learning_rate": 0.00018964585022979185, "epoch": 0.05, "step": 3450 }, { "loss": 1.2276, "grad_norm": 1.4300271272659302, "learning_rate": 0.00018957075485867054, "epoch": 0.05, "step": 3475 }, { "loss": 1.2115, "grad_norm": 1.8741381168365479, "learning_rate": 0.00018949565948754918, "epoch": 0.05, "step": 3500 }, { "loss": 1.2229, "grad_norm": 1.2686586380004883, "learning_rate": 0.00018942056411642787, "epoch": 0.05, "step": 3525 }, { "loss": 1.2484, "grad_norm": 1.245797872543335, "learning_rate": 0.00018934546874530654, "epoch": 0.05, "step": 3550 }, { "loss": 1.2026, "grad_norm": 1.744106650352478, "learning_rate": 0.00018927037337418523, "epoch": 0.05, "step": 3575 }, { "loss": 1.1809, "grad_norm": 1.4250385761260986, "learning_rate": 0.0001891952780030639, "epoch": 0.05, "step": 3600 }, { "loss": 1.2408, "grad_norm": 3.535332202911377, "learning_rate": 0.0001891201826319426, "epoch": 0.05, "step": 3625 }, { "loss": 1.3137, "grad_norm": 1.612424612045288, "learning_rate": 0.00018904508726082125, "epoch": 0.05, "step": 3650 }, { "loss": 1.2419, "grad_norm": 2.110978841781616, "learning_rate": 0.00018896999188969992, "epoch": 0.06, "step": 3675 }, { "loss": 1.2782, "grad_norm": 1.3754994869232178, "learning_rate": 0.0001888948965185786, "epoch": 0.06, "step": 3700 }, { "loss": 1.1582, "grad_norm": 0.849475085735321, "learning_rate": 0.00018881980114745728, "epoch": 0.06, "step": 3725 }, { "loss": 1.1607, "grad_norm": 1.4278253316879272, "learning_rate": 0.00018874470577633597, "epoch": 0.06, "step": 3750 }, { "loss": 1.2154, "grad_norm": 3.2679355144500732, "learning_rate": 0.00018866961040521463, "epoch": 0.06, "step": 3775 }, { "loss": 1.1998, "grad_norm": 0.9529170393943787, "learning_rate": 0.0001885945150340933, "epoch": 0.06, "step": 3800 }, { "loss": 1.2879, "grad_norm": 1.243181586265564, "learning_rate": 0.00018851941966297196, "epoch": 0.06, "step": 3825 }, { "loss": 1.2077, "grad_norm": 1.7659603357315063, "learning_rate": 0.00018844432429185066, "epoch": 0.06, "step": 3850 }, { "loss": 1.2047, "grad_norm": 1.5011489391326904, "learning_rate": 0.00018836922892072935, "epoch": 0.06, "step": 3875 }, { "loss": 1.2347, "grad_norm": 1.761047601699829, "learning_rate": 0.00018829413354960801, "epoch": 0.06, "step": 3900 }, { "loss": 1.1974, "grad_norm": 3.6463253498077393, "learning_rate": 0.0001882190381784867, "epoch": 0.06, "step": 3925 }, { "loss": 1.1995, "grad_norm": 0.955506443977356, "learning_rate": 0.00018814394280736537, "epoch": 0.06, "step": 3950 }, { "loss": 1.2918, "grad_norm": 1.0750863552093506, "learning_rate": 0.00018806884743624404, "epoch": 0.06, "step": 3975 }, { "loss": 1.2288, "grad_norm": 1.1837942600250244, "learning_rate": 0.0001879937520651227, "epoch": 0.06, "step": 4000 }, { "loss": 1.2013, "grad_norm": 1.5817101001739502, "learning_rate": 0.0001879186566940014, "epoch": 0.06, "step": 4025 }, { "loss": 1.1851, "grad_norm": 1.1778972148895264, "learning_rate": 0.00018784356132288006, "epoch": 0.06, "step": 4050 }, { "loss": 1.2426, "grad_norm": 1.3166766166687012, "learning_rate": 0.00018776846595175875, "epoch": 0.06, "step": 4075 }, { "loss": 1.2837, "grad_norm": 2.850275993347168, "learning_rate": 0.00018769337058063742, "epoch": 0.06, "step": 4100 }, { "loss": 1.2267, "grad_norm": 1.6599894762039185, "learning_rate": 0.00018761827520951608, "epoch": 0.06, "step": 4125 }, { "loss": 1.2885, "grad_norm": 1.5684510469436646, "learning_rate": 0.00018754317983839478, "epoch": 0.06, "step": 4150 }, { "loss": 1.2471, "grad_norm": 2.0227372646331787, "learning_rate": 0.00018746808446727344, "epoch": 0.06, "step": 4175 }, { "loss": 1.2944, "grad_norm": 1.747456669807434, "learning_rate": 0.00018739298909615213, "epoch": 0.06, "step": 4200 }, { "loss": 1.2535, "grad_norm": 1.6037201881408691, "learning_rate": 0.0001873178937250308, "epoch": 0.06, "step": 4225 }, { "loss": 1.2354, "grad_norm": 0.9807868003845215, "learning_rate": 0.00018724279835390946, "epoch": 0.06, "step": 4250 }, { "loss": 1.1942, "grad_norm": 0.8951900601387024, "learning_rate": 0.00018716770298278813, "epoch": 0.06, "step": 4275 }, { "loss": 1.2506, "grad_norm": 1.4032387733459473, "learning_rate": 0.00018709260761166682, "epoch": 0.06, "step": 4300 }, { "loss": 1.2388, "grad_norm": 1.407606840133667, "learning_rate": 0.00018701751224054551, "epoch": 0.06, "step": 4325 }, { "loss": 1.2856, "grad_norm": 1.153029441833496, "learning_rate": 0.00018694241686942418, "epoch": 0.07, "step": 4350 }, { "loss": 1.2268, "grad_norm": 1.075202226638794, "learning_rate": 0.00018686732149830287, "epoch": 0.07, "step": 4375 }, { "loss": 1.257, "grad_norm": 1.2537505626678467, "learning_rate": 0.00018679222612718154, "epoch": 0.07, "step": 4400 }, { "loss": 1.2183, "grad_norm": 0.9912234544754028, "learning_rate": 0.0001867171307560602, "epoch": 0.07, "step": 4425 }, { "loss": 1.1376, "grad_norm": 1.3906209468841553, "learning_rate": 0.00018664203538493887, "epoch": 0.07, "step": 4450 }, { "loss": 1.2794, "grad_norm": 1.3347073793411255, "learning_rate": 0.00018656694001381756, "epoch": 0.07, "step": 4475 }, { "loss": 1.2486, "grad_norm": 1.259150505065918, "learning_rate": 0.00018649184464269623, "epoch": 0.07, "step": 4500 }, { "loss": 1.2408, "grad_norm": 1.7800498008728027, "learning_rate": 0.00018641674927157492, "epoch": 0.07, "step": 4525 }, { "loss": 1.282, "grad_norm": 0.9904906153678894, "learning_rate": 0.00018634165390045358, "epoch": 0.07, "step": 4550 }, { "loss": 1.2944, "grad_norm": 1.2220566272735596, "learning_rate": 0.00018626655852933225, "epoch": 0.07, "step": 4575 }, { "loss": 1.266, "grad_norm": 1.4289559125900269, "learning_rate": 0.00018619146315821094, "epoch": 0.07, "step": 4600 }, { "loss": 1.2164, "grad_norm": 1.5805399417877197, "learning_rate": 0.0001861163677870896, "epoch": 0.07, "step": 4625 }, { "loss": 1.2176, "grad_norm": 1.2486138343811035, "learning_rate": 0.0001860412724159683, "epoch": 0.07, "step": 4650 }, { "loss": 1.2165, "grad_norm": 1.4444175958633423, "learning_rate": 0.00018596617704484696, "epoch": 0.07, "step": 4675 }, { "loss": 1.2247, "grad_norm": 1.6640115976333618, "learning_rate": 0.00018589108167372563, "epoch": 0.07, "step": 4700 }, { "loss": 1.2123, "grad_norm": 1.1432693004608154, "learning_rate": 0.0001858159863026043, "epoch": 0.07, "step": 4725 }, { "loss": 1.2347, "grad_norm": 0.9574340581893921, "learning_rate": 0.000185740890931483, "epoch": 0.07, "step": 4750 }, { "loss": 1.2177, "grad_norm": 1.5829005241394043, "learning_rate": 0.00018566579556036168, "epoch": 0.07, "step": 4775 }, { "loss": 1.2693, "grad_norm": 1.0968513488769531, "learning_rate": 0.00018559070018924035, "epoch": 0.07, "step": 4800 }, { "loss": 1.2203, "grad_norm": 1.2009191513061523, "learning_rate": 0.00018551560481811904, "epoch": 0.07, "step": 4825 }, { "loss": 1.2341, "grad_norm": 1.4881080389022827, "learning_rate": 0.0001854405094469977, "epoch": 0.07, "step": 4850 }, { "loss": 1.1719, "grad_norm": 1.083778977394104, "learning_rate": 0.00018536541407587637, "epoch": 0.07, "step": 4875 }, { "loss": 1.2091, "grad_norm": 1.382657766342163, "learning_rate": 0.00018529031870475503, "epoch": 0.07, "step": 4900 }, { "loss": 1.2022, "grad_norm": 1.0024495124816895, "learning_rate": 0.00018521522333363373, "epoch": 0.07, "step": 4925 }, { "loss": 1.2967, "grad_norm": 1.2385984659194946, "learning_rate": 0.0001851401279625124, "epoch": 0.07, "step": 4950 }, { "loss": 1.2568, "grad_norm": 2.7820701599121094, "learning_rate": 0.00018506503259139108, "epoch": 0.07, "step": 4975 }, { "loss": 1.2483, "grad_norm": 1.5247501134872437, "learning_rate": 0.00018498993722026975, "epoch": 0.08, "step": 5000 }, { "loss": 1.2827, "grad_norm": 1.855117678642273, "learning_rate": 0.00018491484184914841, "epoch": 0.08, "step": 5025 }, { "loss": 1.2518, "grad_norm": 1.2518528699874878, "learning_rate": 0.0001848397464780271, "epoch": 0.08, "step": 5050 }, { "loss": 1.2962, "grad_norm": 1.2159770727157593, "learning_rate": 0.00018476465110690577, "epoch": 0.08, "step": 5075 }, { "loss": 1.2611, "grad_norm": 1.3085296154022217, "learning_rate": 0.00018468955573578446, "epoch": 0.08, "step": 5100 }, { "loss": 1.2261, "grad_norm": 2.2151153087615967, "learning_rate": 0.00018461446036466313, "epoch": 0.08, "step": 5125 }, { "loss": 1.3178, "grad_norm": 2.004448175430298, "learning_rate": 0.0001845393649935418, "epoch": 0.08, "step": 5150 }, { "loss": 1.2419, "grad_norm": 3.063715934753418, "learning_rate": 0.0001844642696224205, "epoch": 0.08, "step": 5175 }, { "loss": 1.2084, "grad_norm": 1.432442545890808, "learning_rate": 0.00018438917425129915, "epoch": 0.08, "step": 5200 }, { "loss": 1.2254, "grad_norm": 2.0840189456939697, "learning_rate": 0.00018431407888017785, "epoch": 0.08, "step": 5225 }, { "loss": 1.1547, "grad_norm": 1.479894995689392, "learning_rate": 0.0001842389835090565, "epoch": 0.08, "step": 5250 }, { "loss": 1.2749, "grad_norm": 1.097493290901184, "learning_rate": 0.0001841638881379352, "epoch": 0.08, "step": 5275 }, { "loss": 1.2417, "grad_norm": 4.6398539543151855, "learning_rate": 0.00018408879276681387, "epoch": 0.08, "step": 5300 }, { "loss": 1.2506, "grad_norm": 0.9642776250839233, "learning_rate": 0.00018401369739569253, "epoch": 0.08, "step": 5325 }, { "loss": 1.2349, "grad_norm": 1.2694449424743652, "learning_rate": 0.0001839386020245712, "epoch": 0.08, "step": 5350 }, { "loss": 1.2158, "grad_norm": 1.2243396043777466, "learning_rate": 0.0001838635066534499, "epoch": 0.08, "step": 5375 }, { "loss": 1.2665, "grad_norm": 1.859632134437561, "learning_rate": 0.00018378841128232858, "epoch": 0.08, "step": 5400 }, { "loss": 1.2325, "grad_norm": 1.0260474681854248, "learning_rate": 0.00018371331591120725, "epoch": 0.08, "step": 5425 }, { "loss": 1.2785, "grad_norm": 1.646101713180542, "learning_rate": 0.00018363822054008591, "epoch": 0.08, "step": 5450 }, { "loss": 1.1545, "grad_norm": 0.9569182395935059, "learning_rate": 0.00018356312516896458, "epoch": 0.08, "step": 5475 }, { "loss": 1.2172, "grad_norm": 1.4357048273086548, "learning_rate": 0.00018348802979784327, "epoch": 0.08, "step": 5500 }, { "loss": 1.2557, "grad_norm": 1.1532384157180786, "learning_rate": 0.00018341293442672194, "epoch": 0.08, "step": 5525 }, { "loss": 1.2258, "grad_norm": 1.1566283702850342, "learning_rate": 0.00018333783905560063, "epoch": 0.08, "step": 5550 }, { "loss": 1.1953, "grad_norm": 1.5711147785186768, "learning_rate": 0.0001832627436844793, "epoch": 0.08, "step": 5575 }, { "loss": 1.1736, "grad_norm": 1.2852180004119873, "learning_rate": 0.00018318764831335796, "epoch": 0.08, "step": 5600 }, { "loss": 1.2572, "grad_norm": 1.2723329067230225, "learning_rate": 0.00018311255294223665, "epoch": 0.08, "step": 5625 }, { "loss": 1.241, "grad_norm": 1.6078953742980957, "learning_rate": 0.00018303745757111532, "epoch": 0.08, "step": 5650 }, { "loss": 1.2206, "grad_norm": 1.821363925933838, "learning_rate": 0.000182962362199994, "epoch": 0.09, "step": 5675 }, { "loss": 1.2775, "grad_norm": 1.3025563955307007, "learning_rate": 0.00018288726682887268, "epoch": 0.09, "step": 5700 }, { "loss": 1.2296, "grad_norm": 1.7497808933258057, "learning_rate": 0.00018281217145775137, "epoch": 0.09, "step": 5725 }, { "loss": 1.306, "grad_norm": 1.5627915859222412, "learning_rate": 0.00018273707608663003, "epoch": 0.09, "step": 5750 }, { "loss": 1.2339, "grad_norm": 1.217433214187622, "learning_rate": 0.0001826619807155087, "epoch": 0.09, "step": 5775 }, { "loss": 1.2396, "grad_norm": 2.4516422748565674, "learning_rate": 0.00018258688534438736, "epoch": 0.09, "step": 5800 }, { "loss": 1.2847, "grad_norm": 1.0460309982299805, "learning_rate": 0.00018251178997326606, "epoch": 0.09, "step": 5825 }, { "loss": 1.2075, "grad_norm": 1.3472404479980469, "learning_rate": 0.00018243669460214475, "epoch": 0.09, "step": 5850 }, { "loss": 1.1984, "grad_norm": 1.1247375011444092, "learning_rate": 0.00018236159923102341, "epoch": 0.09, "step": 5875 }, { "loss": 1.1911, "grad_norm": 1.175132393836975, "learning_rate": 0.00018228650385990208, "epoch": 0.09, "step": 5900 }, { "loss": 1.2405, "grad_norm": 1.176147699356079, "learning_rate": 0.00018221140848878075, "epoch": 0.09, "step": 5925 }, { "loss": 1.1924, "grad_norm": 1.0580313205718994, "learning_rate": 0.00018213631311765944, "epoch": 0.09, "step": 5950 }, { "loss": 1.2185, "grad_norm": 1.1505495309829712, "learning_rate": 0.0001820612177465381, "epoch": 0.09, "step": 5975 }, { "loss": 1.2511, "grad_norm": 1.1643320322036743, "learning_rate": 0.0001819861223754168, "epoch": 0.09, "step": 6000 }, { "loss": 1.2177, "grad_norm": 1.3354824781417847, "learning_rate": 0.00018191102700429546, "epoch": 0.09, "step": 6025 }, { "loss": 1.2254, "grad_norm": 1.547897458076477, "learning_rate": 0.00018183593163317413, "epoch": 0.09, "step": 6050 }, { "loss": 1.2464, "grad_norm": 1.5310362577438354, "learning_rate": 0.00018176083626205282, "epoch": 0.09, "step": 6075 }, { "loss": 1.2767, "grad_norm": 1.947996735572815, "learning_rate": 0.00018168574089093148, "epoch": 0.09, "step": 6100 }, { "loss": 1.1888, "grad_norm": 1.7055374383926392, "learning_rate": 0.00018161064551981018, "epoch": 0.09, "step": 6125 }, { "loss": 1.1704, "grad_norm": 1.5656442642211914, "learning_rate": 0.00018153555014868884, "epoch": 0.09, "step": 6150 }, { "loss": 1.2117, "grad_norm": 2.2562601566314697, "learning_rate": 0.00018146045477756753, "epoch": 0.09, "step": 6175 }, { "loss": 1.2164, "grad_norm": 1.9555296897888184, "learning_rate": 0.0001813853594064462, "epoch": 0.09, "step": 6200 }, { "loss": 1.181, "grad_norm": 1.350396990776062, "learning_rate": 0.00018131026403532486, "epoch": 0.09, "step": 6225 }, { "loss": 1.1364, "grad_norm": 1.306662678718567, "learning_rate": 0.00018123516866420353, "epoch": 0.09, "step": 6250 }, { "loss": 1.3395, "grad_norm": 2.2892208099365234, "learning_rate": 0.00018116007329308222, "epoch": 0.09, "step": 6275 }, { "loss": 1.2155, "grad_norm": 1.4777361154556274, "learning_rate": 0.00018108497792196091, "epoch": 0.09, "step": 6300 }, { "loss": 1.2512, "grad_norm": 1.4510390758514404, "learning_rate": 0.00018100988255083958, "epoch": 0.09, "step": 6325 }, { "loss": 1.3135, "grad_norm": 1.2510138750076294, "learning_rate": 0.00018093478717971825, "epoch": 0.1, "step": 6350 }, { "loss": 1.2027, "grad_norm": 1.6383109092712402, "learning_rate": 0.0001808596918085969, "epoch": 0.1, "step": 6375 }, { "loss": 1.1677, "grad_norm": 1.6669763326644897, "learning_rate": 0.0001807845964374756, "epoch": 0.1, "step": 6400 }, { "loss": 1.2344, "grad_norm": 1.2947137355804443, "learning_rate": 0.00018070950106635427, "epoch": 0.1, "step": 6425 }, { "loss": 1.2195, "grad_norm": 1.673285961151123, "learning_rate": 0.00018063440569523296, "epoch": 0.1, "step": 6450 }, { "loss": 1.2719, "grad_norm": 2.102374792098999, "learning_rate": 0.00018055931032411165, "epoch": 0.1, "step": 6475 }, { "loss": 1.2524, "grad_norm": 1.37187659740448, "learning_rate": 0.0001804842149529903, "epoch": 0.1, "step": 6500 }, { "loss": 1.2169, "grad_norm": 2.1124603748321533, "learning_rate": 0.00018040911958186898, "epoch": 0.1, "step": 6525 }, { "loss": 1.2206, "grad_norm": 1.2506129741668701, "learning_rate": 0.00018033402421074765, "epoch": 0.1, "step": 6550 }, { "loss": 1.1774, "grad_norm": 1.5893070697784424, "learning_rate": 0.00018025892883962634, "epoch": 0.1, "step": 6575 }, { "loss": 1.2101, "grad_norm": 2.9019079208374023, "learning_rate": 0.000180183833468505, "epoch": 0.1, "step": 6600 }, { "loss": 1.205, "grad_norm": 1.71237313747406, "learning_rate": 0.0001801087380973837, "epoch": 0.1, "step": 6625 }, { "loss": 1.2044, "grad_norm": 1.9124983549118042, "learning_rate": 0.00018003364272626236, "epoch": 0.1, "step": 6650 }, { "loss": 1.2125, "grad_norm": 1.4448764324188232, "learning_rate": 0.00017995854735514103, "epoch": 0.1, "step": 6675 }, { "loss": 1.2076, "grad_norm": 3.0220255851745605, "learning_rate": 0.00017988345198401972, "epoch": 0.1, "step": 6700 }, { "loss": 1.1899, "grad_norm": 1.3249489068984985, "learning_rate": 0.0001798083566128984, "epoch": 0.1, "step": 6725 }, { "loss": 1.2352, "grad_norm": 1.4463756084442139, "learning_rate": 0.00017973326124177708, "epoch": 0.1, "step": 6750 }, { "loss": 1.3259, "grad_norm": 1.4026572704315186, "learning_rate": 0.00017965816587065575, "epoch": 0.1, "step": 6775 }, { "loss": 1.2282, "grad_norm": 0.9847255349159241, "learning_rate": 0.0001795830704995344, "epoch": 0.1, "step": 6800 }, { "loss": 1.1899, "grad_norm": 0.91238933801651, "learning_rate": 0.00017950797512841308, "epoch": 0.1, "step": 6825 }, { "loss": 1.2386, "grad_norm": 2.1862552165985107, "learning_rate": 0.00017943287975729177, "epoch": 0.1, "step": 6850 }, { "loss": 1.2094, "grad_norm": 1.10003662109375, "learning_rate": 0.00017935778438617043, "epoch": 0.1, "step": 6875 }, { "loss": 1.2218, "grad_norm": 1.8453437089920044, "learning_rate": 0.00017928268901504913, "epoch": 0.1, "step": 6900 }, { "loss": 1.262, "grad_norm": 1.0639673471450806, "learning_rate": 0.00017920759364392782, "epoch": 0.1, "step": 6925 }, { "loss": 1.2188, "grad_norm": 1.3008592128753662, "learning_rate": 0.00017913249827280646, "epoch": 0.1, "step": 6950 }, { "loss": 1.23, "grad_norm": 1.7703325748443604, "learning_rate": 0.00017905740290168515, "epoch": 0.1, "step": 6975 }, { "loss": 1.2316, "grad_norm": 1.1259385347366333, "learning_rate": 0.00017898230753056381, "epoch": 0.11, "step": 7000 }, { "loss": 1.2378, "grad_norm": 2.1661126613616943, "learning_rate": 0.0001789072121594425, "epoch": 0.11, "step": 7025 }, { "loss": 1.1915, "grad_norm": 2.1428678035736084, "learning_rate": 0.00017883211678832117, "epoch": 0.11, "step": 7050 }, { "loss": 1.2344, "grad_norm": 1.4568270444869995, "learning_rate": 0.00017875702141719986, "epoch": 0.11, "step": 7075 }, { "loss": 1.2825, "grad_norm": 1.5431725978851318, "learning_rate": 0.00017868192604607853, "epoch": 0.11, "step": 7100 }, { "loss": 1.2178, "grad_norm": 1.2386250495910645, "learning_rate": 0.0001786068306749572, "epoch": 0.11, "step": 7125 }, { "loss": 1.2189, "grad_norm": 1.1443992853164673, "learning_rate": 0.0001785317353038359, "epoch": 0.11, "step": 7150 }, { "loss": 1.2145, "grad_norm": 1.0868651866912842, "learning_rate": 0.00017845663993271455, "epoch": 0.11, "step": 7175 }, { "loss": 1.2001, "grad_norm": 1.2621536254882812, "learning_rate": 0.00017838154456159325, "epoch": 0.11, "step": 7200 }, { "loss": 1.2605, "grad_norm": 1.3004405498504639, "learning_rate": 0.0001783064491904719, "epoch": 0.11, "step": 7225 }, { "loss": 1.1685, "grad_norm": 1.7868775129318237, "learning_rate": 0.00017823135381935058, "epoch": 0.11, "step": 7250 }, { "loss": 1.1724, "grad_norm": 1.525883674621582, "learning_rate": 0.00017815625844822924, "epoch": 0.11, "step": 7275 }, { "loss": 1.1917, "grad_norm": 1.7897926568984985, "learning_rate": 0.00017808116307710793, "epoch": 0.11, "step": 7300 }, { "loss": 1.17, "grad_norm": 1.770201325416565, "learning_rate": 0.0001780060677059866, "epoch": 0.11, "step": 7325 }, { "loss": 1.1758, "grad_norm": 1.23914635181427, "learning_rate": 0.0001779309723348653, "epoch": 0.11, "step": 7350 }, { "loss": 1.266, "grad_norm": 1.5685780048370361, "learning_rate": 0.00017785587696374398, "epoch": 0.11, "step": 7375 }, { "loss": 1.1515, "grad_norm": 1.4432404041290283, "learning_rate": 0.00017778078159262265, "epoch": 0.11, "step": 7400 }, { "loss": 1.2221, "grad_norm": 1.4710851907730103, "learning_rate": 0.00017770568622150131, "epoch": 0.11, "step": 7425 }, { "loss": 1.2636, "grad_norm": 1.5943934917449951, "learning_rate": 0.00017763059085037998, "epoch": 0.11, "step": 7450 }, { "loss": 1.1915, "grad_norm": 1.3364222049713135, "learning_rate": 0.00017755549547925867, "epoch": 0.11, "step": 7475 }, { "loss": 1.2061, "grad_norm": 1.3201831579208374, "learning_rate": 0.00017748040010813734, "epoch": 0.11, "step": 7500 }, { "loss": 1.1852, "grad_norm": 1.3895928859710693, "learning_rate": 0.00017740530473701603, "epoch": 0.11, "step": 7525 }, { "loss": 1.2233, "grad_norm": 1.0795204639434814, "learning_rate": 0.0001773302093658947, "epoch": 0.11, "step": 7550 }, { "loss": 1.2034, "grad_norm": 1.7997777462005615, "learning_rate": 0.00017725511399477336, "epoch": 0.11, "step": 7575 }, { "loss": 1.1786, "grad_norm": 1.156964898109436, "learning_rate": 0.00017718001862365205, "epoch": 0.11, "step": 7600 }, { "loss": 1.1597, "grad_norm": 1.6956669092178345, "learning_rate": 0.00017710492325253072, "epoch": 0.11, "step": 7625 }, { "loss": 1.272, "grad_norm": 1.3330657482147217, "learning_rate": 0.0001770298278814094, "epoch": 0.11, "step": 7650 }, { "loss": 1.2597, "grad_norm": 0.8610468506813049, "learning_rate": 0.00017695473251028808, "epoch": 0.12, "step": 7675 }, { "loss": 1.2418, "grad_norm": 1.9568647146224976, "learning_rate": 0.00017687963713916674, "epoch": 0.12, "step": 7700 }, { "loss": 1.0969, "grad_norm": 1.2442560195922852, "learning_rate": 0.0001768045417680454, "epoch": 0.12, "step": 7725 }, { "loss": 1.1941, "grad_norm": 1.949724793434143, "learning_rate": 0.0001767294463969241, "epoch": 0.12, "step": 7750 }, { "loss": 1.2424, "grad_norm": 1.4135985374450684, "learning_rate": 0.0001766543510258028, "epoch": 0.12, "step": 7775 }, { "loss": 1.1894, "grad_norm": 1.8493655920028687, "learning_rate": 0.00017657925565468146, "epoch": 0.12, "step": 7800 }, { "loss": 1.2078, "grad_norm": 1.921787977218628, "learning_rate": 0.00017650416028356015, "epoch": 0.12, "step": 7825 }, { "loss": 1.2593, "grad_norm": 1.7355767488479614, "learning_rate": 0.00017642906491243881, "epoch": 0.12, "step": 7850 }, { "loss": 1.2583, "grad_norm": 2.5150203704833984, "learning_rate": 0.00017635396954131748, "epoch": 0.12, "step": 7875 }, { "loss": 1.1617, "grad_norm": 1.4067972898483276, "learning_rate": 0.00017627887417019615, "epoch": 0.12, "step": 7900 }, { "loss": 1.2691, "grad_norm": 1.6826621294021606, "learning_rate": 0.00017620377879907484, "epoch": 0.12, "step": 7925 }, { "loss": 1.1767, "grad_norm": 1.0282503366470337, "learning_rate": 0.0001761286834279535, "epoch": 0.12, "step": 7950 }, { "loss": 1.1887, "grad_norm": 2.0548410415649414, "learning_rate": 0.0001760535880568322, "epoch": 0.12, "step": 7975 }, { "loss": 1.2639, "grad_norm": 1.1392240524291992, "learning_rate": 0.00017597849268571086, "epoch": 0.12, "step": 8000 }, { "loss": 1.1991, "grad_norm": 1.8351316452026367, "learning_rate": 0.00017590339731458953, "epoch": 0.12, "step": 8025 }, { "loss": 1.265, "grad_norm": 1.801256775856018, "learning_rate": 0.00017582830194346822, "epoch": 0.12, "step": 8050 }, { "loss": 1.2895, "grad_norm": 1.379420280456543, "learning_rate": 0.00017575320657234688, "epoch": 0.12, "step": 8075 }, { "loss": 1.1896, "grad_norm": 1.7537370920181274, "learning_rate": 0.00017567811120122558, "epoch": 0.12, "step": 8100 }, { "loss": 1.2039, "grad_norm": 2.159595489501953, "learning_rate": 0.00017560301583010424, "epoch": 0.12, "step": 8125 }, { "loss": 1.2145, "grad_norm": 1.690534234046936, "learning_rate": 0.0001755279204589829, "epoch": 0.12, "step": 8150 }, { "loss": 1.1935, "grad_norm": 1.0568920373916626, "learning_rate": 0.00017545282508786157, "epoch": 0.12, "step": 8175 }, { "loss": 1.2304, "grad_norm": 1.6981247663497925, "learning_rate": 0.00017537772971674026, "epoch": 0.12, "step": 8200 }, { "loss": 1.1961, "grad_norm": 2.1610305309295654, "learning_rate": 0.00017530263434561896, "epoch": 0.12, "step": 8225 }, { "loss": 1.2387, "grad_norm": 1.8722481727600098, "learning_rate": 0.00017522753897449762, "epoch": 0.12, "step": 8250 }, { "loss": 1.2477, "grad_norm": 2.1257529258728027, "learning_rate": 0.00017515244360337631, "epoch": 0.12, "step": 8275 }, { "loss": 1.173, "grad_norm": 2.786665439605713, "learning_rate": 0.00017507734823225498, "epoch": 0.12, "step": 8300 }, { "loss": 1.3121, "grad_norm": 1.4147156476974487, "learning_rate": 0.00017500225286113365, "epoch": 0.13, "step": 8325 }, { "loss": 1.2495, "grad_norm": 1.6025553941726685, "learning_rate": 0.0001749271574900123, "epoch": 0.13, "step": 8350 }, { "loss": 1.2802, "grad_norm": 1.6167206764221191, "learning_rate": 0.000174852062118891, "epoch": 0.13, "step": 8375 }, { "loss": 1.2587, "grad_norm": 1.346677303314209, "learning_rate": 0.00017477696674776967, "epoch": 0.13, "step": 8400 }, { "loss": 1.1743, "grad_norm": 1.8484021425247192, "learning_rate": 0.00017470187137664836, "epoch": 0.13, "step": 8425 }, { "loss": 1.2875, "grad_norm": 2.0601062774658203, "learning_rate": 0.00017462677600552703, "epoch": 0.13, "step": 8450 }, { "loss": 1.1846, "grad_norm": 1.455112338066101, "learning_rate": 0.0001745516806344057, "epoch": 0.13, "step": 8475 }, { "loss": 1.2606, "grad_norm": 1.336016058921814, "learning_rate": 0.00017447658526328438, "epoch": 0.13, "step": 8500 }, { "loss": 1.2648, "grad_norm": 0.9691543579101562, "learning_rate": 0.00017440148989216305, "epoch": 0.13, "step": 8525 }, { "loss": 1.2686, "grad_norm": 1.4051158428192139, "learning_rate": 0.00017432639452104174, "epoch": 0.13, "step": 8550 }, { "loss": 1.2357, "grad_norm": 1.1069400310516357, "learning_rate": 0.0001742512991499204, "epoch": 0.13, "step": 8575 }, { "loss": 1.243, "grad_norm": 1.2926398515701294, "learning_rate": 0.00017417920759364394, "epoch": 0.13, "step": 8600 }, { "loss": 1.2178, "grad_norm": 2.581450939178467, "learning_rate": 0.0001741041122225226, "epoch": 0.13, "step": 8625 }, { "loss": 1.2037, "grad_norm": 3.525554656982422, "learning_rate": 0.0001740290168514013, "epoch": 0.13, "step": 8650 }, { "loss": 1.2447, "grad_norm": 1.7870151996612549, "learning_rate": 0.00017395392148027996, "epoch": 0.13, "step": 8675 }, { "loss": 1.2299, "grad_norm": 1.8541524410247803, "learning_rate": 0.00017387882610915865, "epoch": 0.13, "step": 8700 }, { "loss": 1.2425, "grad_norm": 1.767638921737671, "learning_rate": 0.00017380373073803732, "epoch": 0.13, "step": 8725 }, { "loss": 1.2167, "grad_norm": 1.1607838869094849, "learning_rate": 0.00017372863536691598, "epoch": 0.13, "step": 8750 }, { "loss": 1.2148, "grad_norm": 2.044637441635132, "learning_rate": 0.00017365353999579467, "epoch": 0.13, "step": 8775 }, { "loss": 1.1762, "grad_norm": 1.519467830657959, "learning_rate": 0.00017357844462467334, "epoch": 0.13, "step": 8800 }, { "loss": 1.2796, "grad_norm": 1.461225986480713, "learning_rate": 0.00017350334925355203, "epoch": 0.13, "step": 8825 }, { "loss": 1.2516, "grad_norm": 1.52583646774292, "learning_rate": 0.0001734282538824307, "epoch": 0.13, "step": 8850 }, { "loss": 1.1365, "grad_norm": 1.617851734161377, "learning_rate": 0.0001733531585113094, "epoch": 0.13, "step": 8875 }, { "loss": 1.1494, "grad_norm": 1.112012267112732, "learning_rate": 0.00017327806314018803, "epoch": 0.13, "step": 8900 }, { "loss": 1.199, "grad_norm": 1.9818586111068726, "learning_rate": 0.00017320296776906672, "epoch": 0.13, "step": 8925 }, { "loss": 1.2784, "grad_norm": 1.3736644983291626, "learning_rate": 0.00017312787239794539, "epoch": 0.13, "step": 8950 }, { "loss": 1.2714, "grad_norm": 1.3875724077224731, "learning_rate": 0.00017305277702682408, "epoch": 0.13, "step": 8975 }, { "loss": 1.2155, "grad_norm": 1.4930505752563477, "learning_rate": 0.00017297768165570277, "epoch": 0.14, "step": 9000 }, { "loss": 1.237, "grad_norm": 1.9728326797485352, "learning_rate": 0.00017290258628458144, "epoch": 0.14, "step": 9025 }, { "loss": 1.1828, "grad_norm": 1.2152589559555054, "learning_rate": 0.0001728274909134601, "epoch": 0.14, "step": 9050 }, { "loss": 1.2602, "grad_norm": 2.241239547729492, "learning_rate": 0.00017275239554233877, "epoch": 0.14, "step": 9075 }, { "loss": 1.2174, "grad_norm": 2.192455768585205, "learning_rate": 0.00017267730017121746, "epoch": 0.14, "step": 9100 }, { "loss": 1.2197, "grad_norm": 1.2781050205230713, "learning_rate": 0.00017260220480009612, "epoch": 0.14, "step": 9125 }, { "loss": 1.1877, "grad_norm": 1.4289566278457642, "learning_rate": 0.00017252710942897482, "epoch": 0.14, "step": 9150 }, { "loss": 1.2482, "grad_norm": 1.4809447526931763, "learning_rate": 0.00017245201405785348, "epoch": 0.14, "step": 9175 }, { "loss": 1.2194, "grad_norm": 1.5703109502792358, "learning_rate": 0.00017237691868673215, "epoch": 0.14, "step": 9200 }, { "loss": 1.2036, "grad_norm": 1.5251587629318237, "learning_rate": 0.00017230182331561084, "epoch": 0.14, "step": 9225 }, { "loss": 1.2469, "grad_norm": 0.9070261120796204, "learning_rate": 0.0001722267279444895, "epoch": 0.14, "step": 9250 }, { "loss": 1.2316, "grad_norm": 2.398056745529175, "learning_rate": 0.0001721516325733682, "epoch": 0.14, "step": 9275 }, { "loss": 1.2225, "grad_norm": 1.3680628538131714, "learning_rate": 0.00017207653720224686, "epoch": 0.14, "step": 9300 }, { "loss": 1.2154, "grad_norm": 1.577989935874939, "learning_rate": 0.00017200144183112556, "epoch": 0.14, "step": 9325 }, { "loss": 1.1482, "grad_norm": 1.868891716003418, "learning_rate": 0.0001719263464600042, "epoch": 0.14, "step": 9350 }, { "loss": 1.2566, "grad_norm": 2.225888729095459, "learning_rate": 0.00017185125108888289, "epoch": 0.14, "step": 9375 }, { "loss": 1.3212, "grad_norm": 1.519579529762268, "learning_rate": 0.00017177615571776155, "epoch": 0.14, "step": 9400 }, { "loss": 1.2287, "grad_norm": 1.0716164112091064, "learning_rate": 0.00017170106034664024, "epoch": 0.14, "step": 9425 }, { "loss": 1.1992, "grad_norm": 1.8280526399612427, "learning_rate": 0.00017162596497551894, "epoch": 0.14, "step": 9450 }, { "loss": 1.1633, "grad_norm": 1.8167325258255005, "learning_rate": 0.0001715508696043976, "epoch": 0.14, "step": 9475 }, { "loss": 1.2104, "grad_norm": 1.3616135120391846, "learning_rate": 0.00017147577423327627, "epoch": 0.14, "step": 9500 }, { "loss": 1.1943, "grad_norm": 2.055335283279419, "learning_rate": 0.00017140067886215493, "epoch": 0.14, "step": 9525 }, { "loss": 1.1744, "grad_norm": 1.173204779624939, "learning_rate": 0.00017132558349103362, "epoch": 0.14, "step": 9550 }, { "loss": 1.204, "grad_norm": 1.9650391340255737, "learning_rate": 0.0001712504881199123, "epoch": 0.14, "step": 9575 }, { "loss": 1.1757, "grad_norm": 1.167233943939209, "learning_rate": 0.00017117539274879098, "epoch": 0.14, "step": 9600 }, { "loss": 1.1676, "grad_norm": 1.160571575164795, "learning_rate": 0.00017110029737766965, "epoch": 0.14, "step": 9625 }, { "loss": 1.2729, "grad_norm": 1.3096935749053955, "learning_rate": 0.0001710252020065483, "epoch": 0.14, "step": 9650 }, { "loss": 1.1832, "grad_norm": 1.2549477815628052, "learning_rate": 0.000170950106635427, "epoch": 0.15, "step": 9675 }, { "loss": 1.1984, "grad_norm": 1.156880497932434, "learning_rate": 0.00017087501126430567, "epoch": 0.15, "step": 9700 }, { "loss": 1.2719, "grad_norm": 1.974812626838684, "learning_rate": 0.00017079991589318436, "epoch": 0.15, "step": 9725 }, { "loss": 1.2269, "grad_norm": 1.3916237354278564, "learning_rate": 0.00017072482052206303, "epoch": 0.15, "step": 9750 }, { "loss": 1.2587, "grad_norm": 1.1099380254745483, "learning_rate": 0.00017064972515094172, "epoch": 0.15, "step": 9775 }, { "loss": 1.171, "grad_norm": 1.169327735900879, "learning_rate": 0.00017057462977982036, "epoch": 0.15, "step": 9800 }, { "loss": 1.1879, "grad_norm": 1.3058301210403442, "learning_rate": 0.00017049953440869905, "epoch": 0.15, "step": 9825 }, { "loss": 1.3067, "grad_norm": 1.6860321760177612, "learning_rate": 0.00017042443903757772, "epoch": 0.15, "step": 9850 }, { "loss": 1.2436, "grad_norm": 1.7748676538467407, "learning_rate": 0.0001703493436664564, "epoch": 0.15, "step": 9875 }, { "loss": 1.2847, "grad_norm": 1.3805527687072754, "learning_rate": 0.0001702742482953351, "epoch": 0.15, "step": 9900 }, { "loss": 1.1507, "grad_norm": 1.1719329357147217, "learning_rate": 0.00017019915292421377, "epoch": 0.15, "step": 9925 }, { "loss": 1.1982, "grad_norm": 3.254032850265503, "learning_rate": 0.00017012405755309243, "epoch": 0.15, "step": 9950 }, { "loss": 1.2556, "grad_norm": 1.6937150955200195, "learning_rate": 0.0001700489621819711, "epoch": 0.15, "step": 9975 }, { "loss": 1.1951, "grad_norm": 0.9732112884521484, "learning_rate": 0.0001699738668108498, "epoch": 0.15, "step": 10000 }, { "loss": 1.2225, "grad_norm": 1.152357816696167, "learning_rate": 0.00016989877143972845, "epoch": 0.15, "step": 10025 }, { "loss": 1.2185, "grad_norm": 1.9174104928970337, "learning_rate": 0.00016982367606860715, "epoch": 0.15, "step": 10050 }, { "loss": 1.1886, "grad_norm": 2.638831377029419, "learning_rate": 0.0001697485806974858, "epoch": 0.15, "step": 10075 }, { "loss": 1.2805, "grad_norm": 1.4505808353424072, "learning_rate": 0.00016967348532636448, "epoch": 0.15, "step": 10100 }, { "loss": 1.2714, "grad_norm": 1.9908664226531982, "learning_rate": 0.00016959838995524317, "epoch": 0.15, "step": 10125 }, { "loss": 1.2362, "grad_norm": 0.9299131035804749, "learning_rate": 0.00016952329458412184, "epoch": 0.15, "step": 10150 }, { "loss": 1.212, "grad_norm": 2.036597490310669, "learning_rate": 0.00016944819921300053, "epoch": 0.15, "step": 10175 }, { "loss": 1.1789, "grad_norm": 0.9963513016700745, "learning_rate": 0.0001693731038418792, "epoch": 0.15, "step": 10200 }, { "loss": 1.2206, "grad_norm": 1.2980750799179077, "learning_rate": 0.00016929800847075789, "epoch": 0.15, "step": 10225 }, { "loss": 1.2015, "grad_norm": 2.1614062786102295, "learning_rate": 0.00016922291309963652, "epoch": 0.15, "step": 10250 }, { "loss": 1.2411, "grad_norm": 2.045715093612671, "learning_rate": 0.00016914781772851522, "epoch": 0.15, "step": 10275 }, { "loss": 1.2117, "grad_norm": 1.5198246240615845, "learning_rate": 0.0001690727223573939, "epoch": 0.15, "step": 10300 }, { "loss": 1.2269, "grad_norm": 1.563063383102417, "learning_rate": 0.00016900063080111744, "epoch": 0.16, "step": 10325 }, { "loss": 1.2857, "grad_norm": 1.2115886211395264, "learning_rate": 0.0001689255354299961, "epoch": 0.16, "step": 10350 }, { "loss": 1.2339, "grad_norm": 1.6522163152694702, "learning_rate": 0.00016885044005887477, "epoch": 0.16, "step": 10375 }, { "loss": 1.2416, "grad_norm": 1.334186315536499, "learning_rate": 0.00016877534468775343, "epoch": 0.16, "step": 10400 }, { "loss": 1.1811, "grad_norm": 2.520540475845337, "learning_rate": 0.00016870024931663213, "epoch": 0.16, "step": 10425 }, { "loss": 1.2504, "grad_norm": 1.4244968891143799, "learning_rate": 0.00016862515394551082, "epoch": 0.16, "step": 10450 }, { "loss": 1.3024, "grad_norm": 1.7212327718734741, "learning_rate": 0.00016855005857438948, "epoch": 0.16, "step": 10475 }, { "loss": 1.2481, "grad_norm": 1.369234323501587, "learning_rate": 0.00016847496320326818, "epoch": 0.16, "step": 10500 }, { "loss": 1.2456, "grad_norm": 1.2592421770095825, "learning_rate": 0.00016839986783214684, "epoch": 0.16, "step": 10525 }, { "loss": 1.2845, "grad_norm": 1.891320824623108, "learning_rate": 0.0001683247724610255, "epoch": 0.16, "step": 10550 }, { "loss": 1.1581, "grad_norm": 1.653385877609253, "learning_rate": 0.00016824967708990417, "epoch": 0.16, "step": 10575 }, { "loss": 1.2469, "grad_norm": 1.4522532224655151, "learning_rate": 0.00016817458171878286, "epoch": 0.16, "step": 10600 }, { "loss": 1.1964, "grad_norm": 2.0278687477111816, "learning_rate": 0.00016809948634766153, "epoch": 0.16, "step": 10625 }, { "loss": 1.1826, "grad_norm": 7.241672039031982, "learning_rate": 0.00016802439097654022, "epoch": 0.16, "step": 10650 }, { "loss": 1.2321, "grad_norm": 1.5438281297683716, "learning_rate": 0.0001679553032351086, "epoch": 0.16, "step": 10675 }, { "loss": 1.1776, "grad_norm": 1.0334819555282593, "learning_rate": 0.00016788020786398728, "epoch": 0.16, "step": 10700 }, { "loss": 1.2905, "grad_norm": 1.578046202659607, "learning_rate": 0.0001678081163077108, "epoch": 0.16, "step": 10725 }, { "loss": 1.1721, "grad_norm": 1.1447938680648804, "learning_rate": 0.00016773302093658947, "epoch": 0.16, "step": 10750 }, { "loss": 1.1305, "grad_norm": 1.328674554824829, "learning_rate": 0.00016765792556546817, "epoch": 0.16, "step": 10775 }, { "loss": 1.1729, "grad_norm": 1.5958043336868286, "learning_rate": 0.00016758283019434683, "epoch": 0.16, "step": 10800 }, { "loss": 1.1459, "grad_norm": 1.4962025880813599, "learning_rate": 0.0001675077348232255, "epoch": 0.16, "step": 10825 }, { "loss": 1.1864, "grad_norm": 1.540279507637024, "learning_rate": 0.0001674326394521042, "epoch": 0.16, "step": 10850 }, { "loss": 1.2842, "grad_norm": 1.6456973552703857, "learning_rate": 0.00016735754408098285, "epoch": 0.16, "step": 10875 }, { "loss": 1.2236, "grad_norm": 1.9610776901245117, "learning_rate": 0.00016728244870986155, "epoch": 0.16, "step": 10900 }, { "loss": 1.1825, "grad_norm": 1.4995285272598267, "learning_rate": 0.0001672073533387402, "epoch": 0.16, "step": 10925 }, { "loss": 1.2341, "grad_norm": 1.0755623579025269, "learning_rate": 0.00016713225796761888, "epoch": 0.16, "step": 10950 }, { "loss": 1.2119, "grad_norm": 1.8127145767211914, "learning_rate": 0.00016705716259649754, "epoch": 0.16, "step": 10975 }, { "loss": 1.2211, "grad_norm": 1.8502295017242432, "learning_rate": 0.00016698206722537623, "epoch": 0.17, "step": 11000 }, { "loss": 1.2158, "grad_norm": 1.6311461925506592, "learning_rate": 0.0001669069718542549, "epoch": 0.17, "step": 11025 }, { "loss": 1.2579, "grad_norm": 1.3985036611557007, "learning_rate": 0.0001668318764831336, "epoch": 0.17, "step": 11050 }, { "loss": 1.1595, "grad_norm": 2.4565913677215576, "learning_rate": 0.00016675678111201228, "epoch": 0.17, "step": 11075 }, { "loss": 1.2151, "grad_norm": 1.9943021535873413, "learning_rate": 0.00016668168574089092, "epoch": 0.17, "step": 11100 }, { "loss": 1.2253, "grad_norm": 1.246576189994812, "learning_rate": 0.00016660659036976962, "epoch": 0.17, "step": 11125 }, { "loss": 1.1946, "grad_norm": 1.1769171953201294, "learning_rate": 0.00016653149499864828, "epoch": 0.17, "step": 11150 }, { "loss": 1.2189, "grad_norm": 1.2264093160629272, "learning_rate": 0.00016645639962752697, "epoch": 0.17, "step": 11175 }, { "loss": 1.1773, "grad_norm": 1.0736924409866333, "learning_rate": 0.00016638130425640564, "epoch": 0.17, "step": 11200 }, { "loss": 1.2057, "grad_norm": 1.527783751487732, "learning_rate": 0.00016630620888528433, "epoch": 0.17, "step": 11225 }, { "loss": 1.2292, "grad_norm": 1.3747711181640625, "learning_rate": 0.000166231113514163, "epoch": 0.17, "step": 11250 }, { "loss": 1.2385, "grad_norm": 1.5345367193222046, "learning_rate": 0.00016615601814304166, "epoch": 0.17, "step": 11275 }, { "loss": 1.2456, "grad_norm": 1.1881415843963623, "learning_rate": 0.00016608092277192035, "epoch": 0.17, "step": 11300 }, { "loss": 1.2099, "grad_norm": 1.1072256565093994, "learning_rate": 0.00016600582740079902, "epoch": 0.17, "step": 11325 }, { "loss": 1.2128, "grad_norm": 2.2356455326080322, "learning_rate": 0.0001659307320296777, "epoch": 0.17, "step": 11350 }, { "loss": 1.1638, "grad_norm": 3.343397855758667, "learning_rate": 0.00016585563665855638, "epoch": 0.17, "step": 11375 }, { "loss": 1.1879, "grad_norm": 1.5066440105438232, "learning_rate": 0.00016578054128743504, "epoch": 0.17, "step": 11400 }, { "loss": 1.1868, "grad_norm": 1.4099555015563965, "learning_rate": 0.0001657054459163137, "epoch": 0.17, "step": 11425 }, { "loss": 1.1882, "grad_norm": 1.6867519617080688, "learning_rate": 0.0001656303505451924, "epoch": 0.17, "step": 11450 }, { "loss": 1.1986, "grad_norm": 1.4436876773834229, "learning_rate": 0.0001655552551740711, "epoch": 0.17, "step": 11475 }, { "loss": 1.2023, "grad_norm": 1.2447092533111572, "learning_rate": 0.00016548015980294976, "epoch": 0.17, "step": 11500 }, { "loss": 1.1136, "grad_norm": 2.2803616523742676, "learning_rate": 0.00016540506443182845, "epoch": 0.17, "step": 11525 }, { "loss": 1.2135, "grad_norm": 2.992870807647705, "learning_rate": 0.00016532996906070712, "epoch": 0.17, "step": 11550 }, { "loss": 1.1864, "grad_norm": 1.6845029592514038, "learning_rate": 0.00016525487368958578, "epoch": 0.17, "step": 11575 }, { "loss": 1.1924, "grad_norm": 1.52716863155365, "learning_rate": 0.00016517977831846445, "epoch": 0.17, "step": 11600 }, { "loss": 1.2462, "grad_norm": 1.1273847818374634, "learning_rate": 0.00016510468294734314, "epoch": 0.17, "step": 11625 }, { "loss": 1.2411, "grad_norm": 1.472161054611206, "learning_rate": 0.0001650295875762218, "epoch": 0.17, "step": 11650 }, { "loss": 1.2534, "grad_norm": 1.0381845235824585, "learning_rate": 0.0001649544922051005, "epoch": 0.18, "step": 11675 }, { "loss": 1.1666, "grad_norm": 1.7663735151290894, "learning_rate": 0.00016487939683397916, "epoch": 0.18, "step": 11700 }, { "loss": 1.1811, "grad_norm": 1.8977352380752563, "learning_rate": 0.00016480430146285783, "epoch": 0.18, "step": 11725 }, { "loss": 1.2674, "grad_norm": 1.2944955825805664, "learning_rate": 0.00016472920609173652, "epoch": 0.18, "step": 11750 }, { "loss": 1.259, "grad_norm": 1.2735075950622559, "learning_rate": 0.00016465411072061518, "epoch": 0.18, "step": 11775 }, { "loss": 1.1718, "grad_norm": 1.3027160167694092, "learning_rate": 0.00016457901534949388, "epoch": 0.18, "step": 11800 }, { "loss": 1.2034, "grad_norm": 1.0722211599349976, "learning_rate": 0.00016450391997837254, "epoch": 0.18, "step": 11825 }, { "loss": 1.1761, "grad_norm": 2.5194356441497803, "learning_rate": 0.0001644288246072512, "epoch": 0.18, "step": 11850 }, { "loss": 1.213, "grad_norm": 1.5070539712905884, "learning_rate": 0.00016435372923612987, "epoch": 0.18, "step": 11875 }, { "loss": 1.1899, "grad_norm": 2.126924991607666, "learning_rate": 0.00016427863386500857, "epoch": 0.18, "step": 11900 }, { "loss": 1.2114, "grad_norm": 1.6474621295928955, "learning_rate": 0.00016420353849388726, "epoch": 0.18, "step": 11925 }, { "loss": 1.2885, "grad_norm": 1.4247677326202393, "learning_rate": 0.00016412844312276592, "epoch": 0.18, "step": 11950 }, { "loss": 1.1922, "grad_norm": 1.7299450635910034, "learning_rate": 0.00016405334775164462, "epoch": 0.18, "step": 11975 }, { "loss": 1.255, "grad_norm": 1.1741243600845337, "learning_rate": 0.00016397825238052328, "epoch": 0.18, "step": 12000 }, { "loss": 1.1358, "grad_norm": 3.301985502243042, "learning_rate": 0.00016390315700940195, "epoch": 0.18, "step": 12025 }, { "loss": 1.2252, "grad_norm": 1.730089783668518, "learning_rate": 0.0001638280616382806, "epoch": 0.18, "step": 12050 }, { "loss": 1.2015, "grad_norm": 0.9660411477088928, "learning_rate": 0.0001637529662671593, "epoch": 0.18, "step": 12075 }, { "loss": 1.1952, "grad_norm": 1.2349923849105835, "learning_rate": 0.00016367787089603797, "epoch": 0.18, "step": 12100 }, { "loss": 1.1539, "grad_norm": 1.5074914693832397, "learning_rate": 0.00016360277552491666, "epoch": 0.18, "step": 12125 }, { "loss": 1.2497, "grad_norm": 1.2159485816955566, "learning_rate": 0.00016352768015379533, "epoch": 0.18, "step": 12150 }, { "loss": 1.2624, "grad_norm": 1.7598042488098145, "learning_rate": 0.000163452584782674, "epoch": 0.18, "step": 12175 }, { "loss": 1.2243, "grad_norm": 1.2727563381195068, "learning_rate": 0.00016337748941155268, "epoch": 0.18, "step": 12200 }, { "loss": 1.2093, "grad_norm": 1.205769658088684, "learning_rate": 0.00016330239404043135, "epoch": 0.18, "step": 12225 }, { "loss": 1.1866, "grad_norm": 5.114007949829102, "learning_rate": 0.00016322729866931004, "epoch": 0.18, "step": 12250 }, { "loss": 1.2824, "grad_norm": 2.016160488128662, "learning_rate": 0.0001631522032981887, "epoch": 0.18, "step": 12275 }, { "loss": 1.1501, "grad_norm": 1.1405665874481201, "learning_rate": 0.00016307710792706737, "epoch": 0.18, "step": 12300 }, { "loss": 1.1795, "grad_norm": 2.2503092288970947, "learning_rate": 0.00016300201255594604, "epoch": 0.19, "step": 12325 }, { "loss": 1.2146, "grad_norm": 1.0414721965789795, "learning_rate": 0.00016292691718482473, "epoch": 0.19, "step": 12350 }, { "loss": 1.2338, "grad_norm": 1.7036515474319458, "learning_rate": 0.00016285182181370342, "epoch": 0.19, "step": 12375 }, { "loss": 1.2638, "grad_norm": 1.2566134929656982, "learning_rate": 0.0001627767264425821, "epoch": 0.19, "step": 12400 }, { "loss": 1.1694, "grad_norm": 1.4524366855621338, "learning_rate": 0.00016270163107146078, "epoch": 0.19, "step": 12425 }, { "loss": 1.1826, "grad_norm": 1.2547303438186646, "learning_rate": 0.00016262653570033945, "epoch": 0.19, "step": 12450 }, { "loss": 1.1885, "grad_norm": 3.284105062484741, "learning_rate": 0.0001625514403292181, "epoch": 0.19, "step": 12475 }, { "loss": 1.1785, "grad_norm": 1.14161217212677, "learning_rate": 0.00016247634495809678, "epoch": 0.19, "step": 12500 }, { "loss": 1.156, "grad_norm": 1.9379956722259521, "learning_rate": 0.00016240124958697547, "epoch": 0.19, "step": 12525 }, { "loss": 1.1911, "grad_norm": 1.1594531536102295, "learning_rate": 0.00016232615421585413, "epoch": 0.19, "step": 12550 }, { "loss": 1.1905, "grad_norm": 1.3584635257720947, "learning_rate": 0.00016225105884473283, "epoch": 0.19, "step": 12575 }, { "loss": 1.2023, "grad_norm": 1.402160406112671, "learning_rate": 0.0001621759634736115, "epoch": 0.19, "step": 12600 }, { "loss": 1.235, "grad_norm": 1.3611042499542236, "learning_rate": 0.00016210086810249016, "epoch": 0.19, "step": 12625 }, { "loss": 1.1894, "grad_norm": 1.1458463668823242, "learning_rate": 0.00016202577273136885, "epoch": 0.19, "step": 12650 }, { "loss": 1.1829, "grad_norm": 0.8500710725784302, "learning_rate": 0.00016195067736024752, "epoch": 0.19, "step": 12675 }, { "loss": 1.1632, "grad_norm": 1.5723693370819092, "learning_rate": 0.0001618755819891262, "epoch": 0.19, "step": 12700 }, { "loss": 1.1982, "grad_norm": 1.399224042892456, "learning_rate": 0.00016180048661800487, "epoch": 0.19, "step": 12725 }, { "loss": 1.2511, "grad_norm": 2.703968048095703, "learning_rate": 0.00016172539124688354, "epoch": 0.19, "step": 12750 }, { "loss": 1.1905, "grad_norm": 1.6090725660324097, "learning_rate": 0.00016165029587576223, "epoch": 0.19, "step": 12775 }, { "loss": 1.2074, "grad_norm": 2.323432207107544, "learning_rate": 0.0001615752005046409, "epoch": 0.19, "step": 12800 }, { "loss": 1.2514, "grad_norm": 1.0441837310791016, "learning_rate": 0.0001615001051335196, "epoch": 0.19, "step": 12825 }, { "loss": 1.2018, "grad_norm": 1.3072987794876099, "learning_rate": 0.00016142500976239825, "epoch": 0.19, "step": 12850 }, { "loss": 1.169, "grad_norm": 2.1105244159698486, "learning_rate": 0.00016134991439127695, "epoch": 0.19, "step": 12875 }, { "loss": 1.2361, "grad_norm": 1.4109976291656494, "learning_rate": 0.0001612748190201556, "epoch": 0.19, "step": 12900 }, { "loss": 1.2543, "grad_norm": 1.5119200944900513, "learning_rate": 0.00016119972364903428, "epoch": 0.19, "step": 12925 }, { "loss": 1.2326, "grad_norm": 1.3456885814666748, "learning_rate": 0.00016112462827791294, "epoch": 0.19, "step": 12950 }, { "loss": 1.1761, "grad_norm": 2.7535812854766846, "learning_rate": 0.00016104953290679163, "epoch": 0.19, "step": 12975 }, { "loss": 1.1413, "grad_norm": 1.665337085723877, "learning_rate": 0.00016097443753567033, "epoch": 0.2, "step": 13000 }, { "loss": 1.1836, "grad_norm": 1.1174890995025635, "learning_rate": 0.000160899342164549, "epoch": 0.2, "step": 13025 }, { "loss": 1.1405, "grad_norm": 2.4042136669158936, "learning_rate": 0.00016082424679342766, "epoch": 0.2, "step": 13050 }, { "loss": 1.2339, "grad_norm": 2.3713090419769287, "learning_rate": 0.00016074915142230632, "epoch": 0.2, "step": 13075 }, { "loss": 1.2351, "grad_norm": 1.7716904878616333, "learning_rate": 0.00016067405605118502, "epoch": 0.2, "step": 13100 }, { "loss": 1.194, "grad_norm": 1.2277339696884155, "learning_rate": 0.00016059896068006368, "epoch": 0.2, "step": 13125 }, { "loss": 1.2429, "grad_norm": 1.2725192308425903, "learning_rate": 0.00016052386530894237, "epoch": 0.2, "step": 13150 }, { "loss": 1.1837, "grad_norm": 1.4028089046478271, "learning_rate": 0.00016044876993782104, "epoch": 0.2, "step": 13175 }, { "loss": 1.2633, "grad_norm": 3.1674065589904785, "learning_rate": 0.0001603736745666997, "epoch": 0.2, "step": 13200 }, { "loss": 1.2575, "grad_norm": 1.3717881441116333, "learning_rate": 0.0001602985791955784, "epoch": 0.2, "step": 13225 }, { "loss": 1.2385, "grad_norm": 1.5640596151351929, "learning_rate": 0.00016022348382445706, "epoch": 0.2, "step": 13250 }, { "loss": 1.231, "grad_norm": 1.336003303527832, "learning_rate": 0.00016014838845333575, "epoch": 0.2, "step": 13275 }, { "loss": 1.1247, "grad_norm": 1.0398321151733398, "learning_rate": 0.00016007329308221442, "epoch": 0.2, "step": 13300 }, { "loss": 1.1848, "grad_norm": 1.5215067863464355, "learning_rate": 0.0001599981977110931, "epoch": 0.2, "step": 13325 }, { "loss": 1.2053, "grad_norm": 1.184665560722351, "learning_rate": 0.00015992310233997178, "epoch": 0.2, "step": 13350 }, { "loss": 1.2213, "grad_norm": 3.2756311893463135, "learning_rate": 0.00015984800696885044, "epoch": 0.2, "step": 13375 }, { "loss": 1.1623, "grad_norm": 2.2092206478118896, "learning_rate": 0.0001597729115977291, "epoch": 0.2, "step": 13400 }, { "loss": 1.1939, "grad_norm": 1.701504111289978, "learning_rate": 0.0001596978162266078, "epoch": 0.2, "step": 13425 }, { "loss": 1.194, "grad_norm": 1.0575650930404663, "learning_rate": 0.0001596227208554865, "epoch": 0.2, "step": 13450 }, { "loss": 1.1561, "grad_norm": 2.7198948860168457, "learning_rate": 0.00015954762548436516, "epoch": 0.2, "step": 13475 }, { "loss": 1.1449, "grad_norm": 1.2031759023666382, "learning_rate": 0.00015947253011324382, "epoch": 0.2, "step": 13500 }, { "loss": 1.195, "grad_norm": 1.3267816305160522, "learning_rate": 0.0001593974347421225, "epoch": 0.2, "step": 13525 }, { "loss": 1.1566, "grad_norm": 1.4941660165786743, "learning_rate": 0.00015932233937100118, "epoch": 0.2, "step": 13550 }, { "loss": 1.1218, "grad_norm": 0.9819481372833252, "learning_rate": 0.00015924724399987985, "epoch": 0.2, "step": 13575 }, { "loss": 1.2457, "grad_norm": 1.1329920291900635, "learning_rate": 0.00015917214862875854, "epoch": 0.2, "step": 13600 }, { "loss": 1.2218, "grad_norm": 1.0208684206008911, "learning_rate": 0.0001590970532576372, "epoch": 0.2, "step": 13625 }, { "loss": 1.3095, "grad_norm": 1.9692599773406982, "learning_rate": 0.00015902195788651587, "epoch": 0.21, "step": 13650 }, { "loss": 1.1426, "grad_norm": 1.1488243341445923, "learning_rate": 0.00015894686251539456, "epoch": 0.21, "step": 13675 }, { "loss": 1.1786, "grad_norm": 2.137523651123047, "learning_rate": 0.00015887176714427323, "epoch": 0.21, "step": 13700 }, { "loss": 1.2123, "grad_norm": 1.74925696849823, "learning_rate": 0.00015879667177315192, "epoch": 0.21, "step": 13725 }, { "loss": 1.2237, "grad_norm": 1.931201457977295, "learning_rate": 0.00015872157640203058, "epoch": 0.21, "step": 13750 }, { "loss": 1.1822, "grad_norm": 1.3742233514785767, "learning_rate": 0.00015864648103090928, "epoch": 0.21, "step": 13775 }, { "loss": 1.2393, "grad_norm": 1.860449194908142, "learning_rate": 0.00015857138565978794, "epoch": 0.21, "step": 13800 }, { "loss": 1.16, "grad_norm": 2.664776086807251, "learning_rate": 0.0001584962902886666, "epoch": 0.21, "step": 13825 }, { "loss": 1.172, "grad_norm": 2.5164761543273926, "learning_rate": 0.00015842119491754527, "epoch": 0.21, "step": 13850 }, { "loss": 1.1531, "grad_norm": 1.644278645515442, "learning_rate": 0.00015834609954642397, "epoch": 0.21, "step": 13875 }, { "loss": 1.2801, "grad_norm": 1.2100858688354492, "learning_rate": 0.00015827100417530266, "epoch": 0.21, "step": 13900 }, { "loss": 1.2011, "grad_norm": 1.9542933702468872, "learning_rate": 0.00015819590880418132, "epoch": 0.21, "step": 13925 }, { "loss": 1.2344, "grad_norm": 1.1991852521896362, "learning_rate": 0.00015812081343306, "epoch": 0.21, "step": 13950 }, { "loss": 1.1884, "grad_norm": 1.9113025665283203, "learning_rate": 0.00015804571806193865, "epoch": 0.21, "step": 13975 }, { "loss": 1.242, "grad_norm": 1.4621787071228027, "learning_rate": 0.00015797062269081735, "epoch": 0.21, "step": 14000 }, { "loss": 1.1961, "grad_norm": 1.9302442073822021, "learning_rate": 0.000157895527319696, "epoch": 0.21, "step": 14025 }, { "loss": 1.2159, "grad_norm": 1.3267945051193237, "learning_rate": 0.0001578204319485747, "epoch": 0.21, "step": 14050 }, { "loss": 1.1573, "grad_norm": 1.2569104433059692, "learning_rate": 0.0001577453365774534, "epoch": 0.21, "step": 14075 }, { "loss": 1.2149, "grad_norm": 1.3353804349899292, "learning_rate": 0.00015767024120633203, "epoch": 0.21, "step": 14100 }, { "loss": 1.198, "grad_norm": 1.9309898614883423, "learning_rate": 0.00015759514583521073, "epoch": 0.21, "step": 14125 }, { "loss": 1.1742, "grad_norm": 1.2149921655654907, "learning_rate": 0.0001575200504640894, "epoch": 0.21, "step": 14150 }, { "loss": 1.1855, "grad_norm": 1.9573317766189575, "learning_rate": 0.00015744495509296808, "epoch": 0.21, "step": 14175 }, { "loss": 1.2459, "grad_norm": 1.384567379951477, "learning_rate": 0.00015736985972184675, "epoch": 0.21, "step": 14200 }, { "loss": 1.1853, "grad_norm": 1.7285842895507812, "learning_rate": 0.00015729476435072544, "epoch": 0.21, "step": 14225 }, { "loss": 1.1728, "grad_norm": 2.050541877746582, "learning_rate": 0.0001572196689796041, "epoch": 0.21, "step": 14250 }, { "loss": 1.2248, "grad_norm": 1.735643744468689, "learning_rate": 0.00015714457360848277, "epoch": 0.21, "step": 14275 }, { "loss": 1.1792, "grad_norm": 1.511836290359497, "learning_rate": 0.00015706947823736147, "epoch": 0.21, "step": 14300 }, { "loss": 1.1978, "grad_norm": 1.1453663110733032, "learning_rate": 0.00015699438286624013, "epoch": 0.22, "step": 14325 }, { "loss": 1.1747, "grad_norm": 1.8787868022918701, "learning_rate": 0.00015691928749511882, "epoch": 0.22, "step": 14350 }, { "loss": 1.1946, "grad_norm": 2.0433459281921387, "learning_rate": 0.0001568441921239975, "epoch": 0.22, "step": 14375 }, { "loss": 1.1676, "grad_norm": 1.6258106231689453, "learning_rate": 0.00015676909675287615, "epoch": 0.22, "step": 14400 }, { "loss": 1.1486, "grad_norm": 1.0429004430770874, "learning_rate": 0.00015669400138175482, "epoch": 0.22, "step": 14425 }, { "loss": 1.2211, "grad_norm": 1.5074403285980225, "learning_rate": 0.0001566189060106335, "epoch": 0.22, "step": 14450 }, { "loss": 1.2161, "grad_norm": 1.4326659440994263, "learning_rate": 0.00015654381063951218, "epoch": 0.22, "step": 14475 }, { "loss": 1.2538, "grad_norm": 1.8539921045303345, "learning_rate": 0.00015646871526839087, "epoch": 0.22, "step": 14500 }, { "loss": 1.1929, "grad_norm": 1.7635362148284912, "learning_rate": 0.00015639361989726956, "epoch": 0.22, "step": 14525 }, { "loss": 1.2115, "grad_norm": 1.3895171880722046, "learning_rate": 0.00015631852452614823, "epoch": 0.22, "step": 14550 }, { "loss": 1.1723, "grad_norm": 1.5900187492370605, "learning_rate": 0.0001562434291550269, "epoch": 0.22, "step": 14575 }, { "loss": 1.186, "grad_norm": 1.7074415683746338, "learning_rate": 0.00015616833378390556, "epoch": 0.22, "step": 14600 }, { "loss": 1.187, "grad_norm": 1.3961682319641113, "learning_rate": 0.00015609323841278425, "epoch": 0.22, "step": 14625 }, { "loss": 1.1981, "grad_norm": 1.4976271390914917, "learning_rate": 0.00015601814304166292, "epoch": 0.22, "step": 14650 }, { "loss": 1.1749, "grad_norm": 1.286617398262024, "learning_rate": 0.0001559430476705416, "epoch": 0.22, "step": 14675 }, { "loss": 1.1506, "grad_norm": 1.8841774463653564, "learning_rate": 0.00015586795229942027, "epoch": 0.22, "step": 14700 }, { "loss": 1.1846, "grad_norm": 2.3921959400177, "learning_rate": 0.00015579285692829894, "epoch": 0.22, "step": 14725 }, { "loss": 1.1553, "grad_norm": 1.139286756515503, "learning_rate": 0.00015571776155717763, "epoch": 0.22, "step": 14750 }, { "loss": 1.213, "grad_norm": 1.5389468669891357, "learning_rate": 0.0001556426661860563, "epoch": 0.22, "step": 14775 }, { "loss": 1.2504, "grad_norm": 1.1002377271652222, "learning_rate": 0.000155567570814935, "epoch": 0.22, "step": 14800 }, { "loss": 1.2369, "grad_norm": 1.2907332181930542, "learning_rate": 0.00015549247544381365, "epoch": 0.22, "step": 14825 }, { "loss": 1.2327, "grad_norm": 2.8189125061035156, "learning_rate": 0.00015541738007269232, "epoch": 0.22, "step": 14850 }, { "loss": 1.2142, "grad_norm": 1.4760026931762695, "learning_rate": 0.00015534228470157098, "epoch": 0.22, "step": 14875 }, { "loss": 1.2538, "grad_norm": 1.4497836828231812, "learning_rate": 0.00015526718933044968, "epoch": 0.22, "step": 14900 }, { "loss": 1.2757, "grad_norm": 1.2099194526672363, "learning_rate": 0.00015519209395932834, "epoch": 0.22, "step": 14925 }, { "loss": 1.2636, "grad_norm": 1.2008768320083618, "learning_rate": 0.00015511699858820703, "epoch": 0.22, "step": 14950 }, { "loss": 1.165, "grad_norm": 4.421905040740967, "learning_rate": 0.00015504190321708573, "epoch": 0.22, "step": 14975 }, { "loss": 1.164, "grad_norm": 1.2725390195846558, "learning_rate": 0.0001549668078459644, "epoch": 0.23, "step": 15000 }, { "loss": 1.2026, "grad_norm": 2.9403913021087646, "learning_rate": 0.00015489171247484306, "epoch": 0.23, "step": 15025 }, { "loss": 1.1938, "grad_norm": 1.8553730249404907, "learning_rate": 0.00015481661710372172, "epoch": 0.23, "step": 15050 }, { "loss": 1.1879, "grad_norm": 1.242799162864685, "learning_rate": 0.00015474152173260042, "epoch": 0.23, "step": 15075 }, { "loss": 1.1512, "grad_norm": 1.5785107612609863, "learning_rate": 0.00015466642636147908, "epoch": 0.23, "step": 15100 }, { "loss": 1.1802, "grad_norm": 2.665036916732788, "learning_rate": 0.00015459133099035777, "epoch": 0.23, "step": 15125 }, { "loss": 1.1603, "grad_norm": 1.8509407043457031, "learning_rate": 0.00015451623561923644, "epoch": 0.23, "step": 15150 }, { "loss": 1.1711, "grad_norm": 1.2315629720687866, "learning_rate": 0.0001544411402481151, "epoch": 0.23, "step": 15175 }, { "loss": 1.1784, "grad_norm": 1.6980071067810059, "learning_rate": 0.0001543660448769938, "epoch": 0.23, "step": 15200 }, { "loss": 1.2922, "grad_norm": 1.1929773092269897, "learning_rate": 0.00015429094950587246, "epoch": 0.23, "step": 15225 }, { "loss": 1.1272, "grad_norm": 1.722090244293213, "learning_rate": 0.00015421585413475115, "epoch": 0.23, "step": 15250 }, { "loss": 1.2982, "grad_norm": 1.712141990661621, "learning_rate": 0.00015414075876362982, "epoch": 0.23, "step": 15275 }, { "loss": 1.237, "grad_norm": 2.6743271350860596, "learning_rate": 0.00015406566339250848, "epoch": 0.23, "step": 15300 }, { "loss": 1.1982, "grad_norm": 1.842942714691162, "learning_rate": 0.00015399056802138715, "epoch": 0.23, "step": 15325 }, { "loss": 1.2102, "grad_norm": 1.8020812273025513, "learning_rate": 0.00015391547265026584, "epoch": 0.23, "step": 15350 }, { "loss": 1.2009, "grad_norm": 1.4913078546524048, "learning_rate": 0.00015384037727914453, "epoch": 0.23, "step": 15375 }, { "loss": 1.2133, "grad_norm": 1.1852643489837646, "learning_rate": 0.0001537652819080232, "epoch": 0.23, "step": 15400 }, { "loss": 1.2375, "grad_norm": 1.9560911655426025, "learning_rate": 0.0001536901865369019, "epoch": 0.23, "step": 15425 }, { "loss": 1.2484, "grad_norm": 1.743415355682373, "learning_rate": 0.00015361509116578056, "epoch": 0.23, "step": 15450 }, { "loss": 1.1939, "grad_norm": 2.6720640659332275, "learning_rate": 0.00015353999579465922, "epoch": 0.23, "step": 15475 }, { "loss": 1.2031, "grad_norm": 1.5238986015319824, "learning_rate": 0.0001534649004235379, "epoch": 0.23, "step": 15500 }, { "loss": 1.2155, "grad_norm": 1.7103843688964844, "learning_rate": 0.00015338980505241658, "epoch": 0.23, "step": 15525 }, { "loss": 1.2001, "grad_norm": 1.6735540628433228, "learning_rate": 0.00015331470968129525, "epoch": 0.23, "step": 15550 }, { "loss": 1.1737, "grad_norm": 1.4866646528244019, "learning_rate": 0.00015323961431017394, "epoch": 0.23, "step": 15575 }, { "loss": 1.2778, "grad_norm": 1.4038907289505005, "learning_rate": 0.0001531645189390526, "epoch": 0.23, "step": 15600 }, { "loss": 1.1966, "grad_norm": 2.238800048828125, "learning_rate": 0.00015308942356793127, "epoch": 0.23, "step": 15625 }, { "loss": 1.2119, "grad_norm": 1.6463327407836914, "learning_rate": 0.00015301432819680996, "epoch": 0.24, "step": 15650 }, { "loss": 1.2049, "grad_norm": 1.1655962467193604, "learning_rate": 0.00015293923282568863, "epoch": 0.24, "step": 15675 }, { "loss": 1.1357, "grad_norm": 1.2663848400115967, "learning_rate": 0.00015286413745456732, "epoch": 0.24, "step": 15700 }, { "loss": 1.2133, "grad_norm": 1.140039324760437, "learning_rate": 0.00015278904208344598, "epoch": 0.24, "step": 15725 }, { "loss": 1.174, "grad_norm": 2.119586944580078, "learning_rate": 0.00015271394671232465, "epoch": 0.24, "step": 15750 }, { "loss": 1.2107, "grad_norm": 1.7722172737121582, "learning_rate": 0.00015263885134120332, "epoch": 0.24, "step": 15775 }, { "loss": 1.2139, "grad_norm": 1.7310364246368408, "learning_rate": 0.000152563755970082, "epoch": 0.24, "step": 15800 }, { "loss": 1.126, "grad_norm": 0.9670734405517578, "learning_rate": 0.0001524886605989607, "epoch": 0.24, "step": 15825 }, { "loss": 1.2521, "grad_norm": 2.075798511505127, "learning_rate": 0.00015241356522783937, "epoch": 0.24, "step": 15850 }, { "loss": 1.2131, "grad_norm": 1.7291430234909058, "learning_rate": 0.00015233846985671806, "epoch": 0.24, "step": 15875 }, { "loss": 1.2042, "grad_norm": 2.976837635040283, "learning_rate": 0.00015226337448559672, "epoch": 0.24, "step": 15900 }, { "loss": 1.2391, "grad_norm": 1.3992162942886353, "learning_rate": 0.0001521882791144754, "epoch": 0.24, "step": 15925 }, { "loss": 1.1702, "grad_norm": 0.8179588317871094, "learning_rate": 0.00015211618755819892, "epoch": 0.24, "step": 15950 }, { "loss": 1.1432, "grad_norm": 1.6531869173049927, "learning_rate": 0.0001520410921870776, "epoch": 0.24, "step": 15975 }, { "loss": 1.1531, "grad_norm": 2.893293857574463, "learning_rate": 0.00015196599681595627, "epoch": 0.24, "step": 16000 }, { "loss": 1.1839, "grad_norm": 1.686982274055481, "learning_rate": 0.00015189090144483497, "epoch": 0.24, "step": 16025 }, { "loss": 1.1529, "grad_norm": 1.0813180208206177, "learning_rate": 0.0001518158060737136, "epoch": 0.24, "step": 16050 }, { "loss": 1.2914, "grad_norm": 1.8390347957611084, "learning_rate": 0.0001517407107025923, "epoch": 0.24, "step": 16075 }, { "loss": 1.2263, "grad_norm": 1.4947305917739868, "learning_rate": 0.00015166561533147096, "epoch": 0.24, "step": 16100 }, { "loss": 1.2098, "grad_norm": 1.0743931531906128, "learning_rate": 0.00015159051996034966, "epoch": 0.24, "step": 16125 }, { "loss": 1.1824, "grad_norm": 1.6704978942871094, "learning_rate": 0.00015151542458922832, "epoch": 0.24, "step": 16150 }, { "loss": 1.1727, "grad_norm": 1.23310387134552, "learning_rate": 0.000151440329218107, "epoch": 0.24, "step": 16175 }, { "loss": 1.1947, "grad_norm": 1.678554892539978, "learning_rate": 0.00015136523384698568, "epoch": 0.24, "step": 16200 }, { "loss": 1.2033, "grad_norm": 1.4678512811660767, "learning_rate": 0.00015129013847586434, "epoch": 0.24, "step": 16225 }, { "loss": 1.2855, "grad_norm": 2.2149295806884766, "learning_rate": 0.00015121504310474304, "epoch": 0.24, "step": 16250 }, { "loss": 1.1601, "grad_norm": 0.9399513006210327, "learning_rate": 0.0001511399477336217, "epoch": 0.24, "step": 16275 }, { "loss": 1.1618, "grad_norm": 1.5738555192947388, "learning_rate": 0.0001510648523625004, "epoch": 0.24, "step": 16300 }, { "loss": 1.1984, "grad_norm": 2.3447060585021973, "learning_rate": 0.00015098975699137906, "epoch": 0.25, "step": 16325 }, { "loss": 1.2129, "grad_norm": 2.5573129653930664, "learning_rate": 0.00015091466162025772, "epoch": 0.25, "step": 16350 }, { "loss": 1.2152, "grad_norm": 1.161568284034729, "learning_rate": 0.00015083956624913642, "epoch": 0.25, "step": 16375 }, { "loss": 1.1788, "grad_norm": 1.2641152143478394, "learning_rate": 0.00015076447087801508, "epoch": 0.25, "step": 16400 }, { "loss": 1.2635, "grad_norm": 1.1497838497161865, "learning_rate": 0.00015068937550689377, "epoch": 0.25, "step": 16425 }, { "loss": 1.2427, "grad_norm": 1.777820110321045, "learning_rate": 0.00015061428013577244, "epoch": 0.25, "step": 16450 }, { "loss": 1.2167, "grad_norm": 1.704571008682251, "learning_rate": 0.00015053918476465113, "epoch": 0.25, "step": 16475 }, { "loss": 1.1348, "grad_norm": 1.2531949281692505, "learning_rate": 0.00015046408939352977, "epoch": 0.25, "step": 16500 }, { "loss": 1.2118, "grad_norm": 2.0152504444122314, "learning_rate": 0.00015038899402240846, "epoch": 0.25, "step": 16525 }, { "loss": 1.2169, "grad_norm": 1.327596664428711, "learning_rate": 0.00015031389865128713, "epoch": 0.25, "step": 16550 }, { "loss": 1.1057, "grad_norm": 2.2122318744659424, "learning_rate": 0.00015023880328016582, "epoch": 0.25, "step": 16575 }, { "loss": 1.1939, "grad_norm": 1.4037036895751953, "learning_rate": 0.0001501637079090445, "epoch": 0.25, "step": 16600 }, { "loss": 1.1178, "grad_norm": 1.947090983390808, "learning_rate": 0.00015008861253792318, "epoch": 0.25, "step": 16625 }, { "loss": 1.2499, "grad_norm": 1.9275078773498535, "learning_rate": 0.00015001351716680184, "epoch": 0.25, "step": 16650 }, { "loss": 1.203, "grad_norm": 1.6140542030334473, "learning_rate": 0.0001499384217956805, "epoch": 0.25, "step": 16675 }, { "loss": 1.1617, "grad_norm": 1.370875358581543, "learning_rate": 0.0001498633264245592, "epoch": 0.25, "step": 16700 }, { "loss": 1.1351, "grad_norm": 2.523732900619507, "learning_rate": 0.00014978823105343787, "epoch": 0.25, "step": 16725 }, { "loss": 1.1793, "grad_norm": 1.3012944459915161, "learning_rate": 0.00014971313568231656, "epoch": 0.25, "step": 16750 }, { "loss": 1.1834, "grad_norm": 1.382142424583435, "learning_rate": 0.00014963804031119522, "epoch": 0.25, "step": 16775 }, { "loss": 1.1841, "grad_norm": 3.1386773586273193, "learning_rate": 0.0001495629449400739, "epoch": 0.25, "step": 16800 }, { "loss": 1.1846, "grad_norm": 1.6328222751617432, "learning_rate": 0.00014948784956895258, "epoch": 0.25, "step": 16825 }, { "loss": 1.1879, "grad_norm": 1.3339941501617432, "learning_rate": 0.00014941275419783125, "epoch": 0.25, "step": 16850 }, { "loss": 1.1679, "grad_norm": 2.250485897064209, "learning_rate": 0.00014933765882670994, "epoch": 0.25, "step": 16875 }, { "loss": 1.1362, "grad_norm": 2.045668363571167, "learning_rate": 0.0001492625634555886, "epoch": 0.25, "step": 16900 }, { "loss": 1.1654, "grad_norm": 1.1913504600524902, "learning_rate": 0.0001491874680844673, "epoch": 0.25, "step": 16925 }, { "loss": 1.2208, "grad_norm": 1.6065621376037598, "learning_rate": 0.00014911237271334594, "epoch": 0.25, "step": 16950 }, { "loss": 1.1542, "grad_norm": 1.5805847644805908, "learning_rate": 0.00014903727734222463, "epoch": 0.25, "step": 16975 }, { "loss": 1.2015, "grad_norm": 5.944768905639648, "learning_rate": 0.0001489621819711033, "epoch": 0.26, "step": 17000 }, { "loss": 1.1992, "grad_norm": 3.976229667663574, "learning_rate": 0.00014888708659998199, "epoch": 0.26, "step": 17025 }, { "loss": 1.1746, "grad_norm": 2.31911301612854, "learning_rate": 0.00014881199122886068, "epoch": 0.26, "step": 17050 }, { "loss": 1.1205, "grad_norm": 1.8674370050430298, "learning_rate": 0.00014873689585773934, "epoch": 0.26, "step": 17075 }, { "loss": 1.2318, "grad_norm": 1.6549973487854004, "learning_rate": 0.000148661800486618, "epoch": 0.26, "step": 17100 }, { "loss": 1.2004, "grad_norm": 1.3497843742370605, "learning_rate": 0.00014858670511549667, "epoch": 0.26, "step": 17125 }, { "loss": 1.2421, "grad_norm": 1.8397778272628784, "learning_rate": 0.00014851160974437537, "epoch": 0.26, "step": 17150 }, { "loss": 1.1316, "grad_norm": 0.9151533842086792, "learning_rate": 0.00014843651437325403, "epoch": 0.26, "step": 17175 }, { "loss": 1.1847, "grad_norm": 1.389743447303772, "learning_rate": 0.00014836141900213272, "epoch": 0.26, "step": 17200 }, { "loss": 1.1785, "grad_norm": 0.9278027415275574, "learning_rate": 0.0001482863236310114, "epoch": 0.26, "step": 17225 }, { "loss": 1.1768, "grad_norm": 1.018211841583252, "learning_rate": 0.00014821122825989006, "epoch": 0.26, "step": 17250 }, { "loss": 1.1549, "grad_norm": 1.9112569093704224, "learning_rate": 0.00014813613288876875, "epoch": 0.26, "step": 17275 }, { "loss": 1.1876, "grad_norm": 1.2178176641464233, "learning_rate": 0.0001480610375176474, "epoch": 0.26, "step": 17300 }, { "loss": 1.2158, "grad_norm": 1.7924511432647705, "learning_rate": 0.0001479859421465261, "epoch": 0.26, "step": 17325 }, { "loss": 1.2083, "grad_norm": 2.1684257984161377, "learning_rate": 0.00014791084677540477, "epoch": 0.26, "step": 17350 }, { "loss": 1.1649, "grad_norm": 1.368639349937439, "learning_rate": 0.00014783575140428346, "epoch": 0.26, "step": 17375 }, { "loss": 1.2448, "grad_norm": 1.5606473684310913, "learning_rate": 0.0001477606560331621, "epoch": 0.26, "step": 17400 }, { "loss": 1.2516, "grad_norm": 1.3743770122528076, "learning_rate": 0.0001476855606620408, "epoch": 0.26, "step": 17425 }, { "loss": 1.1748, "grad_norm": 1.4341908693313599, "learning_rate": 0.00014761046529091946, "epoch": 0.26, "step": 17450 }, { "loss": 1.1752, "grad_norm": 2.299916982650757, "learning_rate": 0.00014753536991979815, "epoch": 0.26, "step": 17475 }, { "loss": 1.2068, "grad_norm": 2.3646254539489746, "learning_rate": 0.00014746027454867684, "epoch": 0.26, "step": 17500 }, { "loss": 1.171, "grad_norm": 2.4026846885681152, "learning_rate": 0.0001473851791775555, "epoch": 0.26, "step": 17525 }, { "loss": 1.2248, "grad_norm": 1.358500599861145, "learning_rate": 0.00014731008380643417, "epoch": 0.26, "step": 17550 }, { "loss": 1.2743, "grad_norm": 2.302159547805786, "learning_rate": 0.00014723498843531284, "epoch": 0.26, "step": 17575 }, { "loss": 1.1939, "grad_norm": 1.4632925987243652, "learning_rate": 0.00014715989306419153, "epoch": 0.26, "step": 17600 }, { "loss": 1.1962, "grad_norm": 3.442080020904541, "learning_rate": 0.0001470847976930702, "epoch": 0.26, "step": 17625 }, { "loss": 1.1649, "grad_norm": 0.879815936088562, "learning_rate": 0.0001470097023219489, "epoch": 0.27, "step": 17650 }, { "loss": 1.2207, "grad_norm": 1.877156376838684, "learning_rate": 0.00014693460695082758, "epoch": 0.27, "step": 17675 }, { "loss": 1.2056, "grad_norm": 1.6536662578582764, "learning_rate": 0.00014685951157970622, "epoch": 0.27, "step": 17700 }, { "loss": 1.1719, "grad_norm": 1.321970820426941, "learning_rate": 0.0001467844162085849, "epoch": 0.27, "step": 17725 }, { "loss": 1.2081, "grad_norm": 1.4853167533874512, "learning_rate": 0.00014670932083746358, "epoch": 0.27, "step": 17750 }, { "loss": 1.1692, "grad_norm": 1.9838991165161133, "learning_rate": 0.00014663422546634227, "epoch": 0.27, "step": 17775 }, { "loss": 1.1826, "grad_norm": 2.436300039291382, "learning_rate": 0.00014655913009522094, "epoch": 0.27, "step": 17800 }, { "loss": 1.1814, "grad_norm": 1.899038314819336, "learning_rate": 0.00014648403472409963, "epoch": 0.27, "step": 17825 }, { "loss": 1.2291, "grad_norm": 1.3306931257247925, "learning_rate": 0.00014640893935297827, "epoch": 0.27, "step": 17850 }, { "loss": 1.1888, "grad_norm": 1.6196904182434082, "learning_rate": 0.00014633384398185696, "epoch": 0.27, "step": 17875 }, { "loss": 1.2531, "grad_norm": 1.9150115251541138, "learning_rate": 0.00014625874861073565, "epoch": 0.27, "step": 17900 }, { "loss": 1.1236, "grad_norm": 1.7596296072006226, "learning_rate": 0.00014618365323961432, "epoch": 0.27, "step": 17925 }, { "loss": 1.264, "grad_norm": 2.536665678024292, "learning_rate": 0.000146108557868493, "epoch": 0.27, "step": 17950 }, { "loss": 1.2295, "grad_norm": 1.5203639268875122, "learning_rate": 0.00014603346249737167, "epoch": 0.27, "step": 17975 }, { "loss": 1.1534, "grad_norm": 1.316978931427002, "learning_rate": 0.00014595836712625034, "epoch": 0.27, "step": 18000 }, { "loss": 1.2754, "grad_norm": 1.4424588680267334, "learning_rate": 0.000145883271755129, "epoch": 0.27, "step": 18025 }, { "loss": 1.2349, "grad_norm": 2.4499781131744385, "learning_rate": 0.0001458081763840077, "epoch": 0.27, "step": 18050 }, { "loss": 1.1908, "grad_norm": 1.3816992044448853, "learning_rate": 0.00014573308101288636, "epoch": 0.27, "step": 18075 }, { "loss": 1.2685, "grad_norm": 1.1324695348739624, "learning_rate": 0.00014565798564176506, "epoch": 0.27, "step": 18100 }, { "loss": 1.1553, "grad_norm": 1.7215017080307007, "learning_rate": 0.00014558289027064375, "epoch": 0.27, "step": 18125 }, { "loss": 1.153, "grad_norm": 0.9789482355117798, "learning_rate": 0.00014550779489952239, "epoch": 0.27, "step": 18150 }, { "loss": 1.2484, "grad_norm": 3.6144516468048096, "learning_rate": 0.00014543269952840108, "epoch": 0.27, "step": 18175 }, { "loss": 1.1652, "grad_norm": 5.405023574829102, "learning_rate": 0.00014535760415727974, "epoch": 0.27, "step": 18200 }, { "loss": 1.1736, "grad_norm": 1.360303521156311, "learning_rate": 0.00014528250878615844, "epoch": 0.27, "step": 18225 }, { "loss": 1.1258, "grad_norm": 2.1543657779693604, "learning_rate": 0.0001452074134150371, "epoch": 0.27, "step": 18250 }, { "loss": 1.2295, "grad_norm": 1.6289156675338745, "learning_rate": 0.0001451323180439158, "epoch": 0.27, "step": 18275 }, { "loss": 1.1509, "grad_norm": 1.6996594667434692, "learning_rate": 0.00014505722267279446, "epoch": 0.27, "step": 18300 }, { "loss": 1.1466, "grad_norm": 1.9973461627960205, "learning_rate": 0.00014498212730167312, "epoch": 0.28, "step": 18325 }, { "loss": 1.1387, "grad_norm": 1.3268439769744873, "learning_rate": 0.00014490703193055182, "epoch": 0.28, "step": 18350 }, { "loss": 1.2239, "grad_norm": 1.3260868787765503, "learning_rate": 0.00014483193655943048, "epoch": 0.28, "step": 18375 }, { "loss": 1.2155, "grad_norm": 1.745481014251709, "learning_rate": 0.00014475684118830917, "epoch": 0.28, "step": 18400 }, { "loss": 1.1715, "grad_norm": 1.1252262592315674, "learning_rate": 0.00014468174581718784, "epoch": 0.28, "step": 18425 }, { "loss": 1.1727, "grad_norm": 2.9935803413391113, "learning_rate": 0.0001446066504460665, "epoch": 0.28, "step": 18450 }, { "loss": 1.1934, "grad_norm": 3.0998411178588867, "learning_rate": 0.00014453155507494517, "epoch": 0.28, "step": 18475 }, { "loss": 1.19, "grad_norm": 2.01745343208313, "learning_rate": 0.00014445645970382386, "epoch": 0.28, "step": 18500 }, { "loss": 1.1656, "grad_norm": 1.6752148866653442, "learning_rate": 0.00014438136433270253, "epoch": 0.28, "step": 18525 }, { "loss": 1.1701, "grad_norm": 1.126939058303833, "learning_rate": 0.00014430626896158122, "epoch": 0.28, "step": 18550 }, { "loss": 1.1228, "grad_norm": 1.5768241882324219, "learning_rate": 0.0001442311735904599, "epoch": 0.28, "step": 18575 }, { "loss": 1.1935, "grad_norm": 1.1016457080841064, "learning_rate": 0.00014415607821933855, "epoch": 0.28, "step": 18600 }, { "loss": 1.2472, "grad_norm": 2.9630792140960693, "learning_rate": 0.00014408098284821724, "epoch": 0.28, "step": 18625 }, { "loss": 1.191, "grad_norm": 1.2299975156784058, "learning_rate": 0.0001440058874770959, "epoch": 0.28, "step": 18650 }, { "loss": 1.1604, "grad_norm": 1.3096675872802734, "learning_rate": 0.0001439307921059746, "epoch": 0.28, "step": 18675 }, { "loss": 1.1423, "grad_norm": 2.186399459838867, "learning_rate": 0.00014385569673485327, "epoch": 0.28, "step": 18700 }, { "loss": 1.1783, "grad_norm": 1.5450773239135742, "learning_rate": 0.00014378060136373196, "epoch": 0.28, "step": 18725 }, { "loss": 1.2721, "grad_norm": 1.384564757347107, "learning_rate": 0.00014370550599261062, "epoch": 0.28, "step": 18750 }, { "loss": 1.2521, "grad_norm": 2.277376174926758, "learning_rate": 0.0001436304106214893, "epoch": 0.28, "step": 18775 }, { "loss": 1.2283, "grad_norm": 1.0917941331863403, "learning_rate": 0.00014355531525036798, "epoch": 0.28, "step": 18800 }, { "loss": 1.2139, "grad_norm": 2.3607280254364014, "learning_rate": 0.00014348021987924665, "epoch": 0.28, "step": 18825 }, { "loss": 1.2017, "grad_norm": 1.4834787845611572, "learning_rate": 0.00014340512450812534, "epoch": 0.28, "step": 18850 }, { "loss": 1.1556, "grad_norm": 1.913205623626709, "learning_rate": 0.000143330029137004, "epoch": 0.28, "step": 18875 }, { "loss": 1.1796, "grad_norm": 1.4506784677505493, "learning_rate": 0.00014325493376588267, "epoch": 0.28, "step": 18900 }, { "loss": 1.1792, "grad_norm": 1.0843782424926758, "learning_rate": 0.00014317983839476134, "epoch": 0.28, "step": 18925 }, { "loss": 1.1894, "grad_norm": 1.2553937435150146, "learning_rate": 0.00014310474302364003, "epoch": 0.28, "step": 18950 }, { "loss": 1.1944, "grad_norm": 0.9680384397506714, "learning_rate": 0.0001430296476525187, "epoch": 0.28, "step": 18975 }, { "loss": 1.2441, "grad_norm": 1.4088304042816162, "learning_rate": 0.00014295455228139739, "epoch": 0.29, "step": 19000 }, { "loss": 1.1978, "grad_norm": 1.0669535398483276, "learning_rate": 0.00014287945691027608, "epoch": 0.29, "step": 19025 }, { "loss": 1.2014, "grad_norm": 1.6889104843139648, "learning_rate": 0.00014280436153915472, "epoch": 0.29, "step": 19050 }, { "loss": 1.2006, "grad_norm": 1.6797627210617065, "learning_rate": 0.0001427292661680334, "epoch": 0.29, "step": 19075 }, { "loss": 1.203, "grad_norm": 1.4236091375350952, "learning_rate": 0.00014265417079691207, "epoch": 0.29, "step": 19100 }, { "loss": 1.1643, "grad_norm": 1.0303690433502197, "learning_rate": 0.00014257907542579077, "epoch": 0.29, "step": 19125 }, { "loss": 1.1999, "grad_norm": 1.8537395000457764, "learning_rate": 0.00014250398005466943, "epoch": 0.29, "step": 19150 }, { "loss": 1.123, "grad_norm": 1.440233588218689, "learning_rate": 0.00014242888468354812, "epoch": 0.29, "step": 19175 }, { "loss": 1.1654, "grad_norm": 2.0533230304718018, "learning_rate": 0.0001423537893124268, "epoch": 0.29, "step": 19200 }, { "loss": 1.1724, "grad_norm": 1.7699745893478394, "learning_rate": 0.00014227869394130546, "epoch": 0.29, "step": 19225 }, { "loss": 1.1701, "grad_norm": 1.248593807220459, "learning_rate": 0.00014220359857018415, "epoch": 0.29, "step": 19250 }, { "loss": 1.2097, "grad_norm": 1.6481257677078247, "learning_rate": 0.0001421285031990628, "epoch": 0.29, "step": 19275 }, { "loss": 1.1776, "grad_norm": 1.5135223865509033, "learning_rate": 0.0001420534078279415, "epoch": 0.29, "step": 19300 }, { "loss": 1.166, "grad_norm": 1.790306568145752, "learning_rate": 0.00014197831245682017, "epoch": 0.29, "step": 19325 }, { "loss": 1.1318, "grad_norm": 2.1356446743011475, "learning_rate": 0.00014190321708569884, "epoch": 0.29, "step": 19350 }, { "loss": 1.178, "grad_norm": 1.4826107025146484, "learning_rate": 0.0001418281217145775, "epoch": 0.29, "step": 19375 }, { "loss": 1.1652, "grad_norm": 1.3520580530166626, "learning_rate": 0.0001417530263434562, "epoch": 0.29, "step": 19400 }, { "loss": 1.2568, "grad_norm": 1.3266022205352783, "learning_rate": 0.0001416779309723349, "epoch": 0.29, "step": 19425 }, { "loss": 1.1697, "grad_norm": 1.5133330821990967, "learning_rate": 0.00014160283560121355, "epoch": 0.29, "step": 19450 }, { "loss": 1.1317, "grad_norm": 1.729530692100525, "learning_rate": 0.00014152774023009224, "epoch": 0.29, "step": 19475 }, { "loss": 1.1676, "grad_norm": 1.2013927698135376, "learning_rate": 0.00014145264485897088, "epoch": 0.29, "step": 19500 }, { "loss": 1.2311, "grad_norm": 1.1489402055740356, "learning_rate": 0.00014137754948784957, "epoch": 0.29, "step": 19525 }, { "loss": 1.1642, "grad_norm": 1.405923843383789, "learning_rate": 0.00014130245411672824, "epoch": 0.29, "step": 19550 }, { "loss": 1.1818, "grad_norm": 1.4068244695663452, "learning_rate": 0.00014122735874560693, "epoch": 0.29, "step": 19575 }, { "loss": 1.228, "grad_norm": 1.8172351121902466, "learning_rate": 0.0001411522633744856, "epoch": 0.29, "step": 19600 }, { "loss": 1.1981, "grad_norm": 2.907489776611328, "learning_rate": 0.0001410771680033643, "epoch": 0.29, "step": 19625 }, { "loss": 1.1957, "grad_norm": 2.162321090698242, "learning_rate": 0.00014100207263224296, "epoch": 0.3, "step": 19650 }, { "loss": 1.1492, "grad_norm": 1.433248519897461, "learning_rate": 0.00014092697726112162, "epoch": 0.3, "step": 19675 }, { "loss": 1.16, "grad_norm": 1.9054490327835083, "learning_rate": 0.0001408518818900003, "epoch": 0.3, "step": 19700 }, { "loss": 1.1988, "grad_norm": 1.7673982381820679, "learning_rate": 0.00014077678651887898, "epoch": 0.3, "step": 19725 }, { "loss": 1.2049, "grad_norm": 1.3216012716293335, "learning_rate": 0.00014070169114775767, "epoch": 0.3, "step": 19750 }, { "loss": 1.1345, "grad_norm": 1.4515612125396729, "learning_rate": 0.00014062659577663634, "epoch": 0.3, "step": 19775 }, { "loss": 1.1776, "grad_norm": 1.968056559562683, "learning_rate": 0.000140551500405515, "epoch": 0.3, "step": 19800 }, { "loss": 1.2182, "grad_norm": 1.6644461154937744, "learning_rate": 0.00014047640503439367, "epoch": 0.3, "step": 19825 }, { "loss": 1.1897, "grad_norm": 2.2730207443237305, "learning_rate": 0.00014040130966327236, "epoch": 0.3, "step": 19850 }, { "loss": 1.1552, "grad_norm": 1.038794755935669, "learning_rate": 0.00014032621429215105, "epoch": 0.3, "step": 19875 }, { "loss": 1.1796, "grad_norm": 1.4719074964523315, "learning_rate": 0.00014025111892102972, "epoch": 0.3, "step": 19900 }, { "loss": 1.2031, "grad_norm": 1.8013041019439697, "learning_rate": 0.0001401760235499084, "epoch": 0.3, "step": 19925 }, { "loss": 1.1864, "grad_norm": 2.0032236576080322, "learning_rate": 0.00014010092817878705, "epoch": 0.3, "step": 19950 }, { "loss": 1.225, "grad_norm": 2.1414427757263184, "learning_rate": 0.00014002583280766574, "epoch": 0.3, "step": 19975 }, { "loss": 1.1585, "grad_norm": 4.096096515655518, "learning_rate": 0.0001399507374365444, "epoch": 0.3, "step": 20000 }, { "loss": 1.2254, "grad_norm": 1.5664288997650146, "learning_rate": 0.0001398756420654231, "epoch": 0.3, "step": 20025 }, { "loss": 1.0905, "grad_norm": 1.7429243326187134, "learning_rate": 0.00013980054669430176, "epoch": 0.3, "step": 20050 }, { "loss": 1.1744, "grad_norm": 1.551805019378662, "learning_rate": 0.00013972545132318046, "epoch": 0.3, "step": 20075 }, { "loss": 1.1998, "grad_norm": 1.483031988143921, "learning_rate": 0.00013965035595205912, "epoch": 0.3, "step": 20100 }, { "loss": 1.1391, "grad_norm": 1.2282016277313232, "learning_rate": 0.00013957526058093779, "epoch": 0.3, "step": 20125 }, { "loss": 1.0928, "grad_norm": 1.4983934164047241, "learning_rate": 0.00013950016520981648, "epoch": 0.3, "step": 20150 }, { "loss": 1.2218, "grad_norm": 1.7510052919387817, "learning_rate": 0.00013942506983869514, "epoch": 0.3, "step": 20175 }, { "loss": 1.2014, "grad_norm": 1.6214317083358765, "learning_rate": 0.00013934997446757384, "epoch": 0.3, "step": 20200 }, { "loss": 1.2299, "grad_norm": 1.8761943578720093, "learning_rate": 0.0001392748790964525, "epoch": 0.3, "step": 20225 }, { "loss": 1.2065, "grad_norm": 2.8093338012695312, "learning_rate": 0.00013919978372533117, "epoch": 0.3, "step": 20250 }, { "loss": 1.145, "grad_norm": 1.5288567543029785, "learning_rate": 0.00013912468835420983, "epoch": 0.3, "step": 20275 }, { "loss": 1.1886, "grad_norm": 1.5765314102172852, "learning_rate": 0.00013904959298308852, "epoch": 0.3, "step": 20300 }, { "loss": 1.1761, "grad_norm": 1.0417560338974, "learning_rate": 0.00013897449761196722, "epoch": 0.31, "step": 20325 }, { "loss": 1.2366, "grad_norm": 1.2328884601593018, "learning_rate": 0.00013889940224084588, "epoch": 0.31, "step": 20350 }, { "loss": 1.1157, "grad_norm": 1.6982795000076294, "learning_rate": 0.00013882430686972458, "epoch": 0.31, "step": 20375 }, { "loss": 1.2139, "grad_norm": 1.3879860639572144, "learning_rate": 0.0001387492114986032, "epoch": 0.31, "step": 20400 }, { "loss": 1.1945, "grad_norm": 1.8985368013381958, "learning_rate": 0.0001386741161274819, "epoch": 0.31, "step": 20425 }, { "loss": 1.1541, "grad_norm": 1.1783545017242432, "learning_rate": 0.00013859902075636057, "epoch": 0.31, "step": 20450 }, { "loss": 1.1777, "grad_norm": 1.639700174331665, "learning_rate": 0.00013852392538523926, "epoch": 0.31, "step": 20475 }, { "loss": 1.1743, "grad_norm": 1.1630868911743164, "learning_rate": 0.00013844883001411796, "epoch": 0.31, "step": 20500 }, { "loss": 1.231, "grad_norm": 1.5663248300552368, "learning_rate": 0.00013837373464299662, "epoch": 0.31, "step": 20525 }, { "loss": 1.2136, "grad_norm": 1.1791601181030273, "learning_rate": 0.0001382986392718753, "epoch": 0.31, "step": 20550 }, { "loss": 1.1534, "grad_norm": 1.1631137132644653, "learning_rate": 0.00013822354390075395, "epoch": 0.31, "step": 20575 }, { "loss": 1.2065, "grad_norm": 3.0869953632354736, "learning_rate": 0.00013814844852963264, "epoch": 0.31, "step": 20600 }, { "loss": 1.1659, "grad_norm": 1.5045863389968872, "learning_rate": 0.0001380733531585113, "epoch": 0.31, "step": 20625 }, { "loss": 1.2137, "grad_norm": 1.555591344833374, "learning_rate": 0.00013799825778739, "epoch": 0.31, "step": 20650 }, { "loss": 1.1867, "grad_norm": 1.1660338640213013, "learning_rate": 0.00013792316241626867, "epoch": 0.31, "step": 20675 }, { "loss": 1.1656, "grad_norm": 1.3633331060409546, "learning_rate": 0.00013784806704514733, "epoch": 0.31, "step": 20700 }, { "loss": 1.2777, "grad_norm": 1.714920163154602, "learning_rate": 0.00013777297167402603, "epoch": 0.31, "step": 20725 }, { "loss": 1.226, "grad_norm": 2.6200525760650635, "learning_rate": 0.0001376978763029047, "epoch": 0.31, "step": 20750 }, { "loss": 1.2066, "grad_norm": 1.176538109779358, "learning_rate": 0.00013762278093178338, "epoch": 0.31, "step": 20775 }, { "loss": 1.204, "grad_norm": 1.6918548345565796, "learning_rate": 0.00013754768556066205, "epoch": 0.31, "step": 20800 }, { "loss": 1.1103, "grad_norm": 1.2101995944976807, "learning_rate": 0.00013747259018954074, "epoch": 0.31, "step": 20825 }, { "loss": 1.2484, "grad_norm": 2.0804872512817383, "learning_rate": 0.00013739749481841938, "epoch": 0.31, "step": 20850 }, { "loss": 1.2183, "grad_norm": 2.115626573562622, "learning_rate": 0.00013732239944729807, "epoch": 0.31, "step": 20875 }, { "loss": 1.1542, "grad_norm": 1.6519482135772705, "learning_rate": 0.00013724730407617674, "epoch": 0.31, "step": 20900 }, { "loss": 1.1894, "grad_norm": 2.619948625564575, "learning_rate": 0.00013717220870505543, "epoch": 0.31, "step": 20925 }, { "loss": 1.2, "grad_norm": 1.5296428203582764, "learning_rate": 0.00013709711333393412, "epoch": 0.31, "step": 20950 }, { "loss": 1.1985, "grad_norm": 2.707340717315674, "learning_rate": 0.0001370220179628128, "epoch": 0.32, "step": 20975 }, { "loss": 1.179, "grad_norm": 1.8074674606323242, "learning_rate": 0.00013694692259169145, "epoch": 0.32, "step": 21000 }, { "loss": 1.1687, "grad_norm": 1.1176238059997559, "learning_rate": 0.00013687182722057012, "epoch": 0.32, "step": 21025 }, { "loss": 1.2645, "grad_norm": 2.0191187858581543, "learning_rate": 0.0001367967318494488, "epoch": 0.32, "step": 21050 }, { "loss": 1.2807, "grad_norm": 1.368486762046814, "learning_rate": 0.00013672163647832747, "epoch": 0.32, "step": 21075 }, { "loss": 1.1935, "grad_norm": 1.582977294921875, "learning_rate": 0.00013664654110720617, "epoch": 0.32, "step": 21100 }, { "loss": 1.2249, "grad_norm": 1.6462111473083496, "learning_rate": 0.00013657144573608483, "epoch": 0.32, "step": 21125 }, { "loss": 1.1481, "grad_norm": 2.2449021339416504, "learning_rate": 0.0001364963503649635, "epoch": 0.32, "step": 21150 }, { "loss": 1.2925, "grad_norm": 1.45096755027771, "learning_rate": 0.0001364212549938422, "epoch": 0.32, "step": 21175 }, { "loss": 1.2251, "grad_norm": 1.5417848825454712, "learning_rate": 0.00013634615962272086, "epoch": 0.32, "step": 21200 }, { "loss": 1.1838, "grad_norm": 1.4828438758850098, "learning_rate": 0.00013627106425159955, "epoch": 0.32, "step": 21225 }, { "loss": 1.2017, "grad_norm": 1.9270501136779785, "learning_rate": 0.0001361959688804782, "epoch": 0.32, "step": 21250 }, { "loss": 1.167, "grad_norm": 1.438550353050232, "learning_rate": 0.0001361208735093569, "epoch": 0.32, "step": 21275 }, { "loss": 1.1553, "grad_norm": 1.860770344734192, "learning_rate": 0.00013604577813823557, "epoch": 0.32, "step": 21300 }, { "loss": 1.1355, "grad_norm": 2.12158203125, "learning_rate": 0.00013597068276711424, "epoch": 0.32, "step": 21325 }, { "loss": 1.1958, "grad_norm": 1.2415894269943237, "learning_rate": 0.0001358955873959929, "epoch": 0.32, "step": 21350 }, { "loss": 1.0986, "grad_norm": 2.1204869747161865, "learning_rate": 0.0001358204920248716, "epoch": 0.32, "step": 21375 }, { "loss": 1.1916, "grad_norm": 2.0683250427246094, "learning_rate": 0.0001357453966537503, "epoch": 0.32, "step": 21400 }, { "loss": 1.2799, "grad_norm": 1.136094331741333, "learning_rate": 0.00013567030128262895, "epoch": 0.32, "step": 21425 }, { "loss": 1.1714, "grad_norm": 2.614771842956543, "learning_rate": 0.00013559520591150762, "epoch": 0.32, "step": 21450 }, { "loss": 1.1808, "grad_norm": 1.1263775825500488, "learning_rate": 0.00013552011054038628, "epoch": 0.32, "step": 21475 }, { "loss": 1.1903, "grad_norm": 1.8330289125442505, "learning_rate": 0.00013544501516926498, "epoch": 0.32, "step": 21500 }, { "loss": 1.1863, "grad_norm": 2.0172111988067627, "learning_rate": 0.00013536991979814364, "epoch": 0.32, "step": 21525 }, { "loss": 1.2356, "grad_norm": 1.7615008354187012, "learning_rate": 0.00013529482442702233, "epoch": 0.32, "step": 21550 }, { "loss": 1.2365, "grad_norm": 3.3480842113494873, "learning_rate": 0.000135219729055901, "epoch": 0.32, "step": 21575 }, { "loss": 1.0925, "grad_norm": 1.5129296779632568, "learning_rate": 0.00013514463368477966, "epoch": 0.32, "step": 21600 }, { "loss": 1.1838, "grad_norm": 1.1446235179901123, "learning_rate": 0.00013506953831365836, "epoch": 0.32, "step": 21625 }, { "loss": 1.2593, "grad_norm": 1.2927684783935547, "learning_rate": 0.00013499444294253702, "epoch": 0.33, "step": 21650 }, { "loss": 1.1879, "grad_norm": 1.6593775749206543, "learning_rate": 0.00013491934757141571, "epoch": 0.33, "step": 21675 }, { "loss": 1.169, "grad_norm": 1.3151673078536987, "learning_rate": 0.00013484425220029438, "epoch": 0.33, "step": 21700 }, { "loss": 1.159, "grad_norm": 1.4625322818756104, "learning_rate": 0.00013476915682917307, "epoch": 0.33, "step": 21725 }, { "loss": 1.1482, "grad_norm": 1.4630295038223267, "learning_rate": 0.00013469406145805174, "epoch": 0.33, "step": 21750 }, { "loss": 1.1558, "grad_norm": 1.914694905281067, "learning_rate": 0.0001346189660869304, "epoch": 0.33, "step": 21775 }, { "loss": 1.1895, "grad_norm": 1.1685444116592407, "learning_rate": 0.0001345438707158091, "epoch": 0.33, "step": 21800 }, { "loss": 1.137, "grad_norm": 1.8522282838821411, "learning_rate": 0.00013446877534468776, "epoch": 0.33, "step": 21825 }, { "loss": 1.2181, "grad_norm": 2.1433138847351074, "learning_rate": 0.00013439367997356645, "epoch": 0.33, "step": 21850 }, { "loss": 1.1694, "grad_norm": 1.1134564876556396, "learning_rate": 0.00013431858460244512, "epoch": 0.33, "step": 21875 }, { "loss": 1.1859, "grad_norm": 0.9985026121139526, "learning_rate": 0.00013424348923132378, "epoch": 0.33, "step": 21900 }, { "loss": 1.1866, "grad_norm": 1.732964038848877, "learning_rate": 0.00013416839386020245, "epoch": 0.33, "step": 21925 }, { "loss": 1.1623, "grad_norm": 1.2273517847061157, "learning_rate": 0.00013409329848908114, "epoch": 0.33, "step": 21950 }, { "loss": 1.1336, "grad_norm": 1.2174320220947266, "learning_rate": 0.0001340182031179598, "epoch": 0.33, "step": 21975 }, { "loss": 1.1903, "grad_norm": 2.137214422225952, "learning_rate": 0.0001339431077468385, "epoch": 0.33, "step": 22000 }, { "loss": 1.214, "grad_norm": 1.2529860734939575, "learning_rate": 0.0001338680123757172, "epoch": 0.33, "step": 22025 }, { "loss": 1.1389, "grad_norm": 1.8254303932189941, "learning_rate": 0.00013379291700459583, "epoch": 0.33, "step": 22050 }, { "loss": 1.1559, "grad_norm": 2.0765380859375, "learning_rate": 0.00013371782163347452, "epoch": 0.33, "step": 22075 }, { "loss": 1.1536, "grad_norm": 1.504064917564392, "learning_rate": 0.0001336427262623532, "epoch": 0.33, "step": 22100 }, { "loss": 1.2144, "grad_norm": 2.490610122680664, "learning_rate": 0.00013356763089123188, "epoch": 0.33, "step": 22125 }, { "loss": 1.1543, "grad_norm": 1.6488279104232788, "learning_rate": 0.00013349253552011054, "epoch": 0.33, "step": 22150 }, { "loss": 1.1756, "grad_norm": 2.970743417739868, "learning_rate": 0.00013341744014898924, "epoch": 0.33, "step": 22175 }, { "loss": 1.2094, "grad_norm": 1.299083948135376, "learning_rate": 0.0001333423447778679, "epoch": 0.33, "step": 22200 }, { "loss": 1.0779, "grad_norm": 1.3857295513153076, "learning_rate": 0.00013326724940674657, "epoch": 0.33, "step": 22225 }, { "loss": 1.2177, "grad_norm": 1.7416950464248657, "learning_rate": 0.00013319215403562526, "epoch": 0.33, "step": 22250 }, { "loss": 1.179, "grad_norm": 2.380249261856079, "learning_rate": 0.00013311705866450393, "epoch": 0.33, "step": 22275 }, { "loss": 1.2057, "grad_norm": 1.3791347742080688, "learning_rate": 0.00013304196329338262, "epoch": 0.33, "step": 22300 }, { "loss": 1.1725, "grad_norm": 1.0284641981124878, "learning_rate": 0.00013296686792226128, "epoch": 0.34, "step": 22325 }, { "loss": 1.1518, "grad_norm": 2.1696279048919678, "learning_rate": 0.00013289177255113995, "epoch": 0.34, "step": 22350 }, { "loss": 1.2832, "grad_norm": 1.2163208723068237, "learning_rate": 0.0001328166771800186, "epoch": 0.34, "step": 22375 }, { "loss": 1.1366, "grad_norm": 1.724770426750183, "learning_rate": 0.0001327415818088973, "epoch": 0.34, "step": 22400 }, { "loss": 1.2067, "grad_norm": 1.9105318784713745, "learning_rate": 0.00013266648643777597, "epoch": 0.34, "step": 22425 }, { "loss": 1.1917, "grad_norm": 1.1520806550979614, "learning_rate": 0.00013259139106665466, "epoch": 0.34, "step": 22450 }, { "loss": 1.1637, "grad_norm": 1.8389378786087036, "learning_rate": 0.00013251629569553336, "epoch": 0.34, "step": 22475 }, { "loss": 1.2151, "grad_norm": 4.63606595993042, "learning_rate": 0.000132441200324412, "epoch": 0.34, "step": 22500 }, { "loss": 1.196, "grad_norm": 2.179290771484375, "learning_rate": 0.0001323661049532907, "epoch": 0.34, "step": 22525 }, { "loss": 1.158, "grad_norm": 1.1105175018310547, "learning_rate": 0.00013229100958216935, "epoch": 0.34, "step": 22550 }, { "loss": 1.1638, "grad_norm": 1.1015607118606567, "learning_rate": 0.00013221591421104804, "epoch": 0.34, "step": 22575 }, { "loss": 1.1948, "grad_norm": 1.314866304397583, "learning_rate": 0.0001321408188399267, "epoch": 0.34, "step": 22600 }, { "loss": 1.1234, "grad_norm": 1.3410804271697998, "learning_rate": 0.0001320657234688054, "epoch": 0.34, "step": 22625 }, { "loss": 1.2106, "grad_norm": 1.4340014457702637, "learning_rate": 0.00013199062809768407, "epoch": 0.34, "step": 22650 }, { "loss": 1.2023, "grad_norm": 2.40155291557312, "learning_rate": 0.00013191553272656273, "epoch": 0.34, "step": 22675 }, { "loss": 1.1545, "grad_norm": 1.752961277961731, "learning_rate": 0.00013184043735544143, "epoch": 0.34, "step": 22700 }, { "loss": 1.2083, "grad_norm": 2.0551249980926514, "learning_rate": 0.0001317653419843201, "epoch": 0.34, "step": 22725 }, { "loss": 1.2815, "grad_norm": 2.0029456615448, "learning_rate": 0.00013169024661319878, "epoch": 0.34, "step": 22750 }, { "loss": 1.1618, "grad_norm": 1.6569886207580566, "learning_rate": 0.00013161515124207745, "epoch": 0.34, "step": 22775 }, { "loss": 1.1506, "grad_norm": 1.0627089738845825, "learning_rate": 0.00013154005587095611, "epoch": 0.34, "step": 22800 }, { "loss": 1.178, "grad_norm": 1.4119595289230347, "learning_rate": 0.00013146496049983478, "epoch": 0.34, "step": 22825 }, { "loss": 1.2293, "grad_norm": 2.070948839187622, "learning_rate": 0.00013138986512871347, "epoch": 0.34, "step": 22850 }, { "loss": 1.2195, "grad_norm": 3.2543933391571045, "learning_rate": 0.00013131476975759214, "epoch": 0.34, "step": 22875 }, { "loss": 1.196, "grad_norm": 2.154444694519043, "learning_rate": 0.00013123967438647083, "epoch": 0.34, "step": 22900 }, { "loss": 1.1807, "grad_norm": 1.9498579502105713, "learning_rate": 0.00013116457901534952, "epoch": 0.34, "step": 22925 }, { "loss": 1.1659, "grad_norm": 1.2425457239151, "learning_rate": 0.00013108948364422816, "epoch": 0.34, "step": 22950 }, { "loss": 1.181, "grad_norm": 1.0989060401916504, "learning_rate": 0.00013101438827310685, "epoch": 0.35, "step": 22975 }, { "loss": 1.1095, "grad_norm": 1.509493350982666, "learning_rate": 0.00013093929290198552, "epoch": 0.35, "step": 23000 }, { "loss": 1.1686, "grad_norm": 1.762772798538208, "learning_rate": 0.0001308641975308642, "epoch": 0.35, "step": 23025 }, { "loss": 1.1062, "grad_norm": 2.1119191646575928, "learning_rate": 0.00013078910215974288, "epoch": 0.35, "step": 23050 }, { "loss": 1.1906, "grad_norm": 1.1782546043395996, "learning_rate": 0.00013071400678862157, "epoch": 0.35, "step": 23075 }, { "loss": 1.2047, "grad_norm": 1.2365734577178955, "learning_rate": 0.00013063891141750023, "epoch": 0.35, "step": 23100 }, { "loss": 1.1824, "grad_norm": 1.0874519348144531, "learning_rate": 0.0001305638160463789, "epoch": 0.35, "step": 23125 }, { "loss": 1.127, "grad_norm": 1.9339088201522827, "learning_rate": 0.0001304887206752576, "epoch": 0.35, "step": 23150 }, { "loss": 1.1529, "grad_norm": 2.087249517440796, "learning_rate": 0.00013041362530413626, "epoch": 0.35, "step": 23175 }, { "loss": 1.1736, "grad_norm": 1.0799955129623413, "learning_rate": 0.00013033852993301495, "epoch": 0.35, "step": 23200 }, { "loss": 1.1652, "grad_norm": 4.290017127990723, "learning_rate": 0.00013026343456189361, "epoch": 0.35, "step": 23225 }, { "loss": 1.1845, "grad_norm": 1.8332254886627197, "learning_rate": 0.00013018833919077228, "epoch": 0.35, "step": 23250 }, { "loss": 1.227, "grad_norm": 2.3208718299865723, "learning_rate": 0.00013011324381965094, "epoch": 0.35, "step": 23275 }, { "loss": 1.0917, "grad_norm": 1.9536670446395874, "learning_rate": 0.00013003814844852964, "epoch": 0.35, "step": 23300 }, { "loss": 1.1812, "grad_norm": 1.225029468536377, "learning_rate": 0.00012996305307740833, "epoch": 0.35, "step": 23325 }, { "loss": 1.2249, "grad_norm": 2.538161039352417, "learning_rate": 0.000129887957706287, "epoch": 0.35, "step": 23350 }, { "loss": 1.1578, "grad_norm": 1.2378344535827637, "learning_rate": 0.0001298128623351657, "epoch": 0.35, "step": 23375 }, { "loss": 1.1544, "grad_norm": 3.9860634803771973, "learning_rate": 0.00012973776696404433, "epoch": 0.35, "step": 23400 }, { "loss": 1.1704, "grad_norm": 1.1592284440994263, "learning_rate": 0.00012966267159292302, "epoch": 0.35, "step": 23425 }, { "loss": 1.2261, "grad_norm": 0.9641034603118896, "learning_rate": 0.00012958757622180168, "epoch": 0.35, "step": 23450 }, { "loss": 1.1879, "grad_norm": 2.3419320583343506, "learning_rate": 0.00012951248085068038, "epoch": 0.35, "step": 23475 }, { "loss": 1.1237, "grad_norm": 1.641772747039795, "learning_rate": 0.00012943738547955904, "epoch": 0.35, "step": 23500 }, { "loss": 1.1636, "grad_norm": 1.8921740055084229, "learning_rate": 0.00012936229010843773, "epoch": 0.35, "step": 23525 }, { "loss": 1.1919, "grad_norm": 1.5332955121994019, "learning_rate": 0.0001292871947373164, "epoch": 0.35, "step": 23550 }, { "loss": 1.1632, "grad_norm": 1.6443663835525513, "learning_rate": 0.00012921209936619506, "epoch": 0.35, "step": 23575 }, { "loss": 1.1963, "grad_norm": 2.044127941131592, "learning_rate": 0.00012913700399507376, "epoch": 0.35, "step": 23600 }, { "loss": 1.1971, "grad_norm": 2.1552951335906982, "learning_rate": 0.00012906190862395242, "epoch": 0.35, "step": 23625 }, { "loss": 1.221, "grad_norm": 1.7061282396316528, "learning_rate": 0.00012898681325283111, "epoch": 0.36, "step": 23650 }, { "loss": 1.1243, "grad_norm": 1.581986904144287, "learning_rate": 0.00012891171788170978, "epoch": 0.36, "step": 23675 }, { "loss": 1.2158, "grad_norm": 1.999489665031433, "learning_rate": 0.00012883662251058844, "epoch": 0.36, "step": 23700 }, { "loss": 1.1868, "grad_norm": 1.5865546464920044, "learning_rate": 0.0001287615271394671, "epoch": 0.36, "step": 23725 }, { "loss": 1.1772, "grad_norm": 1.1765635013580322, "learning_rate": 0.0001286864317683458, "epoch": 0.36, "step": 23750 }, { "loss": 1.1669, "grad_norm": 2.248819589614868, "learning_rate": 0.0001286113363972245, "epoch": 0.36, "step": 23775 }, { "loss": 1.1574, "grad_norm": 1.4647800922393799, "learning_rate": 0.00012853624102610316, "epoch": 0.36, "step": 23800 }, { "loss": 1.1986, "grad_norm": 1.1818993091583252, "learning_rate": 0.00012846114565498185, "epoch": 0.36, "step": 23825 }, { "loss": 1.1631, "grad_norm": 1.785582423210144, "learning_rate": 0.0001283860502838605, "epoch": 0.36, "step": 23850 }, { "loss": 1.2067, "grad_norm": 1.7691236734390259, "learning_rate": 0.00012831095491273918, "epoch": 0.36, "step": 23875 }, { "loss": 1.0843, "grad_norm": 1.4879204034805298, "learning_rate": 0.00012823585954161785, "epoch": 0.36, "step": 23900 }, { "loss": 1.1911, "grad_norm": 1.4341880083084106, "learning_rate": 0.00012816076417049654, "epoch": 0.36, "step": 23925 }, { "loss": 1.162, "grad_norm": 0.8942863345146179, "learning_rate": 0.0001280856687993752, "epoch": 0.36, "step": 23950 }, { "loss": 1.2, "grad_norm": 1.329323172569275, "learning_rate": 0.0001280105734282539, "epoch": 0.36, "step": 23975 }, { "loss": 1.1484, "grad_norm": 1.621002197265625, "learning_rate": 0.00012793547805713256, "epoch": 0.36, "step": 24000 }, { "loss": 1.181, "grad_norm": 1.8257761001586914, "learning_rate": 0.00012786038268601123, "epoch": 0.36, "step": 24025 }, { "loss": 1.1984, "grad_norm": 2.572247266769409, "learning_rate": 0.00012778528731488992, "epoch": 0.36, "step": 24050 }, { "loss": 1.1867, "grad_norm": 1.7765648365020752, "learning_rate": 0.0001277101919437686, "epoch": 0.36, "step": 24075 }, { "loss": 1.1984, "grad_norm": 1.3976967334747314, "learning_rate": 0.00012763509657264728, "epoch": 0.36, "step": 24100 }, { "loss": 1.1685, "grad_norm": 1.6491625308990479, "learning_rate": 0.00012756000120152594, "epoch": 0.36, "step": 24125 }, { "loss": 1.1497, "grad_norm": 1.698404312133789, "learning_rate": 0.0001274849058304046, "epoch": 0.36, "step": 24150 }, { "loss": 1.2076, "grad_norm": 1.2471705675125122, "learning_rate": 0.00012740981045928328, "epoch": 0.36, "step": 24175 }, { "loss": 1.1596, "grad_norm": 1.2114017009735107, "learning_rate": 0.00012733471508816197, "epoch": 0.36, "step": 24200 }, { "loss": 1.2032, "grad_norm": 1.1424446105957031, "learning_rate": 0.00012725961971704066, "epoch": 0.36, "step": 24225 }, { "loss": 1.1548, "grad_norm": 1.3526264429092407, "learning_rate": 0.00012718452434591933, "epoch": 0.36, "step": 24250 }, { "loss": 1.2415, "grad_norm": 1.2714468240737915, "learning_rate": 0.00012710942897479802, "epoch": 0.36, "step": 24275 }, { "loss": 1.1264, "grad_norm": 2.064203977584839, "learning_rate": 0.00012703433360367668, "epoch": 0.36, "step": 24300 }, { "loss": 1.1578, "grad_norm": 1.4952439069747925, "learning_rate": 0.00012695923823255535, "epoch": 0.37, "step": 24325 }, { "loss": 1.1495, "grad_norm": 1.4773337841033936, "learning_rate": 0.00012688414286143401, "epoch": 0.37, "step": 24350 }, { "loss": 1.1591, "grad_norm": 1.1870368719100952, "learning_rate": 0.0001268090474903127, "epoch": 0.37, "step": 24375 }, { "loss": 1.1744, "grad_norm": 1.824880838394165, "learning_rate": 0.00012673395211919137, "epoch": 0.37, "step": 24400 }, { "loss": 1.198, "grad_norm": 1.18766188621521, "learning_rate": 0.00012665885674807006, "epoch": 0.37, "step": 24425 }, { "loss": 1.2227, "grad_norm": 1.719905138015747, "learning_rate": 0.00012658376137694873, "epoch": 0.37, "step": 24450 }, { "loss": 1.294, "grad_norm": 1.9146957397460938, "learning_rate": 0.0001265086660058274, "epoch": 0.37, "step": 24475 }, { "loss": 1.2087, "grad_norm": 2.0763649940490723, "learning_rate": 0.0001264335706347061, "epoch": 0.37, "step": 24500 }, { "loss": 1.1887, "grad_norm": 2.3640265464782715, "learning_rate": 0.00012635847526358475, "epoch": 0.37, "step": 24525 }, { "loss": 1.2053, "grad_norm": 1.9339317083358765, "learning_rate": 0.00012628337989246344, "epoch": 0.37, "step": 24550 }, { "loss": 1.1123, "grad_norm": 1.4369031190872192, "learning_rate": 0.0001262082845213421, "epoch": 0.37, "step": 24575 }, { "loss": 1.1309, "grad_norm": 1.2952880859375, "learning_rate": 0.00012613318915022078, "epoch": 0.37, "step": 24600 }, { "loss": 1.1589, "grad_norm": 2.8487777709960938, "learning_rate": 0.00012605809377909947, "epoch": 0.37, "step": 24625 }, { "loss": 1.1054, "grad_norm": 1.1736781597137451, "learning_rate": 0.00012598299840797813, "epoch": 0.37, "step": 24650 }, { "loss": 1.1255, "grad_norm": 1.5358980894088745, "learning_rate": 0.00012590790303685683, "epoch": 0.37, "step": 24675 }, { "loss": 1.2041, "grad_norm": 2.0065975189208984, "learning_rate": 0.0001258328076657355, "epoch": 0.37, "step": 24700 }, { "loss": 1.15, "grad_norm": 1.2211554050445557, "learning_rate": 0.00012575771229461418, "epoch": 0.37, "step": 24725 }, { "loss": 1.1834, "grad_norm": 1.3376033306121826, "learning_rate": 0.00012568261692349285, "epoch": 0.37, "step": 24750 }, { "loss": 1.2355, "grad_norm": 2.8535170555114746, "learning_rate": 0.00012560752155237151, "epoch": 0.37, "step": 24775 }, { "loss": 1.1949, "grad_norm": 1.9856910705566406, "learning_rate": 0.00012553242618125018, "epoch": 0.37, "step": 24800 }, { "loss": 1.1887, "grad_norm": 2.9144210815429688, "learning_rate": 0.00012545733081012887, "epoch": 0.37, "step": 24825 }, { "loss": 1.1893, "grad_norm": 1.4913091659545898, "learning_rate": 0.00012538223543900756, "epoch": 0.37, "step": 24850 }, { "loss": 1.1173, "grad_norm": 1.685804009437561, "learning_rate": 0.00012530714006788623, "epoch": 0.37, "step": 24875 }, { "loss": 1.1303, "grad_norm": 1.3694686889648438, "learning_rate": 0.00012523504851160973, "epoch": 0.37, "step": 24900 }, { "loss": 1.2075, "grad_norm": 1.3392975330352783, "learning_rate": 0.00012515995314048842, "epoch": 0.37, "step": 24925 }, { "loss": 1.1981, "grad_norm": 1.352869987487793, "learning_rate": 0.0001250848577693671, "epoch": 0.37, "step": 24950 }, { "loss": 1.1808, "grad_norm": 1.1106911897659302, "learning_rate": 0.00012500976239824578, "epoch": 0.38, "step": 24975 }, { "loss": 1.1819, "grad_norm": 1.2609456777572632, "learning_rate": 0.00012493466702712447, "epoch": 0.38, "step": 25000 }, { "loss": 1.1571, "grad_norm": 1.3581352233886719, "learning_rate": 0.00012485957165600314, "epoch": 0.38, "step": 25025 }, { "loss": 1.2111, "grad_norm": 1.7891106605529785, "learning_rate": 0.0001247844762848818, "epoch": 0.38, "step": 25050 }, { "loss": 1.2029, "grad_norm": 2.628241539001465, "learning_rate": 0.00012470938091376047, "epoch": 0.38, "step": 25075 }, { "loss": 1.1415, "grad_norm": 1.5528656244277954, "learning_rate": 0.00012463428554263916, "epoch": 0.38, "step": 25100 }, { "loss": 1.0769, "grad_norm": 2.0100932121276855, "learning_rate": 0.00012455919017151783, "epoch": 0.38, "step": 25125 }, { "loss": 1.171, "grad_norm": 2.7479538917541504, "learning_rate": 0.00012448409480039652, "epoch": 0.38, "step": 25150 }, { "loss": 1.1868, "grad_norm": 2.177091360092163, "learning_rate": 0.00012440899942927518, "epoch": 0.38, "step": 25175 }, { "loss": 1.1472, "grad_norm": 1.9711464643478394, "learning_rate": 0.00012433390405815385, "epoch": 0.38, "step": 25200 }, { "loss": 1.1982, "grad_norm": 1.4624091386795044, "learning_rate": 0.00012425880868703254, "epoch": 0.38, "step": 25225 }, { "loss": 1.1806, "grad_norm": 1.7121859788894653, "learning_rate": 0.0001241837133159112, "epoch": 0.38, "step": 25250 }, { "loss": 1.1943, "grad_norm": 2.1174204349517822, "learning_rate": 0.0001241086179447899, "epoch": 0.38, "step": 25275 }, { "loss": 1.1742, "grad_norm": 1.2425144910812378, "learning_rate": 0.00012403352257366857, "epoch": 0.38, "step": 25300 }, { "loss": 1.1316, "grad_norm": 2.102142572402954, "learning_rate": 0.00012395842720254726, "epoch": 0.38, "step": 25325 }, { "loss": 1.1717, "grad_norm": 1.7592540979385376, "learning_rate": 0.0001238833318314259, "epoch": 0.38, "step": 25350 }, { "loss": 1.2086, "grad_norm": 1.7676315307617188, "learning_rate": 0.0001238082364603046, "epoch": 0.38, "step": 25375 }, { "loss": 1.1386, "grad_norm": 1.154153823852539, "learning_rate": 0.00012373314108918325, "epoch": 0.38, "step": 25400 }, { "loss": 1.1803, "grad_norm": 2.522324800491333, "learning_rate": 0.00012365804571806195, "epoch": 0.38, "step": 25425 }, { "loss": 1.2331, "grad_norm": 1.699385404586792, "learning_rate": 0.00012358295034694064, "epoch": 0.38, "step": 25450 }, { "loss": 1.2247, "grad_norm": 1.836391568183899, "learning_rate": 0.0001235078549758193, "epoch": 0.38, "step": 25475 }, { "loss": 1.1509, "grad_norm": 1.2097364664077759, "learning_rate": 0.00012343275960469797, "epoch": 0.38, "step": 25500 }, { "loss": 1.1488, "grad_norm": 0.8426992893218994, "learning_rate": 0.00012335766423357663, "epoch": 0.38, "step": 25525 }, { "loss": 1.1434, "grad_norm": 1.2710751295089722, "learning_rate": 0.00012328256886245533, "epoch": 0.38, "step": 25550 }, { "loss": 1.131, "grad_norm": 1.567521095275879, "learning_rate": 0.000123207473491334, "epoch": 0.38, "step": 25575 }, { "loss": 1.2268, "grad_norm": 1.6876307725906372, "learning_rate": 0.00012313237812021268, "epoch": 0.38, "step": 25600 }, { "loss": 1.178, "grad_norm": 1.5570650100708008, "learning_rate": 0.00012305728274909135, "epoch": 0.38, "step": 25625 }, { "loss": 1.1128, "grad_norm": 1.9181684255599976, "learning_rate": 0.00012298218737797002, "epoch": 0.39, "step": 25650 }, { "loss": 1.1662, "grad_norm": 1.4703614711761475, "learning_rate": 0.0001229070920068487, "epoch": 0.39, "step": 25675 }, { "loss": 1.2166, "grad_norm": 1.1674293279647827, "learning_rate": 0.00012283199663572737, "epoch": 0.39, "step": 25700 }, { "loss": 1.1962, "grad_norm": 2.910494565963745, "learning_rate": 0.00012275690126460607, "epoch": 0.39, "step": 25725 }, { "loss": 1.1996, "grad_norm": 1.249042272567749, "learning_rate": 0.00012268180589348473, "epoch": 0.39, "step": 25750 }, { "loss": 1.1962, "grad_norm": 2.1757421493530273, "learning_rate": 0.00012260671052236342, "epoch": 0.39, "step": 25775 }, { "loss": 1.1302, "grad_norm": 1.8201817274093628, "learning_rate": 0.00012253161515124206, "epoch": 0.39, "step": 25800 }, { "loss": 1.1242, "grad_norm": 1.2587064504623413, "learning_rate": 0.00012245651978012075, "epoch": 0.39, "step": 25825 }, { "loss": 1.1353, "grad_norm": 1.9519400596618652, "learning_rate": 0.00012238142440899945, "epoch": 0.39, "step": 25850 }, { "loss": 1.2128, "grad_norm": 1.997555136680603, "learning_rate": 0.0001223063290378781, "epoch": 0.39, "step": 25875 }, { "loss": 1.1383, "grad_norm": 1.9942442178726196, "learning_rate": 0.0001222312336667568, "epoch": 0.39, "step": 25900 }, { "loss": 1.1726, "grad_norm": 2.1078426837921143, "learning_rate": 0.00012215613829563547, "epoch": 0.39, "step": 25925 }, { "loss": 1.1349, "grad_norm": 2.8128950595855713, "learning_rate": 0.00012208104292451413, "epoch": 0.39, "step": 25950 }, { "loss": 1.1536, "grad_norm": 1.986128330230713, "learning_rate": 0.00012200594755339281, "epoch": 0.39, "step": 25975 }, { "loss": 1.1194, "grad_norm": 1.418022871017456, "learning_rate": 0.00012193085218227149, "epoch": 0.39, "step": 26000 }, { "loss": 1.1819, "grad_norm": 1.2267699241638184, "learning_rate": 0.00012185575681115016, "epoch": 0.39, "step": 26025 }, { "loss": 1.1222, "grad_norm": 1.4214072227478027, "learning_rate": 0.00012178066144002884, "epoch": 0.39, "step": 26050 }, { "loss": 1.2028, "grad_norm": 3.486180543899536, "learning_rate": 0.00012170556606890753, "epoch": 0.39, "step": 26075 }, { "loss": 1.1714, "grad_norm": 1.6389093399047852, "learning_rate": 0.0001216304706977862, "epoch": 0.39, "step": 26100 }, { "loss": 1.1689, "grad_norm": 1.5613031387329102, "learning_rate": 0.00012155537532666487, "epoch": 0.39, "step": 26125 }, { "loss": 1.1821, "grad_norm": 1.5050113201141357, "learning_rate": 0.00012148027995554354, "epoch": 0.39, "step": 26150 }, { "loss": 1.167, "grad_norm": 1.2190027236938477, "learning_rate": 0.00012140518458442223, "epoch": 0.39, "step": 26175 }, { "loss": 1.2042, "grad_norm": 1.0376909971237183, "learning_rate": 0.0001213300892133009, "epoch": 0.39, "step": 26200 }, { "loss": 1.1713, "grad_norm": 1.036734938621521, "learning_rate": 0.00012126100147186927, "epoch": 0.39, "step": 26225 }, { "loss": 1.1867, "grad_norm": 0.933276355266571, "learning_rate": 0.00012118890991559282, "epoch": 0.39, "step": 26250 }, { "loss": 1.1568, "grad_norm": 1.8247997760772705, "learning_rate": 0.00012111381454447148, "epoch": 0.39, "step": 26275 }, { "loss": 1.1209, "grad_norm": 1.7920253276824951, "learning_rate": 0.00012103871917335017, "epoch": 0.39, "step": 26300 }, { "loss": 1.1424, "grad_norm": 1.558129906654358, "learning_rate": 0.00012096362380222883, "epoch": 0.4, "step": 26325 }, { "loss": 1.1207, "grad_norm": 2.0236053466796875, "learning_rate": 0.00012088852843110752, "epoch": 0.4, "step": 26350 }, { "loss": 1.1367, "grad_norm": 2.042004108428955, "learning_rate": 0.00012081343305998618, "epoch": 0.4, "step": 26375 }, { "loss": 1.1823, "grad_norm": 1.694769024848938, "learning_rate": 0.00012073833768886486, "epoch": 0.4, "step": 26400 }, { "loss": 1.1536, "grad_norm": 2.398012399673462, "learning_rate": 0.00012066324231774353, "epoch": 0.4, "step": 26425 }, { "loss": 1.2019, "grad_norm": 3.8714237213134766, "learning_rate": 0.00012058814694662222, "epoch": 0.4, "step": 26450 }, { "loss": 1.2109, "grad_norm": 2.893437147140503, "learning_rate": 0.0001205130515755009, "epoch": 0.4, "step": 26475 }, { "loss": 1.2099, "grad_norm": 1.7134922742843628, "learning_rate": 0.00012043795620437956, "epoch": 0.4, "step": 26500 }, { "loss": 1.1989, "grad_norm": 2.3126907348632812, "learning_rate": 0.00012036286083325826, "epoch": 0.4, "step": 26525 }, { "loss": 1.121, "grad_norm": 2.5289969444274902, "learning_rate": 0.00012028776546213691, "epoch": 0.4, "step": 26550 }, { "loss": 1.21, "grad_norm": 2.551736354827881, "learning_rate": 0.0001202126700910156, "epoch": 0.4, "step": 26575 }, { "loss": 1.1433, "grad_norm": 1.8382607698440552, "learning_rate": 0.00012013757471989427, "epoch": 0.4, "step": 26600 }, { "loss": 1.1714, "grad_norm": 1.9856308698654175, "learning_rate": 0.00012006247934877295, "epoch": 0.4, "step": 26625 }, { "loss": 1.1646, "grad_norm": 1.3132210969924927, "learning_rate": 0.00011998738397765161, "epoch": 0.4, "step": 26650 }, { "loss": 1.1394, "grad_norm": 1.8171156644821167, "learning_rate": 0.0001199122886065303, "epoch": 0.4, "step": 26675 }, { "loss": 1.1494, "grad_norm": 1.6852163076400757, "learning_rate": 0.00011983719323540898, "epoch": 0.4, "step": 26700 }, { "loss": 1.1522, "grad_norm": 1.7947680950164795, "learning_rate": 0.00011976209786428765, "epoch": 0.4, "step": 26725 }, { "loss": 1.1794, "grad_norm": 2.0458626747131348, "learning_rate": 0.00011968700249316634, "epoch": 0.4, "step": 26750 }, { "loss": 1.2013, "grad_norm": 1.6670138835906982, "learning_rate": 0.00011961190712204499, "epoch": 0.4, "step": 26775 }, { "loss": 1.2052, "grad_norm": 1.9082565307617188, "learning_rate": 0.00011953681175092368, "epoch": 0.4, "step": 26800 }, { "loss": 1.1175, "grad_norm": 1.3584920167922974, "learning_rate": 0.00011946171637980235, "epoch": 0.4, "step": 26825 }, { "loss": 1.1264, "grad_norm": 2.0976293087005615, "learning_rate": 0.00011938662100868103, "epoch": 0.4, "step": 26850 }, { "loss": 1.2392, "grad_norm": 2.034069776535034, "learning_rate": 0.00011931152563755969, "epoch": 0.4, "step": 26875 }, { "loss": 1.1871, "grad_norm": 1.4389294385910034, "learning_rate": 0.00011923643026643839, "epoch": 0.4, "step": 26900 }, { "loss": 1.1544, "grad_norm": 1.7886531352996826, "learning_rate": 0.00011916133489531706, "epoch": 0.4, "step": 26925 }, { "loss": 1.1557, "grad_norm": 1.1227729320526123, "learning_rate": 0.00011908623952419573, "epoch": 0.4, "step": 26950 }, { "loss": 1.113, "grad_norm": 1.5332506895065308, "learning_rate": 0.00011901114415307442, "epoch": 0.41, "step": 26975 }, { "loss": 1.2323, "grad_norm": 1.5316015481948853, "learning_rate": 0.00011893604878195307, "epoch": 0.41, "step": 27000 }, { "loss": 1.1814, "grad_norm": 1.7715721130371094, "learning_rate": 0.00011886095341083177, "epoch": 0.41, "step": 27025 }, { "loss": 1.117, "grad_norm": 1.1491894721984863, "learning_rate": 0.00011878585803971043, "epoch": 0.41, "step": 27050 }, { "loss": 1.1541, "grad_norm": 1.2926276922225952, "learning_rate": 0.00011871076266858911, "epoch": 0.41, "step": 27075 }, { "loss": 1.1933, "grad_norm": 1.9231313467025757, "learning_rate": 0.0001186356672974678, "epoch": 0.41, "step": 27100 }, { "loss": 1.1409, "grad_norm": 2.1319782733917236, "learning_rate": 0.00011856057192634647, "epoch": 0.41, "step": 27125 }, { "loss": 1.2189, "grad_norm": 1.5915454626083374, "learning_rate": 0.00011848547655522515, "epoch": 0.41, "step": 27150 }, { "loss": 1.1335, "grad_norm": 1.5728384256362915, "learning_rate": 0.00011841038118410381, "epoch": 0.41, "step": 27175 }, { "loss": 1.1534, "grad_norm": 1.0829964876174927, "learning_rate": 0.0001183352858129825, "epoch": 0.41, "step": 27200 }, { "loss": 1.0959, "grad_norm": 2.1620664596557617, "learning_rate": 0.00011826019044186116, "epoch": 0.41, "step": 27225 }, { "loss": 1.1316, "grad_norm": 1.7385821342468262, "learning_rate": 0.00011818509507073985, "epoch": 0.41, "step": 27250 }, { "loss": 1.121, "grad_norm": 2.2649617195129395, "learning_rate": 0.00011810999969961851, "epoch": 0.41, "step": 27275 }, { "loss": 1.1831, "grad_norm": 1.98993980884552, "learning_rate": 0.00011803490432849719, "epoch": 0.41, "step": 27300 }, { "loss": 1.1661, "grad_norm": 1.3478261232376099, "learning_rate": 0.00011795980895737589, "epoch": 0.41, "step": 27325 }, { "loss": 1.1912, "grad_norm": 1.5697304010391235, "learning_rate": 0.00011788471358625455, "epoch": 0.41, "step": 27350 }, { "loss": 1.2364, "grad_norm": 1.7027043104171753, "learning_rate": 0.00011780961821513323, "epoch": 0.41, "step": 27375 }, { "loss": 1.1422, "grad_norm": 1.8551706075668335, "learning_rate": 0.0001177345228440119, "epoch": 0.41, "step": 27400 }, { "loss": 1.1839, "grad_norm": 1.9152601957321167, "learning_rate": 0.00011765942747289059, "epoch": 0.41, "step": 27425 }, { "loss": 1.201, "grad_norm": 2.2264368534088135, "learning_rate": 0.00011758433210176924, "epoch": 0.41, "step": 27450 }, { "loss": 1.1721, "grad_norm": 1.2363280057907104, "learning_rate": 0.00011750923673064793, "epoch": 0.41, "step": 27475 }, { "loss": 1.1533, "grad_norm": 1.1803810596466064, "learning_rate": 0.0001174341413595266, "epoch": 0.41, "step": 27500 }, { "loss": 1.1379, "grad_norm": 1.3785597085952759, "learning_rate": 0.00011735904598840528, "epoch": 0.41, "step": 27525 }, { "loss": 1.1466, "grad_norm": 1.222312331199646, "learning_rate": 0.00011728395061728397, "epoch": 0.41, "step": 27550 }, { "loss": 1.1886, "grad_norm": 1.3862022161483765, "learning_rate": 0.00011720885524616263, "epoch": 0.41, "step": 27575 }, { "loss": 1.1591, "grad_norm": 1.8599638938903809, "learning_rate": 0.00011713375987504131, "epoch": 0.41, "step": 27600 }, { "loss": 1.2353, "grad_norm": 2.57729172706604, "learning_rate": 0.00011705866450391998, "epoch": 0.41, "step": 27625 }, { "loss": 1.1376, "grad_norm": 1.734212040901184, "learning_rate": 0.00011698356913279867, "epoch": 0.42, "step": 27650 }, { "loss": 1.1869, "grad_norm": 1.5688458681106567, "learning_rate": 0.00011690847376167732, "epoch": 0.42, "step": 27675 }, { "loss": 1.2175, "grad_norm": 1.8952748775482178, "learning_rate": 0.00011683337839055601, "epoch": 0.42, "step": 27700 }, { "loss": 1.1552, "grad_norm": 1.0788073539733887, "learning_rate": 0.00011675828301943468, "epoch": 0.42, "step": 27725 }, { "loss": 1.1604, "grad_norm": 2.346510410308838, "learning_rate": 0.00011668318764831336, "epoch": 0.42, "step": 27750 }, { "loss": 1.1789, "grad_norm": 1.775448203086853, "learning_rate": 0.00011660809227719205, "epoch": 0.42, "step": 27775 }, { "loss": 1.1386, "grad_norm": 2.772287130355835, "learning_rate": 0.00011653299690607072, "epoch": 0.42, "step": 27800 }, { "loss": 1.1659, "grad_norm": 1.017460584640503, "learning_rate": 0.0001164579015349494, "epoch": 0.42, "step": 27825 }, { "loss": 1.1647, "grad_norm": 1.9251552820205688, "learning_rate": 0.00011638280616382806, "epoch": 0.42, "step": 27850 }, { "loss": 1.2273, "grad_norm": 1.44833242893219, "learning_rate": 0.00011630771079270675, "epoch": 0.42, "step": 27875 }, { "loss": 1.2112, "grad_norm": 1.3559473752975464, "learning_rate": 0.0001162326154215854, "epoch": 0.42, "step": 27900 }, { "loss": 1.219, "grad_norm": 2.958477258682251, "learning_rate": 0.0001161575200504641, "epoch": 0.42, "step": 27925 }, { "loss": 1.142, "grad_norm": 1.5322625637054443, "learning_rate": 0.00011608242467934276, "epoch": 0.42, "step": 27950 }, { "loss": 1.1681, "grad_norm": 1.0819323062896729, "learning_rate": 0.00011600732930822144, "epoch": 0.42, "step": 27975 }, { "loss": 1.1307, "grad_norm": 1.086040735244751, "learning_rate": 0.00011593223393710013, "epoch": 0.42, "step": 28000 }, { "loss": 1.1872, "grad_norm": 2.5879430770874023, "learning_rate": 0.0001158571385659788, "epoch": 0.42, "step": 28025 }, { "loss": 1.1294, "grad_norm": 1.5664997100830078, "learning_rate": 0.00011578204319485748, "epoch": 0.42, "step": 28050 }, { "loss": 1.1441, "grad_norm": 1.5841997861862183, "learning_rate": 0.00011570694782373614, "epoch": 0.42, "step": 28075 }, { "loss": 1.1663, "grad_norm": 1.0428881645202637, "learning_rate": 0.00011563185245261484, "epoch": 0.42, "step": 28100 }, { "loss": 1.1412, "grad_norm": 1.4296401739120483, "learning_rate": 0.00011555675708149349, "epoch": 0.42, "step": 28125 }, { "loss": 1.1706, "grad_norm": 1.3985766172409058, "learning_rate": 0.00011548166171037218, "epoch": 0.42, "step": 28150 }, { "loss": 1.1258, "grad_norm": 1.4393442869186401, "learning_rate": 0.00011540656633925085, "epoch": 0.42, "step": 28175 }, { "loss": 1.1365, "grad_norm": 1.9533663988113403, "learning_rate": 0.00011533147096812952, "epoch": 0.42, "step": 28200 }, { "loss": 1.1164, "grad_norm": 1.4977903366088867, "learning_rate": 0.00011525637559700822, "epoch": 0.42, "step": 28225 }, { "loss": 1.2153, "grad_norm": 1.857847809791565, "learning_rate": 0.00011518128022588688, "epoch": 0.42, "step": 28250 }, { "loss": 1.1949, "grad_norm": 1.774740219116211, "learning_rate": 0.00011510618485476556, "epoch": 0.42, "step": 28275 }, { "loss": 1.1738, "grad_norm": 1.1024271249771118, "learning_rate": 0.00011503108948364423, "epoch": 0.43, "step": 28300 }, { "loss": 1.1814, "grad_norm": 3.308375358581543, "learning_rate": 0.00011495599411252292, "epoch": 0.43, "step": 28325 }, { "loss": 1.1684, "grad_norm": 1.0142186880111694, "learning_rate": 0.00011488089874140158, "epoch": 0.43, "step": 28350 }, { "loss": 1.1109, "grad_norm": 2.34968638420105, "learning_rate": 0.00011480580337028026, "epoch": 0.43, "step": 28375 }, { "loss": 1.1584, "grad_norm": 1.1933414936065674, "learning_rate": 0.00011473070799915895, "epoch": 0.43, "step": 28400 }, { "loss": 1.1562, "grad_norm": 1.6852394342422485, "learning_rate": 0.0001146556126280376, "epoch": 0.43, "step": 28425 }, { "loss": 1.0875, "grad_norm": 1.276416301727295, "learning_rate": 0.0001145805172569163, "epoch": 0.43, "step": 28450 }, { "loss": 1.2261, "grad_norm": 2.792825937271118, "learning_rate": 0.00011450542188579496, "epoch": 0.43, "step": 28475 }, { "loss": 1.166, "grad_norm": 1.2241714000701904, "learning_rate": 0.00011443032651467364, "epoch": 0.43, "step": 28500 }, { "loss": 1.2041, "grad_norm": 1.3080065250396729, "learning_rate": 0.00011435523114355231, "epoch": 0.43, "step": 28525 }, { "loss": 1.1149, "grad_norm": 1.7479028701782227, "learning_rate": 0.000114280135772431, "epoch": 0.43, "step": 28550 }, { "loss": 1.1577, "grad_norm": 2.0920069217681885, "learning_rate": 0.00011420504040130967, "epoch": 0.43, "step": 28575 }, { "loss": 1.1439, "grad_norm": 1.1147267818450928, "learning_rate": 0.00011412994503018835, "epoch": 0.43, "step": 28600 }, { "loss": 1.1928, "grad_norm": 1.3617130517959595, "learning_rate": 0.00011405484965906704, "epoch": 0.43, "step": 28625 }, { "loss": 1.1807, "grad_norm": 1.8022890090942383, "learning_rate": 0.00011397975428794569, "epoch": 0.43, "step": 28650 }, { "loss": 1.1285, "grad_norm": 1.4314754009246826, "learning_rate": 0.00011390465891682438, "epoch": 0.43, "step": 28675 }, { "loss": 1.2854, "grad_norm": 1.2290889024734497, "learning_rate": 0.00011382956354570305, "epoch": 0.43, "step": 28700 }, { "loss": 1.1868, "grad_norm": 1.2961443662643433, "learning_rate": 0.00011375446817458173, "epoch": 0.43, "step": 28725 }, { "loss": 1.1627, "grad_norm": 1.629899501800537, "learning_rate": 0.00011367937280346039, "epoch": 0.43, "step": 28750 }, { "loss": 1.1232, "grad_norm": 1.3125689029693604, "learning_rate": 0.00011360427743233908, "epoch": 0.43, "step": 28775 }, { "loss": 1.1396, "grad_norm": 1.3124148845672607, "learning_rate": 0.00011352918206121775, "epoch": 0.43, "step": 28800 }, { "loss": 1.0702, "grad_norm": 1.1580018997192383, "learning_rate": 0.00011345408669009643, "epoch": 0.43, "step": 28825 }, { "loss": 1.1338, "grad_norm": 1.5117197036743164, "learning_rate": 0.00011337899131897512, "epoch": 0.43, "step": 28850 }, { "loss": 1.1742, "grad_norm": 1.6845176219940186, "learning_rate": 0.00011330389594785377, "epoch": 0.43, "step": 28875 }, { "loss": 1.1828, "grad_norm": 1.1892350912094116, "learning_rate": 0.00011322880057673246, "epoch": 0.43, "step": 28900 }, { "loss": 1.237, "grad_norm": 1.562537431716919, "learning_rate": 0.00011315370520561113, "epoch": 0.43, "step": 28925 }, { "loss": 1.193, "grad_norm": 1.7920253276824951, "learning_rate": 0.00011307860983448981, "epoch": 0.43, "step": 28950 }, { "loss": 1.1734, "grad_norm": 1.7338802814483643, "learning_rate": 0.00011300351446336847, "epoch": 0.44, "step": 28975 }, { "loss": 1.1254, "grad_norm": 1.6084978580474854, "learning_rate": 0.00011292841909224717, "epoch": 0.44, "step": 29000 }, { "loss": 1.1564, "grad_norm": 2.1127138137817383, "learning_rate": 0.00011285332372112583, "epoch": 0.44, "step": 29025 }, { "loss": 1.1894, "grad_norm": 2.373610019683838, "learning_rate": 0.00011277822835000451, "epoch": 0.44, "step": 29050 }, { "loss": 1.0381, "grad_norm": 1.090454339981079, "learning_rate": 0.0001127031329788832, "epoch": 0.44, "step": 29075 }, { "loss": 1.1866, "grad_norm": 1.2997491359710693, "learning_rate": 0.00011262803760776185, "epoch": 0.44, "step": 29100 }, { "loss": 1.2086, "grad_norm": 1.9946448802947998, "learning_rate": 0.00011255294223664055, "epoch": 0.44, "step": 29125 }, { "loss": 1.1127, "grad_norm": 1.276667594909668, "learning_rate": 0.00011247784686551921, "epoch": 0.44, "step": 29150 }, { "loss": 1.2735, "grad_norm": 1.8735250234603882, "learning_rate": 0.00011240275149439789, "epoch": 0.44, "step": 29175 }, { "loss": 1.1269, "grad_norm": 1.4805363416671753, "learning_rate": 0.00011232765612327656, "epoch": 0.44, "step": 29200 }, { "loss": 1.1203, "grad_norm": 1.6462610960006714, "learning_rate": 0.00011225256075215525, "epoch": 0.44, "step": 29225 }, { "loss": 1.1661, "grad_norm": 1.334406852722168, "learning_rate": 0.00011217746538103391, "epoch": 0.44, "step": 29250 }, { "loss": 1.1603, "grad_norm": 1.3393394947052002, "learning_rate": 0.0001121023700099126, "epoch": 0.44, "step": 29275 }, { "loss": 1.1295, "grad_norm": 2.316953420639038, "learning_rate": 0.00011202727463879129, "epoch": 0.44, "step": 29300 }, { "loss": 1.1566, "grad_norm": 1.7229734659194946, "learning_rate": 0.00011195217926766994, "epoch": 0.44, "step": 29325 }, { "loss": 1.1669, "grad_norm": 2.08143949508667, "learning_rate": 0.00011187708389654863, "epoch": 0.44, "step": 29350 }, { "loss": 1.1525, "grad_norm": 2.7917256355285645, "learning_rate": 0.0001118019885254273, "epoch": 0.44, "step": 29375 }, { "loss": 1.1114, "grad_norm": 1.8444219827651978, "learning_rate": 0.00011172689315430597, "epoch": 0.44, "step": 29400 }, { "loss": 1.1588, "grad_norm": 1.2194463014602661, "learning_rate": 0.00011165179778318464, "epoch": 0.44, "step": 29425 }, { "loss": 1.2405, "grad_norm": 1.1201077699661255, "learning_rate": 0.00011157670241206333, "epoch": 0.44, "step": 29450 }, { "loss": 1.2188, "grad_norm": 2.771019220352173, "learning_rate": 0.000111501607040942, "epoch": 0.44, "step": 29475 }, { "loss": 1.1978, "grad_norm": 2.0680384635925293, "learning_rate": 0.00011142651166982068, "epoch": 0.44, "step": 29500 }, { "loss": 1.1814, "grad_norm": 1.2148905992507935, "learning_rate": 0.00011135141629869937, "epoch": 0.44, "step": 29525 }, { "loss": 1.1382, "grad_norm": 1.3024623394012451, "learning_rate": 0.00011127632092757802, "epoch": 0.44, "step": 29550 }, { "loss": 1.173, "grad_norm": 1.3196483850479126, "learning_rate": 0.00011120122555645671, "epoch": 0.44, "step": 29575 }, { "loss": 1.2608, "grad_norm": 1.9761130809783936, "learning_rate": 0.00011112613018533538, "epoch": 0.44, "step": 29600 }, { "loss": 1.1977, "grad_norm": 2.152472734451294, "learning_rate": 0.00011105103481421406, "epoch": 0.44, "step": 29625 }, { "loss": 1.2164, "grad_norm": 1.2230114936828613, "learning_rate": 0.00011097593944309272, "epoch": 0.45, "step": 29650 }, { "loss": 1.1368, "grad_norm": 1.2674063444137573, "learning_rate": 0.00011090084407197141, "epoch": 0.45, "step": 29675 }, { "loss": 1.1773, "grad_norm": 1.7089192867279053, "learning_rate": 0.00011082574870085008, "epoch": 0.45, "step": 29700 }, { "loss": 1.2058, "grad_norm": 1.6862412691116333, "learning_rate": 0.00011075065332972876, "epoch": 0.45, "step": 29725 }, { "loss": 1.1363, "grad_norm": 1.8428794145584106, "learning_rate": 0.00011067555795860745, "epoch": 0.45, "step": 29750 }, { "loss": 1.1239, "grad_norm": 1.7620809078216553, "learning_rate": 0.0001106004625874861, "epoch": 0.45, "step": 29775 }, { "loss": 1.1175, "grad_norm": 2.246371269226074, "learning_rate": 0.0001105253672163648, "epoch": 0.45, "step": 29800 }, { "loss": 1.1432, "grad_norm": 1.3259189128875732, "learning_rate": 0.00011045027184524346, "epoch": 0.45, "step": 29825 }, { "loss": 1.1352, "grad_norm": 1.642720103263855, "learning_rate": 0.00011037517647412214, "epoch": 0.45, "step": 29850 }, { "loss": 1.1642, "grad_norm": 1.3091384172439575, "learning_rate": 0.0001103000811030008, "epoch": 0.45, "step": 29875 }, { "loss": 1.155, "grad_norm": 1.44764244556427, "learning_rate": 0.0001102249857318795, "epoch": 0.45, "step": 29900 }, { "loss": 1.1144, "grad_norm": 3.290072441101074, "learning_rate": 0.00011014989036075818, "epoch": 0.45, "step": 29925 }, { "loss": 1.1852, "grad_norm": 1.8344993591308594, "learning_rate": 0.00011007479498963684, "epoch": 0.45, "step": 29950 }, { "loss": 1.2341, "grad_norm": 1.0677040815353394, "learning_rate": 0.00010999969961851553, "epoch": 0.45, "step": 29975 }, { "loss": 1.1522, "grad_norm": 1.430322527885437, "learning_rate": 0.00010992460424739419, "epoch": 0.45, "step": 30000 }, { "loss": 1.1885, "grad_norm": 2.407017230987549, "learning_rate": 0.00010984950887627288, "epoch": 0.45, "step": 30025 }, { "loss": 1.2033, "grad_norm": 1.9406884908676147, "learning_rate": 0.00010977441350515154, "epoch": 0.45, "step": 30050 }, { "loss": 1.1372, "grad_norm": 2.1446497440338135, "learning_rate": 0.00010969931813403022, "epoch": 0.45, "step": 30075 }, { "loss": 1.1924, "grad_norm": 1.4735894203186035, "learning_rate": 0.00010962422276290889, "epoch": 0.45, "step": 30100 }, { "loss": 1.1923, "grad_norm": 1.4889634847640991, "learning_rate": 0.00010954912739178758, "epoch": 0.45, "step": 30125 }, { "loss": 1.1588, "grad_norm": 1.8243343830108643, "learning_rate": 0.00010947403202066626, "epoch": 0.45, "step": 30150 }, { "loss": 1.1383, "grad_norm": 1.3423229455947876, "learning_rate": 0.00010939893664954492, "epoch": 0.45, "step": 30175 }, { "loss": 1.1461, "grad_norm": 2.072646141052246, "learning_rate": 0.00010932384127842362, "epoch": 0.45, "step": 30200 }, { "loss": 1.2188, "grad_norm": 2.139387845993042, "learning_rate": 0.00010924874590730227, "epoch": 0.45, "step": 30225 }, { "loss": 1.1639, "grad_norm": 6.252641677856445, "learning_rate": 0.00010917365053618096, "epoch": 0.45, "step": 30250 }, { "loss": 1.1542, "grad_norm": 1.225797176361084, "learning_rate": 0.00010909855516505963, "epoch": 0.45, "step": 30275 }, { "loss": 1.1593, "grad_norm": 1.5084859132766724, "learning_rate": 0.0001090234597939383, "epoch": 0.46, "step": 30300 }, { "loss": 1.1765, "grad_norm": 1.0552685260772705, "learning_rate": 0.00010894836442281697, "epoch": 0.46, "step": 30325 }, { "loss": 1.1944, "grad_norm": 3.4387400150299072, "learning_rate": 0.00010887326905169566, "epoch": 0.46, "step": 30350 }, { "loss": 1.0752, "grad_norm": 1.3896501064300537, "learning_rate": 0.00010879817368057434, "epoch": 0.46, "step": 30375 }, { "loss": 1.1576, "grad_norm": 1.6324450969696045, "learning_rate": 0.00010872307830945301, "epoch": 0.46, "step": 30400 }, { "loss": 1.1854, "grad_norm": 2.059718132019043, "learning_rate": 0.0001086479829383317, "epoch": 0.46, "step": 30425 }, { "loss": 1.1918, "grad_norm": 1.7998640537261963, "learning_rate": 0.00010857288756721035, "epoch": 0.46, "step": 30450 }, { "loss": 1.186, "grad_norm": 1.9032535552978516, "learning_rate": 0.00010849779219608904, "epoch": 0.46, "step": 30475 }, { "loss": 1.1738, "grad_norm": 1.7081289291381836, "learning_rate": 0.00010842269682496771, "epoch": 0.46, "step": 30500 }, { "loss": 1.1147, "grad_norm": 1.2194355726242065, "learning_rate": 0.00010834760145384639, "epoch": 0.46, "step": 30525 }, { "loss": 1.0827, "grad_norm": 1.396530032157898, "learning_rate": 0.00010827250608272505, "epoch": 0.46, "step": 30550 }, { "loss": 1.1544, "grad_norm": 2.1990020275115967, "learning_rate": 0.00010819741071160375, "epoch": 0.46, "step": 30575 }, { "loss": 1.1533, "grad_norm": 1.4652187824249268, "learning_rate": 0.00010812231534048242, "epoch": 0.46, "step": 30600 }, { "loss": 1.1761, "grad_norm": 1.4150506258010864, "learning_rate": 0.00010804721996936109, "epoch": 0.46, "step": 30625 }, { "loss": 1.2356, "grad_norm": 1.5214896202087402, "learning_rate": 0.00010797212459823978, "epoch": 0.46, "step": 30650 }, { "loss": 1.1893, "grad_norm": 1.470495581626892, "learning_rate": 0.00010789702922711843, "epoch": 0.46, "step": 30675 }, { "loss": 1.1708, "grad_norm": 2.846820592880249, "learning_rate": 0.00010782193385599713, "epoch": 0.46, "step": 30700 }, { "loss": 1.1546, "grad_norm": 1.4119728803634644, "learning_rate": 0.00010774683848487579, "epoch": 0.46, "step": 30725 }, { "loss": 1.1822, "grad_norm": 1.4061907529830933, "learning_rate": 0.00010767174311375447, "epoch": 0.46, "step": 30750 }, { "loss": 1.1481, "grad_norm": 1.3078978061676025, "learning_rate": 0.00010759664774263314, "epoch": 0.46, "step": 30775 }, { "loss": 1.1322, "grad_norm": 2.0098421573638916, "learning_rate": 0.00010752155237151183, "epoch": 0.46, "step": 30800 }, { "loss": 1.1943, "grad_norm": 2.3420894145965576, "learning_rate": 0.00010744645700039051, "epoch": 0.46, "step": 30825 }, { "loss": 1.1382, "grad_norm": 2.183663845062256, "learning_rate": 0.00010737136162926917, "epoch": 0.46, "step": 30850 }, { "loss": 1.2107, "grad_norm": 1.6581045389175415, "learning_rate": 0.00010729626625814786, "epoch": 0.46, "step": 30875 }, { "loss": 1.2586, "grad_norm": 1.961310625076294, "learning_rate": 0.00010722117088702652, "epoch": 0.46, "step": 30900 }, { "loss": 1.1607, "grad_norm": 1.231471061706543, "learning_rate": 0.00010714607551590521, "epoch": 0.46, "step": 30925 }, { "loss": 1.1467, "grad_norm": 1.653730869293213, "learning_rate": 0.00010707098014478387, "epoch": 0.46, "step": 30950 }, { "loss": 1.2346, "grad_norm": 1.830336332321167, "learning_rate": 0.00010699588477366255, "epoch": 0.47, "step": 30975 }, { "loss": 1.174, "grad_norm": 1.4249459505081177, "learning_rate": 0.00010692078940254122, "epoch": 0.47, "step": 31000 }, { "loss": 1.1379, "grad_norm": 1.7390903234481812, "learning_rate": 0.00010684569403141991, "epoch": 0.47, "step": 31025 }, { "loss": 1.2185, "grad_norm": 1.3198795318603516, "learning_rate": 0.00010677059866029859, "epoch": 0.47, "step": 31050 }, { "loss": 1.1644, "grad_norm": 1.7585688829421997, "learning_rate": 0.00010669550328917725, "epoch": 0.47, "step": 31075 }, { "loss": 1.2051, "grad_norm": 1.4614295959472656, "learning_rate": 0.00010662040791805595, "epoch": 0.47, "step": 31100 }, { "loss": 1.0994, "grad_norm": 2.1233184337615967, "learning_rate": 0.0001065453125469346, "epoch": 0.47, "step": 31125 }, { "loss": 1.1336, "grad_norm": 2.0219411849975586, "learning_rate": 0.00010647021717581329, "epoch": 0.47, "step": 31150 }, { "loss": 1.1349, "grad_norm": 1.2599328756332397, "learning_rate": 0.00010639512180469196, "epoch": 0.47, "step": 31175 }, { "loss": 1.2062, "grad_norm": 1.209994912147522, "learning_rate": 0.00010632002643357064, "epoch": 0.47, "step": 31200 }, { "loss": 1.1555, "grad_norm": 1.5804765224456787, "learning_rate": 0.00010624493106244933, "epoch": 0.47, "step": 31225 }, { "loss": 1.2051, "grad_norm": 1.906879186630249, "learning_rate": 0.000106169835691328, "epoch": 0.47, "step": 31250 }, { "loss": 1.2132, "grad_norm": 1.4314424991607666, "learning_rate": 0.00010609474032020667, "epoch": 0.47, "step": 31275 }, { "loss": 1.2105, "grad_norm": 1.4528160095214844, "learning_rate": 0.00010601964494908534, "epoch": 0.47, "step": 31300 }, { "loss": 1.2308, "grad_norm": 1.5849334001541138, "learning_rate": 0.00010594454957796403, "epoch": 0.47, "step": 31325 }, { "loss": 1.1983, "grad_norm": 1.6990954875946045, "learning_rate": 0.0001058694542068427, "epoch": 0.47, "step": 31350 }, { "loss": 1.1251, "grad_norm": 1.3091074228286743, "learning_rate": 0.00010579435883572137, "epoch": 0.47, "step": 31375 }, { "loss": 1.1784, "grad_norm": 2.4281911849975586, "learning_rate": 0.00010571926346460004, "epoch": 0.47, "step": 31400 }, { "loss": 1.1874, "grad_norm": 1.9910012483596802, "learning_rate": 0.00010564416809347872, "epoch": 0.47, "step": 31425 }, { "loss": 1.2311, "grad_norm": 2.100861072540283, "learning_rate": 0.00010556907272235741, "epoch": 0.47, "step": 31450 }, { "loss": 1.1173, "grad_norm": 1.6685750484466553, "learning_rate": 0.00010549397735123608, "epoch": 0.47, "step": 31475 }, { "loss": 1.1874, "grad_norm": 3.5001275539398193, "learning_rate": 0.00010541888198011476, "epoch": 0.47, "step": 31500 }, { "loss": 1.1384, "grad_norm": 1.6073639392852783, "learning_rate": 0.00010534378660899342, "epoch": 0.47, "step": 31525 }, { "loss": 1.2495, "grad_norm": 1.9744518995285034, "learning_rate": 0.00010526869123787211, "epoch": 0.47, "step": 31550 }, { "loss": 1.1328, "grad_norm": 1.4878309965133667, "learning_rate": 0.00010519359586675078, "epoch": 0.47, "step": 31575 }, { "loss": 1.2093, "grad_norm": 3.258043050765991, "learning_rate": 0.00010511850049562946, "epoch": 0.47, "step": 31600 }, { "loss": 1.1802, "grad_norm": 2.012786865234375, "learning_rate": 0.00010504340512450812, "epoch": 0.47, "step": 31625 }, { "loss": 1.096, "grad_norm": 1.3581587076187134, "learning_rate": 0.0001049683097533868, "epoch": 0.48, "step": 31650 }, { "loss": 1.1548, "grad_norm": 1.2571851015090942, "learning_rate": 0.0001048932143822655, "epoch": 0.48, "step": 31675 }, { "loss": 1.1615, "grad_norm": 1.5408381223678589, "learning_rate": 0.00010481811901114416, "epoch": 0.48, "step": 31700 }, { "loss": 1.1491, "grad_norm": 2.3489863872528076, "learning_rate": 0.00010474302364002284, "epoch": 0.48, "step": 31725 }, { "loss": 1.174, "grad_norm": 1.5670727491378784, "learning_rate": 0.0001046679282689015, "epoch": 0.48, "step": 31750 }, { "loss": 1.1087, "grad_norm": 1.6657809019088745, "learning_rate": 0.0001045928328977802, "epoch": 0.48, "step": 31775 }, { "loss": 1.1351, "grad_norm": 2.1541805267333984, "learning_rate": 0.00010451773752665886, "epoch": 0.48, "step": 31800 }, { "loss": 1.0992, "grad_norm": 1.6802806854248047, "learning_rate": 0.00010444264215553754, "epoch": 0.48, "step": 31825 }, { "loss": 1.2207, "grad_norm": 1.513509750366211, "learning_rate": 0.0001043675467844162, "epoch": 0.48, "step": 31850 }, { "loss": 1.1775, "grad_norm": 1.223694920539856, "learning_rate": 0.00010429245141329488, "epoch": 0.48, "step": 31875 }, { "loss": 1.1863, "grad_norm": 1.8998793363571167, "learning_rate": 0.00010421735604217358, "epoch": 0.48, "step": 31900 }, { "loss": 1.1404, "grad_norm": 2.1678850650787354, "learning_rate": 0.00010414226067105224, "epoch": 0.48, "step": 31925 }, { "loss": 1.1979, "grad_norm": 1.7826672792434692, "learning_rate": 0.00010406716529993092, "epoch": 0.48, "step": 31950 }, { "loss": 1.2179, "grad_norm": 2.165457248687744, "learning_rate": 0.00010399206992880959, "epoch": 0.48, "step": 31975 }, { "loss": 1.1489, "grad_norm": 1.6185364723205566, "learning_rate": 0.00010391697455768828, "epoch": 0.48, "step": 32000 }, { "loss": 1.1699, "grad_norm": 1.2954517602920532, "learning_rate": 0.00010384187918656694, "epoch": 0.48, "step": 32025 }, { "loss": 1.2003, "grad_norm": 1.919216275215149, "learning_rate": 0.00010376678381544562, "epoch": 0.48, "step": 32050 }, { "loss": 1.1411, "grad_norm": 1.045401692390442, "learning_rate": 0.00010369168844432429, "epoch": 0.48, "step": 32075 }, { "loss": 1.1166, "grad_norm": 1.860318660736084, "learning_rate": 0.00010361659307320297, "epoch": 0.48, "step": 32100 }, { "loss": 1.2215, "grad_norm": 0.9368788599967957, "learning_rate": 0.00010354149770208166, "epoch": 0.48, "step": 32125 }, { "loss": 1.1697, "grad_norm": 1.7075835466384888, "learning_rate": 0.00010346640233096032, "epoch": 0.48, "step": 32150 }, { "loss": 1.1934, "grad_norm": 1.1933406591415405, "learning_rate": 0.000103391306959839, "epoch": 0.48, "step": 32175 }, { "loss": 1.2051, "grad_norm": 1.681666612625122, "learning_rate": 0.00010331621158871767, "epoch": 0.48, "step": 32200 }, { "loss": 1.1314, "grad_norm": 2.0042386054992676, "learning_rate": 0.00010324111621759636, "epoch": 0.48, "step": 32225 }, { "loss": 1.1505, "grad_norm": 1.976456880569458, "learning_rate": 0.00010316602084647503, "epoch": 0.48, "step": 32250 }, { "loss": 1.1234, "grad_norm": 1.852589726448059, "learning_rate": 0.0001030909254753537, "epoch": 0.48, "step": 32275 }, { "loss": 1.1631, "grad_norm": 1.812740445137024, "learning_rate": 0.00010301583010423237, "epoch": 0.49, "step": 32300 }, { "loss": 1.2414, "grad_norm": 1.424230694770813, "learning_rate": 0.00010294073473311105, "epoch": 0.49, "step": 32325 }, { "loss": 1.1732, "grad_norm": 1.4877756834030151, "learning_rate": 0.00010286563936198974, "epoch": 0.49, "step": 32350 }, { "loss": 1.0968, "grad_norm": 0.8852760195732117, "learning_rate": 0.00010279054399086841, "epoch": 0.49, "step": 32375 }, { "loss": 1.1685, "grad_norm": 1.217244029045105, "learning_rate": 0.00010271544861974709, "epoch": 0.49, "step": 32400 }, { "loss": 1.2013, "grad_norm": 1.0668590068817139, "learning_rate": 0.00010264035324862575, "epoch": 0.49, "step": 32425 }, { "loss": 1.2111, "grad_norm": 2.8531405925750732, "learning_rate": 0.00010256525787750444, "epoch": 0.49, "step": 32450 }, { "loss": 1.1358, "grad_norm": 1.3973661661148071, "learning_rate": 0.00010249016250638311, "epoch": 0.49, "step": 32475 }, { "loss": 1.1817, "grad_norm": 1.641974925994873, "learning_rate": 0.00010241506713526179, "epoch": 0.49, "step": 32500 }, { "loss": 1.162, "grad_norm": 1.5248854160308838, "learning_rate": 0.00010233997176414048, "epoch": 0.49, "step": 32525 }, { "loss": 1.1503, "grad_norm": 1.7267481088638306, "learning_rate": 0.00010226487639301913, "epoch": 0.49, "step": 32550 }, { "loss": 1.1459, "grad_norm": 1.0616050958633423, "learning_rate": 0.00010218978102189782, "epoch": 0.49, "step": 32575 }, { "loss": 1.0668, "grad_norm": 3.3019354343414307, "learning_rate": 0.00010211468565077649, "epoch": 0.49, "step": 32600 }, { "loss": 1.1959, "grad_norm": 0.9270702004432678, "learning_rate": 0.00010203959027965517, "epoch": 0.49, "step": 32625 }, { "loss": 1.1643, "grad_norm": 1.6093809604644775, "learning_rate": 0.00010196449490853383, "epoch": 0.49, "step": 32650 }, { "loss": 1.1398, "grad_norm": 1.0636630058288574, "learning_rate": 0.00010188939953741253, "epoch": 0.49, "step": 32675 }, { "loss": 1.1828, "grad_norm": 1.4841707944869995, "learning_rate": 0.00010181430416629119, "epoch": 0.49, "step": 32700 }, { "loss": 1.2011, "grad_norm": 1.9186432361602783, "learning_rate": 0.00010173920879516987, "epoch": 0.49, "step": 32725 }, { "loss": 1.1309, "grad_norm": 1.3214590549468994, "learning_rate": 0.00010166411342404856, "epoch": 0.49, "step": 32750 }, { "loss": 1.1553, "grad_norm": 1.2666594982147217, "learning_rate": 0.00010158901805292721, "epoch": 0.49, "step": 32775 }, { "loss": 1.1451, "grad_norm": 1.2383131980895996, "learning_rate": 0.00010151392268180591, "epoch": 0.49, "step": 32800 }, { "loss": 1.1737, "grad_norm": 1.585282564163208, "learning_rate": 0.00010143882731068457, "epoch": 0.49, "step": 32825 }, { "loss": 1.189, "grad_norm": 2.2569665908813477, "learning_rate": 0.00010136373193956325, "epoch": 0.49, "step": 32850 }, { "loss": 1.1316, "grad_norm": 1.6479202508926392, "learning_rate": 0.00010128863656844192, "epoch": 0.49, "step": 32875 }, { "loss": 1.2062, "grad_norm": 1.6843442916870117, "learning_rate": 0.00010121354119732061, "epoch": 0.49, "step": 32900 }, { "loss": 1.216, "grad_norm": 1.4396450519561768, "learning_rate": 0.00010113844582619927, "epoch": 0.49, "step": 32925 }, { "loss": 1.1504, "grad_norm": 2.33687162399292, "learning_rate": 0.00010106335045507795, "epoch": 0.49, "step": 32950 }, { "loss": 1.1324, "grad_norm": 1.071869969367981, "learning_rate": 0.00010098825508395665, "epoch": 0.5, "step": 32975 }, { "loss": 1.1709, "grad_norm": 1.5846800804138184, "learning_rate": 0.0001009131597128353, "epoch": 0.5, "step": 33000 }, { "loss": 1.1503, "grad_norm": 2.1538047790527344, "learning_rate": 0.00010083806434171399, "epoch": 0.5, "step": 33025 }, { "loss": 1.1561, "grad_norm": 1.5584303140640259, "learning_rate": 0.00010076296897059266, "epoch": 0.5, "step": 33050 }, { "loss": 1.156, "grad_norm": 1.192090392112732, "learning_rate": 0.00010068787359947133, "epoch": 0.5, "step": 33075 }, { "loss": 1.1814, "grad_norm": 1.8236268758773804, "learning_rate": 0.00010061277822835, "epoch": 0.5, "step": 33100 }, { "loss": 1.1463, "grad_norm": 1.067664384841919, "learning_rate": 0.00010053768285722869, "epoch": 0.5, "step": 33125 }, { "loss": 1.151, "grad_norm": 2.6256847381591797, "learning_rate": 0.00010046258748610736, "epoch": 0.5, "step": 33150 }, { "loss": 1.2417, "grad_norm": 2.632324695587158, "learning_rate": 0.00010038749211498604, "epoch": 0.5, "step": 33175 }, { "loss": 1.2562, "grad_norm": 1.5089225769042969, "learning_rate": 0.00010031239674386473, "epoch": 0.5, "step": 33200 }, { "loss": 1.1732, "grad_norm": 2.253978967666626, "learning_rate": 0.00010023730137274338, "epoch": 0.5, "step": 33225 }, { "loss": 1.1754, "grad_norm": 1.3430489301681519, "learning_rate": 0.00010016220600162207, "epoch": 0.5, "step": 33250 }, { "loss": 1.1398, "grad_norm": 1.406375765800476, "learning_rate": 0.00010008711063050074, "epoch": 0.5, "step": 33275 }, { "loss": 1.1073, "grad_norm": 1.3083038330078125, "learning_rate": 0.00010001201525937942, "epoch": 0.5, "step": 33300 }, { "loss": 1.1303, "grad_norm": 1.0769158601760864, "learning_rate": 9.99369198882581e-05, "epoch": 0.5, "step": 33325 }, { "loss": 1.1311, "grad_norm": 1.7924445867538452, "learning_rate": 9.986182451713677e-05, "epoch": 0.5, "step": 33350 }, { "loss": 1.1369, "grad_norm": 2.062908411026001, "learning_rate": 9.978672914601544e-05, "epoch": 0.5, "step": 33375 }, { "loss": 1.0535, "grad_norm": 1.7589771747589111, "learning_rate": 9.971163377489412e-05, "epoch": 0.5, "step": 33400 }, { "loss": 1.2129, "grad_norm": 2.6144607067108154, "learning_rate": 9.96365384037728e-05, "epoch": 0.5, "step": 33425 }, { "loss": 1.1676, "grad_norm": 1.4699029922485352, "learning_rate": 9.956144303265146e-05, "epoch": 0.5, "step": 33450 }, { "loss": 1.1679, "grad_norm": 2.1169466972351074, "learning_rate": 9.948634766153014e-05, "epoch": 0.5, "step": 33475 }, { "loss": 1.1909, "grad_norm": 2.1322262287139893, "learning_rate": 9.941125229040883e-05, "epoch": 0.5, "step": 33500 }, { "loss": 1.1367, "grad_norm": 1.2691850662231445, "learning_rate": 9.93361569192875e-05, "epoch": 0.5, "step": 33525 }, { "loss": 1.1963, "grad_norm": 1.6899739503860474, "learning_rate": 9.926106154816618e-05, "epoch": 0.5, "step": 33550 }, { "loss": 1.174, "grad_norm": 2.2241880893707275, "learning_rate": 9.918596617704486e-05, "epoch": 0.5, "step": 33575 }, { "loss": 1.0904, "grad_norm": 1.1701431274414062, "learning_rate": 9.911087080592352e-05, "epoch": 0.5, "step": 33600 }, { "loss": 1.1726, "grad_norm": 2.3160314559936523, "learning_rate": 9.90357754348022e-05, "epoch": 0.51, "step": 33625 }, { "loss": 1.2542, "grad_norm": 1.301832675933838, "learning_rate": 9.896068006368088e-05, "epoch": 0.51, "step": 33650 }, { "loss": 1.1653, "grad_norm": 2.0493037700653076, "learning_rate": 9.888558469255955e-05, "epoch": 0.51, "step": 33675 }, { "loss": 1.1477, "grad_norm": 1.5900102853775024, "learning_rate": 9.881048932143822e-05, "epoch": 0.51, "step": 33700 }, { "loss": 1.2375, "grad_norm": 1.2943110466003418, "learning_rate": 9.873539395031692e-05, "epoch": 0.51, "step": 33725 }, { "loss": 1.1826, "grad_norm": 1.2338217496871948, "learning_rate": 9.866029857919558e-05, "epoch": 0.51, "step": 33750 }, { "loss": 1.0829, "grad_norm": 1.4232094287872314, "learning_rate": 9.858520320807426e-05, "epoch": 0.51, "step": 33775 }, { "loss": 1.1526, "grad_norm": 1.8396953344345093, "learning_rate": 9.851010783695294e-05, "epoch": 0.51, "step": 33800 }, { "loss": 1.1136, "grad_norm": 2.607694625854492, "learning_rate": 9.84350124658316e-05, "epoch": 0.51, "step": 33825 }, { "loss": 1.1214, "grad_norm": 1.8624433279037476, "learning_rate": 9.835991709471028e-05, "epoch": 0.51, "step": 33850 }, { "loss": 1.1397, "grad_norm": 0.9864051938056946, "learning_rate": 9.828482172358896e-05, "epoch": 0.51, "step": 33875 }, { "loss": 1.1547, "grad_norm": 2.811202049255371, "learning_rate": 9.820972635246763e-05, "epoch": 0.51, "step": 33900 }, { "loss": 1.1279, "grad_norm": 1.2450023889541626, "learning_rate": 9.813463098134631e-05, "epoch": 0.51, "step": 33925 }, { "loss": 1.1307, "grad_norm": 1.43215811252594, "learning_rate": 9.8059535610225e-05, "epoch": 0.51, "step": 33950 }, { "loss": 1.2241, "grad_norm": 2.1191306114196777, "learning_rate": 9.798444023910366e-05, "epoch": 0.51, "step": 33975 }, { "loss": 1.0813, "grad_norm": 1.793713092803955, "learning_rate": 9.790934486798234e-05, "epoch": 0.51, "step": 34000 }, { "loss": 1.1351, "grad_norm": 1.3615442514419556, "learning_rate": 9.783424949686102e-05, "epoch": 0.51, "step": 34025 }, { "loss": 1.1081, "grad_norm": 1.8019038438796997, "learning_rate": 9.775915412573969e-05, "epoch": 0.51, "step": 34050 }, { "loss": 1.1063, "grad_norm": 2.837644100189209, "learning_rate": 9.768405875461837e-05, "epoch": 0.51, "step": 34075 }, { "loss": 1.1894, "grad_norm": 1.2097357511520386, "learning_rate": 9.760896338349705e-05, "epoch": 0.51, "step": 34100 }, { "loss": 1.1942, "grad_norm": 1.1675305366516113, "learning_rate": 9.753386801237571e-05, "epoch": 0.51, "step": 34125 }, { "loss": 1.1185, "grad_norm": 1.949704885482788, "learning_rate": 9.745877264125439e-05, "epoch": 0.51, "step": 34150 }, { "loss": 1.1904, "grad_norm": 1.5967111587524414, "learning_rate": 9.738367727013308e-05, "epoch": 0.51, "step": 34175 }, { "loss": 1.0594, "grad_norm": 0.8626694083213806, "learning_rate": 9.730858189901175e-05, "epoch": 0.51, "step": 34200 }, { "loss": 1.1637, "grad_norm": 1.7055697441101074, "learning_rate": 9.723348652789043e-05, "epoch": 0.51, "step": 34225 }, { "loss": 1.1241, "grad_norm": 1.559312105178833, "learning_rate": 9.71583911567691e-05, "epoch": 0.51, "step": 34250 }, { "loss": 1.1188, "grad_norm": 1.095895767211914, "learning_rate": 9.708329578564777e-05, "epoch": 0.51, "step": 34275 }, { "loss": 1.1647, "grad_norm": 1.85615074634552, "learning_rate": 9.700820041452645e-05, "epoch": 0.52, "step": 34300 }, { "loss": 1.2034, "grad_norm": 1.3546233177185059, "learning_rate": 9.693310504340513e-05, "epoch": 0.52, "step": 34325 }, { "loss": 1.1603, "grad_norm": 1.756169080734253, "learning_rate": 9.68580096722838e-05, "epoch": 0.52, "step": 34350 }, { "loss": 1.1961, "grad_norm": 1.8548756837844849, "learning_rate": 9.678291430116249e-05, "epoch": 0.52, "step": 34375 }, { "loss": 1.1465, "grad_norm": 1.8958889245986938, "learning_rate": 9.670781893004116e-05, "epoch": 0.52, "step": 34400 }, { "loss": 1.1634, "grad_norm": 1.0698497295379639, "learning_rate": 9.663272355891983e-05, "epoch": 0.52, "step": 34425 }, { "loss": 1.1787, "grad_norm": 1.7011641263961792, "learning_rate": 9.655762818779851e-05, "epoch": 0.52, "step": 34450 }, { "loss": 1.2188, "grad_norm": 1.8526285886764526, "learning_rate": 9.648253281667719e-05, "epoch": 0.52, "step": 34475 }, { "loss": 1.1691, "grad_norm": 1.3944551944732666, "learning_rate": 9.640743744555585e-05, "epoch": 0.52, "step": 34500 }, { "loss": 1.2114, "grad_norm": 1.9814480543136597, "learning_rate": 9.633234207443453e-05, "epoch": 0.52, "step": 34525 }, { "loss": 1.2095, "grad_norm": 2.6231672763824463, "learning_rate": 9.625724670331321e-05, "epoch": 0.52, "step": 34550 }, { "loss": 1.0963, "grad_norm": 1.4759525060653687, "learning_rate": 9.618215133219189e-05, "epoch": 0.52, "step": 34575 }, { "loss": 1.1529, "grad_norm": 1.029731273651123, "learning_rate": 9.610705596107057e-05, "epoch": 0.52, "step": 34600 }, { "loss": 1.1502, "grad_norm": 1.3262224197387695, "learning_rate": 9.603196058994925e-05, "epoch": 0.52, "step": 34625 }, { "loss": 1.1467, "grad_norm": 2.3982503414154053, "learning_rate": 9.595686521882791e-05, "epoch": 0.52, "step": 34650 }, { "loss": 1.1648, "grad_norm": 1.6530815362930298, "learning_rate": 9.588176984770659e-05, "epoch": 0.52, "step": 34675 }, { "loss": 1.1066, "grad_norm": 1.987338662147522, "learning_rate": 9.580667447658527e-05, "epoch": 0.52, "step": 34700 }, { "loss": 1.1234, "grad_norm": 1.4502911567687988, "learning_rate": 9.573157910546394e-05, "epoch": 0.52, "step": 34725 }, { "loss": 1.1992, "grad_norm": 2.197833776473999, "learning_rate": 9.565648373434261e-05, "epoch": 0.52, "step": 34750 }, { "loss": 1.1252, "grad_norm": 1.55136239528656, "learning_rate": 9.55813883632213e-05, "epoch": 0.52, "step": 34775 }, { "loss": 1.2176, "grad_norm": 3.1894094944000244, "learning_rate": 9.550629299209997e-05, "epoch": 0.52, "step": 34800 }, { "loss": 1.232, "grad_norm": 1.971354603767395, "learning_rate": 9.543119762097865e-05, "epoch": 0.52, "step": 34825 }, { "loss": 1.1468, "grad_norm": 2.7667553424835205, "learning_rate": 9.535610224985733e-05, "epoch": 0.52, "step": 34850 }, { "loss": 1.0825, "grad_norm": 1.0196588039398193, "learning_rate": 9.5281006878736e-05, "epoch": 0.52, "step": 34875 }, { "loss": 1.1179, "grad_norm": 1.2213141918182373, "learning_rate": 9.520591150761467e-05, "epoch": 0.52, "step": 34900 }, { "loss": 1.1399, "grad_norm": 1.981288194656372, "learning_rate": 9.513081613649335e-05, "epoch": 0.52, "step": 34925 }, { "loss": 1.1343, "grad_norm": 1.6537185907363892, "learning_rate": 9.505572076537202e-05, "epoch": 0.52, "step": 34950 }, { "loss": 1.0935, "grad_norm": 1.4479026794433594, "learning_rate": 9.49806253942507e-05, "epoch": 0.53, "step": 34975 }, { "loss": 1.1032, "grad_norm": 1.706716537475586, "learning_rate": 9.490553002312938e-05, "epoch": 0.53, "step": 35000 }, { "loss": 1.2447, "grad_norm": 1.594125747680664, "learning_rate": 9.483043465200806e-05, "epoch": 0.53, "step": 35025 }, { "loss": 1.1596, "grad_norm": 1.97038996219635, "learning_rate": 9.475533928088673e-05, "epoch": 0.53, "step": 35050 }, { "loss": 1.0628, "grad_norm": 1.643943428993225, "learning_rate": 9.468024390976541e-05, "epoch": 0.53, "step": 35075 }, { "loss": 1.1548, "grad_norm": 1.9907810688018799, "learning_rate": 9.460514853864408e-05, "epoch": 0.53, "step": 35100 }, { "loss": 1.1677, "grad_norm": 1.1716595888137817, "learning_rate": 9.453005316752276e-05, "epoch": 0.53, "step": 35125 }, { "loss": 1.1908, "grad_norm": 2.2323215007781982, "learning_rate": 9.445495779640144e-05, "epoch": 0.53, "step": 35150 }, { "loss": 1.1572, "grad_norm": 1.4074227809906006, "learning_rate": 9.43798624252801e-05, "epoch": 0.53, "step": 35175 }, { "loss": 1.167, "grad_norm": 4.070502281188965, "learning_rate": 9.430476705415878e-05, "epoch": 0.53, "step": 35200 }, { "loss": 1.1227, "grad_norm": 1.3812352418899536, "learning_rate": 9.422967168303746e-05, "epoch": 0.53, "step": 35225 }, { "loss": 1.1987, "grad_norm": 1.3674787282943726, "learning_rate": 9.415457631191614e-05, "epoch": 0.53, "step": 35250 }, { "loss": 1.1032, "grad_norm": 1.9738848209381104, "learning_rate": 9.407948094079482e-05, "epoch": 0.53, "step": 35275 }, { "loss": 1.1589, "grad_norm": 1.1826382875442505, "learning_rate": 9.40043855696735e-05, "epoch": 0.53, "step": 35300 }, { "loss": 1.1984, "grad_norm": 2.014425277709961, "learning_rate": 9.392929019855216e-05, "epoch": 0.53, "step": 35325 }, { "loss": 1.1509, "grad_norm": 1.1934667825698853, "learning_rate": 9.385419482743084e-05, "epoch": 0.53, "step": 35350 }, { "loss": 1.2054, "grad_norm": 1.2793216705322266, "learning_rate": 9.377909945630952e-05, "epoch": 0.53, "step": 35375 }, { "loss": 1.1462, "grad_norm": 1.46218740940094, "learning_rate": 9.370400408518818e-05, "epoch": 0.53, "step": 35400 }, { "loss": 1.09, "grad_norm": 1.2986016273498535, "learning_rate": 9.362890871406686e-05, "epoch": 0.53, "step": 35425 }, { "loss": 1.1911, "grad_norm": 1.3429057598114014, "learning_rate": 9.355381334294554e-05, "epoch": 0.53, "step": 35450 }, { "loss": 1.0894, "grad_norm": 2.130441904067993, "learning_rate": 9.347871797182422e-05, "epoch": 0.53, "step": 35475 }, { "loss": 1.0808, "grad_norm": 2.666227102279663, "learning_rate": 9.34036226007029e-05, "epoch": 0.53, "step": 35500 }, { "loss": 1.1766, "grad_norm": 2.273437023162842, "learning_rate": 9.332852722958158e-05, "epoch": 0.53, "step": 35525 }, { "loss": 1.1705, "grad_norm": 2.2733075618743896, "learning_rate": 9.325343185846024e-05, "epoch": 0.53, "step": 35550 }, { "loss": 1.1519, "grad_norm": 3.6471107006073, "learning_rate": 9.317833648733892e-05, "epoch": 0.53, "step": 35575 }, { "loss": 1.1314, "grad_norm": 1.2116317749023438, "learning_rate": 9.31032411162176e-05, "epoch": 0.53, "step": 35600 }, { "loss": 1.2151, "grad_norm": 2.057880163192749, "learning_rate": 9.302814574509627e-05, "epoch": 0.54, "step": 35625 }, { "loss": 1.104, "grad_norm": 1.8840137720108032, "learning_rate": 9.295305037397495e-05, "epoch": 0.54, "step": 35650 }, { "loss": 1.153, "grad_norm": 1.324926733970642, "learning_rate": 9.287795500285364e-05, "epoch": 0.54, "step": 35675 }, { "loss": 1.1716, "grad_norm": 1.6749731302261353, "learning_rate": 9.28028596317323e-05, "epoch": 0.54, "step": 35700 }, { "loss": 1.2032, "grad_norm": 2.625720977783203, "learning_rate": 9.272776426061098e-05, "epoch": 0.54, "step": 35725 }, { "loss": 1.1532, "grad_norm": 1.7075999975204468, "learning_rate": 9.265266888948966e-05, "epoch": 0.54, "step": 35750 }, { "loss": 1.099, "grad_norm": 2.0305819511413574, "learning_rate": 9.257757351836833e-05, "epoch": 0.54, "step": 35775 }, { "loss": 1.1765, "grad_norm": 1.1253600120544434, "learning_rate": 9.2502478147247e-05, "epoch": 0.54, "step": 35800 }, { "loss": 1.1198, "grad_norm": 1.2533643245697021, "learning_rate": 9.242738277612568e-05, "epoch": 0.54, "step": 35825 }, { "loss": 1.1672, "grad_norm": 2.331897735595703, "learning_rate": 9.235228740500435e-05, "epoch": 0.54, "step": 35850 }, { "loss": 1.1543, "grad_norm": 1.556606411933899, "learning_rate": 9.227719203388303e-05, "epoch": 0.54, "step": 35875 }, { "loss": 1.143, "grad_norm": 2.3850412368774414, "learning_rate": 9.220209666276172e-05, "epoch": 0.54, "step": 35900 }, { "loss": 1.1789, "grad_norm": 2.364520788192749, "learning_rate": 9.212700129164039e-05, "epoch": 0.54, "step": 35925 }, { "loss": 1.1408, "grad_norm": 1.6768343448638916, "learning_rate": 9.205190592051907e-05, "epoch": 0.54, "step": 35950 }, { "loss": 1.2382, "grad_norm": 1.6469035148620605, "learning_rate": 9.197681054939774e-05, "epoch": 0.54, "step": 35975 }, { "loss": 1.1741, "grad_norm": 1.1769710779190063, "learning_rate": 9.190171517827641e-05, "epoch": 0.54, "step": 36000 }, { "loss": 1.1858, "grad_norm": 1.3249248266220093, "learning_rate": 9.182661980715509e-05, "epoch": 0.54, "step": 36025 }, { "loss": 1.1672, "grad_norm": 1.8996431827545166, "learning_rate": 9.175152443603377e-05, "epoch": 0.54, "step": 36050 }, { "loss": 1.1392, "grad_norm": 1.7952135801315308, "learning_rate": 9.167642906491245e-05, "epoch": 0.54, "step": 36075 }, { "loss": 1.1508, "grad_norm": 1.642858862876892, "learning_rate": 9.160133369379111e-05, "epoch": 0.54, "step": 36100 }, { "loss": 1.1122, "grad_norm": 1.287514090538025, "learning_rate": 9.15262383226698e-05, "epoch": 0.54, "step": 36125 }, { "loss": 1.0894, "grad_norm": 1.7376103401184082, "learning_rate": 9.145114295154847e-05, "epoch": 0.54, "step": 36150 }, { "loss": 1.0989, "grad_norm": 1.9539545774459839, "learning_rate": 9.137604758042715e-05, "epoch": 0.54, "step": 36175 }, { "loss": 1.2123, "grad_norm": 1.42177414894104, "learning_rate": 9.130095220930583e-05, "epoch": 0.54, "step": 36200 }, { "loss": 1.178, "grad_norm": 1.0218976736068726, "learning_rate": 9.122585683818449e-05, "epoch": 0.54, "step": 36225 }, { "loss": 1.1677, "grad_norm": 2.1340959072113037, "learning_rate": 9.115076146706317e-05, "epoch": 0.54, "step": 36250 }, { "loss": 1.1543, "grad_norm": 1.4234435558319092, "learning_rate": 9.107866991078671e-05, "epoch": 0.54, "step": 36275 }, { "loss": 1.1862, "grad_norm": 2.7633371353149414, "learning_rate": 9.100357453966538e-05, "epoch": 0.55, "step": 36300 }, { "loss": 1.1253, "grad_norm": 1.0972270965576172, "learning_rate": 9.092847916854406e-05, "epoch": 0.55, "step": 36325 }, { "loss": 1.1722, "grad_norm": 1.9171634912490845, "learning_rate": 9.085338379742274e-05, "epoch": 0.55, "step": 36350 }, { "loss": 1.2198, "grad_norm": 1.712023138999939, "learning_rate": 9.07782884263014e-05, "epoch": 0.55, "step": 36375 }, { "loss": 1.1141, "grad_norm": 1.8943400382995605, "learning_rate": 9.070319305518008e-05, "epoch": 0.55, "step": 36400 }, { "loss": 1.14, "grad_norm": 2.2785110473632812, "learning_rate": 9.062809768405876e-05, "epoch": 0.55, "step": 36425 }, { "loss": 1.1121, "grad_norm": 1.8687163591384888, "learning_rate": 9.055300231293742e-05, "epoch": 0.55, "step": 36450 }, { "loss": 1.1212, "grad_norm": 1.2206839323043823, "learning_rate": 9.047790694181612e-05, "epoch": 0.55, "step": 36475 }, { "loss": 1.1483, "grad_norm": 1.451223611831665, "learning_rate": 9.04028115706948e-05, "epoch": 0.55, "step": 36500 }, { "loss": 1.1291, "grad_norm": 1.4097929000854492, "learning_rate": 9.032771619957346e-05, "epoch": 0.55, "step": 36525 }, { "loss": 1.212, "grad_norm": 1.2845067977905273, "learning_rate": 9.025262082845214e-05, "epoch": 0.55, "step": 36550 }, { "loss": 1.1501, "grad_norm": 1.6853928565979004, "learning_rate": 9.017752545733082e-05, "epoch": 0.55, "step": 36575 }, { "loss": 1.1752, "grad_norm": 1.6147632598876953, "learning_rate": 9.010243008620948e-05, "epoch": 0.55, "step": 36600 }, { "loss": 1.0684, "grad_norm": 1.1251403093338013, "learning_rate": 9.002733471508816e-05, "epoch": 0.55, "step": 36625 }, { "loss": 1.182, "grad_norm": 2.0793652534484863, "learning_rate": 8.995223934396684e-05, "epoch": 0.55, "step": 36650 }, { "loss": 1.1353, "grad_norm": 1.7693026065826416, "learning_rate": 8.987714397284552e-05, "epoch": 0.55, "step": 36675 }, { "loss": 1.1888, "grad_norm": 2.8078482151031494, "learning_rate": 8.98020486017242e-05, "epoch": 0.55, "step": 36700 }, { "loss": 1.1771, "grad_norm": 1.857947587966919, "learning_rate": 8.972695323060288e-05, "epoch": 0.55, "step": 36725 }, { "loss": 1.1906, "grad_norm": 1.8160065412521362, "learning_rate": 8.965185785948154e-05, "epoch": 0.55, "step": 36750 }, { "loss": 1.2188, "grad_norm": 1.113638997077942, "learning_rate": 8.957676248836022e-05, "epoch": 0.55, "step": 36775 }, { "loss": 1.1625, "grad_norm": 2.105451822280884, "learning_rate": 8.95016671172389e-05, "epoch": 0.55, "step": 36800 }, { "loss": 1.1613, "grad_norm": 2.07865309715271, "learning_rate": 8.942657174611757e-05, "epoch": 0.55, "step": 36825 }, { "loss": 1.1396, "grad_norm": 3.033181667327881, "learning_rate": 8.935147637499625e-05, "epoch": 0.55, "step": 36850 }, { "loss": 1.1236, "grad_norm": 1.4927865266799927, "learning_rate": 8.927638100387492e-05, "epoch": 0.55, "step": 36875 }, { "loss": 1.218, "grad_norm": 1.5946248769760132, "learning_rate": 8.92012856327536e-05, "epoch": 0.55, "step": 36900 }, { "loss": 1.1734, "grad_norm": 2.349677562713623, "learning_rate": 8.912619026163228e-05, "epoch": 0.55, "step": 36925 }, { "loss": 1.1605, "grad_norm": 2.3983821868896484, "learning_rate": 8.905109489051096e-05, "epoch": 0.55, "step": 36950 }, { "loss": 1.1976, "grad_norm": 2.228635787963867, "learning_rate": 8.897599951938963e-05, "epoch": 0.56, "step": 36975 }, { "loss": 1.1316, "grad_norm": 1.0619374513626099, "learning_rate": 8.89009041482683e-05, "epoch": 0.56, "step": 37000 }, { "loss": 1.0945, "grad_norm": 2.490835428237915, "learning_rate": 8.882580877714698e-05, "epoch": 0.56, "step": 37025 }, { "loss": 1.1367, "grad_norm": 1.3563780784606934, "learning_rate": 8.875071340602565e-05, "epoch": 0.56, "step": 37050 }, { "loss": 1.1431, "grad_norm": 2.2259418964385986, "learning_rate": 8.867561803490433e-05, "epoch": 0.56, "step": 37075 }, { "loss": 1.1641, "grad_norm": 1.6267977952957153, "learning_rate": 8.860052266378301e-05, "epoch": 0.56, "step": 37100 }, { "loss": 1.1515, "grad_norm": 1.1950840950012207, "learning_rate": 8.852542729266169e-05, "epoch": 0.56, "step": 37125 }, { "loss": 1.1531, "grad_norm": 2.104607343673706, "learning_rate": 8.845033192154036e-05, "epoch": 0.56, "step": 37150 }, { "loss": 1.1484, "grad_norm": 1.70024573802948, "learning_rate": 8.837523655041904e-05, "epoch": 0.56, "step": 37175 }, { "loss": 1.1501, "grad_norm": 1.2371578216552734, "learning_rate": 8.830014117929771e-05, "epoch": 0.56, "step": 37200 }, { "loss": 1.186, "grad_norm": 0.9835503101348877, "learning_rate": 8.822504580817639e-05, "epoch": 0.56, "step": 37225 }, { "loss": 1.2074, "grad_norm": 1.633745551109314, "learning_rate": 8.814995043705507e-05, "epoch": 0.56, "step": 37250 }, { "loss": 1.0625, "grad_norm": 0.9619184732437134, "learning_rate": 8.807485506593373e-05, "epoch": 0.56, "step": 37275 }, { "loss": 1.1741, "grad_norm": 2.740689516067505, "learning_rate": 8.799975969481241e-05, "epoch": 0.56, "step": 37300 }, { "loss": 1.1879, "grad_norm": 1.7593574523925781, "learning_rate": 8.79246643236911e-05, "epoch": 0.56, "step": 37325 }, { "loss": 1.1428, "grad_norm": 1.7317709922790527, "learning_rate": 8.784956895256977e-05, "epoch": 0.56, "step": 37350 }, { "loss": 1.178, "grad_norm": 2.689879894256592, "learning_rate": 8.777447358144845e-05, "epoch": 0.56, "step": 37375 }, { "loss": 1.1386, "grad_norm": 1.3778091669082642, "learning_rate": 8.769937821032713e-05, "epoch": 0.56, "step": 37400 }, { "loss": 1.1171, "grad_norm": 1.4201562404632568, "learning_rate": 8.762428283920579e-05, "epoch": 0.56, "step": 37425 }, { "loss": 1.1575, "grad_norm": 1.2890523672103882, "learning_rate": 8.754918746808447e-05, "epoch": 0.56, "step": 37450 }, { "loss": 1.1488, "grad_norm": 1.5881069898605347, "learning_rate": 8.747409209696315e-05, "epoch": 0.56, "step": 37475 }, { "loss": 1.1466, "grad_norm": 2.2226951122283936, "learning_rate": 8.739899672584181e-05, "epoch": 0.56, "step": 37500 }, { "loss": 1.2388, "grad_norm": 1.865116000175476, "learning_rate": 8.73239013547205e-05, "epoch": 0.56, "step": 37525 }, { "loss": 1.1089, "grad_norm": 1.7630786895751953, "learning_rate": 8.724880598359919e-05, "epoch": 0.56, "step": 37550 }, { "loss": 1.1414, "grad_norm": 1.465029001235962, "learning_rate": 8.717371061247785e-05, "epoch": 0.56, "step": 37575 }, { "loss": 1.1533, "grad_norm": 1.2078875303268433, "learning_rate": 8.709861524135653e-05, "epoch": 0.56, "step": 37600 }, { "loss": 1.1931, "grad_norm": 1.6650409698486328, "learning_rate": 8.702351987023521e-05, "epoch": 0.57, "step": 37625 }, { "loss": 1.2402, "grad_norm": 2.3124303817749023, "learning_rate": 8.694842449911387e-05, "epoch": 0.57, "step": 37650 }, { "loss": 1.1566, "grad_norm": 1.6543500423431396, "learning_rate": 8.687332912799255e-05, "epoch": 0.57, "step": 37675 }, { "loss": 1.1466, "grad_norm": 1.0098353624343872, "learning_rate": 8.680123757171608e-05, "epoch": 0.57, "step": 37700 }, { "loss": 1.1809, "grad_norm": 4.323051929473877, "learning_rate": 8.672614220059476e-05, "epoch": 0.57, "step": 37725 }, { "loss": 1.1547, "grad_norm": 1.610032081604004, "learning_rate": 8.665104682947344e-05, "epoch": 0.57, "step": 37750 }, { "loss": 1.1416, "grad_norm": 2.1819934844970703, "learning_rate": 8.65759514583521e-05, "epoch": 0.57, "step": 37775 }, { "loss": 1.1733, "grad_norm": 1.3869298696517944, "learning_rate": 8.650085608723078e-05, "epoch": 0.57, "step": 37800 }, { "loss": 1.1933, "grad_norm": 4.251647472381592, "learning_rate": 8.642576071610946e-05, "epoch": 0.57, "step": 37825 }, { "loss": 1.1496, "grad_norm": 1.59364914894104, "learning_rate": 8.635066534498814e-05, "epoch": 0.57, "step": 37850 }, { "loss": 1.1961, "grad_norm": 1.4821110963821411, "learning_rate": 8.62755699738668e-05, "epoch": 0.57, "step": 37875 }, { "loss": 1.1613, "grad_norm": 1.1281379461288452, "learning_rate": 8.62004746027455e-05, "epoch": 0.57, "step": 37900 }, { "loss": 1.1104, "grad_norm": 1.1649848222732544, "learning_rate": 8.612537923162416e-05, "epoch": 0.57, "step": 37925 }, { "loss": 1.1502, "grad_norm": 1.2780572175979614, "learning_rate": 8.605028386050284e-05, "epoch": 0.57, "step": 37950 }, { "loss": 1.1132, "grad_norm": 0.8712659478187561, "learning_rate": 8.597518848938152e-05, "epoch": 0.57, "step": 37975 }, { "loss": 1.1839, "grad_norm": 1.912044882774353, "learning_rate": 8.59000931182602e-05, "epoch": 0.57, "step": 38000 }, { "loss": 1.2002, "grad_norm": 1.3458526134490967, "learning_rate": 8.582499774713887e-05, "epoch": 0.57, "step": 38025 }, { "loss": 1.0894, "grad_norm": 0.9828363060951233, "learning_rate": 8.574990237601755e-05, "epoch": 0.57, "step": 38050 }, { "loss": 1.1063, "grad_norm": 1.625246286392212, "learning_rate": 8.567480700489622e-05, "epoch": 0.57, "step": 38075 }, { "loss": 1.1812, "grad_norm": 2.1737546920776367, "learning_rate": 8.559971163377489e-05, "epoch": 0.57, "step": 38100 }, { "loss": 1.2085, "grad_norm": 2.304011583328247, "learning_rate": 8.552461626265358e-05, "epoch": 0.57, "step": 38125 }, { "loss": 1.2123, "grad_norm": 2.7804811000823975, "learning_rate": 8.544952089153225e-05, "epoch": 0.57, "step": 38150 }, { "loss": 1.0834, "grad_norm": 1.5996559858322144, "learning_rate": 8.537442552041093e-05, "epoch": 0.57, "step": 38175 }, { "loss": 1.1438, "grad_norm": 2.293555736541748, "learning_rate": 8.52993301492896e-05, "epoch": 0.57, "step": 38200 }, { "loss": 1.1268, "grad_norm": 3.2344138622283936, "learning_rate": 8.522423477816828e-05, "epoch": 0.57, "step": 38225 }, { "loss": 1.1241, "grad_norm": 1.3770357370376587, "learning_rate": 8.514913940704695e-05, "epoch": 0.57, "step": 38250 }, { "loss": 1.1562, "grad_norm": 1.8360143899917603, "learning_rate": 8.507404403592563e-05, "epoch": 0.57, "step": 38275 }, { "loss": 1.0981, "grad_norm": 1.4072625637054443, "learning_rate": 8.49989486648043e-05, "epoch": 0.58, "step": 38300 }, { "loss": 1.1859, "grad_norm": 1.3473397493362427, "learning_rate": 8.492385329368299e-05, "epoch": 0.58, "step": 38325 }, { "loss": 1.1482, "grad_norm": 1.2026944160461426, "learning_rate": 8.484875792256166e-05, "epoch": 0.58, "step": 38350 }, { "loss": 1.1669, "grad_norm": 1.541365146636963, "learning_rate": 8.477366255144033e-05, "epoch": 0.58, "step": 38375 }, { "loss": 1.1388, "grad_norm": 1.4856247901916504, "learning_rate": 8.469856718031901e-05, "epoch": 0.58, "step": 38400 }, { "loss": 1.1062, "grad_norm": 1.3066248893737793, "learning_rate": 8.462347180919769e-05, "epoch": 0.58, "step": 38425 }, { "loss": 1.2007, "grad_norm": 1.4467949867248535, "learning_rate": 8.454837643807637e-05, "epoch": 0.58, "step": 38450 }, { "loss": 1.1274, "grad_norm": 1.0385124683380127, "learning_rate": 8.447328106695503e-05, "epoch": 0.58, "step": 38475 }, { "loss": 1.1398, "grad_norm": 1.994707465171814, "learning_rate": 8.439818569583371e-05, "epoch": 0.58, "step": 38500 }, { "loss": 1.1434, "grad_norm": 1.9492372274398804, "learning_rate": 8.432309032471239e-05, "epoch": 0.58, "step": 38525 }, { "loss": 1.1526, "grad_norm": 1.5071452856063843, "learning_rate": 8.424799495359107e-05, "epoch": 0.58, "step": 38550 }, { "loss": 1.2361, "grad_norm": 1.9257084131240845, "learning_rate": 8.417289958246975e-05, "epoch": 0.58, "step": 38575 }, { "loss": 1.1397, "grad_norm": 1.549302101135254, "learning_rate": 8.409780421134841e-05, "epoch": 0.58, "step": 38600 }, { "loss": 1.0881, "grad_norm": 1.1353474855422974, "learning_rate": 8.402270884022709e-05, "epoch": 0.58, "step": 38625 }, { "loss": 1.1171, "grad_norm": 1.092421293258667, "learning_rate": 8.394761346910577e-05, "epoch": 0.58, "step": 38650 }, { "loss": 1.1855, "grad_norm": 2.2004194259643555, "learning_rate": 8.387251809798445e-05, "epoch": 0.58, "step": 38675 }, { "loss": 1.1005, "grad_norm": 1.2563297748565674, "learning_rate": 8.379742272686311e-05, "epoch": 0.58, "step": 38700 }, { "loss": 1.1269, "grad_norm": 1.0468568801879883, "learning_rate": 8.37223273557418e-05, "epoch": 0.58, "step": 38725 }, { "loss": 1.1281, "grad_norm": 1.2064344882965088, "learning_rate": 8.364723198462047e-05, "epoch": 0.58, "step": 38750 }, { "loss": 1.1261, "grad_norm": 1.8252434730529785, "learning_rate": 8.357213661349915e-05, "epoch": 0.58, "step": 38775 }, { "loss": 1.1461, "grad_norm": 2.017496109008789, "learning_rate": 8.349704124237783e-05, "epoch": 0.58, "step": 38800 }, { "loss": 1.1649, "grad_norm": 2.0913796424865723, "learning_rate": 8.342494968610134e-05, "epoch": 0.58, "step": 38825 }, { "loss": 1.109, "grad_norm": 1.446608304977417, "learning_rate": 8.334985431498002e-05, "epoch": 0.58, "step": 38850 }, { "loss": 1.1402, "grad_norm": 1.3379923105239868, "learning_rate": 8.32747589438587e-05, "epoch": 0.58, "step": 38875 }, { "loss": 1.151, "grad_norm": 1.2311291694641113, "learning_rate": 8.319966357273738e-05, "epoch": 0.58, "step": 38900 }, { "loss": 1.0964, "grad_norm": 1.5600236654281616, "learning_rate": 8.312456820161606e-05, "epoch": 0.58, "step": 38925 }, { "loss": 1.2035, "grad_norm": 1.953704833984375, "learning_rate": 8.304947283049474e-05, "epoch": 0.58, "step": 38950 }, { "loss": 1.1308, "grad_norm": 2.2543299198150635, "learning_rate": 8.29743774593734e-05, "epoch": 0.59, "step": 38975 }, { "loss": 1.1265, "grad_norm": 1.1865688562393188, "learning_rate": 8.289928208825208e-05, "epoch": 0.59, "step": 39000 }, { "loss": 1.1292, "grad_norm": 5.55220890045166, "learning_rate": 8.282418671713076e-05, "epoch": 0.59, "step": 39025 }, { "loss": 1.1661, "grad_norm": 4.146172523498535, "learning_rate": 8.274909134600943e-05, "epoch": 0.59, "step": 39050 }, { "loss": 1.0739, "grad_norm": 1.3395634889602661, "learning_rate": 8.26739959748881e-05, "epoch": 0.59, "step": 39075 }, { "loss": 1.1592, "grad_norm": 1.3389477729797363, "learning_rate": 8.259890060376679e-05, "epoch": 0.59, "step": 39100 }, { "loss": 1.2078, "grad_norm": 1.6884431838989258, "learning_rate": 8.252380523264546e-05, "epoch": 0.59, "step": 39125 }, { "loss": 1.1245, "grad_norm": 2.1312379837036133, "learning_rate": 8.244870986152414e-05, "epoch": 0.59, "step": 39150 }, { "loss": 1.1295, "grad_norm": 1.0998674631118774, "learning_rate": 8.237361449040282e-05, "epoch": 0.59, "step": 39175 }, { "loss": 1.0666, "grad_norm": 1.3891421556472778, "learning_rate": 8.229851911928149e-05, "epoch": 0.59, "step": 39200 }, { "loss": 1.2052, "grad_norm": 1.069043755531311, "learning_rate": 8.222342374816017e-05, "epoch": 0.59, "step": 39225 }, { "loss": 1.1251, "grad_norm": 2.247673273086548, "learning_rate": 8.214832837703884e-05, "epoch": 0.59, "step": 39250 }, { "loss": 1.124, "grad_norm": 1.6093597412109375, "learning_rate": 8.207323300591751e-05, "epoch": 0.59, "step": 39275 }, { "loss": 1.0996, "grad_norm": 2.3855221271514893, "learning_rate": 8.199813763479619e-05, "epoch": 0.59, "step": 39300 }, { "loss": 1.1916, "grad_norm": 2.479825258255005, "learning_rate": 8.192304226367488e-05, "epoch": 0.59, "step": 39325 }, { "loss": 1.1752, "grad_norm": 1.302335262298584, "learning_rate": 8.184794689255355e-05, "epoch": 0.59, "step": 39350 }, { "loss": 1.0752, "grad_norm": 1.7336974143981934, "learning_rate": 8.177285152143223e-05, "epoch": 0.59, "step": 39375 }, { "loss": 1.1491, "grad_norm": 1.7334376573562622, "learning_rate": 8.16977561503109e-05, "epoch": 0.59, "step": 39400 }, { "loss": 1.1288, "grad_norm": 1.5370564460754395, "learning_rate": 8.162266077918957e-05, "epoch": 0.59, "step": 39425 }, { "loss": 1.1544, "grad_norm": 2.190082550048828, "learning_rate": 8.154756540806825e-05, "epoch": 0.59, "step": 39450 }, { "loss": 1.1265, "grad_norm": 1.2925649881362915, "learning_rate": 8.147247003694693e-05, "epoch": 0.59, "step": 39475 }, { "loss": 1.1175, "grad_norm": 1.5995088815689087, "learning_rate": 8.139737466582559e-05, "epoch": 0.59, "step": 39500 }, { "loss": 1.1401, "grad_norm": 2.018927812576294, "learning_rate": 8.132227929470427e-05, "epoch": 0.59, "step": 39525 }, { "loss": 1.145, "grad_norm": 2.101435661315918, "learning_rate": 8.124718392358296e-05, "epoch": 0.59, "step": 39550 }, { "loss": 1.2138, "grad_norm": 1.0594968795776367, "learning_rate": 8.117208855246163e-05, "epoch": 0.59, "step": 39575 }, { "loss": 1.1428, "grad_norm": 1.3600685596466064, "learning_rate": 8.109699318134031e-05, "epoch": 0.59, "step": 39600 }, { "loss": 1.1717, "grad_norm": 1.4228684902191162, "learning_rate": 8.102189781021899e-05, "epoch": 0.6, "step": 39625 }, { "loss": 1.1585, "grad_norm": 1.1526142358779907, "learning_rate": 8.094680243909765e-05, "epoch": 0.6, "step": 39650 }, { "loss": 1.182, "grad_norm": 1.6106703281402588, "learning_rate": 8.087170706797633e-05, "epoch": 0.6, "step": 39675 }, { "loss": 1.0861, "grad_norm": 1.4839822053909302, "learning_rate": 8.079661169685501e-05, "epoch": 0.6, "step": 39700 }, { "loss": 1.1069, "grad_norm": 1.2879067659378052, "learning_rate": 8.072151632573368e-05, "epoch": 0.6, "step": 39725 }, { "loss": 1.0987, "grad_norm": 1.0558810234069824, "learning_rate": 8.064642095461235e-05, "epoch": 0.6, "step": 39750 }, { "loss": 1.1328, "grad_norm": 1.1778608560562134, "learning_rate": 8.057132558349105e-05, "epoch": 0.6, "step": 39775 }, { "loss": 1.1309, "grad_norm": 6.927417755126953, "learning_rate": 8.049623021236971e-05, "epoch": 0.6, "step": 39800 }, { "loss": 1.0943, "grad_norm": 2.03812837600708, "learning_rate": 8.042113484124839e-05, "epoch": 0.6, "step": 39825 }, { "loss": 1.1745, "grad_norm": 1.8612788915634155, "learning_rate": 8.034603947012707e-05, "epoch": 0.6, "step": 39850 }, { "loss": 1.1406, "grad_norm": 1.7891243696212769, "learning_rate": 8.027094409900574e-05, "epoch": 0.6, "step": 39875 }, { "loss": 1.1637, "grad_norm": 1.3130542039871216, "learning_rate": 8.019584872788441e-05, "epoch": 0.6, "step": 39900 }, { "loss": 1.161, "grad_norm": 1.4459270238876343, "learning_rate": 8.012075335676309e-05, "epoch": 0.6, "step": 39925 }, { "loss": 1.0771, "grad_norm": 1.576799988746643, "learning_rate": 8.004565798564176e-05, "epoch": 0.6, "step": 39950 }, { "loss": 1.1488, "grad_norm": 1.5947468280792236, "learning_rate": 7.997056261452045e-05, "epoch": 0.6, "step": 39975 }, { "loss": 1.082, "grad_norm": 1.6659477949142456, "learning_rate": 7.989546724339913e-05, "epoch": 0.6, "step": 40000 }, { "loss": 1.1407, "grad_norm": 5.243545055389404, "learning_rate": 7.98203718722778e-05, "epoch": 0.6, "step": 40025 }, { "loss": 1.0316, "grad_norm": 1.1737557649612427, "learning_rate": 7.974527650115647e-05, "epoch": 0.6, "step": 40050 }, { "loss": 1.1271, "grad_norm": 1.7314780950546265, "learning_rate": 7.967018113003515e-05, "epoch": 0.6, "step": 40075 }, { "loss": 1.1789, "grad_norm": 2.0266127586364746, "learning_rate": 7.959508575891382e-05, "epoch": 0.6, "step": 40100 }, { "loss": 1.1694, "grad_norm": 1.2344083786010742, "learning_rate": 7.95199903877925e-05, "epoch": 0.6, "step": 40125 }, { "loss": 1.2533, "grad_norm": 1.9182096719741821, "learning_rate": 7.944489501667118e-05, "epoch": 0.6, "step": 40150 }, { "loss": 1.1746, "grad_norm": 1.2988171577453613, "learning_rate": 7.936979964554984e-05, "epoch": 0.6, "step": 40175 }, { "loss": 1.1278, "grad_norm": 1.847265601158142, "learning_rate": 7.929470427442853e-05, "epoch": 0.6, "step": 40200 }, { "loss": 1.1505, "grad_norm": 1.2763010263442993, "learning_rate": 7.921960890330721e-05, "epoch": 0.6, "step": 40225 }, { "loss": 1.1054, "grad_norm": 1.8603181838989258, "learning_rate": 7.914451353218588e-05, "epoch": 0.6, "step": 40250 }, { "loss": 1.141, "grad_norm": 1.320823073387146, "learning_rate": 7.906941816106456e-05, "epoch": 0.6, "step": 40275 }, { "loss": 1.1459, "grad_norm": 1.6374232769012451, "learning_rate": 7.899432278994324e-05, "epoch": 0.61, "step": 40300 }, { "loss": 1.1029, "grad_norm": 2.58848237991333, "learning_rate": 7.89192274188219e-05, "epoch": 0.61, "step": 40325 }, { "loss": 1.0986, "grad_norm": 2.0660908222198486, "learning_rate": 7.884413204770058e-05, "epoch": 0.61, "step": 40350 }, { "loss": 1.1133, "grad_norm": 1.2920788526535034, "learning_rate": 7.876903667657926e-05, "epoch": 0.61, "step": 40375 }, { "loss": 1.1248, "grad_norm": 1.9770618677139282, "learning_rate": 7.869394130545792e-05, "epoch": 0.61, "step": 40400 }, { "loss": 1.1426, "grad_norm": 1.2287840843200684, "learning_rate": 7.861884593433662e-05, "epoch": 0.61, "step": 40425 }, { "loss": 1.13, "grad_norm": 1.4473248720169067, "learning_rate": 7.85437505632153e-05, "epoch": 0.61, "step": 40450 }, { "loss": 1.1542, "grad_norm": 1.8491151332855225, "learning_rate": 7.846865519209396e-05, "epoch": 0.61, "step": 40475 }, { "loss": 1.1202, "grad_norm": 1.6479681730270386, "learning_rate": 7.839355982097264e-05, "epoch": 0.61, "step": 40500 }, { "loss": 1.1155, "grad_norm": 1.355073094367981, "learning_rate": 7.831846444985132e-05, "epoch": 0.61, "step": 40525 }, { "loss": 1.0916, "grad_norm": 2.459308385848999, "learning_rate": 7.824336907872998e-05, "epoch": 0.61, "step": 40550 }, { "loss": 1.0323, "grad_norm": 2.234042167663574, "learning_rate": 7.816827370760866e-05, "epoch": 0.61, "step": 40575 }, { "loss": 1.082, "grad_norm": 1.3024649620056152, "learning_rate": 7.809317833648734e-05, "epoch": 0.61, "step": 40600 }, { "loss": 1.1336, "grad_norm": 1.6423088312149048, "learning_rate": 7.801808296536602e-05, "epoch": 0.61, "step": 40625 }, { "loss": 1.1655, "grad_norm": 1.7982234954833984, "learning_rate": 7.79429875942447e-05, "epoch": 0.61, "step": 40650 }, { "loss": 1.1202, "grad_norm": 1.7683314085006714, "learning_rate": 7.786789222312338e-05, "epoch": 0.61, "step": 40675 }, { "loss": 1.148, "grad_norm": 1.1973870992660522, "learning_rate": 7.779279685200204e-05, "epoch": 0.61, "step": 40700 }, { "loss": 1.1323, "grad_norm": 1.366228461265564, "learning_rate": 7.771770148088072e-05, "epoch": 0.61, "step": 40725 }, { "loss": 1.1777, "grad_norm": 1.8791155815124512, "learning_rate": 7.76426061097594e-05, "epoch": 0.61, "step": 40750 }, { "loss": 1.2012, "grad_norm": 1.5715335607528687, "learning_rate": 7.756751073863807e-05, "epoch": 0.61, "step": 40775 }, { "loss": 1.1607, "grad_norm": 2.0917367935180664, "learning_rate": 7.749241536751674e-05, "epoch": 0.61, "step": 40800 }, { "loss": 1.1775, "grad_norm": 2.049710988998413, "learning_rate": 7.741731999639542e-05, "epoch": 0.61, "step": 40825 }, { "loss": 1.171, "grad_norm": 1.9413490295410156, "learning_rate": 7.73422246252741e-05, "epoch": 0.61, "step": 40850 }, { "loss": 1.147, "grad_norm": 1.760611891746521, "learning_rate": 7.726712925415278e-05, "epoch": 0.61, "step": 40875 }, { "loss": 1.0593, "grad_norm": 1.515251874923706, "learning_rate": 7.719203388303146e-05, "epoch": 0.61, "step": 40900 }, { "loss": 1.1917, "grad_norm": 1.9760046005249023, "learning_rate": 7.711693851191013e-05, "epoch": 0.61, "step": 40925 }, { "loss": 1.1901, "grad_norm": 2.0077996253967285, "learning_rate": 7.70418431407888e-05, "epoch": 0.62, "step": 40950 }, { "loss": 1.1352, "grad_norm": 1.6817588806152344, "learning_rate": 7.696674776966748e-05, "epoch": 0.62, "step": 40975 }, { "loss": 1.1396, "grad_norm": 1.1577296257019043, "learning_rate": 7.689165239854615e-05, "epoch": 0.62, "step": 41000 }, { "loss": 1.1017, "grad_norm": 1.2814109325408936, "learning_rate": 7.681655702742483e-05, "epoch": 0.62, "step": 41025 }, { "loss": 1.1205, "grad_norm": 2.1550331115722656, "learning_rate": 7.67414616563035e-05, "epoch": 0.62, "step": 41050 }, { "loss": 1.1016, "grad_norm": 0.8784595131874084, "learning_rate": 7.666636628518219e-05, "epoch": 0.62, "step": 41075 }, { "loss": 1.2382, "grad_norm": 1.159589171409607, "learning_rate": 7.659127091406086e-05, "epoch": 0.62, "step": 41100 }, { "loss": 1.2277, "grad_norm": 2.137002468109131, "learning_rate": 7.651617554293954e-05, "epoch": 0.62, "step": 41125 }, { "loss": 1.2025, "grad_norm": 1.5546560287475586, "learning_rate": 7.644108017181821e-05, "epoch": 0.62, "step": 41150 }, { "loss": 1.1419, "grad_norm": 1.3389211893081665, "learning_rate": 7.636598480069689e-05, "epoch": 0.62, "step": 41175 }, { "loss": 1.1326, "grad_norm": 1.7950804233551025, "learning_rate": 7.629088942957557e-05, "epoch": 0.62, "step": 41200 }, { "loss": 1.1444, "grad_norm": 6.291619777679443, "learning_rate": 7.621579405845423e-05, "epoch": 0.62, "step": 41225 }, { "loss": 1.1504, "grad_norm": 1.4547916650772095, "learning_rate": 7.614069868733291e-05, "epoch": 0.62, "step": 41250 }, { "loss": 1.2186, "grad_norm": 1.098617672920227, "learning_rate": 7.60656033162116e-05, "epoch": 0.62, "step": 41275 }, { "loss": 1.1272, "grad_norm": 2.3291661739349365, "learning_rate": 7.599050794509027e-05, "epoch": 0.62, "step": 41300 }, { "loss": 1.118, "grad_norm": 2.5848639011383057, "learning_rate": 7.591541257396895e-05, "epoch": 0.62, "step": 41325 }, { "loss": 1.0754, "grad_norm": 1.1806175708770752, "learning_rate": 7.584031720284763e-05, "epoch": 0.62, "step": 41350 }, { "loss": 1.1162, "grad_norm": 2.0705809593200684, "learning_rate": 7.576522183172629e-05, "epoch": 0.62, "step": 41375 }, { "loss": 1.1894, "grad_norm": 2.1457672119140625, "learning_rate": 7.569012646060497e-05, "epoch": 0.62, "step": 41400 }, { "loss": 1.1474, "grad_norm": 2.5769121646881104, "learning_rate": 7.561503108948365e-05, "epoch": 0.62, "step": 41425 }, { "loss": 1.1491, "grad_norm": 1.2455226182937622, "learning_rate": 7.553993571836231e-05, "epoch": 0.62, "step": 41450 }, { "loss": 1.1613, "grad_norm": 1.5082957744598389, "learning_rate": 7.546484034724099e-05, "epoch": 0.62, "step": 41475 }, { "loss": 1.132, "grad_norm": 1.4825623035430908, "learning_rate": 7.538974497611969e-05, "epoch": 0.62, "step": 41500 }, { "loss": 1.132, "grad_norm": 1.490708589553833, "learning_rate": 7.531464960499835e-05, "epoch": 0.62, "step": 41525 }, { "loss": 1.1115, "grad_norm": 1.2338794469833374, "learning_rate": 7.523955423387703e-05, "epoch": 0.62, "step": 41550 }, { "loss": 1.0703, "grad_norm": 2.4002695083618164, "learning_rate": 7.516445886275571e-05, "epoch": 0.62, "step": 41575 }, { "loss": 1.1811, "grad_norm": 1.6812978982925415, "learning_rate": 7.508936349163437e-05, "epoch": 0.62, "step": 41600 }, { "loss": 1.1221, "grad_norm": 1.1364158391952515, "learning_rate": 7.501426812051305e-05, "epoch": 0.63, "step": 41625 }, { "loss": 1.1513, "grad_norm": 1.2960102558135986, "learning_rate": 7.493917274939173e-05, "epoch": 0.63, "step": 41650 }, { "loss": 1.1192, "grad_norm": 1.6246578693389893, "learning_rate": 7.48640773782704e-05, "epoch": 0.63, "step": 41675 }, { "loss": 1.1717, "grad_norm": 3.2029411792755127, "learning_rate": 7.478898200714908e-05, "epoch": 0.63, "step": 41700 }, { "loss": 1.1804, "grad_norm": 1.4046978950500488, "learning_rate": 7.471388663602777e-05, "epoch": 0.63, "step": 41725 }, { "loss": 1.075, "grad_norm": 1.478977918624878, "learning_rate": 7.463879126490643e-05, "epoch": 0.63, "step": 41750 }, { "loss": 1.1016, "grad_norm": 1.5317085981369019, "learning_rate": 7.456369589378511e-05, "epoch": 0.63, "step": 41775 }, { "loss": 1.124, "grad_norm": 1.253780484199524, "learning_rate": 7.448860052266379e-05, "epoch": 0.63, "step": 41800 }, { "loss": 1.1664, "grad_norm": 1.669776439666748, "learning_rate": 7.441350515154246e-05, "epoch": 0.63, "step": 41825 }, { "loss": 1.2216, "grad_norm": 1.3725230693817139, "learning_rate": 7.433840978042114e-05, "epoch": 0.63, "step": 41850 }, { "loss": 1.1648, "grad_norm": 1.3599203824996948, "learning_rate": 7.426331440929981e-05, "epoch": 0.63, "step": 41875 }, { "loss": 1.1791, "grad_norm": 0.8592632412910461, "learning_rate": 7.418821903817848e-05, "epoch": 0.63, "step": 41900 }, { "loss": 1.0981, "grad_norm": 2.0452094078063965, "learning_rate": 7.411312366705717e-05, "epoch": 0.63, "step": 41925 }, { "loss": 1.1431, "grad_norm": 2.367089033126831, "learning_rate": 7.403802829593585e-05, "epoch": 0.63, "step": 41950 }, { "loss": 1.1168, "grad_norm": 1.2674223184585571, "learning_rate": 7.396293292481452e-05, "epoch": 0.63, "step": 41975 }, { "loss": 1.1096, "grad_norm": 1.589810848236084, "learning_rate": 7.38878375536932e-05, "epoch": 0.63, "step": 42000 }, { "loss": 1.1267, "grad_norm": 1.9553534984588623, "learning_rate": 7.381274218257187e-05, "epoch": 0.63, "step": 42025 }, { "loss": 1.1687, "grad_norm": 1.646224021911621, "learning_rate": 7.373764681145054e-05, "epoch": 0.63, "step": 42050 }, { "loss": 1.1368, "grad_norm": 1.515450119972229, "learning_rate": 7.366255144032922e-05, "epoch": 0.63, "step": 42075 }, { "loss": 1.1289, "grad_norm": 1.4205098152160645, "learning_rate": 7.35874560692079e-05, "epoch": 0.63, "step": 42100 }, { "loss": 1.0966, "grad_norm": 1.0404484272003174, "learning_rate": 7.351236069808656e-05, "epoch": 0.63, "step": 42125 }, { "loss": 1.0713, "grad_norm": 1.615555763244629, "learning_rate": 7.343726532696525e-05, "epoch": 0.63, "step": 42150 }, { "loss": 1.1928, "grad_norm": 1.5871440172195435, "learning_rate": 7.336216995584393e-05, "epoch": 0.63, "step": 42175 }, { "loss": 1.1243, "grad_norm": 1.4905834197998047, "learning_rate": 7.32870745847226e-05, "epoch": 0.63, "step": 42200 }, { "loss": 1.1623, "grad_norm": 1.8793108463287354, "learning_rate": 7.321197921360128e-05, "epoch": 0.63, "step": 42225 }, { "loss": 1.1306, "grad_norm": 1.7370017766952515, "learning_rate": 7.313688384247996e-05, "epoch": 0.63, "step": 42250 }, { "loss": 1.1043, "grad_norm": 1.2322642803192139, "learning_rate": 7.306178847135862e-05, "epoch": 0.63, "step": 42275 }, { "loss": 1.1336, "grad_norm": 1.9522937536239624, "learning_rate": 7.29866931002373e-05, "epoch": 0.64, "step": 42300 }, { "loss": 1.2031, "grad_norm": 1.7146880626678467, "learning_rate": 7.291159772911598e-05, "epoch": 0.64, "step": 42325 }, { "loss": 1.0689, "grad_norm": 1.353827953338623, "learning_rate": 7.283650235799465e-05, "epoch": 0.64, "step": 42350 }, { "loss": 1.046, "grad_norm": 1.0300394296646118, "learning_rate": 7.276140698687334e-05, "epoch": 0.64, "step": 42375 }, { "loss": 1.1271, "grad_norm": 1.7847486734390259, "learning_rate": 7.268631161575202e-05, "epoch": 0.64, "step": 42400 }, { "loss": 1.1355, "grad_norm": 1.5269598960876465, "learning_rate": 7.261121624463068e-05, "epoch": 0.64, "step": 42425 }, { "loss": 1.1399, "grad_norm": 1.863671898841858, "learning_rate": 7.253612087350936e-05, "epoch": 0.64, "step": 42450 }, { "loss": 1.1696, "grad_norm": 1.3562769889831543, "learning_rate": 7.246102550238804e-05, "epoch": 0.64, "step": 42475 }, { "loss": 1.1442, "grad_norm": 1.9278922080993652, "learning_rate": 7.23859301312667e-05, "epoch": 0.64, "step": 42500 }, { "loss": 1.1339, "grad_norm": 1.8279402256011963, "learning_rate": 7.231083476014538e-05, "epoch": 0.64, "step": 42525 }, { "loss": 1.1319, "grad_norm": 1.165343999862671, "learning_rate": 7.223573938902406e-05, "epoch": 0.64, "step": 42550 }, { "loss": 1.0568, "grad_norm": 1.109803318977356, "learning_rate": 7.216064401790274e-05, "epoch": 0.64, "step": 42575 }, { "loss": 1.214, "grad_norm": 1.3198269605636597, "learning_rate": 7.208554864678142e-05, "epoch": 0.64, "step": 42600 }, { "loss": 1.1511, "grad_norm": 1.4255495071411133, "learning_rate": 7.20104532756601e-05, "epoch": 0.64, "step": 42625 }, { "loss": 1.2014, "grad_norm": 2.5674383640289307, "learning_rate": 7.193535790453876e-05, "epoch": 0.64, "step": 42650 }, { "loss": 1.1177, "grad_norm": 2.265868663787842, "learning_rate": 7.186026253341744e-05, "epoch": 0.64, "step": 42675 }, { "loss": 1.151, "grad_norm": 1.6792558431625366, "learning_rate": 7.178516716229612e-05, "epoch": 0.64, "step": 42700 }, { "loss": 1.211, "grad_norm": 1.5409029722213745, "learning_rate": 7.171007179117479e-05, "epoch": 0.64, "step": 42725 }, { "loss": 1.1475, "grad_norm": 2.21852970123291, "learning_rate": 7.163497642005347e-05, "epoch": 0.64, "step": 42750 }, { "loss": 1.1559, "grad_norm": 1.770150065422058, "learning_rate": 7.155988104893215e-05, "epoch": 0.64, "step": 42775 }, { "loss": 1.1443, "grad_norm": 2.0208752155303955, "learning_rate": 7.148478567781082e-05, "epoch": 0.64, "step": 42800 }, { "loss": 1.1486, "grad_norm": 1.3952795267105103, "learning_rate": 7.14096903066895e-05, "epoch": 0.64, "step": 42825 }, { "loss": 1.1166, "grad_norm": 1.055246114730835, "learning_rate": 7.133459493556818e-05, "epoch": 0.64, "step": 42850 }, { "loss": 1.1424, "grad_norm": 1.9727169275283813, "learning_rate": 7.125949956444685e-05, "epoch": 0.64, "step": 42875 }, { "loss": 1.1111, "grad_norm": 1.2607585191726685, "learning_rate": 7.118440419332553e-05, "epoch": 0.64, "step": 42900 }, { "loss": 1.1684, "grad_norm": 1.3263331651687622, "learning_rate": 7.11093088222042e-05, "epoch": 0.64, "step": 42925 }, { "loss": 1.1461, "grad_norm": 1.7771466970443726, "learning_rate": 7.103421345108287e-05, "epoch": 0.65, "step": 42950 }, { "loss": 1.0846, "grad_norm": 1.842961311340332, "learning_rate": 7.095911807996155e-05, "epoch": 0.65, "step": 42975 }, { "loss": 1.143, "grad_norm": 1.8266829252243042, "learning_rate": 7.088402270884023e-05, "epoch": 0.65, "step": 43000 }, { "loss": 1.1259, "grad_norm": 2.703138828277588, "learning_rate": 7.08089273377189e-05, "epoch": 0.65, "step": 43025 }, { "loss": 1.1592, "grad_norm": 1.7669565677642822, "learning_rate": 7.073383196659759e-05, "epoch": 0.65, "step": 43050 }, { "loss": 1.157, "grad_norm": 2.708080530166626, "learning_rate": 7.065873659547626e-05, "epoch": 0.65, "step": 43075 }, { "loss": 1.1409, "grad_norm": 0.9941558241844177, "learning_rate": 7.058364122435493e-05, "epoch": 0.65, "step": 43100 }, { "loss": 1.09, "grad_norm": 2.0420825481414795, "learning_rate": 7.050854585323361e-05, "epoch": 0.65, "step": 43125 }, { "loss": 1.1426, "grad_norm": 2.205864667892456, "learning_rate": 7.043345048211229e-05, "epoch": 0.65, "step": 43150 }, { "loss": 1.0324, "grad_norm": 1.426829218864441, "learning_rate": 7.035835511099095e-05, "epoch": 0.65, "step": 43175 }, { "loss": 1.0635, "grad_norm": 2.733449935913086, "learning_rate": 7.028325973986963e-05, "epoch": 0.65, "step": 43200 }, { "loss": 1.0572, "grad_norm": 1.3944034576416016, "learning_rate": 7.020816436874832e-05, "epoch": 0.65, "step": 43225 }, { "loss": 1.0696, "grad_norm": 3.0517678260803223, "learning_rate": 7.013306899762699e-05, "epoch": 0.65, "step": 43250 }, { "loss": 1.1799, "grad_norm": 1.6215708255767822, "learning_rate": 7.005797362650567e-05, "epoch": 0.65, "step": 43275 }, { "loss": 1.2173, "grad_norm": 1.6113760471343994, "learning_rate": 6.99858820702292e-05, "epoch": 0.65, "step": 43300 }, { "loss": 1.1714, "grad_norm": 2.9850752353668213, "learning_rate": 6.991078669910786e-05, "epoch": 0.65, "step": 43325 }, { "loss": 1.0623, "grad_norm": 1.9807687997817993, "learning_rate": 6.983569132798654e-05, "epoch": 0.65, "step": 43350 }, { "loss": 1.1881, "grad_norm": 1.4547510147094727, "learning_rate": 6.976059595686523e-05, "epoch": 0.65, "step": 43375 }, { "loss": 1.1096, "grad_norm": 1.768142819404602, "learning_rate": 6.96855005857439e-05, "epoch": 0.65, "step": 43400 }, { "loss": 1.1513, "grad_norm": 1.4340214729309082, "learning_rate": 6.961040521462258e-05, "epoch": 0.65, "step": 43425 }, { "loss": 1.1259, "grad_norm": 2.260941743850708, "learning_rate": 6.953530984350126e-05, "epoch": 0.65, "step": 43450 }, { "loss": 1.0911, "grad_norm": 1.3704345226287842, "learning_rate": 6.946021447237992e-05, "epoch": 0.65, "step": 43475 }, { "loss": 1.1814, "grad_norm": 2.298049211502075, "learning_rate": 6.93851191012586e-05, "epoch": 0.65, "step": 43500 }, { "loss": 1.127, "grad_norm": 1.4709994792938232, "learning_rate": 6.931002373013728e-05, "epoch": 0.65, "step": 43525 }, { "loss": 1.0889, "grad_norm": 1.7260873317718506, "learning_rate": 6.923492835901594e-05, "epoch": 0.65, "step": 43550 }, { "loss": 1.1871, "grad_norm": 1.912359595298767, "learning_rate": 6.915983298789464e-05, "epoch": 0.65, "step": 43575 }, { "loss": 1.1445, "grad_norm": 1.4736065864562988, "learning_rate": 6.908473761677332e-05, "epoch": 0.65, "step": 43600 }, { "loss": 1.1363, "grad_norm": 2.0081377029418945, "learning_rate": 6.900964224565198e-05, "epoch": 0.66, "step": 43625 }, { "loss": 1.1329, "grad_norm": 1.3601313829421997, "learning_rate": 6.893454687453066e-05, "epoch": 0.66, "step": 43650 }, { "loss": 1.0892, "grad_norm": 1.9667214155197144, "learning_rate": 6.885945150340934e-05, "epoch": 0.66, "step": 43675 }, { "loss": 1.1483, "grad_norm": 1.3687251806259155, "learning_rate": 6.878735994713285e-05, "epoch": 0.66, "step": 43700 }, { "loss": 1.0915, "grad_norm": 2.4071569442749023, "learning_rate": 6.871226457601155e-05, "epoch": 0.66, "step": 43725 }, { "loss": 1.1005, "grad_norm": 1.577147126197815, "learning_rate": 6.863716920489021e-05, "epoch": 0.66, "step": 43750 }, { "loss": 1.1643, "grad_norm": 4.050117015838623, "learning_rate": 6.856207383376889e-05, "epoch": 0.66, "step": 43775 }, { "loss": 1.1211, "grad_norm": 1.6273080110549927, "learning_rate": 6.848697846264757e-05, "epoch": 0.66, "step": 43800 }, { "loss": 1.082, "grad_norm": 1.4899144172668457, "learning_rate": 6.841188309152623e-05, "epoch": 0.66, "step": 43825 }, { "loss": 1.1292, "grad_norm": 2.176234006881714, "learning_rate": 6.833678772040491e-05, "epoch": 0.66, "step": 43850 }, { "loss": 1.1189, "grad_norm": 1.6825004816055298, "learning_rate": 6.826169234928359e-05, "epoch": 0.66, "step": 43875 }, { "loss": 1.0886, "grad_norm": 1.342085361480713, "learning_rate": 6.818659697816227e-05, "epoch": 0.66, "step": 43900 }, { "loss": 1.146, "grad_norm": 0.8940933346748352, "learning_rate": 6.811150160704095e-05, "epoch": 0.66, "step": 43925 }, { "loss": 1.0925, "grad_norm": 1.4835485219955444, "learning_rate": 6.803640623591963e-05, "epoch": 0.66, "step": 43950 }, { "loss": 1.1992, "grad_norm": 1.9649572372436523, "learning_rate": 6.79613108647983e-05, "epoch": 0.66, "step": 43975 }, { "loss": 1.1081, "grad_norm": 1.4442362785339355, "learning_rate": 6.788621549367697e-05, "epoch": 0.66, "step": 44000 }, { "loss": 1.1475, "grad_norm": 3.4296488761901855, "learning_rate": 6.781112012255565e-05, "epoch": 0.66, "step": 44025 }, { "loss": 1.1343, "grad_norm": 2.3116354942321777, "learning_rate": 6.773602475143432e-05, "epoch": 0.66, "step": 44050 }, { "loss": 1.1686, "grad_norm": 1.6859666109085083, "learning_rate": 6.7660929380313e-05, "epoch": 0.66, "step": 44075 }, { "loss": 1.1612, "grad_norm": 1.4094436168670654, "learning_rate": 6.758583400919168e-05, "epoch": 0.66, "step": 44100 }, { "loss": 1.1254, "grad_norm": 2.2246947288513184, "learning_rate": 6.751073863807035e-05, "epoch": 0.66, "step": 44125 }, { "loss": 1.16, "grad_norm": 1.2146118879318237, "learning_rate": 6.743564326694903e-05, "epoch": 0.66, "step": 44150 }, { "loss": 1.1512, "grad_norm": 1.7137471437454224, "learning_rate": 6.736054789582771e-05, "epoch": 0.66, "step": 44175 }, { "loss": 1.0637, "grad_norm": 1.3975019454956055, "learning_rate": 6.728545252470638e-05, "epoch": 0.66, "step": 44200 }, { "loss": 1.1295, "grad_norm": 1.0238609313964844, "learning_rate": 6.721035715358506e-05, "epoch": 0.66, "step": 44225 }, { "loss": 1.0978, "grad_norm": 1.798096776008606, "learning_rate": 6.713526178246373e-05, "epoch": 0.66, "step": 44250 }, { "loss": 1.1513, "grad_norm": 1.8276596069335938, "learning_rate": 6.70601664113424e-05, "epoch": 0.66, "step": 44275 }, { "loss": 1.1275, "grad_norm": 2.5206449031829834, "learning_rate": 6.698507104022108e-05, "epoch": 0.67, "step": 44300 }, { "loss": 1.1837, "grad_norm": 1.401331901550293, "learning_rate": 6.690997566909976e-05, "epoch": 0.67, "step": 44325 }, { "loss": 1.137, "grad_norm": 1.3737742900848389, "learning_rate": 6.683488029797844e-05, "epoch": 0.67, "step": 44350 }, { "loss": 1.161, "grad_norm": 1.461982011795044, "learning_rate": 6.675978492685712e-05, "epoch": 0.67, "step": 44375 }, { "loss": 1.1194, "grad_norm": 1.4352518320083618, "learning_rate": 6.66846895557358e-05, "epoch": 0.67, "step": 44400 }, { "loss": 1.2318, "grad_norm": 1.9560725688934326, "learning_rate": 6.660959418461446e-05, "epoch": 0.67, "step": 44425 }, { "loss": 1.1218, "grad_norm": 1.0845290422439575, "learning_rate": 6.653449881349314e-05, "epoch": 0.67, "step": 44450 }, { "loss": 1.2239, "grad_norm": 1.4423961639404297, "learning_rate": 6.645940344237182e-05, "epoch": 0.67, "step": 44475 }, { "loss": 1.18, "grad_norm": 2.2228844165802, "learning_rate": 6.638430807125048e-05, "epoch": 0.67, "step": 44500 }, { "loss": 1.1529, "grad_norm": 1.524857521057129, "learning_rate": 6.630921270012916e-05, "epoch": 0.67, "step": 44525 }, { "loss": 1.121, "grad_norm": 1.349579930305481, "learning_rate": 6.623411732900784e-05, "epoch": 0.67, "step": 44550 }, { "loss": 1.1108, "grad_norm": 1.5544127225875854, "learning_rate": 6.615902195788652e-05, "epoch": 0.67, "step": 44575 }, { "loss": 1.1202, "grad_norm": 1.1992143392562866, "learning_rate": 6.60839265867652e-05, "epoch": 0.67, "step": 44600 }, { "loss": 1.1227, "grad_norm": 1.020192265510559, "learning_rate": 6.600883121564388e-05, "epoch": 0.67, "step": 44625 }, { "loss": 1.0743, "grad_norm": 0.9846924543380737, "learning_rate": 6.593373584452254e-05, "epoch": 0.67, "step": 44650 }, { "loss": 1.1423, "grad_norm": 1.3693522214889526, "learning_rate": 6.585864047340122e-05, "epoch": 0.67, "step": 44675 }, { "loss": 1.1301, "grad_norm": 1.9597666263580322, "learning_rate": 6.57835451022799e-05, "epoch": 0.67, "step": 44700 }, { "loss": 1.1554, "grad_norm": 1.1841599941253662, "learning_rate": 6.570844973115857e-05, "epoch": 0.67, "step": 44725 }, { "loss": 1.1822, "grad_norm": 1.071419596672058, "learning_rate": 6.563335436003724e-05, "epoch": 0.67, "step": 44750 }, { "loss": 1.1234, "grad_norm": 1.3992092609405518, "learning_rate": 6.555825898891592e-05, "epoch": 0.67, "step": 44775 }, { "loss": 1.089, "grad_norm": 1.3875787258148193, "learning_rate": 6.54831636177946e-05, "epoch": 0.67, "step": 44800 }, { "loss": 1.1601, "grad_norm": 1.7100720405578613, "learning_rate": 6.540806824667328e-05, "epoch": 0.67, "step": 44825 }, { "loss": 1.1281, "grad_norm": 1.3852653503417969, "learning_rate": 6.533297287555196e-05, "epoch": 0.67, "step": 44850 }, { "loss": 1.1651, "grad_norm": 2.1557846069335938, "learning_rate": 6.525787750443063e-05, "epoch": 0.67, "step": 44875 }, { "loss": 1.1061, "grad_norm": 1.711010217666626, "learning_rate": 6.51827821333093e-05, "epoch": 0.67, "step": 44900 }, { "loss": 1.1282, "grad_norm": 1.7825603485107422, "learning_rate": 6.510768676218798e-05, "epoch": 0.67, "step": 44925 }, { "loss": 1.1178, "grad_norm": 1.6146150827407837, "learning_rate": 6.503259139106666e-05, "epoch": 0.68, "step": 44950 }, { "loss": 1.1025, "grad_norm": 2.3008651733398438, "learning_rate": 6.495749601994533e-05, "epoch": 0.68, "step": 44975 }, { "loss": 1.1944, "grad_norm": 1.2394930124282837, "learning_rate": 6.4882400648824e-05, "epoch": 0.68, "step": 45000 }, { "loss": 1.1354, "grad_norm": 1.3226121664047241, "learning_rate": 6.480730527770268e-05, "epoch": 0.68, "step": 45025 }, { "loss": 1.082, "grad_norm": 1.0096830129623413, "learning_rate": 6.473220990658136e-05, "epoch": 0.68, "step": 45050 }, { "loss": 1.1808, "grad_norm": 1.8499752283096313, "learning_rate": 6.465711453546004e-05, "epoch": 0.68, "step": 45075 }, { "loss": 1.1287, "grad_norm": 1.8377041816711426, "learning_rate": 6.458201916433871e-05, "epoch": 0.68, "step": 45100 }, { "loss": 1.1541, "grad_norm": 1.5449110269546509, "learning_rate": 6.450692379321739e-05, "epoch": 0.68, "step": 45125 }, { "loss": 1.054, "grad_norm": 1.7001621723175049, "learning_rate": 6.443182842209607e-05, "epoch": 0.68, "step": 45150 }, { "loss": 1.0684, "grad_norm": 1.0393651723861694, "learning_rate": 6.435673305097474e-05, "epoch": 0.68, "step": 45175 }, { "loss": 1.1805, "grad_norm": 1.6634325981140137, "learning_rate": 6.428163767985341e-05, "epoch": 0.68, "step": 45200 }, { "loss": 1.1241, "grad_norm": 1.3879557847976685, "learning_rate": 6.42065423087321e-05, "epoch": 0.68, "step": 45225 }, { "loss": 1.0822, "grad_norm": 1.3733693361282349, "learning_rate": 6.413144693761077e-05, "epoch": 0.68, "step": 45250 }, { "loss": 1.1762, "grad_norm": 1.540552020072937, "learning_rate": 6.405635156648945e-05, "epoch": 0.68, "step": 45275 }, { "loss": 1.1056, "grad_norm": 1.7979633808135986, "learning_rate": 6.398125619536813e-05, "epoch": 0.68, "step": 45300 }, { "loss": 1.0967, "grad_norm": 1.601650357246399, "learning_rate": 6.390616082424679e-05, "epoch": 0.68, "step": 45325 }, { "loss": 1.13, "grad_norm": 1.2980515956878662, "learning_rate": 6.383106545312547e-05, "epoch": 0.68, "step": 45350 }, { "loss": 1.1626, "grad_norm": 1.1858327388763428, "learning_rate": 6.375597008200415e-05, "epoch": 0.68, "step": 45375 }, { "loss": 1.097, "grad_norm": 1.3785401582717896, "learning_rate": 6.368087471088283e-05, "epoch": 0.68, "step": 45400 }, { "loss": 1.1721, "grad_norm": 0.9480896592140198, "learning_rate": 6.360577933976149e-05, "epoch": 0.68, "step": 45425 }, { "loss": 1.1218, "grad_norm": 1.55319344997406, "learning_rate": 6.353068396864019e-05, "epoch": 0.68, "step": 45450 }, { "loss": 1.1096, "grad_norm": 1.589424729347229, "learning_rate": 6.345558859751885e-05, "epoch": 0.68, "step": 45475 }, { "loss": 1.1252, "grad_norm": 1.1488457918167114, "learning_rate": 6.338049322639753e-05, "epoch": 0.68, "step": 45500 }, { "loss": 1.1776, "grad_norm": 2.43381667137146, "learning_rate": 6.330539785527621e-05, "epoch": 0.68, "step": 45525 }, { "loss": 1.1062, "grad_norm": 1.4571008682250977, "learning_rate": 6.323030248415487e-05, "epoch": 0.68, "step": 45550 }, { "loss": 1.0569, "grad_norm": 2.75005841255188, "learning_rate": 6.315520711303355e-05, "epoch": 0.68, "step": 45575 }, { "loss": 1.1457, "grad_norm": 1.2686755657196045, "learning_rate": 6.308011174191223e-05, "epoch": 0.68, "step": 45600 }, { "loss": 1.1216, "grad_norm": 1.4540945291519165, "learning_rate": 6.300501637079091e-05, "epoch": 0.69, "step": 45625 }, { "loss": 1.1018, "grad_norm": 1.2058906555175781, "learning_rate": 6.292992099966958e-05, "epoch": 0.69, "step": 45650 }, { "loss": 1.1387, "grad_norm": 2.0346646308898926, "learning_rate": 6.285482562854827e-05, "epoch": 0.69, "step": 45675 }, { "loss": 1.1894, "grad_norm": 1.4020074605941772, "learning_rate": 6.277973025742693e-05, "epoch": 0.69, "step": 45700 }, { "loss": 1.1201, "grad_norm": 1.4608796834945679, "learning_rate": 6.270463488630561e-05, "epoch": 0.69, "step": 45725 }, { "loss": 1.1399, "grad_norm": 1.7157222032546997, "learning_rate": 6.262953951518429e-05, "epoch": 0.69, "step": 45750 }, { "loss": 1.1118, "grad_norm": 1.1699299812316895, "learning_rate": 6.255444414406296e-05, "epoch": 0.69, "step": 45775 }, { "loss": 1.1491, "grad_norm": 1.5801405906677246, "learning_rate": 6.247934877294163e-05, "epoch": 0.69, "step": 45800 }, { "loss": 1.1591, "grad_norm": 2.6088712215423584, "learning_rate": 6.240425340182031e-05, "epoch": 0.69, "step": 45825 }, { "loss": 1.1241, "grad_norm": 1.1088968515396118, "learning_rate": 6.232915803069899e-05, "epoch": 0.69, "step": 45850 }, { "loss": 1.1937, "grad_norm": 1.9700263738632202, "learning_rate": 6.225406265957767e-05, "epoch": 0.69, "step": 45875 }, { "loss": 1.0887, "grad_norm": 1.8540663719177246, "learning_rate": 6.217896728845635e-05, "epoch": 0.69, "step": 45900 }, { "loss": 1.1068, "grad_norm": 1.1432939767837524, "learning_rate": 6.210387191733502e-05, "epoch": 0.69, "step": 45925 }, { "loss": 1.1209, "grad_norm": 1.9326074123382568, "learning_rate": 6.20287765462137e-05, "epoch": 0.69, "step": 45950 }, { "loss": 1.1356, "grad_norm": 1.928639531135559, "learning_rate": 6.195368117509237e-05, "epoch": 0.69, "step": 45975 }, { "loss": 1.1867, "grad_norm": 1.4352381229400635, "learning_rate": 6.187858580397104e-05, "epoch": 0.69, "step": 46000 }, { "loss": 1.1867, "grad_norm": 1.4124820232391357, "learning_rate": 6.180349043284972e-05, "epoch": 0.69, "step": 46025 }, { "loss": 1.1144, "grad_norm": 2.557101249694824, "learning_rate": 6.17283950617284e-05, "epoch": 0.69, "step": 46050 }, { "loss": 1.0894, "grad_norm": 1.6086410284042358, "learning_rate": 6.165630350545192e-05, "epoch": 0.69, "step": 46075 }, { "loss": 1.1666, "grad_norm": 1.3408067226409912, "learning_rate": 6.15812081343306e-05, "epoch": 0.69, "step": 46100 }, { "loss": 1.0923, "grad_norm": 1.303733229637146, "learning_rate": 6.150611276320928e-05, "epoch": 0.69, "step": 46125 }, { "loss": 1.1683, "grad_norm": 2.2426815032958984, "learning_rate": 6.143101739208795e-05, "epoch": 0.69, "step": 46150 }, { "loss": 1.1749, "grad_norm": 1.519041657447815, "learning_rate": 6.135592202096663e-05, "epoch": 0.69, "step": 46175 }, { "loss": 1.1571, "grad_norm": 1.224007248878479, "learning_rate": 6.12808266498453e-05, "epoch": 0.69, "step": 46200 }, { "loss": 1.1916, "grad_norm": 1.8132357597351074, "learning_rate": 6.120573127872398e-05, "epoch": 0.69, "step": 46225 }, { "loss": 1.122, "grad_norm": 1.465853214263916, "learning_rate": 6.113063590760266e-05, "epoch": 0.69, "step": 46250 }, { "loss": 1.126, "grad_norm": 1.07510507106781, "learning_rate": 6.105554053648134e-05, "epoch": 0.69, "step": 46275 }, { "loss": 1.1603, "grad_norm": 1.3960785865783691, "learning_rate": 6.0980445165360014e-05, "epoch": 0.7, "step": 46300 }, { "loss": 1.1132, "grad_norm": 1.2278742790222168, "learning_rate": 6.0905349794238687e-05, "epoch": 0.7, "step": 46325 }, { "loss": 1.159, "grad_norm": 1.0870790481567383, "learning_rate": 6.083025442311736e-05, "epoch": 0.7, "step": 46350 }, { "loss": 1.1108, "grad_norm": 1.3281099796295166, "learning_rate": 6.075515905199604e-05, "epoch": 0.7, "step": 46375 }, { "loss": 1.1345, "grad_norm": 1.004966139793396, "learning_rate": 6.068006368087471e-05, "epoch": 0.7, "step": 46400 }, { "loss": 1.1449, "grad_norm": 1.102554202079773, "learning_rate": 6.060496830975338e-05, "epoch": 0.7, "step": 46425 }, { "loss": 1.1252, "grad_norm": 1.598632574081421, "learning_rate": 6.052987293863207e-05, "epoch": 0.7, "step": 46450 }, { "loss": 1.1236, "grad_norm": 1.7185138463974, "learning_rate": 6.0454777567510746e-05, "epoch": 0.7, "step": 46475 }, { "loss": 1.0913, "grad_norm": 1.3190034627914429, "learning_rate": 6.037968219638942e-05, "epoch": 0.7, "step": 46500 }, { "loss": 1.0544, "grad_norm": 2.019202947616577, "learning_rate": 6.03045868252681e-05, "epoch": 0.7, "step": 46525 }, { "loss": 1.12, "grad_norm": 1.3409463167190552, "learning_rate": 6.022949145414677e-05, "epoch": 0.7, "step": 46550 }, { "loss": 1.1206, "grad_norm": 1.9381085634231567, "learning_rate": 6.015439608302544e-05, "epoch": 0.7, "step": 46575 }, { "loss": 1.0804, "grad_norm": 1.1460068225860596, "learning_rate": 6.007930071190412e-05, "epoch": 0.7, "step": 46600 }, { "loss": 1.0772, "grad_norm": 1.3288995027542114, "learning_rate": 6.000420534078279e-05, "epoch": 0.7, "step": 46625 }, { "loss": 1.1105, "grad_norm": 1.3131263256072998, "learning_rate": 5.9929109969661464e-05, "epoch": 0.7, "step": 46650 }, { "loss": 1.1337, "grad_norm": 4.209181308746338, "learning_rate": 5.985401459854016e-05, "epoch": 0.7, "step": 46675 }, { "loss": 1.1791, "grad_norm": 1.737858772277832, "learning_rate": 5.977891922741883e-05, "epoch": 0.7, "step": 46700 }, { "loss": 1.1116, "grad_norm": 1.5516709089279175, "learning_rate": 5.97038238562975e-05, "epoch": 0.7, "step": 46725 }, { "loss": 1.1036, "grad_norm": 1.0340015888214111, "learning_rate": 5.962872848517618e-05, "epoch": 0.7, "step": 46750 }, { "loss": 1.1996, "grad_norm": 2.427554130554199, "learning_rate": 5.955363311405485e-05, "epoch": 0.7, "step": 46775 }, { "loss": 1.195, "grad_norm": 2.024531602859497, "learning_rate": 5.9478537742933524e-05, "epoch": 0.7, "step": 46800 }, { "loss": 1.2264, "grad_norm": 1.2301228046417236, "learning_rate": 5.94034423718122e-05, "epoch": 0.7, "step": 46825 }, { "loss": 1.0799, "grad_norm": 1.2450840473175049, "learning_rate": 5.9328347000690875e-05, "epoch": 0.7, "step": 46850 }, { "loss": 1.1088, "grad_norm": 1.3164767026901245, "learning_rate": 5.925325162956956e-05, "epoch": 0.7, "step": 46875 }, { "loss": 1.1358, "grad_norm": 1.6260111331939697, "learning_rate": 5.917815625844824e-05, "epoch": 0.7, "step": 46900 }, { "loss": 1.1555, "grad_norm": 1.2355117797851562, "learning_rate": 5.910306088732691e-05, "epoch": 0.7, "step": 46925 }, { "loss": 1.1982, "grad_norm": 1.639583706855774, "learning_rate": 5.9027965516205584e-05, "epoch": 0.71, "step": 46950 }, { "loss": 1.1229, "grad_norm": 1.5127111673355103, "learning_rate": 5.895287014508426e-05, "epoch": 0.71, "step": 46975 }, { "loss": 1.0991, "grad_norm": 1.5103808641433716, "learning_rate": 5.8877774773962935e-05, "epoch": 0.71, "step": 47000 }, { "loss": 1.1568, "grad_norm": 1.9068769216537476, "learning_rate": 5.880267940284161e-05, "epoch": 0.71, "step": 47025 }, { "loss": 1.1621, "grad_norm": 1.520690679550171, "learning_rate": 5.8727584031720286e-05, "epoch": 0.71, "step": 47050 }, { "loss": 1.148, "grad_norm": 3.158663749694824, "learning_rate": 5.865248866059896e-05, "epoch": 0.71, "step": 47075 }, { "loss": 1.1379, "grad_norm": 2.538459300994873, "learning_rate": 5.857739328947764e-05, "epoch": 0.71, "step": 47100 }, { "loss": 1.2194, "grad_norm": 1.1677653789520264, "learning_rate": 5.850229791835632e-05, "epoch": 0.71, "step": 47125 }, { "loss": 1.1959, "grad_norm": 1.7188001871109009, "learning_rate": 5.8427202547234994e-05, "epoch": 0.71, "step": 47150 }, { "loss": 1.1502, "grad_norm": 1.4081776142120361, "learning_rate": 5.8352107176113666e-05, "epoch": 0.71, "step": 47175 }, { "loss": 1.182, "grad_norm": 1.6037064790725708, "learning_rate": 5.8277011804992345e-05, "epoch": 0.71, "step": 47200 }, { "loss": 1.1552, "grad_norm": 2.5029053688049316, "learning_rate": 5.820191643387102e-05, "epoch": 0.71, "step": 47225 }, { "loss": 1.0832, "grad_norm": 1.6237151622772217, "learning_rate": 5.812682106274969e-05, "epoch": 0.71, "step": 47250 }, { "loss": 1.1307, "grad_norm": 1.8060946464538574, "learning_rate": 5.805172569162837e-05, "epoch": 0.71, "step": 47275 }, { "loss": 1.0566, "grad_norm": 1.7570223808288574, "learning_rate": 5.797663032050704e-05, "epoch": 0.71, "step": 47300 }, { "loss": 1.1227, "grad_norm": 0.9945117831230164, "learning_rate": 5.7901534949385726e-05, "epoch": 0.71, "step": 47325 }, { "loss": 1.1882, "grad_norm": 2.105391025543213, "learning_rate": 5.7826439578264405e-05, "epoch": 0.71, "step": 47350 }, { "loss": 1.1754, "grad_norm": 2.504192352294922, "learning_rate": 5.775134420714308e-05, "epoch": 0.71, "step": 47375 }, { "loss": 1.0904, "grad_norm": 1.456892967224121, "learning_rate": 5.767624883602175e-05, "epoch": 0.71, "step": 47400 }, { "loss": 1.1371, "grad_norm": 1.5804184675216675, "learning_rate": 5.760115346490043e-05, "epoch": 0.71, "step": 47425 }, { "loss": 1.132, "grad_norm": 2.141827344894409, "learning_rate": 5.75260580937791e-05, "epoch": 0.71, "step": 47450 }, { "loss": 1.142, "grad_norm": 2.439383029937744, "learning_rate": 5.745096272265777e-05, "epoch": 0.71, "step": 47475 }, { "loss": 1.1004, "grad_norm": 1.3394807577133179, "learning_rate": 5.737586735153645e-05, "epoch": 0.71, "step": 47500 }, { "loss": 1.0454, "grad_norm": 1.1147385835647583, "learning_rate": 5.730077198041514e-05, "epoch": 0.71, "step": 47525 }, { "loss": 1.1216, "grad_norm": 2.184941291809082, "learning_rate": 5.722567660929381e-05, "epoch": 0.71, "step": 47550 }, { "loss": 1.1123, "grad_norm": 1.4739607572555542, "learning_rate": 5.715058123817249e-05, "epoch": 0.71, "step": 47575 }, { "loss": 1.0541, "grad_norm": 1.1950966119766235, "learning_rate": 5.707548586705116e-05, "epoch": 0.71, "step": 47600 }, { "loss": 1.0943, "grad_norm": 1.8516206741333008, "learning_rate": 5.700039049592983e-05, "epoch": 0.72, "step": 47625 }, { "loss": 1.1332, "grad_norm": 1.2161145210266113, "learning_rate": 5.692529512480851e-05, "epoch": 0.72, "step": 47650 }, { "loss": 1.182, "grad_norm": 1.776667833328247, "learning_rate": 5.685019975368718e-05, "epoch": 0.72, "step": 47675 }, { "loss": 1.1426, "grad_norm": 1.0378066301345825, "learning_rate": 5.6775104382565855e-05, "epoch": 0.72, "step": 47700 }, { "loss": 1.1334, "grad_norm": 1.2625921964645386, "learning_rate": 5.6700009011444534e-05, "epoch": 0.72, "step": 47725 }, { "loss": 1.0911, "grad_norm": 1.5473499298095703, "learning_rate": 5.662491364032322e-05, "epoch": 0.72, "step": 47750 }, { "loss": 1.1503, "grad_norm": 2.243377447128296, "learning_rate": 5.654981826920189e-05, "epoch": 0.72, "step": 47775 }, { "loss": 1.1469, "grad_norm": 1.3182121515274048, "learning_rate": 5.647472289808057e-05, "epoch": 0.72, "step": 47800 }, { "loss": 1.1264, "grad_norm": 1.2882803678512573, "learning_rate": 5.639962752695924e-05, "epoch": 0.72, "step": 47825 }, { "loss": 1.1511, "grad_norm": 1.2225452661514282, "learning_rate": 5.6324532155837915e-05, "epoch": 0.72, "step": 47850 }, { "loss": 1.1468, "grad_norm": 2.1035497188568115, "learning_rate": 5.6249436784716593e-05, "epoch": 0.72, "step": 47875 }, { "loss": 1.1549, "grad_norm": 1.6698153018951416, "learning_rate": 5.6174341413595265e-05, "epoch": 0.72, "step": 47900 }, { "loss": 1.176, "grad_norm": 1.939454436302185, "learning_rate": 5.609924604247394e-05, "epoch": 0.72, "step": 47925 }, { "loss": 1.1494, "grad_norm": 1.100350260734558, "learning_rate": 5.6024150671352616e-05, "epoch": 0.72, "step": 47950 }, { "loss": 1.1401, "grad_norm": 1.3846749067306519, "learning_rate": 5.59490553002313e-05, "epoch": 0.72, "step": 47975 }, { "loss": 1.1711, "grad_norm": 1.5334635972976685, "learning_rate": 5.5873959929109974e-05, "epoch": 0.72, "step": 48000 }, { "loss": 1.1391, "grad_norm": 1.1351529359817505, "learning_rate": 5.579886455798865e-05, "epoch": 0.72, "step": 48025 }, { "loss": 1.1284, "grad_norm": 1.8126013278961182, "learning_rate": 5.5723769186867325e-05, "epoch": 0.72, "step": 48050 }, { "loss": 1.1845, "grad_norm": 1.187787652015686, "learning_rate": 5.5648673815746e-05, "epoch": 0.72, "step": 48075 }, { "loss": 1.1659, "grad_norm": 2.978299140930176, "learning_rate": 5.5573578444624676e-05, "epoch": 0.72, "step": 48100 }, { "loss": 1.1497, "grad_norm": 1.6019436120986938, "learning_rate": 5.549848307350335e-05, "epoch": 0.72, "step": 48125 }, { "loss": 1.0611, "grad_norm": 1.380462408065796, "learning_rate": 5.542338770238202e-05, "epoch": 0.72, "step": 48150 }, { "loss": 1.1723, "grad_norm": 1.732176661491394, "learning_rate": 5.53482923312607e-05, "epoch": 0.72, "step": 48175 }, { "loss": 1.2292, "grad_norm": 1.4990782737731934, "learning_rate": 5.5273196960139385e-05, "epoch": 0.72, "step": 48200 }, { "loss": 1.1319, "grad_norm": 1.7996023893356323, "learning_rate": 5.519810158901806e-05, "epoch": 0.72, "step": 48225 }, { "loss": 1.1087, "grad_norm": 1.8955588340759277, "learning_rate": 5.5123006217896736e-05, "epoch": 0.72, "step": 48250 }, { "loss": 1.1206, "grad_norm": 1.337724208831787, "learning_rate": 5.504791084677541e-05, "epoch": 0.73, "step": 48275 }, { "loss": 1.1627, "grad_norm": 1.515310525894165, "learning_rate": 5.497281547565408e-05, "epoch": 0.73, "step": 48300 }, { "loss": 1.1334, "grad_norm": 1.9032014608383179, "learning_rate": 5.489772010453276e-05, "epoch": 0.73, "step": 48325 }, { "loss": 1.0996, "grad_norm": 1.8782274723052979, "learning_rate": 5.482262473341143e-05, "epoch": 0.73, "step": 48350 }, { "loss": 1.1287, "grad_norm": 1.1794263124465942, "learning_rate": 5.474752936229011e-05, "epoch": 0.73, "step": 48375 }, { "loss": 1.1001, "grad_norm": 2.261495351791382, "learning_rate": 5.4672433991168795e-05, "epoch": 0.73, "step": 48400 }, { "loss": 1.0928, "grad_norm": 1.380417823791504, "learning_rate": 5.459733862004747e-05, "epoch": 0.73, "step": 48425 }, { "loss": 1.0945, "grad_norm": 1.0887725353240967, "learning_rate": 5.452224324892614e-05, "epoch": 0.73, "step": 48450 }, { "loss": 1.1247, "grad_norm": 1.2714431285858154, "learning_rate": 5.444714787780482e-05, "epoch": 0.73, "step": 48475 }, { "loss": 1.0856, "grad_norm": 1.475818157196045, "learning_rate": 5.437205250668349e-05, "epoch": 0.73, "step": 48500 }, { "loss": 1.0579, "grad_norm": 1.0250446796417236, "learning_rate": 5.429695713556216e-05, "epoch": 0.73, "step": 48525 }, { "loss": 1.0451, "grad_norm": 1.4754000902175903, "learning_rate": 5.422186176444084e-05, "epoch": 0.73, "step": 48550 }, { "loss": 1.1335, "grad_norm": 1.6405061483383179, "learning_rate": 5.4146766393319514e-05, "epoch": 0.73, "step": 48575 }, { "loss": 1.1399, "grad_norm": 1.1049416065216064, "learning_rate": 5.407167102219819e-05, "epoch": 0.73, "step": 48600 }, { "loss": 1.1112, "grad_norm": 1.7172602415084839, "learning_rate": 5.399657565107688e-05, "epoch": 0.73, "step": 48625 }, { "loss": 1.1008, "grad_norm": 1.833646297454834, "learning_rate": 5.392148027995555e-05, "epoch": 0.73, "step": 48650 }, { "loss": 1.1053, "grad_norm": 1.4659416675567627, "learning_rate": 5.384638490883422e-05, "epoch": 0.73, "step": 48675 }, { "loss": 1.0825, "grad_norm": 1.6295710802078247, "learning_rate": 5.37712895377129e-05, "epoch": 0.73, "step": 48700 }, { "loss": 1.1665, "grad_norm": 1.4064006805419922, "learning_rate": 5.369619416659157e-05, "epoch": 0.73, "step": 48725 }, { "loss": 1.0461, "grad_norm": 1.548251986503601, "learning_rate": 5.3621098795470245e-05, "epoch": 0.73, "step": 48750 }, { "loss": 1.1471, "grad_norm": 1.6318676471710205, "learning_rate": 5.3546003424348924e-05, "epoch": 0.73, "step": 48775 }, { "loss": 1.1411, "grad_norm": 1.321753978729248, "learning_rate": 5.3470908053227596e-05, "epoch": 0.73, "step": 48800 }, { "loss": 1.1102, "grad_norm": 1.167902946472168, "learning_rate": 5.3395812682106275e-05, "epoch": 0.73, "step": 48825 }, { "loss": 1.0909, "grad_norm": 1.3905353546142578, "learning_rate": 5.332071731098496e-05, "epoch": 0.73, "step": 48850 }, { "loss": 1.1796, "grad_norm": 1.0988661050796509, "learning_rate": 5.324562193986363e-05, "epoch": 0.73, "step": 48875 }, { "loss": 1.2214, "grad_norm": 1.348551630973816, "learning_rate": 5.3170526568742305e-05, "epoch": 0.73, "step": 48900 }, { "loss": 1.0649, "grad_norm": 1.176352858543396, "learning_rate": 5.3095431197620984e-05, "epoch": 0.73, "step": 48925 }, { "loss": 1.1334, "grad_norm": 1.448281168937683, "learning_rate": 5.3020335826499656e-05, "epoch": 0.74, "step": 48950 }, { "loss": 1.1764, "grad_norm": 1.7525986433029175, "learning_rate": 5.294524045537833e-05, "epoch": 0.74, "step": 48975 }, { "loss": 1.1873, "grad_norm": 1.6281651258468628, "learning_rate": 5.287014508425701e-05, "epoch": 0.74, "step": 49000 }, { "loss": 1.1259, "grad_norm": 1.1979387998580933, "learning_rate": 5.279504971313568e-05, "epoch": 0.74, "step": 49025 }, { "loss": 1.084, "grad_norm": 1.2359240055084229, "learning_rate": 5.2719954342014365e-05, "epoch": 0.74, "step": 49050 }, { "loss": 1.0872, "grad_norm": 1.6398156881332397, "learning_rate": 5.2644858970893044e-05, "epoch": 0.74, "step": 49075 }, { "loss": 1.1564, "grad_norm": 1.6317737102508545, "learning_rate": 5.2569763599771716e-05, "epoch": 0.74, "step": 49100 }, { "loss": 1.1798, "grad_norm": 1.1270239353179932, "learning_rate": 5.249466822865039e-05, "epoch": 0.74, "step": 49125 }, { "loss": 1.1076, "grad_norm": 1.504706621170044, "learning_rate": 5.2419572857529067e-05, "epoch": 0.74, "step": 49150 }, { "loss": 1.0761, "grad_norm": 2.199673891067505, "learning_rate": 5.234447748640774e-05, "epoch": 0.74, "step": 49175 }, { "loss": 1.1395, "grad_norm": 1.288529396057129, "learning_rate": 5.226938211528641e-05, "epoch": 0.74, "step": 49200 }, { "loss": 1.1776, "grad_norm": 1.4537650346755981, "learning_rate": 5.219428674416509e-05, "epoch": 0.74, "step": 49225 }, { "loss": 1.1639, "grad_norm": 2.028994560241699, "learning_rate": 5.211919137304376e-05, "epoch": 0.74, "step": 49250 }, { "loss": 1.0929, "grad_norm": 1.8188201189041138, "learning_rate": 5.204409600192245e-05, "epoch": 0.74, "step": 49275 }, { "loss": 1.1054, "grad_norm": 1.5506641864776611, "learning_rate": 5.1969000630801126e-05, "epoch": 0.74, "step": 49300 }, { "loss": 1.0699, "grad_norm": 1.526999592781067, "learning_rate": 5.18939052596798e-05, "epoch": 0.74, "step": 49325 }, { "loss": 1.1666, "grad_norm": 1.4270501136779785, "learning_rate": 5.181880988855847e-05, "epoch": 0.74, "step": 49350 }, { "loss": 1.11, "grad_norm": 1.3699140548706055, "learning_rate": 5.174371451743715e-05, "epoch": 0.74, "step": 49375 }, { "loss": 1.1069, "grad_norm": 1.468616008758545, "learning_rate": 5.166861914631582e-05, "epoch": 0.74, "step": 49400 }, { "loss": 1.0625, "grad_norm": 2.158735513687134, "learning_rate": 5.1593523775194493e-05, "epoch": 0.74, "step": 49425 }, { "loss": 1.1034, "grad_norm": 1.0673458576202393, "learning_rate": 5.151842840407317e-05, "epoch": 0.74, "step": 49450 }, { "loss": 1.1262, "grad_norm": 1.5599156618118286, "learning_rate": 5.1443333032951844e-05, "epoch": 0.74, "step": 49475 }, { "loss": 1.1222, "grad_norm": 1.7378026247024536, "learning_rate": 5.136823766183053e-05, "epoch": 0.74, "step": 49500 }, { "loss": 1.2029, "grad_norm": 2.2808854579925537, "learning_rate": 5.129314229070921e-05, "epoch": 0.74, "step": 49525 }, { "loss": 1.1349, "grad_norm": 1.3279706239700317, "learning_rate": 5.121804691958788e-05, "epoch": 0.74, "step": 49550 }, { "loss": 1.1291, "grad_norm": 1.7178452014923096, "learning_rate": 5.114295154846655e-05, "epoch": 0.74, "step": 49575 }, { "loss": 1.1029, "grad_norm": 1.7041009664535522, "learning_rate": 5.106785617734523e-05, "epoch": 0.74, "step": 49600 }, { "loss": 1.1401, "grad_norm": 1.7986174821853638, "learning_rate": 5.0992760806223904e-05, "epoch": 0.75, "step": 49625 }, { "loss": 1.095, "grad_norm": 1.9708353281021118, "learning_rate": 5.0917665435102576e-05, "epoch": 0.75, "step": 49650 }, { "loss": 1.0777, "grad_norm": 1.675958275794983, "learning_rate": 5.0842570063981255e-05, "epoch": 0.75, "step": 49675 }, { "loss": 1.0473, "grad_norm": 1.1292997598648071, "learning_rate": 5.076747469285994e-05, "epoch": 0.75, "step": 49700 }, { "loss": 1.1196, "grad_norm": 1.3241393566131592, "learning_rate": 5.069237932173861e-05, "epoch": 0.75, "step": 49725 }, { "loss": 1.1062, "grad_norm": 1.148818850517273, "learning_rate": 5.061728395061729e-05, "epoch": 0.75, "step": 49750 }, { "loss": 1.1366, "grad_norm": 1.6072300672531128, "learning_rate": 5.0542188579495964e-05, "epoch": 0.75, "step": 49775 }, { "loss": 1.062, "grad_norm": 2.6563072204589844, "learning_rate": 5.0467093208374636e-05, "epoch": 0.75, "step": 49800 }, { "loss": 1.1615, "grad_norm": 1.5061039924621582, "learning_rate": 5.0391997837253315e-05, "epoch": 0.75, "step": 49825 }, { "loss": 1.1299, "grad_norm": 1.6999189853668213, "learning_rate": 5.031690246613199e-05, "epoch": 0.75, "step": 49850 }, { "loss": 1.157, "grad_norm": 1.8330657482147217, "learning_rate": 5.0241807095010666e-05, "epoch": 0.75, "step": 49875 }, { "loss": 1.1452, "grad_norm": 1.6632230281829834, "learning_rate": 5.016671172388934e-05, "epoch": 0.75, "step": 49900 }, { "loss": 1.1767, "grad_norm": 1.2450637817382812, "learning_rate": 5.0091616352768023e-05, "epoch": 0.75, "step": 49925 }, { "loss": 1.1398, "grad_norm": 1.1371906995773315, "learning_rate": 5.0016520981646695e-05, "epoch": 0.75, "step": 49950 }, { "loss": 1.1173, "grad_norm": 1.1480075120925903, "learning_rate": 4.9941425610525374e-05, "epoch": 0.75, "step": 49975 }, { "loss": 1.0499, "grad_norm": 1.0953096151351929, "learning_rate": 4.9866330239404046e-05, "epoch": 0.75, "step": 50000 }, { "loss": 1.0824, "grad_norm": 1.4675097465515137, "learning_rate": 4.979123486828272e-05, "epoch": 0.75, "step": 50025 }, { "loss": 1.1413, "grad_norm": 1.9132678508758545, "learning_rate": 4.97161394971614e-05, "epoch": 0.75, "step": 50050 }, { "loss": 1.0562, "grad_norm": 1.3277969360351562, "learning_rate": 4.9641044126040076e-05, "epoch": 0.75, "step": 50075 }, { "loss": 1.1071, "grad_norm": 1.4362881183624268, "learning_rate": 4.956594875491875e-05, "epoch": 0.75, "step": 50100 }, { "loss": 1.1572, "grad_norm": 0.8633365631103516, "learning_rate": 4.949085338379743e-05, "epoch": 0.75, "step": 50125 }, { "loss": 1.0881, "grad_norm": 1.653272271156311, "learning_rate": 4.94157580126761e-05, "epoch": 0.75, "step": 50150 }, { "loss": 1.158, "grad_norm": 2.0135273933410645, "learning_rate": 4.934066264155477e-05, "epoch": 0.75, "step": 50175 }, { "loss": 1.1024, "grad_norm": 1.119586706161499, "learning_rate": 4.926556727043346e-05, "epoch": 0.75, "step": 50200 }, { "loss": 1.1258, "grad_norm": 0.9510914087295532, "learning_rate": 4.919047189931213e-05, "epoch": 0.75, "step": 50225 }, { "loss": 1.2239, "grad_norm": 1.510134220123291, "learning_rate": 4.91153765281908e-05, "epoch": 0.75, "step": 50250 }, { "loss": 1.0673, "grad_norm": 1.0487359762191772, "learning_rate": 4.904028115706948e-05, "epoch": 0.76, "step": 50275 }, { "loss": 1.1268, "grad_norm": 1.5456733703613281, "learning_rate": 4.896518578594816e-05, "epoch": 0.76, "step": 50300 }, { "loss": 1.2115, "grad_norm": 1.761313557624817, "learning_rate": 4.889009041482683e-05, "epoch": 0.76, "step": 50325 }, { "loss": 1.1656, "grad_norm": 1.7086901664733887, "learning_rate": 4.881499504370551e-05, "epoch": 0.76, "step": 50350 }, { "loss": 1.0727, "grad_norm": 1.7276190519332886, "learning_rate": 4.874290348742904e-05, "epoch": 0.76, "step": 50375 }, { "loss": 1.0991, "grad_norm": 1.3871339559555054, "learning_rate": 4.866780811630771e-05, "epoch": 0.76, "step": 50400 }, { "loss": 1.0951, "grad_norm": 1.9214799404144287, "learning_rate": 4.859271274518639e-05, "epoch": 0.76, "step": 50425 }, { "loss": 1.1219, "grad_norm": 1.5749818086624146, "learning_rate": 4.851761737406507e-05, "epoch": 0.76, "step": 50450 }, { "loss": 1.106, "grad_norm": 4.110257625579834, "learning_rate": 4.844252200294374e-05, "epoch": 0.76, "step": 50475 }, { "loss": 1.0866, "grad_norm": 1.6532156467437744, "learning_rate": 4.836742663182241e-05, "epoch": 0.76, "step": 50500 }, { "loss": 1.1183, "grad_norm": 1.485727071762085, "learning_rate": 4.829233126070109e-05, "epoch": 0.76, "step": 50525 }, { "loss": 1.1405, "grad_norm": 1.4788265228271484, "learning_rate": 4.821723588957977e-05, "epoch": 0.76, "step": 50550 }, { "loss": 1.1491, "grad_norm": 2.762058973312378, "learning_rate": 4.814214051845844e-05, "epoch": 0.76, "step": 50575 }, { "loss": 1.1483, "grad_norm": 1.7897312641143799, "learning_rate": 4.806704514733712e-05, "epoch": 0.76, "step": 50600 }, { "loss": 1.0947, "grad_norm": 1.1446094512939453, "learning_rate": 4.799194977621579e-05, "epoch": 0.76, "step": 50625 }, { "loss": 1.1116, "grad_norm": 0.997673749923706, "learning_rate": 4.791685440509447e-05, "epoch": 0.76, "step": 50650 }, { "loss": 1.1374, "grad_norm": 1.243049144744873, "learning_rate": 4.784175903397315e-05, "epoch": 0.76, "step": 50675 }, { "loss": 1.1458, "grad_norm": 1.5746535062789917, "learning_rate": 4.776666366285182e-05, "epoch": 0.76, "step": 50700 }, { "loss": 1.1191, "grad_norm": 1.0133841037750244, "learning_rate": 4.7691568291730495e-05, "epoch": 0.76, "step": 50725 }, { "loss": 1.0886, "grad_norm": 2.07098650932312, "learning_rate": 4.761647292060918e-05, "epoch": 0.76, "step": 50750 }, { "loss": 1.1666, "grad_norm": 2.41629695892334, "learning_rate": 4.754137754948785e-05, "epoch": 0.76, "step": 50775 }, { "loss": 1.1094, "grad_norm": 1.2683985233306885, "learning_rate": 4.7466282178366525e-05, "epoch": 0.76, "step": 50800 }, { "loss": 1.1979, "grad_norm": 2.520268201828003, "learning_rate": 4.7391186807245204e-05, "epoch": 0.76, "step": 50825 }, { "loss": 1.124, "grad_norm": 1.5505998134613037, "learning_rate": 4.7316091436123876e-05, "epoch": 0.76, "step": 50850 }, { "loss": 1.1447, "grad_norm": 1.3630999326705933, "learning_rate": 4.7240996065002555e-05, "epoch": 0.76, "step": 50875 }, { "loss": 1.0995, "grad_norm": 1.36336350440979, "learning_rate": 4.7165900693881234e-05, "epoch": 0.76, "step": 50900 }, { "loss": 1.0873, "grad_norm": 1.852036952972412, "learning_rate": 4.7090805322759906e-05, "epoch": 0.76, "step": 50925 }, { "loss": 1.1525, "grad_norm": 1.3926178216934204, "learning_rate": 4.7015709951638585e-05, "epoch": 0.77, "step": 50950 }, { "loss": 1.0773, "grad_norm": 1.5449055433273315, "learning_rate": 4.6940614580517263e-05, "epoch": 0.77, "step": 50975 }, { "loss": 1.1175, "grad_norm": 1.1615759134292603, "learning_rate": 4.6865519209395936e-05, "epoch": 0.77, "step": 51000 }, { "loss": 1.1354, "grad_norm": 1.8657679557800293, "learning_rate": 4.679042383827461e-05, "epoch": 0.77, "step": 51025 }, { "loss": 1.0671, "grad_norm": 1.0491373538970947, "learning_rate": 4.6715328467153287e-05, "epoch": 0.77, "step": 51050 }, { "loss": 1.0703, "grad_norm": 1.1459797620773315, "learning_rate": 4.6640233096031965e-05, "epoch": 0.77, "step": 51075 }, { "loss": 1.0243, "grad_norm": 2.31217098236084, "learning_rate": 4.656513772491064e-05, "epoch": 0.77, "step": 51100 }, { "loss": 1.1454, "grad_norm": 1.3556299209594727, "learning_rate": 4.6490042353789316e-05, "epoch": 0.77, "step": 51125 }, { "loss": 1.0407, "grad_norm": 1.560073971748352, "learning_rate": 4.641494698266799e-05, "epoch": 0.77, "step": 51150 }, { "loss": 1.1132, "grad_norm": 2.1798226833343506, "learning_rate": 4.633985161154667e-05, "epoch": 0.77, "step": 51175 }, { "loss": 1.1708, "grad_norm": 1.246620774269104, "learning_rate": 4.6264756240425346e-05, "epoch": 0.77, "step": 51200 }, { "loss": 1.1629, "grad_norm": 1.2318778038024902, "learning_rate": 4.618966086930402e-05, "epoch": 0.77, "step": 51225 }, { "loss": 1.1289, "grad_norm": 1.7169677019119263, "learning_rate": 4.611456549818269e-05, "epoch": 0.77, "step": 51250 }, { "loss": 1.1142, "grad_norm": 1.4329315423965454, "learning_rate": 4.603947012706137e-05, "epoch": 0.77, "step": 51275 }, { "loss": 1.0928, "grad_norm": 1.3384554386138916, "learning_rate": 4.596437475594005e-05, "epoch": 0.77, "step": 51300 }, { "loss": 1.0633, "grad_norm": 1.2550382614135742, "learning_rate": 4.588927938481872e-05, "epoch": 0.77, "step": 51325 }, { "loss": 1.1229, "grad_norm": 1.7870949506759644, "learning_rate": 4.58141840136974e-05, "epoch": 0.77, "step": 51350 }, { "loss": 1.0861, "grad_norm": 1.3869023323059082, "learning_rate": 4.573908864257607e-05, "epoch": 0.77, "step": 51375 }, { "loss": 1.129, "grad_norm": 1.477853536605835, "learning_rate": 4.566399327145475e-05, "epoch": 0.77, "step": 51400 }, { "loss": 1.1998, "grad_norm": 1.8617823123931885, "learning_rate": 4.558889790033343e-05, "epoch": 0.77, "step": 51425 }, { "loss": 1.1041, "grad_norm": 1.5415617227554321, "learning_rate": 4.55138025292121e-05, "epoch": 0.77, "step": 51450 }, { "loss": 1.0528, "grad_norm": 1.7439303398132324, "learning_rate": 4.543870715809077e-05, "epoch": 0.77, "step": 51475 }, { "loss": 1.1482, "grad_norm": 1.3494808673858643, "learning_rate": 4.536361178696945e-05, "epoch": 0.77, "step": 51500 }, { "loss": 1.1014, "grad_norm": 2.855989456176758, "learning_rate": 4.528851641584813e-05, "epoch": 0.77, "step": 51525 }, { "loss": 1.1184, "grad_norm": 1.9916918277740479, "learning_rate": 4.52134210447268e-05, "epoch": 0.77, "step": 51550 }, { "loss": 1.1653, "grad_norm": 2.4080750942230225, "learning_rate": 4.513832567360548e-05, "epoch": 0.77, "step": 51575 }, { "loss": 1.0892, "grad_norm": 2.9220423698425293, "learning_rate": 4.5063230302484154e-05, "epoch": 0.77, "step": 51600 }, { "loss": 1.1106, "grad_norm": 1.840510368347168, "learning_rate": 4.498813493136283e-05, "epoch": 0.78, "step": 51625 }, { "loss": 1.1684, "grad_norm": 1.8185040950775146, "learning_rate": 4.491303956024151e-05, "epoch": 0.78, "step": 51650 }, { "loss": 1.1476, "grad_norm": 1.5126756429672241, "learning_rate": 4.4837944189120184e-05, "epoch": 0.78, "step": 51675 }, { "loss": 1.0972, "grad_norm": 2.2125062942504883, "learning_rate": 4.476284881799886e-05, "epoch": 0.78, "step": 51700 }, { "loss": 1.1401, "grad_norm": 2.3873279094696045, "learning_rate": 4.4687753446877535e-05, "epoch": 0.78, "step": 51725 }, { "loss": 1.0909, "grad_norm": 1.7589930295944214, "learning_rate": 4.4612658075756213e-05, "epoch": 0.78, "step": 51750 }, { "loss": 1.1803, "grad_norm": 1.4225094318389893, "learning_rate": 4.4537562704634886e-05, "epoch": 0.78, "step": 51775 }, { "loss": 1.1538, "grad_norm": 1.1532394886016846, "learning_rate": 4.4462467333513564e-05, "epoch": 0.78, "step": 51800 }, { "loss": 1.1152, "grad_norm": 1.6151081323623657, "learning_rate": 4.4387371962392237e-05, "epoch": 0.78, "step": 51825 }, { "loss": 1.1382, "grad_norm": 2.63031005859375, "learning_rate": 4.4312276591270915e-05, "epoch": 0.78, "step": 51850 }, { "loss": 1.1601, "grad_norm": 2.0375289916992188, "learning_rate": 4.4237181220149594e-05, "epoch": 0.78, "step": 51875 }, { "loss": 1.1179, "grad_norm": 1.1906908750534058, "learning_rate": 4.4162085849028266e-05, "epoch": 0.78, "step": 51900 }, { "loss": 1.1114, "grad_norm": 1.8762083053588867, "learning_rate": 4.4086990477906945e-05, "epoch": 0.78, "step": 51925 }, { "loss": 1.1274, "grad_norm": 1.7392081022262573, "learning_rate": 4.4011895106785624e-05, "epoch": 0.78, "step": 51950 }, { "loss": 1.1117, "grad_norm": 1.480962872505188, "learning_rate": 4.3936799735664296e-05, "epoch": 0.78, "step": 51975 }, { "loss": 1.1623, "grad_norm": 1.1408778429031372, "learning_rate": 4.386170436454297e-05, "epoch": 0.78, "step": 52000 }, { "loss": 1.0937, "grad_norm": 2.5744760036468506, "learning_rate": 4.378660899342165e-05, "epoch": 0.78, "step": 52025 }, { "loss": 1.1968, "grad_norm": 1.890031337738037, "learning_rate": 4.371151362230032e-05, "epoch": 0.78, "step": 52050 }, { "loss": 1.1761, "grad_norm": 1.9659225940704346, "learning_rate": 4.3636418251179e-05, "epoch": 0.78, "step": 52075 }, { "loss": 1.176, "grad_norm": 1.4123088121414185, "learning_rate": 4.356132288005768e-05, "epoch": 0.78, "step": 52100 }, { "loss": 1.1493, "grad_norm": 1.9861273765563965, "learning_rate": 4.348622750893635e-05, "epoch": 0.78, "step": 52125 }, { "loss": 1.1376, "grad_norm": 1.3282297849655151, "learning_rate": 4.341113213781503e-05, "epoch": 0.78, "step": 52150 }, { "loss": 1.1337, "grad_norm": 1.8567203283309937, "learning_rate": 4.333603676669371e-05, "epoch": 0.78, "step": 52175 }, { "loss": 1.1678, "grad_norm": 1.387803077697754, "learning_rate": 4.326094139557238e-05, "epoch": 0.78, "step": 52200 }, { "loss": 1.1369, "grad_norm": 1.4993411302566528, "learning_rate": 4.318584602445105e-05, "epoch": 0.78, "step": 52225 }, { "loss": 1.1072, "grad_norm": 1.83002769947052, "learning_rate": 4.311075065332973e-05, "epoch": 0.78, "step": 52250 }, { "loss": 1.1275, "grad_norm": 1.0453954935073853, "learning_rate": 4.303565528220841e-05, "epoch": 0.79, "step": 52275 }, { "loss": 1.0546, "grad_norm": 1.6221436262130737, "learning_rate": 4.296055991108708e-05, "epoch": 0.79, "step": 52300 }, { "loss": 1.1474, "grad_norm": 1.4290169477462769, "learning_rate": 4.288546453996576e-05, "epoch": 0.79, "step": 52325 }, { "loss": 1.0947, "grad_norm": 2.136678695678711, "learning_rate": 4.281036916884443e-05, "epoch": 0.79, "step": 52350 }, { "loss": 1.1738, "grad_norm": 1.6790881156921387, "learning_rate": 4.273527379772311e-05, "epoch": 0.79, "step": 52375 }, { "loss": 1.1474, "grad_norm": 1.1431602239608765, "learning_rate": 4.266017842660179e-05, "epoch": 0.79, "step": 52400 }, { "loss": 1.1478, "grad_norm": 1.6592998504638672, "learning_rate": 4.258508305548046e-05, "epoch": 0.79, "step": 52425 }, { "loss": 1.0866, "grad_norm": 3.1507890224456787, "learning_rate": 4.250998768435914e-05, "epoch": 0.79, "step": 52450 }, { "loss": 1.1061, "grad_norm": 2.271561861038208, "learning_rate": 4.243489231323781e-05, "epoch": 0.79, "step": 52475 }, { "loss": 1.1853, "grad_norm": 1.3959341049194336, "learning_rate": 4.235979694211649e-05, "epoch": 0.79, "step": 52500 }, { "loss": 1.1685, "grad_norm": 1.9828035831451416, "learning_rate": 4.2284701570995164e-05, "epoch": 0.79, "step": 52525 }, { "loss": 1.1353, "grad_norm": 1.348754644393921, "learning_rate": 4.220960619987384e-05, "epoch": 0.79, "step": 52550 }, { "loss": 1.0879, "grad_norm": 2.036592483520508, "learning_rate": 4.2134510828752514e-05, "epoch": 0.79, "step": 52575 }, { "loss": 1.1763, "grad_norm": 2.1805171966552734, "learning_rate": 4.205941545763119e-05, "epoch": 0.79, "step": 52600 }, { "loss": 1.0777, "grad_norm": 1.3825914859771729, "learning_rate": 4.198432008650987e-05, "epoch": 0.79, "step": 52625 }, { "loss": 1.1068, "grad_norm": 1.282179832458496, "learning_rate": 4.1909224715388544e-05, "epoch": 0.79, "step": 52650 }, { "loss": 1.1387, "grad_norm": 1.4758460521697998, "learning_rate": 4.183412934426722e-05, "epoch": 0.79, "step": 52675 }, { "loss": 1.1992, "grad_norm": 1.5327672958374023, "learning_rate": 4.1759033973145895e-05, "epoch": 0.79, "step": 52700 }, { "loss": 1.1051, "grad_norm": 1.300261378288269, "learning_rate": 4.1683938602024574e-05, "epoch": 0.79, "step": 52725 }, { "loss": 1.1114, "grad_norm": 1.1575740575790405, "learning_rate": 4.1608843230903246e-05, "epoch": 0.79, "step": 52750 }, { "loss": 1.1132, "grad_norm": 1.5386431217193604, "learning_rate": 4.1533747859781925e-05, "epoch": 0.79, "step": 52775 }, { "loss": 1.1619, "grad_norm": 1.5786181688308716, "learning_rate": 4.14586524886606e-05, "epoch": 0.79, "step": 52800 }, { "loss": 1.1353, "grad_norm": 2.4933083057403564, "learning_rate": 4.1383557117539276e-05, "epoch": 0.79, "step": 52825 }, { "loss": 1.1693, "grad_norm": 2.640106439590454, "learning_rate": 4.1308461746417955e-05, "epoch": 0.79, "step": 52850 }, { "loss": 1.1121, "grad_norm": 1.0569820404052734, "learning_rate": 4.123336637529663e-05, "epoch": 0.79, "step": 52875 }, { "loss": 1.0855, "grad_norm": 1.2210026979446411, "learning_rate": 4.1158271004175306e-05, "epoch": 0.79, "step": 52900 }, { "loss": 1.1511, "grad_norm": 1.5878472328186035, "learning_rate": 4.1083175633053985e-05, "epoch": 0.79, "step": 52925 }, { "loss": 1.1387, "grad_norm": 1.8040430545806885, "learning_rate": 4.100808026193266e-05, "epoch": 0.8, "step": 52950 }, { "loss": 1.1351, "grad_norm": 1.5732437372207642, "learning_rate": 4.093298489081133e-05, "epoch": 0.8, "step": 52975 }, { "loss": 1.1317, "grad_norm": 1.9610670804977417, "learning_rate": 4.085788951969001e-05, "epoch": 0.8, "step": 53000 }, { "loss": 1.1201, "grad_norm": 1.1342912912368774, "learning_rate": 4.078279414856868e-05, "epoch": 0.8, "step": 53025 }, { "loss": 1.1302, "grad_norm": 1.7887520790100098, "learning_rate": 4.070769877744736e-05, "epoch": 0.8, "step": 53050 }, { "loss": 1.1894, "grad_norm": 2.0694801807403564, "learning_rate": 4.063260340632604e-05, "epoch": 0.8, "step": 53075 }, { "loss": 1.106, "grad_norm": 1.582124948501587, "learning_rate": 4.055750803520471e-05, "epoch": 0.8, "step": 53100 }, { "loss": 1.1535, "grad_norm": 0.8474487066268921, "learning_rate": 4.048241266408339e-05, "epoch": 0.8, "step": 53125 }, { "loss": 1.1005, "grad_norm": 1.3499822616577148, "learning_rate": 4.041032110780692e-05, "epoch": 0.8, "step": 53150 }, { "loss": 1.1252, "grad_norm": 3.777379274368286, "learning_rate": 4.0335225736685596e-05, "epoch": 0.8, "step": 53175 }, { "loss": 1.0519, "grad_norm": 1.1216979026794434, "learning_rate": 4.026013036556427e-05, "epoch": 0.8, "step": 53200 }, { "loss": 1.1814, "grad_norm": 1.710165023803711, "learning_rate": 4.018503499444295e-05, "epoch": 0.8, "step": 53225 }, { "loss": 1.1011, "grad_norm": 1.4310054779052734, "learning_rate": 4.010993962332162e-05, "epoch": 0.8, "step": 53250 }, { "loss": 1.1253, "grad_norm": 1.3393102884292603, "learning_rate": 4.00348442522003e-05, "epoch": 0.8, "step": 53275 }, { "loss": 1.2024, "grad_norm": 0.9497338533401489, "learning_rate": 3.995974888107897e-05, "epoch": 0.8, "step": 53300 }, { "loss": 1.1295, "grad_norm": 1.777761697769165, "learning_rate": 3.988465350995765e-05, "epoch": 0.8, "step": 53325 }, { "loss": 1.0795, "grad_norm": 1.3849236965179443, "learning_rate": 3.980955813883632e-05, "epoch": 0.8, "step": 53350 }, { "loss": 1.0983, "grad_norm": 1.8323969841003418, "learning_rate": 3.9734462767715e-05, "epoch": 0.8, "step": 53375 }, { "loss": 1.1308, "grad_norm": 1.5328776836395264, "learning_rate": 3.965936739659368e-05, "epoch": 0.8, "step": 53400 }, { "loss": 1.1389, "grad_norm": 1.2152605056762695, "learning_rate": 3.958427202547235e-05, "epoch": 0.8, "step": 53425 }, { "loss": 1.1156, "grad_norm": 1.0716402530670166, "learning_rate": 3.950917665435103e-05, "epoch": 0.8, "step": 53450 }, { "loss": 1.0923, "grad_norm": 1.3493958711624146, "learning_rate": 3.94340812832297e-05, "epoch": 0.8, "step": 53475 }, { "loss": 1.0859, "grad_norm": 1.8882994651794434, "learning_rate": 3.935898591210838e-05, "epoch": 0.8, "step": 53500 }, { "loss": 1.0888, "grad_norm": 1.1161054372787476, "learning_rate": 3.928389054098706e-05, "epoch": 0.8, "step": 53525 }, { "loss": 1.1155, "grad_norm": 1.3479957580566406, "learning_rate": 3.920879516986573e-05, "epoch": 0.8, "step": 53550 }, { "loss": 1.135, "grad_norm": 1.2609208822250366, "learning_rate": 3.9133699798744404e-05, "epoch": 0.8, "step": 53575 }, { "loss": 1.0736, "grad_norm": 1.8553820848464966, "learning_rate": 3.905860442762308e-05, "epoch": 0.81, "step": 53600 }, { "loss": 1.091, "grad_norm": 1.7198560237884521, "learning_rate": 3.898350905650176e-05, "epoch": 0.81, "step": 53625 }, { "loss": 1.1926, "grad_norm": 1.0929125547409058, "learning_rate": 3.8908413685380433e-05, "epoch": 0.81, "step": 53650 }, { "loss": 1.1161, "grad_norm": 1.4651769399642944, "learning_rate": 3.883331831425911e-05, "epoch": 0.81, "step": 53675 }, { "loss": 1.1704, "grad_norm": 2.1259841918945312, "learning_rate": 3.8758222943137784e-05, "epoch": 0.81, "step": 53700 }, { "loss": 1.1024, "grad_norm": 1.6856151819229126, "learning_rate": 3.868312757201646e-05, "epoch": 0.81, "step": 53725 }, { "loss": 1.1725, "grad_norm": 1.9457602500915527, "learning_rate": 3.860803220089514e-05, "epoch": 0.81, "step": 53750 }, { "loss": 1.1107, "grad_norm": 1.1278740167617798, "learning_rate": 3.8532936829773814e-05, "epoch": 0.81, "step": 53775 }, { "loss": 1.0866, "grad_norm": 1.989402174949646, "learning_rate": 3.8457841458652486e-05, "epoch": 0.81, "step": 53800 }, { "loss": 1.09, "grad_norm": 2.686849355697632, "learning_rate": 3.8382746087531165e-05, "epoch": 0.81, "step": 53825 }, { "loss": 1.0897, "grad_norm": 2.199162244796753, "learning_rate": 3.8307650716409844e-05, "epoch": 0.81, "step": 53850 }, { "loss": 1.1071, "grad_norm": 0.9810658693313599, "learning_rate": 3.8232555345288516e-05, "epoch": 0.81, "step": 53875 }, { "loss": 1.1032, "grad_norm": 1.1024478673934937, "learning_rate": 3.8157459974167195e-05, "epoch": 0.81, "step": 53900 }, { "loss": 1.0515, "grad_norm": 1.8465054035186768, "learning_rate": 3.8082364603045874e-05, "epoch": 0.81, "step": 53925 }, { "loss": 1.0606, "grad_norm": 0.9782311320304871, "learning_rate": 3.8007269231924546e-05, "epoch": 0.81, "step": 53950 }, { "loss": 1.1054, "grad_norm": 1.4638195037841797, "learning_rate": 3.7932173860803225e-05, "epoch": 0.81, "step": 53975 }, { "loss": 1.1584, "grad_norm": 2.066131114959717, "learning_rate": 3.78570784896819e-05, "epoch": 0.81, "step": 54000 }, { "loss": 1.1254, "grad_norm": 1.6561390161514282, "learning_rate": 3.778198311856057e-05, "epoch": 0.81, "step": 54025 }, { "loss": 1.1213, "grad_norm": 1.693764090538025, "learning_rate": 3.770688774743925e-05, "epoch": 0.81, "step": 54050 }, { "loss": 1.0754, "grad_norm": 1.5490859746932983, "learning_rate": 3.763179237631793e-05, "epoch": 0.81, "step": 54075 }, { "loss": 1.1704, "grad_norm": 1.7576946020126343, "learning_rate": 3.75566970051966e-05, "epoch": 0.81, "step": 54100 }, { "loss": 1.1473, "grad_norm": 1.8954912424087524, "learning_rate": 3.748160163407528e-05, "epoch": 0.81, "step": 54125 }, { "loss": 1.1391, "grad_norm": 2.0620269775390625, "learning_rate": 3.7406506262953957e-05, "epoch": 0.81, "step": 54150 }, { "loss": 1.1656, "grad_norm": 1.3634029626846313, "learning_rate": 3.733141089183263e-05, "epoch": 0.81, "step": 54175 }, { "loss": 1.1246, "grad_norm": 1.4298192262649536, "learning_rate": 3.725631552071131e-05, "epoch": 0.81, "step": 54200 }, { "loss": 1.1954, "grad_norm": 1.760016679763794, "learning_rate": 3.718122014958998e-05, "epoch": 0.81, "step": 54225 }, { "loss": 1.1634, "grad_norm": 1.460942268371582, "learning_rate": 3.710612477846866e-05, "epoch": 0.81, "step": 54250 }, { "loss": 1.0715, "grad_norm": 0.9953238368034363, "learning_rate": 3.703102940734734e-05, "epoch": 0.82, "step": 54275 }, { "loss": 1.1588, "grad_norm": 1.3567308187484741, "learning_rate": 3.695593403622601e-05, "epoch": 0.82, "step": 54300 }, { "loss": 1.1086, "grad_norm": 1.4111878871917725, "learning_rate": 3.688083866510468e-05, "epoch": 0.82, "step": 54325 }, { "loss": 1.161, "grad_norm": 1.7530951499938965, "learning_rate": 3.680574329398336e-05, "epoch": 0.82, "step": 54350 }, { "loss": 1.1003, "grad_norm": 1.5563117265701294, "learning_rate": 3.673064792286204e-05, "epoch": 0.82, "step": 54375 }, { "loss": 1.163, "grad_norm": 1.0254262685775757, "learning_rate": 3.665555255174071e-05, "epoch": 0.82, "step": 54400 }, { "loss": 1.1384, "grad_norm": 2.547769784927368, "learning_rate": 3.658045718061939e-05, "epoch": 0.82, "step": 54425 }, { "loss": 1.107, "grad_norm": 1.0468461513519287, "learning_rate": 3.650536180949806e-05, "epoch": 0.82, "step": 54450 }, { "loss": 1.1431, "grad_norm": 1.1783130168914795, "learning_rate": 3.643026643837674e-05, "epoch": 0.82, "step": 54475 }, { "loss": 1.1398, "grad_norm": 1.3592449426651, "learning_rate": 3.635517106725542e-05, "epoch": 0.82, "step": 54500 }, { "loss": 1.0942, "grad_norm": 1.687246322631836, "learning_rate": 3.628007569613409e-05, "epoch": 0.82, "step": 54525 }, { "loss": 1.0334, "grad_norm": 1.396044373512268, "learning_rate": 3.6204980325012764e-05, "epoch": 0.82, "step": 54550 }, { "loss": 1.0524, "grad_norm": 2.7093379497528076, "learning_rate": 3.612988495389144e-05, "epoch": 0.82, "step": 54575 }, { "loss": 1.1404, "grad_norm": 1.8118054866790771, "learning_rate": 3.605478958277012e-05, "epoch": 0.82, "step": 54600 }, { "loss": 1.0971, "grad_norm": 1.241155982017517, "learning_rate": 3.5979694211648794e-05, "epoch": 0.82, "step": 54625 }, { "loss": 1.0957, "grad_norm": 1.7652029991149902, "learning_rate": 3.590459884052747e-05, "epoch": 0.82, "step": 54650 }, { "loss": 1.1159, "grad_norm": 2.1295764446258545, "learning_rate": 3.5829503469406145e-05, "epoch": 0.82, "step": 54675 }, { "loss": 1.1132, "grad_norm": 1.4499530792236328, "learning_rate": 3.5754408098284824e-05, "epoch": 0.82, "step": 54700 }, { "loss": 1.1356, "grad_norm": 1.3263312578201294, "learning_rate": 3.56793127271635e-05, "epoch": 0.82, "step": 54725 }, { "loss": 1.0752, "grad_norm": 1.5875509977340698, "learning_rate": 3.5604217356042175e-05, "epoch": 0.82, "step": 54750 }, { "loss": 1.1495, "grad_norm": 1.2747198343276978, "learning_rate": 3.552912198492085e-05, "epoch": 0.82, "step": 54775 }, { "loss": 1.0794, "grad_norm": 3.039198637008667, "learning_rate": 3.5454026613799526e-05, "epoch": 0.82, "step": 54800 }, { "loss": 1.0468, "grad_norm": 1.2731279134750366, "learning_rate": 3.5378931242678205e-05, "epoch": 0.82, "step": 54825 }, { "loss": 1.0554, "grad_norm": 2.2691845893859863, "learning_rate": 3.530383587155688e-05, "epoch": 0.82, "step": 54850 }, { "loss": 1.1324, "grad_norm": 1.3395280838012695, "learning_rate": 3.5228740500435556e-05, "epoch": 0.82, "step": 54875 }, { "loss": 1.036, "grad_norm": 1.1065006256103516, "learning_rate": 3.5153645129314235e-05, "epoch": 0.82, "step": 54900 }, { "loss": 1.05, "grad_norm": 1.7697545289993286, "learning_rate": 3.507854975819291e-05, "epoch": 0.82, "step": 54925 }, { "loss": 1.1362, "grad_norm": 1.0653153657913208, "learning_rate": 3.5003454387071585e-05, "epoch": 0.83, "step": 54950 }, { "loss": 1.0546, "grad_norm": 1.5754921436309814, "learning_rate": 3.492835901595026e-05, "epoch": 0.83, "step": 54975 }, { "loss": 0.9954, "grad_norm": 1.166438341140747, "learning_rate": 3.485326364482893e-05, "epoch": 0.83, "step": 55000 }, { "loss": 1.0758, "grad_norm": 1.2350513935089111, "learning_rate": 3.4778168273707615e-05, "epoch": 0.83, "step": 55025 }, { "loss": 1.0664, "grad_norm": 1.1638765335083008, "learning_rate": 3.470307290258629e-05, "epoch": 0.83, "step": 55050 }, { "loss": 1.1558, "grad_norm": 1.0206239223480225, "learning_rate": 3.462797753146496e-05, "epoch": 0.83, "step": 55075 }, { "loss": 1.1298, "grad_norm": 1.1066503524780273, "learning_rate": 3.455288216034364e-05, "epoch": 0.83, "step": 55100 }, { "loss": 1.114, "grad_norm": 1.242811918258667, "learning_rate": 3.447778678922232e-05, "epoch": 0.83, "step": 55125 }, { "loss": 1.0795, "grad_norm": 0.8851810097694397, "learning_rate": 3.440269141810099e-05, "epoch": 0.83, "step": 55150 }, { "loss": 1.1065, "grad_norm": 1.7904212474822998, "learning_rate": 3.432759604697967e-05, "epoch": 0.83, "step": 55175 }, { "loss": 1.013, "grad_norm": 1.2688441276550293, "learning_rate": 3.425250067585834e-05, "epoch": 0.83, "step": 55200 }, { "loss": 1.1373, "grad_norm": 1.1899800300598145, "learning_rate": 3.417740530473702e-05, "epoch": 0.83, "step": 55225 }, { "loss": 1.1135, "grad_norm": 1.9331419467926025, "learning_rate": 3.41023099336157e-05, "epoch": 0.83, "step": 55250 }, { "loss": 1.0993, "grad_norm": 1.6777852773666382, "learning_rate": 3.402721456249437e-05, "epoch": 0.83, "step": 55275 }, { "loss": 1.0528, "grad_norm": 1.469103217124939, "learning_rate": 3.395211919137304e-05, "epoch": 0.83, "step": 55300 }, { "loss": 1.1432, "grad_norm": 1.6784319877624512, "learning_rate": 3.387702382025172e-05, "epoch": 0.83, "step": 55325 }, { "loss": 1.0986, "grad_norm": 1.4824069738388062, "learning_rate": 3.38019284491304e-05, "epoch": 0.83, "step": 55350 }, { "loss": 1.1599, "grad_norm": 1.1543418169021606, "learning_rate": 3.372683307800907e-05, "epoch": 0.83, "step": 55375 }, { "loss": 1.0552, "grad_norm": 2.3624870777130127, "learning_rate": 3.365173770688775e-05, "epoch": 0.83, "step": 55400 }, { "loss": 1.1395, "grad_norm": 1.5518018007278442, "learning_rate": 3.357664233576642e-05, "epoch": 0.83, "step": 55425 }, { "loss": 1.1172, "grad_norm": 1.820732593536377, "learning_rate": 3.35015469646451e-05, "epoch": 0.83, "step": 55450 }, { "loss": 1.1409, "grad_norm": 1.5625290870666504, "learning_rate": 3.342645159352378e-05, "epoch": 0.83, "step": 55475 }, { "loss": 1.1088, "grad_norm": 1.6013075113296509, "learning_rate": 3.335135622240245e-05, "epoch": 0.83, "step": 55500 }, { "loss": 1.1256, "grad_norm": 1.2917579412460327, "learning_rate": 3.3276260851281125e-05, "epoch": 0.83, "step": 55525 }, { "loss": 1.1221, "grad_norm": 2.3396828174591064, "learning_rate": 3.3201165480159804e-05, "epoch": 0.83, "step": 55550 }, { "loss": 1.1166, "grad_norm": 0.9828691482543945, "learning_rate": 3.312607010903848e-05, "epoch": 0.83, "step": 55575 }, { "loss": 1.0945, "grad_norm": 1.5887751579284668, "learning_rate": 3.3050974737917155e-05, "epoch": 0.84, "step": 55600 }, { "loss": 1.0711, "grad_norm": 1.1289055347442627, "learning_rate": 3.2975879366795834e-05, "epoch": 0.84, "step": 55625 }, { "loss": 1.0644, "grad_norm": 1.4812935590744019, "learning_rate": 3.2900783995674506e-05, "epoch": 0.84, "step": 55650 }, { "loss": 1.102, "grad_norm": 1.3823920488357544, "learning_rate": 3.2825688624553185e-05, "epoch": 0.84, "step": 55675 }, { "loss": 1.1011, "grad_norm": 1.7511022090911865, "learning_rate": 3.2750593253431863e-05, "epoch": 0.84, "step": 55700 }, { "loss": 1.183, "grad_norm": 1.9509655237197876, "learning_rate": 3.2675497882310536e-05, "epoch": 0.84, "step": 55725 }, { "loss": 1.0795, "grad_norm": 0.9376107454299927, "learning_rate": 3.260040251118921e-05, "epoch": 0.84, "step": 55750 }, { "loss": 1.1001, "grad_norm": 1.123742938041687, "learning_rate": 3.252530714006789e-05, "epoch": 0.84, "step": 55775 }, { "loss": 1.1292, "grad_norm": 2.6337194442749023, "learning_rate": 3.2450211768946565e-05, "epoch": 0.84, "step": 55800 }, { "loss": 1.0631, "grad_norm": 1.1294831037521362, "learning_rate": 3.237511639782524e-05, "epoch": 0.84, "step": 55825 }, { "loss": 1.1375, "grad_norm": 1.2995752096176147, "learning_rate": 3.2300021026703916e-05, "epoch": 0.84, "step": 55850 }, { "loss": 1.1308, "grad_norm": 4.79863166809082, "learning_rate": 3.2224925655582595e-05, "epoch": 0.84, "step": 55875 }, { "loss": 1.1005, "grad_norm": 1.031606674194336, "learning_rate": 3.214983028446127e-05, "epoch": 0.84, "step": 55900 }, { "loss": 1.011, "grad_norm": 1.0204112529754639, "learning_rate": 3.2074734913339946e-05, "epoch": 0.84, "step": 55925 }, { "loss": 1.1464, "grad_norm": 1.0727862119674683, "learning_rate": 3.199963954221862e-05, "epoch": 0.84, "step": 55950 }, { "loss": 1.1208, "grad_norm": 1.1318399906158447, "learning_rate": 3.192454417109729e-05, "epoch": 0.84, "step": 55975 }, { "loss": 1.1243, "grad_norm": 1.1461580991744995, "learning_rate": 3.1849448799975976e-05, "epoch": 0.84, "step": 56000 }, { "loss": 1.1367, "grad_norm": 1.8615853786468506, "learning_rate": 3.177435342885465e-05, "epoch": 0.84, "step": 56025 }, { "loss": 1.1489, "grad_norm": 1.4172084331512451, "learning_rate": 3.169925805773332e-05, "epoch": 0.84, "step": 56050 }, { "loss": 1.1035, "grad_norm": 1.299654245376587, "learning_rate": 3.1624162686612e-05, "epoch": 0.84, "step": 56075 }, { "loss": 1.1718, "grad_norm": 1.4679521322250366, "learning_rate": 3.154906731549068e-05, "epoch": 0.84, "step": 56100 }, { "loss": 1.11, "grad_norm": 2.0875778198242188, "learning_rate": 3.147397194436935e-05, "epoch": 0.84, "step": 56125 }, { "loss": 1.1843, "grad_norm": 0.9587807655334473, "learning_rate": 3.139887657324803e-05, "epoch": 0.84, "step": 56150 }, { "loss": 1.1362, "grad_norm": 2.253598690032959, "learning_rate": 3.13237812021267e-05, "epoch": 0.84, "step": 56175 }, { "loss": 1.0859, "grad_norm": 2.3193461894989014, "learning_rate": 3.124868583100538e-05, "epoch": 0.84, "step": 56200 }, { "loss": 1.1347, "grad_norm": 1.5526835918426514, "learning_rate": 3.117359045988406e-05, "epoch": 0.84, "step": 56225 }, { "loss": 1.1155, "grad_norm": 1.7318717241287231, "learning_rate": 3.109849508876273e-05, "epoch": 0.84, "step": 56250 }, { "loss": 1.1701, "grad_norm": 0.9637216925621033, "learning_rate": 3.10233997176414e-05, "epoch": 0.85, "step": 56275 }, { "loss": 1.0931, "grad_norm": 1.6898939609527588, "learning_rate": 3.094830434652008e-05, "epoch": 0.85, "step": 56300 }, { "loss": 1.1269, "grad_norm": 1.4921387434005737, "learning_rate": 3.087320897539876e-05, "epoch": 0.85, "step": 56325 }, { "loss": 1.098, "grad_norm": 1.4121395349502563, "learning_rate": 3.079811360427743e-05, "epoch": 0.85, "step": 56350 }, { "loss": 1.1518, "grad_norm": 1.144964575767517, "learning_rate": 3.072301823315611e-05, "epoch": 0.85, "step": 56375 }, { "loss": 1.1257, "grad_norm": 1.5664513111114502, "learning_rate": 3.0647922862034784e-05, "epoch": 0.85, "step": 56400 }, { "loss": 1.12, "grad_norm": 1.3426709175109863, "learning_rate": 3.057282749091346e-05, "epoch": 0.85, "step": 56425 }, { "loss": 1.0914, "grad_norm": 1.4727264642715454, "learning_rate": 3.0497732119792138e-05, "epoch": 0.85, "step": 56450 }, { "loss": 1.2281, "grad_norm": 3.242955207824707, "learning_rate": 3.0422636748670813e-05, "epoch": 0.85, "step": 56475 }, { "loss": 1.1146, "grad_norm": 1.4398702383041382, "learning_rate": 3.034754137754949e-05, "epoch": 0.85, "step": 56500 }, { "loss": 1.1887, "grad_norm": 1.533019781112671, "learning_rate": 3.0272446006428168e-05, "epoch": 0.85, "step": 56525 }, { "loss": 1.0688, "grad_norm": 1.3019578456878662, "learning_rate": 3.0197350635306843e-05, "epoch": 0.85, "step": 56550 }, { "loss": 1.0937, "grad_norm": 3.774083375930786, "learning_rate": 3.012225526418552e-05, "epoch": 0.85, "step": 56575 }, { "loss": 1.0797, "grad_norm": 1.535316824913025, "learning_rate": 3.004715989306419e-05, "epoch": 0.85, "step": 56600 }, { "loss": 1.1406, "grad_norm": 3.624013662338257, "learning_rate": 2.9972064521942866e-05, "epoch": 0.85, "step": 56625 }, { "loss": 1.0986, "grad_norm": 2.2039883136749268, "learning_rate": 2.9896969150821545e-05, "epoch": 0.85, "step": 56650 }, { "loss": 1.1318, "grad_norm": 1.0096391439437866, "learning_rate": 2.982187377970022e-05, "epoch": 0.85, "step": 56675 }, { "loss": 1.1223, "grad_norm": 1.8855111598968506, "learning_rate": 2.9746778408578896e-05, "epoch": 0.85, "step": 56700 }, { "loss": 1.0381, "grad_norm": 1.7449959516525269, "learning_rate": 2.967168303745757e-05, "epoch": 0.85, "step": 56725 }, { "loss": 1.1104, "grad_norm": 1.9731217622756958, "learning_rate": 2.959658766633625e-05, "epoch": 0.85, "step": 56750 }, { "loss": 1.1399, "grad_norm": 1.2654249668121338, "learning_rate": 2.9521492295214926e-05, "epoch": 0.85, "step": 56775 }, { "loss": 1.0705, "grad_norm": 2.2102811336517334, "learning_rate": 2.94463969240936e-05, "epoch": 0.85, "step": 56800 }, { "loss": 1.1073, "grad_norm": 1.3911298513412476, "learning_rate": 2.9371301552972274e-05, "epoch": 0.85, "step": 56825 }, { "loss": 1.132, "grad_norm": 1.2584389448165894, "learning_rate": 2.9296206181850956e-05, "epoch": 0.85, "step": 56850 }, { "loss": 1.0874, "grad_norm": 2.038180351257324, "learning_rate": 2.9221110810729628e-05, "epoch": 0.85, "step": 56875 }, { "loss": 1.1324, "grad_norm": 2.4636690616607666, "learning_rate": 2.9146015439608303e-05, "epoch": 0.85, "step": 56900 }, { "loss": 1.1063, "grad_norm": 1.3092725276947021, "learning_rate": 2.907092006848698e-05, "epoch": 0.85, "step": 56925 }, { "loss": 1.0472, "grad_norm": 1.3636903762817383, "learning_rate": 2.8995824697365654e-05, "epoch": 0.86, "step": 56950 }, { "loss": 1.1155, "grad_norm": 1.915647029876709, "learning_rate": 2.8920729326244333e-05, "epoch": 0.86, "step": 56975 }, { "loss": 1.141, "grad_norm": 1.1012380123138428, "learning_rate": 2.884563395512301e-05, "epoch": 0.86, "step": 57000 }, { "loss": 1.1004, "grad_norm": 1.4259084463119507, "learning_rate": 2.8770538584001684e-05, "epoch": 0.86, "step": 57025 }, { "loss": 1.0786, "grad_norm": 1.411359190940857, "learning_rate": 2.8695443212880356e-05, "epoch": 0.86, "step": 57050 }, { "loss": 1.1746, "grad_norm": 1.6087229251861572, "learning_rate": 2.862034784175904e-05, "epoch": 0.86, "step": 57075 }, { "loss": 1.1506, "grad_norm": 1.2207622528076172, "learning_rate": 2.854525247063771e-05, "epoch": 0.86, "step": 57100 }, { "loss": 1.177, "grad_norm": 1.542277455329895, "learning_rate": 2.8470157099516386e-05, "epoch": 0.86, "step": 57125 }, { "loss": 1.1666, "grad_norm": 1.7982580661773682, "learning_rate": 2.839506172839506e-05, "epoch": 0.86, "step": 57150 }, { "loss": 1.1294, "grad_norm": 1.2220053672790527, "learning_rate": 2.831996635727374e-05, "epoch": 0.86, "step": 57175 }, { "loss": 1.1134, "grad_norm": 2.173220157623291, "learning_rate": 2.8244870986152416e-05, "epoch": 0.86, "step": 57200 }, { "loss": 1.1039, "grad_norm": 1.5563225746154785, "learning_rate": 2.816977561503109e-05, "epoch": 0.86, "step": 57225 }, { "loss": 1.1248, "grad_norm": 1.477427363395691, "learning_rate": 2.8094680243909767e-05, "epoch": 0.86, "step": 57250 }, { "loss": 1.106, "grad_norm": 1.374334692955017, "learning_rate": 2.801958487278844e-05, "epoch": 0.86, "step": 57275 }, { "loss": 1.1058, "grad_norm": 1.437056541442871, "learning_rate": 2.794448950166712e-05, "epoch": 0.86, "step": 57300 }, { "loss": 1.1278, "grad_norm": 1.2822084426879883, "learning_rate": 2.7869394130545797e-05, "epoch": 0.86, "step": 57325 }, { "loss": 1.1003, "grad_norm": 0.9716039299964905, "learning_rate": 2.779429875942447e-05, "epoch": 0.86, "step": 57350 }, { "loss": 1.0853, "grad_norm": 1.4873470067977905, "learning_rate": 2.7719203388303144e-05, "epoch": 0.86, "step": 57375 }, { "loss": 1.0961, "grad_norm": 1.8672046661376953, "learning_rate": 2.7644108017181823e-05, "epoch": 0.86, "step": 57400 }, { "loss": 1.1141, "grad_norm": 1.4299520254135132, "learning_rate": 2.75690126460605e-05, "epoch": 0.86, "step": 57425 }, { "loss": 1.1041, "grad_norm": 2.402892827987671, "learning_rate": 2.7493917274939174e-05, "epoch": 0.86, "step": 57450 }, { "loss": 1.1231, "grad_norm": 1.2294812202453613, "learning_rate": 2.741882190381785e-05, "epoch": 0.86, "step": 57475 }, { "loss": 1.127, "grad_norm": 1.7184091806411743, "learning_rate": 2.734372653269653e-05, "epoch": 0.86, "step": 57500 }, { "loss": 1.1925, "grad_norm": 1.3573827743530273, "learning_rate": 2.7268631161575204e-05, "epoch": 0.86, "step": 57525 }, { "loss": 1.1482, "grad_norm": 1.7570611238479614, "learning_rate": 2.719353579045388e-05, "epoch": 0.86, "step": 57550 }, { "loss": 1.1096, "grad_norm": 1.4989982843399048, "learning_rate": 2.711844041933255e-05, "epoch": 0.86, "step": 57575 }, { "loss": 1.0666, "grad_norm": 1.59767746925354, "learning_rate": 2.7043345048211227e-05, "epoch": 0.87, "step": 57600 }, { "loss": 1.125, "grad_norm": 1.3916709423065186, "learning_rate": 2.6968249677089906e-05, "epoch": 0.87, "step": 57625 }, { "loss": 1.038, "grad_norm": 1.6316527128219604, "learning_rate": 2.689315430596858e-05, "epoch": 0.87, "step": 57650 }, { "loss": 1.0536, "grad_norm": 1.4137283563613892, "learning_rate": 2.6818058934847257e-05, "epoch": 0.87, "step": 57675 }, { "loss": 1.0917, "grad_norm": 1.9997875690460205, "learning_rate": 2.6742963563725932e-05, "epoch": 0.87, "step": 57700 }, { "loss": 1.0194, "grad_norm": 1.3425413370132446, "learning_rate": 2.666786819260461e-05, "epoch": 0.87, "step": 57725 }, { "loss": 1.0669, "grad_norm": 0.9919766783714294, "learning_rate": 2.6592772821483287e-05, "epoch": 0.87, "step": 57750 }, { "loss": 1.15, "grad_norm": 1.5039972066879272, "learning_rate": 2.6517677450361962e-05, "epoch": 0.87, "step": 57775 }, { "loss": 1.0898, "grad_norm": 2.429229974746704, "learning_rate": 2.6442582079240634e-05, "epoch": 0.87, "step": 57800 }, { "loss": 1.1132, "grad_norm": 1.3402752876281738, "learning_rate": 2.6367486708119316e-05, "epoch": 0.87, "step": 57825 }, { "loss": 1.0645, "grad_norm": 1.041297435760498, "learning_rate": 2.629239133699799e-05, "epoch": 0.87, "step": 57850 }, { "loss": 1.105, "grad_norm": 1.0299885272979736, "learning_rate": 2.6217295965876664e-05, "epoch": 0.87, "step": 57875 }, { "loss": 1.1492, "grad_norm": 1.3811683654785156, "learning_rate": 2.614220059475534e-05, "epoch": 0.87, "step": 57900 }, { "loss": 1.1381, "grad_norm": 1.3449524641036987, "learning_rate": 2.6067105223634015e-05, "epoch": 0.87, "step": 57925 }, { "loss": 1.1077, "grad_norm": 1.1133577823638916, "learning_rate": 2.5995013667357543e-05, "epoch": 0.87, "step": 57950 }, { "loss": 1.1155, "grad_norm": 1.2379744052886963, "learning_rate": 2.5919918296236222e-05, "epoch": 0.87, "step": 57975 }, { "loss": 1.0956, "grad_norm": 1.4516429901123047, "learning_rate": 2.5844822925114898e-05, "epoch": 0.87, "step": 58000 }, { "loss": 1.157, "grad_norm": 2.1705074310302734, "learning_rate": 2.5769727553993573e-05, "epoch": 0.87, "step": 58025 }, { "loss": 1.1116, "grad_norm": 1.8582936525344849, "learning_rate": 2.5694632182872245e-05, "epoch": 0.87, "step": 58050 }, { "loss": 1.0901, "grad_norm": 1.2407045364379883, "learning_rate": 2.5619536811750928e-05, "epoch": 0.87, "step": 58075 }, { "loss": 1.0979, "grad_norm": 1.4852651357650757, "learning_rate": 2.5544441440629603e-05, "epoch": 0.87, "step": 58100 }, { "loss": 1.1655, "grad_norm": 1.1345808506011963, "learning_rate": 2.5469346069508275e-05, "epoch": 0.87, "step": 58125 }, { "loss": 1.1008, "grad_norm": 1.741289734840393, "learning_rate": 2.539425069838695e-05, "epoch": 0.87, "step": 58150 }, { "loss": 1.1831, "grad_norm": 1.26760995388031, "learning_rate": 2.532215914211048e-05, "epoch": 0.87, "step": 58175 }, { "loss": 1.0911, "grad_norm": 1.9289544820785522, "learning_rate": 2.524706377098916e-05, "epoch": 0.87, "step": 58200 }, { "loss": 1.0442, "grad_norm": 1.9321314096450806, "learning_rate": 2.5171968399867834e-05, "epoch": 0.87, "step": 58225 }, { "loss": 1.1097, "grad_norm": 1.1289350986480713, "learning_rate": 2.509687302874651e-05, "epoch": 0.87, "step": 58250 }, { "loss": 1.0505, "grad_norm": 1.3914735317230225, "learning_rate": 2.5021777657625184e-05, "epoch": 0.88, "step": 58275 }, { "loss": 1.0804, "grad_norm": 1.2914477586746216, "learning_rate": 2.494668228650386e-05, "epoch": 0.88, "step": 58300 }, { "loss": 1.064, "grad_norm": 1.1069772243499756, "learning_rate": 2.4871586915382535e-05, "epoch": 0.88, "step": 58325 }, { "loss": 1.0294, "grad_norm": 1.7709311246871948, "learning_rate": 2.4796491544261214e-05, "epoch": 0.88, "step": 58350 }, { "loss": 1.0995, "grad_norm": 1.3731812238693237, "learning_rate": 2.4721396173139886e-05, "epoch": 0.88, "step": 58375 }, { "loss": 1.0574, "grad_norm": 1.3423503637313843, "learning_rate": 2.4646300802018565e-05, "epoch": 0.88, "step": 58400 }, { "loss": 1.1297, "grad_norm": 1.5664671659469604, "learning_rate": 2.457120543089724e-05, "epoch": 0.88, "step": 58425 }, { "loss": 1.1686, "grad_norm": 1.7989689111709595, "learning_rate": 2.4496110059775916e-05, "epoch": 0.88, "step": 58450 }, { "loss": 1.063, "grad_norm": 2.50423526763916, "learning_rate": 2.442101468865459e-05, "epoch": 0.88, "step": 58475 }, { "loss": 1.1528, "grad_norm": 2.081894636154175, "learning_rate": 2.434591931753327e-05, "epoch": 0.88, "step": 58500 }, { "loss": 1.0845, "grad_norm": 1.7260534763336182, "learning_rate": 2.4270823946411943e-05, "epoch": 0.88, "step": 58525 }, { "loss": 1.1555, "grad_norm": 1.6785259246826172, "learning_rate": 2.419572857529062e-05, "epoch": 0.88, "step": 58550 }, { "loss": 1.1315, "grad_norm": 1.5150628089904785, "learning_rate": 2.4120633204169297e-05, "epoch": 0.88, "step": 58575 }, { "loss": 1.1247, "grad_norm": 1.3232154846191406, "learning_rate": 2.4045537833047972e-05, "epoch": 0.88, "step": 58600 }, { "loss": 1.1638, "grad_norm": 1.435685157775879, "learning_rate": 2.3970442461926648e-05, "epoch": 0.88, "step": 58625 }, { "loss": 1.1466, "grad_norm": 1.562098741531372, "learning_rate": 2.3895347090805323e-05, "epoch": 0.88, "step": 58650 }, { "loss": 1.0823, "grad_norm": 1.6774852275848389, "learning_rate": 2.3820251719684e-05, "epoch": 0.88, "step": 58675 }, { "loss": 1.1152, "grad_norm": 2.8691372871398926, "learning_rate": 2.3745156348562674e-05, "epoch": 0.88, "step": 58700 }, { "loss": 1.0675, "grad_norm": 1.2133371829986572, "learning_rate": 2.3670060977441353e-05, "epoch": 0.88, "step": 58725 }, { "loss": 1.1358, "grad_norm": 1.280999779701233, "learning_rate": 2.3594965606320025e-05, "epoch": 0.88, "step": 58750 }, { "loss": 1.073, "grad_norm": 2.144066333770752, "learning_rate": 2.3519870235198704e-05, "epoch": 0.88, "step": 58775 }, { "loss": 1.1203, "grad_norm": 1.4125479459762573, "learning_rate": 2.344477486407738e-05, "epoch": 0.88, "step": 58800 }, { "loss": 1.1467, "grad_norm": 1.402156949043274, "learning_rate": 2.3369679492956055e-05, "epoch": 0.88, "step": 58825 }, { "loss": 1.1691, "grad_norm": 1.3000797033309937, "learning_rate": 2.329458412183473e-05, "epoch": 0.88, "step": 58850 }, { "loss": 1.1105, "grad_norm": 1.9694422483444214, "learning_rate": 2.321948875071341e-05, "epoch": 0.88, "step": 58875 }, { "loss": 1.098, "grad_norm": 1.4404619932174683, "learning_rate": 2.314439337959208e-05, "epoch": 0.88, "step": 58900 }, { "loss": 1.0979, "grad_norm": 2.1054556369781494, "learning_rate": 2.3069298008470757e-05, "epoch": 0.88, "step": 58925 }, { "loss": 1.1015, "grad_norm": 1.2658005952835083, "learning_rate": 2.2994202637349436e-05, "epoch": 0.89, "step": 58950 }, { "loss": 1.1349, "grad_norm": 1.4039870500564575, "learning_rate": 2.291910726622811e-05, "epoch": 0.89, "step": 58975 }, { "loss": 1.0923, "grad_norm": 1.5480154752731323, "learning_rate": 2.2844011895106787e-05, "epoch": 0.89, "step": 59000 }, { "loss": 1.1652, "grad_norm": 1.9261832237243652, "learning_rate": 2.2768916523985462e-05, "epoch": 0.89, "step": 59025 }, { "loss": 1.0539, "grad_norm": 1.2835638523101807, "learning_rate": 2.2693821152864138e-05, "epoch": 0.89, "step": 59050 }, { "loss": 1.0895, "grad_norm": 1.7522798776626587, "learning_rate": 2.2618725781742813e-05, "epoch": 0.89, "step": 59075 }, { "loss": 1.0988, "grad_norm": 1.2995007038116455, "learning_rate": 2.2543630410621492e-05, "epoch": 0.89, "step": 59100 }, { "loss": 1.0471, "grad_norm": 1.5621485710144043, "learning_rate": 2.2468535039500164e-05, "epoch": 0.89, "step": 59125 }, { "loss": 1.1299, "grad_norm": 3.184175968170166, "learning_rate": 2.2393439668378843e-05, "epoch": 0.89, "step": 59150 }, { "loss": 1.201, "grad_norm": 1.7400543689727783, "learning_rate": 2.231834429725752e-05, "epoch": 0.89, "step": 59175 }, { "loss": 1.148, "grad_norm": 1.880234956741333, "learning_rate": 2.2243248926136194e-05, "epoch": 0.89, "step": 59200 }, { "loss": 1.0385, "grad_norm": 1.2461950778961182, "learning_rate": 2.216815355501487e-05, "epoch": 0.89, "step": 59225 }, { "loss": 1.1849, "grad_norm": 2.8920862674713135, "learning_rate": 2.2093058183893545e-05, "epoch": 0.89, "step": 59250 }, { "loss": 1.0813, "grad_norm": 1.3439332246780396, "learning_rate": 2.201796281277222e-05, "epoch": 0.89, "step": 59275 }, { "loss": 1.0912, "grad_norm": 1.2441843748092651, "learning_rate": 2.1942867441650896e-05, "epoch": 0.89, "step": 59300 }, { "loss": 1.121, "grad_norm": 1.5612194538116455, "learning_rate": 2.1867772070529575e-05, "epoch": 0.89, "step": 59325 }, { "loss": 1.1187, "grad_norm": 2.292187213897705, "learning_rate": 2.179267669940825e-05, "epoch": 0.89, "step": 59350 }, { "loss": 1.098, "grad_norm": 1.3217053413391113, "learning_rate": 2.1717581328286926e-05, "epoch": 0.89, "step": 59375 }, { "loss": 1.07, "grad_norm": 2.108124017715454, "learning_rate": 2.16424859571656e-05, "epoch": 0.89, "step": 59400 }, { "loss": 1.1967, "grad_norm": 1.461854100227356, "learning_rate": 2.1567390586044277e-05, "epoch": 0.89, "step": 59425 }, { "loss": 1.0829, "grad_norm": 2.4140448570251465, "learning_rate": 2.1492295214922952e-05, "epoch": 0.89, "step": 59450 }, { "loss": 1.084, "grad_norm": 1.3833210468292236, "learning_rate": 2.141719984380163e-05, "epoch": 0.89, "step": 59475 }, { "loss": 1.0312, "grad_norm": 1.463707447052002, "learning_rate": 2.1342104472680303e-05, "epoch": 0.89, "step": 59500 }, { "loss": 1.0947, "grad_norm": 1.0634888410568237, "learning_rate": 2.1267009101558982e-05, "epoch": 0.89, "step": 59525 }, { "loss": 1.1448, "grad_norm": 1.6115715503692627, "learning_rate": 2.1191913730437658e-05, "epoch": 0.89, "step": 59550 }, { "loss": 1.1279, "grad_norm": 2.023573398590088, "learning_rate": 2.1116818359316333e-05, "epoch": 0.89, "step": 59575 }, { "loss": 1.0725, "grad_norm": 1.3353021144866943, "learning_rate": 2.104172298819501e-05, "epoch": 0.9, "step": 59600 }, { "loss": 1.1597, "grad_norm": 2.067376136779785, "learning_rate": 2.0966627617073684e-05, "epoch": 0.9, "step": 59625 }, { "loss": 1.0665, "grad_norm": 1.4394888877868652, "learning_rate": 2.089153224595236e-05, "epoch": 0.9, "step": 59650 }, { "loss": 1.1449, "grad_norm": 1.1642546653747559, "learning_rate": 2.0816436874831035e-05, "epoch": 0.9, "step": 59675 }, { "loss": 1.0973, "grad_norm": 1.6994637250900269, "learning_rate": 2.0741341503709714e-05, "epoch": 0.9, "step": 59700 }, { "loss": 1.1076, "grad_norm": 2.0998518466949463, "learning_rate": 2.066624613258839e-05, "epoch": 0.9, "step": 59725 }, { "loss": 1.1332, "grad_norm": 1.608519196510315, "learning_rate": 2.0591150761467065e-05, "epoch": 0.9, "step": 59750 }, { "loss": 1.0639, "grad_norm": 5.120492935180664, "learning_rate": 2.051605539034574e-05, "epoch": 0.9, "step": 59775 }, { "loss": 1.1295, "grad_norm": 1.2980087995529175, "learning_rate": 2.0440960019224416e-05, "epoch": 0.9, "step": 59800 }, { "loss": 1.0998, "grad_norm": 1.518433928489685, "learning_rate": 2.036586464810309e-05, "epoch": 0.9, "step": 59825 }, { "loss": 1.0911, "grad_norm": 1.1310094594955444, "learning_rate": 2.0290769276981767e-05, "epoch": 0.9, "step": 59850 }, { "loss": 1.1567, "grad_norm": 0.9931915998458862, "learning_rate": 2.0215673905860442e-05, "epoch": 0.9, "step": 59875 }, { "loss": 1.1115, "grad_norm": 2.011012077331543, "learning_rate": 2.0140578534739118e-05, "epoch": 0.9, "step": 59900 }, { "loss": 1.119, "grad_norm": 1.6782035827636719, "learning_rate": 2.0065483163617797e-05, "epoch": 0.9, "step": 59925 }, { "loss": 1.0836, "grad_norm": 1.6010968685150146, "learning_rate": 1.9990387792496472e-05, "epoch": 0.9, "step": 59950 }, { "loss": 1.0171, "grad_norm": 1.8368406295776367, "learning_rate": 1.9915292421375148e-05, "epoch": 0.9, "step": 59975 }, { "loss": 1.1612, "grad_norm": 1.6433417797088623, "learning_rate": 1.9840197050253823e-05, "epoch": 0.9, "step": 60000 }, { "loss": 1.1051, "grad_norm": 1.0590778589248657, "learning_rate": 1.97651016791325e-05, "epoch": 0.9, "step": 60025 }, { "loss": 1.1467, "grad_norm": 2.4711523056030273, "learning_rate": 1.9690006308011174e-05, "epoch": 0.9, "step": 60050 }, { "loss": 1.1249, "grad_norm": 1.817872166633606, "learning_rate": 1.9614910936889853e-05, "epoch": 0.9, "step": 60075 }, { "loss": 1.1509, "grad_norm": 1.9354240894317627, "learning_rate": 1.953981556576853e-05, "epoch": 0.9, "step": 60100 }, { "loss": 1.1021, "grad_norm": 2.382876396179199, "learning_rate": 1.9464720194647204e-05, "epoch": 0.9, "step": 60125 }, { "loss": 1.0521, "grad_norm": 1.6886651515960693, "learning_rate": 1.938962482352588e-05, "epoch": 0.9, "step": 60150 }, { "loss": 1.0828, "grad_norm": 1.0179933309555054, "learning_rate": 1.9314529452404555e-05, "epoch": 0.9, "step": 60175 }, { "loss": 1.0767, "grad_norm": 1.041438102722168, "learning_rate": 1.923943408128323e-05, "epoch": 0.9, "step": 60200 }, { "loss": 1.0739, "grad_norm": 1.1048403978347778, "learning_rate": 1.9164338710161906e-05, "epoch": 0.9, "step": 60225 }, { "loss": 1.1025, "grad_norm": 1.113214373588562, "learning_rate": 1.908924333904058e-05, "epoch": 0.9, "step": 60250 }, { "loss": 1.1523, "grad_norm": 2.7546420097351074, "learning_rate": 1.9014147967919257e-05, "epoch": 0.91, "step": 60275 }, { "loss": 1.043, "grad_norm": 1.3055835962295532, "learning_rate": 1.8939052596797936e-05, "epoch": 0.91, "step": 60300 }, { "loss": 1.0768, "grad_norm": 1.9900767803192139, "learning_rate": 1.8866961040521464e-05, "epoch": 0.91, "step": 60325 }, { "loss": 1.1571, "grad_norm": 1.605908751487732, "learning_rate": 1.879186566940014e-05, "epoch": 0.91, "step": 60350 }, { "loss": 1.1399, "grad_norm": 1.9245578050613403, "learning_rate": 1.8716770298278815e-05, "epoch": 0.91, "step": 60375 }, { "loss": 1.0959, "grad_norm": 1.897222638130188, "learning_rate": 1.864167492715749e-05, "epoch": 0.91, "step": 60400 }, { "loss": 1.1132, "grad_norm": 2.4311060905456543, "learning_rate": 1.856657955603617e-05, "epoch": 0.91, "step": 60425 }, { "loss": 1.1102, "grad_norm": 1.378459095954895, "learning_rate": 1.849148418491484e-05, "epoch": 0.91, "step": 60450 }, { "loss": 1.0765, "grad_norm": 2.5490572452545166, "learning_rate": 1.841638881379352e-05, "epoch": 0.91, "step": 60475 }, { "loss": 1.1314, "grad_norm": 1.2700508832931519, "learning_rate": 1.8341293442672196e-05, "epoch": 0.91, "step": 60500 }, { "loss": 1.1452, "grad_norm": 1.636888027191162, "learning_rate": 1.826619807155087e-05, "epoch": 0.91, "step": 60525 }, { "loss": 1.0367, "grad_norm": 1.4893200397491455, "learning_rate": 1.8191102700429547e-05, "epoch": 0.91, "step": 60550 }, { "loss": 1.0222, "grad_norm": 0.9594138860702515, "learning_rate": 1.8116007329308222e-05, "epoch": 0.91, "step": 60575 }, { "loss": 1.115, "grad_norm": 2.1128294467926025, "learning_rate": 1.8040911958186898e-05, "epoch": 0.91, "step": 60600 }, { "loss": 1.1439, "grad_norm": 1.5294193029403687, "learning_rate": 1.7965816587065573e-05, "epoch": 0.91, "step": 60625 }, { "loss": 1.121, "grad_norm": 1.5716066360473633, "learning_rate": 1.7890721215944252e-05, "epoch": 0.91, "step": 60650 }, { "loss": 1.0839, "grad_norm": 1.2781248092651367, "learning_rate": 1.7815625844822924e-05, "epoch": 0.91, "step": 60675 }, { "loss": 1.1282, "grad_norm": 1.1077611446380615, "learning_rate": 1.7740530473701603e-05, "epoch": 0.91, "step": 60700 }, { "loss": 1.1352, "grad_norm": 1.1450996398925781, "learning_rate": 1.766543510258028e-05, "epoch": 0.91, "step": 60725 }, { "loss": 1.0536, "grad_norm": 1.3349822759628296, "learning_rate": 1.7590339731458954e-05, "epoch": 0.91, "step": 60750 }, { "loss": 1.13, "grad_norm": 1.6306883096694946, "learning_rate": 1.751524436033763e-05, "epoch": 0.91, "step": 60775 }, { "loss": 1.1103, "grad_norm": 1.7724149227142334, "learning_rate": 1.744014898921631e-05, "epoch": 0.91, "step": 60800 }, { "loss": 1.0242, "grad_norm": 1.5324548482894897, "learning_rate": 1.736505361809498e-05, "epoch": 0.91, "step": 60825 }, { "loss": 1.0809, "grad_norm": 0.8965089917182922, "learning_rate": 1.728995824697366e-05, "epoch": 0.91, "step": 60850 }, { "loss": 1.1012, "grad_norm": 1.551774263381958, "learning_rate": 1.7214862875852335e-05, "epoch": 0.91, "step": 60875 }, { "loss": 1.1264, "grad_norm": 1.070957064628601, "learning_rate": 1.7139767504731007e-05, "epoch": 0.91, "step": 60900 }, { "loss": 1.1098, "grad_norm": 1.5298128128051758, "learning_rate": 1.7064672133609686e-05, "epoch": 0.92, "step": 60925 }, { "loss": 1.089, "grad_norm": 1.6738872528076172, "learning_rate": 1.698957676248836e-05, "epoch": 0.92, "step": 60950 }, { "loss": 1.023, "grad_norm": 1.2859163284301758, "learning_rate": 1.6914481391367037e-05, "epoch": 0.92, "step": 60975 }, { "loss": 1.227, "grad_norm": 1.181386113166809, "learning_rate": 1.6839386020245712e-05, "epoch": 0.92, "step": 61000 }, { "loss": 1.1462, "grad_norm": 1.9334174394607544, "learning_rate": 1.676429064912439e-05, "epoch": 0.92, "step": 61025 }, { "loss": 1.0915, "grad_norm": 1.1935040950775146, "learning_rate": 1.6689195278003063e-05, "epoch": 0.92, "step": 61050 }, { "loss": 1.029, "grad_norm": 1.1765645742416382, "learning_rate": 1.6614099906881742e-05, "epoch": 0.92, "step": 61075 }, { "loss": 1.15, "grad_norm": 2.0349085330963135, "learning_rate": 1.6539004535760417e-05, "epoch": 0.92, "step": 61100 }, { "loss": 1.1565, "grad_norm": 2.330791711807251, "learning_rate": 1.6463909164639093e-05, "epoch": 0.92, "step": 61125 }, { "loss": 1.0929, "grad_norm": 1.6865901947021484, "learning_rate": 1.638881379351777e-05, "epoch": 0.92, "step": 61150 }, { "loss": 1.199, "grad_norm": 2.1335840225219727, "learning_rate": 1.6313718422396444e-05, "epoch": 0.92, "step": 61175 }, { "loss": 1.1875, "grad_norm": 0.9578272104263306, "learning_rate": 1.623862305127512e-05, "epoch": 0.92, "step": 61200 }, { "loss": 1.117, "grad_norm": 1.564257025718689, "learning_rate": 1.6163527680153795e-05, "epoch": 0.92, "step": 61225 }, { "loss": 1.1286, "grad_norm": 2.076204538345337, "learning_rate": 1.6088432309032474e-05, "epoch": 0.92, "step": 61250 }, { "loss": 1.1491, "grad_norm": 1.695163607597351, "learning_rate": 1.6013336937911146e-05, "epoch": 0.92, "step": 61275 }, { "loss": 1.1108, "grad_norm": 1.0644354820251465, "learning_rate": 1.5938241566789825e-05, "epoch": 0.92, "step": 61300 }, { "loss": 1.0865, "grad_norm": 1.13369619846344, "learning_rate": 1.58631461956685e-05, "epoch": 0.92, "step": 61325 }, { "loss": 1.09, "grad_norm": 0.8873293995857239, "learning_rate": 1.5788050824547176e-05, "epoch": 0.92, "step": 61350 }, { "loss": 1.1836, "grad_norm": 1.4285056591033936, "learning_rate": 1.571295545342585e-05, "epoch": 0.92, "step": 61375 }, { "loss": 1.1597, "grad_norm": 0.8853715658187866, "learning_rate": 1.563786008230453e-05, "epoch": 0.92, "step": 61400 }, { "loss": 1.1089, "grad_norm": 1.2858846187591553, "learning_rate": 1.5562764711183202e-05, "epoch": 0.92, "step": 61425 }, { "loss": 1.1041, "grad_norm": 1.2523924112319946, "learning_rate": 1.548766934006188e-05, "epoch": 0.92, "step": 61450 }, { "loss": 1.1523, "grad_norm": 1.9986999034881592, "learning_rate": 1.5412573968940556e-05, "epoch": 0.92, "step": 61475 }, { "loss": 1.0461, "grad_norm": 2.06295108795166, "learning_rate": 1.533747859781923e-05, "epoch": 0.92, "step": 61500 }, { "loss": 1.0988, "grad_norm": 0.9304774403572083, "learning_rate": 1.5262383226697907e-05, "epoch": 0.92, "step": 61525 }, { "loss": 1.1569, "grad_norm": 1.6813061237335205, "learning_rate": 1.5187287855576581e-05, "epoch": 0.92, "step": 61550 }, { "loss": 1.1434, "grad_norm": 1.320822834968567, "learning_rate": 1.511219248445526e-05, "epoch": 0.92, "step": 61575 }, { "loss": 1.0182, "grad_norm": 1.2629307508468628, "learning_rate": 1.5037097113333934e-05, "epoch": 0.93, "step": 61600 }, { "loss": 1.0842, "grad_norm": 2.044494390487671, "learning_rate": 1.4962001742212611e-05, "epoch": 0.93, "step": 61625 }, { "loss": 1.0935, "grad_norm": 1.1815024614334106, "learning_rate": 1.4886906371091286e-05, "epoch": 0.93, "step": 61650 }, { "loss": 1.1862, "grad_norm": 1.5776236057281494, "learning_rate": 1.4811810999969964e-05, "epoch": 0.93, "step": 61675 }, { "loss": 1.0616, "grad_norm": 2.1838979721069336, "learning_rate": 1.4736715628848637e-05, "epoch": 0.93, "step": 61700 }, { "loss": 1.1325, "grad_norm": 1.7168885469436646, "learning_rate": 1.4661620257727315e-05, "epoch": 0.93, "step": 61725 }, { "loss": 1.1015, "grad_norm": 1.0847703218460083, "learning_rate": 1.458652488660599e-05, "epoch": 0.93, "step": 61750 }, { "loss": 1.0838, "grad_norm": 1.3423173427581787, "learning_rate": 1.4511429515484667e-05, "epoch": 0.93, "step": 61775 }, { "loss": 1.1676, "grad_norm": 1.405914306640625, "learning_rate": 1.4436334144363343e-05, "epoch": 0.93, "step": 61800 }, { "loss": 1.1124, "grad_norm": 1.570953607559204, "learning_rate": 1.4361238773242017e-05, "epoch": 0.93, "step": 61825 }, { "loss": 1.1462, "grad_norm": 2.7975118160247803, "learning_rate": 1.4286143402120694e-05, "epoch": 0.93, "step": 61850 }, { "loss": 1.117, "grad_norm": 3.7135069370269775, "learning_rate": 1.421104803099937e-05, "epoch": 0.93, "step": 61875 }, { "loss": 1.1057, "grad_norm": 1.8608477115631104, "learning_rate": 1.4135952659878046e-05, "epoch": 0.93, "step": 61900 }, { "loss": 1.0607, "grad_norm": 1.1181379556655884, "learning_rate": 1.406085728875672e-05, "epoch": 0.93, "step": 61925 }, { "loss": 1.1118, "grad_norm": 1.5385795831680298, "learning_rate": 1.3985761917635399e-05, "epoch": 0.93, "step": 61950 }, { "loss": 1.1192, "grad_norm": 1.4369099140167236, "learning_rate": 1.3910666546514073e-05, "epoch": 0.93, "step": 61975 }, { "loss": 1.1036, "grad_norm": 1.2244880199432373, "learning_rate": 1.383557117539275e-05, "epoch": 0.93, "step": 62000 }, { "loss": 1.0913, "grad_norm": 1.3832460641860962, "learning_rate": 1.3760475804271425e-05, "epoch": 0.93, "step": 62025 }, { "loss": 1.1684, "grad_norm": 0.9169008135795593, "learning_rate": 1.3685380433150103e-05, "epoch": 0.93, "step": 62050 }, { "loss": 1.1886, "grad_norm": 2.110548973083496, "learning_rate": 1.3610285062028776e-05, "epoch": 0.93, "step": 62075 }, { "loss": 1.1511, "grad_norm": 1.203637957572937, "learning_rate": 1.3535189690907454e-05, "epoch": 0.93, "step": 62100 }, { "loss": 1.1469, "grad_norm": 1.3341647386550903, "learning_rate": 1.3460094319786129e-05, "epoch": 0.93, "step": 62125 }, { "loss": 1.1022, "grad_norm": 1.5815610885620117, "learning_rate": 1.3384998948664803e-05, "epoch": 0.93, "step": 62150 }, { "loss": 1.0537, "grad_norm": 1.7284424304962158, "learning_rate": 1.3309903577543482e-05, "epoch": 0.93, "step": 62175 }, { "loss": 1.0917, "grad_norm": 1.2951127290725708, "learning_rate": 1.3234808206422156e-05, "epoch": 0.93, "step": 62200 }, { "loss": 1.0818, "grad_norm": 1.735390305519104, "learning_rate": 1.3159712835300833e-05, "epoch": 0.93, "step": 62225 }, { "loss": 1.1024, "grad_norm": 0.9933769702911377, "learning_rate": 1.3084617464179508e-05, "epoch": 0.93, "step": 62250 }, { "loss": 1.1004, "grad_norm": 1.7689695358276367, "learning_rate": 1.3009522093058185e-05, "epoch": 0.94, "step": 62275 }, { "loss": 1.1127, "grad_norm": 1.0094436407089233, "learning_rate": 1.2934426721936859e-05, "epoch": 0.94, "step": 62300 }, { "loss": 1.086, "grad_norm": 1.3532946109771729, "learning_rate": 1.2859331350815538e-05, "epoch": 0.94, "step": 62325 }, { "loss": 1.1568, "grad_norm": 1.7976974248886108, "learning_rate": 1.2784235979694212e-05, "epoch": 0.94, "step": 62350 }, { "loss": 1.1215, "grad_norm": 1.748487114906311, "learning_rate": 1.2709140608572889e-05, "epoch": 0.94, "step": 62375 }, { "loss": 1.0682, "grad_norm": 1.3425058126449585, "learning_rate": 1.2634045237451564e-05, "epoch": 0.94, "step": 62400 }, { "loss": 1.0849, "grad_norm": 2.302241802215576, "learning_rate": 1.2558949866330242e-05, "epoch": 0.94, "step": 62425 }, { "loss": 1.0932, "grad_norm": 1.4172135591506958, "learning_rate": 1.2483854495208915e-05, "epoch": 0.94, "step": 62450 }, { "loss": 1.1309, "grad_norm": 1.8036898374557495, "learning_rate": 1.2408759124087593e-05, "epoch": 0.94, "step": 62475 }, { "loss": 1.1123, "grad_norm": 2.0429811477661133, "learning_rate": 1.2333663752966268e-05, "epoch": 0.94, "step": 62500 }, { "loss": 1.1829, "grad_norm": 1.2276302576065063, "learning_rate": 1.2258568381844944e-05, "epoch": 0.94, "step": 62525 }, { "loss": 1.1149, "grad_norm": 1.4691849946975708, "learning_rate": 1.218347301072362e-05, "epoch": 0.94, "step": 62550 }, { "loss": 1.1291, "grad_norm": 1.780098557472229, "learning_rate": 1.2108377639602296e-05, "epoch": 0.94, "step": 62575 }, { "loss": 1.0855, "grad_norm": 1.4932245016098022, "learning_rate": 1.2033282268480972e-05, "epoch": 0.94, "step": 62600 }, { "loss": 1.0832, "grad_norm": 1.277098536491394, "learning_rate": 1.1958186897359647e-05, "epoch": 0.94, "step": 62625 }, { "loss": 1.2369, "grad_norm": 1.7345349788665771, "learning_rate": 1.1883091526238323e-05, "epoch": 0.94, "step": 62650 }, { "loss": 1.1604, "grad_norm": 1.9038455486297607, "learning_rate": 1.1807996155116998e-05, "epoch": 0.94, "step": 62675 }, { "loss": 1.1065, "grad_norm": 1.0243260860443115, "learning_rate": 1.1732900783995675e-05, "epoch": 0.94, "step": 62700 }, { "loss": 1.1043, "grad_norm": 0.9342716336250305, "learning_rate": 1.165780541287435e-05, "epoch": 0.94, "step": 62725 }, { "loss": 1.1245, "grad_norm": 1.554945707321167, "learning_rate": 1.1582710041753026e-05, "epoch": 0.94, "step": 62750 }, { "loss": 1.074, "grad_norm": 1.1340545415878296, "learning_rate": 1.1507614670631703e-05, "epoch": 0.94, "step": 62775 }, { "loss": 1.1226, "grad_norm": 2.2141757011413574, "learning_rate": 1.1432519299510379e-05, "epoch": 0.94, "step": 62800 }, { "loss": 1.1022, "grad_norm": 1.2455902099609375, "learning_rate": 1.1357423928389054e-05, "epoch": 0.94, "step": 62825 }, { "loss": 1.135, "grad_norm": 1.0841847658157349, "learning_rate": 1.1282328557267732e-05, "epoch": 0.94, "step": 62850 }, { "loss": 1.1242, "grad_norm": 2.3354759216308594, "learning_rate": 1.1207233186146407e-05, "epoch": 0.94, "step": 62875 }, { "loss": 1.1036, "grad_norm": 1.0070022344589233, "learning_rate": 1.1132137815025083e-05, "epoch": 0.94, "step": 62900 }, { "loss": 1.0723, "grad_norm": 1.8489924669265747, "learning_rate": 1.105704244390376e-05, "epoch": 0.95, "step": 62925 }, { "loss": 1.0952, "grad_norm": 1.4337140321731567, "learning_rate": 1.0981947072782433e-05, "epoch": 0.95, "step": 62950 }, { "loss": 1.092, "grad_norm": 1.2222257852554321, "learning_rate": 1.0906851701661109e-05, "epoch": 0.95, "step": 62975 }, { "loss": 1.1448, "grad_norm": 1.270473837852478, "learning_rate": 1.0831756330539786e-05, "epoch": 0.95, "step": 63000 }, { "loss": 1.186, "grad_norm": 2.165717363357544, "learning_rate": 1.0756660959418462e-05, "epoch": 0.95, "step": 63025 }, { "loss": 1.0859, "grad_norm": 1.2544116973876953, "learning_rate": 1.0681565588297137e-05, "epoch": 0.95, "step": 63050 }, { "loss": 1.1105, "grad_norm": 1.404388666152954, "learning_rate": 1.0606470217175814e-05, "epoch": 0.95, "step": 63075 }, { "loss": 1.1805, "grad_norm": 1.3540233373641968, "learning_rate": 1.0534378660899343e-05, "epoch": 0.95, "step": 63100 }, { "loss": 1.1223, "grad_norm": 1.7471164464950562, "learning_rate": 1.0459283289778018e-05, "epoch": 0.95, "step": 63125 }, { "loss": 1.0884, "grad_norm": 1.466888189315796, "learning_rate": 1.0384187918656695e-05, "epoch": 0.95, "step": 63150 }, { "loss": 1.1307, "grad_norm": 1.0170552730560303, "learning_rate": 1.0309092547535371e-05, "epoch": 0.95, "step": 63175 }, { "loss": 1.0969, "grad_norm": 1.400824785232544, "learning_rate": 1.0233997176414046e-05, "epoch": 0.95, "step": 63200 }, { "loss": 1.1182, "grad_norm": 1.231128454208374, "learning_rate": 1.0158901805292722e-05, "epoch": 0.95, "step": 63225 }, { "loss": 1.1886, "grad_norm": 1.5293277502059937, "learning_rate": 1.0083806434171399e-05, "epoch": 0.95, "step": 63250 }, { "loss": 1.1112, "grad_norm": 1.315816879272461, "learning_rate": 1.0008711063050074e-05, "epoch": 0.95, "step": 63275 }, { "loss": 1.1224, "grad_norm": 1.0503865480422974, "learning_rate": 9.93361569192875e-06, "epoch": 0.95, "step": 63300 }, { "loss": 1.1516, "grad_norm": 1.5667177438735962, "learning_rate": 9.858520320807425e-06, "epoch": 0.95, "step": 63325 }, { "loss": 1.2137, "grad_norm": 1.9724977016448975, "learning_rate": 9.783424949686101e-06, "epoch": 0.95, "step": 63350 }, { "loss": 1.1568, "grad_norm": 1.0087287425994873, "learning_rate": 9.708329578564778e-06, "epoch": 0.95, "step": 63375 }, { "loss": 1.0902, "grad_norm": 1.067909836769104, "learning_rate": 9.633234207443454e-06, "epoch": 0.95, "step": 63400 }, { "loss": 1.1043, "grad_norm": 2.0196101665496826, "learning_rate": 9.558138836322129e-06, "epoch": 0.95, "step": 63425 }, { "loss": 1.0683, "grad_norm": 1.6897556781768799, "learning_rate": 9.483043465200806e-06, "epoch": 0.95, "step": 63450 }, { "loss": 1.1969, "grad_norm": 1.4092940092086792, "learning_rate": 9.407948094079482e-06, "epoch": 0.95, "step": 63475 }, { "loss": 1.1159, "grad_norm": 1.5447856187820435, "learning_rate": 9.332852722958157e-06, "epoch": 0.95, "step": 63500 }, { "loss": 1.119, "grad_norm": 1.5372124910354614, "learning_rate": 9.257757351836834e-06, "epoch": 0.95, "step": 63525 }, { "loss": 1.1478, "grad_norm": 1.2936185598373413, "learning_rate": 9.18266198071551e-06, "epoch": 0.95, "step": 63550 }, { "loss": 1.1206, "grad_norm": 0.9974470138549805, "learning_rate": 9.107566609594185e-06, "epoch": 0.95, "step": 63575 }, { "loss": 1.1306, "grad_norm": 1.8973299264907837, "learning_rate": 9.03247123847286e-06, "epoch": 0.96, "step": 63600 }, { "loss": 1.1003, "grad_norm": 1.2269550561904907, "learning_rate": 8.957375867351536e-06, "epoch": 0.96, "step": 63625 }, { "loss": 1.089, "grad_norm": 0.9575774073600769, "learning_rate": 8.882280496230212e-06, "epoch": 0.96, "step": 63650 }, { "loss": 1.1122, "grad_norm": 1.47458016872406, "learning_rate": 8.807185125108889e-06, "epoch": 0.96, "step": 63675 }, { "loss": 1.0881, "grad_norm": 1.407483696937561, "learning_rate": 8.732089753987564e-06, "epoch": 0.96, "step": 63700 }, { "loss": 1.1247, "grad_norm": 1.4554179906845093, "learning_rate": 8.65699438286624e-06, "epoch": 0.96, "step": 63725 }, { "loss": 1.1963, "grad_norm": 1.2854880094528198, "learning_rate": 8.581899011744917e-06, "epoch": 0.96, "step": 63750 }, { "loss": 1.1419, "grad_norm": 1.089011311531067, "learning_rate": 8.506803640623593e-06, "epoch": 0.96, "step": 63775 }, { "loss": 1.0494, "grad_norm": 1.1109488010406494, "learning_rate": 8.431708269502268e-06, "epoch": 0.96, "step": 63800 }, { "loss": 1.084, "grad_norm": 1.5390805006027222, "learning_rate": 8.356612898380945e-06, "epoch": 0.96, "step": 63825 }, { "loss": 1.0779, "grad_norm": 1.3624422550201416, "learning_rate": 8.28151752725962e-06, "epoch": 0.96, "step": 63850 }, { "loss": 1.092, "grad_norm": 1.3689720630645752, "learning_rate": 8.206422156138296e-06, "epoch": 0.96, "step": 63875 }, { "loss": 1.1746, "grad_norm": 1.2376459836959839, "learning_rate": 8.131326785016973e-06, "epoch": 0.96, "step": 63900 }, { "loss": 1.147, "grad_norm": 1.5905089378356934, "learning_rate": 8.056231413895649e-06, "epoch": 0.96, "step": 63925 }, { "loss": 1.1585, "grad_norm": 2.2680752277374268, "learning_rate": 7.981136042774323e-06, "epoch": 0.96, "step": 63950 }, { "loss": 1.1892, "grad_norm": 1.5471032857894897, "learning_rate": 7.906040671653e-06, "epoch": 0.96, "step": 63975 }, { "loss": 1.1173, "grad_norm": 1.456756591796875, "learning_rate": 7.830945300531675e-06, "epoch": 0.96, "step": 64000 }, { "loss": 1.0896, "grad_norm": 1.550498604774475, "learning_rate": 7.75584992941035e-06, "epoch": 0.96, "step": 64025 }, { "loss": 1.0944, "grad_norm": 1.8201286792755127, "learning_rate": 7.680754558289028e-06, "epoch": 0.96, "step": 64050 }, { "loss": 1.1145, "grad_norm": 1.392923355102539, "learning_rate": 7.605659187167703e-06, "epoch": 0.96, "step": 64075 }, { "loss": 1.103, "grad_norm": 2.5812623500823975, "learning_rate": 7.53056381604638e-06, "epoch": 0.96, "step": 64100 }, { "loss": 1.1365, "grad_norm": 1.7856642007827759, "learning_rate": 7.455468444925055e-06, "epoch": 0.96, "step": 64125 }, { "loss": 1.0761, "grad_norm": 1.8361400365829468, "learning_rate": 7.3803730738037315e-06, "epoch": 0.96, "step": 64150 }, { "loss": 1.0984, "grad_norm": 1.183370590209961, "learning_rate": 7.305277702682407e-06, "epoch": 0.96, "step": 64175 }, { "loss": 1.1064, "grad_norm": 1.8606791496276855, "learning_rate": 7.230182331561083e-06, "epoch": 0.96, "step": 64200 }, { "loss": 1.1489, "grad_norm": 1.3013999462127686, "learning_rate": 7.15508696043976e-06, "epoch": 0.96, "step": 64225 }, { "loss": 1.0736, "grad_norm": 1.1197832822799683, "learning_rate": 7.079991589318435e-06, "epoch": 0.96, "step": 64250 }, { "loss": 1.05, "grad_norm": 1.160477876663208, "learning_rate": 7.00489621819711e-06, "epoch": 0.97, "step": 64275 }, { "loss": 1.1556, "grad_norm": 1.7113288640975952, "learning_rate": 6.929800847075786e-06, "epoch": 0.97, "step": 64300 }, { "loss": 1.154, "grad_norm": 0.7315987348556519, "learning_rate": 6.854705475954462e-06, "epoch": 0.97, "step": 64325 }, { "loss": 1.086, "grad_norm": 1.7214363813400269, "learning_rate": 6.779610104833138e-06, "epoch": 0.97, "step": 64350 }, { "loss": 1.0921, "grad_norm": 0.8723170161247253, "learning_rate": 6.704514733711814e-06, "epoch": 0.97, "step": 64375 }, { "loss": 1.1255, "grad_norm": 1.9772207736968994, "learning_rate": 6.6294193625904905e-06, "epoch": 0.97, "step": 64400 }, { "loss": 1.1423, "grad_norm": 2.272956371307373, "learning_rate": 6.554323991469166e-06, "epoch": 0.97, "step": 64425 }, { "loss": 1.1113, "grad_norm": 1.6277108192443848, "learning_rate": 6.479228620347842e-06, "epoch": 0.97, "step": 64450 }, { "loss": 1.0637, "grad_norm": 1.5888078212738037, "learning_rate": 6.404133249226519e-06, "epoch": 0.97, "step": 64475 }, { "loss": 1.1832, "grad_norm": 1.4354815483093262, "learning_rate": 6.329037878105194e-06, "epoch": 0.97, "step": 64500 }, { "loss": 1.0812, "grad_norm": 1.2866464853286743, "learning_rate": 6.2539425069838705e-06, "epoch": 0.97, "step": 64525 }, { "loss": 1.0742, "grad_norm": 1.206624984741211, "learning_rate": 6.178847135862546e-06, "epoch": 0.97, "step": 64550 }, { "loss": 1.1, "grad_norm": 1.9013807773590088, "learning_rate": 6.1037517647412214e-06, "epoch": 0.97, "step": 64575 }, { "loss": 1.1164, "grad_norm": 1.2918732166290283, "learning_rate": 6.028656393619898e-06, "epoch": 0.97, "step": 64600 }, { "loss": 1.126, "grad_norm": 0.9611725211143494, "learning_rate": 5.953561022498574e-06, "epoch": 0.97, "step": 64625 }, { "loss": 1.1867, "grad_norm": 1.8491181135177612, "learning_rate": 5.8784656513772496e-06, "epoch": 0.97, "step": 64650 }, { "loss": 1.1415, "grad_norm": 1.3857682943344116, "learning_rate": 5.803370280255925e-06, "epoch": 0.97, "step": 64675 }, { "loss": 1.1016, "grad_norm": 1.7419966459274292, "learning_rate": 5.728274909134601e-06, "epoch": 0.97, "step": 64700 }, { "loss": 1.1174, "grad_norm": 2.3053975105285645, "learning_rate": 5.653179538013277e-06, "epoch": 0.97, "step": 64725 }, { "loss": 1.125, "grad_norm": 1.3925187587738037, "learning_rate": 5.578084166891953e-06, "epoch": 0.97, "step": 64750 }, { "loss": 1.0828, "grad_norm": 2.014289140701294, "learning_rate": 5.5029887957706295e-06, "epoch": 0.97, "step": 64775 }, { "loss": 1.1461, "grad_norm": 2.213609457015991, "learning_rate": 5.427893424649305e-06, "epoch": 0.97, "step": 64800 }, { "loss": 1.0558, "grad_norm": 1.0734851360321045, "learning_rate": 5.3527980535279805e-06, "epoch": 0.97, "step": 64825 }, { "loss": 1.1006, "grad_norm": 1.362658977508545, "learning_rate": 5.277702682406657e-06, "epoch": 0.97, "step": 64850 }, { "loss": 1.1512, "grad_norm": 1.9621925354003906, "learning_rate": 5.202607311285332e-06, "epoch": 0.97, "step": 64875 }, { "loss": 1.0506, "grad_norm": 1.6093008518218994, "learning_rate": 5.127511940164009e-06, "epoch": 0.97, "step": 64900 }, { "loss": 1.174, "grad_norm": 2.4825665950775146, "learning_rate": 5.052416569042685e-06, "epoch": 0.98, "step": 64925 }, { "loss": 1.0542, "grad_norm": 1.142391562461853, "learning_rate": 4.97732119792136e-06, "epoch": 0.98, "step": 64950 }, { "loss": 1.158, "grad_norm": 2.0994620323181152, "learning_rate": 4.902225826800036e-06, "epoch": 0.98, "step": 64975 }, { "loss": 1.1055, "grad_norm": 1.4533177614212036, "learning_rate": 4.827130455678712e-06, "epoch": 0.98, "step": 65000 }, { "loss": 1.1457, "grad_norm": 2.113051176071167, "learning_rate": 4.752035084557388e-06, "epoch": 0.98, "step": 65025 }, { "loss": 1.1301, "grad_norm": 1.4814103841781616, "learning_rate": 4.676939713436064e-06, "epoch": 0.98, "step": 65050 }, { "loss": 1.1091, "grad_norm": 1.3998606204986572, "learning_rate": 4.60184434231474e-06, "epoch": 0.98, "step": 65075 }, { "loss": 1.1468, "grad_norm": 1.4728342294692993, "learning_rate": 4.526748971193416e-06, "epoch": 0.98, "step": 65100 }, { "loss": 1.1008, "grad_norm": 1.29282808303833, "learning_rate": 4.451653600072092e-06, "epoch": 0.98, "step": 65125 }, { "loss": 1.0818, "grad_norm": 0.9691277146339417, "learning_rate": 4.376558228950768e-06, "epoch": 0.98, "step": 65150 }, { "loss": 1.1651, "grad_norm": 1.5705621242523193, "learning_rate": 4.301462857829443e-06, "epoch": 0.98, "step": 65175 }, { "loss": 1.0648, "grad_norm": 1.7766458988189697, "learning_rate": 4.2263674867081194e-06, "epoch": 0.98, "step": 65200 }, { "loss": 1.0789, "grad_norm": 1.3525621891021729, "learning_rate": 4.151272115586796e-06, "epoch": 0.98, "step": 65225 }, { "loss": 1.0551, "grad_norm": 1.631650447845459, "learning_rate": 4.076176744465471e-06, "epoch": 0.98, "step": 65250 }, { "loss": 1.1308, "grad_norm": 1.7099614143371582, "learning_rate": 4.0010813733441476e-06, "epoch": 0.98, "step": 65275 }, { "loss": 1.1203, "grad_norm": 1.104038119316101, "learning_rate": 3.925986002222823e-06, "epoch": 0.98, "step": 65300 }, { "loss": 1.0968, "grad_norm": 1.4031529426574707, "learning_rate": 3.8508906311014985e-06, "epoch": 0.98, "step": 65325 }, { "loss": 1.15, "grad_norm": 2.0685653686523438, "learning_rate": 3.775795259980175e-06, "epoch": 0.98, "step": 65350 }, { "loss": 1.0778, "grad_norm": 1.4602687358856201, "learning_rate": 3.7006998888588508e-06, "epoch": 0.98, "step": 65375 }, { "loss": 1.1119, "grad_norm": 1.377066969871521, "learning_rate": 3.625604517737527e-06, "epoch": 0.98, "step": 65400 }, { "loss": 1.1806, "grad_norm": 1.3793482780456543, "learning_rate": 3.550509146616203e-06, "epoch": 0.98, "step": 65425 }, { "loss": 1.0924, "grad_norm": 1.323262095451355, "learning_rate": 3.4754137754948785e-06, "epoch": 0.98, "step": 65450 }, { "loss": 1.1011, "grad_norm": 1.6005733013153076, "learning_rate": 3.4003184043735544e-06, "epoch": 0.98, "step": 65475 }, { "loss": 1.1188, "grad_norm": 1.2906062602996826, "learning_rate": 3.3252230332522303e-06, "epoch": 0.98, "step": 65500 }, { "loss": 1.0887, "grad_norm": 2.869511365890503, "learning_rate": 3.250127662130906e-06, "epoch": 0.98, "step": 65525 }, { "loss": 1.1348, "grad_norm": 1.084037184715271, "learning_rate": 3.1750322910095825e-06, "epoch": 0.98, "step": 65550 }, { "loss": 1.1218, "grad_norm": 1.7096983194351196, "learning_rate": 3.099936919888258e-06, "epoch": 0.98, "step": 65575 }, { "loss": 1.134, "grad_norm": 2.19433856010437, "learning_rate": 3.0278453636117873e-06, "epoch": 0.99, "step": 65600 }, { "loss": 1.1709, "grad_norm": 2.7771689891815186, "learning_rate": 2.9527499924904628e-06, "epoch": 0.99, "step": 65625 }, { "loss": 1.1184, "grad_norm": 1.367202877998352, "learning_rate": 2.877654621369139e-06, "epoch": 0.99, "step": 65650 }, { "loss": 1.1393, "grad_norm": 1.163167953491211, "learning_rate": 2.802559250247815e-06, "epoch": 0.99, "step": 65675 }, { "loss": 1.1185, "grad_norm": 1.9196585416793823, "learning_rate": 2.727463879126491e-06, "epoch": 0.99, "step": 65700 }, { "loss": 1.0776, "grad_norm": 1.1097601652145386, "learning_rate": 2.652368508005167e-06, "epoch": 0.99, "step": 65725 }, { "loss": 1.1115, "grad_norm": 1.8407388925552368, "learning_rate": 2.5772731368838427e-06, "epoch": 0.99, "step": 65750 }, { "loss": 1.1397, "grad_norm": 1.3508464097976685, "learning_rate": 2.5021777657625186e-06, "epoch": 0.99, "step": 65775 }, { "loss": 1.0408, "grad_norm": 1.3656666278839111, "learning_rate": 2.4270823946411945e-06, "epoch": 0.99, "step": 65800 }, { "loss": 1.1232, "grad_norm": 1.121551275253296, "learning_rate": 2.3519870235198704e-06, "epoch": 0.99, "step": 65825 }, { "loss": 1.1695, "grad_norm": 3.1583876609802246, "learning_rate": 2.2768916523985463e-06, "epoch": 0.99, "step": 65850 }, { "loss": 1.114, "grad_norm": 1.4626102447509766, "learning_rate": 2.2017962812772222e-06, "epoch": 0.99, "step": 65875 }, { "loss": 1.1404, "grad_norm": 1.164562702178955, "learning_rate": 2.126700910155898e-06, "epoch": 0.99, "step": 65900 }, { "loss": 1.0749, "grad_norm": 1.151390790939331, "learning_rate": 2.051605539034574e-06, "epoch": 0.99, "step": 65925 }, { "loss": 1.1223, "grad_norm": 1.4878361225128174, "learning_rate": 1.97651016791325e-06, "epoch": 0.99, "step": 65950 }, { "loss": 1.0713, "grad_norm": 0.9274216294288635, "learning_rate": 1.9014147967919258e-06, "epoch": 0.99, "step": 65975 }, { "loss": 1.0495, "grad_norm": 1.1772902011871338, "learning_rate": 1.8263194256706017e-06, "epoch": 0.99, "step": 66000 }, { "loss": 1.1357, "grad_norm": 1.2464003562927246, "learning_rate": 1.7512240545492774e-06, "epoch": 0.99, "step": 66025 }, { "loss": 1.0778, "grad_norm": 1.813460350036621, "learning_rate": 1.6761286834279536e-06, "epoch": 0.99, "step": 66050 }, { "loss": 1.1034, "grad_norm": 1.6727650165557861, "learning_rate": 1.6010333123066297e-06, "epoch": 0.99, "step": 66075 }, { "loss": 1.1252, "grad_norm": 1.8909765481948853, "learning_rate": 1.5259379411853054e-06, "epoch": 0.99, "step": 66100 }, { "loss": 1.0249, "grad_norm": 1.8321037292480469, "learning_rate": 1.4508425700639813e-06, "epoch": 0.99, "step": 66125 }, { "loss": 1.0836, "grad_norm": 1.3860995769500732, "learning_rate": 1.3757471989426574e-06, "epoch": 0.99, "step": 66150 }, { "loss": 1.0984, "grad_norm": 1.2683864831924438, "learning_rate": 1.300651827821333e-06, "epoch": 0.99, "step": 66175 }, { "loss": 1.0977, "grad_norm": 2.86045503616333, "learning_rate": 1.225556456700009e-06, "epoch": 0.99, "step": 66200 }, { "loss": 1.138, "grad_norm": 1.2112616300582886, "learning_rate": 1.150461085578685e-06, "epoch": 0.99, "step": 66225 }, { "loss": 1.1231, "grad_norm": 1.550032615661621, "learning_rate": 1.0753657144573608e-06, "epoch": 0.99, "step": 66250 }, { "loss": 1.1238, "grad_norm": 1.13444185256958, "learning_rate": 1.0002703433360369e-06, "epoch": 1.0, "step": 66275 }, { "loss": 1.1818, "grad_norm": 2.8684732913970947, "learning_rate": 9.251749722147127e-07, "epoch": 1.0, "step": 66300 }, { "loss": 1.1398, "grad_norm": 1.3792351484298706, "learning_rate": 8.500796010933886e-07, "epoch": 1.0, "step": 66325 }, { "loss": 1.1148, "grad_norm": 1.5899792909622192, "learning_rate": 7.749842299720645e-07, "epoch": 1.0, "step": 66350 }, { "loss": 1.1708, "grad_norm": 2.143692970275879, "learning_rate": 6.998888588507405e-07, "epoch": 1.0, "step": 66375 }, { "loss": 1.1092, "grad_norm": 1.2674062252044678, "learning_rate": 6.247934877294164e-07, "epoch": 1.0, "step": 66400 }, { "loss": 1.0326, "grad_norm": 1.1335889101028442, "learning_rate": 5.496981166080923e-07, "epoch": 1.0, "step": 66425 }, { "loss": 1.0948, "grad_norm": 1.5896003246307373, "learning_rate": 4.7460274548676816e-07, "epoch": 1.0, "step": 66450 }, { "loss": 1.1036, "grad_norm": 1.4150667190551758, "learning_rate": 3.995073743654441e-07, "epoch": 1.0, "step": 66475 }, { "loss": 1.14, "grad_norm": 1.4912337064743042, "learning_rate": 3.244120032441201e-07, "epoch": 1.0, "step": 66500 }, { "loss": 1.0704, "grad_norm": 1.5823650360107422, "learning_rate": 2.493166321227959e-07, "epoch": 1.0, "step": 66525 }, { "loss": 1.1301, "grad_norm": 1.9806722402572632, "learning_rate": 1.7422126100147188e-07, "epoch": 1.0, "step": 66550 }, { "loss": 1.1368, "grad_norm": 1.6522107124328613, "learning_rate": 9.91258898801478e-08, "epoch": 1.0, "step": 66575 }, { "train_runtime": 164326.412, "train_samples_per_second": 0.81, "train_steps_per_second": 0.405, "total_flos": 7.363589651988972e+17, "train_loss": 1.1616554066605727, "epoch": 1.0, "step": 66583 } ]